Data Visualization for Construction
Overview
Based on DDC methodology (Chapter 4.1), this skill provides comprehensive data visualization techniques for construction analytics. Visual insights drive better decisions - from cost breakdowns to schedule analysis.
Book Reference: "Аналитика данных и принятие решений" / "Data Analytics and Decision Making"
"Визуализация данных превращает сложные наборы данных в понятные графики, которые могут использоваться для принятия решений на всех уровнях проекта." — DDC Book, Chapter 4.1
Quick Start
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load construction data
df = pd.read_excel("project_data.xlsx")

# Quick bar chart - volumes by category
fig, ax = plt.subplots(figsize=(10, 6))
volume_by_category = df.groupby('Category')['Volume_m3'].sum()
volume_by_category.plot(kind='bar', ax=ax)
ax.set_title('Volume by Category')
ax.set_ylabel('Volume (m³)')
plt.tight_layout()
# Write the PNG before showing the figure interactively.
plt.savefig('volume_by_category.png', dpi=150)
plt.show()
Matplotlib Fundamentals
Basic Charts for Construction
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
def create_cost_breakdown_pie(df, cost_col='Cost', category_col='Category'):
    """Build a pie chart of summed cost per category.

    Sums ``cost_col`` within each ``category_col`` group, draws one wedge per
    group labelled with its percentage share, writes the grand total at the
    axes origin, and returns the Matplotlib figure.
    """
    totals_by_category = df.groupby(category_col)[cost_col].sum()
    grand_total = totals_by_category.sum()

    fig, ax = plt.subplots(figsize=(10, 8))

    # One wedge per category, annotated with its share of the total.
    ax.pie(
        totals_by_category.values,
        labels=totals_by_category.index,
        autopct='%1.1f%%',
        startangle=90,
        colors=plt.cm.Set3.colors,
    )
    ax.set_title('Cost Breakdown by Category', fontsize=14, fontweight='bold')

    # Overall total, anchored at data coordinates (0, 0).
    ax.text(
        0, 0, f'Total:\n${grand_total:,.0f}',
        ha='center', va='center', fontsize=12,
    )

    plt.tight_layout()
    return fig
def create_volume_bar_chart(df, volume_col='Volume_m3', category_col='Category'):
    """Build a horizontal bar chart of summed volume per category.

    Categories are ordered by ascending total, each bar gets a value label
    just past its end, and the Matplotlib figure is returned.
    """
    totals = df.groupby(category_col)[volume_col].sum().sort_values()
    largest = totals.max()

    fig, ax = plt.subplots(figsize=(10, 6))
    rects = ax.barh(totals.index, totals.values, color='steelblue')

    # Place each label 1% of the largest bar to the right of its own bar end.
    for rect, amount in zip(rects, totals.values):
        label_x = amount + largest * 0.01
        label_y = rect.get_y() + rect.get_height() / 2
        ax.text(label_x, label_y, f'{amount:,.0f} m³', va='center', fontsize=10)

    ax.set_xlabel('Volume (m³)')
    ax.set_title('Material Volumes by Category', fontsize=14, fontweight='bold')
    # Extra 15% on the x-axis leaves room for the value labels.
    ax.set_xlim(0, largest * 1.15)

    plt.tight_layout()
    return fig
def create_level_comparison(df, value_col='Volume_m3', level_col='Level',
                            category_col='Category'):
    """Create a grouped bar chart comparing summed values across levels.

    Args:
        df: DataFrame containing ``value_col``, ``level_col`` and
            ``category_col``.
        value_col: Column whose values are summed into each bar.
        level_col: Column whose distinct values become the x-axis groups.
        category_col: Column whose distinct values become one bar series
            each. Defaults to ``'Category'``, the column this function
            previously hard-coded.

    Returns:
        The Matplotlib figure holding the chart.
    """
    # Rows are levels, columns are categories; absent combinations become 0.
    pivot = df.pivot_table(
        values=value_col,
        index=level_col,
        columns=category_col,
        aggfunc='sum',
        fill_value=0,
    )

    fig, ax = plt.subplots(figsize=(12, 6))
    pivot.plot(kind='bar', ax=ax, width=0.8)

    ax.set_xlabel('Building Level')
    ax.set_ylabel('Volume (m³)')
    ax.set_title('Volume Distribution by Level and Category',
                 fontsize=14, fontweight='bold')
    # Legend sits outside the plot area, to the right.
    ax.legend(title=category_col, bbox_to_anchor=(1.02, 1), loc='upper left')

    # Rotate via the Axes itself rather than pyplot's "current axes" state.
    ax.tick_params(axis='x', labelrotation=45)

    plt.tight_layout()
    return fig
Time Series Visualization
def create_progress_chart(df, date_col='Date', value_col='Cumulative_Progress'):
    """Build an S-curve line chart of progress against date.

    Rows are sorted by ``date_col``. The ``value_col`` series is drawn as a
    solid line with a shaded area beneath it; a ``Planned_Progress`` column,
    when present, is drawn as a dashed line. Returns the Matplotlib figure.
    """
    ordered = df.sort_values(date_col)
    dates = ordered[date_col]
    actual = ordered[value_col]

    fig, ax = plt.subplots(figsize=(12, 6))

    ax.plot(dates, actual, 'b-', linewidth=2, label='Actual Progress')

    # The planned curve is optional and only drawn if the column exists.
    if 'Planned_Progress' in ordered.columns:
        ax.plot(dates, ordered['Planned_Progress'],
                'g--', linewidth=2, label='Planned Progress')

    # Shade the region between zero and the actual curve.
    ax.fill_between(dates, 0, actual, alpha=0.3)

    ax.set_xlabel('Date')
    ax.set_ylabel('Progress (%)')
    ax.set_title('Project S-Curve', fontsize=14, fontweight='bold')
    ax.legend()
    ax.grid(True, alpha=0.3)

    # Slant the date tick labels.
    fig.autofmt_xdate()

    plt.tight_layout()
    return fig
def create_gantt_chart(df, start_col='Start', end_col='End', task_col='Task'):
    """Create a simple Gantt chart with one horizontal bar per task.

    Args:
        df: DataFrame with one row per task.
        start_col: Column holding each task's start (anything
            ``pd.to_datetime`` accepts).
        end_col: Column holding each task's end (same requirement).
        task_col: Column whose values label the y-axis ticks.

    Returns:
        The Matplotlib figure; its height grows with the number of tasks.
    """
    df = df.sort_values(start_col)

    fig, ax = plt.subplots(figsize=(14, len(df) * 0.5 + 2))

    # Parse the date columns once instead of once per row.
    starts = pd.to_datetime(df[start_col])
    ends = pd.to_datetime(df[end_col])
    # Fractional days: using ``.days`` would truncate, giving tasks shorter
    # than a day a zero-width (invisible) bar.
    durations = (ends - starts) / pd.Timedelta(days=1)

    # Plot each task as a horizontal bar at its row position.
    for row_pos, (start, duration) in enumerate(zip(starts, durations)):
        ax.barh(row_pos, duration, left=start, height=0.6,
                align='center', color='steelblue', alpha=0.8)

    ax.set_yticks(range(len(df)))
    ax.set_yticklabels(df[task_col])
    ax.set_xlabel('Date')
    ax.set_title('Project Schedule - Gantt Chart', fontsize=14, fontweight='bold')
    ax.grid(axis='x', alpha=0.3)

    fig.autofmt_xdate()
    plt.tight_layout()
    return fig
Seaborn for Statistical Visualization
Distribution Analysis
import seaborn as sns
def create_distribution_analysis(df, value_col='Volume_m3', category_col='Category'):
    """Create a 2x2 grid of distribution plots for one numeric column.

    Panels, left to right and top to bottom: histogram with KDE of
    ``value_col``; box plot, violin plot and jittered strip plot of
    ``value_col`` grouped by ``category_col``.

    Args:
        df: DataFrame containing ``value_col`` and ``category_col``.
        value_col: Numeric column to analyse.
        category_col: Grouping column for the three per-category panels.

    Returns:
        The Matplotlib figure holding the four panels.
    """
    fig, axes = plt.subplots(2, 2, figsize=(14, 10))
    hist_ax, box_ax = axes[0]
    violin_ax, strip_ax = axes[1]

    # 1. Histogram with KDE
    sns.histplot(data=df, x=value_col, kde=True, ax=hist_ax)
    hist_ax.set_title('Volume Distribution')

    # 2. Box plot by category
    sns.boxplot(data=df, x=category_col, y=value_col, ax=box_ax)
    box_ax.set_title('Volume by Category')

    # 3. Violin plot
    sns.violinplot(data=df, x=category_col, y=value_col, ax=violin_ax)
    violin_ax.set_title('Volume Distribution by Category')

    # 4. Strip plot with jitter
    sns.stripplot(data=df, x=category_col, y=value_col,
                  ax=strip_ax, alpha=0.5, jitter=True)
    strip_ax.set_title('Individual Elements')

    # Rotate category labels through tick_params. Re-assigning the labels
    # with set_xticklabels() replaces the axis formatter and can trigger
    # Matplotlib's fixed-ticks UserWarning.
    for categorical_ax in (box_ax, violin_ax, strip_ax):
        categorical_ax.tick_params(axis='x', labelrotation=45)

    plt.tight_layout()
    return fig
def create_correlation_heatmap(df, numeric_cols=None):
    """Build an annotated heatmap of pairwise column correlations.

    When ``numeric_cols`` is ``None``, every column of numeric dtype in
    ``df`` is used. Returns the Matplotlib figure.
    """
    columns = (
        df.select_dtypes(include=[np.number]).columns.tolist()
        if numeric_cols is None
        else numeric_cols
    )
    correlations = df[columns].corr()

    fig, ax = plt.subplots(figsize=(10, 8))

    # Colour scale centred on zero; each cell shows its value to 2 decimals.
    heatmap_options = dict(
        annot=True,
        cmap='RdYlBu_r',
        center=0,
        fmt='.2f',
        square=True,
        ax=ax,
    )
    sns.heatmap(correlations, **heatmap_options)

    ax.set_title('Correlation Matrix', fontsize=14, fontweight='bold')
    plt.tight_layout()
    return fig
Category Analysis
def create_category_summary(df, category_col='Category', value_col='Volume_m3', cost_col='Cost'):
    """Create a 2x2 summary of a dataset grouped by category.

    Panels: row count per category, summed ``value_col`` per category,
    mean ``cost_col`` per category, and a ``value_col`` vs ``cost_col``
    scatter coloured by category. The two cost panels are drawn only when
    ``cost_col`` exists in ``df``; otherwise they are blanked.

    Args:
        df: DataFrame containing ``category_col`` and ``value_col``.
        category_col: Grouping column.
        value_col: Numeric column summed per category.
        cost_col: Optional numeric cost column.

    Returns:
        The Matplotlib figure holding the four panels.
    """
    fig, axes = plt.subplots(2, 2, figsize=(14, 10))
    count_ax, volume_ax = axes[0]
    avg_cost_ax, scatter_ax = axes[1]

    # 1. Count by category
    category_counts = df[category_col].value_counts()
    sns.barplot(x=category_counts.index, y=category_counts.values, ax=count_ax)
    count_ax.set_title('Element Count by Category')

    # 2. Total volume by category
    volumes = df.groupby(category_col)[value_col].sum().sort_values(ascending=False)
    sns.barplot(x=volumes.index, y=volumes.values, ax=volume_ax)
    volume_ax.set_title('Total Volume by Category')

    rotated_axes = [count_ax, volume_ax]

    if cost_col in df.columns:
        # 3. Average cost by category
        avg_cost = df.groupby(category_col)[cost_col].mean().sort_values(ascending=False)
        sns.barplot(x=avg_cost.index, y=avg_cost.values, ax=avg_cost_ax)
        avg_cost_ax.set_title('Average Cost by Category')
        rotated_axes.append(avg_cost_ax)

        # 4. Volume vs Cost scatter
        sns.scatterplot(data=df, x=value_col, y=cost_col,
                        hue=category_col, alpha=0.7, ax=scatter_ax)
        scatter_ax.set_title('Volume vs Cost')
        scatter_ax.legend(bbox_to_anchor=(1.02, 1), loc='upper left')
    else:
        # No cost data: blank both cost panels rather than leave empty frames.
        avg_cost_ax.set_axis_off()
        scatter_ax.set_axis_off()

    # Rotate category labels through tick_params. Re-assigning the labels
    # with set_xticklabels() replaces the axis formatter and can trigger
    # Matplotlib's fixed-ticks UserWarning.
    for bar_ax in rotated_axes:
        bar_ax.tick_params(axis='x', labelrotation=45)

    plt.tight_layout()
    return fig
Plotly for Interactive Dashboards
Interactive Charts
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
def create_interactive_cost_breakdown(df, category_col='Category', cost_col='Cost',
                                      material_col='Material'):
    """Create an interactive sunburst chart of cost by category and material.

    Args:
        df: DataFrame containing ``category_col``, ``material_col`` and
            ``cost_col``.
        category_col: Column used for the inner ring.
        cost_col: Numeric column summed to size each sector.
        material_col: Column used for the outer ring. Defaults to
            ``'Material'``, the column this function previously hard-coded.

    Returns:
        The Plotly figure.
    """
    # Aggregate to one row per (category, material) pair.
    agg_df = df.groupby([category_col, material_col])[cost_col].sum().reset_index()

    fig = px.sunburst(
        agg_df,
        path=[category_col, material_col],
        values=cost_col,
        title='Cost Breakdown by Category and Material',
    )
    fig.update_layout(height=600)
    return fig
def create_interactive_3d_scatter(df, x_col='Volume_m3', y_col='Cost', z_col='Weight_kg',
                                  color_col='Category', id_col='ElementId'):
    """Create an interactive 3D scatter plot of three numeric columns.

    Args:
        df: DataFrame containing ``x_col``, ``y_col``, ``z_col`` and
            ``color_col``.
        x_col: Column plotted on the x-axis.
        y_col: Column plotted on the y-axis.
        z_col: Column plotted on the z-axis.
        color_col: Column used to colour the points.
        id_col: Column added to the hover tooltip. It is used only when
            present in ``df``, so data without it still plots.

    Returns:
        The Plotly figure.
    """
    # The identifier is tooltip-only; omit it rather than fail when absent.
    hover_cols = [id_col] if id_col in df.columns else None

    fig = px.scatter_3d(
        df,
        x=x_col,
        y=y_col,
        z=z_col,
        color=color_col,
        hover_data=hover_cols,
        title='3D Analysis: Volume vs Cost vs Weight',
    )
    fig.update_layout(height=700)
    return fig
def create_interactive_timeline(df, date_col='Date', value_col='Progress', category_col='Phase'):
    """Build an interactive line chart over time, one line per category.

    The x-axis is typed as a date axis and carries a range slider plus
    1/3/6-month and "All" range-selector buttons. Returns the Plotly figure.
    """
    fig = px.line(
        df,
        x=date_col,
        y=value_col,
        color=category_col,
        title='Project Progress Timeline',
    )

    # Backward-looking month windows, followed by a reset-to-everything button.
    month_buttons = [
        dict(count=months, label=f"{months}m", step="month", stepmode="backward")
        for months in (1, 3, 6)
    ]
    selector_buttons = month_buttons + [dict(step="all", label="All")]

    date_axis = dict(
        rangeselector=dict(buttons=selector_buttons),
        rangeslider=dict(visible=True),
        type="date",
    )
    fig.update_layout(xaxis=date_axis, height=500)
    return fig
Dashboard Layout
def create_project_dashboard(df):
    """Build a 2x2 Plotly dashboard from a project DataFrame.

    Reads the ``Category``, ``Cost``, ``Volume_m3`` and ``Level`` columns.
    The fourth panel is filled only when both ``Date`` and ``Progress``
    columns exist. Returns the Plotly figure.
    """
    panel_titles = (
        'Cost by Category', 'Volume Distribution',
        'Elements by Level', 'Progress Over Time',
    )
    panel_specs = [
        [{"type": "pie"}, {"type": "bar"}],
        [{"type": "bar"}, {"type": "scatter"}],
    ]
    fig = make_subplots(rows=2, cols=2, subplot_titles=panel_titles, specs=panel_specs)

    by_category = df.groupby('Category')

    # Top-left: share of summed cost per category.
    cost_totals = by_category['Cost'].sum()
    cost_pie = go.Pie(labels=cost_totals.index, values=cost_totals.values, name='Cost')
    fig.add_trace(cost_pie, row=1, col=1)

    # Top-right: summed volume per category as horizontal bars, ascending.
    volume_totals = by_category['Volume_m3'].sum().sort_values(ascending=True)
    volume_bars = go.Bar(x=volume_totals.values, y=volume_totals.index,
                         orientation='h', name='Volume')
    fig.add_trace(volume_bars, row=1, col=2)

    # Bottom-left: number of rows per level.
    rows_per_level = df.groupby('Level').size()
    level_bars = go.Bar(x=rows_per_level.index, y=rows_per_level.values, name='Count')
    fig.add_trace(level_bars, row=2, col=1)

    # Bottom-right: progress over time, only when both columns are available.
    has_progress = 'Date' in df.columns and 'Progress' in df.columns
    if has_progress:
        progress_line = go.Scatter(x=df['Date'], y=df['Progress'],
                                   mode='lines+markers', name='Progress')
        fig.add_trace(progress_line, row=2, col=2)

    fig.update_layout(
        height=800,
        title_text='Project Analytics Dashboard',
        showlegend=False,
    )
    return fig
Construction-Specific Visualizations
Heatmaps for Level Analysis
def create_level_heatmap(df, level_col='Level', category_col='Category', value_col='Volume_m3'):
    """Build an annotated heatmap of summed values per level and category.

    Rows are ``level_col`` values and columns are ``category_col`` values.
    Combinations with no data are shown as 0. Returns the Matplotlib figure.
    """
    level_by_category = df.pivot_table(
        values=value_col,
        index=level_col,
        columns=category_col,
        aggfunc='sum',
        fill_value=0,
    )

    fig, ax = plt.subplots(figsize=(12, 8))

    # Cells show thousands-separated integers; the colour bar is labelled.
    heatmap_options = dict(
        annot=True,
        fmt=',.0f',
        cmap='YlOrRd',
        ax=ax,
        cbar_kws={'label': 'Volume (m³)'},
    )
    sns.heatmap(level_by_category, **heatmap_options)

    ax.set_title('Volume Distribution: Level × Category', fontsize=14, fontweight='bold')
    plt.tight_layout()
    return fig
def create_material_treemap(df, category_col='Category', material_col='Material', value_col='Volume_m3'):
    """Build a treemap of summed values nested by category, then material.

    Tile size and tile colour both follow the summed ``value_col``.
    Returns the Plotly figure.
    """
    hierarchy = [category_col, material_col]
    # One row per (category, material) pair.
    totals = df.groupby(hierarchy)[value_col].sum().reset_index()

    fig = px.treemap(
        totals,
        path=hierarchy,
        values=value_col,
        title='Material Distribution Treemap',
        color=value_col,
        color_continuous_scale='Blues',
    )
    fig.update_layout(height=600)
    return fig
Cost Analysis Charts
def create_cost_analysis_dashboard(df):
    """Create a 2x3 grid of cost-analysis charts.

    Reads the ``Cost``, ``Category``, ``Volume_m3`` and ``ElementId`` columns
    of ``df``. The input DataFrame is not modified.

    Panels: cost histogram with mean/median markers; cost box plot per
    category; cumulative cost percentage with an 80% reference line; mean
    cost per m³ per category; the ten highest-cost rows; cost vs volume
    with a regression line.

    Args:
        df: DataFrame with the columns listed above.

    Returns:
        The Matplotlib figure holding the six panels.
    """
    fig, axes = plt.subplots(2, 3, figsize=(18, 10))
    costs = df['Cost']

    # 1. Cost distribution histogram
    hist_ax = axes[0, 0]
    sns.histplot(data=df, x='Cost', bins=30, ax=hist_ax)
    hist_ax.set_title('Cost Distribution')
    hist_ax.axvline(costs.mean(), color='r', linestyle='--', label='Mean')
    hist_ax.axvline(costs.median(), color='g', linestyle='--', label='Median')
    hist_ax.legend()

    # 2. Cost by category (box plot)
    box_ax = axes[0, 1]
    sns.boxplot(data=df, x='Category', y='Cost', ax=box_ax)
    # tick_params rotates the labels without replacing the axis formatter;
    # set_xticklabels() can trigger Matplotlib's fixed-ticks UserWarning.
    box_ax.tick_params(axis='x', labelrotation=45)
    box_ax.set_title('Cost Range by Category')

    # 3. Cumulative cost, largest costs first
    pareto_ax = axes[0, 2]
    cumulative_pct = costs.sort_values(ascending=False).cumsum() / costs.sum() * 100
    pareto_ax.plot(range(len(cumulative_pct)), cumulative_pct)
    pareto_ax.axhline(80, color='r', linestyle='--', alpha=0.5)
    pareto_ax.set_xlabel('Number of Elements')
    pareto_ax.set_ylabel('Cumulative Cost %')
    pareto_ax.set_title('Pareto Analysis (80/20)')

    # 4. Cost per unit volume. Kept in a local Series so the caller's
    # DataFrame does not gain a 'Cost_per_m3' column as a side effect.
    # Zero volumes become NaN to avoid division by zero.
    cost_per_m3 = costs / df['Volume_m3'].replace(0, np.nan)
    by_cat = cost_per_m3.groupby(df['Category']).mean().sort_values(ascending=True)
    axes[1, 0].barh(by_cat.index, by_cat.values)
    axes[1, 0].set_title('Average Cost per m³ by Category')

    # 5. Top 10 elements by cost. IDs are cast to str so numeric IDs are
    # treated as category labels, not as bar positions on a numeric axis.
    top10 = df.nlargest(10, 'Cost')
    axes[1, 1].barh(top10['ElementId'].astype(str), top10['Cost'])
    axes[1, 1].set_title('Top 10 Elements by Cost')

    # 6. Cost vs Volume scatter with regression
    sns.regplot(data=df, x='Volume_m3', y='Cost', ax=axes[1, 2],
                scatter_kws={'alpha': 0.5})
    axes[1, 2].set_title('Cost vs Volume (with Trend)')

    plt.tight_layout()
    return fig
Export and Reporting
Save Visualizations
def save_all_visualizations(df, output_dir='reports/charts'):
    """Render the standard charts and write each one as a PNG.

    Creates ``output_dir`` if needed, builds the cost pie, volume bars,
    distribution grid and level heatmap from ``df``, saves each as
    ``<output_dir>/<name>.png`` and closes it. Returns the list of paths
    written, in that order.
    """
    import os

    os.makedirs(output_dir, exist_ok=True)

    # All figures are built first, then saved one by one.
    builders = (
        ('cost_breakdown', create_cost_breakdown_pie),
        ('volume_bars', create_volume_bar_chart),
        ('distribution', create_distribution_analysis),
        ('level_heatmap', create_level_heatmap),
    )
    figures = [(name, build(df)) for name, build in builders]

    written = []
    for name, figure in figures:
        target = f"{output_dir}/{name}.png"
        figure.savefig(target, dpi=150, bbox_inches='tight')
        # Close each figure once it is on disk.
        plt.close(figure)
        written.append(target)

    return written
def create_pdf_report(df, output_path='project_report.pdf'):
    """Write a four-page PDF with one chart per page.

    Pages, in order: cost breakdown pie, volume bar chart, distribution
    grid, level heatmap. Each figure is closed after being added.
    Returns ``output_path``.
    """
    from matplotlib.backends.backend_pdf import PdfPages

    with PdfPages(output_path) as pdf:
        page_builders = (
            create_cost_breakdown_pie,     # Page 1: Overview
            create_volume_bar_chart,       # Page 2: Volume analysis
            create_distribution_analysis,  # Page 3: Distribution
            create_level_heatmap,          # Page 4: Heatmap
        )
        for build_page in page_builders:
            page = build_page(df)
            pdf.savefig(page)
            plt.close(page)

    return output_path
Quick Reference
| Chart Type | Best For | Library |
|---|---|---|
| Bar Chart | Category comparisons | Matplotlib/Seaborn |
| Pie Chart | Cost breakdown | Matplotlib |
| Heatmap | Level × Category matrix | Seaborn |
| Box Plot | Distribution by group | Seaborn |
| Scatter | Relationship analysis | Matplotlib/Plotly |
| Treemap | Hierarchical data | Plotly |
| Sunburst | Multi-level breakdown | Plotly |
| Gantt | Schedule visualization | Matplotlib |
| S-Curve | Progress tracking | Matplotlib |
Color Palettes for Construction
# Professional color palettes
CONSTRUCTION_COLORS = {
    # General-purpose sequence of hex colours.
    'primary': [
        '#2C3E50',
        '#3498DB',
        '#1ABC9C',
        '#F39C12',
        '#E74C3C',
    ],
    # One hex colour per material name.
    'materials': {
        'Concrete': '#95A5A6',
        'Steel': '#34495E',
        'Timber': '#D35400',
        'Brick': '#C0392B',
        'Glass': '#3498DB',
    },
    # One hex colour per category name.
    'categories': {
        'Structural': '#2C3E50',
        'Architectural': '#3498DB',
        'MEP': '#27AE60',
        'Finishes': '#F39C12',
    },
}
Resources
- Book: "Data-Driven Construction" by Artem Boiko, Chapter 4.1
- Website: https://datadrivenconstruction.io
- Matplotlib: https://matplotlib.org
- Seaborn: https://seaborn.pydata.org
- Plotly: https://plotly.com/python
Next Steps
- See pandas-construction-analysis for data preparation
- See cost-prediction for predictive analytics
- See qto-report for quantity extraction