# summer_course_2024/clusters.py
# (file-viewer metadata: 125 lines, 3.7 KiB, Python)

import json
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
def extract_building_data(scenario, demand_types):
    """
    Extracts energy demand data for each building from the scenario dictionary.

    Every building becomes one row. The series of all requested demand types
    are concatenated left to right, in the order given by ``demand_types``.
    The number of time steps per demand type is taken from the data itself
    instead of being fixed to 8760 values.

    Args:
        scenario (dict): Scenario dictionary mapping a building ID to a mapping
            of demand type -> iterable of demand values.
        demand_types (list): List of demand types to extract.

    Returns:
        pd.DataFrame: DataFrame with buildings as rows (indexed by building ID)
            and one column per demand type and time step, named
            ``"<demand_type>_<i>"``.

    Raises:
        KeyError: If a building has no entry for a requested demand type.
        ValueError: If the buildings do not all share the same series lengths.
    """
    # Fallback used only when the scenario has no buildings, so that an empty
    # scenario still yields the column layout the original code produced.
    default_steps = 8760

    series_lengths = None
    data = []
    building_ids = []
    for building_id, demand_data in scenario.items():
        series = [list(demand_data[demand_type]) for demand_type in demand_types]
        lengths = [len(values) for values in series]
        if series_lengths is None:
            series_lengths = lengths
        elif lengths != series_lengths:
            # Rows of different layout would silently shift values into the
            # wrong columns, so fail loudly instead.
            raise ValueError(
                f"Building {building_id!r} has series lengths {lengths}, "
                f"expected {series_lengths}"
            )
        data.append([value for values in series for value in values])
        building_ids.append(building_id)

    if series_lengths is None:
        series_lengths = [default_steps] * len(demand_types)

    columns = [
        f"{demand_type}_{i}"
        for demand_type, n_steps in zip(demand_types, series_lengths)
        for i in range(n_steps)
    ]
    return pd.DataFrame(data, index=building_ids, columns=columns)
# NOTE(review): this statement only prints a debug marker; nothing is plotted
# here. Looks like leftover debugging output — confirm and remove.
print('test')
def cluster_buildings(scenario, demand_types, n_clusters=4, n_components=2):
    """
    Clusters buildings based on their energy demand.

    The demand data is standardized column-wise, reduced with PCA and then
    grouped with K-Means (fixed ``random_state=42``).

    Args:
        scenario (dict): Scenario dictionary containing building data.
        demand_types (list): List of demand types to use for clustering.
        n_clusters (int): Number of clusters to form.
        n_components (int): Number of principal components kept before
            clustering. PCA cannot produce more components than
            ``min(n_buildings, n_features)``; a larger integer is reduced to
            that limit and a warning is issued. Non-integer values are passed
            to PCA unchanged.

    Returns:
        pd.DataFrame: DataFrame with building IDs and their corresponding
            cluster labels (columns ``'building_id'`` and ``'cluster'``).
    """
    # Extract building data
    building_data = extract_building_data(scenario, demand_types)

    # Standardize the data
    building_data_scaled = StandardScaler().fit_transform(building_data)

    # Dimensionality reduction with PCA. Clamp an oversized component count,
    # otherwise PCA raises for small building sets.
    max_components = min(building_data_scaled.shape)
    if isinstance(n_components, (int, np.integer)) and n_components > max_components:
        warnings.warn(
            f"n_components={n_components} exceeds the maximum of "
            f"{max_components} for this data; using {max_components} instead.",
            stacklevel=2,
        )
        n_components = max_components
    building_data_pca = PCA(n_components=n_components).fit_transform(building_data_scaled)

    # Clustering with K-Means
    kmeans = KMeans(n_clusters=n_clusters, random_state=42)
    kmeans.fit(building_data_pca)

    # One row per building, in the same order as the extracted data
    return pd.DataFrame({
        'building_id': building_data.index,
        'cluster': kmeans.labels_,
    })
# Example usage
# NOTE(review): `scenario` and `output_path` are not defined in the visible
# part of this file — presumably provided by the surrounding session; confirm.
scenario_data = scenario['efficient with PV+4Pipe+DHW']  # Use 'baseline' or 'efficient with PV'
demand_types = [
    'heating_consumption_kWh',
]
# Cluster buildings
clustered_buildings = cluster_buildings(scenario_data, demand_types, n_clusters=4, n_components=20)
# Visualize clusters: building ID on the x axis, cluster label on the y axis
# and as the point color
plt.figure(figsize=(10, 6))
plt.scatter(
    clustered_buildings['building_id'],
    clustered_buildings['cluster'],
    c=clustered_buildings['cluster'],
    cmap='viridis',
)
plt.xlabel('Building ID')
plt.ylabel('Cluster')
plt.title('Building Clusters Based on Energy Demand')
plt.colorbar(label='Cluster')
# Save the plot. `output_path` is combined with `/` just below, so build this
# path the same way instead of going through os.path.join.
plt.savefig(output_path / 'clusters.png')
plt.close()
# NOTE(review): leftover debug output; nothing is plotted at this point.
print('test')
# Export the building -> cluster table
output_path_clusters = output_path / 'clustered_buildings_4.xlsx'
clustered_buildings.to_excel(output_path_clusters, index=False)
# Clusters made in QGIS
clusters_path = output_path / "clusters" / 'updated_buildings_with_clusters.geojson'
# GeoJSON is UTF-8 encoded, so do not rely on the platform default encoding.
with open(clusters_path, 'r', encoding='utf-8') as f:
    clusters = json.load(f)
def extract_cluster(key, clusters):
    """
    Looks up the cluster ID of one building in a feature collection.

    Args:
        key: Building identifier. It is compared as a string, so ``1`` and
            ``'1'`` address the same feature.
        clusters (dict): Mapping with a ``'features'`` list; every feature
            carries a ``'properties'`` mapping with the keys ``'id'`` and
            ``'Cluster_ID'``.

    Returns:
        The ``'Cluster_ID'`` of the first feature whose ``'id'`` matches
        ``key``, or ``None`` if no feature matches.
    """
    wanted_id = str(key)  # converted once instead of on every iteration
    for feature in clusters['features']:
        properties = feature['properties']
        if str(properties['id']) == wanted_id:
            return properties['Cluster_ID']
    return None
# Map every building ID of the scenario to the cluster ID that
# extract_cluster finds for it in the QGIS feature collection.
scenario_data = scenario['efficient with PV+4Pipe+DHW']
cluster_dic = {key: extract_cluster(key, clusters) for key in scenario_data}
#
# cluster_df = pd.DataFrame.from_dict(cluster_dic, orient='index')
# cluster_df.to_csv(output_path/'clusters.csv')