125 lines
3.7 KiB
Python
125 lines
3.7 KiB
Python
|
|
import json
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
|
|
|
|
|
|
def extract_building_data(scenario, demand_types):
    """
    Extracts energy demand data for each building from the scenario dictionary.

    For every building the series of all requested demand types are
    concatenated, in the order given by ``demand_types``, into one row.
    Column labels are ``"<demand_type>_<i>"`` where ``i`` is the position
    inside that demand type's series.

    Args:
        scenario (dict): Scenario dictionary containing building data. Maps a
            building ID to a mapping of demand type -> iterable of values.
        demand_types (list): List of demand types to extract.

    Returns:
        pd.DataFrame: DataFrame with buildings as rows and demand types as columns.

    Raises:
        KeyError: If a building has no entry for a requested demand type.
        ValueError: If the series lengths of a building differ from those of
            the first building (rows could not be aligned column-wise).
    """
    # Series length assumed by the original implementation (hours in a
    # non-leap year). It is only used to label columns of an empty scenario.
    default_length = 8760

    rows = []
    building_ids = []
    series_lengths = None  # per-demand-type lengths, fixed by the first building

    for building_id, demand_data in scenario.items():
        series_per_type = [list(demand_data[demand_type]) for demand_type in demand_types]
        lengths = [len(series) for series in series_per_type]

        if series_lengths is None:
            series_lengths = lengths
        elif lengths != series_lengths:
            # Concatenating series of different lengths would silently shift
            # values into the wrong columns, so fail loudly instead.
            raise ValueError(
                f"Building {building_id!r} has demand series of lengths {lengths}, "
                f"expected {series_lengths}"
            )

        rows.append([value for series in series_per_type for value in series])
        building_ids.append(building_id)

    if series_lengths is None:
        series_lengths = [default_length] * len(demand_types)

    columns = [
        f"{demand_type}_{i}"
        for demand_type, length in zip(demand_types, series_lengths)
        for i in range(length)
    ]
    return pd.DataFrame(rows, index=building_ids, columns=columns)
|
|
|
|
# Debug output (no plotting is performed at this point)
|
|
# Leftover debug statement: writes the literal string 'test' to stdout.
# NOTE(review): looks like a development marker — confirm whether it is still needed.
print('test')
|
|
def cluster_buildings(scenario, demand_types, n_clusters=4, n_components=2, random_state=42):
    """
    Clusters buildings based on their energy demand.

    The extracted demand matrix is standardized, reduced with PCA and then
    grouped with K-Means.

    Args:
        scenario (dict): Scenario dictionary containing building data.
        demand_types (list): List of demand types to use for clustering.
        n_clusters (int): Number of clusters to form.
        n_components: Value forwarded to ``PCA(n_components=...)``; with an
            integer this is the number of principal components kept.
        random_state (int): Seed used for both PCA and K-Means so that
            repeated runs on the same data produce the same labels.

    Returns:
        pd.DataFrame: DataFrame with building IDs and their corresponding cluster labels.
    """
    # Extract building data (one row per building)
    building_data = extract_building_data(scenario, demand_types)

    # Standardize the data
    scaler = StandardScaler()
    building_data_scaled = scaler.fit_transform(building_data)

    # Dimensionality reduction with PCA. The seed matters because
    # scikit-learn may select a randomized SVD solver depending on the
    # input size; without it the projection can vary between runs.
    pca = PCA(n_components=n_components, random_state=random_state)
    building_data_pca = pca.fit_transform(building_data_scaled)

    # Clustering with K-Means on the reduced representation
    kmeans = KMeans(n_clusters=n_clusters, random_state=random_state)
    kmeans.fit(building_data_pca)

    # Create a DataFrame with building IDs and cluster labels
    # (labels_ is ordered like the rows passed to fit)
    clustered_buildings = pd.DataFrame({
        'building_id': building_data.index,
        'cluster': kmeans.labels_,
    })

    return clustered_buildings
|
|
|
|
|
|
# Example usage
# NOTE(review): `scenario` and `output_path` are not defined in this file; they
# must already exist when this section runs. `output_path` is combined with
# the `/` operator below, so it is presumably a pathlib.Path — confirm.
scenario_data = scenario['efficient with PV+4Pipe+DHW']  # Use 'baseline' or 'efficient with PV'
demand_types = [
    'heating_consumption_kWh',
]

# Cluster buildings
clustered_buildings = cluster_buildings(scenario_data, demand_types, n_clusters=4, n_components=20)

# Visualize clusters: one marker per building, with the cluster label on the
# y-axis and also encoded as colour.
plt.figure(figsize=(10, 6))
plt.scatter(
    clustered_buildings['building_id'],
    clustered_buildings['cluster'],
    c=clustered_buildings['cluster'],
    cmap='viridis',
)
plt.xlabel('Building ID')
plt.ylabel('Cluster')
plt.title('Building Clusters Based on Energy Demand')
plt.colorbar(label='Cluster')

# Save the plot. The path is built with `/`, like the other output paths in
# this script, instead of os.path.join.
plt.savefig(output_path / 'clusters.png')
plt.close()

# Debug output (no plotting happens here)
print('test')

# Store the building -> cluster table as an Excel workbook
output_path_clusters = output_path / 'clustered_buildings_4.xlsx'
clustered_buildings.to_excel(output_path_clusters, index=False)

# clusters made in QGIS
clusters_path = output_path / "clusters" / 'updated_buildings_with_clusters.geojson'

# Read with an explicit encoding so the result does not depend on the
# platform's default locale encoding.
with open(clusters_path, 'r', encoding='utf-8') as f:
    clusters = json.load(f)
|
|
|
|
def extract_cluster(key, clusters):
    """
    Looks up the cluster ID stored for a building in a GeoJSON-like mapping.

    Each entry of ``clusters['features']`` is compared via the string form
    of its ``properties['id']`` against the string form of ``key``.

    Args:
        key: Building identifier; compared as ``str(key)``.
        clusters (dict): Mapping with a ``'features'`` list whose items carry
            ``properties['id']`` and ``properties['Cluster_ID']``.

    Returns:
        The ``Cluster_ID`` of the matching feature, or ``None`` when no
        feature has the requested id. If several features share the id, the
        last one in the list wins.
    """
    wanted_id = str(key)  # convert once instead of on every comparison
    cluster = None  # stays None when nothing matches (previously an UnboundLocalError)

    for feature in clusters['features']:
        properties = feature['properties']
        if str(properties['id']) == wanted_id:
            cluster = properties['Cluster_ID']

    return cluster
|
|
# Map every building key of the selected scenario to the cluster ID found
# for it in the loaded `clusters` data.
scenario_data = scenario['efficient with PV+4Pipe+DHW']
cluster_dic = {
    building_key: extract_cluster(building_key, clusters)
    for building_key in scenario_data
}

# Disabled export of the mapping, kept for reference:
# cluster_df = pd.DataFrame.from_dict(cluster_dic, orient='index')
# cluster_df.to_csv(output_path/'clusters.csv')
|