import json

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

# 8760 = 365 * 24; presumably one value per hour of a non-leap year (confirm
# against the data source). Only used as the fallback series length when a
# scenario contains no buildings to infer the length from.
HOURS_PER_YEAR = 8760


def extract_building_data(scenario, demand_types):
    """
    Extracts energy demand data for each building from the scenario dictionary.

    The series of all requested demand types are concatenated into a single
    wide row per building. The series length is inferred from the first
    building instead of being fixed, so series of any length are supported.

    Args:
        scenario (dict): Maps each building ID to a mapping from demand type
            to a sequence of demand values.
        demand_types (list): List of demand types to extract, in column order.

    Returns:
        pd.DataFrame: DataFrame with one row per building (indexed by
        building ID) and one column per value, named "<demand_type>_<i>".

    Raises:
        KeyError: If a building has no entry for a requested demand type.
        ValueError: If a building's series lengths differ from those of the
            first building (the rows could not be aligned column-wise).
    """
    rows = []
    building_ids = []
    series_lengths = None  # per-demand-type lengths, fixed by the first building

    for building_id, demand_data in scenario.items():
        series = [list(demand_data[demand_type]) for demand_type in demand_types]
        lengths = [len(values) for values in series]

        if series_lengths is None:
            series_lengths = lengths
        elif lengths != series_lengths:
            # Fail early: pandas would otherwise pad short rows with NaN and
            # shift later demand types into the wrong columns.
            raise ValueError(
                f"Building {building_id!r} has series lengths {lengths}, "
                f"expected {series_lengths}"
            )

        rows.append([value for values in series for value in values])
        building_ids.append(building_id)

    if series_lengths is None:
        # Empty scenario: keep the historical column layout.
        series_lengths = [HOURS_PER_YEAR] * len(demand_types)

    columns = [
        f"{demand_type}_{i}"
        for demand_type, length in zip(demand_types, series_lengths)
        for i in range(length)
    ]
    return pd.DataFrame(rows, index=building_ids, columns=columns)


# Plot the data
print('test')


def cluster_buildings(scenario, demand_types, n_clusters=4, n_components=2):
    """
    Clusters buildings based on their energy demand.

    The pipeline is: extract the demand series, standardize every column,
    reduce the dimensionality with PCA, then run K-Means on the PCA scores.

    Args:
        scenario (dict): Scenario dictionary containing building data.
        demand_types (list): List of demand types to use for clustering.
        n_clusters (int): Number of clusters to form.
        n_components: Forwarded to ``sklearn.decomposition.PCA`` as
            ``n_components``. NOTE(review): per the scikit-learn docs an
            integer value must not exceed min(n_buildings, n_columns).

    Returns:
        pd.DataFrame: DataFrame with the columns 'building_id' and 'cluster'
        (the K-Means label of each building), one row per building.
    """
    # Extract building data
    building_data = extract_building_data(scenario, demand_types)

    # Standardize the data so every time step contributes on the same scale
    scaler = StandardScaler()
    building_data_scaled = scaler.fit_transform(building_data)

    # Dimensionality reduction with PCA
    pca = PCA(n_components=n_components)
    building_data_pca = pca.fit_transform(building_data_scaled)

    # Clustering with K-Means; the fixed seed keeps the labels reproducible
    kmeans = KMeans(n_clusters=n_clusters, random_state=42)
    kmeans.fit(building_data_pca)

    # Create a DataFrame with building IDs and cluster labels
    return pd.DataFrame({
        'building_id': building_data.index,
        'cluster': kmeans.labels_
    })


# Example usage
# NOTE: `scenario` and `output_path` are not defined in this part of the file;
# they must already exist when this code runs. `output_path` is used with the
# `/` operator below, so it has to be a pathlib.Path-like object.
scenario_data = scenario['efficient with PV+4Pipe+DHW']  # Use 'baseline' or 'efficient with PV'
demand_types = [
    'heating_consumption_kWh'
]

# Cluster buildings
clustered_buildings = cluster_buildings(scenario_data, demand_types, n_clusters=4, n_components=20)

# Visualize clusters
plt.figure(figsize=(10, 6))
plt.scatter(
    clustered_buildings['building_id'],
    clustered_buildings['cluster'],
    c=clustered_buildings['cluster'],
    cmap='viridis',
)
plt.xlabel('Building ID')
plt.ylabel('Cluster')
plt.title('Building Clusters Based on Energy Demand')
plt.colorbar(label='Cluster')

# Save the plot
plt.savefig(output_path / 'clusters.png')
plt.close()

# Plot the data
print('test')

output_path_clusters = output_path / 'clustered_buildings_4.xlsx'
clustered_buildings.to_excel(output_path_clusters, index=False)

# Clusters made in QGIS
clusters_path = output_path / "clusters" / 'updated_buildings_with_clusters.geojson'
# GeoJSON is specified as UTF-8; do not rely on the platform default encoding.
with open(clusters_path, 'r', encoding='utf-8') as f:
    clusters = json.load(f)


def extract_cluster(key, clusters):
    """
    Looks up the cluster ID of a building in a GeoJSON feature collection.

    Args:
        key: Building ID; compared as a string against each feature's
            ``properties['id']``.
        clusters (dict): Parsed GeoJSON object with a 'features' list whose
            items carry 'id' and 'Cluster_ID' in their 'properties'.

    Returns:
        The 'Cluster_ID' of the first feature whose id matches ``key``, or
        None if no feature matches.
    """
    wanted_id = str(key)
    for feature in clusters['features']:
        properties = feature['properties']
        if str(properties['id']) == wanted_id:
            return properties['Cluster_ID']
    return None


# Map every building of the scenario to its QGIS cluster (None if the
# building does not appear in the GeoJSON file).
scenario_data = scenario['efficient with PV+4Pipe+DHW']
cluster_dic = {key: extract_cluster(key, clusters) for key in scenario_data}

# Optional CSV export of the mapping (currently disabled):
# cluster_df = pd.DataFrame.from_dict(cluster_dic, orient='index')
# cluster_df.to_csv(output_path/'clusters.csv')