"""Combine per-archetype energy-demand result files into one CSV.

Each file matching ``./data/*_Results.csv`` is normalised to the column layout
of the main CSV (``../your_output_file.csv``) and appended to it. The merged
table is written to ``energy_demand_data_combined.csv``.
"""

import glob
import os
from typing import List, Optional

import pandas as pd

MAIN_CSV_PATH = '../your_output_file.csv'
ARCHETYPE_GLOB = "./data/*_Results.csv"
OUTPUT_CSV_PATH = "energy_demand_data_combined.csv"
RESULTS_SUFFIX = '_Results.csv'

# Source header -> column name used in the combined table.
# NOTE(review): the "mÂ²" sequences look like a UTF-8 "m²" decoded as latin-1,
# which is what read_csv(..., encoding='latin-1') would yield for UTF-8 files.
# They are kept exactly as found; confirm against the real file headers.
COLUMN_MAPPING = {
    'Heating Demand [kWh/mÂ²]': 'Heating',
    'Cooling Demand [kWh/mÂ²]': 'Cooling',
    'SHW Demand [kWh/mÂ²]': 'DHW',
    'Electricity Demand [kWh/mÂ²]': 'Equipment and lighting',
    'Superficie [mÂ²]': 'Surface',
}

# Columns created with a placeholder value when a result file lacks them.
DEFAULT_COLUMNS = {
    'Type_of_building': '',
    'Equipment': 0,
    'Lighting': 0,
    'Heating': 0,
    'Cooling': 0,
    'DHW': 0,
    'Equipment and lighting': 0,
    'Surface': 0,
    'Timestamps': pd.NaT,
}


def process_archetype_file(
    filepath: str, final_columns: Optional[List[str]] = None
) -> pd.DataFrame:
    """Load one archetype result file and shape it like the main table.

    The file name (minus ``_Results.csv``) is split at its first underscore:
    the part before it becomes ``Usage`` and the remainder becomes ``Vintage``
    (empty when there is no underscore).

    Args:
        filepath: Path to a ``*_Results.csv`` file, read as latin-1.
        final_columns: Columns to return, in order. When omitted, the header
            of the main CSV is read and used.

    Returns:
        A DataFrame holding exactly ``final_columns``, in that order.

    Raises:
        KeyError: If ``final_columns`` names a column that the file does not
            provide and that has no default.
    """
    if final_columns is None:
        # Only the header row is needed to learn the target layout.
        final_columns = pd.read_csv(MAIN_CSV_PATH, nrows=0).columns.tolist()

    base_name = os.path.basename(filepath).replace(RESULTS_SUFFIX, '')
    usage, _, vintage = base_name.partition('_')

    df = pd.read_csv(filepath, encoding='latin-1')
    # rename() ignores mapping keys that are absent from the frame.
    df = df.rename(columns=COLUMN_MAPPING)

    if 'Date/Time' in df.columns:
        # Unparseable values become NaT instead of raising.
        df['Date/Time'] = pd.to_datetime(df['Date/Time'], errors='coerce')
        df = df.rename(columns={'Date/Time': 'Timestamps'})

    for column, default in DEFAULT_COLUMNS.items():
        if column not in df.columns:
            df[column] = default

    df['Usage'] = usage
    df['Vintage'] = vintage

    missing = [column for column in final_columns if column not in df.columns]
    if missing:
        raise KeyError(
            f"{filepath}: columns required by the main table are missing: {missing}"
        )
    return df[list(final_columns)]


def main() -> None:
    """Append every archetype result file to the main table and save it."""
    main_df = pd.read_csv(MAIN_CSV_PATH)
    final_columns = main_df.columns.tolist()

    # glob order is arbitrary; sorting keeps the output row order reproducible.
    frames = [main_df]
    frames.extend(
        process_archetype_file(filepath, final_columns)
        for filepath in sorted(glob.glob(ARCHETYPE_GLOB))
    )

    # One concat at the end avoids re-copying the growing table per file.
    combined = pd.concat(frames, ignore_index=True)
    combined.to_csv(OUTPUT_CSV_PATH, index=False)


if __name__ == "__main__":
    main()