hub/scripts/combine_demand_csv.py

68 lines
1.9 KiB
Python

import pandas as pd
import glob
import os
# Template table: its header defines the columns (and their order) that every
# archetype frame is reduced to, and its rows come first in the combined file.
# The path is relative to the current working directory.
main_df = pd.read_csv("../your_output_file.csv")
final_columns = list(main_df.columns)
def process_archetype_file(filepath, columns=None):
    """Load one archetype ``*_Results.csv`` file and reshape it to the template layout.

    The file name is split at its first underscore after the trailing
    ``_Results.csv`` is removed: the leading part becomes ``Usage`` and the
    remainder becomes ``Vintage`` (empty when there is no underscore).

    Args:
        filepath: Path to the archetype result CSV. The file is decoded as
            latin-1.
        columns: Names of the output columns, in order. When omitted, the
            module-level ``final_columns`` (header of the template file) is
            used, which keeps existing one-argument calls working.

    Returns:
        A DataFrame containing exactly ``columns``, in that order.

    Raises:
        KeyError: If ``columns`` names a column that is neither present in the
            file nor given a default value here.
    """
    columns = list(final_columns if columns is None else columns)

    filename = os.path.basename(filepath)
    suffix = '_Results.csv'
    # Strip the marker only when it really is the suffix, so an occurrence
    # elsewhere in the name is left alone.
    base_name = filename[:-len(suffix)] if filename.endswith(suffix) else filename
    usage, _, vintage = base_name.partition('_')

    # NOTE(review): the file is decoded as latin-1. If an export is actually
    # UTF-8, the 'm²' headers will not match the keys below and the demand
    # columns silently fall back to their 0 defaults -- confirm the encoding.
    df = pd.read_csv(filepath, encoding='latin-1')

    if 'Date/Time' in df.columns:
        # Unparseable timestamps become NaT instead of aborting the file.
        df['Date/Time'] = pd.to_datetime(df['Date/Time'], errors='coerce')

    # rename() ignores keys that are not present, so no per-column check is
    # needed.
    df = df.rename(columns={
        'Heating Demand [kWh/m²]': 'Heating',
        'Cooling Demand [kWh/m²]': 'Cooling',
        'SHW Demand [kWh/m²]': 'DHW',
        'Electricity Demand [kWh/m²]': 'Equipment and lighting',
        'Superficie [m²]': 'Surface',
        'Date/Time': 'Timestamps',
    })

    # Values used for template columns the archetype file does not provide.
    defaults = {
        'Type_of_building': '',
        'Equipment': 0,
        'Lighting': 0,
        'Heating': 0,
        'Cooling': 0,
        'DHW': 0,
        'Equipment and lighting': 0,
        'Surface': 0,
        'Timestamps': pd.NaT,
    }
    for name, value in defaults.items():
        if name not in df.columns:
            df[name] = value

    df['Usage'] = usage
    df['Vintage'] = vintage

    # Same exception type as plain df[columns], but the message names the
    # offending file, which matters when many files are processed in a loop.
    missing = [name for name in columns if name not in df.columns]
    if missing:
        raise KeyError(
            f"{filename}: no data or default for template column(s) {missing}"
        )
    return df[columns]
# Append every archetype result file found under ./data to the template rows
# and write the combined table to the current working directory.
# The paths are sorted because glob.glob() returns them in arbitrary order,
# which would make the row order of the output differ between runs/machines.
# All frames are collected first and concatenated once: calling pd.concat
# inside the loop re-copies the accumulated table on every iteration.
frames = [main_df]
frames.extend(
    process_archetype_file(filepath)
    for filepath in sorted(glob.glob("./data/*_Results.csv"))
)
main_df = pd.concat(frames, ignore_index=True)
main_df.to_csv("energy_demand_data_combined.csv", index=False)