diff --git a/functions/__Init__.py b/functions/__Init__.py
index 9853eca..de344b9 100644
--- a/functions/__Init__.py
+++ b/functions/__Init__.py
@@ -1,11 +1,11 @@
-from .population import process_buffer_population, push_population,write_population
+from .population import process_buffer_population, process_travels, push_population,write_population
 from .network import process_network, push_network, network_write
 from .metro import process_metro, push_metro, metro_write
 from .bus import process_bus, push_bus, bus_write
 from .helpers import buffer_creator
 
 __all__ = [
-    'process_buffer_population', 'push_population', 'write_population',
+    'process_buffer_population', 'process_travels','push_population', 'write_population',
     'process_network', 'push_network', 'network_write',
     'process_metro', 'push_metro', 'metro_write',
     'process_bus', 'push_bus', 'bus_write',
diff --git a/functions/population.py b/functions/population.py
index d55510e..e8cfebf 100644
--- a/functions/population.py
+++ b/functions/population.py
@@ -23,12 +23,66 @@ def process_buffer_population(data):
         PERSON_LIST.append(person_obj)
     return pandas.DataFrame(PERSON_LIST)
 
-def push_population(data,mode):
+def process_travels(data):
+    """Build one row per activity found in the plans of an XML buffer.
+
+    Args:
+        data: XML markup with ``person`` elements; the ``activity``
+            elements of each person's ``plan`` are exported.
+
+    Returns:
+        pandas.DataFrame with the columns ``id`` (person id),
+        ``activity_order`` (1-based position in the plan),
+        ``activity_type``, ``facility``, ``start_time``, ``end_time`` and
+        ``coordinates`` (a Point converted from EPSG:2950 to EPSG:4326, or
+        None when the activity has no ``x``/``y`` attributes).
+    """
+    transformer = pyproj.Transformer.from_crs('EPSG:2950', 'EPSG:4326', always_xy=True)
+    activities_list = []
+    elements = BeautifulSoup(data, 'xml')
+    for person in elements.find_all('person'):
+        person_id = person['id']
+        plan = person.find('plan')
+        if plan is None:
+            # find() returns None when there is no plan; nothing to export then.
+            continue
+
+        for i, activity in enumerate(plan.find_all('activity'), start=1):
+            activity_obj = {
+                'id': person_id,
+                'activity_order': i,
+                'activity_type': activity.get('type'),
+                'facility': activity.get('facility'),
+                'start_time': activity.get('start_time') or None,
+                'end_time': activity.get('end_time') or None
+            }
+
+            if 'x' in activity.attrs and 'y' in activity.attrs:
+                lon, lat = transformer.transform(float(activity['x']), float(activity['y']))
+                activity_obj['coordinates'] = Point(lon, lat)
+            else:
+                activity_obj['coordinates'] = None
+
+            activities_list.append(activity_obj)
+
+    return pandas.DataFrame(activities_list)
+
+
+def push_population(name,data,mode):
+    """Write *data* to the PostgreSQL table *name* with an added ``geom`` column.
+
+    Args:
+        name: target table name.
+        data: rows to store; a ``coordinates`` column is required.
+        mode: passed to ``to_sql`` as ``if_exists``.
+    """
     GDF = geopandas.GeoDataFrame(data, crs='EPSG:4326')
-    GDF['geom'] = GDF['coordinates'].apply(lambda x: WKTElement(x.wkt, srid=os.getenv("SRID")))
+    # Rows may carry no coordinates (process_travels stores None for them);
+    # leave their geom empty instead of failing on None.wkt.
+    GDF['geom'] = GDF['coordinates'].apply(lambda x: WKTElement(x.wkt, srid=os.getenv("SRID")) if x is not None else None)
     engine = create_engine(f'postgresql://{os.getenv("USER")}:{os.getenv("PASS")}@{os.getenv("HOST_NAME")}/{os.getenv("DATA_BASE")}', echo=False)
     GDF.to_sql(
-        name='agents',
+        name=name,
         con=engine,
         if_exists=mode,
         chunksize=os.getenv("CHUNK_SIZE"),
@@ -36,10 +90,21 @@ def push_population(data,mode):
         index=False
     )
 
-def write_population(data, file):
+def write_population(name,data, file):
+    """Write *data* to a dated CSV file in the directory of *file*.
+
+    The file is named ``<file stem>-<name>-<YYYYMMDD>.csv``; rows are
+    appended when it already exists.
+
+    Args:
+        name: label embedded in the CSV file name.
+        data: DataFrame to write.
+        file: source path; only its parent directory and stem are used.
+    """
     directory = file.parent
     id = datetime.datetime.now().strftime("%Y%m%d")
-    csv = directory / (file.stem + id +".csv")
+    csv = directory / (file.stem + f"-{name}-{id}.csv")
     if csv.exists():
-        data.to_csv(csv, mode='a',index=False)
+        # The existing file already starts with the header row; do not repeat it per chunk.
+        data.to_csv(csv, mode='a', header=False, index=False)
     else:
diff --git a/main.py b/main.py
index 5cec2f7..8b858a9 100644
--- a/main.py
+++ b/main.py
@@ -7,7 +7,7 @@ from pathlib import Path
 from typing import Tuple
 
 from classes import City, DBMode, RTMode
-from functions import buffer_creator, process_buffer_population, push_population, write_population
+from functions import buffer_creator, process_buffer_population,process_travels, push_population, write_population
 from styles import print_help
 
 called= "population"
@@ -88,16 +88,32 @@ def population(
         with open(log_file,'r',encoding='utf-8') as l:
             log_list = l.read().splitlines()
     while current_chunk < max_chunk:
-        if log and current_chunk in log_list: continue
+        # Skip chunks outside the requested range or already listed in the log.
+        # The buffer is still read so processed_line keeps advancing, and the
+        # counter is incremented so the loop always terminates. The log holds
+        # chunk numbers as text lines, hence the str() comparison.
+        already_logged = log and str(current_chunk) in log_list
+        if already_logged or current_chunk < range[0] or current_chunk > range[1]:
+            processed_line, buffer = buffer_creator(file, os.getenv("DIVIDER"), processed_line, int(os.getenv("CHUNK_SIZE")))
+            current_chunk += 1
+            continue
         processed_line, buffer = buffer_creator(file, os.getenv("DIVIDER"), processed_line, int(os.getenv("CHUNK_SIZE")))
-        dataframe = process_buffer_population(buffer)
-        print(dataframe)
-        if cleandata:
-            dataframe = dataframe.dropna()
-        if push:
-            push_population(dataframe, mode)
-        else:
-            write_population(dataframe,file)
+        if "agents" in common_tables:
+            dataframe = process_buffer_population(buffer)
+            if cleandata:
+                dataframe = dataframe.dropna()
+            if push:
+                push_population("agents",dataframe, mode)
+            else:
+                write_population("agents",dataframe, file)
+
+        if "travels" in common_tables:
+            dataframe_travels = process_travels(buffer)
+            if push:
+                push_population("travels",dataframe_travels, mode)
+            else:
+                write_population("travels",dataframe_travels, file)
+
         if log:
             f = open(log_file, "a")
             f.write(f"\n{current_chunk}")