mirror of https://github.com/knejadshamsi/zele-utils.git
synced 2024-11-14 17:40:28 -05:00

process_travels added

parent 52be6c060e
commit 8cd94090a3
@@ -1,11 +1,11 @@
-from .population import process_buffer_population, push_population,write_population
+from .population import process_buffer_population, process_travels, push_population,write_population
 from .network import process_network, push_network, network_write
 from .metro import process_metro, push_metro, metro_write
 from .bus import process_bus, push_bus, bus_write
 from .helpers import buffer_creator
 
 __all__ = [
-    'process_buffer_population', 'push_population', 'write_population',
+    'process_buffer_population', 'process_travels','push_population', 'write_population',
     'process_network', 'push_network', 'network_write',
     'process_metro', 'push_metro', 'metro_write',
     'process_bus', 'push_bus', 'bus_write',

@@ -23,12 +23,41 @@ def process_buffer_population(data):
         PERSON_LIST.append(person_obj)
     return pandas.DataFrame(PERSON_LIST)
 
-def push_population(data,mode):
+def process_travels(data):
+    transformer = pyproj.Transformer.from_crs('EPSG:2950', 'EPSG:4326', always_xy=True)
+    activities_list = []
+    elements = BeautifulSoup(data, 'xml')
+    for person in elements.find_all('person'):
+        person_id = person['id']
+        plan = person.find('plan')
+
+        for i, activity in enumerate(plan.find_all('activity'), start=1):
+            activity_obj = {
+                'id': person_id,
+                'activity_order': i,
+                'activity_type': activity.get('type'),
+                'facility': activity.get('facility'),
+                'start_time': activity.get('start_time') or None,
+                'end_time': activity.get('end_time') or None
+            }
+
+            if 'x' in activity.attrs and 'y' in activity.attrs:
+                lon, lat = transformer.transform(float(activity['x']), float(activity['y']))
+                activity_obj['coordinates'] = Point(lon, lat)
+            else:
+                activity_obj['coordinates'] = None
+
+            activities_list.append(activity_obj)
+
+    return pandas.DataFrame(activities_list)
+
+
+def push_population(name,data,mode):
     GDF = geopandas.GeoDataFrame(data, crs='EPSG:4326')
     GDF['geom'] = GDF['coordinates'].apply(lambda x: WKTElement(x.wkt, srid=os.getenv("SRID")))
     engine = create_engine(f'postgresql://{os.getenv("USER")}:{os.getenv("PASS")}@{os.getenv("HOST_NAME")}/{os.getenv("DATA_BASE")}', echo=False)
     GDF.to_sql(
-        name='agents',
+        name=name,
         con=engine,
         if_exists=mode,
         chunksize=os.getenv("CHUNK_SIZE"),
@@ -36,10 +65,10 @@ def push_population(data,mode):
         index=False
     )
 
-def write_population(data, file):
+def write_population(name,data, file):
     directory = file.parent
     id = datetime.datetime.now().strftime("%Y%m%d")
-    csv = directory / (file.stem + id +".csv")
+    csv = directory / (file.stem + f"-{name}-{id}.csv")
     if csv.exists():
         data.to_csv(csv, mode='a',index=False)
     else:
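As context for the new function, a minimal sketch of how process_travels could be exercised on a small plans-style XML buffer. The sample XML, its attribute values, and the import path are illustrative assumptions, not part of the commit; it assumes the functions package and its dependencies (beautifulsoup4 with lxml, pyproj, shapely, pandas) are installed.

from functions import process_travels

# Illustrative only: one person with two activities, using EPSG:2950
# (NAD83(CSRS) / MTM zone 8) x/y values as expected by the transformer.
sample_buffer = """
<population>
  <person id="p1">
    <plan selected="yes">
      <activity type="home" facility="f1" x="300000" y="5040000" end_time="08:00:00"/>
      <activity type="work" facility="f2" x="302500" y="5042000" start_time="08:45:00"/>
    </plan>
  </person>
</population>
"""

travels = process_travels(sample_buffer)
print(travels[['id', 'activity_order', 'activity_type', 'coordinates']])
# One row per <activity>; 'coordinates' is a shapely Point in EPSG:4326
# when x/y attributes are present, otherwise None.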
main.py (20 changed lines)
@@ -7,7 +7,7 @@ from pathlib import Path
 from typing import Tuple
 
 from classes import City, DBMode, RTMode
-from functions import buffer_creator, process_buffer_population, push_population, write_population
+from functions import buffer_creator, process_buffer_population,process_travels, push_population, write_population
 from styles import print_help
 
 called= "population"
@@ -88,16 +88,28 @@ def population(
     with open(log_file,'r',encoding='utf-8') as l:
         log_list = l.read().splitlines()
     while current_chunk < max_chunk:
         if current_chunk < range[0] or current_chunk > range[1]:
             processed_line, buffer = buffer_creator(file, os.getenv("DIVIDER"), processed_line, int(os.getenv("CHUNK_SIZE")))
             current_chunk += 1
             continue
         if log and current_chunk in log_list: continue
         processed_line, buffer = buffer_creator(file, os.getenv("DIVIDER"), processed_line, int(os.getenv("CHUNK_SIZE")))
         if "agents" in common_tables:
             dataframe = process_buffer_population(buffer)
             print(dataframe)
             if cleandata:
                 dataframe = dataframe.dropna()
             if push:
-                push_population(dataframe, mode)
+                push_population("agents",dataframe, mode)
             else:
-                write_population(dataframe,file)
+                write_population("agents",dataframe, file)
+
+        if "travels" in common_tables:
+            dataframe_travels = process_travels(buffer)
+            if push:
+                push_population("travels",dataframe_travels, mode)
+            else:
+                write_population("travels",dataframe_travels, file)
 
         if log:
             f = open(log_file, "a")
             f.write(f"\n{current_chunk}")
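The reworked push_population builds its PostGIS connection string and write options from environment variables. Below is a hedged sketch of the configuration it appears to expect; the variable names come from the diff, all values are placeholders.

import os

# Placeholder values; only the variable names appear in push_population.
os.environ.setdefault("USER", "postgres")        # database user
os.environ.setdefault("PASS", "postgres")        # database password
os.environ.setdefault("HOST_NAME", "localhost")  # database host
os.environ.setdefault("DATA_BASE", "zele")       # database name (placeholder)
os.environ.setdefault("SRID", "4326")            # SRID passed to WKTElement (os.getenv returns it as a string)
os.environ.setdefault("CHUNK_SIZE", "1000")      # chunksize passed to DataFrame.to_sql (also a string as written)

from functions import process_travels, push_population

# travels = process_travels(buffer)              # buffer: one chunk of the plans XML
# push_population("travels", travels, "append")  # mode maps to to_sql's if_exists ('fail', 'replace', 'append')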