diff --git a/functions/population.py b/functions/population.py index e8cfebf..91285f6 100644 --- a/functions/population.py +++ b/functions/population.py @@ -10,7 +10,7 @@ def camel_to_snake(name): def process_buffer_population(data): transformer = pyproj.Transformer.from_crs('EPSG:2950', 'EPSG:4326', always_xy=True) PERSON_LIST = [] - elements = BeautifulSoup(data,'html.parser') + elements = BeautifulSoup(data, 'lxml-xml') for person in elements.find_all('person'): person_obj = {} person_obj['id'] = person['id'] @@ -25,8 +25,8 @@ def process_buffer_population(data): def process_travels(data): transformer = pyproj.Transformer.from_crs('EPSG:2950', 'EPSG:4326', always_xy=True) - activities_list = [] = [] - elements = BeautifulSoup(data, 'xml') + activities_list = [] + elements = BeautifulSoup(data, 'lxml-xml') for person in elements.find_all('person'): person_id = person['id'] plan = person.find('plan') diff --git a/main.py b/main.py index 8b858a9..bf12a43 100644 --- a/main.py +++ b/main.py @@ -5,6 +5,9 @@ from typing_extensions import Annotated from typing import Optional from pathlib import Path from typing import Tuple +from rich.console import Console +from rich.progress import Progress, SpinnerColumn, TextColumn, BarColumn, TaskProgressColumn +import time from classes import City, DBMode, RTMode from functions import buffer_creator, process_buffer_population,process_travels, push_population, write_population @@ -33,6 +36,7 @@ def population( push: bool = typer.Option(False, "--push", "-p", help="Save the output directly to the database When mentioned. Otherwise, Saves as a [green bold]CSV file[/green bold] in the input directory]"), mode: Optional[DBMode] = typer.Option(None, help="Specify either [underline]'append'[/underline] or [underline]'drop'[/underline] when pushing data", show_default=False), ): + console = Console() all_tables = ["agents","travels"] common_tables = [item for item in tables if item in ["all"] + all_tables] if len(common_tables) == 0: @@ -81,40 +85,53 @@ def population( else: log_file.touch() info_printer(f"Log file {log_file} created") + + with Progress( + SpinnerColumn(), + TextColumn("[progress.description]{task.description}"), + BarColumn(), + TaskProgressColumn(), + console=console + ) as progress: + task = progress.add_task("[cyan]Processing chunks...", total=max_chunk) - current_chunk = 0 - processed_line = 0 - if log: - with open(log_file,'r',encoding='utf-8') as l: - log_list = l.read().splitlines() - while current_chunk < max_chunk: - if current_chunk < range[0] or current_chunk > range[1]: - processed_line, buffer = buffer_creator(file, os.getenv("DIVIDER"), processed_line, int(os.getenv("CHUNK_SIZE"))) - current_chunk += 1 - continue - if log and current_chunk in log_list: continue - processed_line, buffer = buffer_creator(file, os.getenv("DIVIDER"), processed_line, int(os.getenv("CHUNK_SIZE"))) - if "agents" in common_tables: - dataframe = process_buffer_population(buffer) - if cleandata: - dataframe = dataframe.dropna() - if push: - push_population("agents",dataframe, mode) - else: - write_population("agents",dataframe, file) - - if "travels" in common_tables: - dataframe_travels = process_travels(buffer) - if push: - push_population("travels",dataframe_travels, mode) - else: - write_population("travels",dataframe_travels, file) - + current_chunk = 0 + processed_line = 0 if log: - f = open(log_file, "a") - f.write(f"\n{current_chunk}") - f.close() - current_chunk += 1 + with open(log_file,'r',encoding='utf-8') as l: + log_list = l.read().splitlines() + while current_chunk < max_chunk: + if current_chunk < range[0] or current_chunk > range[1]: + processed_line, buffer = buffer_creator(file, os.getenv("DIVIDER"), processed_line, int(os.getenv("CHUNK_SIZE"))) + current_chunk += 1 + continue + if log and current_chunk in log_list: continue + processed_line, buffer = buffer_creator(file, os.getenv("DIVIDER"), processed_line, int(os.getenv("CHUNK_SIZE"))) + if "agents" in common_tables: + dataframe = process_buffer_population(buffer) + if cleandata: + dataframe = dataframe.dropna() + if push: + push_population("agents",dataframe, mode) + else: + write_population("agents",dataframe, file) + + if "travels" in common_tables: + dataframe_travels = process_travels(buffer) + if push: + push_population("travels",dataframe_travels, mode) + else: + write_population("travels",dataframe_travels, file) + + if log: + f = open(log_file, "a") + f.write(f"\n{current_chunk}") + f.close() + current_chunk += 1 + time.sleep(2) + progress.update(task, advance=1) + progress.update(task, visible=False) + console.print("[green]Processing complete![/green]") @app.command() def network(