Add a progress bar to chunk processing and parse XML with the lxml-xml parser.

This commit is contained in:
Kian 2024-09-10 20:01:51 -04:00
parent 8cd94090a3
commit 3541f9f10e
2 changed files with 52 additions and 35 deletions

View File

@ -10,7 +10,7 @@ def camel_to_snake(name):
def process_buffer_population(data): def process_buffer_population(data):
transformer = pyproj.Transformer.from_crs('EPSG:2950', 'EPSG:4326', always_xy=True) transformer = pyproj.Transformer.from_crs('EPSG:2950', 'EPSG:4326', always_xy=True)
PERSON_LIST = [] PERSON_LIST = []
elements = BeautifulSoup(data,'html.parser') elements = BeautifulSoup(data, 'lxml-xml')
for person in elements.find_all('person'): for person in elements.find_all('person'):
person_obj = {} person_obj = {}
person_obj['id'] = person['id'] person_obj['id'] = person['id']
@ -25,8 +25,8 @@ def process_buffer_population(data):
def process_travels(data): def process_travels(data):
transformer = pyproj.Transformer.from_crs('EPSG:2950', 'EPSG:4326', always_xy=True) transformer = pyproj.Transformer.from_crs('EPSG:2950', 'EPSG:4326', always_xy=True)
activities_list = [] = [] activities_list = []
elements = BeautifulSoup(data, 'xml') elements = BeautifulSoup(data, 'lxml-xml')
for person in elements.find_all('person'): for person in elements.find_all('person'):
person_id = person['id'] person_id = person['id']
plan = person.find('plan') plan = person.find('plan')

81
main.py
View File

@ -5,6 +5,9 @@ from typing_extensions import Annotated
from typing import Optional from typing import Optional
from pathlib import Path from pathlib import Path
from typing import Tuple from typing import Tuple
from rich.console import Console
from rich.progress import Progress, SpinnerColumn, TextColumn, BarColumn, TaskProgressColumn
import time
from classes import City, DBMode, RTMode from classes import City, DBMode, RTMode
from functions import buffer_creator, process_buffer_population,process_travels, push_population, write_population from functions import buffer_creator, process_buffer_population,process_travels, push_population, write_population
@ -33,6 +36,7 @@ def population(
push: bool = typer.Option(False, "--push", "-p", help="Save the output directly to the database When mentioned. Otherwise, Saves as a [green bold]CSV file[/green bold] in the input directory]"), push: bool = typer.Option(False, "--push", "-p", help="Save the output directly to the database When mentioned. Otherwise, Saves as a [green bold]CSV file[/green bold] in the input directory]"),
mode: Optional[DBMode] = typer.Option(None, help="Specify either [underline]'append'[/underline] or [underline]'drop'[/underline] when pushing data", show_default=False), mode: Optional[DBMode] = typer.Option(None, help="Specify either [underline]'append'[/underline] or [underline]'drop'[/underline] when pushing data", show_default=False),
): ):
console = Console()
all_tables = ["agents","travels"] all_tables = ["agents","travels"]
common_tables = [item for item in tables if item in ["all"] + all_tables] common_tables = [item for item in tables if item in ["all"] + all_tables]
if len(common_tables) == 0: if len(common_tables) == 0:
@ -81,40 +85,53 @@ def population(
else: else:
log_file.touch() log_file.touch()
info_printer(f"Log file {log_file} created") info_printer(f"Log file {log_file} created")
with Progress(
SpinnerColumn(),
TextColumn("[progress.description]{task.description}"),
BarColumn(),
TaskProgressColumn(),
console=console
) as progress:
task = progress.add_task("[cyan]Processing chunks...", total=max_chunk)
current_chunk = 0 current_chunk = 0
processed_line = 0 processed_line = 0
if log:
with open(log_file,'r',encoding='utf-8') as l:
log_list = l.read().splitlines()
while current_chunk < max_chunk:
if current_chunk < range[0] or current_chunk > range[1]:
processed_line, buffer = buffer_creator(file, os.getenv("DIVIDER"), processed_line, int(os.getenv("CHUNK_SIZE")))
current_chunk += 1
continue
if log and current_chunk in log_list: continue
processed_line, buffer = buffer_creator(file, os.getenv("DIVIDER"), processed_line, int(os.getenv("CHUNK_SIZE")))
if "agents" in common_tables:
dataframe = process_buffer_population(buffer)
if cleandata:
dataframe = dataframe.dropna()
if push:
push_population("agents",dataframe, mode)
else:
write_population("agents",dataframe, file)
if "travels" in common_tables:
dataframe_travels = process_travels(buffer)
if push:
push_population("travels",dataframe_travels, mode)
else:
write_population("travels",dataframe_travels, file)
if log: if log:
f = open(log_file, "a") with open(log_file,'r',encoding='utf-8') as l:
f.write(f"\n{current_chunk}") log_list = l.read().splitlines()
f.close() while current_chunk < max_chunk:
current_chunk += 1 if current_chunk < range[0] or current_chunk > range[1]:
processed_line, buffer = buffer_creator(file, os.getenv("DIVIDER"), processed_line, int(os.getenv("CHUNK_SIZE")))
current_chunk += 1
continue
if log and current_chunk in log_list: continue
processed_line, buffer = buffer_creator(file, os.getenv("DIVIDER"), processed_line, int(os.getenv("CHUNK_SIZE")))
if "agents" in common_tables:
dataframe = process_buffer_population(buffer)
if cleandata:
dataframe = dataframe.dropna()
if push:
push_population("agents",dataframe, mode)
else:
write_population("agents",dataframe, file)
if "travels" in common_tables:
dataframe_travels = process_travels(buffer)
if push:
push_population("travels",dataframe_travels, mode)
else:
write_population("travels",dataframe_travels, file)
if log:
f = open(log_file, "a")
f.write(f"\n{current_chunk}")
f.close()
current_chunk += 1
time.sleep(2)
progress.update(task, advance=1)
progress.update(task, visible=False)
console.print("[green]Processing complete![/green]")
@app.command() @app.command()
def network( def network(