mirror of
https://github.com/knejadshamsi/zele-utils.git
synced 2024-11-14 17:40:28 -05:00
progress bar added.
This commit is contained in:
parent
8cd94090a3
commit
3541f9f10e
|
@ -10,7 +10,7 @@ def camel_to_snake(name):
|
||||||
def process_buffer_population(data):
|
def process_buffer_population(data):
|
||||||
transformer = pyproj.Transformer.from_crs('EPSG:2950', 'EPSG:4326', always_xy=True)
|
transformer = pyproj.Transformer.from_crs('EPSG:2950', 'EPSG:4326', always_xy=True)
|
||||||
PERSON_LIST = []
|
PERSON_LIST = []
|
||||||
elements = BeautifulSoup(data,'html.parser')
|
elements = BeautifulSoup(data, 'lxml-xml')
|
||||||
for person in elements.find_all('person'):
|
for person in elements.find_all('person'):
|
||||||
person_obj = {}
|
person_obj = {}
|
||||||
person_obj['id'] = person['id']
|
person_obj['id'] = person['id']
|
||||||
|
@ -25,8 +25,8 @@ def process_buffer_population(data):
|
||||||
|
|
||||||
def process_travels(data):
|
def process_travels(data):
|
||||||
transformer = pyproj.Transformer.from_crs('EPSG:2950', 'EPSG:4326', always_xy=True)
|
transformer = pyproj.Transformer.from_crs('EPSG:2950', 'EPSG:4326', always_xy=True)
|
||||||
activities_list = [] = []
|
activities_list = []
|
||||||
elements = BeautifulSoup(data, 'xml')
|
elements = BeautifulSoup(data, 'lxml-xml')
|
||||||
for person in elements.find_all('person'):
|
for person in elements.find_all('person'):
|
||||||
person_id = person['id']
|
person_id = person['id']
|
||||||
plan = person.find('plan')
|
plan = person.find('plan')
|
||||||
|
|
81
main.py
81
main.py
|
@ -5,6 +5,9 @@ from typing_extensions import Annotated
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Tuple
|
from typing import Tuple
|
||||||
|
from rich.console import Console
|
||||||
|
from rich.progress import Progress, SpinnerColumn, TextColumn, BarColumn, TaskProgressColumn
|
||||||
|
import time
|
||||||
|
|
||||||
from classes import City, DBMode, RTMode
|
from classes import City, DBMode, RTMode
|
||||||
from functions import buffer_creator, process_buffer_population,process_travels, push_population, write_population
|
from functions import buffer_creator, process_buffer_population,process_travels, push_population, write_population
|
||||||
|
@ -33,6 +36,7 @@ def population(
|
||||||
push: bool = typer.Option(False, "--push", "-p", help="Save the output directly to the database When mentioned. Otherwise, Saves as a [green bold]CSV file[/green bold] in the input directory]"),
|
push: bool = typer.Option(False, "--push", "-p", help="Save the output directly to the database When mentioned. Otherwise, Saves as a [green bold]CSV file[/green bold] in the input directory]"),
|
||||||
mode: Optional[DBMode] = typer.Option(None, help="Specify either [underline]'append'[/underline] or [underline]'drop'[/underline] when pushing data", show_default=False),
|
mode: Optional[DBMode] = typer.Option(None, help="Specify either [underline]'append'[/underline] or [underline]'drop'[/underline] when pushing data", show_default=False),
|
||||||
):
|
):
|
||||||
|
console = Console()
|
||||||
all_tables = ["agents","travels"]
|
all_tables = ["agents","travels"]
|
||||||
common_tables = [item for item in tables if item in ["all"] + all_tables]
|
common_tables = [item for item in tables if item in ["all"] + all_tables]
|
||||||
if len(common_tables) == 0:
|
if len(common_tables) == 0:
|
||||||
|
@ -81,40 +85,53 @@ def population(
|
||||||
else:
|
else:
|
||||||
log_file.touch()
|
log_file.touch()
|
||||||
info_printer(f"Log file {log_file} created")
|
info_printer(f"Log file {log_file} created")
|
||||||
|
|
||||||
|
with Progress(
|
||||||
|
SpinnerColumn(),
|
||||||
|
TextColumn("[progress.description]{task.description}"),
|
||||||
|
BarColumn(),
|
||||||
|
TaskProgressColumn(),
|
||||||
|
console=console
|
||||||
|
) as progress:
|
||||||
|
task = progress.add_task("[cyan]Processing chunks...", total=max_chunk)
|
||||||
|
|
||||||
current_chunk = 0
|
current_chunk = 0
|
||||||
processed_line = 0
|
processed_line = 0
|
||||||
if log:
|
|
||||||
with open(log_file,'r',encoding='utf-8') as l:
|
|
||||||
log_list = l.read().splitlines()
|
|
||||||
while current_chunk < max_chunk:
|
|
||||||
if current_chunk < range[0] or current_chunk > range[1]:
|
|
||||||
processed_line, buffer = buffer_creator(file, os.getenv("DIVIDER"), processed_line, int(os.getenv("CHUNK_SIZE")))
|
|
||||||
current_chunk += 1
|
|
||||||
continue
|
|
||||||
if log and current_chunk in log_list: continue
|
|
||||||
processed_line, buffer = buffer_creator(file, os.getenv("DIVIDER"), processed_line, int(os.getenv("CHUNK_SIZE")))
|
|
||||||
if "agents" in common_tables:
|
|
||||||
dataframe = process_buffer_population(buffer)
|
|
||||||
if cleandata:
|
|
||||||
dataframe = dataframe.dropna()
|
|
||||||
if push:
|
|
||||||
push_population("agents",dataframe, mode)
|
|
||||||
else:
|
|
||||||
write_population("agents",dataframe, file)
|
|
||||||
|
|
||||||
if "travels" in common_tables:
|
|
||||||
dataframe_travels = process_travels(buffer)
|
|
||||||
if push:
|
|
||||||
push_population("travels",dataframe_travels, mode)
|
|
||||||
else:
|
|
||||||
write_population("travels",dataframe_travels, file)
|
|
||||||
|
|
||||||
if log:
|
if log:
|
||||||
f = open(log_file, "a")
|
with open(log_file,'r',encoding='utf-8') as l:
|
||||||
f.write(f"\n{current_chunk}")
|
log_list = l.read().splitlines()
|
||||||
f.close()
|
while current_chunk < max_chunk:
|
||||||
current_chunk += 1
|
if current_chunk < range[0] or current_chunk > range[1]:
|
||||||
|
processed_line, buffer = buffer_creator(file, os.getenv("DIVIDER"), processed_line, int(os.getenv("CHUNK_SIZE")))
|
||||||
|
current_chunk += 1
|
||||||
|
continue
|
||||||
|
if log and current_chunk in log_list: continue
|
||||||
|
processed_line, buffer = buffer_creator(file, os.getenv("DIVIDER"), processed_line, int(os.getenv("CHUNK_SIZE")))
|
||||||
|
if "agents" in common_tables:
|
||||||
|
dataframe = process_buffer_population(buffer)
|
||||||
|
if cleandata:
|
||||||
|
dataframe = dataframe.dropna()
|
||||||
|
if push:
|
||||||
|
push_population("agents",dataframe, mode)
|
||||||
|
else:
|
||||||
|
write_population("agents",dataframe, file)
|
||||||
|
|
||||||
|
if "travels" in common_tables:
|
||||||
|
dataframe_travels = process_travels(buffer)
|
||||||
|
if push:
|
||||||
|
push_population("travels",dataframe_travels, mode)
|
||||||
|
else:
|
||||||
|
write_population("travels",dataframe_travels, file)
|
||||||
|
|
||||||
|
if log:
|
||||||
|
f = open(log_file, "a")
|
||||||
|
f.write(f"\n{current_chunk}")
|
||||||
|
f.close()
|
||||||
|
current_chunk += 1
|
||||||
|
time.sleep(2)
|
||||||
|
progress.update(task, advance=1)
|
||||||
|
progress.update(task, visible=False)
|
||||||
|
console.print("[green]Processing complete![/green]")
|
||||||
|
|
||||||
@app.command()
|
@app.command()
|
||||||
def network(
|
def network(
|
||||||
|
|
Loading…
Reference in New Issue
Block a user