From 52be6c060eef26ef2ed189539e3cf62ec51f84cd Mon Sep 17 00:00:00 2001 From: Kian <105409698+knejadshamsi@users.noreply.github.com> Date: Tue, 10 Sep 2024 17:40:50 -0400 Subject: [PATCH] population added + help improved --- classes.py | 12 ++++ functions/__Init__.py | 8 ++- functions/helpers.py | 16 +++++ functions/population.py | 47 +++++++++++++-- main.py | 129 ++++++++++++++++++++++++++++++---------- styles/__Init__.py | 5 ++ styles/help.py | 38 ++++++++++++ 7 files changed, 214 insertions(+), 41 deletions(-) create mode 100644 classes.py create mode 100644 functions/helpers.py create mode 100644 styles/__Init__.py create mode 100644 styles/help.py diff --git a/classes.py b/classes.py new file mode 100644 index 0000000..dbe3458 --- /dev/null +++ b/classes.py @@ -0,0 +1,12 @@ +from enum import Enum + +class City(str, Enum): + mtl = "mtl" + +class DBMode(str, Enum): + drop = "drop" + append = "append" + +class RTMode(str, Enum): + online = "online" + offline = "offline" \ No newline at end of file diff --git a/functions/__Init__.py b/functions/__Init__.py index 5e947c4..9853eca 100644 --- a/functions/__Init__.py +++ b/functions/__Init__.py @@ -1,11 +1,13 @@ -from .population import process_population, push_population,population_write +from .population import process_buffer_population, push_population,write_population from .network import process_network, push_network, network_write from .metro import process_metro, push_metro, metro_write from .bus import process_bus, push_bus, bus_write +from .helpers import buffer_creator __all__ = [ - 'process_population', 'push_population', 'population_write', + 'process_buffer_population', 'push_population', 'write_population', 'process_network', 'push_network', 'network_write', 'process_metro', 'push_metro', 'metro_write', - 'process_bus', 'push_bus', 'bus_write' + 'process_bus', 'push_bus', 'bus_write', + 'buffer_creator' ] diff --git a/functions/helpers.py b/functions/helpers.py new file mode 100644 index 0000000..b7f6219 
def buffer_creator(file, divider: str, start_line: int, chunk_size: int) -> tuple[int, str]:
    """Read the next chunk of *file* and return it as one space-joined string.

    Lines up to and including ``start_line`` (1-based) are skipped, so passing
    the line number returned by the previous call resumes right after the
    point where that call stopped. Every remaining line is stripped; lines
    equal to ``divider`` are counted and dropped, and reading stops on the
    ``chunk_size``-th divider. If the file ends first, everything read so far
    is returned.

    Args:
        file: Path of the UTF-8 text file to read.
        divider: Stripped line content that separates records.
        start_line: Number of leading lines to skip; ``0`` starts at the top.
        chunk_size: Number of divider lines that close one chunk.

    Returns:
        A ``(last_line, text)`` tuple: the 1-based number of the last line
        read (``0`` for an empty file) and the chunk's non-divider lines
        joined with single spaces.
    """
    collected = []
    last_line = 0  # stays 0 when the file has no lines at all
    dividers_seen = 0
    with open(file, "r", encoding="utf-8") as handle:
        for last_line, raw in enumerate(handle, start=1):
            if last_line <= start_line:
                continue
            text = raw.strip()  # strip once; reused for the test and the buffer
            if text != divider:
                collected.append(text)
                continue
            dividers_seen += 1
            if dividers_seen == chunk_size:
                # The closing divider is consumed, so the returned line number
                # lets the next call start on the following record.
                break
    return last_line, " ".join(collected)
0: + success_printer(f"{max_chunk} Chunks found") + elif max_chunk == 0: + error_printer("Unable to find Chunks") + raise typer.Exit() + if not range: + range = [0,max_chunk-2] + info_printer(f"Chunk Range: {range}") + directory = file.parent + log_file = directory / (file.stem + ".log") + if not log: + notice_printer("Log file not created") + else: + if log_file.exists(): + notice_printer(f"Log file {log_file} already exists") + else: + log_file.touch() + info_printer(f"Log file {log_file} created") + + current_chunk = 0 + processed_line = 0 + if log: + with open(log_file,'r',encoding='utf-8') as l: + log_list = l.read().splitlines() + while current_chunk < max_chunk: + if log and current_chunk in log_list: continue + processed_line, buffer = buffer_creator(file, os.getenv("DIVIDER"), processed_line, int(os.getenv("CHUNK_SIZE"))) + dataframe = process_buffer_population(buffer) + print(dataframe) + if cleandata: + dataframe = dataframe.dropna() + if push: + push_population(dataframe, mode) + else: + write_population(dataframe,file) + if log: + f = open(log_file, "a") + f.write(f"\n{current_chunk}") + f.close() + current_chunk += 1 @app.command() def network( file: Annotated[Path, typer.Argument(help="Relative path to the file.", show_default=False)], cleandata: bool = typer.Option(False, "--cleandata", "-cd", help="Clean the data if this flag is used."), - push: bool = typer.Option(False, "--push", "-p", help="Push the data into Database."), + push: bool = typer.Option(False, "--push", "-p", help="Push the data into Database.\nIf you want the output to be saved in [green bold].csv[/green bold] format, do not mention this flag."), mode: Optional[DBMode] = typer.Option(None, help="Specify either 'amend' or 'drop' when pushing data", show_default=False), ): if not file.exists(): - print("File did does not exist!") + error_parser("File did does not exist!") raise typer.Exit() data = process_network(file,cleandata) if push: diff --git a/styles/__Init__.py 
import sys

from rich.console import Console, Group
from rich.padding import Padding
from rich.panel import Panel
from rich.table import Table


def population_help():
    """Build the "About" panel printed with the ``population`` command's help.

    Returns:
        A ``Padding`` wrapping a titled ``Panel`` that holds the description
        text, a two-row sample of the output table and the closing notes.
    """
    line1 = "This CLI tool processes [yellow bold]MATSim population XML files[/yellow bold] and prepares the data for storage in either [green bold].csv[/green bold] file or in [blue bold]PostgreSQL[/blue bold] database with [blue bold]PostGIS[/blue bold] integration."
    line2 = "It extracts key data such as coordinates, converting them into a format ready for geospatial.\nUse the available [underline bold]options[/underline bold] to modify the behavior, such as [underline bold]cleaning[/underline bold] the data with missing values, or choosing to either [underline bold]replace[/underline bold] or [underline bold]append[/underline bold] data in the target table."
    line3 = "The resulting table structure includes columns such as the following:"
    # Only change to the help text: the misspelling "prcoess" is corrected.
    line4 = "[red bold]NOTE:[/red bold] Ensure PostgreSQL connection details are provided via a [underline bold bright_cyan].env[/underline bold bright_cyan] file.\n[red bold]NOTE:[/red bold] By default if a [underline].log[/underline] exist with the same name in the same directory of file, It will use that to process the file"

    # Sample rows illustrating the columns of the resulting table.
    table = Table(
        "id", "lon", "lat", "geom", "time", "age", "sex",
        "person_id", "economic_sector", "household_id", "household_income",
    )
    table.add_row("1", "45.89977111012078", "-73.26605847316777", "0101000020E61000005CED21B32BF3464013451E1A075152C0", "07:00:00", "4", "1", "1", "0", "1", "4")
    table.add_row("2", "45.89977111012078", "-73.26605847316777", "0101000020E61000005CED21B32BF3464013451E1A075152C0", "08:00:00", "4", "2", "2", "0", "1", "4")

    lines = f"{line1} \n{line2} \n{line3}"
    body = Group(lines, Padding(table, (1, 0)), line4)
    panel = Panel(Padding(body, (1, 1)), title="About", title_align="left")
    return Padding(panel, (1, 0, 0, 0))


def _placeholder_panel():
    """Return the stub "About" panel shared by commands without real help text."""
    return Panel(Padding("Hello network", (1, 1)), title="About", title_align="left")


def network_help():
    """Return the (placeholder) "About" panel for the ``network`` command."""
    return _placeholder_panel()


def metro_help():
    """Return the (placeholder) "About" panel for the ``metro`` command."""
    return _placeholder_panel()


def bus_help():
    """Return the (placeholder) "About" panel for the ``bus`` command."""
    return _placeholder_panel()


# Maps the first command-line argument to the function building its panel.
_HELP_BUILDERS = {
    "population": population_help,
    "network": network_help,
    "metro": metro_help,
    "bus": bus_help,
}


def print_help():
    """Print the command-specific "About" panel when help was requested.

    Nothing is printed unless ``--help`` or ``-h`` appears in ``sys.argv``
    and ``sys.argv[1]`` names a command that has a panel builder.
    """
    argv = sys.argv
    if "--help" not in argv and "-h" not in argv:
        return
    command = argv[1] if len(argv) > 1 else None
    builder = _HELP_BUILDERS.get(command)
    if builder is not None:
        # The console is only needed once there is something to render.
        Console().print(builder())