mirror of
https://github.com/knejadshamsi/zele-utils.git
synced 2024-12-22 23:55:53 -05:00
network functions added pt.1
This commit is contained in:
parent
3541f9f10e
commit
e8dda40c9f
@ -1,11 +1,11 @@
|
||||
from .population import process_buffer_population, process_travels, push_population,write_population
|
||||
from .population import process_buffer_population, process_travels, push_to_db_coords,write_to_csv
|
||||
from .network import process_network, push_network, network_write
|
||||
from .metro import process_metro, push_metro, metro_write
|
||||
from .bus import process_bus, push_bus, bus_write
|
||||
from .helpers import buffer_creator
|
||||
|
||||
__all__ = [
|
||||
'process_buffer_population', 'process_travels','push_population', 'write_population',
|
||||
'process_buffer_population', 'process_travels','push_to_db_coords', 'write_to_csv',
|
||||
'process_network', 'push_network', 'network_write',
|
||||
'process_metro', 'push_metro', 'metro_write',
|
||||
'process_bus', 'push_bus', 'bus_write',
|
||||
|
@ -1,3 +1,7 @@
|
||||
import geopandas, os, datetime
|
||||
from sqlalchemy import create_engine
|
||||
from geoalchemy2 import Geometry, WKTElement
|
||||
|
||||
def buffer_creator(file,divider,start_line, chunk_size):
|
||||
buffer = []
|
||||
line_number = start_line
|
||||
@ -13,4 +17,25 @@ def buffer_creator(file,divider,start_line, chunk_size):
|
||||
continue
|
||||
buffer.append(line.strip())
|
||||
return current_line,(' ').join(buffer)
|
||||
|
||||
|
||||
def push_to_db_coords(name, data, mode):
    """Push a coordinate-bearing DataFrame into the PostGIS database.

    Args:
        name: Target table name.
        data: DataFrame with a 'coordinates' column of shapely geometries
              (anything exposing `.wkt`).
        mode: Passed to `DataFrame.to_sql(if_exists=...)`.

    Connection details and tuning come from the environment:
    USER, PASS, HOST_NAME, DATA_BASE, SRID, CHUNK_SIZE.
    """
    # Bug fix: os.getenv returns strings (or None). WKTElement/Geometry
    # expect an integer SRID and to_sql expects an integer chunksize, so
    # convert once up front instead of passing raw env strings through.
    srid = int(os.getenv("SRID"))
    chunk_size = int(os.getenv("CHUNK_SIZE"))

    gdf = geopandas.GeoDataFrame(data, crs='EPSG:4326')
    gdf['geom'] = gdf['coordinates'].apply(lambda geometry: WKTElement(geometry.wkt, srid=srid))
    engine = create_engine(
        f'postgresql://{os.getenv("USER")}:{os.getenv("PASS")}@{os.getenv("HOST_NAME")}/{os.getenv("DATA_BASE")}',
        echo=False,
    )
    gdf.to_sql(
        name=name,
        con=engine,
        if_exists=mode,
        chunksize=chunk_size,
        dtype={'geom': Geometry('Point', srid=srid)},
        index=False,
    )
|
||||
|
||||
def write_to_csv(name, data, file):
    """Append *data* to a dated CSV file next to the input file.

    The output path is "<input stem>-<name>-<YYYYMMDD>.csv" in the same
    directory as *file*, so repeated runs on the same day accumulate rows
    in a single file.

    Args:
        name: Logical table name embedded in the output filename.
        data: pandas.DataFrame to write.
        file: pathlib.Path of the input file; only its parent/stem are used.
    """
    directory = file.parent
    # `stamp` instead of `id`: avoid shadowing the builtin.
    stamp = datetime.datetime.now().strftime("%Y%m%d")
    csv = directory / (file.stem + f"-{name}-{stamp}.csv")
    if csv.exists():
        # Bug fix: suppress the header when appending, otherwise every
        # append repeats the column-name row in the middle of the data.
        data.to_csv(csv, mode='a', header=False, index=False)
    else:
        data.to_csv(csv, index=False)
|
@ -1,9 +1,32 @@
|
||||
from bs4 import BeautifulSoup
|
||||
import pandas,pyproj, re
|
||||
from shapely.geometry import Point
|
||||
|
||||
def process_network(data, cleandata):
    """Placeholder for the network-processing entry point.

    NOTE(review): currently only echoes its arguments — no parsing happens
    here yet. Presumably the real work will delegate to process_nodes /
    process_links / process_links_attr below — TODO confirm intent.
    """
    print(data, cleandata)
|
||||
def process_nodes(data):
    """Parse <node> elements from a network XML document.

    Each node's XML attributes become one row of the returned DataFrame;
    columns are the union of attribute names encountered.
    """
    soup = BeautifulSoup(data, 'lxml-xml')
    rows = [dict(node.attrs) for node in soup.find_all("node")]
    return pandas.DataFrame(rows)
|
||||
|
||||
def push_network(data, mode):
    """Placeholder for pushing processed network data to the database.

    NOTE(review): currently only echoes its arguments — no DB write happens
    yet (cf. push_to_db_coords in helpers, which does the real push for
    other tables).
    """
    print(data, mode)
|
||||
def process_links(data):
    """Parse <link> elements from a network XML document.

    Each link's XML attributes become one row of the returned DataFrame;
    columns are the union of attribute names encountered.
    """
    soup = BeautifulSoup(data, 'lxml-xml')
    rows = [dict(link.attrs) for link in soup.find_all("link")]
    return pandas.DataFrame(rows)
|
||||
|
||||
def network_write(data):
    """Placeholder for writing processed network data to disk.

    NOTE(review): currently only echoes its argument — no file is written
    yet (cf. write_to_csv in helpers).
    """
    print(data)
|
||||
def process_links_attr(data):
    """Parse nested <attribute> elements of each <link> in a network XML.

    For every link that carries <attribute> children, builds a row mapping
    each attribute's "name" to its text content, plus the link's own "id".
    Links without attributes are skipped.

    Returns:
        pandas.DataFrame with one row per attributed link.
    """
    rows = []
    soup = BeautifulSoup(data, 'lxml-xml')
    for link in soup.find_all("link"):
        attributes = link.find_all("attribute")
        # Guard clause instead of if/else-continue: skip attribute-less links.
        if not attributes:
            continue
        row = {attr["name"]: attr.get_text() for attr in attributes}
        # Bug fix: bs4 Tag objects have no `.getattr()` method — the
        # original `element.getattr("id")` raised AttributeError for every
        # attributed link. `.get()` is the correct attribute accessor.
        row["id"] = link.get("id")
        rows.append(row)

    return pandas.DataFrame(rows)
|
@ -1,8 +1,6 @@
|
||||
from bs4 import BeautifulSoup
|
||||
import pandas,geopandas, pyproj, re, os, datetime
|
||||
import pandas,pyproj, re
|
||||
from shapely.geometry import Point
|
||||
from sqlalchemy import create_engine
|
||||
from geoalchemy2 import Geometry, WKTElement
|
||||
|
||||
def camel_to_snake(name):
    """Convert a CamelCase identifier to snake_case."""
    # Insert an underscore before every uppercase letter that is not at
    # the start of the string, then lowercase the whole result.
    underscored = re.sub(r'(?<!^)(?=[A-Z])', '_', name)
    return underscored.lower()
|
||||
@ -52,24 +50,3 @@ def process_travels(data):
|
||||
return pandas.DataFrame(activities_list)
|
||||
|
||||
|
||||
def push_population(name, data, mode):
    """Push a coordinate-bearing population DataFrame into PostGIS.

    NOTE(review): this is a duplicate of push_to_db_coords in helpers —
    the __init__/main diffs suggest callers are migrating to that name;
    consider deleting this copy once migration completes.

    Args:
        name: Target table name.
        data: DataFrame with a 'coordinates' column of shapely geometries.
        mode: Passed to `DataFrame.to_sql(if_exists=...)`.
    """
    # Bug fix: os.getenv returns strings (or None). WKTElement/Geometry
    # expect an integer SRID and to_sql expects an integer chunksize.
    srid = int(os.getenv("SRID"))
    chunk_size = int(os.getenv("CHUNK_SIZE"))

    gdf = geopandas.GeoDataFrame(data, crs='EPSG:4326')
    gdf['geom'] = gdf['coordinates'].apply(lambda geometry: WKTElement(geometry.wkt, srid=srid))
    engine = create_engine(
        f'postgresql://{os.getenv("USER")}:{os.getenv("PASS")}@{os.getenv("HOST_NAME")}/{os.getenv("DATA_BASE")}',
        echo=False,
    )
    gdf.to_sql(
        name=name,
        con=engine,
        if_exists=mode,
        chunksize=chunk_size,
        dtype={'geom': Geometry('Point', srid=srid)},
        index=False,
    )
|
||||
|
||||
def write_population(name, data, file):
    """Append population *data* to a dated CSV next to the input file.

    NOTE(review): duplicate of write_to_csv in helpers — callers appear to
    be migrating to that name; consider removing this copy afterwards.

    Args:
        name: Logical table name embedded in the output filename.
        data: pandas.DataFrame to write.
        file: pathlib.Path of the input file; only its parent/stem are used.
    """
    directory = file.parent
    # `stamp` instead of `id`: avoid shadowing the builtin.
    stamp = datetime.datetime.now().strftime("%Y%m%d")
    csv = directory / (file.stem + f"-{name}-{stamp}.csv")
    if csv.exists():
        # Bug fix: suppress the header when appending, otherwise every
        # append repeats the column-name row in the middle of the data.
        data.to_csv(csv, mode='a', header=False, index=False)
    else:
        data.to_csv(csv, index=False)
|
89
main.py
89
main.py
@ -10,7 +10,8 @@ from rich.progress import Progress, SpinnerColumn, TextColumn, BarColumn, TaskPr
|
||||
import time
|
||||
|
||||
from classes import City, DBMode, RTMode
|
||||
from functions import buffer_creator, process_buffer_population,process_travels, push_population, write_population
|
||||
from functions import buffer_creator, process_buffer_population,process_travels, push_to_db_coords, write_to_csv
|
||||
from functions import process_nodes,process_links,process_links_attr
|
||||
from styles import print_help
|
||||
|
||||
called= "population"
|
||||
@ -112,16 +113,16 @@ def population(
|
||||
if cleandata:
|
||||
dataframe = dataframe.dropna()
|
||||
if push:
|
||||
push_population("agents",dataframe, mode)
|
||||
push_to_db_coords("agents",dataframe, mode)
|
||||
else:
|
||||
write_population("agents",dataframe, file)
|
||||
write_to_csv("agents",dataframe, file)
|
||||
|
||||
if "travels" in common_tables:
|
||||
dataframe_travels = process_travels(buffer)
|
||||
if push:
|
||||
push_population("travels",dataframe_travels, mode)
|
||||
push_to_db_coords("travels",dataframe_travels, mode)
|
||||
else:
|
||||
write_population("travels",dataframe_travels, file)
|
||||
write_to_csv("travels",dataframe_travels, file)
|
||||
|
||||
if log:
|
||||
f = open(log_file, "a")
|
||||
@ -135,19 +136,77 @@ def population(
|
||||
|
||||
@app.command()
|
||||
def network(
|
||||
file: Annotated[Path, typer.Argument(help="Relative path to the file.", show_default=False)],
|
||||
cleandata: bool = typer.Option(False, "--cleandata", "-cd", help="Clean the data if this flag is used."),
|
||||
push: bool = typer.Option(False, "--push", "-p", help="Push the data into Database.\nIf you want the output to be saved in [green bold].csv[/green bold] format, do not mention this flag."),
|
||||
mode: Optional[DBMode] = typer.Option(None, help="Specify either 'amend' or 'drop' when pushing data", show_default=False),
|
||||
file: Annotated[Path, typer.Argument(help="Provide the relative path to the [yellow bold underline]XML file[/yellow bold underline].", show_default=False)],
|
||||
tables: list[str] = typer.Argument(..., help="Tables to include: [underline bold]nodes[/underline bold], [underline bold]links[/underline bold], [underline bold]links_attr[/underline bold]. Use [underline bold]all[/underline bold] for everything.",show_default=False),
|
||||
cleandata: bool = typer.Option(False, "--cleandata", "-cd", help="Drop the rows that have missing values."),
|
||||
push: bool = typer.Option(False, "--push", "-p", help="Save the output directly to the database When mentioned. Otherwise, Saves as a [green bold]CSV file[/green bold] in the input directory]"),
|
||||
mode: Optional[DBMode] = typer.Option(None, help="Specify either [underline]'append'[/underline] or [underline]'drop'[/underline] when pushing data", show_default=False),
|
||||
):
|
||||
if not file.exists():
|
||||
error_parser("File did does not exist!")
|
||||
console = Console()
|
||||
all_tables = ["nodes","links","links_attr"]
|
||||
common_tables = [item for item in tables if item in ["all"] + all_tables]
|
||||
if len(common_tables) == 0:
|
||||
error_printer("Incorrect table input")
|
||||
raise typer.Exit()
|
||||
elif "all" in common_tables:
|
||||
common_tables = all_tables
|
||||
info_printer(f"Tables to inlude: {common_tables}")
|
||||
if not file.exists():
|
||||
error_printer("File not found")
|
||||
raise typer.Exit()
|
||||
try:
|
||||
f = open(file, 'r', encoding='utf-8')
|
||||
success_printer("File Opened")
|
||||
except:
|
||||
error_printer("Unable to read file")
|
||||
raise typer.Exit()
|
||||
|
||||
count = sum(1 for _ in f)
|
||||
if count == 0:
|
||||
error_printer("File empty")
|
||||
raise typer.Exit()
|
||||
data = process_network(file,cleandata)
|
||||
if push:
|
||||
push_network(data, mode)
|
||||
else:
|
||||
network_write(data)
|
||||
success_printer(f"{count + 1} lines read")
|
||||
f.close()
|
||||
BUFFER = []
|
||||
DEVIDER_COUNT = 0
|
||||
with open(file,'r',encoding='utf-8') as f:
|
||||
for line in f:
|
||||
if line.strip() == os.getenv("DIVIDER"):
|
||||
DEVIDER_COUNT = DEVIDER_COUNT + 1
|
||||
if DEVIDER_COUNT == 2 and "nodes" in common_tables:
|
||||
dataframe = process_nodes(BUFFER)
|
||||
if cleandata:
|
||||
dataframe = dataframe.dropna()
|
||||
if push:
|
||||
push_to_db_coords("nodes", dataframe, mode)
|
||||
else:
|
||||
write_to_csv("nodes", dataframe,file)
|
||||
BUFFER = []
|
||||
if DEVIDER_COUNT == 3:
|
||||
if "links" in common_tables:
|
||||
dataframe = process_links(BUFFER)
|
||||
if cleandata:
|
||||
dataframe = dataframe.dropna()
|
||||
if push:
|
||||
push_to_db_coords("links", dataframe, mode)
|
||||
else:
|
||||
write_to_csv("links", dataframe, file)
|
||||
if "links_attr" in common_tables:
|
||||
dataframe = process_links_attr(BUFFER)
|
||||
if cleandata:
|
||||
dataframe = dataframe.dropna()
|
||||
if push:
|
||||
push_to_db_coords("links_attr", dataframe, mode)
|
||||
else:
|
||||
write_to_csv("links_attr", dataframe, file)
|
||||
BUFFER = []
|
||||
if DEVIDER_COUNT < 1:
|
||||
continue
|
||||
if DEVIDER_COUNT > 2:
|
||||
continue
|
||||
BUFFER.append(line)
|
||||
console.print("[green]Processing complete![/green]")
|
||||
|
||||
@app.command()
|
||||
def metro(
|
||||
|
Loading…
Reference in New Issue
Block a user