From e8dda40c9f1391401704600bd7cc09e3b3961f7c Mon Sep 17 00:00:00 2001 From: Kian <105409698+knejadshamsi@users.noreply.github.com> Date: Wed, 11 Sep 2024 10:56:17 -0400 Subject: [PATCH] network functions added pt.1 --- functions/__Init__.py | 4 +- functions/helpers.py | 27 ++++++++++++- functions/network.py | 35 +++++++++++++--- functions/population.py | 25 +----------- main.py | 89 ++++++++++++++++++++++++++++++++++------- 5 files changed, 132 insertions(+), 48 deletions(-) diff --git a/functions/__Init__.py b/functions/__Init__.py index de344b9..1a55335 100644 --- a/functions/__Init__.py +++ b/functions/__Init__.py @@ -1,11 +1,11 @@ -from .population import process_buffer_population, process_travels, push_population,write_population +from .population import process_buffer_population, process_travels, push_to_db_coords,write_to_csv from .network import process_network, push_network, network_write from .metro import process_metro, push_metro, metro_write from .bus import process_bus, push_bus, bus_write from .helpers import buffer_creator __all__ = [ - 'process_buffer_population', 'process_travels','push_population', 'write_population', + 'process_buffer_population', 'process_travels','push_to_db_coords', 'write_to_csv', 'process_network', 'push_network', 'network_write', 'process_metro', 'push_metro', 'metro_write', 'process_bus', 'push_bus', 'bus_write', diff --git a/functions/helpers.py b/functions/helpers.py index b7f6219..ad4555a 100644 --- a/functions/helpers.py +++ b/functions/helpers.py @@ -1,3 +1,7 @@ +import geopandas, os, datetime +from sqlalchemy import create_engine +from geoalchemy2 import Geometry, WKTElement + def buffer_creator(file,divider,start_line, chunk_size): buffer = [] line_number = start_line @@ -13,4 +17,25 @@ def buffer_creator(file,divider,start_line, chunk_size): continue buffer.append(line.strip()) return current_line,(' ').join(buffer) - \ No newline at end of file + +def push_to_db_coords(name,data,mode): + GDF = geopandas.GeoDataFrame(data, crs='EPSG:4326') + GDF['geom'] = GDF['coordinates'].apply(lambda x: WKTElement(x.wkt, srid=os.getenv("SRID"))) + engine = create_engine(f'postgresql://{os.getenv("USER")}:{os.getenv("PASS")}@{os.getenv("HOST_NAME")}/{os.getenv("DATA_BASE")}', echo=False) + GDF.to_sql( + name=name, + con=engine, + if_exists=mode, + chunksize=os.getenv("CHUNK_SIZE"), + dtype={'geom': Geometry('Point', srid=os.getenv("SRID"))}, + index=False + ) + +def write_to_csv(name,data, file): + directory = file.parent + id = datetime.datetime.now().strftime("%Y%m%d") + csv = directory / (file.stem + f"-{name}-{id}.csv") + if csv.exists(): + data.to_csv(csv, mode='a',index=False) + else: + data.to_csv(csv,index=False) \ No newline at end of file diff --git a/functions/network.py b/functions/network.py index 097cdbd..655eb7a 100644 --- a/functions/network.py +++ b/functions/network.py @@ -1,9 +1,32 @@ +from bs4 import BeautifulSoup +import pandas,pyproj, re +from shapely.geometry import Point -def process_network(data, cleandata): - print(data, cleandata) +def process_nodes(data): + ELEMENT_LIST = [] + elements = BeautifulSoup(data,'lxml-xml') + for element in elements.find_all("node"): + ELEMENT_LIST.append(dict(element.attrs)) + return pandas.DataFrame(ELEMENT_LIST) -def push_network(data,mode): - print(data,mode) +def process_links(data): + ELEMENT_LIST = [] + elements = BeautifulSoup(data,'lxml-xml') + for element in elements.find_all("link"): + ELEMENT_LIST.append(dict(element.attrs)) + return pandas.DataFrame(ELEMENT_LIST) -def network_write(data): - print(data) \ No newline at end of file +def process_links_attr(data): + ELEMENT_LIST = [] + elements = BeautifulSoup(data,'lxml-xml') + for element in elements.find_all("link"): + ELEMENT_DICT = {} + if element.find_all("attribute"): + for attr in element.find_all("attribute"): + ELEMENT_DICT.update({attr["name"]: attr.get_text()}) + else: + continue + ELEMENT_DICT["id"]=element.getattr("id") + ELEMENT_LIST.append(ELEMENT_DICT) + + return pandas.DataFrame(ELEMENT_LIST) \ No newline at end of file diff --git a/functions/population.py b/functions/population.py index 91285f6..028cd6c 100644 --- a/functions/population.py +++ b/functions/population.py @@ -1,8 +1,6 @@ from bs4 import BeautifulSoup -import pandas,geopandas, pyproj, re, os, datetime +import pandas,pyproj, re from shapely.geometry import Point -from sqlalchemy import create_engine -from geoalchemy2 import Geometry, WKTElement def camel_to_snake(name): return re.sub(r'(? 2: + continue + BUFFER.append(line) + console.print("[green]Processing complete![/green]") @app.command() def metro(