network functions added pt.2

Kian 2024-09-12 18:44:32 -04:00
parent e8dda40c9f
commit 24ae7adfc8
3 changed files with 110 additions and 58 deletions

functions/__init__.py

@@ -1,12 +1,12 @@
-from .population import process_buffer_population, process_travels, push_to_db_coords,write_to_csv
-from .network import process_network, push_network, network_write
+from .population import process_buffer_population, process_travels
+from .network import process_nodes, process_links, process_links_attr
 from .metro import process_metro, push_metro, metro_write
 from .bus import process_bus, push_bus, bus_write
-from .helpers import buffer_creator
+from .helpers import buffer_creator,push_to_db_coords,write_to_csv
 __all__ = [
     'process_buffer_population', 'process_travels','push_to_db_coords', 'write_to_csv',
-    'process_network', 'push_network', 'network_write',
+    'process_nodes', 'process_links', 'process_links_attr',
     'process_metro', 'push_metro', 'metro_write',
     'process_bus', 'push_bus', 'bus_write',
     'buffer_creator'

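A small sketch (not part of the commit) of what this re-export keeps working downstream, assuming the package is the functions/ directory that main.py imports from:

    # push_to_db_coords/write_to_csv moved from .population to .helpers,
    # but the package root still exports them, so callers are unaffected.
    from functions import push_to_db_coords, write_to_csv
    from functions import process_nodes, process_links, process_links_attr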
functions/network.py

@@ -3,30 +3,32 @@ import pandas,pyproj, re
 from shapely.geometry import Point
 def process_nodes(data):
+    transformer = pyproj.Transformer.from_crs('EPSG:2950', 'EPSG:4326', always_xy=True)
     ELEMENT_LIST = []
-    elements = BeautifulSoup(data,'lxml-xml')
-    for element in elements.find_all("node"):
-        ELEMENT_LIST.append(dict(element.attrs))
+    for node in data:
+        attr = dict(node.attrs)
+        # always_xy=True makes the transformer return (lon, lat) for EPSG:4326
+        lon, lat = transformer.transform(attr['x'], attr['y'])
+        attr['coordinates'] = Point(lon, lat)
+        ELEMENT_LIST.append(attr)
     return pandas.DataFrame(ELEMENT_LIST)
 def process_links(data):
     ELEMENT_LIST = []
-    elements = BeautifulSoup(data,'lxml-xml')
-    for element in elements.find_all("link"):
+    for element in data:
         ELEMENT_LIST.append(dict(element.attrs))
     return pandas.DataFrame(ELEMENT_LIST)
 def process_links_attr(data):
     ELEMENT_LIST = []
-    elements = BeautifulSoup(data,'lxml-xml')
-    for element in elements.find_all("link"):
-        ELEMENT_DICT = {}
-        if element.find_all("attribute"):
-            for attr in element.find_all("attribute"):
-                ELEMENT_DICT.update({attr["name"]: attr.get_text()})
-        else:
+    for element in data:
+        if not element.find("attributes"):
             continue
-        ELEMENT_DICT["id"]=element.getattr("id")
+        ELEMENT_DICT = {"id": element.get("id")}
+        for attr in element.find("attributes").find_all("attribute"):
+            attr_name = attr.get("name")
+            attr_value = attr.get_text()
+            # make names like "osm:way:highway" safe as column names
+            attr_name = attr_name.replace(":", "_")
+            ELEMENT_DICT[attr_name] = attr_value
         ELEMENT_LIST.append(ELEMENT_DICT)
     return pandas.DataFrame(ELEMENT_LIST)

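For orientation, a minimal usage sketch (not part of the commit): the process_* functions now take pre-parsed BeautifulSoup tags rather than raw XML strings, so callers split and parse the file first. The XML fragment, coordinates, and the functions import path are invented for illustration; pyproj coerces the string x/y attributes to floats, and EPSG:2950 is NAD83(CSRS) / MTM zone 8.

    from bs4 import BeautifulSoup
    from functions import process_nodes, process_links, process_links_attr

    # Invented network fragment in the same shape the parser expects.
    SAMPLE = """
    <network>
      <nodes><node id="1" x="298000.0" y="5040000.0"/></nodes>
      <links>
        <link id="10" from="1" to="1" length="120.5">
          <attributes>
            <attribute name="osm:way:highway">residential</attribute>
          </attributes>
        </link>
      </links>
    </network>"""

    soup = BeautifulSoup(SAMPLE, "lxml-xml")
    nodes_df = process_nodes(soup.find_all("node"))       # id, x, y + shapely Point
    links_df = process_links(soup.find_all("link"))       # flat <link> attributes
    attrs_df = process_links_attr(soup.find_all("link"))  # nested attrs, ':' -> '_'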
main.py

@@ -1,4 +1,5 @@
-import os, typer
+import os,math,typer
+from bs4 import BeautifulSoup
 from dotenv import load_dotenv
 from rich import print
 from typing_extensions import Annotated
@@ -14,6 +15,8 @@ from functions import buffer_creator, process_buffer_population,process_travels,
 from functions import process_nodes,process_links,process_links_attr
 from styles import print_help
+import xml.etree.ElementTree as ET
 called= "population"
 app = typer.Typer(rich_markup_mode="rich")
 load_dotenv()
@@ -94,7 +97,10 @@ def population(
         TaskProgressColumn(),
         console=console
     ) as progress:
-        task = progress.add_task("[cyan]Processing chunks...", total=max_chunk)
+        if max_chunk > 2:
+            task = progress.add_task("[cyan]Processing chunks...", total=max_chunk)
+        else:
+            task = progress.add_task("[cyan]Processing chunks...", total=max_chunk, visible=False)
         current_chunk = 0
         processed_line = 0
@@ -166,47 +172,91 @@ def network(
         error_printer("File empty")
         raise typer.Exit()
     else:
-        success_printer(f"{count + 1} lines read")
+        success_printer(f"{count + 1} lines found")
     f.close()
     BUFFER = []
     DEVIDER_COUNT = 0
-    with open(file,'r',encoding='utf-8') as f:
-        for line in f:
-            if line.strip() == os.getenv("DIVIDER"):
-                DEVIDER_COUNT = DEVIDER_COUNT + 1
-            if DEVIDER_COUNT == 2 and "nodes" in common_tables:
-                dataframe = process_nodes(BUFFER)
-                if cleandata:
-                    dataframe = dataframe.dropna()
-                if push:
-                    push_to_db_coords("nodes", dataframe, mode)
-                else:
-                    write_to_csv("nodes", dataframe,file)
-                BUFFER = []
-            if DEVIDER_COUNT == 3:
-                if "links" in common_tables:
-                    dataframe = process_links(BUFFER)
-                    if cleandata:
-                        dataframe = dataframe.dropna()
-                    if push:
-                        push_to_db_coords("links", dataframe, mode)
-                    else:
-                        write_to_csv("links", dataframe, file)
-                if "links_attr" in common_tables:
-                    dataframe = process_links_attr(BUFFER)
-                    if cleandata:
-                        dataframe = dataframe.dropna()
-                    if push:
-                        push_to_db_coords("links_attr", dataframe, mode)
-                    else:
-                        write_to_csv("links_attr", dataframe, file)
-                BUFFER = []
-            if DEVIDER_COUNT < 1:
-                continue
-            if DEVIDER_COUNT > 2:
-                continue
-            BUFFER.append(line)
+    with Progress(
+        SpinnerColumn(),
+        TextColumn("[progress.description]{task.description}"),
+        BarColumn(),
+        TaskProgressColumn(),
+        console=console
+    ) as progress:
+        with open(file,'r',encoding='utf-8') as f:
+            for line in f:
+                if line.strip() == os.getenv("DIVIDER"):
+                    DEVIDER_COUNT = DEVIDER_COUNT + 1
+                    # section 1 is the header; anything past section 3 holds no tables
+                    if DEVIDER_COUNT == 1 or DEVIDER_COUNT > 3:
+                        continue
+                    if DEVIDER_COUNT == 2:
+                        if "nodes" not in common_tables:
+                            BUFFER.clear()
+                            continue
+                        try:
+                            element = BeautifulSoup(' '.join(BUFFER), 'lxml-xml')
+                            total_nodes = element.find_all("node")
+                        except Exception:
+                            error_printer("node process failed")
+                        size = int(os.getenv("CHUNK_SIZE"))
+                        total_chunks = math.ceil(len(total_nodes)/size)
+                        if total_chunks > 2:
+                            node_task = progress.add_task("[cyan]Processing [bold]nodes[/bold] chunks...", total=total_chunks)
+                        else:
+                            node_task = progress.add_task("[cyan]Processing [bold]nodes[/bold] chunks...", total=total_chunks, visible=False)
+                        success_printer(f"Chunk count: {total_chunks}")
+                        current_chunk = 0
+                        while current_chunk < total_chunks:
+                            new_chunk = total_nodes[current_chunk*size:min(len(total_nodes),(current_chunk+1)*size)]
+                            dataframe = process_nodes(new_chunk)
+                            if cleandata:
+                                dataframe = dataframe.dropna()
+                            if push:
+                                push_to_db_coords("nodes", dataframe, mode)
+                            else:
+                                write_to_csv("nodes", dataframe, file)
+                            progress.update(node_task, advance=1)
+                            current_chunk += 1
+                        BUFFER.clear()
+                        continue
+                    if DEVIDER_COUNT == 3:
+                        try:
+                            element = BeautifulSoup(' '.join(BUFFER), 'lxml-xml')
+                            total_links = element.find_all("link")
+                        except Exception:
+                            error_printer("link process failed")
+                        size = int(os.getenv("CHUNK_SIZE"))
+                        total_chunks = math.ceil(len(total_links)/size)
+                        success_printer(f"Chunk count: {total_chunks}")
+                        if total_chunks > 2:
+                            link_task = progress.add_task("[cyan]Processing [bold]links[/bold] chunks...", total=total_chunks)
+                        else:
+                            link_task = progress.add_task("[cyan]Processing [bold]links[/bold] chunks...", total=total_chunks, visible=False)
+                        current_chunk = 0
+                        while current_chunk < total_chunks:
+                            new_chunk = total_links[current_chunk*size:min(len(total_links),(current_chunk+1)*size)]
+                            if "links" in common_tables:
+                                dataframe = process_links(new_chunk)
+                                if cleandata:
+                                    dataframe = dataframe.dropna()
+                                if push:
+                                    push_to_db_coords("links", dataframe, mode)
+                                else:
+                                    write_to_csv("links", dataframe, file)
+                            if "links_attr" in common_tables:
+                                dataframe = process_links_attr(new_chunk)
+                                if cleandata:
+                                    dataframe = dataframe.dropna()
+                                if push:
+                                    push_to_db_coords("links_attr", dataframe, mode)
+                                else:
+                                    write_to_csv("links_attr", dataframe, file)
+                            progress.update(link_task, advance=1)
+                            current_chunk += 1
+                        BUFFER.clear()
+                    continue
+                BUFFER.append(line.strip())
     console.print("[green]Processing complete![/green]")
 @app.command()
 def metro(
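A worked sketch of the chunking arithmetic used by both loops above (values invented; the real CHUNK_SIZE and DIVIDER come from .env, which this diff does not show):

    import math

    size = 5000                        # hypothetical CHUNK_SIZE
    total_nodes = list(range(12001))   # stand-in for parsed <node> tags

    total_chunks = math.ceil(len(total_nodes) / size)   # -> 3 chunks
    for current_chunk in range(total_chunks):
        chunk = total_nodes[current_chunk*size:min(len(total_nodes), (current_chunk+1)*size)]
        print(current_chunk, len(chunk))                # lengths 5000, 5000, 2001

Python slicing already clamps at the end of the sequence, so the min() guard is redundant but harmless.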