network functions added pt.2

Kian 2024-09-12 18:44:32 -04:00
parent e8dda40c9f
commit 24ae7adfc8
3 changed files with 110 additions and 58 deletions

View File

@@ -1,12 +1,12 @@
from .population import process_buffer_population, process_travels, push_to_db_coords, write_to_csv
from .network import process_network, push_network, network_write
from .population import process_buffer_population, process_travels
from .network import process_nodes, process_links, process_links_attr
from .metro import process_metro, push_metro, metro_write
from .bus import process_bus, push_bus, bus_write
from .helpers import buffer_creator
from .helpers import buffer_creator, push_to_db_coords, write_to_csv
__all__ = [
    'process_buffer_population', 'process_travels', 'push_to_db_coords', 'write_to_csv',
    'process_network', 'push_network', 'network_write',
    'process_nodes', 'process_links', 'process_links_attr',
    'process_metro', 'push_metro', 'metro_write',
    'process_bus', 'push_bus', 'bus_write',
    'buffer_creator'
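With this change the package swaps the old process_network/push_network/network_write exports for the three new parser helpers, and the DB/CSV writers move from population to helpers. A quick sanity check of the new import surface (sketch):

from functions import process_nodes, process_links, process_links_attr
from functions import push_to_db_coords, write_to_csv, buffer_creator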

View File

@@ -3,30 +3,32 @@ import pandas, pyproj, re
from shapely.geometry import Point
def process_nodes(data):
    transformer = pyproj.Transformer.from_crs('EPSG:2950', 'EPSG:4326', always_xy=True)
    ELEMENT_LIST = []
    elements = BeautifulSoup(data, 'lxml-xml')
    for element in elements.find_all("node"):
        ELEMENT_LIST.append(dict(element.attrs))
    for node in data:
        attr = dict(node.attrs)
        lon, lat = transformer.transform(attr['x'], attr['y'])
        attr['coordinates'] = Point(lon, lat)
        ELEMENT_LIST.append(attr)
    return pandas.DataFrame(ELEMENT_LIST)
def process_links(data):
    ELEMENT_LIST = []
    elements = BeautifulSoup(data, 'lxml-xml')
    for element in elements.find_all("link"):
    for element in data:
        ELEMENT_LIST.append(dict(element.attrs))
    return pandas.DataFrame(ELEMENT_LIST)
def process_links_attr(data):
    ELEMENT_LIST = []
    elements = BeautifulSoup(data, 'lxml-xml')
    for element in elements.find_all("link"):
        ELEMENT_DICT = {}
        if element.find_all("attribute"):
            for attr in element.find_all("attribute"):
                ELEMENT_DICT.update({attr["name"]: attr.get_text()})
        else:
    for element in data:
        if not element.find("attributes"):
            continue
        ELEMENT_DICT["id"] = element.getattr("id")
        ELEMENT_LIST.append(ELEMENT_DICT)
        ELEMENT_DICT = {"id": element.get("id")}
        for attr in element.find("attributes").find_all("attribute"):
            attr_name = attr.get("name")
            attr_value = attr.get_text()
            attr_name = attr_name.replace(":", "_")
            ELEMENT_DICT[attr_name] = attr_value
        ELEMENT_LIST.append(ELEMENT_DICT)
    return pandas.DataFrame(ELEMENT_LIST)
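The reworked parsers take an iterable of already-parsed tags instead of raw XML text, so the caller owns the BeautifulSoup parse and can slice the tag list into chunks. A minimal sketch of the intended call pattern; the sample XML and EPSG:2950 coordinate values are illustrative only:

from bs4 import BeautifulSoup
from functions import process_nodes

sample = '<network><nodes><node id="1" x="300000" y="5040000"/></nodes></network>'
soup = BeautifulSoup(sample, 'lxml-xml')
# One row per <node>; x/y are reprojected into a lon/lat 'coordinates' Point.
nodes_df = process_nodes(soup.find_all("node"))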

main.py
View File

@@ -1,4 +1,5 @@
import os, typer
import os, math, typer
from bs4 import BeautifulSoup
from dotenv import load_dotenv
from rich import print
from typing_extensions import Annotated
@@ -14,6 +15,8 @@ from functions import buffer_creator, process_buffer_population, process_travels,
from functions import process_nodes,process_links,process_links_attr
from styles import print_help
import xml.etree.ElementTree as ET
called = "population"
app = typer.Typer(rich_markup_mode="rich")
load_dotenv()
@@ -94,7 +97,10 @@
        TaskProgressColumn(),
        console=console
    ) as progress:
        task = progress.add_task("[cyan]Processing chunks...", total=max_chunk)
        if max_chunk > 2:
            task = progress.add_task("[cyan]Processing chunks...", total=max_chunk)
        else:
            task = progress.add_task("[cyan]Processing chunks...", total=max_chunk, visible=False)
        current_chunk = 0
        processed_line = 0
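The branch above only toggles the visible flag, so it can also be expressed as a single add_task call. A minimal sketch of the pattern with rich:

from rich.progress import Progress

with Progress() as progress:
    total = 2  # stands in for max_chunk
    # Hide the bar for trivially small workloads; progress is still tracked.
    task = progress.add_task("[cyan]Processing chunks...", total=total, visible=total > 2)
    for _ in range(total):
        progress.update(task, advance=1)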
@@ -166,47 +172,91 @@
        error_printer("File empty")
        raise typer.Exit()
    else:
        success_printer(f"{count + 1} lines read")
        success_printer(f"{count + 1} lines found")
    f.close()
    BUFFER = []
    DIVIDER_COUNT = 0
    with open(file, 'r', encoding='utf-8') as f:
        for line in f:
            if line.strip() == os.getenv("DIVIDER"):
                DIVIDER_COUNT += 1
            if DIVIDER_COUNT == 2 and "nodes" in common_tables:
                dataframe = process_nodes(BUFFER)
                if cleandata:
                    dataframe = dataframe.dropna()
                if push:
                    push_to_db_coords("nodes", dataframe, mode)
                else:
                    write_to_csv("nodes", dataframe, file)
                BUFFER = []
            if DIVIDER_COUNT == 3:
                if "links" in common_tables:
                    dataframe = process_links(BUFFER)
                    if cleandata:
                        dataframe = dataframe.dropna()
                    if push:
                        push_to_db_coords("links", dataframe, mode)
                    else:
                        write_to_csv("links", dataframe, file)
                if "links_attr" in common_tables:
                    dataframe = process_links_attr(BUFFER)
                    if cleandata:
                        dataframe = dataframe.dropna()
                    if push:
                        push_to_db_coords("links_attr", dataframe, mode)
                    else:
                        write_to_csv("links_attr", dataframe, file)
                BUFFER = []
            if DIVIDER_COUNT < 1:
                continue
            if DIVIDER_COUNT > 2:
                continue
            BUFFER.append(line)
    console.print("[green]Processing complete![/green]")
    with Progress(
        SpinnerColumn(),
        TextColumn("[progress.description]{task.description}"),
        BarColumn(),
        TaskProgressColumn(),
        console=console
    ) as progress:
        with open(file, 'r', encoding='utf-8') as f:
            for line in f:
                if line.strip() == os.getenv("DIVIDER"):
                    DIVIDER_COUNT += 1
                    if DIVIDER_COUNT == 1 or DIVIDER_COUNT > 3:
                        continue
                    if DIVIDER_COUNT == 2:
                        if "nodes" not in common_tables:
                            BUFFER.clear()
                            continue
                        else:
                            try:
                                element = BeautifulSoup(' '.join(BUFFER), 'lxml-xml')
                                total_nodes = element.find_all("node")
                            except Exception:
                                error_printer("node process failed")
                                raise typer.Exit()
                            total_chunks = math.ceil(len(total_nodes) / int(os.getenv("CHUNK_SIZE")))
                            if total_chunks > 2:
                                node_task = progress.add_task("[cyan]Processing [bold]nodes[/bold] chunks...", total=total_chunks)
                            else:
                                node_task = progress.add_task("[cyan]Processing [bold]nodes[/bold] chunks...", total=total_chunks, visible=False)
                            success_printer(f"Chunk count: {total_chunks}")
                            current_chunks = 0
                            while current_chunks < total_chunks:
                                size = int(os.getenv("CHUNK_SIZE"))
                                new_chunk = total_nodes[current_chunks * size:min(len(total_nodes), (current_chunks + 1) * size)]
                                dataframe = process_nodes(new_chunk)
                                if cleandata:
                                    dataframe = dataframe.dropna()
                                if push:
                                    push_to_db_coords("nodes", dataframe, mode)
                                else:
                                    write_to_csv("nodes", dataframe, file)
                                progress.update(node_task, advance=1)
                                current_chunks += 1
                            BUFFER.clear()
                    if DIVIDER_COUNT == 3:
                        try:
                            element = BeautifulSoup(' '.join(BUFFER), 'lxml-xml')
                            total_links = element.find_all("link")
                        except Exception:
                            error_printer("link process failed")
                            raise typer.Exit()
                        total_chunks = math.ceil(len(total_links) / int(os.getenv("CHUNK_SIZE")))
                        success_printer(f"Chunk count: {total_chunks}")
                        if total_chunks > 2:
                            link_task = progress.add_task("[cyan]Processing [bold]links[/bold] chunks...", total=total_chunks)
                        else:
                            link_task = progress.add_task("[cyan]Processing [bold]links[/bold] chunks...", total=total_chunks, visible=False)
                        current_chunks = 0
                        while current_chunks < total_chunks:
                            size = int(os.getenv("CHUNK_SIZE"))
                            new_chunk = total_links[current_chunks * size:min(len(total_links), (current_chunks + 1) * size)]
                            if "links" in common_tables:
                                dataframe = process_links(new_chunk)
                                if cleandata:
                                    dataframe = dataframe.dropna()
                                if push:
                                    push_to_db_coords("links", dataframe, mode)
                                else:
                                    write_to_csv("links", dataframe, file)
                            if "links_attr" in common_tables:
                                dataframe = process_links_attr(new_chunk)
                                if cleandata:
                                    dataframe = dataframe.dropna()
                                if push:
                                    push_to_db_coords("links_attr", dataframe, mode)
                                else:
                                    write_to_csv("links_attr", dataframe, file)
                            progress.update(link_task, advance=1)
                            current_chunks += 1
                        BUFFER.clear()
                        continue
                    continue
                BUFFER.append(line.strip())
    console.print("[green]Processing complete![/green]")
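For reference, the chunking above is plain ceil-division plus slicing; the min() guard on the upper bound is harmless but redundant, since Python slices already clamp at the end of the list. A standalone sketch with an assumed CHUNK_SIZE of 500:

import math

items = list(range(1050))                     # stands in for total_nodes / total_links
size = 500                                    # int(os.getenv("CHUNK_SIZE")), assumed
total_chunks = math.ceil(len(items) / size)   # 3 chunks: 500, 500, 50
for i in range(total_chunks):
    chunk = items[i * size:(i + 1) * size]    # slicing clamps at the list end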
@app.command()
def metro(