Mirror of https://github.com/knejadshamsi/zele-utils.git (synced 2024-12-22 23:55:53 -05:00)
network functions added pt.2
parent e8dda40c9f, commit 24ae7adfc8
@@ -1,12 +1,12 @@
-from .population import process_buffer_population, process_travels, push_to_db_coords, write_to_csv
-from .network import process_network, push_network, network_write
+from .population import process_buffer_population, process_travels
+from .network import process_nodes, process_links, process_links_attr
 from .metro import process_metro, push_metro, metro_write
 from .bus import process_bus, push_bus, bus_write
-from .helpers import buffer_creator
+from .helpers import buffer_creator, push_to_db_coords, write_to_csv

 __all__ = [
     'process_buffer_population', 'process_travels', 'push_to_db_coords', 'write_to_csv',
-    'process_network', 'push_network', 'network_write',
+    'process_nodes', 'process_links', 'process_links_attr',
     'process_metro', 'push_metro', 'metro_write',
     'process_bus', 'push_bus', 'bus_write',
     'buffer_creator'
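For orientation, a minimal sketch of how the reorganized exports would be consumed after this commit; the import site below is an assumption, not code from the repo:

    # Hypothetical caller: push_to_db_coords/write_to_csv now come via .helpers,
    # and the old process_network/push_network/network_write trio is gone.
    from functions import process_nodes, process_links, process_links_attr
    from functions import push_to_db_coords, write_to_csv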
@@ -3,30 +3,32 @@ import pandas,pyproj, re
 from shapely.geometry import Point

 def process_nodes(data):
     transformer = pyproj.Transformer.from_crs('EPSG:2950', 'EPSG:4326', always_xy=True)
     ELEMENT_LIST = []
-    elements = BeautifulSoup(data, 'lxml-xml')
-    for element in elements.find_all("node"):
-        ELEMENT_LIST.append(dict(element.attrs))
+    for node in data:
+        attr = dict(node.attrs)
+        # always_xy=True returns (lon, lat) order
+        lon, lat = transformer.transform(attr['x'], attr['y'])
+        attr['coordinates'] = Point(lon, lat)
+        ELEMENT_LIST.append(attr)
     return pandas.DataFrame(ELEMENT_LIST)

 def process_links(data):
     ELEMENT_LIST = []
-    elements = BeautifulSoup(data, 'lxml-xml')
-    for element in elements.find_all("link"):
+    for element in data:
         ELEMENT_LIST.append(dict(element.attrs))
     return pandas.DataFrame(ELEMENT_LIST)

 def process_links_attr(data):
     ELEMENT_LIST = []
-    elements = BeautifulSoup(data, 'lxml-xml')
-    for element in elements.find_all("link"):
-        ELEMENT_DICT = {}
-        if element.find_all("attribute"):
-            for attr in element.find_all("attribute"):
-                ELEMENT_DICT.update({attr["name"]: attr.get_text()})
-        else:
-            ELEMENT_DICT["id"] = element.getattr("id")
+    for element in data:
+        if not element.find("attributes"):
+            continue
+        ELEMENT_DICT = {"id": element.get("id")}
+        for attr in element.find("attributes").find_all("attribute"):
+            attr_name = attr.get("name")
+            attr_value = attr.get_text()
+            attr_name = attr_name.replace(":", "_")  # e.g. "osm:way" -> "osm_way"
+            ELEMENT_DICT[attr_name] = attr_value

         ELEMENT_LIST.append(ELEMENT_DICT)

     return pandas.DataFrame(ELEMENT_LIST)
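The refactor also changes the contract of these functions: each process_* now takes an iterable of already-parsed tags instead of a raw XML string, so BeautifulSoup parsing happens once at the call site. A minimal sketch under that assumption; the sample XML and values are invented:

    from bs4 import BeautifulSoup
    from functions import process_nodes, process_links_attr  # repo-local imports

    xml = ('<network>'
           '<node id="1" x="300000" y="5040000"/>'
           '<link id="a"><attributes>'
           '<attribute name="osm:way">12</attribute>'
           '</attributes></link>'
           '</network>')  # invented sample data
    soup = BeautifulSoup(xml, 'lxml-xml')
    nodes_df = process_nodes(soup.find_all("node"))        # columns: id, x, y, coordinates
    attrs_df = process_links_attr(soup.find_all("link"))   # columns: id, osm_way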
main.py (130 changed lines)
@@ -1,4 +1,5 @@
-import os, typer
+import os, math, typer
+from bs4 import BeautifulSoup
 from dotenv import load_dotenv
 from rich import print
 from typing_extensions import Annotated
@@ -14,6 +15,8 @@ from functions import buffer_creator, process_buffer_population,process_travels,
+from functions import process_nodes, process_links, process_links_attr
 from styles import print_help

+import xml.etree.ElementTree as ET

 called= "population"
 app = typer.Typer(rich_markup_mode="rich")
 load_dotenv()
@@ -94,7 +97,10 @@ def population(
         TaskProgressColumn(),
         console=console
     ) as progress:
-        task = progress.add_task("[cyan]Processing chunks...", total=max_chunk)
+        if max_chunk > 2:
+            task = progress.add_task("[cyan]Processing chunks...", total=max_chunk)
+        else:
+            task = progress.add_task("[cyan]Processing chunks...", total=max_chunk, visible=False)

         current_chunk = 0
         processed_line = 0
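Both commands now hide the progress bar when the job is too small to be worth animating. rich's add_task accepts a visible flag, so the branch can also collapse into a single call; a standalone sketch with invented values:

    from rich.progress import Progress, SpinnerColumn, TextColumn, BarColumn, TaskProgressColumn

    max_chunk = 2  # invented value
    with Progress(SpinnerColumn(), TextColumn("[progress.description]{task.description}"),
                  BarColumn(), TaskProgressColumn()) as progress:
        # visible=False registers the task but draws nothing
        task = progress.add_task("[cyan]Processing chunks...", total=max_chunk, visible=max_chunk > 2)
        for _ in range(max_chunk):
            progress.update(task, advance=1)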
@@ -166,47 +172,91 @@ def network(
         error_printer("File empty")
         raise typer.Exit()
     else:
-        success_printer(f"{count + 1} lines read")
+        success_printer(f"{count + 1} lines found")
     f.close()
     BUFFER = []
     DEVIDER_COUNT = 0
-    with open(file, 'r', encoding='utf-8') as f:
-        for line in f:
-            if line.strip() == os.getenv("DIVIDER"):
-                DEVIDER_COUNT = DEVIDER_COUNT + 1
-            if DEVIDER_COUNT == 2 and "nodes" in common_tables:
-                dataframe = process_nodes(BUFFER)
-                if cleandata:
-                    dataframe = dataframe.dropna()
-                if push:
-                    push_to_db_coords("nodes", dataframe, mode)
-                else:
-                    write_to_csv("nodes", dataframe, file)
-                BUFFER = []
-            if DEVIDER_COUNT == 3:
-                if "links" in common_tables:
-                    dataframe = process_links(BUFFER)
-                    if cleandata:
-                        dataframe = dataframe.dropna()
-                    if push:
-                        push_to_db_coords("links", dataframe, mode)
-                    else:
-                        write_to_csv("links", dataframe, file)
-                if "links_attr" in common_tables:
-                    dataframe = process_links_attr(BUFFER)
-                    if cleandata:
-                        dataframe = dataframe.dropna()
-                    if push:
-                        push_to_db_coords("links_attr", dataframe, mode)
-                    else:
-                        write_to_csv("links_attr", dataframe, file)
-                BUFFER = []
-            if DEVIDER_COUNT < 1:
-                continue
-            if DEVIDER_COUNT > 2:
-                continue
-            BUFFER.append(line)
-    console.print("[green]Processing complete![/green]")
+    with Progress(
+        SpinnerColumn(),
+        TextColumn("[progress.description]{task.description}"),
+        BarColumn(),
+        TaskProgressColumn(),
+        console=console
+    ) as progress:
+        with open(file, 'r', encoding='utf-8') as f:
+            for line in f:
+                if line.strip() == os.getenv("DIVIDER"):
+                    DEVIDER_COUNT += 1
+                    # section 1 is preamble; anything past section 3 is ignored
+                    if DEVIDER_COUNT == 1 or DEVIDER_COUNT > 3: continue
+                    if DEVIDER_COUNT == 2:
+                        if "nodes" not in common_tables:
+                            BUFFER.clear()
+                            continue
+                        try:
+                            element = BeautifulSoup(' '.join(BUFFER), 'lxml-xml')
+                            total_nodes = element.find_all("node")
+                        except Exception:
+                            error_printer("node process failed")
+                        total_chunks = math.ceil(len(total_nodes) / int(os.getenv("CHUNK_SIZE")))
+                        if total_chunks > 2:
+                            node_task = progress.add_task("[cyan]Processing [bold]nodes[/bold] chunks...", total=total_chunks)
+                        else:
+                            node_task = progress.add_task("[cyan]Processing [bold]nodes[/bold] chunks...", total=total_chunks, visible=False)
+                        success_printer(f"Chunk count: {total_chunks}")
+                        current_chunks = 0
+                        while current_chunks < total_chunks:
+                            size = int(os.getenv("CHUNK_SIZE"))
+                            new_chunk = total_nodes[current_chunks * size:min(len(total_nodes), (current_chunks + 1) * size)]
+                            dataframe = process_nodes(new_chunk)
+                            if cleandata:
+                                dataframe = dataframe.dropna()
+                            if push:
+                                push_to_db_coords("nodes", dataframe, mode)
+                            else:
+                                write_to_csv("nodes", dataframe, file)
+                            progress.update(node_task, advance=1)
+                            current_chunks += 1
+                        BUFFER.clear()
+                    if DEVIDER_COUNT == 3:
+                        try:
+                            element = BeautifulSoup(' '.join(BUFFER), 'lxml-xml')
+                            total_links = element.find_all("link")
+                        except Exception:
+                            error_printer("link process failed")
+                        total_chunks = math.ceil(len(total_links) / int(os.getenv("CHUNK_SIZE")))
+                        success_printer(f"Chunk count: {total_chunks}")
+                        if total_chunks > 2:
+                            link_task = progress.add_task("[cyan]Processing [bold]links[/bold] chunks...", total=total_chunks)
+                        else:
+                            link_task = progress.add_task("[cyan]Processing [bold]links[/bold] chunks...", total=total_chunks, visible=False)
+                        current_chunks = 0
+                        while current_chunks < total_chunks:
+                            size = int(os.getenv("CHUNK_SIZE"))
+                            new_chunk = total_links[current_chunks * size:min(len(total_links), (current_chunks + 1) * size)]
+                            if "links" in common_tables:
+                                dataframe = process_links(new_chunk)
+                                if cleandata:
+                                    dataframe = dataframe.dropna()
+                                if push:
+                                    push_to_db_coords("links", dataframe, mode)
+                                else:
+                                    write_to_csv("links", dataframe, file)
+                            if "links_attr" in common_tables:
+                                dataframe = process_links_attr(new_chunk)
+                                if cleandata:
+                                    dataframe = dataframe.dropna()
+                                if push:
+                                    push_to_db_coords("links_attr", dataframe, mode)
+                                else:
+                                    write_to_csv("links_attr", dataframe, file)
+                            progress.update(link_task, advance=1)
+                            current_chunks += 1
+                        BUFFER.clear()
+                        continue
+                    continue
+                BUFFER.append(line.strip())
+        console.print("[green]Processing complete![/green]")

 @app.command()
 def metro(
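Taken together, the new network flow splits the file into divider-delimited sections, parses each section once, then processes it in fixed-size chunks. A self-contained sketch of the same pattern, with the divider, chunk size, and data all invented (the repo reads DIVIDER and CHUNK_SIZE from the environment):

    import math

    DIVIDER = "====="   # invented stand-in for os.getenv("DIVIDER")
    CHUNK_SIZE = 4      # invented stand-in for int(os.getenv("CHUNK_SIZE"))

    def chunked(items, size):
        # ceiling division gives the chunk count; slicing clamps the last chunk
        for i in range(math.ceil(len(items) / size)):
            yield items[i * size:(i + 1) * size]

    sections, current = [], []
    for line in ["preamble", DIVIDER, "<node/>", "<node/>", DIVIDER, "<link/>"]:
        if line.strip() == DIVIDER:
            sections.append(current)
            current = []
        else:
            current.append(line.strip())
    sections.append(current)

    for section in sections[1:]:    # section 0 is preamble, as in the command
        for chunk in chunked(section, CHUNK_SIZE):
            pass  # process_nodes / process_links would run per chunk here

Note that Python slicing already clamps at the end of the list, so the min() bound in the commit's slices is harmless but redundant.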