mirror of
https://github.com/knejadshamsi/zele-utils.git
synced 2024-11-14 17:40:28 -05:00
network functions added pt.2
This commit is contained in:
parent
e8dda40c9f
commit
24ae7adfc8
|
@ -1,12 +1,12 @@
|
||||||
from .population import process_buffer_population, process_travels, push_to_db_coords,write_to_csv
|
from .population import process_buffer_population, process_travels
|
||||||
from .network import process_network, push_network, network_write
|
from .network import process_nodes, process_links, process_links_attr
|
||||||
from .metro import process_metro, push_metro, metro_write
|
from .metro import process_metro, push_metro, metro_write
|
||||||
from .bus import process_bus, push_bus, bus_write
|
from .bus import process_bus, push_bus, bus_write
|
||||||
from .helpers import buffer_creator
|
from .helpers import buffer_creator,push_to_db_coords,write_to_csv
|
||||||
|
|
||||||
__all__ = [
|
__all__ = [
|
||||||
'process_buffer_population', 'process_travels','push_to_db_coords', 'write_to_csv',
|
'process_buffer_population', 'process_travels','push_to_db_coords', 'write_to_csv',
|
||||||
'process_network', 'push_network', 'network_write',
|
# Trailing comma is required: without it the last name on this line is
# concatenated with the first string literal of the following entry line.
'process_nodes', 'process_links', 'process_links_attr',
|
||||||
'process_metro', 'push_metro', 'metro_write',
|
'process_metro', 'push_metro', 'metro_write',
|
||||||
'process_bus', 'push_bus', 'bus_write',
|
'process_bus', 'push_bus', 'bus_write',
|
||||||
'buffer_creator'
|
'buffer_creator'
|
||||||
|
|
|
@ -3,30 +3,32 @@ import pandas,pyproj, re
|
||||||
from shapely.geometry import Point
|
from shapely.geometry import Point
|
||||||
|
|
||||||
def process_nodes(data):
    """Convert parsed ``<node>`` elements into a DataFrame with WGS84 points.

    Args:
        data: Iterable of parsed node elements. Each element must expose its
            XML attributes as a mapping through ``.attrs`` (as BeautifulSoup
            tags do) and carry ``x`` and ``y`` attributes holding EPSG:2950
            coordinates.

    Returns:
        pandas.DataFrame: One row per node with every original XML attribute
        as a column, plus a ``coordinates`` column holding a shapely ``Point``
        in EPSG:4326, built as ``Point(longitude, latitude)``.

    Raises:
        KeyError: If a node has no ``x`` or ``y`` attribute.
        ValueError: If ``x`` or ``y`` cannot be parsed as a number.
    """
    # always_xy=True pins the axis order to (x, y): easting/northing going in,
    # longitude/latitude coming out.
    transformer = pyproj.Transformer.from_crs('EPSG:2950', 'EPSG:4326', always_xy=True)
    ELEMENT_LIST = []
    for node in data:
        # Copy so the parsed element's own attribute mapping is not mutated.
        attr = dict(node.attrs)
        # The result is (lon, lat) because of always_xy; unpacking it the other
        # way round would build the point with swapped axes.
        lon, lat = transformer.transform(float(attr['x']), float(attr['y']))
        attr['coordinates'] = Point(lon, lat)
        ELEMENT_LIST.append(attr)
    return pandas.DataFrame(ELEMENT_LIST)
|
||||||
|
|
||||||
def process_links(data):
    """Build a DataFrame with one row per parsed ``<link>`` element.

    Args:
        data: Iterable of parsed link elements. Each element must expose its
            XML attributes as a mapping through ``.attrs`` (as BeautifulSoup
            tags do).

    Returns:
        pandas.DataFrame: One row per element and one column per XML
        attribute name; empty when ``data`` yields no elements.
    """
    # dict() copies each mapping so the frame never aliases the attribute
    # storage owned by the parsed elements.
    return pandas.DataFrame([dict(element.attrs) for element in data])
|
||||||
|
|
||||||
def process_links_attr(data):
    """Flatten the nested ``<attributes>`` of each link into a DataFrame.

    Links that have no ``<attributes>`` child are left out of the result.
    Colons in attribute names are replaced with underscores. An
    ``<attribute>`` entry without a ``name`` is skipped, since it cannot be
    mapped to a column.

    Args:
        data: Iterable of parsed link elements offering ``find``/``get``
            (as BeautifulSoup tags do).

    Returns:
        pandas.DataFrame: One row per link that has an ``<attributes>`` child,
        with an ``id`` column and one column per attribute name. Values are
        the attribute's text content.
    """
    ELEMENT_LIST = []
    for element in data:
        # Look the container up once and reuse it for the inner scan.
        container = element.find("attributes")
        if container is None:
            continue
        ELEMENT_DICT = {"id": element.get("id")}
        for attr in container.find_all("attribute"):
            attr_name = attr.get("name")
            if attr_name is None:
                # No name means no column to store the value under.
                continue
            ELEMENT_DICT[attr_name.replace(":", "_")] = attr.get_text()
        ELEMENT_LIST.append(ELEMENT_DICT)
    return pandas.DataFrame(ELEMENT_LIST)
|
130
main.py
130
main.py
|
@ -1,4 +1,5 @@
|
||||||
import os, typer
|
import os,math,typer
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
from rich import print
|
from rich import print
|
||||||
from typing_extensions import Annotated
|
from typing_extensions import Annotated
|
||||||
|
@ -14,6 +15,8 @@ from functions import buffer_creator, process_buffer_population,process_travels,
|
||||||
from functions import process_nodes,process_links,process_links_attr
|
from functions import process_nodes,process_links,process_links_attr
|
||||||
from styles import print_help
|
from styles import print_help
|
||||||
|
|
||||||
|
import xml.etree.ElementTree as ET
|
||||||
|
|
||||||
called= "population"
|
called= "population"
|
||||||
app = typer.Typer(rich_markup_mode="rich")
|
app = typer.Typer(rich_markup_mode="rich")
|
||||||
load_dotenv()
|
load_dotenv()
|
||||||
|
@ -94,7 +97,10 @@ def population(
|
||||||
TaskProgressColumn(),
|
TaskProgressColumn(),
|
||||||
console=console
|
console=console
|
||||||
) as progress:
|
) as progress:
|
||||||
task = progress.add_task("[cyan]Processing chunks...", total=max_chunk)
|
if max_chunk > 2:
|
||||||
|
task = progress.add_task("[cyan]Processing chunks...", total=max_chunk)
|
||||||
|
else:
|
||||||
|
task = progress.add_task("[cyan]Processing chunks...", total=max_chunk, visible=False)
|
||||||
|
|
||||||
current_chunk = 0
|
current_chunk = 0
|
||||||
processed_line = 0
|
processed_line = 0
|
||||||
|
@ -166,47 +172,91 @@ def network(
|
||||||
error_printer("File empty")
|
error_printer("File empty")
|
||||||
raise typer.Exit()
|
raise typer.Exit()
|
||||||
else:
|
else:
|
||||||
success_printer(f"{count + 1} lines read")
|
success_printer(f"{count + 1} lines found")
|
||||||
f.close()
|
f.close()
|
||||||
BUFFER = []
|
BUFFER = []
|
||||||
DEVIDER_COUNT = 0
|
DEVIDER_COUNT = 0
|
||||||
with open(file,'r',encoding='utf-8') as f:
|
with Progress(
|
||||||
for line in f:
|
SpinnerColumn(),
|
||||||
if line.strip() == os.getenv("DIVIDER"):
|
TextColumn("[progress.description]{task.description}"),
|
||||||
DEVIDER_COUNT = DEVIDER_COUNT + 1
|
BarColumn(),
|
||||||
if DEVIDER_COUNT == 2 and "nodes" in common_tables:
|
TaskProgressColumn(),
|
||||||
dataframe = process_nodes(BUFFER)
|
console=console
|
||||||
if cleandata:
|
) as progress:
|
||||||
dataframe = dataframe.dropna()
|
with open(file,'r',encoding='utf-8') as f:
|
||||||
if push:
|
for line in f:
|
||||||
push_to_db_coords("nodes", dataframe, mode)
|
if line.strip() == os.getenv("DIVIDER"):
|
||||||
else:
|
DEVIDER_COUNT = DEVIDER_COUNT + 1
|
||||||
write_to_csv("nodes", dataframe,file)
|
if DEVIDER_COUNT == 1 or DEVIDER_COUNT > 3: continue
|
||||||
BUFFER = []
|
if DEVIDER_COUNT == 2:
|
||||||
if DEVIDER_COUNT == 3:
|
if "node" not in common_tables:
|
||||||
if "links" in common_tables:
|
BUFFER.clear()
|
||||||
dataframe = process_links(BUFFER)
|
continue
|
||||||
if cleandata:
|
else:
|
||||||
dataframe = dataframe.dropna()
|
try:
|
||||||
if push:
|
element = BeautifulSoup((' ').join(BUFFER), 'lxml-xml')
|
||||||
push_to_db_coords("links", dataframe, mode)
|
total_nodes = element.find_all("node")
|
||||||
else:
|
except:
|
||||||
write_to_csv("links", dataframe, file)
|
error_printer("node process failed")
|
||||||
if "links_attr" in common_tables:
|
total_chunks = math.ceil(len(total_nodes)/int(os.getenv("CHUNK_SIZE")))
|
||||||
dataframe = process_links_attr(BUFFER)
|
if total_chunks > 2:
|
||||||
if cleandata:
|
node_task = progress.add_task("[cyan]Processing [bold]nodes[/bold] chunks...", total=total_chunks)
|
||||||
dataframe = dataframe.dropna()
|
else:
|
||||||
if push:
|
node_task = progress.add_task("[cyan]Processing [bold]nodes[/bold] chunks...", total=total_chunks, visible=False)
|
||||||
push_to_db_coords("links_attr", dataframe, mode)
|
success_printer(f"Chunk count: {total_chunks}")
|
||||||
else:
|
currernt_chunks= 0
|
||||||
write_to_csv("links_attr", dataframe, file)
|
while currernt_chunks < total_chunks:
|
||||||
BUFFER = []
|
size = int(os.getenv("CHUNK_SIZE"))
|
||||||
if DEVIDER_COUNT < 1:
|
new_Chunk = total_nodes[currernt_chunks*size:min(len(total_nodes),(currernt_chunks+1)*size)]
|
||||||
continue
|
dataframe = process_nodes(new_Chunk)
|
||||||
if DEVIDER_COUNT > 2:
|
if cleandata:
|
||||||
continue
|
dataframe = dataframe.dropna()
|
||||||
BUFFER.append(line)
|
if push:
|
||||||
console.print("[green]Processing complete![/green]")
|
push_to_db_coords("nodes", dataframe, mode)
|
||||||
|
else:
|
||||||
|
write_to_csv("nodes", dataframe,file)
|
||||||
|
progress.update(node_task, advance=1)
|
||||||
|
# Advance to the next chunk so the enclosing while-loop condition can
# eventually become false.
currernt_chunks += 1
|
||||||
|
BUFFER.clear()
|
||||||
|
if DEVIDER_COUNT == 3:
|
||||||
|
try:
|
||||||
|
element = BeautifulSoup((' ').join(BUFFER), 'lxml-xml')
|
||||||
|
total_links = element.find_all("link")
|
||||||
|
except:
|
||||||
|
error_printer("node process failed")
|
||||||
|
total_chunks = math.ceil(len(total_links)/int(os.getenv("CHUNK_SIZE")))
|
||||||
|
success_printer(f"Chunk count: {total_chunks}")
|
||||||
|
if total_chunks > 2:
|
||||||
|
link_task = progress.add_task("[cyan]Processing [bold]links[/bold] chunks...", total=total_chunks)
|
||||||
|
else:
|
||||||
|
link_task = progress.add_task("[cyan]Processing [bold]links[/bold] chunks...", total=total_chunks, visible=False)
|
||||||
|
currernt_chunks= 0
|
||||||
|
while currernt_chunks < total_chunks:
|
||||||
|
size = int(os.getenv("CHUNK_SIZE"))
|
||||||
|
new_Chunk = total_links[currernt_chunks*size:min(len(total_links),(currernt_chunks+1)*size)]
|
||||||
|
if "links" in common_tables:
|
||||||
|
dataframe = process_links(new_Chunk)
|
||||||
|
if cleandata:
|
||||||
|
dataframe = dataframe.dropna()
|
||||||
|
if push:
|
||||||
|
push_to_db_coords("links", dataframe, mode)
|
||||||
|
else:
|
||||||
|
write_to_csv("links", dataframe, file)
|
||||||
|
if "links_attr" in common_tables:
|
||||||
|
dataframe = process_links_attr(new_Chunk)
|
||||||
|
if cleandata:
|
||||||
|
dataframe = dataframe.dropna()
|
||||||
|
if push:
|
||||||
|
push_to_db_coords("links_attr", dataframe, mode)
|
||||||
|
else:
|
||||||
|
write_to_csv("links_attr", dataframe, file)
|
||||||
|
progress.update(link_task, advance=1)
|
||||||
|
currernt_chunks +=1
|
||||||
|
BUFFER.clear()
|
||||||
|
continue
|
||||||
|
continue
|
||||||
|
BUFFER.append(line.strip())
|
||||||
|
console.print("[green]Processing complete![/green]")
|
||||||
|
|
||||||
@app.command()
|
@app.command()
|
||||||
def metro(
|
def metro(
|
||||||
|
|
Loading…
Reference in New Issue
Block a user