network functions added pt.2

Kian 2024-09-12 18:44:32 -04:00
parent e8dda40c9f
commit 24ae7adfc8
3 changed files with 110 additions and 58 deletions


@@ -1,12 +1,12 @@
-from .population import process_buffer_population, process_travels, push_to_db_coords, write_to_csv
-from .network import process_network, push_network, network_write
+from .population import process_buffer_population, process_travels
+from .network import process_nodes, process_links, process_links_attr
 from .metro import process_metro, push_metro, metro_write
 from .bus import process_bus, push_bus, bus_write
-from .helpers import buffer_creator
+from .helpers import buffer_creator, push_to_db_coords, write_to_csv
 __all__ = [
     'process_buffer_population', 'process_travels', 'push_to_db_coords', 'write_to_csv',
-    'process_network', 'push_network', 'network_write',
+    'process_nodes', 'process_links', 'process_links_attr',
     'process_metro', 'push_metro', 'metro_write',
     'process_bus', 'push_bus', 'bus_write',
     'buffer_creator'
 ]

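Note on the re-export: push_to_db_coords and write_to_csv move into helpers but stay listed in __all__, so existing call sites keep working. A minimal sketch of the two equivalent import forms (hypothetical caller, assuming the package is named functions as in main.py below):

    from functions import process_nodes, process_links, process_links_attr
    from functions.helpers import push_to_db_coords, write_to_csv  # also importable from the package root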

@@ -3,30 +3,32 @@ import pandas,pyproj, re
 from shapely.geometry import Point
 def process_nodes(data):
     transformer = pyproj.Transformer.from_crs('EPSG:2950', 'EPSG:4326', always_xy=True)
     ELEMENT_LIST = []
-    elements = BeautifulSoup(data,'lxml-xml')
-    for element in elements.find_all("node"):
-        ELEMENT_LIST.append(dict(element.attrs))
+    for node in data:
+        attr = dict(node.attrs)
+        lon, lat = transformer.transform(attr['x'], attr['y'])
+        attr['coordinates'] = Point(lon, lat)
+        ELEMENT_LIST.append(attr)
     return pandas.DataFrame(ELEMENT_LIST)
 def process_links(data):
     ELEMENT_LIST = []
-    elements = BeautifulSoup(data,'lxml-xml')
-    for element in elements.find_all("link"):
+    for element in data:
         ELEMENT_LIST.append(dict(element.attrs))
     return pandas.DataFrame(ELEMENT_LIST)
 def process_links_attr(data):
     ELEMENT_LIST = []
-    elements = BeautifulSoup(data,'lxml-xml')
-    for element in elements.find_all("link"):
-        ELEMENT_DICT = {}
-        if element.find_all("attribute"):
-            for attr in element.find_all("attribute"):
-                ELEMENT_DICT.update({attr["name"]: attr.get_text()})
-        else:
-            ELEMENT_DICT["id"]=element.getattr("id")
+    for element in data:
+        if not element.find("attributes"):
+            continue
+        ELEMENT_DICT = {"id": element.get("id")}
+        for attr in element.find("attributes").find_all("attribute"):
+            attr_name = attr.get("name")
+            attr_value = attr.get_text()
+            attr_name = attr_name.replace(":", "_")
+            ELEMENT_DICT[attr_name] = attr_value
         ELEMENT_LIST.append(ELEMENT_DICT)
     return pandas.DataFrame(ELEMENT_LIST)
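A note on axis order, since process_nodes depends on it: with always_xy=True, pyproj's transform() takes (x, y) in and returns (longitude, latitude) out for EPSG:4326, which matches shapely's Point(x, y) convention. A small sanity-check sketch (coordinates invented, roughly in the EPSG:2950 / MTM zone 8 area):

    import pyproj
    from shapely.geometry import Point

    transformer = pyproj.Transformer.from_crs('EPSG:2950', 'EPSG:4326', always_xy=True)
    lon, lat = transformer.transform(300000, 5040000)  # always_xy=True -> (lon, lat) out
    point = Point(lon, lat)  # shapely expects (x, y), i.e. (lon, lat)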

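For context, the three processors now expect pre-parsed bs4 tags instead of raw XML text, so the caller owns the BeautifulSoup step (main.py below does exactly that). A hypothetical end-to-end sketch with an invented one-node, one-link network:

    from bs4 import BeautifulSoup

    sample = '''<network>
    <nodes><node id="1" x="300000" y="5040000"/></nodes>
    <links><link id="10" from="1" to="1"><attributes>
    <attribute name="osm:way:highway">primary</attribute>
    </attributes></link></links>
    </network>'''

    soup = BeautifulSoup(sample, 'lxml-xml')
    nodes_df = process_nodes(soup.find_all("node"))      # id, x, y plus a shapely coordinates column
    links_df = process_links(soup.find_all("link"))      # raw link attributes as-is
    attrs_df = process_links_attr(soup.find_all("link")) # id plus osm_way_highway (":" becomes "_")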
main.py

@@ -1,4 +1,5 @@
-import os, typer
+import os, math, typer
+from bs4 import BeautifulSoup
 from dotenv import load_dotenv
 from rich import print
 from typing_extensions import Annotated
@@ -14,6 +15,8 @@ from functions import buffer_creator, process_buffer_population,process_travels,
+from functions import process_nodes, process_links, process_links_attr
 from styles import print_help
 import xml.etree.ElementTree as ET
+called = "population"
 app = typer.Typer(rich_markup_mode="rich")
 load_dotenv()
@@ -94,7 +97,10 @@ def population(
         TaskProgressColumn(),
         console=console
     ) as progress:
-        task = progress.add_task("[cyan]Processing chunks...", total=max_chunk)
+        if max_chunk > 2:
+            task = progress.add_task("[cyan]Processing chunks...", total=max_chunk)
+        else:
+            task = progress.add_task("[cyan]Processing chunks...", total=max_chunk, visible=False)
         current_chunk = 0
         processed_line = 0
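The visible flag keeps the task usable while hiding the bar for trivially small jobs, so the update loop needs no special case. The same idea in a standalone sketch (numbers invented); passing visible=max_chunk > 2 directly would even fold the if/else into one call:

    from rich.progress import Progress

    total = 2  # a trivially small chunk count
    with Progress() as progress:
        # a hidden task still tracks state, so update() works unconditionally
        task = progress.add_task("Processing chunks...", total=total, visible=total > 2)
        for _ in range(total):
            progress.update(task, advance=1)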
@@ -166,47 +172,91 @@ def network(
         error_printer("File empty")
         raise typer.Exit()
     else:
-        success_printer(f"{count + 1} lines read")
+        success_printer(f"{count + 1} lines found")
     f.close()
     BUFFER = []
     DIVIDER_COUNT = 0
-    with open(file, 'r', encoding='utf-8') as f:
-        for line in f:
-            if line.strip() == os.getenv("DIVIDER"):
-                DIVIDER_COUNT = DIVIDER_COUNT + 1
-                if DIVIDER_COUNT == 2 and "nodes" in common_tables:
-                    dataframe = process_nodes(BUFFER)
-                    if cleandata:
-                        dataframe = dataframe.dropna()
-                    if push:
-                        push_to_db_coords("nodes", dataframe, mode)
-                    else:
-                        write_to_csv("nodes", dataframe, file)
-                    BUFFER = []
-                if DIVIDER_COUNT == 3:
-                    if "links" in common_tables:
-                        dataframe = process_links(BUFFER)
-                        if cleandata:
-                            dataframe = dataframe.dropna()
-                        if push:
-                            push_to_db_coords("links", dataframe, mode)
-                        else:
-                            write_to_csv("links", dataframe, file)
-                    if "links_attr" in common_tables:
-                        dataframe = process_links_attr(BUFFER)
-                        if cleandata:
-                            dataframe = dataframe.dropna()
-                        if push:
-                            push_to_db_coords("links_attr", dataframe, mode)
-                        else:
-                            write_to_csv("links_attr", dataframe, file)
-                    BUFFER = []
-            if DIVIDER_COUNT < 1:
-                continue
-            if DIVIDER_COUNT > 2:
-                continue
-            BUFFER.append(line)
-    console.print("[green]Processing complete![/green]")
+    with Progress(
+        SpinnerColumn(),
+        TextColumn("[progress.description]{task.description}"),
+        BarColumn(),
+        TaskProgressColumn(),
+        console=console
+    ) as progress:
+        with open(file, 'r', encoding='utf-8') as f:
+            for line in f:
+                if line.strip() == os.getenv("DIVIDER"):
+                    DIVIDER_COUNT = DIVIDER_COUNT + 1
+                    if DIVIDER_COUNT == 1 or DIVIDER_COUNT > 3: continue
+                    if DIVIDER_COUNT == 2:
+                        if "nodes" not in common_tables:
+                            BUFFER.clear()
+                            continue
+                        else:
+                            try:
+                                element = BeautifulSoup(' '.join(BUFFER), 'lxml-xml')
+                                total_nodes = element.find_all("node")
+                            except Exception:
+                                error_printer("node process failed")
+                                raise typer.Exit()
+                            total_chunks = math.ceil(len(total_nodes)/int(os.getenv("CHUNK_SIZE")))
+                            if total_chunks > 2:
+                                node_task = progress.add_task("[cyan]Processing [bold]nodes[/bold] chunks...", total=total_chunks)
+                            else:
+                                node_task = progress.add_task("[cyan]Processing [bold]nodes[/bold] chunks...", total=total_chunks, visible=False)
+                            success_printer(f"Chunk count: {total_chunks}")
+                            current_chunks = 0
+                            while current_chunks < total_chunks:
+                                size = int(os.getenv("CHUNK_SIZE"))
+                                new_chunk = total_nodes[current_chunks*size:min(len(total_nodes), (current_chunks+1)*size)]
+                                dataframe = process_nodes(new_chunk)
+                                if cleandata:
+                                    dataframe = dataframe.dropna()
+                                if push:
+                                    push_to_db_coords("nodes", dataframe, mode)
+                                else:
+                                    write_to_csv("nodes", dataframe, file)
+                                progress.update(node_task, advance=1)
+                                current_chunks += 1
+                            BUFFER.clear()
+                    if DIVIDER_COUNT == 3:
+                        try:
+                            element = BeautifulSoup(' '.join(BUFFER), 'lxml-xml')
+                            total_links = element.find_all("link")
+                        except Exception:
+                            error_printer("link process failed")
+                            raise typer.Exit()
+                        total_chunks = math.ceil(len(total_links)/int(os.getenv("CHUNK_SIZE")))
+                        success_printer(f"Chunk count: {total_chunks}")
+                        if total_chunks > 2:
+                            link_task = progress.add_task("[cyan]Processing [bold]links[/bold] chunks...", total=total_chunks)
+                        else:
+                            link_task = progress.add_task("[cyan]Processing [bold]links[/bold] chunks...", total=total_chunks, visible=False)
+                        current_chunks = 0
+                        while current_chunks < total_chunks:
+                            size = int(os.getenv("CHUNK_SIZE"))
+                            new_chunk = total_links[current_chunks*size:min(len(total_links), (current_chunks+1)*size)]
+                            if "links" in common_tables:
+                                dataframe = process_links(new_chunk)
+                                if cleandata:
+                                    dataframe = dataframe.dropna()
+                                if push:
+                                    push_to_db_coords("links", dataframe, mode)
+                                else:
+                                    write_to_csv("links", dataframe, file)
+                            if "links_attr" in common_tables:
+                                dataframe = process_links_attr(new_chunk)
+                                if cleandata:
+                                    dataframe = dataframe.dropna()
+                                if push:
+                                    push_to_db_coords("links_attr", dataframe, mode)
+                                else:
+                                    write_to_csv("links_attr", dataframe, file)
+                            progress.update(link_task, advance=1)
+                            current_chunks += 1
+                        BUFFER.clear()
+                        continue
+                    continue
+                BUFFER.append(line.strip())
+    console.print("[green]Processing complete![/green]")
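Both the nodes and links branches walk their element list in CHUNK_SIZE slices with the same while loop; a condensed standalone sketch of that pattern (helper name and arguments invented for illustration):

    import math

    def process_in_chunks(elements, chunk_size, handle_chunk):
        # mirror of the loop above: ceil() so a final partial chunk is still processed
        total_chunks = math.ceil(len(elements) / chunk_size)
        current = 0
        while current < total_chunks:
            # slicing clamps at the end of the list, so no min() guard is required
            handle_chunk(elements[current * chunk_size:(current + 1) * chunk_size])
            current += 1
        return total_chunks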
 @app.command()
 def metro(