2024-09-11 10:56:17 -04:00
|
|
|
from bs4 import BeautifulSoup
|
|
|
|
import pandas,pyproj, re
|
|
|
|
from shapely.geometry import Point
|
2024-09-07 18:13:36 -04:00
|
|
|
|
2024-09-11 10:56:17 -04:00
|
|
|
def process_nodes(data):
    """Build a DataFrame of nodes with reprojected point geometries.

    Each element's ``attrs`` mapping is copied into one row. Its ``x`` and
    ``y`` entries are transformed from EPSG:2950 to EPSG:4326 and stored as
    a shapely ``Point`` under the ``coordinates`` key.

    Args:
        data: Iterable of elements exposing an ``attrs`` mapping that
            contains ``x`` and ``y`` (presumably BeautifulSoup tags --
            confirm against the caller).

    Returns:
        pandas.DataFrame with one row per element; empty when ``data`` is
        empty.

    Raises:
        KeyError: If an element has no ``x`` or ``y`` attribute.
    """
    # Built once per call: constructing a Transformer is far more expensive
    # than transforming a single coordinate pair.
    transformer = pyproj.Transformer.from_crs('EPSG:2950', 'EPSG:4326', always_xy=True)

    rows = []
    for node in data:
        attr = dict(node.attrs)  # copy so the source element is not mutated
        # With always_xy=True the transformer takes (x, y) and returns
        # (longitude, latitude) -- in that order. The previous code unpacked
        # the result as (lat, lon), which produced Point(latitude, longitude).
        lon, lat = transformer.transform(attr['x'], attr['y'])
        # shapely points are (x, y), i.e. (longitude, latitude).
        attr['coordinates'] = Point(lon, lat)
        rows.append(attr)

    return pandas.DataFrame(rows)
|
2024-09-07 18:13:36 -04:00
|
|
|
|
2024-09-11 10:56:17 -04:00
|
|
|
def process_links(data):
    """Return a DataFrame with one row per element, built from its ``attrs``.

    Args:
        data: Iterable of elements exposing an ``attrs`` mapping.

    Returns:
        pandas.DataFrame whose rows are shallow copies of each element's
        attribute mapping; empty when ``data`` is empty.
    """
    # dict(...) copies the mapping so the rows do not alias the elements.
    rows = [dict(item.attrs) for item in data]
    return pandas.DataFrame(rows)
|
2024-09-07 18:13:36 -04:00
|
|
|
|
2024-09-11 10:56:17 -04:00
|
|
|
def process_links_attr(data):
    """Flatten each element's nested ``<attributes>`` children into a row.

    Elements for which ``find("attributes")`` returns a falsy value are
    skipped. For every other element the row starts with the element's
    ``id`` and gains one entry per ``attribute`` child: the key is the
    child's ``name`` with ``:`` replaced by ``_`` and the value is the
    child's text.

    Args:
        data: Iterable of elements offering ``find``, ``get`` and, on the
            ``attributes`` child, ``find_all`` (presumably BeautifulSoup
            tags -- confirm against the caller).

    Returns:
        pandas.DataFrame with one row per element that has an
        ``attributes`` child.
    """
    records = []
    for item in data:
        container = item.find("attributes")
        if not container:
            continue

        record = {"id": item.get("id")}
        # Colons in attribute names are replaced with underscores to form
        # the column names.
        record.update({
            child.get("name").replace(":", "_"): child.get_text()
            for child in container.find_all("attribute")
        })
        records.append(record)

    return pandas.DataFrame(records)
|