import re

import pandas
import pyproj
from bs4 import BeautifulSoup
from shapely.geometry import Point


def process_nodes(data, source_crs='EPSG:2950', target_crs='EPSG:4326'):
    """Build a DataFrame of node attributes with a reprojected point geometry.

    Args:
        data: Iterable of parsed elements exposing an ``attrs`` mapping that
            contains ``'x'`` and ``'y'`` entries (numbers or numeric strings).
        source_crs: CRS of the incoming ``x``/``y`` values.
        target_crs: CRS the coordinates are reprojected to.

    Returns:
        pandas.DataFrame with one row per node: every original attribute plus
        a ``'coordinates'`` column holding a shapely ``Point`` in
        ``target_crs``.

    Raises:
        KeyError: If a node has no ``'x'`` or ``'y'`` attribute.
        ValueError: If ``'x'`` or ``'y'`` cannot be converted to ``float``.
    """
    # Built once: constructing a Transformer is far more expensive than using it.
    transformer = pyproj.Transformer.from_crs(
        source_crs, target_crs, always_xy=True
    )
    rows = []
    for node in data:
        # Copy so the parsed element's own attribute mapping is not mutated.
        row = dict(node.attrs)
        # With always_xy=True the result is in (x, y) order, i.e.
        # (longitude, latitude) for a geographic target CRS. Unpacking it as
        # (lat, lon) would silently swap the axes of the resulting Point.
        lon, lat = transformer.transform(float(row['x']), float(row['y']))
        row['coordinates'] = Point(lon, lat)
        rows.append(row)
    return pandas.DataFrame(rows)


def process_links(data):
    """Return a DataFrame with one row per element, built from its attributes.

    Args:
        data: Iterable of parsed elements exposing an ``attrs`` mapping.

    Returns:
        pandas.DataFrame whose columns are the attribute names found on the
        elements.
    """
    return pandas.DataFrame([dict(element.attrs) for element in data])


def process_links_attr(data):
    """Flatten each element's nested ``<attributes>`` block into a DataFrame row.

    Elements without an ``<attributes>`` child are skipped. For the others a
    row is built from the element's ``id`` plus one column per nested
    ``<attribute>``: the column name is the attribute's ``name`` with ``":"``
    replaced by ``"_"`` and the value is the attribute's text content.

    Args:
        data: Iterable of parsed elements supporting ``find`` and ``get``.

    Returns:
        pandas.DataFrame with an ``'id'`` column and one column per distinct
        attribute name encountered.
    """
    rows = []
    for element in data:
        # Look the container up once instead of once for the test and once
        # more for the iteration.
        container = element.find("attributes")
        if container is None:
            continue
        row = {"id": element.get("id")}
        for attribute in container.find_all("attribute"):
            name = attribute.get("name")
            if name is None:
                # An unnamed attribute cannot be mapped to a column; skip it
                # rather than fail on None.replace().
                continue
            row[name.replace(":", "_")] = attribute.get_text()
        rows.append(row)
    return pandas.DataFrame(rows)