zele-utils/functions/population.py

53 lines
2.0 KiB
Python
Raw Normal View History

2024-09-10 17:40:50 -04:00
from bs4 import BeautifulSoup
2024-09-11 10:56:17 -04:00
import pandas,pyproj, re
2024-09-10 17:40:50 -04:00
from shapely.geometry import Point
2024-09-07 18:13:36 -04:00
2024-09-10 17:40:50 -04:00
def camel_to_snake(name):
    """Convert a camelCase / PascalCase identifier to snake_case.

    An underscore is inserted before every ASCII capital letter that is not
    the first character, then the whole string is lower-cased. Consecutive
    capitals are each split off (``"HTTPResponse"`` -> ``"h_t_t_p_response"``).
    """
    pieces = []
    for position, char in enumerate(name):
        # Only ASCII A-Z triggers a split, and never at the very start.
        if position and 'A' <= char <= 'Z':
            pieces.append('_')
        pieces.append(char)
    # Lower-case the assembled string in one go so that context-sensitive
    # case mappings behave exactly as they would on the full string.
    return ''.join(pieces).lower()
def process_buffer_population(data):
    """Build a DataFrame with one row per ``<person>`` element in *data*.

    Each row contains:

    * ``id`` - the person's ``id`` attribute;
    * ``coordinates`` - a shapely ``Point(lon, lat)`` in EPSG:4326, obtained
      by reprojecting the ``x``/``y`` of the first ``<activity>`` of the
      person's first ``<plan>`` from EPSG:2950;
    * ``time`` - the ``end_time`` attribute of that same activity;
    * one column per ``<attribute>`` element found under the person, keyed by
      the attribute's ``name`` converted with ``camel_to_snake`` and holding
      the element's text.

    Args:
        data: XML markup to parse (handed to ``BeautifulSoup`` with the
            ``lxml-xml`` parser).

    Returns:
        pandas.DataFrame: one row per person.

    Raises:
        KeyError: if a person has no ``id``, or its first activity lacks
            ``x``, ``y`` or ``end_time``.
        AttributeError, TypeError: if a person has no ``<plan>`` or the plan
            has no ``<activity>`` (``find`` returns ``None`` in that case).
        ValueError: if ``x`` or ``y`` is not a valid number.
    """
    transformer = pyproj.Transformer.from_crs('EPSG:2950', 'EPSG:4326', always_xy=True)
    elements = BeautifulSoup(data, 'lxml-xml')

    people = []
    for person in elements.find_all('person'):
        person_obj = {'id': person['id']}
        activity = person.find('plan').find('activity')
        # With always_xy=True the transformer takes and returns (x, y), i.e.
        # (longitude, latitude) for EPSG:4326. Unpack in that order so the
        # Point is (lon, lat), matching process_travels. XML attribute values
        # are strings, so convert them explicitly before transforming.
        lon, lat = transformer.transform(float(activity['x']), float(activity['y']))
        person_obj['coordinates'] = Point(lon, lat)
        person_obj['time'] = activity['end_time']
        # find_all searches every descendant of the person, so any nested
        # <attribute> element contributes a column as well.
        for attr in person.find_all('attribute'):
            person_obj[camel_to_snake(attr['name'])] = attr.get_text()
        people.append(person_obj)
    return pandas.DataFrame(people)
2024-09-07 18:13:36 -04:00
2024-09-10 18:28:23 -04:00
def process_travels(data):
    """Flatten every activity of every person's plan into a DataFrame.

    For each ``<person>`` in *data*, the ``<activity>`` elements of its first
    ``<plan>`` are numbered from 1 in document order and emitted as one row
    each.

    Args:
        data: XML markup to parse (handed to ``BeautifulSoup`` with the
            ``lxml-xml`` parser).

    Returns:
        pandas.DataFrame: columns ``id`` (the person's id),
        ``activity_order``, ``activity_type``, ``facility``, ``start_time``,
        ``end_time`` and ``coordinates``. Missing or empty times become
        ``None``. ``coordinates`` is a shapely ``Point(lon, lat)`` in
        EPSG:4326 reprojected from EPSG:2950, or ``None`` when the activity
        has no ``x``/``y`` pair.
    """
    to_wgs84 = pyproj.Transformer.from_crs('EPSG:2950', 'EPSG:4326', always_xy=True)
    document = BeautifulSoup(data, 'lxml-xml')

    rows = []
    for person in document.find_all('person'):
        owner_id = person['id']
        order = 0
        for act in person.find('plan').find_all('activity'):
            order += 1

            # Only activities carrying both coordinates can be located.
            location = None
            if {'x', 'y'} <= act.attrs.keys():
                # always_xy=True: input and output are (x, y) == (lon, lat).
                location = Point(*to_wgs84.transform(float(act['x']), float(act['y'])))

            rows.append({
                'id': owner_id,
                'activity_order': order,
                'activity_type': act.get('type'),
                'facility': act.get('facility'),
                # Normalise absent or empty time attributes to None.
                'start_time': act.get('start_time') or None,
                'end_time': act.get('end_time') or None,
                'coordinates': location,
            })
    return pandas.DataFrame(rows)