2024-09-10 17:40:50 -04:00
|
|
|
from bs4 import BeautifulSoup
|
|
|
|
import pandas,geopandas, pyproj, re, os, datetime
|
|
|
|
from shapely.geometry import Point
|
|
|
|
from sqlalchemy import create_engine
|
|
|
|
from geoalchemy2 import Geometry, WKTElement
|
2024-09-07 18:13:36 -04:00
|
|
|
|
2024-09-10 17:40:50 -04:00
|
|
|
def camel_to_snake(name):
    """Convert a camelCase / PascalCase identifier to snake_case.

    An underscore is inserted in front of every ASCII uppercase letter that
    is not the first character of the string, then the whole result is
    lower-cased. Consecutive capitals are each split off, so ``"hasPTCard"``
    becomes ``"has_p_t_card"``.
    """
    # Zero-width match: "not at the start of the string" + "an uppercase
    # letter follows", so the substitution only inserts, never consumes.
    boundary = re.compile(r'(?<!^)(?=[A-Z])')
    separated = boundary.sub('_', name)
    return separated.lower()
|
|
|
|
|
|
|
|
def process_buffer_population(data):
    """Parse a population XML document into one row per ``<person>``.

    Each row contains the person's ``id``, the location and ``end_time`` of
    the first ``<activity>`` found in the person's first ``<plan>``, and one
    column per ``<attribute>`` element found under the person (the attribute's
    ``name`` is converted with ``camel_to_snake``; the value is the element
    text).

    Args:
        data: XML markup to parse; handed to ``BeautifulSoup`` with the
            ``lxml-xml`` parser.

    Returns:
        pandas.DataFrame with the columns ``id``, ``coordinates``
        (``shapely.geometry.Point`` with x = longitude, y = latitude in
        EPSG:4326), ``time`` and the per-person attribute columns. An input
        without ``<person>`` elements yields an empty frame.

    Raises:
        AttributeError: if a person has no ``<plan>`` or no ``<activity>``.
        KeyError: if the person lacks ``id`` or the activity lacks ``x``,
            ``y`` or ``end_time``.
    """
    transformer = pyproj.Transformer.from_crs('EPSG:2950', 'EPSG:4326', always_xy=True)
    elements = BeautifulSoup(data, 'lxml-xml')

    people = []
    for person in elements.find_all('person'):
        activity = person.find('plan').find('activity')

        # always_xy=True makes the transformer return (x, y) == (lon, lat).
        # The previous code unpacked this as (lat, lon) and therefore built
        # Point(lat, lon), the opposite axis order of process_travels().
        lon, lat = transformer.transform(float(activity['x']), float(activity['y']))

        person_obj = {
            'id': person['id'],
            'coordinates': Point(lon, lat),
            'time': activity['end_time'],
        }
        for attr in person.find_all('attribute'):
            person_obj[camel_to_snake(attr['name'])] = attr.get_text()

        people.append(person_obj)

    return pandas.DataFrame(people)
|
2024-09-07 18:13:36 -04:00
|
|
|
|
2024-09-10 18:28:23 -04:00
|
|
|
def process_travels(data):
    """Flatten the activities of every person's plan into one row each.

    For each ``<person>`` the first ``<plan>`` is read and every
    ``<activity>`` inside it becomes a row carrying the person's ``id``, the
    1-based position of the activity within the plan, its ``type``,
    ``facility``, ``start_time`` and ``end_time`` attributes (missing or empty
    times become ``None``) and its location.

    Args:
        data: XML markup to parse; handed to ``BeautifulSoup`` with the
            ``lxml-xml`` parser.

    Returns:
        pandas.DataFrame with the columns ``id``, ``activity_order``,
        ``activity_type``, ``facility``, ``start_time``, ``end_time`` and
        ``coordinates``. ``coordinates`` is a ``shapely.geometry.Point``
        (x = longitude, y = latitude, EPSG:4326) or ``None`` when the
        activity has no ``x``/``y`` attributes.
    """
    to_wgs84 = pyproj.Transformer.from_crs('EPSG:2950', 'EPSG:4326', always_xy=True)
    rows = []
    document = BeautifulSoup(data, 'lxml-xml')

    for person in document.find_all('person'):
        owner = person['id']
        steps = person.find('plan').find_all('activity')

        for position, step in enumerate(steps, start=1):
            location = None
            if 'x' in step.attrs and 'y' in step.attrs:
                # always_xy=True: the transformer yields (lon, lat).
                location = Point(*to_wgs84.transform(float(step['x']), float(step['y'])))

            rows.append({
                'id': owner,
                'activity_order': position,
                'activity_type': step.get('type'),
                'facility': step.get('facility'),
                # `or None` also maps an empty attribute value to None.
                'start_time': step.get('start_time') or None,
                'end_time': step.get('end_time') or None,
                'coordinates': location,
            })

    return pandas.DataFrame(rows)
|
|
|
|
|
|
|
|
|
|
|
|
def push_population(name, data, mode):
    """Write a frame with a ``coordinates`` column to a PostgreSQL table.

    A ``geom`` column holding a ``WKTElement`` per row is derived from
    ``coordinates`` and declared as a PostGIS ``Point`` geometry for the
    insert. Connection settings are read from the environment variables
    ``USER``, ``PASS``, ``HOST_NAME`` and ``DATA_BASE``; ``SRID`` and
    ``CHUNK_SIZE`` tune the geometry SRID and the insert batch size.

    Args:
        name: Target table name.
        data: Tabular data with a ``coordinates`` column of shapely geometries
            (``None`` entries are written as NULL geometries).
        mode: Forwarded to ``to_sql`` as ``if_exists``
            (``'fail'``, ``'replace'`` or ``'append'``).

    Raises:
        ValueError: if ``SRID`` or ``CHUNK_SIZE`` is set but not an integer.
    """
    # Environment values are strings; to_sql needs an int chunksize (or None).
    # SRID falls back to 4326 because the frame below is declared EPSG:4326.
    srid = int(os.getenv("SRID", "4326"))
    raw_chunk_size = os.getenv("CHUNK_SIZE")
    chunk_size = int(raw_chunk_size) if raw_chunk_size else None

    # NOTE(review): recent geopandas releases may reject `crs=` without a
    # geometry column -- confirm against the pinned version.
    gdf = geopandas.GeoDataFrame(data, crs='EPSG:4326')

    # process_travels() emits None for activities without x/y; keep those
    # rows and store a NULL geometry instead of failing on `None.wkt`.
    gdf['geom'] = gdf['coordinates'].apply(
        lambda point: None if point is None else WKTElement(point.wkt, srid=srid)
    )

    # NOTE(review): the original shapely `coordinates` column is still part of
    # the frame handed to to_sql -- confirm the driver can serialize it.
    engine = create_engine(f'postgresql://{os.getenv("USER")}:{os.getenv("PASS")}@{os.getenv("HOST_NAME")}/{os.getenv("DATA_BASE")}', echo=False)
    try:
        gdf.to_sql(
            name=name,
            con=engine,
            if_exists=mode,
            chunksize=chunk_size,
            dtype={'geom': Geometry('Point', srid=srid)},
            index=False,
        )
    finally:
        # A new engine is created per call; release its connection pool.
        engine.dispose()
|
2024-09-07 18:13:36 -04:00
|
|
|
|
2024-09-10 18:28:23 -04:00
|
|
|
def write_population(name, data, file):
    """Write ``data`` to a dated CSV file next to ``file``.

    The output path is ``<file.parent>/<file.stem>-<name>-<YYYYMMDD>.csv``,
    using the current local date. If that file already exists (and is not
    empty) the rows are appended without repeating the header line; otherwise
    the file is created with a header.

    Args:
        name: Label inserted into the output file name.
        data: Object exposing ``to_csv`` (a pandas DataFrame).
        file: Path-like object providing ``parent`` and ``stem``
            (e.g. ``pathlib.Path``) of the source file.
    """
    stamp = datetime.datetime.now().strftime("%Y%m%d")
    target = file.parent / (file.stem + f"-{name}-{stamp}.csv")

    # Only an existing, non-empty file already carries the header row;
    # writing it again on append would inject header text into the data.
    has_header = target.exists() and target.stat().st_size > 0

    data.to_csv(
        target,
        mode='a' if has_header else 'w',
        header=not has_header,
        index=False,
    )
|