zele-utils/functions/population.py

46 lines
1.8 KiB
Python
Raw Normal View History

2024-09-10 17:40:50 -04:00
from bs4 import BeautifulSoup
import pandas,geopandas, pyproj, re, os, datetime
from shapely.geometry import Point
from sqlalchemy import create_engine
from geoalchemy2 import Geometry, WKTElement
2024-09-07 18:13:36 -04:00
2024-09-10 17:40:50 -04:00
def camel_to_snake(name):
    """Convert a camelCase or PascalCase identifier to snake_case.

    An underscore is inserted in front of every ASCII capital letter except
    one at the very start of the string, then the result is lower-cased.
    Consecutive capitals are each split off ("HTTPServer" becomes
    "h_t_t_p_server").
    """
    # Zero-width match: any position followed by a capital, except position 0.
    word_boundary = re.compile(r'(?<!^)(?=[A-Z])')
    underscored = word_boundary.sub('_', name)
    return underscored.lower()
def process_buffer_population(data):
    """Parse a chunk of population markup into one DataFrame row per person.

    Args:
        data: Markup (str or bytes) containing ``<person>`` elements. Each
            person must carry an ``id`` attribute and a ``<plan>`` whose first
            ``<activity>`` has ``x``, ``y`` and ``end_time`` attributes. The
            ``x``/``y`` values are read as EPSG:2950 coordinates.

    Returns:
        pandas.DataFrame with the columns ``id``, ``coordinates`` (a shapely
        ``Point`` in EPSG:4326 with x = longitude and y = latitude), ``time``
        (the first activity's ``end_time``) and one extra column per
        ``<attribute>`` element found under the person, named after the
        snake_case form of its ``name`` attribute and holding its text.

    Raises:
        AttributeError: if a person has no ``<plan>`` or the plan has no
            ``<activity>``.
        KeyError: if a required XML attribute is missing.
        ValueError: if ``x`` or ``y`` is not a valid number.
    """
    # always_xy=True pins the axis order to (x, y): easting/northing going in,
    # longitude/latitude coming out -- regardless of the CRS' official order.
    transformer = pyproj.Transformer.from_crs('EPSG:2950', 'EPSG:4326', always_xy=True)
    persons = []
    elements = BeautifulSoup(data, 'html.parser')
    for person in elements.find_all('person'):
        person_obj = {}
        person_obj['id'] = person['id']
        # Only the first activity of the first plan is used.
        activity = person.find('plan').find('activity')
        # Attribute values are strings; convert explicitly before projecting.
        lon, lat = transformer.transform(float(activity['x']), float(activity['y']))
        # shapely points are (x, y), i.e. (longitude, latitude) in EPSG:4326.
        person_obj['coordinates'] = Point(lon, lat)
        person_obj['time'] = activity['end_time']
        for attr in person.find_all('attribute'):
            person_obj[camel_to_snake(attr['name'])] = attr.get_text()
        persons.append(person_obj)
    return pandas.DataFrame(persons)
2024-09-07 18:13:36 -04:00
def push_population(data, mode):
    """Insert the population table into the ``agents`` table of a PostGIS database.

    Connection settings are read from the environment variables ``USER``,
    ``PASS``, ``HOST_NAME`` and ``DATA_BASE``. ``SRID`` selects the spatial
    reference id stamped on the geometries (defaults to 4326, the CRS the
    coordinates are produced in) and ``CHUNK_SIZE`` optionally limits the
    number of rows written per batch.

    Args:
        data: pandas.DataFrame with a ``coordinates`` column of shapely
            geometries (anything exposing ``.wkt``); all other columns are
            written as-is.
        mode: Passed to ``DataFrame.to_sql`` as ``if_exists``
            (``'fail'``, ``'replace'`` or ``'append'``).

    Raises:
        KeyError: if ``data`` has no ``coordinates`` column.
        ValueError: if ``SRID`` or ``CHUNK_SIZE`` is set but not an integer.
    """
    # Environment values are strings; the SQL layer needs real integers.
    srid = int(os.getenv("SRID", "4326"))
    raw_chunk_size = os.getenv("CHUNK_SIZE")
    chunk_size = int(raw_chunk_size) if raw_chunk_size else None

    # The geometry travels as WKT in the ``geom`` column. The shapely column is
    # dropped because the database driver cannot serialise shapely objects.
    geom = data['coordinates'].apply(lambda point: WKTElement(point.wkt, srid=srid))
    table = pandas.DataFrame(data).drop(columns='coordinates').assign(geom=geom)

    engine = create_engine(f'postgresql://{os.getenv("USER")}:{os.getenv("PASS")}@{os.getenv("HOST_NAME")}/{os.getenv("DATA_BASE")}', echo=False)
    try:
        table.to_sql(
            name='agents',
            con=engine,
            if_exists=mode,
            chunksize=chunk_size,
            dtype={'geom': Geometry('Point', srid=srid)},
            index=False,
        )
    finally:
        # A fresh engine is built on every call; release its connection pool.
        engine.dispose()
2024-09-07 18:13:36 -04:00
2024-09-10 17:40:50 -04:00
def write_population(data, file):
    """Append the population table to a date-stamped CSV next to ``file``.

    The output path is ``<directory of file>/<stem of file><YYYYMMDD>.csv``,
    where the date is today's local date. The file is created with a header
    row on first use; later calls on the same day append rows only.

    Args:
        data: pandas.DataFrame to write (the index is not written).
        file: Path (``pathlib.Path`` or ``str``) of the source file the CSV
            name is derived from.
    """
    from pathlib import Path  # local import keeps the block self-contained

    source = Path(file)
    stamp = datetime.datetime.now().strftime("%Y%m%d")
    target = source.parent / (source.stem + stamp + ".csv")
    # Write the header only when creating the file; repeating it on every
    # append would inject header lines in the middle of the data.
    write_header = not target.exists()
    data.to_csv(target, mode='a', header=write_header, index=False)