Update load_data to use API
This commit is contained in:
parent
513c8c1a46
commit
3441bf88e2
@ -1,7 +0,0 @@
|
||||
{
|
||||
"crs": 27700,
|
||||
"mapping": [
|
||||
["Date_sou_1", "date_source", "lambda old_, new_: new_"],
|
||||
["Year_C", "date_year", "lambda old_, new_: int(new_)"]
|
||||
]
|
||||
}
|
@ -1,162 +0,0 @@
|
||||
"""Join shapefile data to buildings
|
||||
|
||||
- read through shapes
|
||||
- locate building in current database (by largest intersection with the shape)
|
||||
- update building with data
|
||||
"""
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
|
||||
import fiona
|
||||
import psycopg2
|
||||
|
||||
from shapely.geometry import shape
|
||||
|
||||
|
||||
def main(source_file, config_path, transform_config_path):
    """Load config, read the source features and save them to the database.

    Args:
        source_file: path to a feature source readable by ``fiona.open``.
        config_path: path to a JSON file holding a ``database`` object whose
            entries are passed to ``psycopg2.connect`` as keyword arguments.
        transform_config_path: path to a JSON file holding ``crs`` (the EPSG
            code applied to the source geometries) and ``mapping`` (a list of
            ``[from_fieldname, to_fieldname, transform_source]`` entries).

    All updates are committed once, after every feature has been processed.
    The connection is closed even when processing fails part-way, in which
    case nothing is committed.
    """
    conf = read_config(config_path)
    transform_config = read_config(transform_config_path)
    # SECURITY: each transform is Python source text that gets eval()'d, so
    # the mapping file must only ever come from a trusted author.
    data_mapping = [
        # from_fieldname, to_fieldname, mapping(old_val, source_val)->new_val
        (from_, to_, eval(transform))
        for from_, to_, transform in transform_config['mapping']
    ]
    epsg_code = transform_config['crs']

    conn = psycopg2.connect(**conf['database'])
    try:
        with fiona.open(source_file, 'r') as source:
            with conn.cursor() as cur:
                for feature in source:
                    geometry_id = find_geom(cur, feature, epsg_code)
                    if geometry_id is not None:
                        save_data(
                            cur, feature['properties'], data_mapping, geometry_id)
                    else:
                        print("Skipping", feature['properties'])
        conn.commit()
    finally:
        # Always release the connection; closing without a commit discards
        # any partial work from a failed run.
        conn.close()
|
||||
|
||||
|
||||
def save_data(cur, props, data_conf, geometry_id):
    """Write mapped source properties onto the building for a geometry.

    Looks up the ``buildings`` row attached to ``geometry_id``. When a row
    exists its ``building_doc`` is updated in place; otherwise a new row is
    inserted with a document built from an empty dict.
    """
    select_sql = """SELECT building_id, building_doc FROM buildings
        WHERE
            geometry_id = %s
        """
    cur.execute(select_sql, (geometry_id,))
    existing = cur.fetchone()

    if existing is not None:
        building_id, old_doc = existing
        updated_doc = update_from_props(old_doc, props, data_conf)
        update_sql = """UPDATE buildings
            SET
                building_doc = %s::jsonb
            WHERE
                building_id = %s
            """
        cur.execute(update_sql, (json.dumps(updated_doc), building_id))
        return

    # No building is attached to this geometry yet: start from an empty doc.
    new_doc = update_from_props({}, props, data_conf)
    insert_sql = """INSERT INTO buildings
            (
                building_doc,
                geometry_id
            )
            VALUES
            (
                %s::jsonb,
                %s
            )
            """
    cur.execute(insert_sql, (json.dumps(new_doc), geometry_id))
|
||||
|
||||
|
||||
def find_geom(cur, feature, epsg_code):
    """Find the stored geometry that best matches a source feature.

    Args:
        cur: database cursor used to run the query.
        feature: source feature; its ``geometry`` entry is converted with
            ``shapely.geometry.shape``.
        epsg_code: SRID assigned to the source geometry before it is
            transformed to EPSG:3857 for comparison.

    Returns:
        The ``geometry_id`` of the intersecting geometry with the largest
        intersection area, or ``None`` when nothing intersects.
    """
    # NOTE: matching on TOID is not implemented here; only the
    # best-intersection match below is performed.
    # NOTE(review): presumably geometries.geometry_geom is stored in
    # EPSG:3857, since the source shape is transformed to 3857 before the
    # comparison - confirm against the schema.
    wkb_hex = shape(feature['geometry']).wkb_hex
    cur.execute(
        """SELECT geometry_id,
            ST_Area(
                ST_Intersection(
                    ST_Transform(
                        ST_SetSRID(%s::geometry, %s),
                        3857
                    ),
                    geometry_geom
                )
            ) as intersection_area
        FROM geometries
        WHERE
            ST_Intersects(
                ST_Transform(
                    ST_SetSRID(%s::geometry, %s),
                    3857
                ),
                geometry_geom
            )
        ORDER BY intersection_area DESC
        """, (
            wkb_hex,
            epsg_code,
            wkb_hex,
            epsg_code
        )
    )
    # Rows are ordered by descending intersection area, so the first row is
    # the best match.
    best = cur.fetchone()
    if best is None:
        # Callers test the result with ``is not None``.
        return None
    # Hand back only the id column, not the (geometry_id, area) row.
    return best[0]
|
||||
|
||||
|
||||
def update_from_props(doc, props, mapping):
    """Apply a list of field transforms to ``doc`` and return it.

    Each mapping entry is a triple:
    - source field name (looked up in ``props``)
    - target field name (created in ``doc`` as None when absent)
    - transform(old_val, new_val) callable producing the value to store

    ``doc`` is modified in place and is also the return value.
    """
    for source_field, target_field, convert in mapping:
        # Ensure the target exists first so the transform always receives a
        # "previous" value, None for fields not yet present.
        previous = doc.setdefault(target_field, None)
        doc[target_field] = convert(previous, props[source_field])
    return doc
|
||||
|
||||
|
||||
def read_config(config_path):
    """Load a JSON config file and return its parsed contents.
    """
    with open(config_path, 'r') as config_file:
        return json.load(config_file)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Exactly three arguments are required: the source file, the database
    # config and the mapping config.
    if len(sys.argv) != 4:
        print(
            "Usage: {} ./path/to/source/file.csv ./path/to/dbconfig.json ./path/to/mapping.json".format(
                os.path.basename(__file__)
            ))
        # sys.exit instead of the site-provided exit() helper, with a
        # non-zero status so calling shells can detect the usage error.
        sys.exit(1)
    main(sys.argv[1], sys.argv[2], sys.argv[3])
|
@ -1,6 +0,0 @@
|
||||
{
|
||||
"crs": 27700,
|
||||
"mapping": [
|
||||
["Storeys", "size_storeys", "lambda old_, new_: int(new_) if new_ is not None else None"]
|
||||
]
|
||||
}
|
140
etl/join_building_data/load_data.py
Normal file
140
etl/join_building_data/load_data.py
Normal file
@ -0,0 +1,140 @@
|
||||
"""Join shapefile data to buildings
|
||||
|
||||
This is effectively an example script using the HTTP API, tailored to particular collected
|
||||
datasets for Camden (age data) and Fitzrovia (number of storeys).
|
||||
|
||||
- read through shapes
|
||||
- locate building by toid
|
||||
- else locate building by representative point
|
||||
- update building with data
|
||||
"""
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
from functools import partial
|
||||
|
||||
import fiona
|
||||
import pyproj
|
||||
import requests
|
||||
from shapely.geometry import shape
|
||||
from shapely.ops import transform
|
||||
|
||||
|
||||
# Coordinate transform from EPSG:27700 to EPSG:4326 with both projections
# pre-bound, so the resulting callable can be handed to shapely.ops.transform
# (as find_building does).
# NOTE(review): `pyproj.Proj(init=...)` and `pyproj.transform` are deprecated
# in newer pyproj releases - confirm the installed version before upgrading.
osgb_to_ll = partial(
    pyproj.transform,
    pyproj.Proj(init='epsg:27700'),
    pyproj.Proj(init='epsg:4326')
)
|
||||
|
||||
|
||||
def main(base_url, api_key, process, source_file):
    """Walk the features in ``source_file`` and push their data to the API.

    ``process`` selects how feature properties are interpreted: "camden"
    uses process_camden, any other value uses process_fitzrovia. Features
    that yield no data are skipped silently; features with no matching
    building are reported as "no_match" and skipped.
    """
    with fiona.open(source_file, 'r') as source:
        # The choice does not depend on the feature, so make it once.
        extract = process_camden if process == "camden" else process_fitzrovia

        for feature in source:
            toid, data = extract(feature['properties'])
            if data is None:
                continue

            building_id = find_building(toid, feature['geometry'], base_url)
            if building_id:
                save_data(building_id, data, api_key, base_url)
            else:
                print("no_match", toid, "-")
|
||||
|
||||
|
||||
def process_camden(props):
    """Turn a Camden feature's properties into ``(toid, data)``.

    ``data`` carries the construction year and the date source detail taken
    from the ``Year_C`` and ``Date_sou_1`` properties.
    """
    return osgb_toid(props['TOID']), {
        'date_year': props['Year_C'],
        'date_source_detail': props['Date_sou_1'],
    }
|
||||
|
||||
|
||||
def process_fitzrovia(props):
    """Turn a Fitzrovia feature's properties into ``(toid, data)``.

    ``data`` is None when the feature has no ``Storeys`` value. Otherwise the
    storey count is split into core and basement storeys: a ``Basement``
    value of 'Yes' counts as one basement storey taken out of the total.
    """
    toid = osgb_toid(props['TOID'])
    storeys = props['Storeys']
    if storeys is None:
        return toid, None

    basement_count = 1 if props['Basement'] == 'Yes' else 0
    return toid, {
        'size_storeys_core': int(storeys) - basement_count,
        'size_storeys_basement': basement_count,
    }
|
||||
|
||||
|
||||
def osgb_toid(toid):
    """Return ``toid`` with leading zeros stripped and an "osgb" prefix.

    A ``None`` toid is treated as an empty string, giving just "osgb".
    """
    digits = toid if toid is not None else ""
    return "osgb" + digits.lstrip("0")
|
||||
|
||||
|
||||
def save_data(building_id, data, api_key, base_url):
    """Save data to a building through the HTTP API.

    POSTs ``data`` as a JSON body to ``<base_url>/building/<building_id>.json``
    with ``api_key`` sent as a query parameter.

    A non-success response is reported on stdout as ``save_failed`` rather
    than raised, so one rejected update does not abort the whole run.
    Connection-level errors raised by ``requests`` still propagate.
    """
    response = requests.post(
        "{}/building/{}.json".format(base_url, building_id),
        # Passed via params so the key is URL-encoded by requests instead of
        # being interpolated into the URL verbatim.
        params={'api_key': api_key},
        json=data
    )
    if not response.ok:
        print("save_failed", building_id, response.status_code)
|
||||
|
||||
|
||||
def find_building(toid, geom, base_url):
    """Resolve a building_id, trying the TOID first and the location second.

    Each lookup only counts as a match when the API returns exactly one
    building. Returns the matched building_id, or None when neither lookup
    produced a single building.
    """
    # Attempt 1: reference lookup keyed on the TOID.
    reference_hits = requests.get(
        base_url + "/buildings/reference",
        params={'key': 'toid', 'id': toid}
    ).json()
    if reference_hits and len(reference_hits) == 1:
        matched_id = reference_hits[0]['building_id']
        print("match_by_toid", toid, matched_id)
        return matched_id

    # Attempt 2: locate by a point inside the footprint. The centroid is
    # used when it lies within the polygon, otherwise a representative point.
    footprint = shape(geom)
    centroid = footprint.centroid
    if footprint.contains(centroid):
        point_osgb = centroid
    else:
        point_osgb = footprint.representative_point()

    point_ll = transform(osgb_to_ll, point_osgb)
    location_hits = requests.get(
        base_url + "/buildings/locate",
        params={'lng': point_ll.x, 'lat': point_ll.y}
    ).json()
    if not location_hits or len(location_hits) != 1:
        return None

    matched_id = location_hits[0]['building_id']
    print("match_by_location", toid, matched_id)
    return matched_id
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Four positional arguments are required; any extras are ignored.
    try:
        url, api_key, process, filename = sys.argv[1:5]
    except ValueError:
        print(
            "Usage: {} <URL> <api_key> <camden|fitzrovia> ./path/to/camden.shp".format(
                os.path.basename(__file__)
            ))
        # sys.exit instead of the site-provided exit() helper, with a
        # non-zero status so calling shells can detect the usage error.
        sys.exit(1)

    main(url, api_key, process, filename)
|
Loading…
Reference in New Issue
Block a user