colouring-montreal/etl/1_load_geometries.py

106 lines
2.4 KiB
Python
Raw Normal View History

"""Load geometries from GeoJSON to Postgres

- create 'geometry' record with {
    id: <polygon-guid>,
    doc: {source_id: <toid>},
    geom: <geom-wkb_hex>
  }
- create corresponding 'building' record with {
    id: <building-guid>,
    doc: {},
    geom_id: <polygon-guid>
  }
"""
import glob
2018-08-01 08:12:56 -04:00
import json
2018-07-17 04:09:13 -04:00
import os
import sys
2018-08-01 08:12:56 -04:00
import fiona
2018-07-17 04:09:13 -04:00
import psycopg2
2018-08-01 08:12:56 -04:00
from shapely.geometry import shape
2018-07-17 04:09:13 -04:00
def main(source_dir, config_path):
    """Load config, read GeoJSON files and save their features to the database.

    Every ``*.geojson`` file directly inside ``source_dir`` is opened with
    fiona and each feature is passed to ``save_feature``. A feature whose
    ``properties['fid']`` was already saved earlier in this run (from the same
    or a previous file) is skipped and reported on stdout.

    Args:
        source_dir: Directory containing the ``*.geojson`` source files.
        config_path: Path to a JSON config file; its ``database`` entry is
            passed as keyword arguments to ``psycopg2.connect``.

    Raises:
        KeyError: If the config has no ``database`` entry or a feature has no
            ``fid`` property.
    """
    conf = read_config(config_path)
    conn = psycopg2.connect(**conf['database'])

    try:
        # Escape the directory so characters such as '[' or '*' in its name
        # are matched literally; sort so that, when the same fid appears in
        # several files, the one that gets loaded is the same on every run.
        pattern = os.path.join(glob.escape(source_dir), "*.geojson")
        source_files = sorted(glob.glob(pattern))

        # Source ids already written during this run.
        loaded = set()
        for source_file in source_files:
            with fiona.open(source_file, 'r') as source:
                with conn.cursor() as cur:
                    for feature in source:
                        fid = feature['properties']['fid']
                        if fid in loaded:
                            print("Skipping", fid)
                            continue
                        save_feature(cur, feature)
                        loaded.add(fid)
            # One transaction per source file.
            conn.commit()
    finally:
        # Always release the connection, even if a file fails to load;
        # closing without a commit discards the unfinished transaction.
        conn.close()
2018-07-17 04:09:13 -04:00
def save_feature(cur, feature, srid=3857):
    """Save a feature's geometry and create the building that refers to it.

    Inserts one row into ``geometries`` holding the feature's source id (as a
    JSON document) and its geometry, then inserts one row into ``buildings``
    with an empty JSON document and the id of the geometry row just created.

    Args:
        cur: Open database cursor used to execute both inserts. The caller is
            responsible for committing.
        feature: Mapping with a ``properties`` entry containing ``fid`` and a
            ``geometry`` entry accepted by ``shapely.geometry.shape``.
        srid: Spatial reference id assigned to the stored geometry via
            ``ST_SetSRID``. Defaults to 3857, the value previously hard-coded.

    Raises:
        KeyError: If the feature has no ``fid`` property or no geometry.
    """
    source_id = feature['properties']['fid']
    # Hex-encoded WKB is cast to a geometry on the database side.
    geom_wkb_hex = shape(feature['geometry']).wkb_hex

    cur.execute(
        """INSERT INTO geometries
            (
                geometry_doc,
                geometry_geom
            )
        VALUES
            (
                %s::jsonb,
                ST_SetSRID(%s::geometry, %s)
            )
        RETURNING geometry_id
        """, (
            json.dumps({
                'source_id': source_id
            }),
            geom_wkb_hex,
            srid
        )
    )
    # RETURNING yields exactly one single-column row for the inserted geometry.
    geom_id, = cur.fetchone()

    cur.execute(
        """INSERT INTO buildings
            (
                building_doc,
                geometry_id
            )
        VALUES
            (
                %s::jsonb,
                %s
            )
        """, (
            json.dumps({}),
            geom_id
        )
    )
2018-07-17 04:09:13 -04:00
def read_config(config_path):
    """Read a JSON config file containing database connection details.

    Args:
        config_path: Path to the JSON config file.

    Returns:
        The parsed JSON document (whatever ``json.load`` produces for it).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # Read as UTF-8 explicitly: the platform default encoding may differ and
    # would mis-decode non-ASCII values such as passwords or database names.
    with open(config_path, 'r', encoding='utf-8') as conf_fh:
        return json.load(conf_fh)
if __name__ == '__main__':
    # Expect exactly two arguments: the source directory and the config path.
    if len(sys.argv) != 3:
        # sys.exit with a message writes it to stderr and exits with status 1,
        # so a wrong invocation is reported as a failure to the calling shell.
        # (The bare exit() builtin is provided by the site module and is not
        # guaranteed to exist; it also exited with status 0.)
        sys.exit(
            "Usage: {} ./path/to/source/dir ./path/to/dbconfig.json".format(
                os.path.basename(__file__)
            )
        )
    main(sys.argv[1], sys.argv[2])