colouring-montreal/etl/load_uprns.py

131 lines
3.0 KiB
Python
Raw Normal View History

2018-08-01 08:49:03 -04:00
"""Load buildings from CSV to Postgres
- create 'building' record with {
geometry_id: <polygon-guid>,
all_uprns: [<uprn>, ...],
uprn: <min_uprn>
}
"""
import csv
import json
import os
import sys
import psycopg2
def main(source_file, config_path):
    """Load config, read the source CSV and save UPRNs to the database.

    The first CSV row is treated as a header and skipped. Every other
    non-blank row must have exactly five columns: the UPRN first and the
    latitude and longitude last (the two middle columns are ignored).
    Rows whose point falls inside no known geometry are reported on
    stdout and skipped.

    Args:
        source_file: path to the CSV file to load.
        config_path: path to a JSON config file whose 'database' entry
            holds the keyword arguments passed to psycopg2.connect.
    """
    conf = read_config(config_path)
    dbconf = conf['database']
    conn = psycopg2.connect(**dbconf)
    try:
        with conn.cursor() as cur:
            # newline='' lets the csv module do its own line-ending handling
            with open(source_file, 'r', newline='') as source_fh:
                reader = csv.reader(source_fh)
                # Skip the header row; the default avoids StopIteration
                # on an empty file
                next(reader, None)
                for row in reader:
                    if not row:
                        # csv.reader yields [] for blank lines
                        continue
                    uprn, _, _, lat, lng = row
                    geometry_id = find_geom(cur, float(lat), float(lng))
                    if geometry_id is not None:
                        save_building(cur, int(uprn), geometry_id)
                    else:
                        print("No geometry for", uprn)
            # Single commit once every row has been processed
            conn.commit()
    finally:
        # Always release the connection, even if a row failed part-way
        conn.close()
def find_geom(cur, lat, lng):
    """Find the building geometry that contains a point.

    The point is built from (lng, lat) with SRID 4326 and transformed to
    SRID 3857 before being tested against geometries.geometry_geom.

    Returns the geometry_id of the first matching row, or None when the
    query returns no rows.
    """
    query = """SELECT geometry_id FROM geometries
        WHERE
            ST_Within(
                ST_Transform(
                    ST_SetSRID(ST_Point(%s, %s), 4326),
                    3857
                ),
                geometry_geom
            )
        """
    # The point is passed as (x, y), i.e. longitude first
    cur.execute(query, (lng, lat))

    row = cur.fetchone()
    if row is None:
        return None

    (geometry_id,) = row
    return geometry_id
def save_building(cur, uprn, geometry_id):
    """Save a UPRN against the building that uses a geometry.

    If no row in buildings references geometry_id, insert one whose
    building_doc is {"uprns": [uprn]}. Otherwise append uprn to the end
    of the 'uprns' array in the existing row's building_doc.

    Args:
        cur: open database cursor providing execute() and fetchone().
        uprn: UPRN to record; must be JSON-serialisable.
        geometry_id: id of the geometry the building is based on.
    """
    cur.execute(
        """SELECT building_id FROM buildings
        WHERE
            geometry_id = %s
        """, (
            geometry_id,
        )
    )
    building = cur.fetchone()

    if building is None:
        cur.execute(
            """INSERT INTO buildings
            (
                building_doc,
                geometry_id
            )
            VALUES
            (
                %s::jsonb,
                %s
            )
            """, (
                json.dumps({
                    'uprns': [uprn]
                }),
                geometry_id
            )
        )
        return

    building_id = building[0]
    # Pass the new element as a JSON-encoded parameter rather than wrapping
    # the placeholder in SQL quotes, so the driver does the quoting (as in
    # the INSERT above).
    cur.execute(
        """UPDATE buildings
        SET
            building_doc = jsonb_insert(
                building_doc,
                '{uprns, -1}', -- insert at end of 'uprns' array
                %s::jsonb,
                true -- insert after location
            )
        WHERE
            building_id = %s
        """, (
            json.dumps(uprn),
            building_id
        )
    )
def read_config(config_path):
    """Parse the JSON config file at config_path and return its contents.

    The file is expected to hold the database connection details.
    """
    with open(config_path) as config_fh:
        return json.load(config_fh)
if __name__ == '__main__':
    # Two arguments are required after the script name: the source CSV
    # and the JSON config file, both of which main() consumes.
    if len(sys.argv) != 3:
        print(
            "Usage: {} ./path/to/source/file.csv ./path/to/config.json".format(
                os.path.basename(__file__)
            )
        )
        # Non-zero status so callers can detect the bad invocation
        sys.exit(1)
    main(sys.argv[1], sys.argv[2])