colouring-montreal/etl/load_uprns.py

79 lines
1.8 KiB
Python
Raw Normal View History

2018-09-25 16:47:58 -04:00
#!/usr/bin/env python
2018-08-01 08:49:03 -04:00
"""Load buildings from CSV to Postgres
2018-09-25 16:47:58 -04:00
- update each matching 'buildings' row with {
    uprn: <uprn from the CSV row>,
    building_doc: {uprn_relations: [...]}
}
2018-08-01 08:49:03 -04:00
"""
import csv
2018-09-25 16:47:58 -04:00
import glob
2018-08-01 08:49:03 -04:00
import json
import os
import sys
2018-09-25 16:47:58 -04:00
from multiprocessing import Pool
2018-08-01 08:49:03 -04:00
import psycopg2
2018-09-25 16:47:58 -04:00
def main(addressbase_dir):
    """Load every filtered AddressBase CSV found in a directory.

    Each file matching ``*.gml.csv.filtered`` directly inside
    ``addressbase_dir`` is passed to ``load_file`` by a pool of worker
    processes.
    """
    pattern = os.path.join(addressbase_dir, "*.gml.csv.filtered")
    tile_paths = glob.glob(pattern)

    # One task per file, spread over the worker pool.
    with Pool() as workers:
        workers.map(load_file, tile_paths)
def load_file(source_file):
    """Load UPRN data from one CSV file into the database.

    Connects to Postgres using the PGHOST, PGPORT, PGDATABASE, PGUSER and
    PGPASSWORD environment variables, skips the CSV header row, and calls
    ``save_building`` for every remaining row. All updates from the file
    are committed together once the whole file has been processed.

    Args:
        source_file: path to a CSV file whose data rows have exactly four
            columns: toid, uprn, wkt, uprn_relations (JSON-encoded).

    Raises:
        KeyError: if one of the PG* environment variables is not set.
        ValueError: if a row does not have four columns, its uprn is not
            an integer, or its uprn_relations is not valid JSON.
    """
    config = {
        'host': os.environ['PGHOST'],
        'port': os.environ['PGPORT'],
        'dbname': os.environ['PGDATABASE'],
        'user': os.environ['PGUSER'],
        'password': os.environ['PGPASSWORD'],
    }
    conn = psycopg2.connect(**config)
    try:
        with conn.cursor() as cur:
            # newline='' lets the csv module do its own newline handling,
            # as the csv documentation recommends for file objects.
            with open(source_file, 'r', newline='') as source_fh:
                reader = csv.reader(source_fh)
                # Skip the header row; the default keeps an empty file
                # from raising StopIteration.
                next(reader, None)
                # The wkt column is present in the file but not used here.
                for toid, uprn, _wkt, uprn_relations in reader:
                    save_building(
                        cur, int(uprn), toid, json.loads(uprn_relations)
                    )
        conn.commit()
    finally:
        # Always release the connection, even when a row fails part-way
        # through; nothing is committed in that case.
        conn.close()
2018-09-25 16:47:58 -04:00
def save_building(cur, uprn, toid, uprn_relations):
    """Write one building's UPRN data through the given cursor.

    Sets ``uprn`` and ``building_doc`` on the ``buildings`` row whose
    ``geometry_id`` belongs to the ``geometries`` row with
    ``source_id = toid``. ``building_doc`` is set to a JSON object with
    the single key ``uprn_relations``.
    """
    building_doc = json.dumps({
        'uprn_relations': uprn_relations
    })
    update_sql = """UPDATE buildings
        SET uprn = %s, building_doc = %s::jsonb
        WHERE geometry_id = (
            SELECT geometry_id FROM geometries
            WHERE
                source_id = %s
        )
        """
    # Parameter order follows the three placeholders in the statement.
    cur.execute(update_sql, (uprn, building_doc, toid))
if __name__ == '__main__':
    # Expect exactly one argument: the directory holding the filtered
    # AddressBase CSV files.
    if len(sys.argv) != 2:
        # Report usage on stderr and exit non-zero so calling scripts can
        # detect the failure. sys.exit is used because the bare ``exit()``
        # helper is installed by the ``site`` module and may be absent.
        print(
            "Usage: {} ./path/to/addressbase_dir/".format(
                os.path.basename(__file__)
            ),
            file=sys.stderr,
        )
        sys.exit(1)
    main(sys.argv[1])