diff --git a/etl/create_building_records_cl.sh b/etl/create_building_records_cl.sh
new file mode 100644
--- /dev/null
+++ b/etl/create_building_records_cl.sh
@@ -0,0 +1,11 @@
+#!/usr/bin/env bash
+
+#
+# Create the corresponding 'buildings' record for every 'geometries' row:
+#   geometry_id: copied from geometries.geometry_id
+#   ref_toid:    copied from geometries.source_id
+# Columns not named in the INSERT are left to their table defaults.
+#
+set -euo pipefail
+
+psql -d colouringlondon -c "INSERT INTO buildings ( geometry_id, ref_toid ) SELECT geometry_id, source_id from geometries;"
diff --git a/etl/load_geometries_cl.sh b/etl/load_geometries_cl.sh
new file mode 100644
--- /dev/null
+++ b/etl/load_geometries_cl.sh
@@ -0,0 +1,37 @@
+#!/usr/bin/env bash
+
+#
+# Load geometries from '*.3857.csv' files into Postgres
+# - assume postgres connection details are set in the environment using PGUSER, PGHOST etc.
+# - one psql COPY is run per CSV file, fanned out with parallel
+#
+set -euo pipefail
+
+: "${1:?Usage: $0 ./path/to/mastermap/dir}"
+
+mastermap_dir=$1
+
+#
+# Create one 'geometries' record per CSV row, filling
+#   geometry_geom, source_id
+# from the CSV columns (the first line of each file is a header and is skipped).
+#
+# find prints the real path of every match, NUL-delimited, so files in
+# sub-directories and names containing whitespace reach parallel intact.
+#
+find "$mastermap_dir" -type f -name '*.3857.csv' -print0 | \
+parallel -0 \
+cat {} '|' psql -d colouringlondon -c "\"COPY geometries ( geometry_geom, source_id ) FROM stdin WITH CSV HEADER;\""
+
+#
+# Delete any duplicated geometries (by TOID), keeping the row with the
+# smallest ctid for each source_id
+#
+psql -d colouringlondon -c "DELETE FROM geometries a USING (
+    SELECT MIN(ctid) as ctid, source_id
+    FROM geometries
+    GROUP BY source_id
+    HAVING COUNT(*) > 1
+) b
+WHERE a.source_id = b.source_id
+AND a.ctid <> b.ctid;"