colouring-montreal/etl/load_geometries.sh

31 lines
838 B
Bash
Raw Normal View History

#!/usr/bin/env bash
# Load geometries from '*.3857.csv' files into Postgres.
# - assume postgres connection details are set in the environment using
#   PGUSER, PGHOST etc.
#
# Usage: load_geometries.sh ./path/to/mastermap/dir
#
# Requires: find with -print0 support, GNU parallel, psql.

# Abort on the first failing step (including any stage of a pipeline) so that
# the de-duplication below never runs after a failed or partial load.
set -euo pipefail

# The directory argument is mandatory and must be non-empty.
: "${1:?Usage: $0 ./path/to/mastermap/dir}"
mastermap_dir=$1

# Create 'geometry' records from each CSV file. The COPY column list below
# fills:
#   geometry_geom: <geom>
#   source_id:     <toid>
# NOTE(review): the record id (<polygon-guid>) is not part of the COPY column
# list, so it is presumably generated by the database - confirm against the
# table definition.
echo "Copy geometries to db..."
# Paths are passed NUL-delimited so directory/file names containing spaces or
# other special characters survive intact, and each file is reported at its
# real location (including subdirectories). GNU parallel shell-quotes the {}
# replacement, so it is safe to use as a redirection target.
find "$mastermap_dir" -type f -name '*.3857.csv' -print0 \
  | parallel -0 \
    'psql -c "COPY geometries ( geometry_geom, source_id ) FROM stdin WITH CSV HEADER;" < {}'

# Delete any duplicated geometries (by TOID): for every source_id that occurs
# more than once, keep the row with the smallest ctid and delete the others.
echo "Delete duplicate geometries..."
psql -c 'DELETE FROM geometries a USING (
    SELECT MIN(ctid) AS ctid, source_id
    FROM geometries
    GROUP BY source_id
    HAVING COUNT(*) > 1
) b
WHERE a.source_id = b.source_id
AND a.ctid <> b.ctid;'