update new geometries loading

This commit is contained in:
Ed Chalstrey 2022-04-14 09:55:58 +01:00
parent a6ccd6bbfa
commit d730f37190

View File

@ -12,10 +12,13 @@ mastermap_dir=$1
# source_id: <toid>,
# geom: <geom>
echo "Creating temporary geometries table for input data..."
# Start from a clean slate: drop both staging tables in case an earlier run
# left them behind. IF EXISTS keeps this from failing on a first run.
echo "Removing temp tables if previously created..."
psql -c "DROP TABLE IF EXISTS new_geometries;"
psql -c "DROP TABLE IF EXISTS release_geometries;"
echo "Creating temporary geometries table for OS release geometries..."
# Staging table that the COPY from the *.3857.csv files loads into
# (columns geometry_geom and source_id).
# NOTE(review): as shown here the column list defines source_id twice and
# declares two PRIMARY KEYs, which PostgreSQL rejects. This looks like the
# removed and added lines of a diff shown together - confirm which
# definition is the current one.
psql -c "CREATE TABLE IF NOT EXISTS release_geometries (
geometry_id serial PRIMARY KEY,
source_id varchar(30),
source_id varchar(30) PRIMARY KEY,
geometry_geom geometry(GEOMETRY, 3857)
);"
@ -25,16 +28,19 @@ find $mastermap_dir -type f -name '*.3857.csv' \
parallel \
cat {} '|' psql -c "\"COPY release_geometries ( geometry_geom, source_id ) FROM stdin WITH CSV HEADER;\""
# Copy release_geometries into existing geometries table
# This statement has no filter: every release row is inserted, including
# rows whose source_id is already present in geometries.
# NOTE(review): presumably superseded by the new_geometries staging insert
# further down - confirm whether this line is still meant to run.
psql -c "INSERT INTO geometries ( geometry_geom, source_id ) SELECT geometry_geom, source_id FROM release_geometries;"
echo "Creating temporary geometries table for new geometries only..."
# Staging table that is filled from release_geometries further down and then
# appended to geometries. source_id (the TOID) is the primary key, so each
# TOID can be staged at most once.
psql -c "CREATE TABLE IF NOT EXISTS new_geometries (
source_id varchar(30) PRIMARY KEY,
geometry_geom geometry(GEOMETRY, 3857)
);"
# Delete any duplicated geometries (by TOID)
# The subquery lists every source_id that occurs more than once together with
# the smallest ctid (PostgreSQL's physical row identifier) among its rows.
# The DELETE then removes every row with that source_id whose ctid differs,
# leaving exactly one row per source_id.
echo "Delete duplicate geometries..."
psql -c "DELETE FROM geometries a USING (
SELECT MIN(ctid) as ctid, source_id
FROM geometries
GROUP BY source_id
HAVING COUNT(*) > 1
) b
WHERE a.source_id = b.source_id
AND a.ctid <> b.ctid;"
echo "Finding geometries that are new to this release..."
# Stage only the release rows whose source_id (TOID) is not yet present in
# geometries. The NOT EXISTS subquery must be correlated on source_id: an
# uncorrelated "NOT EXISTS ( SELECT ... FROM geometries )" is evaluated once
# for the whole statement, so it stages nothing as soon as geometries holds
# any row at all, and stages everything when geometries is empty.
psql -c "INSERT INTO new_geometries ( source_id, geometry_geom )
SELECT r.source_id, r.geometry_geom
FROM release_geometries AS r
WHERE NOT EXISTS (
    SELECT 1
    FROM geometries AS g
    WHERE g.source_id = r.source_id
);"
echo "Adding new geometries to geometries table..."
# Append every row staged in new_geometries to the main geometries table.
psql -c "INSERT INTO geometries ( source_id, geometry_geom )
SELECT source_id, geometry_geom
FROM new_geometries;"