Merge pull request #808 from colouring-cities/os-data-updating-simplified

Add update scripts for Ordnance Survey data
This commit is contained in:
Ed Chalstrey 2022-04-22 14:18:38 +01:00 committed by GitHub
commit cb15abe36b
12 changed files with 202 additions and 16 deletions

View File

@ -7,6 +7,8 @@ export async function queryBuildingsAtPoint(lng: number, lat: number) {
FROM buildings as b, geometries as g
WHERE
b.geometry_id = g.geometry_id
AND
b.latest_demolish_date IS NULL
AND
ST_Intersects(
ST_Transform(
@ -45,6 +47,8 @@ export async function queryBuildingsByReference(key: string, ref: string) {
buildings as b, building_properties as p
WHERE
b.building_id = p.building_id
AND
b.latest_demolish_date IS NULL
AND
p.uprn = $1
`,

View File

@ -181,7 +181,7 @@ function getDataConfig(tileset: string): DataConfig {
if(table == undefined) {
throw new Error('Invalid tileset requested');
}
const query = `(
SELECT
d.*,
@ -192,6 +192,11 @@ function getDataConfig(tileset: string): DataConfig {
JOIN
geometries AS g
ON d.geometry_id = g.geometry_id
JOIN
buildings AS b
ON d.geometry_id = b.geometry_id
WHERE
b.latest_demolish_date IS NULL
) AS data
`;

View File

@ -11,7 +11,7 @@ The scripts in this directory are used to extract, transform and load (ETL) the
# :arrow_down: Downloading Ordnance Survey data
The building geometries are sourced from Ordnance Survey (OS) MasterMap (Topography Layer). To get the required datasets, you'll need to complete the following steps:
The building geometries are sourced from Ordnance Survey (OS) MasterMap (Topography Layer).
1. Sign up for the Ordnance Survey [Data Exploration License](https://www.ordnancesurvey.co.uk/business-government/licensing-agreements/data-exploration-sign-up). You should receive an e-mail with a link to log in to the platform (this could take up to a week).
2. Navigate to https://orders.ordnancesurvey.co.uk/orders and click the button for: ✏️ Order. From here you should be able to click another button to add a product.
@ -29,9 +29,7 @@ Before creating or updating a Colouring London database, you'll need to make sur
# :new_moon: Creating a Colouring London database from scratch
## Prerequisites
You should already have set up PostgreSQL and created a database in an Ubuntu environment. Make sure to create environment variables to use `psql` if you haven't already:
You should already have set up PostgreSQL and created a database in an Ubuntu environment. If not, follow one of the linked guides: [setup dev environment](../docs/setup-dev-environment.md) or [setup prod environment](../docs/setup-production-environment.md). Open a terminal in Ubuntu and create the environment variables to use `psql` if you haven't already:
```bash
export PGPASSWORD=<pgpassword>
@ -53,8 +51,6 @@ creation steps below.
You should already have installed GNU parallel, which is used to speed up loading bulk data.
## Processing and loading Ordnance Survey data
Move into the `etl` directory and set execute permission on all scripts.
```bash
@ -74,7 +70,7 @@ Filter MasterMap 'building' polygons.
sudo ./filter_transform_mastermap_for_loading.sh /path/to/mastermap_dir
```
Load all building outlines. Note: you should ensure that `mastermap_dir` has permissions that will allow the linux `find` command to work without using sudo.
Load all geometries. Note: you should ensure that `mastermap_dir` has permissions that will allow the linux `find` command to work without using sudo.
```bash
./load_geometries.sh /path/to/mastermap_dir
@ -86,11 +82,14 @@ Index geometries.
psql < ../migrations/002.index-geometries.up.sql
```
<!-- TODO: Drop outside limit. -->
Drop geometries outside London boundary.
<!-- ```bash
./drop_outside_limit.sh /path/to/boundary_file
```` -->
```bash
cd ~/colouring-london/app/public/geometries
ogr2ogr -t_srs EPSG:3857 -f "ESRI Shapefile" boundary.shp boundary-detailed.geojson
cd ~/colouring-london/etl/
./drop_outside_limit.sh ~/colouring-london/app/public/geometries/boundary.shp
```
Create a building record per outline.
@ -106,4 +105,62 @@ ls ~/colouring-london/migrations/*.up.sql 2>/dev/null | while read -r migration;
# :full_moon: Updating the Colouring London database with new OS data
TODO: this section should instruct how to update an existing db
In the Ubuntu environment where the database exists, set up the environment variables to make the following steps simpler.
```bash
export PGPASSWORD=<pgpassword>
export PGUSER=<username>
export PGHOST=localhost
export PGDATABASE=<colouringlondondb>
```
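If you want to confirm the connection details are picked up before running any of the scripts below, a quick optional check (a minimal sketch, assuming you connect using the `psql` defaults plus the variables above):
```bash
# Should print the database name you set in PGDATABASE
psql -c "SELECT current_database();"
```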
Move into the `etl` directory and set execute permission on all scripts.
```bash
cd ~/colouring-london/etl
chmod +x *.sh
```
Extract the new MasterMap data (this step could take a while).
```bash
sudo ./extract_mastermap.sh /path/to/mastermap_dir
```
Filter MasterMap 'building' polygons.
```bash
sudo ./filter_transform_mastermap_for_loading.sh /path/to/mastermap_dir
```
Load all new geometries. This step will only load geometries that are not already present (based on the `TOID`). Note: you should ensure that `mastermap_dir` has permissions that will allow the linux `find` command to work without using sudo.
```bash
./load_new_geometries.sh /path/to/mastermap_dir
```
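Optionally, check how many geometries were staged as new before continuing. This is only a sketch; it queries the `new_geometries` staging table that `load_new_geometries.sh` creates.
```bash
# Count of TOIDs in this release that are not already in the geometries table
psql -c "SELECT count(*) FROM new_geometries;"
```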
Drop new geometries outside London boundary.
```bash
cd ~/colouring-london/app/public/geometries
ogr2ogr -t_srs EPSG:3857 -f "ESRI Shapefile" boundary.shp boundary-detailed.geojson
cd ~/colouring-london/etl/
./drop_outside_limit_new_geometries.sh ~/colouring-london/app/public/geometries/boundary.shp
```
Add new geometries to existing geometries table.
```bash
./add_new_geometries.sh
```
Create a building record for each new geometry that doesn't already have a linked building.
```bash
./create_new_building_records.sh
```
Mark buildings with geometries not present in the update as demolished.
```bash
./mark_demolitions.sh
```
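As an optional final sanity check, you can count how many buildings were marked as demolished by this run and how many remain active. This is a sketch that relies on the `latest_demolish_date` column added by the migration in this change.
```bash
# Buildings marked demolished by this update (assumes the update ran today)
psql -c "SELECT count(*) FROM buildings WHERE latest_demolish_date = CURRENT_DATE;"

# Buildings still treated as extant by the API and tile queries
psql -c "SELECT count(*) FROM buildings WHERE latest_demolish_date IS NULL;"
```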

View File

@ -0,0 +1,4 @@
echo "Adding new geometries to geometries table..."
psql -c "INSERT INTO geometries ( source_id, geometry_geom )
SELECT source_id, geometry_geom
FROM new_geometries;"

View File

@ -6,4 +6,6 @@
# doc: {},
# geom_id: <polygon-guid>
#
psql -c "INSERT INTO buildings ( geometry_id, ref_toid ) SELECT geometry_id, source_id from geometries;"
psql -c "INSERT INTO buildings ( geometry_id, ref_toid )
SELECT geometry_id, source_id
FROM geometries;"

View File

@ -0,0 +1,14 @@
#!/usr/bin/env bash
#
# Create corresponding 'building' record with
# id: <building-guid>,
# doc: {},
# geom_id: <polygon-guid>
#
psql -c "INSERT INTO buildings ( geometry_id, ref_toid )
SELECT geometry_id, source_id
FROM geometries AS g
WHERE EXISTS ( SELECT source_id
FROM new_geometries AS ng
WHERE g.source_id = ng.source_id);"

View File

@ -8,11 +8,11 @@
: ${1?"Usage: $0 ./path/to/boundary"}
boundary_file=$1
# Load boundary
echo "Load boundary..."
psql -c "DROP TABLE IF EXISTS boundary"
shp2pgsql -s 3857 $boundary_file boundary | psql
# Delete geometries (hence buildings, building_properties)
echo "Delete geometries (hence buildings, building_properties)..."
psql -c "DELETE FROM geometries as g
USING boundary as b
WHERE b.gid = 1 AND NOT ST_ContainsProperly(b.geom, g.geometry_geom);"

View File

@ -0,0 +1,18 @@
#!/usr/bin/env bash
#
# Load boundary and filter geometries
# - boundary MUST be epsg:3857
# use: ogr2ogr -t_srs EPSG:3857 boundary.3857.shp boundary.shp
#
: ${1?"Usage: $0 ./path/to/boundary"}
boundary_file=$1
echo "Load boundary..."
psql -c "DROP TABLE IF EXISTS boundary"
shp2pgsql -s 3857 $boundary_file boundary | psql
echo "Delete geometries (hence buildings, building_properties)..."
psql -c "DELETE FROM new_geometries as g
USING boundary as b
WHERE b.gid = 1 AND NOT ST_ContainsProperly(b.geom, g.geometry_geom);"

View File

@ -0,0 +1,55 @@
#!/usr/bin/env bash
# Load geometries from GeoJSON to Postgres
# - assume postgres connection details are set in the environment using PGUSER, PGHOST etc.
: ${1?"Usage: $0 ./path/to/mastermap/dir"}
mastermap_dir=$1
# Create 'geometry' record with
# id: <polygon-guid>,
# source_id: <toid>,
# geom: <geom>
echo "Removing temp tables if previously created..."
psql -c "DROP TABLE IF EXISTS new_geometries;"
psql -c "DROP TABLE IF EXISTS release_geometries;"
echo "Creating temporary geometries table for OS release geometries..."
psql -c "CREATE TABLE IF NOT EXISTS release_geometries (
geometry_id serial,
source_id varchar(30),
geometry_geom geometry(GEOMETRY, 3857)
);"
echo "Copy geometries to db..."
find $mastermap_dir -type f -name '*.3857.csv' \
-printf "$mastermap_dir/%f\n" | \
parallel \
cat {} '|' psql -c "\"COPY release_geometries ( geometry_geom, source_id ) FROM stdin WITH CSV HEADER;\""
echo "Creating temporary geometries table for new geometries only..."
psql -c "CREATE TABLE IF NOT EXISTS new_geometries (
source_id varchar(30),
geometry_geom geometry(GEOMETRY, 3857)
);"
# Delete any duplicated geometries (by TOID)
echo "Delete duplicate geometries..."
psql -c "DELETE FROM release_geometries a USING (
SELECT MIN(ctid) as ctid, source_id
FROM release_geometries
GROUP BY source_id
HAVING COUNT(*) > 1
) b
WHERE a.source_id = b.source_id
AND a.ctid <> b.ctid;"
echo "Finding geometries that are new to this release..."
psql -c "INSERT INTO new_geometries ( source_id, geometry_geom )
SELECT source_id, geometry_geom
FROM release_geometries AS r
WHERE NOT EXISTS ( SELECT source_id
FROM geometries AS g
WHERE g.source_id = r.source_id);"

etl/mark_demolitions.sh Normal file
View File

@ -0,0 +1,23 @@
#!/usr/bin/env bash
psql -c "DROP TABLE IF EXISTS old_geometries;"
echo "Creating temporary table for geometries in the db not present in new data..."
psql -c "CREATE TABLE IF NOT EXISTS old_geometries (
source_id varchar(30) PRIMARY KEY,
geometry_geom geometry(GEOMETRY, 3857)
);"
echo "Find geometries in the db not present in new data..."
psql -c "INSERT INTO old_geometries ( source_id, geometry_geom )
SELECT source_id, geometry_geom
FROM geometries AS g
WHERE NOT EXISTS ( SELECT source_id
FROM release_geometries AS r
WHERE g.source_id = r.source_id);"
echo "Set each building's latest_demolish_date for today if linked geometry in the db not present in new data..."
psql -c "UPDATE buildings
SET latest_demolish_date = CURRENT_DATE
FROM old_geometries AS og
WHERE buildings.ref_toid = og.source_id;"

View File

@ -0,0 +1,2 @@
ALTER TABLE buildings
DROP COLUMN IF EXISTS latest_demolish_date;

View File

@ -0,0 +1,2 @@
ALTER TABLE buildings
ADD COLUMN IF NOT EXISTS latest_demolish_date DATE;
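For reference, a quick way to check that the migration has been applied (a sketch, assuming the `psql` environment variables from the ETL README are exported):
```bash
# The buildings table should now include the latest_demolish_date column
psql -c "\d buildings" | grep latest_demolish_date
```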