Merge pull request #808 from colouring-cities/os-data-updating-simplified
Add update scripts for Ordnance Survey data
This commit is contained in:
commit
cb15abe36b
@ -7,6 +7,8 @@ export async function queryBuildingsAtPoint(lng: number, lat: number) {
|
||||
FROM buildings as b, geometries as g
|
||||
WHERE
|
||||
b.geometry_id = g.geometry_id
|
||||
AND
|
||||
b.latest_demolish_date IS NULL
|
||||
AND
|
||||
ST_Intersects(
|
||||
ST_Transform(
|
||||
@ -45,6 +47,8 @@ export async function queryBuildingsByReference(key: string, ref: string) {
|
||||
buildings as b, building_properties as p
|
||||
WHERE
|
||||
b.building_id = p.building_id
|
||||
AND
|
||||
b.latest_demolish_date IS NULL
|
||||
AND
|
||||
p.uprn = $1
|
||||
`,
|
||||
|
@ -181,7 +181,7 @@ function getDataConfig(tileset: string): DataConfig {
|
||||
if(table == undefined) {
|
||||
throw new Error('Invalid tileset requested');
|
||||
}
|
||||
|
||||
|
||||
const query = `(
|
||||
SELECT
|
||||
d.*,
|
||||
@ -192,6 +192,11 @@ function getDataConfig(tileset: string): DataConfig {
|
||||
JOIN
|
||||
geometries AS g
|
||||
ON d.geometry_id = g.geometry_id
|
||||
JOIN
|
||||
buildings AS b
|
||||
ON d.geometry_id = b.geometry_id
|
||||
WHERE
|
||||
b.latest_demolish_date IS NULL
|
||||
) AS data
|
||||
`;
|
||||
|
||||
|
@ -11,7 +11,7 @@ The scripts in this directory are used to extract, transform and load (ETL) the
|
||||
|
||||
# :arrow_down: Downloading Ordnance Survey data
|
||||
|
||||
The building geometries are sourced from Ordnance Survey (OS) MasterMap (Topography Layer). To get the required datasets, you'll need to complete the following steps:
|
||||
The building geometries are sourced from Ordnance Survey (OS) MasterMap (Topography Layer).
|
||||
|
||||
1. Sign up for the Ordnance Survey [Data Exploration License](https://www.ordnancesurvey.co.uk/business-government/licensing-agreements/data-exploration-sign-up). You should receive an e-mail with a link to log in to the platform (this could take up to a week).
|
||||
2. Navigate to https://orders.ordnancesurvey.co.uk/orders and click the button for: ✏️ Order. From here you should be able to click another button to add a product.
|
||||
@ -29,9 +29,7 @@ Before creating or updating a Colouring London database, you'll need to make sur
|
||||
|
||||
# :new_moon: Creating a Colouring London database from scratch
|
||||
|
||||
## Prerequisites
|
||||
|
||||
You should already have set up PostgreSQL and created a database in an Ubuntu environment. Make sure to create environment variables to use `psql` if you haven't already:
|
||||
You should already have set up PostgreSQL and created a database in an Ubuntu environment. If not, follow one of the linked guides: [setup dev environment](../docs/setup-dev-environment.md) or [setup prod environment](../docs/setup-production-environment.md). Open a terminal in Ubuntu and create the environment variables to use `psql` if you haven't already:
|
||||
|
||||
```bash
|
||||
export PGPASSWORD=<pgpassword>
|
||||
@ -53,8 +51,6 @@ creation steps below.
|
||||
|
||||
You should already have installed GNU parallel, which is used to speed up loading bulk data.
|
||||
|
||||
## Processing and loading Ordnance Survey data
|
||||
|
||||
Move into the `etl` directory and set execute permission on all scripts.
|
||||
|
||||
```bash
|
||||
@ -74,7 +70,7 @@ Filter MasterMap 'building' polygons.
|
||||
sudo ./filter_transform_mastermap_for_loading.sh /path/to/mastermap_dir
|
||||
```
|
||||
|
||||
Load all building outlines. Note: you should ensure that `mastermap_dir` has permissions that will allow the linux `find` command to work without using sudo.
|
||||
Load all geometries. Note: you should ensure that `mastermap_dir` has permissions that will allow the Linux `find` command to work without using sudo.
|
||||
|
||||
```bash
|
||||
./load_geometries.sh /path/to/mastermap_dir
|
||||
@ -86,11 +82,14 @@ Index geometries.
|
||||
psql < ../migrations/002.index-geometries.up.sql
|
||||
```
|
||||
|
||||
<!-- TODO: Drop outside limit. -->
|
||||
Drop geometries outside London boundary.
|
||||
|
||||
<!-- ```bash
|
||||
./drop_outside_limit.sh /path/to/boundary_file
|
||||
``` -->
|
||||
```bash
|
||||
cd ~/colouring-london/app/public/geometries
|
||||
ogr2ogr -t_srs EPSG:3857 -f "ESRI Shapefile" boundary.shp boundary-detailed.geojson
|
||||
cd ~/colouring-london/etl/
|
||||
./drop_outside_limit.sh ~/colouring-london/app/public/geometries/boundary.shp
|
||||
```
|
||||
|
||||
Create a building record per outline.
|
||||
|
||||
@ -106,4 +105,62 @@ ls ~/colouring-london/migrations/*.up.sql 2>/dev/null | while read -r migration;
|
||||
|
||||
# :full_moon: Updating the Colouring London database with new OS data
|
||||
|
||||
TODO: this section should instruct how to update an existing db
|
||||
In the Ubuntu environment where the database exists, set up the environment variables to make the following steps simpler.
|
||||
```bash
|
||||
export PGPASSWORD=<pgpassword>
|
||||
export PGUSER=<username>
|
||||
export PGHOST=localhost
|
||||
export PGDATABASE=<colouringlondondb>
|
||||
```
|
||||
|
||||
Move into the `etl` directory and set execute permission on all scripts.
|
||||
|
||||
```bash
|
||||
cd ~/colouring-london/etl
|
||||
chmod +x *.sh
|
||||
```
|
||||
|
||||
Extract the new MasterMap data (this step could take a while).
|
||||
|
||||
```bash
|
||||
sudo ./extract_mastermap.sh /path/to/mastermap_dir
|
||||
```
|
||||
|
||||
Filter MasterMap 'building' polygons.
|
||||
|
||||
```bash
|
||||
sudo ./filter_transform_mastermap_for_loading.sh /path/to/mastermap_dir
|
||||
```
|
||||
|
||||
Load all new geometries. This step will only load geometries that are not already present (based on the `TOID`). Note: you should ensure that `mastermap_dir` has permissions that will allow the Linux `find` command to work without using sudo.
|
||||
|
||||
```bash
|
||||
./load_new_geometries.sh /path/to/mastermap_dir
|
||||
```
|
||||
|
||||
Drop new geometries outside London boundary.
|
||||
|
||||
```bash
|
||||
cd ~/colouring-london/app/public/geometries
|
||||
ogr2ogr -t_srs EPSG:3857 -f "ESRI Shapefile" boundary.shp boundary-detailed.geojson
|
||||
cd ~/colouring-london/etl/
|
||||
./drop_outside_limit_new_geometries.sh ~/colouring-london/app/public/geometries/boundary.shp
|
||||
```
|
||||
|
||||
Add new geometries to existing geometries table.
|
||||
|
||||
```bash
|
||||
./add_new_geometries.sh
|
||||
```
|
||||
|
||||
Create a building record to match each new geometry that doesn't already have a linked building.
|
||||
|
||||
```bash
|
||||
./create_new_building_records.sh
|
||||
```
|
||||
|
||||
Mark buildings with geometries not present in the update as demolished.
|
||||
|
||||
```bash
|
||||
./mark_demolitions.sh
|
||||
```
|
||||
|
4
etl/add_new_geometries.sh
Normal file
4
etl/add_new_geometries.sh
Normal file
@ -0,0 +1,4 @@
|
||||
#!/usr/bin/env bash

#
# Append every row staged in `new_geometries` to the main `geometries` table.
# - `new_geometries` is the staging table filled by load_new_geometries.sh
# - psql is called without connection flags, so connection details are
#   presumably taken from the environment (PGUSER, PGHOST etc.)
#
echo "Adding new geometries to geometries table..."
psql -c "INSERT INTO geometries ( source_id, geometry_geom )
            SELECT source_id, geometry_geom
            FROM new_geometries;"
|
@ -6,4 +6,6 @@
|
||||
# doc: {},
|
||||
# geom_id: <polygon-guid>
|
||||
#
|
||||
psql -c "INSERT INTO buildings ( geometry_id, ref_toid ) SELECT geometry_id, source_id from geometries;"
|
||||
psql -c "INSERT INTO buildings ( geometry_id, ref_toid )
|
||||
SELECT geometry_id, source_id
|
||||
FROM geometries;"
|
||||
|
14
etl/create_new_building_records.sh
Normal file
14
etl/create_new_building_records.sh
Normal file
@ -0,0 +1,14 @@
|
||||
#!/usr/bin/env bash

#
# Create corresponding 'building' record with
#     id: <building-guid>,
#     doc: {},
#     geom_id: <polygon-guid>
# for each geometry whose TOID is listed in `new_geometries` and which does
# not already have a linked building.
#
# The NOT EXISTS guard on `buildings` keeps the script safe to re-run: without
# it a second run would insert a duplicate building for every new geometry.
#
psql -c "INSERT INTO buildings ( geometry_id, ref_toid )
            SELECT g.geometry_id, g.source_id
            FROM geometries AS g
            WHERE EXISTS ( SELECT source_id
                            FROM new_geometries AS ng
                            WHERE g.source_id = ng.source_id)
            AND NOT EXISTS ( SELECT 1
                            FROM buildings AS b
                            WHERE b.geometry_id = g.geometry_id);"
|
@ -8,11 +8,11 @@
|
||||
: ${1?"Usage: $0 ./path/to/boundary"}
|
||||
boundary_file=$1
|
||||
|
||||
# Load boundary
|
||||
echo "Load boundary..."
|
||||
psql -c "DROP TABLE IF EXISTS boundary"
|
||||
shp2pgsql -s 3857 $boundary_file boundary | psql
|
||||
|
||||
# Delete geometries (hence buildings, building_properties)
|
||||
echo "Delete geometries (hence buildings, building_properties)..."
|
||||
psql -c "DELETE FROM geometries as g
|
||||
USING boundary as b
|
||||
WHERE b.gid = 1 AND NOT ST_ContainsProperly(b.geom, g.geometry_geom);"
|
||||
|
18
etl/drop_outside_limit_new_geometries.sh
Normal file
18
etl/drop_outside_limit_new_geometries.sh
Normal file
@ -0,0 +1,18 @@
|
||||
#!/usr/bin/env bash

#
# Load boundary and filter the staged new geometries
# - boundary MUST be epsg:3857
#   use: ogr2ogr -t_srs EPSG:3857 boundary.3857.shp boundary.shp
# - only rows in the `new_geometries` staging table are deleted
#
: ${1?"Usage: $0 ./path/to/boundary"}
boundary_file=$1

# Recreate the boundary table from the shapefile on every run.
echo "Load boundary..."
psql -c "DROP TABLE IF EXISTS boundary"
# Quote the path so boundary files with spaces or glob characters still load.
shp2pgsql -s 3857 "$boundary_file" boundary | psql

# Keep only geometries properly contained by the boundary feature with gid = 1.
echo "Delete new geometries outside the boundary..."
psql -c "DELETE FROM new_geometries as g
USING boundary as b
WHERE b.gid = 1 AND NOT ST_ContainsProperly(b.geom, g.geometry_geom);"
|
55
etl/load_new_geometries.sh
Normal file
55
etl/load_new_geometries.sh
Normal file
@ -0,0 +1,55 @@
|
||||
#!/usr/bin/env bash

# Load geometries from a new OS release into Postgres and stage the ones that are new
# - assume postgres connection details are set in the environment using PGUSER, PGHOST etc.
# - leaves two tables behind:
#     release_geometries: every geometry in the release, de-duplicated by TOID
#     new_geometries:     release geometries whose TOID is not yet in `geometries`

: ${1?"Usage: $0 ./path/to/mastermap/dir"}

mastermap_dir=$1

# Create 'geometry' record with
#     id: <polygon-guid>,
#     source_id: <toid>,
#     geom: <geom>

# Start from a clean slate so a previous (possibly partial) run cannot leak rows.
echo "Removing temp tables if previously created..."
psql -c "DROP TABLE IF EXISTS new_geometries;"
psql -c "DROP TABLE IF EXISTS release_geometries;"

echo "Creating temporary geometries table for OS release geometries..."
psql -c "CREATE TABLE IF NOT EXISTS release_geometries (
        geometry_id serial,
        source_id varchar(30),
        geometry_geom geometry(GEOMETRY, 3857)
        );"

# Each matching CSV is piped into its own COPY via GNU parallel.
# The directory is quoted so paths containing spaces or glob characters work.
# NOTE(review): -printf rebuilds each path as "<dir>/<basename>", so the CSVs
# are presumably expected to sit directly inside mastermap_dir - confirm.
echo "Copy geometries to db..."
find "$mastermap_dir" -type f -name '*.3857.csv' \
-printf "$mastermap_dir/%f\n" | \
parallel \
cat {} '|' psql -c "\"COPY release_geometries ( geometry_geom, source_id ) FROM stdin WITH CSV HEADER;\""

echo "Creating temporary geometries table for new geometries only..."
psql -c "CREATE TABLE IF NOT EXISTS new_geometries (
        source_id varchar(30),
        geometry_geom geometry(GEOMETRY, 3857)
        );"

# Delete any duplicated geometries (by TOID)
# For each TOID appearing more than once, keep the row with the smallest ctid.
echo "Delete duplicate geometries..."
psql -c "DELETE FROM release_geometries a USING (
    SELECT MIN(ctid) as ctid, source_id
    FROM release_geometries
    GROUP BY source_id
    HAVING COUNT(*) > 1
) b
WHERE a.source_id = b.source_id
AND a.ctid <> b.ctid;"

# A geometry counts as new when no row in `geometries` shares its TOID.
echo "Finding geometries that are new to this release..."
psql -c "INSERT INTO new_geometries ( source_id, geometry_geom )
            SELECT source_id, geometry_geom
            FROM release_geometries AS r
            WHERE NOT EXISTS ( SELECT source_id
                            FROM geometries AS g
                            WHERE g.source_id = r.source_id);"
|
23
etl/mark_demolitions.sh
Normal file
23
etl/mark_demolitions.sh
Normal file
@ -0,0 +1,23 @@
|
||||
#!/usr/bin/env bash

# Mark buildings as demolished when their geometry is missing from the latest OS release
# - reads the `release_geometries` table, which load_new_geometries.sh creates,
#   so that script must have been run for the current release first
# - psql is called without connection flags, so connection details are
#   presumably taken from the environment (PGUSER, PGHOST etc.)

psql -c "DROP TABLE IF EXISTS old_geometries;"

echo "Creating temporary table for geometries in the db not present in new data..."
psql -c "CREATE TABLE IF NOT EXISTS old_geometries (
        source_id varchar(30) PRIMARY KEY,
        geometry_geom geometry(GEOMETRY, 3857)
        );"

# A geometry counts as old when no row in `release_geometries` shares its TOID.
echo "Find geometries in the db not present in new data..."
psql -c "INSERT INTO old_geometries ( source_id, geometry_geom )
            SELECT source_id, geometry_geom
            FROM geometries AS g
            WHERE NOT EXISTS ( SELECT source_id
                            FROM release_geometries AS r
                            WHERE g.source_id = r.source_id);"

# Only stamp buildings that are not already marked, so the date recorded by an
# earlier update is not overwritten with today's date on every later run.
echo "Set each building's latest_demolish_date for today if linked geometry in the db not present in new data..."
psql -c "UPDATE buildings
        SET latest_demolish_date = CURRENT_DATE
        FROM old_geometries AS og
        WHERE buildings.ref_toid = og.source_id
        AND buildings.latest_demolish_date IS NULL;"
|
2
migrations/027.demolish-date.down.sql
Normal file
2
migrations/027.demolish-date.down.sql
Normal file
@ -0,0 +1,2 @@
|
||||
-- Down migration 027: remove the demolition date column from buildings (no-op if absent).
ALTER TABLE buildings DROP COLUMN IF EXISTS latest_demolish_date;
|
2
migrations/027.demolish-date.up.sql
Normal file
2
migrations/027.demolish-date.up.sql
Normal file
@ -0,0 +1,2 @@
|
||||
-- Up migration 027: add a nullable demolition date column to buildings (no-op if present).
ALTER TABLE buildings ADD COLUMN IF NOT EXISTS latest_demolish_date DATE;
|
Loading…
Reference in New Issue
Block a user