Rename etl scripts
This commit is contained in:
parent
ab32c36c98
commit
bddd7e769f
25
etl/README.md
Normal file
25
etl/README.md
Normal file
@ -0,0 +1,25 @@
|
|||||||
|
# Data loading
|
||||||
|
|
||||||
|
The scripts in this directory are used to extract, transform and load (ETL) the core datasets
|
||||||
|
for Colouring London:
|
||||||
|
|
||||||
|
1. Building geometries, sourced from Ordnance Survey MasterMap (Topography Layer)
|
||||||
|
1. Unique Property Reference Numbers (UPRNs), sourced from Ordnance Survey AddressBase
|
||||||
|
|
||||||
|
## Prerequisites
|
||||||
|
|
||||||
|
Before running any of these scripts, you will need the OS data for your area of
|
||||||
|
interest. AddressBase and MasterMap are available directly from [Ordnance
|
||||||
|
Survey](https://www.ordnancesurvey.co.uk/).
|
||||||
|
|
||||||
|
To help test the Colouring London app, `get_test_polygons.py` will attempt to save a small
|
||||||
|
(1.5km²) extract from OpenStreetMap to a format suitable for loading to the database.
|
||||||
|
|
||||||
|
The scripts should be run in the following order:
|
||||||
|
|
||||||
|
1. extract_addressbase.sh
|
||||||
|
1. extract_mastermap.sh
|
||||||
|
1. filter_transform_mastermap_for_loading.sh
|
||||||
|
1. load_geometries.sh
|
||||||
|
1. create_building_records.sh
|
||||||
|
1. load_uprns.py
|
9
etl/create_building_records.sh
Normal file
9
etl/create_building_records.sh
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
#!/usr/bin/env bash
|
||||||
|
|
||||||
|
#
|
||||||
|
# Create corresponding 'building' record with
|
||||||
|
# id: <building-guid>,
|
||||||
|
# doc: {},
|
||||||
|
# geometry_id: <polygon-guid>
|
||||||
|
#
|
||||||
|
psql -c "INSERT INTO buildings ( geometry_id ) SELECT geometry_id from geometries;"
|
@ -1,4 +1,10 @@
|
|||||||
"""Download and load a small open dataset for testing
|
"""Download and load a small open dataset for testing
|
||||||
|
|
||||||
|
Run this to create a CSV of building geometries.
|
||||||
|
|
||||||
|
Then run:
|
||||||
|
- load_geometries.sh (loading geometries to the database)
|
||||||
|
- create_building_records.sh (creating empty building records for each geometry)
|
||||||
"""
|
"""
|
||||||
# -*- coding: utf-8 -*-
|
# -*- coding: utf-8 -*-
|
||||||
import os
|
import os
|
||||||
@ -21,8 +27,8 @@ fig, ax = osmnx.plot_buildings(gdf_proj, bgcolor='#333333', color='w', figsize=(
|
|||||||
save=True, show=False, close=True,
|
save=True, show=False, close=True,
|
||||||
filename='test_buildings_preview', dpi=600)
|
filename='test_buildings_preview', dpi=600)
|
||||||
|
|
||||||
# save as geojson
|
# save as CSV
|
||||||
test_data_file = os.path.join(os.path.dirname(__file__), 'test_buildings.geojson')
|
test_data_file = os.path.join(os.path.dirname(__file__), 'test_buildings.csv')
|
||||||
|
|
||||||
gdf_to_save = gdf_proj.reset_index(
|
gdf_to_save = gdf_proj.reset_index(
|
||||||
)[
|
)[
|
||||||
@ -32,5 +38,5 @@ gdf_to_save = gdf_proj.reset_index(
|
|||||||
gdf_to_save.rename(
|
gdf_to_save.rename(
|
||||||
columns={'index': 'fid'}
|
columns={'index': 'fid'}
|
||||||
).to_file(
|
).to_file(
|
||||||
test_data_file, driver='GeoJSON'
|
test_data_file, driver='CSV'
|
||||||
)
|
)
|
@ -30,11 +30,3 @@ psql -c "DELETE FROM geometries a USING (
|
|||||||
) b
|
) b
|
||||||
WHERE a.source_id = b.source_id
|
WHERE a.source_id = b.source_id
|
||||||
AND a.ctid <> b.ctid;"
|
AND a.ctid <> b.ctid;"
|
||||||
|
|
||||||
#
|
|
||||||
# Create corresponding 'building' record with
|
|
||||||
# id: <building-guid>,
|
|
||||||
# doc: {},
|
|
||||||
# geom_id: <polygon-guid>
|
|
||||||
#
|
|
||||||
psql -c "INSERT INTO buildings ( geometry_id ) SELECT geometry_id from geometries;"
|
|
@ -73,7 +73,7 @@ def save_building(cur, uprn, geometry_id):
|
|||||||
geometry_id,
|
geometry_id,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
building = cur.fetchone()
|
building = cur.fetchone()
|
||||||
if building is None:
|
if building is None:
|
||||||
cur.execute(
|
cur.execute(
|
||||||
"""INSERT INTO buildings
|
"""INSERT INTO buildings
|
||||||
@ -122,8 +122,8 @@ def read_config(config_path):
|
|||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
if len(sys.argv) != 3:
|
if len(sys.argv) != 2:
|
||||||
print("Usage: {} ./path/to/source/file.csv ./path/to/dbconfig.json".format(
|
print("Usage: {} ./path/to/source/file.csv".format(
|
||||||
os.path.basename(__file__)
|
os.path.basename(__file__)
|
||||||
))
|
))
|
||||||
exit()
|
exit()
|
Loading…
Reference in New Issue
Block a user