diff --git a/etl/README.md b/etl/README.md index 99c9768e..f6e478b5 100644 --- a/etl/README.md +++ b/etl/README.md @@ -25,19 +25,6 @@ The building geometries are sourced from Ordnance Survey (OS) MasterMap (Topogra 4. You should be then able to check out your basket and download the files. Note: there may be multiple `.zip` files to download for MasterMap due to the size of the dataset. 6. Unzip the MasterMap `.zip` files and move all the `.gz` files from each to a single folder in a convenient location. We will use this folder in later steps. -## Downloading OS Open TOID data - -1. Navigate to the download page at https://osdatahub.os.uk/downloads/open/OpenTOID -2. Select the area of the map you require location data for (e.g. the squares covering London) and download the data in CSV format: - -![](screenshot/OpenTOID.png) - -3. Unzip the `.zip` file(s) to get the CSV files and move them to a single folder in a convenient location. We will use this folder in later steps. - -# :penguin: Making data available to Ubuntu - -Before creating or updating a Colouring London database, you'll need to make sure the downloaded OS files are available to the Ubuntu machine where the database is hosted. If you are using Virtualbox, you could host share folder(s) containing the OS files with the VM (e.g. [see these instructions for Mac](https://medium.com/macoclock/share-folder-between-macos-and-ubuntu-4ce84fb5c1ad)). - # :new_moon: Creating a Colouring London database from scratch ## Prerequisites diff --git a/etl/convert_opentoid_bng_latlon.py b/etl/convert_opentoid_bng_latlon.py deleted file mode 100644 index c2893edd..00000000 --- a/etl/convert_opentoid_bng_latlon.py +++ /dev/null @@ -1,31 +0,0 @@ -"""Convert BNG values in OpenTOID data to latitude/longitude""" -import csv -import glob -import os -import sys -from convertbng.cutil import convert_lonlat -from pandas import read_csv - - -csv.field_size_limit(sys.maxsize) - - -def main(opentoid_path): - ot_paths = sorted(glob.glob(os.path.join(opentoid_path, "*.csv"))) - for ot_path in ot_paths: - convert_opentoid_coordinates(ot_path) - - -def convert_opentoid_coordinates(ot_path): - """Overwrite the input csv, adding the longitute/latitude from eastings/northings""" - output_path = str(ot_path).replace(".csv", "_converted.csv") - ot_data = read_csv(ot_path) - ot_data['longitude'], ot_data['latitude'] = convert_lonlat(ot_data['EASTING'], ot_data['NORTHING']) - ot_data.to_csv(output_path, index=False) - - -if __name__ == '__main__': - if len(sys.argv) != 2: - print("Usage: convert_opentoid_bng_latlon.py ./path/to/opentoid/dir") - exit(-1) - main(sys.argv[1]) diff --git a/etl/load_coordinates.sh b/etl/load_coordinates.sh deleted file mode 100644 index 9b0e852a..00000000 --- a/etl/load_coordinates.sh +++ /dev/null @@ -1,48 +0,0 @@ -#!/usr/bin/env bash - -# Assign latitude and longitude to buildings with OpenTOID data -# - assume postgres connection details are set in the environment using PGUSER, PGHOST etc. - -: ${1?"Usage: $0 ./path/to/opentoid/dir"} - -opentoid_dir=$1 - -# Move this to 001.core.up.sql if needed, or otherwise delete as temp table -echo "Creating table for open_toid coordinates..." -psql -c "DROP TABLE IF EXISTS open_toid" -psql -c "CREATE TABLE open_toid ( - toid varchar, - version_number float, - version_date date, - source_product varchar, - easting float, - northing float, - longitude float, - latitude float -);" - -echo "Loading Open TOID CSV(s) to temporary table..." -find $opentoid_dir -type f -name '*_converted.csv' \ --printf "$opentoid_dir/%f\n" | \ -parallel \ -cat {} '|' psql -c "\"COPY open_toid ( toid, version_number, version_date, source_product, easting, northing, longitude, latitude ) FROM stdin WITH CSV HEADER;\"" - -echo "Updating the buildings table with coordinates..." -psql -c "UPDATE buildings - SET location_latitude = open_toid.latitude, - location_longitude = open_toid.longitude - FROM open_toid - WHERE open_toid.toid = buildings.ref_toid -;" - -# Add these columns here rather than in 001.core.up.sql for legacy reasons -psql -c "ALTER TABLE geometries ADD longitude float;" -psql -c "ALTER TABLE geometries ADD latitude float;" - -echo "Updating the geometries table with coordinates..." -psql -c "UPDATE geometries - SET latitude = open_toid.latitude, - longitude = open_toid.longitude - FROM open_toid - WHERE open_toid.toid = geometries.source_id -;" \ No newline at end of file diff --git a/etl/requirements.txt b/etl/requirements.txt index fd1fbde9..55188359 100644 --- a/etl/requirements.txt +++ b/etl/requirements.txt @@ -5,4 +5,3 @@ psycopg2==2.7.5 shapely==1.7 retrying==1.3.3 requests==2.23.0 -convertbng==0.6.25 \ No newline at end of file diff --git a/etl/screenshot/OpenTOID.png b/etl/screenshot/OpenTOID.png deleted file mode 100644 index d550be79..00000000 Binary files a/etl/screenshot/OpenTOID.png and /dev/null differ