#!/usr/bin/env bash
#
# Extract address points from OS Addressbase GML
# - as supplied in 5km tiles, zip/gz archives
#
# Usage: extract_addressbase.sh ./path/to/data/dir
#
# The single required argument is the directory holding the downloaded
# archives; every later step searches this directory with `find`.
#

# Abort with the usage message when the argument is missing *or* empty: an
# empty value would otherwise make the `find` calls below search the wrong
# place.
: "${1:?Usage: $0 ./path/to/data/dir}"
data_dir=$1

# Fail early with a clear message instead of letting each step emit its own
# `find` error for a path that is not a directory.
if [[ ! -d "$data_dir" ]]; then
  printf 'Error: %s is not a directory\n' "$data_dir" >&2
  exit 1
fi
#
# Unzip to GML
#
# find emits NUL-delimited full paths, so archives in subdirectories and
# names containing whitespace are handled; GNU parallel shell-quotes each
# substituted path.
#   {}   full path of the archive
#   {//} directory containing the archive (extraction target)
# unzip -u only extracts files that are new or newer than existing ones.
#
find "$data_dir" -type f -name '*.zip' -print0 \
  | parallel -0 unzip -u {} -d {//}
#
# Extract to CSV
#
# Relevant fields:
# WKT
# crossReference (list of TOID/other references)
# source (list of cross-reference sources: 7666MT refers to MasterMap Topo)
# uprn
# parentUPRN
# logicalStatus: 1 (one) is approved (otherwise historical, provisional)
#
# Each <name>.gml is converted to <name>.gml.csv alongside it, reading the
# BasicLandPropertyUnit layer and writing the geometry as a WKT column.
# Full NUL-delimited paths are passed to parallel so files in subdirectories
# or with whitespace in their names resolve correctly.
#
find "$data_dir" -type f -name '*.gml' -print0 \
  | parallel -0 \
      ogr2ogr -f CSV \
        -select crossReference,source,uprn,parentUPRN,logicalStatus \
        {}.csv {} BasicLandPropertyUnit \
        -lco GEOMETRY=AS_WKT
#
# Filter
#
# Runs filter_addressbase_csv.py once per extracted CSV, passing the CSV's
# full path as the only argument. The script is resolved relative to the
# current working directory, so run this from the directory containing it.
# NOTE(review): the next step expects '*.filtered.csv' files; presumably the
# Python script writes those next to its input - confirm.
#
find "$data_dir" -type f -name '*.gml.csv' -print0 \
  | parallel -0 python filter_addressbase_csv.py {}
#
# Transform to 3857 (web mercator)
#
# Reprojects every <name>.filtered.csv from EPSG:4326 to EPSG:3857, writing
# <name>.filtered.csv.3857.csv alongside it with the geometry as WKT.
# Full NUL-delimited paths are used so nested or oddly named files work.
#
find "$data_dir" -type f -name '*.filtered.csv' -print0 \
  | parallel -0 \
      ogr2ogr \
        -f CSV {}.3857.csv \
        -s_srs "EPSG:4326" \
        -t_srs "EPSG:3857" \
        {} \
        -lco GEOMETRY=AS_WKT
#
# Update to EWKT (with SRID indicator for loading to Postgres)
#
# For each <name>.3857.csv:
#   - sed prefixes a leading "POINT value with SRID=3857; (EWKT form)
#   - cut keeps comma-separated columns 1, 3, 4 and 5
#   - the result is written to <name>.3857.csv.loadable
# The whole pipeline is handed to parallel as one command string so the
# pipe and redirection run inside each job; parallel shell-quotes the
# substituted {} path.
#
find "$data_dir" -type f -name '*.3857.csv' -print0 \
  | parallel -0 \
      "sed 's/^\"POINT/\"SRID=3857;POINT/' {} | cut -d ',' -f 1,3,4,5 > {}.loadable"