colouring-montreal/etl/0_extract_addressbase.sh

41 lines
899 B
Bash
Raw Normal View History

2018-09-21 06:10:39 -04:00
#!/usr/bin/env bash
#
# Extract address points from OS Addressbase GML
# - as supplied in 5km tiles, zip/gz archives
#
: ${1?"Usage: $0 ./path/to/data/dir"}
data_dir=$1
#
# Unzip to GML
#
# find $data_dir -name '*.zip' -print0 | xargs -0 -P 4 -n 1 unzip
#
# Extract (subset) to CSV
#
# Relevant fields:
# WKT
# crossReference (list of TOID/other references)
# source (list of cross-reference sources: 7666MT refers to MasterMap Topo)
# uprn
# parentUPRN
# logicalStatus: 1 (one) is approved (otherwise historical, provisional)
#
# find $data_dir -type f -name '*.gml' -printf "%f\n" | \
# parallel \
# ogr2ogr -f CSV \
# -select crossReference,source,uprn,parentUPRN,logicalStatus \
# {}.csv {} BasicLandPropertyUnit \
# -lco GEOMETRY=AS_WKT
rm $data_dir/*.gml
find $data_dir -type f -name '*.gml.csv' -printf "$data_dir%f\n" | \
parallel \
python filter_addressbase_csv.py {}