2018-09-21 06:10:39 -04:00
|
|
|
#!/usr/bin/env bash
|
|
|
|
|
|
|
|
#
|
|
|
|
# Extract address points from OS Addressbase GML
|
|
|
|
# - as supplied in 5km tiles, zip/gz archives
|
|
|
|
#
|
|
|
|
# Require the data directory as the first argument. ':?' (rather than '?')
# also rejects an empty string, and the quotes stop the expansion from being
# word-split or globbed when ':' evaluates it.
: "${1:?Usage: $0 ./path/to/data/dir}"

# Directory that is searched for the archives and receives the extracted files.
data_dir=$1
|
|
|
|
|
|
|
|
#
|
|
|
|
# Unzip to GML
|
|
|
|
#
|
|
|
|
|
2018-09-25 14:20:41 -04:00
|
|
|
# Extract every .zip found under data_dir into data_dir, one unzip job per
# archive, run concurrently by GNU parallel.
# - find emits full, NUL-delimited paths, so archives in nested directories
#   and names containing whitespace or newlines are passed on intact.
# - -q makes parallel quote the command words, so a data_dir containing
#   spaces still reaches unzip as a single argument.
# - unzip -u only extracts files that are new or newer than those on disk.
find "$data_dir" -type f -name '*.zip' -print0 |
  parallel -0 -q unzip -u {} -d "$data_dir"
|
2018-09-21 06:10:39 -04:00
|
|
|
|
|
|
|
#
|
2018-09-25 14:20:41 -04:00
|
|
|
# Extract to CSV
|
2018-09-21 06:10:39 -04:00
|
|
|
#
|
|
|
|
# Relevant fields:
|
|
|
|
# WKT
|
|
|
|
# crossReference (list of TOID/other references)
|
|
|
|
# source (list of cross-reference sources: 7666MT refers to MasterMap Topo)
|
|
|
|
# uprn
|
|
|
|
# parentUPRN
|
|
|
|
# logicalStatus: 1 (one) is approved (otherwise historical, provisional)
|
|
|
|
#
|
|
|
|
|
2018-09-25 14:20:41 -04:00
|
|
|
# Convert the BasicLandPropertyUnit layer of every .gml under data_dir to CSV,
# keeping only the selected attributes and writing the geometry as WKT.
# find emits full, NUL-delimited paths, so files in nested directories and
# names containing whitespace are handled; each output is written next to its
# input as <name>.gml.csv.
find "$data_dir" -type f -name '*.gml' -print0 |
  parallel -0 \
    ogr2ogr -f CSV \
    -select crossReference,source,uprn,parentUPRN,logicalStatus \
    {}.csv {} BasicLandPropertyUnit \
    -lco GEOMETRY=AS_WKT
|
2018-09-21 06:10:39 -04:00
|
|
|
|
2018-09-25 14:20:41 -04:00
|
|
|
#
|
|
|
|
# Filter, grouping by TOID
|
|
|
|
#
|
2018-09-21 06:10:39 -04:00
|
|
|
|
2018-09-25 14:20:41 -04:00
|
|
|
# Run filter_addressbase_csv.py (resolved relative to the current directory)
# once per extracted .gml.csv under data_dir, concurrently via GNU parallel.
# find emits full, NUL-delimited paths, so files in nested directories and
# names containing whitespace are passed to the script unchanged.
find "$data_dir" -type f -name '*.gml.csv' -print0 |
  parallel -0 python filter_addressbase_csv.py {}
|