colouring-montreal/etl/extract_mastermap.sh
2022-03-28 14:02:47 +01:00

40 lines
886 B
Bash
Executable File

#!/usr/bin/env bash
#
# Extract MasterMap
#
# Require the MasterMap directory as the first argument. `:?` (rather than `?`)
# also rejects an empty string, which would otherwise make the later
# $data_dir/* globs (rename, rm) resolve against the filesystem root.
: "${1:?Usage: $0 ./path/to/mastermap/dir}"
data_dir=$1
#
# Extract buildings from *.gz to CSV
#
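# Buildings are the features where:
#   descriptiveGroup = '(1:Building)'
# (No such filter is applied in this script: every TopographicArea feature is
# exported, with descriptiveGroup kept as a column.)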
#
# Use `fid` as source ID, aka TOID.
#
# Decompress every *.gz under $data_dir in parallel, keeping the archives (-k).
# find emits full, NUL-delimited paths and parallel substitutes each one
# (shell-quoted) for {}, so directory or file names containing whitespace
# survive, and a match in a subdirectory is decompressed where it actually
# lives instead of being looked up directly under $data_dir by basename.
# NOTE(review): `-S gml` is kept from the original invocation; for inputs
# ending in .gz it does not appear to change the output name - confirm.
find "$data_dir" -type f -name '*.gz' -print0 |
  parallel -0 gunzip -k -S gml {}
# Append .gml to every entry directly under $data_dir whose name does not end
# in g, z, v or t (so *.gz and *.csv are skipped). This is meant to catch the
# freshly decompressed files - assumes their names end in some other character.
rename 's/$/.gml/' "$data_dir"/*[^gzvt]
# Note: the rename command above previously caught some temporary files and
# renamed them to .gml as well, so the find pattern below is restricted to the
# identifier found at the start of the filename (it appears to be consistent
# for all OS MasterMap downloads). The pattern may need updating for other
# downloads.
# Convert each matching GML file to a CSV written next to it (<name>.gml.csv),
# keeping only the fid and descriptiveGroup attributes of the TopographicArea
# layer and writing the geometry as WKT.
#
# The filename pattern defaults to the identifier that was hard-coded here;
# set MASTERMAP_GML_PATTERN (a find -name glob) to process a download whose
# files are named differently.
gml_pattern="${MASTERMAP_GML_PATTERN:-*5690395*.gml}"
# find emits full, NUL-delimited paths and parallel substitutes each one
# (shell-quoted) for {}, so paths containing whitespace survive and a match in
# a subdirectory is read from, and written beside, its real location.
find "$data_dir" -type f -name "$gml_pattern" -print0 |
  parallel -0 ogr2ogr \
    -select fid,descriptiveGroup \
    -f CSV {}.csv \
    {} \
    TopographicArea \
    -lco GEOMETRY=AS_WKT
# Remove the intermediate files directly under $data_dir: the .gml files and
# the .gfs files (presumably the schema sidecars GDAL writes when it reads GML
# - confirm). The path is quoted so whitespace is safe, `--` stops option
# parsing, and `:?` aborts rather than globbing /*.gfs and /*.gml if data_dir
# is ever empty. rm is deliberately left without -f so a missing match still
# reports an error, as before.
rm -- "${data_dir:?}"/*.gfs
rm -- "${data_dir:?}"/*.gml