MasterMap filtering without using AddressBase
This commit is contained in:
parent
d822dfaaec
commit
0e35a7cca2
@ -1,7 +1,6 @@
|
|||||||
"""Filter MasterMap to buildings and addressbase-matches
|
"""Filter MasterMap to buildings
|
||||||
|
|
||||||
- WHERE descriptiveGroup includes 'Building'
|
- WHERE descriptiveGroup includes 'Building'
|
||||||
- OR toid in addressbase_toids
|
|
||||||
"""
|
"""
|
||||||
import csv
|
import csv
|
||||||
import glob
|
import glob
|
||||||
@ -13,25 +12,28 @@ from multiprocessing import Pool
|
|||||||
|
|
||||||
csv.field_size_limit(sys.maxsize)
|
csv.field_size_limit(sys.maxsize)
|
||||||
|
|
||||||
def main(ab_path, mm_path):
|
def main(mastermap_path):
|
||||||
mm_paths = sorted(glob.glob(os.path.join(mm_path, "*.gml.csv")))
|
mm_paths = sorted(glob.glob(os.path.join(mastermap_path, "*.gml.csv")))
|
||||||
toid_paths = sorted(glob.glob(os.path.join(ab_path, "ab_toids_*.txt")))
|
# toid_paths = sorted(glob.glob(os.path.join(ab_path, "ab_toids_*.txt")))
|
||||||
|
|
||||||
try:
|
# try:
|
||||||
assert len(mm_paths) == len(toid_paths)
|
# assert len(mm_paths) == len(toid_paths)
|
||||||
except AssertionError:
|
# except AssertionError:
|
||||||
print(mm_paths)
|
# print(mm_paths)
|
||||||
print(toid_paths)
|
# print(toid_paths)
|
||||||
zipped_paths = zip(mm_paths, toid_paths)
|
# zipped_paths = zip(mm_paths, toid_paths)
|
||||||
|
|
||||||
# parallel map over tiles
|
# parallel map over tiles
|
||||||
with Pool() as p:
|
# with Pool() as p:
|
||||||
p.starmap(filter, zipped_paths)
|
# p.starmap(filter, zipped_paths)
|
||||||
|
|
||||||
|
for mm_path in mm_paths:
|
||||||
|
filter(mm_path)
|
||||||
|
|
||||||
def filter(mm_path, toid_path):
|
def filter(mm_path):
|
||||||
with open(toid_path, 'r') as fh:
|
# with open(toid_path, 'r') as fh:
|
||||||
r = csv.reader(fh)
|
# r = csv.reader(fh)
|
||||||
toids = set(line[0] for line in r)
|
# toids = set(line[0] for line in r)
|
||||||
|
|
||||||
output_path = "{}.filtered.csv".format(str(mm_path).replace(".gml.csv", ""))
|
output_path = "{}.filtered.csv".format(str(mm_path).replace(".gml.csv", ""))
|
||||||
alt_output_path = "{}.filtered_not_building.csv".format(str(mm_path).replace(".gml.csv", ""))
|
alt_output_path = "{}.filtered_not_building.csv".format(str(mm_path).replace(".gml.csv", ""))
|
||||||
@ -48,13 +50,13 @@ def filter(mm_path, toid_path):
|
|||||||
if 'Building' in line['descriptiveGroup']:
|
if 'Building' in line['descriptiveGroup']:
|
||||||
w.writerow(line)
|
w.writerow(line)
|
||||||
|
|
||||||
elif line['fid'] in toids:
|
# elif line['fid'] in toids:
|
||||||
alt_w.writerow(line)
|
# alt_w.writerow(line)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
if len(sys.argv) != 3:
|
if len(sys.argv) != 2:
|
||||||
print("Usage: filter_mastermap.py ./path/to/addressbase/dir ./path/to/mastermap/dir")
|
print("Usage: filter_mastermap.py ./path/to/mastermap/dir")
|
||||||
exit(-1)
|
exit(-1)
|
||||||
main(sys.argv[1], sys.argv[2])
|
main(sys.argv[1])
|
||||||
|
@ -3,23 +3,21 @@
|
|||||||
#
|
#
|
||||||
# Filter and transform for loading
|
# Filter and transform for loading
|
||||||
#
|
#
|
||||||
: ${1?"Usage: $0 ./path/to/addressbase/dir ./path/to/mastermap/dir"}
|
: ${1?"Usage: $0 ./path/to/mastermap/dir"}
|
||||||
# : ${2?"Usage: $0 ./path/to/addressbase/dir ./path/to/mastermap/dir"}
|
|
||||||
|
|
||||||
# addressbase_dir=$1
|
|
||||||
mastermap_dir=$1
|
mastermap_dir=$1
|
||||||
|
|
||||||
#
|
#
|
||||||
# Check which TOIDs are matched against UPRNs
|
# Check which TOIDs are matched against UPRNs (step currently disabled)
|
||||||
#
|
#
|
||||||
colouringlondon/bin/python check_ab_mm_match.py $addressbase_dir $mastermap_dir
|
# colouringlondon/bin/python check_ab_mm_match.py $addressbase_dir $mastermap_dir
|
||||||
|
|
||||||
#
|
#
|
||||||
# Filter
|
# Filter
|
||||||
# - WHERE descriptiveGroup = '(1:Building)'
|
# - WHERE descriptiveGroup = '(1:Building)'
|
||||||
# - OR toid in addressbase_toids
|
# - (no longer applied: OR toid in addressbase_toids)
|
||||||
#
|
#
|
||||||
colouringlondon/bin/python filter_mastermap.py $addressbase_dir $mastermap_dir
|
colouringlondon/bin/python filter_mastermap.py $mastermap_dir
|
||||||
|
|
||||||
#
|
#
|
||||||
# Transform to 3857 (web mercator)
|
# Transform to 3857 (web mercator)
|
||||||
|
Loading…
Reference in New Issue
Block a user