mastermap filtering without using addressbase

This commit is contained in:
Ed Chalstrey 2022-03-17 15:43:23 +00:00
parent d822dfaaec
commit 0e35a7cca2
2 changed files with 27 additions and 27 deletions

View File

@ -1,7 +1,6 @@
"""Filter MasterMap to buildings and addressbase-matches """Filter MasterMap to buildings
- WHERE descriptiveGroup includes 'Building' - WHERE descriptiveGroup includes 'Building'
- OR toid in addressbase_toids
""" """
import csv import csv
import glob import glob
@ -13,25 +12,28 @@ from multiprocessing import Pool
csv.field_size_limit(sys.maxsize) csv.field_size_limit(sys.maxsize)
def main(ab_path, mm_path): def main(mastermap_path):
mm_paths = sorted(glob.glob(os.path.join(mm_path, "*.gml.csv"))) mm_paths = sorted(glob.glob(os.path.join(mastermap_path, "*.gml.csv")))
toid_paths = sorted(glob.glob(os.path.join(ab_path, "ab_toids_*.txt"))) # toid_paths = sorted(glob.glob(os.path.join(ab_path, "ab_toids_*.txt")))
try: # try:
assert len(mm_paths) == len(toid_paths) # assert len(mm_paths) == len(toid_paths)
except AssertionError: # except AssertionError:
print(mm_paths) # print(mm_paths)
print(toid_paths) # print(toid_paths)
zipped_paths = zip(mm_paths, toid_paths) # zipped_paths = zip(mm_paths, toid_paths)
# parallel map over tiles # parallel map over tiles
with Pool() as p: # with Pool() as p:
p.starmap(filter, zipped_paths) # p.starmap(filter, zipped_paths)
for mm_path in mm_paths:
filter(mm_path)
def filter(mm_path, toid_path): def filter(mm_path):
with open(toid_path, 'r') as fh: # with open(toid_path, 'r') as fh:
r = csv.reader(fh) # r = csv.reader(fh)
toids = set(line[0] for line in r) # toids = set(line[0] for line in r)
output_path = "{}.filtered.csv".format(str(mm_path).replace(".gml.csv", "")) output_path = "{}.filtered.csv".format(str(mm_path).replace(".gml.csv", ""))
alt_output_path = "{}.filtered_not_building.csv".format(str(mm_path).replace(".gml.csv", "")) alt_output_path = "{}.filtered_not_building.csv".format(str(mm_path).replace(".gml.csv", ""))
@ -48,13 +50,13 @@ def filter(mm_path, toid_path):
if 'Building' in line['descriptiveGroup']: if 'Building' in line['descriptiveGroup']:
w.writerow(line) w.writerow(line)
elif line['fid'] in toids: # elif line['fid'] in toids:
alt_w.writerow(line) # alt_w.writerow(line)
if __name__ == '__main__': if __name__ == '__main__':
if len(sys.argv) != 3: if len(sys.argv) != 2:
print("Usage: filter_mastermap.py ./path/to/addressbase/dir ./path/to/mastermap/dir") print("Usage: filter_mastermap.py ./path/to/mastermap/dir")
exit(-1) exit(-1)
main(sys.argv[1], sys.argv[2]) main(sys.argv[1])

View File

@ -3,23 +3,21 @@
# #
# Filter and transform for loading # Filter and transform for loading
# #
: ${1?"Usage: $0 ./path/to/addressbase/dir ./path/to/mastermap/dir"} : ${1?"Usage: $0 ./path/to/mastermap/dir"}
# : ${2?"Usage: $0 ./path/to/addressbase/dir ./path/to/mastermap/dir"}
# addressbase_dir=$1
mastermap_dir=$1 mastermap_dir=$1
# #
# Check which TOIDs are matched against UPRNs # Check which TOIDs are matched against UPRNs
# #
colouringlondon/bin/python check_ab_mm_match.py $addressbase_dir $mastermap_dir # colouringlondon/bin/python check_ab_mm_match.py $addressbase_dir $mastermap_dir
# #
# Filter # Filter
# - WHERE descriptiveGroup = '(1:Building)' # - WHERE descriptiveGroup = '(1:Building)'
# - OR toid in addressbase_toids # - OR toid in addressbase_toids
# #
colouringlondon/bin/python filter_mastermap.py $addressbase_dir $mastermap_dir colouringlondon/bin/python filter_mastermap.py $mastermap_dir
# #
# Transform to 3857 (web mercator) # Transform to 3857 (web mercator)