diff --git a/etl/filter_mastermap.py b/etl/filter_mastermap.py index de4a02d0..2167a53c 100644 --- a/etl/filter_mastermap.py +++ b/etl/filter_mastermap.py @@ -14,6 +14,7 @@ csv.field_size_limit(sys.maxsize) def main(mastermap_path): mm_paths = sorted(glob.glob(os.path.join(mastermap_path, "*.gml.csv"))) for mm_path in mm_paths: + print(mm_path) filter_mastermap(mm_path) @@ -28,8 +29,11 @@ def filter_mastermap(mm_path): w = csv.DictWriter(output_fh, fieldnames=output_fieldnames) w.writeheader() for line in r: - if 'Building' in line['descriptiveGroup']: - w.writerow(line) + try: + if 'Building' in line['descriptiveGroup']: + w.writerow(line) + except TypeError: # when descriptiveGroup is missing, ignore this Polygon + pass if __name__ == '__main__': diff --git a/tests/test_filter.py b/tests/test_filter.py index aae7842d..16284657 100644 --- a/tests/test_filter.py +++ b/tests/test_filter.py @@ -7,17 +7,17 @@ def test_filter_mastermap(): """Test that MasterMap CSV can be correctly filtered to include only buildings.""" input_file = "tests/test_mastermap.gml.csv" # Test csv with one building and one non-building output_file = input_file.replace('gml', 'filtered') - filter_mastermap(input_file) # creates test_mastermap.filtered.csv + filter_mastermap(input_file) # creates output_file with open(output_file, newline='') as csvfile: csv_array = list(csv.reader(csvfile)) assert len(csv_array) == 2 # assert that length is 2 because just one row after header -def test_filter_mastermap_missing_type(): +def test_filter_mastermap_missing_descriptivegroup(): """Test that MasterMap CSV can be correctly filtered when the polygon does not have a type specified.""" - input_file = "tests/test_mastermap_missing_type.gml.csv" # Test csv with one building and one non-building + input_file = "tests/test_mastermap_missing_descriptivegroup.gml.csv" # Test csv with descriptiveGroup missing; no row should pass the filter output_file = input_file.replace('gml', 'filtered') - filter_mastermap(input_file) # creates
test_mastermap.filtered.csv + filter_mastermap(input_file) # creates output_file with open(output_file, newline='') as csvfile: csv_array = list(csv.reader(csvfile)) - assert len(csv_array) == 2 # assert that length is 2 because just one row after header \ No newline at end of file + assert len(csv_array) == 1 # assert that length is 1 because just header \ No newline at end of file diff --git a/tests/test_mastermap_missing_descriptivegroup.filtered.csv b/tests/test_mastermap_missing_descriptivegroup.filtered.csv new file mode 100644 index 00000000..8de75769 --- /dev/null +++ b/tests/test_mastermap_missing_descriptivegroup.filtered.csv @@ -0,0 +1 @@ +WKT,fid,descriptiveGroup diff --git a/tests/test_mastermap_missing_type.gml.csv b/tests/test_mastermap_missing_descriptivegroup.gml.csv similarity index 100% rename from tests/test_mastermap_missing_type.gml.csv rename to tests/test_mastermap_missing_descriptivegroup.gml.csv