diff --git a/etl/__init__.py b/etl/__init__.py
new file mode 100644
index 00000000..a9f46b58
--- /dev/null
+++ b/etl/__init__.py
@@ -0,0 +1 @@
+from .filter_mastermap import filter_mastermap
\ No newline at end of file
diff --git a/etl/filter_mastermap.py b/etl/filter_mastermap.py
index 76c1d559..be68f756 100644
--- a/etl/filter_mastermap.py
+++ b/etl/filter_mastermap.py
@@ -14,20 +14,24 @@ csv.field_size_limit(sys.maxsize)
 def main(mastermap_path):
     mm_paths = sorted(glob.glob(os.path.join(mastermap_path, "*.gml.csv")))
     for mm_path in mm_paths:
-        filter(mm_path)
+        filter_mastermap(mm_path)
 
 
-def filter(mm_path)
+def filter_mastermap(mm_path):
+    """Copy the rows whose descriptiveGroup contains 'Building' to a new CSV.
+
+    The output is written next to the input: ".gml.csv" is stripped from
+    mm_path and ".filtered.csv" is appended.
+    """
     output_path = "{}.filtered.csv".format(str(mm_path).replace(".gml.csv", ""))
     output_fieldnames = ('WKT', 'fid', 'descriptiveGroup')
     # Open the input csv with all polygons, buildings and others
     with open(mm_path, 'r') as fh:
         r = csv.DictReader(fh)
-        # Open a new buildings csv
+        # Open a new output csv that will contain just buildings
         with open(output_path, 'w') as output_fh:
             w = csv.DictWriter(output_fh, fieldnames=output_fieldnames)
-            w.writeheader()
-            # Then write to the output csv buildings only
+            w.writeheader()
             for line in r:
                 if 'Building' in line['descriptiveGroup']:
                     w.writerow(line)
diff --git a/tests/test_filter.py b/tests/test_filter.py
new file mode 100644
index 00000000..cf2a2083
--- /dev/null
+++ b/tests/test_filter.py
@@ -0,0 +1,8 @@
+import pytest
+from etl import filter_mastermap
+
+def test_filter_mastermap():
+    """Test that MasterMap CSV can be correctly filtered to include only buildings."""
+    input_file = ""
+    expected_output = ""
+    assert filter_mastermap(input_file) == expected_output
\ No newline at end of file