colouring-montreal/etl/0_extract_addressbase.py
2018-07-27 15:48:00 +01:00

55 lines
1.6 KiB
Python

"""Extract address points from CSV in *.zip
Relevant CSV columns (zero-based indices)::

    0 - UPRN
    1 - TOID (may not match given version of OS MasterMap; not extracted)
    16 - OSGB Easting
    17 - OSGB Northing
    18 - Latitude
    19 - Longitude
"""
import csv
import glob
import io
import os
import sys
from zipfile import ZipFile
def main(source_dir, output_file):
    """Write the address points found under ``source_dir`` to a CSV file.

    ``output_file`` is created (or truncated) as UTF-8 text. A header row is
    written first, followed by one row per tuple produced by
    ``read_addresses(source_dir)``.

    Args:
        source_dir: directory handed unchanged to ``read_addresses``.
        output_file: path of the CSV file to write.
    """
    header = ('UPRN', 'easting', 'northing', 'lat', 'lng')
    # newline='' lets the csv module control line endings itself
    with open(output_file, 'w', encoding='utf8', newline='') as out:
        writer = csv.writer(out)
        writer.writerow(header)
        # the generator is consumed lazily, one row at a time
        writer.writerows(read_addresses(source_dir))
def read_addresses(source_dir):
    """Yield address points from the first member of every *.zip in a directory.

    Each ``*.zip`` directly inside ``source_dir`` is opened in sorted filename
    order and the first entry of the archive is parsed as CSV. A progress
    line is printed for every archive.

    Args:
        source_dir: directory searched (non-recursively) for ``*.zip`` files.

    Yields:
        ``(uprn, easting, northing, lat, lng)`` tuples of strings, copied
        verbatim from CSV columns 0, 16, 17, 18 and 19 so that no value is
        re-formatted or rejected on its way to the output.

    Raises:
        IndexError: if an archive has no members, or a non-empty row has
            fewer than 20 columns.
    """
    # glob returns paths in arbitrary order; sort so the output is reproducible
    zips = sorted(glob.glob(os.path.join(source_dir, '*.zip')))
    n = len(zips)
    for i, zipname in enumerate(zips):
        with ZipFile(zipname) as archive:
            # NOTE(review): only the first member is read - presumably each
            # archive holds exactly one CSV; confirm against the data supply
            csvname = archive.namelist()[0]
            print("Processing {} ({} of {})".format(csvname, i+1, n))

            with archive.open(csvname) as csvfile:
                # Decode explicitly instead of relying on the platform default
                # encoding; newline='' is what the csv module expects.
                # NOTE(review): assumes the source CSVs are UTF-8 - confirm
                fh = io.TextIOWrapper(csvfile, encoding='utf8', newline='')
                for line in csv.reader(fh):
                    if not line:
                        # csv.reader yields [] for blank lines
                        continue
                    uprn = line[0]
                    # line[1] is the TOID: skipped - we do our own matching
                    # to geometries
                    easting = line[16]
                    northing = line[17]
                    lat = line[18]
                    # kept as text like every other field (no float round-trip)
                    lng = line[19]
                    yield uprn, easting, northing, lat, lng
if __name__ == '__main__':
    # Exactly two positional arguments are required: the source directory
    # and the output file path.
    if len(sys.argv) != 3:
        # Use sys.exit rather than the interactive `exit` helper (which is
        # only injected by the site module). Passing a string prints it to
        # stderr and exits with status 1, so a usage error is not reported
        # to the calling shell as success.
        sys.exit("Usage: {} ./path/to/source/dir ./path/to/output/file".format(
            os.path.basename(__file__)
        ))
    main(sys.argv[1], sys.argv[2])