colouring-montreal/etl/0_extract_addressbase.py

55 lines
1.6 KiB
Python
Raw Normal View History

2018-05-30 09:31:23 -04:00
"""Extract address points from CSV in *.zip
Relevant CSV columns::
0 - UPRN
1 - TOID (may not match given version of OS MasterMap)
16 - OSGB Easting
17 - OSGB Northing
18 - Latitude
19 - Longitude
"""
2018-05-30 13:35:15 -04:00
import csv
import glob
import io
import os
import sys
from zipfile import ZipFile
def main(source_dir, output_file):
    """Write every address found under ``source_dir`` to ``output_file``.

    The output is a CSV file that starts with a header row, followed by one
    row per tuple yielded by ``read_addresses(source_dir)``.
    """
    header = ('UPRN', 'easting', 'northing', 'lat', 'lng')
    # newline='' lets the csv writer control line endings itself
    with open(output_file, 'w', encoding='utf8', newline='') as out:
        writer = csv.writer(out)
        writer.writerow(header)
        writer.writerows(read_addresses(source_dir))
def read_addresses(source_dir):
    """Yield one address tuple per CSV row found in the zips in source_dir.

    Every ``*.zip`` directly inside ``source_dir`` is opened and its first
    member is read as CSV. Archives are visited in sorted filename order so
    that repeated runs yield rows in the same order. Blank CSV rows are
    skipped.

    Yields:
        ``(uprn, easting, northing, lat, lng)``. All values are the raw CSV
        strings except ``lng``, which is converted with ``float``.

    Raises:
        IndexError: if an archive has no members, or a non-blank row has
            fewer than 20 columns.
        ValueError: if the longitude column cannot be parsed as a float.
    """
    # glob returns matches in arbitrary order; sort for reproducible output
    zips = sorted(glob.glob(os.path.join(source_dir, '*.zip')))
    n = len(zips)
    for i, zipname in enumerate(zips, start=1):
        with ZipFile(zipname) as archive:
            # Only the first member of each archive is read.
            csvname = archive.namelist()[0]
            print("Processing {} ({} of {})".format(csvname, i, n))
            # Decode explicitly instead of relying on the locale default, and
            # pass newline='' as the csv module requires for its input.
            # NOTE(review): 'utf8' mirrors the encoding main() writes with;
            # confirm the source CSVs are UTF-8.
            with archive.open(csvname) as raw, \
                    io.TextIOWrapper(raw, encoding='utf8', newline='') as fh:
                for line in csv.reader(fh):
                    if not line:
                        # csv.reader returns [] for a blank line
                        continue
                    uprn = line[0]
                    # column 1 (TOID) is deliberately skipped - we do our
                    # own matching to geometries
                    easting = line[16]
                    northing = line[17]
                    lat = line[18]
                    lng = float(line[19])
                    yield uprn, easting, northing, lat, lng
if __name__ == '__main__':
    # Expect exactly two arguments: the source directory and the output file.
    if len(sys.argv) != 3:
        print("Usage: {} ./path/to/source/dir ./path/to/output/file".format(
            os.path.basename(__file__)
        ))
        # Use sys.exit rather than the site-provided exit(), which is meant
        # for interactive sessions, and report the usage error to the
        # calling shell with a non-zero status.
        sys.exit(1)
    main(sys.argv[1], sys.argv[2])