Extract OS AddressBase to CSV

This commit is contained in:
Tom Russell 2018-05-30 18:35:15 +01:00
parent 7c97aba950
commit dd72ccd108

View File

@ -8,3 +8,47 @@ Relevant CSV columns::
18 - Latitude 18 - Latitude
19 - Longitude 19 - Longitude
""" """
import csv
import glob
import io
import os
import sys
from zipfile import ZipFile
def main(source_dir, output_file):
with open(output_file, 'w', encoding='utf8', newline='') as fh:
w = csv.writer(fh)
w.writerow(('UPRN', 'easting', 'northing', 'lat', 'lng'))
for address in read_addresses(source_dir):
w.writerow(address)
def read_addresses(source_dir):
zips = glob.glob(os.path.join(source_dir, '*.zip'))
n = len(zips)
for i, zipname in enumerate(zips):
with ZipFile(zipname) as zipfile:
names = zipfile.namelist()
csvname = names[0]
print("Processing {} ({} of {})".format(csvname, i+1, n))
with zipfile.open(csvname) as csvfile:
fh = io.TextIOWrapper(csvfile)
r = csv.reader(fh)
for line in r:
uprn = line[0]
toid = line[1]
easting = line[16]
northing = line[17]
lat = line[18]
lng = float(line[19])
yield uprn, easting, northing, lat, lng
if __name__ == '__main__':
if len(sys.argv) != 3:
print("Usage: {} ./path/to/source/dir ./path/to/output/file".format(
os.path.basename(__file__)
))
exit()
main(sys.argv[1], sys.argv[2])