Merge pull request #441 from tomalrussell/fix/etl_load_csv

Fix load_csv script
This commit is contained in:
Tom Russell 2019-10-02 12:32:03 +01:00 committed by GitHub
commit 2bb0bde3eb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -1,5 +1,13 @@
"""Join csv data to buildings
Example usage (replace URL with test/staging/localhost as necessary, API key with real key for
the appropriate site):
python load_csv.py \
https://colouring.london \
a0a00000-0a00-0aaa-a0a0-0000aaaa0000 \
data.csv
This script uses the HTTP API, and can process CSV files which identify buildings by id, TOID,
UPRN.
@ -32,33 +40,39 @@ def main(base_url, api_key, source_file):
"""Read from file, update buildings
"""
with open(source_file, 'r') as source:
reader = csv.reader(source)
reader = csv.DictReader(source)
for line in reader:
building_id = find_building(line, base_url)
if building_id is None:
continue
update_building(building_id, line, api_key, base_url)
response_code, response_data = update_building(building_id, line, api_key, base_url)
if response_code != 200:
print('ERROR', building_id, response_code, response_data)
def update_building(building_id, data, api_key, base_url):
"""Save data to a building
"""
r = requests.post(
"{}/buildings/{}.json?api_key={}".format(base_url, building_id, api_key),
"{}/api/buildings/{}.json".format(base_url, building_id),
params={'api_key': api_key},
json=data
)
return r.status_code, r.json()
def find_building(data, base_url):
if 'toid' in data:
building_id = find_by_reference(base_url, 'toid', data['toid'])
if building_id is not None:
print("match_by_toid", data['toid'], building_id)
return building_id
if 'uprn' in data:
building_id = find_by_reference(base_url, 'uprn', data['uprn'])
if building_id is not None:
print("match_by_uprn", data['uprn'], building_id)
return building_id
@ -69,13 +83,13 @@ def find_building(data, base_url):
def find_by_reference(base_url, ref_key, ref_id):
"""Find building_id by TOID or UPRN
"""
r = requests.get(base_url + "/buildings/reference", params={
r = requests.get("{}/api/buildings/reference".format(base_url), params={
'key': ref_key,
'id': ref_id
})
buildings = r.json()
if buildings and len(buildings) == 1:
if buildings and 'error' not in buildings and len(buildings) == 1:
building_id = buildings[0]['building_id']
else:
building_id = None