Allow specifying JSON columns for CSV bulk import
This commit is contained in:
parent
26ca7f8873
commit
82a50d77d6
@ -8,6 +8,17 @@ the appropriate site):
|
|||||||
a0a00000-0a00-0aaa-a0a0-0000aaaa0000 \
|
a0a00000-0a00-0aaa-a0a0-0000aaaa0000 \
|
||||||
data.csv
|
data.csv
|
||||||
|
|
||||||
|
The optional last argument is a comma-separated list of columns whose values should be parsed as JSON.
|
||||||
|
This is required, for example, for array-type columns to be processed correctly by the API.
|
||||||
|
Otherwise, those values would be treated as strings rather than arrays.
|
||||||
|
|
||||||
|
An example usage with the json_columns argument (other values in the example are placeholders):
|
||||||
|
python load_csv.py \
|
||||||
|
https://colouring.london \
|
||||||
|
a0a00000-0a00-0aaa-a0a0-0000aaaa0000 \
|
||||||
|
data.csv \
|
||||||
|
current_landuse_group,date_url
|
||||||
|
|
||||||
This script uses the HTTP API, and can process CSV files which identify buildings by id, TOID,
|
This script uses the HTTP API, and can process CSV files which identify buildings by id, TOID,
|
||||||
UPRN.
|
UPRN.
|
||||||
|
|
||||||
@ -23,6 +34,7 @@ The process:
|
|||||||
- else lookup by toid
|
- else lookup by toid
|
||||||
- else lookup by uprn
|
- else lookup by uprn
|
||||||
- else locate building by representative point
|
- else locate building by representative point
|
||||||
|
- (optional) parse JSON column values
|
||||||
- update building
|
- update building
|
||||||
|
|
||||||
TODO extend to allow latitude,longitude or easting,northing columns and lookup by location.
|
TODO extend to allow latitude,longitude or easting,northing columns and lookup by location.
|
||||||
@ -36,13 +48,14 @@ import sys
|
|||||||
import requests
|
import requests
|
||||||
|
|
||||||
|
|
||||||
def main(base_url, api_key, source_file):
|
def main(base_url, api_key, source_file, json_columns):
|
||||||
"""Read from file, update buildings
|
"""Read from file, update buildings
|
||||||
"""
|
"""
|
||||||
with open(source_file, 'r') as source:
|
with open(source_file, 'r') as source:
|
||||||
reader = csv.DictReader(source)
|
reader = csv.DictReader(source)
|
||||||
for line in reader:
|
for line in reader:
|
||||||
building_id = find_building(line, base_url)
|
building_id = find_building(line, base_url)
|
||||||
|
line = parse_json_columns(line, json_columns)
|
||||||
|
|
||||||
if building_id is None:
|
if building_id is None:
|
||||||
continue
|
continue
|
||||||
@ -101,15 +114,22 @@ def find_by_reference(base_url, ref_key, ref_id):
|
|||||||
|
|
||||||
return building_id
|
return building_id
|
||||||
|
|
||||||
|
def parse_json_columns(row, json_columns):
    """Decode the JSON-encoded cells of a CSV row in place.

    Each column named in ``json_columns`` has its string value replaced by
    the result of ``json.loads``, so that e.g. an array arrives as a list
    instead of a string.

    Blank cells (``None``, empty or whitespace-only strings) are left
    untouched rather than aborting the whole import.
    NOTE(review): confirm that passing a blank value through unchanged is
    what the API expects for "no value".

    :param row: dict mapping column name to cell value; mutated in place.
    :param json_columns: iterable of column names to decode.
    :returns: the same ``row`` object, after decoding.
    :raises KeyError: if a named column is not present in the row.
    :raises json.JSONDecodeError: if a non-blank cell is not valid JSON;
        the message names the offending column.
    """
    for col in json_columns:
        if col not in row:
            # Same exception type as a plain lookup, but says what went wrong.
            raise KeyError(
                "JSON column {!r} not found in CSV row".format(col))

        raw = row[col]
        if raw is None or (isinstance(raw, str) and not raw.strip()):
            # Nothing to decode; json.loads('') would raise.
            continue

        try:
            row[col] = json.loads(raw)
        except json.JSONDecodeError as err:
            # Re-raise the same exception type with the column name added,
            # so a bad cell can be located in a large CSV file.
            raise json.JSONDecodeError(
                "column {!r}: {}".format(col, err.msg), err.doc, err.pos
            ) from err

    return row
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
try:
|
try:
|
||||||
url, api_key, filename = sys.argv[1], sys.argv[2], sys.argv[3]
|
url, api_key, filename = sys.argv[1], sys.argv[2], sys.argv[3]
|
||||||
except IndexError:
|
except IndexError:
|
||||||
print(
|
print(
|
||||||
"Usage: {} <URL> <api_key> ./path/to/data.csv".format(
|
"Usage: {} <URL> <api_key> ./path/to/data.csv [<json_columns>]".format(
|
||||||
os.path.basename(__file__)
|
os.path.basename(__file__)
|
||||||
))
|
))
|
||||||
exit()
|
exit()
|
||||||
|
|
||||||
main(url, api_key, filename)
|
json_columns = sys.argv[4].split(',') if len(sys.argv) > 4 else []
|
||||||
|
|
||||||
|
main(url, api_key, filename, json_columns)
|
||||||
|
Loading…
Reference in New Issue
Block a user