Add data extract maintenance script
parent 0dee6ae167
commit be13cd94f5

maintenance/extract_data/export_attributes.sql (new file)
@@ -0,0 +1,51 @@
SELECT
    building_id,
    ref_toid,
    ref_osm_id,
    revision_id,
    location_name,
    location_number,
    location_street,
    location_line_two,
    location_town,
    location_postcode,
    location_latitude,
    location_longitude,
    date_year,
    date_lower,
    date_upper,
    date_source,
    date_source_detail,
    facade_year,
    facade_upper,
    facade_lower,
    facade_source,
    facade_source_detail,
    size_storeys_attic,
    size_storeys_core,
    size_storeys_basement,
    size_height_apex,
    size_floor_area_ground,
    size_floor_area_total,
    size_width_frontage,
    likes_total,
    planning_portal_link,
    planning_in_conservation_area,
    planning_conservation_area_name,
    planning_in_list,
    planning_list_id,
    planning_heritage_at_risk_id,
    planning_world_list_id,
    planning_in_glher,
    planning_glher_url,
    planning_in_apa,
    planning_apa_name,
    planning_apa_tier,
    planning_in_local_list,
    planning_local_list_url,
    planning_in_historic_area_assessment,
    planning_historic_area_assessment_url,
    planning_list_cat,
    planning_list_grade,
    date_link
FROM buildings

maintenance/extract_data/export_edit_history.sql (new file)
@@ -0,0 +1,3 @@
SELECT log_id as revision_id, log_timestamp as revision_timestamp, building_id, forward_patch, reverse_patch, u.username as user
FROM logs l
JOIN users u ON l.user_id = u.user_id

maintenance/extract_data/export_uprns.sql (new file)
@@ -0,0 +1,3 @@
SELECT building_id, uprn, parent_uprn
FROM building_properties
WHERE building_id IS NOT NULL
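
The three .sql files are plain, parameter-free queries, so they can also be checked ad hoc outside the extract script. The snippet below is a sketch only (not part of the commit); it connects the same way extract_data.py does, from the PG* environment variables, and assumes it is run from the maintenance/extract_data directory so the file path resolves.

import os
from pathlib import Path

import psycopg2

# Sketch: run one export query standalone and print a few rows as a sanity check.
connection = psycopg2.connect(
    host=os.environ['PGHOST'],
    dbname=os.environ['PGDATABASE'],
    user=os.environ['PGUSER'],
    password=os.environ['PGPASSWORD']
)
query = Path('export_uprns.sql').read_text()
with connection.cursor() as cursor:
    cursor.execute(query)
    for row in cursor.fetchmany(5):
        print(row)
connection.close()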

maintenance/extract_data/extract_data.py (new file)
@@ -0,0 +1,112 @@
#!/usr/bin/env python3

import csv
import datetime
from io import StringIO
import os
from pathlib import Path
import zipfile

import psycopg2


def get_connection():
    return psycopg2.connect(
        host=os.environ['PGHOST'],
        dbname=os.environ['PGDATABASE'],
        user=os.environ['PGUSER'],
        password=os.environ['PGPASSWORD']
    )


def fetch_with_server_side_cursor(
        connection,
        query,
        on_row,
        row_batch_size=10000
):
    # A named (server-side) cursor streams rows in batches of row_batch_size
    # instead of loading the whole result set into memory at once.
    with connection.cursor('server_side') as cur:
        cur.itersize = row_batch_size
        cur.execute(query)

        header_saved = False

        for row in cur:
            if not header_saved:
                # Emit the column names once, before the first data row.
                columns = [c[0] for c in cur.description]
                on_row(columns)
                header_saved = True
            on_row(row)


def db_to_csv(connection, query):
    string_io = StringIO()
    writer = csv.writer(string_io)

    fetch_with_server_side_cursor(
        connection,
        query,
        lambda row: writer.writerow(row)
    )

    return string_io.getvalue()


def get_extract_zip_file_path(current_time):
    base_dir = Path(os.environ['EXTRACTS_DIRECTORY'])
    file_name = f"data-extract-{current_time:%Y-%m-%d}.zip"
    return base_dir / file_name


def add_extract_record_to_database(connection, zip_file_path, extracted_time):
    with connection.cursor() as cur:
        truncated_time = extracted_time.replace(second=0, microsecond=0)
        cur.execute('''INSERT INTO
                           bulk_extracts (extracted_on, extract_path)
                       VALUES
                           (%s, %s)
                    ''', (truncated_time, str(zip_file_path)))

    connection.commit()


def read_sql(rel_path_from_script):
    # Resolve SQL files relative to this script so it can be run from any directory.
    script_directory = Path(__file__).resolve().parent
    sql_path = script_directory / rel_path_from_script
    return sql_path.read_text()


building_attr_query = read_sql('./export_attributes.sql')
building_uprn_query = read_sql('./export_uprns.sql')
edit_history_query = read_sql('./export_edit_history.sql')


def make_data_extract(current_time, connection, zip_file_path):
    zip_file_path.parent.mkdir(parents=True, exist_ok=True)

    try:
        with zipfile.ZipFile(zip_file_path, mode='w') as newzip:
            newzip.writestr('building_attributes.csv',
                            db_to_csv(connection, building_attr_query))
            newzip.writestr('building_uprns.csv',
                            db_to_csv(connection, building_uprn_query))
            newzip.writestr('edit_history.csv',
                            db_to_csv(connection, edit_history_query))

        # TODO: add README

        add_extract_record_to_database(connection, zip_file_path, current_time)
    except:  # remove the partial zip on any failure, then re-raise
        zip_file_path.unlink()
        raise


def main():
    current_time = datetime.datetime.utcnow()
    conn = get_connection()
    zip_file_path = get_extract_zip_file_path(current_time)
    make_data_extract(current_time, conn, zip_file_path)


if __name__ == '__main__':
    main()
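
extract_data.py is configured entirely through environment variables: the PG* settings read in get_connection and the EXTRACTS_DIRECTORY read in get_extract_zip_file_path. Besides running the script directly, the same entry points can be reused from another Python process; the following is a minimal sketch (not part of the commit) assuming those variables are already exported.

import datetime

from extract_data import (get_connection, get_extract_zip_file_path,
                          make_data_extract)

# Build one dated extract zip and record it in bulk_extracts, as main() does.
now = datetime.datetime.utcnow()
connection = get_connection()
try:
    make_data_extract(now, connection, get_extract_zip_file_path(now))
finally:
    connection.close()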

maintenance/requirements.txt (new file)
@@ -0,0 +1 @@
psycopg2==2.8.3