Add data extract maintenance script
parent 0dee6ae167
commit be13cd94f5
51 maintenance/extract_data/export_attributes.sql Normal file
@@ -0,0 +1,51 @@
SELECT
    building_id,
    ref_toid,
    ref_osm_id,
    revision_id,
    location_name,
    location_number,
    location_street,
    location_line_two,
    location_town,
    location_postcode,
    location_latitude,
    location_longitude,
    date_year,
    date_lower,
    date_upper,
    date_source,
    date_source_detail,
    facade_year,
    facade_upper,
    facade_lower,
    facade_source,
    facade_source_detail,
    size_storeys_attic,
    size_storeys_core,
    size_storeys_basement,
    size_height_apex,
    size_floor_area_ground,
    size_floor_area_total,
    size_width_frontage,
    likes_total,
    planning_portal_link,
    planning_in_conservation_area,
    planning_conservation_area_name,
    planning_in_list,
    planning_list_id,
    planning_heritage_at_risk_id,
    planning_world_list_id,
    planning_in_glher,
    planning_glher_url,
    planning_in_apa,
    planning_apa_name,
    planning_apa_tier,
    planning_in_local_list,
    planning_local_list_url,
    planning_in_historic_area_assessment,
    planning_historic_area_assessment_url,
    planning_list_cat,
    planning_list_grade,
    date_link
FROM buildings
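
For ad-hoc testing of this query outside the extract script, PostgreSQL's COPY can stream the result straight to CSV. A minimal sketch using psycopg2's copy_expert, assuming the same PG* environment variables used by extract_data.py below (the output file name is illustrative):

#!/usr/bin/env python3
# Sketch: stream export_attributes.sql to a CSV file via COPY ... TO STDOUT.
import os
from pathlib import Path

import psycopg2

query = Path('export_attributes.sql').read_text()
conn = psycopg2.connect(
    host=os.environ['PGHOST'],
    dbname=os.environ['PGDATABASE'],
    user=os.environ['PGUSER'],
    password=os.environ['PGPASSWORD']
)
with conn.cursor() as cur, open('building_attributes.csv', 'w') as f:
    # The server formats the CSV, header row included
    cur.copy_expert(f"COPY ({query}) TO STDOUT WITH CSV HEADER", f)
conn.close()
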
3 maintenance/extract_data/export_edit_history.sql Normal file
@@ -0,0 +1,3 @@
SELECT log_id AS revision_id, log_timestamp AS revision_timestamp, building_id, forward_patch, reverse_patch, u.username AS "user"
FROM logs l
JOIN users u ON l.user_id = u.user_id
3 maintenance/extract_data/export_uprns.sql Normal file
@@ -0,0 +1,3 @@
SELECT building_id, uprn, parent_uprn
FROM building_properties
WHERE building_id IS NOT NULL
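
This export has one row per UPRN rather than one per building. A small sketch of re-grouping the CSV per building after extraction (the file name matches the entry written into the zip by extract_data.py below):

import csv
from collections import defaultdict

# Map each building_id to its list of UPRNs
uprns_by_building = defaultdict(list)
with open('building_uprns.csv', newline='') as f:
    for row in csv.DictReader(f):
        uprns_by_building[row['building_id']].append(row['uprn'])
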
112 maintenance/extract_data/extract_data.py Normal file
@@ -0,0 +1,112 @@
#!/usr/bin/env python3

import csv
import datetime
from io import StringIO
import os
from pathlib import Path
import zipfile

import psycopg2


def get_connection():
    # Connection details come from the standard Postgres environment variables
    return psycopg2.connect(
        host=os.environ['PGHOST'],
        dbname=os.environ['PGDATABASE'],
        user=os.environ['PGUSER'],
        password=os.environ['PGPASSWORD']
    )


def fetch_with_server_side_cursor(
    connection,
    query,
    on_row,
    row_batch_size=10000
):
    # A named (server-side) cursor fetches rows in batches of itersize,
    # so the full result set is never held in memory at once
    with connection.cursor('server_side') as cur:
        cur.itersize = row_batch_size
        cur.execute(query)

        header_saved = False

        for row in cur:
            # Emit the column names once, before the first data row
            if not header_saved:
                columns = [c[0] for c in cur.description]
                on_row(columns)
                header_saved = True
            on_row(row)

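The on_row callback keeps the streaming helper reusable beyond CSV writing. A hypothetical caller that counts rows instead (the function and query here are for illustration only, not part of the commit):

def count_rows(connection):
    # Start at -1 so the header row emitted first is not counted
    counter = {'n': -1}

    def on_row(row):
        counter['n'] += 1

    fetch_with_server_side_cursor(connection, 'SELECT * FROM buildings', on_row)
    return counter['n']
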

def db_to_csv(connection, query):
    # Collect the streamed rows into an in-memory CSV string
    string_io = StringIO()
    writer = csv.writer(string_io)

    fetch_with_server_side_cursor(
        connection,
        query,
        writer.writerow
    )

    return string_io.getvalue()


def get_extract_zip_file_path(current_time):
    base_dir = Path(os.environ['EXTRACTS_DIRECTORY'])
    file_name = f"data-extract-{current_time:%Y-%m-%d}.zip"
    return base_dir / file_name


def add_extract_record_to_database(connection, zip_file_path, extracted_time):
    # Assumes a bulk_extracts table with extracted_on and extract_path columns
    with connection.cursor() as cur:
        truncated_time = extracted_time.replace(second=0, microsecond=0)
        cur.execute('''INSERT INTO
            bulk_extracts (extracted_on, extract_path)
            VALUES
            (%s, %s)
        ''', (truncated_time, str(zip_file_path)))

    connection.commit()


def read_sql(rel_path_from_script):
    # Resolve SQL files relative to this script, not the working directory
    script_directory = Path(__file__).resolve().parent
    sql_path = script_directory / rel_path_from_script
    return sql_path.read_text()


building_attr_query = read_sql('./export_attributes.sql')
building_uprn_query = read_sql('./export_uprns.sql')
edit_history_query = read_sql('./export_edit_history.sql')


def make_data_extract(current_time, connection, zip_file_path):
    zip_file_path.parent.mkdir(parents=True, exist_ok=True)

    try:
        with zipfile.ZipFile(zip_file_path, mode='w') as newzip:
            newzip.writestr('building_attributes.csv',
                            db_to_csv(connection, building_attr_query))
            newzip.writestr('building_uprns.csv',
                            db_to_csv(connection, building_uprn_query))
            newzip.writestr('edit_history.csv',
                            db_to_csv(connection, edit_history_query))

        # TODO: add README

        add_extract_record_to_database(connection, zip_file_path, current_time)
    except BaseException:
        # Remove the partially written archive before re-raising
        zip_file_path.unlink()
        raise


def main():
    current_time = datetime.datetime.utcnow()
    conn = get_connection()
    zip_file_path = get_extract_zip_file_path(current_time)
    make_data_extract(current_time, conn, zip_file_path)


if __name__ == '__main__':
    main()
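
A quick sanity check on a generated archive is to open it with zipfile and count the columns in each CSV header. The file name below is a hypothetical example of the data-extract-YYYY-MM-DD.zip pattern used above:

import csv
import io
import zipfile

with zipfile.ZipFile('data-extract-2019-08-01.zip') as z:
    for name in z.namelist():
        with z.open(name) as f:
            # First CSV row is the header emitted by db_to_csv
            header = next(csv.reader(io.TextIOWrapper(f, encoding='utf-8')))
            print(f"{name}: {len(header)} columns")
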
1 maintenance/requirements.txt Normal file
@@ -0,0 +1 @@
psycopg2==2.8.3