diff --git a/app/map_styles/polygon.xml b/app/map_styles/polygon.xml index 659fc4da..5a9d1b50 100644 --- a/app/map_styles/polygon.xml +++ b/app/map_styles/polygon.xml @@ -35,6 +35,12 @@ + ${ + assets.client.css + ? `` + : '' + } + ${ + process.env.NODE_ENV === 'production' + ? `` + : `` + } + + +
${markup}
+ + +` + ); + } +} + +export default frontendRoute; diff --git a/app/src/helpers.ts b/app/src/helpers.ts new file mode 100644 index 00000000..9e58d179 --- /dev/null +++ b/app/src/helpers.ts @@ -0,0 +1,15 @@ +/** + * A function to be passed to JSON.parse as a JSON reviver, in order to transform date values + * (which don't have a native JSON representation and therefore are serialized as strings) + * back to a JavaScript Date object. + * This works by first checking if a string value complies with a date format + * and then converting to a Date if and only if that's the case + * @param name name of the JSON field to revive + * @param value value of the JSON field to revive + */ +export function dateReviver(name, value) { + if (typeof value === "string" && /^\d\d\d\d-\d\d-\d\dT\d\d:\d\d:\d\d.\d\d\dZ$/.test(value)) { + return new Date(value); + } + return value; +} diff --git a/app/src/parse.ts b/app/src/parse.ts index 712cfb59..db2a3493 100644 --- a/app/src/parse.ts +++ b/app/src/parse.ts @@ -23,7 +23,7 @@ function strictParseInt(value) { * @returns {number|undefined} */ function parseBuildingURL(url) { - const re = /\/building\/([^/]+).html/; + const re = /\/(\d+)$/; const matches = re.exec(url); if (matches && matches.length >= 2) { diff --git a/app/src/server.tsx b/app/src/server.tsx index 3d46d6ab..3ed45fe0 100644 --- a/app/src/server.tsx +++ b/app/src/server.tsx @@ -4,40 +4,25 @@ * - entry-point to shared React App * */ -import React from 'react'; -import { StaticRouter } from 'react-router-dom'; import express from 'express'; -import { renderToString } from 'react-dom/server'; -import serialize from 'serialize-javascript'; import session from 'express-session'; import pgConnect from 'connect-pg-simple'; -import App from './frontend/app'; import db from './db'; -import { getUserById } from './api/services/user'; -import { - getBuildingById, - getBuildingLikeById, - getBuildingUPRNsById -} from './api/services/building'; import tileserver from 
'./tiles/tileserver'; import apiServer from './api/api'; -import { parseBuildingURL } from './parse'; +import frontendRoute from './frontendRoute'; // create server const server = express(); -// reference packed assets -const assets = require(process.env.RAZZLE_ASSETS_MANIFEST); - // disable header server.disable('x-powered-by'); // serve static files server.use(express.static(process.env.RAZZLE_PUBLIC_DIR)); - // handle user sessions const pgSession = pgConnect(session); const sess: any = { // TODO: remove any @@ -59,106 +44,8 @@ if (server.get('env') === 'production') { } server.use(session(sess)); -// handle HTML routes (server-side rendered React) -server.get('/*.html', frontendRoute); -server.get('/', frontendRoute); - -function frontendRoute(req, res) { - const context: any = {}; // TODO: remove any - const data: any = {}; // TODO: remove any - context.status = 200; - - const userId = req.session.user_id; - const buildingId = parseBuildingURL(req.url); - const isBuilding = (typeof (buildingId) !== 'undefined'); - if (isBuilding && isNaN(buildingId)) { - context.status = 404; - } - - Promise.all([ - userId ? getUserById(userId) : undefined, - isBuilding ? getBuildingById(buildingId) : undefined, - isBuilding ? getBuildingUPRNsById(buildingId) : undefined, - (isBuilding && userId) ? 
getBuildingLikeById(buildingId, userId) : false - ]).then(function (values) { - const user = values[0]; - const building = values[1]; - const uprns = values[2]; - const buildingLike = values[3]; - if (isBuilding && typeof (building) === 'undefined') { - context.status = 404 - } - data.user = user; - data.building = building; - data.building_like = buildingLike; - if (data.building != null) { - data.building.uprns = uprns; - } - renderHTML(context, data, req, res) - }).catch(error => { - console.error(error); - data.user = undefined; - data.building = undefined; - data.building_like = undefined; - context.status = 500; - renderHTML(context, data, req, res); - }); -} - -function renderHTML(context, data, req, res) { - const markup = renderToString( - - - - ); - - if (context.url) { - res.redirect(context.url); - } else { - res.status(context.status).send( - ` - - - - - Colouring London - - - ${ - assets.client.css - ? `` - : '' -} - ${ - process.env.NODE_ENV === 'production' - ? `` - : `` -} - - -
${markup}
- - -` - ); - } -} - server.use('/tiles', tileserver); - server.use('/api', apiServer); - -// use the frontend route for anything else - will presumably show the 404 page server.use(frontendRoute); export default server; diff --git a/app/src/tiles/cache.ts b/app/src/tiles/cache.ts deleted file mode 100644 index f018e444..00000000 --- a/app/src/tiles/cache.ts +++ /dev/null @@ -1,171 +0,0 @@ -/** - * Cache tiles (PNG images generated from database) - * - * Frequency of change: - * - base layer tiles change rarely - on changes to underlying geometry table - * - visualisation layer tiles change frequently - with almost any edit to the buildings table - * - * Cost of generation and storage: - * - low zoom tiles are more expensive to render, containing more features from the database - * - high zoom tiles are cheaper to rerender, and changes are more visible - * - there are many more high zoom tiles than low: 4 tiles at zoom level n+1 for each tile - * at zoom level n - * - */ - -// Using node-fs package to patch fs -// for node >10 we could drop this in favour of fs.mkdir (which has recursive option) -// and then use stdlib `import fs from 'fs';` -import fs from 'node-fs'; - -import { getXYZ } from './tile'; - -// Use an environment variable to configure the cache location, somewhere we can read/write to. 
-const CACHE_PATH = process.env.TILECACHE_PATH - -/** - * Get a tile from the cache - * - * @param {String} tileset - * @param {number} z zoom level - * @param {number} x - * @param {number} y - */ -function get(tileset, z, x, y) { - if (!shouldTryCache(tileset, z)) { - return Promise.reject(`Skip cache get ${tileset}/${z}/${x}/${y}`); - } - const location = cacheLocation(tileset, z, x, y); - return new Promise((resolve, reject) => { - fs.readFile(location.fname, (err, data) => { - if (err) { - reject(err) - } else { - resolve(data) - } - }) - }); -} - -/** - * Put a tile in the cache - * - * @param {Buffer} im image data - * @param {String} tileset - * @param {number} z zoom level - * @param {number} x - * @param {number} y - */ -function put(im, tileset, z, x, y) { - if (!shouldTryCache(tileset, z)) { - return Promise.reject(`Skip cache put ${tileset}/${z}/${x}/${y}`); - } - const location = cacheLocation(tileset, z, x, y); - return new Promise((resolve, reject) => { - fs.writeFile(location.fname, im, 'binary', (err) => { - if (err && err.code === 'ENOENT') { - // recursively create tile directory if it didn't previously exist - fs.mkdir(location.dir, 0o755, true, (err) => { - if (err) { - reject(err); - } else { - // then write the file - fs.writeFile(location.fname, im, 'binary', (err) => { - (err)? reject(err): resolve() - }); - } - }); - } else { - (err)? 
reject(err): resolve() - } - }); - }) -} - -/** - * Remove a single cached tile - * - * @param {String} tileset - * @param {number} z zoom level - * @param {number} x - * @param {number} y - */ -function remove(tileset, z, x, y) { - const location = cacheLocation(tileset, z, x, y) - return new Promise(resolve => { - fs.unlink(location.fname, (err) => { - if(err){ - // pass - } else { - console.log('Expire cache', tileset, z, x, y) - } - resolve() - }) - }) -} - -/** - * Remove all cached data-visualising tiles which intersect a bbox - * - initially called directly after edits; may be better on a worker process? - * - * @param {String} tileset - * @param {Array} bbox [w, s, e, n] in EPSG:3857 coordinates - */ -function removeAllAtBbox(bbox) { - // magic numbers for min/max zoom - const minZoom = 9; - const maxZoom = 18; - // magic list of tilesets - see tileserver, other cache rules - const tilesets = ['date_year', 'size_storeys', 'location', 'likes', 'conservation_area']; - let tileBounds; - const removePromises = []; - for (let ti = 0; ti < tilesets.length; ti++) { - const tileset = tilesets[ti]; - for (let z = minZoom; z <= maxZoom; z++) { - tileBounds = getXYZ(bbox, z) - for (let x = tileBounds.minX; x <= tileBounds.maxX; x++){ - for (let y = tileBounds.minY; y <= tileBounds.maxY; y++){ - removePromises.push(remove(tileset, z, x, y)) - } - } - } - } - Promise.all(removePromises) -} - -/** - * Cache location for a tile - * - * @param {String} tileset - * @param {number} z zoom level - * @param {number} x - * @param {number} y - * @returns {object} { dir: , fname: } - */ -function cacheLocation(tileset, z, x, y) { - const dir = `${CACHE_PATH}/${tileset}/${z}/${x}` - const fname = `${dir}/${y}.png` - return {dir, fname} -} - -/** - * Check rules for caching tiles - * - * @param {String} tileset - * @param {number} z zoom level - * @returns {boolean} whether to use the cache (or not) - */ -function shouldTryCache(tileset, z) { - if (tileset === 'date_year') { - // 
cache high zoom because of front page hits - return z <= 16 - } - if (tileset === 'base_light' || tileset === 'base_night') { - // cache for higher zoom levels (unlikely to change) - return z <= 17 - } - // else cache for lower zoom levels (change slowly) - return z <= 13 -} - -export { get, put, remove, removeAllAtBbox }; diff --git a/app/src/tiles/dataDefinition.ts b/app/src/tiles/dataDefinition.ts new file mode 100644 index 00000000..50c3508a --- /dev/null +++ b/app/src/tiles/dataDefinition.ts @@ -0,0 +1,135 @@ +import { strictParseInt } from "../parse"; +import { DataConfig } from "./renderers/datasourceRenderer"; + +const BUILDING_LAYER_DEFINITIONS = { + base_light: `( + SELECT + b.location_number as location_number, + g.geometry_geom + FROM + geometries as g, + buildings as b + WHERE + g.geometry_id = b.geometry_id + ) as outline`, + base_night: `( + SELECT + b.location_number as location_number, + g.geometry_geom + FROM + geometries as g, + buildings as b + WHERE + g.geometry_id = b.geometry_id + ) as outline`, + date_year: `( + SELECT + b.date_year as date_year, + g.geometry_geom + FROM + geometries as g, + buildings as b + WHERE + g.geometry_id = b.geometry_id + ) as outline`, + size_storeys: `( + SELECT + ( + coalesce(b.size_storeys_attic, 0) + + coalesce(b.size_storeys_core, 0) + ) as size_storeys, + g.geometry_geom + FROM + geometries as g, + buildings as b + WHERE + g.geometry_id = b.geometry_id + ) as outline`, + location: `( + SELECT + ( + case when b.location_name is null then 0 else 1 end + + case when b.location_number is null then 0 else 1 end + + case when b.location_street is null then 0 else 1 end + + case when b.location_line_two is null then 0 else 1 end + + case when b.location_town is null then 0 else 1 end + + case when b.location_postcode is null then 0 else 1 end + + case when b.location_latitude is null then 0 else 1 end + + case when b.location_longitude is null then 0 else 1 end + + case when b.ref_toid is null then 0 else 1 end + + 
case when b.ref_osm_id is null then 0 else 1 end + ) as location_info_count, + g.geometry_geom + FROM + geometries as g, + buildings as b + WHERE + g.geometry_id = b.geometry_id + ) as location`, + likes: `( + SELECT + g.geometry_geom, + b.likes_total as likes + FROM + geometries as g, + buildings as b + WHERE + g.geometry_id = b.geometry_id + AND b.likes_total > 0 + ) as location`, + conservation_area: `( + SELECT + g.geometry_geom + FROM + geometries as g, + buildings as b + WHERE + g.geometry_id = b.geometry_id + AND b.planning_in_conservation_area = true + ) as conservation_area` +}; + +const GEOMETRY_FIELD = 'geometry_geom'; + +function getBuildingsDataConfig(tileset: string, dataParams: any): DataConfig { + const table = BUILDING_LAYER_DEFINITIONS[tileset]; + + if(table == undefined) { + throw new Error('Invalid tileset requested'); + } + + return { + geometry_field: GEOMETRY_FIELD, + table: table + }; +} + +function getHighlightDataConfig(tileset: string, dataParams: any): DataConfig { + let { highlight, base } = dataParams; + + highlight = strictParseInt(highlight); + base = base || 'default'; + + if(isNaN(highlight) || base.match(/^\w+$/) == undefined) { + throw new Error('Bad parameters for highlight layer'); + } + + return { + geometry_field: GEOMETRY_FIELD, + table: `( + SELECT + g.geometry_geom, + '${base}' as base_layer + FROM + geometries as g + WHERE + g.geometry_id = ${highlight} + ) as highlight` + }; +} + +export { + BUILDING_LAYER_DEFINITIONS, + getBuildingsDataConfig, + getHighlightDataConfig +}; diff --git a/app/src/tiles/rendererDefinition.ts b/app/src/tiles/rendererDefinition.ts new file mode 100644 index 00000000..b44f0156 --- /dev/null +++ b/app/src/tiles/rendererDefinition.ts @@ -0,0 +1,78 @@ +import { TileCache } from "./tileCache"; +import { BoundingBox, TileParams } from "./types"; +import { StitchRenderer } from "./renderers/stitchRenderer"; +import { CachedRenderer } from "./renderers/cachedRenderer"; +import { BranchingRenderer } 
from "./renderers/branchingRenderer"; +import { WindowedRenderer } from "./renderers/windowedRenderer"; +import { BlankRenderer } from "./renderers/blankRenderer"; +import { DatasourceRenderer } from "./renderers/datasourceRenderer"; +import { getBuildingsDataConfig, getHighlightDataConfig, BUILDING_LAYER_DEFINITIONS } from "./dataDefinition"; + +/** + * A list of all tilesets handled by the tile server + */ +const allTilesets = ['highlight', ...Object.keys(BUILDING_LAYER_DEFINITIONS)]; + +const buildingDataRenderer = new DatasourceRenderer(getBuildingsDataConfig); + +const stitchRenderer = new StitchRenderer(undefined); // depends recurrently on cache, so parameter will be set later + +/** + * Zoom level when we switch from rendering direct from database to instead composing tiles + * from the zoom level below - gets similar effect, with much lower load on Postgres + */ +const STITCH_THRESHOLD = 12; + +const renderOrStitchRenderer = new BranchingRenderer( + ({ z }) => z <= STITCH_THRESHOLD, + stitchRenderer, // refer to the prepared stitch renderer + buildingDataRenderer +); + +const tileCache = new TileCache( + process.env.TILECACHE_PATH, + { + tilesets: ['date_year', 'size_storeys', 'location', 'likes', 'conservation_area'], + minZoom: 9, + maxZoom: 18, + scales: [1, 2] + }, + ({ tileset, z }: TileParams) => (tileset === 'date_year' && z <= 16) || + ((tileset === 'base_light' || tileset === 'base_night') && z <= 17) || + z <= 13 +); + +const cachedRenderer = new CachedRenderer( + tileCache, + renderOrStitchRenderer +); + +// set up stitch renderer to use the data renderer with caching +stitchRenderer.tileRenderer = cachedRenderer; + +const highlightRenderer = new DatasourceRenderer(getHighlightDataConfig); + +const highlightOrBuildingRenderer = new BranchingRenderer( + ({ tileset }) => tileset === 'highlight', + highlightRenderer, + cachedRenderer +); + +const blankRenderer = new BlankRenderer(); + +/** + * Hard-code extent so we can short-circuit rendering and 
return empty/transparent tiles outside the area of interest + * bbox in CRS epsg:3857 in form: [w, s, e, n] + */ +const EXTENT_BBOX: BoundingBox = [-61149.622628, 6667754.851372, 28128.826409, 6744803.375884]; +const mainRenderer = new WindowedRenderer( + EXTENT_BBOX, + highlightOrBuildingRenderer, + blankRenderer +); + +export { + allTilesets, + mainRenderer, + tileCache +}; diff --git a/app/src/tiles/renderers/blankRenderer.ts b/app/src/tiles/renderers/blankRenderer.ts new file mode 100644 index 00000000..2d3187ed --- /dev/null +++ b/app/src/tiles/renderers/blankRenderer.ts @@ -0,0 +1,21 @@ +import { Image } from "mapnik"; +import sharp from 'sharp'; + +import { TileParams, TileRenderer } from "../types"; + +class BlankRenderer implements TileRenderer { + getTile(tileParams: TileParams): Promise { + return sharp({ + create: { + width: 1, + height: 1, + channels: 4, + background: { r: 0, g: 0, b: 0, alpha: 0 } + } + }).png().toBuffer(); + } +} + +export { + BlankRenderer +}; diff --git a/app/src/tiles/renderers/branchingRenderer.ts b/app/src/tiles/renderers/branchingRenderer.ts new file mode 100644 index 00000000..ce7d78da --- /dev/null +++ b/app/src/tiles/renderers/branchingRenderer.ts @@ -0,0 +1,23 @@ +import { Image } from "mapnik"; + +import { TileParams, TileRenderer } from "../types"; + +class BranchingRenderer { + constructor( + public branchTestFn: (tileParams: TileParams) => boolean, + public trueResultTileRenderer: TileRenderer, + public falseResultTileRenderer: TileRenderer + ) {} + + getTile(tileParams: TileParams, dataParams: any): Promise { + if(this.branchTestFn(tileParams)) { + return this.trueResultTileRenderer.getTile(tileParams, dataParams); + } else { + return this.falseResultTileRenderer.getTile(tileParams, dataParams); + } + } +} + +export { + BranchingRenderer +}; diff --git a/app/src/tiles/renderers/cachedRenderer.ts b/app/src/tiles/renderers/cachedRenderer.ts new file mode 100644 index 00000000..964e75d4 --- /dev/null +++ 
b/app/src/tiles/renderers/cachedRenderer.ts @@ -0,0 +1,32 @@ +import { Image } from "mapnik"; + +import { TileParams, TileRenderer } from "../types"; +import { TileCache } from "../tileCache"; +import { formatParams } from "../util"; + +class CachedRenderer implements TileRenderer { + constructor( + /** Cache to use for tiles */ + public tileCache: TileCache, + + /** Renderer to use when tile hasn't been cached yet */ + public tileRenderer: TileRenderer + ) {} + + async getTile(tileParams: TileParams, dataParams: any): Promise { + try { + const tile = await this.tileCache.get(tileParams); + return tile; + } catch(err) { + const im = await this.tileRenderer.getTile(tileParams, dataParams); + try { + await this.tileCache.put(im, tileParams); + } catch (err) {} + return im; + } + } +} + +export { + CachedRenderer +}; diff --git a/app/src/tiles/renderers/datasourceRenderer.ts b/app/src/tiles/renderers/datasourceRenderer.ts new file mode 100644 index 00000000..674512a5 --- /dev/null +++ b/app/src/tiles/renderers/datasourceRenderer.ts @@ -0,0 +1,105 @@ +import path from 'path'; + +import mapnik from "mapnik"; + +import { TileParams, TileRenderer } from "../types"; +import { getBbox, TILE_SIZE } from "../util"; +import { promisify } from "util"; + +interface DataConfig { + table: string; + geometry_field: string; +} + +const TILE_BUFFER_SIZE = 64; +const PROJ4_STRING = '+proj=merc +a=6378137 +b=6378137 +lat_ts=0.0 +lon_0=0.0 +x_0=0.0 +y_0=0 +k=1.0 +units=m +nadgrids=@null +wktext +no_defs +over'; + +// connection details from environment variables +const DATASOURCE_CONFIG = { + 'host': process.env.PGHOST, + 'dbname': process.env.PGDATABASE, + 'user': process.env.PGUSER, + 'password': process.env.PGPASSWORD, + 'port': process.env.PGPORT, + 'extent': '-20005048.4188,-9039211.13765,19907487.2779,17096598.5401', + 'srid': 3857, + 'type': 'postgis' +}; + +// register datasource adapters for mapnik database connection +if (mapnik.register_default_input_plugins) { + 
mapnik.register_default_input_plugins(); +} +// register fonts for text rendering +mapnik.register_default_fonts(); + + +class DatasourceRenderer implements TileRenderer { + constructor(private getTableDefinitionFn: (tileset: string, dataParams: any) => DataConfig) {} + + async getTile({tileset, z, x, y, scale}: TileParams, dataParams: any): Promise { + const bbox = getBbox(z, x, y); + + const tileSize = TILE_SIZE * scale; + let map = new mapnik.Map(tileSize, tileSize, PROJ4_STRING); + map.bufferSize = TILE_BUFFER_SIZE; + const layer = new mapnik.Layer('tile', PROJ4_STRING); + + const dataSourceConfig = this.getTableDefinitionFn(tileset, dataParams); + + const conf = Object.assign(dataSourceConfig, DATASOURCE_CONFIG); + + const postgis = new mapnik.Datasource(conf); + layer.datasource = postgis; + layer.styles = [tileset]; + + const stylePath = path.join(__dirname, '..', 'map_styles', 'polygon.xml'); + + map = await promisify(map.load.bind(map))(stylePath, {strict: true}); + + map.add_layer(layer); + const im = new mapnik.Image(map.width, map.height); + map.extent = bbox; + const rendered = await promisify(map.render.bind(map))(im, {}); + + return await promisify(rendered.encode.bind(rendered))('png'); + } +} + +function promiseHandler(resolve, reject) { + return function(err, result) { + if(err) reject(err); + else resolve(result); + } +} + +/** + * Utility function which promisifies a method of an object and binds it to the object + * This makes it easier to use callback-based object methods in a promise-based way + * @param obj Object containing the target method + * @param methodName Method name to promisify and return + */ +function promisifyMethod(obj: T, methodName: keyof T); +/** + * @param methodGetter accessor function to get the method from the object + */ +function promisifyMethod(obj: T, methodGetter: (o: T) => S); +function promisifyMethod(obj: T, paramTwo: keyof T | ((o: T) => S)) { + let method; + if (typeof paramTwo === 'string') { + method = 
obj[paramTwo]; + } else if (typeof paramTwo === 'function') { + method = paramTwo(obj); + } + + if (typeof method === 'function') { + return promisify(method.bind(obj)); + } else { + throw new Error(`Cannot promisify non-function property '${paramTwo}'`); + } +} + +export { + DatasourceRenderer, + DataConfig +}; diff --git a/app/src/tiles/renderers/stitchRenderer.ts b/app/src/tiles/renderers/stitchRenderer.ts new file mode 100644 index 00000000..3264fc48 --- /dev/null +++ b/app/src/tiles/renderers/stitchRenderer.ts @@ -0,0 +1,67 @@ +import sharp from 'sharp'; +import { Image } from 'mapnik'; + +import { TileParams, TileRenderer } from "../types"; +import { getBbox, getXYZ, TILE_SIZE, formatParams } from "../util"; + +class StitchRenderer implements TileRenderer { + constructor( + /** Renderer to use when retrieving tiles to be stitched together */ + public tileRenderer: TileRenderer + ) {} + + getTile(tileParams: TileParams, dataParams: any): Promise { + console.log(`Stitching tile ${formatParams(tileParams)}`); + return this.stitchTile(tileParams, dataParams, this.tileRenderer); + } + + private async stitchTile({ tileset, z, x, y, scale }: TileParams, dataParams: any, tileRenderer: TileRenderer) { + const bbox = getBbox(z, x, y); + const nextZ = z + 1; + const nextXY = getXYZ(bbox, nextZ); + const tileSize = TILE_SIZE * scale; + + + const [topLeft, topRight, bottomLeft, bottomRight] = await Promise.all([ + [nextXY.minX, nextXY.minY], + [nextXY.maxX, nextXY.minY], + [nextXY.minX, nextXY.maxY], + [nextXY.maxX, nextXY.maxY] + ].map(([x, y]) => tileRenderer.getTile({ tileset, z: nextZ, x, y, scale }, dataParams))); + + // not possible to chain overlays in a single pipeline, but there may still be a better + // way to create image buffer here (four tiles resize to one at the next zoom level) + // instead of repeatedly creating `sharp` objects, to png, to buffer... 
+ return sharp({ + create: { + width: tileSize * 2, + height: tileSize * 2, + channels: 4, + background: { r: 0, g: 0, b: 0, alpha: 0 } + } + }).overlayWith( + topLeft, { gravity: sharp.gravity.northwest } + ).png().toBuffer().then((buf) => { + return sharp(buf).overlayWith( + topRight, { gravity: sharp.gravity.northeast } + ).png().toBuffer() + }).then((buf) => { + return sharp(buf).overlayWith( + bottomLeft, { gravity: sharp.gravity.southwest } + ).png().toBuffer() + }).then((buf) => { + return sharp(buf).overlayWith( + bottomRight, { gravity: sharp.gravity.southeast } + ).png().toBuffer() + }).then((buf) => { + return sharp(buf + ).resize(tileSize, tileSize, { fit: 'inside' } + ).png().toBuffer() + }) + } + +} + +export { + StitchRenderer +}; diff --git a/app/src/tiles/renderers/windowedRenderer.ts b/app/src/tiles/renderers/windowedRenderer.ts new file mode 100644 index 00000000..b63e901e --- /dev/null +++ b/app/src/tiles/renderers/windowedRenderer.ts @@ -0,0 +1,34 @@ +import { Image } from "mapnik"; + +import { BoundingBox, TileParams, TileRenderer } from "../types"; +import { getXYZ } from "../util"; + +class WindowedRenderer implements TileRenderer { + constructor( + /** Bounding box defining the renderer window */ + public bbox: BoundingBox, + + /** Renderer to use for tile requests inside window */ + public insideWindowRenderer: TileRenderer, + + /** Renderer to use for tile requests outside window */ + public outsideWindowRenderer: TileRenderer + ) {} + + getTile(tileParams: TileParams, dataParams: any): Promise { + if(this.isOutsideExtent(tileParams)) { + return this.outsideWindowRenderer.getTile(tileParams, dataParams); + } else { + return this.insideWindowRenderer.getTile(tileParams, dataParams); + } + } + + private isOutsideExtent({x, y, z}: TileParams) { + const xy = getXYZ(this.bbox, z); + return xy.minY > y || xy.maxY < y || xy.minX > x || xy.maxX < x; + } +} + +export { + WindowedRenderer +}; diff --git a/app/src/tiles/tile.ts 
b/app/src/tiles/tile.ts deleted file mode 100644 index 56da53f2..00000000 --- a/app/src/tiles/tile.ts +++ /dev/null @@ -1,201 +0,0 @@ -/** - * Render tiles - * - * Use mapnik to render map tiles from the database - * - * Styles have two sources of truth for colour ranges (could generate from single source?) - * - XML style definitions in app/map_styles/polygon.xml - * - front-end legend in app/src/frontend/legend.js - * - * Data is provided by the queries in MAP_STYLE_TABLE_DEFINITIONS below. - * - */ -import path from 'path'; -import mapnik from 'mapnik'; -import SphericalMercator from '@mapbox/sphericalmercator'; - -// connection details from environment variables -const DATASOURCE_CONFIG = { - 'host': process.env.PGHOST, - 'dbname': process.env.PGDATABASE, - 'user': process.env.PGUSER, - 'password': process.env.PGPASSWORD, - 'port': process.env.PGPORT, - 'geometry_field': 'geometry_geom', - 'extent': '-20005048.4188,-9039211.13765,19907487.2779,17096598.5401', - 'srid': 3857, - 'type': 'postgis' -} - -const TILE_SIZE = 256 -const TILE_BUFFER_SIZE = 64 -const PROJ4_STRING = '+proj=merc +a=6378137 +b=6378137 +lat_ts=0.0 +lon_0=0.0 +x_0=0.0 +y_0=0 +k=1.0 +units=m +nadgrids=@null +wktext +no_defs +over'; - -// Mapnik uses table definitions to query geometries and attributes from PostGIS. 
-// The queries here are eventually used as subqueries when Mapnik fetches data to render a -// tile - so given a table definition like: -// (SELECT geometry_geom FROM geometries) as def -// Mapnik will wrap it in a bbox query and PostGIS will eventually see something like: -// SELECT AsBinary("geometry") AS geom from -// (SELECT geometry_geom FROM geometries) as def -// WHERE "geometry" && SetSRID('BOX3D(0,1,2,3)'::box3d, 3857) -// see docs: https://github.com/mapnik/mapnik/wiki/OptimizeRenderingWithPostGIS -const MAP_STYLE_TABLE_DEFINITIONS = { - base_light: `( - SELECT - b.location_number as location_number, - g.geometry_geom - FROM - geometries as g, - buildings as b - WHERE - g.geometry_id = b.geometry_id - ) as outline`, - base_night: `( - SELECT - b.location_number as location_number, - g.geometry_geom - FROM - geometries as g, - buildings as b - WHERE - g.geometry_id = b.geometry_id - ) as outline`, - date_year: `( - SELECT - b.date_year as date_year, - g.geometry_geom - FROM - geometries as g, - buildings as b - WHERE - g.geometry_id = b.geometry_id - ) as outline`, - size_storeys: `( - SELECT - ( - coalesce(b.size_storeys_attic, 0) + - coalesce(b.size_storeys_core, 0) - ) as size_storeys, - g.geometry_geom - FROM - geometries as g, - buildings as b - WHERE - g.geometry_id = b.geometry_id - ) as outline`, - location: `( - SELECT - ( - case when b.location_name is null then 0 else 1 end + - case when b.location_number is null then 0 else 1 end + - case when b.location_street is null then 0 else 1 end + - case when b.location_line_two is null then 0 else 1 end + - case when b.location_town is null then 0 else 1 end + - case when b.location_postcode is null then 0 else 1 end + - case when b.location_latitude is null then 0 else 1 end + - case when b.location_longitude is null then 0 else 1 end + - case when b.ref_toid is null then 0 else 1 end + - case when b.ref_osm_id is null then 0 else 1 end - ) as location_info_count, - g.geometry_geom - FROM - 
geometries as g, - buildings as b - WHERE - g.geometry_id = b.geometry_id - ) as location`, - likes: `( - SELECT - g.geometry_geom, - b.likes_total as likes - FROM - geometries as g, - buildings as b - WHERE - g.geometry_id = b.geometry_id - AND b.likes_total > 0 - ) as location`, - conservation_area: `( - SELECT - g.geometry_geom - FROM - geometries as g, - buildings as b - WHERE - g.geometry_id = b.geometry_id - AND b.planning_in_conservation_area = true - ) as conservation_area` -} - -// register datasource adapters for mapnik database connection -if (mapnik.register_default_input_plugins) { - mapnik.register_default_input_plugins(); -} -// register fonts for text rendering -mapnik.register_default_fonts(); - -const mercator = new SphericalMercator({ - size: TILE_SIZE -}); - -function getBbox(z, x, y) { - return mercator.bbox(x, y, z, false, '900913'); -} - -function getXYZ(bbox, z) { - return mercator.xyz(bbox, z, false, '900913') -} - -function renderTile(tileset, z, x, y, geometryId, cb) { - const bbox = getBbox(z, x, y) - - const map = new mapnik.Map(TILE_SIZE, TILE_SIZE, PROJ4_STRING); - map.bufferSize = TILE_BUFFER_SIZE; - const layer = new mapnik.Layer('tile', PROJ4_STRING); - - const tableDefinition = (tileset === 'highlight') ? 
- getHighlightTableDefinition(geometryId) - : MAP_STYLE_TABLE_DEFINITIONS[tileset]; - - const conf = Object.assign({ table: tableDefinition }, DATASOURCE_CONFIG) - - var postgis; - try { - postgis = new mapnik.Datasource(conf); - layer.datasource = postgis; - layer.styles = [tileset] - - map.load( - path.join(__dirname, '..', 'map_styles', 'polygon.xml'), - { strict: true }, - function (err, map) { - if (err) {throw err} - - map.add_layer(layer) - const im = new mapnik.Image(map.width, map.height) - map.extent = bbox - map.render(im, {}, (err, rendered) => { - if (err) {throw err} - rendered.encode('png', cb) - }); - } - ) - } catch (err) { - console.error(err); - } -} - -// highlight single geometry, requires geometryId in the table query -function getHighlightTableDefinition(geometryId) { - return `( - SELECT - g.geometry_geom - FROM - geometries as g - WHERE - g.geometry_id = ${geometryId} - ) as highlight` -} - -export { getBbox, getXYZ, renderTile, TILE_SIZE }; diff --git a/app/src/tiles/tileCache.ts b/app/src/tiles/tileCache.ts new file mode 100644 index 00000000..27ab060e --- /dev/null +++ b/app/src/tiles/tileCache.ts @@ -0,0 +1,144 @@ +/** + * Cache tiles (PNG images generated from database) + * + * Frequency of change: + * - base layer tiles change rarely - on changes to underlying geometry table + * - visualisation layer tiles change frequently - with almost any edit to the buildings table + * + * Cost of generation and storage: + * - low zoom tiles are more expensive to render, containing more features from the database + * - high zoom tiles are cheaper to rerender, and changes are more visible + * - there are many more high zoom tiles than low: 4 tiles at zoom level n+1 for each tile + * at zoom level n + * + */ + +// Using node-fs package to patch fs +// for node >10 we could drop this in favour of fs.mkdir (which has recursive option) +// and then use stdlib `import fs from 'fs';` +import fs from 'node-fs'; +import { promisify } from 'util' +import { 
/**
 * Filesystem-backed cache of rendered tiles.
 *
 * Each tile is stored as a PNG file at
 * `<basePath>/<tileset>/<z>/<x>/<y>[@<scale>x].png` (no suffix for scale 1).
 * Reads and writes are only attempted for tiles inside the configured cache
 * domain that are also accepted by the optional custom predicate.
 */
class TileCache {
    constructor(
        /** Base path in filesystem to store the cache */
        private basePath: string,
        /** Domain definition for the cache */
        private cacheDomain: CacheDomain,
        /**
         * Function for defining custom caching rules (optional).
         * The parameter must be named explicitly: `(TileParams) => boolean` would
         * declare a parameter *called* TileParams with an implicit `any` type.
         */
        private shouldCacheFn?: (tileParams: TileParams) => boolean
    ) {}

    /**
     * Read a tile from the cache.
     *
     * @param tileParams identifies the tile to read
     * @returns the contents of the cached tile file
     * @throws Error if the tile is outside the cache domain; rejects with the
     *         underlying read error if the file cannot be read (e.g. not cached yet)
     */
    async get(tileParams: TileParams): Promise<Image> {
        if (!this.shouldUseCache(tileParams)) {
            throw new Error(`Skip cache get ${formatParams(tileParams)}`);
        }
        const location = this.cacheLocation(tileParams);
        return readFile(location.fname);
    }

    /**
     * Write a tile to the cache, creating the tile directory on demand.
     *
     * @param im tile image data to store
     * @param tileParams identifies the tile being stored
     * @throws Error if the tile is outside the cache domain; rejects with the
     *         underlying write error for anything other than a missing directory
     */
    async put(im: Image, tileParams: TileParams): Promise<void> {
        if (!this.shouldUseCache(tileParams)) {
            throw new Error(`Skip cache put ${formatParams(tileParams)}`);
        }

        const location = this.cacheLocation(tileParams);
        try {
            await writeFile(location.fname, im, 'binary');
        } catch (err) {
            // ENOENT here means the tile directory does not exist yet:
            // create it (recursively) and retry the write once.
            if (err.code === 'ENOENT') {
                await mkdir(location.dir, 0o755, true);
                await writeFile(location.fname, im, 'binary');
            } else {
                throw err;
            }
        }
    }

    /**
     * Remove a single tile from the cache (best effort - never rejects).
     *
     * @param tileParams identifies the tile to expire
     */
    async remove(tileParams: TileParams): Promise<void> {
        const location = this.cacheLocation(tileParams);
        try {
            await unlink(location.fname);
        } catch (err) {
            // A missing file just means the tile was never cached. Anything else
            // (permissions, I/O errors) is reported instead of silently dropped,
            // but still does not fail the expiry.
            if (err == undefined || err.code !== 'ENOENT') {
                console.error(err);
            }
        }
        console.log(`Expire cache ${formatParams(tileParams)}`);
    }

    /**
     * Remove every cached tile that intersects a bounding box, across all
     * tilesets, zoom levels and scales of the cache domain.
     *
     * @param bbox bounding box whose tiles should be expired
     * @returns a promise settled once all individual removals have finished
     */
    async removeAllAtBbox(bbox: BoundingBox): Promise<void[]> {
        const removePromises: Promise<void>[] = [];
        for (const tileset of this.cacheDomain.tilesets) {
            for (let z = this.cacheDomain.minZoom; z <= this.cacheDomain.maxZoom; z++) {
                const tileBounds = getXYZ(bbox, z);
                for (let x = tileBounds.minX; x <= tileBounds.maxX; x++) {
                    for (let y = tileBounds.minY; y <= tileBounds.maxY; y++) {
                        for (const scale of this.cacheDomain.scales) {
                            removePromises.push(this.remove({tileset, z, x, y, scale}));
                        }
                    }
                }
            }
        }
        return Promise.all(removePromises);
    }

    /** Compute the directory and full file name used to store a tile. */
    private cacheLocation({tileset, z, x, y, scale}: TileParams): CacheLocation {
        const dir = `${this.basePath}/${tileset}/${z}/${x}`;
        const scaleSuffix = scale === 1 ? '' : `@${scale}x`;
        const fname = `${dir}/${y}${scaleSuffix}.png`;
        return { dir, fname };
    }

    /**
     * Whether a tile belongs to the cache domain (tileset, zoom range, scale)
     * and is accepted by the custom predicate, when one was supplied.
     */
    private shouldUseCache(tileParams: TileParams): boolean {
        return this.cacheDomain.tilesets.includes(tileParams.tileset) &&
            this.cacheDomain.minZoom <= tileParams.z &&
            this.cacheDomain.maxZoom >= tileParams.z &&
            this.cacheDomain.scales.includes(tileParams.scale) &&
            (this.shouldCacheFn == undefined || this.shouldCacheFn(tileParams));
    }
}
/**
 * Express handler that serves a single map tile as a PNG image.
 *
 * Route parameters are validated by `parseTileParams`; a validation failure is
 * logged and answered with a 400 carrying the validation message. The query
 * string is passed to the renderer unchanged as the tile's data parameters.
 * Rendering failures are logged and answered with a 500.
 */
const handleTileRequest = asyncController(async function (req: express.Request, res: express.Response) {
    let tileParams: TileParams;
    try {
        tileParams = parseTileParams(req.params);
    } catch(err) {
        console.error(err);
        return res.status(400).send({error: err.message});
    }
    const dataParams = req.query;

    try {
        const im = await mainRenderer.getTile(tileParams, dataParams);
        res.writeHead(200, { 'Content-Type': 'image/png' });
        res.end(im);
    } catch(err) {
        console.error(err);
        // An Error instance JSON-serialises to `{}` (message and stack are
        // non-enumerable), so send its message text instead of the object.
        // Non-Error values are passed through as before.
        const message = err instanceof Error ? err.message : err;
        res.status(500).send({ error: message });
    }
});

// tiles router
const router = express.Router()

// y is restricted to digits so that an optional `@Nx` scale suffix can follow it
router.get('/:tileset/:z/:x/:y(\\d+):scale(@\\dx)?.png', handleTileRequest);

/**
 * Validate raw route parameters and convert them to typed tile parameters.
 *
 * @param params raw route parameters: `tileset`, `z`, `x`, `y` and optional `scale`
 * @returns tile parameters with integer coordinates and a numeric scale
 *          (1 when the scale is absent or `@1x`, 2 for `@2x`)
 * @throws Error `Invalid value for <name>` naming the first parameter that fails validation
 */
function parseTileParams(params: any): TileParams {
    const { tileset, z, x, y, scale } = params;

    if (!allTilesets.includes(tileset)) throw new Error('Invalid value for tileset');

    const intZ = strictParseInt(z);
    if (isNaN(intZ)) throw new Error('Invalid value for z');

    const intX = strictParseInt(x);
    if (isNaN(intX)) throw new Error('Invalid value for x');

    const intY = strictParseInt(y);
    if (isNaN(intY)) throw new Error('Invalid value for y');

    let intScale: number;
    if (scale === '@2x') {
        intScale = 2;
    } else if (scale === '@1x' || scale == undefined) {
        intScale = 1;
    } else {
        // the route pattern admits any single digit (e.g. `@3x`); only 1 and 2 are supported
        throw new Error('Invalid value for scale');
    }

    return {
        tileset,
        z: intZ,
        x: intX,
        y: intY,
        scale: intScale
    };
}

// anything that did not match the tile route
router.use((req, res) => {
    return res.status(404).send('Tile not found');
});

export default router;
/**
 * Render tile parameters as a short human-readable identifier,
 * `<tileset>/<z>/<x>/<y>@<scale>x`, as used in log and error messages.
 *
 * @param params parameters of the tile to describe
 * @returns the formatted identifier
 */
function formatParams(params: TileParams): string {
    const { tileset, z, x, y, scale } = params;
    const tilePath = `${tileset}/${z}/${x}/${y}`;
    const scaleSuffix = `@${scale}x`;
    return tilePath + scaleSuffix;
}
location_town, + location_postcode, + location_latitude, + location_longitude, + date_year, + date_lower, + date_upper, + date_source, + date_source_detail, + facade_year, + facade_upper, + facade_lower, + facade_source, + facade_source_detail, + size_storeys_attic, + size_storeys_core, + size_storeys_basement, + size_height_apex, + size_floor_area_ground, + size_floor_area_total, + size_width_frontage, + likes_total, + planning_portal_link, + planning_in_conservation_area, + planning_conservation_area_name, + planning_in_list, + planning_list_id, + planning_heritage_at_risk_id, + planning_world_list_id, + planning_in_glher, + planning_glher_url, + planning_in_apa, + planning_apa_name, + planning_apa_tier, + planning_in_local_list, + planning_local_list_url, + planning_in_historic_area_assessment, + planning_historic_area_assessment_url, + planning_list_cat, + planning_list_grade, + date_link +FROM buildings \ No newline at end of file diff --git a/maintenance/extract_data/export_edit_history.sql b/maintenance/extract_data/export_edit_history.sql new file mode 100644 index 00000000..d142b3fc --- /dev/null +++ b/maintenance/extract_data/export_edit_history.sql @@ -0,0 +1,3 @@ +SELECT log_id as revision_id, log_timestamp as revision_timestamp, building_id, forward_patch, reverse_patch, u.username as user +FROM logs l +JOIN users u ON l.user_id = u.user_id \ No newline at end of file diff --git a/maintenance/extract_data/export_uprns.sql b/maintenance/extract_data/export_uprns.sql new file mode 100644 index 00000000..cb6378ef --- /dev/null +++ b/maintenance/extract_data/export_uprns.sql @@ -0,0 +1,3 @@ +SELECT building_id, uprn, parent_uprn +FROM building_properties +WHERE building_id IS NOT NULL \ No newline at end of file diff --git a/maintenance/extract_data/extract_data.py b/maintenance/extract_data/extract_data.py new file mode 100644 index 00000000..666abc6a --- /dev/null +++ b/maintenance/extract_data/extract_data.py @@ -0,0 +1,118 @@ +#!/usr/bin/env python3 + 
def make_data_extract(current_time, connection, zip_file_path):
    """Write a bulk data extract archive and record it in the database.

    The archive contains three CSV files built from the module-level export
    queries: ``building_attributes.csv``, ``building_uprns.csv`` and
    ``edit_history.csv``. Once the archive has been written, a row describing
    it is added via ``add_extract_record_to_database``.

    If anything fails after the archive has been started, the partial archive
    is deleted and the original exception is re-raised.

    :param current_time: extraction time stored with the database record
    :param connection: open database connection used for queries and the record
    :param zip_file_path: ``pathlib.Path`` of the archive to create
    :raises ZipFileExistsError: if ``zip_file_path`` already exists (the
        existing file is left untouched)
    """
    if zip_file_path.exists():
        raise ZipFileExistsError('Archive file under specified name already exists')

    zip_file_path.parent.mkdir(parents=True, exist_ok=True)

    try:
        with zipfile.ZipFile(zip_file_path, mode='w') as newzip:
            newzip.writestr('building_attributes.csv',
                            db_to_csv(connection, building_attr_query))
            newzip.writestr('building_uprns.csv',
                            db_to_csv(connection, building_uprn_query))
            newzip.writestr('edit_history.csv',
                            db_to_csv(connection, edit_history_query))

            # TODO: add README

        # NOTE(review): indentation of the original is not recoverable here; the
        # record is written once the archive has been closed - confirm intent.
        add_extract_record_to_database(connection, zip_file_path, current_time)
    except BaseException:
        # Clean up the partial archive, then re-raise. The archive may not exist
        # (it was never created, or has already been removed); a failing unlink
        # must not replace the error that actually caused the failure.
        try:
            zip_file_path.unlink()
        except FileNotFoundError:
            pass
        raise
-- Registry of generated bulk data extracts: extraction time and archive path.
-- Rows are inserted by maintenance/extract_data/extract_data.py.
CREATE TABLE IF NOT EXISTS bulk_extracts (
    extract_id serial PRIMARY KEY,
    extracted_on timestamptz NOT NULL,
    extract_path text NOT NULL
);

-- convert all existing timestamp columns to timestamptz assuming UTC
-- (AT TIME ZONE 'UTC' interprets each stored zone-less value as UTC)
ALTER TABLE logs ALTER log_timestamp TYPE timestamptz USING log_timestamp AT TIME ZONE 'UTC';
ALTER TABLE users
    ALTER registered TYPE timestamptz USING registered AT TIME ZONE 'UTC',
    ALTER deleted_on TYPE timestamptz USING deleted_on AT TIME ZONE 'UTC';
ALTER TABLE user_password_reset_tokens ALTER expires_on TYPE timestamptz USING expires_on AT TIME ZONE 'UTC';