diff --git a/building_cleanup_workflow.py b/building_cleanup_workflow.py
new file mode 100644
index 0000000..0ebab9b
--- /dev/null
+++ b/building_cleanup_workflow.py
@@ -0,0 +1,144 @@
+"""
+building_cleanup_workflow module
+The workflow of cleaning and updating the Montreal Buildings dataset.
+Project Developer: Alireza Adli alireza.adli@concordia.ca
+"""
+
+from scrub_layer import *
+from config import *
+
+# Making folders for the output data layers
+create_output_folders(output_paths, output_paths_dir)
+
+# Initialize the input data layers
+nrcan = ScrubLayer(qgis_path, input_paths['NRCan'], 'NRCan')
+geo_index = ScrubLayer(qgis_path, input_paths['GeoIndex'], 'GeoIndex')
+property_assessment = \
+    ScrubLayer(
+        qgis_path, input_paths['Property Assessment'], 'Property Assessment')
+montreal_boundary = \
+    ScrubLayer(qgis_path, input_paths['Montreal Boundary'], 'Montreal Boundary')
+
+# Processing the NRCan layer includes fixing its geometries
+print('Processing the NRCan layer')
+print(nrcan)
+nrcan.create_spatial_index()
+nrcan.fix_geometries(output_paths['Fixed NRCan'])
+
+# Defining a new layer for the fixed NRCan
+nrcan_fixed = \
+    ScrubLayer(qgis_path, output_paths['Fixed NRCan'], 'Fixed NRCan')
+nrcan_fixed.create_spatial_index()
+print(nrcan_fixed)
+
+# Processing the GeoIndex layer includes fixing its geometries and
+# clipping it based on the Montreal boundary data layer
+print('Processing the GeoIndex layer')
+print(geo_index)
+geo_index.create_spatial_index()
+geo_index.fix_geometries(output_paths['Fixed GeoIndex'])
+
+# Defining a new layer for the fixed GeoIndex
+geo_index_fixed = ScrubLayer(qgis_path, output_paths['Fixed GeoIndex'],
+                             'Fixed GeoIndex')
+geo_index_fixed.create_spatial_index()
+print(geo_index_fixed)
+geo_index_fixed.clip_layer(montreal_boundary.layer_path,
+                           output_paths['Clipped Fixed GeoIndex'])
+geo_index_clipped = \
+    ScrubLayer(qgis_path,
+               output_paths['Clipped Fixed GeoIndex'], 'Clipped Fixed GeoIndex')
+geo_index_clipped.create_spatial_index()
+print(geo_index_clipped)
+
+# Processing the Property Assessment layer includes a pairwise clip and
+# two spatial joins, with the NRCan and GeoIndex layers respectively
+
+print(property_assessment)
+property_assessment.create_spatial_index()
+
+# For the pairwise clip, the number of overlay layers can be chosen
+# (that is, the number of splits of the NRCan layer). This improves
+# performance but may increase duplicates. It is done because using the
+# NRCan layer as a whole makes the clipping process crash.
+
+# First we split the overlay layer into the desired number of partitions
+nrcan_fixed.split_layer(120, output_paths['Splitted NRCans'])
+
+# Clipping is done partition by partition over the split NRCan layers
+clipping_property_assessment = """
+from config import *
+
+property_assessment.clip_by_multiple(
+    120, output_paths['Splitted NRCans'],
+    output_paths['Pairwise Clipped Property Assessment Partitions'])"""
+
+exec(clipping_property_assessment)
+
+property_assessment.merge_layers(
+    output_paths['Pairwise Clipped Property Assessment Partitions'],
+    output_paths['Pairwise Clipped Merged Property Assessment'])
+
+clipped_property_assessment = ScrubLayer(
+    qgis_path,
+    output_paths['Pairwise Clipped Merged Property Assessment'],
+    'Clipped Property Assessment')
+
+print(clipped_property_assessment)
+clipped_property_assessment.create_spatial_index()
+
+clipped_property_assessment.spatial_join(
+    nrcan_fixed.layer_path,
+    output_paths['Property Assessment and NRCan'])
+
+property_assessment_nrcan = ScrubLayer(
+    qgis_path,
+    output_paths['Property Assessment and NRCan'],
+    'Property Assessment and NRCan')
+
+print(property_assessment_nrcan)
+property_assessment_nrcan.create_spatial_index()
+
+property_assessment_nrcan.spatial_join(
+    geo_index_clipped.layer_path,
+    output_paths['Property Assessment and NRCan and GeoIndex'])
+
+property_assessment_nrcan_geo = ScrubLayer(
+    qgis_path,
+    output_paths['Property Assessment and NRCan and GeoIndex'],
+    'Property Assessment and NRCan and GeoIndex')
+
+print(property_assessment_nrcan_geo)
+property_assessment_nrcan_geo.create_spatial_index()
+
+property_assessment_nrcan_geo.delete_duplicates(
+    output_paths['Deleted Duplicates Layer'])
+
+deleted_dups_layer = ScrubLayer(
+    qgis_path,
+    output_paths['Deleted Duplicates Layer'],
+    'Deleted Duplicates Layer')
+
+print(deleted_dups_layer)
+deleted_dups_layer.create_spatial_index()
+
+deleted_dups_layer.multipart_to_singleparts(
+    output_paths['Single Parts Layer'])
+
+single_parts_layer = ScrubLayer(
+    qgis_path,
+    output_paths['Single Parts Layer'],
+    'Single Parts Layer')
+
+print(single_parts_layer)
+single_parts_layer.create_spatial_index()
+
+# Add an area field
+single_parts_layer.add_field('Area')
+single_parts_layer.assign_area('Area')
+dismissive_area = 15
+single_parts_layer.conditional_delete_record('Area', '<', dismissive_area)
+
+print(f'After removing buildings with an area of less than '
+      f'{dismissive_area} square meters:')
+print(single_parts_layer)
+
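Note (not part of the patch): `ScrubLayer` and the workflow above call `processing.run`, which only works inside an initialized QGIS environment. Below is a minimal sketch, assuming a standard Windows QGIS 3.34 install, of the standalone PyQGIS bootstrapping such a script usually needs; the plugin path and the way the workflow is launched are assumptions.

```python
# Minimal standalone PyQGIS bootstrap (sketch, paths are assumptions).
import sys

from qgis.core import QgsApplication

# Must match the local installation; same value as qgis_path in config.py.
QgsApplication.setPrefixPath('C:/Program Files/QGIS 3.34.1/apps/qgis', True)
qgs = QgsApplication([], False)   # False: no GUI
qgs.initQgis()

# Make the Processing framework importable before any processing.run call.
sys.path.append('C:/Program Files/QGIS 3.34.1/apps/qgis/python/plugins')
import processing
from processing.core.Processing import Processing
Processing.initialize()

# ... run the workflow, e.g. exec(open('building_cleanup_workflow.py').read()) ...

qgs.exitQgis()
```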
diff --git a/config.py b/config.py
new file mode 100644
index 0000000..ef5226f
--- /dev/null
+++ b/config.py
@@ -0,0 +1,42 @@
+"""
+config module
+Project Developer: Alireza Adli alireza.adli@concordia.ca
+"""
+
+# Application's path
+qgis_path = 'C:/Program Files/QGIS 3.34.1/apps/qgis'
+
+# Gathering the input data layers' paths
+input_paths = {
+    'NRCan':
+        'C:/Users/a_adli/PycharmProjects/hydroquebec_archetype_gispy/'
+        'data/input_data/nrcan/Autobuilding_QC_VILLE_MONTREAL.shp',
+    'GeoIndex':
+        'C:/Users/a_adli/PycharmProjects/hydroquebec_archetype_gispy/'
+        'data/input_data/Geoindex_81670/mamh_usage_predo_2022_s_poly.shp',
+    'Property Assessment':
+        'C:/Users/a_adli/PycharmProjects/hydroquebec_archetype_gispy/'
+        'data/input_data/property_assessment/uniteevaluationfonciere.shp',
+    'Montreal Boundary':
+        'C:/Users/a_adli/PycharmProjects/hydroquebec_archetype_gispy/'
+        'data/input_data/montreal_boundary/Montreal_boundary.shp'
+}
+
+# Defining a directory for all the output data layers
+output_paths_dir = \
+    'C:/Users/a_adli/PycharmProjects/hydroquebec_archetype_gispy/' \
+    'data/gisoo_workflow_output'
+
+# Placeholders for the output data layer paths
+output_paths = {
+    'Fixed NRCan': '',
+    'Fixed GeoIndex': '',
+    'Clipped Fixed GeoIndex': '',
+    'Splitted NRCans': '',
+    'Pairwise Clipped Property Assessment Partitions': '',
+    'Pairwise Clipped Merged Property Assessment': '',
+    'Property Assessment and NRCan': '',
+    'Property Assessment and NRCan and GeoIndex': '',
+    'Deleted Duplicates Layer': '',
+    'Single Parts Layer': ''
+}
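Aside (a possible refactoring, not part of the patch): assuming the project keeps its current folder layout, the repeated absolute prefixes in config.py could be derived from a single `pathlib` root, for example:

```python
# Sketch: build the config paths from one project root instead of repeating
# the absolute prefix in every entry. Paths mirror the ones in config.py.
from pathlib import Path

PROJECT_ROOT = Path('C:/Users/a_adli/PycharmProjects/hydroquebec_archetype_gispy')
DATA_DIR = PROJECT_ROOT / 'data' / 'input_data'

input_paths = {
    'NRCan': str(DATA_DIR / 'nrcan' / 'Autobuilding_QC_VILLE_MONTREAL.shp'),
    'GeoIndex': str(DATA_DIR / 'Geoindex_81670' / 'mamh_usage_predo_2022_s_poly.shp'),
    'Property Assessment': str(DATA_DIR / 'property_assessment' / 'uniteevaluationfonciere.shp'),
    'Montreal Boundary': str(DATA_DIR / 'montreal_boundary' / 'Montreal_boundary.shp'),
}

output_paths_dir = str(PROJECT_ROOT / 'data' / 'gisoo_workflow_output')
```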
diff --git a/helpers.py b/helpers.py
new file mode 100644
index 0000000..416dabd
--- /dev/null
+++ b/helpers.py
@@ -0,0 +1,75 @@
+"""
+helpers module
+A number of functionalities that help the project
+but cannot be a part of the PyQGIS tool.
+Project Developer: Alireza Adli alireza.adli@concordia.ca
+"""
+
+import os
+import glob
+import processing
+
+from qgis.core import QgsApplication
+from qgis.analysis import QgsNativeAlgorithms
+
+
+def find_shp_files(root_folder):
+    shp_files = []
+    # Sort folders alphabetically
+    for foldername, _, _ in sorted(os.walk(root_folder)):
+        for filename in sorted(glob.glob(os.path.join(foldername, '*.shp'))):
+            new_file_name = filename.replace('\\', r'/')
+            shp_files.append(new_file_name)
+    return shp_files
+
+
+def find_las_files(root_folder):
+    las_files = []
+    # Sort folders alphabetically
+    for foldername, _, _ in sorted(os.walk(root_folder)):
+        for filename in sorted(glob.glob(os.path.join(foldername, '*.las'))):
+            new_file_name = filename.replace('\\', r'/')
+            las_files.append(new_file_name)
+    return las_files
+
+
+def create_folders(directory, num_folders):
+    """
+    Create a specified number of folders in the given directory.
+
+    Args:
+    - directory (str): The directory where folders will be created.
+    - num_folders (int): The number of folders to create.
+    """
+    # Check if the directory exists; if not, create it
+    if not os.path.exists(directory):
+        os.makedirs(directory)
+
+    # Create folders
+    for i in range(num_folders):
+        folder_name = f"layer_{i}"
+        folder_path = os.path.join(directory, folder_name)
+        os.makedirs(folder_path)
+        print(f"Created folder: {folder_path}")
+
+
+def create_output_folders(paths_dict, output_dir):
+    for path in paths_dict.keys():
+        new_folder = path.lower().replace(' ', '_')
+        output_path = output_dir + '/' + new_folder
+        os.mkdir(output_path)
+        # Keys ending in 's' (partition folders) map to a directory;
+        # the rest map to a single shapefile path inside the new folder.
+        if path[-1] != 's':
+            paths_dict[path] = output_path + f'/{new_folder}.shp'
+        else:
+            paths_dict[path] = output_path
+
+
+def merge_las_layers(layers_path, merged_layer_path):
+    merging_layers = find_las_files(layers_path)
+    QgsApplication.processingRegistry().addProvider(QgsNativeAlgorithms())
+
+    params = {'LAYERS': merging_layers,
+              'CRS': None,
+              'OUTPUT': merged_layer_path}
+
+    processing.run("native:mergevectorlayers", params)
\ No newline at end of file
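For orientation, a small hypothetical usage sketch of the helpers above (the output directory is invented); it illustrates the naming convention that `create_output_folders` applies to the keys of the paths dictionary.

```python
# Assumes a QGIS Python environment, since helpers.py imports `processing`.
from helpers import create_output_folders, find_shp_files

paths = {'Fixed NRCan': '', 'Splitted NRCans': ''}
create_output_folders(paths, 'C:/temp/gisoo_output')   # hypothetical directory
# paths['Fixed NRCan']     -> 'C:/temp/gisoo_output/fixed_nrcan/fixed_nrcan.shp'
# paths['Splitted NRCans'] -> 'C:/temp/gisoo_output/splitted_nrcans'

# Recursively collects .shp paths, sorted, with forward slashes.
shapefiles = find_shp_files(paths['Splitted NRCans'])
```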
diff --git a/scrub_layer.py b/scrub_layer.py
new file mode 100644
index 0000000..087fe73
--- /dev/null
+++ b/scrub_layer.py
@@ -0,0 +1,268 @@
+"""
+scrub_layer module
+PyQGIS functionalities that are needed in the cleaning and updating of the
+Montreal Buildings dataset project, gathered in one class.
+Project Developer: Alireza Adli alireza.adli@concordia.ca
+"""
+
+from qgis.core import QgsApplication, QgsField, QgsProject, \
+    QgsProcessingFeedback, QgsVectorLayer, QgsVectorDataProvider, \
+    QgsExpressionContext, QgsExpressionContextUtils, edit, QgsFeatureRequest, \
+    QgsExpression, QgsVectorFileWriter, QgsCoordinateReferenceSystem
+from qgis.PyQt.QtCore import QVariant
+from qgis.analysis import QgsNativeAlgorithms
+from helpers import *
+import processing
+
+
+class ScrubLayer:
+    def __init__(self, qgis_path, layer_path, layer_name):
+
+        self.qgis_path = qgis_path
+        # Set the path to the QGIS installation
+        QgsApplication.setPrefixPath(self.qgis_path, True)
+
+        self.layer_path = layer_path
+        self.layer_name = layer_name
+        self.layer = self.load_layer()
+        self.data_count = self.layer.featureCount()
+
+    def duplicate_layer(self, output_path):
+        options = QgsVectorFileWriter.SaveVectorOptions()
+        options.driverName = 'ESRI Shapefile'
+
+        duplication = QgsVectorFileWriter.writeAsVectorFormat(
+            self.layer,
+            output_path,
+            options
+        )
+
+        if duplication == QgsVectorFileWriter.NoError:
+            print("Shapefile successfully duplicated")
+        else:
+            print(f"Error duplicating shapefile: {duplication}")
+
+    def get_cell(self, fid, field_name):
+        return self.layer.getFeature(fid)[field_name]
+
+    def select_cells(
+            self,
+            field_name, field_value, required_field,
+            return_one_value=False):
+        """Returns the value of a field
+        based on the value of another field in the same record"""
+        expression = QgsExpression(f'{field_name} = {field_value}')
+        request = QgsFeatureRequest(expression)
+        features = self.layer.getFeatures(request)
+        field_field_values = []
+        for feature in features:
+            field_field_values.append(feature[required_field])
+        if return_one_value and field_field_values:
+            return field_field_values[0]
+        return field_field_values
+
+    def load_layer(self):
+        the_layer = QgsVectorLayer(self.layer_path, self.layer_name, 'ogr')
+        if not the_layer.isValid():
+            raise ValueError(
+                f'Failed to load layer {self.layer_name} from {self.layer_path}')
+        else:
+            QgsProject.instance().addMapLayer(the_layer)
+            return the_layer
+
+    def features_to_layers(self, layers_dir, crs):
+        create_folders(layers_dir, self.data_count)
+        target_crs = QgsCoordinateReferenceSystem(crs)
+        for feature in self.layer.getFeatures():
+            new_layer = QgsVectorLayer(
+                f'Polygon?crs={crs}', "feature_layer", "memory")
+            new_layer.setCrs(target_crs)
+
+            new_provider = new_layer.dataProvider()
+            new_provider.addFeatures([feature])
+
+            feature_id = feature.id()
+            output_path = f'{layers_dir}layer_{feature_id}/layer_{feature_id}.shp'
+
+            QgsVectorFileWriter.writeAsVectorFormat(
+                new_layer,
+                output_path,
+                'utf-8',
+                new_layer.crs(),
+                'ESRI Shapefile'
+            )
+        print('Shapefiles created for each feature.')
+
+    def fix_geometries(self, fixed_layer):
+        QgsApplication.processingRegistry().addProvider(QgsNativeAlgorithms())
+        fix_geometries_params = {
+            'INPUT': self.layer,
+            'METHOD': 0,
+            'OUTPUT': fixed_layer
+        }
+        processing.run("native:fixgeometries", fix_geometries_params)
+
+    def create_spatial_index(self):
+        QgsApplication.processingRegistry().addProvider(QgsNativeAlgorithms())
+        create_spatial_index_params = {
+            'INPUT': self.layer,
+            'OUTPUT': 'Output'
+        }
+        processing.run("native:createspatialindex", create_spatial_index_params)
+        print(f'Creating the spatial index for {self.layer_name} is completed.')
+
+    def spatial_join(self, joining_layer_path, joined_layer_path):
+        """In QGIS, this is called 'Join attributes by location'"""
+        params = {'INPUT': self.layer,
+                  'PREDICATE': [0],
+                  'JOIN': joining_layer_path,
+                  'JOIN_FIELDS': [],
+                  'METHOD': 0,
+                  'DISCARD_NONMATCHING': False,
+                  'PREFIX': '',
+                  'OUTPUT': joined_layer_path}
+
+        feedback = QgsProcessingFeedback()
+        processing.run(
+            'native:joinattributesbylocation', params, feedback=feedback)
+        print(f'Spatial join with input layer {self.layer_name} is completed.')
+
+    def clip_layer(self, overlay_layer, clipped_layer):
+        """This must be tested"""
+        QgsApplication.processingRegistry().addProvider(QgsNativeAlgorithms())
+        clip_layer_params = {
+            'INPUT': self.layer_path,
+            'OVERLAY': overlay_layer,
+            'FILTER_EXPRESSION': '',
+            'FILTER_EXTENT': None,
+            'OUTPUT': clipped_layer
+        }
+        processing.run("native:clip", clip_layer_params)
+        print(f'Clipping of {self.layer_name} is completed.')
+
+    def clip_by_predefined_zones(self):
+        pass
+
+    def clip_by_multiple(
+            self, number_of_partitions, overlay_layers_dir, clipped_layers_dir):
+        create_folders(clipped_layers_dir, number_of_partitions)
+        for layer in range(number_of_partitions):
+            overlay = overlay_layers_dir + f'/layer_{layer}/layer_{layer}.shp'
+            clipped = clipped_layers_dir + f'/layer_{layer}/layer_{layer}.shp'
+            self.clip_layer(overlay, clipped)
+            clipped_layer = ScrubLayer(self.qgis_path, clipped, 'Temp Layer')
+            clipped_layer.create_spatial_index()
+
+    def split_layer(self, number_of_layers, splitted_layers_dir):
+        number_of_layers -= 1
+        QgsApplication.processingRegistry().addProvider(QgsNativeAlgorithms())
+        create_folders(splitted_layers_dir, number_of_layers)
+        intervals = self.data_count // number_of_layers
+        for part in range(number_of_layers):
+            output_layer_path = \
+                splitted_layers_dir + f'/layer_{part}/layer_{part}.shp'
+            params = {'INPUT': self.layer,
+                      'EXPRESSION': f'$id >= {part * intervals} '
+                                    f'AND $id < {(part + 1) * intervals}',
+                      'OUTPUT': output_layer_path}
+
+            processing.run("native:extractbyexpression", params)
+
+            new_layer = ScrubLayer(self.qgis_path, output_layer_path, 'Temp Layer')
+            new_layer.create_spatial_index()
+
+        # Adding a folder for the remaining features
+        os.makedirs(splitted_layers_dir + f'/layer_{number_of_layers}')
+        output_layer_path = splitted_layers_dir + \
+            f'/layer_{number_of_layers}/layer_{number_of_layers}.shp'
+        params = {'INPUT': self.layer,
+                  'EXPRESSION': f'$id >= {number_of_layers * intervals}',
+                  'OUTPUT': output_layer_path}
+
+        processing.run("native:extractbyexpression", params)
+        new_layer = ScrubLayer(self.qgis_path, output_layer_path, 'Temp Layer')
+        new_layer.create_spatial_index()
+
+    @staticmethod
+    def merge_layers(layers_path, merged_layer_path):
+        merging_layers = find_shp_files(layers_path)
+        QgsApplication.processingRegistry().addProvider(QgsNativeAlgorithms())
+
+        params = {'LAYERS': merging_layers,
+                  'CRS': None,
+                  'OUTPUT': merged_layer_path}
+
+        processing.run("native:mergevectorlayers", params)
+
+    def multipart_to_singleparts(self, singleparts_layer_path):
+        QgsApplication.processingRegistry().addProvider(QgsNativeAlgorithms())
+        params = {'INPUT': self.layer,
+                  'OUTPUT': singleparts_layer_path}
+        processing.run("native:multiparttosingleparts", params)
+
+    def delete_duplicates(self, deleted_duplicates_layer):
+        QgsApplication.processingRegistry().addProvider(QgsNativeAlgorithms())
+        params = {'INPUT': self.layer_path,
+                  'OUTPUT': deleted_duplicates_layer}
+        processing.run("native:deleteduplicategeometries", params)
+
+    def delete_field(self, field_name):
+        QgsApplication.processingRegistry().addProvider(QgsNativeAlgorithms())
+        with edit(self.layer):
+            # Get the index of the column to delete
+            idx = self.layer.fields().indexFromName(field_name)
+
+            # Delete the field
+            self.layer.deleteAttribute(idx)
+
+            # Update layer fields
+            self.layer.updateFields()
+
+    def delete_record_by_index(self, record_index):
+        self.layer.startEditing()
+
+        if self.layer.deleteFeature(record_index):
+            print(f"Feature with ID {record_index} has been successfully removed.")
+        else:
+            print(f"Failed to remove feature with ID {record_index}.")
+
+        self.layer.commitChanges()
+
+    def conditional_delete_record(self, field_name, operator, condition):
+        request = QgsFeatureRequest().setFilterExpression(
+            f'{field_name} {operator} {str(condition)}')
+        # edit() commits the deletions when the block exits
+        with edit(self.layer):
+            for feature in self.layer.getFeatures(request):
+                self.layer.deleteFeature(feature.id())
+
+    def add_field(self, new_field_name):
+        functionalities = self.layer.dataProvider().capabilities()
+
+        if functionalities & QgsVectorDataProvider.AddAttributes:
+            new_field = QgsField(new_field_name, QVariant.Double)
+            self.layer.dataProvider().addAttributes([new_field])
+            self.layer.updateFields()
+
+    def assign_area(self, field_name):
+        self.layer.startEditing()
+        idx = self.layer.fields().indexFromName(field_name)
+
+        context = QgsExpressionContext()
+        context.appendScopes(
+            QgsExpressionContextUtils.globalProjectLayerScopes(self.layer))
+
+        for feature in self.layer.getFeatures():
+            area = feature.geometry().area()
+            feature[idx] = area
+            self.layer.updateFeature(feature)
+
+        self.layer.commitChanges()
+
+    def __str__(self):
+        return f'The {self.layer_name} has {self.data_count} records.'
+
+    @staticmethod
+    def cleanup():
+        QgsApplication.exitQgis()
+
+
+
+
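A minimal, hypothetical usage sketch for `ScrubLayer` (the layer paths are invented); it assumes the same initialized QGIS/Processing environment described after the workflow module.

```python
from scrub_layer import ScrubLayer

qgis_path = 'C:/Program Files/QGIS 3.34.1/apps/qgis'

# Hypothetical shapefile; the constructor raises ValueError if it cannot load.
roads = ScrubLayer(qgis_path, 'C:/temp/roads.shp', 'Roads')
print(roads)                                  # e.g. "The Roads has 1234 records."
roads.create_spatial_index()
roads.fix_geometries('C:/temp/roads_fixed.shp')

fixed = ScrubLayer(qgis_path, 'C:/temp/roads_fixed.shp', 'Fixed Roads')
fixed.add_field('Area')                       # double field
fixed.assign_area('Area')                     # geometry area per feature
fixed.conditional_delete_record('Area', '<', 15)

ScrubLayer.cleanup()                          # QgsApplication.exitQgis()
```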
diff --git a/scrub_mtl.py b/scrub_mtl.py
new file mode 100644
index 0000000..9dae53c
--- /dev/null
+++ b/scrub_mtl.py
@@ -0,0 +1,130 @@
+"""
+scrub_mtl module
+The workflow of cleaning and updating the Montreal Buildings dataset.
+The development of this class has been stopped, but the whole workflow
+can be found in the building_cleanup_workflow module of the same project.
+Project Developer: Alireza Adli alireza.adli@concordia.ca
+"""
+
+from scrub_layer import *
+from helpers import *
+
+
+class ScrubMTL:
+    def __init__(self, qgis_path, nrcan, geo_index, property_assessment,
+                 montreal_boundary, output_paths_dir):
+        self.qgis_path = qgis_path
+        self.nrcan = self.initialize_layer(nrcan, 'NRCan')
+        self.geo_index = self.initialize_layer(geo_index, 'GeoIndex')
+        self.property_assessment = \
+            self.initialize_layer(property_assessment, 'Property Assessment')
+        self.montreal_boundary = montreal_boundary
+        self.output_paths_dir = output_paths_dir
+        self.input_paths = {
+            'NRCan': nrcan,
+            'GeoIndex': geo_index,
+            'Property Assessment': property_assessment,
+            'Montreal Boundary': montreal_boundary
+        }
+        self.output_paths = {
+            'Fixed NRCan': '',
+            'Fixed GeoIndex': '',
+            'Clipped Fixed GeoIndex': '',
+            'Splitted NRCans': '',
+            'Pairwise Clipped Property Assessment Partitions': '',
+            'Pairwise Clipped Merged Property Assessment': '',
+            'Property Assessment and NRCan': '',
+            'Property Assessment and NRCan and GeoIndex': '',
+            'Deleted Duplicates Layer': '',
+            'Single Parts Layer': ''
+        }
+
+    @staticmethod
+    def merge_layers(layers_path, merged_layer_path):
+        merging_layers = find_shp_files(layers_path)
+        QgsApplication.processingRegistry().addProvider(QgsNativeAlgorithms())
+
+        params = {'LAYERS': merging_layers,
+                  'CRS': None,
+                  'OUTPUT': merged_layer_path}
+
+        processing.run("native:mergevectorlayers", params)
+
+    def generate_output_paths(self):
+        for path in self.output_paths.keys():
+            new_folder = path.lower().replace(' ', '_')
+            output_path = self.output_paths_dir + '/' + new_folder
+            os.mkdir(output_path)
+            if path[-1] != 's':
+                self.output_paths[path] = output_path + f'/{new_folder}.shp'
+            else:
+                self.output_paths[path] = output_path
+
+    def initialize_layer(self, layer_path, layer_name):
+        return ScrubLayer(self.qgis_path, layer_path, layer_name)
+
+    def process_nrcan(self):
+        print(f'Data Count of the NRCan layer: {self.nrcan.data_count}')
+        self.nrcan.create_spatial_index()
+        self.nrcan.fix_geometries(self.output_paths['Fixed NRCan'])
+        nrcan_fixed = \
+            self.initialize_layer(self.output_paths['Fixed NRCan'],
+                                  'Fixed NRCan')
+        nrcan_fixed.create_spatial_index()
+        print(f'Data Count of the NRCan layer ({nrcan_fixed.layer_name}) '
+              f'after fixing geometries: {nrcan_fixed.layer.featureCount()}')
+
+    def process_geo_index(self):
+        print(f'Data Count of the GeoIndex layer: {self.geo_index.data_count}')
+        self.geo_index.create_spatial_index()
+        self.geo_index.fix_geometries(self.output_paths['Fixed GeoIndex'])
+        geo_index_fixed = \
+            self.initialize_layer(self.output_paths['Fixed GeoIndex'],
+                                  'Fixed GeoIndex')
+        geo_index_fixed.create_spatial_index()
+        print(f'Data Count of the GeoIndex layer ({geo_index_fixed.layer_name}) '
+              f'after fixing geometries: {geo_index_fixed.layer.featureCount()}')
+        geo_index_fixed.clip_layer(self.montreal_boundary,
+                                   self.output_paths['Clipped Fixed GeoIndex'])
+        geo_index_clipped = \
+            self.initialize_layer(self.output_paths['Clipped Fixed GeoIndex'],
+                                  'Clipped Fixed GeoIndex')
+        geo_index_clipped.create_spatial_index()
+        print(f'Data Count of the {geo_index_fixed.layer_name} '
+              f'({geo_index_clipped.layer_name}) after clipping it '
+              f'based on the Montreal Boundary layer: '
+              f'{geo_index_clipped.layer.featureCount()}')
+
+    def process_property_assessment(self):
+        print(f'Data Count of the Property Assessment layer: '
+              f'{self.property_assessment.data_count}')
+        self.property_assessment.create_spatial_index()
+
+    def the_cleaning_workflow(self):
+        """Carries out all the steps to clean the dataset."""
+        self.generate_output_paths()
+        self.process_nrcan()
+        self.process_geo_index()
+        self.process_property_assessment()
+        self.property_assessment.clip_by_multiple(
+            120,
+            self.output_paths['Splitted NRCans'],
+            self.output_paths['Pairwise Clipped Property Assessment Partitions'])
+        prop_a = ScrubLayer(self.qgis_path,
+                            self.input_paths['Property Assessment'],
+                            'Property Aim')
+
+        prop_a.\
+            clip_by_multiple(120, 'the path',
+                             self.output_paths['Splitted NRCans'])
+
+    def refine_heights(self):
+        pass
+
+    def remove_redundant_fields(self):
+        pass
+
+    def remove_records_by_area(self, area_limitation):
+        """The area limitation can be assigned in the constructor"""
+        pass
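Finally, a sketch of how `ScrubMTL` was presumably intended to be driven before its development stopped; all paths below are placeholders, and the same QGIS bootstrapping caveats apply.

```python
from scrub_mtl import ScrubMTL

scrubber = ScrubMTL(
    qgis_path='C:/Program Files/QGIS 3.34.1/apps/qgis',
    nrcan='path/to/nrcan.shp',                              # placeholder
    geo_index='path/to/geoindex.shp',                       # placeholder
    property_assessment='path/to/property_assessment.shp',  # placeholder
    montreal_boundary='path/to/montreal_boundary.shp',      # placeholder
    output_paths_dir='path/to/output_dir',                  # placeholder
)

# Runs output-folder generation, NRCan/GeoIndex fixing and the pairwise clip.
scrubber.the_cleaning_workflow()
```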