Rename modules based on the pull request instructions

This commit is contained in:
Alireza Adli 2024-11-04 10:26:16 -05:00
parent 2149217b80
commit ffb4d87e57
5 changed files with 659 additions and 0 deletions

View File

@ -0,0 +1,144 @@
"""
handle_mtl_ds_workflow module
The workflow of cleaning and updating the Montreal Buildings dataset.
Project Developer: Alireza Adli alireza.adli@concordia.ca
"""
from scrub_layer_class import *
from input_paths_and_layers import *
# Making folders for the output data layers
create_output_folders(output_paths, output_paths_dir)
# Initialize the input data layers
nrcan = ScrubLayer(qgis_path, input_paths['NRCan'], 'NRCan')
geo_index = ScrubLayer(qgis_path, input_paths['GeoIndex'], 'GeoIndex')
property_assessment = \
ScrubLayer(
qgis_path, input_paths['Property Assessment'], 'Property Assessment')
montreal_boundary = \
ScrubLayer(qgis_path, input_paths['Montreal Boundary'], 'Montreal Boundary')
# Processing the NRCan layer includes fixing its geometries
print('Processing the NRCan layer')
print(nrcan)
nrcan.create_spatial_index()
nrcan.fix_geometries(output_paths['Fixed NRCan'])
# Defining a new layer for the fixed NRCan
nrcan_fixed = \
ScrubLayer(qgis_path, output_paths['Fixed NRCan'], 'Fixed NRCan')
nrcan_fixed.create_spatial_index()
print(nrcan_fixed)
# Processing the GeoIndex layer includes fixing its geometries and
# clipping it based on the Montreal boundary data layer
print('Processing the GeoIndex layer')
print(geo_index)
geo_index.create_spatial_index()
geo_index.fix_geometries(output_paths['Fixed GeoIndex'])
# Defining a new layer for the fixed GeoIndex
geo_index_fixed = ScrubLayer(qgis_path, output_paths['Fixed GeoIndex'],
'Fixed GeoIndex')
geo_index_fixed.create_spatial_index()
print(geo_index_fixed)
geo_index_fixed.clip_layer(montreal_boundary.layer_path,
output_paths['Clipped Fixed GeoIndex'])
geo_index_clipped = \
ScrubLayer(qgis_path,
output_paths['Clipped Fixed GeoIndex'], 'Clipped Fixed GeoIndex')
geo_index_clipped.create_spatial_index()
print(geo_index_clipped)
# Processing the Property Assessment layer includes a pairwise clip, and
# two spatial join with NRCan and GeoIndex layers, respectively
print(property_assessment)
property_assessment.create_spatial_index()
# For the pairwise clip, number of overlaying layers can be chosen
# (meaning number of splits for NRCan layer). This improves the performance
# where may increase duplicates. This has been done because using the NRCan
# layer as a whole causes crashing the clipping process.
# First we split the overlaying layers into our desired number
nrcan_fixed.split_layer(120, output_paths['Splitted NRCans'])
# Clipping have to be done in
clipping_property_assessment = """
from input_paths_and_layers import *
property_assessment.clip_by_multiple(
120, output_paths['Splitted NRCans'],
output_paths['Pairwise Clipped Property Assessment Partitions'])"""
exec(clipping_property_assessment)
property_assessment.merge_layers(
output_paths['Pairwise Clipped Property Assessment Partitions'],
output_paths['Pairwise Clipped Merged Property Assessment'])
clipped_property_assessment = ScrubLayer(
qgis_path,
output_paths['Pairwise Clipped Merged Property Assessment'],
'Clipped Property Assessment')
print(clipped_property_assessment)
clipped_property_assessment.create_spatial_index()
clipped_property_assessment.spatial_join(
nrcan_fixed.layer_path,
output_paths['Property Assessment and NRCan'])
property_assessment_nrcan = ScrubLayer(
qgis_path,
output_paths['Property Assessment and NRCan'],
'Property Assessment and NRCan')
print(property_assessment_nrcan)
property_assessment_nrcan.create_spatial_index()
property_assessment_nrcan.spatial_join(
geo_index_clipped,
output_paths['Property Assessment and NRCan and GeoIndex'])
property_assessment_nrcan_geo = ScrubLayer(
qgis_path,
output_paths['Property Assessment and NRCan and GeoIndex'],
'Property Assessment and NRCan and GeoIndex')
print(property_assessment_nrcan_geo)
property_assessment_nrcan_geo.create_spatial_index()
property_assessment_nrcan_geo.delete_duplicates(
output_paths['Deleted Duplicates Layer'])
deleted_dups_layer = ScrubLayer(
qgis_path,
output_paths['Deleted Duplicates Layer'],
'Deleted Duplicates Layer')
print(deleted_dups_layer)
property_assessment_nrcan_geo.create_spatial_index()
property_assessment_nrcan_geo.multipart_to_singleparts(
output_paths['Single Parts Layer'])
single_parts_layer = ScrubLayer(
qgis_path,
output_paths['Single Parts Layer'],
'Single Parts Layer')
print(single_parts_layer)
single_parts_layer.create_spatial_index()
# Add an area field
single_parts_layer.add_field('Area')
single_parts_layer.assign_area('Area')
dismissive_area = 15
single_parts_layer.conditional_delete_record('Area', '<', dismissive_area)
print(f'After removing buildings with less than {dismissive_area} squaremeter area:')
print(single_parts_layer)

42
config.py Normal file
View File

@ -0,0 +1,42 @@
"""
input_paths_and_layers module
Project Developer: Alireza Adli alireza.adli@concordia.ca
"""
# Application's path
qgis_path = 'C:/Program Files/QGIS 3.34.1/apps/qgis'
# Gathering input data layers paths
input_paths = {
'NRCan':
'C:/Users/a_adli/PycharmProjects/hydroquebec_archetype_gispy/'
'data/input_data/nrcan/Autobuilding_QC_VILLE_MONTREAL.shp',
'GeoIndex':
'C:/Users/a_adli/PycharmProjects/hydroquebec_archetype_gispy/'
'data/input_data/Geoindex_81670/mamh_usage_predo_2022_s_poly.shp',
'Property Assessment':
'C:/Users/a_adli/PycharmProjects/hydroquebec_archetype_gispy/'
'data/input_data/property_assessment/uniteevaluationfonciere.shp',
'Montreal Boundary':
'C:/Users/a_adli/PycharmProjects/hydroquebec_archetype_gispy/'
'data/input_data/montreal_boundary/Montreal_boundary.shp'
}
# Defining a directory for all the output data layers
output_paths_dir = \
'C:/Users/a_adli/PycharmProjects/hydroquebec_archetype_gispy/' \
'data/gisoo_workflow_output'
# Preparing a bedding for output data layers paths
output_paths = {
'Fixed NRCan': '',
'Fixed GeoIndex': '',
'Clipped Fixed GeoIndex': '',
'Splitted NRCans': '',
'Pairwise Clipped Property Assessment Partitions': '',
'Pairwise Clipped Merged Property Assessment': '',
'Property Assessment and NRCan': '',
'Property Assessment and NRCan and GeoIndex': '',
'Deleted Duplicates Layer': '',
'Single Parts Layer': ''
}

75
helpers.py Normal file
View File

@ -0,0 +1,75 @@
"""
basic_functions module
A number of functionalities that help the project
but cannot be a part of the PyQGIS tool.
Project Developer: Alireza Adli alireza.adli@concordia.ca
"""
import os
import glob
import processing
from qgis.core import QgsApplication
from qgis.analysis import QgsNativeAlgorithms
def find_shp_files(root_folder):
shp_files = []
# Sort folders alphabetically
for foldername, _, _ in sorted(os.walk(root_folder)):
for filename in sorted(glob.glob(os.path.join(foldername, '*.shp'))):
new_file_name = filename.replace('\\', r'/')
shp_files.append(new_file_name)
return shp_files
def find_las_files(root_folder):
las_files = []
# Sort folders alphabetically
for foldername, _, _ in sorted(os.walk(root_folder)):
for filename in sorted(glob.glob(os.path.join(foldername, '*.las'))):
new_file_name = filename.replace('\\', r'/')
las_files.append(new_file_name)
return las_files
def create_folders(directory, num_folders):
"""
Create a specified number of folders in the given directory.
Args:
- directory (str): The directory where folders will be created.
- num_folders (int): The number of folders to create.
"""
# Check if the directory exists, if not, create it
if not os.path.exists(directory):
os.makedirs(directory)
# Create folders
for i in range(num_folders):
folder_name = f"layer_{i}"
folder_path = os.path.join(directory, folder_name)
os.makedirs(folder_path)
print(f"Created folder: {folder_path}")
def create_output_folders(paths_dict, output_dir):
for path in paths_dict.keys():
new_folder = path.lower().replace(' ', '_')
output_path = output_dir + '/' + new_folder
os.mkdir(output_path)
if path[-1] != 's':
paths_dict[path] = output_path + f'/{new_folder}.shp'
else:
paths_dict[path] = output_path
def merge_las_layers(layers_path, mergeded_layer_path):
merging_layers = find_las_files(layers_path)
QgsApplication.processingRegistry().addProvider(QgsNativeAlgorithms())
params = {'LAYERS': merging_layers,
'CRS': None,
'OUTPUT': mergeded_layer_path}
processing.run("native:mergevectorlayers", params)

268
scrub_layer.py Normal file
View File

@ -0,0 +1,268 @@
"""
scrub_layer_class module
PyQGIS functionalities that are needed in the cleaning and updating
Montreal Buildings dataset project, gathered in one class.
Project Developer: Alireza Adli alireza.adli@concordia.ca
"""
from qgis.core import QgsApplication, QgsField, QgsProject, \
QgsProcessingFeedback, QgsVectorLayer, QgsVectorDataProvider, \
QgsExpressionContext, QgsExpressionContextUtils, edit, QgsFeatureRequest, \
QgsExpression, QgsVectorFileWriter, QgsCoordinateReferenceSystem
from qgis.PyQt.QtCore import QVariant
from qgis.analysis import QgsNativeAlgorithms
from basic_functions import *
import processing
class ScrubLayer:
def __init__(self, qgis_path, layer_path, layer_name):
self.qgis_path = qgis_path
# Set the path to QGIS installation
QgsApplication.setPrefixPath(self.qgis_path, True)
self.layer_path = layer_path
self.layer_name = layer_name
self.layer = self.load_layer()
self.data_count = self.layer.featureCount()
def duplicate_layer(self, output_path):
options = QgsVectorFileWriter.SaveVectorOptions()
options.driverName = 'ESRI Shapefile'
duplication = QgsVectorFileWriter.writeAsVectorFormat(
self.layer,
output_path,
options
)
if duplication == QgsVectorFileWriter.NoError:
print(f"Shapefile successfully duplicated")
else:
print(f"Error duplicating shapefile: {duplication}")
def get_cell(self, fid, field_name):
return self.layer.getFeature(fid)[field_name]
def select_cells(
self,
field_name, field_value, required_field,
return_one_value=False):
"""Returns the value of a field
based on the value of another field in the same record"""
expression = QgsExpression(f'{field_name} = {field_value}')
request = QgsFeatureRequest(expression)
features = self.layer.getFeatures(request)
field_field_values = []
for feature in features:
field_field_values.append(feature[required_field])
if return_one_value and field_field_values:
return field_field_values[0]
return field_field_values
def load_layer(self):
the_layer = QgsVectorLayer(self.layer_path, self.layer_name, 'ogr')
if not the_layer.isValid():
raise ValueError(f'Failed to load layer {self.layer_name} from {self.layer_path}')
else:
QgsProject.instance().addMapLayer(the_layer)
return the_layer
def features_to_layers(self, layers_dir, crs):
create_folders(layers_dir, self.data_count)
target_crs = QgsCoordinateReferenceSystem(crs)
for feature in self.layer.getFeatures():
new_layer = QgsVectorLayer(f'Polygon?crs={crs}', "feature_layer", "memory")
new_layer.setCrs(target_crs)
new_provider = new_layer.dataProvider()
new_provider.addFeatures([feature])
feature_id = feature.id()
output_path = f'{layers_dir}layer_{feature_id}/layer_{feature_id}.shp'
QgsVectorFileWriter.writeAsVectorFormat(
new_layer,
output_path,
'utf-8',
new_layer.crs(),
'ESRI Shapefile'
)
print('Shapefiles created for each feature.')
def fix_geometries(self, fixed_layer):
QgsApplication.processingRegistry().addProvider(QgsNativeAlgorithms())
fix_geometries_params = {
'INPUT': self.layer,
'METHOD': 0,
'OUTPUT': fixed_layer
}
processing.run("native:fixgeometries", fix_geometries_params)
def create_spatial_index(self):
QgsApplication.processingRegistry().addProvider(QgsNativeAlgorithms())
create_spatial_index_params = {
'INPUT': self.layer,
'OUTPUT': 'Output'
}
processing.run("native:createspatialindex", create_spatial_index_params)
print(f'Creating Spatial index for {self.layer_name} is completed.')
def spatial_join(self, joining_layer_path, joined_layer_path):
"""In QGIS, it is called 'Join attributes by Location'"""
params = {'INPUT': self.layer,
'PREDICATE': [0],
'JOIN': joining_layer_path,
'JOIN_FIELDS': [],
'METHOD': 0,
'DISCARD_NONMATCHING': False,
'PREFIX': '',
'OUTPUT': joined_layer_path}
feedback = QgsProcessingFeedback()
processing.run('native:joinattributesbylocation', params, feedback=feedback)
print(f'Spatial Join with input layer {self.layer_name} is completed.')
def clip_layer(self, overlay_layer, clipped_layer):
"""This must be tested"""
QgsApplication.processingRegistry().addProvider(QgsNativeAlgorithms())
clip_layer_params = {
'INPUT': self.layer_path,
'OVERLAY': overlay_layer,
'FILTER_EXPRESSION': '',
'FILTER_EXTENT': None,
'OUTPUT': clipped_layer
}
processing.run("native:clip", clip_layer_params)
print(f'Clipping of {self.layer_name} is completed.')
def clip_by_predefined_zones(self):
pass
def clip_by_multiple(self, number_of_partitions, overlay_layers_dir, clipped_layers_dir):
create_folders(clipped_layers_dir, number_of_partitions)
for layer in range(number_of_partitions):
overlay = overlay_layers_dir + f'/layer_{layer}/layer_{layer}.shp'
clipped = clipped_layers_dir + f'/layer_{layer}/layer_{layer}.shp'
self.clip_layer(overlay, clipped)
clipped_layer = ScrubLayer(self.qgis_path, clipped, 'Temp Layer')
clipped_layer.create_spatial_index()
def split_layer(self, number_of_layers, splitted_layers_dir):
number_of_layers -= 1
QgsApplication.processingRegistry().addProvider(QgsNativeAlgorithms())
create_folders(splitted_layers_dir, number_of_layers)
intervals = self.data_count // number_of_layers
for part in range(number_of_layers):
output_layer_path = \
splitted_layers_dir + f'/layer_{part}/layer_{part}.shp'
params = {'INPUT': self.layer,
'EXPRESSION': f'$id >= {part * intervals} '
f'AND $id < {(part + 1) * intervals}\r\n',
'OUTPUT': output_layer_path}
processing.run("native:extractbyexpression", params)
new_layer = ScrubLayer(self.qgis_path, output_layer_path, 'Temp Layer')
new_layer.create_spatial_index()
# Adding a folder for the remaining features
os.makedirs(splitted_layers_dir + f'/layer_{number_of_layers}')
output_layer_path = splitted_layers_dir + \
f'/layer_{number_of_layers}/layer_{number_of_layers}.shp'
params = {'INPUT': self.layer,
'EXPRESSION': f'$id >= {number_of_layers * intervals}\r\n',
'OUTPUT': output_layer_path}
processing.run("native:extractbyexpression", params)
new_layer = ScrubLayer(self.qgis_path, output_layer_path, 'Temp Layer')
new_layer.create_spatial_index()
@staticmethod
def merge_layers(layers_path, mergeded_layer_path):
merging_layers = find_shp_files(layers_path)
QgsApplication.processingRegistry().addProvider(QgsNativeAlgorithms())
params = {'LAYERS': merging_layers,
'CRS': None,
'OUTPUT': mergeded_layer_path}
processing.run("native:mergevectorlayers", params)
def multipart_to_singleparts(self, singleparts_layer_path):
QgsApplication.processingRegistry().addProvider(QgsNativeAlgorithms())
params = {'INPUT': self.layer,
'OUTPUT': singleparts_layer_path}
processing.run("native:multiparttosingleparts", params)
def delete_duplicates(self, deleted_duplicates_layer):
QgsApplication.processingRegistry().addProvider(QgsNativeAlgorithms())
params = {'INPUT': self.layer_path,
'OUTPUT': deleted_duplicates_layer}
processing.run("native:deleteduplicategeometries", params)
def delete_field(self, field_name):
QgsApplication.processingRegistry().addProvider(QgsNativeAlgorithms())
with edit(self.layer):
# Get the index of the column to delete
idx = self.layer.fields().indexFromName(field_name)
# Delete the field
self.layer.deleteAttribute(idx)
# Update layer fields
self.layer.updateFields()
def delete_record_by_index(self, record_index):
self.layer.startEditing()
if self.layer.deleteFeature(record_index):
print(f"Feature with ID {record_index} has been successfully removed.")
else:
print(f"Failed to remove feature with ID {record_index}.")
self.layer.commitChanges()
def conditional_delete_record(self, field_name, operator, condition):
request = QgsFeatureRequest().setFilterExpression(
f'{field_name} {operator} {str(condition)}')
with edit(self.layer):
for feature in self.layer.getFeatures(request):
self.layer.deleteFeature(feature.id())
self.layer.commitChanges()
def add_field(self, new_field_name):
functionalities = self.layer.dataProvider().capabilities()
if functionalities & QgsVectorDataProvider.AddAttributes:
new_field = QgsField(new_field_name, QVariant.Double)
self.layer.dataProvider().addAttributes([new_field])
self.layer.updateFields()
def assign_area(self, field_name):
self.layer.startEditing()
idx = self.layer.fields().indexFromName(field_name)
context = QgsExpressionContext()
context.appendScopes(
QgsExpressionContextUtils.globalProjectLayerScopes(self.layer))
for feature in self.layer.getFeatures():
area = feature.geometry().area()
feature[idx] = area
self.layer.updateFeature(feature)
self.layer.commitChanges()
def __str__(self):
return f'The {self.layer_name} has {self.data_count} records.'
@staticmethod
def cleanup():
QgsApplication.exitQgis()

130
scrub_mtl.py Normal file
View File

@ -0,0 +1,130 @@
"""
scrub_mtl_class module
The workflow of cleaning and updating the Montreal Buildings dataset.
The development of this class has been stopped but the whole workflow
can be found in a module namely handle_mtl_ds_workflow, in the same project.
Project Developer: Alireza Adli alireza.adli@concordia.ca
"""
from scrub_layer_class import *
from basic_functions import *
class ScrubMTL:
def __init__(self, qgis_path, nrcan, geo_index, property_assessment,
montreal_boundary, output_paths_dir):
self.qgis_path = qgis_path
self.nrcan = self.initialize_layer(nrcan, 'NRCan')
self.geo_index = self.initialize_layer(geo_index, 'GeoIndex')
self.property_assessment = \
self.initialize_layer(property_assessment, 'Property Assessment')
self.montreal_boundary = montreal_boundary
self.output_paths_dir = output_paths_dir
self.input_paths = {
'NRCan': nrcan,
'GeoIndex': geo_index,
'Property Assessment': property_assessment,
'Montreal Boundary': montreal_boundary
}
self.output_paths = {
'Fixed NRCan': '',
'Fixed GeoIndex': '',
'Clipped Fixed GeoIndex': '',
'Splitted NRCans': '',
'Pairwise Clipped Property Assessment Partitions': '',
'Pairwise Clipped Merged Property Assessment': '',
'Property Assessment and NRCan': '',
'Property Assessment and NRCan and GeoIndex': '',
'Deleted Duplicates Layer': '',
'Singled Parts Layer': ''
}
@staticmethod
def merge_layers(layers_path, mergeded_layer_path):
merging_layers = find_shp_files(layers_path)
QgsApplication.processingRegistry().addProvider(QgsNativeAlgorithms())
params = {'LAYERS': merging_layers,
'CRS': None,
'OUTPUT': mergeded_layer_path}
processing.run("native:mergevectorlayers", params)
def generate_output_paths(self):
for path in self.output_paths.keys():
new_folder = path.lower().replace(' ', '_')
output_path = self.output_paths_dir + '/' + new_folder
os.mkdir(output_path)
if path[-1] != 's':
self.output_paths[path] = output_path + f'/{new_folder}.shp'
else:
self.output_paths[path] = output_path
def initialize_layer(self, layer_path, layer_name):
return ScrubLayer(self.qgis_path, layer_path, layer_name)
def process_nrcan(self):
print(f'Data Count of the NRCan layer: {self.nrcan.data_count}')
self.nrcan.create_spatial_index()
self.nrcan.fix_geometries(self.output_paths['Fixed NRCan'])
nrcan_fixed = \
self.initialize_layer(self.output_paths['Fixed NRCan'],
'Fixed NRCan')
nrcan_fixed.create_spatial_index()
print(f'Data Count of the NRCan layer ({nrcan_fixed.layer_name}) '
f'after fixing geometries: {nrcan_fixed.layer.featureCount()}')
def process_geo_index(self):
print(f'Data Count of the GeoIndex layer: {self.geo_index.data_count}')
self.geo_index.create_spatial_index()
self.geo_index.fix_geometries(self.output_paths['Fixed GeoIndex'])
geo_index_fixed = \
self.initialize_layer(self.output_paths['Fixed GeoIndex'],
'Fixed GeoIndex')
geo_index_fixed.create_spatial_index()
print(f'Data Count of the GeoIndex layer ({geo_index_fixed.layer_name}) '
f'after fixing geometries: {geo_index_fixed.layer.featureCount()}')
geo_index_fixed.clip_layer(self.montreal_boundary,
self.output_paths['Clipped Fixed GeoIndex'])
geo_index_clipped = \
self.initialize_layer(self.output_paths['Clipped Fixed GeoIndex'],
'Clipped Fixed GeoIndex')
geo_index_clipped.create_spatial_index()
print(f'Data Count of the {geo_index_fixed.layer_name} '
f'({geo_index_clipped.layer_name}) after clipping it '
f'based on the Montreal Boundary layer: '
f'{geo_index_clipped.layer.featureCount()}')
def process_property_assesment(self):
print(f'Data Count of the Property Assessment layer: '
f'{self.property_assessment.data_count}')
self.property_assessment.create_spatial_index()
def the_cleaning_workflow(self):
"""It carries out all the steps to clean the dataset."""
self.generate_output_paths()
self.process_nrcan()
self.process_geo_index()
self.process_property_assesment()
self.property_assessment.clip_by_multiple(120,
self.output_paths['Fixed NRCan'],
self.output_paths['Splitted NRCans'],
self.output_paths
['Pairwise Clipped Property Assessment Partitions']
)
prop_a = ScrubLayer(self.qgis_path, self.input_paths['Property Assessment'],
'Property Aim')
prop_a.\
clip_by_multiple(120, 'the path',
self.output_paths['Splitted NRCans'])
def refine_heights(self):
pass
def remove_redundant_fields(self):
pass
def remove_records_by_area(self, area_limitation):
"""Area limitation can be assigned in the constructor"""
pass