"""
handle_varennes_ds_workflow module
NRCan datalayer has two polygons for each buildings' footprint.
The below workflow has been designed to remove the extra polygons.

Two polygons are treated as the same footprint when their centroid
coordinates and their bounding-box corners all differ by less than
``tolerance``. The later polygon of each matching pair is removed from
a copy of the layer; the original data layer is left untouched.
Project Developer: Alireza Adli alireza.adli@concordia.ca
"""
import itertools

# You need to clone mtl_gis_oo project and
# add it as a dependency of this new project
from scrub_layer_class import *
import pandas as pd


def find_duplicate_ids(records, tolerance):
    """Return the ids of records that nearly coincide with an earlier one.

    Args:
        records: Sequence of ``(feature_id, values)`` pairs, where
            ``values`` is a sequence of numbers (here: centroid x/y and
            bounding-box min/max x/y). All ``values`` must have the same
            length and ordering.
        tolerance: Two records match when every pair of corresponding
            values differs by strictly less than this amount.

    Returns:
        A set with the ``feature_id`` of the *later* record of every
        matching pair. Every pair is compared, including pairs whose
        first record has itself already been marked as a duplicate.
    """
    duplicate_ids = set()
    # combinations() yields each unordered pair once, earlier record
    # first, i.e. the same pairs as a nested "i < j" index loop.
    for (_, earlier), (later_id, later) in itertools.combinations(
            records, 2):
        if all(abs(a - b) < tolerance for a, b in zip(earlier, later)):
            duplicate_ids.add(later_id)
    return duplicate_ids


def commit_or_raise(layer, action):
    """Commit the pending edits of ``layer`` and fail loudly on error.

    Args:
        layer: The vector layer whose edit buffer should be saved.
        action: Short description of the edits, used in the message.

    Raises:
        RuntimeError: If ``commitChanges()`` reports failure; the
            message includes the layer's ``commitErrors()``.
    """
    if not layer.commitChanges():
        errors = '; '.join(layer.commitErrors())
        raise RuntimeError(f'Could not commit {action}: {errors}')


# Change the paths by the location of your QGIS installation and datalayers
qgis_path = 'C:/Program Files/QGIS 3.34.1/apps/qgis'

varennes_nrcan_extra_polygons = \
    'C:/Users/a_adli/PycharmProjects/varennes_gis_oo/' \
    'data/initial_data/endeavor/nrcan_without_centroids/auto_building_2.shp'

# First we duplicate the layer to preserve the main data layer.
duplicated = \
    'C:/Users/a_adli/PycharmProjects/varennes_gis_oo/' \
    'data/initial_data/endeavor/nrcan_tolerance_7_removed_dups_pro/' \
    'nrcan_tolerance_7_removed_dups.shp'

varennes_nrcan = ScrubLayer(
    qgis_path, varennes_nrcan_extra_polygons, 'NRCan Varennes')
varennes_nrcan.duplicate_layer(duplicated)

varennes_nrcan_centroids = ScrubLayer(
    qgis_path, duplicated, 'NRCan Varennes with Coordinates')
nrcan_layer = varennes_nrcan_centroids.layer

# Then we add coordinates to each polygon so we can remove the
# very similar polygons based on coordinates.
coordinate_fields = [
    'centroid_x', 'centroid_y', 'min_x', 'min_y', 'max_x', 'max_y']

nrcan_layer.startEditing()

# Add new fields for the centroid and bounding-box coordinates
nrcan_layer.dataProvider().addAttributes(
    [QgsField(name, QVariant.Double) for name in coordinate_fields])
nrcan_layer.updateFields()

coordinate_indices = [
    nrcan_layer.fields().indexFromName(name) for name in coordinate_fields]

for feature in nrcan_layer.getFeatures():
    geometry = feature.geometry()
    centroid = geometry.centroid().asPoint()
    bbox = geometry.boundingBox()
    # Same order as coordinate_fields.
    coordinates = (
        centroid.x(), centroid.y(),
        bbox.xMinimum(), bbox.yMinimum(),
        bbox.xMaximum(), bbox.yMaximum())
    for index, value in zip(coordinate_indices, coordinates):
        feature.setAttribute(index, value)
    nrcan_layer.updateFeature(feature)

# Commit the changes for adding the coordinate fields
commit_or_raise(nrcan_layer, 'the centroid and bounding-box fields')


# Pandas is a better option to compare polygons and remove the duplicates
# so we make a dataframe. We just transfer the necessary fields
# to the dataframe.
field_names = ['feature_id'] + coordinate_fields

# Get the indices of the specified fields. indexOf() returns -1 for an
# unknown name, which would silently read the *last* attribute when
# used as a list index, so missing fields are rejected up front.
layer_fields = nrcan_layer.fields()
field_indices = [layer_fields.indexOf(name) for name in field_names]
missing_fields = [
    name for name, index in zip(field_names, field_indices) if index == -1]
if missing_fields:
    raise ValueError(
        f'Layer is missing required field(s): {", ".join(missing_fields)}')

# Extract the attribute values and store them in a list of dictionaries
data = []
for feature in nrcan_layer.getFeatures():
    attributes = feature.attributes()
    data.append(
        {name: attributes[index]
         for name, index in zip(field_names, field_indices)})

# Create a DataFrame from the list of dictionaries. Passing the columns
# explicitly keeps the lookups below valid even for an empty layer.
varennes_nrcan_centroids_df = pd.DataFrame(data, columns=field_names)
# Positional row number; not used by the comparison below.
varennes_nrcan_centroids_df['ID'] = range(len(varennes_nrcan_centroids_df))

# Removing polygons based on the difference (tolerance variable) between
# the centroid and bounding-box coordinates of each pair of polygons.
# The tolerance can be changed to one or five.
# NOTE(review): the tolerance is expressed in the units of the stored
# coordinates, i.e. of the layer's CRS - confirm these are the intended
# units.
tolerance = 7
feature_ids_all = varennes_nrcan_centroids_df['feature_id'].tolist()
coordinate_rows = \
    varennes_nrcan_centroids_df[coordinate_fields].values.tolist()
duplicated_feature_ids = find_duplicate_ids(
    list(zip(feature_ids_all, coordinate_rows)), tolerance)

# Removing records based on the duplicated_feature_ids set
nrcan_layer.startEditing()

# Collect the ids first so the layer is not modified while it is being
# iterated, then delete them in a single call.
ids_to_delete = [
    feature.id() for feature in nrcan_layer.getFeatures()
    if feature['feature_id'] in duplicated_feature_ids]
nrcan_layer.deleteFeatures(ids_to_delete)

# Save changes and stop editing
commit_or_raise(nrcan_layer, 'the removal of duplicated polygons')