QGIS API Documentation 3.37.0-Master (fdefdf9c27f)
qgsalgorithmdeleteduplicategeometries.cpp
Go to the documentation of this file.
/***************************************************************************
                         qgsalgorithmdeleteduplicategeometries.cpp
                         -----------------------------------------
    begin                : December 2019
    copyright            : (C) 2019 by Nyall Dawson
    email                : nyall dot dawson at gmail dot com
 ***************************************************************************/
8
/***************************************************************************
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 2 of the License, or     *
 *   (at your option) any later version.                                   *
 *                                                                         *
 ***************************************************************************/
17
19#include "qgsvectorlayer.h"
20#include "qgsgeometryengine.h"
21#include "qgsspatialindex.h"
22
24
25QString QgsDeleteDuplicateGeometriesAlgorithm::name() const
26{
27 return QStringLiteral( "deleteduplicategeometries" );
28}
29
30QString QgsDeleteDuplicateGeometriesAlgorithm::displayName() const
31{
32 return QObject::tr( "Delete duplicate geometries" );
33}
34
35QStringList QgsDeleteDuplicateGeometriesAlgorithm::tags() const
36{
37 return QObject::tr( "drop,remove,same,points,coincident,overlapping,filter" ).split( ',' );
38}
39
40QString QgsDeleteDuplicateGeometriesAlgorithm::group() const
41{
42 return QObject::tr( "Vector general" );
43}
44
45QString QgsDeleteDuplicateGeometriesAlgorithm::groupId() const
46{
47 return QStringLiteral( "vectorgeneral" );
48}
49
50void QgsDeleteDuplicateGeometriesAlgorithm::initAlgorithm( const QVariantMap & )
51{
52 addParameter( new QgsProcessingParameterFeatureSource( QStringLiteral( "INPUT" ), QObject::tr( "Input layer" ) ) );
53 addParameter( new QgsProcessingParameterFeatureSink( QStringLiteral( "OUTPUT" ), QObject::tr( "Cleaned" ) ) );
54 addOutput( new QgsProcessingOutputNumber( QStringLiteral( "RETAINED_COUNT" ), QObject::tr( "Count of retained records" ) ) );
55 addOutput( new QgsProcessingOutputNumber( QStringLiteral( "DUPLICATE_COUNT" ), QObject::tr( "Count of discarded duplicate records" ) ) );
56}
57
58QString QgsDeleteDuplicateGeometriesAlgorithm::shortHelpString() const
59{
60 return QObject::tr( "This algorithm finds duplicated geometries and removes them.\n\nAttributes are not checked, "
61 "so in case two features have identical geometries but different attributes, only one of "
62 "them will be added to the result layer." );
63}
64
65QString QgsDeleteDuplicateGeometriesAlgorithm::shortDescription() const
66{
67 return QObject::tr( "Finds duplicated geometries in a layer and removes them." );
68}
69
70QgsDeleteDuplicateGeometriesAlgorithm *QgsDeleteDuplicateGeometriesAlgorithm::createInstance() const
71{
72 return new QgsDeleteDuplicateGeometriesAlgorithm();
73}
74
75bool QgsDeleteDuplicateGeometriesAlgorithm::prepareAlgorithm( const QVariantMap &parameters, QgsProcessingContext &context, QgsProcessingFeedback * )
76{
77 mSource.reset( parameterAsSource( parameters, QStringLiteral( "INPUT" ), context ) );
78 if ( !mSource )
79 throw QgsProcessingException( invalidSourceError( parameters, QStringLiteral( "INPUT" ) ) );
80
81 return true;
82}
83
84QVariantMap QgsDeleteDuplicateGeometriesAlgorithm::processAlgorithm( const QVariantMap &parameters, QgsProcessingContext &context, QgsProcessingFeedback *feedback )
85{
86 QString destId;
87 std::unique_ptr< QgsFeatureSink > sink( parameterAsSink( parameters, QStringLiteral( "OUTPUT" ), context, destId, mSource->fields(),
88 mSource->wkbType(), mSource->sourceCrs() ) );
89 if ( !sink )
90 throw QgsProcessingException( invalidSinkError( parameters, QStringLiteral( "OUTPUT" ) ) );
91
92 QgsFeatureIterator it = mSource->getFeatures( QgsFeatureRequest().setSubsetOfAttributes( QgsAttributeList() ) );
93
94 double step = mSource->featureCount() > 0 ? 100.0 / mSource->featureCount() : 0;
95 QHash< QgsFeatureId, QgsGeometry > geometries;
96 QSet< QgsFeatureId > nullGeometryFeatures;
97 long current = 0;
98 const QgsSpatialIndex index( it, [&]( const QgsFeature & f ) ->bool
99 {
100 if ( feedback->isCanceled() )
101 return false;
102
103 if ( !f.hasGeometry() )
104 {
105 nullGeometryFeatures.insert( f.id() );
106 }
107 else
108 {
109 geometries.insert( f.id(), f.geometry() );
110 }
111
112 // overall this loop takes about 10% of time
113 current++;
114 feedback->setProgress( 0.10 * current * step );
115 return true;
116 } );
117
118 QgsFeature f;
119
120 // start by assuming everything is unique, and chop away at this list
121 QHash< QgsFeatureId, QgsGeometry > uniqueFeatures = geometries;
122 current = 0;
123 long removed = 0;
124
125 for ( auto it = geometries.constBegin(); it != geometries.constEnd(); ++it )
126 {
127 const QgsFeatureId featureId = it.key();
128 const QgsGeometry geometry = it.value();
129
130 if ( feedback->isCanceled() )
131 break;
132
133 if ( !uniqueFeatures.contains( featureId ) )
134 {
135 // feature was already marked as a duplicate
136 }
137 else
138 {
139 const QList<QgsFeatureId> candidates = index.intersects( geometry.boundingBox() );
140
141 for ( const QgsFeatureId candidateId : candidates )
142 {
143 if ( candidateId == featureId )
144 continue;
145
146 if ( !uniqueFeatures.contains( candidateId ) )
147 {
148 // candidate already marked as a duplicate (not sure if this is possible,
149 // since it would mean the current feature would also have to be a duplicate!
150 // but let's be safe!)
151 continue;
152 }
153 else if ( geometry.isGeosEqual( geometries.value( candidateId ) ) )
154 {
155 // candidate is a duplicate of feature
156 uniqueFeatures.remove( candidateId );
157 removed++;
158 }
159 }
160 }
161
162 current++;
163 feedback->setProgress( 0.80 * current * step + 10 ); // takes about 80% of time
164 }
165
166 // now, fetch all the feature attributes for the unique features only
167 // be super-smart and don't re-fetch geometries
168 QSet< QgsFeatureId > outputFeatureIds = qgis::listToSet( uniqueFeatures.keys() );
169 outputFeatureIds.unite( nullGeometryFeatures );
170 step = outputFeatureIds.empty() ? 1 : 100.0 / outputFeatureIds.size();
171
173 it = mSource->getFeatures( request );
174 current = 0;
175 while ( it.nextFeature( f ) )
176 {
177 if ( feedback->isCanceled() )
178 break;
179
180 // use already fetched geometry
181 if ( !nullGeometryFeatures.contains( f.id() ) )
182 {
183 f.setGeometry( uniqueFeatures.value( f.id() ) );
184 }
185 if ( !sink->addFeature( f, QgsFeatureSink::FastInsert ) )
186 throw QgsProcessingException( writeFeatureError( sink.get(), parameters, QStringLiteral( "OUTPUT" ) ) );
187
188 current++;
189 feedback->setProgress( 0.10 * current * step + 90 ); // takes about 10% of time
190 }
191
192 feedback->pushInfo( QObject::tr( "%n duplicate feature(s) removed", nullptr, removed ) );
193
194 QVariantMap outputs;
195 outputs.insert( QStringLiteral( "OUTPUT" ), destId );
196 outputs.insert( QStringLiteral( "DUPLICATE_COUNT" ), static_cast< long long >( removed ) );
197 outputs.insert( QStringLiteral( "RETAINED_COUNT" ), outputFeatureIds.size() );
198 return outputs;
199}
200
@ NoGeometry
Geometry is not required. It may still be returned if e.g. required for a filter condition.
Wrapper for iterator of features from vector data provider or vector layer.
bool nextFeature(QgsFeature &f)
Fetch next feature and stores in f, returns true on success.
This class wraps a request for features to a vector layer (or directly its vector data provider).
QgsFeatureRequest & setFlags(Qgis::FeatureRequestFlags flags)
Sets flags that affect how features will be fetched.
QgsFeatureRequest & setFilterFids(const QgsFeatureIds &fids)
Sets the feature IDs that should be fetched.
@ FastInsert
Use faster inserts, at the cost of updating the passed features to reflect changes made at the provid...
The feature class encapsulates a single feature including its unique ID, geometry and a list of field...
Definition: qgsfeature.h:56
bool hasGeometry() const
Returns true if the feature has an associated geometry.
Definition: qgsfeature.cpp:230
bool isCanceled() const
Tells whether the operation has been canceled already.
Definition: qgsfeedback.h:53
void setProgress(double progress)
Sets the current progress for the feedback object.
Definition: qgsfeedback.h:61
A geometry is the spatial representation of a feature.
Definition: qgsgeometry.h:162
QgsRectangle boundingBox() const
Returns the bounding box of the geometry.
bool isGeosEqual(const QgsGeometry &) const
Compares the geometry with another geometry using GEOS.
Contains information about the context in which a processing algorithm is executed.
Custom exception class for processing related exceptions.
Definition: qgsexception.h:83
Base class for providing feedback from a processing algorithm.
virtual void pushInfo(const QString &info)
Pushes a general informational message from the algorithm.
A numeric output for processing algorithms.
A feature sink output for processing algorithms.
An input feature source (such as vector layers) parameter for processing algorithms.
A spatial index for QgsFeature objects.
qint64 QgsFeatureId
64-bit feature IDs; negative numbers are used for uncommitted/newly added features.
Definition: qgsfeatureid.h:28
QList< int > QgsAttributeList
Definition: qgsfield.h:27