QGIS API Documentation  3.17.0-Master (df2c9ff931)
qgsalgorithmdetectdatasetchanges.cpp
Go to the documentation of this file.
1 /***************************************************************************
2  qgsalgorithmdetectdatasetchanges.cpp
3  -----------------------------------------
4  begin : December 2019
5  copyright : (C) 2019 by Nyall Dawson
6  email : nyall dot dawson at gmail dot com
7  ***************************************************************************/
8 
9 /***************************************************************************
10  * *
11  * This program is free software; you can redistribute it and/or modify *
12  * it under the terms of the GNU General Public License as published by *
13  * the Free Software Foundation; either version 2 of the License, or *
14  * (at your option) any later version. *
15  * *
16  ***************************************************************************/
17 
19 #include "qgsvectorlayer.h"
20 #include "qgsgeometryengine.h"
21 
23 
24 QString QgsDetectVectorChangesAlgorithm::name() const
25 {
26  return QStringLiteral( "detectvectorchanges" );
27 }
28 
29 QString QgsDetectVectorChangesAlgorithm::displayName() const
30 {
31  return QObject::tr( "Detect dataset changes" );
32 }
33 
34 QStringList QgsDetectVectorChangesAlgorithm::tags() const
35 {
36  return QObject::tr( "added,dropped,new,deleted,features,geometries,difference,delta,revised,original,version" ).split( ',' );
37 }
38 
39 QString QgsDetectVectorChangesAlgorithm::group() const
40 {
41  return QObject::tr( "Vector general" );
42 }
43 
44 QString QgsDetectVectorChangesAlgorithm::groupId() const
45 {
46  return QStringLiteral( "vectorgeneral" );
47 }
48 
49 void QgsDetectVectorChangesAlgorithm::initAlgorithm( const QVariantMap & )
50 {
51  addParameter( new QgsProcessingParameterFeatureSource( QStringLiteral( "ORIGINAL" ), QObject::tr( "Original layer" ) ) );
52  addParameter( new QgsProcessingParameterFeatureSource( QStringLiteral( "REVISED" ), QObject::tr( "Revised layer" ) ) );
53 
54  std::unique_ptr< QgsProcessingParameterField > compareAttributesParam = qgis::make_unique< QgsProcessingParameterField >( QStringLiteral( "COMPARE_ATTRIBUTES" ),
55  QObject::tr( "Attributes to consider for match (or none to compare geometry only)" ), QVariant(),
56  QStringLiteral( "ORIGINAL" ), QgsProcessingParameterField::Any, true, true );
57  compareAttributesParam->setDefaultToAllFields( true );
58  addParameter( compareAttributesParam.release() );
59 
60  std::unique_ptr< QgsProcessingParameterDefinition > matchTypeParam = qgis::make_unique< QgsProcessingParameterEnum >( QStringLiteral( "MATCH_TYPE" ),
61  QObject::tr( "Geometry comparison behavior" ),
62  QStringList() << QObject::tr( "Exact Match" )
63  << QObject::tr( "Tolerant Match (Topological Equality)" ),
64  false, 1 );
65  matchTypeParam->setFlags( matchTypeParam->flags() | QgsProcessingParameterDefinition::FlagAdvanced );
66  addParameter( matchTypeParam.release() );
67 
68  addParameter( new QgsProcessingParameterFeatureSink( QStringLiteral( "UNCHANGED" ), QObject::tr( "Unchanged features" ), QgsProcessing::TypeVectorAnyGeometry, QVariant(), true, true ) );
69  addParameter( new QgsProcessingParameterFeatureSink( QStringLiteral( "ADDED" ), QObject::tr( "Added features" ), QgsProcessing::TypeVectorAnyGeometry, QVariant(), true, true ) );
70  addParameter( new QgsProcessingParameterFeatureSink( QStringLiteral( "DELETED" ), QObject::tr( "Deleted features" ), QgsProcessing::TypeVectorAnyGeometry, QVariant(), true, true ) );
71 
72  addOutput( new QgsProcessingOutputNumber( QStringLiteral( "UNCHANGED_COUNT" ), QObject::tr( "Count of unchanged features" ) ) );
73  addOutput( new QgsProcessingOutputNumber( QStringLiteral( "ADDED_COUNT" ), QObject::tr( "Count of features added in revised layer" ) ) );
74  addOutput( new QgsProcessingOutputNumber( QStringLiteral( "DELETED_COUNT" ), QObject::tr( "Count of features deleted from original layer" ) ) );
75 }
76 
77 QString QgsDetectVectorChangesAlgorithm::shortHelpString() const
78 {
79  return QObject::tr( "This algorithm compares two vector layers, and determines which features are unchanged, added or deleted between "
80  "the two. It is designed for comparing two different versions of the same dataset.\n\n"
81  "When comparing features, the original and revised feature geometries will be compared against each other. Depending "
82  "on the Geometry Comparison Behavior setting, the comparison will either be made using an exact comparison (where "
83  "geometries must be an exact match for each other, including the order and count of vertices) or a topological "
84  "comparison only (where are geometries area considered equal if all of their component edges overlap. E.g. "
85  "lines with the same vertex locations but opposite direction will be considered equal by this method). If the topological "
86  "comparison is selected then any z or m values present in the geometries will not be compared.\n\n"
87  "By default, the algorithm compares all attributes from the original and revised features. If the Attributes to Consider for Match "
88  "parameter is changed, then only the selected attributes will be compared (e.g. allowing users to ignore a timestamp or ID field "
89  "which is expected to change between the revisions).\n\n"
90  "If any features in the original or revised layers do not have an associated geometry, then care must be taken to ensure "
91  "that these features have a unique set of attributes selected for comparison. If this condition is not met, warnings will be "
92  "raised and the resultant outputs may be misleading.\n\n"
93  "The algorithm outputs three layers, one containing all features which are considered to be unchanged between the revisions, "
94  "one containing features deleted from the original layer which are not present in the revised layer, and one containing features "
95  "add to the revised layer which are not present in the original layer." );
96 }
97 
98 QString QgsDetectVectorChangesAlgorithm::shortDescription() const
99 {
100  return QObject::tr( "Calculates features which are unchanged, added or deleted between two dataset versions." );
101 }
102 
103 QgsDetectVectorChangesAlgorithm *QgsDetectVectorChangesAlgorithm::createInstance() const
104 {
105  return new QgsDetectVectorChangesAlgorithm();
106 }
107 
108 bool QgsDetectVectorChangesAlgorithm::prepareAlgorithm( const QVariantMap &parameters, QgsProcessingContext &context, QgsProcessingFeedback *feedback )
109 {
110  mOriginal.reset( parameterAsSource( parameters, QStringLiteral( "ORIGINAL" ), context ) );
111  if ( !mOriginal )
112  throw QgsProcessingException( invalidSourceError( parameters, QStringLiteral( "ORIGINAL" ) ) );
113 
114  mRevised.reset( parameterAsSource( parameters, QStringLiteral( "REVISED" ), context ) );
115  if ( !mRevised )
116  throw QgsProcessingException( invalidSourceError( parameters, QStringLiteral( "REVISED" ) ) );
117 
118  mMatchType = static_cast< GeometryMatchType >( parameterAsEnum( parameters, QStringLiteral( "MATCH_TYPE" ), context ) );
119 
120  switch ( mMatchType )
121  {
122  case Exact:
123  if ( mOriginal->wkbType() != mRevised->wkbType() )
124  throw QgsProcessingException( QObject::tr( "Geometry type of revised layer (%1) does not match the original layer (%2). Consider using the \"Tolerant Match\" option instead." ).arg( QgsWkbTypes::displayString( mRevised->wkbType() ),
125  QgsWkbTypes::displayString( mOriginal->wkbType() ) ) );
126  break;
127 
128  case Topological:
129  if ( QgsWkbTypes::geometryType( mOriginal->wkbType() ) != QgsWkbTypes::geometryType( mRevised->wkbType() ) )
130  throw QgsProcessingException( QObject::tr( "Geometry type of revised layer (%1) does not match the original layer (%2)" ).arg( QgsWkbTypes::geometryDisplayString( QgsWkbTypes::geometryType( mRevised->wkbType() ) ),
131  QgsWkbTypes::geometryDisplayString( QgsWkbTypes::geometryType( mOriginal->wkbType() ) ) ) );
132  break;
133 
134  }
135 
136  if ( mOriginal->sourceCrs() != mRevised->sourceCrs() )
137  feedback->reportError( QObject::tr( "CRS for revised layer (%1) does not match the original layer (%2) - reprojection accuracy may affect geometry matching" ).arg( mOriginal->sourceCrs().userFriendlyIdentifier(),
138  mRevised->sourceCrs().userFriendlyIdentifier() ), false );
139 
140  mFieldsToCompare = parameterAsFields( parameters, QStringLiteral( "COMPARE_ATTRIBUTES" ), context );
141  mOriginalFieldsToCompareIndices.reserve( mFieldsToCompare.size() );
142  mRevisedFieldsToCompareIndices.reserve( mFieldsToCompare.size() );
143  QStringList missingOriginalFields;
144  QStringList missingRevisedFields;
145  for ( const QString &field : mFieldsToCompare )
146  {
147  const int originalIndex = mOriginal->fields().lookupField( field );
148  mOriginalFieldsToCompareIndices.append( originalIndex );
149  if ( originalIndex < 0 )
150  missingOriginalFields << field;
151 
152  const int revisedIndex = mRevised->fields().lookupField( field );
153  if ( revisedIndex < 0 )
154  missingRevisedFields << field;
155  mRevisedFieldsToCompareIndices.append( revisedIndex );
156  }
157 
158  if ( !missingOriginalFields.empty() )
159  throw QgsProcessingException( QObject::tr( "Original layer missing selected comparison attributes: %1" ).arg( missingOriginalFields.join( ',' ) ) );
160  if ( !missingRevisedFields.empty() )
161  throw QgsProcessingException( QObject::tr( "Revised layer missing selected comparison attributes: %1" ).arg( missingRevisedFields.join( ',' ) ) );
162 
163  return true;
164 }
165 
166 QVariantMap QgsDetectVectorChangesAlgorithm::processAlgorithm( const QVariantMap &parameters, QgsProcessingContext &context, QgsProcessingFeedback *feedback )
167 {
168  QString unchangedDestId;
169  std::unique_ptr< QgsFeatureSink > unchangedSink( parameterAsSink( parameters, QStringLiteral( "UNCHANGED" ), context, unchangedDestId, mOriginal->fields(),
170  mOriginal->wkbType(), mOriginal->sourceCrs() ) );
171  if ( !unchangedSink && parameters.value( QStringLiteral( "UNCHANGED" ) ).isValid() )
172  throw QgsProcessingException( invalidSinkError( parameters, QStringLiteral( "UNCHANGED" ) ) );
173 
174  QString addedDestId;
175  std::unique_ptr< QgsFeatureSink > addedSink( parameterAsSink( parameters, QStringLiteral( "ADDED" ), context, addedDestId, mRevised->fields(),
176  mRevised->wkbType(), mRevised->sourceCrs() ) );
177  if ( !addedSink && parameters.value( QStringLiteral( "ADDED" ) ).isValid() )
178  throw QgsProcessingException( invalidSinkError( parameters, QStringLiteral( "ADDED" ) ) );
179 
180  QString deletedDestId;
181  std::unique_ptr< QgsFeatureSink > deletedSink( parameterAsSink( parameters, QStringLiteral( "DELETED" ), context, deletedDestId, mOriginal->fields(),
182  mOriginal->wkbType(), mOriginal->sourceCrs() ) );
183  if ( !deletedSink && parameters.value( QStringLiteral( "DELETED" ) ).isValid() )
184  throw QgsProcessingException( invalidSinkError( parameters, QStringLiteral( "DELETED" ) ) );
185 
186  // first iteration: we loop through the entire original layer, building up a spatial index of ALL original geometries
187  // and collecting the original geometries themselves along with the attributes to compare
188  QgsFeatureRequest request;
189  request.setSubsetOfAttributes( mOriginalFieldsToCompareIndices );
190 
191  QgsFeatureIterator it = mOriginal->getFeatures( request );
192 
193  double step = mOriginal->featureCount() > 0 ? 100.0 / mOriginal->featureCount() : 0;
194  QHash< QgsFeatureId, QgsGeometry > originalGeometries;
195  QHash< QgsFeatureId, QgsAttributes > originalAttributes;
196  QHash< QgsAttributes, QgsFeatureId > originalNullGeometryAttributes;
197  long current = 0;
198 
199  QgsAttributes attrs;
200  attrs.resize( mFieldsToCompare.size() );
201 
202  QgsSpatialIndex index( it, [&]( const QgsFeature & f )->bool
203  {
204  if ( feedback->isCanceled() )
205  return false;
206 
207  if ( f.hasGeometry() )
208  {
209  originalGeometries.insert( f.id(), f.geometry() );
210  }
211 
212  if ( !mFieldsToCompare.empty() )
213  {
214  int idx = 0;
215  for ( int field : mOriginalFieldsToCompareIndices )
216  {
217  attrs[idx++] = f.attributes().at( field );
218  }
219  originalAttributes.insert( f.id(), attrs );
220  }
221 
222  if ( !f.hasGeometry() )
223  {
224  if ( originalNullGeometryAttributes.contains( attrs ) )
225  {
226  feedback->reportError( QObject::tr( "A non-unique set of comparison attributes was found for "
227  "one or more features without geometries - results may be misleading (features %1 and %2)" ).arg( f.id() ).arg( originalNullGeometryAttributes.value( attrs ) ) );
228  }
229  else
230  {
231  originalNullGeometryAttributes.insert( attrs, f.id() );
232  }
233  }
234 
235  // overall this loop takes about 10% of time
236  current++;
237  feedback->setProgress( 0.10 * current * step );
238  return true;
239  } );
240 
241  QSet<QgsFeatureId> unchangedOriginalIds;
242  QSet<QgsFeatureId> addedRevisedIds;
243  current = 0;
244 
245  // second iteration: we loop through ALL revised features, checking whether each is a match for a geometry from the
246  // original set. If so, check if the feature is unchanged. If there's no match with the original features, we mark it as an "added" feature
247  step = mRevised->featureCount() > 0 ? 100.0 / mRevised->featureCount() : 0;
248  QgsFeatureRequest revisedRequest = QgsFeatureRequest().setDestinationCrs( mOriginal->sourceCrs(), context.transformContext() );
249  revisedRequest.setSubsetOfAttributes( mRevisedFieldsToCompareIndices );
250  it = mRevised->getFeatures( revisedRequest );
251  QgsFeature revisedFeature;
252  while ( it.nextFeature( revisedFeature ) )
253  {
254  if ( feedback->isCanceled() )
255  break;
256 
257  int idx = 0;
258  for ( int field : mRevisedFieldsToCompareIndices )
259  {
260  attrs[idx++] = revisedFeature.attributes().at( field );
261  }
262 
263  bool matched = false;
264 
265  if ( !revisedFeature.hasGeometry() )
266  {
267  if ( originalNullGeometryAttributes.contains( attrs ) )
268  {
269  // found a match for feature
270  unchangedOriginalIds.insert( originalNullGeometryAttributes.value( attrs ) );
271  matched = true;
272  }
273  }
274  else
275  {
276  // can we match this feature?
277  const QList<QgsFeatureId> candidates = index.intersects( revisedFeature.geometry().boundingBox() );
278 
279  // lazy evaluate -- there may be NO candidates!
280  QgsGeometry revised;
281 
282  for ( const QgsFeatureId candidateId : candidates )
283  {
284  if ( unchangedOriginalIds.contains( candidateId ) )
285  {
286  // already matched this original feature
287  continue;
288  }
289 
290  // attribute comparison is faster to do first, if desired
291  if ( !mFieldsToCompare.empty() )
292  {
293  if ( attrs != originalAttributes[ candidateId ] )
294  {
295  // attributes don't match, so candidates is not a match
296  continue;
297  }
298  }
299 
300  QgsGeometry original = originalGeometries.value( candidateId );
301  // lazy evaluation
302  if ( revised.isNull() )
303  {
304  revised = revisedFeature.geometry();
305  // drop z/m if not wanted for match
306  switch ( mMatchType )
307  {
308  case Topological:
309  {
310  revised.get()->dropMValue();
311  revised.get()->dropZValue();
312  original.get()->dropMValue();
313  original.get()->dropZValue();
314  break;
315  }
316 
317  case Exact:
318  break;
319  }
320  }
321 
322  bool geometryMatch = false;
323  switch ( mMatchType )
324  {
325  case Topological:
326  {
327  geometryMatch = revised.isGeosEqual( original );
328  break;
329  }
330 
331  case Exact:
332  geometryMatch = revised.equals( original );
333  break;
334  }
335 
336  if ( geometryMatch )
337  {
338  // candidate is a match for feature
339  unchangedOriginalIds.insert( candidateId );
340  matched = true;
341  break;
342  }
343  }
344  }
345 
346  if ( !matched )
347  {
348  // new feature
349  addedRevisedIds.insert( revisedFeature.id() );
350  }
351 
352  current++;
353  feedback->setProgress( 0.70 * current * step + 10 ); // takes about 70% of time
354  }
355 
356  // third iteration: iterate back over the original features, and direct them to the appropriate sink.
357  // If they were marked as unchanged during the second iteration, we put them in the unchanged sink. Otherwise
358  // they are placed into the deleted sink.
359  step = mOriginal->featureCount() > 0 ? 100.0 / mOriginal->featureCount() : 0;
360 
362  it = mOriginal->getFeatures( request );
363  current = 0;
364  long deleted = 0;
365  QgsFeature f;
366  while ( it.nextFeature( f ) )
367  {
368  if ( feedback->isCanceled() )
369  break;
370 
371  // use already fetched geometry
372  f.setGeometry( originalGeometries.value( f.id(), QgsGeometry() ) );
373 
374  if ( unchangedOriginalIds.contains( f.id() ) )
375  {
376  // unchanged
377  if ( unchangedSink )
378  unchangedSink->addFeature( f, QgsFeatureSink::FastInsert );
379  }
380  else
381  {
382  // deleted feature
383  if ( deletedSink )
384  deletedSink->addFeature( f, QgsFeatureSink::FastInsert );
385  deleted++;
386  }
387 
388  current++;
389  feedback->setProgress( 0.10 * current * step + 80 ); // takes about 10% of time
390  }
391 
392  // forth iteration: collect all added features and add them to the added sink
393  // NOTE: while we could potentially do this as part of the second iteration and save some time, we instead
394  // do this here using a brand new request because the second iteration
395  // is fetching reprojected features and we ideally want geometries from the revised layer's actual CRS only here!
396  // also, the second iteration is only fetching the actual attributes used in the comparison, whereas we want
397  // to include all attributes in the "added" output
398  if ( addedSink )
399  {
400  step = addedRevisedIds.size() > 0 ? 100.0 / addedRevisedIds.size() : 0;
401  it = mRevised->getFeatures( QgsFeatureRequest().setFilterFids( addedRevisedIds ) );
402  current = 0;
403  while ( it.nextFeature( f ) )
404  {
405  if ( feedback->isCanceled() )
406  break;
407 
408  // added feature
409  addedSink->addFeature( f, QgsFeatureSink::FastInsert );
410 
411  current++;
412  feedback->setProgress( 0.10 * current * step + 90 ); // takes about 10% of time
413  }
414  }
415  feedback->setProgress( 100 );
416 
417  feedback->pushInfo( QObject::tr( "%1 features unchanged" ).arg( unchangedOriginalIds.size() ) );
418  feedback->pushInfo( QObject::tr( "%1 features added" ).arg( addedRevisedIds.size() ) );
419  feedback->pushInfo( QObject::tr( "%1 features deleted" ).arg( deleted ) );
420 
421  QVariantMap outputs;
422  outputs.insert( QStringLiteral( "UNCHANGED" ), unchangedDestId );
423  outputs.insert( QStringLiteral( "ADDED" ), addedDestId );
424  outputs.insert( QStringLiteral( "DELETED" ), deletedDestId );
425  outputs.insert( QStringLiteral( "UNCHANGED_COUNT" ), static_cast< long long >( unchangedOriginalIds.size() ) );
426  outputs.insert( QStringLiteral( "ADDED_COUNT" ), static_cast< long long >( addedRevisedIds.size() ) );
427  outputs.insert( QStringLiteral( "DELETED_COUNT" ), static_cast< long long >( deleted ) );
428 
429  return outputs;
430 }
431 
QgsFeatureRequest & setDestinationCrs(const QgsCoordinateReferenceSystem &crs, const QgsCoordinateTransformContext &context)
Sets the destination crs for feature's geometries.
QgsFeatureId id
Definition: qgsfeature.h:64
Wrapper for iterator of features from vector data provider or vector layer.
Use faster inserts, at the cost of updating the passed features to reflect changes made at the provid...
Base class for providing feedback from a processing algorithm.
Parameter is an advanced parameter which should be hidden from users by default.
void setProgress(double progress)
Sets the current progress for the feedback object.
Definition: qgsfeedback.h:62
qint64 QgsFeatureId
64 bit feature ids negative numbers are used for uncommitted/newly added features ...
Definition: qgsfeatureid.h:28
QgsFeatureRequest & setSubsetOfAttributes(const QgsAttributeList &attrs)
Set a subset of attributes that will be fetched.
A geometry is the spatial representation of a feature.
Definition: qgsgeometry.h:123
A numeric output for processing algorithms.
static QString geometryDisplayString(GeometryType type) SIP_HOLDGIL
Returns a display string for a geometry type.
The feature class encapsulates a single feature including its id, geometry and a list of field/values...
Definition: qgsfeature.h:55
bool hasGeometry() const
Returns true if the feature has an associated geometry.
Definition: qgsfeature.cpp:204
A feature sink output for processing algorithms.
bool isGeosEqual(const QgsGeometry &) const
Compares the geometry with another geometry using GEOS.
bool equals(const QgsGeometry &geometry) const
Test if this geometry is exactly equal to another geometry.
virtual void pushInfo(const QString &info)
Pushes a general informational message from the algorithm.
This class wraps a request for features to a vector layer (or directly its vector data provider)...
Custom exception class for processing related exceptions.
Definition: qgsexception.h:82
QgsCoordinateTransformContext transformContext() const
Returns the coordinate transform context.
QgsAbstractGeometry * get()
Returns a modifiable (non-const) reference to the underlying abstract geometry primitive.
A spatial index for QgsFeature objects.
bool isCanceled() const
Tells whether the operation has been canceled already.
Definition: qgsfeedback.h:53
An input feature source (such as vector layers) parameter for processing algorithms.
QgsRectangle boundingBox() const
Returns the bounding box of the geometry.
void setGeometry(const QgsGeometry &geometry)
Set the feature's geometry.
Definition: qgsfeature.cpp:144
virtual void reportError(const QString &error, bool fatalError=false)
Reports that the algorithm encountered an error while executing.
QgsGeometry geometry
Definition: qgsfeature.h:67
virtual bool dropMValue()=0
Drops any measure values which exist in the geometry.
bool nextFeature(QgsFeature &f)
Geometry is not required. It may still be returned if e.g. required for a filter condition.
A vector of attributes.
Definition: qgsattributes.h:57
static QString displayString(Type type) SIP_HOLDGIL
Returns a non-translated display string type for a WKB type, e.g., the geometry name used in WKT geom...
Contains information about the context in which a processing algorithm is executed.
const QgsField & field
Definition: qgsfield.h:471
Any vector layer with geometry.
Definition: qgsprocessing.h:47
static GeometryType geometryType(Type type) SIP_HOLDGIL
Returns the geometry type for a WKB type, e.g., both MultiPolygon and CurvePolygon would have a Polyg...
Definition: qgswkbtypes.h:938
virtual bool dropZValue()=0
Drops any z-dimensions which exist in the geometry.
QgsAttributes attributes
Definition: qgsfeature.h:65
QgsFeatureRequest & setFlags(QgsFeatureRequest::Flags flags)
Sets flags that affect how features will be fetched.