QGIS API Documentation  3.4.15-Madeira (e83d02e274)
qgsalgorithmdbscanclustering.cpp
Go to the documentation of this file.
1 /***************************************************************************
2  qgsalgorithmdbscanclustering.cpp
3  ---------------------
4  begin : July 2018
5  copyright : (C) 2018 by Nyall Dawson
6  email : nyall dot dawson at gmail dot com
7  ***************************************************************************/
8 
9 /***************************************************************************
10  * *
11  * This program is free software; you can redistribute it and/or modify *
12  * it under the terms of the GNU General Public License as published by *
13  * the Free Software Foundation; either version 2 of the License, or *
14  * (at your option) any later version. *
15  * *
16  ***************************************************************************/
17 
19 #include "qgsspatialindexkdbush.h"
20 #include <unordered_set>
21 
23 
24 QString QgsDbscanClusteringAlgorithm::name() const
25 {
26  return QStringLiteral( "dbscanclustering" );
27 }
28 
29 QString QgsDbscanClusteringAlgorithm::displayName() const
30 {
31  return QObject::tr( "DBSCAN clustering" );
32 }
33 
34 QString QgsDbscanClusteringAlgorithm::shortDescription() const
35 {
36  return QObject::tr( "Clusters point features using a density based scan algorithm." );
37 }
38 
39 QStringList QgsDbscanClusteringAlgorithm::tags() const
40 {
41  return QObject::tr( "clustering,clusters,density,based,points" ).split( ',' );
42 }
43 
44 QString QgsDbscanClusteringAlgorithm::group() const
45 {
46  return QObject::tr( "Vector analysis" );
47 }
48 
49 QString QgsDbscanClusteringAlgorithm::groupId() const
50 {
51  return QStringLiteral( "vectoranalysis" );
52 }
53 
54 void QgsDbscanClusteringAlgorithm::initAlgorithm( const QVariantMap & )
55 {
56  addParameter( new QgsProcessingParameterFeatureSource( QStringLiteral( "INPUT" ),
57  QObject::tr( "Input layer" ), QList< int >() << QgsProcessing::TypeVectorPoint ) );
58  addParameter( new QgsProcessingParameterNumber( QStringLiteral( "MIN_SIZE" ), QObject::tr( "Minimum cluster size" ),
60  addParameter( new QgsProcessingParameterDistance( QStringLiteral( "EPS" ),
61  QObject::tr( "Maximum distance between clustered points" ), 1, QStringLiteral( "INPUT" ), false, 0 ) );
62 
63  auto dbscanStarParam = qgis::make_unique<QgsProcessingParameterBoolean>( QStringLiteral( "DBSCAN*" ),
64  QObject::tr( "Treat border points as noise (DBSCAN*)" ), false, true );
65  dbscanStarParam->setFlags( dbscanStarParam->flags() | QgsProcessingParameterDefinition::FlagAdvanced );
66  addParameter( dbscanStarParam.release() );
67 
68  auto fieldNameParam = qgis::make_unique<QgsProcessingParameterString>( QStringLiteral( "FIELD_NAME" ),
69  QObject::tr( "Cluster field name" ), QStringLiteral( "CLUSTER_ID" ) );
70  fieldNameParam->setFlags( fieldNameParam->flags() | QgsProcessingParameterDefinition::FlagAdvanced );
71  addParameter( fieldNameParam.release() );
72 
73  addParameter( new QgsProcessingParameterFeatureSink( QStringLiteral( "OUTPUT" ), QObject::tr( "Clusters" ), QgsProcessing::TypeVectorPoint ) );
74 
75  addOutput( new QgsProcessingOutputNumber( QStringLiteral( "NUM_CLUSTERS" ), QObject::tr( "Number of clusters" ) ) );
76 }
77 
78 QString QgsDbscanClusteringAlgorithm::shortHelpString() const
79 {
80  return QObject::tr( "Clusters point features based on a 2D implementation of Density-based spatial clustering of applications with noise (DBSCAN) algorithm.\n\n"
81  "The algorithm requires two parameters, a minimum cluster size (“minPts”), and the maximum distance allowed between clustered points (“eps”)." );
82 }
83 
84 QgsDbscanClusteringAlgorithm *QgsDbscanClusteringAlgorithm::createInstance() const
85 {
86  return new QgsDbscanClusteringAlgorithm();
87 }
88 
89 struct KDBushDataEqualById
90 {
91  bool operator()( const QgsSpatialIndexKDBushData &a, const QgsSpatialIndexKDBushData &b ) const
92  {
93  return a.id == b.id;
94  }
95 };
96 
97 struct KDBushDataHashById
98 {
99  std::size_t operator()( const QgsSpatialIndexKDBushData &a ) const
100  {
101  return std::hash< QgsFeatureId > {}( a.id );
102  }
103 };
104 
105 QVariantMap QgsDbscanClusteringAlgorithm::processAlgorithm( const QVariantMap &parameters, QgsProcessingContext &context, QgsProcessingFeedback *feedback )
106 {
107  std::unique_ptr< QgsProcessingFeatureSource > source( parameterAsSource( parameters, QStringLiteral( "INPUT" ), context ) );
108  if ( !source )
109  throw QgsProcessingException( invalidSourceError( parameters, QStringLiteral( "INPUT" ) ) );
110 
111  const std::size_t minSize = static_cast< std::size_t>( parameterAsInt( parameters, QStringLiteral( "MIN_SIZE" ), context ) );
112  const double eps = parameterAsDouble( parameters, QStringLiteral( "EPS" ), context );
113  const bool borderPointsAreNoise = parameterAsBool( parameters, QStringLiteral( "DBSCAN*" ), context );
114 
115  QgsFields outputFields = source->fields();
116  const QString clusterFieldName = parameterAsString( parameters, QStringLiteral( "FIELD_NAME" ), context );
117  QgsFields newFields;
118  newFields.append( QgsField( clusterFieldName, QVariant::Int ) );
119  outputFields = QgsProcessingUtils::combineFields( outputFields, newFields );
120 
121  QString dest;
122  std::unique_ptr< QgsFeatureSink > sink( parameterAsSink( parameters, QStringLiteral( "OUTPUT" ), context, dest, outputFields, source->wkbType(), source->sourceCrs() ) );
123  if ( !sink )
124  throw QgsProcessingException( invalidSinkError( parameters, QStringLiteral( "OUTPUT" ) ) );
125 
126  // build spatial index
127  feedback->pushInfo( QObject::tr( "Building spatial index" ) );
128  QgsSpatialIndexKDBush index( *source, feedback );
129  if ( feedback->isCanceled() )
130  return QVariantMap();
131 
132  // dbscan!
133  feedback->pushInfo( QObject::tr( "Analysing clusters" ) );
134  std::unordered_map< QgsFeatureId, int> idToCluster;
135  idToCluster.reserve( index.size() );
136  QgsFeatureIterator features = source->getFeatures( QgsFeatureRequest().setNoAttributes() );
137  const long featureCount = source->featureCount();
138 
139  int clusterCount = 0;
140  dbscan( minSize, eps, borderPointsAreNoise, featureCount, features, index, idToCluster, clusterCount, feedback );
141 
142  // write clusters
143  const double writeStep = featureCount > 0 ? 10.0 / featureCount : 1;
144  features = source->getFeatures();
145  int i = 0;
146  QgsFeature feat;
147  while ( features.nextFeature( feat ) )
148  {
149  i++;
150  if ( feedback->isCanceled() )
151  {
152  break;
153  }
154 
155  feedback->setProgress( 90 + i * writeStep );
156  QgsAttributes attr = feat.attributes();
157  auto cluster = idToCluster.find( feat.id() );
158  if ( cluster != idToCluster.end() )
159  {
160  attr << cluster->second;
161  }
162  else
163  {
164  attr << QVariant();
165  }
166  feat.setAttributes( attr );
167  sink->addFeature( feat, QgsFeatureSink::FastInsert );
168  }
169 
170  QVariantMap outputs;
171  outputs.insert( QStringLiteral( "OUTPUT" ), dest );
172  outputs.insert( QStringLiteral( "NUM_CLUSTERS" ), clusterCount );
173  return outputs;
174 }
175 
176 
177 void QgsDbscanClusteringAlgorithm::dbscan( const std::size_t minSize,
178  const double eps,
179  const bool borderPointsAreNoise,
180  const long featureCount,
181  QgsFeatureIterator features,
182  QgsSpatialIndexKDBush &index,
183  std::unordered_map< QgsFeatureId, int> &idToCluster,
184  int &clusterCount,
185  QgsProcessingFeedback *feedback )
186 {
187  const double step = featureCount > 0 ? 90.0 / featureCount : 1;
188 
189  std::unordered_set< QgsFeatureId > visited;
190  visited.reserve( index.size() );
191 
192  QgsFeature feat;
193  int i = 0;
194  clusterCount = 0;
195 
196  while ( features.nextFeature( feat ) )
197  {
198  if ( feedback->isCanceled() )
199  {
200  break;
201  }
202 
203  if ( !feat.hasGeometry() )
204  {
205  feedback->setProgress( ++i * step );
206  continue;
207  }
208 
209  if ( visited.find( feat.id() ) != visited.end() )
210  {
211  // already visited!
212  continue;
213  }
214 
215  QgsPointXY point;
217  point = QgsPointXY( *qgsgeometry_cast< const QgsPoint * >( feat.geometry().constGet() ) );
218  else
219  {
220  // not a point geometry
221  feedback->reportError( QObject::tr( "Feature %1 is a %2 feature, not a point." ).arg( feat.id() ).arg( QgsWkbTypes::displayString( feat.geometry().wkbType() ) ) );
222  feedback->setProgress( ++i * step );
223  continue;
224  }
225 
226  std::unordered_set< QgsSpatialIndexKDBushData, KDBushDataHashById, KDBushDataEqualById> within;
227 
228  if ( minSize > 1 )
229  {
230  index.within( point, eps, [ &within]( const QgsSpatialIndexKDBushData & data )
231  {
232  within.insert( data );
233  } );
234  if ( within.size() < minSize )
235  continue;
236 
237  visited.insert( feat.id() );
238  }
239  else
240  {
241  // optimised case for minSize == 1, we can skip the initial check
242  within.insert( QgsSpatialIndexKDBushData( feat.id(), point.x(), point.y() ) );
243  }
244 
245  // start new cluster
246  clusterCount++;
247  idToCluster[ feat.id() ] = clusterCount;
248  feedback->setProgress( ++i * step );
249 
250  while ( !within.empty() )
251  {
252  if ( feedback->isCanceled() )
253  {
254  break;
255  }
256 
257  QgsSpatialIndexKDBushData j = *within.begin();
258  within.erase( within.begin() );
259 
260  if ( visited.find( j.id ) != visited.end() )
261  {
262  // already visited!
263  continue;
264  }
265 
266  visited.insert( j.id );
267  feedback->setProgress( ++i * step );
268 
269  // check from this point
270  QgsPointXY point2 = j.point();
271 
272  std::unordered_set< QgsSpatialIndexKDBushData, KDBushDataHashById, KDBushDataEqualById > within2;
273  index.within( point2, eps, [&within2]( const QgsSpatialIndexKDBushData & data )
274  {
275  within2.insert( data );
276  } );
277  if ( within2.size() >= minSize )
278  {
279  // expand neighbourhood
280  std::copy_if( within2.begin(),
281  within2.end(),
282  std::inserter( within, within.end() ),
283  [&visited]( const QgsSpatialIndexKDBushData & needle )
284  {
285  return visited.find( needle.id ) == visited.end();
286  } );
287  }
288  if ( !borderPointsAreNoise || within2.size() >= minSize )
289  {
290  idToCluster[ j.id ] = clusterCount;
291  }
292  }
293  }
294 }
295 
297 
298 
QgsFeatureId id
Definition: qgsfeature.h:64
Wrapper for iterator of features from vector data provider or vector layer.
bool isCanceled() const
Tells whether the operation has been canceled already.
Definition: qgsfeedback.h:54
Use faster inserts, at the cost of updating the passed features to reflect changes made at the provid...
A container for data stored inside a QgsSpatialIndexKDBush index.
A very fast static spatial index for 2D points based on a flat KD-tree.
Base class for providing feedback from a processing algorithm.
Parameter is an advanced parameter which should be hidden from users by default.
double y
Definition: qgspointxy.h:48
A class to represent a 2D point.
Definition: qgspointxy.h:43
void setProgress(double progress)
Sets the current progress for the feedback object.
Definition: qgsfeedback.h:63
Container of fields for a vector layer.
Definition: qgsfields.h:42
void setAttributes(const QgsAttributes &attrs)
Sets the feature&#39;s attributes.
Definition: qgsfeature.cpp:127
A numeric output for processing algorithms.
QList< QgsSpatialIndexKDBushData > within(const QgsPointXY &point, double radius) const
Returns the list of features which are within the given search radius of point.
The feature class encapsulates a single feature including its id, geometry and a list of field/values...
Definition: qgsfeature.h:55
A feature sink output for processing algorithms.
const QgsAbstractGeometry * constGet() const
Returns a non-modifiable (const) reference to the underlying abstract geometry primitive.
static QgsFields combineFields(const QgsFields &fieldsA, const QgsFields &fieldsB)
Combines two field lists, avoiding duplicate field names (in a case-insensitive manner).
A double numeric parameter for distance values.
This class wraps a request for features to a vector layer (or directly its vector data provider)...
Custom exception class for processing related exceptions.
Definition: qgsexception.h:82
bool append(const QgsField &field, FieldOrigin origin=OriginProvider, int originIndex=-1)
Appends a field. The field must have unique name, otherwise it is rejected (returns false) ...
Definition: qgsfields.cpp:59
Encapsulate a field in an attribute table or data source.
Definition: qgsfield.h:48
A numeric parameter for processing algorithms.
double x
Definition: qgspointxy.h:47
QgsPointXY point() const
Returns the indexed point.
Vector point layers.
Definition: qgsprocessing.h:48
An input feature source (such as vector layers) parameter for processing algorithms.
qgssize size() const
Returns the size of the index, i.e.
bool hasGeometry() const
Returns true if the feature has an associated geometry.
Definition: qgsfeature.cpp:197
static QString displayString(Type type)
Returns a display string type for a WKB type, e.g., the geometry name used in WKT geometry representa...
QgsGeometry geometry
Definition: qgsfeature.h:67
bool nextFeature(QgsFeature &f)
A vector of attributes.
Definition: qgsattributes.h:57
static Type flatType(Type type)
Returns the flat type for a WKB type.
Definition: qgswkbtypes.h:565
Contains information about the context in which a processing algorithm is executed.
QgsWkbTypes::Type wkbType() const
Returns type of the geometry as a WKB type (point / linestring / polygon etc.)
virtual void reportError(const QString &error, bool fatalError=false)
Reports that the algorithm encountered an error while executing.
QgsAttributes attributes
Definition: qgsfeature.h:65
virtual void pushInfo(const QString &info)
Pushes a general informational message from the algorithm.