QGIS API Documentation  2.99.0-Master (69af2f5)
qgsgmlschema.cpp
Go to the documentation of this file.
/***************************************************************************
    qgsgmlschema.cpp
    --------------------------------------
    Date                 : February 2013
    Copyright            : (C) 2013 by Radim Blazek
    Email                : [email protected]
 ***************************************************************************
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 2 of the License, or     *
 *   (at your option) any later version.                                   *
 *                                                                         *
 ***************************************************************************/
15 #include "qgsgmlschema.h"
16 #include "qgsrectangle.h"
18 #include "qgserror.h"
19 #include "qgsgeometry.h"
20 #include "qgslogger.h"
22 #include <QBuffer>
23 #include <QList>
24 #include <QNetworkRequest>
25 #include <QNetworkReply>
26 #include <QProgressDialog>
27 #include <QSet>
28 #include <QSettings>
29 #include <QUrl>
30 
31 #include <limits>
32 
33 const char NS_SEPARATOR = '?';
34 const QString GML_NAMESPACE = QStringLiteral( "http://www.opengis.net/gml" );
35 
37 {
38 }
39 
40 QgsGmlFeatureClass::QgsGmlFeatureClass( const QString &name, const QString &path )
41  : mName( name )
42  , mPath( path )
43 {
44 }
45 
46 int QgsGmlFeatureClass::fieldIndex( const QString &name )
47 {
48  for ( int i = 0; i < mFields.size(); i++ )
49  {
50  if ( mFields[i].name() == name ) return i;
51  }
52  return -1;
53 }
54 
55 // --------------------------- QgsGmlSchema -------------------------------
57  : QObject()
58  , mCurrentFeature( nullptr )
59  , mFeatureCount( 0 )
60  , mLevel( 0 )
61  , mSkipLevel( std::numeric_limits<int>::max() )
62 {
63  mGeometryTypes << QStringLiteral( "Point" ) << QStringLiteral( "MultiPoint" )
64  << QStringLiteral( "LineString" ) << QStringLiteral( "MultiLineString" )
65  << QStringLiteral( "Polygon" ) << QStringLiteral( "MultiPolygon" );
66 }
67 
68 QString QgsGmlSchema::readAttribute( const QString &attributeName, const XML_Char **attr ) const
69 {
70  int i = 0;
71  while ( attr[i] )
72  {
73  if ( attributeName.compare( attr[i] ) == 0 )
74  {
75  return QString( attr[i + 1] );
76  }
77  i += 2;
78  }
79  return QString();
80 }
81 
82 bool QgsGmlSchema::parseXSD( const QByteArray &xml )
83 {
84  QDomDocument dom;
85  QString errorMsg;
86  int errorLine;
87  int errorColumn;
88  if ( !dom.setContent( xml, false, &errorMsg, &errorLine, &errorColumn ) )
89  {
90  // TODO: error
91  return false;
92  }
93 
94  QDomElement docElem = dom.documentElement();
95 
96  QList<QDomElement> elementElements = domElements( docElem, QStringLiteral( "element" ) );
97 
98  //QgsDebugMsg( QString( "%1 elemets read" ).arg( elementElements.size() ) );
99 
100  Q_FOREACH ( const QDomElement &elementElement, elementElements )
101  {
102  QString name = elementElement.attribute( QStringLiteral( "name" ) );
103  QString type = elementElement.attribute( QStringLiteral( "type" ) );
104 
105  QString gmlBaseType = xsdComplexTypeGmlBaseType( docElem, stripNS( type ) );
106  //QgsDebugMsg( QString( "gmlBaseType = %1" ).arg( gmlBaseType ) );
107  //QgsDebugMsg( QString( "name = %1 gmlBaseType = %2" ).arg( name ).arg( gmlBaseType ) );
108  // We should only use gml:AbstractFeatureType descendants which have
109  // ancestor listed in gml:FeatureAssociationType (featureMember) descendant
110  // But we could only loose some data if XSD was not correct, I think.
111 
112  if ( gmlBaseType == QLatin1String( "AbstractFeatureType" ) )
113  {
114  // Get feature type definition
115  QgsGmlFeatureClass featureClass( name, QLatin1String( "" ) );
116  xsdFeatureClass( docElem, stripNS( type ), featureClass );
117  mFeatureClassMap.insert( name, featureClass );
118  }
119  // A feature may have more geometries, we take just the first one
120  }
121 
122  return true;
123 }
124 
125 bool QgsGmlSchema::xsdFeatureClass( const QDomElement &element, const QString &typeName, QgsGmlFeatureClass &featureClass )
126 {
127  //QgsDebugMsg("typeName = " + typeName );
128  QDomElement complexTypeElement = domElement( element, QStringLiteral( "complexType" ), QStringLiteral( "name" ), typeName );
129  if ( complexTypeElement.isNull() ) return false;
130 
131  // extension or restriction
132  QDomElement extrest = domElement( complexTypeElement, QStringLiteral( "complexContent.extension" ) );
133  if ( extrest.isNull() )
134  {
135  extrest = domElement( complexTypeElement, QStringLiteral( "complexContent.restriction" ) );
136  }
137  if ( extrest.isNull() ) return false;
138 
139  QString extrestName = extrest.attribute( QStringLiteral( "base" ) );
140  if ( extrestName == QLatin1String( "gml:AbstractFeatureType" ) )
141  {
142  // In theory we should add gml:AbstractFeatureType default attributes gml:description
143  // and gml:name but it does not seem to be a common practice and we would probably
144  // confuse most users
145  }
146  else
147  {
148  // Get attributes from extrest
149  if ( !xsdFeatureClass( element, stripNS( extrestName ), featureClass ) ) return false;
150  }
151 
152  // Supported geometry types
153  QStringList geometryPropertyTypes;
154  Q_FOREACH ( const QString &geom, mGeometryTypes )
155  {
156  geometryPropertyTypes << geom + "PropertyType";
157  }
158 
159  QStringList geometryAliases;
160  geometryAliases << QStringLiteral( "location" ) << QStringLiteral( "centerOf" ) << QStringLiteral( "position" ) << QStringLiteral( "extentOf" )
161  << QStringLiteral( "coverage" ) << QStringLiteral( "edgeOf" ) << QStringLiteral( "centerLineOf" ) << QStringLiteral( "multiLocation" )
162  << QStringLiteral( "multiCenterOf" ) << QStringLiteral( "multiPosition" ) << QStringLiteral( "multiCenterLineOf" )
163  << QStringLiteral( "multiEdgeOf" ) << QStringLiteral( "multiCoverage" ) << QStringLiteral( "multiExtentOf" );
164 
165  // Add attributes from current comple type
166  QList<QDomElement> sequenceElements = domElements( extrest, QStringLiteral( "sequence.element" ) );
167  Q_FOREACH ( const QDomElement &sequenceElement, sequenceElements )
168  {
169  QString fieldName = sequenceElement.attribute( QStringLiteral( "name" ) );
170  QString fieldTypeName = stripNS( sequenceElement.attribute( QStringLiteral( "type" ) ) );
171  QString ref = sequenceElement.attribute( QStringLiteral( "ref" ) );
172  //QgsDebugMsg ( QString("fieldName = %1 fieldTypeName = %2 ref = %3").arg(fieldName).arg(fieldTypeName).arg(ref) );
173 
174  if ( !ref.isEmpty() )
175  {
176  if ( ref.startsWith( QLatin1String( "gml:" ) ) )
177  {
178  if ( geometryAliases.contains( stripNS( ref ) ) )
179  {
180  featureClass.geometryAttributes().append( stripNS( ref ) );
181  }
182  else
183  {
184  QgsDebugMsg( QString( "Unknown referenced GML element: %1" ).arg( ref ) );
185  }
186  }
187  else
188  {
189  // TODO: get type from referenced element
190  QgsDebugMsg( QString( "field %1.%2 is referencing %3 - not supported" ).arg( typeName, fieldName ) );
191  }
192  continue;
193  }
194 
195  if ( fieldName.isEmpty() )
196  {
197  QgsDebugMsg( QString( "field in %1 without name" ).arg( typeName ) );
198  continue;
199  }
200 
201  // type is either type attribute
202  if ( fieldTypeName.isEmpty() )
203  {
204  // or type is inheriting from xs:simpleType
205  QDomElement sequenceElementRestriction = domElement( sequenceElement, QStringLiteral( "simpleType.restriction" ) );
206  fieldTypeName = stripNS( sequenceElementRestriction.attribute( QStringLiteral( "base" ) ) );
207  }
208 
209  QVariant::Type fieldType = QVariant::String;
210  if ( fieldTypeName.isEmpty() )
211  {
212  QgsDebugMsg( QString( "Cannot get %1.%2 field type" ).arg( typeName, fieldName ) );
213  }
214  else
215  {
216  if ( geometryPropertyTypes.contains( fieldTypeName ) )
217  {
218  // Geometry attribute
219  featureClass.geometryAttributes().append( fieldName );
220  continue;
221  }
222 
223  if ( fieldTypeName == QLatin1String( "decimal" ) )
224  {
225  fieldType = QVariant::Double;
226  }
227  else if ( fieldTypeName == QLatin1String( "integer" ) )
228  {
229  fieldType = QVariant::Int;
230  }
231  }
232 
233  QgsField field( fieldName, fieldType, fieldTypeName );
234  featureClass.fields().append( field );
235  }
236 
237  return true;
238 }
239 
240 QString QgsGmlSchema::xsdComplexTypeGmlBaseType( const QDomElement &element, const QString &name )
241 {
242  //QgsDebugMsg("name = " + name );
243  QDomElement complexTypeElement = domElement( element, QStringLiteral( "complexType" ), QStringLiteral( "name" ), name );
244  if ( complexTypeElement.isNull() ) return QLatin1String( "" );
245 
246  QDomElement extrest = domElement( complexTypeElement, QStringLiteral( "complexContent.extension" ) );
247  if ( extrest.isNull() )
248  {
249  extrest = domElement( complexTypeElement, QStringLiteral( "complexContent.restriction" ) );
250  }
251  if ( extrest.isNull() ) return QLatin1String( "" );
252 
253  QString extrestName = extrest.attribute( QStringLiteral( "base" ) );
254  if ( extrestName.startsWith( QLatin1String( "gml:" ) ) )
255  {
256  // GML base type found
257  return stripNS( extrestName );
258  }
259  // Continue recursively until GML base type is reached
260  return xsdComplexTypeGmlBaseType( element, stripNS( extrestName ) );
261 }
262 
263 QString QgsGmlSchema::stripNS( const QString &name )
264 {
265  return name.contains( ':' ) ? name.section( ':', 1 ) : name;
266 }
267 
268 QList<QDomElement> QgsGmlSchema::domElements( const QDomElement &element, const QString &path )
269 {
270  QList<QDomElement> list;
271 
272  QStringList names = path.split( '.' );
273  if ( names.isEmpty() ) return list;
274  QString name = names.value( 0 );
275  names.removeFirst();
276 
277  QDomNode n1 = element.firstChild();
278  while ( !n1.isNull() )
279  {
280  QDomElement el = n1.toElement();
281  if ( !el.isNull() )
282  {
283  QString tagName = stripNS( el.tagName() );
284  if ( tagName == name )
285  {
286  if ( names.isEmpty() )
287  {
288  list.append( el );
289  }
290  else
291  {
292  list.append( domElements( el, names.join( QStringLiteral( "." ) ) ) );
293  }
294  }
295  }
296  n1 = n1.nextSibling();
297  }
298 
299  return list;
300 }
301 
302 QDomElement QgsGmlSchema::domElement( const QDomElement &element, const QString &path )
303 {
304  return domElements( element, path ).value( 0 );
305 }
306 
307 QList<QDomElement> QgsGmlSchema::domElements( QList<QDomElement> &elements, const QString &attr, const QString &attrVal )
308 {
309  QList<QDomElement> list;
310  Q_FOREACH ( const QDomElement &el, elements )
311  {
312  if ( el.attribute( attr ) == attrVal )
313  {
314  list << el;
315  }
316  }
317  return list;
318 }
319 
320 QDomElement QgsGmlSchema::domElement( const QDomElement &element, const QString &path, const QString &attr, const QString &attrVal )
321 {
322  QList<QDomElement> list = domElements( element, path );
323  return domElements( list, attr, attrVal ).value( 0 );
324 }
325 
326 bool QgsGmlSchema::guessSchema( const QByteArray &data )
327 {
328  mLevel = 0;
329  mSkipLevel = std::numeric_limits<int>::max();
330  XML_Parser p = XML_ParserCreateNS( nullptr, NS_SEPARATOR );
331  XML_SetUserData( p, this );
332  XML_SetElementHandler( p, QgsGmlSchema::start, QgsGmlSchema::end );
333  XML_SetCharacterDataHandler( p, QgsGmlSchema::chars );
334  int atEnd = 1;
335  int res = XML_Parse( p, data.constData(), data.size(), atEnd );
336 
337  if ( res == 0 )
338  {
339  QString err = QString( XML_ErrorString( XML_GetErrorCode( p ) ) );
340  QgsDebugMsg( QString( "XML_Parse returned %1 error %2" ).arg( res ).arg( err ) );
341  mError = QgsError( err, QStringLiteral( "GML schema" ) );
342  mError.append( tr( "Cannot guess schema" ) );
343  }
344 
345  return res != 0;
346 }
347 
348 void QgsGmlSchema::startElement( const XML_Char *el, const XML_Char **attr )
349 {
350  Q_UNUSED( attr );
351  mLevel++;
352 
353  QString elementName = QString::fromUtf8( el );
354  QgsDebugMsgLevel( QString( "-> %1 %2 %3" ).arg( mLevel ).arg( elementName, mLevel >= mSkipLevel ? "skip" : "" ), 5 );
355 
356  if ( mLevel >= mSkipLevel )
357  {
358  //QgsDebugMsg( QString("skip level %1").arg( mLevel ) );
359  return;
360  }
361 
362  mParsePathStack.append( elementName );
363  QString path = mParsePathStack.join( QStringLiteral( "." ) );
364 
365  QStringList splitName = elementName.split( NS_SEPARATOR );
366  QString localName = splitName.last();
367  QString ns = splitName.size() > 1 ? splitName.first() : QLatin1String( "" );
368  //QgsDebugMsg( "ns = " + ns + " localName = " + localName );
369 
370  ParseMode parseMode = modeStackTop();
371  //QgsDebugMsg ( QString("localName = %1 parseMode = %2").arg(localName).arg(parseMode) );
372 
373  if ( ns == GML_NAMESPACE && localName == QLatin1String( "boundedBy" ) )
374  {
375  // gml:boundedBy in feature or feature collection -> skip
376  mSkipLevel = mLevel + 1;
377  }
378  else if ( localName.compare( QLatin1String( "featureMembers" ), Qt::CaseInsensitive ) == 0 )
379  {
380  mParseModeStack.push( QgsGmlSchema::FeatureMembers );
381  }
382  // GML does not specify that gml:FeatureAssociationType elements should end
383  // with 'Member' apart standard gml:featureMember, but it is quite usual to
384  // that the names ends with 'Member', e.g.: osgb:topographicMember, cityMember,...
385  // so this is really fail if the name does not contain 'Member'
386 
387  else if ( localName.endsWith( QLatin1String( "member" ), Qt::CaseInsensitive ) )
388  {
389  mParseModeStack.push( QgsGmlSchema::FeatureMember );
390  }
391  // UMN Mapserver simple GetFeatureInfo response layer element (ends with _layer)
392  else if ( elementName.endsWith( QLatin1String( "_layer" ) ) )
393  {
394  // do nothing, we catch _feature children
395  }
396  // UMN Mapserver simple GetFeatureInfo response feature element (ends with _feature)
397  // or featureMember children.
398  // QGIS mapserver 2.2 GetFeatureInfo is using <Feature id="###"> for feature member,
399  // without any feature class distinction.
400  else if ( elementName.endsWith( QLatin1String( "_feature" ) )
401  || parseMode == QgsGmlSchema::FeatureMember
402  || parseMode == QgsGmlSchema::FeatureMembers
403  || localName.compare( QLatin1String( "feature" ), Qt::CaseInsensitive ) == 0 )
404  {
405  QgsDebugMsg( "is feature path = " + path );
406  if ( mFeatureClassMap.count( localName ) == 0 )
407  {
408  mFeatureClassMap.insert( localName, QgsGmlFeatureClass( localName, path ) );
409  }
410  mCurrentFeatureName = localName;
411  mParseModeStack.push( QgsGmlSchema::Feature );
412  }
413  else if ( parseMode == QgsGmlSchema::Attribute && ns == GML_NAMESPACE && mGeometryTypes.indexOf( localName ) >= 0 )
414  {
415  // Geometry (Point,MultiPoint,...) in geometry attribute
416  QStringList &geometryAttributes = mFeatureClassMap[mCurrentFeatureName].geometryAttributes();
417  if ( geometryAttributes.count( mAttributeName ) == 0 )
418  {
419  geometryAttributes.append( mAttributeName );
420  }
421  mSkipLevel = mLevel + 1; // no need to parse children
422  }
423  else if ( parseMode == QgsGmlSchema::Feature )
424  {
425  // An element in feature should be ordinary or geometry attribute
426  //QgsDebugMsg( "is attribute");
427 
428  // Usually localName is attribute name, e.g.
429  // <gml:desc>My description</gml:desc>
430  // but QGIS server (2.2) is using:
431  // <Attribute value="My description" name="desc"/>
432  QString name = readAttribute( QStringLiteral( "name" ), attr );
433  //QgsDebugMsg ( "attribute name = " + name );
434  if ( localName.compare( QLatin1String( "attribute" ), Qt::CaseInsensitive ) == 0
435  && !name.isEmpty() )
436  {
437  QString value = readAttribute( QStringLiteral( "value" ), attr );
438  //QgsDebugMsg ( "attribute value = " + value );
439  addAttribute( name, value );
440  }
441  else
442  {
443  mAttributeName = localName;
444  mParseModeStack.push( QgsGmlSchema::Attribute );
445  mStringCash.clear();
446  }
447  }
448 }
449 
450 void QgsGmlSchema::endElement( const XML_Char *el )
451 {
452  QString elementName = QString::fromUtf8( el );
453  QgsDebugMsgLevel( QString( "<- %1 %2" ).arg( mLevel ).arg( elementName ), 5 );
454 
455  if ( mLevel >= mSkipLevel )
456  {
457  //QgsDebugMsg( QString("skip level %1").arg( mLevel ) );
458  mLevel--;
459  return;
460  }
461  else
462  {
463  // clear possible skip level
464  mSkipLevel = std::numeric_limits<int>::max();
465  }
466 
467  QStringList splitName = elementName.split( NS_SEPARATOR );
468  QString localName = splitName.last();
469  QString ns = splitName.size() > 1 ? splitName.first() : QLatin1String( "" );
470 
471  QgsGmlSchema::ParseMode parseMode = modeStackTop();
472 
473  if ( parseMode == QgsGmlSchema::FeatureMembers )
474  {
475  modeStackPop();
476  }
477  else if ( parseMode == QgsGmlSchema::Attribute && localName == mAttributeName )
478  {
479  // End of attribute
480  //QgsDebugMsg("end attribute");
481  modeStackPop(); // go up to feature
482 
483  if ( mFeatureClassMap[mCurrentFeatureName].geometryAttributes().count( mAttributeName ) == 0 )
484  {
485  addAttribute( mAttributeName, mStringCash );
486  }
487  }
488  else if ( ns == GML_NAMESPACE && localName == QLatin1String( "boundedBy" ) )
489  {
490  // was skipped
491  }
492  else if ( localName.endsWith( QLatin1String( "member" ), Qt::CaseInsensitive ) )
493  {
494  modeStackPop();
495  }
496  mParsePathStack.removeLast();
497  mLevel--;
498 }
499 
500 void QgsGmlSchema::characters( const XML_Char *chars, int len )
501 {
502  //QgsDebugMsg( QString("level %1 : %2").arg( mLevel ).arg( QString::fromUtf8( chars, len ) ) );
503  if ( mLevel >= mSkipLevel )
504  {
505  //QgsDebugMsg( QString("skip level %1").arg( mLevel ) );
506  return;
507  }
508 
509  //save chars in mStringCash attribute mode for value type analysis
510  if ( modeStackTop() == QgsGmlSchema::Attribute )
511  {
512  mStringCash.append( QString::fromUtf8( chars, len ) );
513  }
514 }
515 
516 void QgsGmlSchema::addAttribute( const QString &name, const QString &value )
517 {
518  // It is not geometry attribute -> analyze value
519  bool ok;
520  value.toInt( &ok );
521  QVariant::Type type = QVariant::String;
522  if ( ok )
523  {
524  type = QVariant::Int;
525  }
526  else
527  {
528  value.toDouble( &ok );
529  if ( ok )
530  {
531  type = QVariant::Double;
532  }
533  }
534  //QgsDebugMsg( "mStringCash = " + mStringCash + " type = " + QVariant::typeToName( type ) );
535  //QMap<QString, QgsField> & fields = mFeatureClassMap[mCurrentFeatureName].fields();
536  QList<QgsField> &fields = mFeatureClassMap[mCurrentFeatureName].fields();
537  int fieldIndex = mFeatureClassMap[mCurrentFeatureName].fieldIndex( name );
538  if ( fieldIndex == -1 )
539  {
540  QgsField field( name, type );
541  fields.append( field );
542  }
543  else
544  {
545  QgsField &field = fields[fieldIndex];
546  // check if type is sufficient
547  if ( ( field.type() == QVariant::Int && ( type == QVariant::String || type == QVariant::Double ) ) ||
548  ( field.type() == QVariant::Double && type == QVariant::String ) )
549  {
550  field.setType( type );
551  }
552  }
553 }
554 
555 QStringList QgsGmlSchema::typeNames() const
556 {
557  return mFeatureClassMap.keys();
558 }
559 
560 QList<QgsField> QgsGmlSchema::fields( const QString &typeName )
561 {
562  if ( mFeatureClassMap.count( typeName ) == 0 ) return QList<QgsField>();
563  return mFeatureClassMap[typeName].fields();
564 }
565 
566 QStringList QgsGmlSchema::geometryAttributes( const QString &typeName )
567 {
568  if ( mFeatureClassMap.count( typeName ) == 0 ) return QStringList();
569  return mFeatureClassMap[typeName].geometryAttributes();
570 }
bool guessSchema(const QByteArray &data)
Guess GML schema from data if XSD does not exist.
QString path() const
Definition: qgsgmlschema.h:49
#define QgsDebugMsg(str)
Definition: qgslogger.h:37
const QString GML_NAMESPACE
QList< QgsField > & fields()
Definition: qgsgmlschema.h:45
#define QgsDebugMsgLevel(str, level)
Definition: qgslogger.h:38
QList< QgsField > fields(const QString &typeName)
Get fields for type/class name parsed from GML or XSD.
void append(const QString &message, const QString &tag)
Append new error message.
Definition: qgserror.cpp:40
Encapsulate a field in an attribute table or data source.
Definition: qgsfield.h:46
double ANALYSIS_EXPORT max(double x, double y)
Returns the maximum of two doubles or the first argument if both are equal.
Definition: MathUtils.cc:437
Description of feature class in GML.
Definition: qgsgmlschema.h:39
void setType(QVariant::Type type)
Set variant type.
Definition: qgsfield.cpp:140
QgsError is container for error messages (report).
Definition: qgserror.h:82
const char NS_SEPARATOR
QStringList & geometryAttributes()
Definition: qgsgmlschema.h:51
QVariant::Type type() const
Gets variant type of the field as it will be retrieved from data source.
Definition: qgsfield.cpp:94
QStringList typeNames() const
Get list of dot separated paths to feature classes parsed from GML or XSD.
int fieldIndex(const QString &name)
bool parseXSD(const QByteArray &xml)
Get fields info from XSD.
QStringList geometryAttributes(const QString &typeName)
Get list of geometry attributes for type/class name.