QGIS API Documentation  3.21.0-Master (5b68dc587e)
qgsstringutils.cpp
Go to the documentation of this file.
1 /***************************************************************************
2  qgsstringutils.cpp
3  ------------------
4  begin : June 2015
5  copyright : (C) 2015 by Nyall Dawson
6  email : nyall dot dawson at gmail dot com
7  ***************************************************************************
8  * *
9  * This program is free software; you can redistribute it and/or modify *
10  * it under the terms of the GNU General Public License as published by *
11  * the Free Software Foundation; either version 2 of the License, or *
12  * (at your option) any later version. *
13  * *
14  ***************************************************************************/
15 
16 #include "qgsstringutils.h"
17 #include "qgslogger.h"
18 #include <QVector>
19 #include <QStringList>
20 #include <QTextBoundaryFinder>
21 #include <QRegularExpression>
22 #include <cstdlib> // for std::abs
23 
24 QString QgsStringUtils::capitalize( const QString &string, QgsStringUtils::Capitalization capitalization )
25 {
26  if ( string.isEmpty() )
27  return QString();
28 
29  switch ( capitalization )
30  {
31  case MixedCase:
32  return string;
33 
34  case AllUppercase:
35  return string.toUpper();
36 
37  case AllLowercase:
38  return string.toLower();
39 
41  {
42  QString temp = string;
43 
44  QTextBoundaryFinder wordSplitter( QTextBoundaryFinder::Word, string.constData(), string.length(), nullptr, 0 );
45  QTextBoundaryFinder letterSplitter( QTextBoundaryFinder::Grapheme, string.constData(), string.length(), nullptr, 0 );
46 
47  wordSplitter.setPosition( 0 );
48  bool first = true;
49  while ( ( first && wordSplitter.boundaryReasons() & QTextBoundaryFinder::StartOfItem )
50  || wordSplitter.toNextBoundary() >= 0 )
51  {
52  first = false;
53  letterSplitter.setPosition( wordSplitter.position() );
54  letterSplitter.toNextBoundary();
55  QString substr = string.mid( wordSplitter.position(), letterSplitter.position() - wordSplitter.position() );
56  temp.replace( wordSplitter.position(), substr.length(), substr.toUpper() );
57  }
58  return temp;
59  }
60 
61  case TitleCase:
62  {
63  // yes, this is MASSIVELY simplifying the problem!!
64 
65  static QStringList smallWords;
66  static QStringList newPhraseSeparators;
67  static QRegularExpression splitWords;
68  if ( smallWords.empty() )
69  {
70  smallWords = QObject::tr( "a|an|and|as|at|but|by|en|for|if|in|nor|of|on|or|per|s|the|to|vs.|vs|via" ).split( '|' );
71  newPhraseSeparators = QObject::tr( ".|:" ).split( '|' );
72  splitWords = QRegularExpression( QStringLiteral( "\\b" ), QRegularExpression::UseUnicodePropertiesOption );
73  }
74 
75  const bool allSameCase = string.toLower() == string || string.toUpper() == string;
76 #if QT_VERSION < QT_VERSION_CHECK(5, 15, 0)
77  const QStringList parts = ( allSameCase ? string.toLower() : string ).split( splitWords, QString::SkipEmptyParts );
78 #else
79  const QStringList parts = ( allSameCase ? string.toLower() : string ).split( splitWords, Qt::SkipEmptyParts );
80 #endif
81  QString result;
82  bool firstWord = true;
83  int i = 0;
84  int lastWord = parts.count() - 1;
85  for ( const QString &word : std::as_const( parts ) )
86  {
87  if ( newPhraseSeparators.contains( word.trimmed() ) )
88  {
89  firstWord = true;
90  result += word;
91  }
92  else if ( firstWord || ( i == lastWord ) || !smallWords.contains( word ) )
93  {
94  result += word.at( 0 ).toUpper() + word.mid( 1 );
95  firstWord = false;
96  }
97  else
98  {
99  result += word;
100  }
101  i++;
102  }
103  return result;
104  }
105 
106  case UpperCamelCase:
107  QString result = QgsStringUtils::capitalize( string.toLower(), QgsStringUtils::ForceFirstLetterToCapital ).simplified();
108  result.remove( ' ' );
109  return result;
110  }
111  // no warnings
112  return string;
113 }
114 
115 // original code from http://www.qtcentre.org/threads/52456-HTML-Unicode-ampersand-encoding
116 QString QgsStringUtils::ampersandEncode( const QString &string )
117 {
118  QString encoded;
119  for ( int i = 0; i < string.size(); ++i )
120  {
121  QChar ch = string.at( i );
122  if ( ch.unicode() > 160 )
123  encoded += QStringLiteral( "&#%1;" ).arg( static_cast< int >( ch.unicode() ) );
124  else if ( ch.unicode() == 38 )
125  encoded += QLatin1String( "&amp;" );
126  else if ( ch.unicode() == 60 )
127  encoded += QLatin1String( "&lt;" );
128  else if ( ch.unicode() == 62 )
129  encoded += QLatin1String( "&gt;" );
130  else
131  encoded += ch;
132  }
133  return encoded;
134 }
135 
136 int QgsStringUtils::levenshteinDistance( const QString &string1, const QString &string2, bool caseSensitive )
137 {
138  int length1 = string1.length();
139  int length2 = string2.length();
140 
141  //empty strings? solution is trivial...
142  if ( string1.isEmpty() )
143  {
144  return length2;
145  }
146  else if ( string2.isEmpty() )
147  {
148  return length1;
149  }
150 
151  //handle case sensitive flag (or not)
152  QString s1( caseSensitive ? string1 : string1.toLower() );
153  QString s2( caseSensitive ? string2 : string2.toLower() );
154 
155  const QChar *s1Char = s1.constData();
156  const QChar *s2Char = s2.constData();
157 
158  //strip out any common prefix
159  int commonPrefixLen = 0;
160  while ( length1 > 0 && length2 > 0 && *s1Char == *s2Char )
161  {
162  commonPrefixLen++;
163  length1--;
164  length2--;
165  s1Char++;
166  s2Char++;
167  }
168 
169  //strip out any common suffix
170  while ( length1 > 0 && length2 > 0 && s1.at( commonPrefixLen + length1 - 1 ) == s2.at( commonPrefixLen + length2 - 1 ) )
171  {
172  length1--;
173  length2--;
174  }
175 
176  //fully checked either string? if so, the answer is easy...
177  if ( length1 == 0 )
178  {
179  return length2;
180  }
181  else if ( length2 == 0 )
182  {
183  return length1;
184  }
185 
186  //ensure the inner loop is longer
187  if ( length1 > length2 )
188  {
189  std::swap( s1, s2 );
190  std::swap( length1, length2 );
191  }
192 
193  //levenshtein algorithm begins here
194  QVector< int > col;
195  col.fill( 0, length2 + 1 );
196  QVector< int > prevCol;
197  prevCol.reserve( length2 + 1 );
198  for ( int i = 0; i < length2 + 1; ++i )
199  {
200  prevCol << i;
201  }
202  const QChar *s2start = s2Char;
203  for ( int i = 0; i < length1; ++i )
204  {
205  col[0] = i + 1;
206  s2Char = s2start;
207  for ( int j = 0; j < length2; ++j )
208  {
209  col[j + 1] = std::min( std::min( 1 + col[j], 1 + prevCol[1 + j] ), prevCol[j] + ( ( *s1Char == *s2Char ) ? 0 : 1 ) );
210  s2Char++;
211  }
212  col.swap( prevCol );
213  s1Char++;
214  }
215  return prevCol[length2];
216 }
217 
218 QString QgsStringUtils::longestCommonSubstring( const QString &string1, const QString &string2, bool caseSensitive )
219 {
220  if ( string1.isEmpty() || string2.isEmpty() )
221  {
222  //empty strings, solution is trivial...
223  return QString();
224  }
225 
226  //handle case sensitive flag (or not)
227  QString s1( caseSensitive ? string1 : string1.toLower() );
228  QString s2( caseSensitive ? string2 : string2.toLower() );
229 
230  if ( s1 == s2 )
231  {
232  //another trivial case, identical strings
233  return s1;
234  }
235 
236  int *currentScores = new int [ s2.length()];
237  int *previousScores = new int [ s2.length()];
238  int maxCommonLength = 0;
239  int lastMaxBeginIndex = 0;
240 
241  const QChar *s1Char = s1.constData();
242  const QChar *s2Char = s2.constData();
243  const QChar *s2Start = s2Char;
244 
245  for ( int i = 0; i < s1.length(); ++i )
246  {
247  for ( int j = 0; j < s2.length(); ++j )
248  {
249  if ( *s1Char != *s2Char )
250  {
251  currentScores[j] = 0;
252  }
253  else
254  {
255  if ( i == 0 || j == 0 )
256  {
257  currentScores[j] = 1;
258  }
259  else
260  {
261  currentScores[j] = 1 + previousScores[j - 1];
262  }
263 
264  if ( maxCommonLength < currentScores[j] )
265  {
266  maxCommonLength = currentScores[j];
267  lastMaxBeginIndex = i;
268  }
269  }
270  s2Char++;
271  }
272  std::swap( currentScores, previousScores );
273  s1Char++;
274  s2Char = s2Start;
275  }
276  delete [] currentScores;
277  delete [] previousScores;
278  return string1.mid( lastMaxBeginIndex - maxCommonLength + 1, maxCommonLength );
279 }
280 
281 int QgsStringUtils::hammingDistance( const QString &string1, const QString &string2, bool caseSensitive )
282 {
283  if ( string1.isEmpty() && string2.isEmpty() )
284  {
285  //empty strings, solution is trivial...
286  return 0;
287  }
288 
289  if ( string1.length() != string2.length() )
290  {
291  //invalid inputs
292  return -1;
293  }
294 
295  //handle case sensitive flag (or not)
296  QString s1( caseSensitive ? string1 : string1.toLower() );
297  QString s2( caseSensitive ? string2 : string2.toLower() );
298 
299  if ( s1 == s2 )
300  {
301  //another trivial case, identical strings
302  return 0;
303  }
304 
305  int distance = 0;
306  const QChar *s1Char = s1.constData();
307  const QChar *s2Char = s2.constData();
308 
309  for ( int i = 0; i < string1.length(); ++i )
310  {
311  if ( *s1Char != *s2Char )
312  distance++;
313  s1Char++;
314  s2Char++;
315  }
316 
317  return distance;
318 }
319 
320 QString QgsStringUtils::soundex( const QString &string )
321 {
322  if ( string.isEmpty() )
323  return QString();
324 
325  QString tmp = string.toUpper();
326 
327  //strip non character codes, and vowel like characters after the first character
328  QChar *char1 = tmp.data();
329  QChar *char2 = tmp.data();
330  int outLen = 0;
331  for ( int i = 0; i < tmp.length(); ++i, ++char2 )
332  {
333  if ( ( *char2 ).unicode() >= 0x41 && ( *char2 ).unicode() <= 0x5A && ( i == 0 || ( ( *char2 ).unicode() != 0x41 && ( *char2 ).unicode() != 0x45
334  && ( *char2 ).unicode() != 0x48 && ( *char2 ).unicode() != 0x49
335  && ( *char2 ).unicode() != 0x4F && ( *char2 ).unicode() != 0x55
336  && ( *char2 ).unicode() != 0x57 && ( *char2 ).unicode() != 0x59 ) ) )
337  {
338  *char1 = *char2;
339  char1++;
340  outLen++;
341  }
342  }
343  tmp.truncate( outLen );
344 
345  QChar *tmpChar = tmp.data();
346  tmpChar++;
347  for ( int i = 1; i < tmp.length(); ++i, ++tmpChar )
348  {
349  switch ( ( *tmpChar ).unicode() )
350  {
351  case 0x42:
352  case 0x46:
353  case 0x50:
354  case 0x56:
355  tmp.replace( i, 1, QChar( 0x31 ) );
356  break;
357 
358  case 0x43:
359  case 0x47:
360  case 0x4A:
361  case 0x4B:
362  case 0x51:
363  case 0x53:
364  case 0x58:
365  case 0x5A:
366  tmp.replace( i, 1, QChar( 0x32 ) );
367  break;
368 
369  case 0x44:
370  case 0x54:
371  tmp.replace( i, 1, QChar( 0x33 ) );
372  break;
373 
374  case 0x4C:
375  tmp.replace( i, 1, QChar( 0x34 ) );
376  break;
377 
378  case 0x4D:
379  case 0x4E:
380  tmp.replace( i, 1, QChar( 0x35 ) );
381  break;
382 
383  case 0x52:
384  tmp.replace( i, 1, QChar( 0x36 ) );
385  break;
386  }
387  }
388 
389  //remove adjacent duplicates
390  char1 = tmp.data();
391  char2 = tmp.data();
392  char2++;
393  outLen = 1;
394  for ( int i = 1; i < tmp.length(); ++i, ++char2 )
395  {
396  if ( *char2 != *char1 )
397  {
398  char1++;
399  *char1 = *char2;
400  outLen++;
401  if ( outLen == 4 )
402  break;
403  }
404  }
405  tmp.truncate( outLen );
406  if ( tmp.length() < 4 )
407  {
408  tmp.append( "000" );
409  tmp.truncate( 4 );
410  }
411 
412  return tmp;
413 }
414 
415 
416 double QgsStringUtils::fuzzyScore( const QString &candidate, const QString &search )
417 {
418  QString candidateNormalized = candidate.simplified().normalized( QString:: NormalizationForm_C ).toLower();
419  QString searchNormalized = search.simplified().normalized( QString:: NormalizationForm_C ).toLower();
420 
421  int candidateLength = candidateNormalized.length();
422  int searchLength = searchNormalized.length();
423  int score = 0;
424 
425  // if the candidate and the search term are empty, no other option than 0 score
426  if ( candidateLength == 0 || searchLength == 0 )
427  return score;
428 
429  int candidateIdx = 0;
430  int searchIdx = 0;
431  // there is always at least one word
432  int maxScore = FUZZY_SCORE_WORD_MATCH;
433 
434  bool isPreviousIndexMatching = false;
435  bool isWordOpen = true;
436 
437  // loop trough each candidate char and calculate the potential max score
438  while ( candidateIdx < candidateLength )
439  {
440  QChar candidateChar = candidateNormalized[ candidateIdx++ ];
441  bool isCandidateCharWordEnd = candidateChar == ' ' || candidateChar.isPunct();
442 
443  // the first char is always the default score
444  if ( candidateIdx == 1 )
445  maxScore += FUZZY_SCORE_NEW_MATCH;
446  // every space character or underscore is a opportunity for a new word
447  else if ( isCandidateCharWordEnd )
448  maxScore += FUZZY_SCORE_WORD_MATCH;
449  // potentially we can match every other character
450  else
451  maxScore += FUZZY_SCORE_CONSECUTIVE_MATCH;
452 
453  // we looped through all the characters
454  if ( searchIdx >= searchLength )
455  continue;
456 
457  QChar searchChar = searchNormalized[ searchIdx ];
458  bool isSearchCharWordEnd = searchChar == ' ' || searchChar.isPunct();
459 
460  // match!
461  if ( candidateChar == searchChar || ( isCandidateCharWordEnd && isSearchCharWordEnd ) )
462  {
463  searchIdx++;
464 
465  // if we have just successfully finished a word, give higher score
466  if ( isSearchCharWordEnd )
467  {
468  if ( isWordOpen )
469  score += FUZZY_SCORE_WORD_MATCH;
470  else if ( isPreviousIndexMatching )
472  else
473  score += FUZZY_SCORE_NEW_MATCH;
474 
475  isWordOpen = true;
476  }
477  // if we have consecutive characters matching, give higher score
478  else if ( isPreviousIndexMatching )
479  {
481  }
482  // normal score for new independent character that matches
483  else
484  {
485  score += FUZZY_SCORE_NEW_MATCH;
486  }
487 
488  isPreviousIndexMatching = true;
489  }
490  // if the current character does NOT match, we are sure we cannot build a word for now
491  else
492  {
493  isPreviousIndexMatching = false;
494  isWordOpen = false;
495  }
496 
497  // if the search string is covered, check if the last match is end of word
498  if ( searchIdx >= searchLength )
499  {
500  bool isEndOfWord = ( candidateIdx >= candidateLength )
501  ? true
502  : candidateNormalized[candidateIdx] == ' ' || candidateNormalized[candidateIdx].isPunct();
503 
504  if ( isEndOfWord )
505  score += FUZZY_SCORE_WORD_MATCH;
506  }
507 
508  // QgsLogger::debug( QStringLiteral( "TMP: %1 | %2 | %3 | %4 | %5" ).arg( candidateChar, searchChar, QString::number(score), QString::number(isCandidateCharWordEnd), QString::number(isSearchCharWordEnd) ) + QStringLiteral( __FILE__ ) );
509  }
510 
511  // QgsLogger::debug( QStringLiteral( "RES: %1 | %2" ).arg( QString::number(maxScore), QString::number(score) ) + QStringLiteral( __FILE__ ) );
512  // we didn't loop through all the search chars, it means, that they are not present in the current candidate
513  if ( searchIdx < searchLength )
514  score = 0;
515 
516  return static_cast<float>( std::max( score, 0 ) ) / std::max( maxScore, 1 );
517 }
518 
519 
520 QString QgsStringUtils::insertLinks( const QString &string, bool *foundLinks )
521 {
522  QString converted = string;
523 
524  // http://alanstorm.com/url_regex_explained
525  // note - there's more robust implementations available
526  static thread_local QRegularExpression urlRegEx( "(\\b(([\\w-]+://?|www[.])[^\\s()<>]+(?:\\([\\w\\d]+\\)|([^!\"#$%&'()*+,\\-./:;<=>[email protected][\\\\\\]^_`{|}~\\s]|/))))" );
527  static thread_local QRegularExpression protoRegEx( "^(?:f|ht)tps?://|file://" );
528  static thread_local QRegularExpression emailRegEx( "([\\w._%+-][email protected][\\w.-]+\\.[A-Za-z]+)" );
529 
530  int offset = 0;
531  bool found = false;
532  QRegularExpressionMatch match = urlRegEx.match( converted );
533  while ( match.hasMatch() )
534  {
535  found = true;
536  QString url = match.captured( 1 );
537  QString protoUrl = url;
538  if ( !protoRegEx.match( protoUrl ).hasMatch() )
539  {
540  protoUrl.prepend( "http://" );
541  }
542  QString anchor = QStringLiteral( "<a href=\"%1\">%2</a>" ).arg( protoUrl.toHtmlEscaped(), url.toHtmlEscaped() );
543  converted.replace( match.capturedStart( 1 ), url.length(), anchor );
544  offset = match.capturedStart( 1 ) + anchor.length();
545  match = urlRegEx.match( converted, offset );
546  }
547 
548  offset = 0;
549  match = emailRegEx.match( converted );
550  while ( match.hasMatch() )
551  {
552  found = true;
553  QString email = match.captured( 1 );
554  QString anchor = QStringLiteral( "<a href=\"mailto:%1\">%1</a>" ).arg( email.toHtmlEscaped() );
555  converted.replace( match.capturedStart( 1 ), email.length(), anchor );
556  offset = match.capturedStart( 1 ) + anchor.length();
557  match = emailRegEx.match( converted, offset );
558  }
559 
560  if ( foundLinks )
561  *foundLinks = found;
562 
563  return converted;
564 }
565 
566 bool QgsStringUtils::isUrl( const QString &string )
567 {
568  const thread_local QRegularExpression rxUrl( "^(http|https|ftp|file)://\\S+$" );
569  return rxUrl.match( string ).hasMatch();
570 }
571 
572 QString QgsStringUtils::htmlToMarkdown( const QString &html )
573 {
574  // Any changes in this function must be copied to qgscrashreport.cpp too
575  QString converted = html;
576  converted.replace( QLatin1String( "<br>" ), QLatin1String( "\n" ) );
577  converted.replace( QLatin1String( "<b>" ), QLatin1String( "**" ) );
578  converted.replace( QLatin1String( "</b>" ), QLatin1String( "**" ) );
579 
580  static thread_local QRegularExpression hrefRegEx( "<a\\s+href\\s*=\\s*([^<>]*)\\s*>([^<>]*)</a>" );
581 
582  int offset = 0;
583  QRegularExpressionMatch match = hrefRegEx.match( converted );
584  while ( match.hasMatch() )
585  {
586  QString url = match.captured( 1 ).replace( QLatin1String( "\"" ), QString() );
587  url.replace( '\'', QString() );
588  QString name = match.captured( 2 );
589  QString anchor = QStringLiteral( "[%1](%2)" ).arg( name, url );
590  converted.replace( match.capturedStart(), match.capturedLength(), anchor );
591  offset = match.capturedStart() + anchor.length();
592  match = hrefRegEx.match( converted, offset );
593  }
594 
595  return converted;
596 }
597 
598 QString QgsStringUtils::wordWrap( const QString &string, const int length, const bool useMaxLineLength, const QString &customDelimiter )
599 {
600  if ( string.isEmpty() || length == 0 )
601  return string;
602 
603  QString newstr;
604  QRegularExpression rx;
605  int delimiterLength = 0;
606 
607  if ( !customDelimiter.isEmpty() )
608  {
609  rx.setPattern( QRegularExpression::escape( customDelimiter ) );
610  delimiterLength = customDelimiter.length();
611  }
612  else
613  {
614  // \x{200B} is a ZERO-WIDTH SPACE, needed for worwrap to support a number of complex scripts (Indic, Arabic, etc.)
615  rx.setPattern( QStringLiteral( "[\\x{200B}\\s]" ) );
616  delimiterLength = 1;
617  }
618 
619  const QStringList lines = string.split( '\n' );
620  int strLength, strCurrent, strHit, lastHit;
621 
622  for ( int i = 0; i < lines.size(); i++ )
623  {
624  strLength = lines.at( i ).length();
625  strCurrent = 0;
626  strHit = 0;
627  lastHit = 0;
628 
629  while ( strCurrent < strLength )
630  {
631  // positive wrap value = desired maximum line width to wrap
632  // negative wrap value = desired minimum line width before wrap
633  if ( useMaxLineLength )
634  {
635  //first try to locate delimiter backwards
636  strHit = lines.at( i ).lastIndexOf( rx, strCurrent + length );
637  if ( strHit == lastHit || strHit == -1 )
638  {
639  //if no new backward delimiter found, try to locate forward
640  strHit = lines.at( i ).indexOf( rx, strCurrent + std::abs( length ) );
641  }
642  lastHit = strHit;
643  }
644  else
645  {
646  strHit = lines.at( i ).indexOf( rx, strCurrent + std::abs( length ) );
647  }
648  if ( strHit > -1 )
649  {
650 #if QT_VERSION < QT_VERSION_CHECK(5, 15, 2)
651  newstr.append( lines.at( i ).midRef( strCurrent, strHit - strCurrent ) );
652 #else
653  newstr.append( QStringView {lines.at( i )} .mid( strCurrent, strHit - strCurrent ) );
654 #endif
655  newstr.append( '\n' );
656  strCurrent = strHit + delimiterLength;
657  }
658  else
659  {
660 #if QT_VERSION < QT_VERSION_CHECK(5, 15, 2)
661  newstr.append( lines.at( i ).midRef( strCurrent ) );
662 #else
663  newstr.append( QStringView {lines.at( i )}.mid( strCurrent ) );
664 #endif
665  strCurrent = strLength;
666  }
667  }
668  if ( i < lines.size() - 1 )
669  newstr.append( '\n' );
670  }
671 
672  return newstr;
673 }
674 
676 {
677  string = string.replace( ',', QChar( 65040 ) ).replace( QChar( 8229 ), QChar( 65072 ) ); // comma & two-dot leader
678  string = string.replace( QChar( 12289 ), QChar( 65041 ) ).replace( QChar( 12290 ), QChar( 65042 ) ); // ideographic comma & full stop
679  string = string.replace( ':', QChar( 65043 ) ).replace( ';', QChar( 65044 ) );
680  string = string.replace( '!', QChar( 65045 ) ).replace( '?', QChar( 65046 ) );
681  string = string.replace( QChar( 12310 ), QChar( 65047 ) ).replace( QChar( 12311 ), QChar( 65048 ) ); // white lenticular brackets
682  string = string.replace( QChar( 8230 ), QChar( 65049 ) ); // three-dot ellipse
683  string = string.replace( QChar( 8212 ), QChar( 65073 ) ).replace( QChar( 8211 ), QChar( 65074 ) ); // em & en dash
684  string = string.replace( '_', QChar( 65075 ) ).replace( QChar( 65103 ), QChar( 65076 ) ); // low line & wavy low line
685  string = string.replace( '(', QChar( 65077 ) ).replace( ')', QChar( 65078 ) );
686  string = string.replace( '{', QChar( 65079 ) ).replace( '}', QChar( 65080 ) );
687  string = string.replace( '<', QChar( 65087 ) ).replace( '>', QChar( 65088 ) );
688  string = string.replace( '[', QChar( 65095 ) ).replace( ']', QChar( 65096 ) );
689  string = string.replace( QChar( 12308 ), QChar( 65081 ) ).replace( QChar( 12309 ), QChar( 65082 ) ); // tortoise shell brackets
690  string = string.replace( QChar( 12304 ), QChar( 65083 ) ).replace( QChar( 12305 ), QChar( 65084 ) ); // black lenticular brackets
691  string = string.replace( QChar( 12298 ), QChar( 65085 ) ).replace( QChar( 12299 ), QChar( 65086 ) ); // double angle brackets
692  string = string.replace( QChar( 12300 ), QChar( 65089 ) ).replace( QChar( 12301 ), QChar( 65090 ) ); // corner brackets
693  string = string.replace( QChar( 12302 ), QChar( 65091 ) ).replace( QChar( 12303 ), QChar( 65092 ) ); // white corner brackets
694  return string;
695 }
696 
697 QString QgsStringUtils::qRegExpEscape( const QString &string )
698 {
699  // code and logic taken from the Qt source code
700  const QLatin1Char backslash( '\\' );
701  const int count = string.count();
702 
703  QString escaped;
704  escaped.reserve( count * 2 );
705  for ( int i = 0; i < count; i++ )
706  {
707  switch ( string.at( i ).toLatin1() )
708  {
709  case '$':
710  case '(':
711  case ')':
712  case '*':
713  case '+':
714  case '.':
715  case '?':
716  case '[':
717  case '\\':
718  case ']':
719  case '^':
720  case '{':
721  case '|':
722  case '}':
723  escaped.append( backslash );
724  }
725  escaped.append( string.at( i ) );
726  }
727  return escaped;
728 }
729 
730 QString QgsStringUtils::truncateMiddleOfString( const QString &string, int maxLength )
731 {
732  const int charactersToTruncate = string.length() - maxLength;
733  if ( charactersToTruncate <= 0 )
734  return string;
735 
736  // note we actually truncate an extra character, as we'll be replacing it with the ... character
737  const int truncateFrom = string.length() / 2 - ( charactersToTruncate + 1 ) / 2;
738 
739 #if QT_VERSION < QT_VERSION_CHECK(6, 0, 0)
740  return string.leftRef( truncateFrom ) + QString( QChar( 0x2026 ) ) + string.midRef( truncateFrom + charactersToTruncate + 1 );
741 #else
742  return QStringView( string ).first( truncateFrom ) + QString( QChar( 0x2026 ) ) + QStringView( string ).sliced( truncateFrom + charactersToTruncate + 1 );
743 #endif
744 }
745 
746 QgsStringReplacement::QgsStringReplacement( const QString &match, const QString &replacement, bool caseSensitive, bool wholeWordOnly )
747  : mMatch( match )
748  , mReplacement( replacement )
749  , mCaseSensitive( caseSensitive )
750  , mWholeWordOnly( wholeWordOnly )
751 {
752  if ( mWholeWordOnly )
753  {
754  mRx.setPattern( QString( "\\b%1\\b" ).arg( mMatch ) );
755  mRx.setPatternOptions( mCaseSensitive ? QRegularExpression::NoPatternOption : QRegularExpression::CaseInsensitiveOption );
756  }
757 }
758 
759 QString QgsStringReplacement::process( const QString &input ) const
760 {
761  QString result = input;
762  if ( !mWholeWordOnly )
763  {
764  return result.replace( mMatch, mReplacement, mCaseSensitive ? Qt::CaseSensitive : Qt::CaseInsensitive );
765  }
766  else
767  {
768  return result.replace( mRx, mReplacement );
769  }
770 }
771 
773 {
774  QgsStringMap map;
775  map.insert( QStringLiteral( "match" ), mMatch );
776  map.insert( QStringLiteral( "replace" ), mReplacement );
777  map.insert( QStringLiteral( "caseSensitive" ), mCaseSensitive ? "1" : "0" );
778  map.insert( QStringLiteral( "wholeWord" ), mWholeWordOnly ? "1" : "0" );
779  return map;
780 }
781 
783 {
784  return QgsStringReplacement( properties.value( QStringLiteral( "match" ) ),
785  properties.value( QStringLiteral( "replace" ) ),
786  properties.value( QStringLiteral( "caseSensitive" ), QStringLiteral( "0" ) ) == QLatin1String( "1" ),
787  properties.value( QStringLiteral( "wholeWord" ), QStringLiteral( "0" ) ) == QLatin1String( "1" ) );
788 }
789 
790 QString QgsStringReplacementCollection::process( const QString &input ) const
791 {
792  QString result = input;
793  const auto constMReplacements = mReplacements;
794  for ( const QgsStringReplacement &r : constMReplacements )
795  {
796  result = r.process( result );
797  }
798  return result;
799 }
800 
801 void QgsStringReplacementCollection::writeXml( QDomElement &elem, QDomDocument &doc ) const
802 {
803  const auto constMReplacements = mReplacements;
804  for ( const QgsStringReplacement &r : constMReplacements )
805  {
806  QgsStringMap props = r.properties();
807  QDomElement propEl = doc.createElement( QStringLiteral( "replacement" ) );
808  QgsStringMap::const_iterator it = props.constBegin();
809  for ( ; it != props.constEnd(); ++it )
810  {
811  propEl.setAttribute( it.key(), it.value() );
812  }
813  elem.appendChild( propEl );
814  }
815 }
816 
817 void QgsStringReplacementCollection::readXml( const QDomElement &elem )
818 {
819  mReplacements.clear();
820  QDomNodeList nodelist = elem.elementsByTagName( QStringLiteral( "replacement" ) );
821  for ( int i = 0; i < nodelist.count(); i++ )
822  {
823  QDomElement replacementElem = nodelist.at( i ).toElement();
824  QDomNamedNodeMap nodeMap = replacementElem.attributes();
825 
826  QgsStringMap props;
827  for ( int j = 0; j < nodeMap.count(); ++j )
828  {
829  props.insert( nodeMap.item( j ).nodeName(), nodeMap.item( j ).nodeValue() );
830  }
831  mReplacements << QgsStringReplacement::fromProperties( props );
832  }
833 
834 }
void readXml(const QDomElement &elem)
Reads the collection state from an XML element.
QString process(const QString &input) const
Processes a given input string, applying any valid replacements which should be made using QgsStringR...
void writeXml(QDomElement &elem, QDomDocument &doc) const
Writes the collection state to an XML element.
A representation of a single string replacement.
static QgsStringReplacement fromProperties(const QgsStringMap &properties)
Creates a new QgsStringReplacement from an encoded properties map.
QString process(const QString &input) const
Processes a given input string, applying any valid replacements which should be made.
QgsStringReplacement(const QString &match, const QString &replacement, bool caseSensitive=false, bool wholeWordOnly=false)
Constructor for QgsStringReplacement.
QgsStringMap properties() const
Returns a map of the replacement properties.
static int hammingDistance(const QString &string1, const QString &string2, bool caseSensitive=false)
Returns the Hamming distance between two strings.
static QString soundex(const QString &string)
Returns the Soundex representation of a string.
static int levenshteinDistance(const QString &string1, const QString &string2, bool caseSensitive=false)
Returns the Levenshtein edit distance between two strings.
static QString htmlToMarkdown(const QString &html)
Convert simple HTML to markdown.
static QString longestCommonSubstring(const QString &string1, const QString &string2, bool caseSensitive=false)
Returns the longest common substring between two strings.
static QString substituteVerticalCharacters(QString string)
Returns a string with characters having vertical representation form substituted.
static QString capitalize(const QString &string, Capitalization capitalization)
Converts a string by applying capitalization rules to the string.
static QString insertLinks(const QString &string, bool *foundLinks=nullptr)
Returns a string with any URL (e.g., http(s)/ftp) and mailto: text converted to valid HTML <a ....
static double fuzzyScore(const QString &candidate, const QString &search)
Tests a candidate string to see how likely it is a match for a specified search string.
Capitalization
Capitalization options.
@ UpperCamelCase
Convert the string to upper camel case. Note that this method does not unaccent characters.
@ MixedCase
Mixed case, ie no change.
@ AllLowercase
Convert all characters to lowercase.
@ TitleCase
Simple title case conversion - does not fully grammatically parse the text and uses simple rules only...
@ AllUppercase
Convert all characters to uppercase.
@ ForceFirstLetterToCapital
Convert just the first letter of each word to uppercase, leave the rest untouched.
static QString qRegExpEscape(const QString &string)
Returns an escaped string matching the behavior of QRegExp::escape.
static QString ampersandEncode(const QString &string)
Makes a raw string safe for inclusion as a HTML/XML string literal.
static QString wordWrap(const QString &string, int length, bool useMaxLineLength=true, const QString &customDelimiter=QString())
Automatically wraps a string by inserting new line characters at appropriate locations in the string.
static bool isUrl(const QString &string)
Returns whether the string is a URL (http,https,ftp,file)
static QString truncateMiddleOfString(const QString &string, int maxLength)
Truncates a string to the specified maximum character length.
QMap< QString, QString > QgsStringMap
Definition: qgis.h:1691
#define FUZZY_SCORE_CONSECUTIVE_MATCH
#define FUZZY_SCORE_WORD_MATCH
#define FUZZY_SCORE_NEW_MATCH