mirror of
https://github.com/swift-project/pilotclient.git
synced 2026-03-22 14:55:36 +08:00
String simplification (accent removal) using Unicode decomposition
This commit is contained in:
@@ -243,6 +243,26 @@ namespace BlackMisc
|
||||
return output;
|
||||
}
|
||||
|
||||
//! Remove accents / diacritic marks from a string by Unicode decomposition.
//! Characters that have no decomposition are copied to the result unchanged.
//! Every other character is replaced by its full compatibility decomposition (NFKD)
//! with all mark characters dropped.
//! \param s string to simplify
//! \return simplified copy of \a s
QString simplifyByDecomposition(const QString &s)
{
    QString result;
    result.reserve(s.size()); // only a hint, decompositions may expand (e.g. ligatures)

    for (const QChar c : s)
    {
        if (c.decompositionTag() == QChar::NoDecomposition)
        {
            result.push_back(c);
            continue;
        }

        // QChar::decomposition() only yields the direct mapping of the character. For code points
        // whose mapping itself contains a decomposable character (e.g. U+1EC7 -> U+1EB9 + U+0302)
        // the base would keep an accent, so use normalized() which decomposes recursively.
        const QString decomposed = QString(c).normalized(QString::NormalizationForm_KD);
        for (const QChar dc : decomposed)
        {
            if (!dc.isMark()) { result.push_back(dc); }
        }
    }
    return result;
}
|
||||
|
||||
bool caseInsensitiveStringCompare(const QString &c1, const QString &c2)
|
||||
{
|
||||
return c1.length() == c2.length() && c1.startsWith(c2, Qt::CaseInsensitive);
|
||||
|
||||
@@ -256,6 +256,9 @@ namespace BlackMisc
|
||||
//! Remove accents / diacritic marks from a string
|
||||
BLACKMISC_EXPORT QString simplifyAccents(const QString &candidate);
|
||||
|
||||
//! Remove accents / diacritic marks from a string by doing a Unicode decomposition and removing mark characters
|
||||
//! \param candidate string to simplify; characters without a Unicode decomposition are copied unchanged
//! \return copy of \a candidate in which decomposable characters are replaced by their decomposition minus mark characters
BLACKMISC_EXPORT QString simplifyByDecomposition(const QString &candidate);
|
||||
|
||||
//! Case insensitive string compare
|
||||
BLACKMISC_EXPORT bool caseInsensitiveStringCompare(const QString &c1, const QString &c2);
|
||||
|
||||
|
||||
@@ -36,6 +36,7 @@ namespace BlackMiscTest
|
||||
void testSplit();
|
||||
void testTimestampParsing();
|
||||
void testCodecs();
|
||||
void testSimplify();
|
||||
};
|
||||
|
||||
void CTestStringUtils::testRemove()
|
||||
@@ -157,6 +158,14 @@ namespace BlackMiscTest
|
||||
QVERIFY2(okRu1, "Russian \"test\" equal after round-trip with cp1251");
|
||||
QVERIFY2(okRu2, "Russian \"test\" equal after round-trip with utf8");
|
||||
}
|
||||
|
||||
void CTestStringUtils::testSimplify()
|
||||
{
|
||||
const QString input = QString::fromUtf8(u8"ŠŽšžŸÀÁÂÃÄÅÇÈÉÊËÌÍÎÏÑÒÓÔÕÖÙÚÛÜÝàáâãäåçèéêëìíîïñòóôõöùúûüýÿ");
|
||||
const QString output = QLatin1String("SZszYAAAAAACEEEEIIIINOOOOOUUUUYaaaaaaceeeeiiiinooooouuuuyy");
|
||||
QCOMPARE(simplifyAccents(input), output);
|
||||
QCOMPARE(simplifyByDecomposition(input), output);
|
||||
}
|
||||
}
|
||||
|
||||
//! main
|
||||
|
||||
Reference in New Issue
Block a user