mirror of
https://github.com/swift-project/pilotclient.git
synced 2026-04-14 08:45:36 +08:00
String simplification (accent removal) using Unicode decomposition
This commit is contained in:
@@ -243,6 +243,26 @@ namespace BlackMisc
|
|||||||
return output;
|
return output;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//! Remove accents / diacritic marks by Unicode decomposition.
//! Every character that has a decomposition is expanded until no part can be
//! decomposed any further, and all mark characters produced by that expansion
//! are dropped. Characters without a decomposition are copied unchanged.
//! \param s string to simplify
//! \return copy of \p s with decomposable characters replaced by their non-mark constituents
QString simplifyByDecomposition(const QString &s)
{
    QString result;
    result.reserve(s.size());
    for (const QChar c : s)
    {
        if (c.decompositionTag() == QChar::NoDecomposition)
        {
            // nothing to expand, keep the character as it is
            result.push_back(c);
            continue;
        }

        // QChar::decomposition() only yields the single-level mapping, e.g. U+1EA4 gives
        // U+00C2 + U+0301, where U+00C2 itself still carries an accent. So the parts are
        // expanded in place until only characters without a decomposition remain.
        QString pending = c.decomposition();
        for (int i = 0; i < pending.size(); ++i)
        {
            const QChar dc = pending.at(i);
            if (dc.isMark()) { continue; } // the accent / diacritic itself
            if (dc.decompositionTag() == QChar::NoDecomposition)
            {
                result.push_back(dc);
            }
            else
            {
                // the nested parts are examined next, which preserves the character order
                pending.insert(i + 1, dc.decomposition());
            }
        }
    }
    return result;
}
|
||||||
|
|
||||||
bool caseInsensitiveStringCompare(const QString &c1, const QString &c2)
|
bool caseInsensitiveStringCompare(const QString &c1, const QString &c2)
|
||||||
{
|
{
|
||||||
return c1.length() == c2.length() && c1.startsWith(c2, Qt::CaseInsensitive);
|
return c1.length() == c2.length() && c1.startsWith(c2, Qt::CaseInsensitive);
|
||||||
|
|||||||
@@ -256,6 +256,9 @@ namespace BlackMisc
|
|||||||
//! Remove accents / diacritic marks from a string
|
//! Remove accents / diacritic marks from a string
|
||||||
BLACKMISC_EXPORT QString simplifyAccents(const QString &candidate);
|
BLACKMISC_EXPORT QString simplifyAccents(const QString &candidate);
|
||||||
|
|
||||||
|
//! Remove accents / diacritic marks from a string by doing a Unicode decomposition and removing mark characters
|
||||||
|
BLACKMISC_EXPORT QString simplifyByDecomposition(const QString &candidate);
|
||||||
|
|
||||||
//! Case insensitive string compare
|
//! Case insensitive string compare
|
||||||
BLACKMISC_EXPORT bool caseInsensitiveStringCompare(const QString &c1, const QString &c2);
|
BLACKMISC_EXPORT bool caseInsensitiveStringCompare(const QString &c1, const QString &c2);
|
||||||
|
|
||||||
|
|||||||
@@ -36,6 +36,7 @@ namespace BlackMiscTest
|
|||||||
void testSplit();
|
void testSplit();
|
||||||
void testTimestampParsing();
|
void testTimestampParsing();
|
||||||
void testCodecs();
|
void testCodecs();
|
||||||
|
void testSimplify();
|
||||||
};
|
};
|
||||||
|
|
||||||
void CTestStringUtils::testRemove()
|
void CTestStringUtils::testRemove()
|
||||||
@@ -157,6 +158,14 @@ namespace BlackMiscTest
|
|||||||
QVERIFY2(okRu1, "Russian \"test\" equal after round-trip with cp1251");
|
QVERIFY2(okRu1, "Russian \"test\" equal after round-trip with cp1251");
|
||||||
QVERIFY2(okRu2, "Russian \"test\" equal after round-trip with utf8");
|
QVERIFY2(okRu2, "Russian \"test\" equal after round-trip with utf8");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void CTestStringUtils::testSimplify()
|
||||||
|
{
|
||||||
|
const QString input = QString::fromUtf8(u8"ŠŽšžŸÀÁÂÃÄÅÇÈÉÊËÌÍÎÏÑÒÓÔÕÖÙÚÛÜÝàáâãäåçèéêëìíîïñòóôõöùúûüýÿ");
|
||||||
|
const QString output = QLatin1String("SZszYAAAAAACEEEEIIIINOOOOOUUUUYaaaaaaceeeeiiiinooooouuuuyy");
|
||||||
|
QCOMPARE(simplifyAccents(input), output);
|
||||||
|
QCOMPARE(simplifyByDecomposition(input), output);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
//! main
|
//! main
|
||||||
|
|||||||
Reference in New Issue
Block a user