String simplification (accent removal) using Unicode decomposition

This commit is contained in:
Mat Sutcliffe
2019-07-26 21:10:11 +01:00
parent cb8eb6569e
commit dd86be3335
3 changed files with 32 additions and 0 deletions

View File

@@ -36,6 +36,7 @@ namespace BlackMiscTest
void testSplit();
void testTimestampParsing();
void testCodecs();
void testSimplify();
};
void CTestStringUtils::testRemove()
@@ -157,6 +158,14 @@ namespace BlackMiscTest
QVERIFY2(okRu1, "Russian \"test\" equal after round-trip with cp1251");
QVERIFY2(okRu2, "Russian \"test\" equal after round-trip with utf8");
}
//! Accent removal: simplifyAccents and simplifyByDecomposition must both turn the accented input into the same plain string
void CTestStringUtils::testSimplify()
{
// Accented Latin letters (upper and lower case), decoded from a UTF-8 literal.
const QString input = QString::fromUtf8(u8"ŠŽšžŸÀÁÂÃÄÅÇÈÉÊËÌÍÎÏÑÒÓÔÕÖÙÚÛÜÝàáâãäåçèéêëìíîïñòóôõöùúûüýÿ");
// Expected result: one unaccented letter per input character, in the same order.
const QString output = QLatin1String("SZszYAAAAAACEEEEIIIINOOOOOUUUUYaaaaaaceeeeiiiinooooouuuuyy");
// Both functions are checked against the same expected string, so they must agree on this input.
QCOMPARE(simplifyAccents(input), output);
QCOMPARE(simplifyByDecomposition(input), output);
}
}
//! main