From 888cd41ade0f624b59efbd7d89e3197aafcbc95a Mon Sep 17 00:00:00 2001 From: Douglas Bagnall Date: Wed, 17 Apr 2024 11:42:30 +1200 Subject: [PATCH 1/3] ldb:pytests: test for Turkic i-dots in ldb_comparison_fold MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In tr_TR and some other locales, the letter 'i' uppercases to 'İ', which is not ideal for LDB as we need certain strings like 'guid' to casefold in the ASCII way. In fixing https://bugzilla.samba.org/show_bug.cgi?id=15248 we solved this problem in many cases, but not for unindexed searches where the 'i' is not the last character in the string. This test shows that. BUG: https://bugzilla.samba.org/show_bug.cgi?id=15637 Signed-off-by: Douglas Bagnall Reviewed-by: Andrew Bartlett (backported from commit 078ecf486a62dc3aaa2842ada96456ac9870dad7) [dbagnall@samba.org: master had wrong test name in knownfail] --- lib/ldb/tests/python/api.py | 16 ++++++++++++++++ selftest/knownfail.d/ldb.python.api.tr | 2 ++ 2 files changed, 18 insertions(+) create mode 100644 selftest/knownfail.d/ldb.python.api.tr diff --git a/lib/ldb/tests/python/api.py b/lib/ldb/tests/python/api.py index 043b7213a49..79f15a92867 100755 --- a/lib/ldb/tests/python/api.py +++ b/lib/ldb/tests/python/api.py @@ -1266,6 +1266,22 @@ class SearchTests(LdbBaseTest): expression="(ou=unique)") self.assertEqual(len(res11), 1) + def test_subtree_uni123_elsewhere(self): + """Testing a search, where the search term contains a (normal ASCII) + dotted-i, that will be upper-cased to 'İ', U+0130, LATIN + CAPITAL LETTER I WITH DOT ABOVE in certain locales including + tr_TR in which this test is sometimes run. + + The search term should fail because the ou does not exist, but + we used to get it wrong in unindexed searches which stopped + comparing at the i, ignoring the rest of the string, which is + not the same as the existing ou ('123' != 'que'). 
+ """ + res11 = self.l.search(base="DC=EXAMPLE,DC=NET", + scope=ldb.SCOPE_SUBTREE, + expression="(ou=uni123)") + self.assertEqual(len(res11), 0) + def test_subtree_unique_elsewhere3(self): """Testing a search""" diff --git a/selftest/knownfail.d/ldb.python.api.tr b/selftest/knownfail.d/ldb.python.api.tr new file mode 100644 index 00000000000..f327762e70b --- /dev/null +++ b/selftest/knownfail.d/ldb.python.api.tr @@ -0,0 +1,2 @@ +^ldb.python.api.tr.api.SearchTests.test_subtree_uni123_elsewhere.none +^ldb.python.api.tr.api.SearchTestsLmdb.test_subtree_uni123_elsewhere.none -- 2.34.1 From 4b962cc4ac8fb27506bf1cba3c50e7f717d520e9 Mon Sep 17 00:00:00 2001 From: Douglas Bagnall Date: Thu, 11 Apr 2024 13:46:28 +1200 Subject: [PATCH 2/3] ldb:attrib_handlers: use ldb_ascii_toupper() in first loop In a dotless-I locale, we might meet an 'i' before we meet a byte with the high bit set, in which case we still want the ldb casefold comparison. Many ldb operations will do some case-folding before getting here, so hitting this might be quite rare even in those locales. 
BUG: https://bugzilla.samba.org/show_bug.cgi?id=15637 Signed-off-by: Douglas Bagnall Reviewed-by: Andrew Bartlett (backported from commit dca6b2d25529288eaf7b31baf37ca4f6de4f4b9d) [dbagnall@samba.org: master had wrong test name in knownfail] --- lib/ldb/common/attrib_handlers.c | 3 ++- selftest/knownfail.d/ldb.python.api.tr | 2 -- 2 files changed, 2 insertions(+), 3 deletions(-) delete mode 100644 selftest/knownfail.d/ldb.python.api.tr diff --git a/lib/ldb/common/attrib_handlers.c b/lib/ldb/common/attrib_handlers.c index 15470cfcc74..35b788f8654 100644 --- a/lib/ldb/common/attrib_handlers.c +++ b/lib/ldb/common/attrib_handlers.c @@ -330,8 +330,9 @@ int ldb_comparison_fold(struct ldb_context *ldb, void *mem_ctx, * never appear in multibyte sequences */ if (((unsigned char)s1[0]) & 0x80) goto utf8str; if (((unsigned char)s2[0]) & 0x80) goto utf8str; - if (toupper((unsigned char)*s1) != toupper((unsigned char)*s2)) + if (ldb_ascii_toupper(*s1) != ldb_ascii_toupper(*s2)) { break; + } if (*s1 == ' ') { while (n1 > 1 && s1[0] == s1[1]) { s1++; n1--; } while (n2 > 1 && s2[0] == s2[1]) { s2++; n2--; } diff --git a/selftest/knownfail.d/ldb.python.api.tr b/selftest/knownfail.d/ldb.python.api.tr deleted file mode 100644 index f327762e70b..00000000000 --- a/selftest/knownfail.d/ldb.python.api.tr +++ /dev/null @@ -1,2 +0,0 @@ -^ldb.python.api.tr.api.SearchTests.test_subtree_uni123_elsewhere.none -^ldb.python.api.tr.api.SearchTestsLmdb.test_subtree_uni123_elsewhere.none -- 2.34.1 From 8159c7a608bec093f95e38adc407782d656291cb Mon Sep 17 00:00:00 2001 From: Douglas Bagnall Date: Tue, 16 Apr 2024 23:31:45 +1200 Subject: [PATCH 3/3] ldb:tests: add a test for dotted i uppercase This didn't fail in the tr_TR locale before recent changes for https://bugzilla.samba.org/show_bug.cgi?id=15637, because this is a different casefold codepath. But it could fail if that other path goes wrong, so we might as well have the test. 
Signed-off-by: Douglas Bagnall Reviewed-by: Andrew Bartlett (cherry picked from commit 8f080c0295d07b526740882469e1577a44c79060) --- lib/ldb/tests/python/api.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/lib/ldb/tests/python/api.py b/lib/ldb/tests/python/api.py index 79f15a92867..69107ffa137 100755 --- a/lib/ldb/tests/python/api.py +++ b/lib/ldb/tests/python/api.py @@ -2777,6 +2777,10 @@ class DnTests(TestCase): x = ldb.Dn(self.ldb, "dc=foo14,bar=bloe") self.assertEqual(x.get_casefold(), "DC=FOO14,BAR=bloe") + def test_get_casefold_dotted_i(self): + x = ldb.Dn(self.ldb, "dc=foo14,bir=blie") + self.assertEqual(x.get_casefold(), "DC=FOO14,BIR=blie") + def test_validate(self): x = ldb.Dn(self.ldb, "dc=foo15,bar=bloe") self.assertTrue(x.validate()) -- 2.34.1