From ba42248575b9b8efff6ec6bf72dbd3cb70dbb60e Mon Sep 17 00:00:00 2001 From: Gabriel Ravier Date: Thu, 30 Mar 2023 02:28:18 +0200 Subject: [PATCH] Fix larger than WCHAR_MAX differences in wcs{,n}cmp (#795) The C standard states: > Unless explicitly stated otherwise, the functions described in this > subclause order two wide characters the same way as two integers of > the underlying integer type designated by wchar_t. > > [...] > > The wcscmp function returns an integer greater than, equal to, or > less than zero, accordingly as the wide string pointed to by s1 is > greater than, equal to, or less than the wide string pointed to by > s2. > > [...] > > The wcsncmp function returns an integer greater than, equal to, or > less than zero, accordingly as the possibly null-terminated array > pointed to by s1 is greater than, equal to, or less than the > possibly null-terminated array pointed to by s2. - C Standard, 7.31.4.4. Wide string comparison functions Cosmopolitan fails to obey this in cases where the difference between two wide characters is larger than WCHAR_MAX. 
This means that, for example, the following program: #include <stdint.h> #include <stdio.h> #include <wchar.h> int main() { wchar_t str1[] = { WCHAR_MIN, L'\0' }; wchar_t str2[] = { WCHAR_MAX, L'\0' }; printf("%d\n", wcscmp(str1, str2)); printf("%d\n", wcsncmp(str1, str2, 2)); } will print `1` twice, instead of the negative numbers mandated by the standard (as WCHAR_MIN is less than WCHAR_MAX). This patch fixes this, along with the associated GitHub issue, https://github.com/jart/cosmopolitan/issues/783 --- libc/str/wcscmp.c | 2 +- libc/str/wcsncmp.c | 2 +- test/libc/str/strcmp_test.c | 16 ++++++++-------- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/libc/str/wcscmp.c b/libc/str/wcscmp.c index 33abf540a..2ac448237 100644 --- a/libc/str/wcscmp.c +++ b/libc/str/wcscmp.c @@ -30,5 +30,5 @@ int wcscmp(const wchar_t *a, const wchar_t *b) { size_t i = 0; if (a == b) return 0; while (a[i] == b[i] && b[i]) ++i; - return (unsigned)a[i] - (unsigned)b[i]; + return (a[i] > b[i]) - (a[i] < b[i]); } diff --git a/libc/str/wcsncmp.c b/libc/str/wcsncmp.c index e12eda01e..fba450993 100644 --- a/libc/str/wcsncmp.c +++ b/libc/str/wcsncmp.c @@ -30,5 +30,5 @@ int wcsncmp(const wchar_t *a, const wchar_t *b, size_t n) { size_t i = 0; if (!n-- || a == b) return 0; while (i < n && a[i] == b[i] && b[i]) ++i; - return (unsigned)a[i] - (unsigned)b[i]; + return (a[i] > b[i]) - (a[i] < b[i]); } diff --git a/test/libc/str/strcmp_test.c b/test/libc/str/strcmp_test.c index f7ceb4ff1..ada05d0f8 100644 --- a/test/libc/str/strcmp_test.c +++ b/test/libc/str/strcmp_test.c @@ -460,11 +460,11 @@ TEST(wcscmp, testTwosComplementBane) { EXPECT_EQ(wcscmp(memcpy(B1, "\x00\x00\x00\x80", 4), memcpy(B2, "\x00\x00\x00\x80", 4)), 0); - EXPECT_EQ(-1, wcscmp(memcpy(B1, "\xff\xff\xff\x7f", 4), - memcpy(B2, "\x00\x00\x00\x80", 4))); - EXPECT_EQ(wcscmp(memcpy(B1, "\x00\x00\x00\x80", 4), + EXPECT_LT(0, wcscmp(memcpy(B1, "\xff\xff\xff\x7f", 4), + memcpy(B2, "\x00\x00\x00\x80", 4))); + EXPECT_LT(wcscmp(memcpy(B1, "\x00\x00\x00\x80", 4), 
memcpy(B2, "\xff\xff\xff\x7f", 4)), - 1); + 0); free(B2); free(B1); } @@ -475,12 +475,12 @@ TEST(wcsncmp, testTwosComplementBane) { EXPECT_EQ(wcsncmp(memcpy(B1, "\x00\x00\x00\x80", 4), memcpy(B2, "\x00\x00\x00\x80", 4), 1), 0); - EXPECT_EQ(wcsncmp(memcpy(B1, "\xff\xff\xff\x7f", 4), + EXPECT_GT(wcsncmp(memcpy(B1, "\xff\xff\xff\x7f", 4), memcpy(B2, "\x00\x00\x00\x80", 4), 1), - -1); - EXPECT_EQ(wcsncmp(memcpy(B1, "\x00\x00\x00\x80", 4), + 0); + EXPECT_LT(wcsncmp(memcpy(B1, "\x00\x00\x00\x80", 4), memcpy(B2, "\xff\xff\xff\x7f", 4), 1), - 1); + 0); free(B2); free(B1); }