Update Musl Libc code

We have now implemented all of Musl's localization code, the same way that
Musl implements localization. You may need to call setlocale(LC_ALL, "C.UTF-8")
if anything stops working as expected.
This commit is contained in:
Justine Tunney 2024-07-30 09:14:57 -07:00
parent d0360bf4bd
commit bb815eafaf
No known key found for this signature in database
GPG key ID: BE714B4575D6E328
116 changed files with 6525 additions and 5523 deletions

View file

@ -36,7 +36,6 @@ TEST_LIBC_STR_DIRECTDEPS = \
LIBC_FMT \
LIBC_INTRIN \
LIBC_LOG \
LIBC_TINYMATH \
LIBC_MEM \
LIBC_NEXGEN32E \
LIBC_RUNTIME \
@ -45,14 +44,16 @@ TEST_LIBC_STR_DIRECTDEPS = \
LIBC_SYSV \
LIBC_SYSV_CALLS \
LIBC_TESTLIB \
LIBC_TINYMATH \
LIBC_X \
THIRD_PARTY_COMPILER_RT \
THIRD_PARTY_MBEDTLS \
THIRD_PARTY_REGEX \
THIRD_PARTY_ZLIB \
THIRD_PARTY_LIBCXX \
THIRD_PARTY_MBEDTLS \
THIRD_PARTY_MUSL \
THIRD_PARTY_REGEX \
THIRD_PARTY_SMALLZ4 \
THIRD_PARTY_VQSORT
THIRD_PARTY_VQSORT \
THIRD_PARTY_ZLIB \
TEST_LIBC_STR_DEPS := \
$(call uniq,$(foreach x,$(TEST_LIBC_STR_DIRECTDEPS),$($(x))))

View file

@ -19,10 +19,15 @@
#include "third_party/regex/regex.h"
#include "libc/mem/gc.h"
#include "libc/mem/mem.h"
#include "libc/str/locale.h"
#include "libc/str/str.h"
#include "libc/testlib/ezbench.h"
#include "libc/testlib/testlib.h"
// Test fixture hook: requests the "C.UTF-8" locale for every category.
// NOTE(review): presumably called once by the test harness before any TEST
// body runs -- confirm against the testlib runner.
void SetUpOnce(void) {
  const char *wanted_locale = "C.UTF-8";
  setlocale(LC_ALL, wanted_locale);
}
TEST(regex, test) {
regex_t rx;
EXPECT_EQ(REG_OK, regcomp(&rx, "^[A-Za-z\x7f-\uffff]{2}$", REG_EXTENDED));

View file

@ -1,30 +0,0 @@
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi
Copyright 2022 Gavin Arthur Hayes
Permission to use, copy, modify, and/or distribute this software for
any purpose with or without fee is hereby granted, provided that the
above copyright notice and this permission notice appear in all copies.
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THIS SOFTWARE.
*/
#include "libc/str/locale.h"
#include "libc/testlib/testlib.h"
// Walks setlocale(LC_ALL, ...) through a fixed sequence of requests and checks
// the name it reports after each one. The calls mutate global locale state, so
// their order is significant.
TEST(setlocale, test) {
// A NULL locale argument is a pure query; "C" is expected before any change.
EXPECT_STREQ("C", setlocale(LC_ALL, NULL));
// Explicitly selecting "C" is expected to report "C", both from the setting
// call and from a follow-up query.
EXPECT_STREQ("C", setlocale(LC_ALL, "C"));
EXPECT_STREQ("C", setlocale(LC_ALL, NULL));
// Selecting "POSIX" is expected to be reported back under that same name.
EXPECT_STREQ("POSIX", setlocale(LC_ALL, "POSIX"));
// The empty string is expected to resolve to "C" here.
EXPECT_STREQ("C", setlocale(LC_ALL, ""));
// "ja_JP.PCK" is expected to be rejected with a null return ...
EXPECT_EQ(0, setlocale(LC_ALL, "ja_JP.PCK"));
// ... after which a query is expected to still report "C".
EXPECT_STREQ("C", setlocale(LC_ALL, NULL));
}

View file

@ -28,7 +28,8 @@ TEST_LIBC_TIME_DIRECTDEPS = \
LIBC_SYSV \
LIBC_TESTLIB \
LIBC_X \
THIRD_PARTY_TZ
THIRD_PARTY_MUSL \
THIRD_PARTY_TZ \
TEST_LIBC_TIME_DEPS := \
$(call uniq,$(foreach x,$(TEST_LIBC_TIME_DIRECTDEPS),$($(x))))

View file

@ -8,15 +8,16 @@ TEST_LIBC_TINYMATH_SRCS_CC := $(wildcard test/libc/tinymath/*.cc)
TEST_LIBC_TINYMATH_SRCS_TEST = $(filter %_test.c,$(TEST_LIBC_TINYMATH_SRCS))
TEST_LIBC_TINYMATH_SRCS = \
$(TEST_LIBC_TINYMATH_SRCS_C:%.c=o/$(MODE)/%.o) \
$(TEST_LIBC_TINYMATH_SRCS_CC:%.cc=o/$(MODE)/%.o)
$(TEST_LIBC_TINYMATH_SRCS_C) \
$(TEST_LIBC_TINYMATH_SRCS_CC)
TEST_LIBC_TINYMATH_OBJS = \
$(TEST_LIBC_TINYMATH_SRCS_C:%.c=o/$(MODE)/%.o) \
$(TEST_LIBC_TINYMATH_SRCS_CC:%.cc=o/$(MODE)/%.o)
TEST_LIBC_TINYMATH_COMS = \
$(TEST_LIBC_TINYMATH_SRCS:%.c=o/$(MODE)/%)
$(TEST_LIBC_TINYMATH_SRCS_C:%.c=o/$(MODE)/%) \
$(TEST_LIBC_TINYMATH_SRCS_CC:%.cc=o/$(MODE)/%)
TEST_LIBC_TINYMATH_BINS = \
$(TEST_LIBC_TINYMATH_COMS) \
@ -68,10 +69,6 @@ $(TEST_LIBC_TINYMATH_OBJS): private \
CFLAGS += \
-fno-builtin
$(TEST_LIBC_TINYMATH_OBJS): private \
CXXFLAGS += \
#-ffast-math
.PHONY: o/$(MODE)/test/libc/tinymath
o/$(MODE)/test/libc/tinymath: \
$(TEST_LIBC_TINYMATH_BINS) \

View file

@ -35,7 +35,8 @@ TEST_POSIX_DIRECTDEPS = \
LIBC_STDIO \
LIBC_STR \
LIBC_SYSV \
LIBC_THREAD
LIBC_THREAD \
THIRD_PARTY_MUSL \
TEST_POSIX_DEPS := \
$(call uniq,$(foreach x,$(TEST_POSIX_DIRECTDEPS),$($(x))))

View file

@ -0,0 +1,173 @@
#include <errno.h>
#include <iconv.h>
#include <stdlib.h>
#include <string.h>
#include <uchar.h>
#define INBUF_SIZE 1024
#define OUTBUF_SIZE 2048

// Number of check_conversion() calls made so far. It is added to every
// failure code so the returned value identifies which call failed.
int g_count;

// Closes `cd` while keeping errno as the failing iconv call left it, then
// hands `code` back so the caller can simply `return close_and_fail(...)`.
static int close_and_fail(iconv_t cd, int code) {
  int saved_errno = errno;
  iconv_close(cd);
  errno = saved_errno;
  return code;
}

/**
 * Converts `input` from UTF-8 to UTF-16LE with iconv, compares the result
 * with `expected_output`, then converts it back and compares with `input`.
 *
 * @param input            UTF-8 bytes to convert (need not be terminated)
 * @param input_len        number of bytes in `input`; at most INBUF_SIZE
 * @param expected_output  expected UTF-16LE code units
 * @param expected_len     size of `expected_output` in BYTES
 * @return 0 on success, otherwise a stage code plus the call ordinal
 *         (g_count after incrementing):
 *           10 iconv_open failed            80 reverse iconv_open failed
 *           20 iconv failed (errno kept)    90 reverse iconv failed
 *           40 input not fully consumed    100 reverse input not consumed
 *           50 output length mismatch      110 reverse length mismatch
 *           60 output content mismatch     120 reverse content mismatch
 *           70 iconv_close failed          130 reverse iconv_close failed
 *          140 `input_len` exceeds INBUF_SIZE
 */
int check_conversion(const char* input, size_t input_len,
                     const char16_t* expected_output, size_t expected_len) {
  char inbuf[INBUF_SIZE];
  char outbuf[OUTBUF_SIZE];
  char reverse_outbuf[INBUF_SIZE];

  ++g_count;

  // Refuse input that would overflow the fixed-size scratch buffer.
  if (input_len > sizeof(inbuf)) {
    return 140 + g_count;
  }
  // iconv() wants a mutable char**, so work on a private copy of the input.
  memcpy(inbuf, input, input_len);

  iconv_t cd = iconv_open("UTF-16LE", "UTF-8");
  if (cd == (iconv_t)-1) {
    return 10 + g_count;  // iconv_open failed
  }

  char* inptr = inbuf;
  char* outptr = outbuf;
  size_t inbytesleft = input_len;
  size_t outbytesleft = sizeof(outbuf);
  if (iconv(cd, &inptr, &inbytesleft, &outptr, &outbytesleft) == (size_t)-1) {
    return close_and_fail(cd, 20 + g_count);  // iconv failed; errno preserved
  }
  if (inbytesleft != 0) {
    return close_and_fail(cd, 40 + g_count);  // Not all input was converted
  }

  size_t output_len = sizeof(outbuf) - outbytesleft;
  if (output_len != expected_len) {
    return close_and_fail(cd, 50 + g_count);  // Output length mismatch
  }
  if (memcmp(outbuf, expected_output, output_len) != 0) {
    return close_and_fail(cd, 60 + g_count);  // Output content mismatch
  }
  if (iconv_close(cd) == -1) {
    return 70 + g_count;  // iconv_close failed
  }

  // Reverse direction check: UTF-16LE back to UTF-8
  cd = iconv_open("UTF-8", "UTF-16LE");
  if (cd == (iconv_t)-1) {
    return 80 + g_count;  // iconv_open failed for reverse direction
  }

  // The forward output is fed straight back in as the reverse input.
  char* reverse_inptr = outbuf;
  char* reverse_outptr = reverse_outbuf;
  size_t reverse_inbytesleft = output_len;
  size_t reverse_outbytesleft = sizeof(reverse_outbuf);
  if (iconv(cd, &reverse_inptr, &reverse_inbytesleft, &reverse_outptr,
            &reverse_outbytesleft) == (size_t)-1) {
    return close_and_fail(cd, 90 + g_count);  // iconv failed (reverse)
  }
  if (reverse_inbytesleft != 0) {
    return close_and_fail(cd, 100 + g_count);  // Reverse input not consumed
  }

  size_t reverse_output_len = sizeof(reverse_outbuf) - reverse_outbytesleft;
  if (reverse_output_len != input_len) {
    return close_and_fail(cd, 110 + g_count);  // Reverse length mismatch
  }
  if (memcmp(reverse_outbuf, input, input_len) != 0) {
    return close_and_fail(cd, 120 + g_count);  // Reverse content mismatch
  }
  if (iconv_close(cd) == -1) {
    return 130 + g_count;  // iconv_close failed for reverse direction
  }
  return 0;  // Success
}
/**
 * Runs the UTF-8 <-> UTF-16LE round-trip cases in order.
 *
 * @return 0 when every case passes; otherwise the nonzero code returned by
 *         check_conversion for the first failing case, or 200/201 when the
 *         invalid-input case does not fail in the expected way
 */
int main(void) {
  int result;

  // Test case 1: Basic ASCII
  const char input1[] = "Hello, world!";
  const char16_t expected1[] = u"Hello, world!";
  // Lengths exclude the terminator: one byte for char, one unit for char16_t.
  result = check_conversion(input1, sizeof(input1) - 1, expected1,
                            sizeof(expected1) - sizeof(expected1[0]));
  if (result != 0) {
    return result;
  }

  // Test case 2: Non-ASCII characters and newline
  const char input2[] = "こんにちは\nWorld! ☺";
  const char16_t expected2[] = u"こんにちは\nWorld! ☺";
  result = check_conversion(input2, sizeof(input2) - 1, expected2,
                            sizeof(expected2) - sizeof(expected2[0]));
  if (result != 0) {
    return result;
  }

  // Test case 3: Empty string
  const char input3[] = "";
  const char16_t expected3[] = u"";
  result = check_conversion(input3, 0, expected3, 0);
  if (result != 0) {
    return result;
  }

  // Test case 4: String with null characters
  const char input4[] = "Hello\0World";
  const char16_t expected4[] = u"Hello\0World";
  result = check_conversion(input4, sizeof(input4) - 1, expected4,
                            sizeof(expected4) - sizeof(expected4[0]));
  if (result != 0) {
    return result;
  }

  // Test case 5: Long string to test buffer handling
  char input5[INBUF_SIZE];
  char16_t expected5[INBUF_SIZE];
  memset(input5, 'A', INBUF_SIZE - 1);
  input5[INBUF_SIZE - 1] = '\0';
  for (int i = 0; i < INBUF_SIZE - 1; i++) {
    expected5[i] = u'A';
  }
  result = check_conversion(input5, INBUF_SIZE - 1, expected5,
                            (INBUF_SIZE - 1) * sizeof(expected5[0]));
  if (result != 0) {
    return result;
  }

  // Test case 6: Invalid UTF-8 sequence (overlong encoding of U+0000).
  // check_conversion reports an iconv() failure as 20 + call ordinal, and
  // this is the sixth call. The conversion must fail that way AND leave
  // errno set to EILSEQ.
  const char input6[] = {(char)0xC0, (char)0x80};
  errno = 0;
  result = check_conversion(input6, sizeof(input6), NULL, 0);
  if (result != 20 + 6) {
    return 200;  // did not fail in iconv() as expected
  }
  if (errno != EILSEQ) {
    return 201;  // failed, but not with "illegal byte sequence"
  }

  // Test case 7: Mixing ASCII and non-ASCII
  const char input7[] = "Hello, 世界!";
  const char16_t expected7[] = u"Hello, 世界!";
  result = check_conversion(input7, sizeof(input7) - 1, expected7,
                            sizeof(expected7) - sizeof(expected7[0]));
  if (result != 0) {
    return result;
  }

  // Test case 8: Surrogate pairs
  const char input8[] = "𐐷";  // U+10437
  const char16_t expected8[] =
      u"𐐷";  // This will be encoded as a surrogate pair
  result = check_conversion(input8, sizeof(input8) - 1, expected8,
                            sizeof(expected8) - sizeof(expected8[0]));
  if (result != 0) {
    return result;
  }

  return 0;  // All tests passed
}

View file

@ -0,0 +1,172 @@
#include <errno.h>
#include <iconv.h>
#include <stdlib.h>
#include <string.h>
#include <uchar.h>
#define INBUF_SIZE 1024
#define OUTBUF_SIZE 4096

// Number of check_conversion() calls made so far. It is added to every
// failure code so the returned value identifies which call failed.
int g_count;

// Closes `cd` while keeping errno as the failing iconv call left it, then
// hands `code` back so the caller can simply `return close_and_fail(...)`.
static int close_and_fail(iconv_t cd, int code) {
  int saved_errno = errno;
  iconv_close(cd);
  errno = saved_errno;
  return code;
}

/**
 * Converts `input` from UTF-8 to UTF-32LE with iconv, compares the result
 * with `expected_output`, then converts it back and compares with `input`.
 *
 * The expected data is compared bytewise against iconv's UTF-32LE output, so
 * this only matches when wchar_t is a 32-bit little-endian type.
 *
 * @param input            UTF-8 bytes to convert (need not be terminated)
 * @param input_len        number of bytes in `input`; at most INBUF_SIZE
 * @param expected_output  expected UTF-32LE code units
 * @param expected_len     size of `expected_output` in BYTES
 * @return 0 on success, otherwise a stage code plus the call ordinal
 *         (g_count after incrementing):
 *           10 iconv_open failed            80 reverse iconv_open failed
 *           20 iconv failed (errno kept)    90 reverse iconv failed
 *           40 input not fully consumed    100 reverse input not consumed
 *           50 output length mismatch      110 reverse length mismatch
 *           60 output content mismatch     120 reverse content mismatch
 *           70 iconv_close failed          130 reverse iconv_close failed
 *          140 `input_len` exceeds INBUF_SIZE
 */
int check_conversion(const char* input, size_t input_len,
                     const wchar_t* expected_output, size_t expected_len) {
  char inbuf[INBUF_SIZE];
  char outbuf[OUTBUF_SIZE];
  char reverse_outbuf[INBUF_SIZE];

  ++g_count;

  // Refuse input that would overflow the fixed-size scratch buffer.
  if (input_len > sizeof(inbuf)) {
    return 140 + g_count;
  }
  // iconv() wants a mutable char**, so work on a private copy of the input.
  memcpy(inbuf, input, input_len);

  iconv_t cd = iconv_open("UTF-32LE", "UTF-8");
  if (cd == (iconv_t)-1) {
    return 10 + g_count;  // iconv_open failed
  }

  char* inptr = inbuf;
  char* outptr = outbuf;
  size_t inbytesleft = input_len;
  size_t outbytesleft = sizeof(outbuf);
  if (iconv(cd, &inptr, &inbytesleft, &outptr, &outbytesleft) == (size_t)-1) {
    return close_and_fail(cd, 20 + g_count);  // iconv failed; errno preserved
  }
  if (inbytesleft != 0) {
    return close_and_fail(cd, 40 + g_count);  // Not all input was converted
  }

  size_t output_len = sizeof(outbuf) - outbytesleft;
  if (output_len != expected_len) {
    return close_and_fail(cd, 50 + g_count);  // Output length mismatch
  }
  if (memcmp(outbuf, expected_output, output_len) != 0) {
    return close_and_fail(cd, 60 + g_count);  // Output content mismatch
  }
  if (iconv_close(cd) == -1) {
    return 70 + g_count;  // iconv_close failed
  }

  // Reverse direction check: UTF-32LE back to UTF-8
  cd = iconv_open("UTF-8", "UTF-32LE");
  if (cd == (iconv_t)-1) {
    return 80 + g_count;  // iconv_open failed for reverse direction
  }

  // The forward output is fed straight back in as the reverse input.
  char* reverse_inptr = outbuf;
  char* reverse_outptr = reverse_outbuf;
  size_t reverse_inbytesleft = output_len;
  size_t reverse_outbytesleft = sizeof(reverse_outbuf);
  if (iconv(cd, &reverse_inptr, &reverse_inbytesleft, &reverse_outptr,
            &reverse_outbytesleft) == (size_t)-1) {
    return close_and_fail(cd, 90 + g_count);  // iconv failed (reverse)
  }
  if (reverse_inbytesleft != 0) {
    return close_and_fail(cd, 100 + g_count);  // Reverse input not consumed
  }

  size_t reverse_output_len = sizeof(reverse_outbuf) - reverse_outbytesleft;
  if (reverse_output_len != input_len) {
    return close_and_fail(cd, 110 + g_count);  // Reverse length mismatch
  }
  if (memcmp(reverse_outbuf, input, input_len) != 0) {
    return close_and_fail(cd, 120 + g_count);  // Reverse content mismatch
  }
  if (iconv_close(cd) == -1) {
    return 130 + g_count;  // iconv_close failed for reverse direction
  }
  return 0;  // Success
}
/**
 * Runs the UTF-8 <-> UTF-32LE round-trip cases in order.
 *
 * @return 0 when every case passes; otherwise the nonzero code returned by
 *         check_conversion for the first failing case, or 200/201 when the
 *         invalid-input case does not fail in the expected way
 */
int main(void) {
  int result;

  // Test case 1: Basic ASCII
  const char input1[] = "Hello, world!";
  const wchar_t expected1[] = L"Hello, world!";
  // Lengths exclude the terminator: one byte for char, one unit for wchar_t.
  result = check_conversion(input1, sizeof(input1) - 1, expected1,
                            sizeof(expected1) - sizeof(expected1[0]));
  if (result != 0) {
    return result;
  }

  // Test case 2: Non-ASCII characters and newline
  const char input2[] = "こんにちは\nWorld! ☺";
  const wchar_t expected2[] = L"こんにちは\nWorld! ☺";
  result = check_conversion(input2, sizeof(input2) - 1, expected2,
                            sizeof(expected2) - sizeof(expected2[0]));
  if (result != 0) {
    return result;
  }

  // Test case 3: Empty string
  const char input3[] = "";
  const wchar_t expected3[] = L"";
  result = check_conversion(input3, 0, expected3, 0);
  if (result != 0) {
    return result;
  }

  // Test case 4: String with null characters
  const char input4[] = "Hello\0World";
  const wchar_t expected4[] = L"Hello\0World";
  result = check_conversion(input4, sizeof(input4) - 1, expected4,
                            sizeof(expected4) - sizeof(expected4[0]));
  if (result != 0) {
    return result;
  }

  // Test case 5: Long string to test buffer handling
  char input5[INBUF_SIZE];
  wchar_t expected5[INBUF_SIZE];
  memset(input5, 'A', INBUF_SIZE - 1);
  input5[INBUF_SIZE - 1] = '\0';
  for (int i = 0; i < INBUF_SIZE - 1; i++) {
    expected5[i] = L'A';
  }
  result = check_conversion(input5, INBUF_SIZE - 1, expected5,
                            (INBUF_SIZE - 1) * sizeof(expected5[0]));
  if (result != 0) {
    return result;
  }

  // Test case 6: Invalid UTF-8 sequence (overlong encoding of U+0000).
  // check_conversion reports an iconv() failure as 20 + call ordinal, and
  // this is the sixth call. The conversion must fail that way AND leave
  // errno set to EILSEQ.
  const char input6[] = {(char)0xC0, (char)0x80};
  errno = 0;
  result = check_conversion(input6, sizeof(input6), NULL, 0);
  if (result != 20 + 6) {
    return 200;  // did not fail in iconv() as expected
  }
  if (errno != EILSEQ) {
    return 201;  // failed, but not with "illegal byte sequence"
  }

  // Test case 7: Mixing ASCII and non-ASCII
  const char input7[] = "Hello, 世界!";
  const wchar_t expected7[] = L"Hello, 世界!";
  result = check_conversion(input7, sizeof(input7) - 1, expected7,
                            sizeof(expected7) - sizeof(expected7[0]));
  if (result != 0) {
    return result;
  }

  // Test case 8: Character outside the Basic Multilingual Plane
  const char input8[] = "𐐷";  // U+10437
  // UTF-32 has no surrogate pairs: this is a single 32-bit code unit.
  const wchar_t expected8[] = L"𐐷";
  result = check_conversion(input8, sizeof(input8) - 1, expected8,
                            sizeof(expected8) - sizeof(expected8[0]));
  if (result != 0) {
    return result;
  }

  return 0;  // All tests passed
}