mirror of
https://github.com/jart/cosmopolitan.git
synced 2025-07-07 03:38:31 +00:00
Update Musl Libc code
We have now implemented all of Musl's localization code, the same way that Musl implements localization. You may need to call setlocale(LC_ALL, "C.UTF-8") if anything stops working as expected.
This commit is contained in:
parent
d0360bf4bd
commit
bb815eafaf
116 changed files with 6525 additions and 5523 deletions
|
@ -36,7 +36,6 @@ TEST_LIBC_STR_DIRECTDEPS = \
|
|||
LIBC_FMT \
|
||||
LIBC_INTRIN \
|
||||
LIBC_LOG \
|
||||
LIBC_TINYMATH \
|
||||
LIBC_MEM \
|
||||
LIBC_NEXGEN32E \
|
||||
LIBC_RUNTIME \
|
||||
|
@ -45,14 +44,16 @@ TEST_LIBC_STR_DIRECTDEPS = \
|
|||
LIBC_SYSV \
|
||||
LIBC_SYSV_CALLS \
|
||||
LIBC_TESTLIB \
|
||||
LIBC_TINYMATH \
|
||||
LIBC_X \
|
||||
THIRD_PARTY_COMPILER_RT \
|
||||
THIRD_PARTY_MBEDTLS \
|
||||
THIRD_PARTY_REGEX \
|
||||
THIRD_PARTY_ZLIB \
|
||||
THIRD_PARTY_LIBCXX \
|
||||
THIRD_PARTY_MBEDTLS \
|
||||
THIRD_PARTY_MUSL \
|
||||
THIRD_PARTY_REGEX \
|
||||
THIRD_PARTY_SMALLZ4 \
|
||||
THIRD_PARTY_VQSORT
|
||||
THIRD_PARTY_VQSORT \
|
||||
THIRD_PARTY_ZLIB \
|
||||
|
||||
TEST_LIBC_STR_DEPS := \
|
||||
$(call uniq,$(foreach x,$(TEST_LIBC_STR_DIRECTDEPS),$($(x))))
|
||||
|
|
|
@ -19,10 +19,15 @@
|
|||
#include "third_party/regex/regex.h"
|
||||
#include "libc/mem/gc.h"
|
||||
#include "libc/mem/mem.h"
|
||||
#include "libc/str/locale.h"
|
||||
#include "libc/str/str.h"
|
||||
#include "libc/testlib/ezbench.h"
|
||||
#include "libc/testlib/testlib.h"
|
||||
|
||||
// Selects the "C.UTF-8" locale for every category (LC_ALL).
// NOTE(review): presumably invoked by the test harness once before the tests
// in this file run -- confirm against libc/testlib.
void SetUpOnce(void) {
|
||||
setlocale(LC_ALL, "C.UTF-8");
|
||||
}
|
||||
|
||||
TEST(regex, test) {
|
||||
regex_t rx;
|
||||
EXPECT_EQ(REG_OK, regcomp(&rx, "^[A-Za-z\x7f-\uffff]{2}$", REG_EXTENDED));
|
||||
|
|
|
@ -1,30 +0,0 @@
|
|||
/*-*- mode:c;indent-tabs-mode:nil;c-basic-offset:2;tab-width:8;coding:utf-8 -*-│
|
||||
│ vi: set et ft=c ts=2 sts=2 sw=2 fenc=utf-8 :vi │
|
||||
╞══════════════════════════════════════════════════════════════════════════════╡
|
||||
│ Copyright 2022 Gavin Arthur Hayes │
|
||||
│ │
|
||||
│ Permission to use, copy, modify, and/or distribute this software for │
|
||||
│ any purpose with or without fee is hereby granted, provided that the │
|
||||
│ above copyright notice and this permission notice appear in all copies. │
|
||||
│ │
|
||||
│ THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL │
|
||||
│ WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED │
|
||||
│ WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE │
|
||||
│ AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL │
|
||||
│ DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR │
|
||||
│ PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER │
|
||||
│ TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR │
|
||||
│ PERFORMANCE OF THIS SOFTWARE. │
|
||||
╚─────────────────────────────────────────────────────────────────────────────*/
|
||||
#include "libc/str/locale.h"
|
||||
#include "libc/testlib/testlib.h"
|
||||
|
||||
// Checks the strings returned by setlocale(LC_ALL, ...): querying with NULL
// yields "C", selecting "C" yields "C", selecting "POSIX" yields "POSIX",
// selecting "" yields "C", selecting "ja_JP.PCK" yields a null pointer, and a
// final NULL query still yields "C".
TEST(setlocale, test) {
|
||||
EXPECT_STREQ("C", setlocale(LC_ALL, NULL));
|
||||
EXPECT_STREQ("C", setlocale(LC_ALL, "C"));
|
||||
EXPECT_STREQ("C", setlocale(LC_ALL, NULL));
|
||||
EXPECT_STREQ("POSIX", setlocale(LC_ALL, "POSIX"));
|
||||
EXPECT_STREQ("C", setlocale(LC_ALL, ""));
|
||||
EXPECT_EQ(0, setlocale(LC_ALL, "ja_JP.PCK"));
|
||||
EXPECT_STREQ("C", setlocale(LC_ALL, NULL));
|
||||
}
|
|
@ -28,7 +28,8 @@ TEST_LIBC_TIME_DIRECTDEPS = \
|
|||
LIBC_SYSV \
|
||||
LIBC_TESTLIB \
|
||||
LIBC_X \
|
||||
THIRD_PARTY_TZ
|
||||
THIRD_PARTY_MUSL \
|
||||
THIRD_PARTY_TZ \
|
||||
|
||||
TEST_LIBC_TIME_DEPS := \
|
||||
$(call uniq,$(foreach x,$(TEST_LIBC_TIME_DIRECTDEPS),$($(x))))
|
||||
|
|
|
@ -8,15 +8,16 @@ TEST_LIBC_TINYMATH_SRCS_CC := $(wildcard test/libc/tinymath/*.cc)
|
|||
TEST_LIBC_TINYMATH_SRCS_TEST = $(filter %_test.c,$(TEST_LIBC_TINYMATH_SRCS))
|
||||
|
||||
TEST_LIBC_TINYMATH_SRCS = \
|
||||
$(TEST_LIBC_TINYMATH_SRCS_C:%.c=o/$(MODE)/%.o) \
|
||||
$(TEST_LIBC_TINYMATH_SRCS_CC:%.cc=o/$(MODE)/%.o)
|
||||
$(TEST_LIBC_TINYMATH_SRCS_C) \
|
||||
$(TEST_LIBC_TINYMATH_SRCS_CC)
|
||||
|
||||
TEST_LIBC_TINYMATH_OBJS = \
|
||||
$(TEST_LIBC_TINYMATH_SRCS_C:%.c=o/$(MODE)/%.o) \
|
||||
$(TEST_LIBC_TINYMATH_SRCS_CC:%.cc=o/$(MODE)/%.o)
|
||||
|
||||
TEST_LIBC_TINYMATH_COMS = \
|
||||
$(TEST_LIBC_TINYMATH_SRCS:%.c=o/$(MODE)/%)
|
||||
$(TEST_LIBC_TINYMATH_SRCS_C:%.c=o/$(MODE)/%) \
|
||||
$(TEST_LIBC_TINYMATH_SRCS_CC:%.cc=o/$(MODE)/%)
|
||||
|
||||
TEST_LIBC_TINYMATH_BINS = \
|
||||
$(TEST_LIBC_TINYMATH_COMS) \
|
||||
|
@ -68,10 +69,6 @@ $(TEST_LIBC_TINYMATH_OBJS): private \
|
|||
CFLAGS += \
|
||||
-fno-builtin
|
||||
|
||||
$(TEST_LIBC_TINYMATH_OBJS): private \
|
||||
CXXFLAGS += \
|
||||
#-ffast-math
|
||||
|
||||
.PHONY: o/$(MODE)/test/libc/tinymath
|
||||
o/$(MODE)/test/libc/tinymath: \
|
||||
$(TEST_LIBC_TINYMATH_BINS) \
|
||||
|
|
|
@ -35,7 +35,8 @@ TEST_POSIX_DIRECTDEPS = \
|
|||
LIBC_STDIO \
|
||||
LIBC_STR \
|
||||
LIBC_SYSV \
|
||||
LIBC_THREAD
|
||||
LIBC_THREAD \
|
||||
THIRD_PARTY_MUSL \
|
||||
|
||||
TEST_POSIX_DEPS := \
|
||||
$(call uniq,$(foreach x,$(TEST_POSIX_DIRECTDEPS),$($(x))))
|
||||
|
|
173
test/posix/iconv_utf8_utf16_test.c
Normal file
173
test/posix/iconv_utf8_utf16_test.c
Normal file
|
@ -0,0 +1,173 @@
|
|||
#include <errno.h>
|
||||
#include <iconv.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <uchar.h>
|
||||
|
||||
#define INBUF_SIZE 1024
#define OUTBUF_SIZE 2048

// Number of check_conversion() calls made so far. It is added to every
// failure code so the returned value identifies which call failed.
int g_count;

// Closes cd on a failure path while keeping the errno value that was current
// before the close, so the caller can still inspect it (e.g. expect EILSEQ).
static void close_keeping_errno(iconv_t cd) {
  int saved_errno = errno;
  iconv_close(cd);
  errno = saved_errno;
}

// Converts `input` (UTF-8, `input_len` bytes) to UTF-16LE and compares the
// result with `expected_output` (`expected_len` bytes), then converts the
// UTF-16LE bytes back to UTF-8 and compares them with `input`.
//
// The expected output is compared bytewise against the UTF-16LE result, so
// char16_t data only matches when the host stores char16_t little-endian.
//
// Returns 0 on success. On failure returns a stage code plus g_count (which
// is incremented on every call):
//    10  iconv_open() failed                 80  same, reverse direction
//    20  iconv() failed (errno preserved)    90  same, reverse direction
//    40  input not fully consumed           100  same, reverse direction
//    50  output length mismatch             110  same, reverse direction
//    60  output content mismatch            120  same, reverse direction
//    70  iconv_close() failed               130  same, reverse direction
//   140  input_len exceeds INBUF_SIZE
int check_conversion(const char* input, size_t input_len,
                     const char16_t* expected_output, size_t expected_len) {
  iconv_t cd;
  char inbuf[INBUF_SIZE];
  char outbuf[OUTBUF_SIZE];
  char* inptr = inbuf;
  char* outptr = outbuf;
  size_t inbytesleft = input_len;
  size_t outbytesleft = OUTBUF_SIZE;

  ++g_count;

  // The staging buffer is fixed-size; refuse inputs that would overflow it.
  if (input_len > INBUF_SIZE)
    return 140 + g_count;
  if (input_len != 0)
    memcpy(inbuf, input, input_len);

  cd = iconv_open("UTF-16LE", "UTF-8");
  if (cd == (iconv_t)-1)
    return 10 + g_count;  // iconv_open failed

  if (iconv(cd, &inptr, &inbytesleft, &outptr, &outbytesleft) == (size_t)-1) {
    close_keeping_errno(cd);
    return 20 + g_count;  // iconv failed; errno still holds its error
  }

  if (inbytesleft != 0) {
    close_keeping_errno(cd);
    return 40 + g_count;  // Not all input was converted
  }

  size_t output_len = OUTBUF_SIZE - outbytesleft;
  if (output_len != expected_len) {
    close_keeping_errno(cd);
    return 50 + g_count;  // Output length mismatch
  }

  // Skip memcmp for empty output so a null expected_output is never passed.
  if (output_len != 0 && memcmp(outbuf, expected_output, output_len) != 0) {
    close_keeping_errno(cd);
    return 60 + g_count;  // Output content mismatch
  }

  if (iconv_close(cd) == -1)
    return 70 + g_count;  // iconv_close failed

  // Reverse direction check: UTF-16LE back to UTF-8. The forward output is
  // fed straight back in; iconv() only advances the pointer, not the data.
  cd = iconv_open("UTF-8", "UTF-16LE");
  if (cd == (iconv_t)-1)
    return 80 + g_count;  // iconv_open failed for reverse direction

  char reverse_outbuf[INBUF_SIZE];
  char* reverse_inptr = outbuf;
  char* reverse_outptr = reverse_outbuf;
  size_t reverse_inbytesleft = output_len;
  size_t reverse_outbytesleft = INBUF_SIZE;

  if (iconv(cd, &reverse_inptr, &reverse_inbytesleft, &reverse_outptr,
            &reverse_outbytesleft) == (size_t)-1) {
    close_keeping_errno(cd);
    return 90 + g_count;  // iconv failed for reverse direction
  }

  if (reverse_inbytesleft != 0) {
    close_keeping_errno(cd);
    return 100 + g_count;  // Not all input was converted in reverse direction
  }

  size_t reverse_output_len = INBUF_SIZE - reverse_outbytesleft;
  if (reverse_output_len != input_len) {
    close_keeping_errno(cd);
    return 110 + g_count;  // Reverse output length mismatch
  }

  if (input_len != 0 && memcmp(reverse_outbuf, input, input_len) != 0) {
    close_keeping_errno(cd);
    return 120 + g_count;  // Reverse output content mismatch
  }

  if (iconv_close(cd) == -1)
    return 130 + g_count;  // iconv_close failed for reverse direction

  return 0;  // Success
}
|
||||
|
||||
int main() {
|
||||
// Test case 1: Basic ASCII
|
||||
const char input1[] = "Hello, world!";
|
||||
const char16_t expected1[] = u"Hello, world!";
|
||||
int result = check_conversion(input1, sizeof(input1) - 1, expected1,
|
||||
sizeof(expected1) - 2);
|
||||
if (result != 0)
|
||||
return result;
|
||||
|
||||
// Test case 2: Non-ASCII characters and newline
|
||||
const char input2[] = "こんにちは\nWorld! ☺";
|
||||
const char16_t expected2[] = u"こんにちは\nWorld! ☺";
|
||||
result = check_conversion(input2, sizeof(input2) - 1, expected2,
|
||||
sizeof(expected2) - 2);
|
||||
if (result != 0)
|
||||
return result;
|
||||
|
||||
// Test case 3: Empty string
|
||||
const char input3[] = "";
|
||||
const char16_t expected3[] = u"";
|
||||
result = check_conversion(input3, 0, expected3, 0);
|
||||
if (result != 0)
|
||||
return result;
|
||||
|
||||
// Test case 4: String with null characters
|
||||
const char input4[] = "Hello\0World";
|
||||
const char16_t expected4[] = u"Hello\0World";
|
||||
result = check_conversion(input4, sizeof(input4) - 1, expected4,
|
||||
sizeof(expected4) - 2);
|
||||
if (result != 0)
|
||||
return result;
|
||||
|
||||
// Test case 5: Long string to test buffer handling
|
||||
char input5[INBUF_SIZE];
|
||||
char16_t expected5[INBUF_SIZE];
|
||||
memset(input5, 'A', INBUF_SIZE - 1);
|
||||
input5[INBUF_SIZE - 1] = '\0';
|
||||
for (int i = 0; i < INBUF_SIZE - 1; i++) {
|
||||
expected5[i] = u'A';
|
||||
}
|
||||
result =
|
||||
check_conversion(input5, INBUF_SIZE - 1, expected5, (INBUF_SIZE - 1) * 2);
|
||||
if (result != 0)
|
||||
return result;
|
||||
|
||||
// Test case 6: Invalid UTF-8 sequence
|
||||
const char input6[] = {0xC0, 0x80};
|
||||
result = check_conversion(input6, sizeof(input6), NULL, 0);
|
||||
if (result != 26) {
|
||||
if (errno != EILSEQ)
|
||||
return 201;
|
||||
return 200;
|
||||
}
|
||||
|
||||
// Test case 7: Mixing ASCII and non-ASCII
|
||||
const char input7[] = "Hello, 世界!";
|
||||
const char16_t expected7[] = u"Hello, 世界!";
|
||||
result = check_conversion(input7, sizeof(input7) - 1, expected7,
|
||||
sizeof(expected7) - 2);
|
||||
if (result != 0)
|
||||
return result;
|
||||
|
||||
// Test case 8: Surrogate pairs
|
||||
const char input8[] = "𐐷"; // U+10437
|
||||
const char16_t expected8[] =
|
||||
u"𐐷"; // This will be encoded as a surrogate pair
|
||||
result = check_conversion(input8, sizeof(input8) - 1, expected8,
|
||||
sizeof(expected8) - 2);
|
||||
if (result != 0)
|
||||
return result;
|
||||
|
||||
return 0; // All tests passed
|
||||
}
|
172
test/posix/iconv_utf8_utf32_test.c
Normal file
172
test/posix/iconv_utf8_utf32_test.c
Normal file
|
@ -0,0 +1,172 @@
|
|||
#include <errno.h>
|
||||
#include <iconv.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <uchar.h>
|
||||
|
||||
#define INBUF_SIZE 1024
#define OUTBUF_SIZE 4096

// Number of check_conversion() calls made so far. It is added to every
// failure code so the returned value identifies which call failed.
int g_count;

// The expected output is passed as wchar_t and compared bytewise with the
// UTF-32LE result, which only works when wchar_t is a 32-bit code unit.
_Static_assert(sizeof(wchar_t) == 4,
               "wchar_t must be 4 bytes to hold expected UTF-32 output");

// Closes cd on a failure path while keeping the errno value that was current
// before the close, so the caller can still inspect it (e.g. expect EILSEQ).
static void close_keeping_errno(iconv_t cd) {
  int saved_errno = errno;
  iconv_close(cd);
  errno = saved_errno;
}

// Converts `input` (UTF-8, `input_len` bytes) to UTF-32LE and compares the
// result with `expected_output` (`expected_len` bytes), then converts the
// UTF-32LE bytes back to UTF-8 and compares them with `input`.
//
// The expected output is compared bytewise against the UTF-32LE result, so
// wchar_t data only matches when the host stores wchar_t little-endian.
//
// Returns 0 on success. On failure returns a stage code plus g_count (which
// is incremented on every call):
//    10  iconv_open() failed                 80  same, reverse direction
//    20  iconv() failed (errno preserved)    90  same, reverse direction
//    40  input not fully consumed           100  same, reverse direction
//    50  output length mismatch             110  same, reverse direction
//    60  output content mismatch            120  same, reverse direction
//    70  iconv_close() failed               130  same, reverse direction
//   140  input_len exceeds INBUF_SIZE
int check_conversion(const char* input, size_t input_len,
                     const wchar_t* expected_output, size_t expected_len) {
  iconv_t cd;
  char inbuf[INBUF_SIZE];
  char outbuf[OUTBUF_SIZE];
  char* inptr = inbuf;
  char* outptr = outbuf;
  size_t inbytesleft = input_len;
  size_t outbytesleft = OUTBUF_SIZE;

  ++g_count;

  // The staging buffer is fixed-size; refuse inputs that would overflow it.
  if (input_len > INBUF_SIZE)
    return 140 + g_count;
  if (input_len != 0)
    memcpy(inbuf, input, input_len);

  cd = iconv_open("UTF-32LE", "UTF-8");
  if (cd == (iconv_t)-1)
    return 10 + g_count;  // iconv_open failed

  if (iconv(cd, &inptr, &inbytesleft, &outptr, &outbytesleft) == (size_t)-1) {
    close_keeping_errno(cd);
    return 20 + g_count;  // iconv failed; errno still holds its error
  }

  if (inbytesleft != 0) {
    close_keeping_errno(cd);
    return 40 + g_count;  // Not all input was converted
  }

  size_t output_len = OUTBUF_SIZE - outbytesleft;
  if (output_len != expected_len) {
    close_keeping_errno(cd);
    return 50 + g_count;  // Output length mismatch
  }

  // Skip memcmp for empty output so a null expected_output is never passed.
  if (output_len != 0 && memcmp(outbuf, expected_output, output_len) != 0) {
    close_keeping_errno(cd);
    return 60 + g_count;  // Output content mismatch
  }

  if (iconv_close(cd) == -1)
    return 70 + g_count;  // iconv_close failed

  // Reverse direction check: UTF-32LE back to UTF-8. The forward output is
  // fed straight back in; iconv() only advances the pointer, not the data.
  cd = iconv_open("UTF-8", "UTF-32LE");
  if (cd == (iconv_t)-1)
    return 80 + g_count;  // iconv_open failed for reverse direction

  char reverse_outbuf[INBUF_SIZE];
  char* reverse_inptr = outbuf;
  char* reverse_outptr = reverse_outbuf;
  size_t reverse_inbytesleft = output_len;
  size_t reverse_outbytesleft = INBUF_SIZE;

  if (iconv(cd, &reverse_inptr, &reverse_inbytesleft, &reverse_outptr,
            &reverse_outbytesleft) == (size_t)-1) {
    close_keeping_errno(cd);
    return 90 + g_count;  // iconv failed for reverse direction
  }

  if (reverse_inbytesleft != 0) {
    close_keeping_errno(cd);
    return 100 + g_count;  // Not all input was converted in reverse direction
  }

  size_t reverse_output_len = INBUF_SIZE - reverse_outbytesleft;
  if (reverse_output_len != input_len) {
    close_keeping_errno(cd);
    return 110 + g_count;  // Reverse output length mismatch
  }

  if (input_len != 0 && memcmp(reverse_outbuf, input, input_len) != 0) {
    close_keeping_errno(cd);
    return 120 + g_count;  // Reverse output content mismatch
  }

  if (iconv_close(cd) == -1)
    return 130 + g_count;  // iconv_close failed for reverse direction

  return 0;  // Success
}
|
||||
|
||||
int main() {
|
||||
// Test case 1: Basic ASCII
|
||||
const char input1[] = "Hello, world!";
|
||||
const wchar_t expected1[] = L"Hello, world!";
|
||||
int result = check_conversion(input1, sizeof(input1) - 1, expected1,
|
||||
sizeof(expected1) - 4);
|
||||
if (result != 0)
|
||||
return result;
|
||||
|
||||
// Test case 2: Non-ASCII characters and newline
|
||||
const char input2[] = "こんにちは\nWorld! ☺";
|
||||
const wchar_t expected2[] = L"こんにちは\nWorld! ☺";
|
||||
result = check_conversion(input2, sizeof(input2) - 1, expected2,
|
||||
sizeof(expected2) - 4);
|
||||
if (result != 0)
|
||||
return result;
|
||||
|
||||
// Test case 3: Empty string
|
||||
const char input3[] = "";
|
||||
const wchar_t expected3[] = L"";
|
||||
result = check_conversion(input3, 0, expected3, 0);
|
||||
if (result != 0)
|
||||
return result;
|
||||
|
||||
// Test case 4: String with null characters
|
||||
const char input4[] = "Hello\0World";
|
||||
const wchar_t expected4[] = L"Hello\0World";
|
||||
result = check_conversion(input4, sizeof(input4) - 1, expected4,
|
||||
sizeof(expected4) - 4);
|
||||
if (result != 0)
|
||||
return result;
|
||||
|
||||
// Test case 5: Long string to test buffer handling
|
||||
char input5[INBUF_SIZE];
|
||||
wchar_t expected5[INBUF_SIZE];
|
||||
memset(input5, 'A', INBUF_SIZE - 1);
|
||||
input5[INBUF_SIZE - 1] = '\0';
|
||||
for (int i = 0; i < INBUF_SIZE - 1; i++) {
|
||||
expected5[i] = u'A';
|
||||
}
|
||||
result =
|
||||
check_conversion(input5, INBUF_SIZE - 1, expected5, (INBUF_SIZE - 1) * 4);
|
||||
if (result != 0)
|
||||
return result;
|
||||
|
||||
// Test case 6: Invalid UTF-8 sequence
|
||||
const char input6[] = {0xC0, 0x80};
|
||||
result = check_conversion(input6, sizeof(input6), NULL, 0);
|
||||
if (result != 26) {
|
||||
if (errno != EILSEQ)
|
||||
return 201;
|
||||
return 200;
|
||||
}
|
||||
|
||||
// Test case 7: Mixing ASCII and non-ASCII
|
||||
const char input7[] = "Hello, 世界!";
|
||||
const wchar_t expected7[] = L"Hello, 世界!";
|
||||
result = check_conversion(input7, sizeof(input7) - 1, expected7,
|
||||
sizeof(expected7) - 4);
|
||||
if (result != 0)
|
||||
return result;
|
||||
|
||||
// Test case 8: Surrogate pairs
|
||||
const char input8[] = "𐐷"; // U+10437
|
||||
const wchar_t expected8[] = L"𐐷"; // This will be encoded as a surrogate pair
|
||||
result = check_conversion(input8, sizeof(input8) - 1, expected8,
|
||||
sizeof(expected8) - 4);
|
||||
if (result != 0)
|
||||
return result;
|
||||
|
||||
return 0; // All tests passed
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue