diff --git a/ChangeLog b/ChangeLog index 43b5d59d3..91eca49ae 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,23 @@ +2011-12-13 Vladimir Serbinenko + + Enable UTF8 in gnulib regexp. + + * config.h.in (RE_ENABLE_I18N) [!GRUB_UTIL]: New define. + * grub-core/lib/posix_wrap/ctype.h (islower): Use grub_islower. + (isupper): Use grub_isupper. + (isascii): New inline function. + * grub-core/lib/posix_wrap/wchar.h: Replace dummy with real contents. + * grub-core/lib/posix_wrap/wctype.h: Likewise. + * grub-core/normal/charset.c (grub_utf8_process): New function. + (grub_utf8_to_utf16): Use grub_utf8_process. + (grub_encode_utf8_character): New function. + (grub_ucs4_to_utf8): Use grub_encode_utf8_character. + * include/grub/charset.h (grub_utf8_process): New declaration. + (grub_encode_utf8_character): Likewise. + * include/grub/misc.h (grub_islower): New inline function. + (grub_isupper): Likewise. + (grub_strchrsub): Moved down to fix the definitions. + 2011-12-13 Vladimir Serbinenko * grub-core/bus/usb/ohci.c (grub_ohci_check_transfer): Add an unsigned diff --git a/config.h.in b/config.h.in index 92d7a07f2..39b8fbafd 100644 --- a/config.h.in +++ b/config.h.in @@ -39,6 +39,8 @@ /* Define to 1 to enable disk cache statistics. 
*/ #define DISK_CACHE_STATS @DISK_CACHE_STATS@ +#define RE_ENABLE_I18N 1 + #if defined(__i386__) #define NESTED_FUNC_ATTR __attribute__ ((__regparm__ (1))) #else diff --git a/grub-core/lib/posix_wrap/ctype.h b/grub-core/lib/posix_wrap/ctype.h index 9589778b6..38b572703 100644 --- a/grub-core/lib/posix_wrap/ctype.h +++ b/grub-core/lib/posix_wrap/ctype.h @@ -42,13 +42,19 @@ isdigit (int c) static inline int islower (int c) { - return (c >= 'a' && c <= 'z'); + return grub_islower (c); +} + +static inline int +isascii (int c) +{ + return !(c & ~0x7f); } static inline int isupper (int c) { - return (c >= 'A' && c <= 'Z'); + return grub_isupper (c); } static inline int diff --git a/grub-core/lib/posix_wrap/wchar.h b/grub-core/lib/posix_wrap/wchar.h index fd56fd332..a696643de 100644 --- a/grub-core/lib/posix_wrap/wchar.h +++ b/grub-core/lib/posix_wrap/wchar.h @@ -19,7 +19,92 @@ #ifndef GRUB_POSIX_WCHAR_H #define GRUB_POSIX_WCHAR_H 1 +#include + /* UCS-4. */ -typedef grub_uint32_t wchar_t; +typedef grub_int32_t wint_t; +enum + { + WEOF = -1 + }; + +#define MB_LEN_MAX 4 + +/* UCS-4. 
*/ +typedef grub_int32_t wchar_t; + +typedef struct mbstate { + grub_uint32_t code; + int count; +} mbstate_t; + +static inline size_t +mbrtowc (wchar_t *pwc, const char *s, size_t n, mbstate_t *ps) +{ + const char *ptr; + if (!s) + { + pwc = 0; + s = ""; + n = 1; + } + + for (ptr = s; ptr < s + n; ptr++) + { + if (!grub_utf8_process (*ptr, &ps->code, &ps->count)) + return -1; + if (ps->count) + continue; + if (pwc) + *pwc = ps->code; + if (ps->code == 0) + return 0; + return ptr - s + 1; + } + return -2; +} + +static inline int +mbsinit(const mbstate_t *ps) +{ + return ps->count == 0; +} + +static inline size_t +wcrtomb (char *s, wchar_t wc, mbstate_t *ps __attribute__ ((unused))) +{ + if (s == 0) + return 1; + return grub_encode_utf8_character ((grub_uint8_t *) s, + (grub_uint8_t *) s + MB_LEN_MAX, + wc); +} + +static inline wint_t btowc (int c) +{ + if (c & ~0x7f) + return WEOF; + return c; +} + + +static inline int +wcscoll (const wchar_t *s1, const wchar_t *s2) +{ + while (*s1 && *s2) + { + if (*s1 != *s2) + break; + + s1++; + s2++; + } + + if (*s1 < *s2) + return -1; + if (*s1 > *s2) + return +1; + return 0; +} #endif diff --git a/grub-core/lib/posix_wrap/wctype.h b/grub-core/lib/posix_wrap/wctype.h index e69de29bb..b2b33ab08 100644 --- a/grub-core/lib/posix_wrap/wctype.h +++ b/grub-core/lib/posix_wrap/wctype.h @@ -0,0 +1,106 @@ +/* + * GRUB -- GRand Unified Bootloader + * Copyright (C) 2009, 2010, 2011 Free Software Foundation, Inc. + * + * GRUB is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * GRUB is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with GRUB. If not, see . + */ + +#ifndef GRUB_POSIX_WCTYPE_H +#define GRUB_POSIX_WCTYPE_H 1 + +#include +#include + +typedef enum { GRUB_CTYPE_INVALID, + GRUB_CTYPE_ALNUM, GRUB_CTYPE_CNTRL, GRUB_CTYPE_LOWER, + GRUB_CTYPE_SPACE, GRUB_CTYPE_ALPHA, GRUB_CTYPE_DIGIT, + GRUB_CTYPE_PRINT, GRUB_CTYPE_UPPER, GRUB_CTYPE_BLANK, + GRUB_CTYPE_GRAPH, GRUB_CTYPE_PUNCT, GRUB_CTYPE_XDIGIT, + GRUB_CTYPE_MAX} wctype_t; + +static inline wctype_t +wctype (const char *name) +{ + wctype_t i; + static const char names[][10] = { "", + "alnum", "cntrl", "lower", + "space", "alpha", "digit", + "print", "upper", "blank", + "graph", "punct", "xdigit" }; + for (i = GRUB_CTYPE_INVALID; i < GRUB_CTYPE_MAX; i++) + if (grub_strcmp (names[i], name) == 0) + return i; + return GRUB_CTYPE_INVALID; +} + +/* FIXME: take into account international lowercase characters. */ +static inline int +iswlower (wint_t wc) +{ + return grub_islower (wc); +} + +static inline wint_t +towlower (wint_t c) +{ + return grub_tolower (c); +} + +static inline wint_t +towupper (wint_t c) +{ + return grub_toupper (c); +} + +static inline int +iswalnum (wint_t c) +{ + return grub_isalpha (c) || grub_isdigit (c); +} + +static inline int +iswctype (wint_t wc, wctype_t desc) +{ + switch (desc) + { + case GRUB_CTYPE_ALNUM: + return iswalnum (wc); + case GRUB_CTYPE_CNTRL: + return grub_iscntrl (wc); + case GRUB_CTYPE_LOWER: + return iswlower (wc); + case GRUB_CTYPE_SPACE: + return grub_isspace (wc); + case GRUB_CTYPE_ALPHA: + return grub_isalpha (wc); + case GRUB_CTYPE_DIGIT: + return grub_isdigit (wc); + case GRUB_CTYPE_PRINT: + return grub_isprint (wc); + case GRUB_CTYPE_UPPER: + return grub_isupper (wc); + case GRUB_CTYPE_BLANK: + return wc == ' ' || wc == '\t'; + case GRUB_CTYPE_GRAPH: + return grub_isgraph (wc); + case GRUB_CTYPE_PUNCT: + return grub_isprint (wc) && !grub_isspace (wc) && !iswalnum (wc); + case GRUB_CTYPE_XDIGIT: + return 
grub_isxdigit (wc); + default: + return 0; + } +} + +#endif diff --git a/grub-core/normal/charset.c b/grub-core/normal/charset.c index 85ead53c4..d089843cc 100644 --- a/grub-core/normal/charset.c +++ b/grub-core/normal/charset.c @@ -60,6 +60,58 @@ #include "widthspec.h" #endif +/* Feed one byte C of a UTF-8 stream into the decoder state. + *COUNT is the number of continuation bytes still expected and *CODE + the code point assembled so far; a character is complete when + *COUNT is 0 after a successful call. Returns 1 if C was accepted and + 0 if it is invalid, in which case *COUNT is left at 0. */ +int +grub_utf8_process (grub_uint8_t c, grub_uint32_t *code, int *count) +{ + if (*count) + { + if ((c & GRUB_UINT8_2_LEADINGBITS) != GRUB_UINT8_1_LEADINGBIT) + { + /* Invalid continuation byte: drop the partial sequence so the + state does not keep expecting trailing bytes. */ + *count = 0; + return 0; + } + else + { + *code <<= 6; + *code |= (c & GRUB_UINT8_6_TRAILINGBITS); + (*count)--; + return 1; + } + } + + if ((c & GRUB_UINT8_1_LEADINGBIT) == 0) + { + *code = c; + return 1; + } + if ((c & GRUB_UINT8_3_LEADINGBITS) == GRUB_UINT8_2_LEADINGBITS) + { + *count = 1; + *code = c & GRUB_UINT8_5_TRAILINGBITS; + return 1; + } + if ((c & GRUB_UINT8_4_LEADINGBITS) == GRUB_UINT8_3_LEADINGBITS) + { + *count = 2; + *code = c & GRUB_UINT8_4_TRAILINGBITS; + return 1; + } + if ((c & GRUB_UINT8_5_LEADINGBITS) == GRUB_UINT8_4_LEADINGBITS) + { + *count = 3; + *code = c & GRUB_UINT8_3_TRAILINGBITS; + return 1; + } + return 0; +} + grub_ssize_t grub_utf8_to_utf16 (grub_uint16_t *dest, grub_size_t destsize, const grub_uint8_t *src, grub_size_t srcsize, @@ -74,64 +126,27 @@ grub_utf8_to_utf16 (grub_uint16_t *dest, grub_size_t destsize, while (srcsize && destsize) { - grub_uint32_t c = *src++; + grub_uint8_t c = *src++; if (srcsize != (grub_size_t)-1) srcsize--; - if (count) + if (!grub_utf8_process (c, &code, &count)) + return -1; + if (count != 0) + continue; + if (code == 0) + break; + if (destsize < 2 && code >= GRUB_UCS2_LIMIT) + break; + if (code >= GRUB_UCS2_LIMIT) { - if ((c & GRUB_UINT8_2_LEADINGBITS) != GRUB_UINT8_1_LEADINGBIT) - { - /* invalid */ - return -1; - } - else - { - code <<= 6; - code |= (c & GRUB_UINT8_6_TRAILINGBITS); - count--; - } + *p++ = GRUB_UTF16_UPPER_SURROGATE (code); + *p++ = GRUB_UTF16_LOWER_SURROGATE (code); + destsize -= 2; } else { - if (c == 0) - break; - - if ((c & 
GRUB_UINT8_1_LEADINGBIT) == 0) - code = c; - else if ((c & GRUB_UINT8_3_LEADINGBITS) == GRUB_UINT8_2_LEADINGBITS) - { - count = 1; - code = c & GRUB_UINT8_5_TRAILINGBITS; - } - else if ((c & GRUB_UINT8_4_LEADINGBITS) == GRUB_UINT8_3_LEADINGBITS) - { - count = 2; - code = c & GRUB_UINT8_4_TRAILINGBITS; - } - else if ((c & GRUB_UINT8_5_LEADINGBITS) == GRUB_UINT8_4_LEADINGBITS) - { - count = 3; - code = c & GRUB_UINT8_3_TRAILINGBITS; - } - else - return -1; - } - - if (count == 0) - { - if (destsize < 2 && code >= GRUB_UCS2_LIMIT) - break; - if (code >= GRUB_UCS2_LIMIT) - { - *p++ = GRUB_UTF16_UPPER_SURROGATE (code); - *p++ = GRUB_UTF16_LOWER_SURROGATE (code); - destsize -= 2; - } - else - { - *p++ = code; - destsize--; - } + *p++ = code; + destsize--; } } @@ -140,6 +148,60 @@ grub_utf8_to_utf16 (grub_uint16_t *dest, grub_size_t destsize, return p - dest; } +/* Encode CODE as UTF-8 at DEST without writing at or past DESTEND. + Returns the number of bytes written (1 to 4), -2 if not enough space, + -1 on invalid character (surrogate or above 0x10FFFF). */ +grub_ssize_t +grub_encode_utf8_character (grub_uint8_t *dest, grub_uint8_t *destend, + grub_uint32_t code) +{ + if (dest >= destend) + return -2; + if (code <= 0x007F) + { + *dest++ = code; + return 1; + } + if (code <= 0x07FF) + { + if (dest + 1 >= destend) + return -2; + *dest++ = (code >> 6) | 0xC0; + *dest++ = (code & 0x3F) | 0x80; + return 2; + } + if ((code >= 0xDC00 && code <= 0xDFFF) + || (code >= 0xD800 && code <= 0xDBFF)) + { + /* No surrogates in UCS-4... */ + return -1; + } + if (code < 0x10000) + { + if (dest + 2 >= destend) + return -2; + *dest++ = (code >> 12) | 0xE0; + *dest++ = ((code >> 6) & 0x3F) | 0x80; + *dest++ = (code & 0x3F) | 0x80; + return 3; + } + if (code > 0x10FFFF) + { + /* Beyond the Unicode range; the 4-byte lead byte cannot hold it. */ + return -1; + } + { + if (dest + 3 >= destend) + return -2; + *dest++ = (code >> 18) | 0xF0; + *dest++ = ((code >> 12) & 0x3F) | 0x80; + *dest++ = ((code >> 6) & 0x3F) | 0x80; + *dest++ = (code & 0x3F) | 0x80; + return 4; + } + +} + /* Convert UCS-4 to UTF-8. 
*/ void grub_ucs4_to_utf8 (grub_uint32_t *src, grub_size_t size, @@ -151,39 +206,17 @@ grub_ucs4_to_utf8 (grub_uint32_t *src, grub_size_t size, while (size-- && dest < destend) { grub_uint32_t code = *src++; - - if (code <= 0x007F) - *dest++ = code; - else if (code <= 0x07FF) + grub_ssize_t s; + s = grub_encode_utf8_character (dest, destend, + code); + if (s == -2) + break; + if (s == -1) { - if (dest + 1 >= destend) - break; - *dest++ = (code >> 6) | 0xC0; - *dest++ = (code & 0x3F) | 0x80; - } - else if ((code >= 0xDC00 && code <= 0xDFFF) - || (code >= 0xD800 && code <= 0xDBFF)) - { - /* No surrogates in UCS-4... */ *dest++ = '?'; + continue; } - else if (code < 0x10000) - { - if (dest + 2 >= destend) - break; - *dest++ = (code >> 12) | 0xE0; - *dest++ = ((code >> 6) & 0x3F) | 0x80; - *dest++ = (code & 0x3F) | 0x80; - } - else - { - if (dest + 3 >= destend) - break; - *dest++ = (code >> 18) | 0xF0; - *dest++ = ((code >> 12) & 0x3F) | 0x80; - *dest++ = ((code >> 6) & 0x3F) | 0x80; - *dest++ = (code & 0x3F) | 0x80; - } + dest += s; } *dest = 0; } diff --git a/include/grub/charset.h b/include/grub/charset.h index 13443dad2..c7f86a1ef 100644 --- a/include/grub/charset.h +++ b/include/grub/charset.h @@ -126,11 +126,22 @@ grub_is_valid_utf8 (const grub_uint8_t *src, grub_size_t srcsize); int grub_utf8_to_ucs4_alloc (const char *msg, grub_uint32_t **unicode_msg, grub_uint32_t **last_position); + +/* Process one character from UTF8 sequence. + At beginning set *code = 0, *count = 0. Returns 0 on failure and + 1 on success. *count holds the number of trailing bytes. */ +int +grub_utf8_process (grub_uint8_t c, grub_uint32_t *code, int *count); + void grub_ucs4_to_utf8 (grub_uint32_t *src, grub_size_t size, grub_uint8_t *dest, grub_size_t destsize); grub_size_t grub_utf8_to_ucs4 (grub_uint32_t *dest, grub_size_t destsize, const grub_uint8_t *src, grub_size_t srcsize, const grub_uint8_t **srcend); +/* Returns -2 if not enough space, -1 on invalid character. 
*/ +grub_ssize_t +grub_encode_utf8_character (grub_uint8_t *dest, grub_uint8_t *destend, + grub_uint32_t code); #endif diff --git a/include/grub/misc.h b/include/grub/misc.h index 358f73258..0344d528b 100644 --- a/include/grub/misc.h +++ b/include/grub/misc.h @@ -175,6 +175,18 @@ grub_isalpha (int c) return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'); } +static inline int +grub_islower (int c) +{ + return (c >= 'a' && c <= 'z'); +} + +static inline int +grub_isupper (int c) +{ + return (c >= 'A' && c <= 'Z'); +} + static inline int grub_isgraph (int c) { @@ -250,27 +262,6 @@ grub_strncasecmp (const char *s1, const char *s2, grub_size_t n) return (int) grub_tolower (*s1) - (int) grub_tolower (*s2); } -/* Replace all `ch' characters of `input' with `with' and copy the - result into `output'; return EOS address of `output'. */ -static inline char * -grub_strchrsub (char *output, const char *input, char ch, const char *with) -{ - grub_size_t grub_strlen (const char *s); - while (*input) - { - if (*input == ch) - { - grub_strcpy (output, with); - output += grub_strlen (with); - input++; - continue; - } - *output++ = *input++; - } - *output = '\0'; - return output; -} - unsigned long EXPORT_FUNC(grub_strtoul) (const char *str, char **end, int base); unsigned long long EXPORT_FUNC(grub_strtoull) (const char *str, char **end, int base); @@ -317,6 +308,26 @@ grub_size_t EXPORT_FUNC(grub_strlen) (const char *s) __attribute__ ((warn_unused int EXPORT_FUNC(grub_printf) (const char *fmt, ...) __attribute__ ((format (printf, 1, 2))); int EXPORT_FUNC(grub_printf_) (const char *fmt, ...) __attribute__ ((format (printf, 1, 2))); +/* Replace all `ch' characters of `input' with `with' and copy the + result into `output'; return EOS address of `output'. 
*/ +static inline char * +grub_strchrsub (char *output, const char *input, char ch, const char *with) +{ + while (*input) + { + if (*input == ch) + { + grub_strcpy (output, with); + output += grub_strlen (with); + input++; + continue; + } + *output++ = *input++; + } + *output = '\0'; + return output; +} + extern void (*EXPORT_VAR (grub_xputs)) (const char *str); static inline int