Enable UTF8 in gnulib regexp.
* config.h.in (RE_ENABLE_I18N) [!GRUB_UTIL]: New define. * grub-core/lib/posix_wrap/ctype.h (islower): Use grub_islower. (isupper): Use grub_isupper. (isascii): New inline function. * grub-core/lib/posix_wrap/wchar.h: Replace dummy with real contents. * grub-core/lib/posix_wrap/wctype.h: Likewise. * grub-core/normal/charset.c (grub_utf8_process): New function. (grub_utf8_to_utf16): Use grub_utf8_process. (grub_encode_utf8_character): New function. (grub_ucs4_to_utf8): Use grub_encode_utf8_character. * include/grub/charset.h (grub_utf8_process): New declaration. (grub_encode_utf8_character): Likewise. * include/grub/misc.h (grub_islower): New inline function. (grub_isupper): Likewise. (grub_strchrsub): Moved down to fix the definitions.
This commit is contained in:
parent
0af2346fdb
commit
c5fc563aff
8 changed files with 380 additions and 106 deletions
|
@ -42,13 +42,19 @@ isdigit (int c)
|
|||
/* POSIX-wrapper islower: forwards to GRUB's own grub_islower so that the
   wrapper and the core share a single definition of "lowercase".
   Returns nonzero when C is classified as lowercase, zero otherwise.  */
static inline int
islower (int c)
{
  return grub_islower (c);
}
|
||||
|
||||
/* Return 1 when C fits entirely in the low seven bits (0 .. 0x7f),
   0 when any higher bit is set (this includes negative values).  */
static inline int
isascii (int c)
{
  int high_bits = c & ~0x7f;

  return high_bits == 0;
}
|
||||
|
||||
/* POSIX-wrapper isupper: forwards to GRUB's own grub_isupper so that the
   wrapper and the core share a single definition of "uppercase".
   Returns nonzero when C is classified as uppercase, zero otherwise.  */
static inline int
isupper (int c)
{
  return grub_isupper (c);
}
|
||||
|
||||
static inline int
|
||||
|
|
|
@ -19,7 +19,92 @@
|
|||
#ifndef GRUB_POSIX_WCHAR_H
|
||||
#define GRUB_POSIX_WCHAR_H 1
|
||||
|
||||
#include <grub/charset.h>
|
||||
|
||||
/* UCS-4. */
|
||||
typedef grub_uint32_t wchar_t;
|
||||
typedef grub_int32_t wint_t;
|
||||
enum
|
||||
{
|
||||
WEOF = -1
|
||||
};
|
||||
|
||||
#define MB_LEN_MAX 4
|
||||
|
||||
/* UCS-4. */
|
||||
typedef grub_int32_t wchar_t;
|
||||
|
||||
typedef struct mbstate {
|
||||
grub_uint32_t code;
|
||||
int count;
|
||||
} mbstate_t;
|
||||
|
||||
static inline size_t
|
||||
mbrtowc (wchar_t *pwc, const char *s, size_t n, mbstate_t *ps)
|
||||
{
|
||||
const char *ptr;
|
||||
if (!s)
|
||||
{
|
||||
pwc = 0;
|
||||
s = "";
|
||||
n = 1;
|
||||
}
|
||||
|
||||
for (ptr = s; ptr < s + n; ptr++)
|
||||
{
|
||||
if (!grub_utf8_process (*ptr, &ps->code, &ps->count))
|
||||
return -1;
|
||||
if (ps->count)
|
||||
continue;
|
||||
if (pwc)
|
||||
*pwc = ps->code;
|
||||
if (ps->code == 0)
|
||||
return 0;
|
||||
return ptr - s + 1;
|
||||
}
|
||||
return -2;
|
||||
}
|
||||
|
||||
static inline int
|
||||
mbsinit(const mbstate_t *ps)
|
||||
{
|
||||
return ps->count == 0;
|
||||
}
|
||||
|
||||
static inline size_t
|
||||
wcrtomb (char *s, wchar_t wc, mbstate_t *ps __attribute__ ((unused)))
|
||||
{
|
||||
if (s == 0)
|
||||
return 1;
|
||||
return grub_encode_utf8_character ((grub_uint8_t *) s,
|
||||
(grub_uint8_t *) s + MB_LEN_MAX,
|
||||
wc);
|
||||
}
|
||||
|
||||
static inline wint_t btowc (int c)
|
||||
{
|
||||
if (c & ~0x7f)
|
||||
return WEOF;
|
||||
return c;
|
||||
}
|
||||
|
||||
|
||||
/* Compare two NUL-terminated wide strings element by element, by plain
   numeric value.  Returns -1, 0 or +1 when S1 sorts before, equal to,
   or after S2 respectively.  */
static inline int
wcscoll (const wchar_t *s1, const wchar_t *s2)
{
  /* Skip the common prefix; stop at the first difference or at the end
     of either string.  */
  for (; *s1 && *s2 && *s1 == *s2; s1++, s2++)
    ;

  if (*s1 == *s2)
    return 0;
  return (*s1 < *s2) ? -1 : +1;
}
|
||||
|
||||
#endif
|
||||
|
|
|
@ -0,0 +1,106 @@
|
|||
/*
|
||||
* GRUB -- GRand Unified Bootloader
|
||||
* Copyright (C) 2009, 2010, 2011 Free Software Foundation, Inc.
|
||||
*
|
||||
* GRUB is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* GRUB is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with GRUB. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef GRUB_POSIX_WCTYPE_H
|
||||
#define GRUB_POSIX_WCTYPE_H 1
|
||||
|
||||
#include <grub/misc.h>
|
||||
#include <wchar.h>
|
||||
|
||||
/* Character-class handles produced by wctype and consumed by iswctype.
   GRUB_CTYPE_INVALID (0) is the "no such class" value; GRUB_CTYPE_MAX is
   a sentinel one past the last real class.  The order must match the
   name table inside wctype.  */
typedef enum
  {
    GRUB_CTYPE_INVALID,
    GRUB_CTYPE_ALNUM,
    GRUB_CTYPE_CNTRL,
    GRUB_CTYPE_LOWER,
    GRUB_CTYPE_SPACE,
    GRUB_CTYPE_ALPHA,
    GRUB_CTYPE_DIGIT,
    GRUB_CTYPE_PRINT,
    GRUB_CTYPE_UPPER,
    GRUB_CTYPE_BLANK,
    GRUB_CTYPE_GRAPH,
    GRUB_CTYPE_PUNCT,
    GRUB_CTYPE_XDIGIT,
    GRUB_CTYPE_MAX
  } wctype_t;
|
||||
|
||||
static inline wctype_t
|
||||
wctype (const char *name)
|
||||
{
|
||||
wctype_t i;
|
||||
static const char names[][10] = { "",
|
||||
"alnum", "cntrl", "lower",
|
||||
"space", "alpha", "digit",
|
||||
"print", "upper", "blank",
|
||||
"graph", "punct", "xdigit" };
|
||||
for (i = GRUB_CTYPE_INVALID; i < GRUB_CTYPE_MAX; i++)
|
||||
if (grub_strcmp (names[i], name) == 0)
|
||||
return i;
|
||||
return GRUB_CTYPE_INVALID;
|
||||
}
|
||||
|
||||
/* FIXME: take into account international lowercase characters. */
|
||||
static inline int
|
||||
iswlower (wint_t wc)
|
||||
{
|
||||
return grub_islower (wc);
|
||||
}
|
||||
|
||||
static inline wint_t
|
||||
towlower (wint_t c)
|
||||
{
|
||||
return grub_tolower (c);
|
||||
}
|
||||
|
||||
static inline wint_t
|
||||
towupper (wint_t c)
|
||||
{
|
||||
return grub_toupper (c);
|
||||
}
|
||||
|
||||
static inline int
|
||||
iswalnum (wint_t c)
|
||||
{
|
||||
return grub_isalpha (c) || grub_isdigit (c);
|
||||
}
|
||||
|
||||
static inline int
|
||||
iswctype (wint_t wc, wctype_t desc)
|
||||
{
|
||||
switch (desc)
|
||||
{
|
||||
case GRUB_CTYPE_ALNUM:
|
||||
return iswalnum (wc);
|
||||
case GRUB_CTYPE_CNTRL:
|
||||
return grub_iscntrl (wc);
|
||||
case GRUB_CTYPE_LOWER:
|
||||
return iswlower (wc);
|
||||
case GRUB_CTYPE_SPACE:
|
||||
return grub_isspace (wc);
|
||||
case GRUB_CTYPE_ALPHA:
|
||||
return grub_isalpha (wc);
|
||||
case GRUB_CTYPE_DIGIT:
|
||||
return grub_isdigit (wc);
|
||||
case GRUB_CTYPE_PRINT:
|
||||
return grub_isprint (wc);
|
||||
case GRUB_CTYPE_UPPER:
|
||||
return grub_isupper (wc);
|
||||
case GRUB_CTYPE_BLANK:
|
||||
return wc == ' ' || wc == '\t';
|
||||
case GRUB_CTYPE_GRAPH:
|
||||
return grub_isgraph (wc);
|
||||
case GRUB_CTYPE_PUNCT:
|
||||
return grub_isprint (wc) && !grub_isspace (wc) && !iswalnum (wc);
|
||||
case GRUB_CTYPE_XDIGIT:
|
||||
return grub_isxdigit (wc);
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
|
@ -60,6 +60,51 @@
|
|||
#include "widthspec.h"
|
||||
#endif
|
||||
|
||||
int
|
||||
grub_utf8_process (grub_uint8_t c, grub_uint32_t *code, int *count)
|
||||
{
|
||||
if (*count)
|
||||
{
|
||||
if ((c & GRUB_UINT8_2_LEADINGBITS) != GRUB_UINT8_1_LEADINGBIT)
|
||||
{
|
||||
/* invalid */
|
||||
return 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
*code <<= 6;
|
||||
*code |= (c & GRUB_UINT8_6_TRAILINGBITS);
|
||||
(*count)--;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
if ((c & GRUB_UINT8_1_LEADINGBIT) == 0)
|
||||
{
|
||||
*code = c;
|
||||
return 1;
|
||||
}
|
||||
if ((c & GRUB_UINT8_3_LEADINGBITS) == GRUB_UINT8_2_LEADINGBITS)
|
||||
{
|
||||
*count = 1;
|
||||
*code = c & GRUB_UINT8_5_TRAILINGBITS;
|
||||
return 1;
|
||||
}
|
||||
if ((c & GRUB_UINT8_4_LEADINGBITS) == GRUB_UINT8_3_LEADINGBITS)
|
||||
{
|
||||
*count = 2;
|
||||
*code = c & GRUB_UINT8_4_TRAILINGBITS;
|
||||
return 1;
|
||||
}
|
||||
if ((c & GRUB_UINT8_5_LEADINGBITS) == GRUB_UINT8_4_LEADINGBITS)
|
||||
{
|
||||
*count = 3;
|
||||
*code = c & GRUB_UINT8_3_TRAILINGBITS;
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
grub_ssize_t
|
||||
grub_utf8_to_utf16 (grub_uint16_t *dest, grub_size_t destsize,
|
||||
const grub_uint8_t *src, grub_size_t srcsize,
|
||||
|
@ -74,64 +119,27 @@ grub_utf8_to_utf16 (grub_uint16_t *dest, grub_size_t destsize,
|
|||
|
||||
while (srcsize && destsize)
|
||||
{
|
||||
grub_uint32_t c = *src++;
|
||||
grub_uint8_t c = *src++;
|
||||
if (srcsize != (grub_size_t)-1)
|
||||
srcsize--;
|
||||
if (count)
|
||||
if (!grub_utf8_process (c, &code, &count))
|
||||
return -1;
|
||||
if (count != 0)
|
||||
continue;
|
||||
if (code == 0)
|
||||
break;
|
||||
if (destsize < 2 && code >= GRUB_UCS2_LIMIT)
|
||||
break;
|
||||
if (code >= GRUB_UCS2_LIMIT)
|
||||
{
|
||||
if ((c & GRUB_UINT8_2_LEADINGBITS) != GRUB_UINT8_1_LEADINGBIT)
|
||||
{
|
||||
/* invalid */
|
||||
return -1;
|
||||
}
|
||||
else
|
||||
{
|
||||
code <<= 6;
|
||||
code |= (c & GRUB_UINT8_6_TRAILINGBITS);
|
||||
count--;
|
||||
}
|
||||
*p++ = GRUB_UTF16_UPPER_SURROGATE (code);
|
||||
*p++ = GRUB_UTF16_LOWER_SURROGATE (code);
|
||||
destsize -= 2;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (c == 0)
|
||||
break;
|
||||
|
||||
if ((c & GRUB_UINT8_1_LEADINGBIT) == 0)
|
||||
code = c;
|
||||
else if ((c & GRUB_UINT8_3_LEADINGBITS) == GRUB_UINT8_2_LEADINGBITS)
|
||||
{
|
||||
count = 1;
|
||||
code = c & GRUB_UINT8_5_TRAILINGBITS;
|
||||
}
|
||||
else if ((c & GRUB_UINT8_4_LEADINGBITS) == GRUB_UINT8_3_LEADINGBITS)
|
||||
{
|
||||
count = 2;
|
||||
code = c & GRUB_UINT8_4_TRAILINGBITS;
|
||||
}
|
||||
else if ((c & GRUB_UINT8_5_LEADINGBITS) == GRUB_UINT8_4_LEADINGBITS)
|
||||
{
|
||||
count = 3;
|
||||
code = c & GRUB_UINT8_3_TRAILINGBITS;
|
||||
}
|
||||
else
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (count == 0)
|
||||
{
|
||||
if (destsize < 2 && code >= GRUB_UCS2_LIMIT)
|
||||
break;
|
||||
if (code >= GRUB_UCS2_LIMIT)
|
||||
{
|
||||
*p++ = GRUB_UTF16_UPPER_SURROGATE (code);
|
||||
*p++ = GRUB_UTF16_LOWER_SURROGATE (code);
|
||||
destsize -= 2;
|
||||
}
|
||||
else
|
||||
{
|
||||
*p++ = code;
|
||||
destsize--;
|
||||
}
|
||||
*p++ = code;
|
||||
destsize--;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -140,6 +148,53 @@ grub_utf8_to_utf16 (grub_uint16_t *dest, grub_size_t destsize,
|
|||
return p - dest;
|
||||
}
|
||||
|
||||
/* Returns -2 if not enough space, -1 on invalid character. */
|
||||
grub_ssize_t
|
||||
grub_encode_utf8_character (grub_uint8_t *dest, grub_uint8_t *destend,
|
||||
grub_uint32_t code)
|
||||
{
|
||||
if (dest >= destend)
|
||||
return -2;
|
||||
if (code <= 0x007F)
|
||||
{
|
||||
*dest++ = code;
|
||||
return 1;
|
||||
}
|
||||
if (code <= 0x07FF)
|
||||
{
|
||||
if (dest + 1 >= destend)
|
||||
return -2;
|
||||
*dest++ = (code >> 6) | 0xC0;
|
||||
*dest++ = (code & 0x3F) | 0x80;
|
||||
return 2;
|
||||
}
|
||||
if ((code >= 0xDC00 && code <= 0xDFFF)
|
||||
|| (code >= 0xD800 && code <= 0xDBFF))
|
||||
{
|
||||
/* No surrogates in UCS-4... */
|
||||
return -1;
|
||||
}
|
||||
if (code < 0x10000)
|
||||
{
|
||||
if (dest + 2 >= destend)
|
||||
return -2;
|
||||
*dest++ = (code >> 12) | 0xE0;
|
||||
*dest++ = ((code >> 6) & 0x3F) | 0x80;
|
||||
*dest++ = (code & 0x3F) | 0x80;
|
||||
return 3;
|
||||
}
|
||||
{
|
||||
if (dest + 3 >= destend)
|
||||
return -2;
|
||||
*dest++ = (code >> 18) | 0xF0;
|
||||
*dest++ = ((code >> 12) & 0x3F) | 0x80;
|
||||
*dest++ = ((code >> 6) & 0x3F) | 0x80;
|
||||
*dest++ = (code & 0x3F) | 0x80;
|
||||
return 4;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/* Convert UCS-4 to UTF-8. */
|
||||
void
|
||||
grub_ucs4_to_utf8 (grub_uint32_t *src, grub_size_t size,
|
||||
|
@ -151,39 +206,17 @@ grub_ucs4_to_utf8 (grub_uint32_t *src, grub_size_t size,
|
|||
while (size-- && dest < destend)
|
||||
{
|
||||
grub_uint32_t code = *src++;
|
||||
|
||||
if (code <= 0x007F)
|
||||
*dest++ = code;
|
||||
else if (code <= 0x07FF)
|
||||
grub_ssize_t s;
|
||||
s = grub_encode_utf8_character (dest, destend,
|
||||
code);
|
||||
if (s == -2)
|
||||
break;
|
||||
if (s == -1)
|
||||
{
|
||||
if (dest + 1 >= destend)
|
||||
break;
|
||||
*dest++ = (code >> 6) | 0xC0;
|
||||
*dest++ = (code & 0x3F) | 0x80;
|
||||
}
|
||||
else if ((code >= 0xDC00 && code <= 0xDFFF)
|
||||
|| (code >= 0xD800 && code <= 0xDBFF))
|
||||
{
|
||||
/* No surrogates in UCS-4... */
|
||||
*dest++ = '?';
|
||||
continue;
|
||||
}
|
||||
else if (code < 0x10000)
|
||||
{
|
||||
if (dest + 2 >= destend)
|
||||
break;
|
||||
*dest++ = (code >> 12) | 0xE0;
|
||||
*dest++ = ((code >> 6) & 0x3F) | 0x80;
|
||||
*dest++ = (code & 0x3F) | 0x80;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (dest + 3 >= destend)
|
||||
break;
|
||||
*dest++ = (code >> 18) | 0xF0;
|
||||
*dest++ = ((code >> 12) & 0x3F) | 0x80;
|
||||
*dest++ = ((code >> 6) & 0x3F) | 0x80;
|
||||
*dest++ = (code & 0x3F) | 0x80;
|
||||
}
|
||||
dest += s;
|
||||
}
|
||||
*dest = 0;
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue