Enable UTF8 in gnulib regexp.
* config.h.in (RE_ENABLE_I18N) [!GRUB_UTIL]: New define. * grub-core/lib/posix_wrap/ctype.h (islower): Use grub_islower. (isupper): Use grub_isupper. (isascii): New inline function. * grub-core/lib/posix_wrap/wchar.h: Replace dummy with real contents. * grub-core/lib/posix_wrap/wctype.h: Likewise. * grub-core/normal/charset.c (grub_utf8_process): New function. (grub_utf8_to_utf16): Use grub_utf8_process. (grub_encode_utf8_character): New function. (grub_ucs4_to_utf8): Use grub_encode_utf8_character. * include/grub/charset.h (grub_utf8_process): New declaration. (grub_encode_utf8_character): Likewise. * include/grub/misc.h (grub_islower): New inline function. (grub_isupper): Likewise. (grub_strchrsub): Moved down to fix the definitions.
This commit is contained in:
parent
0af2346fdb
commit
c5fc563aff
8 changed files with 380 additions and 106 deletions
20
ChangeLog
20
ChangeLog
|
@ -1,3 +1,23 @@
|
|||
2011-12-13 Vladimir Serbinenko <phcoder@gmail.com>
|
||||
|
||||
Enable UTF8 in gnulib regexp.
|
||||
|
||||
* config.h.in (RE_ENABLE_I18N) [!GRUB_UTIL]: New define.
|
||||
* grub-core/lib/posix_wrap/ctype.h (islower): Use grub_islower.
|
||||
(isupper): Use grub_isupper.
|
||||
(isascii): New inline function.
|
||||
* grub-core/lib/posix_wrap/wchar.h: Replace dummy with real contents.
|
||||
* grub-core/lib/posix_wrap/wctype.h: Likewise.
|
||||
* grub-core/normal/charset.c (grub_utf8_process): New function.
|
||||
(grub_utf8_to_utf16): Use grub_utf8_process.
|
||||
(grub_encode_utf8_character): New function.
|
||||
(grub_ucs4_to_utf8): Use grub_encode_utf8_character.
|
||||
* include/grub/charset.h (grub_utf8_process): New declaration.
|
||||
(grub_encode_utf8_character): Likewise.
|
||||
* include/grub/misc.h (grub_islower): New inline function.
|
||||
(grub_isupper): Likewise.
|
||||
(grub_strchrsub): Moved down to fix the definitions.
|
||||
|
||||
2011-12-13 Vladimir Serbinenko <phcoder@gmail.com>
|
||||
|
||||
* grub-core/bus/usb/ohci.c (grub_ohci_check_transfer): Add an unsigned
|
||||
|
|
|
@ -39,6 +39,8 @@
|
|||
/* Define to 1 to enable disk cache statistics. */
|
||||
#define DISK_CACHE_STATS @DISK_CACHE_STATS@
|
||||
|
||||
#define RE_ENABLE_I18N 1
|
||||
|
||||
#if defined(__i386__)
|
||||
#define NESTED_FUNC_ATTR __attribute__ ((__regparm__ (1)))
|
||||
#else
|
||||
|
|
|
@ -42,13 +42,19 @@ isdigit (int c)
|
|||
static inline int
|
||||
islower (int c)
|
||||
{
|
||||
return (c >= 'a' && c <= 'z');
|
||||
return grub_islower (c);
|
||||
}
|
||||
|
||||
/* Return non-zero when C fits in 7 bits, i.e. no bit outside 0x7f is set.  */
static inline int
isascii (int c)
{
  return (c & ~0x7f) == 0;
}
|
||||
|
||||
static inline int
|
||||
isupper (int c)
|
||||
{
|
||||
return (c >= 'A' && c <= 'Z');
|
||||
return grub_isupper (c);
|
||||
}
|
||||
|
||||
static inline int
|
||||
|
|
|
@ -19,7 +19,92 @@
|
|||
#ifndef GRUB_POSIX_WCHAR_H
|
||||
#define GRUB_POSIX_WCHAR_H 1
|
||||
|
||||
#include <grub/charset.h>
|
||||
|
||||
/* UCS-4. */
|
||||
typedef grub_uint32_t wchar_t;
|
||||
typedef grub_int32_t wint_t;
|
||||
enum
|
||||
{
|
||||
WEOF = -1
|
||||
};
|
||||
|
||||
#define MB_LEN_MAX 4
|
||||
|
||||
/* UCS-4. */
|
||||
typedef grub_int32_t wchar_t;
|
||||
|
||||
typedef struct mbstate {
|
||||
grub_uint32_t code;
|
||||
int count;
|
||||
} mbstate_t;
|
||||
|
||||
static inline size_t
|
||||
mbrtowc (wchar_t *pwc, const char *s, size_t n, mbstate_t *ps)
|
||||
{
|
||||
const char *ptr;
|
||||
if (!s)
|
||||
{
|
||||
pwc = 0;
|
||||
s = "";
|
||||
n = 1;
|
||||
}
|
||||
|
||||
for (ptr = s; ptr < s + n; ptr++)
|
||||
{
|
||||
if (!grub_utf8_process (*ptr, &ps->code, &ps->count))
|
||||
return -1;
|
||||
if (ps->count)
|
||||
continue;
|
||||
if (pwc)
|
||||
*pwc = ps->code;
|
||||
if (ps->code == 0)
|
||||
return 0;
|
||||
return ptr - s + 1;
|
||||
}
|
||||
return -2;
|
||||
}
|
||||
|
||||
static inline int
|
||||
mbsinit(const mbstate_t *ps)
|
||||
{
|
||||
return ps->count == 0;
|
||||
}
|
||||
|
||||
static inline size_t
|
||||
wcrtomb (char *s, wchar_t wc, mbstate_t *ps __attribute__ ((unused)))
|
||||
{
|
||||
if (s == 0)
|
||||
return 1;
|
||||
return grub_encode_utf8_character ((grub_uint8_t *) s,
|
||||
(grub_uint8_t *) s + MB_LEN_MAX,
|
||||
wc);
|
||||
}
|
||||
|
||||
static inline wint_t btowc (int c)
|
||||
{
|
||||
if (c & ~0x7f)
|
||||
return WEOF;
|
||||
return c;
|
||||
}
|
||||
|
||||
|
||||
/* Compare two null-terminated wide strings by plain code value (no
   locale collation).  Returns -1, 0 or +1.  */
static inline int
wcscoll (const wchar_t *s1, const wchar_t *s2)
{
  /* Skip the common prefix.  */
  for (; *s1 != 0 && *s2 != 0 && *s1 == *s2; s1++, s2++)
    ;

  if (*s1 == *s2)
    return 0;
  return (*s1 < *s2) ? -1 : +1;
}
|
||||
|
||||
#endif
|
||||
|
|
|
@ -0,0 +1,106 @@
|
|||
/*
|
||||
* GRUB -- GRand Unified Bootloader
|
||||
* Copyright (C) 2009, 2010, 2011 Free Software Foundation, Inc.
|
||||
*
|
||||
* GRUB is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* GRUB is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with GRUB. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef GRUB_POSIX_WCTYPE_H
|
||||
#define GRUB_POSIX_WCTYPE_H 1
|
||||
|
||||
#include <grub/misc.h>
|
||||
#include <wchar.h>
|
||||
|
||||
/* Character class identifiers.  The order must stay in sync with the
   name table in wctype (), which is indexed by these values.  */
typedef enum
{
  GRUB_CTYPE_INVALID,
  GRUB_CTYPE_ALNUM,
  GRUB_CTYPE_CNTRL,
  GRUB_CTYPE_LOWER,
  GRUB_CTYPE_SPACE,
  GRUB_CTYPE_ALPHA,
  GRUB_CTYPE_DIGIT,
  GRUB_CTYPE_PRINT,
  GRUB_CTYPE_UPPER,
  GRUB_CTYPE_BLANK,
  GRUB_CTYPE_GRAPH,
  GRUB_CTYPE_PUNCT,
  GRUB_CTYPE_XDIGIT,
  GRUB_CTYPE_MAX
} wctype_t;
|
||||
|
||||
static inline wctype_t
|
||||
wctype (const char *name)
|
||||
{
|
||||
wctype_t i;
|
||||
static const char names[][10] = { "",
|
||||
"alnum", "cntrl", "lower",
|
||||
"space", "alpha", "digit",
|
||||
"print", "upper", "blank",
|
||||
"graph", "punct", "xdigit" };
|
||||
for (i = GRUB_CTYPE_INVALID; i < GRUB_CTYPE_MAX; i++)
|
||||
if (grub_strcmp (names[i], name) == 0)
|
||||
return i;
|
||||
return GRUB_CTYPE_INVALID;
|
||||
}
|
||||
|
||||
/* FIXME: take into account international lowercase characters. */
|
||||
static inline int
|
||||
iswlower (wint_t wc)
|
||||
{
|
||||
return grub_islower (wc);
|
||||
}
|
||||
|
||||
static inline wint_t
|
||||
towlower (wint_t c)
|
||||
{
|
||||
return grub_tolower (c);
|
||||
}
|
||||
|
||||
static inline wint_t
|
||||
towupper (wint_t c)
|
||||
{
|
||||
return grub_toupper (c);
|
||||
}
|
||||
|
||||
static inline int
|
||||
iswalnum (wint_t c)
|
||||
{
|
||||
return grub_isalpha (c) || grub_isdigit (c);
|
||||
}
|
||||
|
||||
static inline int
|
||||
iswctype (wint_t wc, wctype_t desc)
|
||||
{
|
||||
switch (desc)
|
||||
{
|
||||
case GRUB_CTYPE_ALNUM:
|
||||
return iswalnum (wc);
|
||||
case GRUB_CTYPE_CNTRL:
|
||||
return grub_iscntrl (wc);
|
||||
case GRUB_CTYPE_LOWER:
|
||||
return iswlower (wc);
|
||||
case GRUB_CTYPE_SPACE:
|
||||
return grub_isspace (wc);
|
||||
case GRUB_CTYPE_ALPHA:
|
||||
return grub_isalpha (wc);
|
||||
case GRUB_CTYPE_DIGIT:
|
||||
return grub_isdigit (wc);
|
||||
case GRUB_CTYPE_PRINT:
|
||||
return grub_isprint (wc);
|
||||
case GRUB_CTYPE_UPPER:
|
||||
return grub_isupper (wc);
|
||||
case GRUB_CTYPE_BLANK:
|
||||
return wc == ' ' || wc == '\t';
|
||||
case GRUB_CTYPE_GRAPH:
|
||||
return grub_isgraph (wc);
|
||||
case GRUB_CTYPE_PUNCT:
|
||||
return grub_isprint (wc) && !grub_isspace (wc) && !iswalnum (wc);
|
||||
case GRUB_CTYPE_XDIGIT:
|
||||
return grub_isxdigit (wc);
|
||||
default:
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
|
@ -60,6 +60,51 @@
|
|||
#include "widthspec.h"
|
||||
#endif
|
||||
|
||||
int
|
||||
grub_utf8_process (grub_uint8_t c, grub_uint32_t *code, int *count)
|
||||
{
|
||||
if (*count)
|
||||
{
|
||||
if ((c & GRUB_UINT8_2_LEADINGBITS) != GRUB_UINT8_1_LEADINGBIT)
|
||||
{
|
||||
/* invalid */
|
||||
return 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
*code <<= 6;
|
||||
*code |= (c & GRUB_UINT8_6_TRAILINGBITS);
|
||||
(*count)--;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
if ((c & GRUB_UINT8_1_LEADINGBIT) == 0)
|
||||
{
|
||||
*code = c;
|
||||
return 1;
|
||||
}
|
||||
if ((c & GRUB_UINT8_3_LEADINGBITS) == GRUB_UINT8_2_LEADINGBITS)
|
||||
{
|
||||
*count = 1;
|
||||
*code = c & GRUB_UINT8_5_TRAILINGBITS;
|
||||
return 1;
|
||||
}
|
||||
if ((c & GRUB_UINT8_4_LEADINGBITS) == GRUB_UINT8_3_LEADINGBITS)
|
||||
{
|
||||
*count = 2;
|
||||
*code = c & GRUB_UINT8_4_TRAILINGBITS;
|
||||
return 1;
|
||||
}
|
||||
if ((c & GRUB_UINT8_5_LEADINGBITS) == GRUB_UINT8_4_LEADINGBITS)
|
||||
{
|
||||
*count = 3;
|
||||
*code = c & GRUB_UINT8_3_TRAILINGBITS;
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
grub_ssize_t
|
||||
grub_utf8_to_utf16 (grub_uint16_t *dest, grub_size_t destsize,
|
||||
const grub_uint8_t *src, grub_size_t srcsize,
|
||||
|
@ -74,51 +119,15 @@ grub_utf8_to_utf16 (grub_uint16_t *dest, grub_size_t destsize,
|
|||
|
||||
while (srcsize && destsize)
|
||||
{
|
||||
grub_uint32_t c = *src++;
|
||||
grub_uint8_t c = *src++;
|
||||
if (srcsize != (grub_size_t)-1)
|
||||
srcsize--;
|
||||
if (count)
|
||||
{
|
||||
if ((c & GRUB_UINT8_2_LEADINGBITS) != GRUB_UINT8_1_LEADINGBIT)
|
||||
{
|
||||
/* invalid */
|
||||
if (!grub_utf8_process (c, &code, &count))
|
||||
return -1;
|
||||
}
|
||||
else
|
||||
{
|
||||
code <<= 6;
|
||||
code |= (c & GRUB_UINT8_6_TRAILINGBITS);
|
||||
count--;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (c == 0)
|
||||
if (count != 0)
|
||||
continue;
|
||||
if (code == 0)
|
||||
break;
|
||||
|
||||
if ((c & GRUB_UINT8_1_LEADINGBIT) == 0)
|
||||
code = c;
|
||||
else if ((c & GRUB_UINT8_3_LEADINGBITS) == GRUB_UINT8_2_LEADINGBITS)
|
||||
{
|
||||
count = 1;
|
||||
code = c & GRUB_UINT8_5_TRAILINGBITS;
|
||||
}
|
||||
else if ((c & GRUB_UINT8_4_LEADINGBITS) == GRUB_UINT8_3_LEADINGBITS)
|
||||
{
|
||||
count = 2;
|
||||
code = c & GRUB_UINT8_4_TRAILINGBITS;
|
||||
}
|
||||
else if ((c & GRUB_UINT8_5_LEADINGBITS) == GRUB_UINT8_4_LEADINGBITS)
|
||||
{
|
||||
count = 3;
|
||||
code = c & GRUB_UINT8_3_TRAILINGBITS;
|
||||
}
|
||||
else
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (count == 0)
|
||||
{
|
||||
if (destsize < 2 && code >= GRUB_UCS2_LIMIT)
|
||||
break;
|
||||
if (code >= GRUB_UCS2_LIMIT)
|
||||
|
@ -133,13 +142,59 @@ grub_utf8_to_utf16 (grub_uint16_t *dest, grub_size_t destsize,
|
|||
destsize--;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (srcend)
|
||||
*srcend = src;
|
||||
return p - dest;
|
||||
}
|
||||
|
||||
/* Returns -2 if not enough space, -1 on invalid character. */
|
||||
grub_ssize_t
|
||||
grub_encode_utf8_character (grub_uint8_t *dest, grub_uint8_t *destend,
|
||||
grub_uint32_t code)
|
||||
{
|
||||
if (dest >= destend)
|
||||
return -2;
|
||||
if (code <= 0x007F)
|
||||
{
|
||||
*dest++ = code;
|
||||
return 1;
|
||||
}
|
||||
if (code <= 0x07FF)
|
||||
{
|
||||
if (dest + 1 >= destend)
|
||||
return -2;
|
||||
*dest++ = (code >> 6) | 0xC0;
|
||||
*dest++ = (code & 0x3F) | 0x80;
|
||||
return 2;
|
||||
}
|
||||
if ((code >= 0xDC00 && code <= 0xDFFF)
|
||||
|| (code >= 0xD800 && code <= 0xDBFF))
|
||||
{
|
||||
/* No surrogates in UCS-4... */
|
||||
return -1;
|
||||
}
|
||||
if (code < 0x10000)
|
||||
{
|
||||
if (dest + 2 >= destend)
|
||||
return -2;
|
||||
*dest++ = (code >> 12) | 0xE0;
|
||||
*dest++ = ((code >> 6) & 0x3F) | 0x80;
|
||||
*dest++ = (code & 0x3F) | 0x80;
|
||||
return 3;
|
||||
}
|
||||
{
|
||||
if (dest + 3 >= destend)
|
||||
return -2;
|
||||
*dest++ = (code >> 18) | 0xF0;
|
||||
*dest++ = ((code >> 12) & 0x3F) | 0x80;
|
||||
*dest++ = ((code >> 6) & 0x3F) | 0x80;
|
||||
*dest++ = (code & 0x3F) | 0x80;
|
||||
return 4;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/* Convert UCS-4 to UTF-8. */
|
||||
void
|
||||
grub_ucs4_to_utf8 (grub_uint32_t *src, grub_size_t size,
|
||||
|
@ -151,39 +206,17 @@ grub_ucs4_to_utf8 (grub_uint32_t *src, grub_size_t size,
|
|||
while (size-- && dest < destend)
|
||||
{
|
||||
grub_uint32_t code = *src++;
|
||||
|
||||
if (code <= 0x007F)
|
||||
*dest++ = code;
|
||||
else if (code <= 0x07FF)
|
||||
{
|
||||
if (dest + 1 >= destend)
|
||||
grub_ssize_t s;
|
||||
s = grub_encode_utf8_character (dest, destend,
|
||||
code);
|
||||
if (s == -2)
|
||||
break;
|
||||
*dest++ = (code >> 6) | 0xC0;
|
||||
*dest++ = (code & 0x3F) | 0x80;
|
||||
}
|
||||
else if ((code >= 0xDC00 && code <= 0xDFFF)
|
||||
|| (code >= 0xD800 && code <= 0xDBFF))
|
||||
if (s == -1)
|
||||
{
|
||||
/* No surrogates in UCS-4... */
|
||||
*dest++ = '?';
|
||||
continue;
|
||||
}
|
||||
else if (code < 0x10000)
|
||||
{
|
||||
if (dest + 2 >= destend)
|
||||
break;
|
||||
*dest++ = (code >> 12) | 0xE0;
|
||||
*dest++ = ((code >> 6) & 0x3F) | 0x80;
|
||||
*dest++ = (code & 0x3F) | 0x80;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (dest + 3 >= destend)
|
||||
break;
|
||||
*dest++ = (code >> 18) | 0xF0;
|
||||
*dest++ = ((code >> 12) & 0x3F) | 0x80;
|
||||
*dest++ = ((code >> 6) & 0x3F) | 0x80;
|
||||
*dest++ = (code & 0x3F) | 0x80;
|
||||
}
|
||||
dest += s;
|
||||
}
|
||||
*dest = 0;
|
||||
}
|
||||
|
|
|
@ -126,11 +126,22 @@ grub_is_valid_utf8 (const grub_uint8_t *src, grub_size_t srcsize);
|
|||
|
||||
int grub_utf8_to_ucs4_alloc (const char *msg, grub_uint32_t **unicode_msg,
|
||||
grub_uint32_t **last_position);
|
||||
|
||||
/* Process one byte of a UTF-8 sequence.
|
||||
At beginning set *code = 0, *count = 0. Returns 0 on failure and
|
||||
1 on success. *count holds the number of trailing bytes. */
|
||||
int
|
||||
grub_utf8_process (grub_uint8_t c, grub_uint32_t *code, int *count);
|
||||
|
||||
void
|
||||
grub_ucs4_to_utf8 (grub_uint32_t *src, grub_size_t size,
|
||||
grub_uint8_t *dest, grub_size_t destsize);
|
||||
grub_size_t grub_utf8_to_ucs4 (grub_uint32_t *dest, grub_size_t destsize,
|
||||
const grub_uint8_t *src, grub_size_t srcsize,
|
||||
const grub_uint8_t **srcend);
|
||||
/* Returns -2 if not enough space, -1 on invalid character. */
|
||||
grub_ssize_t
|
||||
grub_encode_utf8_character (grub_uint8_t *dest, grub_uint8_t *destend,
|
||||
grub_uint32_t code);
|
||||
|
||||
#endif
|
||||
|
|
|
@ -175,6 +175,18 @@ grub_isalpha (int c)
|
|||
return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
|
||||
}
|
||||
|
||||
/* Return 1 if C is one of the lowercase letters 'a'..'z', else 0.  */
static inline int
grub_islower (int c)
{
  return !(c < 'a' || c > 'z');
}
|
||||
|
||||
/* Return 1 if C is one of the uppercase letters 'A'..'Z', else 0.  */
static inline int
grub_isupper (int c)
{
  return !(c < 'A' || c > 'Z');
}
|
||||
|
||||
static inline int
|
||||
grub_isgraph (int c)
|
||||
{
|
||||
|
@ -250,27 +262,6 @@ grub_strncasecmp (const char *s1, const char *s2, grub_size_t n)
|
|||
return (int) grub_tolower (*s1) - (int) grub_tolower (*s2);
|
||||
}
|
||||
|
||||
/* Replace all `ch' characters of `input' with `with' and copy the
|
||||
result into `output'; return EOS address of `output'. */
|
||||
static inline char *
|
||||
grub_strchrsub (char *output, const char *input, char ch, const char *with)
|
||||
{
|
||||
grub_size_t grub_strlen (const char *s);
|
||||
while (*input)
|
||||
{
|
||||
if (*input == ch)
|
||||
{
|
||||
grub_strcpy (output, with);
|
||||
output += grub_strlen (with);
|
||||
input++;
|
||||
continue;
|
||||
}
|
||||
*output++ = *input++;
|
||||
}
|
||||
*output = '\0';
|
||||
return output;
|
||||
}
|
||||
|
||||
unsigned long EXPORT_FUNC(grub_strtoul) (const char *str, char **end, int base);
|
||||
unsigned long long EXPORT_FUNC(grub_strtoull) (const char *str, char **end, int base);
|
||||
|
||||
|
@ -317,6 +308,26 @@ grub_size_t EXPORT_FUNC(grub_strlen) (const char *s) __attribute__ ((warn_unused
|
|||
int EXPORT_FUNC(grub_printf) (const char *fmt, ...) __attribute__ ((format (printf, 1, 2)));
|
||||
int EXPORT_FUNC(grub_printf_) (const char *fmt, ...) __attribute__ ((format (printf, 1, 2)));
|
||||
|
||||
/* Copy `input' into `output', substituting the string `with' for every
   occurrence of the character `ch'.  `output' is null-terminated and
   the address of that terminator is returned.  No bounds checking is
   done here: the caller must supply a large enough `output' buffer.  */
static inline char *
grub_strchrsub (char *output, const char *input, char ch, const char *with)
{
  const char *src;
  char *dst = output;

  for (src = input; *src != '\0'; src++)
    {
      if (*src != ch)
        {
          *dst++ = *src;
          continue;
        }

      grub_strcpy (dst, with);
      dst += grub_strlen (with);
    }

  *dst = '\0';
  return dst;
}
|
||||
|
||||
extern void (*EXPORT_VAR (grub_xputs)) (const char *str);
|
||||
|
||||
static inline int
|
||||
|
|
Loading…
Reference in a new issue