Enable UTF8 in gnulib regexp.

* config.h.in (RE_ENABLE_I18N) [!GRUB_UTIL]: New define.
	* grub-core/lib/posix_wrap/ctype.h (islower): Use grub_islower.
	(isupper): Use grub_isupper.
	(isascii): New inline function.
	* grub-core/lib/posix_wrap/wchar.h: Replace dummy with real contents.
	* grub-core/lib/posix_wrap/wctype.h: Likewise.
	* grub-core/normal/charset.c (grub_utf8_process): New function.
	(grub_utf8_to_utf16): Use grub_utf8_process.
	(grub_encode_utf8_character): New function.
	(grub_ucs4_to_utf8): Use grub_encode_utf8_character.
	* include/grub/charset.h (grub_utf8_process): New declaration.
	(grub_encode_utf8_character): Likewise.
	* include/grub/misc.h (grub_islower): New inline function.
	(grub_isupper): Likewise.
	(grub_strchrsub): Moved down to fix the definitions.
This commit is contained in:
Vladimir 'phcoder' Serbinenko 2011-12-13 00:50:49 +01:00
parent 0af2346fdb
commit c5fc563aff
8 changed files with 380 additions and 106 deletions

View file

@ -1,3 +1,23 @@
2011-12-13 Vladimir Serbinenko <phcoder@gmail.com>
Enable UTF8 in gnulib regexp.
* config.h.in (RE_ENABLE_I18N) [!GRUB_UTIL]: New define.
* grub-core/lib/posix_wrap/ctype.h (islower): Use grub_islower.
(isupper): Use grub_isupper.
(isascii): New inline function.
* grub-core/lib/posix_wrap/wchar.h: Replace dummy with real contents.
* grub-core/lib/posix_wrap/wctype.h: Likewise.
* grub-core/normal/charset.c (grub_utf8_process): New function.
(grub_utf8_to_utf16): Use grub_utf8_process.
(grub_encode_utf8_character): New function.
(grub_ucs4_to_utf8): Use grub_encode_utf8_character.
* include/grub/charset.h (grub_utf8_process): New declaration.
(grub_encode_utf8_character): Likewise.
* include/grub/misc.h (grub_islower): New inline function.
(grub_isupper): Likewise.
(grub_strchrsub): Moved down to fix the definitions.
2011-12-13 Vladimir Serbinenko <phcoder@gmail.com> 2011-12-13 Vladimir Serbinenko <phcoder@gmail.com>
* grub-core/bus/usb/ohci.c (grub_ohci_check_transfer): Add an unsigned * grub-core/bus/usb/ohci.c (grub_ohci_check_transfer): Add an unsigned

View file

@ -39,6 +39,8 @@
/* Define to 1 to enable disk cache statistics. */ /* Define to 1 to enable disk cache statistics. */
#define DISK_CACHE_STATS @DISK_CACHE_STATS@ #define DISK_CACHE_STATS @DISK_CACHE_STATS@
#define RE_ENABLE_I18N 1
#if defined(__i386__) #if defined(__i386__)
#define NESTED_FUNC_ATTR __attribute__ ((__regparm__ (1))) #define NESTED_FUNC_ATTR __attribute__ ((__regparm__ (1)))
#else #else

View file

@ -42,13 +42,19 @@ isdigit (int c)
static inline int static inline int
islower (int c) islower (int c)
{ {
return (c >= 'a' && c <= 'z'); return grub_islower (c);
}
static inline int
isascii (int c)
{
return !(c & ~0x7f);
} }
static inline int static inline int
isupper (int c) isupper (int c)
{ {
return (c >= 'A' && c <= 'Z'); return grub_isupper (c);
} }
static inline int static inline int

View file

@ -19,7 +19,92 @@
#ifndef GRUB_POSIX_WCHAR_H #ifndef GRUB_POSIX_WCHAR_H
#define GRUB_POSIX_WCHAR_H 1 #define GRUB_POSIX_WCHAR_H 1
#include <grub/charset.h>
/* UCS-4. */ /* UCS-4. */
typedef grub_uint32_t wchar_t; typedef grub_int32_t wint_t;
/* Value used by the wide-character wrappers to signal "no character";
   btowc returns it for any byte outside the 7-bit ASCII range.  */
enum
{
WEOF = -1
};
/* Longest multibyte (UTF-8) encoding of a single character, in bytes.
   wcrtomb uses it as the assumed size of the caller's output buffer.  */
#define MB_LEN_MAX 4
/* UCS-4. */
typedef grub_int32_t wchar_t;
/* Conversion state of the incremental UTF-8 decoder driven by mbrtowc.  */
typedef struct mbstate {
/* Code point bits accumulated so far.  */
grub_uint32_t code;
/* Continuation bytes still expected; 0 is the initial state (see mbsinit).  */
int count;
} mbstate_t;
/* Decode the next UTF-8 character from at most N bytes starting at S.

   PWC  receives the decoded character (may be null to discard it).
   S    input bytes; a null S behaves like mbrtowc (0, "", 1, PS).
   N    maximum number of bytes to inspect.
   PS   conversion state carried between calls; as POSIX requires, a null
        PS selects a state object private to this function.

   Returns the number of bytes consumed to complete a character, 0 if the
   completed character is the null character, (size_t) -1 if an invalid
   byte was met and (size_t) -2 if the N bytes form only an incomplete
   (but so far valid) character.  */
static inline size_t
mbrtowc (wchar_t *pwc, const char *s, size_t n, mbstate_t *ps)
{
  /* Zero-initialized, i.e. starts out in the initial conversion state.  */
  static mbstate_t internal_state;
  size_t i;

  if (!ps)
    ps = &internal_state;

  if (!s)
    {
      pwc = 0;
      s = "";
      n = 1;
    }

  /* Index-based loop: forming `s + n' would overflow the pointer for very
     large N (e.g. (size_t) -1) and silently skip the whole input.  */
  for (i = 0; i < n; i++)
    {
      if (!grub_utf8_process (s[i], &ps->code, &ps->count))
        return (size_t) -1;
      /* Still in the middle of a multibyte sequence.  */
      if (ps->count)
        continue;
      if (pwc)
        *pwc = ps->code;
      if (ps->code == 0)
        return 0;
      return i + 1;
    }
  return (size_t) -2;
}
/* Return non-zero if PS describes the initial conversion state, i.e. no
   multibyte character is partially decoded.  As POSIX specifies, a null
   PS is also reported as the initial state instead of being dereferenced.  */
static inline int
mbsinit(const mbstate_t *ps)
{
  if (!ps)
    return 1;
  return ps->count == 0;
}
/* Encode the wide character WC as UTF-8 into S, which is treated as a
   buffer of MB_LEN_MAX bytes.  A null S returns 1 without writing anything.
   Otherwise the result of grub_encode_utf8_character is returned converted
   to size_t.  PS is unused.  */
static inline size_t
wcrtomb (char *s, wchar_t wc, mbstate_t *ps __attribute__ ((unused)))
{
  grub_uint8_t *out = (grub_uint8_t *) s;

  if (!s)
    return 1;

  return grub_encode_utf8_character (out, out + MB_LEN_MAX, wc);
}
/* Convert the single byte C to a wide character.  Only 7-bit ASCII values
   map to themselves; anything with bits set outside 0x7f (including
   negative values) yields WEOF.  */
static inline wint_t btowc (int c)
{
  return (c & ~0x7f) ? WEOF : c;
}
/* Compare the wide strings S1 and S2 element by element (no locale
   collation is applied).  Returns -1, 0 or +1 when S1 sorts before, equal
   to or after S2 respectively.  */
static inline int
wcscoll (const wchar_t *s1, const wchar_t *s2)
{
  /* Skip the common prefix; stop at the first difference or terminator.  */
  for (; *s1 && *s2 && *s1 == *s2; s1++, s2++)
    ;

  if (*s1 == *s2)
    return 0;
  return (*s1 < *s2) ? -1 : +1;
}
#endif #endif

View file

@ -0,0 +1,106 @@
/*
* GRUB -- GRand Unified Bootloader
* Copyright (C) 2009, 2010, 2011 Free Software Foundation, Inc.
*
* GRUB is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* GRUB is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GRUB. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef GRUB_POSIX_WCTYPE_H
#define GRUB_POSIX_WCTYPE_H 1
#include <grub/misc.h>
#include <wchar.h>
/* Character-class descriptor produced by wctype and consumed by iswctype.
   GRUB_CTYPE_INVALID (value 0) means "no such class"; GRUB_CTYPE_MAX is one
   past the last real class and only serves as a loop bound.  The order of
   the enumerators must match the name table inside wctype.  */
typedef enum { GRUB_CTYPE_INVALID,
GRUB_CTYPE_ALNUM, GRUB_CTYPE_CNTRL, GRUB_CTYPE_LOWER,
GRUB_CTYPE_SPACE, GRUB_CTYPE_ALPHA, GRUB_CTYPE_DIGIT,
GRUB_CTYPE_PRINT, GRUB_CTYPE_UPPER, GRUB_CTYPE_BLANK,
GRUB_CTYPE_GRAPH, GRUB_CTYPE_PUNCT, GRUB_CTYPE_XDIGIT,
GRUB_CTYPE_MAX} wctype_t;
/* Look up the character class called NAME ("alnum", "digit", ...) and
   return its descriptor, or GRUB_CTYPE_INVALID if NAME is not a known
   class.  */
static inline wctype_t
wctype (const char *name)
{
  /* Indexed by wctype_t; slot 0 (GRUB_CTYPE_INVALID) holds the empty name.  */
  static const char names[][10] = { "",
                                    "alnum", "cntrl", "lower",
                                    "space", "alpha", "digit",
                                    "print", "upper", "blank",
                                    "graph", "punct", "xdigit" };
  wctype_t cls = GRUB_CTYPE_INVALID;

  while (cls < GRUB_CTYPE_MAX)
    {
      if (grub_strcmp (names[cls], name) == 0)
        return cls;
      cls++;
    }

  return GRUB_CTYPE_INVALID;
}
/* Return the result of grub_islower for the wide character WC.
   FIXME: take into account international lowercase characters.  */
static inline int
iswlower (wint_t wc)
{
  int is_lower = grub_islower (wc);

  return is_lower;
}
/* Map the wide character C to lower case by delegating to grub_tolower.  */
static inline wint_t
towlower (wint_t c)
{
  wint_t lowered = grub_tolower (c);

  return lowered;
}
/* Map the wide character C to upper case by delegating to grub_toupper.  */
static inline wint_t
towupper (wint_t c)
{
  wint_t raised = grub_toupper (c);

  return raised;
}
/* Return 1 if the wide character C is alphabetic (grub_isalpha) or a
   decimal digit (grub_isdigit), 0 otherwise.  */
static inline int
iswalnum (wint_t c)
{
  if (grub_isalpha (c))
    return 1;

  return grub_isdigit (c) != 0;
}
/* Test whether the wide character WC belongs to the class DESC obtained
   from wctype.  Returns non-zero on membership, 0 otherwise; an unknown
   or invalid DESC always yields 0.  */
static inline int
iswctype (wint_t wc, wctype_t desc)
{
  int member = 0;

  switch (desc)
    {
    case GRUB_CTYPE_ALNUM:
      member = iswalnum (wc);
      break;
    case GRUB_CTYPE_CNTRL:
      member = grub_iscntrl (wc);
      break;
    case GRUB_CTYPE_LOWER:
      member = iswlower (wc);
      break;
    case GRUB_CTYPE_SPACE:
      member = grub_isspace (wc);
      break;
    case GRUB_CTYPE_ALPHA:
      member = grub_isalpha (wc);
      break;
    case GRUB_CTYPE_DIGIT:
      member = grub_isdigit (wc);
      break;
    case GRUB_CTYPE_PRINT:
      member = grub_isprint (wc);
      break;
    case GRUB_CTYPE_UPPER:
      member = grub_isupper (wc);
      break;
    case GRUB_CTYPE_BLANK:
      member = (wc == ' ' || wc == '\t');
      break;
    case GRUB_CTYPE_GRAPH:
      member = grub_isgraph (wc);
      break;
    case GRUB_CTYPE_PUNCT:
      /* Printable, but neither whitespace nor alphanumeric.  */
      member = (grub_isprint (wc) && !grub_isspace (wc) && !iswalnum (wc));
      break;
    case GRUB_CTYPE_XDIGIT:
      member = grub_isxdigit (wc);
      break;
    default:
      break;
    }

  return member;
}
#endif

View file

@ -60,6 +60,51 @@
#include "widthspec.h" #include "widthspec.h"
#endif #endif
/* Feed one byte C of a UTF-8 stream to the incremental decoder.

   CODE and COUNT are the decoder state; set both to 0 before the first
   byte.  On success 1 is returned and *COUNT holds the number of
   continuation bytes still expected; once it is 0, *CODE is the complete
   code point.  On failure 0 is returned and the state is reset to the
   initial one, so decoding can resume with the next byte.

   Rejected input: a non-continuation byte inside a sequence, a
   continuation or otherwise invalid byte at the start of a sequence, and
   overlong encodings (a code point spelled with more bytes than needed).  */
int
grub_utf8_process (grub_uint8_t c, grub_uint32_t *code, int *count)
{
  if (*count)
    {
      if ((c & GRUB_UINT8_2_LEADINGBITS) != GRUB_UINT8_1_LEADINGBIT)
        {
          /* Not a continuation byte: drop the partial sequence instead of
             leaving a stale COUNT behind for the next call.  */
          *code = 0;
          *count = 0;
          return 0;
        }

      *code <<= 6;
      *code |= (c & GRUB_UINT8_6_TRAILINGBITS);
      (*count)--;

      /* Overlong forms are detectable after the first continuation byte:
         a 3-byte sequence (one byte left) must reach U+0800, i.e. the bits
         so far exceed 0x1f; a 4-byte sequence (two bytes left) must reach
         U+10000, i.e. the bits so far exceed 0xf.  Later bytes of a valid
         sequence can never fall back into these ranges.  */
      if ((*count == 1 && *code <= 0x1f)
          || (*count == 2 && *code <= 0xf))
        {
          *code = 0;
          *count = 0;
          return 0;
        }
      return 1;
    }

  /* Plain ASCII.  */
  if ((c & GRUB_UINT8_1_LEADINGBIT) == 0)
    {
      *code = c;
      return 1;
    }

  /* Lead byte of a 2-byte sequence.  */
  if ((c & GRUB_UINT8_3_LEADINGBITS) == GRUB_UINT8_2_LEADINGBITS)
    {
      *code = c & GRUB_UINT8_5_TRAILINGBITS;
      /* 0xC0 and 0xC1 can only start an overlong encoding of ASCII.  */
      if (*code <= 1)
        {
          *code = 0;
          return 0;
        }
      *count = 1;
      return 1;
    }

  /* Lead byte of a 3-byte sequence.  */
  if ((c & GRUB_UINT8_4_LEADINGBITS) == GRUB_UINT8_3_LEADINGBITS)
    {
      *count = 2;
      *code = c & GRUB_UINT8_4_TRAILINGBITS;
      return 1;
    }

  /* Lead byte of a 4-byte sequence.  */
  if ((c & GRUB_UINT8_5_LEADINGBITS) == GRUB_UINT8_4_LEADINGBITS)
    {
      *count = 3;
      *code = c & GRUB_UINT8_3_TRAILINGBITS;
      return 1;
    }

  /* Stray continuation byte or a byte that never occurs in UTF-8.  */
  return 0;
}
grub_ssize_t grub_ssize_t
grub_utf8_to_utf16 (grub_uint16_t *dest, grub_size_t destsize, grub_utf8_to_utf16 (grub_uint16_t *dest, grub_size_t destsize,
const grub_uint8_t *src, grub_size_t srcsize, const grub_uint8_t *src, grub_size_t srcsize,
@ -74,64 +119,27 @@ grub_utf8_to_utf16 (grub_uint16_t *dest, grub_size_t destsize,
while (srcsize && destsize) while (srcsize && destsize)
{ {
grub_uint32_t c = *src++; grub_uint8_t c = *src++;
if (srcsize != (grub_size_t)-1) if (srcsize != (grub_size_t)-1)
srcsize--; srcsize--;
if (count) if (!grub_utf8_process (c, &code, &count))
return -1;
if (count != 0)
continue;
if (code == 0)
break;
if (destsize < 2 && code >= GRUB_UCS2_LIMIT)
break;
if (code >= GRUB_UCS2_LIMIT)
{ {
if ((c & GRUB_UINT8_2_LEADINGBITS) != GRUB_UINT8_1_LEADINGBIT) *p++ = GRUB_UTF16_UPPER_SURROGATE (code);
{ *p++ = GRUB_UTF16_LOWER_SURROGATE (code);
/* invalid */ destsize -= 2;
return -1;
}
else
{
code <<= 6;
code |= (c & GRUB_UINT8_6_TRAILINGBITS);
count--;
}
} }
else else
{ {
if (c == 0) *p++ = code;
break; destsize--;
if ((c & GRUB_UINT8_1_LEADINGBIT) == 0)
code = c;
else if ((c & GRUB_UINT8_3_LEADINGBITS) == GRUB_UINT8_2_LEADINGBITS)
{
count = 1;
code = c & GRUB_UINT8_5_TRAILINGBITS;
}
else if ((c & GRUB_UINT8_4_LEADINGBITS) == GRUB_UINT8_3_LEADINGBITS)
{
count = 2;
code = c & GRUB_UINT8_4_TRAILINGBITS;
}
else if ((c & GRUB_UINT8_5_LEADINGBITS) == GRUB_UINT8_4_LEADINGBITS)
{
count = 3;
code = c & GRUB_UINT8_3_TRAILINGBITS;
}
else
return -1;
}
if (count == 0)
{
if (destsize < 2 && code >= GRUB_UCS2_LIMIT)
break;
if (code >= GRUB_UCS2_LIMIT)
{
*p++ = GRUB_UTF16_UPPER_SURROGATE (code);
*p++ = GRUB_UTF16_LOWER_SURROGATE (code);
destsize -= 2;
}
else
{
*p++ = code;
destsize--;
}
} }
} }
@ -140,6 +148,53 @@ grub_utf8_to_utf16 (grub_uint16_t *dest, grub_size_t destsize,
return p - dest; return p - dest;
} }
/* Encode the code point CODE as UTF-8 into the buffer [DEST, DESTEND).
   No terminating NUL is written.

   Returns the number of bytes stored (1 to 4), -2 if the buffer has not
   enough space, and -1 if CODE cannot be encoded: a UTF-16 surrogate
   (U+D800..U+DFFF) or a value beyond U+10FFFF, the last code point that
   fits a well-formed 4-byte sequence.  */
grub_ssize_t
grub_encode_utf8_character (grub_uint8_t *dest, grub_uint8_t *destend,
                            grub_uint32_t code)
{
  if (dest >= destend)
    return -2;

  if (code <= 0x007F)
    {
      *dest++ = code;
      return 1;
    }

  /* Room is checked as `destend - dest' so that no pointer beyond
     one-past-the-end of the buffer is ever formed.  */
  if (code <= 0x07FF)
    {
      if (destend - dest < 2)
        return -2;
      *dest++ = (code >> 6) | 0xC0;
      *dest++ = (code & 0x3F) | 0x80;
      return 2;
    }

  if (code >= 0xD800 && code <= 0xDFFF)
    {
      /* No surrogates in UCS-4... */
      return -1;
    }

  if (code > 0x10FFFF)
    {
      /* Would spill into the lead byte's marker bits and yield bytes that
         are not UTF-8 at all.  */
      return -1;
    }

  if (code < 0x10000)
    {
      if (destend - dest < 3)
        return -2;
      *dest++ = (code >> 12) | 0xE0;
      *dest++ = ((code >> 6) & 0x3F) | 0x80;
      *dest++ = (code & 0x3F) | 0x80;
      return 3;
    }

  if (destend - dest < 4)
    return -2;
  *dest++ = (code >> 18) | 0xF0;
  *dest++ = ((code >> 12) & 0x3F) | 0x80;
  *dest++ = ((code >> 6) & 0x3F) | 0x80;
  *dest++ = (code & 0x3F) | 0x80;
  return 4;
}
/* Convert UCS-4 to UTF-8. */ /* Convert UCS-4 to UTF-8. */
void void
grub_ucs4_to_utf8 (grub_uint32_t *src, grub_size_t size, grub_ucs4_to_utf8 (grub_uint32_t *src, grub_size_t size,
@ -151,39 +206,17 @@ grub_ucs4_to_utf8 (grub_uint32_t *src, grub_size_t size,
while (size-- && dest < destend) while (size-- && dest < destend)
{ {
grub_uint32_t code = *src++; grub_uint32_t code = *src++;
grub_ssize_t s;
if (code <= 0x007F) s = grub_encode_utf8_character (dest, destend,
*dest++ = code; code);
else if (code <= 0x07FF) if (s == -2)
break;
if (s == -1)
{ {
if (dest + 1 >= destend)
break;
*dest++ = (code >> 6) | 0xC0;
*dest++ = (code & 0x3F) | 0x80;
}
else if ((code >= 0xDC00 && code <= 0xDFFF)
|| (code >= 0xD800 && code <= 0xDBFF))
{
/* No surrogates in UCS-4... */
*dest++ = '?'; *dest++ = '?';
continue;
} }
else if (code < 0x10000) dest += s;
{
if (dest + 2 >= destend)
break;
*dest++ = (code >> 12) | 0xE0;
*dest++ = ((code >> 6) & 0x3F) | 0x80;
*dest++ = (code & 0x3F) | 0x80;
}
else
{
if (dest + 3 >= destend)
break;
*dest++ = (code >> 18) | 0xF0;
*dest++ = ((code >> 12) & 0x3F) | 0x80;
*dest++ = ((code >> 6) & 0x3F) | 0x80;
*dest++ = (code & 0x3F) | 0x80;
}
} }
*dest = 0; *dest = 0;
} }

View file

@ -126,11 +126,22 @@ grub_is_valid_utf8 (const grub_uint8_t *src, grub_size_t srcsize);
int grub_utf8_to_ucs4_alloc (const char *msg, grub_uint32_t **unicode_msg, int grub_utf8_to_ucs4_alloc (const char *msg, grub_uint32_t **unicode_msg,
grub_uint32_t **last_position); grub_uint32_t **last_position);
/* Process one byte C of a UTF-8 sequence (incremental decoder).
   Before the first byte set *code = 0, *count = 0.  Returns 0 on failure
   (invalid byte) and 1 on success.  After a successful call *count holds
   the number of trailing (continuation) bytes still expected; when it is
   0, *code holds the complete decoded code point.  */
int
grub_utf8_process (grub_uint8_t c, grub_uint32_t *code, int *count);
void void
grub_ucs4_to_utf8 (grub_uint32_t *src, grub_size_t size, grub_ucs4_to_utf8 (grub_uint32_t *src, grub_size_t size,
grub_uint8_t *dest, grub_size_t destsize); grub_uint8_t *dest, grub_size_t destsize);
grub_size_t grub_utf8_to_ucs4 (grub_uint32_t *dest, grub_size_t destsize, grub_size_t grub_utf8_to_ucs4 (grub_uint32_t *dest, grub_size_t destsize,
const grub_uint8_t *src, grub_size_t srcsize, const grub_uint8_t *src, grub_size_t srcsize,
const grub_uint8_t **srcend); const grub_uint8_t **srcend);
/* Encode the single code point CODE as UTF-8 into [DEST, DESTEND); no
   terminating NUL is written.  Returns the number of bytes written (1-4),
   -2 if not enough space, -1 on invalid character (e.g. a UTF-16
   surrogate code point).  */
grub_ssize_t
grub_encode_utf8_character (grub_uint8_t *dest, grub_uint8_t *destend,
grub_uint32_t code);
#endif #endif

View file

@ -175,6 +175,18 @@ grub_isalpha (int c)
return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'); return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
} }
/* Return 1 if C is an ASCII lowercase letter ('a'..'z'), 0 otherwise.  */
static inline int
grub_islower (int c)
{
  return !(c < 'a' || c > 'z');
}
/* Return 1 if C is an ASCII uppercase letter ('A'..'Z'), 0 otherwise.  */
static inline int
grub_isupper (int c)
{
  return !(c < 'A' || c > 'Z');
}
static inline int static inline int
grub_isgraph (int c) grub_isgraph (int c)
{ {
@ -250,27 +262,6 @@ grub_strncasecmp (const char *s1, const char *s2, grub_size_t n)
return (int) grub_tolower (*s1) - (int) grub_tolower (*s2); return (int) grub_tolower (*s1) - (int) grub_tolower (*s2);
} }
/* Replace all `ch' characters of `input' with `with' and copy the
result into `output'; return EOS address of `output'. */
static inline char *
grub_strchrsub (char *output, const char *input, char ch, const char *with)
{
grub_size_t grub_strlen (const char *s);
while (*input)
{
if (*input == ch)
{
grub_strcpy (output, with);
output += grub_strlen (with);
input++;
continue;
}
*output++ = *input++;
}
*output = '\0';
return output;
}
unsigned long EXPORT_FUNC(grub_strtoul) (const char *str, char **end, int base); unsigned long EXPORT_FUNC(grub_strtoul) (const char *str, char **end, int base);
unsigned long long EXPORT_FUNC(grub_strtoull) (const char *str, char **end, int base); unsigned long long EXPORT_FUNC(grub_strtoull) (const char *str, char **end, int base);
@ -317,6 +308,26 @@ grub_size_t EXPORT_FUNC(grub_strlen) (const char *s) __attribute__ ((warn_unused
int EXPORT_FUNC(grub_printf) (const char *fmt, ...) __attribute__ ((format (printf, 1, 2))); int EXPORT_FUNC(grub_printf) (const char *fmt, ...) __attribute__ ((format (printf, 1, 2)));
int EXPORT_FUNC(grub_printf_) (const char *fmt, ...) __attribute__ ((format (printf, 1, 2))); int EXPORT_FUNC(grub_printf_) (const char *fmt, ...) __attribute__ ((format (printf, 1, 2)));
/* Copy `input' into `output', substituting the string `with' for every
   occurrence of the character `ch'.  The result is NUL-terminated and the
   address of that terminator inside `output' is returned.  The function
   performs no bounds checking on `output'.  */
static inline char *
grub_strchrsub (char *output, const char *input, char ch, const char *with)
{
  for (; *input; input++)
    {
      if (*input != ch)
        {
          /* Ordinary character: copy it through unchanged.  */
          *output++ = *input;
          continue;
        }

      grub_strcpy (output, with);
      output += grub_strlen (with);
    }

  *output = '\0';
  return output;
}
extern void (*EXPORT_VAR (grub_xputs)) (const char *str); extern void (*EXPORT_VAR (grub_xputs)) (const char *str);
static inline int static inline int