Improve Unicode compliance and unify some UTF-8 code paths.

* grub-core/normal/charset.c (grub_utf8_to_utf16): Don't eat a possibly
	valid character.
	(grub_is_valid_utf8): Use grub_utf8_process.
	Check resulting code range.
	(grub_utf8_to_ucs4): Use grub_utf8_process.
	* include/grub/charset.h (grub_utf16_to_utf8): Don't eat up a possibly
	valid character.
This commit is contained in:
Vladimir 'phcoder' Serbinenko 2011-12-25 15:57:50 +01:00
parent cc4fddf5f5
commit 8569f13d8d
3 changed files with 45 additions and 97 deletions

View file

@ -1,3 +1,15 @@
2011-12-25 Vladimir Serbinenko <phcoder@gmail.com>
Improve Unicode compliance and unify some UTF-8 code paths.
* grub-core/normal/charset.c (grub_utf8_to_utf16): Don't eat a possibly
valid character.
(grub_is_valid_utf8): Use grub_utf8_process.
Check resulting code range.
(grub_utf8_to_ucs4): Use grub_utf8_process.
* include/grub/charset.h (grub_utf16_to_utf8): Don't eat up a possibly
valid character.
2011-12-25 Vladimir Serbinenko <phcoder@gmail.com> 2011-12-25 Vladimir Serbinenko <phcoder@gmail.com>
* grub-core/io/bufio.c (grub_bufio_read): Fix handling of corner cases. * grub-core/io/bufio.c (grub_bufio_read): Fix handling of corner cases.

View file

@ -119,11 +119,17 @@ grub_utf8_to_utf16 (grub_uint16_t *dest, grub_size_t destsize,
while (srcsize && destsize) while (srcsize && destsize)
{ {
grub_uint8_t c = *src++; int was_count = count;
if (srcsize != (grub_size_t)-1) if (srcsize != (grub_size_t)-1)
srcsize--; srcsize--;
if (!grub_utf8_process (c, &code, &count)) if (!grub_utf8_process (*src++, &code, &count))
return -1; {
code = '?';
count = 0;
/* Character c may be valid, don't eat it. */
if (was_count)
src--;
}
if (count != 0) if (count != 0)
continue; continue;
if (code == 0) if (code == 0)
@ -263,54 +269,22 @@ grub_ucs4_to_utf8_alloc (const grub_uint32_t *src, grub_size_t size)
int int
grub_is_valid_utf8 (const grub_uint8_t *src, grub_size_t srcsize) grub_is_valid_utf8 (const grub_uint8_t *src, grub_size_t srcsize)
{ {
grub_uint32_t code = 0;
int count = 0; int count = 0;
grub_uint32_t code = 0;
while (srcsize) while (srcsize)
{ {
grub_uint32_t c = *src++;
if (srcsize != (grub_size_t)-1) if (srcsize != (grub_size_t)-1)
srcsize--; srcsize--;
if (count) if (!grub_utf8_process (*src++, &code, &count))
{
if ((c & 0xc0) != 0x80)
{
/* invalid */
return 0; return 0;
} if (count != 0)
else continue;
{ if (code == 0)
code <<= 6; return 1;
code |= (c & 0x3f); if (code > GRUB_UNICODE_LAST_VALID)
count--;
}
}
else
{
if (c == 0)
break;
if ((c & 0x80) == 0x00)
code = c;
else if ((c & 0xe0) == 0xc0)
{
count = 1;
code = c & 0x1f;
}
else if ((c & 0xf0) == 0xe0)
{
count = 2;
code = c & 0x0f;
}
else if ((c & 0xf8) == 0xf0)
{
count = 3;
code = c & 0x07;
}
else
return 0; return 0;
} }
}
return 1; return 1;
} }
@ -355,64 +329,24 @@ grub_utf8_to_ucs4 (grub_uint32_t *dest, grub_size_t destsize,
while (srcsize && destsize) while (srcsize && destsize)
{ {
grub_uint32_t c = *src++; int was_count = count;
if (srcsize != (grub_size_t)-1) if (srcsize != (grub_size_t)-1)
srcsize--; srcsize--;
if (count) if (!grub_utf8_process (*src++, &code, &count))
{ {
if ((c & 0xc0) != 0x80)
{
/* invalid */
code = '?'; code = '?';
count = 0;
/* Character c may be valid, don't eat it. */ /* Character c may be valid, don't eat it. */
if (was_count)
src--; src--;
if (srcsize != (grub_size_t)-1)
srcsize++;
count = 0;
} }
else if (count != 0)
{ continue;
code <<= 6; if (code == 0)
code |= (c & 0x3f);
count--;
}
}
else
{
if (c == 0)
break; break;
if ((c & 0x80) == 0x00)
code = c;
else if ((c & 0xe0) == 0xc0)
{
count = 1;
code = c & 0x1f;
}
else if ((c & 0xf0) == 0xe0)
{
count = 2;
code = c & 0x0f;
}
else if ((c & 0xf8) == 0xf0)
{
count = 3;
code = c & 0x07;
}
else
{
/* invalid */
code = '?';
count = 0;
}
}
if (count == 0)
{
*p++ = code; *p++ = code;
destsize--; destsize--;
} }
}
if (srcend) if (srcend)
*srcend = src; *srcend = src;

View file

@ -76,6 +76,8 @@ grub_utf16_to_utf8 (grub_uint8_t *dest, const grub_uint16_t *src,
{ {
/* Error... */ /* Error... */
*dest++ = '?'; *dest++ = '?';
/* *src may be valid. Don't eat it. */
src--;
} }
code_high = 0; code_high = 0;