Improve Unicode compliance and unify some UTF-8 code paths.

* grub-core/normal/charset.c (grub_utf8_to_utf16): Don't eat a possibly
	valid character.
	(grub_is_valid_utf8): Use grub_utf8_process.
	Check resulting code range.
	(grub_utf8_to_ucs4): Use grub_utf8_process.
	* include/grub/charset.h (grub_utf16_to_utf8): Don't eat up a possibly
	valid character.
This commit is contained in:
Vladimir 'phcoder' Serbinenko 2011-12-25 15:57:50 +01:00
parent cc4fddf5f5
commit 8569f13d8d
3 changed files with 45 additions and 97 deletions

View file

@ -1,3 +1,15 @@
2011-12-25 Vladimir Serbinenko <phcoder@gmail.com>
Improve Unicode compliance and unify some UTF-8 code paths.
* grub-core/normal/charset.c (grub_utf8_to_utf16): Don't eat a possibly
valid character.
(grub_is_valid_utf8): Use grub_utf8_process.
Check resulting code range.
(grub_utf8_to_ucs4): Use grub_utf8_process.
* include/grub/charset.h (grub_utf16_to_utf8): Don't eat up a possibly
valid character.
2011-12-25 Vladimir Serbinenko <phcoder@gmail.com>
* grub-core/io/bufio.c (grub_bufio_read): Fix handling of corner cases.

View file

@ -119,11 +119,17 @@ grub_utf8_to_utf16 (grub_uint16_t *dest, grub_size_t destsize,
while (srcsize && destsize)
{
grub_uint8_t c = *src++;
int was_count = count;
if (srcsize != (grub_size_t)-1)
srcsize--;
if (!grub_utf8_process (c, &code, &count))
return -1;
if (!grub_utf8_process (*src++, &code, &count))
{
code = '?';
count = 0;
/* Character c may be valid, don't eat it. */
if (was_count)
src--;
}
if (count != 0)
continue;
if (code == 0)
@ -263,54 +269,22 @@ grub_ucs4_to_utf8_alloc (const grub_uint32_t *src, grub_size_t size)
int
grub_is_valid_utf8 (const grub_uint8_t *src, grub_size_t srcsize)
{
grub_uint32_t code = 0;
int count = 0;
grub_uint32_t code = 0;
while (srcsize)
{
grub_uint32_t c = *src++;
if (srcsize != (grub_size_t)-1)
srcsize--;
if (count)
{
if ((c & 0xc0) != 0x80)
{
/* invalid */
if (!grub_utf8_process (*src++, &code, &count))
return 0;
}
else
{
code <<= 6;
code |= (c & 0x3f);
count--;
}
}
else
{
if (c == 0)
break;
if ((c & 0x80) == 0x00)
code = c;
else if ((c & 0xe0) == 0xc0)
{
count = 1;
code = c & 0x1f;
}
else if ((c & 0xf0) == 0xe0)
{
count = 2;
code = c & 0x0f;
}
else if ((c & 0xf8) == 0xf0)
{
count = 3;
code = c & 0x07;
}
else
if (count != 0)
continue;
if (code == 0)
return 1;
if (code > GRUB_UNICODE_LAST_VALID)
return 0;
}
}
return 1;
}
@ -355,64 +329,24 @@ grub_utf8_to_ucs4 (grub_uint32_t *dest, grub_size_t destsize,
while (srcsize && destsize)
{
grub_uint32_t c = *src++;
int was_count = count;
if (srcsize != (grub_size_t)-1)
srcsize--;
if (count)
if (!grub_utf8_process (*src++, &code, &count))
{
if ((c & 0xc0) != 0x80)
{
/* invalid */
code = '?';
count = 0;
/* Character c may be valid, don't eat it. */
if (was_count)
src--;
if (srcsize != (grub_size_t)-1)
srcsize++;
count = 0;
}
else
{
code <<= 6;
code |= (c & 0x3f);
count--;
}
}
else
{
if (c == 0)
if (count != 0)
continue;
if (code == 0)
break;
if ((c & 0x80) == 0x00)
code = c;
else if ((c & 0xe0) == 0xc0)
{
count = 1;
code = c & 0x1f;
}
else if ((c & 0xf0) == 0xe0)
{
count = 2;
code = c & 0x0f;
}
else if ((c & 0xf8) == 0xf0)
{
count = 3;
code = c & 0x07;
}
else
{
/* invalid */
code = '?';
count = 0;
}
}
if (count == 0)
{
*p++ = code;
destsize--;
}
}
if (srcend)
*srcend = src;

View file

@ -76,6 +76,8 @@ grub_utf16_to_utf8 (grub_uint8_t *dest, const grub_uint16_t *src,
{
/* Error... */
*dest++ = '?';
/* *src may be valid. Don't eat it. */
src--;
}
code_high = 0;