diff --git a/ChangeLog b/ChangeLog index ee7611caf..ada61af4c 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,15 @@ +2011-12-25 Vladimir Serbinenko + + Improve Unicode compliance and unify some UTF-8 code paths. + + * grub-core/normal/charset.c (grub_utf8_to_utf16): Don't eat a possibly + valid character. + (grub_is_valid_utf8): Use grub_utf8_process. + Check resulting code range. + (grub_utf8_to_ucs4): Use grub_utf8_process. + * include/grub/charset.h (grub_utf16_to_utf8): Don't eat up a possibly + valid character. + 2011-12-25 Vladimir Serbinenko * grub-core/io/bufio.c (grub_bufio_read): Fix handling of corner cases. diff --git a/grub-core/normal/charset.c b/grub-core/normal/charset.c index ee4a7ef5f..6ddd91827 100644 --- a/grub-core/normal/charset.c +++ b/grub-core/normal/charset.c @@ -119,11 +119,17 @@ grub_utf8_to_utf16 (grub_uint16_t *dest, grub_size_t destsize, while (srcsize && destsize) { - grub_uint8_t c = *src++; + int was_count = count; if (srcsize != (grub_size_t)-1) srcsize--; - if (!grub_utf8_process (c, &code, &count)) - return -1; + if (!grub_utf8_process (*src++, &code, &count)) + { + code = '?'; + count = 0; + /* Character c may be valid, don't eat it. 
*/ + if (was_count) + src--; + } if (count != 0) continue; if (code == 0) @@ -263,53 +269,21 @@ grub_ucs4_to_utf8_alloc (const grub_uint32_t *src, grub_size_t size) int grub_is_valid_utf8 (const grub_uint8_t *src, grub_size_t srcsize) { - grub_uint32_t code = 0; int count = 0; + grub_uint32_t code = 0; while (srcsize) { - grub_uint32_t c = *src++; if (srcsize != (grub_size_t)-1) srcsize--; - if (count) - { - if ((c & 0xc0) != 0x80) - { - /* invalid */ - return 0; - } - else - { - code <<= 6; - code |= (c & 0x3f); - count--; - } - } - else - { - if (c == 0) - break; - - if ((c & 0x80) == 0x00) - code = c; - else if ((c & 0xe0) == 0xc0) - { - count = 1; - code = c & 0x1f; - } - else if ((c & 0xf0) == 0xe0) - { - count = 2; - code = c & 0x0f; - } - else if ((c & 0xf8) == 0xf0) - { - count = 3; - code = c & 0x07; - } - else - return 0; - } + if (!grub_utf8_process (*src++, &code, &count)) + return 0; + if (count != 0) + continue; + if (code == 0) + return 1; + if (code > GRUB_UNICODE_LAST_VALID) + return 0; } return 1; @@ -355,63 +329,23 @@ grub_utf8_to_ucs4 (grub_uint32_t *dest, grub_size_t destsize, while (srcsize && destsize) { - grub_uint32_t c = *src++; + int was_count = count; if (srcsize != (grub_size_t)-1) srcsize--; - if (count) + if (!grub_utf8_process (*src++, &code, &count)) { - if ((c & 0xc0) != 0x80) - { - /* invalid */ - code = '?'; - /* Character c may be valid, don't eat it. 
*/ - src--; - if (srcsize != (grub_size_t)-1) - srcsize++; - count = 0; - } - else - { - code <<= 6; - code |= (c & 0x3f); - count--; - } - } - else - { - if (c == 0) - break; - - if ((c & 0x80) == 0x00) - code = c; - else if ((c & 0xe0) == 0xc0) - { - count = 1; - code = c & 0x1f; - } - else if ((c & 0xf0) == 0xe0) - { - count = 2; - code = c & 0x0f; - } - else if ((c & 0xf8) == 0xf0) - { - count = 3; - code = c & 0x07; - } - else - { - /* invalid */ - code = '?'; - count = 0; - } - } - - if (count == 0) - { - *p++ = code; - destsize--; + code = '?'; + count = 0; + /* Character c may be valid, don't eat it. */ + if (was_count) + src--; } + if (count != 0) + continue; + if (code == 0) + break; + *p++ = code; + destsize--; } if (srcend) diff --git a/include/grub/charset.h b/include/grub/charset.h index b0960c940..82a005f32 100644 --- a/include/grub/charset.h +++ b/include/grub/charset.h @@ -76,6 +76,8 @@ grub_utf16_to_utf8 (grub_uint8_t *dest, const grub_uint16_t *src, { /* Error... */ *dest++ = '?'; + /* *src may be valid. Don't eat it. */ + src--; } code_high = 0;