From 1afcc914c5c7e45655ebe37646ed499de8d8d2fc Mon Sep 17 00:00:00 2001 From: Vladimir 'phcoder' Serbinenko Date: Wed, 17 Nov 2010 16:13:16 +0100 Subject: [PATCH] Make better UTF compliant. * grub-core/normal/charset.c (grub_utf8_to_utf16): Handle 6- and 7-byte sequences as incorrect. (grub_is_valid_utf8): Likewise. (grub_utf8_to_ucs4): Likewise. (grub_ucs4_to_utf8): Handle codepoints outside of BMP. (grub_ucs4_to_utf8_alloc): Likewise. * include/grub/charset.h (grub_utf16_to_utf8): Likewise. --- ChangeLog | 12 ++++++++++ grub-core/normal/charset.c | 45 +++++++++++--------------------------- include/grub/charset.h | 9 +++++++- 3 files changed, 33 insertions(+), 33 deletions(-) diff --git a/ChangeLog b/ChangeLog index 3dc2e06da..e5863f6b4 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,15 @@ +2010-11-16 Vladimir Serbinenko + + Make better UTF compliant. + + * grub-core/normal/charset.c (grub_utf8_to_utf16): Handle 6- and 7-byte + sequences as incorrect. + (grub_is_valid_utf8): Likewise. + (grub_utf8_to_ucs4): Likewise. + (grub_ucs4_to_utf8): Handle codepoints outside of BMP. + (grub_ucs4_to_utf8_alloc): Likewise. + * include/grub/charset.h (grub_utf16_to_utf8): Likewise. + 2010-11-16 Vladimir Serbinenko Make legacy_source behave like source. diff --git a/grub-core/normal/charset.c b/grub-core/normal/charset.c index b7f775c4f..85ead53c4 100644 --- a/grub-core/normal/charset.c +++ b/grub-core/normal/charset.c @@ -113,16 +113,6 @@ grub_utf8_to_utf16 (grub_uint16_t *dest, grub_size_t destsize, count = 3; code = c & GRUB_UINT8_3_TRAILINGBITS; } - else if ((c & GRUB_UINT8_6_LEADINGBITS) == GRUB_UINT8_5_LEADINGBITS) - { - count = 4; - code = c & GRUB_UINT8_2_TRAILINGBITS; - } - else if ((c & GRUB_UINT8_7_LEADINGBITS) == GRUB_UINT8_6_LEADINGBITS) - { - count = 5; - code = c & GRUB_UINT8_1_TRAILINGBIT; - } else return -1; } @@ -177,7 +167,7 @@ grub_ucs4_to_utf8 (grub_uint32_t *src, grub_size_t size, /* No surrogates in UCS-4... */ *dest++ = '?'; } - else + else if (code < 0x10000) { if (dest + 2 >= destend) break; @@ -185,6 +175,15 @@ grub_ucs4_to_utf8 (grub_uint32_t *src, grub_size_t size, *dest++ = ((code >> 6) & 0x3F) | 0x80; *dest++ = (code & 0x3F) | 0x80; } + else + { + if (dest + 3 >= destend) + break; + *dest++ = (code >> 18) | 0xF0; + *dest++ = ((code >> 12) & 0x3F) | 0x80; + *dest++ = ((code >> 6) & 0x3F) | 0x80; + *dest++ = (code & 0x3F) | 0x80; + } } *dest = 0; } @@ -212,8 +211,10 @@ grub_ucs4_to_utf8_alloc (grub_uint32_t *src, grub_size_t size) || (code >= 0xD800 && code <= 0xDBFF)) /* No surrogates in UCS-4... */ cnt++; - else + else if (code < 0x10000) cnt += 3; + else + cnt += 4; } cnt++; @@ -273,16 +274,6 @@ grub_is_valid_utf8 (const grub_uint8_t *src, grub_size_t srcsize) count = 3; code = c & 0x07; } - else if ((c & 0xfc) == 0xf8) - { - count = 4; - code = c & 0x03; - } - else if ((c & 0xfe) == 0xfc) - { - count = 5; - code = c & 0x01; - } else return 0; } @@ -375,16 +366,6 @@ grub_utf8_to_ucs4 (grub_uint32_t *dest, grub_size_t destsize, count = 3; code = c & 0x07; } - else if ((c & 0xfc) == 0xf8) - { - count = 4; - code = c & 0x03; - } - else if ((c & 0xfe) == 0xfc) - { - count = 5; - code = c & 0x01; - } else { /* invalid */ diff --git a/include/grub/charset.h b/include/grub/charset.h index 1d79d5d2c..c8247f78a 100644 --- a/include/grub/charset.h +++ b/include/grub/charset.h @@ -97,12 +97,19 @@ grub_utf16_to_utf8 (grub_uint8_t *dest, grub_uint16_t *src, /* Error... */ *dest++ = '?'; } - else + else if (code < 0x10000) { *dest++ = (code >> 12) | 0xE0; *dest++ = ((code >> 6) & 0x3F) | 0x80; *dest++ = (code & 0x3F) | 0x80; } + else + { + *dest++ = (code >> 18) | 0xF0; + *dest++ = ((code >> 12) & 0x3F) | 0x80; + *dest++ = ((code >> 6) & 0x3F) | 0x80; + *dest++ = (code & 0x3F) | 0x80; + } } }