363 lines
7.4 KiB
C
363 lines
7.4 KiB
C
/*
 *  GRUB  --  GRand Unified Bootloader
 *  Copyright (C) 1999,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009  Free Software Foundation, Inc.
 *
 *  GRUB is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  GRUB is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with GRUB.  If not, see <http://www.gnu.org/licenses/>.
 */
|
|
|
|
/* Convert a (possibly NUL-terminated) UTF-8 string of at most SRCSIZE
   bytes (if SRCSIZE is -1, the length limit is ignored) to a UTF-16 string.
   Return the number of UTF-16 code units written.  DEST must be able to
   hold at least DESTSIZE code units.  If an invalid sequence is found,
   return -1.
   If SRCEND is not NULL, then *SRCEND is set to the next byte after the
   last byte used in SRC.  */
|
|
|
|
#include <grub/charset.h>
|
|
#include <grub/mm.h>
|
|
#include <grub/misc.h>
|
|
|
|
grub_ssize_t
|
|
grub_utf8_to_utf16 (grub_uint16_t *dest, grub_size_t destsize,
|
|
const grub_uint8_t *src, grub_size_t srcsize,
|
|
const grub_uint8_t **srcend)
|
|
{
|
|
grub_uint16_t *p = dest;
|
|
int count = 0;
|
|
grub_uint32_t code = 0;
|
|
|
|
if (srcend)
|
|
*srcend = src;
|
|
|
|
while (srcsize && destsize)
|
|
{
|
|
grub_uint32_t c = *src++;
|
|
if (srcsize != (grub_size_t)-1)
|
|
srcsize--;
|
|
if (count)
|
|
{
|
|
if ((c & GRUB_UINT8_2_LEADINGBITS) != GRUB_UINT8_1_LEADINGBIT)
|
|
{
|
|
/* invalid */
|
|
return -1;
|
|
}
|
|
else
|
|
{
|
|
code <<= 6;
|
|
code |= (c & GRUB_UINT8_6_TRAILINGBITS);
|
|
count--;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
if (c == 0)
|
|
break;
|
|
|
|
if ((c & GRUB_UINT8_1_LEADINGBIT) == 0)
|
|
code = c;
|
|
else if ((c & GRUB_UINT8_3_LEADINGBITS) == GRUB_UINT8_2_LEADINGBITS)
|
|
{
|
|
count = 1;
|
|
code = c & GRUB_UINT8_5_TRAILINGBITS;
|
|
}
|
|
else if ((c & GRUB_UINT8_4_LEADINGBITS) == GRUB_UINT8_3_LEADINGBITS)
|
|
{
|
|
count = 2;
|
|
code = c & GRUB_UINT8_4_TRAILINGBITS;
|
|
}
|
|
else if ((c & GRUB_UINT8_5_LEADINGBITS) == GRUB_UINT8_4_LEADINGBITS)
|
|
{
|
|
count = 3;
|
|
code = c & GRUB_UINT8_3_TRAILINGBITS;
|
|
}
|
|
else if ((c & GRUB_UINT8_6_LEADINGBITS) == GRUB_UINT8_5_LEADINGBITS)
|
|
{
|
|
count = 4;
|
|
code = c & GRUB_UINT8_2_TRAILINGBITS;
|
|
}
|
|
else if ((c & GRUB_UINT8_7_LEADINGBITS) == GRUB_UINT8_6_LEADINGBITS)
|
|
{
|
|
count = 5;
|
|
code = c & GRUB_UINT8_1_TRAILINGBIT;
|
|
}
|
|
else
|
|
return -1;
|
|
}
|
|
|
|
if (count == 0)
|
|
{
|
|
if (destsize < 2 && code >= GRUB_UCS2_LIMIT)
|
|
break;
|
|
if (code >= GRUB_UCS2_LIMIT)
|
|
{
|
|
*p++ = GRUB_UTF16_UPPER_SURROGATE (code);
|
|
*p++ = GRUB_UTF16_LOWER_SURROGATE (code);
|
|
destsize -= 2;
|
|
}
|
|
else
|
|
{
|
|
*p++ = code;
|
|
destsize--;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (srcend)
|
|
*srcend = src;
|
|
return p - dest;
|
|
}
|
|
|
|
/* Convert UCS-4 to UTF-8. */
|
|
char *
|
|
grub_ucs4_to_utf8_alloc (grub_uint32_t *src, grub_size_t size)
|
|
{
|
|
grub_size_t remaining;
|
|
grub_uint32_t *ptr;
|
|
grub_size_t cnt = 0;
|
|
grub_uint8_t *ret, *dest;
|
|
|
|
remaining = size;
|
|
ptr = src;
|
|
while (remaining--)
|
|
{
|
|
grub_uint32_t code = *ptr++;
|
|
|
|
if (code <= 0x007F)
|
|
cnt++;
|
|
else if (code <= 0x07FF)
|
|
cnt += 2;
|
|
else if ((code >= 0xDC00 && code <= 0xDFFF)
|
|
|| (code >= 0xD800 && code <= 0xDBFF))
|
|
/* No surrogates in UCS-4... */
|
|
cnt++;
|
|
else
|
|
cnt += 3;
|
|
}
|
|
cnt++;
|
|
|
|
ret = grub_malloc (cnt);
|
|
if (!ret)
|
|
return 0;
|
|
|
|
dest = ret;
|
|
remaining = size;
|
|
ptr = src;
|
|
while (remaining--)
|
|
{
|
|
grub_uint32_t code = *ptr++;
|
|
|
|
if (code <= 0x007F)
|
|
*dest++ = code;
|
|
else if (code <= 0x07FF)
|
|
{
|
|
*dest++ = (code >> 6) | 0xC0;
|
|
*dest++ = (code & 0x3F) | 0x80;
|
|
}
|
|
else if ((code >= 0xDC00 && code <= 0xDFFF)
|
|
|| (code >= 0xD800 && code <= 0xDBFF))
|
|
{
|
|
/* No surrogates in UCS-4... */
|
|
*dest++ = '?';
|
|
}
|
|
else
|
|
{
|
|
*dest++ = (code >> 12) | 0xE0;
|
|
*dest++ = ((code >> 6) & 0x3F) | 0x80;
|
|
*dest++ = (code & 0x3F) | 0x80;
|
|
}
|
|
}
|
|
*dest = 0;
|
|
|
|
return (char *) ret;
|
|
}
|
|
|
|
int
|
|
grub_is_valid_utf8 (const grub_uint8_t *src, grub_size_t srcsize)
|
|
{
|
|
grub_uint32_t code = 0;
|
|
int count = 0;
|
|
|
|
while (srcsize)
|
|
{
|
|
grub_uint32_t c = *src++;
|
|
if (srcsize != (grub_size_t)-1)
|
|
srcsize--;
|
|
if (count)
|
|
{
|
|
if ((c & 0xc0) != 0x80)
|
|
{
|
|
/* invalid */
|
|
return 0;
|
|
}
|
|
else
|
|
{
|
|
code <<= 6;
|
|
code |= (c & 0x3f);
|
|
count--;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
if (c == 0)
|
|
break;
|
|
|
|
if ((c & 0x80) == 0x00)
|
|
code = c;
|
|
else if ((c & 0xe0) == 0xc0)
|
|
{
|
|
count = 1;
|
|
code = c & 0x1f;
|
|
}
|
|
else if ((c & 0xf0) == 0xe0)
|
|
{
|
|
count = 2;
|
|
code = c & 0x0f;
|
|
}
|
|
else if ((c & 0xf8) == 0xf0)
|
|
{
|
|
count = 3;
|
|
code = c & 0x07;
|
|
}
|
|
else if ((c & 0xfc) == 0xf8)
|
|
{
|
|
count = 4;
|
|
code = c & 0x03;
|
|
}
|
|
else if ((c & 0xfe) == 0xfc)
|
|
{
|
|
count = 5;
|
|
code = c & 0x01;
|
|
}
|
|
else
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
return 1;
|
|
}
|
|
|
|
int
|
|
grub_utf8_to_ucs4_alloc (const char *msg, grub_uint32_t **unicode_msg,
|
|
grub_uint32_t **last_position)
|
|
{
|
|
grub_size_t msg_len = grub_strlen (msg);
|
|
|
|
*unicode_msg = grub_malloc (grub_strlen (msg) * sizeof (grub_uint32_t));
|
|
|
|
if (!*unicode_msg)
|
|
{
|
|
grub_printf ("utf8_to_ucs4 ERROR1: %s", msg);
|
|
return -1;
|
|
}
|
|
|
|
msg_len = grub_utf8_to_ucs4 (*unicode_msg, msg_len,
|
|
(grub_uint8_t *) msg, -1, 0);
|
|
|
|
*last_position = *unicode_msg + msg_len;
|
|
|
|
return msg_len;
|
|
}
|
|
|
|
/* Convert a (possibly null-terminated) UTF-8 string of at most SRCSIZE
|
|
bytes (if SRCSIZE is -1, it is ignored) in length to a UCS-4 string.
|
|
Return the number of characters converted. DEST must be able to hold
|
|
at least DESTSIZE characters.
|
|
If SRCEND is not NULL, then *SRCEND is set to the next byte after the
|
|
last byte used in SRC. */
|
|
grub_size_t
|
|
grub_utf8_to_ucs4 (grub_uint32_t *dest, grub_size_t destsize,
|
|
const grub_uint8_t *src, grub_size_t srcsize,
|
|
const grub_uint8_t **srcend)
|
|
{
|
|
grub_uint32_t *p = dest;
|
|
int count = 0;
|
|
grub_uint32_t code = 0;
|
|
|
|
if (srcend)
|
|
*srcend = src;
|
|
|
|
while (srcsize && destsize)
|
|
{
|
|
grub_uint32_t c = *src++;
|
|
if (srcsize != (grub_size_t)-1)
|
|
srcsize--;
|
|
if (count)
|
|
{
|
|
if ((c & 0xc0) != 0x80)
|
|
{
|
|
/* invalid */
|
|
code = '?';
|
|
/* Character c may be valid, don't eat it. */
|
|
src--;
|
|
if (srcsize != (grub_size_t)-1)
|
|
srcsize++;
|
|
count = 0;
|
|
}
|
|
else
|
|
{
|
|
code <<= 6;
|
|
code |= (c & 0x3f);
|
|
count--;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
if (c == 0)
|
|
break;
|
|
|
|
if ((c & 0x80) == 0x00)
|
|
code = c;
|
|
else if ((c & 0xe0) == 0xc0)
|
|
{
|
|
count = 1;
|
|
code = c & 0x1f;
|
|
}
|
|
else if ((c & 0xf0) == 0xe0)
|
|
{
|
|
count = 2;
|
|
code = c & 0x0f;
|
|
}
|
|
else if ((c & 0xf8) == 0xf0)
|
|
{
|
|
count = 3;
|
|
code = c & 0x07;
|
|
}
|
|
else if ((c & 0xfc) == 0xf8)
|
|
{
|
|
count = 4;
|
|
code = c & 0x03;
|
|
}
|
|
else if ((c & 0xfe) == 0xfc)
|
|
{
|
|
count = 5;
|
|
code = c & 0x01;
|
|
}
|
|
else
|
|
{
|
|
/* invalid */
|
|
code = '?';
|
|
count = 0;
|
|
}
|
|
}
|
|
|
|
if (count == 0)
|
|
{
|
|
*p++ = code;
|
|
destsize--;
|
|
}
|
|
}
|
|
|
|
if (srcend)
|
|
*srcend = src;
|
|
return p - dest;
|
|
}
|