grub/grub-core/lib/charset.c
2010-05-06 11:34:04 +05:30

269 lines
5.7 KiB
C

/*
* GRUB -- GRand Unified Bootloader
* Copyright (C) 1999,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009 Free Software Foundation, Inc.
*
* GRUB is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* GRUB is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GRUB. If not, see <http://www.gnu.org/licenses/>.
*/
/* Convert a (possibly null-terminated) UTF-8 string of at most SRCSIZE
bytes (if SRCSIZE is -1, it is ignored) in length to a UTF-16 string.
Return the number of characters converted. DEST must be able to hold
at least DESTSIZE characters. If an invalid sequence is found, return -1.
If SRCEND is not NULL, then *SRCEND is set to the next byte after the
last byte used in SRC. */
#include <grub/charset.h>
#include <grub/mm.h>
#include <grub/misc.h>
grub_ssize_t
grub_utf8_to_utf16 (grub_uint16_t *dest, grub_size_t destsize,
const grub_uint8_t *src, grub_size_t srcsize,
const grub_uint8_t **srcend)
{
grub_uint16_t *p = dest;
int count = 0;
grub_uint32_t code = 0;
if (srcend)
*srcend = src;
while (srcsize && destsize)
{
grub_uint32_t c = *src++;
if (srcsize != (grub_size_t)-1)
srcsize--;
if (count)
{
if ((c & GRUB_UINT8_2_LEADINGBITS) != GRUB_UINT8_1_LEADINGBIT)
{
/* invalid */
return -1;
}
else
{
code <<= 6;
code |= (c & GRUB_UINT8_6_TRAILINGBITS);
count--;
}
}
else
{
if (c == 0)
break;
if ((c & GRUB_UINT8_1_LEADINGBIT) == 0)
code = c;
else if ((c & GRUB_UINT8_3_LEADINGBITS) == GRUB_UINT8_2_LEADINGBITS)
{
count = 1;
code = c & GRUB_UINT8_5_TRAILINGBITS;
}
else if ((c & GRUB_UINT8_4_LEADINGBITS) == GRUB_UINT8_3_LEADINGBITS)
{
count = 2;
code = c & GRUB_UINT8_4_TRAILINGBITS;
}
else if ((c & GRUB_UINT8_5_LEADINGBITS) == GRUB_UINT8_4_LEADINGBITS)
{
count = 3;
code = c & GRUB_UINT8_3_TRAILINGBITS;
}
else if ((c & GRUB_UINT8_6_LEADINGBITS) == GRUB_UINT8_5_LEADINGBITS)
{
count = 4;
code = c & GRUB_UINT8_2_TRAILINGBITS;
}
else if ((c & GRUB_UINT8_7_LEADINGBITS) == GRUB_UINT8_6_LEADINGBITS)
{
count = 5;
code = c & GRUB_UINT8_1_TRAILINGBIT;
}
else
return -1;
}
if (count == 0)
{
if (destsize < 2 && code >= GRUB_UCS2_LIMIT)
break;
if (code >= GRUB_UCS2_LIMIT)
{
*p++ = GRUB_UTF16_UPPER_SURROGATE (code);
*p++ = GRUB_UTF16_LOWER_SURROGATE (code);
destsize -= 2;
}
else
{
*p++ = code;
destsize--;
}
}
}
if (srcend)
*srcend = src;
return p - dest;
}
/* Convert UCS-4 to UTF-8. */
char *
grub_ucs4_to_utf8_alloc (grub_uint32_t *src, grub_size_t size)
{
grub_size_t remaining;
grub_uint32_t *ptr;
grub_size_t cnt = 0;
grub_uint8_t *ret, *dest;
remaining = size;
ptr = src;
while (remaining--)
{
grub_uint32_t code = *ptr++;
if (code <= 0x007F)
cnt++;
else if (code <= 0x07FF)
cnt += 2;
else if ((code >= 0xDC00 && code <= 0xDFFF)
|| (code >= 0xD800 && code <= 0xDBFF))
/* No surrogates in UCS-4... */
cnt++;
else
cnt += 3;
}
cnt++;
ret = grub_malloc (cnt);
if (!ret)
return 0;
dest = ret;
remaining = size;
ptr = src;
while (remaining--)
{
grub_uint32_t code = *ptr++;
if (code <= 0x007F)
*dest++ = code;
else if (code <= 0x07FF)
{
*dest++ = (code >> 6) | 0xC0;
*dest++ = (code & 0x3F) | 0x80;
}
else if ((code >= 0xDC00 && code <= 0xDFFF)
|| (code >= 0xD800 && code <= 0xDBFF))
{
/* No surrogates in UCS-4... */
*dest++ = '?';
}
else
{
*dest++ = (code >> 12) | 0xE0;
*dest++ = ((code >> 6) & 0x3F) | 0x80;
*dest++ = (code & 0x3F) | 0x80;
}
}
*dest = 0;
return (char *) ret;
}
int
grub_is_valid_utf8 (const grub_uint8_t *src, grub_size_t srcsize)
{
grub_uint32_t code = 0;
int count = 0;
while (srcsize)
{
grub_uint32_t c = *src++;
if (srcsize != (grub_size_t)-1)
srcsize--;
if (count)
{
if ((c & 0xc0) != 0x80)
{
/* invalid */
return 0;
}
else
{
code <<= 6;
code |= (c & 0x3f);
count--;
}
}
else
{
if (c == 0)
break;
if ((c & 0x80) == 0x00)
code = c;
else if ((c & 0xe0) == 0xc0)
{
count = 1;
code = c & 0x1f;
}
else if ((c & 0xf0) == 0xe0)
{
count = 2;
code = c & 0x0f;
}
else if ((c & 0xf8) == 0xf0)
{
count = 3;
code = c & 0x07;
}
else if ((c & 0xfc) == 0xf8)
{
count = 4;
code = c & 0x03;
}
else if ((c & 0xfe) == 0xfc)
{
count = 5;
code = c & 0x01;
}
else
return 0;
}
}
return 1;
}
int
grub_utf8_to_ucs4_alloc (const char *msg, grub_uint32_t **unicode_msg,
grub_uint32_t **last_position)
{
grub_size_t msg_len = grub_strlen (msg);
*unicode_msg = grub_malloc (grub_strlen (msg) * sizeof (grub_uint32_t));
if (!*unicode_msg)
{
grub_printf ("utf8_to_ucs4 ERROR1: %s", msg);
return -1;
}
msg_len = grub_utf8_to_ucs4 (*unicode_msg, msg_len,
(grub_uint8_t *) msg, -1, 0);
*last_position = *unicode_msg + msg_len;
return msg_len;
}