grub/include/grub/charset.h
Vladimir 'phcoder' Serbinenko 4d8c476536 Avoid cutting in the middle of UTF-8 string.
* include/grub/charset.h (grub_getend): New function.
	* grub-core/script/function.c (grub_script_function_find): Use
	grub_getend.
	* grub-core/normal/completion.c (add_completion): Likewise.
2011-12-25 16:11:41 +01:00

197 lines
5.4 KiB
C

/*
* GRUB -- GRand Unified Bootloader
* Copyright (C) 1999,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009 Free Software Foundation, Inc.
*
* GRUB is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* GRUB is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GRUB. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef GRUB_CHARSET_HEADER
#define GRUB_CHARSET_HEADER 1
#include <grub/types.h>
#define GRUB_UINT8_1_LEADINGBIT 0x80
#define GRUB_UINT8_2_LEADINGBITS 0xc0
#define GRUB_UINT8_3_LEADINGBITS 0xe0
#define GRUB_UINT8_4_LEADINGBITS 0xf0
#define GRUB_UINT8_5_LEADINGBITS 0xf8
#define GRUB_UINT8_6_LEADINGBITS 0xfc
#define GRUB_UINT8_7_LEADINGBITS 0xfe
#define GRUB_UINT8_1_TRAILINGBIT 0x01
#define GRUB_UINT8_2_TRAILINGBITS 0x03
#define GRUB_UINT8_3_TRAILINGBITS 0x07
#define GRUB_UINT8_4_TRAILINGBITS 0x0f
#define GRUB_UINT8_5_TRAILINGBITS 0x1f
#define GRUB_UINT8_6_TRAILINGBITS 0x3f
#define GRUB_MAX_UTF8_PER_UTF16 4
#define GRUB_UCS2_LIMIT 0x10000
#define GRUB_UTF16_UPPER_SURROGATE(code) \
(0xD800 + ((((code) - GRUB_UCS2_LIMIT) >> 12) & 0xfff))
#define GRUB_UTF16_LOWER_SURROGATE(code) \
(0xDC00 + (((code) - GRUB_UCS2_LIMIT) & 0xfff))
grub_ssize_t
grub_utf8_to_utf16 (grub_uint16_t *dest, grub_size_t destsize,
const grub_uint8_t *src, grub_size_t srcsize,
const grub_uint8_t **srcend);
/* Determine the last position where the UTF-8 string [beg, end) can
be safely cut. */
static inline grub_size_t
grub_getend (const char *beg, const char *end)
{
const char *ptr;
for (ptr = end - 1; ptr >= beg; ptr--)
if ((*ptr & GRUB_UINT8_2_LEADINGBITS) != GRUB_UINT8_1_LEADINGBIT)
break;
if (ptr < beg)
return 0;
if ((*ptr & GRUB_UINT8_1_LEADINGBIT) == 0)
return ptr + 1 - beg;
if ((*ptr & GRUB_UINT8_3_LEADINGBITS) == GRUB_UINT8_2_LEADINGBITS
&& ptr + 2 <= end)
return ptr + 2 - beg;
if ((*ptr & GRUB_UINT8_4_LEADINGBITS) == GRUB_UINT8_3_LEADINGBITS
&& ptr + 3 <= end)
return ptr + 3 - beg;
if ((*ptr & GRUB_UINT8_5_LEADINGBITS) == GRUB_UINT8_4_LEADINGBITS
&& ptr + 4 <= end)
return ptr + 4 - beg;
/* Invalid character or incomplete. Cut before it. */
return ptr - beg;
}
/* Convert UTF-16 to UTF-8. */
static inline grub_uint8_t *
grub_utf16_to_utf8 (grub_uint8_t *dest, const grub_uint16_t *src,
grub_size_t size)
{
grub_uint32_t code_high = 0;
while (size--)
{
grub_uint32_t code = *src++;
if (code_high)
{
if (code >= 0xDC00 && code <= 0xDFFF)
{
/* Surrogate pair. */
code = ((code_high - 0xD800) << 12) + (code - 0xDC00) + 0x10000;
*dest++ = (code >> 18) | 0xF0;
*dest++ = ((code >> 12) & 0x3F) | 0x80;
*dest++ = ((code >> 6) & 0x3F) | 0x80;
*dest++ = (code & 0x3F) | 0x80;
}
else
{
/* Error... */
*dest++ = '?';
/* *src may be valid. Don't eat it. */
src--;
}
code_high = 0;
}
else
{
if (code <= 0x007F)
*dest++ = code;
else if (code <= 0x07FF)
{
*dest++ = (code >> 6) | 0xC0;
*dest++ = (code & 0x3F) | 0x80;
}
else if (code >= 0xD800 && code <= 0xDBFF)
{
code_high = code;
continue;
}
else if (code >= 0xDC00 && code <= 0xDFFF)
{
/* Error... */
*dest++ = '?';
}
else if (code < 0x10000)
{
*dest++ = (code >> 12) | 0xE0;
*dest++ = ((code >> 6) & 0x3F) | 0x80;
*dest++ = (code & 0x3F) | 0x80;
}
else
{
*dest++ = (code >> 18) | 0xF0;
*dest++ = ((code >> 12) & 0x3F) | 0x80;
*dest++ = ((code >> 6) & 0x3F) | 0x80;
*dest++ = (code & 0x3F) | 0x80;
}
}
}
return dest;
}
#define GRUB_MAX_UTF8_PER_LATIN1 2
/* Convert Latin1 to UTF-8. */
static inline grub_uint8_t *
grub_latin1_to_utf8 (grub_uint8_t *dest, const grub_uint8_t *src,
grub_size_t size)
{
while (size--)
{
if (!(*src & 0x80))
*dest++ = *src;
else
{
*dest++ = (*src >> 6) | 0xC0;
*dest++ = (*src & 0x3F) | 0x80;
}
src++;
}
return dest;
}
/* Convert UCS-4 to UTF-8. */
char *grub_ucs4_to_utf8_alloc (const grub_uint32_t *src, grub_size_t size);
int
grub_is_valid_utf8 (const grub_uint8_t *src, grub_size_t srcsize);
int grub_utf8_to_ucs4_alloc (const char *msg, grub_uint32_t **unicode_msg,
grub_uint32_t **last_position);
/* Process one character from UTF8 sequence.
At beginning set *code = 0, *count = 0. Returns 0 on failure and
1 on success. *count holds the number of trailing bytes. */
int
grub_utf8_process (grub_uint8_t c, grub_uint32_t *code, int *count);
void
grub_ucs4_to_utf8 (const grub_uint32_t *src, grub_size_t size,
grub_uint8_t *dest, grub_size_t destsize);
grub_size_t grub_utf8_to_ucs4 (grub_uint32_t *dest, grub_size_t destsize,
const grub_uint8_t *src, grub_size_t srcsize,
const grub_uint8_t **srcend);
/* Returns -2 if not enough space, -1 on invalid character. */
grub_ssize_t
grub_encode_utf8_character (grub_uint8_t *dest, grub_uint8_t *destend,
grub_uint32_t code);
#endif