* include/grub/efi/efi.h: Likewise. * include/grub/efi/api.h: Add guid for EFI-specified variables. * include/grub/charset.h (GRUB_MAX_UTF16_PER_UTF8): New definition. * grub-core/normal/charset.c (grub_utf8_process): Move from here ... * include/grub/charset.h (grub_utf8_process): ... to here. Inline. * grub-core/normal/charset.c (grub_utf8_to_utf16): Move from here ... * include/grub/charset.h (grub_utf8_to_utf16): ... to here. Inline.
		
			
				
	
	
		
			296 lines
		
	
	
	
		
			7.6 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			296 lines
		
	
	
	
		
			7.6 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| /*
 | |
|  *  GRUB  --  GRand Unified Bootloader
 | |
|  *  Copyright (C) 1999,2000,2001,2002,2003,2004,2005,2006,2007,2008,2009  Free Software Foundation, Inc.
 | |
|  *
 | |
|  *  GRUB is free software: you can redistribute it and/or modify
 | |
|  *  it under the terms of the GNU General Public License as published by
 | |
|  *  the Free Software Foundation, either version 3 of the License, or
 | |
|  *  (at your option) any later version.
 | |
|  *
 | |
|  *  GRUB is distributed in the hope that it will be useful,
 | |
|  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 | |
|  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | |
|  *  GNU General Public License for more details.
 | |
|  *
 | |
|  *  You should have received a copy of the GNU General Public License
 | |
|  *  along with GRUB.  If not, see <http://www.gnu.org/licenses/>.
 | |
|  */
 | |
| 
 | |
| #ifndef GRUB_CHARSET_HEADER
 | |
| #define GRUB_CHARSET_HEADER	1
 | |
| 
 | |
| #include <grub/types.h>
 | |
| 
 | |
/* Masks covering the N most significant bits of an octet.  They are used
   to classify UTF-8 bytes by their leading bit pattern.  */
#define GRUB_UINT8_1_LEADINGBIT		0x80
#define GRUB_UINT8_2_LEADINGBITS	0xc0
#define GRUB_UINT8_3_LEADINGBITS	0xe0
#define GRUB_UINT8_4_LEADINGBITS	0xf0
#define GRUB_UINT8_5_LEADINGBITS	0xf8
#define GRUB_UINT8_6_LEADINGBITS	0xfc
#define GRUB_UINT8_7_LEADINGBITS	0xfe

/* Masks covering the N least significant bits of an octet.  They are used
   to extract the payload bits of a UTF-8 byte.  */
#define GRUB_UINT8_1_TRAILINGBIT	0x01
#define GRUB_UINT8_2_TRAILINGBITS	0x03
#define GRUB_UINT8_3_TRAILINGBITS	0x07
#define GRUB_UINT8_4_TRAILINGBITS	0x0f
#define GRUB_UINT8_5_TRAILINGBITS	0x1f
#define GRUB_UINT8_6_TRAILINGBITS	0x3f
 | |
| 
 | |
/* Buffer-sizing bound: UTF-8 bytes to reserve per UTF-16 word.
   NOTE(review): presumably meant for callers of grub_utf16_to_utf8;
   confirm against the call sites.  */
#define GRUB_MAX_UTF8_PER_UTF16 4
/* You need at least one UTF-8 byte to have one UTF-16 word.
   You need four UTF-8 bytes to have 2 UTF-16 words (a surrogate pair),
   so a UTF-8 string never yields more UTF-16 words than it has bytes.
 */
#define GRUB_MAX_UTF16_PER_UTF8 1

/* First code point that does not fit in a single UTF-16 word.  */
#define GRUB_UCS2_LIMIT 0x10000

/* Split a code point >= GRUB_UCS2_LIMIT into its UTF-16 surrogate pair.
   After subtracting GRUB_UCS2_LIMIT the value has 20 significant bits:
   the upper (high) surrogate carries the top 10 of them on a 0xD800 base,
   the lower (low) surrogate carries the bottom 10 on a 0xDC00 base.  */
#define GRUB_UTF16_UPPER_SURROGATE(code) \
  (0xD800 + ((((code) - GRUB_UCS2_LIMIT) >> 10) & 0x3ff))
#define GRUB_UTF16_LOWER_SURROGATE(code) \
  (0xDC00 + (((code) - GRUB_UCS2_LIMIT) & 0x3ff))
 | |
| 
 | |
| /* Process one character from UTF8 sequence. 
 | |
|    At beginning set *code = 0, *count = 0. Returns 0 on failure and
 | |
|    1 on success. *count holds the number of trailing bytes.  */
 | |
| static inline int
 | |
| grub_utf8_process (grub_uint8_t c, grub_uint32_t *code, int *count)
 | |
| {
 | |
|   if (*count)
 | |
|     {
 | |
|       if ((c & GRUB_UINT8_2_LEADINGBITS) != GRUB_UINT8_1_LEADINGBIT)
 | |
| 	{
 | |
| 	  *count = 0;
 | |
| 	  /* invalid */
 | |
| 	  return 0;
 | |
| 	}
 | |
|       else
 | |
| 	{
 | |
| 	  *code <<= 6;
 | |
| 	  *code |= (c & GRUB_UINT8_6_TRAILINGBITS);
 | |
| 	  (*count)--;
 | |
| 	  return 1;
 | |
| 	}
 | |
|     }
 | |
| 
 | |
|   if ((c & GRUB_UINT8_1_LEADINGBIT) == 0)
 | |
|     {
 | |
|       *code = c;
 | |
|       return 1;
 | |
|     }
 | |
|   if ((c & GRUB_UINT8_3_LEADINGBITS) == GRUB_UINT8_2_LEADINGBITS)
 | |
|     {
 | |
|       *count = 1;
 | |
|       *code = c & GRUB_UINT8_5_TRAILINGBITS;
 | |
|       return 1;
 | |
|     }
 | |
|   if ((c & GRUB_UINT8_4_LEADINGBITS) == GRUB_UINT8_3_LEADINGBITS)
 | |
|     {
 | |
|       *count = 2;
 | |
|       *code = c & GRUB_UINT8_4_TRAILINGBITS;
 | |
|       return 1;
 | |
|     }
 | |
|   if ((c & GRUB_UINT8_5_LEADINGBITS) == GRUB_UINT8_4_LEADINGBITS)
 | |
|     {
 | |
|       *count = 3;
 | |
|       *code = c & GRUB_UINT8_3_TRAILINGBITS;
 | |
|       return 1;
 | |
|     }
 | |
|   return 0;
 | |
| }
 | |
| 
 | |
| 
 | |
| /* Convert a (possibly null-terminated) UTF-8 string of at most SRCSIZE
 | |
|    bytes (if SRCSIZE is -1, it is ignored) in length to a UTF-16 string.
 | |
|    Return the number of characters converted. DEST must be able to hold
 | |
|    at least DESTSIZE characters. If an invalid sequence is found, return -1.
 | |
|    If SRCEND is not NULL, then *SRCEND is set to the next byte after the
 | |
|    last byte used in SRC.  */
 | |
| static inline grub_size_t
 | |
| grub_utf8_to_utf16 (grub_uint16_t *dest, grub_size_t destsize,
 | |
| 		    const grub_uint8_t *src, grub_size_t srcsize,
 | |
| 		    const grub_uint8_t **srcend)
 | |
| {
 | |
|   grub_uint16_t *p = dest;
 | |
|   int count = 0;
 | |
|   grub_uint32_t code = 0;
 | |
| 
 | |
|   if (srcend)
 | |
|     *srcend = src;
 | |
| 
 | |
|   while (srcsize && destsize)
 | |
|     {
 | |
|       int was_count = count;
 | |
|       if (srcsize != (grub_size_t)-1)
 | |
| 	srcsize--;
 | |
|       if (!grub_utf8_process (*src++, &code, &count))
 | |
| 	{
 | |
| 	  code = '?';
 | |
| 	  count = 0;
 | |
| 	  /* Character c may be valid, don't eat it.  */
 | |
| 	  if (was_count)
 | |
| 	    src--;
 | |
| 	}
 | |
|       if (count != 0)
 | |
| 	continue;
 | |
|       if (code == 0)
 | |
| 	break;
 | |
|       if (destsize < 2 && code >= GRUB_UCS2_LIMIT)
 | |
| 	break;
 | |
|       if (code >= GRUB_UCS2_LIMIT)
 | |
| 	{
 | |
| 	  *p++ = GRUB_UTF16_UPPER_SURROGATE (code);
 | |
| 	  *p++ = GRUB_UTF16_LOWER_SURROGATE (code);
 | |
| 	  destsize -= 2;
 | |
| 	}
 | |
|       else
 | |
| 	{
 | |
| 	  *p++ = code;
 | |
| 	  destsize--;
 | |
| 	}
 | |
|     }
 | |
| 
 | |
|   if (srcend)
 | |
|     *srcend = src;
 | |
|   return p - dest;
 | |
| }
 | |
| 
 | |
| /* Determine the last position where the UTF-8 string [beg, end) can
 | |
|    be safely cut. */
 | |
| static inline grub_size_t
 | |
| grub_getend (const char *beg, const char *end)
 | |
| {
 | |
|   const char *ptr;
 | |
|   for (ptr = end - 1; ptr >= beg; ptr--)
 | |
|     if ((*ptr & GRUB_UINT8_2_LEADINGBITS) != GRUB_UINT8_1_LEADINGBIT)
 | |
|       break;
 | |
|   if (ptr < beg)
 | |
|     return 0;
 | |
|   if ((*ptr & GRUB_UINT8_1_LEADINGBIT) == 0)
 | |
|     return ptr + 1 - beg;
 | |
|   if ((*ptr & GRUB_UINT8_3_LEADINGBITS) == GRUB_UINT8_2_LEADINGBITS
 | |
|       && ptr + 2 <= end)
 | |
|     return ptr + 2 - beg;
 | |
|   if ((*ptr & GRUB_UINT8_4_LEADINGBITS) == GRUB_UINT8_3_LEADINGBITS
 | |
|       && ptr + 3 <= end)
 | |
|     return ptr + 3 - beg;
 | |
|   if ((*ptr & GRUB_UINT8_5_LEADINGBITS) == GRUB_UINT8_4_LEADINGBITS
 | |
|       && ptr + 4 <= end)
 | |
|     return ptr + 4 - beg;
 | |
|   /* Invalid character or incomplete. Cut before it.  */
 | |
|   return ptr - beg;
 | |
| }
 | |
| 
 | |
| /* Convert UTF-16 to UTF-8.  */
 | |
| static inline grub_uint8_t *
 | |
| grub_utf16_to_utf8 (grub_uint8_t *dest, const grub_uint16_t *src,
 | |
| 		    grub_size_t size)
 | |
| {
 | |
|   grub_uint32_t code_high = 0;
 | |
| 
 | |
|   while (size--)
 | |
|     {
 | |
|       grub_uint32_t code = *src++;
 | |
| 
 | |
|       if (code_high)
 | |
| 	{
 | |
| 	  if (code >= 0xDC00 && code <= 0xDFFF)
 | |
| 	    {
 | |
| 	      /* Surrogate pair.  */
 | |
| 	      code = ((code_high - 0xD800) << 12) + (code - 0xDC00) + 0x10000;
 | |
| 
 | |
| 	      *dest++ = (code >> 18) | 0xF0;
 | |
| 	      *dest++ = ((code >> 12) & 0x3F) | 0x80;
 | |
| 	      *dest++ = ((code >> 6) & 0x3F) | 0x80;
 | |
| 	      *dest++ = (code & 0x3F) | 0x80;
 | |
| 	    }
 | |
| 	  else
 | |
| 	    {
 | |
| 	      /* Error...  */
 | |
| 	      *dest++ = '?';
 | |
| 	      /* *src may be valid. Don't eat it.  */
 | |
| 	      src--;
 | |
| 	    }
 | |
| 
 | |
| 	  code_high = 0;
 | |
| 	}
 | |
|       else
 | |
| 	{
 | |
| 	  if (code <= 0x007F)
 | |
| 	    *dest++ = code;
 | |
| 	  else if (code <= 0x07FF)
 | |
| 	    {
 | |
| 	      *dest++ = (code >> 6) | 0xC0;
 | |
| 	      *dest++ = (code & 0x3F) | 0x80;
 | |
| 	    }
 | |
| 	  else if (code >= 0xD800 && code <= 0xDBFF)
 | |
| 	    {
 | |
| 	      code_high = code;
 | |
| 	      continue;
 | |
| 	    }
 | |
| 	  else if (code >= 0xDC00 && code <= 0xDFFF)
 | |
| 	    {
 | |
| 	      /* Error... */
 | |
| 	      *dest++ = '?';
 | |
| 	    }
 | |
| 	  else if (code < 0x10000)
 | |
| 	    {
 | |
| 	      *dest++ = (code >> 12) | 0xE0;
 | |
| 	      *dest++ = ((code >> 6) & 0x3F) | 0x80;
 | |
| 	      *dest++ = (code & 0x3F) | 0x80;
 | |
| 	    }
 | |
| 	  else
 | |
| 	    {
 | |
| 	      *dest++ = (code >> 18) | 0xF0;
 | |
| 	      *dest++ = ((code >> 12) & 0x3F) | 0x80;
 | |
| 	      *dest++ = ((code >> 6) & 0x3F) | 0x80;
 | |
| 	      *dest++ = (code & 0x3F) | 0x80;
 | |
| 	    }
 | |
| 	}
 | |
|     }
 | |
| 
 | |
|   return dest;
 | |
| }
 | |
| 
 | |
| #define GRUB_MAX_UTF8_PER_LATIN1 2
 | |
| 
 | |
| /* Convert Latin1 to UTF-8.  */
 | |
| static inline grub_uint8_t *
 | |
| grub_latin1_to_utf8 (grub_uint8_t *dest, const grub_uint8_t *src,
 | |
| 		     grub_size_t size)
 | |
| {
 | |
|   while (size--)
 | |
|     {
 | |
|       if (!(*src & 0x80))
 | |
| 	*dest++ = *src;
 | |
|       else
 | |
| 	{
 | |
| 	  *dest++ = (*src >> 6) | 0xC0;
 | |
| 	  *dest++ = (*src & 0x3F) | 0x80;
 | |
| 	}
 | |
|       src++;
 | |
|     }
 | |
| 
 | |
|   return dest;
 | |
| }
 | |
| 
 | |
| /* Convert UCS-4 to UTF-8.  */
 | |
| char *grub_ucs4_to_utf8_alloc (const grub_uint32_t *src, grub_size_t size);
 | |
| 
 | |
| int
 | |
| grub_is_valid_utf8 (const grub_uint8_t *src, grub_size_t srcsize);
 | |
| 
 | |
| grub_ssize_t grub_utf8_to_ucs4_alloc (const char *msg,
 | |
| 				      grub_uint32_t **unicode_msg,
 | |
| 				      grub_uint32_t **last_position);
 | |
| 
 | |
| void
 | |
| grub_ucs4_to_utf8 (const grub_uint32_t *src, grub_size_t size,
 | |
| 		   grub_uint8_t *dest, grub_size_t destsize);
 | |
| grub_size_t grub_utf8_to_ucs4 (grub_uint32_t *dest, grub_size_t destsize,
 | |
| 			       const grub_uint8_t *src, grub_size_t srcsize,
 | |
| 			       const grub_uint8_t **srcend);
 | |
| /* Returns -2 if not enough space, -1 on invalid character.  */
 | |
| grub_ssize_t
 | |
| grub_encode_utf8_character (grub_uint8_t *dest, grub_uint8_t *destend,
 | |
| 			    grub_uint32_t code);
 | |
| 
 | |
| #endif
 |