efi/libstub: Get the exact UTF-8 length

efi_convert_cmdline currently overestimates the length of the equivalent
UTF-8 encoding.

snprintf can now be used to do the conversion to UTF-8. However, it does
not have a way to specify the size of the UTF-16 string, only the size
of the resulting UTF-8 string. So in order to use it, we need to
precalculate the exact UTF-8 size.

Signed-off-by: Arvind Sankar <nivedita@alum.mit.edu>
Link: https://lore.kernel.org/r/20200518190716.751506-24-nivedita@alum.mit.edu
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
This commit is contained in:
Arvind Sankar 2020-05-18 15:07:15 -04:00 committed by Ard Biesheuvel
parent a713979e44
commit 15c316bcbc

View file

@ -205,15 +205,6 @@ efi_status_t efi_parse_options(char const *cmdline)
return EFI_SUCCESS;
}
/*
 * Return how many UTF-8 bytes are needed to encode a single UTF-16
 * code unit:
 *   below 0x80   -> 1 byte
 *   below 0x800  -> 2 bytes
 *   anything else -> 3 bytes
 *
 * Surrogate code units fall in the 3-byte bucket, so each half of a
 * surrogate pair is counted as 3 bytes. That overestimates the size of
 * the pair, which is acceptable because the result is only used as an
 * upper bound.
 */
static int efi_utf8_bytes(u16 c)
{
	if (c < 0x80)
		return 1;
	if (c < 0x800)
		return 2;
	return 3;
}
/*
* Convert a UTF-16 string, not necessarily null terminated, to UTF-8.
*/
@ -274,10 +265,39 @@ char *efi_convert_cmdline(efi_loaded_image_t *image,
if (options) {
s2 = options;
while (*s2 && *s2 != '\n'
&& options_chars < load_options_chars) {
options_bytes += efi_utf8_bytes(*s2++);
while (options_chars < load_options_chars) {
u16 c = *s2++;
if (c == L'\0' || c == L'\n')
break;
/*
* Get the number of UTF-8 bytes corresponding to a
* UTF-16 character.
* The first part handles everything in the BMP.
*/
options_bytes += 1 + (c >= 0x80) + (c >= 0x800);
options_chars++;
/*
* Add one more byte for valid surrogate pairs. Invalid
* surrogates will be replaced with 0xfffd and take up
* only 3 bytes.
*/
if ((c & 0xfc00) == 0xd800) {
/*
* If the very last word is a high surrogate,
* we must ignore it since we can't access the
* low surrogate.
*/
if (options_chars == load_options_chars) {
options_bytes -= 3;
options_chars--;
break;
} else if ((*s2 & 0xfc00) == 0xdc00) {
options_bytes++;
options_chars++;
s2++;
}
}
}
}