Minor code cleanups.
This commit is contained in:
parent
4079668cda
commit
22b914e0ba
1 changed file with 7 additions and 4 deletions
|
@@ -2242,7 +2242,7 @@ static void llm_load_vocab(
|
||||||
|
|
||||||
// special tokens
|
// special tokens
|
||||||
{
|
{
|
||||||
const std::vector<std::tuple<enum llm_kv, int32_t &>> special_token_types = {
|
const std::vector<std::pair<enum llm_kv, int32_t &>> special_token_types = {
|
||||||
{ LLM_KV_TOKENIZER_BOS_ID, vocab.special_bos_id },
|
{ LLM_KV_TOKENIZER_BOS_ID, vocab.special_bos_id },
|
||||||
{ LLM_KV_TOKENIZER_EOS_ID, vocab.special_eos_id },
|
{ LLM_KV_TOKENIZER_EOS_ID, vocab.special_eos_id },
|
||||||
{ LLM_KV_TOKENIZER_UNK_ID, vocab.special_unk_id },
|
{ LLM_KV_TOKENIZER_UNK_ID, vocab.special_unk_id },
|
||||||
|
@@ -2254,7 +2254,10 @@ static void llm_load_vocab(
|
||||||
int32_t & id = std::get<1>(it), old_id = id;
|
int32_t & id = std::get<1>(it), old_id = id;
|
||||||
|
|
||||||
GGUF_GET_KEY(ctx, id, gguf_get_val_u32, GGUF_TYPE_UINT32, false, key);
|
GGUF_GET_KEY(ctx, id, gguf_get_val_u32, GGUF_TYPE_UINT32, false, key);
|
||||||
if (id != -1 && (id < 0 || size_t(id) >= vocab.id_to_token.size())) {
|
// Must be >= -1 and < vocab size. Since the key is unsigned, -1
|
||||||
|
// can only come from the default value, so there's no point in
|
||||||
|
// validating that.
|
||||||
|
if (size_t(id + 1) > vocab.id_to_token.size()) {
|
||||||
LLAMA_LOG_WARN("%s: bad special token: '%s' = %d, using default id %d\n",
|
LLAMA_LOG_WARN("%s: bad special token: '%s' = %d, using default id %d\n",
|
||||||
__func__, key.c_str(), id, old_id);
|
__func__, key.c_str(), id, old_id);
|
||||||
id = old_id;
|
id = old_id;
|
||||||
|
@@ -6101,7 +6104,7 @@ static uint8_t llama_token_to_byte(const llama_vocab& vocab, llama_token id) {
|
||||||
}
|
}
|
||||||
|
|
||||||
static llama_token llama_byte_to_token(const llama_vocab & vocab, uint8_t ch) {
|
static llama_token llama_byte_to_token(const llama_vocab & vocab, uint8_t ch) {
|
||||||
const char * hex = "0123456789ABCDEF";
|
static const char * hex = "0123456789ABCDEF";
|
||||||
switch (llama_vocab_get_type(vocab)) {
|
switch (llama_vocab_get_type(vocab)) {
|
||||||
case LLAMA_VOCAB_TYPE_SPM: {
|
case LLAMA_VOCAB_TYPE_SPM: {
|
||||||
const char buf[7] = { '<', '0', 'x', hex[ch >> 4], hex[ch & 15], '>', 0 };
|
const char buf[7] = { '<', '0', 'x', hex[ch >> 4], hex[ch & 15], '>', 0 };
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue