llama.cpp : fix LF token
This commit is contained in:
parent
c767746399
commit
ab3ba64f62
1 changed file with 6 additions and 1 deletion
|
@@ -1636,6 +1636,7 @@ static void llm_load_hparams(
|
||||||
|
|
||||||
// TODO: This should probably be in llama.h
|
// TODO: This should probably be in llama.h
|
||||||
static std::vector<llama_vocab::id> llama_tokenize_internal(const llama_vocab & vocab, std::string raw_text, bool bos);
|
static std::vector<llama_vocab::id> llama_tokenize_internal(const llama_vocab & vocab, std::string raw_text, bool bos);
|
||||||
|
static llama_token llama_byte_to_token(const llama_vocab & vocab, uint8_t ch);
|
||||||
|
|
||||||
static void llm_load_vocab(
|
static void llm_load_vocab(
|
||||||
llama_model_loader & ml,
|
llama_model_loader & ml,
|
||||||
|
@@ -1737,7 +1738,11 @@ static void llm_load_vocab(
|
||||||
}
|
}
|
||||||
|
|
||||||
// determine the newline token: LLaMA "<0x0A>" == 10 == '\n', Falcon 193 == '\n'
|
// determine the newline token: LLaMA "<0x0A>" == 10 == '\n', Falcon 193 == '\n'
|
||||||
|
if (vocab.type == LLAMA_VOCAB_TYPE_SPM) {
|
||||||
|
vocab.linefeed_id = llama_byte_to_token(vocab, '\n');
|
||||||
|
} else {
|
||||||
vocab.linefeed_id = llama_tokenize_internal(vocab, "\n", false)[0];
|
vocab.linefeed_id = llama_tokenize_internal(vocab, "\n", false)[0];
|
||||||
|
}
|
||||||
|
|
||||||
// special tokens
|
// special tokens
|
||||||
GGUF_GET_KEY(ctx, vocab.special_bos_id, gguf_get_val_u32, GGUF_TYPE_UINT32, false, kv(LLM_KV_TOKENIZER_BOS_ID));
|
GGUF_GET_KEY(ctx, vocab.special_bos_id, gguf_get_val_u32, GGUF_TYPE_UINT32, false, kv(LLM_KV_TOKENIZER_BOS_ID));
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue