Merge 6ed2f795ae
into 48baa61ecc
This commit is contained in:
commit
1a8d832c60
2 changed files with 88 additions and 82 deletions
|
@ -265,7 +265,13 @@ private:
|
||||||
// output any symbols that did not form tokens as bytes.
|
// output any symbols that did not form tokens as bytes.
|
||||||
output.reserve(output.size() + symbol.n);
|
output.reserve(output.size() + symbol.n);
|
||||||
for (int j = 0; j < (int)symbol.n; ++j) {
|
for (int j = 0; j < (int)symbol.n; ++j) {
|
||||||
llama_vocab::id token_id = llama_byte_to_token_impl(vocab, symbol.text[j]);
|
llama_vocab::id token_id;
|
||||||
|
try {
|
||||||
|
token_id = llama_byte_to_token_impl(vocab, symbol.text[j]);
|
||||||
|
} catch(const std::exception & e) {
|
||||||
|
// not found, use UNK token instead.
|
||||||
|
token_id = vocab.special_unk_id;
|
||||||
|
}
|
||||||
output.push_back(token_id);
|
output.push_back(token_id);
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
|
|
118
src/llama.cpp
118
src/llama.cpp
|
@ -6268,64 +6268,6 @@ static void llm_load_vocab(
|
||||||
}
|
}
|
||||||
GGML_ASSERT(vocab.id_to_token.size() == vocab.token_to_id.size());
|
GGML_ASSERT(vocab.id_to_token.size() == vocab.token_to_id.size());
|
||||||
|
|
||||||
// determine the newline token: LLaMA "<0x0A>" == 10 == '\n', Falcon 193 == '\n'
|
|
||||||
if (vocab.type == LLAMA_VOCAB_TYPE_SPM) {
|
|
||||||
// For Fill-In-the-Middle (FIM)/infill models which were converted
|
|
||||||
// prior to support of FIM special tokens in GGUF, the following
|
|
||||||
// will allow those models to continue to work. The general names
|
|
||||||
// of the known models are currently CodeLlama (LLM_ARCH_LLAMA) and
|
|
||||||
// CodeGemma (LLM_ARCH_GEMMA). This can potentially be removed once
|
|
||||||
// new versions of these models have been published.
|
|
||||||
std::string gen_name;
|
|
||||||
ml.get_key(LLM_KV_GENERAL_NAME, gen_name, false);
|
|
||||||
|
|
||||||
std::transform(gen_name.begin(), gen_name.end(), gen_name.begin(),
|
|
||||||
[](unsigned char c){ return std::tolower(c); });
|
|
||||||
|
|
||||||
if (gen_name.find("code") != std::string::npos) {
|
|
||||||
if (model.arch == LLM_ARCH_LLAMA
|
|
||||||
&& 32010 < vocab.id_to_token.size()
|
|
||||||
&& vocab.id_to_token[32007].text.find("<PRE>") != std::string::npos
|
|
||||||
&& vocab.id_to_token[32008].text.find("<SUF>") != std::string::npos
|
|
||||||
&& vocab.id_to_token[32009].text.find("<MID>") != std::string::npos
|
|
||||||
&& vocab.id_to_token[32010].text.find("<EOT>") != std::string::npos) {
|
|
||||||
vocab.special_prefix_id = 32007;
|
|
||||||
vocab.special_suffix_id = 32008;
|
|
||||||
vocab.special_middle_id = 32009;
|
|
||||||
vocab.special_eot_id = 32010;
|
|
||||||
} else if (model.arch == LLM_ARCH_GEMMA
|
|
||||||
&& 107 < vocab.id_to_token.size()
|
|
||||||
&& vocab.id_to_token[67].text == "<|fim_prefix|>"
|
|
||||||
&& vocab.id_to_token[69].text == "<|fim_suffix|>"
|
|
||||||
&& vocab.id_to_token[68].text == "<|fim_middle|>"
|
|
||||||
&& vocab.id_to_token[107].text == "<end_of_turn>") {
|
|
||||||
vocab.special_prefix_id = 67;
|
|
||||||
vocab.special_suffix_id = 69;
|
|
||||||
vocab.special_middle_id = 68;
|
|
||||||
// TODO: this is not EOT, it is "file separator" token, needs fix
|
|
||||||
// https://huggingface.co/google/codegemma-7b-it/blob/9b1d9231388358c04d90bd003458f5070d97db44/tokenizer_config.json#L565-L572
|
|
||||||
//vocab.special_eot_id = 70;
|
|
||||||
vocab.special_eot_id = 107;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
try {
|
|
||||||
vocab.linefeed_id = llama_byte_to_token_impl(vocab, '\n');
|
|
||||||
} catch (const std::exception & e) {
|
|
||||||
LLAMA_LOG_WARN("%s: SPM vocabulary, but newline token not found: %s! Using special_pad_id instead.", __func__, e.what());
|
|
||||||
vocab.linefeed_id = vocab.special_pad_id;
|
|
||||||
}
|
|
||||||
} else if (vocab.type == LLAMA_VOCAB_TYPE_WPM) {
|
|
||||||
vocab.linefeed_id = vocab.special_pad_id;
|
|
||||||
} else if (vocab.type == LLAMA_VOCAB_TYPE_RWKV) {
|
|
||||||
const std::vector<int> ids = llama_tokenize_internal(vocab, "\n", false);
|
|
||||||
GGML_ASSERT(!ids.empty() && "model vocab missing newline token");
|
|
||||||
vocab.linefeed_id = ids[0];
|
|
||||||
} else {
|
|
||||||
const std::vector<int> ids = llama_tokenize_internal(vocab, "\xC4\x8A", false); // U+010A
|
|
||||||
GGML_ASSERT(!ids.empty() && "model vocab missing newline token");
|
|
||||||
vocab.linefeed_id = ids[0];
|
|
||||||
}
|
|
||||||
|
|
||||||
// special tokens
|
// special tokens
|
||||||
{
|
{
|
||||||
const std::vector<std::pair<enum llm_kv, int32_t &>> special_token_types = {
|
const std::vector<std::pair<enum llm_kv, int32_t &>> special_token_types = {
|
||||||
|
@ -6406,6 +6348,64 @@ static void llm_load_vocab(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// determine the newline token: LLaMA "<0x0A>" == 10 == '\n', Falcon 193 == '\n'
|
||||||
|
if (vocab.type == LLAMA_VOCAB_TYPE_SPM) {
|
||||||
|
// For Fill-In-the-Middle (FIM)/infill models which were converted
|
||||||
|
// prior to support of FIM special tokens in GGUF, the following
|
||||||
|
// will allow those models to continue to work. The general names
|
||||||
|
// of the known models are currently CodeLlama (LLM_ARCH_LLAMA) and
|
||||||
|
// CodeGemma (LLM_ARCH_GEMMA). This can potentially be removed once
|
||||||
|
// new versions of these models have been published.
|
||||||
|
std::string gen_name;
|
||||||
|
ml.get_key(LLM_KV_GENERAL_NAME, gen_name, false);
|
||||||
|
|
||||||
|
std::transform(gen_name.begin(), gen_name.end(), gen_name.begin(),
|
||||||
|
[](unsigned char c){ return std::tolower(c); });
|
||||||
|
|
||||||
|
if (gen_name.find("code") != std::string::npos) {
|
||||||
|
if (model.arch == LLM_ARCH_LLAMA
|
||||||
|
&& 32010 < vocab.id_to_token.size()
|
||||||
|
&& vocab.id_to_token[32007].text.find("<PRE>") != std::string::npos
|
||||||
|
&& vocab.id_to_token[32008].text.find("<SUF>") != std::string::npos
|
||||||
|
&& vocab.id_to_token[32009].text.find("<MID>") != std::string::npos
|
||||||
|
&& vocab.id_to_token[32010].text.find("<EOT>") != std::string::npos) {
|
||||||
|
vocab.special_prefix_id = 32007;
|
||||||
|
vocab.special_suffix_id = 32008;
|
||||||
|
vocab.special_middle_id = 32009;
|
||||||
|
vocab.special_eot_id = 32010;
|
||||||
|
} else if (model.arch == LLM_ARCH_GEMMA
|
||||||
|
&& 107 < vocab.id_to_token.size()
|
||||||
|
&& vocab.id_to_token[67].text == "<|fim_prefix|>"
|
||||||
|
&& vocab.id_to_token[69].text == "<|fim_suffix|>"
|
||||||
|
&& vocab.id_to_token[68].text == "<|fim_middle|>"
|
||||||
|
&& vocab.id_to_token[107].text == "<end_of_turn>") {
|
||||||
|
vocab.special_prefix_id = 67;
|
||||||
|
vocab.special_suffix_id = 69;
|
||||||
|
vocab.special_middle_id = 68;
|
||||||
|
// TODO: this is not EOT, it is "file separator" token, needs fix
|
||||||
|
// https://huggingface.co/google/codegemma-7b-it/blob/9b1d9231388358c04d90bd003458f5070d97db44/tokenizer_config.json#L565-L572
|
||||||
|
//vocab.special_eot_id = 70;
|
||||||
|
vocab.special_eot_id = 107;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
vocab.linefeed_id = llama_byte_to_token_impl(vocab, '\n');
|
||||||
|
} catch (const std::exception & e) {
|
||||||
|
LLAMA_LOG_WARN("%s: SPM vocabulary, but newline token not found: %s! Using special_pad_id instead.", __func__, e.what());
|
||||||
|
vocab.linefeed_id = vocab.special_pad_id;
|
||||||
|
}
|
||||||
|
} else if (vocab.type == LLAMA_VOCAB_TYPE_WPM) {
|
||||||
|
vocab.linefeed_id = vocab.special_pad_id;
|
||||||
|
} else if (vocab.type == LLAMA_VOCAB_TYPE_RWKV) {
|
||||||
|
const std::vector<int> ids = llama_tokenize_internal(vocab, "\n", false);
|
||||||
|
GGML_ASSERT(!ids.empty() && "model vocab missing newline token");
|
||||||
|
vocab.linefeed_id = ids[0];
|
||||||
|
} else {
|
||||||
|
const std::vector<int> ids = llama_tokenize_internal(vocab, "\xC4\x8A", false); // U+010A
|
||||||
|
GGML_ASSERT(!ids.empty() && "model vocab missing newline token");
|
||||||
|
vocab.linefeed_id = ids[0];
|
||||||
|
}
|
||||||
|
|
||||||
// build special tokens cache
|
// build special tokens cache
|
||||||
{
|
{
|
||||||
for (llama_vocab::id id = 0; id < (llama_vocab::id)n_vocab; ++id) {
|
for (llama_vocab::id id = 0; id < (llama_vocab::id)n_vocab; ++id) {
|
||||||
|
@ -18111,7 +18111,7 @@ struct llama_context * llama_new_context_with_model(
|
||||||
cparams.n_batch = GGML_KQ_MASK_PAD;
|
cparams.n_batch = GGML_KQ_MASK_PAD;
|
||||||
}
|
}
|
||||||
|
|
||||||
cparams.n_ubatch = std::min(cparams.n_batch, params.n_ubatch == 0 ? params.n_batch : params.n_ubatch);
|
cparams.n_ubatch = hparams.causal_attn ? std::min(cparams.n_batch, params.n_ubatch == 0 ? params.n_batch : params.n_ubatch) : cparams.n_batch;
|
||||||
|
|
||||||
cparams.n_ctx_orig_yarn = params.yarn_orig_ctx != 0 ? params.yarn_orig_ctx :
|
cparams.n_ctx_orig_yarn = params.yarn_orig_ctx != 0 ? params.yarn_orig_ctx :
|
||||||
hparams.n_ctx_orig_yarn != 0 ? hparams.n_ctx_orig_yarn :
|
hparams.n_ctx_orig_yarn != 0 ? hparams.n_ctx_orig_yarn :
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue