account for space prefix character

This commit is contained in:
Sigbjørn Skjæret 2024-06-26 23:21:39 +02:00 committed by GitHub
parent c70d117c37
commit b2b9bd8cbf
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@@ -5152,10 +5152,10 @@ static void llm_load_vocab(
if (gen_name.find("code") != std::string::npos) { if (gen_name.find("code") != std::string::npos) {
if (model.arch == LLM_ARCH_LLAMA if (model.arch == LLM_ARCH_LLAMA
&& 32010 < vocab.id_to_token.size() && 32010 < vocab.id_to_token.size()
&& vocab.id_to_token[32007].text == "<PRE>" && vocab.id_to_token[32007].text.ends_with("<PRE>")
&& vocab.id_to_token[32008].text == "<SUF>" && vocab.id_to_token[32008].text.ends_with("<SUF>")
&& vocab.id_to_token[32009].text == "<MID>" && vocab.id_to_token[32009].text.ends_with("<MID>")
&& vocab.id_to_token[32010].text == "<EOT>") { && vocab.id_to_token[32010].text.ends_with("<EOT>")) {
vocab.special_prefix_id = 32007; vocab.special_prefix_id = 32007;
vocab.special_suffix_id = 32008; vocab.special_suffix_id = 32008;
vocab.special_middle_id = 32009; vocab.special_middle_id = 32009;