Correctly identify LF token for GPT-2 style BPE tokenizer
This commit is contained in:
parent
6171c9d258
commit
fe8d4df76b
1 changed file with 1 addition and 1 deletion
|
@@ -1687,7 +1687,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
|
|||
GGML_ASSERT(!ids.empty() && "model vocab missing newline token");
|
||||
linefeed_id = ids[0];
|
||||
} else {
|
||||
const std::vector<int> ids = tokenize("\xC4\x8A", false); // U+010A
|
||||
const std::vector<int> ids = tokenize("\n", false);
|
||||
|
||||
//GGML_ASSERT(!ids.empty() && "model vocab missing newline token");
|
||||
if (ids.empty()) {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue