llama : the WPM vocabs use the CLS token as BOS (#10930)

* llama : the WPM vocabs use the CLS token as BOS

ggml-ci

* llama : add comment
This commit is contained in:
Georgi Gerganov 2024-12-24 09:44:20 +02:00 committed by GitHub
parent 60cfa728e2
commit 30caac3a68
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 2 additions and 2 deletions

View file

@@ -45,7 +45,7 @@ struct llama_vocab {
id special_unk_id = 0;
id special_sep_id = LLAMA_TOKEN_NULL;
id special_pad_id = LLAMA_TOKEN_NULL;
-id special_cls_id = LLAMA_TOKEN_NULL;
+id special_cls_id = LLAMA_TOKEN_NULL; // TODO: revisit if this is really needed https://github.com/ggerganov/llama.cpp/pull/10930
id special_mask_id = LLAMA_TOKEN_NULL;
id linefeed_id = 13;