llama : remove notion of CLS token (#11064)

ggml-ci
This commit is contained in:
Georgi Gerganov 2025-01-12 12:15:53 +02:00 committed by GitHub
parent afa8a9ec9b
commit 08f10f69c3
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 12 additions and 25 deletions

View file

@ -184,7 +184,6 @@ class Keys:
UNK_ID = "tokenizer.ggml.unknown_token_id"
SEP_ID = "tokenizer.ggml.seperator_token_id"
PAD_ID = "tokenizer.ggml.padding_token_id"
CLS_ID = "tokenizer.ggml.cls_token_id"
MASK_ID = "tokenizer.ggml.mask_token_id"
ADD_BOS = "tokenizer.ggml.add_bos_token"
ADD_EOS = "tokenizer.ggml.add_eos_token"
@ -1837,7 +1836,6 @@ KEY_TOKENIZER_EOM_ID = Keys.Tokenizer.EOM_ID
KEY_TOKENIZER_UNK_ID = Keys.Tokenizer.UNK_ID
KEY_TOKENIZER_SEP_ID = Keys.Tokenizer.SEP_ID
KEY_TOKENIZER_PAD_ID = Keys.Tokenizer.PAD_ID
KEY_TOKENIZER_CLS_ID = Keys.Tokenizer.CLS_ID
KEY_TOKENIZER_MASK_ID = Keys.Tokenizer.MASK_ID
KEY_TOKENIZER_HF_JSON = Keys.Tokenizer.HF_JSON
KEY_TOKENIZER_RWKV = Keys.Tokenizer.RWKV

View file

@ -857,9 +857,6 @@ class GGUFWriter:
def add_pad_token_id(self, id: int) -> None:
self.add_uint32(Keys.Tokenizer.PAD_ID, id)
def add_cls_token_id(self, id: int) -> None:
self.add_uint32(Keys.Tokenizer.CLS_ID, id)
def add_mask_token_id(self, id: int) -> None:
self.add_uint32(Keys.Tokenizer.MASK_ID, id)