convert : use presence of tokenizer.json to determine StableLM tokenizer loader
It's a less arbitrary heuristic than the vocab size.
This commit is contained in:
parent
a11f1497ef
commit
b7b53a5ccc
1 changed file with 4 additions and 4 deletions
|
@ -929,11 +929,11 @@ class PersimmonModel(Model):
|
|||
|
||||
class StableLMModel(Model):
|
||||
def set_vocab(self):
|
||||
# StableLM 2 1.6B uses a vocab in a similar format to Qwen's vocab
|
||||
if self.hparams["vocab_size"] > 100000:
|
||||
self._set_vocab_qwen()
|
||||
else:
|
||||
if (self.dir_model / "tokenizer.json").is_file():
|
||||
self._set_vocab_gpt2()
|
||||
else:
|
||||
# StableLM 2 1.6B uses a vocab in a similar format to Qwen's vocab
|
||||
self._set_vocab_qwen()
|
||||
|
||||
def set_gguf_parameters(self):
|
||||
hparams = self.hparams
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue