From b7b53a5ccc03c1b0f262df185e1c90705bd3f016 Mon Sep 17 00:00:00 2001
From: Francis Couture-Harpin
Date: Sun, 21 Jan 2024 11:01:31 -0500
Subject: [PATCH] convert : use presence of tokenizer.json to determine StableLM tokenizer loader

It's a less arbitrary heuristic than the vocab size.
---
 convert-hf-to-gguf.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/convert-hf-to-gguf.py b/convert-hf-to-gguf.py
index 26934f1cc..2531580d4 100755
--- a/convert-hf-to-gguf.py
+++ b/convert-hf-to-gguf.py
@@ -929,11 +929,11 @@ class PersimmonModel(Model):
 
 class StableLMModel(Model):
     def set_vocab(self):
-        # StableLM 2 1.6B uses a vocab in a similar format to Qwen's vocab
-        if self.hparams["vocab_size"] > 100000:
-            self._set_vocab_qwen()
-        else:
+        if (self.dir_model / "tokenizer.json").is_file():
             self._set_vocab_gpt2()
+        else:
+            # StableLM 2 1.6B uses a vocab in a similar format to Qwen's vocab
+            self._set_vocab_qwen()
 
     def set_gguf_parameters(self):
         hparams = self.hparams