diff --git a/convert-hf-to-gguf.py b/convert-hf-to-gguf.py index 91266185f..b8ec48835 100755 --- a/convert-hf-to-gguf.py +++ b/convert-hf-to-gguf.py @@ -31,7 +31,7 @@ if TYPE_CHECKING: from torch import Tensor if 'NO_LOCAL_GGUF' not in os.environ: - sys.path.insert(1, str(Path(__file__).parent / 'gguf-py')) + sys.path.insert(1, str(Path(__file__).parent / 'gguf-py')) import gguf from convert import LlamaHfVocab @@ -375,10 +375,13 @@ class Model: from transformers import AutoTokenizer tokenizer = AutoTokenizer.from_pretrained(self.dir_model) vocab_size = self.hparams.get("vocab_size", len(tokenizer.vocab)) - assert tokenizer.vocab_size == vocab_size + assert max(tokenizer.vocab.values()) < vocab_size + + tokpre = self.get_vocab_base_pre(tokenizer) reverse_vocab = {id_: encoded_tok for encoded_tok, id_ in tokenizer.vocab.items()} added_vocab = tokenizer.get_added_vocab() + for i in range(vocab_size): if i not in reverse_vocab: tokens.append(f"[PAD{i}]") @@ -393,7 +396,6 @@ class Model: tokens.append(reverse_vocab[i]) toktypes.append(gguf.TokenType.NORMAL) - tokpre = self.get_vocab_base_pre(tokenizer) return tokens, toktypes, tokpre # NOTE: this function is generated by convert-hf-to-gguf-update.py @@ -2439,7 +2441,7 @@ class ArcticModel(Model): tokens: list[bytes] = [f"[PAD{i}]".encode("utf-8") for i in range(vocab_size)] scores: list[float] = [-10000.0] * vocab_size - toktypes: list[int] = [SentencePieceTokenTypes.UNKNOWN] * vocab_size + toktypes: list[int] = [gguf.TokenType.UNKNOWN] * vocab_size for token_id in range(tokenizer.vocab_size()): @@ -2447,15 +2449,15 @@ class ArcticModel(Model): text = piece.encode("utf-8") score = tokenizer.GetScore(token_id) - toktype = SentencePieceTokenTypes.NORMAL + toktype = gguf.TokenType.NORMAL if tokenizer.IsUnknown(token_id): - toktype = SentencePieceTokenTypes.UNKNOWN + toktype = gguf.TokenType.UNKNOWN elif tokenizer.IsControl(token_id): - toktype = SentencePieceTokenTypes.CONTROL + toktype = gguf.TokenType.CONTROL elif 
tokenizer.IsUnused(token_id): - toktype = SentencePieceTokenTypes.UNUSED + toktype = gguf.TokenType.UNUSED elif tokenizer.IsByte(token_id): - toktype = SentencePieceTokenTypes.BYTE + toktype = gguf.TokenType.BYTE tokens[token_id] = text scores[token_id] = score @@ -2477,16 +2479,16 @@ class ArcticModel(Model): continue token_content = token_json["content"] - token_type = SentencePieceTokenTypes.USER_DEFINED + token_type = gguf.TokenType.USER_DEFINED token_score = -10000.0 # Map unk_token to UNKNOWN, other special tokens to CONTROL # Set the score to 0.0 as in the original tokenizer.model if ("special" in token_json) and token_json["special"]: if token_content == tokenizer_config_json["unk_token"]: - token_type = SentencePieceTokenTypes.UNKNOWN + token_type = gguf.TokenType.UNKNOWN else: - token_type = SentencePieceTokenTypes.CONTROL + token_type = gguf.TokenType.CONTROL token_score = 0.0 logger.info(f"Setting added token {token_id} to '{token_content}' (type: {token_type}, score: {token_score:.2f})")