Update convert-llama-h5-to-gguf.py

2023-07-29 21:38:01 +02:00 · 2023-07-29 21:38:01 +02:00 · 06c3e4a1a7
commit 06c3e4a1a7
parent 9577821487
1 changed file with 3 additions and 4 deletions
--- a/convert-llama-h5-to-gguf.py
+++ b/convert-llama-h5-to-gguf.py
@@ -91,11 +91,13 @@ gguf_writer.write_float32(llm_arch + ".attention.layer_norm_rms_epsilon", hparam

 # TOKENIZATION

+print("write gguf tokenizer")
+
 tokens: List[str] = []
 scores: List[float] = []

 if Path(dir_model + "/tokenizer.model").is_file():
-    # vocab type SPIECE
+    # vocab type sentencepiece
    print("Adding sentencepiece tokenizer vocab.")
    tokenizer = SentencePieceProcessor(dir_model + "/tokenizer.model")

@@ -123,15 +125,12 @@ if Path(dir_model + "/tokenizer.model").is_file():
        tokens.append(text)
        scores.append(score)

-print("write gguf tokens")
-
 gguf_writer.write_tokenizer_model("llama")
 gguf_writer.write_token_list(tokens)
 gguf_writer.write_token_scores(scores)

 # TENSORS

-
 # tensor info
 print("write gguf tensor info")