Update convert-llama-h5-to-gguf.py
parent 9577821487
commit 06c3e4a1a7

1 changed file with 3 additions and 4 deletions
--- a/convert-llama-h5-to-gguf.py
+++ b/convert-llama-h5-to-gguf.py
@@ -91,11 +91,13 @@ gguf_writer.write_float32(llm_arch + ".attention.layer_norm_rms_epsilon", hparam
 
 # TOKENIZATION
 
+print("write gguf tokenizer")
+
 tokens: List[str] = []
 scores: List[float] = []
 
 if Path(dir_model + "/tokenizer.model").is_file():
-    # vocab type SPIECE
+    # vocab type sentencepiece
     print("Adding sentencepiece tokenizer vocab.")
     tokenizer = SentencePieceProcessor(dir_model + "/tokenizer.model")
 
@@ -123,15 +125,12 @@ if Path(dir_model + "/tokenizer.model").is_file():
         tokens.append(text)
         scores.append(score)
 
-print("write gguf tokens")
-
-
 gguf_writer.write_tokenizer_model("llama")
 gguf_writer.write_token_list(tokens)
 gguf_writer.write_token_scores(scores)
 
 # TENSORS
 
 
 # tensor info
 print("write gguf tensor info")
 
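For reference, below is a minimal sketch of how the tokenizer section of the script reads after this commit. The per-token loop sits between the two hunks and is not shown in the diff, so it is an assumption based on the standard sentencepiece API (vocab_size, id_to_piece, get_score), not a verbatim copy of the file; dir_model and gguf_writer are defined earlier in the script.

# Sketch of the tokenizer section after this commit; the per-token loop
# is assumed, not taken from the diff above.
from pathlib import Path
from typing import List
from sentencepiece import SentencePieceProcessor

print("write gguf tokenizer")

tokens: List[str] = []
scores: List[float] = []

if Path(dir_model + "/tokenizer.model").is_file():
    # vocab type sentencepiece
    print("Adding sentencepiece tokenizer vocab.")
    tokenizer = SentencePieceProcessor(dir_model + "/tokenizer.model")

    for i in range(tokenizer.vocab_size()):
        text = tokenizer.id_to_piece(i)   # assumed: piece text for each token id
        score = tokenizer.get_score(i)    # assumed: log-probability score
        tokens.append(text)
        scores.append(score)

# dir_model and gguf_writer come from earlier in the script
gguf_writer.write_tokenizer_model("llama")
gguf_writer.write_token_list(tokens)
gguf_writer.write_token_scores(scores)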