gguf : make token scores and types optional (#3347)

Cebtenzzre 2023-09-28 14:30:15 -04:00 committed by GitHub
parent 2619109ad5
commit ecf90b1a51
3 changed files with 8 additions and 22 deletions


@@ -133,8 +133,6 @@ gguf_writer.add_file_type(ftype)
 print("gguf: get tokenizer metadata")
 
 tokens: list[bytearray] = []
-scores: list[float] = []
-toktypes: list[int] = []
 
 tokenizer_json_file = dir_model / 'tokenizer.json'
 if not tokenizer_json_file.is_file():
@@ -177,12 +175,8 @@ for i in range(vocab_size):
         text = bytearray(pad_token)
 
     tokens.append(text)
-    scores.append(0.0) # dymmy
-    toktypes.append(gguf.TokenType.NORMAL) # dummy
 
 gguf_writer.add_token_list(tokens)
-gguf_writer.add_token_scores(scores)
-gguf_writer.add_token_types(toktypes)
 
 special_vocab = gguf.SpecialVocab(dir_model, load_merges = True)
 special_vocab.add_to_gguf(gguf_writer)
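
For reference, a minimal sketch (not part of this commit) of the simplified vocab path after this change: the conversion script calls add_token_list on its own and drops the dummy add_token_scores / add_token_types calls, while SpecialVocab still contributes merges and special token IDs. The output path, architecture string, tokenizer model name, and placeholder tokens below are illustrative assumptions, not taken from the commit.

    from pathlib import Path

    import gguf

    # Illustrative values; a real conversion script derives these from the model.
    dir_model   = Path("models/example-model")
    gguf_writer = gguf.GGUFWriter("example.gguf", "falcon")

    # Placeholder vocabulary; the script above builds this from tokenizer.json.
    tokens: list[bytearray] = [bytearray(b"<s>"), bytearray(b"hello"), bytearray(b"world")]

    gguf_writer.add_tokenizer_model("gpt2")
    gguf_writer.add_token_list(tokens)  # token scores and types are now optional

    special_vocab = gguf.SpecialVocab(dir_model, load_merges = True)
    special_vocab.add_to_gguf(gguf_writer)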