diff --git a/convert-falcon-hf-to-gguf.py b/convert-falcon-hf-to-gguf.py
index b3e190a0f..32420796b 100644
--- a/convert-falcon-hf-to-gguf.py
+++ b/convert-falcon-hf-to-gguf.py
@@ -109,6 +109,8 @@ gguf_writer.add_layer_norm_eps(hparams["layer_norm_epsilon"])
 print("gguf: get tokenizer metadata")
 
 tokens: List[str] = []
+scores: List[float] = []
+toktypes: List[int] = []
 merges: List[str] = []
@@ -152,8 +154,12 @@ if Path(dir_model + "/tokenizer.json").is_file():
             text = bytearray(pad_token)
 
         tokens.append(text)
+        scores.append(0.0)  # dummy
+        toktypes.append(gguf.TokenType.NORMAL)  # dummy
 
     gguf_writer.add_token_list(tokens)
+    gguf_writer.add_token_scores(scores)
+    gguf_writer.add_token_types(toktypes)
 
     if "added_tokens" in tokenizer_json and Path(dir_model + "/tokenizer_config.json").is_file():
         print("gguf: get special token ids")
diff --git a/llama.cpp b/llama.cpp
index 5c0bf6190..61e989810 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -1968,7 +1968,7 @@ static bool llama_model_load(
                 } break;
             case LLM_ARCH_FALCON:
                 {
-                }
+                } break;
             default:
                 throw std::runtime_error("unsupported architecture");
     };