diff --git a/convert-llama-h5-to-gguf.py b/convert-llama-h5-to-gguf.py
index d7706f618..56063f375 100644
--- a/convert-llama-h5-to-gguf.py
+++ b/convert-llama-h5-to-gguf.py
@@ -95,12 +95,21 @@ if "_name_or_path" in hparams:
 else:
     hf_repo=""
 
+if "max_sequence_length" in hparams:
+    ctx_length = hparams["max_sequence_length"]
+elif "max_position_embeddings" in hparams:
+    ctx_length = hparams["max_position_embeddings"]
+else:
+    print("gguf: can not find ctx length parameter.")
+    sys.exit()
+
+
 gguf_writer.add_architecture(llm_arch)
 gguf_writer.add_name(last_dir)
 gguf_writer.add_file_type("All tensors F32" if ftype == 0 else "Most tensors F16, some F32")
 gguf_writer.add_source_hf_repo(hf_repo)
 gguf_writer.add_tensor_data_layout(llm_arch, "Meta AI original pth")
-gguf_writer.add_context_length(llm_arch, hparams["max_position_embeddings"])
+gguf_writer.add_context_length(llm_arch, ctx_length)
 gguf_writer.add_embedding_length(llm_arch, hparams["hidden_size"])
 gguf_writer.add_block_count(llm_arch, block_count)
 gguf_writer.add_feed_forward_length(llm_arch, hparams["intermediate_size"])
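
For reference, the change above replaces a hard dependency on the "max_position_embeddings" hparam with a key fallback, since the context-length key name varies across checkpoint configs. Below is a minimal standalone sketch of that same resolution logic, outside the converter; the "config.json" filename and the final print are illustrative assumptions, not part of the patch.

import json
import sys

# Load the model hyperparameters from the checkpoint's config file
# (illustrative path; the converter reads this from the model directory).
with open("config.json", "r", encoding="utf-8") as f:
    hparams = json.load(f)

# Resolve the context length: prefer "max_sequence_length" if present,
# fall back to "max_position_embeddings", and bail out if neither exists,
# mirroring the if/elif/else added in the diff.
if "max_sequence_length" in hparams:
    ctx_length = hparams["max_sequence_length"]
elif "max_position_embeddings" in hparams:
    ctx_length = hparams["max_position_embeddings"]
else:
    print("gguf: can not find ctx length parameter.")
    sys.exit(1)

print("resolved context length:", ctx_length)

Note that the patch itself calls sys.exit() with no argument (exit code 0); the sketch passes 1, which is the more conventional signal for a failed conversion.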