change kv metadata

commit 0ba23bad6f (parent 9175f4b77c)
Author: ngxson
Date:   2024-07-15 15:35:19 +02:00

5 changed files with 36 additions and 16 deletions

convert_hf_to_gguf.py

@@ -186,6 +186,7 @@ class Model:
         return new_name

     def set_gguf_parameters(self):
+        self.gguf_writer.add_type(gguf.GGUFType.MODEL)
         self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name)
         self.gguf_writer.add_block_count(self.block_count)
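For context, a minimal sketch of what the new call produces when writing a base model, using gguf-py directly. The output path, architecture string, and model name here are placeholders for illustration, not values from this commit:

```python
import gguf

# Hypothetical standalone use of the new helper; path/arch/name are examples.
writer = gguf.GGUFWriter("model.gguf", arch="llama")
writer.add_type(gguf.GGUFType.MODEL)  # writes general.type = "model"
writer.add_name("example-model")

writer.write_header_to_file()
writer.write_kv_data_to_file()
writer.write_tensors_to_file()
writer.close()
```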

convert_lora_to_gguf.py

@@ -359,17 +359,16 @@ if __name__ == '__main__':
         eager=args.no_lazy,
         model_name=None,
     )
-    logger.info("Set model parameters")
-    model_instance.set_gguf_parameters()
-
     with open(lora_config, "r") as f:
         lparams: dict[str, Any] = json.load(f)

     alpha = lparams["lora_alpha"]

-    model_instance.gguf_writer.add_string("training.type", "finetune_lora")
-    model_instance.gguf_writer.add_float32("training.lora.alpha", float(alpha))
+    model_instance.gguf_writer = gguf.GGUFWriter(path=None, arch=gguf.MODEL_ARCH_NAMES[model_instance.model_arch])
+    model_instance.gguf_writer.add_string(gguf.Keys.General.TYPE, gguf.GGUFType.ADAPTER)
+    model_instance.gguf_writer.add_string(gguf.Keys.Adapter.TYPE, "lora")
+    model_instance.gguf_writer.add_float32(gguf.Keys.Adapter.LORA_ALPHA, float(alpha))
     model_instance.gguf_writer.add_quantization_version(gguf.GGML_QUANT_VERSION)

     logger.info("Exporting model...")
     model_instance.write()
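The converter now stamps three KV pairs onto every adapter file. A rough way to inspect them afterwards with gguf-py's GGUFReader; the path and the field-decoding helper are illustrative, not part of this commit:

```python
import gguf

reader = gguf.GGUFReader("adapter.gguf")  # placeholder path

def field_value(field):
    # Strings are stored as uint8 arrays, scalars as one-element arrays.
    part = field.parts[field.data[0]]
    if field.types[0] == gguf.GGUFValueType.STRING:
        return bytes(part).decode("utf-8")
    return part[0]

for key in ("general.type", "adapter.type", "adapter.lora.alpha"):
    if key in reader.fields:
        print(key, "=", field_value(reader.fields[key]))
```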

gguf-py/gguf/constants.py

@@ -19,6 +19,7 @@ GGML_QUANT_VERSION = 2  # GGML_QNT_VERSION from ggml.h

 class Keys:
     class General:
+        TYPE = "general.type"
         ARCHITECTURE = "general.architecture"
         QUANTIZATION_VERSION = "general.quantization_version"
         ALIGNMENT = "general.alignment"

@@ -120,10 +121,17 @@ class Keys:
         MIDDLE_ID = "tokenizer.ggml.middle_token_id"
         EOT_ID = "tokenizer.ggml.eot_token_id"

+    class Adapter:
+        TYPE = "adapter.type"
+        LORA_ALPHA = "adapter.lora.alpha"
+

 #
 # recommended mapping of model tensor names for storage in gguf
 #

+class GGUFType:
+    MODEL = "model"
+    ADAPTER = "adapter"
+

 class MODEL_ARCH(IntEnum):
     LLAMA = auto()
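These constants let Python code refer to the new keys symbolically instead of through raw strings. A quick check of the mapping (plain asserts, purely for illustration):

```python
import gguf

assert gguf.Keys.General.TYPE == "general.type"
assert gguf.Keys.Adapter.TYPE == "adapter.type"
assert gguf.Keys.Adapter.LORA_ALPHA == "adapter.lora.alpha"
assert gguf.GGUFType.MODEL == "model"
assert gguf.GGUFType.ADAPTER == "adapter"
```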

gguf-py/gguf/gguf_writer.py

@@ -424,6 +424,9 @@ class GGUFWriter:
         fout.close()
         self.fout = None

+    def add_type(self, type_name: str) -> None:
+        self.add_string(Keys.General.TYPE, type_name)
+
     def add_architecture(self) -> None:
         self.add_string(Keys.General.ARCHITECTURE, self.arch)
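The helper is thin sugar over add_string, so both converters above set general.type through one code path. Given any writer instance (such as the one in the sketch further up), these two calls are equivalent:

```python
# `writer` is any gguf.GGUFWriter instance.
writer.add_type(gguf.GGUFType.ADAPTER)
writer.add_string(gguf.Keys.General.TYPE, gguf.GGUFType.ADAPTER)
```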

src/llama.cpp

@@ -287,6 +287,7 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
 };

 enum llm_kv {
+    LLM_KV_GENERAL_TYPE,
     LLM_KV_GENERAL_ARCHITECTURE,
     LLM_KV_GENERAL_QUANTIZATION_VERSION,
     LLM_KV_GENERAL_ALIGNMENT,
@@ -378,11 +379,12 @@ enum llm_kv {
     LLM_KV_TOKENIZER_MIDDLE_ID,
     LLM_KV_TOKENIZER_EOT_ID,

-    LLM_KV_TRAINING_TYPE,
-    LLM_KV_TRAINING_LORA_ALPHA,
+    LLM_KV_ADAPTER_TYPE,
+    LLM_KV_ADAPTER_LORA_ALPHA,
 };

 static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
+    { LLM_KV_GENERAL_TYPE,                 "general.type"                 },
     { LLM_KV_GENERAL_ARCHITECTURE,         "general.architecture"         },
     { LLM_KV_GENERAL_QUANTIZATION_VERSION, "general.quantization_version" },
     { LLM_KV_GENERAL_ALIGNMENT,            "general.alignment"            },
@@ -474,8 +476,8 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
     { LLM_KV_TOKENIZER_MIDDLE_ID, "tokenizer.ggml.middle_token_id" },
     { LLM_KV_TOKENIZER_EOT_ID,    "tokenizer.ggml.eot_token_id"    },

-    { LLM_KV_TRAINING_TYPE,       "training.type"       },
-    { LLM_KV_TRAINING_LORA_ALPHA, "training.lora.alpha" },
+    { LLM_KV_ADAPTER_TYPE,        "adapter.type"        },
+    { LLM_KV_ADAPTER_LORA_ALPHA,  "adapter.lora.alpha"  },
 };

 struct LLM_KV {
@@ -18596,20 +18598,27 @@ static void llama_lora_adapter_init_internal(struct llama_model * model, const c
         return id < 0 ? 0.0f : gguf_get_val_f32(ctx_gguf, id);
     };

     LLM_KV llm_kv = LLM_KV(LLM_ARCH_UNKNOWN);

-    auto lora_arch_name = get_kv_str(llm_kv(LLM_KV_GENERAL_ARCHITECTURE));
-    auto lora_arch = llm_arch_from_string(lora_arch_name);
-    if (lora_arch != model->arch) {
+    auto general_type = get_kv_str(llm_kv(LLM_KV_GENERAL_TYPE));
+    if (general_type != "adapter") {
+        gguf_free(ctx_gguf);
+        throw std::runtime_error("expect general.type to be 'adapter', but got: " + general_type);
+    }
+
+    auto general_arch_str = get_kv_str(llm_kv(LLM_KV_GENERAL_ARCHITECTURE));
+    auto general_arch = llm_arch_from_string(general_arch_str);
+    if (general_arch != model->arch) {
         gguf_free(ctx_gguf);
         throw std::runtime_error("model arch and LoRA arch mismatch");
     }

-    auto train_type = get_kv_str(llm_kv(LLM_KV_TRAINING_TYPE));
-    if (train_type != "finetune_lora") {
+    auto adapter_type = get_kv_str(llm_kv(LLM_KV_ADAPTER_TYPE));
+    if (adapter_type != "lora") {
         gguf_free(ctx_gguf);
-        throw std::runtime_error("expect training.type to be finetune_lora, but got: " + train_type);
+        throw std::runtime_error("expect adapter.type to be 'lora', but got: " + adapter_type);
     }

-    adapter.alpha = get_kv_f32(llm_kv(LLM_KV_TRAINING_LORA_ALPHA));
+    adapter.alpha = get_kv_f32(llm_kv(LLM_KV_ADAPTER_LORA_ALPHA));
 }

 int n_tensors = gguf_get_n_tensors(ctx_gguf);
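On load, llama.cpp now rejects adapter files whose metadata does not match the new scheme. The same three checks can be rendered in Python to pre-validate an adapter before handing it to llama.cpp; this function and its error strings are an illustrative sketch built on gguf-py, not llama.cpp API:

```python
import gguf

def check_lora_adapter(path: str, model_arch: str) -> float:
    """Mirror the llama.cpp checks above; returns the LoRA alpha."""
    reader = gguf.GGUFReader(path)

    def get_str(key: str) -> str:
        field = reader.fields[key]
        return bytes(field.parts[field.data[0]]).decode("utf-8")

    general_type = get_str(gguf.Keys.General.TYPE)
    if general_type != gguf.GGUFType.ADAPTER:
        raise ValueError(f"expect general.type to be 'adapter', but got: {general_type}")

    if get_str(gguf.Keys.General.ARCHITECTURE) != model_arch:
        raise ValueError("model arch and LoRA arch mismatch")

    adapter_type = get_str(gguf.Keys.Adapter.TYPE)
    if adapter_type != "lora":
        raise ValueError(f"expect adapter.type to be 'lora', but got: {adapter_type}")

    field = reader.fields[gguf.Keys.Adapter.LORA_ALPHA]
    return float(field.parts[field.data[0]][0])
```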