change kv metadata
parent 9175f4b77c
commit 0ba23bad6f
5 changed files with 36 additions and 16 deletions
convert_hf_to_gguf.py

@@ -186,6 +186,7 @@ class Model:
         return new_name
 
     def set_gguf_parameters(self):
+        self.gguf_writer.add_type(gguf.GGUFType.MODEL)
         self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name)
         self.gguf_writer.add_block_count(self.block_count)
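With this change every converted checkpoint is explicitly tagged as a model before the per-model parameters are written. A minimal, hedged sketch of the equivalent direct writer calls (real gguf-py APIs as used in this PR; the name and block count are illustrative values, not from the source):

    import gguf

    # Equivalent of the start of Model.set_gguf_parameters() after this commit.
    writer = gguf.GGUFWriter(path=None, arch="llama")
    writer.add_type(gguf.GGUFType.MODEL)  # writes general.type = "model"
    writer.add_name("my-model")           # general.name (illustrative)
    writer.add_block_count(32)            # llama.block_count (illustrative)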
convert_lora_to_gguf.py

@@ -359,17 +359,16 @@ if __name__ == '__main__':
         eager=args.no_lazy,
         model_name=None,
     )
-    logger.info("Set model parameters")
-    model_instance.set_gguf_parameters()
 
     with open(lora_config, "r") as f:
         lparams: dict[str, Any] = json.load(f)
 
     alpha = lparams["lora_alpha"]
 
-    model_instance.gguf_writer.add_string("training.type", "finetune_lora")
-    model_instance.gguf_writer.add_float32("training.lora.alpha", float(alpha))
+    model_instance.gguf_writer = gguf.GGUFWriter(path=None, arch=gguf.MODEL_ARCH_NAMES[model_instance.model_arch])
+    model_instance.gguf_writer.add_string(gguf.Keys.General.TYPE, gguf.GGUFType.ADAPTER)
+    model_instance.gguf_writer.add_string(gguf.Keys.Adapter.TYPE, "lora")
+    model_instance.gguf_writer.add_float32(gguf.Keys.Adapter.LORA_ALPHA, float(alpha))
     model_instance.gguf_writer.add_quantization_version(gguf.GGML_QUANT_VERSION)
     logger.info("Exporting model...")
     model_instance.write()
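Instead of reusing the base model's parameters, the script now creates a fresh GGUFWriter and stamps the three adapter keys itself. A standalone hedged sketch of that KV layout (the alpha value is illustrative only):

    import gguf

    # Fresh writer for a LLaMA-architecture adapter; path=None defers file creation.
    writer = gguf.GGUFWriter(path=None, arch=gguf.MODEL_ARCH_NAMES[gguf.MODEL_ARCH.LLAMA])
    writer.add_string(gguf.Keys.General.TYPE, gguf.GGUFType.ADAPTER)  # general.type = "adapter"
    writer.add_string(gguf.Keys.Adapter.TYPE, "lora")                 # adapter.type = "lora"
    writer.add_float32(gguf.Keys.Adapter.LORA_ALPHA, 16.0)            # adapter.lora.alpha (illustrative)
    writer.add_quantization_version(gguf.GGML_QUANT_VERSION)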
gguf-py/gguf/constants.py

@@ -19,6 +19,7 @@ GGML_QUANT_VERSION = 2  # GGML_QNT_VERSION from ggml.h
 
 class Keys:
     class General:
+        TYPE                 = "general.type"
         ARCHITECTURE         = "general.architecture"
         QUANTIZATION_VERSION = "general.quantization_version"
         ALIGNMENT            = "general.alignment"

@@ -120,10 +121,17 @@ class Keys:
         MIDDLE_ID            = "tokenizer.ggml.middle_token_id"
         EOT_ID               = "tokenizer.ggml.eot_token_id"
 
+    class Adapter:
+        TYPE       = "adapter.type"
+        LORA_ALPHA = "adapter.lora.alpha"
+
 #
 # recommended mapping of model tensor names for storage in gguf
 #
 
+class GGUFType:
+    MODEL   = "model"
+    ADAPTER = "adapter"
+
 class MODEL_ARCH(IntEnum):
     LLAMA = auto()
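These Python-side constants and the C++ LLM_KV_NAMES table further down must stay in sync, since both serialize to the same literal strings. A quick hedged sketch of consuming them (the constants are the ones added above; the prints are only for illustration):

    from gguf.constants import Keys, GGUFType

    # The artifact kind is stored under general.type...
    print(Keys.General.TYPE, "->", GGUFType.MODEL, "or", GGUFType.ADAPTER)
    # ...and LoRA-specific metadata lives in the adapter.* namespace.
    print(Keys.Adapter.TYPE)        # adapter.type
    print(Keys.Adapter.LORA_ALPHA)  # adapter.lora.alpha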
gguf-py/gguf/gguf_writer.py

@@ -424,6 +424,9 @@ class GGUFWriter:
                 fout.close()
             self.fout = None
 
+    def add_type(self, type_name: str) -> None:
+        self.add_string(Keys.General.TYPE, type_name)
+
     def add_architecture(self) -> None:
         self.add_string(Keys.General.ARCHITECTURE, self.arch)
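The new add_type() helper is only a convenience wrapper over add_string(); a short hedged usage sketch:

    import gguf

    w = gguf.GGUFWriter(path=None, arch="llama")
    w.add_type(gguf.GGUFType.MODEL)
    # equivalent to: w.add_string(gguf.Keys.General.TYPE, gguf.GGUFType.MODEL)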
src/llama.cpp

@@ -287,6 +287,7 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
 };
 
 enum llm_kv {
+    LLM_KV_GENERAL_TYPE,
     LLM_KV_GENERAL_ARCHITECTURE,
     LLM_KV_GENERAL_QUANTIZATION_VERSION,
     LLM_KV_GENERAL_ALIGNMENT,

@@ -378,11 +379,12 @@ enum llm_kv {
     LLM_KV_TOKENIZER_MIDDLE_ID,
     LLM_KV_TOKENIZER_EOT_ID,
 
-    LLM_KV_TRAINING_TYPE,
-    LLM_KV_TRAINING_LORA_ALPHA,
+    LLM_KV_ADAPTER_TYPE,
+    LLM_KV_ADAPTER_LORA_ALPHA,
 };
 
 static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
+    { LLM_KV_GENERAL_TYPE,                 "general.type"                 },
     { LLM_KV_GENERAL_ARCHITECTURE,         "general.architecture"         },
     { LLM_KV_GENERAL_QUANTIZATION_VERSION, "general.quantization_version" },
     { LLM_KV_GENERAL_ALIGNMENT,            "general.alignment"            },

@@ -474,8 +476,8 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
     { LLM_KV_TOKENIZER_MIDDLE_ID,    "tokenizer.ggml.middle_token_id" },
     { LLM_KV_TOKENIZER_EOT_ID,       "tokenizer.ggml.eot_token_id"    },
 
-    { LLM_KV_TRAINING_TYPE,       "training.type"       },
-    { LLM_KV_TRAINING_LORA_ALPHA, "training.lora.alpha" },
+    { LLM_KV_ADAPTER_TYPE,       "adapter.type"       },
+    { LLM_KV_ADAPTER_LORA_ALPHA, "adapter.lora.alpha" },
 };
 
 struct LLM_KV {
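Taken together, the renames retire the old training.* namespace. A reference-only summary of the mapping as a small Python dict (the tuple for training.type reflects that it was split into two keys):

    # Old key (removed)        ->  new key(s) (added in this commit)
    KV_RENAMES = {
        "training.type":       ("general.type", "adapter.type"),
        "training.lora.alpha": ("adapter.lora.alpha",),
    }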
@@ -18596,20 +18598,27 @@ static void llama_lora_adapter_init_internal(struct llama_model * model, const c
             return id < 0 ? 0.0f : gguf_get_val_f32(ctx_gguf, id);
         };
         LLM_KV llm_kv = LLM_KV(LLM_ARCH_UNKNOWN);
-        auto lora_arch_name = get_kv_str(llm_kv(LLM_KV_GENERAL_ARCHITECTURE));
-        auto lora_arch = llm_arch_from_string(lora_arch_name);
-        if (lora_arch != model->arch) {
+
+        auto general_type = get_kv_str(llm_kv(LLM_KV_GENERAL_TYPE));
+        if (general_type != "adapter") {
+            gguf_free(ctx_gguf);
+            throw std::runtime_error("expect general.type to be 'adapter', but got: " + general_type);
+        }
+
+        auto general_arch_str = get_kv_str(llm_kv(LLM_KV_GENERAL_ARCHITECTURE));
+        auto general_arch = llm_arch_from_string(general_arch_str);
+        if (general_arch != model->arch) {
             gguf_free(ctx_gguf);
             throw std::runtime_error("model arch and LoRA arch mismatch");
         }
 
-        auto train_type = get_kv_str(llm_kv(LLM_KV_TRAINING_TYPE));
-        if (train_type != "finetune_lora") {
+        auto adapter_type = get_kv_str(llm_kv(LLM_KV_ADAPTER_TYPE));
+        if (adapter_type != "lora") {
             gguf_free(ctx_gguf);
-            throw std::runtime_error("expect training.type to be finetune_lora, but got: " + train_type);
+            throw std::runtime_error("expect adapter.type to be 'lora', but got: " + adapter_type);
         }
 
-        adapter.alpha = get_kv_f32(llm_kv(LLM_KV_TRAINING_LORA_ALPHA));
+        adapter.alpha = get_kv_f32(llm_kv(LLM_KV_ADAPTER_LORA_ALPHA));
     }
 
     int n_tensors = gguf_get_n_tensors(ctx_gguf);
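The loader-side checks above can be mirrored from Python to sanity-check an exported adapter before handing it to llama.cpp. A hedged sketch using gguf-py's GGUFReader (the file name is hypothetical, and string-field decoding details can vary between gguf-py versions):

    from gguf import GGUFReader

    def read_str(reader: GGUFReader, key: str) -> str:
        # Decode a string-typed KV field; empty string when the key is absent.
        field = reader.get_field(key)
        if field is None:
            return ""
        return bytes(field.parts[field.data[0]]).decode("utf-8")

    reader = GGUFReader("adapter.gguf")  # hypothetical path
    assert read_str(reader, "general.type") == "adapter", "expect general.type to be 'adapter'"
    assert read_str(reader, "adapter.type") == "lora", "expect adapter.type to be 'lora'"
    # adapter.lora.alpha is a float32, the analogue of get_kv_f32() in the C++ hunk.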