update llama_hparams
parent 241bb45714
commit 07553cfb0f

5 changed files with 31 additions and 10 deletions
@@ -1980,6 +1980,12 @@ class Qwen2Model(Model):
 class Qwen2VLModel(Model):
     model_arch = gguf.MODEL_ARCH.QWEN2VL
 
+    def set_gguf_parameters(self):
+        super().set_gguf_parameters()
+        mrope_section = self.hparams["rope_scaling"]["mrope_section"]
+        mrope_section += [0] * max(0, 4 - len(mrope_section))
+        self.gguf_writer.add_rope_dimension_sections(mrope_section)
+
     def set_vocab(self):
         try:
             self._set_vocab_sentencepiece()
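The converter hunk above (likely in the HF-to-GGUF conversion script) pads `mrope_section` from the HuggingFace config to exactly four entries before writing it into the GGUF metadata. A minimal sketch of that padding step; the input values are illustrative, borrowed from the sections this commit previously hardcoded in llama.cpp ({16, 24, 24, 0}), not read from a real config:

    # Sketch of the padding in Qwen2VLModel.set_gguf_parameters; real values
    # come from hparams["rope_scaling"]["mrope_section"].
    mrope_section = [16, 24, 24]                          # illustrative input
    mrope_section += [0] * max(0, 4 - len(mrope_section))  # pad to 4 entries
    assert mrope_section == [16, 24, 24, 0]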
@@ -133,7 +133,10 @@ def main(args):
     fout.add_uint32(k(KEY_BLOCK_COUNT, VISION), vcfg.depth)
     fout.add_uint32(k(KEY_FEED_FORWARD_LENGTH, VISION), 0)  # BUG: not sure what this does
     fout.add_name(model_name)
-    # fout.add_string("clip.vision.mm_patch_merge_type", v_hparams["mm_patch_merge_type"])
+    """
+    HACK: Since the vision rope-related parameters aren't stored in the `Qwen2VLConfig`,
+        they will be hardcoded in `clip_image_build_graph` in `clip.cpp`.
+    """
 
     processor: Qwen2VLProcessor = AutoProcessor.from_pretrained(model_name)
     # breakpoint()
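The keyed writes in this hunk follow the usual llava surgery pattern, where an `{arch}`-templated key is specialized for the vision tower. A hedged sketch of how such a key resolves; the constant values below are assumptions about the surrounding script (they are not shown in this diff):

    # Assumed constants mirroring the llava surgery convention.
    KEY_BLOCK_COUNT = "{arch}.block_count"
    VISION = "clip.vision"

    def k(raw_key: str, arch: str) -> str:
        # Specialize a templated GGUF key for one architecture/tower.
        return raw_key.format(arch=arch)

    print(k(KEY_BLOCK_COUNT, VISION))  # -> "clip.vision.block_count"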
@@ -131,6 +131,7 @@ class Keys:
 
     class Rope:
         DIMENSION_COUNT    = "{arch}.rope.dimension_count"
+        DIMENSION_SECTIONS = "{arch}.rope.dimension_sections"
         FREQ_BASE          = "{arch}.rope.freq_base"
         SCALING_TYPE       = "{arch}.rope.scaling.type"
         SCALING_FACTOR     = "{arch}.rope.scaling.factor"
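The new `Keys.Rope.DIMENSION_SECTIONS` template resolves per architecture exactly like the existing rope keys. A small sketch, assuming the architecture string "qwen2vl":

    DIMENSION_SECTIONS = "{arch}.rope.dimension_sections"
    print(DIMENSION_SECTIONS.format(arch="qwen2vl"))  # -> "qwen2vl.rope.dimension_sections"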
@@ -750,6 +750,9 @@ class GGUFWriter:
 
     def add_rope_dimension_count(self, count: int) -> None:
         self.add_uint32(Keys.Rope.DIMENSION_COUNT.format(arch=self.arch), count)
 
+    def add_rope_dimension_sections(self, dims: Sequence[int]) -> None:
+        self.add_array(Keys.Rope.DIMENSION_SECTIONS.format(arch=self.arch), dims)
+
     def add_rope_freq_base(self, value: float) -> None:
         self.add_float32(Keys.Rope.FREQ_BASE.format(arch=self.arch), value)
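A hedged usage sketch of the new writer method. It assumes gguf-py's `GGUFWriter(path, arch)` constructor and its header/KV write calls; the file name and section values are illustrative, and a real conversion would also write tensor data:

    import gguf

    # Illustrative only: write just the KV metadata carrying the rope sections.
    writer = gguf.GGUFWriter("qwen2vl-metadata-only.gguf", "qwen2vl")
    writer.add_rope_dimension_sections([16, 24, 24, 0])
    writer.write_header_to_file()
    writer.write_kv_data_to_file()
    writer.close()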
@@ -310,6 +310,7 @@ enum llm_kv {
     LLM_KV_ATTENTION_SCALE,
 
     LLM_KV_ROPE_DIMENSION_COUNT,
+    LLM_KV_ROPE_DIMENSION_SECTIONS,
     LLM_KV_ROPE_FREQ_BASE,
     LLM_KV_ROPE_SCALE_LINEAR,
     LLM_KV_ROPE_SCALING_TYPE,
@@ -426,6 +427,7 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
     { LLM_KV_ATTENTION_SCALE,          "%s.attention.scale" },
 
     { LLM_KV_ROPE_DIMENSION_COUNT,     "%s.rope.dimension_count" },
+    { LLM_KV_ROPE_DIMENSION_SECTIONS,  "%s.rope.dimension_sections" },
     { LLM_KV_ROPE_FREQ_BASE,           "%s.rope.freq_base" },
     { LLM_KV_ROPE_SCALE_LINEAR,        "%s.rope.scale_linear" },
     { LLM_KV_ROPE_SCALING_TYPE,        "%s.rope.scaling.type" },
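The Python-side template in the GGUF constants and the C++-side printf pattern added here must resolve to the same key string for a given architecture. A quick consistency sketch (the architecture name is an assumption):

    arch = "qwen2vl"  # assumed architecture string
    py_key = "{arch}.rope.dimension_sections".format(arch=arch)
    cpp_key = "%s.rope.dimension_sections" % arch
    assert py_key == cpp_key == "qwen2vl.rope.dimension_sections"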
@@ -2429,11 +2431,12 @@ struct llama_hparams {
     uint32_t time_decay_extra_dim = 0;
     uint32_t wkv_head_size = 0;
 
     float    rope_attn_factor = 1.0f;
     float    rope_freq_base_train;
     float    rope_freq_scale_train;
     uint32_t n_ctx_orig_yarn;
     float    rope_yarn_log_mul;
+    std::array<uint32_t, 4> rope_mrope_sections;
 
     // for State Space Models
     uint32_t ssm_d_conv = 0;
@@ -2488,8 +2491,9 @@ struct llama_hparams {
         if (this->n_ff_shexp      != other.n_ff_shexp)      return true;
         if (this->n_expert_shared != other.n_expert_shared) return true;
 
         if (this->rope_finetuned  != other.rope_finetuned)  return true;
         if (this->n_ctx_orig_yarn != other.n_ctx_orig_yarn) return true;
+        if (this->rope_mrope_sections != other.rope_mrope_sections) return true;
 
         if (this->ssm_d_conv  != other.ssm_d_conv)  return true;
         if (this->ssm_d_inner != other.ssm_d_inner) return true;
@@ -5710,8 +5714,12 @@ static void llm_load_hparams(
                     default: model.type = e_model::MODEL_UNKNOWN;
                 }
             } break;
-        case LLM_ARCH_QWEN2:
         case LLM_ARCH_QWEN2VL:
+            {
+                std::fill(hparams.rope_mrope_sections.begin(), hparams.rope_mrope_sections.end(), 0);
+                ml.get_key_or_arr(LLM_KV_ROPE_DIMENSION_SECTIONS, hparams.rope_mrope_sections, 4, true);
+            }
+        case LLM_ARCH_QWEN2:
             {
                 ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);
                 switch (hparams.n_layer) {
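Note that the new `LLM_ARCH_QWEN2VL` case has no `break`, so after reading the rope sections it falls through into the `LLM_ARCH_QWEN2` body that loads the remaining shared hyperparameters. The loading pattern itself is "zero-fill, then read the array from metadata"; a Python-flavoured sketch of that behaviour (the metadata dict and key string are stand-ins, not an actual llama.cpp API):

    # Stand-in for std::fill(...) followed by ml.get_key_or_arr(...).
    metadata = {"qwen2vl.rope.dimension_sections": [16, 24, 24, 0]}  # example values

    sections = [0] * 4                                      # zero-filled defaults
    loaded = metadata["qwen2vl.rope.dimension_sections"]    # required key in the C++ code
    sections[: len(loaded)] = loaded[:4]                    # copy up to 4 entries
    print(sections)  # -> [16, 24, 24, 0]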
@@ -12532,7 +12540,7 @@ struct llm_build_context {
 
         // KQ_mask (mask for 1 head, it will be broadcasted to all heads)
         struct ggml_tensor * KQ_mask = build_inp_KQ_mask();
-        int sections[4] = {16, 24, 24, 0}; // TODO: move this into gguf model file.
+        int * sections = (int *)hparams.rope_mrope_sections.data();
 
         for (int il = 0; il < n_layer; ++il) {
             struct ggml_tensor * inpSA = inpL;
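This last hunk swaps the hardcoded `{16, 24, 24, 0}` sections for the values loaded into `hparams.rope_mrope_sections`, so the per-model values from the GGUF file are what reach the rope graph; the cast from `std::array<uint32_t, 4>` to `int *` assumes the two element types share size and representation. A small sanity sketch on the previously hardcoded example values, assuming the usual Qwen2-VL geometry where the sections together cover the rotary dimensions (head_dim / 2 = 64; the 64 is an assumption, not taken from this diff):

    sections = [16, 24, 24, 0]   # previously hardcoded example values
    n_rot = 64                   # assumed rotary dimension count (head_dim / 2)
    assert sum(sections) == n_rot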