rem tabs
parent 758612a984
commit 90766e15e2
1 changed file with 20 additions and 18 deletions
@@ -5244,7 +5244,7 @@ static void llm_load_hparams(
        case LLM_ARCH_CHAMELEON:
            {
                ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);
                hparams.f_norm_eps = 1e-5; // eps for qk-norm, torch default
                ml.get_key(LLM_KV_SWIN_NORM, hparams.swin_norm);

                switch (hparams.n_layer) {

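For reference, the two epsilons touched above serve different norms: f_norm_rms_eps is read from the GGUF metadata for the layer RMS norms, while f_norm_eps is hard-coded to the torch default of 1e-5 for qk-norm. A minimal standalone sketch of RMS normalization, assuming the usual x / sqrt(mean(x^2) + eps) formulation (this is not llama.cpp's ggml implementation):

    #include <cmath>
    #include <vector>

    // RMS-normalize x in place: x_i <- x_i / sqrt(mean(x^2) + eps).
    // eps (e.g. hparams.f_norm_rms_eps) keeps the division stable when
    // the activations are near zero.
    static void rms_norm(std::vector<float> & x, float eps) {
        float sum_sq = 0.0f;
        for (float v : x) {
            sum_sq += v * v;
        }
        const float scale = 1.0f / std::sqrt(sum_sq / x.size() + eps);
        for (float & v : x) {
            v *= scale;
        }
    }
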
@@ -13718,11 +13718,11 @@ struct llm_build_context {
            struct ggml_tensor * inpSA = inpL;

            // norm
            if (!hparams.swin_norm) {
                cur = llm_build_norm(ctx0, inpL, hparams,
                        model.layers[il].attn_norm, NULL,
                        LLM_NORM_RMS, cb, il);
            }
            cb(cur, "attn_norm", il);

            // self-attention

@@ -13780,11 +13780,11 @@ struct llm_build_context {
                        model.layers[il].wo, nullptr,
                        Kcur, Vcur, Qcur, KQ_mask, n_tokens, kv_head, n_kv, 1.0f/sqrtf(float(n_embd_head)), cb, il);

                if (hparams.swin_norm) {
                    cur = llm_build_norm(ctx0, cur, hparams,
                            model.layers[il].attn_norm, NULL,
                            LLM_NORM_RMS, cb, il);
                }
            }

            if (il == n_layer - 1) {

@@ -13799,12 +13799,12 @@ struct llm_build_context {
            cb(ffn_inp, "ffn_inp", il);

            // feed-forward network
            if (!hparams.swin_norm) {
                cur = llm_build_norm(ctx0, ffn_inp, hparams,
                        model.layers[il].ffn_norm, NULL,
                        LLM_NORM_RMS, cb, il);
                cb(cur, "ffn_norm", il);
            }

            cur = llm_build_ffn(ctx0, cur,
                    model.layers[il].ffn_up, NULL, NULL,

@@ -13814,12 +13814,12 @@ struct llm_build_context {
                    LLM_FFN_SILU, LLM_FFN_PAR, cb, il);
            cb(cur, "ffn_out", il);

            if (hparams.swin_norm) {
                cur = llm_build_norm(ctx0, cur, hparams,
                        model.layers[il].ffn_norm, NULL,
                        LLM_NORM_RMS, cb, il);
                cb(cur, "ffn_norm", il);
            }

            cur = ggml_add(ctx0, cur, ffn_inp);
            cb(cur, "ffn_out", il);

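Taken together, the four hunks above implement Chameleon's swin_norm switch: with swin_norm == false the RMS norm runs before attention and the FFN (the usual pre-norm), and with swin_norm == true it runs after them (Swin-transformer-style post-norm). A compilable schematic of that control flow, with the ggml graph calls replaced by placeholder stubs (Tensor, rms, attention, feed_forward, and add are all stand-ins, not llama.cpp APIs):

    #include <vector>

    using Tensor = std::vector<float>;

    // Placeholder stubs for llm_build_norm(..., LLM_NORM_RMS, ...), the
    // self-attention block, the FFN block, and ggml_add.
    static Tensor rms(const Tensor & t)                   { return t; }
    static Tensor attention(const Tensor & t)             { return t; }
    static Tensor feed_forward(const Tensor & t)          { return t; }
    static Tensor add(const Tensor & a, const Tensor & b) { (void) b; return a; }

    static Tensor chameleon_layer(const Tensor & inpL, bool swin_norm) {
        Tensor cur = swin_norm ? inpL : rms(inpL);  // pre-norm unless swin_norm
        cur = attention(cur);
        if (swin_norm) { cur = rms(cur); }          // post-norm when swin_norm
        const Tensor ffn_inp = add(cur, inpL);      // first residual connection

        cur = swin_norm ? ffn_inp : rms(ffn_inp);
        cur = feed_forward(cur);
        if (swin_norm) { cur = rms(cur); }
        return add(cur, ffn_inp);                   // second residual connection
    }
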
@@ -13842,13 +13842,15 @@ struct llm_build_context {
        cur = ggml_mul_mat(ctx0, model.output, cur);
        cb(cur, "result_output_with_img_logits", -1);

+       // TODO: this suppresses the output of image tokens, which is required to enable text-only outputs.
+       // Needs to be removed once image outputs are supported.
        int img_token_end_idx = 8196;
        int img_token_start_idx = 4;
        int num_img_tokens = img_token_end_idx - img_token_start_idx;
        struct ggml_tensor * img_logits = ggml_new_tensor_1d(ctx0, GGML_TYPE_F32, num_img_tokens);
        img_logits = ggml_add1(ctx0, img_logits, ggml_new_f32(ctx0, -FLT_MAX));
        cb(img_logits, "img_logits", -1);
        cur = ggml_set_1d(ctx0, cur, img_logits, ggml_element_size(cur) * img_token_start_idx);

        cb(cur, "result_output", -1);

        ggml_build_forward_expand(gf, cur);

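The hunk above blocks image-token sampling at the graph level: a tensor of num_img_tokens values, each set to -FLT_MAX, is written over the logits of token ids [img_token_start_idx, img_token_end_idx), so softmax assigns those tokens probability ~0. The same idea in plain C++ (a sketch, not the ggml code):

    #include <cfloat>
    #include <vector>

    // Overwrite the logits of token ids in [start, end) with -FLT_MAX so the
    // sampler can never pick them. Mirrors ggml_set_1d above, which writes
    // the -FLT_MAX-filled img_logits tensor into cur at offset start.
    static void suppress_range(std::vector<float> & logits, int start, int end) {
        for (int i = start; i < end && i < (int) logits.size(); ++i) {
            logits[i] = -FLT_MAX;
        }
    }

    // Usage mirroring the hard-coded ids above: suppress_range(logits, 4, 8196);
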
@@ -15713,8 +15715,8 @@ struct llm_tokenizer_bpe {
                break;
            case LLAMA_VOCAB_PRE_TYPE_CHAMELEON:
                regex_exprs = {
                    "<sentinel:[0-9]+>", // Sentinel tokens
                    "(IMGIMG)((A|B|C|D|E|F|G|H|I){1,4})Z", // Image tokens
                    "([\t\n]|    |  )", // directly from tokenizer.json
                    "\\p{N}", // Individual digits
                    "[\\p{P}\\$\\+<=>\\^~\\|`]+", // Punctuation

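These regex_exprs are applied in order to carve the raw text into fragments before BPE merges run, so sentinel and image tokens survive as intact spans. A toy illustration of the first pattern using std::regex (llama.cpp uses its own unicode-aware regex handling, and std::regex does not support \p{N}, so this only demonstrates the sentinel pattern):

    #include <iostream>
    #include <regex>
    #include <string>

    int main() {
        const std::string text = "hello<sentinel:42>world";
        const std::regex sentinel("<sentinel:[0-9]+>");

        // Each match is a span the pre-tokenizer protects from BPE merging.
        for (auto it = std::sregex_iterator(text.begin(), text.end(), sentinel);
             it != std::sregex_iterator(); ++it) {
            std::cout << "protected span: " << it->str() << "\n";
        }
        // prints: protected span: <sentinel:42>
    }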