cleanup
This commit is contained in:
parent 34300a03bc
commit a067ed8cdd

4 changed files with 76 additions and 86 deletions
@@ -2811,80 +2811,6 @@ class DeepseekV2Model(Model):
         if len(experts) > 0:
             raise ValueError(f"Unprocessed experts: {experts}")
 
-
-@Model.register("JAISLMHeadModel")
-class JaisModel(Model):
-    model_arch = gguf.MODEL_ARCH.JAIS
-
-    def __init__(self, *args, **kwargs):
-        super().__init__(*args, **kwargs)
-
-        # SwigLU activation
-        assert self.hparams["activation_function"] == "swiglu"
-        # ALiBi position embedding
-        assert self.hparams["position_embedding_type"] == "alibi"
-
-        # Embeddings scale
-        self.embeddings_scale = 1.0
-        # note: For some JAIS flavors, output is tied to (same as) wte in original model
-        self.output_is_wte = False
-        if 'mup_embeddings_scale' in self.hparams:
-            self.output_is_wte = True # Hack (?)
-            self.embeddings_scale = self.hparams['mup_embeddings_scale']
-        elif 'embeddings_scale' in self.hparams:
-            self.embeddings_scale = self.hparams['embeddings_scale']
-        else:
-            assert False
-
-        self.width_scale = 1.0
-        if 'mup_output_alpha' in self.hparams:
-            assert 'mup_width_scale' in self.hparams
-            self.width_scale = self.hparams['mup_output_alpha'] * self.hparams['mup_width_scale']
-        elif 'width_scale' in self.hparams:
-            self.width_scale = self.hparams['width_scale']
-        else:
-            assert False
-
-    def set_vocab(self):
-        self._set_vocab_gpt2()
-
-    def set_gguf_parameters(self):
-        self.gguf_writer.add_name(self.dir_model.name)
-        self.gguf_writer.add_block_count(self.hparams["n_layer"])
-        self.gguf_writer.add_context_length(self.hparams["n_positions"])
-        self.gguf_writer.add_embedding_length(self.hparams["n_embd"])
-        self.gguf_writer.add_feed_forward_length(self.hparams["n_inner"])
-        self.gguf_writer.add_head_count(self.hparams["n_head"])
-        self.gguf_writer.add_layer_norm_eps(self.hparams["layer_norm_epsilon"])
-        self.gguf_writer.add_file_type(self.ftype)
-
-    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
-        del bid  # unused
-
-        tensors: list[tuple[str, Tensor]] = []
-
-        # we don't need these
-        if name.endswith((".attn.bias", "relative_pe.slopes")):
-            return tensors
-
-        if name.endswith((".c_attn.weight", ".c_proj.weight", ".c_fc.weight", ".c_fc2.weight")):
-            data_torch = data_torch.transpose(1, 0)
-
-        new_name = self.map_tensor_name(name)
-
-        if new_name == self.format_tensor_name(gguf.MODEL_TENSOR.TOKEN_EMBD):
-            tensors.append((new_name, data_torch * self.embeddings_scale))
-            if self.output_is_wte:
-                tensors.append((self.format_tensor_name(gguf.MODEL_TENSOR.OUTPUT), data_torch * self.width_scale))
-        elif new_name == self.format_tensor_name(gguf.MODEL_TENSOR.OUTPUT):
-            assert not self.output_is_wte
-            tensors.append((new_name, data_torch * self.width_scale))
-        else:
-            tensors.append((new_name, data_torch))
-
-        return tensors
-
-
 @Model.register("T5ForConditionalGeneration")
 @Model.register("T5WithLMHeadModel")
 class T5Model(Model):
@@ -3002,6 +2928,78 @@ class T5Model(Model):
 
         return [(self.map_tensor_name(name), data_torch)]
 
 
+@Model.register("JAISLMHeadModel")
+class JaisModel(Model):
+    model_arch = gguf.MODEL_ARCH.JAIS
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+
+        # SwigLU activation
+        assert self.hparams["activation_function"] == "swiglu"
+        # ALiBi position embedding
+        assert self.hparams["position_embedding_type"] == "alibi"
+
+        # Embeddings scale
+        self.embeddings_scale = 1.0
+        # note: For some JAIS flavors, output is tied to (same as) wte in original model
+        self.output_is_wte = False
+        if 'mup_embeddings_scale' in self.hparams:
+            self.output_is_wte = True # Hack (?)
+            self.embeddings_scale = self.hparams['mup_embeddings_scale']
+        elif 'embeddings_scale' in self.hparams:
+            self.embeddings_scale = self.hparams['embeddings_scale']
+        else:
+            assert False
+
+        self.width_scale = 1.0
+        if 'mup_output_alpha' in self.hparams:
+            assert 'mup_width_scale' in self.hparams
+            self.width_scale = self.hparams['mup_output_alpha'] * self.hparams['mup_width_scale']
+        elif 'width_scale' in self.hparams:
+            self.width_scale = self.hparams['width_scale']
+        else:
+            assert False
+
+    def set_vocab(self):
+        self._set_vocab_gpt2()
+
+    def set_gguf_parameters(self):
+        self.gguf_writer.add_name(self.dir_model.name)
+        self.gguf_writer.add_block_count(self.hparams["n_layer"])
+        self.gguf_writer.add_context_length(self.hparams["n_positions"])
+        self.gguf_writer.add_embedding_length(self.hparams["n_embd"])
+        self.gguf_writer.add_feed_forward_length(self.hparams["n_inner"])
+        self.gguf_writer.add_head_count(self.hparams["n_head"])
+        self.gguf_writer.add_layer_norm_eps(self.hparams["layer_norm_epsilon"])
+        self.gguf_writer.add_file_type(self.ftype)
+
+    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+        del bid  # unused
+
+        tensors: list[tuple[str, Tensor]] = []
+
+        # we don't need these
+        if name.endswith((".attn.bias", "relative_pe.slopes")):
+            return tensors
+
+        if name.endswith((".c_attn.weight", ".c_proj.weight", ".c_fc.weight", ".c_fc2.weight")):
+            data_torch = data_torch.transpose(1, 0)
+
+        new_name = self.map_tensor_name(name)
+
+        if new_name == self.format_tensor_name(gguf.MODEL_TENSOR.TOKEN_EMBD):
+            tensors.append((new_name, data_torch * self.embeddings_scale))
+            if self.output_is_wte:
+                tensors.append((self.format_tensor_name(gguf.MODEL_TENSOR.OUTPUT), data_torch * self.width_scale))
+        elif new_name == self.format_tensor_name(gguf.MODEL_TENSOR.OUTPUT):
+            assert not self.output_is_wte
+            tensors.append((new_name, data_torch * self.width_scale))
+        else:
+            tensors.append((new_name, data_torch))
+
+        return tensors
+
 
 ###### CONVERSION LOGIC ######
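
The deleted and re-added blocks are identical: the commit simply moves JaisModel below T5Model. For context, the two factors the class computes (embeddings_scale, width_scale) are baked directly into the converted tensors by modify_tensors, so the runtime graph needs no extra scaling ops. A minimal sketch of the equivalent inference-time math, with hypothetical wte/lm_head/token_ids placeholders that are not part of this commit:

# Illustrative sketch only; wte, lm_head and token_ids are stand-ins.
import torch

def embed(wte: torch.Tensor, token_ids: torch.Tensor, embeddings_scale: float) -> torch.Tensor:
    # JAIS scales the token embeddings on the way in; the converter instead
    # multiplies the stored token_embd tensor by embeddings_scale.
    return wte[token_ids] * embeddings_scale

def lm_logits(hidden: torch.Tensor, lm_head: torch.Tensor, width_scale: float) -> torch.Tensor:
    # ...and scales the output projection (tied to wte in the muP flavors,
    # hence output_is_wte above) on the way out.
    return (hidden @ lm_head.t()) * width_scale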
@@ -733,6 +733,7 @@ int main(int argc, char ** argv) {
+
                 // Console/Stream Output
                 fprintf(stdout, "%s", token_str.c_str());
 
                 // Record Displayed Tokens To Log
                 // Note: Generated tokens are created one by one hence this check
                 if (embd.size() > 1) {
@@ -13516,13 +13516,13 @@ static void ggml_compute_forward_soft_max_f32(
             } else {
                 for (int i = 0; i < nc; ++i) {
                     wp[i] += slope*mp_f32[i];
-
                 }
             }
         }
 
 #ifndef NDEBUG
         for (int i = 0; i < nc; ++i) {
+            //printf("p[%d] = %f\n", i, p[i]);
             assert(!isnan(wp[i]));
         }
 #endif
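
The wp[i] += slope*mp_f32[i] loop above is where the ALiBi bias enters soft_max: each head adds its slope times the position-derived mask values to the scores before normalization. A rough Python sketch of that bias, assuming the standard power-of-two slope schedule (an assumption for illustration; the commit itself only restores the debug comment and drops a blank line):

import numpy as np

def alibi_slopes(n_head: int) -> np.ndarray:
    # Common ALiBi schedule: head h (0-based) gets slope 2^(-8 * (h + 1) / n_head).
    return 2.0 ** (-8.0 * (np.arange(n_head) + 1) / n_head)

def bias_row(scores: np.ndarray, mask: np.ndarray, slope: float) -> np.ndarray:
    # scores/mask: one query row of attention scores and its mask values
    # (mp_f32 in the C code); mirrors wp[i] += slope * mp_f32[i].
    return scores + slope * mask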
@@ -6977,6 +6977,8 @@ static bool llm_load_tensors(
                 } break;
             case LLM_ARCH_JAIS:
                 {
+                    model.tok_embd = ml.create_tensor(ctx_input, tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab});
+
                     // Output
                     {
                         model.output_norm = ml.create_tensor(ctx_output, tn(LLM_TENSOR_OUTPUT_NORM, "weight"), {n_embd});
@@ -7009,7 +7011,6 @@ static bool llm_load_tensors(
 
                         layer.ffn_up   = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_UP, "weight", i), {n_embd, n_ff});
                         layer.ffn_up_b = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_FFN_UP, "bias", i), {n_ff});
-
                     }
                 } break;
             default:
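
These two hunks add the previously missing token-embedding tensor for LLM_ARCH_JAIS and drop a stray blank line. If loading still fails on a shape mismatch, the converted file is easy to inspect with the gguf-py package that ships alongside the converter; a quick sketch (the file name is hypothetical):

from gguf import GGUFReader

reader = GGUFReader("jais.gguf")
for tensor in reader.tensors:
    # expect token_embd.weight, output_norm.weight, blk.*.ffn_up.weight, ...
    print(tensor.name, tensor.shape)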
@@ -12384,23 +12385,13 @@ struct llm_build_context {
         GGML_ASSERT(n_embd_head == hparams.n_embd_head_k);
 
         struct ggml_tensor * cur;
-        //struct ggml_tensor * pos;
         struct ggml_tensor * inpL;
 
         inpL = llm_build_inp_embd(ctx0, lctx, hparams, batch, model.tok_embd, cb);
 
-        // // inp_pos - contains the positions
-        // struct ggml_tensor * inp_pos = build_inp_pos();
-
         // KQ_mask (mask for 1 head, it will be broadcasted to all heads)
         struct ggml_tensor * KQ_mask = build_inp_KQ_mask();
 
-        // pos = ggml_get_rows(ctx0, model.pos_embd, inp_pos);
-        // cb(pos, "pos_embd", -1);
-
-        // inpL = ggml_add(ctx0, inpL, pos);
-        // cb(inpL, "inpL", -1);
-
         for (int il = 0; il < n_layer; ++il) {
             cur = llm_build_norm(ctx0, inpL, hparams,
                     model.layers[il].attn_norm,
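
The deleted lines here are commented-out remnants of the GPT-2 graph this builder started from: GPT-2 adds a learned position embedding to the input embeddings, while JAIS positions tokens purely through ALiBi inside the attention, so there is no pos tensor to build. A schematic contrast in Python (placeholder names, not the C++ graph code):

def gpt2_input(wte, wpe, token_ids, positions):
    # GPT-2: learned absolute positions are added to the token embeddings.
    return wte[token_ids] + wpe[positions]

def jais_input(wte, token_ids, embeddings_scale):
    # JAIS: no position term here; ALiBi biases the attention scores instead
    # (see the soft_max hunk above), which is why the pos/inp_pos code was dead.
    return wte[token_ids] * embeddings_scale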