fmz 2024-06-25 12:04:08 -07:00
parent 34300a03bc
commit a067ed8cdd
4 changed files with 76 additions and 86 deletions


@@ -2811,80 +2811,6 @@ class DeepseekV2Model(Model):
                if len(experts) > 0:
                    raise ValueError(f"Unprocessed experts: {experts}")


@Model.register("JAISLMHeadModel")
class JaisModel(Model):
    model_arch = gguf.MODEL_ARCH.JAIS

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        # SwiGLU activation
        assert self.hparams["activation_function"] == "swiglu"
        # ALiBi position embedding
        assert self.hparams["position_embedding_type"] == "alibi"

        # Embeddings scale
        self.embeddings_scale = 1.0
        # note: for some JAIS flavors, the output is tied to (the same as) wte in the original model
        self.output_is_wte = False
        if 'mup_embeddings_scale' in self.hparams:
            self.output_is_wte = True  # Hack (?)
            self.embeddings_scale = self.hparams['mup_embeddings_scale']
        elif 'embeddings_scale' in self.hparams:
            self.embeddings_scale = self.hparams['embeddings_scale']
        else:
            assert False

        self.width_scale = 1.0
        if 'mup_output_alpha' in self.hparams:
            assert 'mup_width_scale' in self.hparams
            self.width_scale = self.hparams['mup_output_alpha'] * self.hparams['mup_width_scale']
        elif 'width_scale' in self.hparams:
            self.width_scale = self.hparams['width_scale']
        else:
            assert False

    def set_vocab(self):
        self._set_vocab_gpt2()

    def set_gguf_parameters(self):
        self.gguf_writer.add_name(self.dir_model.name)
        self.gguf_writer.add_block_count(self.hparams["n_layer"])
        self.gguf_writer.add_context_length(self.hparams["n_positions"])
        self.gguf_writer.add_embedding_length(self.hparams["n_embd"])
        self.gguf_writer.add_feed_forward_length(self.hparams["n_inner"])
        self.gguf_writer.add_head_count(self.hparams["n_head"])
        self.gguf_writer.add_layer_norm_eps(self.hparams["layer_norm_epsilon"])
        self.gguf_writer.add_file_type(self.ftype)

    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
        del bid  # unused

        tensors: list[tuple[str, Tensor]] = []

        # we don't need these
        if name.endswith((".attn.bias", "relative_pe.slopes")):
            return tensors

        if name.endswith((".c_attn.weight", ".c_proj.weight", ".c_fc.weight", ".c_fc2.weight")):
            data_torch = data_torch.transpose(1, 0)

        new_name = self.map_tensor_name(name)

        if new_name == self.format_tensor_name(gguf.MODEL_TENSOR.TOKEN_EMBD):
            tensors.append((new_name, data_torch * self.embeddings_scale))
            if self.output_is_wte:
                tensors.append((self.format_tensor_name(gguf.MODEL_TENSOR.OUTPUT), data_torch * self.width_scale))
        elif new_name == self.format_tensor_name(gguf.MODEL_TENSOR.OUTPUT):
            assert not self.output_is_wte
            tensors.append((new_name, data_torch * self.width_scale))
        else:
            tensors.append((new_name, data_torch))

        return tensors
@Model.register("T5ForConditionalGeneration") @Model.register("T5ForConditionalGeneration")
@Model.register("T5WithLMHeadModel") @Model.register("T5WithLMHeadModel")
class T5Model(Model): class T5Model(Model):
@ -3002,6 +2928,78 @@ class T5Model(Model):
return [(self.map_tensor_name(name), data_torch)] return [(self.map_tensor_name(name), data_torch)]
@Model.register("JAISLMHeadModel")
class JaisModel(Model):
model_arch = gguf.MODEL_ARCH.JAIS
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
# SwigLU activation
assert self.hparams["activation_function"] == "swiglu"
# ALiBi position embedding
assert self.hparams["position_embedding_type"] == "alibi"
# Embeddings scale
self.embeddings_scale = 1.0
# note: For some JAIS flavors, output is tied to (same as) wte in original model
self.output_is_wte = False
if 'mup_embeddings_scale' in self.hparams:
self.output_is_wte = True # Hack (?)
self.embeddings_scale = self.hparams['mup_embeddings_scale']
elif 'embeddings_scale' in self.hparams:
self.embeddings_scale = self.hparams['embeddings_scale']
else:
assert False
self.width_scale = 1.0
if 'mup_output_alpha' in self.hparams:
assert 'mup_width_scale' in self.hparams
self.width_scale = self.hparams['mup_output_alpha'] * self.hparams['mup_width_scale']
elif 'width_scale' in self.hparams:
self.width_scale = self.hparams['width_scale']
else:
assert False
def set_vocab(self):
self._set_vocab_gpt2()
def set_gguf_parameters(self):
self.gguf_writer.add_name(self.dir_model.name)
self.gguf_writer.add_block_count(self.hparams["n_layer"])
self.gguf_writer.add_context_length(self.hparams["n_positions"])
self.gguf_writer.add_embedding_length(self.hparams["n_embd"])
self.gguf_writer.add_feed_forward_length(self.hparams["n_inner"])
self.gguf_writer.add_head_count(self.hparams["n_head"])
self.gguf_writer.add_layer_norm_eps(self.hparams["layer_norm_epsilon"])
self.gguf_writer.add_file_type(self.ftype)
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
del bid # unused
tensors: list[tuple[str, Tensor]] = []
# we don't need these
if name.endswith((".attn.bias", "relative_pe.slopes")):
return tensors
if name.endswith((".c_attn.weight", ".c_proj.weight", ".c_fc.weight", ".c_fc2.weight")):
data_torch = data_torch.transpose(1, 0)
new_name = self.map_tensor_name(name)
if new_name == self.format_tensor_name(gguf.MODEL_TENSOR.TOKEN_EMBD):
tensors.append((new_name, data_torch * self.embeddings_scale))
if self.output_is_wte:
tensors.append((self.format_tensor_name(gguf.MODEL_TENSOR.OUTPUT), data_torch * self.width_scale))
elif new_name == self.format_tensor_name(gguf.MODEL_TENSOR.OUTPUT):
assert not self.output_is_wte
tensors.append((new_name, data_torch * self.width_scale))
else:
tensors.append((new_name, data_torch))
return tensors
###### CONVERSION LOGIC ######
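
Aside: the heart of the relocated JaisModel is how it derives its two scaling factors from the checkpoint's hparams: muP-style JAIS configs carry mup_embeddings_scale (and tie the output head to wte), while the others carry plain embeddings_scale/width_scale. A standalone sketch of that selection logic (the config values below are invented for illustration):

def jais_scales(hparams: dict) -> tuple[float, float, bool]:
    # Mirrors JaisModel.__init__ above: pick the embedding/output scales
    # depending on whether this is a muP-style JAIS checkpoint.
    output_is_wte = False
    if "mup_embeddings_scale" in hparams:
        output_is_wte = True  # output head tied to wte in the original model
        embeddings_scale = hparams["mup_embeddings_scale"]
    else:
        embeddings_scale = hparams["embeddings_scale"]

    if "mup_output_alpha" in hparams:
        width_scale = hparams["mup_output_alpha"] * hparams["mup_width_scale"]
    else:
        width_scale = hparams["width_scale"]

    return embeddings_scale, width_scale, output_is_wte

# Invented muP-style config: embeddings scaled by 14.0, output head
# scaled by 2.22 * 0.03571 ~= 0.0793 and tied to wte.
print(jais_scales({"mup_embeddings_scale": 14.0,
                   "mup_output_alpha": 2.22,
                   "mup_width_scale": 0.03571}))

modify_tensors then applies embeddings_scale to the token embedding and width_scale to the output head, exactly as in the diff above.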


@@ -733,6 +733,7 @@ int main(int argc, char ** argv) {

            // Console/Stream Output
            fprintf(stdout, "%s", token_str.c_str());

            // Record Displayed Tokens To Log
            // Note: Generated tokens are created one by one hence this check
            if (embd.size() > 1) {
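
The added comment gives the rationale for the embd.size() > 1 check: the prompt is evaluated as one multi-token batch, while generation emits exactly one token per decode step. A toy Python sketch of that distinction (names are illustrative, not the actual main.cpp variables):

def record_displayed_tokens(embd: list[int], token_log: list[int]) -> None:
    # Illustrative only: more than one token in a step means we are still
    # echoing the prompt; generated tokens arrive one at a time.
    if len(embd) > 1:
        token_log.extend(embd)       # prompt tokens, displayed in bulk
    else:
        token_log.append(embd[0])    # a single generated token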


@@ -13516,13 +13516,13 @@ static void ggml_compute_forward_soft_max_f32(
            } else {
                for (int i = 0; i < nc; ++i) {
                    wp[i] += slope*mp_f32[i];
                }
            }
        }

#ifndef NDEBUG
        for (int i = 0; i < nc; ++i) {
            //printf("p[%d] = %f\n", i, p[i]);
            assert(!isnan(wp[i]));
        }
#endif
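
For reference, the wp[i] += slope*mp_f32[i] line above is where ggml applies the ALiBi bias: each logit gets the mask value, scaled by a per-head slope, added before the usual softmax. A NumPy sketch of the same computation, plus the standard ALiBi slope formula for a power-of-two head count (illustrative, not the ggml code):

import numpy as np

def soft_max_with_alibi(logits: np.ndarray, mask: np.ndarray, slope: float) -> np.ndarray:
    wp = logits + slope * mask   # same as: wp[i] += slope * mp_f32[i]
    wp = wp - wp.max()           # subtract the max for numerical stability
    e = np.exp(wp)
    return e / e.sum()

def alibi_slopes(n_head: int) -> np.ndarray:
    # ALiBi paper: for n heads (a power of two), slopes are 2^(-8*i/n), i = 1..n
    return 2.0 ** (-8.0 * np.arange(1, n_head + 1) / n_head)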


@@ -6977,6 +6977,8 @@ static bool llm_load_tensors(
                } break;
            case LLM_ARCH_JAIS:
                {
                    model.tok_embd = ml.create_tensor(ctx_input, tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab});

                    // Output
                    {
                        model.output_norm = ml.create_tensor(ctx_output, tn(LLM_TENSOR_OUTPUT_NORM, "weight"), {n_embd});
@@ -7009,7 +7011,6 @@ static bool llm_load_tensors(
                        layer.ffn_up   = ml.create_tensor(ctx_split, tn(LLM_TENSOR_FFN_UP, "weight", i), {n_embd, n_ff});
                        layer.ffn_up_b = ml.create_tensor(ctx_layer, tn(LLM_TENSOR_FFN_UP, "bias", i),   {n_ff});
                    }
                } break;
            default:
@@ -12384,23 +12385,13 @@ struct llm_build_context {
        GGML_ASSERT(n_embd_head == hparams.n_embd_head_k);

        struct ggml_tensor * cur;
        //struct ggml_tensor * pos;
        struct ggml_tensor * inpL;

        inpL = llm_build_inp_embd(ctx0, lctx, hparams, batch, model.tok_embd, cb);

        // inp_pos - contains the positions
        // struct ggml_tensor * inp_pos = build_inp_pos();

        // KQ_mask (mask for 1 head, it will be broadcasted to all heads)
        struct ggml_tensor * KQ_mask = build_inp_KQ_mask();

        // pos = ggml_get_rows(ctx0, model.pos_embd, inp_pos);
        // cb(pos, "pos_embd", -1);
        // inpL = ggml_add(ctx0, inpL, pos);
        // cb(inpL, "inpL", -1);

        for (int il = 0; il < n_layer; ++il) {
            cur = llm_build_norm(ctx0, inpL, hparams,
                    model.layers[il].attn_norm,