minor tweaks

This commit is contained in:
fmz 2024-07-01 15:29:04 -07:00
parent 2d4de517bb
commit 8b64c7ae46
2 changed files with 13 additions and 9 deletions

View file

@@ -2972,6 +2972,8 @@ class JaisModel(Model):
             else:
                 assert False

+        self.max_alibi_bias = 8.0
+
     def set_vocab(self):
         self._set_vocab_gpt2()
@ -2985,12 +2987,6 @@ class JaisModel(Model):
self.gguf_writer.add_layer_norm_eps(self.hparams["layer_norm_epsilon"]) self.gguf_writer.add_layer_norm_eps(self.hparams["layer_norm_epsilon"])
self.gguf_writer.add_file_type(self.ftype) self.gguf_writer.add_file_type(self.ftype)
# Hack to populate self.tensor_names
all(self.get_tensors())
if 'transformer.relative_pe.slopes' not in self.tensor_names:
self.gguf_writer.add_max_alibi_bias(8.0)
# else set later
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]: def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
del bid # unused del bid # unused
@@ -3001,11 +2997,14 @@ class JaisModel(Model):
             return tensors

         if name.endswith(("relative_pe.slopes")):
-            # calculate ALiBi bias
+            # Calculate max ALiBi bias (this is the inverse of the ALiBi calculation)
+            # Some other models has max_alibi_bias spelled out explicitly in the hyperparams,
+            # but Jais's PyTorch model simply precalculates the slope values and places them
+            # in relative_pes.slopes
             n_head_closest_log2 = 2 ** math.floor(math.log2(self.hparams["n_head"]))
             first_val = float(data_torch._data[0])
-            alibi_bias = -round(math.log2(first_val) * n_head_closest_log2)
-            self.gguf_writer.add_max_alibi_bias(alibi_bias)
+            self.max_alibi_bias = -round(math.log2(first_val) * n_head_closest_log2)

             return tensors

         if name.endswith((".c_attn.weight", ".c_proj.weight", ".c_fc.weight", ".c_fc2.weight")):
@@ -3025,6 +3024,10 @@ class JaisModel(Model):
             return tensors

+    def write_tensors(self):
+        super().write_tensors()
+        self.gguf_writer.add_max_alibi_bias(self.max_alibi_bias)
+

 ###### CONVERSION LOGIC ######

View file

@@ -6942,6 +6942,7 @@ static bool llm_load_tensors(
             case LLM_ARCH_BITNET:
                 {
                     model.tok_embd = ml.create_tensor(ctx_input, tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab});

                     // output
                     {
                         model.output_norm = ml.create_tensor(ctx_output, tn(LLM_TENSOR_OUTPUT_NORM, "weight"), {n_embd});
(NOTE: this hunk adds one line (+6→+7); the added line falls outside the visible extract)