minor tweaks
This commit is contained in:
parent
2d4de517bb
commit
8b64c7ae46
2 changed files with 13 additions and 9 deletions
|
@ -2972,6 +2972,8 @@ class JaisModel(Model):
|
|||
else:
|
||||
assert False
|
||||
|
||||
self.max_alibi_bias = 8.0
|
||||
|
||||
def set_vocab(self):
|
||||
self._set_vocab_gpt2()
|
||||
|
||||
|
@ -2985,12 +2987,6 @@ class JaisModel(Model):
|
|||
self.gguf_writer.add_layer_norm_eps(self.hparams["layer_norm_epsilon"])
|
||||
self.gguf_writer.add_file_type(self.ftype)
|
||||
|
||||
# Hack to populate self.tensor_names
|
||||
all(self.get_tensors())
|
||||
if 'transformer.relative_pe.slopes' not in self.tensor_names:
|
||||
self.gguf_writer.add_max_alibi_bias(8.0)
|
||||
# else set later
|
||||
|
||||
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
|
||||
del bid # unused
|
||||
|
||||
|
@ -3001,11 +2997,14 @@ class JaisModel(Model):
|
|||
return tensors
|
||||
|
||||
if name.endswith(("relative_pe.slopes")):
|
||||
# calculate ALiBi bias
|
||||
# Calculate max ALiBi bias (this is the inverse of the ALiBi calculation)
|
||||
# Some other models have max_alibi_bias spelled out explicitly in the hyperparams,
|
||||
# but Jais's PyTorch model simply precalculates the slope values and places them
|
||||
# in relative_pe.slopes
|
||||
n_head_closest_log2 = 2 ** math.floor(math.log2(self.hparams["n_head"]))
|
||||
first_val = float(data_torch._data[0])
|
||||
alibi_bias = -round(math.log2(first_val) * n_head_closest_log2)
|
||||
self.gguf_writer.add_max_alibi_bias(alibi_bias)
|
||||
self.max_alibi_bias = -round(math.log2(first_val) * n_head_closest_log2)
|
||||
|
||||
return tensors
|
||||
|
||||
if name.endswith((".c_attn.weight", ".c_proj.weight", ".c_fc.weight", ".c_fc2.weight")):
|
||||
|
@ -3025,6 +3024,10 @@ class JaisModel(Model):
|
|||
|
||||
return tensors
|
||||
|
||||
def write_tensors(self):
|
||||
super().write_tensors()
|
||||
self.gguf_writer.add_max_alibi_bias(self.max_alibi_bias)
|
||||
|
||||
|
||||
###### CONVERSION LOGIC ######
|
||||
|
||||
|
|
|
@ -6942,6 +6942,7 @@ static bool llm_load_tensors(
|
|||
case LLM_ARCH_BITNET:
|
||||
{
|
||||
model.tok_embd = ml.create_tensor(ctx_input, tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab});
|
||||
|
||||
// output
|
||||
{
|
||||
model.output_norm = ml.create_tensor(ctx_output, tn(LLM_TENSOR_OUTPUT_NORM, "weight"), {n_embd});
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue