refactor: better refactor
parent bfa0286866
commit b97704c9a0

1 changed file with 10 additions and 39 deletions
@@ -2711,7 +2711,7 @@ class StarCoder2Model(Model):
     model_arch = gguf.MODEL_ARCH.STARCODER2


-@Model.register("MambaForCausalLM", "MambaLMHeadModel")
+@Model.register("MambaForCausalLM", "MambaLMHeadModel", "FalconMambaForCausalLM")
 class MambaModel(Model):
     model_arch = gguf.MODEL_ARCH.MAMBA

@@ -2731,7 +2731,7 @@ class MambaModel(Model):
         else:
             # Use the GPT-NeoX tokenizer when no tokenizer files are present
             self._set_vocab_builtin("gpt-neox", vocab_size)

     def set_gguf_parameters(self):
         d_model = self.find_hparam(["hidden_size", "d_model"])
         d_conv = self.find_hparam(["conv_kernel", "d_conv"], optional=True) or 4
@@ -2742,7 +2742,11 @@ class MambaModel(Model):
         # ref: https://github.com/state-spaces/mamba/blob/ce59daea3a090d011d6476c6e5b97f6d58ddad8b/mamba_ssm/modules/mamba_simple.py#L58
         dt_rank = self.find_hparam(["time_step_rank", "dt_rank"], optional=True) or -(d_model // -16)
         rms_norm_eps = self.find_hparam(["layer_norm_epsilon", "rms_norm_eps"], optional=True) or 1e-5
+        num_hidden_layers = self.find_hparam(["n_layer", "num_hidden_layers"])
+        use_b_dt_norm = False
+        # For falconmamba we do apply RMS norm on B / DT and C layers
+        if self.find_hparam(["model_type"]) in ["falcon_mamba"]:
+            use_b_dt_norm = True
         # Fail early for models which don't have a block expansion factor of 2
         assert d_inner == 2 * d_model

@@ -2750,13 +2754,13 @@ class MambaModel(Model):
         self.gguf_writer.add_embedding_length(d_model)
         self.gguf_writer.add_feed_forward_length(0) # unused, but seemingly required when loading
         self.gguf_writer.add_head_count(0) # unused, but seemingly required when loading
-        self.gguf_writer.add_block_count(self.hparams["n_layer"])
+        self.gguf_writer.add_block_count(num_hidden_layers)
         self.gguf_writer.add_ssm_conv_kernel(d_conv)
         self.gguf_writer.add_ssm_inner_size(d_inner)
         self.gguf_writer.add_ssm_state_size(d_state)
         self.gguf_writer.add_ssm_time_step_rank(dt_rank)
         self.gguf_writer.add_layer_norm_rms_eps(rms_norm_eps)
-        self.gguf_writer.add_mamba_b_dt_rms(False) # For classic Mamba we don't apply rms norm on B / DT layers
+        self.gguf_writer.add_mamba_b_dt_rms(use_b_dt_norm) # For classic Mamba we don't apply rms norm on B / DT layers
         self.gguf_writer.add_file_type(self.ftype)

     _tok_embd = None

@@ -3855,43 +3859,10 @@ class ExaoneModel(Model):
             self.gguf_writer.add_tensor(self.format_tensor_name(gguf.MODEL_TENSOR.ROPE_FREQS), np.array(rope_factors, dtype=np.float32))

         super().prepare_tensors()


-@Model.register("FalconMambaForCausalLM")
-class FalconMambaModel(MambaModel):
-    model_arch = gguf.MODEL_ARCH.MAMBA
-
-    def set_gguf_parameters(self):
-        d_model = self.find_hparam(["hidden_size", "d_model"])
-        d_conv = self.find_hparam(["conv_kernel", "d_conv"], optional=True) or 4
-        d_inner = self.find_hparam(["intermediate_size", "d_inner"], optional=True) or 2 * d_model
-        d_state = self.find_hparam(["state_size", "d_state"], optional=True) or 16
-        # ceiling division
-        # ref: https://stackoverflow.com/a/17511341/22827863
-        # ref: https://github.com/state-spaces/mamba/blob/ce59daea3a090d011d6476c6e5b97f6d58ddad8b/mamba_ssm/modules/mamba_simple.py#L58
-        dt_rank = self.find_hparam(["time_step_rank", "dt_rank"], optional=True) or -(d_model // -16)
-        rms_norm_eps = self.find_hparam(["layer_norm_epsilon", "rms_norm_eps"], optional=True) or 1e-5
-
-        # Fail early for models which don't have a block expansion factor of 2
-        assert d_inner == 2 * d_model
-
-        self.gguf_writer.add_context_length(2**20) # arbitrary value; for those who use the default
-        self.gguf_writer.add_embedding_length(d_model)
-        self.gguf_writer.add_feed_forward_length(0) # unused, but seemingly required when loading
-        self.gguf_writer.add_head_count(0) # unused, but seemingly required when loading
-        self.gguf_writer.add_block_count(self.hparams["num_hidden_layers"])
-        self.gguf_writer.add_ssm_conv_kernel(d_conv)
-        self.gguf_writer.add_mamba_b_dt_rms(True) # For FalconMamba we do apply rms norm on B / DT layers
-        self.gguf_writer.add_ssm_inner_size(d_inner)
-        self.gguf_writer.add_ssm_state_size(d_state)
-        self.gguf_writer.add_ssm_time_step_rank(dt_rank)
-        self.gguf_writer.add_layer_norm_rms_eps(rms_norm_eps)
-        self.gguf_writer.add_file_type(self.ftype)
-
-
 ###### CONVERSION LOGIC ######

 # tree of lazy tensors
 class LazyTorchTensor(gguf.LazyBase):
     _tensor_type = torch.Tensor
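Taken together, the hunks fold the FalconMamba-specific conversion into MambaModel.set_gguf_parameters and drop the separate FalconMambaModel subclass. Below is a minimal, self-contained sketch of the merged hyperparameter logic for reference only: the find_hparam helper, the returned dict, and the sample hparams values are illustrative stand-ins (the real code calls self.find_hparam and writes through gguf.GGUFWriter), not part of this commit.

# Standalone sketch of the merged MambaModel.set_gguf_parameters() flow after this refactor.
# `find_hparam` is a simplified stand-in for Model.find_hparam, and the function returns a
# plain dict instead of calling gguf.GGUFWriter methods.

def find_hparam(hparams: dict, keys: list, optional: bool = False):
    # Return the first hyperparameter whose key is present, mirroring how the
    # converter probes several possible config.json key names.
    for key in keys:
        if key in hparams:
            return hparams[key]
    if optional:
        return None
    raise KeyError(f"none of {keys} found in hparams")


def mamba_gguf_parameters(hparams: dict) -> dict:
    d_model = find_hparam(hparams, ["hidden_size", "d_model"])
    d_conv = find_hparam(hparams, ["conv_kernel", "d_conv"], optional=True) or 4
    d_inner = find_hparam(hparams, ["intermediate_size", "d_inner"], optional=True) or 2 * d_model
    d_state = find_hparam(hparams, ["state_size", "d_state"], optional=True) or 16
    # ceiling division, as in the converter: -(d_model // -16)
    dt_rank = find_hparam(hparams, ["time_step_rank", "dt_rank"], optional=True) or -(d_model // -16)
    rms_norm_eps = find_hparam(hparams, ["layer_norm_epsilon", "rms_norm_eps"], optional=True) or 1e-5
    num_hidden_layers = find_hparam(hparams, ["n_layer", "num_hidden_layers"])

    # FalconMamba applies RMS norm on the B / DT (and C) projections; classic Mamba does not,
    # so the flag is True only for model_type == "falcon_mamba". (optional=True here is a
    # simplification; the diff uses self.find_hparam(["model_type"]) directly.)
    use_b_dt_norm = find_hparam(hparams, ["model_type"], optional=True) in ["falcon_mamba"]

    # Fail early for models which don't have a block expansion factor of 2
    assert d_inner == 2 * d_model

    return {
        "embedding_length":   d_model,
        "block_count":        num_hidden_layers,
        "ssm_conv_kernel":    d_conv,
        "ssm_inner_size":     d_inner,
        "ssm_state_size":     d_state,
        "ssm_time_step_rank": dt_rank,
        "layer_norm_rms_eps": rms_norm_eps,
        "mamba_b_dt_rms":     use_b_dt_norm,
    }


if __name__ == "__main__":
    # Hypothetical FalconMamba-like config values, purely for illustration.
    falcon_mamba_like = {
        "model_type": "falcon_mamba",
        "hidden_size": 4096,
        "intermediate_size": 8192,
        "num_hidden_layers": 64,
    }
    params = mamba_gguf_parameters(falcon_mamba_like)
    print(params["mamba_b_dt_rms"])      # True for FalconMamba, False for classic Mamba configs
    print(params["ssm_time_step_rank"])  # 256 == ceil(4096 / 16)

The net behavioural change is that add_mamba_b_dt_rms now receives the computed use_b_dt_norm flag instead of a hard-coded False/True split across two classes, and add_block_count reads the layer count through find_hparam so both "n_layer" and "num_hidden_layers" configs work.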