llama : support for falcon-mamba architecture (#9074)
* feat: initial support for llama.cpp
* fix: lint
* refactor: better refactor
* Update src/llama.cpp
  Co-authored-by: compilade <git@compilade.net>
* Update src/llama.cpp
  Co-authored-by: compilade <git@compilade.net>
* fix: address comments
* Update convert_hf_to_gguf.py
  Co-authored-by: compilade <git@compilade.net>
* fix: add more cleanup and harmonization
* fix: lint
* Update gguf-py/gguf/gguf_writer.py
  Co-authored-by: compilade <git@compilade.net>
* fix: change name
* Apply suggestions from code review
  Co-authored-by: compilade <git@compilade.net>
* add in operator
* fix: add `dt_b_c_rms` in `llm_load_print_meta`
* fix: correct printf format for bool
* fix: correct print format
* Update src/llama.cpp
  Co-authored-by: compilade <git@compilade.net>
* llama : quantize more Mamba tensors
* llama : use f16 as the fallback of fallback quant types

---------

Co-authored-by: compilade <git@compilade.net>
This commit is contained in:
parent
f63f603c87
commit
b40eb84895
5 changed files with 36 additions and 24 deletions
|
@ -130,6 +130,7 @@ class Keys:
|
|||
INNER_SIZE = "{arch}.ssm.inner_size"
|
||||
STATE_SIZE = "{arch}.ssm.state_size"
|
||||
TIME_STEP_RANK = "{arch}.ssm.time_step_rank"
|
||||
DT_B_C_RMS = "{arch}.ssm.dt_b_c_rms"
|
||||
|
||||
class Tokenizer:
|
||||
MODEL = "tokenizer.ggml.model"
|
||||
|
@ -1372,6 +1373,7 @@ KEY_SSM_CONV_KERNEL = Keys.SSM.CONV_KERNEL
|
|||
KEY_SSM_INNER_SIZE = Keys.SSM.INNER_SIZE
|
||||
KEY_SSM_STATE_SIZE = Keys.SSM.STATE_SIZE
|
||||
KEY_SSM_TIME_STEP_RANK = Keys.SSM.TIME_STEP_RANK
|
||||
KEY_SSM_DT_B_C_RMS = Keys.SSM.DT_B_C_RMS
|
||||
|
||||
# tokenization
|
||||
KEY_TOKENIZER_MODEL = Keys.Tokenizer.MODEL
|
||||
|
|
|
@ -730,6 +730,9 @@ class GGUFWriter:
|
|||
def add_ssm_time_step_rank(self, value: int) -> None:
|
||||
self.add_uint32(Keys.SSM.TIME_STEP_RANK.format(arch=self.arch), value)
|
||||
|
||||
def add_ssm_dt_b_c_rms(self, value: bool) -> None:
|
||||
self.add_bool(Keys.SSM.DT_B_C_RMS.format(arch=self.arch), value)
|
||||
|
||||
def add_tokenizer_model(self, model: str) -> None:
|
||||
self.add_string(Keys.Tokenizer.MODEL, model)
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue