llama : support for falcon-mamba architecture (#9074)

* feat: initial support for llama.cpp

* fix: lint

* refactor: better refactor

* Update src/llama.cpp

Co-authored-by: compilade <git@compilade.net>

* Update src/llama.cpp

Co-authored-by: compilade <git@compilade.net>

* fix: address comments

* Update convert_hf_to_gguf.py

Co-authored-by: compilade <git@compilade.net>

* fix: add more cleanup and harmonization

* fix: lint

* Update gguf-py/gguf/gguf_writer.py

Co-authored-by: compilade <git@compilade.net>

* fix: change name

* Apply suggestions from code review

Co-authored-by: compilade <git@compilade.net>

* add in operator

* fix: add `dt_b_c_rms` in `llm_load_print_meta`

* fix: correct printf format for bool

* fix: correct print format

* Update src/llama.cpp

Co-authored-by: compilade <git@compilade.net>

* llama : quantize more Mamba tensors

* llama : use f16 as the fallback of fallback quant types

---------

Co-authored-by: compilade <git@compilade.net>
This commit is contained in:
Younes Belkada 2024-08-21 12:06:36 +04:00 committed by GitHub
parent f63f603c87
commit b40eb84895
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 36 additions and 24 deletions

View file

@ -130,6 +130,7 @@ class Keys:
INNER_SIZE = "{arch}.ssm.inner_size"
STATE_SIZE = "{arch}.ssm.state_size"
TIME_STEP_RANK = "{arch}.ssm.time_step_rank"
DT_B_C_RMS = "{arch}.ssm.dt_b_c_rms"
class Tokenizer:
MODEL = "tokenizer.ggml.model"
@ -1372,6 +1373,7 @@ KEY_SSM_CONV_KERNEL = Keys.SSM.CONV_KERNEL
KEY_SSM_INNER_SIZE = Keys.SSM.INNER_SIZE
KEY_SSM_STATE_SIZE = Keys.SSM.STATE_SIZE
KEY_SSM_TIME_STEP_RANK = Keys.SSM.TIME_STEP_RANK
KEY_SSM_DT_B_C_RMS = Keys.SSM.DT_B_C_RMS
# tokenization
KEY_TOKENIZER_MODEL = Keys.Tokenizer.MODEL

View file

@ -730,6 +730,9 @@ class GGUFWriter:
def add_ssm_time_step_rank(self, value: int) -> None:
self.add_uint32(Keys.SSM.TIME_STEP_RANK.format(arch=self.arch), value)
def add_ssm_dt_b_c_rms(self, value: bool) -> None:
self.add_bool(Keys.SSM.DT_B_C_RMS.format(arch=self.arch), value)
def add_tokenizer_model(self, model: str) -> None:
self.add_string(Keys.Tokenizer.MODEL, model)