llama : add support for Chameleon (#8543)
* convert chameleon hf to gguf
* add chameleon tokenizer tests
* fix lint
* implement chameleon graph
* add swin norm param
* return qk norm weights and biases to original format
* implement swin norm
* suppress image token output
* rem tabs
* add comment to conversion
* fix ci
* check for k norm separately
* adapt to new lora implementation
* fix layer input for swin norm
* move swin_norm in gguf writer
* add comment regarding special token regex in chameleon pre-tokenizer
* Update src/llama.cpp

  Co-authored-by: compilade <git@compilade.net>
* fix punctuation regex in chameleon pre-tokenizer (@compilade)

  Co-authored-by: compilade <git@compilade.net>
* fix lint
* trigger ci

---------

Co-authored-by: compilade <git@compilade.net>
This commit is contained in:
parent
43bcdd9703
commit
9a913110cf
10 changed files with 505 additions and 2 deletions
|
@ -640,6 +640,9 @@ class Model:
|
|||
if chkhsh == "fcace8b9cac38ce847670c970cd5892031a753a1ef381abd1d9af00f713da085":
|
||||
# ref: https://huggingface.co/microsoft/phi-2
|
||||
res = "phi-2"
|
||||
if chkhsh == "60824e3c0d9401f89943cbb2fff727f0e2d4c545ba4df2d6e4f09a6db0f5b450":
|
||||
# ref: https://huggingface.co/facebook/chameleon-7b
|
||||
res = "chameleon"
|
||||
|
||||
if res is None:
|
||||
logger.warning("\n")
|
||||
|
@ -4138,6 +4141,47 @@ class GraniteMoeModel(GraniteModel):
|
|||
return super().modify_tensors(data_torch, name, bid)
|
||||
|
||||
|
||||
@Model.register("ChameleonForCausalLM")
class ChameleonModel(Model):
    """Converter for checkpoints registered as ``ChameleonForCausalLM``.

    Writes the ``swin_norm`` hyperparameter, exports the vocabulary through
    ``_set_vocab_gpt2`` and rearranges the attention projection / QK-norm
    tensors before they are mapped to their GGUF names.
    """

    model_arch = gguf.MODEL_ARCH.CHAMELEON

    def set_gguf_parameters(self):
        """Write the base-class parameters plus the ``swin_norm`` flag.

        ``swin_norm`` is read from ``self.hparams`` and defaults to ``False``
        when the key is absent.
        """
        super().set_gguf_parameters()
        self.gguf_writer.add_swin_norm(self.hparams.get("swin_norm", False))

    def set_vocab(self):
        """Export the vocabulary by delegating to ``_set_vocab_gpt2``."""
        self._set_vocab_gpt2()

    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
        """Rearrange one checkpoint tensor and map it to its GGUF name.

        Args:
            data_torch: tensor loaded from the checkpoint.
            name: checkpoint tensor name; its prefix/suffix selects the
                transformation that is applied.
            bid: block index supplied by the caller (not used here).

        Returns:
            An empty list for tensors whose name starts with
            ``model.vqmodel`` (they are dropped), otherwise a one-element
            list ``[(mapped_name, tensor)]``.
        """
        # ignore image tokenizer for now
        # TODO: remove this once image support is implemented for Chameleon
        if name.startswith("model.vqmodel"):
            return []

        n_head = self.hparams["num_attention_heads"]
        n_kv_head = self.hparams.get("num_key_value_heads")
        hidden_dim = self.hparams.get("hidden_size")

        # The suffixes below are mutually exclusive, so at most one branch runs.
        if name.endswith(("q_proj.weight", "q_proj.bias")):
            data_torch = LlamaModel.permute(data_torch, n_head, n_head)
        elif name.endswith(("k_proj.weight", "k_proj.bias")):
            data_torch = LlamaModel.permute(data_torch, n_head, n_kv_head)
        elif name.endswith(("q_norm.weight", "q_norm.bias")):
            data_torch = ChameleonModel._reverse_hf_permute(data_torch, n_head, hidden_dim)
        elif name.endswith(("k_norm.weight", "k_norm.bias")):
            # The k-norm is repeated once per KV head, but the per-head size is
            # still hidden_size // num_attention_heads (the linked HF conversion
            # script uses dim // n_heads for both norms). Deriving it from the KV
            # head count only works when both head counts are equal.
            # When the KV head count is not configured, fall back to n_head so
            # the integer division / repeat below never sees None.
            k_norm_rows = n_head if n_kv_head is None else n_kv_head
            data_torch = ChameleonModel._reverse_hf_permute(
                data_torch, k_norm_rows, hidden_dim, head_dim=hidden_dim // n_head
            )

        return [(self.map_tensor_name(name), data_torch)]

    # see: https://github.com/huggingface/transformers/blob/72fb02c47dbbe1999ae105319f24631cad6e2e00/src/transformers/models/chameleon/convert_chameleon_weights_to_hf.py#L176-L203
    @staticmethod
    def _reverse_hf_permute(data_torch, n_heads, hidden_dim, head_dim=None):
        """Undo the element reordering applied to QK-norm tensors by the HF conversion.

        Row 0 of ``data_torch`` is viewed as ``(2, head_dim // 2)``, transposed,
        flattened into a single row and that row is repeated ``n_heads`` times
        along dim 0.

        Args:
            data_torch: norm weight or bias; only its first row is read.
            n_heads: number of rows in the returned tensor.
            hidden_dim: model hidden size, used to derive ``head_dim`` when
                ``head_dim`` is not given.
            head_dim: explicit per-head size. Defaults to
                ``hidden_dim // n_heads``; pass it when ``n_heads`` is not the
                attention head count (e.g. the KV head count).

        Returns:
            Tensor with ``n_heads`` identical rows of ``head_dim`` elements.
        """
        if head_dim is None:
            head_dim = hidden_dim // n_heads
        first_row = data_torch[0].view(2, head_dim // 2).t().reshape(1, -1)
        return first_row.repeat_interleave(n_heads, 0)
|
||||
|
||||
|
||||
###### CONVERSION LOGIC ######
|
||||
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue