[Untested] Initial Persimmon support
parent 94ba1db24a
commit 6f6856c6ea
1 changed file with 83 additions and 0 deletions
model.py (+83, -0)
@@ -53,6 +53,7 @@ class Model:
            return gguf.MODEL_ARCH.STARCODER
        if arch == "GPTRefactForCausalLM":
            return gguf.MODEL_ARCH.REFACT
        if arch == "PersimmonForCausalLM":
            return gguf.MODEL_ARCH.PERSIMMON

        raise NotImplementedError(f'Architecture "{arch}" not supported!')

@@ -749,3 +750,85 @@ class RefactModel(Model):
        self.gguf_writer.add_tensor(new_name, data)


class PersimmonModel(Model):
    def set_gguf_parameters(self):
        block_count = self.hparams["num_layers"]
        head_count = self.hparams["num_attention_heads"]
        head_count_kv = head_count
        ctx_length = self.hparams["seq_length"]
        hidden_size = self.hparams["hidden_size"]

        self.gguf_writer.add_name('persimmon-8b-chat')
        self.gguf_writer.add_context_length(ctx_length)
        self.gguf_writer.add_embedding_length(hidden_size)
        self.gguf_writer.add_block_count(block_count)
        self.gguf_writer.add_feed_forward_length(self.hparams["ffn_hidden_size"])
        self.gguf_writer.add_rope_dimension_count(hidden_size // head_count)
        self.gguf_writer.add_head_count(head_count)
        self.gguf_writer.add_head_count_kv(head_count_kv)
        self.gguf_writer.add_rope_freq_base(self.hparams["rotary_emb_base"])
        self.gguf_writer.add_layer_norm_eps(self.hparams["layernorm_epsilon"])

    def set_vocab(self):
        tokens, scores, toktypes = self._get_sentencepiece_tokenizer_info()
        self.gguf_writer.add_tokenizer_model('llama')
        self.gguf_writer.add_token_list(tokens)
        self.gguf_writer.add_token_scores(scores)
        self.gguf_writer.add_token_types(toktypes)
        self.gguf_writer.add_bos_token_id(71013)
        # self.gguf_writer.add_eos_token_id(71013)

    def write_tensors(self):
        block_count = self.hparams["num_layers"]
        tensor_map = gguf.get_tensor_name_map(self.model_arch, block_count)
        print(tensor_map)
        for name, data in self.get_tensors():
            if name.endswith(".self_attention.rotary_emb.inv_freq"):
                continue
            old_dtype = data.dtype
            # TODO: FP16 conversion produces garbage outputs. (Q8_0 does not, so..?)
            data = data.to(torch.float32).squeeze().numpy()
            new_name = tensor_map.get_name(name, try_suffixes = (".weight", ".bias"))
            if new_name is None:
                print("Can not map tensor '" + name + "'")
                sys.exit()
            n_dims = len(data.shape)
            print(new_name + ", n_dims = " + str(n_dims) + ", " + str(old_dtype) + " --> " + str(data.dtype))
            self.gguf_writer.add_tensor(new_name, data)

    def _get_sentencepiece_tokenizer_info(self):
        from sentencepiece import SentencePieceProcessor
        tokenizer_path = self.dir_model / 'tokenizer.model'
        tokenizer = SentencePieceProcessor(str(tokenizer_path))

        print('gguf: getting sentencepiece tokenizer from', tokenizer_path)
        print('gguf: adding tokens')
        tokens: list[bytes] = []
        scores: list[float] = []
        toktypes: list[int] = []

        for i in range(tokenizer.vocab_size()):
            text: bytes
            score: float

            piece = tokenizer.id_to_piece(i)
            text = piece.encode("utf-8")
            score = tokenizer.get_score(i)

            toktype = 1
            if tokenizer.is_unknown(i):
                toktype = 2
            if tokenizer.is_control(i):
                toktype = 3
            if tokenizer.is_unused(i):
                toktype = 5
            if tokenizer.is_byte(i):
                toktype = 6

            tokens.append(text)
            scores.append(score)
            toktypes.append(toktype)

        return tokens, scores, toktypes
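The integer token types collected above (1, 2, 3, 5, 6) correspond to the TokenType enum in the gguf Python package (NORMAL=1, UNKNOWN=2, CONTROL=3, USER_DEFINED=4, UNUSED=5, BYTE=6). A minimal sketch of the same classification using those named constants, assuming the gguf-py package that ships with llama.cpp; the helper name classify_piece is made up for illustration:

from gguf import TokenType
from sentencepiece import SentencePieceProcessor

def classify_piece(tokenizer: SentencePieceProcessor, i: int) -> int:
    # Same precedence as the loop above: is_byte wins over is_unused,
    # is_control, and is_unknown; anything unflagged is a normal token.
    if tokenizer.is_byte(i):
        return TokenType.BYTE        # 6
    if tokenizer.is_unused(i):
        return TokenType.UNUSED      # 5
    if tokenizer.is_control(i):
        return TokenType.CONTROL     # 3
    if tokenizer.is_unknown(i):
        return TokenType.UNKNOWN     # 2
    return TokenType.NORMAL          # 1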
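On the FP16 TODO in write_tensors: this commit forces every tensor to float32 before writing. Other GGUF conversion scripts commonly down-convert only 2-D ".weight" matrices to float16 and leave 1-D tensors (norm weights, biases) in float32. A hedged sketch of that gating, under the assumption that data is a NumPy array; maybe_to_fp16 and want_fp16 are invented names, and this is not what the commit itself does:

import numpy as np

def maybe_to_fp16(name: str, data: np.ndarray, want_fp16: bool) -> np.ndarray:
    # Only large 2-D ".weight" matrices are worth storing as float16;
    # 1-D tensors stay float32.
    if want_fp16 and data.dtype == np.float32 and data.ndim == 2 and name.endswith(".weight"):
        return data.astype(np.float16)
    return data

Whether that would fix the garbage FP16 output for Persimmon is untested; the commit sidesteps the question by keeping everything in float32.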