Flake8 fixes

Galunid 2023-10-31 15:38:24 +01:00
parent dc3115f2a3
commit b2ba44eab2
2 changed files with 46 additions and 45 deletions
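The changes below are mostly mechanical flake8 fixes: calls that exceed the line-length limit (pycodestyle E501) are wrapped by breaking inside the opening parenthesis, blank lines around class and method definitions are adjusted (typically E302/E303/W391), and one "!= None" comparison is rewritten as "is not None" (E711). A rough sketch of the wrapping pattern, using a hypothetical hparams dict and an assumed 125-column limit, neither taken from the repository:

# Hypothetical illustration of the wrapping style used throughout this commit.
# flake8 (E501) flags lines longer than the configured limit, assumed here to be 125 columns.
hparams = {"n_layers": 32, "num_hidden_layers": 32}

# A single long call like this can exceed the limit:
# block_count = hparams.get("n_layers", hparams.get("num_hidden_layers", hparams.get("n_layer")))

# Wrapping inside the call keeps behaviour identical while satisfying the checker:
block_count = hparams.get(
    "n_layers", hparams.get("num_hidden_layers", hparams.get("n_layer")))
print(block_count)  # -> 32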

View file

@@ -1,19 +1,13 @@
 #!/usr/bin/env python3
 from __future__ import annotations
+from util import parse_args
-import os
 import sys
-from pathlib import Path
-if 'NO_LOCAL_GGUF' not in os.environ:
-    sys.path.insert(1, str(Path(__file__).parent / 'gguf-py' / 'gguf'))
-import gguf
 import model
-import util
-args = util.parse_args()
+args = parse_args()
 dir_model = args.model
 ftype = args.ftype

View file

@@ -12,6 +12,7 @@ from typing import TypeAlias, Any
 NDArray: TypeAlias = 'np.ndarray[Any, Any]'
+
 class Model:
     def __init__(self, dir_model: Path, ftype: int, fname_out: Path):
         self.dir_model = dir_model
@@ -109,7 +110,8 @@ class Model:
     def set_gguf_parameters(self):
         self.gguf_writer.add_name(self.dir_model.name)
-        self.gguf_writer.add_block_count(self.hparams.get("n_layers", self.hparams.get("num_hidden_layers", self.hparams.get("n_layer"))))
+        self.gguf_writer.add_block_count(self.hparams.get(
+            "n_layers", self.hparams.get("num_hidden_layers", self.hparams.get("n_layer"))))
         if "max_position_embeddings" in self.hparams:
             self.gguf_writer.add_context_length(self.hparams["max_position_embeddings"])
         if "hidden_size" in self.hparams:
@@ -118,7 +120,8 @@ class Model:
             self.gguf_writer.add_feed_forward_length(self.hparams["intermediate_size"])
         if "num_attention_head" in self.hparams:
             self.gguf_writer.add_head_count(self.hparams["num_attention_heads"])
-        self.gguf_writer.add_parallel_residual(self.hparams["use_parallel_residual"] if "use_parallel_residual" in self.hparams else True)
+        self.gguf_writer.add_parallel_residual(
+            self.hparams["use_parallel_residual"] if "use_parallel_residual" in self.hparams else True)

     def write_tensors(self):
         block_count = self.hparams.get("n_layers", self.hparams.get("num_hidden_layers", self.hparams.get("n_layer")))
@@ -176,7 +179,6 @@ class Model:
             hparams = json.load(f)
         return hparams

-
     @staticmethod
     def from_model_architecture(model_architecture):
         if model_architecture == "StableLMEpochForCausalLM":
@@ -199,10 +201,12 @@ class Model:
             return PersimmonModel
         return Model

+
 class StableLMModel(Model):
     def set_gguf_parameters(self):
         super().set_gguf_parameters()
-        self.gguf_writer.add_rope_dimension_count(int(self.hparams["rope_pct"]*(self.hparams["hidden_size"] // self.hparams["num_attention_heads"])))
+        self.gguf_writer.add_rope_dimension_count(
+            int(self.hparams["rope_pct"]*(self.hparams["hidden_size"] // self.hparams["num_attention_heads"])))
         self.gguf_writer.add_layer_norm_eps(1e-5)
@@ -215,11 +219,14 @@ class GPTNeoXModel(Model):
         self.gguf_writer.add_embedding_length(self.hparams["hidden_size"])
         self.gguf_writer.add_block_count(block_count)
         self.gguf_writer.add_feed_forward_length(self.hparams["intermediate_size"])
-        self.gguf_writer.add_rope_dimension_count(int(self.hparams["rotary_pct"]*(self.hparams["hidden_size"]//self.hparams["num_attention_heads"])))
+        self.gguf_writer.add_rope_dimension_count(
+            int(self.hparams["rotary_pct"]*(self.hparams["hidden_size"]//self.hparams["num_attention_heads"])))
         self.gguf_writer.add_head_count(self.hparams["num_attention_heads"])
-        self.gguf_writer.add_parallel_residual(self.hparams["use_parallel_residual"] if "use_parallel_residual" in self.hparams else True)
+        self.gguf_writer.add_parallel_residual(
+            self.hparams["use_parallel_residual"] if "use_parallel_residual" in self.hparams else True)
         self.gguf_writer.add_layer_norm_eps(self.hparams["layer_norm_eps"])

+
 class BloomModel(Model):
     def set_gguf_parameters(self):
         self.gguf_writer.add_name("Bloom")
@@ -307,6 +314,7 @@ class BloomModel(Model):
             self.gguf_writer.add_tensor("output.weight", data)
             print(name, "=>", "output.weight" + ", shape = " + str(data.shape) + ", " + str(old_dtype) + " --> " + str(data.dtype))  # noqa

+
 class MPTModel(Model):
     def set_gguf_parameters(self):
         block_count = self.hparams["n_layers"]
@@ -370,7 +378,6 @@ class MPTModel(Model):
             self.gguf_writer.add_tensor("output.weight", data)


-
 class BaichuanModel(Model):
     def set_vocab(self):
         from sentencepiece import SentencePieceProcessor  # type: ignore[import]
@@ -428,7 +435,6 @@ class BaichuanModel(Model):
                     scores.append(-1000.0)
                     toktypes.append(4)  # user-defined token type

-
         self.gguf_writer.add_tokenizer_model("llama")
         self.gguf_writer.add_token_list(tokens)
         self.gguf_writer.add_token_scores(scores)
@@ -474,12 +480,11 @@ class BaichuanModel(Model):
         self.gguf_writer.add_head_count_kv(head_count_kv)
         self.gguf_writer.add_layer_norm_rms_eps(self.hparams["rms_norm_eps"])
-        if "rope_scaling" in self.hparams and self.hparams["rope_scaling"] != None and "factor" in self.hparams["rope_scaling"]:
+        if "rope_scaling" in self.hparams and self.hparams["rope_scaling"] is not None and "factor" in self.hparams["rope_scaling"]:
             if "type" in self.hparams["rope_scaling"]:
                 if self.hparams["rope_scaling"]["type"] == "linear":
                     self.gguf_writer.add_rope_scale_linear(self.hparams["rope_scaling"]["factor"])

-
     def _reverse_hf_permute(self, weights: NDArray, n_head: int, n_kv_head: int | None = None) -> NDArray:
         if n_kv_head is not None and n_head != n_kv_head:
             n_head //= n_kv_head
@@ -508,13 +513,15 @@ class BaichuanModel(Model):
         else:
             head_count_kv = head_count

         for i in range(block_count):
             if f"model.layers.{i}.self_attn.W_pack.weight" in model_kv:
                 print(f"Unpacking and permuting layer {i}")
-                model_kv[f"model.layers.{i}.self_attn.q_proj.weight"] = self._reverse_hf_permute_part(model_kv[f"model.layers.{i}.self_attn.W_pack.weight"],0,head_count,head_count)
-                model_kv[f"model.layers.{i}.self_attn.k_proj.weight"] = self._reverse_hf_permute_part(model_kv[f"model.layers.{i}.self_attn.W_pack.weight"],1,head_count,head_count_kv)
-                model_kv[f"model.layers.{i}.self_attn.v_proj.weight"] = self._reverse_hf_part(model_kv[f"model.layers.{i}.self_attn.W_pack.weight"],2)
+                model_kv[f"model.layers.{i}.self_attn.q_proj.weight"] = self._reverse_hf_permute_part(
+                    model_kv[f"model.layers.{i}.self_attn.W_pack.weight"], 0, head_count, head_count)
+                model_kv[f"model.layers.{i}.self_attn.k_proj.weight"] = self._reverse_hf_permute_part(
+                    model_kv[f"model.layers.{i}.self_attn.W_pack.weight"], 1, head_count, head_count_kv)
+                model_kv[f"model.layers.{i}.self_attn.v_proj.weight"] = self._reverse_hf_part(
+                    model_kv[f"model.layers.{i}.self_attn.W_pack.weight"], 2)
                 del model_kv[f"model.layers.{i}.self_attn.W_pack.weight"]

         for name, data in model_kv.items():
@@ -647,6 +654,7 @@ class FalconModel(Model):
             self.gguf_writer.add_tensor(new_name, data)

+
 class StarCoderModel(Model):
     def set_gguf_parameters(self):
         block_count = self.hparams["n_layer"]
@@ -753,6 +761,7 @@ class RefactModel(Model):
             self.gguf_writer.add_tensor(new_name, data)

+
 class PersimmonModel(Model):
     def set_gguf_parameters(self):
         block_count = self.hparams.get("num_layers", self.hparams.get("num_hidden_layers"))
@@ -797,7 +806,6 @@ class PersimmonModel(Model):
             print(new_name + ", n_dims = " + str(n_dims) + ", " + str(old_dtype) + " --> " + str(data.dtype))
             self.gguf_writer.add_tensor(new_name, data)

-
     def _get_sentencepiece_tokenizer_info(self):
         from sentencepiece import SentencePieceProcessor
         tokenizer_path = self.dir_model / 'tokenizer.model'
@@ -832,4 +840,3 @@ class PersimmonModel(Model):
                 toktypes.append(toktype)
             pass
         return tokens, scores, toktypes
-
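The one change above that is more than line wrapping is the comparison rewrite in the BaichuanModel hunk: "!= None" becomes "is not None" (flake8 E711). Identity checks against None are preferred because == and != can be redefined by a class's __eq__, while "is" always tests object identity. A small self-contained illustration; the RopeScaling class is hypothetical and exists only to show the difference:

class RopeScaling:
    # Hypothetical config object whose __eq__ treats an empty config as equal to None.
    def __init__(self, factor=None):
        self.factor = factor

    def __eq__(self, other):
        if other is None:
            return self.factor is None
        return isinstance(other, RopeScaling) and self.factor == other.factor


cfg = RopeScaling()      # a config object exists, but no factor is set
print(cfg != None)       # False -- the __eq__ override makes this misleading  # noqa: E711
print(cfg is not None)   # True  -- identity is unaffected by __eq__

In the converter itself the value comes from a JSON config (hparams is loaded with json.load), so both spellings behave the same there; the rewrite silences the linter and guards against surprises like the one above.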