layer norm
This commit is contained in:
parent
435cfd788b
commit
b3ba05e5bc
5 changed files with 19 additions and 11 deletions
|
@ -2046,7 +2046,7 @@ class OuteTTSVocoderModel(Model):
|
|||
logger.debug(f"Skipping {name!r}")
|
||||
return []
|
||||
|
||||
print(f"{self.map_tensor_name(name)} -> {data_torch.shape}")
|
||||
logger.info(f"{self.map_tensor_name(name)} -> {data_torch.shape}")
|
||||
|
||||
return [(self.map_tensor_name(name), data_torch)]
|
||||
|
||||
|
|
|
@ -88,6 +88,16 @@ def flatten_state_dict(state_dict, parent_key='', sep='.'):
|
|||
if new_key == "feature_extractor.encodec.quantizer.vq.layers.0._codebook.embed":
|
||||
new_key = "backbone.embedding.weight"
|
||||
|
||||
# these are the only rows used
|
||||
# ref: https://github.com/edwko/OuteTTS/blob/a613e79c489d8256dd657ea9168d78de75895d82/outetts/wav_tokenizer/audio_codec.py#L100
|
||||
if new_key == "backbone.norm.scale.weight":
|
||||
new_key = "backbone.norm.weight"
|
||||
value = value[0]
|
||||
|
||||
if new_key == "backbone.norm.shift.weight":
|
||||
new_key = "backbone.norm.bias"
|
||||
value = value[0]
|
||||
|
||||
size_mb = value.element_size() * value.nelement() / (1024 * 1024)
|
||||
print(f"{size_mb:8.2f} MB - {new_key}: {value.shape}")
|
||||
|
||||
|
|
|
@ -267,7 +267,6 @@ class MODEL_ARCH(IntEnum):
|
|||
class MODEL_TENSOR(IntEnum):
|
||||
TOKEN_EMBD = auto()
|
||||
TOKEN_EMBD_NORM = auto()
|
||||
TOKEN_EMBD_SHIFT = auto()
|
||||
TOKEN_TYPES = auto()
|
||||
POS_EMBD = auto()
|
||||
OUTPUT = auto()
|
||||
|
@ -451,7 +450,6 @@ MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
|
|||
TENSOR_NAMES: dict[MODEL_TENSOR, str] = {
|
||||
MODEL_TENSOR.TOKEN_EMBD: "token_embd",
|
||||
MODEL_TENSOR.TOKEN_EMBD_NORM: "token_embd_norm",
|
||||
MODEL_TENSOR.TOKEN_EMBD_SHIFT: "token_embd_shift",
|
||||
MODEL_TENSOR.TOKEN_TYPES: "token_types",
|
||||
MODEL_TENSOR.POS_EMBD: "position_embd",
|
||||
MODEL_TENSOR.OUTPUT_NORM: "output_norm",
|
||||
|
@ -1415,7 +1413,6 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
|
|||
MODEL_ARCH.OUTETTS_VOC: [
|
||||
MODEL_TENSOR.TOKEN_EMBD,
|
||||
MODEL_TENSOR.TOKEN_EMBD_NORM,
|
||||
MODEL_TENSOR.TOKEN_EMBD_SHIFT,
|
||||
MODEL_TENSOR.CONV1D,
|
||||
MODEL_TENSOR.CONV_NEXT_DW,
|
||||
MODEL_TENSOR.CONV_NEXT_NORM,
|
||||
|
|
|
@ -43,11 +43,7 @@ class TensorNameMap:
|
|||
"emb_ln", # nomic-bert
|
||||
"transformer.norm", # openelm
|
||||
"rwkv.blocks.0.pre_ln", # rwkv
|
||||
"backbone.norm.scale", # outetts
|
||||
),
|
||||
|
||||
MODEL_TENSOR.TOKEN_EMBD_SHIFT: (
|
||||
"backbone.norm.shift", # outetts
|
||||
"backbone.norm", # outetts
|
||||
),
|
||||
|
||||
# Position embeddings
|
||||
|
|
|
@ -505,7 +505,6 @@ struct LLM_KV {
|
|||
enum llm_tensor {
|
||||
LLM_TENSOR_TOKEN_EMBD,
|
||||
LLM_TENSOR_TOKEN_EMBD_NORM,
|
||||
LLM_TENSOR_TOKEN_EMBD_SHIFT,
|
||||
LLM_TENSOR_TOKEN_TYPES,
|
||||
LLM_TENSOR_POS_EMBD,
|
||||
LLM_TENSOR_OUTPUT,
|
||||
|
@ -1619,7 +1618,6 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
|
|||
{
|
||||
{ LLM_TENSOR_TOKEN_EMBD, "token_embd" },
|
||||
{ LLM_TENSOR_TOKEN_EMBD_NORM, "token_embd_norm" },
|
||||
{ LLM_TENSOR_TOKEN_EMBD_SHIFT, "token_embd_shift" },
|
||||
{ LLM_TENSOR_CONV1D, "conv1d" },
|
||||
{ LLM_TENSOR_CONV_NEXT_DW, "conv_next.dw" },
|
||||
{ LLM_TENSOR_CONV_NEXT_NORM, "conv_next.norm" },
|
||||
|
@ -9519,6 +9517,9 @@ static bool llm_load_tensors(
|
|||
{
|
||||
model.tok_embd = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, 0);
|
||||
|
||||
model.tok_norm = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD_NORM, "weight"), {768}, 0);
|
||||
model.tok_norm_b = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD_NORM, "bias"), {768}, 0);
|
||||
|
||||
model.conv_1d = create_tensor(tn(LLM_TENSOR_CONV1D, "weight"), {7, n_embd, 768}, 0);
|
||||
model.conv_1d_b = create_tensor(tn(LLM_TENSOR_CONV1D, "bias"), {768}, 0);
|
||||
|
||||
|
@ -17337,6 +17338,10 @@ struct llm_build_context {
|
|||
LLM_NORM_GROUP, cb, 0);
|
||||
}
|
||||
|
||||
cur = llm_build_norm(ctx0, ggml_cont(ctx0, ggml_transpose(ctx0, cur)), hparams,
|
||||
model.tok_norm,
|
||||
model.tok_norm_b,
|
||||
LLM_NORM, cb, -1);
|
||||
|
||||
printf("cur: %d %d %d\n", cur->ne[0], cur->ne[1], cur->ne[2]);
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue