From b3ba05e5bc62de2154198a8f335b58d54457e259 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Tue, 10 Dec 2024 22:37:26 +0200 Subject: [PATCH] layer norm --- convert_hf_to_gguf.py | 2 +- examples/tts/convert_pt_to_hf.py | 10 ++++++++++ gguf-py/gguf/constants.py | 3 --- gguf-py/gguf/tensor_mapping.py | 6 +----- src/llama.cpp | 9 +++++++-- 5 files changed, 19 insertions(+), 11 deletions(-) diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index ebeb3840c..a86490831 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -2046,7 +2046,7 @@ class OuteTTSVocoderModel(Model): logger.debug(f"Skipping {name!r}") return [] - print(f"{self.map_tensor_name(name)} -> {data_torch.shape}") + logger.info(f"{self.map_tensor_name(name)} -> {data_torch.shape}") return [(self.map_tensor_name(name), data_torch)] diff --git a/examples/tts/convert_pt_to_hf.py b/examples/tts/convert_pt_to_hf.py index 389d2de50..4a0d4bcc8 100644 --- a/examples/tts/convert_pt_to_hf.py +++ b/examples/tts/convert_pt_to_hf.py @@ -88,6 +88,16 @@ def flatten_state_dict(state_dict, parent_key='', sep='.'): if new_key == "feature_extractor.encodec.quantizer.vq.layers.0._codebook.embed": new_key = "backbone.embedding.weight" + # these are the only rows used + # ref: https://github.com/edwko/OuteTTS/blob/a613e79c489d8256dd657ea9168d78de75895d82/outetts/wav_tokenizer/audio_codec.py#L100 + if new_key == "backbone.norm.scale.weight": + new_key = "backbone.norm.weight" + value = value[0] + + if new_key == "backbone.norm.shift.weight": + new_key = "backbone.norm.bias" + value = value[0] + size_mb = value.element_size() * value.nelement() / (1024 * 1024) print(f"{size_mb:8.2f} MB - {new_key}: {value.shape}") diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py index 14e68cffa..81e434d11 100644 --- a/gguf-py/gguf/constants.py +++ b/gguf-py/gguf/constants.py @@ -267,7 +267,6 @@ class MODEL_ARCH(IntEnum): class MODEL_TENSOR(IntEnum): TOKEN_EMBD = auto() TOKEN_EMBD_NORM = auto() - TOKEN_EMBD_SHIFT = auto() TOKEN_TYPES = auto() POS_EMBD = auto() OUTPUT = auto() @@ -451,7 +450,6 @@ MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = { TENSOR_NAMES: dict[MODEL_TENSOR, str] = { MODEL_TENSOR.TOKEN_EMBD: "token_embd", MODEL_TENSOR.TOKEN_EMBD_NORM: "token_embd_norm", - MODEL_TENSOR.TOKEN_EMBD_SHIFT: "token_embd_shift", MODEL_TENSOR.TOKEN_TYPES: "token_types", MODEL_TENSOR.POS_EMBD: "position_embd", MODEL_TENSOR.OUTPUT_NORM: "output_norm", @@ -1415,7 +1413,6 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = { MODEL_ARCH.OUTETTS_VOC: [ MODEL_TENSOR.TOKEN_EMBD, MODEL_TENSOR.TOKEN_EMBD_NORM, - MODEL_TENSOR.TOKEN_EMBD_SHIFT, MODEL_TENSOR.CONV1D, MODEL_TENSOR.CONV_NEXT_DW, MODEL_TENSOR.CONV_NEXT_NORM, diff --git a/gguf-py/gguf/tensor_mapping.py b/gguf-py/gguf/tensor_mapping.py index 4355ccf11..872205e77 100644 --- a/gguf-py/gguf/tensor_mapping.py +++ b/gguf-py/gguf/tensor_mapping.py @@ -43,11 +43,7 @@ class TensorNameMap: "emb_ln", # nomic-bert "transformer.norm", # openelm "rwkv.blocks.0.pre_ln", # rwkv - "backbone.norm.scale", # outetts - ), - - MODEL_TENSOR.TOKEN_EMBD_SHIFT: ( - "backbone.norm.shift", # outetts + "backbone.norm", # outetts ), # Position embeddings diff --git a/src/llama.cpp b/src/llama.cpp index 31ce4b31a..6c38d9315 100644 --- a/src/llama.cpp +++ b/src/llama.cpp @@ -505,7 +505,6 @@ struct LLM_KV { enum llm_tensor { LLM_TENSOR_TOKEN_EMBD, LLM_TENSOR_TOKEN_EMBD_NORM, - LLM_TENSOR_TOKEN_EMBD_SHIFT, LLM_TENSOR_TOKEN_TYPES, LLM_TENSOR_POS_EMBD, LLM_TENSOR_OUTPUT, @@ -1619,7 +1618,6 @@ static const std::map> LLM_TENSOR_N { { LLM_TENSOR_TOKEN_EMBD, "token_embd" }, { LLM_TENSOR_TOKEN_EMBD_NORM, "token_embd_norm" }, - { LLM_TENSOR_TOKEN_EMBD_SHIFT, "token_embd_shift" }, { LLM_TENSOR_CONV1D, "conv1d" }, { LLM_TENSOR_CONV_NEXT_DW, "conv_next.dw" }, { LLM_TENSOR_CONV_NEXT_NORM, "conv_next.norm" }, @@ -9519,6 +9517,9 @@ static bool llm_load_tensors( { model.tok_embd = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, 0); + model.tok_norm = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD_NORM, "weight"), {768}, 0); + model.tok_norm_b = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD_NORM, "bias"), {768}, 0); + model.conv_1d = create_tensor(tn(LLM_TENSOR_CONV1D, "weight"), {7, n_embd, 768}, 0); model.conv_1d_b = create_tensor(tn(LLM_TENSOR_CONV1D, "bias"), {768}, 0); @@ -17337,6 +17338,10 @@ struct llm_build_context { LLM_NORM_GROUP, cb, 0); } + cur = llm_build_norm(ctx0, ggml_cont(ctx0, ggml_transpose(ctx0, cur)), hparams, + model.tok_norm, + model.tok_norm_b, + LLM_NORM, cb, -1); printf("cur: %d %d %d\n", cur->ne[0], cur->ne[1], cur->ne[2]);