layer norm

Georgi Gerganov 2024-12-10 22:37:26 +02:00
parent 435cfd788b
commit b3ba05e5bc
5 changed files with 19 additions and 11 deletions

View file

@@ -2046,7 +2046,7 @@ class OuteTTSVocoderModel(Model):
             logger.debug(f"Skipping {name!r}")
             return []

-        print(f"{self.map_tensor_name(name)} -> {data_torch.shape}")
+        logger.info(f"{self.map_tensor_name(name)} -> {data_torch.shape}")

         return [(self.map_tensor_name(name), data_torch)]

View file

@@ -88,6 +88,16 @@ def flatten_state_dict(state_dict, parent_key='', sep='.'):
         if new_key == "feature_extractor.encodec.quantizer.vq.layers.0._codebook.embed":
             new_key = "backbone.embedding.weight"

+        # these are the only rows used
+        # ref: https://github.com/edwko/OuteTTS/blob/a613e79c489d8256dd657ea9168d78de75895d82/outetts/wav_tokenizer/audio_codec.py#L100
+        if new_key == "backbone.norm.scale.weight":
+            new_key = "backbone.norm.weight"
+            value = value[0]
+
+        if new_key == "backbone.norm.shift.weight":
+            new_key = "backbone.norm.bias"
+            value = value[0]
+
         size_mb = value.element_size() * value.nelement() / (1024 * 1024)
         print(f"{size_mb:8.2f} MB - {new_key}: {value.shape}")

View file

@@ -267,7 +267,6 @@ class MODEL_ARCH(IntEnum):
 class MODEL_TENSOR(IntEnum):
     TOKEN_EMBD       = auto()
     TOKEN_EMBD_NORM  = auto()
-    TOKEN_EMBD_SHIFT = auto()
     TOKEN_TYPES      = auto()
     POS_EMBD         = auto()
     OUTPUT           = auto()
@@ -451,7 +450,6 @@ MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
 TENSOR_NAMES: dict[MODEL_TENSOR, str] = {
     MODEL_TENSOR.TOKEN_EMBD:       "token_embd",
     MODEL_TENSOR.TOKEN_EMBD_NORM:  "token_embd_norm",
-    MODEL_TENSOR.TOKEN_EMBD_SHIFT: "token_embd_shift",
     MODEL_TENSOR.TOKEN_TYPES:      "token_types",
     MODEL_TENSOR.POS_EMBD:         "position_embd",
     MODEL_TENSOR.OUTPUT_NORM:      "output_norm",
@@ -1415,7 +1413,6 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
     MODEL_ARCH.OUTETTS_VOC: [
         MODEL_TENSOR.TOKEN_EMBD,
         MODEL_TENSOR.TOKEN_EMBD_NORM,
-        MODEL_TENSOR.TOKEN_EMBD_SHIFT,
         MODEL_TENSOR.CONV1D,
         MODEL_TENSOR.CONV_NEXT_DW,
         MODEL_TENSOR.CONV_NEXT_NORM,
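
With the shift folded into the norm's bias, gguf-py no longer needs a TOKEN_EMBD_SHIFT entry; both parameters serialize under the token_embd_norm base name. A quick sketch against this revision's in-tree gguf-py (MODEL_ARCH.OUTETTS_VOC only exists in this tree):

    from gguf.constants import MODEL_ARCH, MODEL_TENSOR, MODEL_TENSORS, TENSOR_NAMES

    # the vocoder's tensor list after this change: no TOKEN_EMBD_SHIFT
    assert MODEL_TENSOR.TOKEN_EMBD_NORM in MODEL_TENSORS[MODEL_ARCH.OUTETTS_VOC]

    base = TENSOR_NAMES[MODEL_TENSOR.TOKEN_EMBD_NORM]
    print(base + ".weight", base + ".bias")  # token_embd_norm.weight token_embd_norm.bias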

View file

@@ -43,11 +43,7 @@ class TensorNameMap:
             "emb_ln",                # nomic-bert
             "transformer.norm",      # openelm
             "rwkv.blocks.0.pre_ln",  # rwkv
-            "backbone.norm.scale",   # outetts
-        ),
-
-        MODEL_TENSOR.TOKEN_EMBD_SHIFT: (
-            "backbone.norm.shift",   # outetts
+            "backbone.norm",         # outetts
         ),

         # Position embeddings
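
A single "backbone.norm" source entry covers both tensors because the converter resolves names with the .weight/.bias suffix stripped. Roughly how the lookup goes, again assuming this revision's gguf-py:

    from gguf.constants import MODEL_ARCH
    from gguf.tensor_mapping import get_tensor_name_map

    tmap = get_tensor_name_map(MODEL_ARCH.OUTETTS_VOC, n_blocks=0)

    # the renamed checkpoint keys both resolve through the one "backbone.norm" entry
    print(tmap.get_name("backbone.norm.weight", try_suffixes=(".weight", ".bias")))  # token_embd_norm.weight
    print(tmap.get_name("backbone.norm.bias",   try_suffixes=(".weight", ".bias")))  # token_embd_norm.bias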

View file

@@ -505,7 +505,6 @@ struct LLM_KV {
 enum llm_tensor {
     LLM_TENSOR_TOKEN_EMBD,
     LLM_TENSOR_TOKEN_EMBD_NORM,
-    LLM_TENSOR_TOKEN_EMBD_SHIFT,
     LLM_TENSOR_TOKEN_TYPES,
     LLM_TENSOR_POS_EMBD,
     LLM_TENSOR_OUTPUT,
@@ -1619,7 +1618,6 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_NAMES = {
     {
         { LLM_TENSOR_TOKEN_EMBD,       "token_embd" },
         { LLM_TENSOR_TOKEN_EMBD_NORM,  "token_embd_norm" },
-        { LLM_TENSOR_TOKEN_EMBD_SHIFT, "token_embd_shift" },
         { LLM_TENSOR_CONV1D,           "conv1d" },
         { LLM_TENSOR_CONV_NEXT_DW,     "conv_next.dw" },
         { LLM_TENSOR_CONV_NEXT_NORM,   "conv_next.norm" },
@@ -9519,6 +9517,9 @@ static bool llm_load_tensors(
                 {
                     model.tok_embd = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, 0);

+                    model.tok_norm   = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD_NORM, "weight"), {768}, 0);
+                    model.tok_norm_b = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD_NORM, "bias"),   {768}, 0);
+
                     model.conv_1d   = create_tensor(tn(LLM_TENSOR_CONV1D, "weight"), {7, n_embd, 768}, 0);
                     model.conv_1d_b = create_tensor(tn(LLM_TENSOR_CONV1D, "bias"),   {768}, 0);
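
The loader now expects a {768} weight and bias for the token-embedding norm. One way to sanity-check a converted file before loading it, using gguf-py's GGUFReader (the path is a placeholder):

    from gguf import GGUFReader

    reader = GGUFReader("outetts-vocoder.gguf")  # placeholder path
    for t in reader.tensors:
        if t.name.startswith("token_embd_norm"):
            print(t.name, list(t.shape))  # expect shape [768] for .weight and .bias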
@@ -17337,6 +17338,10 @@ struct llm_build_context {
                         LLM_NORM_GROUP, cb, 0);
             }

+            cur = llm_build_norm(ctx0, ggml_cont(ctx0, ggml_transpose(ctx0, cur)), hparams,
+                    model.tok_norm,
+                    model.tok_norm_b,
+                    LLM_NORM, cb, -1);

             printf("cur: %d %d %d\n", cur->ne[0], cur->ne[1], cur->ne[2]);