layer norm
This commit is contained in:
parent
435cfd788b
commit
b3ba05e5bc
5 changed files with 19 additions and 11 deletions
|
@ -2046,7 +2046,7 @@ class OuteTTSVocoderModel(Model):
|
||||||
logger.debug(f"Skipping {name!r}")
|
logger.debug(f"Skipping {name!r}")
|
||||||
return []
|
return []
|
||||||
|
|
||||||
print(f"{self.map_tensor_name(name)} -> {data_torch.shape}")
|
logger.info(f"{self.map_tensor_name(name)} -> {data_torch.shape}")
|
||||||
|
|
||||||
return [(self.map_tensor_name(name), data_torch)]
|
return [(self.map_tensor_name(name), data_torch)]
|
||||||
|
|
||||||
|
|
|
@ -88,6 +88,16 @@ def flatten_state_dict(state_dict, parent_key='', sep='.'):
|
||||||
if new_key == "feature_extractor.encodec.quantizer.vq.layers.0._codebook.embed":
|
if new_key == "feature_extractor.encodec.quantizer.vq.layers.0._codebook.embed":
|
||||||
new_key = "backbone.embedding.weight"
|
new_key = "backbone.embedding.weight"
|
||||||
|
|
||||||
|
# these are the only rows used
|
||||||
|
# ref: https://github.com/edwko/OuteTTS/blob/a613e79c489d8256dd657ea9168d78de75895d82/outetts/wav_tokenizer/audio_codec.py#L100
|
||||||
|
if new_key == "backbone.norm.scale.weight":
|
||||||
|
new_key = "backbone.norm.weight"
|
||||||
|
value = value[0]
|
||||||
|
|
||||||
|
if new_key == "backbone.norm.shift.weight":
|
||||||
|
new_key = "backbone.norm.bias"
|
||||||
|
value = value[0]
|
||||||
|
|
||||||
size_mb = value.element_size() * value.nelement() / (1024 * 1024)
|
size_mb = value.element_size() * value.nelement() / (1024 * 1024)
|
||||||
print(f"{size_mb:8.2f} MB - {new_key}: {value.shape}")
|
print(f"{size_mb:8.2f} MB - {new_key}: {value.shape}")
|
||||||
|
|
||||||
|
|
|
@ -267,7 +267,6 @@ class MODEL_ARCH(IntEnum):
|
||||||
class MODEL_TENSOR(IntEnum):
|
class MODEL_TENSOR(IntEnum):
|
||||||
TOKEN_EMBD = auto()
|
TOKEN_EMBD = auto()
|
||||||
TOKEN_EMBD_NORM = auto()
|
TOKEN_EMBD_NORM = auto()
|
||||||
TOKEN_EMBD_SHIFT = auto()
|
|
||||||
TOKEN_TYPES = auto()
|
TOKEN_TYPES = auto()
|
||||||
POS_EMBD = auto()
|
POS_EMBD = auto()
|
||||||
OUTPUT = auto()
|
OUTPUT = auto()
|
||||||
|
@ -451,7 +450,6 @@ MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
|
||||||
TENSOR_NAMES: dict[MODEL_TENSOR, str] = {
|
TENSOR_NAMES: dict[MODEL_TENSOR, str] = {
|
||||||
MODEL_TENSOR.TOKEN_EMBD: "token_embd",
|
MODEL_TENSOR.TOKEN_EMBD: "token_embd",
|
||||||
MODEL_TENSOR.TOKEN_EMBD_NORM: "token_embd_norm",
|
MODEL_TENSOR.TOKEN_EMBD_NORM: "token_embd_norm",
|
||||||
MODEL_TENSOR.TOKEN_EMBD_SHIFT: "token_embd_shift",
|
|
||||||
MODEL_TENSOR.TOKEN_TYPES: "token_types",
|
MODEL_TENSOR.TOKEN_TYPES: "token_types",
|
||||||
MODEL_TENSOR.POS_EMBD: "position_embd",
|
MODEL_TENSOR.POS_EMBD: "position_embd",
|
||||||
MODEL_TENSOR.OUTPUT_NORM: "output_norm",
|
MODEL_TENSOR.OUTPUT_NORM: "output_norm",
|
||||||
|
@ -1415,7 +1413,6 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
|
||||||
MODEL_ARCH.OUTETTS_VOC: [
|
MODEL_ARCH.OUTETTS_VOC: [
|
||||||
MODEL_TENSOR.TOKEN_EMBD,
|
MODEL_TENSOR.TOKEN_EMBD,
|
||||||
MODEL_TENSOR.TOKEN_EMBD_NORM,
|
MODEL_TENSOR.TOKEN_EMBD_NORM,
|
||||||
MODEL_TENSOR.TOKEN_EMBD_SHIFT,
|
|
||||||
MODEL_TENSOR.CONV1D,
|
MODEL_TENSOR.CONV1D,
|
||||||
MODEL_TENSOR.CONV_NEXT_DW,
|
MODEL_TENSOR.CONV_NEXT_DW,
|
||||||
MODEL_TENSOR.CONV_NEXT_NORM,
|
MODEL_TENSOR.CONV_NEXT_NORM,
|
||||||
|
|
|
@ -43,11 +43,7 @@ class TensorNameMap:
|
||||||
"emb_ln", # nomic-bert
|
"emb_ln", # nomic-bert
|
||||||
"transformer.norm", # openelm
|
"transformer.norm", # openelm
|
||||||
"rwkv.blocks.0.pre_ln", # rwkv
|
"rwkv.blocks.0.pre_ln", # rwkv
|
||||||
"backbone.norm.scale", # outetts
|
"backbone.norm", # outetts
|
||||||
),
|
|
||||||
|
|
||||||
MODEL_TENSOR.TOKEN_EMBD_SHIFT: (
|
|
||||||
"backbone.norm.shift", # outetts
|
|
||||||
),
|
),
|
||||||
|
|
||||||
# Position embeddings
|
# Position embeddings
|
||||||
|
|
|
@ -505,7 +505,6 @@ struct LLM_KV {
|
||||||
enum llm_tensor {
|
enum llm_tensor {
|
||||||
LLM_TENSOR_TOKEN_EMBD,
|
LLM_TENSOR_TOKEN_EMBD,
|
||||||
LLM_TENSOR_TOKEN_EMBD_NORM,
|
LLM_TENSOR_TOKEN_EMBD_NORM,
|
||||||
LLM_TENSOR_TOKEN_EMBD_SHIFT,
|
|
||||||
LLM_TENSOR_TOKEN_TYPES,
|
LLM_TENSOR_TOKEN_TYPES,
|
||||||
LLM_TENSOR_POS_EMBD,
|
LLM_TENSOR_POS_EMBD,
|
||||||
LLM_TENSOR_OUTPUT,
|
LLM_TENSOR_OUTPUT,
|
||||||
|
@ -1619,7 +1618,6 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
|
||||||
{
|
{
|
||||||
{ LLM_TENSOR_TOKEN_EMBD, "token_embd" },
|
{ LLM_TENSOR_TOKEN_EMBD, "token_embd" },
|
||||||
{ LLM_TENSOR_TOKEN_EMBD_NORM, "token_embd_norm" },
|
{ LLM_TENSOR_TOKEN_EMBD_NORM, "token_embd_norm" },
|
||||||
{ LLM_TENSOR_TOKEN_EMBD_SHIFT, "token_embd_shift" },
|
|
||||||
{ LLM_TENSOR_CONV1D, "conv1d" },
|
{ LLM_TENSOR_CONV1D, "conv1d" },
|
||||||
{ LLM_TENSOR_CONV_NEXT_DW, "conv_next.dw" },
|
{ LLM_TENSOR_CONV_NEXT_DW, "conv_next.dw" },
|
||||||
{ LLM_TENSOR_CONV_NEXT_NORM, "conv_next.norm" },
|
{ LLM_TENSOR_CONV_NEXT_NORM, "conv_next.norm" },
|
||||||
|
@ -9519,6 +9517,9 @@ static bool llm_load_tensors(
|
||||||
{
|
{
|
||||||
model.tok_embd = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, 0);
|
model.tok_embd = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, 0);
|
||||||
|
|
||||||
|
model.tok_norm = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD_NORM, "weight"), {768}, 0);
|
||||||
|
model.tok_norm_b = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD_NORM, "bias"), {768}, 0);
|
||||||
|
|
||||||
model.conv_1d = create_tensor(tn(LLM_TENSOR_CONV1D, "weight"), {7, n_embd, 768}, 0);
|
model.conv_1d = create_tensor(tn(LLM_TENSOR_CONV1D, "weight"), {7, n_embd, 768}, 0);
|
||||||
model.conv_1d_b = create_tensor(tn(LLM_TENSOR_CONV1D, "bias"), {768}, 0);
|
model.conv_1d_b = create_tensor(tn(LLM_TENSOR_CONV1D, "bias"), {768}, 0);
|
||||||
|
|
||||||
|
@ -17337,6 +17338,10 @@ struct llm_build_context {
|
||||||
LLM_NORM_GROUP, cb, 0);
|
LLM_NORM_GROUP, cb, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
cur = llm_build_norm(ctx0, ggml_cont(ctx0, ggml_transpose(ctx0, cur)), hparams,
|
||||||
|
model.tok_norm,
|
||||||
|
model.tok_norm_b,
|
||||||
|
LLM_NORM, cb, -1);
|
||||||
|
|
||||||
printf("cur: %d %d %d\n", cur->ne[0], cur->ne[1], cur->ne[2]);
|
printf("cur: %d %d %d\n", cur->ne[0], cur->ne[1], cur->ne[2]);
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue