convert : refactor rope_freqs generation (#9396)

* convert : refactor rope_freqs generation

This should also fix vocab-only conversion for Phi-3.

* convert : adapt MiniCPM3 to separate rope_freqs insertion

MiniCPM3's tokenizer is treated as a SentencePiece tokenizer to avoid
having to run its custom Python code, which mixes tokenization and
tool calls in the same file.

gguf-py : add long and short RoPE factors to tensor mappings

Empty, but the key names are used to populate the mappings.
This commit is contained in:
compilade 2024-10-01 02:31:36 -04:00 committed by GitHub
parent 6f1d9d71f4
commit 1927378bcc
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 44 additions and 29 deletions

View file

@@ -814,6 +814,8 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
MODEL_TENSOR.TOKEN_EMBD,
MODEL_TENSOR.OUTPUT_NORM,
MODEL_TENSOR.OUTPUT,
MODEL_TENSOR.ROPE_FACTORS_LONG,
MODEL_TENSOR.ROPE_FACTORS_SHORT,
MODEL_TENSOR.ATTN_NORM,
MODEL_TENSOR.ATTN_QKV,
MODEL_TENSOR.ATTN_Q,
@@ -892,6 +894,8 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
MODEL_TENSOR.TOKEN_EMBD,
MODEL_TENSOR.OUTPUT_NORM,
MODEL_TENSOR.OUTPUT,
MODEL_TENSOR.ROPE_FACTORS_LONG,
MODEL_TENSOR.ROPE_FACTORS_SHORT,
MODEL_TENSOR.ATTN_NORM,
MODEL_TENSOR.ATTN_Q_A,
MODEL_TENSOR.ATTN_Q_B,

View file

@@ -87,6 +87,9 @@ class TensorNameMap:
"rope.freqs", # llama-pth
"rotary_pos_emb.inv_freq", # chatglm
),
MODEL_TENSOR.ROPE_FACTORS_LONG: (),
MODEL_TENSOR.ROPE_FACTORS_SHORT: (),
}
block_mappings_cfg: dict[MODEL_TENSOR, tuple[str, ...]] = {