add phi3 128k support in convert-hf-to-gguf
This commit is contained in:
parent
11474e756d
commit
8fa413d8b5
3 changed files with 59 additions and 11 deletions
|
@ -14,6 +14,7 @@ from pathlib import Path
|
||||||
from hashlib import sha256
|
from hashlib import sha256
|
||||||
from typing import TYPE_CHECKING, Any, Callable, ContextManager, Iterable, Iterator, Sequence, TypeVar, cast
|
from typing import TYPE_CHECKING, Any, Callable, ContextManager, Iterable, Iterator, Sequence, TypeVar, cast
|
||||||
|
|
||||||
|
import math
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import torch
|
import torch
|
||||||
|
|
||||||
|
@ -1784,23 +1785,57 @@ class Phi3MiniModel(Model):
|
||||||
def set_gguf_parameters(self):
    """Write the Phi-3 hyperparameters and optional long-context rope scaling data.

    Core hyperparameters are read through ``self.find_hparam`` and forwarded
    to ``self.gguf_writer``. When the optional ``rope_scaling`` entry is
    present, the attention scaling factor and the long/short frequency
    factors are written as well.

    Raises:
        KeyError: ``rope_scaling`` is present but lacks ``type``,
            ``long_factor`` or ``short_factor``.
        NotImplementedError: ``rope_scaling.type`` is neither ``su`` nor ``yarn``.
        ValueError: the long and short factor lists do not both contain
            ``rope_dims // 2`` entries.
    """
    block_count = self.find_hparam(["num_hidden_layers", "n_layer"])

    n_embd = self.find_hparam(["hidden_size", "n_embd"])
    n_head = self.find_hparam(["num_attention_heads", "n_head"])
    rms_eps = self.find_hparam(["rms_norm_eps"])
    max_pos_embds = self.find_hparam(["n_positions", "max_position_embeddings"])
    orig_max_pos_embds = self.find_hparam(["original_max_position_embeddings"])
    rope_dims = n_embd // n_head

    self.gguf_writer.add_name("Phi3")
    self.gguf_writer.add_context_length(max_pos_embds)
    self.gguf_writer.add_rope_scaling_orig_ctx_len(orig_max_pos_embds)
    self.gguf_writer.add_embedding_length(n_embd)
    self.gguf_writer.add_feed_forward_length(self.find_hparam(["intermediate_size"]))
    self.gguf_writer.add_block_count(block_count)
    self.gguf_writer.add_head_count(n_head)
    self.gguf_writer.add_head_count_kv(n_head)
    self.gguf_writer.add_layer_norm_rms_eps(rms_eps)
    self.gguf_writer.add_rope_dimension_count(rope_dims)
    self.gguf_writer.add_rope_freq_base(self.find_hparam(["rope_theta"]))
    self.gguf_writer.add_file_type(self.ftype)

    # write rope scaling for long context (128k) model
    # The second positional argument makes the lookup optional: a missing
    # key yields None instead of an error.
    rope_scaling = self.find_hparam(['rope_scaling'], True)
    if rope_scaling is None:
        return

    scale = max_pos_embds / orig_max_pos_embds

    rope_scaling_type = rope_scaling.get('type', '').lower()
    if not rope_scaling_type:
        raise KeyError('Missing the required key rope_scaling.type')

    # A scale of 1.0 or less means the context was not extended, so the
    # attention factor stays neutral.
    if rope_scaling_type == 'su':
        attn_factor = math.sqrt(1 + math.log(scale) / math.log(orig_max_pos_embds)) if scale > 1.0 else 1.0
    elif rope_scaling_type == 'yarn':
        attn_factor = 0.1 * math.log(scale) + 1.0 if scale > 1.0 else 1.0
    else:
        raise NotImplementedError(f'The rope scaling type {rope_scaling_type} is not supported yet')

    self.gguf_writer.add_rope_scaling_attn_factors(attn_factor)

    long_factors = rope_scaling.get('long_factor', None)
    short_factors = rope_scaling.get('short_factor', None)

    if long_factors is None or short_factors is None:
        raise KeyError('Missing the required key rope_scaling.long_factor or rope_scaling.short_factor')

    # Integer division keeps the expected count an int, so the error message
    # reads "48" rather than "48.0".
    expected_factor_count = rope_dims // 2
    if len(long_factors) != len(short_factors) or len(long_factors) != expected_factor_count:
        raise ValueError(f'The length of rope long and short factors must be {expected_factor_count}')

    self.gguf_writer.add_rope_scaling_freq_long_factors(long_factors)
    self.gguf_writer.add_rope_scaling_freq_short_factors(short_factors)
|
||||||
|
|
||||||
@Model.register("PlamoForCausalLM")
|
@Model.register("PlamoForCausalLM")
|
||||||
class PlamoModel(Model):
|
class PlamoModel(Model):
|
||||||
|
|
|
@ -57,12 +57,15 @@ class Keys:
|
||||||
CAUSAL = "{arch}.attention.causal"
|
CAUSAL = "{arch}.attention.causal"
|
||||||
|
|
||||||
class Rope:
    # Metadata key templates for rotary position embedding settings.
    # "{arch}" is substituted by callers via str.format(arch=...).
    DIMENSION_COUNT = "{arch}.rope.dimension_count"
    FREQ_BASE = "{arch}.rope.freq_base"
    SCALING_TYPE = "{arch}.rope.scaling.type"
    SCALING_FACTOR = "{arch}.rope.scaling.factor"
    # Per-frequency scaling factor arrays and the attention scaling factor
    # used for long-context rope scaling.
    SCALING_LONG_FACTORS = "{arch}.rope.scaling.freq_long_factors"
    SCALING_SHORT_FACTORS = "{arch}.rope.scaling.freq_short_factors"
    SCALING_ATTN_FACTOR = "{arch}.rope.scaling.attn_factor"
    SCALING_ORIG_CTX_LEN = "{arch}.rope.scaling.original_context_length"
    SCALING_FINETUNED = "{arch}.rope.scaling.finetuned"
|
||||||
|
|
||||||
class SSM:
|
class SSM:
|
||||||
CONV_KERNEL = "{arch}.ssm.conv_kernel"
|
CONV_KERNEL = "{arch}.ssm.conv_kernel"
|
||||||
|
@ -780,6 +783,7 @@ class RopeScalingType(Enum):
|
||||||
NONE = 'none'
|
NONE = 'none'
|
||||||
LINEAR = 'linear'
|
LINEAR = 'linear'
|
||||||
YARN = 'yarn'
|
YARN = 'yarn'
|
||||||
|
SU = 'su'
|
||||||
|
|
||||||
|
|
||||||
class PoolingType(IntEnum):
|
class PoolingType(IntEnum):
|
||||||
|
|
|
@ -433,6 +433,15 @@ class GGUFWriter:
|
||||||
def add_rope_scaling_factor(self, value: float) -> None:
    """Store ``value`` as a float32 under this architecture's rope scaling factor key."""
    self.add_float32(Keys.Rope.SCALING_FACTOR.format(arch=self.arch), value)
|
||||||
|
|
||||||
|
def add_rope_scaling_freq_long_factors(self, value: Sequence[float]) -> None:
    """Store ``value`` as an array under this architecture's rope long-factor key."""
    self.add_array(Keys.Rope.SCALING_LONG_FACTORS.format(arch=self.arch), value)
|
||||||
|
|
||||||
|
def add_rope_scaling_freq_short_factors(self, value: Sequence[float]) -> None:
    """Store ``value`` as an array under this architecture's rope short-factor key."""
    self.add_array(Keys.Rope.SCALING_SHORT_FACTORS.format(arch=self.arch), value)
|
||||||
|
|
||||||
|
def add_rope_scaling_attn_factors(self, value: float) -> None:
    """Store ``value`` as a float32 under this architecture's rope attention-factor key.

    The method name is plural, but the value is written with ``add_float32``,
    so it takes one scalar rather than a sequence.
    """
    self.add_float32(Keys.Rope.SCALING_ATTN_FACTOR.format(arch=self.arch), value)
|
||||||
|
|
||||||
def add_rope_scaling_orig_ctx_len(self, value: int) -> None:
    """Store ``value`` as a uint32 under this architecture's original-context-length key."""
    self.add_uint32(Keys.Rope.SCALING_ORIG_CTX_LEN.format(arch=self.arch), value)
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue