fix: remove max_position_embeddings, use n_train_ctx
parent e1fa9dd24c
commit f989ba151d
3 changed files with 8 additions and 18 deletions
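A minimal sketch of the net effect, assuming the generic GGUF key is "{arch}.context_length" (defined outside these hunks) and a StarCoder config.json that carries n_positions = 8192: the converter now writes the trained context size into that single generic key, and llama.cpp reads it back as n_ctx_train instead of a dedicated max_position_embeddings value.

# Sketch only: mimics the metadata flow, does not call the real gguf package.
import json

with open("config.json") as f:                # HF StarCoder config (assumed present)
    hparams = json.load(f)

KEY_CONTEXT_LENGTH = "{arch}.context_length"  # generic key kept by this change

metadata = {
    KEY_CONTEXT_LENGTH.format(arch="starcoder"): hparams["n_positions"],
}
print(metadata)                               # e.g. {'starcoder.context_length': 8192}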
@@ -103,9 +103,8 @@ print("gguf: get model metadata")
 block_count = hparams["n_layer"]

 gguf_writer.add_name("StarCoder")
-gguf_writer.add_context_length(2048) # not in config.json
+gguf_writer.add_context_length(hparams["n_positions"])
 gguf_writer.add_embedding_length(hparams["n_embd"])
-gguf_writer.add_max_position_embeddings(hparams["n_positions"])
 gguf_writer.add_feed_forward_length(4 * hparams["n_embd"])
 gguf_writer.add_block_count(block_count)
 gguf_writer.add_head_count(hparams["n_head"])
@@ -42,7 +42,6 @@ KEY_BLOCK_COUNT = "{arch}.block_count"
 KEY_FEED_FORWARD_LENGTH = "{arch}.feed_forward_length"
 KEY_USE_PARALLEL_RESIDUAL = "{arch}.use_parallel_residual"
 KEY_TENSOR_DATA_LAYOUT = "{arch}.tensor_data_layout"
-KEY_MAX_POSITION_EMBEDDINGS = "{arch}.max_position_embeddings"

 # attention
 KEY_ATTENTION_HEAD_COUNT = "{arch}.attention.head_count"
@@ -718,10 +717,6 @@ class GGUFWriter:
         self.add_uint32(
             KEY_EMBEDDING_LENGTH.format(arch=self.arch), length)

-    def add_max_position_embeddings(self, length: int):
-        self.add_uint32(
-            KEY_MAX_POSITION_EMBEDDINGS.format(arch=self.arch), length)
-
     def add_block_count(self, length: int):
         self.add_uint32(
             KEY_BLOCK_COUNT.format(arch=self.arch), length)
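For context, the add_context_length helper the converter now relies on is not part of this hunk; a minimal, self-contained sketch of the pattern it presumably follows (same add_uint32 shape as add_block_count above, names assumed):

# Sketch of the writer pattern; TinyWriter is a stand-in, not the real
# GGUFWriter (which serializes binary GGUF rather than filling a dict).
KEY_CONTEXT_LENGTH = "{arch}.context_length"

class TinyWriter:
    def __init__(self, arch: str):
        self.arch = arch
        self.kv = {}

    def add_uint32(self, key: str, value: int):
        self.kv[key] = value          # the real writer packs a uint32 field

    def add_context_length(self, length: int):
        self.add_uint32(KEY_CONTEXT_LENGTH.format(arch=self.arch), length)

w = TinyWriter("starcoder")
w.add_context_length(8192)            # converter passes hparams["n_positions"] here
print(w.kv)                           # {'starcoder.context_length': 8192}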
@@ -193,7 +193,6 @@ enum llm_kv {
     LLM_KV_FEED_FORWARD_LENGTH,
     LLM_KV_USE_PARALLEL_RESIDUAL,
     LLM_KV_TENSOR_DATA_LAYOUT,
-    LLM_KV_MAX_POSITION_EMBEDDINGS,

     LLM_KV_ATTENTION_HEAD_COUNT,
     LLM_KV_ATTENTION_HEAD_COUNT_KV,
@@ -238,7 +237,6 @@ static std::map<llm_kv, std::string> LLM_KV_NAMES = {
     { LLM_KV_FEED_FORWARD_LENGTH, "%s.feed_forward_length" },
     { LLM_KV_USE_PARALLEL_RESIDUAL, "%s.use_parallel_residual" },
     { LLM_KV_TENSOR_DATA_LAYOUT, "%s.tensor_data_layout" },
-    { LLM_KV_MAX_POSITION_EMBEDDINGS, "%s.max_position_embeddings" },

     { LLM_KV_ATTENTION_HEAD_COUNT, "%s.attention.head_count" },
     { LLM_KV_ATTENTION_HEAD_COUNT_KV, "%s.attention.head_count_kv" },
@@ -940,7 +938,6 @@ struct llama_hparams {
     uint32_t n_layer = 32;
     uint32_t n_rot = 64;
     uint32_t n_ff = 11008;
-    uint32_t n_positions = 0; // StarCoder

     float f_norm_eps = 1e-5;
     float f_norm_rms_eps = 1e-5;
@@ -1668,7 +1665,6 @@ static void llm_load_hparams(
     GGUF_GET_KEY(ctx, hparams.n_ff, gguf_get_val_u32, GGUF_TYPE_UINT32, true, kv(LLM_KV_FEED_FORWARD_LENGTH));
     GGUF_GET_KEY(ctx, hparams.n_head, gguf_get_val_u32, GGUF_TYPE_UINT32, true, kv(LLM_KV_ATTENTION_HEAD_COUNT));
     GGUF_GET_KEY(ctx, hparams.n_layer, gguf_get_val_u32, GGUF_TYPE_UINT32, true, kv(LLM_KV_BLOCK_COUNT));
-    GGUF_GET_KEY(ctx, hparams.n_positions, gguf_get_val_u32, GGUF_TYPE_UINT32, true, kv(LLM_KV_MAX_POSITION_EMBEDDINGS));

     // n_head_kv is optional, default to n_head
     hparams.n_head_kv = hparams.n_head;
@@ -2215,7 +2211,7 @@ static void llm_load_tensors(
         case LLM_ARCH_STARCODER:
             {
                 model.tok_embeddings = ml.create_tensor(ctx, tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, GGML_BACKEND_CPU);
-                model.pos_embeddings = ml.create_tensor(ctx, tn(LLM_TENSOR_POS_EMBD, "weight"), {n_embd, hparams.n_positions}, GGML_BACKEND_CPU);
+                model.pos_embeddings = ml.create_tensor(ctx, tn(LLM_TENSOR_POS_EMBD, "weight"), {n_embd, hparams.n_ctx_train}, GGML_BACKEND_CPU);

                 // output
                 {