fix: remove max_position_embeddings, use n_train_ctx

parent e1fa9dd24c
commit f989ba151d

3 changed files with 8 additions and 18 deletions
@@ -103,9 +103,8 @@ print("gguf: get model metadata")
 block_count = hparams["n_layer"]

 gguf_writer.add_name("StarCoder")
-gguf_writer.add_context_length(2048) # not in config.json
+gguf_writer.add_context_length(hparams["n_positions"])
 gguf_writer.add_embedding_length(hparams["n_embd"])
-gguf_writer.add_max_position_embeddings(hparams["n_positions"])
 gguf_writer.add_feed_forward_length(4 * hparams["n_embd"])
 gguf_writer.add_block_count(block_count)
 gguf_writer.add_head_count(hparams["n_head"])
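Note on the conversion-script hunk above: instead of hard-coding a context length of 2048 and writing a separate max_position_embeddings field, the converter now stores n_positions from the Hugging Face config.json as the GGUF context length. A minimal sketch of a defensive variant, assuming hparams has been loaded from config.json and gguf_writer constructed as in the script; the fallback value is illustrative only and not part of this commit:

# Not part of this commit: fall back to the old default if a config
# lacks "n_positions"; hparams and gguf_writer assumed as in the hunk above.
ctx_length = hparams.get("n_positions", 2048)
gguf_writer.add_context_length(ctx_length)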
@@ -42,7 +42,6 @@ KEY_BLOCK_COUNT = "{arch}.block_count"
 KEY_FEED_FORWARD_LENGTH = "{arch}.feed_forward_length"
 KEY_USE_PARALLEL_RESIDUAL = "{arch}.use_parallel_residual"
 KEY_TENSOR_DATA_LAYOUT = "{arch}.tensor_data_layout"
-KEY_MAX_POSITION_EMBEDDINGS = "{arch}.max_position_embeddings"

 # attention
 KEY_ATTENTION_HEAD_COUNT = "{arch}.attention.head_count"
@@ -718,10 +717,6 @@ class GGUFWriter:
         self.add_uint32(
             KEY_EMBEDDING_LENGTH.format(arch=self.arch), length)

-    def add_max_position_embeddings(self, length: int):
-        self.add_uint32(
-            KEY_MAX_POSITION_EMBEDDINGS.format(arch=self.arch), length)
-
     def add_block_count(self, length: int):
         self.add_uint32(
             KEY_BLOCK_COUNT.format(arch=self.arch), length)
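Note on the GGUFWriter hunks above: the dedicated add_max_position_embeddings helper and its KEY_MAX_POSITION_EMBEDDINGS constant are dropped because the existing context-length metadata already carries this value. A small self-contained sketch of the pattern that remains, where the context-length key name is an assumption based on the module's "{arch}." naming convention and is not shown in this diff:

# Not part of this diff: a toy stand-in for GGUFWriter showing the retained
# pattern of per-field add_* helpers writing one uint32 under an
# architecture-prefixed key (key name assumed).
KEY_CONTEXT_LENGTH = "{arch}.context_length"

class _WriterSketch:
    def __init__(self, arch: str):
        self.arch = arch
        self.kv = {}

    def add_uint32(self, key: str, val: int):
        self.kv[key] = val

    def add_context_length(self, length: int):
        self.add_uint32(KEY_CONTEXT_LENGTH.format(arch=self.arch), length)

w = _WriterSketch("starcoder")
w.add_context_length(8192)
print(w.kv)  # {'starcoder.context_length': 8192}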
@@ -193,7 +193,6 @@ enum llm_kv {
     LLM_KV_FEED_FORWARD_LENGTH,
     LLM_KV_USE_PARALLEL_RESIDUAL,
     LLM_KV_TENSOR_DATA_LAYOUT,
-    LLM_KV_MAX_POSITION_EMBEDDINGS,

     LLM_KV_ATTENTION_HEAD_COUNT,
     LLM_KV_ATTENTION_HEAD_COUNT_KV,
@@ -238,7 +237,6 @@ static std::map<llm_kv, std::string> LLM_KV_NAMES = {
     { LLM_KV_FEED_FORWARD_LENGTH, "%s.feed_forward_length" },
     { LLM_KV_USE_PARALLEL_RESIDUAL, "%s.use_parallel_residual" },
     { LLM_KV_TENSOR_DATA_LAYOUT, "%s.tensor_data_layout" },
-    { LLM_KV_MAX_POSITION_EMBEDDINGS, "%s.max_position_embeddings" },

     { LLM_KV_ATTENTION_HEAD_COUNT, "%s.attention.head_count" },
     { LLM_KV_ATTENTION_HEAD_COUNT_KV, "%s.attention.head_count_kv" },
@@ -940,7 +938,6 @@ struct llama_hparams {
     uint32_t n_layer = 32;
     uint32_t n_rot = 64;
     uint32_t n_ff = 11008;
-    uint32_t n_positions = 0; // StarCoder

     float f_norm_eps = 1e-5;
     float f_norm_rms_eps = 1e-5;
@@ -1668,7 +1665,6 @@ static void llm_load_hparams(
     GGUF_GET_KEY(ctx, hparams.n_ff, gguf_get_val_u32, GGUF_TYPE_UINT32, true, kv(LLM_KV_FEED_FORWARD_LENGTH));
     GGUF_GET_KEY(ctx, hparams.n_head, gguf_get_val_u32, GGUF_TYPE_UINT32, true, kv(LLM_KV_ATTENTION_HEAD_COUNT));
     GGUF_GET_KEY(ctx, hparams.n_layer, gguf_get_val_u32, GGUF_TYPE_UINT32, true, kv(LLM_KV_BLOCK_COUNT));
-    GGUF_GET_KEY(ctx, hparams.n_positions, gguf_get_val_u32, GGUF_TYPE_UINT32, true, kv(LLM_KV_MAX_POSITION_EMBEDDINGS));

     // n_head_kv is optional, default to n_head
     hparams.n_head_kv = hparams.n_head;
@@ -2215,7 +2211,7 @@ static void llm_load_tensors(
         case LLM_ARCH_STARCODER:
             {
                 model.tok_embeddings = ml.create_tensor(ctx, tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, GGML_BACKEND_CPU);
-                model.pos_embeddings = ml.create_tensor(ctx, tn(LLM_TENSOR_POS_EMBD, "weight"), {n_embd, hparams.n_positions}, GGML_BACKEND_CPU);
+                model.pos_embeddings = ml.create_tensor(ctx, tn(LLM_TENSOR_POS_EMBD, "weight"), {n_embd, hparams.n_ctx_train}, GGML_BACKEND_CPU);

                 // output
                 {
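Note on the llama.cpp hunks above: with the max_position_embeddings key gone, hparams.n_positions is removed and the StarCoder position-embedding tensor is instead sized by hparams.n_ctx_train, the training context length that llm_load_hparams reads from the GGUF context-length metadata (not shown in this diff). For learned absolute position embeddings there is one n_embd-wide vector per trainable position, so the two numbers coincide; ggml lists the innermost dimension first, which is why the shape appears as {n_embd, hparams.n_ctx_train}. A toy illustration with made-up values:

# Illustrative only, not from this repo; the values are hypothetical.
n_embd = 6144        # embedding width
n_ctx_train = 8192   # training context length (GGUF context_length)

# one n_embd-wide vector per position -> n_ctx_train rows in the table
pos_embd_shape = (n_ctx_train, n_embd)
print(pos_embd_shape)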