hparams : remove n_vocab_types
ggml-ci
commit 543fd01eb9
parent c2008b568f
4 changed files with 13 additions and 9 deletions
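
In short, the per-model hyperparameter `hparams.n_vocab_type` is dropped and the BERT-style token-type count now lives on the vocabulary, exposed as `llama_vocab::n_token_types()`. The sketch below mirrors that ownership layout with simplified stand-in types (`mock_vocab` and its `impl` are illustrative only, not the real llama.cpp classes):

```cpp
// Simplified stand-ins for illustration only -- not the real llama.cpp types.
#include <cstdint>
#include <cstdio>
#include <memory>

struct mock_vocab {
    // the token-type count now lives with the vocabulary, behind a pimpl
    struct impl {
        uint32_t n_vocab       = 30522; // hypothetical BERT-style vocab size
        uint32_t n_token_types = 2;     // hypothetical token-type count
    };
    std::unique_ptr<impl> pimpl = std::make_unique<impl>();

    uint32_t n_vocab()       const { return pimpl->n_vocab; }
    uint32_t n_token_types() const { return pimpl->n_token_types; }
};

int main() {
    mock_vocab vocab;
    // callers that used to read hparams.n_vocab_type now ask the vocab instead
    const int64_t n_token_types = vocab.n_token_types();
    std::printf("token-type embedding rows: %lld\n", (long long) n_token_types);
    return 0;
}
```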
@@ -40,7 +40,6 @@ struct llama_hparams {
     uint32_t n_embd_head_v; // dimension of values (d_v) aka n_embd_head
     uint32_t n_expert = 0;
     uint32_t n_expert_used = 0;
-    uint32_t n_vocab_type = 0; // for BERT-style token types
     uint32_t n_rel_attn_bkts = 0;
 
     // for WavTokenizer
@@ -497,8 +497,8 @@ void llama_model::load_hparams(llama_model_loader & ml) {
         hparams.n_embd_head_v = 0;
     }
 
+    // for differentiating model types
     uint32_t n_vocab = 0;
     ml.get_key(LLM_KV_VOCAB_SIZE, n_vocab, false) || ml.get_arr_n(LLM_KV_TOKENIZER_LIST, n_vocab, false);
 
     // arch-specific KVs
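
The `ml.get_key(...) || ml.get_arr_n(...)` line above relies on short-circuit evaluation: if the vocab-size key is present the read succeeds and the fallback to the tokenizer-list length never runs. A self-contained sketch of that idiom, with hypothetical `get_key`/`get_arr_n` stand-ins in place of the loader methods:

```cpp
// Standalone sketch of the short-circuit fallback idiom used above;
// get_key/get_arr_n here are hypothetical stand-ins, not the loader API.
#include <cstdint>
#include <cstdio>
#include <optional>
#include <vector>

static std::optional<uint32_t> metadata_vocab_size;            // e.g. key absent from the file
static std::vector<const char *> tokenizer_list = {"a", "b"};  // fallback source

static bool get_key(uint32_t & out) {
    if (!metadata_vocab_size) return false; // optional key missing -> report failure
    out = *metadata_vocab_size;
    return true;
}

static bool get_arr_n(uint32_t & out) {
    out = (uint32_t) tokenizer_list.size(); // fall back to the array length
    return true;
}

int main() {
    uint32_t n_vocab = 0;
    // if the first read succeeds, || short-circuits and the fallback never runs
    get_key(n_vocab) || get_arr_n(n_vocab);
    std::printf("n_vocab = %u\n", n_vocab);
    return 0;
}
```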
@@ -622,7 +622,6 @@ void llama_model::load_hparams(llama_model_loader & ml) {
             {
                 ml.get_key(LLM_KV_ATTENTION_LAYERNORM_EPS, hparams.f_norm_eps);
                 ml.get_key(LLM_KV_ATTENTION_CAUSAL, hparams.causal_attn);
-                ml.get_key(LLM_KV_TOKENIZER_TOKEN_TYPE_COUNT, hparams.n_vocab_type);
                 ml.get_key(LLM_KV_POOLING_TYPE, hparams.pooling_type, false);
 
                 switch (hparams.n_layer) {
@@ -645,7 +644,6 @@ void llama_model::load_hparams(llama_model_loader & ml) {
             {
                 ml.get_key(LLM_KV_ATTENTION_LAYERNORM_EPS, hparams.f_norm_eps);
                 ml.get_key(LLM_KV_ATTENTION_CAUSAL, hparams.causal_attn);
-                ml.get_key(LLM_KV_TOKENIZER_TOKEN_TYPE_COUNT, hparams.n_vocab_type);
                 ml.get_key(LLM_KV_POOLING_TYPE, hparams.pooling_type, false);
                 hparams.f_max_alibi_bias = 8.0f;
 
@@ -659,7 +657,6 @@ void llama_model::load_hparams(llama_model_loader & ml) {
             {
                 ml.get_key(LLM_KV_ATTENTION_LAYERNORM_EPS, hparams.f_norm_eps);
                 ml.get_key(LLM_KV_ATTENTION_CAUSAL, hparams.causal_attn);
-                ml.get_key(LLM_KV_TOKENIZER_TOKEN_TYPE_COUNT, hparams.n_vocab_type);
                 ml.get_key(LLM_KV_POOLING_TYPE, hparams.pooling_type);
 
                 if (hparams.n_layer == 12 && hparams.n_embd == 768) {
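
In all three architecture cases the read of `LLM_KV_TOKENIZER_TOKEN_TYPE_COUNT` into `hparams.n_vocab_type` is removed; the key is instead read once during vocab loading with a trailing `false`, which (as with `LLM_KV_POOLING_TYPE` above) appears to mark the key as optional rather than required. A standalone sketch of that required-versus-optional distinction, using a hypothetical `get_key` stand-in and a made-up key name:

```cpp
// Hypothetical stand-in showing a required vs. optional metadata read;
// the map, key name and get_key signature here are illustrative only.
#include <cstdint>
#include <cstdio>
#include <map>
#include <stdexcept>
#include <string>

static std::map<std::string, uint32_t> kv; // pretend metadata store (key absent)

static bool get_key(const std::string & name, uint32_t & out, bool required = true) {
    auto it = kv.find(name);
    if (it == kv.end()) {
        if (required) {
            throw std::runtime_error("key not found: " + name);
        }
        return false; // optional: leave 'out' untouched and report absence
    }
    out = it->second;
    return true;
}

int main() {
    uint32_t n_token_types = 0;
    // optional read: a missing key is fine, n_token_types keeps its default of 0
    get_key("token_type_count", n_token_types, /*required=*/false);
    std::printf("n_token_types = %u\n", n_token_types);
    return 0;
}
```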
@@ -1367,7 +1364,7 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
     const int64_t n_ff = hparams.n_ff();
     const int64_t n_embd_gqa = n_embd_v_gqa;
     const int64_t n_vocab = vocab.n_vocab();
-    const int64_t n_vocab_type = hparams.n_vocab_type;
+    const int64_t n_token_types = vocab.n_token_types();
     const int64_t n_rot = hparams.n_rot;
     const int64_t n_expert = hparams.n_expert;
     const int64_t n_expert_used = hparams.n_expert_used;
@@ -1812,7 +1809,7 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
             case LLM_ARCH_NOMIC_BERT:
                 {
                     tok_embd = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, 0);
-                    type_embd = create_tensor(tn(LLM_TENSOR_TOKEN_TYPES, "weight"), {n_embd, n_vocab_type}, 0);
+                    type_embd = create_tensor(tn(LLM_TENSOR_TOKEN_TYPES, "weight"), {n_embd, n_token_types}, 0);
 
                     if (arch == LLM_ARCH_BERT) {
                         pos_embd = create_tensor(tn(LLM_TENSOR_POS_EMBD, "weight"), {n_embd, n_ctx_train}, 0);
@@ -1866,7 +1863,7 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
             case LLM_ARCH_JINA_BERT_V2:
                 {
                     tok_embd = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, 0); // word_embeddings
-                    type_embd = create_tensor(tn(LLM_TENSOR_TOKEN_TYPES, "weight"), {n_embd, n_vocab_type}, 0); // token_type_embeddings
+                    type_embd = create_tensor(tn(LLM_TENSOR_TOKEN_TYPES, "weight"), {n_embd, n_token_types}, 0); // token_type_embeddings
 
                     tok_norm = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD_NORM, "weight"), {n_embd}, 0); // LayerNorm
                     tok_norm_b = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD_NORM, "bias"), {n_embd}, 0); //LayerNorm bias
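
The `{n_embd, n_token_types}` shape sizes the token-type embedding table that BERT-style models add to the token (and, for BERT, position) embeddings when building the input. A minimal illustration of that lookup-and-sum with plain vectors and made-up dimensions:

```cpp
// Minimal illustration of what the {n_embd, n_token_types} tensor is used for:
// a BERT-style input embedding summing token, token-type and position rows.
// Plain std::vector stand-ins; all dimensions are made up for the example.
#include <cstdio>
#include <vector>

int main() {
    const int n_embd        = 4;  // hypothetical embedding width
    const int n_vocab       = 8;
    const int n_token_types = 2;
    const int n_ctx_train   = 16;

    // row-major [rows][n_embd] tables, filled with constants for brevity
    std::vector<std::vector<float>> tok_embd (n_vocab,       std::vector<float>(n_embd, 0.1f));
    std::vector<std::vector<float>> type_embd(n_token_types, std::vector<float>(n_embd, 0.2f));
    std::vector<std::vector<float>> pos_embd (n_ctx_train,   std::vector<float>(n_embd, 0.3f));

    const int token = 5, token_type = 0, pos = 3;

    std::vector<float> inp(n_embd);
    for (int i = 0; i < n_embd; ++i) {
        inp[i] = tok_embd[token][i] + type_embd[token_type][i] + pos_embd[pos][i];
    }

    std::printf("inp[0] = %.2f\n", inp[0]); // 0.1 + 0.2 + 0.3 = 0.60
    return 0;
}
```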
@@ -1205,6 +1205,7 @@ struct fragment_buffer_variant {
 
 struct llama_vocab::impl {
     uint32_t n_vocab = 0;
+    uint32_t n_token_types = 0; // for BERT-style token types
 
     std::unordered_map<std::string, llama_token> token_to_id;
     std::vector<token_data> id_to_token;
@@ -1286,6 +1287,7 @@ void llama_vocab::load(llama_model_loader & ml, const LLM_KV & kv) {
     struct gguf_context * ctx = ml.meta.get();
 
     auto & n_vocab = pimpl->n_vocab;
+    auto & n_token_types = pimpl->n_token_types;
     auto & id_to_token = pimpl->id_to_token;
     auto & token_to_id = pimpl->token_to_id;
     auto & special_eog_ids = pimpl->special_eog_ids;
@@ -1300,6 +1302,8 @@ void llama_vocab::load(llama_model_loader & ml, const LLM_KV & kv) {
     ml.get_key(LLM_KV_TOKENIZER_MODEL, tokenizer_model);
     ml.get_key(LLM_KV_TOKENIZER_PRE, tokenizer_pre, false);
 
+    ml.get_key(LLM_KV_TOKENIZER_TOKEN_TYPE_COUNT, n_token_types, false);
+
     if (tokenizer_model == "no_vocab" || tokenizer_model == "none") {
         type = LLAMA_VOCAB_TYPE_NONE;
 
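
The load path binds references such as `auto & n_token_types = pimpl->n_token_types;`, so the later `ml.get_key(..., n_token_types, false)` writes directly into the impl object. A standalone sketch of that aliasing pattern, with a hypothetical `read_u32` in place of the loader:

```cpp
// Standalone sketch of the reference-alias pattern used in the load hunks above;
// read_u32 is a hypothetical stand-in for the metadata reader.
#include <cstdint>
#include <cstdio>
#include <memory>

struct impl {
    uint32_t n_vocab       = 0;
    uint32_t n_token_types = 0;
};

static bool read_u32(uint32_t & out, uint32_t value) {
    out = value; // writes through the reference, i.e. directly into the impl
    return true;
}

int main() {
    auto pimpl = std::make_unique<impl>();

    // local aliases keep the load code short while still filling the impl in place
    auto & n_vocab       = pimpl->n_vocab;
    auto & n_token_types = pimpl->n_token_types;

    read_u32(n_vocab, 32000);
    read_u32(n_token_types, 2);

    std::printf("pimpl->n_vocab = %u, pimpl->n_token_types = %u\n",
                pimpl->n_vocab, pimpl->n_token_types);
    return 0;
}
```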
@@ -2013,6 +2017,10 @@ uint32_t llama_vocab::n_vocab() const {
     return (uint32_t) pimpl->id_to_token.size();
 }
 
+uint32_t llama_vocab::n_token_types() const {
+    return (uint32_t) pimpl->n_token_types;
+}
+
 std::string llama_vocab::type_name() const{
     switch (type) {
         case LLAMA_VOCAB_TYPE_NONE: return "no vocab";
@@ -24,8 +24,8 @@ struct llama_vocab {
     enum llama_vocab_type get_type() const;
     enum llama_vocab_pre_type get_pre_type() const;
 
-    // TODO: how to deduplicate with llama_hparams.n_vocab ?
     uint32_t n_vocab() const;
+    uint32_t n_token_types() const;
 
     std::string type_name() const;
 