Merge branch 'ggerganov:master' into hk
commit 7f576a7b49

10 changed files with 112 additions and 77 deletions

@@ -153,6 +153,7 @@ Unless otherwise noted these projects are open-source with permissive licensing:
 - [Faraday](https://faraday.dev/) (proprietary)
 - [LMStudio](https://lmstudio.ai/) (proprietary)
 - [Layla](https://play.google.com/store/apps/details?id=com.laylalite) (proprietary)
+- [ramalama](https://github.com/containers/ramalama) (MIT)
 - [LocalAI](https://github.com/mudler/LocalAI) (MIT)
 - [LostRuins/koboldcpp](https://github.com/LostRuins/koboldcpp) (AGPL)
 - [Mozilla-Ocho/llamafile](https://github.com/Mozilla-Ocho/llamafile)

@@ -1670,10 +1670,6 @@ GGML_CALL static bool ggml_backend_cann_supports_op(ggml_backend_t backend,
                 // TODO: fix me
                 // Current groupsize should not be greater than k-1 in
                 // aclnnWeightQuantBatchMatmulV2GetWorkspaceSize().
-                if (op->src[0]->ne[0]-1 > QK8_0) {
-                    return true;
-                }
-                return false;
             case GGML_TYPE_Q4_0:
                 return true;
             default:

@@ -12,6 +12,9 @@ class QUANTIZE_FLOAT_TO_Q4_0 {
     __aicore__ inline void init(GM_ADDR input, GM_ADDR output,
                                 int64_t *input_ne_ub, size_t *input_nb_ub,
                                 int64_t *output_ne_ub) {
+        // TODO: fix test_case CPY(type_src=f16,type_dst=q4_0,ne=[256,4,4,4],
+        //       permute=[0,0,0,0]):
+        //       [CPY] NMSE = 0.000008343 > 0.000001000 FAIL
         int64_t op_block_num = GetBlockNum();
         int64_t op_block_idx = GetBlockIdx();


@@ -61,13 +64,13 @@ class QUANTIZE_FLOAT_TO_Q4_0 {
         pipe.InitBuffer(input_queue, BUFFER_NUM, Group_Size * sizeof(SRC_T));
         pipe.InitBuffer(output_queue, BUFFER_NUM,
                         Group_Size * sizeof(int8_t) / 2);
-        pipe.InitBuffer(cast_queue , BUFFER_NUM, Group_Size * sizeof(float));
-        pipe.InitBuffer(work_queue, BUFFER_NUM, Group_Size*sizeof(float));
-        pipe.InitBuffer(max_queue, BUFFER_NUM, Group_Size*sizeof(float));
-        pipe.InitBuffer(min_queue, BUFFER_NUM, Group_Size*sizeof(float));
-        pipe.InitBuffer(scale_queue, BUFFER_NUM, 16*sizeof(half));
-        pipe.InitBuffer(int8_queue, BUFFER_NUM, Group_Size * sizeof(int8_t));
-        pipe.InitBuffer(half_queue, BUFFER_NUM, Group_Size * sizeof(half));
+        pipe.InitBuffer(cast_queue , 1, Group_Size * sizeof(float));
+        pipe.InitBuffer(work_queue, 1, Group_Size * sizeof(float));
+        pipe.InitBuffer(max_queue, 1, Group_Size * sizeof(float));
+        pipe.InitBuffer(min_queue, 1, Group_Size * sizeof(float));
+        pipe.InitBuffer(scale_queue, 1, Group_Size / 2 * sizeof(half));
+        pipe.InitBuffer(int8_queue, 1, Group_Size * sizeof(int8_t));
+        pipe.InitBuffer(half_queue, 1, Group_Size * sizeof(half));
     }

     __aicore__ inline void copy_in(uint32_t offset) {

@@ -178,13 +181,15 @@ class QUANTIZE_FLOAT_TO_Q4_0 {
         for (int64_t j = 0; j < group_size_in_row; j++) {
             half scale = calculate_group(i, j);
             scale_local.SetValue(scale_local_offset++, scale);
-            if (scale_local_offset == 16) {
+            // Copy Group_Size/2 length data each time.
+            if (scale_local_offset == Group_Size / 2) {
                 scale_local_offset = 0;
                 // TODO: OPTIMIZE ME
                 pipe_barrier(PIPE_ALL);
-                DataCopy(scale_gm[scale_global_offset], scale_local, 16);
+                DataCopy(scale_gm[scale_global_offset], scale_local,
+                         Group_Size / 2);
                 pipe_barrier(PIPE_ALL);
-                scale_global_offset += 16;
+                scale_global_offset += Group_Size / 2;
             }
         }
     }
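
The kernel change above replaces the hard-coded scale batch of 16 with Group_Size / 2 in both the staging buffer and the DataCopy length. A small arithmetic sketch of what that batch works out to; it is not part of this commit, and it assumes Group_Size matches ggml's Q4_0 group of 32 values with fp16 scales:

# Illustrative only: GROUP_SIZE (QK4_0) and the 2-byte fp16 scale are assumptions
# carried over from ggml's Q4_0 layout, not values taken from this diff.
GROUP_SIZE = 32                               # values per quantization group
QUANT_BYTES_PER_GROUP = GROUP_SIZE // 2       # two 4-bit values packed per byte
SCALES_PER_COPY = GROUP_SIZE // 2             # what the kernel now buffers per DataCopy
SCALE_BYTES_PER_COPY = SCALES_PER_COPY * 2    # fp16 scale is 2 bytes

assert SCALES_PER_COPY == 16                  # same behaviour as the old constant 16
print(QUANT_BYTES_PER_GROUP, SCALES_PER_COPY, SCALE_BYTES_PER_COPY)   # 16 16 32

Writing the batch as Group_Size / 2 ties the copy length to the group size instead of a magic number that only happens to be correct for 32-value groups.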

@@ -2312,7 +2312,7 @@ inline static void ggml_vec_abs_f32 (const int n, float * y, const float * x) {
 inline static void ggml_vec_sgn_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = (x[i] > 0.f) ? 1.f : ((x[i] < 0.f) ? -1.f : 0.f); }
 inline static void ggml_vec_step_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = (x[i] > 0.f) ? 1.f : 0.f; }
 inline static void ggml_vec_tanh_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = tanhf(x[i]); }
-inline static void ggml_vec_elu_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = (x[i] > 0.f) ? x[i] : expf(x[i])-1; }
+inline static void ggml_vec_elu_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = (x[i] > 0.f) ? x[i] : expm1f(x[i]); }
 inline static void ggml_vec_relu_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = (x[i] > 0.f) ? x[i] : 0.f; }
 inline static void ggml_vec_leaky_relu_f32 (const int n, float * y, const float * x, const float ns) { for (int i = 0; i < n; ++i) y[i] = ((x[i] > 0.f) ? x[i] : 0.f) + ns * ((x[i] < 0.0f) ? x[i] : 0.f); }
 inline static void ggml_vec_sigmoid_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = 1.f / (1.f + expf(-x[i])); }
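
The only functional change in this hunk is ggml_vec_elu_f32: the negative branch now calls expm1f(x) instead of computing expf(x) - 1. Both expressions define the same ELU value, but subtracting 1 from a number that is almost exactly 1 cancels most of the significant digits when |x| is small. A minimal, stand-alone illustration using Python's math.expm1 (not part of this commit; the float32 expf/expm1f case loses digits even faster than double precision):

import math

# Compare the naive form exp(x) - 1 with expm1(x) for values of x near zero.
for x in (1e-8, -1e-8, 1e-12):
    naive = math.exp(x) - 1.0    # cancellation: exp(x) is ~1.0, low-order digits are lost
    stable = math.expm1(x)       # evaluates exp(x) - 1 directly, keeps full precision
    rel_err = abs(naive - stable) / abs(stable)
    print(f"x={x:+.0e}  naive={naive:+.17e}  expm1={stable:+.17e}  rel_err={rel_err:.1e}")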

@@ -161,6 +161,7 @@ class Keys:
        SUFFIX_ID = "tokenizer.ggml.suffix_token_id"
        MIDDLE_ID = "tokenizer.ggml.middle_token_id"
        EOT_ID = "tokenizer.ggml.eot_token_id"
+       EOM_ID = "tokenizer.ggml.eom_token_id"

    class Adapter:
        TYPE = "adapter.type"

@@ -1327,3 +1328,4 @@ KEY_TOKENIZER_PRIFIX_ID = Keys.Tokenizer.PREFIX_ID
 KEY_TOKENIZER_SUFFIX_ID = Keys.Tokenizer.SUFFIX_ID
 KEY_TOKENIZER_MIDDLE_ID = Keys.Tokenizer.MIDDLE_ID
 KEY_TOKENIZER_EOT_ID = Keys.Tokenizer.EOT_ID
+KEY_TOKENIZER_EOM_ID = Keys.Tokenizer.EOM_ID

@@ -828,6 +828,9 @@ class GGUFWriter:
     def add_eot_token_id(self, id: int) -> None:
         self.add_uint32(Keys.Tokenizer.EOT_ID, id)

+    def add_eom_token_id(self, id: int) -> None:
+        self.add_uint32(Keys.Tokenizer.EOM_ID, id)
+
     def _pack(self, fmt: str, value: Any, skip_pack_prefix: bool = False) -> bytes:
         pack_prefix = ''
         if not skip_pack_prefix:

@@ -284,20 +284,67 @@ class Metadata:
         ########################
         if model_card is not None:

-            if "model_name" in model_card and metadata.name is None:
-                # Not part of huggingface model card standard but notice some model creator using it
-                # such as TheBloke in 'TheBloke/Mistral-7B-Instruct-v0.2-GGUF'
-                metadata.name = model_card.get("model_name")
-
-            if "model_creator" in model_card and metadata.author is None:
-                # Not part of huggingface model card standard but notice some model creator using it
-                # such as TheBloke in 'TheBloke/Mistral-7B-Instruct-v0.2-GGUF'
-                metadata.author = model_card.get("model_creator")
-
-            if "model_type" in model_card and metadata.basename is None:
-                # Not part of huggingface model card standard but notice some model creator using it
-                # such as TheBloke in 'TheBloke/Mistral-7B-Instruct-v0.2-GGUF'
-                metadata.basename = model_card.get("model_type")
-
+            def use_model_card_metadata(metadata_key: str, model_card_key: str):
+                if model_card_key in model_card and getattr(metadata, metadata_key, None) is None:
+                    setattr(metadata, metadata_key, model_card.get(model_card_key))
+
+            def use_array_model_card_metadata(metadata_key: str, model_card_key: str):
+                # Note: Will append rather than replace if already exist
+                tags_value = model_card.get(model_card_key, None)
+                if tags_value is None:
+                    return
+
+                current_value = getattr(metadata, metadata_key, None)
+                if current_value is None:
+                    current_value = []
+
+                if isinstance(tags_value, str):
+                    current_value.append(tags_value)
+                elif isinstance(tags_value, list):
+                    current_value.extend(tags_value)
+
+                setattr(metadata, metadata_key, current_value)
+
+            # LLAMA.cpp's direct internal convention
+            # (Definitely not part of hugging face formal/informal standard)
+            #########################################
+            use_model_card_metadata("name", "name")
+            use_model_card_metadata("author", "author")
+            use_model_card_metadata("version", "version")
+            use_model_card_metadata("organization", "organization")
+            use_model_card_metadata("description", "description")
+            use_model_card_metadata("finetune", "finetune")
+            use_model_card_metadata("basename", "basename")
+            use_model_card_metadata("size_label", "size_label")
+            use_model_card_metadata("source_url", "url")
+            use_model_card_metadata("source_doi", "doi")
+            use_model_card_metadata("source_uuid", "uuid")
+            use_model_card_metadata("source_repo_url", "repo_url")
+
+            # LLAMA.cpp's huggingface style convention
+            # (Definitely not part of hugging face formal/informal standard... but with model_ appended to match their style)
+            ###########################################
+            use_model_card_metadata("name", "model_name")
+            use_model_card_metadata("author", "model_author")
+            use_model_card_metadata("version", "model_version")
+            use_model_card_metadata("organization", "model_organization")
+            use_model_card_metadata("description", "model_description")
+            use_model_card_metadata("finetune", "model_finetune")
+            use_model_card_metadata("basename", "model_basename")
+            use_model_card_metadata("size_label", "model_size_label")
+            use_model_card_metadata("source_url", "model_url")
+            use_model_card_metadata("source_doi", "model_doi")
+            use_model_card_metadata("source_uuid", "model_uuid")
+            use_model_card_metadata("source_repo_url", "model_repo_url")
+
+            # Hugging Face Direct Convention
+            #################################
+
+            # Not part of huggingface model card standard but notice some model creator using it
+            # such as TheBloke in 'TheBloke/Mistral-7B-Instruct-v0.2-GGUF'
+            use_model_card_metadata("name", "model_name")
+            use_model_card_metadata("author", "model_creator")
+            use_model_card_metadata("basename", "model_type")
+
             if "base_model" in model_card:
                 # This represents the parent models that this is based on

@@ -329,58 +376,18 @@ class Metadata:
                     base_model["repo_url"] = f"https://huggingface.co/{org_component}/{model_full_name_component}"
                     metadata.base_models.append(base_model)

-            if "license" in model_card and metadata.license is None:
-                metadata.license = model_card.get("license")
-
-            if "license_name" in model_card and metadata.license_name is None:
-                metadata.license_name = model_card.get("license_name")
-
-            if "license_link" in model_card and metadata.license_link is None:
-                metadata.license_link = model_card.get("license_link")
-
-            tags_value = model_card.get("tags", None)
-            if tags_value is not None:
-
-                if metadata.tags is None:
-                    metadata.tags = []
-
-                if isinstance(tags_value, str):
-                    metadata.tags.append(tags_value)
-                elif isinstance(tags_value, list):
-                    metadata.tags.extend(tags_value)
-
-            pipeline_tags_value = model_card.get("pipeline_tag", None)
-            if pipeline_tags_value is not None:
-
-                if metadata.tags is None:
-                    metadata.tags = []
-
-                if isinstance(pipeline_tags_value, str):
-                    metadata.tags.append(pipeline_tags_value)
-                elif isinstance(pipeline_tags_value, list):
-                    metadata.tags.extend(pipeline_tags_value)
-
-            language_value = model_card.get("languages", model_card.get("language", None))
-            if language_value is not None:
-
-                if metadata.languages is None:
-                    metadata.languages = []
-
-                if isinstance(language_value, str):
-                    metadata.languages.append(language_value)
-                elif isinstance(language_value, list):
-                    metadata.languages.extend(language_value)
-
-            dataset_value = model_card.get("datasets", model_card.get("dataset", None))
-            if dataset_value is not None:
-
-                if metadata.datasets is None:
-                    metadata.datasets = []
-
-                if isinstance(dataset_value, str):
-                    metadata.datasets.append(dataset_value)
-                elif isinstance(dataset_value, list):
-                    metadata.datasets.extend(dataset_value)
-
+            use_model_card_metadata("license", "license")
+            use_model_card_metadata("license_name", "license_name")
+            use_model_card_metadata("license_link", "license_link")
+
+            use_array_model_card_metadata("tags", "tags")
+            use_array_model_card_metadata("tags", "pipeline_tag")
+
+            use_array_model_card_metadata("languages", "languages")
+            use_array_model_card_metadata("languages", "language")
+
+            use_array_model_card_metadata("datasets", "datasets")
+            use_array_model_card_metadata("datasets", "dataset")
+
         # Hugging Face Parameter Heuristics
         ####################################
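
The two metadata.py hunks above fold the repeated "if key in model_card and field is None" blocks into two local helpers: use_model_card_metadata for scalar fields and use_array_model_card_metadata for list fields. The following self-contained sketch shows the same pattern on a stripped-down stand-in for the Metadata class; the field set and model card contents are invented for the example, and only the helper logic mirrors the diff:

from dataclasses import dataclass

@dataclass
class Metadata:          # stand-in: the real gguf-py class has many more fields
    name: str = None
    author: str = None
    tags: list = None

metadata = Metadata()
model_card = {           # invented example card
    "model_name": "ExampleModel",
    "model_creator": "Example Org",
    "tags": ["conversational"],
    "pipeline_tag": "text-generation",
}

def use_model_card_metadata(metadata_key: str, model_card_key: str) -> None:
    # scalar fields: fill only if the card has the key and nothing set the field earlier
    if model_card_key in model_card and getattr(metadata, metadata_key, None) is None:
        setattr(metadata, metadata_key, model_card.get(model_card_key))

def use_array_model_card_metadata(metadata_key: str, model_card_key: str) -> None:
    # list fields: append (str) or extend (list) rather than replace
    value = model_card.get(model_card_key, None)
    if value is None:
        return
    current = getattr(metadata, metadata_key, None) or []
    if isinstance(value, str):
        current.append(value)
    elif isinstance(value, list):
        current.extend(value)
    setattr(metadata, metadata_key, current)

use_model_card_metadata("name", "model_name")
use_model_card_metadata("author", "model_creator")
use_array_model_card_metadata("tags", "tags")
use_array_model_card_metadata("tags", "pipeline_tag")
print(metadata)  # Metadata(name='ExampleModel', author='Example Org', tags=['conversational', 'text-generation'])

Accessing fields through getattr/setattr is what lets one helper cover every scalar key; each later convention (the model_-prefixed keys, then the TheBloke-style keys) only wins when an earlier, higher-priority source left the field unset.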

@@ -1444,7 +1444,8 @@ llama_token_attr llama_token_get_attr_impl(const struct llama_vocab & vocab, lla
 bool llama_token_is_eog_impl(const struct llama_vocab & vocab, llama_token token) {
     return token != -1 && (
         token == llama_token_eos_impl(vocab) ||
-        token == llama_token_eot_impl(vocab)
+        token == llama_token_eot_impl(vocab) ||
+        token == llama_token_eom_impl(vocab)
     );
 }


@@ -1500,6 +1501,10 @@ llama_token llama_token_eot_impl(const struct llama_vocab & vocab) {
     return vocab.special_eot_id;
 }

+llama_token llama_token_eom_impl(const struct llama_vocab & vocab) {
+    return vocab.special_eom_id;
+}
+
 int32_t llama_tokenize_impl(
         const struct llama_vocab & vocab,
         const char * text,

@@ -45,6 +45,7 @@ struct llama_vocab {
     id special_suffix_id = -1;
     id special_middle_id = -1;
     id special_eot_id = -1; // TODO: move above after "eos_id", and here add "file separator" token
+    id special_eom_id = -1;

     // tokenizer flags
     bool tokenizer_add_space_prefix = false;

@@ -101,6 +102,7 @@ llama_token llama_token_prefix_impl(const struct llama_vocab & vocab);
 llama_token llama_token_middle_impl(const struct llama_vocab & vocab);
 llama_token llama_token_suffix_impl(const struct llama_vocab & vocab);
 llama_token llama_token_eot_impl (const struct llama_vocab & vocab);
+llama_token llama_token_eom_impl (const struct llama_vocab & vocab);

 int32_t llama_tokenize_impl(
         const struct llama_vocab & vocab,

@@ -359,6 +359,7 @@ enum llm_kv {
     LLM_KV_TOKENIZER_SUFFIX_ID,
     LLM_KV_TOKENIZER_MIDDLE_ID,
     LLM_KV_TOKENIZER_EOT_ID,
+    LLM_KV_TOKENIZER_EOM_ID,

     LLM_KV_ADAPTER_TYPE,
     LLM_KV_ADAPTER_LORA_ALPHA,

@@ -456,6 +457,7 @@ static const std::map<llm_kv, const char *> LLM_KV_NAMES = {
     { LLM_KV_TOKENIZER_SUFFIX_ID, "tokenizer.ggml.suffix_token_id" },
     { LLM_KV_TOKENIZER_MIDDLE_ID, "tokenizer.ggml.middle_token_id" },
     { LLM_KV_TOKENIZER_EOT_ID, "tokenizer.ggml.eot_token_id" },
+    { LLM_KV_TOKENIZER_EOM_ID, "tokenizer.ggml.eom_token_id" },

     { LLM_KV_ADAPTER_TYPE, "adapter.type" },
     { LLM_KV_ADAPTER_LORA_ALPHA, "adapter.lora.alpha" },

@@ -5583,6 +5585,7 @@ static void llm_load_vocab(
         { LLM_KV_TOKENIZER_SUFFIX_ID, vocab.special_suffix_id },
         { LLM_KV_TOKENIZER_MIDDLE_ID, vocab.special_middle_id },
         { LLM_KV_TOKENIZER_EOT_ID, vocab.special_eot_id },
+        { LLM_KV_TOKENIZER_EOM_ID, vocab.special_eom_id },
     };

     for (const auto & it : special_token_types) {

@@ -5635,6 +5638,17 @@ static void llm_load_vocab(
                 }
             }
         }
+
+        // find EOM token: "<|eom_id|>"
+        //
+        // TODO: convert scripts should provide this token through the KV metadata LLAMA_KV_TOKENIZER_EOM_ID
+        // for now, we apply this workaround to find the EOM token based on its text
+        if (vocab.special_eom_id == -1) {
+            const auto & t = vocab.token_to_id.find("<|eom_id|>");
+            if (t != vocab.token_to_id.end()) {
+                vocab.special_eom_id = t->second;
+            }
+        }
     }

     // build special tokens cache
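
The last hunk adds a loader-side fallback: when the GGUF file carries no tokenizer.ggml.eom_token_id key, the EOM id is recovered from the token text "<|eom_id|>". A toy, self-contained version of that lookup; the dict stands in for vocab.token_to_id and the ids are invented:

# Fallback sketch: only consult the token text when the metadata key was absent.
token_to_id = {"<|eot_id|>": 9, "<|eom_id|>": 8}   # invented toy vocabulary

special_eom_id = -1                                # -1 means the KV pair was not present
if special_eom_id == -1:
    special_eom_id = token_to_id.get("<|eom_id|>", -1)

print(special_eom_id)                              # 8 for this toy vocabulary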