resolve merge conflicts
This commit is contained in:
commit
66cffa8aff
127 changed files with 8174 additions and 6065 deletions
|
@ -14,7 +14,7 @@ int main(int argc, char ** argv) {
|
|||
std::thread([&model_path]() {
|
||||
llama_backend_init();
|
||||
auto * model = llama_model_load_from_file(model_path, llama_model_default_params());
|
||||
auto * ctx = llama_new_context_with_model(model, llama_context_default_params());
|
||||
auto * ctx = llama_init_from_model(model, llama_context_default_params());
|
||||
llama_free(ctx);
|
||||
llama_model_free(model);
|
||||
llama_backend_free();
|
||||
|
|
|
@ -1659,17 +1659,46 @@ struct test_rwkv_wkv6 : public test_case {
|
|||
|
||||
ggml_tensor * build_graph(ggml_context * ctx) override {
|
||||
const int64_t n_tokens = n_seq_tokens * n_seqs;
|
||||
ggml_tensor * r = ggml_new_tensor(ctx, type, 4, std::vector<int64_t>{ 1, head_size, head_count, n_tokens }.data());
|
||||
ggml_tensor * k = ggml_new_tensor(ctx, type, 4, std::vector<int64_t>{ head_size, 1, head_count, n_tokens }.data());
|
||||
ggml_tensor * v = ggml_new_tensor(ctx, type, 4, std::vector<int64_t>{ 1, head_size, head_count, n_tokens }.data());
|
||||
ggml_tensor * r = ggml_new_tensor(ctx, type, 3, std::vector<int64_t>{ head_size, head_count, n_tokens }.data());
|
||||
ggml_tensor * k = ggml_new_tensor(ctx, type, 3, std::vector<int64_t>{ head_size, head_count, n_tokens }.data());
|
||||
ggml_tensor * v = ggml_new_tensor(ctx, type, 3, std::vector<int64_t>{ head_size, head_count, n_tokens }.data());
|
||||
ggml_tensor * tf = ggml_new_tensor(ctx, type, 2, std::vector<int64_t>{ head_size, head_count }.data());
|
||||
ggml_tensor * td = ggml_new_tensor(ctx, type, 4, std::vector<int64_t>{ 1, head_size, head_count, n_tokens }.data());
|
||||
ggml_tensor * td = ggml_new_tensor(ctx, type, 3, std::vector<int64_t>{ head_size, head_count, n_tokens }.data());
|
||||
ggml_tensor * s = ggml_new_tensor(ctx, type, 2, std::vector<int64_t>{ head_size * head_size * head_count, n_seqs }.data());
|
||||
ggml_tensor * out = ggml_rwkv_wkv6(ctx, k, v, r, tf, td, s);
|
||||
return out;
|
||||
}
|
||||
};
|
||||
|
||||
// GGML_OP_GATED_LINEAR_ATTN
|
||||
struct test_gla : public test_case {
|
||||
const ggml_type type;
|
||||
|
||||
const int64_t head_count;
|
||||
const int64_t head_size;
|
||||
const int64_t n_seq_tokens;
|
||||
const int64_t n_seqs;
|
||||
|
||||
std::string vars() override {
|
||||
return VARS_TO_STR5(type, head_count, head_size, n_seq_tokens, n_seqs);
|
||||
}
|
||||
|
||||
test_gla(ggml_type type = GGML_TYPE_F32,
|
||||
int64_t head_count = 32, int64_t head_size = 64, int64_t n_seq_tokens = 32, int64_t n_seqs = 32)
|
||||
: type(type), head_count(head_count), head_size(head_size), n_seq_tokens(n_seq_tokens), n_seqs(n_seqs) {}
|
||||
|
||||
ggml_tensor * build_graph(ggml_context * ctx) override {
|
||||
const int64_t n_tokens = n_seq_tokens * n_seqs;
|
||||
ggml_tensor * q = ggml_new_tensor(ctx, type, 3, std::vector<int64_t>{ head_size, head_count, n_tokens }.data());
|
||||
ggml_tensor * k = ggml_new_tensor(ctx, type, 3, std::vector<int64_t>{ head_size, head_count, n_tokens }.data());
|
||||
ggml_tensor * v = ggml_new_tensor(ctx, type, 3, std::vector<int64_t>{ head_size, head_count, n_tokens }.data());
|
||||
ggml_tensor * g = ggml_new_tensor(ctx, type, 3, std::vector<int64_t>{ head_size, head_count, n_tokens }.data());
|
||||
ggml_tensor * s = ggml_new_tensor(ctx, type, 2, std::vector<int64_t>{ head_size * head_size * head_count, n_seqs }.data());
|
||||
ggml_tensor * out = ggml_gated_linear_attn(ctx, k, v, q, g, s, pow(head_size, -0.5));
|
||||
return out;
|
||||
}
|
||||
};
|
||||
|
||||
// GGML_OP_MUL_MAT
|
||||
struct test_mul_mat : public test_case {
|
||||
const ggml_type type_a;
|
||||
|
@ -3626,6 +3655,11 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
|
|||
test_cases.emplace_back(new test_rwkv_wkv6(GGML_TYPE_F32, 32, 64, 32, 4));
|
||||
test_cases.emplace_back(new test_rwkv_wkv6(GGML_TYPE_F32, 32, 64, 128, 4));
|
||||
|
||||
test_cases.emplace_back(new test_gla(GGML_TYPE_F32, 32, 64, 1, 1));
|
||||
test_cases.emplace_back(new test_gla(GGML_TYPE_F32, 32, 64, 32, 1));
|
||||
test_cases.emplace_back(new test_gla(GGML_TYPE_F32, 32, 64, 32, 4));
|
||||
test_cases.emplace_back(new test_gla(GGML_TYPE_F32, 32, 64, 128, 4));
|
||||
|
||||
for (int i = 1; i < 9; ++i) {
|
||||
test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F16, GGML_TYPE_F32, 16, i, 256, { 1, 1}, {1, 1}));
|
||||
test_cases.emplace_back(new test_mul_mat(GGML_TYPE_Q4_0, GGML_TYPE_F32, 16, i, 256, { 1, 1}, {1, 1}));
|
||||
|
|
|
@ -78,7 +78,9 @@ int main(void) {
|
|||
// ai-sage/GigaChat-20B-A3B-instruct
|
||||
"{% if messages[0]['role'] == 'system' -%}\n {%- set loop_messages = messages[1:] -%}\n {%- set system_message = bos_token + messages[0]['content'] + additional_special_tokens[1] -%}\n{%- else -%}\n {%- set loop_messages = messages -%}\n {%- set system_message = bos_token + '' -%}\n{%- endif -%}\n{%- for message in loop_messages %}\n {% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}\n {{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}\n {% endif %}\n \n {%- if loop.index0 == 0 -%}\n {{ system_message -}}\n {%- endif -%}\n {%- if message['role'] == 'user' -%}\n {{ message['role'] + additional_special_tokens[0] + message['content'] + additional_special_tokens[1] -}}\n {{ 'available functions' + additional_special_tokens[0] + additional_special_tokens[2] + additional_special_tokens[3] + additional_special_tokens[1] -}}\n {%- endif -%}\n {%- if message['role'] == 'assistant' -%}\n {{ message['role'] + additional_special_tokens[0] + message['content'] + additional_special_tokens[1] -}}\n {%- endif -%}\n {%- if loop.last and add_generation_prompt -%}\n {{ 'assistant' + additional_special_tokens[0] -}}\n {%- endif -%}\n{%- endfor %}",
|
||||
// Infinigence/Megrez-3B-Instruct
|
||||
u8"{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|role_start|>system<|role_end|>你是Megrez-3B-Instruct,将针对用户的问题给出详细的、积极的回答。<|turn_end|>' }}{% endif %}{{ '<|role_start|>' + message['role'] + '<|role_end|>' + message['content'] + '<|turn_end|>' }}{% endfor %}{% if add_generation_prompt %}{{ '<|role_start|>assistant<|role_end|>' }}{% endif %}"
|
||||
u8"{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|role_start|>system<|role_end|>你是Megrez-3B-Instruct,将针对用户的问题给出详细的、积极的回答。<|turn_end|>' }}{% endif %}{{ '<|role_start|>' + message['role'] + '<|role_end|>' + message['content'] + '<|turn_end|>' }}{% endfor %}{% if add_generation_prompt %}{{ '<|role_start|>assistant<|role_end|>' }}{% endif %}",
|
||||
// phi-4
|
||||
"{% for message in messages %}{% if (message['role'] == 'system') %}{{'<|im_start|>system<|im_sep|>' + message['content'] + '<|im_end|>'}}{% elif (message['role'] == 'user') %}{{'<|im_start|>user<|im_sep|>' + message['content'] + '<|im_end|><|im_start|>assistant<|im_sep|>'}}{% elif (message['role'] == 'assistant') %}{{message['content'] + '<|im_end|>'}}{% endif %}{% endfor %}",
|
||||
};
|
||||
std::vector<std::string> expected_output = {
|
||||
// teknium/OpenHermes-2.5-Mistral-7B
|
||||
|
@ -137,6 +139,8 @@ int main(void) {
|
|||
"<s>You are a helpful assistant<|message_sep|>user<|role_sep|>Hello<|message_sep|>available functions<|role_sep|>[]<|message_sep|>assistant<|role_sep|>Hi there<|message_sep|>user<|role_sep|>Who are you<|message_sep|>available functions<|role_sep|>[]<|message_sep|>assistant<|role_sep|> I am an assistant <|message_sep|>user<|role_sep|>Another question<|message_sep|>available functions<|role_sep|>[]<|message_sep|>assistant<|role_sep|>",
|
||||
// Infinigence/Megrez-3B-Instruct
|
||||
"<|role_start|>system<|role_end|>You are a helpful assistant<|turn_end|><|role_start|>user<|role_end|>Hello<|turn_end|><|role_start|>assistant<|role_end|>Hi there<|turn_end|><|role_start|>user<|role_end|>Who are you<|turn_end|><|role_start|>assistant<|role_end|> I am an assistant <|turn_end|><|role_start|>user<|role_end|>Another question<|turn_end|><|role_start|>assistant<|role_end|>",
|
||||
// phi-4
|
||||
"<|im_start|>system<|im_sep|>You are a helpful assistant<|im_end|><|im_start|>user<|im_sep|>Hello<|im_end|><|im_start|>assistant<|im_sep|>Hi there<|im_end|><|im_start|>user<|im_sep|>Who are you<|im_end|><|im_start|>assistant<|im_sep|> I am an assistant <|im_end|><|im_start|>user<|im_sep|>Another question<|im_end|><|im_start|>assistant<|im_sep|>",
|
||||
};
|
||||
std::vector<char> formatted_chat(1024);
|
||||
int32_t res;
|
||||
|
@ -153,7 +157,7 @@ int main(void) {
|
|||
}
|
||||
|
||||
// test invalid chat template
|
||||
res = llama_chat_apply_template(nullptr, "INVALID TEMPLATE", conversation, message_count, true, formatted_chat.data(), formatted_chat.size());
|
||||
res = llama_chat_apply_template("INVALID TEMPLATE", conversation, message_count, true, formatted_chat.data(), formatted_chat.size());
|
||||
assert(res < 0);
|
||||
|
||||
for (size_t i = 0; i < templates.size(); i++) {
|
||||
|
@ -161,7 +165,6 @@ int main(void) {
|
|||
std::string expected = expected_output[i];
|
||||
formatted_chat.resize(1024);
|
||||
res = llama_chat_apply_template(
|
||||
nullptr,
|
||||
custom_template.c_str(),
|
||||
conversation,
|
||||
message_count,
|
||||
|
|
|
@ -161,7 +161,7 @@ int main(int argc, char **argv) {
|
|||
|
||||
auto cparams = llama_context_default_params();
|
||||
|
||||
ctx = llama_new_context_with_model(model, cparams);
|
||||
ctx = llama_init_from_model(model, cparams);
|
||||
|
||||
if (ctx == NULL) {
|
||||
fprintf(stderr, "%s: error: failed to load vocab '%s'\n", __func__, fname.c_str());
|
||||
|
|
|
@ -55,7 +55,7 @@ int main(int argc, char **argv) {
|
|||
|
||||
auto cparams = llama_context_default_params();
|
||||
|
||||
ctx = llama_new_context_with_model(model, cparams);
|
||||
ctx = llama_init_from_model(model, cparams);
|
||||
|
||||
if (ctx == NULL) {
|
||||
fprintf(stderr, "%s: error: failed to load vocab '%s'\n", __func__, fname.c_str());
|
||||
|
@ -64,8 +64,10 @@ int main(int argc, char **argv) {
|
|||
}
|
||||
}
|
||||
|
||||
//GGML_ASSERT(llama_vocab_type(model) == LLAMA_VOCAB_TYPE_BPE);
|
||||
if (llama_vocab_type(model) != LLAMA_VOCAB_TYPE_BPE) {
|
||||
const llama_vocab * vocab = llama_model_get_vocab(model);
|
||||
|
||||
//GGML_ASSERT(llama_vocab_type(vocab) == LLAMA_VOCAB_TYPE_BPE);
|
||||
if (llama_vocab_type(vocab) != LLAMA_VOCAB_TYPE_BPE) {
|
||||
return 99;
|
||||
}
|
||||
|
||||
|
@ -75,7 +77,7 @@ int main(int argc, char **argv) {
|
|||
atexit([]() { console::cleanup(); });
|
||||
#endif
|
||||
|
||||
const int n_vocab = llama_n_vocab(model);
|
||||
const int n_vocab = llama_vocab_n_tokens(vocab);
|
||||
|
||||
for (int i = 0; i < n_vocab; ++i) {
|
||||
std::string str = common_detokenize(ctx, std::vector<int>(1, i));
|
||||
|
|
|
@ -43,7 +43,7 @@ int main(int argc, char ** argv) {
|
|||
|
||||
auto cparams = llama_context_default_params();
|
||||
|
||||
ctx = llama_new_context_with_model(model, cparams);
|
||||
ctx = llama_init_from_model(model, cparams);
|
||||
|
||||
if (ctx == NULL) {
|
||||
fprintf(stderr, "%s: error: failed to load vocab '%s'\n", __func__, fname.c_str());
|
||||
|
@ -52,8 +52,10 @@ int main(int argc, char ** argv) {
|
|||
}
|
||||
}
|
||||
|
||||
const llama_vocab * vocab = llama_model_get_vocab(model);
|
||||
|
||||
//GGML_ASSERT(llama_vocab_type(model) == LLAMA_VOCAB_TYPE_SPM);
|
||||
if (llama_vocab_type(model) != LLAMA_VOCAB_TYPE_SPM) {
|
||||
if (llama_vocab_type(vocab) != LLAMA_VOCAB_TYPE_SPM) {
|
||||
return 99;
|
||||
}
|
||||
|
||||
|
@ -63,7 +65,7 @@ int main(int argc, char ** argv) {
|
|||
atexit([]() { console::cleanup(); });
|
||||
#endif
|
||||
|
||||
const int n_vocab = llama_n_vocab(model);
|
||||
const int n_vocab = llama_vocab_n_tokens(vocab);
|
||||
|
||||
for (int i = 0; i < n_vocab; ++i) {
|
||||
std::string str = common_detokenize(ctx, std::vector<int>(1, i), true);
|
||||
|
|
|
@ -76,7 +76,7 @@ class LibLlamaModel:
|
|||
self.ffi = libllama.ffi
|
||||
if isinstance(mparams, dict):
|
||||
mparams = libllama.model_default_params(**mparams)
|
||||
self.model = self.lib.llama_load_model_from_file(path_model.encode(), mparams)
|
||||
self.model = self.lib.llama_model_load_from_file(path_model.encode(), mparams)
|
||||
if not self.model:
|
||||
raise RuntimeError("error: failed to load model '%s'" % path_model)
|
||||
if isinstance(cparams, dict):
|
||||
|
@ -92,7 +92,7 @@ class LibLlamaModel:
|
|||
if self.ctx:
|
||||
self.lib.llama_free(self.ctx)
|
||||
if self.model:
|
||||
self.lib.llama_free_model(self.model)
|
||||
self.lib.llama_model_free(self.model)
|
||||
self.ctx = None
|
||||
self.model = None
|
||||
self.lib = None
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue