rope fixes
This commit is contained in:
parent
bc4ff72317
commit
42eabf2f2f
4 changed files with 15 additions and 10 deletions
13
llama.cpp
13
llama.cpp
|
@@ -1245,10 +1245,15 @@ struct llama_vocab {
|
||||||
id special_eot_id = 32010;
|
id special_eot_id = 32010;
|
||||||
|
|
||||||
int find_bpe_rank(std::string token_left, std::string token_right) const {
|
int find_bpe_rank(std::string token_left, std::string token_right) const {
|
||||||
GGML_ASSERT(token_left.find(" ") == std::string::npos);
|
// GGML_ASSERT(token_left.find(" ") == std::string::npos);
|
||||||
GGML_ASSERT(token_left.find("\n") == std::string::npos);
|
// GGML_ASSERT(token_left.find("\n") == std::string::npos);
|
||||||
GGML_ASSERT(token_right.find(" ") == std::string::npos);
|
// GGML_ASSERT(token_right.find(" ") == std::string::npos);
|
||||||
GGML_ASSERT(token_right.find("\n") == std::string::npos);
|
// GGML_ASSERT(token_right.find("\n") == std::string::npos);
|
||||||
|
// The assertions above are disabled because they break GGUF v1 Falcon models.
|
||||||
|
replace_all(token_left, " ", "\u0120");
|
||||||
|
replace_all(token_left, "\n", "\u010A");
|
||||||
|
replace_all(token_right, " ", "\u0120");
|
||||||
|
replace_all(token_right, "\n", "\u010A");
|
||||||
|
|
||||||
auto it = bpe_ranks.find(std::make_pair(token_left, token_right));
|
auto it = bpe_ranks.find(std::make_pair(token_left, token_right));
|
||||||
if (it == bpe_ranks.end()) {
|
if (it == bpe_ranks.end()) {
|
||||||
|
|
|
@@ -494,8 +494,8 @@ bool gptj_eval(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ggml_tensor *Qcur = ggml_rope_custom_inplace(ctx0, ggml_reshape_3d(ctx0, ggml_mul_mat(ctx0, model.layers[il].c_attn_q_proj_w, cur), n_embd / n_head, n_head, N), KQ_pos, n_rot, 0, n_ctx, 0, freq_base, freq_scale, 0, 0, 0, 0);
|
struct ggml_tensor *Qcur = ggml_rope_custom_inplace(ctx0, ggml_reshape_3d(ctx0, ggml_mul_mat(ctx0, model.layers[il].c_attn_q_proj_w, cur), n_embd / n_head, n_head, N), KQ_pos, n_rot, 0, n_ctx, 0, freq_base, freq_scale, NAN, 1, 32, 1);
|
||||||
struct ggml_tensor *Kcur = ggml_rope_custom_inplace(ctx0, ggml_reshape_3d(ctx0, ggml_mul_mat(ctx0, model.layers[il].c_attn_k_proj_w, cur), n_embd / n_head, n_head, N), KQ_pos, n_rot, 0, n_ctx, 0, freq_base, freq_scale, 0, 0, 0, 0);
|
struct ggml_tensor *Kcur = ggml_rope_custom_inplace(ctx0, ggml_reshape_3d(ctx0, ggml_mul_mat(ctx0, model.layers[il].c_attn_k_proj_w, cur), n_embd / n_head, n_head, N), KQ_pos, n_rot, 0, n_ctx, 0, freq_base, freq_scale, NAN, 1, 32, 1);
|
||||||
|
|
||||||
// store key and value to memory
|
// store key and value to memory
|
||||||
{
|
{
|
||||||
|
|
|
@@ -1614,11 +1614,11 @@ static struct ggml_cgraph * llama_v3_build_graph(
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
struct ggml_tensor *Kcur = ggml_rope_custom_inplace(ctx0, ggml_reshape_3d(ctx0, tmpk, n_embd_head, n_head_kv, N), KQ_pos, n_embd_head, 0, 0, 0, freq_base, freq_scale, 0, 0, 0, 0);
|
struct ggml_tensor *Kcur = ggml_rope_custom_inplace(ctx0, ggml_reshape_3d(ctx0, tmpk, n_embd_head, n_head_kv, N), KQ_pos, n_embd_head, 0, 0, 0, freq_base, freq_scale, NAN, 1, 32, 1);
|
||||||
offload_func_kq(Kcur);
|
offload_func_kq(Kcur);
|
||||||
ggml_set_name(Kcur, "Kcur");
|
ggml_set_name(Kcur, "Kcur");
|
||||||
|
|
||||||
struct ggml_tensor *Qcur = ggml_rope_custom_inplace(ctx0, ggml_reshape_3d(ctx0, tmpq, n_embd_head, n_head, N), KQ_pos, n_embd_head, 0, 0, 0, freq_base, freq_scale, 0, 0, 0, 0);
|
struct ggml_tensor *Qcur = ggml_rope_custom_inplace(ctx0, ggml_reshape_3d(ctx0, tmpq, n_embd_head, n_head, N), KQ_pos, n_embd_head, 0, 0, 0, freq_base, freq_scale, NAN, 1, 32, 1);
|
||||||
offload_func_kq(Qcur);
|
offload_func_kq(Qcur);
|
||||||
ggml_set_name(Qcur, "Qcur");
|
ggml_set_name(Qcur, "Qcur");
|
||||||
|
|
||||||
|
|
|
@@ -522,8 +522,8 @@ bool gpt_neox_eval(
|
||||||
}
|
}
|
||||||
|
|
||||||
// using mode = 2 for GPT-NeoX mode
|
// using mode = 2 for GPT-NeoX mode
|
||||||
Qcur = ggml_rope_custom_inplace(ctx0, Qcur, KQ_pos, n_rot, 2, n_ctx, 0, freq_base, freq_scale, 0, 0, 0, 0);
|
Qcur = ggml_rope_custom_inplace(ctx0, Qcur, KQ_pos, n_rot, 2, n_ctx, 0, freq_base, freq_scale, NAN, 1, 32, 1);
|
||||||
Kcur = ggml_rope_custom_inplace(ctx0, Kcur, KQ_pos, n_rot, 2, n_ctx, 0, freq_base, freq_scale, 0, 0, 0, 0);
|
Kcur = ggml_rope_custom_inplace(ctx0, Kcur, KQ_pos, n_rot, 2, n_ctx, 0, freq_base, freq_scale, NAN, 1, 32, 1);
|
||||||
|
|
||||||
// store key and value to memory
|
// store key and value to memory
|
||||||
{
|
{
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue