diff --git a/llama.cpp b/llama.cpp index cbf1d6590..f7e167427 100644 --- a/llama.cpp +++ b/llama.cpp @@ -1245,10 +1245,15 @@ struct llama_vocab { id special_eot_id = 32010; int find_bpe_rank(std::string token_left, std::string token_right) const { - GGML_ASSERT(token_left.find(" ") == std::string::npos); - GGML_ASSERT(token_left.find("\n") == std::string::npos); - GGML_ASSERT(token_right.find(" ") == std::string::npos); - GGML_ASSERT(token_right.find("\n") == std::string::npos); + // GGML_ASSERT(token_left.find(" ") == std::string::npos); + // GGML_ASSERT(token_left.find("\n") == std::string::npos); + // GGML_ASSERT(token_right.find(" ") == std::string::npos); + // GGML_ASSERT(token_right.find("\n") == std::string::npos); + //the above breaks gguf v1 falcons + replace_all(token_left, " ", "\u0120"); + replace_all(token_left, "\n", "\u010A"); + replace_all(token_right, " ", "\u0120"); + replace_all(token_right, "\n", "\u010A"); auto it = bpe_ranks.find(std::make_pair(token_left, token_right)); if (it == bpe_ranks.end()) { diff --git a/otherarch/gptj_v3.cpp b/otherarch/gptj_v3.cpp index ea33172c7..ff82ded76 100644 --- a/otherarch/gptj_v3.cpp +++ b/otherarch/gptj_v3.cpp @@ -494,8 +494,8 @@ bool gptj_eval( } } - struct ggml_tensor *Qcur = ggml_rope_custom_inplace(ctx0, ggml_reshape_3d(ctx0, ggml_mul_mat(ctx0, model.layers[il].c_attn_q_proj_w, cur), n_embd / n_head, n_head, N), KQ_pos, n_rot, 0, n_ctx, 0, freq_base, freq_scale, 0, 0, 0, 0); - struct ggml_tensor *Kcur = ggml_rope_custom_inplace(ctx0, ggml_reshape_3d(ctx0, ggml_mul_mat(ctx0, model.layers[il].c_attn_k_proj_w, cur), n_embd / n_head, n_head, N), KQ_pos, n_rot, 0, n_ctx, 0, freq_base, freq_scale, 0, 0, 0, 0); + struct ggml_tensor *Qcur = ggml_rope_custom_inplace(ctx0, ggml_reshape_3d(ctx0, ggml_mul_mat(ctx0, model.layers[il].c_attn_q_proj_w, cur), n_embd / n_head, n_head, N), KQ_pos, n_rot, 0, n_ctx, 0, freq_base, freq_scale, NAN, 1, 32, 1); + struct ggml_tensor *Kcur = ggml_rope_custom_inplace(ctx0, ggml_reshape_3d(ctx0, ggml_mul_mat(ctx0, model.layers[il].c_attn_k_proj_w, cur), n_embd / n_head, n_head, N), KQ_pos, n_rot, 0, n_ctx, 0, freq_base, freq_scale, NAN, 1, 32, 1); // store key and value to memory { diff --git a/otherarch/llama_v3.cpp b/otherarch/llama_v3.cpp index f0d38b661..3a12e53cf 100644 --- a/otherarch/llama_v3.cpp +++ b/otherarch/llama_v3.cpp @@ -1614,11 +1614,11 @@ static struct ggml_cgraph * llama_v3_build_graph( } #endif - struct ggml_tensor *Kcur = ggml_rope_custom_inplace(ctx0, ggml_reshape_3d(ctx0, tmpk, n_embd_head, n_head_kv, N), KQ_pos, n_embd_head, 0, 0, 0, freq_base, freq_scale, 0, 0, 0, 0); + struct ggml_tensor *Kcur = ggml_rope_custom_inplace(ctx0, ggml_reshape_3d(ctx0, tmpk, n_embd_head, n_head_kv, N), KQ_pos, n_embd_head, 0, 0, 0, freq_base, freq_scale, NAN, 1, 32, 1); offload_func_kq(Kcur); ggml_set_name(Kcur, "Kcur"); - struct ggml_tensor *Qcur = ggml_rope_custom_inplace(ctx0, ggml_reshape_3d(ctx0, tmpq, n_embd_head, n_head, N), KQ_pos, n_embd_head, 0, 0, 0, freq_base, freq_scale, 0, 0, 0, 0); + struct ggml_tensor *Qcur = ggml_rope_custom_inplace(ctx0, ggml_reshape_3d(ctx0, tmpq, n_embd_head, n_head, N), KQ_pos, n_embd_head, 0, 0, 0, freq_base, freq_scale, NAN, 1, 32, 1); offload_func_kq(Qcur); ggml_set_name(Qcur, "Qcur"); diff --git a/otherarch/neox_v3.cpp b/otherarch/neox_v3.cpp index e3cab8807..0bdfece34 100644 --- a/otherarch/neox_v3.cpp +++ b/otherarch/neox_v3.cpp @@ -522,8 +522,8 @@ bool gpt_neox_eval( } // using mode = 2 for GPT-NeoX mode - Qcur = ggml_rope_custom_inplace(ctx0, Qcur, KQ_pos, n_rot, 2, n_ctx, 0, freq_base, freq_scale, 0, 0, 0, 0); - Kcur = ggml_rope_custom_inplace(ctx0, Kcur, KQ_pos, n_rot, 2, n_ctx, 0, freq_base, freq_scale, 0, 0, 0, 0); + Qcur = ggml_rope_custom_inplace(ctx0, Qcur, KQ_pos, n_rot, 2, n_ctx, 0, freq_base, freq_scale, NAN, 1, 32, 1); + Kcur = ggml_rope_custom_inplace(ctx0, Kcur, KQ_pos, n_rot, 2, n_ctx, 0, freq_base, freq_scale, NAN, 1, 32, 1); // store key and value to memory {