From d30ab79b1887a69e626f58f30f195f10f4a79c20 Mon Sep 17 00:00:00 2001
From: slaren
Date: Wed, 20 Sep 2023 12:42:50 +0200
Subject: [PATCH] fix rope shift

---
 ggml-cuda.cu | 15 +++++----------
 llama.cpp    | 18 ++++++++++++------
 2 files changed, 17 insertions(+), 16 deletions(-)

diff --git a/ggml-cuda.cu b/ggml-cuda.cu
index 20d00a487..87d2e2e71 100644
--- a/ggml-cuda.cu
+++ b/ggml-cuda.cu
@@ -4370,7 +4370,7 @@ static __global__ void rope(const T * x, T * dst, const int ncols, const int32_t
     const int i2 = row/p_delta_rows;
 
     const int p = has_pos ? pos[i2] : 0;
-    const float p0 = p * freq_scale;
+    const float p0 = p*freq_scale;
     const float theta = p0*powf(theta_scale, col/2);
     const float sin_theta = sinf(theta);
     const float cos_theta = cosf(theta);
@@ -4396,7 +4396,7 @@ static __global__ void rope_neox(const T * x, T * dst, const int ncols, const in
     const int i2 = row/p_delta_rows;
 
     const int p = has_pos ? pos[i2] : 0;
-    const float p0 = p * freq_scale;
+    const float p0 = p*freq_scale;
     const float theta = p0*powf(theta_scale, col/2);
     const float sin_theta = sinf(theta);
     const float cos_theta = cosf(theta);
@@ -6106,15 +6106,11 @@ inline void ggml_cuda_op_rope(
 
     const float theta_scale = powf(freq_base, -2.0f/n_dims);
 
-    int32_t * pos = nullptr;
+    const int32_t * pos = nullptr;
     if ((mode & 1) == 0) {
         GGML_ASSERT(src1->type == GGML_TYPE_I32);
         GGML_ASSERT(src1->ne[0] == ne2);
-        GGML_ASSERT(src1->backend == GGML_BACKEND_GPU);
-        struct ggml_tensor_extra_gpu * src1_extra = (ggml_tensor_extra_gpu *) src1->extra;
-        int id;
-        CUDA_CHECK(cudaGetDevice(&id));
-        pos = (int32_t *) src1_extra->data_device[id];
+        pos = (const int32_t *) src1_dd;
     }
 
     const bool is_neox = mode & 2;
@@ -7092,8 +7088,7 @@ void ggml_cuda_copy_to_device(struct ggml_tensor * tensor) {
     struct ggml_tensor_extra_gpu * extra = (ggml_tensor_extra_gpu *) tensor->extra;
 
     CUDA_CHECK(ggml_cuda_set_device(g_main_device));
-    cudaStream_t main_stream = g_cudaStreams[g_main_device][0];
-    CUDA_CHECK(cudaMemcpyAsync(extra->data_device[g_main_device], tensor->data, ggml_nbytes(tensor), cudaMemcpyHostToDevice, main_stream));
+    CUDA_CHECK(cudaMemcpy(extra->data_device[g_main_device], tensor->data, ggml_nbytes(tensor), cudaMemcpyHostToDevice));
 }
 
 void ggml_cuda_assign_buffers(struct ggml_tensor * tensor) {
diff --git a/llama.cpp b/llama.cpp
index 191f832f4..03672dd16 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -2747,14 +2747,16 @@ static struct ggml_cgraph * llm_build_llama(
         }
 
         for (int il = 0; il < n_layer; ++il) {
-            ggml_build_forward_expand(gf,
+            struct ggml_tensor * tmp =
                     ggml_rope_custom_inplace(ctx0,
                         ggml_view_3d(ctx0, kv_self.k,
                             n_embd_head, n_head_kv, n_ctx,
                             ggml_element_size(kv_self.k)*n_embd_head,
                             ggml_element_size(kv_self.k)*n_embd_gqa,
                             ggml_element_size(kv_self.k)*n_embd_gqa*n_ctx*il),
-                        K_shift, n_embd_head, 0, 0, freq_base, freq_scale));
+                        K_shift, n_embd_head, 0, 0, freq_base, freq_scale);
+            offload_func_kq(tmp);
+            ggml_build_forward_expand(gf, tmp);
         }
     }
 
@@ -3137,14 +3139,16 @@ static struct ggml_cgraph * llm_build_baichaun(
         }
 
         for (int il = 0; il < n_layer; ++il) {
-            ggml_build_forward_expand(gf,
+            struct ggml_tensor * tmp =
                     ggml_rope_custom_inplace(ctx0,
                         ggml_view_3d(ctx0, kv_self.k,
                             n_embd_head, n_head_kv, n_ctx,
                             ggml_element_size(kv_self.k)*n_embd_head,
                             ggml_element_size(kv_self.k)*n_embd_gqa,
                             ggml_element_size(kv_self.k)*n_embd_gqa*n_ctx*il),
-                        K_shift, n_embd_head, 0, 0, freq_base, freq_scale));
+                        K_shift, n_embd_head, 0, 0, freq_base, freq_scale);
+            offload_func_kq(tmp);
+            ggml_build_forward_expand(gf, tmp);
         }
     }
 
@@ -3547,14 +3551,16 @@ static struct ggml_cgraph * llm_build_falcon(
         }
 
         for (int il = 0; il < n_layer; ++il) {
-            ggml_build_forward_expand(gf,
+            struct ggml_tensor * tmp =
                     ggml_rope_custom_inplace(ctx0,
                         ggml_view_3d(ctx0, kv_self.k,
                             n_embd_head, n_head_kv, n_ctx,
                             ggml_element_size(kv_self.k)*n_embd_head,
                             ggml_element_size(kv_self.k)*n_embd_gqa,
                             ggml_element_size(kv_self.k)*n_embd_gqa*n_ctx*il),
-                        K_shift, n_embd_head, 2, 0, freq_base, freq_scale);
+                        K_shift, n_embd_head, 2, 0, freq_base, freq_scale);
+            offload_func_kq(tmp);
+            ggml_build_forward_expand(gf, tmp);
         }
     }
 
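
What the patch does, in short (editorial summary, not part of the commit): in all three llm_build_* functions, the RoPE shift node for the K cache is first captured in tmp so that offload_func_kq can tag it for GPU offload before ggml_build_forward_expand adds it to the graph; previously the node went into the graph untagged, so with offloading enabled it would not be assigned to the GPU backend, which is presumably the bug behind the "fix rope shift" title. On the CUDA side, ggml_cuda_op_rope now reads the position tensor through the already-resolved device pointer src1_dd instead of digging it out of src1->extra, and ggml_cuda_copy_to_device switches from a stream-ordered cudaMemcpyAsync to a blocking cudaMemcpy, so the upload is guaranteed complete when the function returns, independent of any stream state.

Below is a minimal standalone CUDA sketch of that last semantic difference. It is an illustration only, with made-up buffer names; it is not ggml code.

// sketch.cu -- standalone illustration only; hypothetical names, not ggml code.
// Contrasts the two copy styles touched by this patch: a blocking cudaMemcpy
// returns only after the transfer has finished, while cudaMemcpyAsync is
// ordered against a stream and may complete later.
#include <cuda_runtime.h>
#include <cstdio>
#include <cstdlib>
#include <vector>

#define CUDA_CHECK(expr)                                              \
    do {                                                              \
        cudaError_t err_ = (expr);                                    \
        if (err_ != cudaSuccess) {                                    \
            fprintf(stderr, "CUDA error: %s (%s:%d)\n",               \
                    cudaGetErrorString(err_), __FILE__, __LINE__);    \
            exit(1);                                                  \
        }                                                             \
    } while (0)

int main() {
    const size_t n = 4096;
    std::vector<int32_t> host(n, 7);   // stand-in for tensor->data

    int32_t * dev = nullptr;
    CUDA_CHECK(cudaMalloc((void **) &dev, n*sizeof(int32_t)));

    // Blocking copy, as in the patched ggml_cuda_copy_to_device: when this
    // returns, the device buffer is fully populated and no stream needs to
    // be chosen or synchronized.
    CUDA_CHECK(cudaMemcpy(dev, host.data(), n*sizeof(int32_t),
                          cudaMemcpyHostToDevice));

    host.assign(n, 0);                 // safe: the transfer already completed

    CUDA_CHECK(cudaFree(dev));
    return 0;
}

cudaMemcpyAsync pays off when the host buffer is pinned and the copy can overlap with work already queued on the same stream; for a one-off upload from pageable memory, a blocking cudaMemcpy is the simpler and equally correct choice.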