fix rope shift
This commit is contained in:
parent
2e92aefef3
commit
d30ab79b18
2 changed files with 17 additions and 16 deletions
11
ggml-cuda.cu
11
ggml-cuda.cu
|
@@ -6106,15 +6106,11 @@ inline void ggml_cuda_op_rope(
|
||||||
|
|
||||||
const float theta_scale = powf(freq_base, -2.0f/n_dims);
|
const float theta_scale = powf(freq_base, -2.0f/n_dims);
|
||||||
|
|
||||||
int32_t * pos = nullptr;
|
const int32_t * pos = nullptr;
|
||||||
if ((mode & 1) == 0) {
|
if ((mode & 1) == 0) {
|
||||||
GGML_ASSERT(src1->type == GGML_TYPE_I32);
|
GGML_ASSERT(src1->type == GGML_TYPE_I32);
|
||||||
GGML_ASSERT(src1->ne[0] == ne2);
|
GGML_ASSERT(src1->ne[0] == ne2);
|
||||||
GGML_ASSERT(src1->backend == GGML_BACKEND_GPU);
|
pos = (const int32_t *) src1_dd;
|
||||||
struct ggml_tensor_extra_gpu * src1_extra = (ggml_tensor_extra_gpu *) src1->extra;
|
|
||||||
int id;
|
|
||||||
CUDA_CHECK(cudaGetDevice(&id));
|
|
||||||
pos = (int32_t *) src1_extra->data_device[id];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
const bool is_neox = mode & 2;
|
const bool is_neox = mode & 2;
|
||||||
|
@@ -7092,8 +7088,7 @@ void ggml_cuda_copy_to_device(struct ggml_tensor * tensor) {
|
||||||
|
|
||||||
struct ggml_tensor_extra_gpu * extra = (ggml_tensor_extra_gpu *) tensor->extra;
|
struct ggml_tensor_extra_gpu * extra = (ggml_tensor_extra_gpu *) tensor->extra;
|
||||||
CUDA_CHECK(ggml_cuda_set_device(g_main_device));
|
CUDA_CHECK(ggml_cuda_set_device(g_main_device));
|
||||||
cudaStream_t main_stream = g_cudaStreams[g_main_device][0];
|
CUDA_CHECK(cudaMemcpy(extra->data_device[g_main_device], tensor->data, ggml_nbytes(tensor), cudaMemcpyHostToDevice));
|
||||||
CUDA_CHECK(cudaMemcpyAsync(extra->data_device[g_main_device], tensor->data, ggml_nbytes(tensor), cudaMemcpyHostToDevice, main_stream));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void ggml_cuda_assign_buffers(struct ggml_tensor * tensor) {
|
void ggml_cuda_assign_buffers(struct ggml_tensor * tensor) {
|
||||||
|
|
18
llama.cpp
18
llama.cpp
|
@@ -2747,14 +2747,16 @@ static struct ggml_cgraph * llm_build_llama(
|
||||||
}
|
}
|
||||||
|
|
||||||
for (int il = 0; il < n_layer; ++il) {
|
for (int il = 0; il < n_layer; ++il) {
|
||||||
ggml_build_forward_expand(gf,
|
struct ggml_tensor * tmp =
|
||||||
ggml_rope_custom_inplace(ctx0,
|
ggml_rope_custom_inplace(ctx0,
|
||||||
ggml_view_3d(ctx0, kv_self.k,
|
ggml_view_3d(ctx0, kv_self.k,
|
||||||
n_embd_head, n_head_kv, n_ctx,
|
n_embd_head, n_head_kv, n_ctx,
|
||||||
ggml_element_size(kv_self.k)*n_embd_head,
|
ggml_element_size(kv_self.k)*n_embd_head,
|
||||||
ggml_element_size(kv_self.k)*n_embd_gqa,
|
ggml_element_size(kv_self.k)*n_embd_gqa,
|
||||||
ggml_element_size(kv_self.k)*n_embd_gqa*n_ctx*il),
|
ggml_element_size(kv_self.k)*n_embd_gqa*n_ctx*il),
|
||||||
K_shift, n_embd_head, 0, 0, freq_base, freq_scale));
|
K_shift, n_embd_head, 0, 0, freq_base, freq_scale);
|
||||||
|
offload_func_kq(tmp);
|
||||||
|
ggml_build_forward_expand(gf, tmp);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -3137,14 +3139,16 @@ static struct ggml_cgraph * llm_build_baichaun(
|
||||||
}
|
}
|
||||||
|
|
||||||
for (int il = 0; il < n_layer; ++il) {
|
for (int il = 0; il < n_layer; ++il) {
|
||||||
ggml_build_forward_expand(gf,
|
struct ggml_tensor * tmp =
|
||||||
ggml_rope_custom_inplace(ctx0,
|
ggml_rope_custom_inplace(ctx0,
|
||||||
ggml_view_3d(ctx0, kv_self.k,
|
ggml_view_3d(ctx0, kv_self.k,
|
||||||
n_embd_head, n_head_kv, n_ctx,
|
n_embd_head, n_head_kv, n_ctx,
|
||||||
ggml_element_size(kv_self.k)*n_embd_head,
|
ggml_element_size(kv_self.k)*n_embd_head,
|
||||||
ggml_element_size(kv_self.k)*n_embd_gqa,
|
ggml_element_size(kv_self.k)*n_embd_gqa,
|
||||||
ggml_element_size(kv_self.k)*n_embd_gqa*n_ctx*il),
|
ggml_element_size(kv_self.k)*n_embd_gqa*n_ctx*il),
|
||||||
K_shift, n_embd_head, 0, 0, freq_base, freq_scale));
|
K_shift, n_embd_head, 0, 0, freq_base, freq_scale);
|
||||||
|
offload_func_kq(tmp);
|
||||||
|
ggml_build_forward_expand(gf, tmp);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -3547,14 +3551,16 @@ static struct ggml_cgraph * llm_build_falcon(
|
||||||
}
|
}
|
||||||
|
|
||||||
for (int il = 0; il < n_layer; ++il) {
|
for (int il = 0; il < n_layer; ++il) {
|
||||||
ggml_build_forward_expand(gf,
|
struct ggml_tensor * tmp =
|
||||||
ggml_rope_custom_inplace(ctx0,
|
ggml_rope_custom_inplace(ctx0,
|
||||||
ggml_view_3d(ctx0, kv_self.k,
|
ggml_view_3d(ctx0, kv_self.k,
|
||||||
n_embd_head, n_head_kv, n_ctx,
|
n_embd_head, n_head_kv, n_ctx,
|
||||||
ggml_element_size(kv_self.k)*n_embd_head,
|
ggml_element_size(kv_self.k)*n_embd_head,
|
||||||
ggml_element_size(kv_self.k)*n_embd_gqa,
|
ggml_element_size(kv_self.k)*n_embd_gqa,
|
||||||
ggml_element_size(kv_self.k)*n_embd_gqa*n_ctx*il),
|
ggml_element_size(kv_self.k)*n_embd_gqa*n_ctx*il),
|
||||||
K_shift, n_embd_head, 2, 0, freq_base, freq_scale));
|
K_shift, n_embd_head, 2, 0, freq_base, freq_scale);
|
||||||
|
offload_func_kq(tmp);
|
||||||
|
ggml_build_forward_expand(gf, tmp);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue