diff --git a/ggml/src/ggml.c b/ggml/src/ggml.c index c25f9caa3..3504e9ad3 100644 --- a/ggml/src/ggml.c +++ b/ggml/src/ggml.c @@ -8322,8 +8322,7 @@ static void ggml_compute_forward_dup_same_cont( GGML_ASSERT(ggml_is_contiguous(dst) && ggml_is_contiguous(src0)); GGML_ASSERT(src0->type == dst->type); - const size_t nb00 = src0->nb[0]; - const size_t nb0 = dst->nb[0]; + const size_t nb0 = ggml_type_size(src0->type); const int ith = params->ith; // thread index const int nth = params->nth; // number of threads @@ -8337,8 +8336,8 @@ static void ggml_compute_forward_dup_same_cont( if (ie0 < ie1) { memcpy( ((char *) dst->data + ie0*nb0), - ((char *) src0->data + ie0*nb00), - (ie1 - ie0) * ggml_type_size(src0->type)); + ((char *) src0->data + ie0*nb0), + (ie1 - ie0) * nb0); } } @@ -8355,11 +8354,6 @@ static void ggml_compute_forward_dup_f16( const int ith = params->ith; // thread index const int nth = params->nth; // number of threads - if (ggml_is_contiguous(src0) && ggml_is_contiguous(dst) && src0->type == dst->type) { - ggml_compute_forward_dup_same_cont(params, dst); - return; - } - // parallelize by rows const int nr = ne01; // number of rows per thread @@ -8624,11 +8618,6 @@ static void ggml_compute_forward_dup_bf16( const int ith = params->ith; // thread index const int nth = params->nth; // number of threads - if (ggml_is_contiguous(src0) && ggml_is_contiguous(dst) && src0->type == dst->type) { - ggml_compute_forward_dup_same_cont(params, dst); - return; - } - // parallelize by rows const int nr = ne01; // number of rows per thread @@ -8980,11 +8969,6 @@ static void ggml_compute_forward_dup_f32( const int ith = params->ith; // thread index const int nth = params->nth; // number of threads - if (ggml_is_contiguous(src0) && ggml_is_contiguous(dst) && src0->type == dst->type) { - ggml_compute_forward_dup_same_cont(params, dst); - return; - } - // parallelize by rows const int nr = ne01; // number of rows per thread @@ -9294,13 +9278,13 @@ static void ggml_compute_forward_dup_bytes( GGML_ASSERT(ggml_nelements(dst) == ggml_nelements(src0)); GGML_ASSERT(src0->type == dst->type); + GGML_TENSOR_UNARY_OP_LOCALS; + if (ggml_is_contiguous(src0) && ggml_is_contiguous(dst)) { ggml_compute_forward_dup_same_cont(params, dst); return; } - GGML_TENSOR_UNARY_OP_LOCALS; - const size_t type_size = ggml_type_size(src0->type); const int ith = params->ith; // thread index const int nth = params->nth; // number of threads diff --git a/tests/test-backend-ops.cpp b/tests/test-backend-ops.cpp index bd65e8cb3..c3536054a 100644 --- a/tests/test-backend-ops.cpp +++ b/tests/test-backend-ops.cpp @@ -2322,6 +2322,15 @@ static bool test_backend(ggml_backend_t backend, test_mode mode, const char * op } test_cases.emplace_back(new test_cont()); + test_cases.emplace_back(new test_cont(GGML_TYPE_F32, {2, 1, 1 ,1})); + test_cases.emplace_back(new test_cont(GGML_TYPE_F32, {2, 1, 3 ,5})); + test_cases.emplace_back(new test_cont(GGML_TYPE_F32, {2, 3, 5 ,7})); + test_cases.emplace_back(new test_cont(GGML_TYPE_F16, {2, 1, 1 ,1})); + test_cases.emplace_back(new test_cont(GGML_TYPE_F16, {2, 1, 3 ,5})); + test_cases.emplace_back(new test_cont(GGML_TYPE_F16, {2, 3, 5 ,7})); + test_cases.emplace_back(new test_cont(GGML_TYPE_BF16, {2, 1, 1 ,1})); + test_cases.emplace_back(new test_cont(GGML_TYPE_BF16, {2, 1, 3 ,5})); + test_cases.emplace_back(new test_cont(GGML_TYPE_BF16, {2, 3, 5 ,7})); auto add_test_bin_bcast = [&](ggml_type type, std::array ne, std::array nr) { for (auto op : {ggml_add, ggml_mul, ggml_div}) {