diff --git a/ggml-cuda.cu b/ggml-cuda.cu index dc3237a45..e581eb42a 100644 --- a/ggml-cuda.cu +++ b/ggml-cuda.cu @@ -4,7 +4,6 @@ #include #include #include -#include #include #include #include @@ -8169,11 +8168,6 @@ static void ggml_cuda_op_mul_mat( dev[id].src1_ddf = dev[id].src1_ddf_alloc.alloc(is0, ggml_nelements(src1)); } - // if (strcmp(dst->name, "Qcur-0") == 0) { - // fprintf(stderr, "device synchronize for %s\n", dst->name); - // CUDA_CHECK(cudaDeviceSynchronize()); - // } - if (convert_src1_to_q8_1) { dev[id].src1_ddq = dev[id].src1_ddq_alloc.alloc(is0, nrows1*src1_padded_col_size*q8_1_ts/q8_1_bs); @@ -8216,7 +8210,7 @@ static void ggml_cuda_op_mul_mat( cudaStream_t stream = g_cudaStreams[id][is]; // wait for main GPU data if necessary - if (split && (id != g_main_device || is != is0)) { // TODO is this correct? + if (split && (id != g_main_device || is != is0)) { CUDA_CHECK(cudaStreamWaitEvent(stream, src0_extra->events[g_main_device][is0], 0)); } @@ -8322,7 +8316,7 @@ static void ggml_cuda_op_mul_mat( } // add event for the main device to wait on until other device is done - if (split && (id != g_main_device || is != is0)) { // TODO is this correct? + if (split && (id != g_main_device || is != is0)) { CUDA_CHECK(cudaEventRecord(src0_extra->events[id][is], stream)); } } @@ -9102,7 +9096,6 @@ static void ggml_cuda_clamp(const ggml_tensor * src0, const ggml_tensor * src1, } static void ggml_cuda_cpy(const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) { - // CUDA_CHECK(cudaDeviceSynchronize()); const int64_t ne = ggml_nelements(src0); GGML_ASSERT(ne == ggml_nelements(src1)); @@ -9166,7 +9159,6 @@ static void ggml_cuda_cpy(const ggml_tensor * src0, const ggml_tensor * src1, gg } (void) dst; - // CUDA_CHECK(cudaDeviceSynchronize()); } static void ggml_cuda_dup(const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {