ggml_cuda_compute_forward

2023-05-24 12:55:50 +02:00 · 2023-05-24 12:55:50 +02:00 · 971920e935
commit 971920e935
parent 071dcd351b
4 changed files with 65 additions and 52 deletions
--- a/ggml-cuda.cu
+++ b/ggml-cuda.cu
@ -862,6 +862,10 @@ static void ggml_cuda_op(const ggml_tensor * src0, const ggml_tensor * src1, ggm
    }
 }
 // Tells whether a multiply with these operands is eligible for ggml_cuda_mul.
 // The decision depends on src1 alone: the result is true exactly when
 // src1->backend is GGML_BACKEND_CUDA. src0 and dst are part of the signature
 // but are not examined.
 bool ggml_cuda_can_mul(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst) {
    (void) src0; // not consulted
    (void) dst;  // not consulted

    const bool src1_on_cuda = (src1->backend == GGML_BACKEND_CUDA);
    return src1_on_cuda;
 }
 void ggml_cuda_mul(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst) {
    GGML_ASSERT(src0->type == GGML_TYPE_F32 && src1->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32);
    ggml_cuda_op<GGML_CUDA_OP_TYPE_FFF, ggml_cuda_op_mul>(src0, src1, dst);
@ -968,3 +972,34 @@ void ggml_cuda_load_data(const char * fname, struct ggml_tensor * tensor, const
    free(buf_host);
    fclose(fp);
 }
 bool ggml_cuda_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor){
    switch (tensor->op) {
        case GGML_OP_MUL:
            if (!ggml_cuda_can_mul(tensor->src0, tensor->src1, tensor)) {
                return false;
            }
            if (params->ith != 0) {
                return true;
            }
            if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
                return true;
            }
            ggml_cuda_mul(tensor->src0, tensor->src1, tensor);
            return true;
        case GGML_OP_MUL_MAT:
            if (!ggml_cuda_can_mul_mat(tensor->src0, tensor->src1, tensor)) {
                return false;
            }
            if (params->ith != 0) {
                return true;
            }
            if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
                return true;
            }
            ggml_cuda_mul_mat(tensor->src0, tensor->src1, tensor, params->wdata, params->wsize);
            return true;
        default:
            return false;
    }
 }
--- a/ggml-cuda.h
+++ b/ggml-cuda.h
@ -16,6 +16,7 @@ void * ggml_cuda_host_malloc(size_t size);
 void   ggml_cuda_host_free(void * ptr);
 void ggml_cuda_load_data(const char * fname, struct ggml_tensor * tensors, size_t offset);
 // Attempts to run tensor's op through the CUDA code path.
 // Returns true when the op was claimed by CUDA (GGML_OP_MUL or GGML_OP_MUL_MAT
 // whose ggml_cuda_can_* check passed); returns false for every other op or
 // when that check fails, in which case nothing has been computed.
 bool ggml_cuda_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor);
 #ifdef  __cplusplus
 }
--- a/ggml.c
+++ b/ggml.c
@ -3647,26 +3647,6 @@ struct ggml_context_container {
    struct ggml_context context;
 };
 //
 // compute types
 //
 // kind of task a compute call is asked to perform (stored in ggml_compute_params.type)
 enum ggml_task_type {
    GGML_TASK_INIT     = 0,
    GGML_TASK_COMPUTE  = 1,
    GGML_TASK_FINALIZE = 2,
 };
 // parameters handed to a compute call
 struct ggml_compute_params {
    // which task type this call performs
    enum ggml_task_type type;

    // presumably the calling thread's index and the total thread count -- TODO confirm
    int ith;
    int nth;

    // work buffer for all threads: size, then base pointer
    size_t wsize;
    void * wdata;
 };
 //
 // ggml state
 //
@ -8166,14 +8146,7 @@ static void ggml_compute_forward_mul_f32(
    const int ith = params->ith;
    const int nth = params->nth;
-#ifdef GGML_USE_CUBLAS
+#ifdef GGML_USE_CLBLAST
    if (src1->backend == GGML_BACKEND_CUDA) {
        if (ith == 0) {
            ggml_cuda_mul(src0, src1, dst);
        }
        return;
    }
 #elif defined(GGML_USE_CLBLAST)
    if (src1->backend == GGML_BACKEND_CL) {
        if (ith == 0) {
            ggml_cl_mul(src0, src1, dst);
@ -9614,14 +9587,7 @@ static void ggml_compute_forward_mul_mat_f32(
    // nb01 >= nb00 - src0 is not transposed
    //   compute by src0 rows
-#if defined(GGML_USE_CUBLAS)
+#if defined(GGML_USE_CLBLAST)
    if (ggml_cuda_can_mul_mat(src0, src1, dst)) {
        if (params->ith == 0 && params->type == GGML_TASK_COMPUTE) {
            ggml_cuda_mul_mat(src0, src1, dst, params->wdata, params->wsize);
        }
        return;
    }
 #elif defined(GGML_USE_CLBLAST)
    if (ggml_cl_can_mul_mat(src0, src1, dst)) {
        if (params->ith == 0 && params->type == GGML_TASK_COMPUTE) {
            ggml_cl_mul_mat(src0, src1, dst, params->wdata, params->wsize);
@ -9786,14 +9752,7 @@ static void ggml_compute_forward_mul_mat_f16_f32(
    // nb01 >= nb00 - src0 is not transposed
    //   compute by src0 rows
-#if defined(GGML_USE_CUBLAS)
+#if defined(GGML_USE_CLBLAST)
    if (ggml_cuda_can_mul_mat(src0, src1, dst)) {
        if (params->ith == 0 && params->type == GGML_TASK_COMPUTE) {
            ggml_cuda_mul_mat(src0, src1, dst, params->wdata, params->wsize);
        }
        return;
    }
 #elif defined(GGML_USE_CLBLAST)
    if (ggml_cl_can_mul_mat(src0, src1, dst)) {
        if (params->ith == 0 && params->type == GGML_TASK_COMPUTE) {
            ggml_cl_mul_mat(src0, src1, dst, params->wdata, params->wsize);
@ -9998,14 +9957,7 @@ static void ggml_compute_forward_mul_mat_q_f32(
    // nb01 >= nb00 - src0 is not transposed
    //   compute by src0 rows
-#if defined(GGML_USE_CUBLAS)
+#if defined(GGML_USE_CLBLAST)
    if (ggml_cuda_can_mul_mat(src0, src1, dst)) {
        if (params->ith == 0 && params->type == GGML_TASK_COMPUTE) {
            ggml_cuda_mul_mat(src0, src1, dst, params->wdata, params->wsize);
        }
        return;
    }
 #elif defined(GGML_USE_CLBLAST)
    if (ggml_cl_can_mul_mat(src0, src1, dst)) {
        if (params->ith == 0 && params->type == GGML_TASK_COMPUTE) {
            ggml_cl_mul_mat(src0, src1, dst, params->wdata, params->wsize);
@ -12931,6 +12883,13 @@ static void ggml_compute_forward_map_binary(
 static void ggml_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor) {
    GGML_ASSERT(params);
 #ifdef GGML_USE_CUBLAS
    bool used_cuda = ggml_cuda_compute_forward(params, tensor);
    if (used_cuda) {
        return;
    }
 #endif // GGML_USE_CUBLAS
    switch (tensor->op) {
        case GGML_OP_DUP:
            {
--- a/ggml.h
+++ b/ggml.h
@ -413,6 +413,24 @@ extern "C" {
        bool   no_alloc;   // don't allocate memory for the tensor data
    };
    // compute types
    // kind of task a compute call is asked to perform (stored in ggml_compute_params.type)
    enum ggml_task_type {
        GGML_TASK_INIT     = 0,
        GGML_TASK_COMPUTE  = 1,
        GGML_TASK_FINALIZE = 2,
    };
    // parameters handed to a compute call
    struct ggml_compute_params {
        // which task type this call performs
        enum ggml_task_type type;

        // presumably the calling thread's index and the total thread count -- TODO confirm
        int ith;
        int nth;

        // work buffer for all threads: size, then base pointer
        size_t wsize;
        void * wdata;
    };
    // misc
    GGML_API void    ggml_time_init(void); // call this once at the beginning of the program