ggml_cuda_compute_forward

JohannesGaessler 2023-05-24 12:55:50 +02:00
parent 071dcd351b
commit 971920e935
4 changed files with 65 additions and 52 deletions


@@ -862,6 +862,10 @@ static void ggml_cuda_op(const ggml_tensor * src0, const ggml_tensor * src1, ggm
     }
 }
 
+bool ggml_cuda_can_mul(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst) {
+    return src1->backend == GGML_BACKEND_CUDA;
+}
+
 void ggml_cuda_mul(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst) {
     GGML_ASSERT(src0->type == GGML_TYPE_F32 && src1->type == GGML_TYPE_F32 && dst->type == GGML_TYPE_F32);
     ggml_cuda_op<GGML_CUDA_OP_TYPE_FFF, ggml_cuda_op_mul>(src0, src1, dst);
@@ -968,3 +972,34 @@ void ggml_cuda_load_data(const char * fname, struct ggml_tensor * tensor, const
     free(buf_host);
     fclose(fp);
 }
+
+bool ggml_cuda_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor){
+    switch (tensor->op) {
+        case GGML_OP_MUL:
+            if (!ggml_cuda_can_mul(tensor->src0, tensor->src1, tensor)) {
+                return false;
+            }
+            if (params->ith != 0) {
+                return true;
+            }
+            if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+                return true;
+            }
+            ggml_cuda_mul(tensor->src0, tensor->src1, tensor);
+            return true;
+        case GGML_OP_MUL_MAT:
+            if (!ggml_cuda_can_mul_mat(tensor->src0, tensor->src1, tensor)) {
+                return false;
+            }
+            if (params->ith != 0) {
+                return true;
+            }
+            if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+                return true;
+            }
+            ggml_cuda_mul_mat(tensor->src0, tensor->src1, tensor, params->wdata, params->wsize);
+            return true;
+        default:
+            return false;
+    }
+}
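
Every case in the new dispatcher follows the same three-step pattern: a ggml_cuda_can_* capability check (returning false hands the op back to the CPU implementation), an early return true for every thread other than 0 and for the INIT/FINALIZE passes, and the actual CUDA launch on thread 0 during the COMPUTE pass. The following sketch is not part of the commit; it just factors that pattern into a standalone helper for ops with the MUL-style (src0, src1, dst) signature, with ggml_cuda_try_op and the typedef names being illustrative only:

// Illustrative sketch only, not part of this commit: the per-op boilerplate of
// ggml_cuda_compute_forward written once. can_fn decides whether the op can be
// offloaded; op_fn launches the CUDA work. Assumes ggml.h is in scope.
typedef bool (*ggml_cuda_can_fn)(const struct ggml_tensor *, const struct ggml_tensor *, struct ggml_tensor *);
typedef void (*ggml_cuda_op_fn) (const struct ggml_tensor *, const struct ggml_tensor *, struct ggml_tensor *);

static bool ggml_cuda_try_op(struct ggml_compute_params * params, struct ggml_tensor * tensor,
                             ggml_cuda_can_fn can_fn, ggml_cuda_op_fn op_fn) {
    if (!can_fn(tensor->src0, tensor->src1, tensor)) {
        return false; // not offloadable: fall back to the CPU implementation
    }
    if (params->ith != 0) {
        return true;  // only thread 0 talks to the GPU; the other threads are done
    }
    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
        return true;  // the GPU path has no init/finalize work
    }
    op_fn(tensor->src0, tensor->src1, tensor); // COMPUTE pass on thread 0
    return true;
}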


@@ -16,6 +16,7 @@ void * ggml_cuda_host_malloc(size_t size);
 void ggml_cuda_host_free(void * ptr);
 void ggml_cuda_load_data(const char * fname, struct ggml_tensor * tensors, size_t offset);
+bool ggml_cuda_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor);
 
 #ifdef __cplusplus
 }

ggml.c

@@ -3647,26 +3647,6 @@ struct ggml_context_container {
     struct ggml_context context;
 };
 
-//
-// compute types
-//
-
-enum ggml_task_type {
-    GGML_TASK_INIT = 0,
-    GGML_TASK_COMPUTE,
-    GGML_TASK_FINALIZE,
-};
-
-struct ggml_compute_params {
-    enum ggml_task_type type;
-
-    int ith, nth;
-
-    // work buffer for all threads
-    size_t wsize;
-    void * wdata;
-};
-
 //
 // ggml state
 //
@@ -8166,14 +8146,7 @@ static void ggml_compute_forward_mul_f32(
     const int ith = params->ith;
     const int nth = params->nth;
 
-#ifdef GGML_USE_CUBLAS
-    if (src1->backend == GGML_BACKEND_CUDA) {
-        if (ith == 0) {
-            ggml_cuda_mul(src0, src1, dst);
-        }
-        return;
-    }
-#elif defined(GGML_USE_CLBLAST)
+#ifdef GGML_USE_CLBLAST
     if (src1->backend == GGML_BACKEND_CL) {
         if (ith == 0) {
             ggml_cl_mul(src0, src1, dst);
@@ -9614,14 +9587,7 @@ static void ggml_compute_forward_mul_mat_f32(
     // nb01 >= nb00 - src0 is not transposed
     // compute by src0 rows
 
-#if defined(GGML_USE_CUBLAS)
-    if (ggml_cuda_can_mul_mat(src0, src1, dst)) {
-        if (params->ith == 0 && params->type == GGML_TASK_COMPUTE) {
-            ggml_cuda_mul_mat(src0, src1, dst, params->wdata, params->wsize);
-        }
-        return;
-    }
-#elif defined(GGML_USE_CLBLAST)
+#if defined(GGML_USE_CLBLAST)
     if (ggml_cl_can_mul_mat(src0, src1, dst)) {
         if (params->ith == 0 && params->type == GGML_TASK_COMPUTE) {
             ggml_cl_mul_mat(src0, src1, dst, params->wdata, params->wsize);
@@ -9786,14 +9752,7 @@ static void ggml_compute_forward_mul_mat_f16_f32(
     // nb01 >= nb00 - src0 is not transposed
     // compute by src0 rows
 
-#if defined(GGML_USE_CUBLAS)
-    if (ggml_cuda_can_mul_mat(src0, src1, dst)) {
-        if (params->ith == 0 && params->type == GGML_TASK_COMPUTE) {
-            ggml_cuda_mul_mat(src0, src1, dst, params->wdata, params->wsize);
-        }
-        return;
-    }
-#elif defined(GGML_USE_CLBLAST)
+#if defined(GGML_USE_CLBLAST)
     if (ggml_cl_can_mul_mat(src0, src1, dst)) {
         if (params->ith == 0 && params->type == GGML_TASK_COMPUTE) {
             ggml_cl_mul_mat(src0, src1, dst, params->wdata, params->wsize);
@@ -9998,14 +9957,7 @@ static void ggml_compute_forward_mul_mat_q_f32(
     // nb01 >= nb00 - src0 is not transposed
     // compute by src0 rows
 
-#if defined(GGML_USE_CUBLAS)
-    if (ggml_cuda_can_mul_mat(src0, src1, dst)) {
-        if (params->ith == 0 && params->type == GGML_TASK_COMPUTE) {
-            ggml_cuda_mul_mat(src0, src1, dst, params->wdata, params->wsize);
-        }
-        return;
-    }
-#elif defined(GGML_USE_CLBLAST)
+#if defined(GGML_USE_CLBLAST)
     if (ggml_cl_can_mul_mat(src0, src1, dst)) {
         if (params->ith == 0 && params->type == GGML_TASK_COMPUTE) {
             ggml_cl_mul_mat(src0, src1, dst, params->wdata, params->wsize);
@@ -12931,6 +12883,13 @@ static void ggml_compute_forward_map_binary(
 static void ggml_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor) {
     GGML_ASSERT(params);
 
+#ifdef GGML_USE_CUBLAS
+    bool used_cuda = ggml_cuda_compute_forward(params, tensor);
+    if (used_cuda) {
+        return;
+    }
+#endif // GGML_USE_CUBLAS
+
     switch (tensor->op) {
         case GGML_OP_DUP:
             {

ggml.h

@@ -413,6 +413,24 @@ extern "C" {
     bool no_alloc; // don't allocate memory for the tensor data
 };
 
+// compute types
+
+enum ggml_task_type {
+    GGML_TASK_INIT = 0,
+    GGML_TASK_COMPUTE,
+    GGML_TASK_FINALIZE,
+};
+
+struct ggml_compute_params {
+    enum ggml_task_type type;
+
+    int ith, nth;
+
+    // work buffer for all threads
+    size_t wsize;
+    void * wdata;
+};
+
 // misc
 
 GGML_API void ggml_time_init(void); // call this once at the beginning of the program
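
Moving enum ggml_task_type and struct ggml_compute_params from ggml.c into the public header is what lets ggml-cuda.cu inspect params->type and params->ith in the first place. A minimal sketch, assuming only the declarations added above, of how a hook typically interprets these fields; example_forward, nrows, and the row-splitting scheme are illustrative, not ggml API:

#include <stddef.h>
#include "ggml.h"

// Illustrative only: split nrows rows of work across the nth threads that call
// this hook, doing real work only during the COMPUTE pass.
static void example_forward(const struct ggml_compute_params * params, size_t nrows) {
    if (params->type != GGML_TASK_COMPUTE) {
        return; // INIT and FINALIZE passes: nothing to do here
    }
    const size_t per_thread = (nrows + (size_t) params->nth - 1) / (size_t) params->nth;
    const size_t row0 = per_thread * (size_t) params->ith;                    // this thread's first row
    const size_t row1 = row0 + per_thread < nrows ? row0 + per_thread : nrows; // one past its last row
    for (size_t r = row0; r < row1; ++r) {
        // process row r; params->wdata provides wsize bytes of shared scratch space
        (void) r;
    }
}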