fixed OP_OUT_PROD and OP_NONE

mqy 2023-06-19 01:05:34 +08:00
parent 98728632c6
commit 6609c229e8

ggml.c (23 changed lines)

@@ -15655,8 +15655,7 @@ int ggml_get_task_profiles(
         p[0].stages[1].valid = true;
         p[0].stages[1].parallel = true;
     } break;
-    case GGML_OP_MUL_MAT:
-    case GGML_OP_OUT_PROD: { // FIXME: is this correct?
+    case GGML_OP_MUL_MAT: {
         enum ggml_type src0_t = tensor->src0->type;
         if (src0_t == GGML_TYPE_F32) {
             p[0].stages[1].valid = true;
@@ -15673,6 +15672,15 @@ int ggml_get_task_profiles(
             GGML_ASSERT(false);
         }
     } break;
+    case GGML_OP_OUT_PROD: {
+        enum ggml_type src0_t = tensor->src0->type;
+        if (src0_t == GGML_TYPE_F32) {
+            p[0].stages[1].valid = true;
+            p[0].stages[1].parallel = true;
+        } else {
+            GGML_ASSERT(false);
+        }
+    } break;
     case GGML_OP_SCALE: {
         p[0].stages[1].valid = true;
         p[0].stages[1].parallel = true;
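
What the split amounts to: GGML_OP_MUL_MAT keeps its per-type branching on src0, while GGML_OP_OUT_PROD now accepts only F32 src0, for which stage 1 (the compute stage) is marked valid and parallel; any other src0 type trips GGML_ASSERT(false). A minimal standalone sketch of that rule, for reference only: the helper name is hypothetical and not part of this commit, and only the ggml_* identifiers are taken from ggml.h and the patch above.

#include "ggml.h"

// Hypothetical helper (not in this commit): restates the profile rule that
// the new GGML_OP_OUT_PROD case in ggml_get_task_profiles() encodes.
static bool out_prod_compute_stage_is_parallel(const struct ggml_tensor * t) {
    GGML_ASSERT(t->op == GGML_OP_OUT_PROD);
    // F32 src0: stages[1].valid and stages[1].parallel are set.
    // Any other src0 type: the patched code asserts false.
    return t->src0->type == GGML_TYPE_F32;
}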
@@ -15810,13 +15818,12 @@ static void ggml_optimize_tensor_task_profile(
     struct ggml_tensor *tensor, struct ggml_task_profile *profiles,
     int n_profiles, struct ggml_mulmat_tune *tune) {
-    if (tensor->op != GGML_OP_MUL_MAT && tensor->op != GGML_OP_OUT_PROD) {
+    if (tensor->op != GGML_OP_MUL_MAT) {
         return;
     }
     GGML_ASSERT(tensor);
-    GGML_ASSERT(tensor->op == GGML_OP_MUL_MAT ||
-                tensor->op == GGML_OP_OUT_PROD);
+    GGML_ASSERT(tensor->op == GGML_OP_MUL_MAT);
     GGML_ASSERT(tensor->task_profile.id == n_profiles);
     GGML_ASSERT(profiles);
@@ -15949,7 +15956,9 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph)
     for (int i = 0; i < cgraph->n_nodes; i++) {
         struct ggml_tensor * node = cgraph->nodes[i];
-        GGML_ASSERT (node->op != GGML_OP_NONE);
+        if (node->op == GGML_OP_NONE) {
+            continue;
+        }
         if (node->task_profile.id == 0) {
             ggml_set_tensor_task_profile(node, cgraph->tune);
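
Effect of this hunk: a graph containing GGML_OP_NONE nodes no longer aborts the whole compute call via the old assertion; such nodes carry no work and are skipped before a task profile is assigned. A sketch of the loop after the change, where only the identifiers visible in the diff are real and everything elided is assumed:

    for (int i = 0; i < cgraph->n_nodes; i++) {
        struct ggml_tensor * node = cgraph->nodes[i];
        if (node->op == GGML_OP_NONE) {
            continue; // no-op node: nothing to plan or schedule
        }
        if (node->task_profile.id == 0) {
            // id == 0 appears to mean "not yet assigned"; pick a profile,
            // tune-aware in this fork
            ggml_set_tensor_task_profile(node, cgraph->tune);
        }
        // ... remaining per-node scheduling elided ...
    }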
@@ -16031,7 +16040,7 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph)
                 {
                 } break;
             case GGML_OP_MUL_MAT:
-            case GGML_OP_OUT_PROD: // FIXME: is this correct?
+            case GGML_OP_OUT_PROD:
                 {
                     size_t cur = 0;
                     GGML_ASSERT(node->src1->type == GGML_TYPE_F32);