fixed OP_OUT_PROD and OP_NONE

2023-06-19 01:05:34 +08:00 · 2023-06-19 01:05:34 +08:00 · 6609c229e8
commit 6609c229e8
parent 98728632c6
1 changed file with 16 additions and 7 deletions
--- a/ggml.c
+++ b/ggml.c
@@ -15655,8 +15655,7 @@ int ggml_get_task_profiles(
        p[0].stages[1].valid = true;
        p[0].stages[1].parallel = true;
    } break;
-    case GGML_OP_MUL_MAT:
-    case GGML_OP_OUT_PROD: { // FIXME: is this correct?
+    case GGML_OP_MUL_MAT: {
        enum ggml_type src0_t = tensor->src0->type;
        if (src0_t == GGML_TYPE_F32) {
            p[0].stages[1].valid = true;
@@ -15673,6 +15672,15 @@ int ggml_get_task_profiles(
            GGML_ASSERT(false);
        }
    } break;
+    case GGML_OP_OUT_PROD: {
+        enum ggml_type src0_t = tensor->src0->type;
+        if (src0_t == GGML_TYPE_F32) {
+            p[0].stages[1].valid = true;
+            p[0].stages[1].parallel = true;
+        } else {
+            GGML_ASSERT(false);
+        }
+    } break;
    case GGML_OP_SCALE: {
        p[0].stages[1].valid = true;
        p[0].stages[1].parallel = true;
@@ -15810,13 +15818,12 @@ static void ggml_optimize_tensor_task_profile(
    struct ggml_tensor *tensor, struct ggml_task_profile *profiles,
    int n_profiles, struct ggml_mulmat_tune *tune) {

-    if (tensor->op != GGML_OP_MUL_MAT && tensor->op != GGML_OP_OUT_PROD) {
+    if (tensor->op != GGML_OP_MUL_MAT) {
        return;
    }

    GGML_ASSERT(tensor);
-    GGML_ASSERT(tensor->op == GGML_OP_MUL_MAT ||
-                tensor->op == GGML_OP_OUT_PROD);
+    GGML_ASSERT(tensor->op == GGML_OP_MUL_MAT);
    GGML_ASSERT(tensor->task_profile.id == n_profiles);

    GGML_ASSERT(profiles);
@@ -15949,7 +15956,9 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph)
        for (int i = 0; i < cgraph->n_nodes; i++) {
            struct ggml_tensor * node = cgraph->nodes[i];

-            GGML_ASSERT (node->op != GGML_OP_NONE);
+            if (node->op == GGML_OP_NONE) {
+                continue;
+            }

            if (node->task_profile.id == 0) {
                ggml_set_tensor_task_profile(node, cgraph->tune);
@@ -16031,7 +16040,7 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph)
                    {
                    } break;
                case GGML_OP_MUL_MAT:
-                case GGML_OP_OUT_PROD: // FIXME: is this correct?
+                case GGML_OP_OUT_PROD:
                    {
                        size_t cur = 0;
                        GGML_ASSERT(node->src1->type == GGML_TYPE_F32);