sync : ggml (conv ops + cuda MSVC fixes) (#3765)

ggml-ci
2023-10-24 21:51:20 +03:00 · 2023-10-24 21:51:20 +03:00 · b2f7e04bd3
commit b2f7e04bd3
parent abd21fc99f
3 changed files with 369 additions and 94 deletions
--- a/ggml.h
+++ b/ggml.h
@ -401,15 +401,16 @@ extern "C" {
        GGML_OP_ALIBI,
        GGML_OP_CLAMP,
        GGML_OP_CONV_1D,
-        GGML_OP_CONV_2D,
+        GGML_OP_CONV_1D_STAGE_0,  // internal
+        GGML_OP_CONV_1D_STAGE_1,  // internal
        GGML_OP_CONV_TRANSPOSE_1D,
+        GGML_OP_CONV_2D,
+        GGML_OP_CONV_2D_STAGE_0, // internal
+        GGML_OP_CONV_2D_STAGE_1, // internal
        GGML_OP_CONV_TRANSPOSE_2D,
        GGML_OP_POOL_1D,
        GGML_OP_POOL_2D,

-        GGML_OP_CONV_1D_STAGE_0,  // internal
-        GGML_OP_CONV_1D_STAGE_1,  // internal
-
        GGML_OP_UPSCALE, // nearest interpolate

        GGML_OP_FLASH_ATTN,
@ -1020,9 +1021,9 @@ extern "C" {
            struct ggml_tensor  * b,
            float                 eps);

-    // A: n columns, m rows
-    // B: n columns, p rows  (i.e. we transpose it internally)
-    // result is m columns, p rows
+    // A: k columns, n rows => [ne03, ne02, n, k]
+    // B: k columns, m rows  (i.e. we transpose it internally) => [ne03 * x, ne02 * y, m, k]
+    // result is n columns, m rows => [ne03 * x, ne02 * y, m, n]
    GGML_API struct ggml_tensor * ggml_mul_mat(
            struct ggml_context * ctx,
            struct ggml_tensor  * a,