sync : ggml (#5452)

* ggml-alloc : v3 (ggml/727)
* ggml-alloc v3 ggml-ci
* fix ci ggml-ci
* whisper : check for backend buffer allocation failures
* whisper : avoid leaks when initialization fails
* cleanup ggml-ci
* style fixes ggml-ci
* sync : ggml
* update llama.cpp, clip.cpp, export-lora.cpp
* update finetune.cpp, train-text-from-scratch.cpp ggml-ci
* ggml-backend : reduce alignment to 32 to match gguf and fix mmap
---------
Co-authored-by: slaren <slarengh@gmail.com>
2024-02-12 09:16:06 +02:00 · 2024-02-12 09:16:06 +02:00 · 3b169441df
commit 3b169441df
parent 3bdc4cd0f5
12 changed files with 1287 additions and 1362 deletions
--- a/ggml.c
+++ b/ggml.c
@ -2649,7 +2649,7 @@ static struct ggml_tensor * ggml_new_tensor_impl(
        /*.nb           =*/ { 0, 0, 0, 0 },
        /*.op           =*/ GGML_OP_NONE,
        /*.op_params    =*/ { 0 },
-        /*.is_param     =*/ false,
+        /*.flags        =*/ 0,
        /*.grad         =*/ NULL,
        /*.src          =*/ { NULL },
        /*.perf_runs    =*/ 0,
@ -6551,7 +6551,7 @@ struct ggml_tensor * ggml_cross_entropy_loss_back(
 void ggml_set_param(
        struct ggml_context * ctx,
        struct ggml_tensor * tensor) {
-    tensor->is_param = true;
+    tensor->flags |= GGML_TENSOR_FLAG_PARAM;

    GGML_ASSERT(tensor->grad == NULL);
    tensor->grad = ggml_dup_tensor(ctx, tensor);
@ -15367,7 +15367,7 @@ static struct ggml_tensor * ggml_recompute_graph_node(
        return NULL;
    }

-    if (node->is_param) {
+    if (node->flags & GGML_TENSOR_FLAG_PARAM) {
        return node;
    }

@ -15401,7 +15401,7 @@ static struct ggml_tensor * ggml_recompute_graph_node(

    clone->op       = node->op;
    clone->grad     = node->grad;
-    clone->is_param = node->is_param;
+    clone->flags    = node->flags;
    clone->extra    = node->extra;
    for (int k = 0; k < GGML_MAX_DIMS; ++k) {
        clone->nb[k] = node->nb[k];
@ -16433,7 +16433,7 @@ void ggml_build_backward_expand(struct ggml_context * ctx, struct ggml_cgraph *
    for (int i = 0; i < gf->n_nodes; i++) {
        struct ggml_tensor * node = gf->nodes[i];

-        if (node->is_param) {
+        if (node->flags & GGML_TENSOR_FLAG_PARAM) {
            GGML_PRINT_DEBUG("%s: found root node %p\n", __func__, (void *) node);
            ggml_build_forward_expand(gb, node->grad);
        }
@ -17918,7 +17918,7 @@ void ggml_graph_print(const struct ggml_cgraph * cgraph) {
        GGML_PRINT(" - %3d: [ %5" PRId64 ", %5" PRId64 ", %5" PRId64 "] %16s %s (%3d) cpu = %7.3f / %7.3f ms, wall = %7.3f / %7.3f ms\n",
                i,
                node->ne[0], node->ne[1], node->ne[2],
-                ggml_op_name(node->op), node->is_param ? "x" : node->grad ? "g" : " ", node->perf_runs,
+                ggml_op_name(node->op), (node->flags & GGML_TENSOR_FLAG_PARAM) ? "x" : node->grad ? "g" : " ", node->perf_runs,
                (double) node->perf_cycles  / (double) ggml_cycles_per_ms(),
                (double) node->perf_cycles  / (double) ggml_cycles_per_ms() / (double) node->perf_runs,
                (double) node->perf_time_us / 1000.0,
@ -18011,7 +18011,7 @@ void ggml_graph_dump_dot(const struct ggml_cgraph * gb, const struct ggml_cgraph
            continue;
        }

-        if (node->is_param) {
+        if (node->flags & GGML_TENSOR_FLAG_PARAM) {
            snprintf(color, sizeof(color), "yellow");
        } else if (node->grad) {
            if (ggml_graph_find(gf, node)) {
@ -18185,7 +18185,7 @@ static enum ggml_opt_result ggml_opt_adam(
    int np = 0;
    int64_t nx = 0;
    for (int i = 0; i < gf->n_nodes; ++i) {
-        if (gf->nodes[i]->is_param) {
+        if (gf->nodes[i]->flags & GGML_TENSOR_FLAG_PARAM) {
            GGML_PRINT_DEBUG("found param %d: grad->op = %d\n", np, gf->nodes[i]->grad->op);

            GGML_ASSERT(np < GGML_MAX_PARAMS);
@ -18548,7 +18548,7 @@ static enum ggml_opt_result ggml_opt_lbfgs(
    int np = 0;
    int nx = 0;
    for (int i = 0; i < gf->n_nodes; ++i) {
-        if (gf->nodes[i]->is_param) {
+        if (gf->nodes[i]->flags & GGML_TENSOR_FLAG_PARAM) {
            GGML_PRINT_DEBUG("found param %d: grad->op = %d\n", np, gf->nodes[i]->grad->op);

            GGML_ASSERT(np < GGML_MAX_PARAMS);
@ -19023,6 +19023,16 @@ enum ggml_opt_result ggml_opt_resume_g(

 ////////////////////////////////////////////////////////////////////////////////

+void ggml_set_input(struct ggml_tensor * tensor) { // sets the GGML_TENSOR_FLAG_INPUT bit in tensor->flags; tensor is dereferenced without a NULL check
+    tensor->flags |= GGML_TENSOR_FLAG_INPUT; // OR-assign: flag bits that are already set (e.g. GGML_TENSOR_FLAG_PARAM) are preserved
+}
+
+void ggml_set_output(struct ggml_tensor * tensor) { // sets the GGML_TENSOR_FLAG_OUTPUT bit in tensor->flags; tensor is dereferenced without a NULL check
+    tensor->flags |= GGML_TENSOR_FLAG_OUTPUT; // OR-assign: flag bits that are already set are preserved
+}
+
+////////////////////////////////////////////////////////////////////////////////
+
 void ggml_quantize_init(enum ggml_type type) {
    ggml_critical_section_start();