ggml-alloc: avoid returning silently

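In certain cases, the allocate_node() function may return silently without performing any memory allocation. This change moves the `return` into the `else` branch that reuses the parent's data, so the function no longer returns early on the other path.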
2023-08-24 01:34:57 -04:00 · 2023-08-24 01:34:57 -04:00 · 0c268a83e8
commit 0c268a83e8
parent ee8b2aa75d
2 changed files with 2 additions and 7 deletions
--- a/ggml-alloc.c
+++ b/ggml-alloc.c
@ -441,8 +441,8 @@ static void allocate_node(struct ggml_allocr * alloc, struct ggml_tensor * node)
                        else {
                            AT_PRINTF("reusing parent %s for %s\n", parent->name, node->name);
                            node->data = parent->data;
+                            return;
                        }
-                        return;
                    }
                }
            }
@ -528,7 +528,7 @@ static size_t ggml_allocator_alloc_graph_tensors_n(
                }
                AT_PRINTF("\n");
            }
-            
+

            // update parents
            // update immediately if there is no parse_seq
--- a/llama.cpp
+++ b/llama.cpp
@ -2704,11 +2704,6 @@ static struct ggml_cgraph * llm_build_falcon(
            struct ggml_tensor * inpFF = attn_norm;

            cur = ggml_mul_mat(ctx0, model.layers[il].w3, inpFF);
-
-            // TODO: this is temporary needed to introduce artificial dependency between FF and ATTN
-            //       adding this, because there seems to be a bug in the Metal concurrency optimization
-            //       without this line, the results are non-deterministic and wrong
-            cur->src[2] = attn_out;
            offload_func(cur);

            cur = ggml_gelu(ctx0, cur);