metal: bug fix when ggml-alloc is enabled (#2757)
* metal: better memory alloc w/ concurrency dispatch
  The ggml-alloc should only free tensors at memory barriers.
* ggml-alloc: avoid returning silently
  In certain cases, the allocate_node() function may silently return without performing any memory allocation.
This commit is contained in:
parent
8f8c28e89c
commit
38b16dfca6
2 changed files with 77 additions and 69 deletions
|
@ -2707,11 +2707,6 @@ static struct ggml_cgraph * llm_build_falcon(
|
|||
struct ggml_tensor * inpFF = attn_norm;
|
||||
|
||||
cur = ggml_mul_mat(ctx0, model.layers[il].w3, inpFF);
|
||||
|
||||
// TODO: this is temporary needed to introduce artificial dependency between FF and ATTN
|
||||
// adding this, because there seems to be a bug in the Metal concurrency optimization
|
||||
// without this line, the results are non-deterministic and wrong
|
||||
cur->src[2] = attn_out;
|
||||
offload_func(cur);
|
||||
|
||||
cur = ggml_gelu(ctx0, cur);
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue