ggml-alloc: avoid returning silently
In certain cases, the allocate_node() function may silently return without performing any memory allocation.
This commit is contained in:
parent
ee8b2aa75d
commit
0c268a83e8
2 changed files with 2 additions and 7 deletions
|
@@ -441,11 +441,11 @@ static void allocate_node(struct ggml_allocr * alloc, struct ggml_tensor * node)
|
||||||
else {
|
else {
|
||||||
AT_PRINTF("reusing parent %s for %s\n", parent->name, node->name);
|
AT_PRINTF("reusing parent %s for %s\n", parent->name, node->name);
|
||||||
node->data = parent->data;
|
node->data = parent->data;
|
||||||
}
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
ggml_allocr_alloc(alloc, node);
|
ggml_allocr_alloc(alloc, node);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@@ -2704,11 +2704,6 @@ static struct ggml_cgraph * llm_build_falcon(
|
||||||
struct ggml_tensor * inpFF = attn_norm;
|
struct ggml_tensor * inpFF = attn_norm;
|
||||||
|
|
||||||
cur = ggml_mul_mat(ctx0, model.layers[il].w3, inpFF);
|
cur = ggml_mul_mat(ctx0, model.layers[il].w3, inpFF);
|
||||||
|
|
||||||
// TODO: this is temporary needed to introduce artificial dependency between FF and ATTN
|
|
||||||
// adding this, because there seems to be a bug in the Metal concurrency optimization
|
|
||||||
// without this line, the results are non-deterministic and wrong
|
|
||||||
cur->src[2] = attn_out;
|
|
||||||
offload_func(cur);
|
offload_func(cur);
|
||||||
|
|
||||||
cur = ggml_gelu(ctx0, cur);
|
cur = ggml_gelu(ctx0, cur);
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue