metal : enable ggml-alloc (#2627)
* metal: enable ggml-alloc Make ggml-alloc work with concurrently dispatch. * style-fix Co-authored-by: slaren <slarengh@gmail.com> --------- Co-authored-by: slaren <slarengh@gmail.com> Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
This commit is contained in:
parent
bf83bff674
commit
fc8ef549e5
5 changed files with 61 additions and 26 deletions
|
@ -63,10 +63,13 @@ void ggml_metal_get_tensor(struct ggml_metal_context * ctx, struct ggml_tensor *
|
|||
|
||||
// try to find operations that can be run concurrently in the graph
|
||||
// you should run it again if the topology of your graph changes
|
||||
void ggml_metal_graph_find_concurrency(struct ggml_metal_context * ctx, struct ggml_cgraph * gf);
|
||||
void ggml_metal_graph_find_concurrency(struct ggml_metal_context * ctx, struct ggml_cgraph * gf, bool check_mem);
|
||||
|
||||
// if the graph has been optimized for concurrently dispatch
|
||||
bool ggml_metal_if_optimized(struct ggml_metal_context * ctx);
|
||||
// if the graph has been optimized for concurrently dispatch, return length of the concur_list if optimized
|
||||
int ggml_metal_if_optimized(struct ggml_metal_context * ctx);
|
||||
|
||||
// output the concur_list for ggml_alloc
|
||||
int * ggml_metal_get_concur_list(struct ggml_metal_context * ctx);
|
||||
|
||||
// same as ggml_graph_compute but uses Metal
|
||||
// creates gf->n_threads command buffers in parallel
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue