metal : relax conditions on fast matrix multiplication kernel (#3168)

* metal : relax conditions on fast matrix multiplication kernel

* metal : revert the concurrency change because it was wrong

* llama : remove experimental stuff
This commit is contained in:
Georgi Gerganov 2023-09-15 11:09:24 +03:00 committed by GitHub
parent 76164fe2e6
commit a51b687657
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 100 additions and 51 deletions

View file

@@ -3429,10 +3429,6 @@ static bool llama_eval_internal(
if (lctx.ctx_metal) {
ggml_metal_set_n_cb (lctx.ctx_metal, n_threads);
ggml_metal_graph_compute(lctx.ctx_metal, gf);
ggml_metal_get_tensor (lctx.ctx_metal, res);
if (!lctx.embedding.empty()) {
ggml_metal_get_tensor(lctx.ctx_metal, embeddings);
}
} else {
ggml_graph_compute_helper(lctx.work_buffer, gf, n_threads);
}