metal : remove unnecessary copies

This commit is contained in:
Georgi Gerganov 2023-06-05 23:23:00 +03:00
parent c38b0bbf82
commit e129f0bd76
No known key found for this signature in database
GPG key ID: 449E073F9DC10735

View file

@ -53,7 +53,6 @@ enum e_model {
MODEL_65B, MODEL_65B,
}; };
static const size_t MB = 1024*1024; static const size_t MB = 1024*1024;
// computed for n_ctx == 2048
@ -1261,12 +1260,6 @@ static bool llama_eval_internal(
ggml_set_name(embd, "embd"); ggml_set_name(embd, "embd");
memcpy(embd->data, tokens, N*ggml_element_size(embd)); memcpy(embd->data, tokens, N*ggml_element_size(embd));
#ifdef GGML_USE_METAL
if (lctx.ctx_metal && N == 1) {
ggml_metal_set_tensor(lctx.ctx_metal, embd);
}
#endif
struct ggml_tensor * cur; struct ggml_tensor * cur;
struct ggml_tensor * inpL = ggml_get_rows(ctx0, model.tok_embeddings, embd); struct ggml_tensor * inpL = ggml_get_rows(ctx0, model.tok_embeddings, embd);
@ -1456,12 +1449,6 @@ static bool llama_eval_internal(
// But for now, we have focused only on Matrix x Vector Metal multiplication.
//
ggml_graph_compute(ctx0, &gf); ggml_graph_compute(ctx0, &gf);
if (lctx.ctx_metal) {
// We need to sync the CPU KV cache with the GPU KV cache
ggml_metal_set_tensor(lctx.ctx_metal, kv_self.k);
ggml_metal_set_tensor(lctx.ctx_metal, kv_self.v);
}
} }
#else #else
ggml_graph_compute(ctx0, &gf); ggml_graph_compute(ctx0, &gf);