metal : remove unnecessary copies

2023-06-05 23:23:00 +03:00 · 2023-06-05 23:23:00 +03:00 · e129f0bd76
commit e129f0bd76
parent c38b0bbf82
1 changed file with 0 additions and 13 deletions
--- a/llama.cpp
+++ b/llama.cpp
@@ -53,7 +53,6 @@ enum e_model {
    MODEL_65B,
 };
 static const size_t MB = 1024*1024;
 // computed for n_ctx == 2048
@@ -1261,12 +1260,6 @@ static bool llama_eval_internal(
    ggml_set_name(embd, "embd");
    memcpy(embd->data, tokens, N*ggml_element_size(embd));
 #ifdef GGML_USE_METAL
    if (lctx.ctx_metal && N == 1) {
        ggml_metal_set_tensor(lctx.ctx_metal, embd);
    }
 #endif
    struct ggml_tensor * cur;
    struct ggml_tensor * inpL = ggml_get_rows(ctx0, model.tok_embeddings, embd);
@@ -1456,12 +1449,6 @@ static bool llama_eval_internal(
        // But for now, we have focused only on Matrix x Vector Metal multiplication.
        //
        ggml_graph_compute(ctx0, &gf);
        if (lctx.ctx_metal) {
            // We need to sync the CPU KV cache with the GPU KV cache
            ggml_metal_set_tensor(lctx.ctx_metal, kv_self.k);
            ggml_metal_set_tensor(lctx.ctx_metal, kv_self.v);
        }
    }
 #else
    ggml_graph_compute(ctx0, &gf);