metal : remove unnecessary copies
This commit is contained in:
parent
c38b0bbf82
commit
e129f0bd76
1 changed file with 0 additions and 13 deletions
13
llama.cpp
13
llama.cpp
|
@ -53,7 +53,6 @@ enum e_model {
|
||||||
MODEL_65B,
|
MODEL_65B,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
static const size_t MB = 1024*1024;
|
static const size_t MB = 1024*1024;
|
||||||
|
|
||||||
// computed for n_ctx == 2048
|
// computed for n_ctx == 2048
|
||||||
|
@ -1261,12 +1260,6 @@ static bool llama_eval_internal(
|
||||||
ggml_set_name(embd, "embd");
|
ggml_set_name(embd, "embd");
|
||||||
memcpy(embd->data, tokens, N*ggml_element_size(embd));
|
memcpy(embd->data, tokens, N*ggml_element_size(embd));
|
||||||
|
|
||||||
#ifdef GGML_USE_METAL
|
|
||||||
if (lctx.ctx_metal && N == 1) {
|
|
||||||
ggml_metal_set_tensor(lctx.ctx_metal, embd);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
struct ggml_tensor * cur;
|
struct ggml_tensor * cur;
|
||||||
struct ggml_tensor * inpL = ggml_get_rows(ctx0, model.tok_embeddings, embd);
|
struct ggml_tensor * inpL = ggml_get_rows(ctx0, model.tok_embeddings, embd);
|
||||||
|
|
||||||
|
@ -1456,12 +1449,6 @@ static bool llama_eval_internal(
|
||||||
// But for now, we have focused only on Matrix x Vector Metal multiplication.
|
// But for now, we have focused only on Matrix x Vector Metal multiplication.
|
||||||
//
|
//
|
||||||
ggml_graph_compute(ctx0, &gf);
|
ggml_graph_compute(ctx0, &gf);
|
||||||
|
|
||||||
if (lctx.ctx_metal) {
|
|
||||||
// We need to sync the CPU KV cache with the GPU KV cache
|
|
||||||
ggml_metal_set_tensor(lctx.ctx_metal, kv_self.k);
|
|
||||||
ggml_metal_set_tensor(lctx.ctx_metal, kv_self.v);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
ggml_graph_compute(ctx0, &gf);
|
ggml_graph_compute(ctx0, &gf);
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue