llama : fix kv_cache n init (close #1903)
commit 051e1b0e6a
parent 86c7571864
3 changed files with 4 additions and 0 deletions
@@ -886,6 +886,7 @@ static bool kv_cache_init(
     const int64_t n_elements = n_embd*n_mem;

     cache.buf.resize(2u*n_elements*ggml_type_size(wtype) + 2u*MB);
+    cache.n = 0;

     struct ggml_init_params params;
     params.mem_size = cache.buf.size;
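The hunk above adds `cache.n = 0;` so the KV cache starts with a well-defined token count instead of whatever value happened to be in that memory. Below is a minimal sketch of the failure mode and the fix, assuming a simplified cache struct; the names are illustrative only and are not the actual llama.cpp definitions.

```cpp
// Sketch only: why the cache's token counter must be set explicitly.
#include <cstdio>
#include <vector>

struct kv_cache_sketch {
    std::vector<float> buf; // backing storage standing in for the K/V tensors
    int n;                  // number of tokens currently in the cache
};

static bool kv_cache_init_sketch(kv_cache_sketch & cache, size_t n_elements) {
    cache.buf.resize(2u * n_elements);
    cache.n = 0; // without this, `n` is read uninitialized (indeterminate value)
    return true;
}

int main() {
    kv_cache_sketch cache; // `n` is uninitialized at this point
    kv_cache_init_sketch(cache, 1024);
    std::printf("tokens in cache: %d\n", cache.n); // prints 0 after the fix
    return 0;
}
```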
@@ -904,6 +905,7 @@ static bool kv_cache_init(
     ggml_set_name(cache.k, "cache_k");
     ggml_set_name(cache.v, "cache_v");

+    (void) n_gpu_layers;
 #ifdef GGML_USE_CUBLAS
     if (n_gpu_layers > n_layer + 1) {
         ggml_cuda_assign_buffers_no_scratch(cache.v);
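The second hunk references `n_gpu_layers` unconditionally via `(void) n_gpu_layers;`, which keeps builds compiled without GGML_USE_CUBLAS free of unused-parameter warnings, since the parameter is otherwise only used inside the `#ifdef` block. A small sketch of that idiom, with illustrative names and a hypothetical EXAMPLE_USE_CUDA macro standing in for the real configuration:

```cpp
// Sketch only: the (void) cast marks a parameter as intentionally unused.
#include <cstdio>

static void assign_gpu_buffers(int n_gpu_layers, int n_layer) {
    (void) n_gpu_layers; // referenced here so non-GPU builds stay warning-free
    (void) n_layer;
#ifdef EXAMPLE_USE_CUDA
    if (n_gpu_layers > n_layer + 1) {
        std::printf("would offload the V cache to the GPU\n");
    }
#endif
}

int main() {
    assign_gpu_buffers(0, 32);
    return 0;
}
```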