refact : fix convert script + zero out KV cache to avoid nans (#3523)
* refact : fix convert script + zero out KV cache to avoid nans
* ggml : silu(-inf) should never happen
* metal : assert various kernel requirements
This commit is contained in:
parent
dcc09d2596
commit
fcca0a7004
6 changed files with 51 additions and 91 deletions
|
@ -1325,7 +1325,11 @@ static bool llama_kv_cache_init(
|
|||
cache.cells.clear();
|
||||
cache.cells.resize(n_ctx);
|
||||
|
||||
// TODO: this should be:
|
||||
// cache.buf.resize(2u*n_elements*ggml_type_size(wtype) + 2u*ggml_tensor_overhead());
|
||||
// change it and test that it works
|
||||
cache.buf.resize(2u*n_elements*ggml_type_size(wtype) + 2u*MB);
|
||||
memset(cache.buf.data, 0, cache.buf.size);
|
||||
|
||||
struct ggml_init_params params;
|
||||
params.mem_size = cache.buf.size;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue