refact : fix convert script + zero out KV cache to avoid nans (#3523)

* refact : fix convert script + zero out KV cache to avoid nans

* ggml : silu(-inf) should never happen

* metal : assert various kernel requirements
This commit is contained in:
Georgi Gerganov 2023-10-09 14:32:17 +03:00 committed by GitHub
parent dcc09d2596
commit fcca0a7004
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 51 additions and 91 deletions

View file

@ -1325,7 +1325,11 @@ static bool llama_kv_cache_init(
cache.cells.clear();
cache.cells.resize(n_ctx);
// TODO: this should be:
// cache.buf.resize(2u*n_elements*ggml_type_size(wtype) + 2u*ggml_tensor_overhead());
// change it and test that it works
cache.buf.resize(2u*n_elements*ggml_type_size(wtype) + 2u*MB);
memset(cache.buf.data, 0, cache.buf.size);
struct ggml_init_params params;
params.mem_size = cache.buf.size;