ggml : add asserts for type conversion in fattn kernels (#9971)

ggml-ci
This commit is contained in:
Georgi Gerganov 2024-10-21 16:20:46 +03:00 committed by GitHub
parent d5ebd79c76
commit f594bc80ba
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 8 additions and 4 deletions

View file

@@ -19243,7 +19243,7 @@ struct llama_context * llama_new_context_with_model(
params.flash_attn = false;
}
-    if (params.type_v != GGML_TYPE_F16 && !params.flash_attn) {
+    if (ggml_is_quantized(params.type_v) && !params.flash_attn) {
LLAMA_LOG_ERROR("%s: V cache quantization requires flash_attn\n", __func__);
return nullptr;
}