error if type_v != FP16 and not flash_attn
parent cc7aef6829
commit d8a0b87091
1 changed file with 5 additions and 0 deletions
@@ -15866,6 +15866,11 @@ struct llama_context * llama_new_context_with_model(
         params.flash_attn = false;
     }
 
+    if (params.type_v != GGML_TYPE_F16 && !params.flash_attn) {
+        LLAMA_LOG_ERROR("%s: V cache quantization requires flash_attn\n", __func__);
+        return nullptr;
+    }
+
     llama_context * ctx = new llama_context(*model);
 
     const auto & hparams = model->hparams;
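
For reference, a minimal sketch of how a caller of the C API would hit the new error path. It assumes the llama.cpp C API of this era (llama_load_model_from_file, llama_context_default_params, llama_new_context_with_model) and the context-params fields shown in the diff (type_v, flash_attn); the model path and the quantized V cache type are placeholders.

// Sketch only: placeholder model path, assumes the C API around this commit.
#include "llama.h"

#include <cstdio>

int main() {
    llama_backend_init();

    llama_model_params mparams = llama_model_default_params();
    llama_model * model = llama_load_model_from_file("model.gguf", mparams); // placeholder path
    if (model == nullptr) {
        return 1;
    }

    llama_context_params cparams = llama_context_default_params();
    cparams.type_v     = GGML_TYPE_Q4_0; // quantized V cache
    cparams.flash_attn = false;          // flash attention disabled

    // With this commit, the combination above is rejected: the function logs
    // "V cache quantization requires flash_attn" and returns nullptr instead
    // of silently creating a context with an unsupported configuration.
    llama_context * ctx = llama_new_context_with_model(model, cparams);
    if (ctx == nullptr) {
        fprintf(stderr, "context creation failed: quantized V cache without flash_attn\n");
    } else {
        llama_free(ctx);
    }

    llama_free_model(model);
    llama_backend_free();
    return ctx == nullptr ? 1 : 0;
}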