llama : init kq_pos only if needed
ggml-ci
This commit is contained in:
parent
833490b16f
commit
1657f92d2f
1 changed file with 6 additions and 1 deletion
|
@@ -1561,6 +1561,7 @@ struct llama_hparams {
|
||||||
float f_max_alibi_bias = 0.0f;
|
float f_max_alibi_bias = 0.0f;
|
||||||
|
|
||||||
bool causal_attn = true;
|
bool causal_attn = true;
|
||||||
|
bool need_kq_pos = false;
|
||||||
|
|
||||||
uint32_t pooling_type = LLAMA_POOLING_NONE;
|
uint32_t pooling_type = LLAMA_POOLING_NONE;
|
||||||
|
|
||||||
|
@@ -3242,6 +3243,10 @@ static void llm_load_hparams(
|
||||||
}
|
}
|
||||||
|
|
||||||
model.ftype = ml.ftype;
|
model.ftype = ml.ftype;
|
||||||
|
|
||||||
|
if (hparams.f_max_alibi_bias > 0.0f) {
|
||||||
|
hparams.need_kq_pos = true;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: This should probably be in llama.h
|
// TODO: This should probably be in llama.h
|
||||||
|
@@ -7529,7 +7534,7 @@ static void llama_set_inputs(llama_context & lctx, const llama_batch & batch) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
{
|
if (hparams.need_kq_pos) {
|
||||||
const int64_t n_kv = kv_self.n;
|
const int64_t n_kv = kv_self.n;
|
||||||
|
|
||||||
assert(ggml_backend_buffer_is_host(lctx.inp_KQ_pos->buffer));
|
assert(ggml_backend_buffer_is_host(lctx.inp_KQ_pos->buffer));
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue