* Use F16 for memory_k and memory_v
* Add a command-line switch to use f16 instead of f32 for memory k+v

---------

Co-authored-by: Ty Everett <ty@tyweb.us>
This commit is contained in:
parent
160bfb217d
commit
0b366e7357
3 changed files with 11 additions and 6 deletions
1
utils.h
1
utils.h
|
@ -18,6 +18,7 @@ struct gpt_params {
|
|||
int32_t n_predict = 128; // new tokens to predict
|
||||
int32_t repeat_last_n = 64; // last n tokens to penalize
|
||||
int32_t n_ctx = 512; //context size
|
||||
bool memory_f16 = false; // use f16 instead of f32 for memory kv
|
||||
|
||||
// sampling parameters
|
||||
int32_t top_k = 40;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue