Use correct type of pooling for embedding models (#5500)
Use correct type of pooling for embedding models
This commit is contained in:
parent
c06e45d729
commit
4524290e87
5 changed files with 94 additions and 31 deletions
6
llama.h
6
llama.h
|
@@ -112,6 +112,12 @@ extern "C" {
|
|||
LLAMA_ROPE_SCALING_MAX_VALUE = LLAMA_ROPE_SCALING_YARN,
|
||||
};
|
||||
|
||||
// Pooling type selector for embedding models (introduced by this commit:
// "Use correct type of pooling for embedding models").
// NOTE(review): the per-value notes below are inferred from the enumerator
// names only — confirm against the code that consumes this enum.
enum llama_pooling_type {
|
||||
LLAMA_POOLING_NONE = 0, // presumably: no pooling applied — TODO confirm
|
||||
LLAMA_POOLING_MEAN = 1, // presumably: mean pooling — TODO confirm
|
||||
LLAMA_POOLING_CLS = 2, // presumably: CLS-token pooling — TODO confirm
|
||||
};
|
||||
|
||||
enum llama_split_mode {
|
||||
LLAMA_SPLIT_NONE = 0, // single GPU
|
||||
LLAMA_SPLIT_LAYER = 1, // split layers and KV across GPUs
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue