Use correct type of pooling for embedding models (#5500)

Douglas Hanley 2024-02-15 11:21:49 -06:00 committed by GitHub
parent c06e45d729
commit 4524290e87
5 changed files with 94 additions and 31 deletions

llama.h

@@ -112,6 +112,12 @@ extern "C" {
    LLAMA_ROPE_SCALING_MAX_VALUE = LLAMA_ROPE_SCALING_YARN,
};

enum llama_pooling_type {
    LLAMA_POOLING_NONE = 0,
    LLAMA_POOLING_MEAN = 1,
    LLAMA_POOLING_CLS  = 2,
};

enum llama_split_mode {
    LLAMA_SPLIT_NONE  = 0, // single GPU
    LLAMA_SPLIT_LAYER = 1, // split layers and KV across GPUs
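
For context, a minimal sketch of what the two pooling modes mean when reducing per-token embeddings to a single sequence embedding: mean pooling averages the embeddings of all tokens, while CLS pooling takes only the first token's embedding. The helper below (hypothetical pool_embeddings, plain C++) is illustrative only and is not part of the llama.cpp API or this commit:

#include <cstddef>
#include <vector>

// Illustrative sketch only (not llama.cpp internals): reduce per-token
// embeddings (n_tokens x n_embd, row-major) to one sequence embedding.
std::vector<float> pool_embeddings(const std::vector<float> & tok_emb,
                                   size_t n_tokens, size_t n_embd,
                                   int pooling_type /* LLAMA_POOLING_* */) {
    std::vector<float> out(n_embd, 0.0f);
    if (pooling_type == 2) {            // LLAMA_POOLING_CLS
        // CLS pooling: use the embedding of the first token only.
        for (size_t i = 0; i < n_embd; i++) out[i] = tok_emb[i];
    } else if (pooling_type == 1) {     // LLAMA_POOLING_MEAN
        // Mean pooling: average the embeddings across all tokens.
        for (size_t t = 0; t < n_tokens; t++)
            for (size_t i = 0; i < n_embd; i++)
                out[i] += tok_emb[t * n_embd + i];
        for (size_t i = 0; i < n_embd; i++) out[i] /= (float) n_tokens;
    }
    // LLAMA_POOLING_NONE (0): no reduction; per-token embeddings are kept.
    return out;
}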