llama : streamline embeddings from "non-embedding" models (#8087)

Douglas Hanley 2024-07-05 02:05:56 -05:00 committed by GitHub
parent bcefa03bc0
commit d12f781074
4 changed files with 36 additions and 10 deletions

@@ -99,6 +99,7 @@ struct gpt_params {
 enum llama_split_mode split_mode = LLAMA_SPLIT_MODE_LAYER; // how to split the model across GPUs
 enum llama_rope_scaling_type rope_scaling_type = LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED;
 enum llama_pooling_type pooling_type = LLAMA_POOLING_TYPE_UNSPECIFIED; // pooling type for embeddings
+enum llama_attention_type attention_type = LLAMA_ATTENTION_TYPE_UNSPECIFIED; // attention type for embeddings

 // sampling parameters
 struct llama_sampling_params sparams;
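
For context, here is a minimal sketch (not part of this commit's diff) of how the new attention_type field in gpt_params could be forwarded when building a context for an embedding run. It assumes that llama_context_params gained a matching attention_type field in this change and that the enum carries causal/non-causal values alongside LLAMA_ATTENTION_TYPE_UNSPECIFIED.

// Sketch: build context params for an embedding run from gpt_params.
// Assumption: llama_context_params has an attention_type field mirroring
// the new gpt_params member introduced here.
#include "common.h"
#include "llama.h"

static llama_context_params embedding_context_params(const gpt_params & params) {
    llama_context_params cparams = llama_context_default_params();

    cparams.embeddings   = true;                 // request embedding output
    cparams.pooling_type = params.pooling_type;  // e.g. mean pooling for sentence embeddings

    // New: lets the caller force causal or non-causal attention for the
    // embedding pass instead of relying on the model's default.
    cparams.attention_type = params.attention_type;

    return cparams;
}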