server : output embeddings for all tokens when pooling = none (#10861)
* server : add "tokens" output ggml-ci
* server : output embeddings for all tokens when pooling = none ggml-ci
* server : update readme [no ci]
* server : fix spacing [no ci]
  Co-authored-by: Xuan Son Nguyen <thichthat@gmail.com>
* server : be explicit about the pooling type in the tests ggml-ci
* server : update /embeddings and /v1/embeddings endpoints ggml-ci
* server : do not normalize embeddings when there is no pooling ggml-ci
* server : update readme ggml-ci
* server : fixes
* tests : update server tests ggml-ci
* server : update readme [no ci]
* server : remove rebase artifact
---------
Co-authored-by: Xuan Son Nguyen <thichthat@gmail.com>
This commit is contained in:
		
							parent
							
								
									0e70ba686e
								
							
						
					
					
						commit
						152610eda9
					
				
					 8 changed files with 158 additions and 37 deletions
				
			
@@ -75,7 +75,7 @@ static std::vector<std::vector<float>> encode(llama_context * ctx, const std::ve
         }
 
         std::vector<float> emb_norm(emb_unorm.size());
-        common_embd_normalize(emb_unorm.data(), emb_norm.data(), n_embd);
+        common_embd_normalize(emb_unorm.data(), emb_norm.data(), n_embd, 2);
         result.push_back(emb_norm);
 
 #ifdef GRIT_DEBUG
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue