server : normalize embeddings (#5956)
* output normalized embedding in '/v1/embeddings'
* common : reuse llama_embd_normalize
* common : better normalize impl

---------

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
This commit is contained in:
parent
2c4f566c88
commit
fb215c3832
4 changed files with 30 additions and 14 deletions
|
@@ -1327,6 +1327,8 @@ struct server_context {
|
|||
|
||||
const int n_embd = llama_n_embd(model);
|
||||
|
||||
std::vector<float> embd_res(n_embd, 0.0f);
|
||||
|
||||
for (int i = 0; i < batch.n_tokens; ++i) {
|
||||
if (!batch.logits[i] || batch.seq_id[i][0] != slot.id + 1) {
|
||||
continue;
|
||||
|
@@ -1350,8 +1352,10 @@ struct server_context {
|
|||
continue;
|
||||
}
|
||||
|
||||
llama_embd_normalize(embd, embd_res.data(), n_embd);
|
||||
|
||||
res.data = json {
|
||||
{"embedding", std::vector<float>(embd, embd + n_embd)},
|
||||
{"embedding", embd_res},
|
||||
};
|
||||
}
|
||||
|
||||
|
@@ -3354,6 +3358,8 @@ int main(int argc, char ** argv) {
|
|||
// get the result
|
||||
server_task_result result = ctx_server.queue_results.recv(id_task);
|
||||
ctx_server.queue_results.remove_waiting_task_id(id_task);
|
||||
|
||||
// append to the responses
|
||||
responses.push_back(result.data);
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue