Server: Use multi-task for embeddings endpoint (#6001)

* use multi-task for embeddings endpoint

* specify types

* remove redundant {"n_predict", 0}
This commit is contained in:
Xuan Son Nguyen 2024-03-13 11:39:11 +01:00 committed by GitHub
parent 306d34be7a
commit 99b71c068f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 38 additions and 50 deletions

View file

@ -529,6 +529,16 @@ static std::vector<json> format_partial_response_oaicompat(json result, const st
}
static json format_embeddings_response_oaicompat(const json & request, const json & embeddings) {
json data = json::array();
int i = 0;
for (auto & elem : embeddings) {
data.push_back(json{
{"embedding", json_value(elem, "embedding", json::array())},
{"index", i++},
{"object", "embedding"}
});
}
json res = json {
{"model", json_value(request, "model", std::string(DEFAULT_OAICOMPAT_MODEL))},
{"object", "list"},
@ -536,7 +546,7 @@ static json format_embeddings_response_oaicompat(const json & request, const jso
{"prompt_tokens", 0},
{"total_tokens", 0}
}},
{"data", embeddings}
{"data", data}
};
return res;