llama : add reranking support (#9510)
* py : add XLMRobertaForSequenceClassification [no ci]
* py : fix scalar-tensor conversion [no ci]
* py : fix position embeddings chop [no ci]
* llama : read new cls tensors [no ci]
* llama : add classification head (wip) [no ci]
* llama : add "rank" pooling type ggml-ci
* server : add rerank endpoint ggml-ci
* llama : avoid ggml_repeat during classification
* rerank : cleanup + comments
* server : accept /rerank endpoint in addition to /v1/rerank [no ci]
* embedding : parse special tokens
* jina : support v1 reranker
* vocab : minor style ggml-ci
* server : initiate tests for later ggml-ci
* server : add docs
* llama : add comment [no ci]
* llama : fix uninitialized tensors
* ci : add rerank tests ggml-ci
* add reranking test
* change test data
* Update examples/server/server.cpp

Co-authored-by: Xuan Son Nguyen <thichthat@gmail.com>

* add `--reranking` argument
* update server docs
* llama : fix comment [no ci] ggml-ci

---------

Co-authored-by: Xuan Son Nguyen <son@huggingface.co>
Co-authored-by: Xuan Son Nguyen <thichthat@gmail.com>
parent 1b2f992cd2
commit f4d2b8846a
18 changed files with 602 additions and 56 deletions
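For orientation, here is a minimal sketch of how a client might call the new rerank endpoint and what the server answers with; it uses nlohmann::json as the server code does. The request field names ("query", "documents") and the model name are illustrative assumptions; only the response shape is taken from the format_response_rerank hunk below.

// Sketch only: illustrative request/response shapes for the new rerank endpoint.
// The request fields ("query", "documents") are assumptions based on common
// rerank APIs; the response mirrors format_response_rerank in the diff below.
#include <nlohmann/json.hpp>
#include <iostream>

using json = nlohmann::json;

int main() {
    // hypothetical body a client might POST to /v1/rerank
    json request = {
        {"model",     "some-reranker"},                   // illustrative name
        {"query",     "which document is about pandas?"}, // assumed field
        {"documents", json::array({                       // assumed field
            "hi",
            "The giant panda is a bear species endemic to China."
        })}
    };

    // shape of the response the server builds (see format_response_rerank):
    // one entry per input document, each with its index and relevance score
    json response = {
        {"model",  request["model"]},
        {"object", "list"},
        {"usage",  {{"prompt_tokens", 0}, {"total_tokens", 0}}},
        {"results", json::array({
            {{"index", 0}, {"relevance_score", -6.5}},
            {{"index", 1}, {"relevance_score", 10.2}}
        })}
    };

    std::cout << request.dump(2) << "\n" << response.dump(2) << std::endl;
    return 0;
}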
@@ -537,7 +537,7 @@ static json format_embeddings_response_oaicompat(const json & request, const jso
     json res = json {
         {"model", json_value(request, "model", std::string(DEFAULT_OAICOMPAT_MODEL))},
         {"object", "list"},
-        {"usage", json {
+        {"usage", json { // TODO: fill
             {"prompt_tokens", 0},
             {"total_tokens", 0}
         }},
@@ -547,6 +547,29 @@ static json format_embeddings_response_oaicompat(const json & request, const jso
     return res;
 }
 
+static json format_response_rerank(const json & request, const json & ranks) {
+    json data = json::array();
+    int i = 0;
+    for (const auto & rank : ranks) {
+        data.push_back(json{
+            {"index", i++},
+            {"relevance_score", json_value(rank, "score", 0.0)},
+        });
+    }
+
+    json res = json {
+        {"model", json_value(request, "model", std::string(DEFAULT_OAICOMPAT_MODEL))},
+        {"object", "list"},
+        {"usage", json { // TODO: fill
+            {"prompt_tokens", 0},
+            {"total_tokens", 0}
+        }},
+        {"results", data}
+    };
+
+    return res;
+}
+
 static bool is_valid_utf8(const std::string & str) {
     const unsigned char* bytes = reinterpret_cast<const unsigned char*>(str.data());
     const unsigned char* end = bytes + str.length();
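To make the new helper concrete, the following self-contained sketch reproduces format_response_rerank from the hunk above together with simplified stand-ins for json_value and DEFAULT_OAICOMPAT_MODEL (the real helpers live elsewhere in the server sources and are not part of this diff). It shows the shape of the ranks array the formatter expects, namely one object per document carrying a "score" field.

// Standalone sketch: exercises the same logic as format_response_rerank above.
// json_value and DEFAULT_OAICOMPAT_MODEL are simplified stand-ins for helpers
// defined elsewhere in the server sources; their real definitions may differ.
#include <nlohmann/json.hpp>
#include <iostream>
#include <string>

using json = nlohmann::json;

static const char * DEFAULT_OAICOMPAT_MODEL = "unknown-model"; // stand-in default

// simplified stand-in: fetch a key from a JSON object or fall back to a default
template <typename T>
static T json_value(const json & body, const std::string & key, const T & def) {
    return body.contains(key) && !body.at(key).is_null() ? body.at(key).get<T>() : def;
}

// reproduced from the hunk above so the sketch compiles on its own
static json format_response_rerank(const json & request, const json & ranks) {
    json data = json::array();
    int i = 0;
    for (const auto & rank : ranks) {
        data.push_back(json{
            {"index", i++},
            {"relevance_score", json_value(rank, "score", 0.0)},
        });
    }

    json res = json {
        {"model", json_value(request, "model", std::string(DEFAULT_OAICOMPAT_MODEL))},
        {"object", "list"},
        {"usage", json { // TODO: fill
            {"prompt_tokens", 0},
            {"total_tokens", 0}
        }},
        {"results", data}
    };

    return res;
}

int main() {
    // per-document scores as the server's rerank pipeline might collect them
    json ranks = json::array();
    ranks.push_back(json{{"score", 12.5}}); // document 0
    ranks.push_back(json{{"score", -3.1}}); // document 1

    const json request = {{"model", "jina-reranker-v1"}}; // illustrative name

    // prints a "results" array of {index, relevance_score} objects
    std::cout << format_response_rerank(request, ranks).dump(2) << std::endl;
    return 0;
}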