server : fill usage info in embeddings and rerank responses (#10852)
* server : fill usage info in embeddings response * server : fill usage info in reranking response
This commit is contained in:
parent
382bc7f2e8
commit
05c3a444b8
4 changed files with 77 additions and 10 deletions
|
@ -97,3 +97,33 @@ def test_same_prompt_give_same_result():
|
|||
vi = res.body['data'][i]['embedding']
|
||||
for x, y in zip(v0, vi):
|
||||
assert abs(x - y) < EPSILON
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"content,n_tokens",
|
||||
[
|
||||
("I believe the meaning of life is", 7),
|
||||
("This is a test", 4),
|
||||
]
|
||||
)
|
||||
def test_embedding_usage_single(content, n_tokens):
|
||||
global server
|
||||
server.start()
|
||||
res = server.make_request("POST", "/embeddings", data={"input": content})
|
||||
assert res.status_code == 200
|
||||
assert res.body['usage']['prompt_tokens'] == res.body['usage']['total_tokens']
|
||||
assert res.body['usage']['prompt_tokens'] == n_tokens
|
||||
|
||||
|
||||
def test_embedding_usage_multiple():
|
||||
global server
|
||||
server.start()
|
||||
res = server.make_request("POST", "/embeddings", data={
|
||||
"input": [
|
||||
"I believe the meaning of life is",
|
||||
"I believe the meaning of life is",
|
||||
],
|
||||
})
|
||||
assert res.status_code == 200
|
||||
assert res.body['usage']['prompt_tokens'] == res.body['usage']['total_tokens']
|
||||
assert res.body['usage']['prompt_tokens'] == 2 * 7
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue