server: #5655 - continue to update other slots on embedding concurrent request.
server: tests: add multi users embeddings as fixed
This commit is contained in:
parent
525213d2f5
commit
09b77b4c9e
3 changed files with 25 additions and 34 deletions
|
@ -1836,7 +1836,7 @@ struct llama_server_context
|
||||||
send_embedding(slot);
|
send_embedding(slot);
|
||||||
slot.release();
|
slot.release();
|
||||||
slot.i_batch = -1;
|
slot.i_batch = -1;
|
||||||
return true;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
completion_token_output result;
|
completion_token_output result;
|
||||||
|
|
|
@ -1,36 +1,4 @@
|
||||||
# List of ongoing issues
|
# List of ongoing issues
|
||||||
@bug
|
@bug
|
||||||
Feature: Issues
|
Feature: Issues
|
||||||
# Issue #5655
|
# No confirmed issue at the moment
|
||||||
Scenario: Multi users embeddings
|
|
||||||
Given a server listening on localhost:8080
|
|
||||||
And a model file stories260K.gguf
|
|
||||||
And a model alias tinyllama-2
|
|
||||||
And 42 as server seed
|
|
||||||
And 64 KV cache size
|
|
||||||
And 2 slots
|
|
||||||
And continuous batching
|
|
||||||
And embeddings extraction
|
|
||||||
Then the server is starting
|
|
||||||
Then the server is healthy
|
|
||||||
|
|
||||||
Given a prompt:
|
|
||||||
"""
|
|
||||||
Write a very long story about AI.
|
|
||||||
"""
|
|
||||||
And a prompt:
|
|
||||||
"""
|
|
||||||
Write another very long music lyrics.
|
|
||||||
"""
|
|
||||||
And a prompt:
|
|
||||||
"""
|
|
||||||
Write a very long poem.
|
|
||||||
"""
|
|
||||||
And a prompt:
|
|
||||||
"""
|
|
||||||
Write a very long joke.
|
|
||||||
"""
|
|
||||||
Given concurrent embedding requests
|
|
||||||
Then the server is busy
|
|
||||||
Then the server is idle
|
|
||||||
Then all embeddings are generated
|
|
||||||
|
|
|
@ -8,6 +8,7 @@ Feature: Parallel
|
||||||
And 42 as server seed
|
And 42 as server seed
|
||||||
And 64 KV cache size
|
And 64 KV cache size
|
||||||
And 2 slots
|
And 2 slots
|
||||||
|
And embeddings extraction
|
||||||
And continuous batching
|
And continuous batching
|
||||||
Then the server is starting
|
Then the server is starting
|
||||||
Then the server is healthy
|
Then the server is healthy
|
||||||
|
@ -75,3 +76,25 @@ Feature: Parallel
|
||||||
Then the server is busy
|
Then the server is busy
|
||||||
Then the server is idle
|
Then the server is idle
|
||||||
Then all prompts are predicted
|
Then all prompts are predicted
|
||||||
|
|
||||||
|
Scenario: Multi users embeddings
|
||||||
|
Given a prompt:
|
||||||
|
"""
|
||||||
|
Write a very long story about AI.
|
||||||
|
"""
|
||||||
|
And a prompt:
|
||||||
|
"""
|
||||||
|
Write another very long music lyrics.
|
||||||
|
"""
|
||||||
|
And a prompt:
|
||||||
|
"""
|
||||||
|
Write a very long poem.
|
||||||
|
"""
|
||||||
|
And a prompt:
|
||||||
|
"""
|
||||||
|
Write a very long joke.
|
||||||
|
"""
|
||||||
|
Given concurrent embedding requests
|
||||||
|
Then the server is busy
|
||||||
|
Then the server is idle
|
||||||
|
Then all embeddings are generated
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue