server: tests: add embeddings scenario

parent 1ecda0d13e
commit e6d482088d

3 changed files with 49 additions and 2 deletions

@@ -1,6 +1,6 @@
 # Server Integration Test
 
-Functional server tests suite.
+Server tests scenario using [BDD](https://en.wikipedia.org/wiki/Behavior-driven_development) with [behave](https://behave.readthedocs.io/en/latest/).
 
 ### Install dependencies
 `pip install -r requirements.txt`

@@ -9,3 +9,8 @@ Functional server tests suite.
 1. Build the server
 2. download a GGUF model: `./scripts/hf.sh --repo ggml-org/models --file tinyllamas/stories260K.gguf`
 3. Start the test: `./tests.sh stories260K.gguf -ngl 23`
+
+### Skipped scenario
+
+Scenario must be annotated with `@llama.cpp` to be included in the scope.
+`@bug` annotation aims to link a scenario with a GitHub issue.
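
The `@llama.cpp` scoping described above relies on behave's tag mechanism. A minimal sketch of how such filtering is typically wired up in a behave `environment.py` hook (the hook file itself is not part of this diff, so this is only an assumption about how the scoping could be implemented):

```python
# environment.py -- illustrative sketch only; not part of this commit.
def before_scenario(context, scenario):
    # Keep only scenarios explicitly tagged @llama.cpp in scope;
    # everything else is reported as skipped rather than executed.
    if 'llama.cpp' not in scenario.tags:
        scenario.skip(reason="scenario is not tagged @llama.cpp")
```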

@@ -98,3 +98,21 @@ Feature: llama.cpp server
     And all slots are idle
     Then all prompts are predicted
+
+  @llama.cpp
+  Scenario: Embedding
+    When embeddings are computed for:
+    """
+    What is the capital of France ?
+    """
+    Then embeddings are generated
+
+
+  @llama.cpp
+  Scenario: OAI Embeddings compatibility
+    Given a model tinyllama-2
+    When an OAI compatible embeddings computation request for:
+    """
+    What is the capital of Spain ?
+    """
+    Then embeddings are generated
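
Stripped of the behave wiring, the `Embedding` scenario above boils down to one POST against the server's `/embedding` endpoint, the same call the step definition below makes. A standalone sketch, assuming the server is listening on `http://localhost:8080` (the suite actually builds the URL from its `server_fqdn`/`server_port` configuration):

```python
import requests

# When embeddings are computed for "What is the capital of France ?"
base_url = "http://localhost:8080"  # assumed address for this sketch
response = requests.post(f"{base_url}/embedding",
                         json={"content": "What is the capital of France ?"})

# Then embeddings are generated
assert response.status_code == 200
embedding = response.json()["embedding"]
assert len(embedding) > 0  # a non-empty vector is what the suite checks for
```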

@@ -20,7 +20,6 @@ def step_server_config(context, server_fqdn, server_port, n_slots, seed):
     context.prompts = []
 
     openai.api_key = 'llama.cpp'
-    openai.api_base = f'{context.base_url}/v1/chat'
 
 
 @step(u"the server is {expecting_status}")

@@ -141,6 +140,30 @@ def step_all_prompts_are_predicted(context):
         assert_n_tokens_predicted(completion)
 
 
+@step(u'embeddings are computed for')
+def step_compute_embedding(context):
+    response = requests.post(f'{context.base_url}/embedding', json={
+        "content": context.text,
+    })
+    assert response.status_code == 200
+    context.embeddings = response.json()['embedding']
+
+
+@step(u'embeddings are generated')
+def step_compute_embeddings(context):
+    assert len(context.embeddings) > 0
+
+
+@step(u'an OAI compatible embeddings computation request for')
+def step_oai_compute_embedding(context):
+    openai.api_base = f'{context.base_url}/v1'
+    embeddings = openai.Embedding.create(
+        model=context.model,
+        input=context.text,
+    )
+    context.embeddings = embeddings
+
+
 def concurrent_requests(context, f_completion):
     context.completions.clear()
     context.completion_threads.clear()
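
In these steps, `context.text` is behave's built-in attribute holding the triple-quoted text block attached to the step in the feature file, while `context.model` and `context.base_url` come from earlier configuration steps. Note that the raw `/embedding` step stores the embedding vector itself, whereas the OAI step stores the whole client response object; with the 0.x `openai` client the vector would typically be pulled out as in the following sketch (not part of the commit, and assuming the usual response shape):

```python
# Assumed OAI-compatible response shape: {"data": [{"embedding": [...], ...}], ...}
oai_vector = context.embeddings['data'][0]['embedding']
assert len(oai_vector) > 0  # stricter than len(context.embeddings) > 0
```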

@@ -162,6 +185,7 @@ def request_completion(context, prompt, n_predict=None):
 
 
 def oai_chat_completions(context, user_prompt):
+    openai.api_base = f'{context.base_url}/v1/chat'
     chat_completion = openai.Completion.create(
         messages=[
             {
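
Because `openai.api_base` is a process-global setting in the 0.x client, it is now set right before each request type: the chat helper points it at `.../v1/chat` (added here) and the new embeddings step points it at `.../v1`, instead of fixing it once in `step_server_config`. This keeps the two kinds of requests from clobbering each other's base URL.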