server: tests: add OAI-compatible embeddings test with multiple inputs
This commit is contained in:
parent
466987eb7b
commit
04f4cbbd9e
2 changed files with 40 additions and 2 deletions
|
@ -60,6 +60,19 @@ Feature: llama.cpp server
|
||||||
"""
|
"""
|
||||||
Then embeddings are generated
|
Then embeddings are generated
|
||||||
|
|
||||||
|
Scenario: OAI Embeddings compatibility with multiple inputs
|
||||||
|
Given a model tinyllama-2
|
||||||
|
Given a prompt:
|
||||||
|
"""
|
||||||
|
In which country Paris is located ?
|
||||||
|
"""
|
||||||
|
And a prompt:
|
||||||
|
"""
|
||||||
|
Is Madrid the capital of Spain ?
|
||||||
|
"""
|
||||||
|
When an OAI compatible embeddings computation request for multiple inputs
|
||||||
|
Then embeddings are generated
|
||||||
|
|
||||||
|
|
||||||
Scenario: Tokenize / Detokenize
|
Scenario: Tokenize / Detokenize
|
||||||
When tokenizing:
|
When tokenizing:
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
import asyncio
|
import asyncio
|
||||||
|
import collections
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
|
@ -321,7 +322,15 @@ async def step_compute_embedding(context):
|
||||||
|
|
||||||
@step(u'embeddings are generated')
def step_assert_embeddings(context):
    """Assert that the server response contains valid embeddings.

    Two cases, depending on what earlier steps recorded on the context:
    - no prompts recorded: context.embeddings holds a single embedding,
      which is validated directly;
    - prompts recorded: one embedding per prompt is expected.  The prompt
      list is drained one entry per validated embedding, leaving it empty
      once every embedding has been checked.
    """
    # Idiomatic emptiness test instead of `len(...) == 0`.
    if not context.prompts:
        assert_embeddings(context.embeddings)
    else:
        assert len(context.embeddings) == len(context.prompts), (f"unexpected response:\n"
                                                                 f"context.prompts={context.prompts}\n"
                                                                 f"context.embeddings={context.embeddings}")
        for embedding in context.embeddings:
            # Consume one prompt per embedding so subsequent steps see an
            # empty prompt list after all embeddings are validated.
            context.prompts.pop()
            assert_embeddings(embedding)
|
||||||
|
|
||||||
|
|
||||||
@step(u'an OAI compatible embeddings computation request for')
|
@step(u'an OAI compatible embeddings computation request for')
|
||||||
|
@ -333,6 +342,15 @@ async def step_oai_compute_embeddings(context):
|
||||||
model=context.model)
|
model=context.model)
|
||||||
|
|
||||||
|
|
||||||
|
@step(u'an OAI compatible embeddings computation request for multiple inputs')
# NOTE(review): other steps in this file use `@async_run_until_complete()`
# with parentheses — confirm the decorator supports both forms.
@async_run_until_complete
async def step_oai_compute_embeddings_multiple_inputs(context):
    """Send every recorded prompt through the OAI-compatible embeddings
    endpoint and store the resulting embeddings on the behave context."""
    embeddings = await request_oai_embeddings(
        context.prompts,
        base_url=context.base_url,
        user_api_key=context.user_api_key,
        model=context.model,
    )
    context.embeddings = embeddings
|
||||||
|
|
||||||
|
|
||||||
@step(u'concurrent embedding requests')
|
@step(u'concurrent embedding requests')
|
||||||
@async_run_until_complete()
|
@async_run_until_complete()
|
||||||
async def step_concurrent_embedding_requests(context):
|
async def step_concurrent_embedding_requests(context):
|
||||||
|
@ -607,10 +625,17 @@ async def request_oai_embeddings(input,
|
||||||
else:
|
else:
|
||||||
openai.api_key = user_api_key
|
openai.api_key = user_api_key
|
||||||
openai.api_base = f'{base_url}/v1'
|
openai.api_base = f'{base_url}/v1'
|
||||||
embeddings = openai.Embedding.create(
|
oai_embeddings = openai.Embedding.create(
|
||||||
model=model,
|
model=model,
|
||||||
input=input,
|
input=input,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
if isinstance(input, collections.abc.Sequence):
|
||||||
|
embeddings = []
|
||||||
|
for an_oai_embeddings in oai_embeddings.data:
|
||||||
|
embeddings.append(an_oai_embeddings.embedding)
|
||||||
|
else:
|
||||||
|
embeddings = oai_embeddings.data.embedding
|
||||||
return embeddings
|
return embeddings
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue