server: tests: add tokenize/detokenize scenario
This commit is contained in:
parent
e6d482088d
commit
1065f6d41b
3 changed files with 35 additions and 3 deletions
|
@ -13,4 +13,4 @@ Server tests scenario using [BDD](https://en.wikipedia.org/wiki/Behavior-driven_
|
||||||
### Skipped scenario
|
### Skipped scenario
|
||||||
|
|
||||||
Scenario must be annotated with `@llama.cpp` to be included in the scope.
|
Scenario must be annotated with `@llama.cpp` to be included in the scope.
|
||||||
`@bug` annotation aims to link a scenario with a GitHub issue.
|
`@bug` annotation aims to link a scenario with a GitHub issue.
|
||||||
|
|
|
@ -103,7 +103,7 @@ Feature: llama.cpp server
|
||||||
Scenario: Embedding
|
Scenario: Embedding
|
||||||
When embeddings are computed for:
|
When embeddings are computed for:
|
||||||
"""
|
"""
|
||||||
What is the capital of France ?
|
What is the capital of Bulgaria ?
|
||||||
"""
|
"""
|
||||||
Then embeddings are generated
|
Then embeddings are generated
|
||||||
|
|
||||||
|
@ -115,4 +115,14 @@ Feature: llama.cpp server
|
||||||
"""
|
"""
|
||||||
What is the capital of Spain ?
|
What is the capital of Spain ?
|
||||||
"""
|
"""
|
||||||
Then embeddings are generated
|
Then embeddings are generated
|
||||||
|
|
||||||
|
|
||||||
|
@llama.cpp
|
||||||
|
Scenario: Tokenize / Detokenize
|
||||||
|
When tokenizing:
|
||||||
|
"""
|
||||||
|
What is the capital of France ?
|
||||||
|
"""
|
||||||
|
Then tokens can be detokenize
|
||||||
|
|
||||||
|
|
|
@ -164,6 +164,28 @@ def step_oai_compute_embedding(context):
|
||||||
context.embeddings = embeddings
|
context.embeddings = embeddings
|
||||||
|
|
||||||
|
|
||||||
|
@step(u'tokenizing')
def step_tokenize(context):
    """Tokenize the scenario's doc-string text via the server's /tokenize endpoint.

    Stores the raw text on ``context.tokenized_text`` (so the detokenize step
    can compare against it) and the resulting token ids on ``context.tokens``.
    NOTE(review): assumes ``context.text`` was set by a previous doc-string step.
    """
    text = context.text
    context.tokenized_text = text

    payload = {"content": text}
    response = requests.post(f'{context.base_url}/tokenize', json=payload)

    assert response.status_code == 200
    context.tokens = response.json()['tokens']
|
||||||
|
|
||||||
|
|
||||||
|
@step(u'tokens can be detokenize')
def step_detokenize(context):
    """Round-trip check: send ``context.tokens`` to /detokenize and verify the
    result matches the original text stored by the tokenize step.

    Fixes over the previous version: ``response.json()`` is parsed once instead
    of twice, and the leftover debug ``print`` is folded into the assertion
    message so the payload is only shown on failure.
    """
    assert len(context.tokens) > 0

    response = requests.post(f'{context.base_url}/detokenize', json={
        "tokens": context.tokens,
    })
    assert response.status_code == 200

    body = response.json()
    # FIXME the detokenize answer contains a space prefix ?
    detokenized = body['content'].strip()
    assert context.tokenized_text == detokenized, \
        f"detokenize mismatch: {body}"
|
||||||
|
|
||||||
|
|
||||||
def concurrent_requests(context, f_completion):
|
def concurrent_requests(context, f_completion):
|
||||||
context.completions.clear()
|
context.completions.clear()
|
||||||
context.completion_threads.clear()
|
context.completion_threads.clear()
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue