server: tests: add tokenize/detokenize scenario

Pierrick HYMBERT 2024-02-21 00:13:53 +01:00
parent e6d482088d
commit 1065f6d41b
3 changed files with 35 additions and 3 deletions


@@ -13,4 +13,4 @@ Server tests scenario using [BDD](https://en.wikipedia.org/wiki/Behavior-driven_
### Skipped scenario
Scenario must be annotated with `@llama.cpp` to be included in the scope.
`@bug` annotation aims to link a scenario with a GitHub issue.


@@ -103,7 +103,7 @@ Feature: llama.cpp server
Scenario: Embedding
When embeddings are computed for:
"""
What is the capital of France ?
What is the capital of Bulgaria ?
"""
Then embeddings are generated
@@ -115,4 +115,14 @@ Feature: llama.cpp server
"""
What is the capital of Spain ?
"""
Then embeddings are generated
@llama.cpp
Scenario: Tokenize / Detokenize
When tokenizing:
"""
What is the capital of France ?
"""
Then tokens can be detokenize


@@ -164,6 +164,28 @@ def step_oai_compute_embedding(context):
    context.embeddings = embeddings

@step(u'tokenizing')
def step_tokenize(context):
    context.tokenized_text = context.text
    response = requests.post(f'{context.base_url}/tokenize', json={
        "content": context.tokenized_text,
    })
    assert response.status_code == 200
    context.tokens = response.json()['tokens']

@step(u'tokens can be detokenize')
def step_detokenize(context):
    assert len(context.tokens) > 0
    response = requests.post(f'{context.base_url}/detokenize', json={
        "tokens": context.tokens,
    })
    assert response.status_code == 200
    print(response.json())
    # FIXME the detokenize answer contains a space prefix ?
    assert context.tokenized_text == response.json()['content'].strip()

def concurrent_requests(context, f_completion):
    context.completions.clear()
    context.completion_threads.clear()
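The round-trip property the new steps assert (tokenize, detokenize, then compare after stripping the leading space) can be sketched self-contained with a toy word-level tokenizer standing in for the server's `/tokenize` and `/detokenize` endpoints. The `tokenize`/`detokenize` functions and the vocab below are illustrative stand-ins, not llama.cpp's actual tokenizer:

```python
# Toy stand-in for the server tokenizer, illustrating the round-trip
# check in step_tokenize / step_detokenize. Not llama.cpp's real behavior.
def tokenize(content: str, vocab: dict) -> list:
    # Assign each previously unseen word the next free token id.
    return [vocab.setdefault(word, len(vocab)) for word in content.split()]

def detokenize(tokens: list, vocab: dict) -> str:
    inverse = {token_id: word for word, token_id in vocab.items()}
    # The leading space mimics the space prefix mentioned in the FIXME.
    return " " + " ".join(inverse[t] for t in tokens)

vocab = {}
text = "What is the capital of France ?"
tokens = tokenize(text, vocab)
assert len(tokens) > 0
# As in step_detokenize, strip before comparing with the original text.
assert detokenize(tokens, vocab).strip() == text
```

This mirrors why the scenario compares against `content.strip()` rather than `content`: the detokenized string can carry a leading space even when the round trip is otherwise lossless.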