diff --git a/examples/server/tests/README.md b/examples/server/tests/README.md
index 3cdcc5ca3..569e675b7 100644
--- a/examples/server/tests/README.md
+++ b/examples/server/tests/README.md
@@ -1,6 +1,6 @@
 # Server Integration Test
 
-Functional server tests suite.
+Server test scenarios using [BDD](https://en.wikipedia.org/wiki/Behavior-driven_development) with [behave](https://behave.readthedocs.io/en/latest/).
 
 ### Install dependencies
 `pip install -r requirements.txt`
@@ -9,3 +9,8 @@ Functional server tests suite.
 1. Build the server
 2. download a GGUF model: `./scripts/hf.sh --repo ggml-org/models --file tinyllamas/stories260K.gguf`
 3. Start the test: `./tests.sh stories260K.gguf -ngl 23`
+
+### Skipped scenarios
+
+Scenarios must be annotated with `@llama.cpp` to be included in the test scope.
+The `@bug` annotation links a scenario to a GitHub issue.
\ No newline at end of file
diff --git a/examples/server/tests/features/server.feature b/examples/server/tests/features/server.feature
index df376b0f2..5f6b161c8 100644
--- a/examples/server/tests/features/server.feature
+++ b/examples/server/tests/features/server.feature
@@ -98,3 +98,21 @@ Feature: llama.cpp server
     And all slots are idle
     Then all prompts are predicted
+
+  @llama.cpp
+  Scenario: Embedding
+    When embeddings are computed for:
+      """
+      What is the capital of France ?
+      """
+    Then embeddings are generated
+
+
+  @llama.cpp
+  Scenario: OAI Embeddings compatibility
+    Given a model tinyllama-2
+    When an OAI compatible embeddings computation request for:
+      """
+      What is the capital of Spain ?
+      """
+    Then embeddings are generated
\ No newline at end of file
diff --git a/examples/server/tests/features/steps/steps.py b/examples/server/tests/features/steps/steps.py
index 75e893afa..140e02626 100644
--- a/examples/server/tests/features/steps/steps.py
+++ b/examples/server/tests/features/steps/steps.py
@@ -20,7 +20,6 @@ def step_server_config(context, server_fqdn, server_port, n_slots, seed):
     context.prompts = []
 
     openai.api_key = 'llama.cpp'
-    openai.api_base = f'{context.base_url}/v1/chat'
 
 
 @step(u"the server is {expecting_status}")
@@ -141,6 +140,30 @@ def step_all_prompts_are_predicted(context):
         assert_n_tokens_predicted(completion)
 
 
+@step(u'embeddings are computed for')
+def step_compute_embedding(context):
+    response = requests.post(f'{context.base_url}/embedding', json={
+        "content": context.text,
+    })
+    assert response.status_code == 200
+    context.embeddings = response.json()['embedding']
+
+
+@step(u'embeddings are generated')
+def step_compute_embeddings(context):
+    assert len(context.embeddings) > 0
+
+
+@step(u'an OAI compatible embeddings computation request for')
+def step_oai_compute_embedding(context):
+    openai.api_base = f'{context.base_url}/v1'
+    embeddings = openai.Embedding.create(
+        model=context.model,
+        input=context.text,
+    )
+    context.embeddings = embeddings
+
+
 def concurrent_requests(context, f_completion):
     context.completions.clear()
     context.completion_threads.clear()
@@ -162,6 +185,7 @@ def request_completion(context, prompt, n_predict=None):
 
 
 def oai_chat_completions(context, user_prompt):
+    openai.api_base = f'{context.base_url}/v1/chat'
     chat_completion = openai.Completion.create(
         messages=[
             {
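
For reference, a minimal sketch of the two embedding calls the new steps make, run outside of behave. The server address `http://localhost:8080` is an assumption (not part of the patch), and the OAI call assumes the legacy `openai` Python client (< 1.0) that the test suite imports; the `/embedding` payload and response key mirror `step_compute_embedding`, and the OAI-compatible call mirrors `step_oai_compute_embedding`:

```python
import requests
import openai

BASE_URL = "http://localhost:8080"  # assumption: a llama.cpp server started locally, e.g. via tests.sh

# Native endpoint, as used by step_compute_embedding:
# POST /embedding with {"content": ...} returns {"embedding": [...]}
resp = requests.post(f"{BASE_URL}/embedding",
                     json={"content": "What is the capital of France ?"})
resp.raise_for_status()
print(len(resp.json()["embedding"]))

# OAI-compatible route, as used by step_oai_compute_embedding
# (legacy openai<1.0 client; api_base points at the server's /v1 prefix).
openai.api_key = "llama.cpp"   # placeholder key, same value the test suite sets
openai.api_base = f"{BASE_URL}/v1"
result = openai.Embedding.create(model="tinyllama-2",
                                 input="What is the capital of Spain ?")
# Standard OAI embeddings response shape; the test itself only checks non-emptiness.
print(len(result["data"][0]["embedding"]))
```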