diff --git a/examples/server/tests/features/server.feature b/examples/server/tests/features/server.feature
index b571582a7..436078ca5 100644
--- a/examples/server/tests/features/server.feature
+++ b/examples/server/tests/features/server.feature
@@ -75,10 +75,14 @@ Feature: llama.cpp server
     When an OAI compatible embeddings computation request for multiple inputs
     Then embeddings are generated

-
   Scenario: Tokenize / Detokenize
     When tokenizing:
     """
       What is the capital of France ?
     """
     Then tokens can be detokenize
+
+  Scenario: Models available
+    Given available models
+    Then 1 models are supported
+    Then model 0 is tinyllama-2
\ No newline at end of file
diff --git a/examples/server/tests/features/steps/steps.py b/examples/server/tests/features/steps/steps.py
index 381da105e..7e7cca211 100644
--- a/examples/server/tests/features/steps/steps.py
+++ b/examples/server/tests/features/steps/steps.py
@@ -472,6 +472,27 @@ async def step_prometheus_metrics_exported(context):
     assert metric_exported, "No metrics exported"


+@step(u'available models')
+def step_available_models(context):
+    # openai client always expects an api_key
+    openai.api_key = context.user_api_key if context.user_api_key is not None else 'nope'
+    openai.api_base = f'{context.base_url}/v1'
+    context.models = openai.Model.list().data
+
+
+@step(u'{n_model} models are supported')
+def step_supported_models(context, n_model):
+    if context.debug:
+        print("server models available:", context.models)
+    assert len(context.models) == int(n_model)
+
+
+@step(u'model {i_model} is {model_alias}')
+def step_supported_model(context, i_model, model_alias):
+    model = context.models[int(i_model)]
+    assert model.id == model_alias, f"model id {model.id} != {model_alias}"
+
+
 async def concurrent_requests(context, f_completion, *args, **kwargs):
     n_prompts = len(context.prompts)
     if context.debug:
@@ -724,7 +745,7 @@ async def wait_for_health_status(context,
     print(f"Starting checking for health for expected_health_status={expected_health_status}")
     timeout = 3  # seconds
     if expected_health_status == 'ok':
-        timeout = 10 # CI slow inference
+        timeout = 10  # CI slow inference
     interval = 0.5
     counter = 0
     async with aiohttp.ClientSession() as session:
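
The new steps drive the server's `/v1/models` endpoint through the pre-1.0 `openai` Python client, so the scenario doubles as a compatibility check for that client. A minimal standalone sketch of the same check, assuming a server already listening on `http://localhost:8080` and serving a model aliased `tinyllama-2` (both values are taken from the test scenario, not fixed defaults):

```python
# Standalone sketch of what the new steps exercise, assuming a llama.cpp
# server running at http://localhost:8080 and the pre-1.0 `openai` package
# (module-level api_base / openai.Model.list) installed.
import openai

openai.api_key = 'nope'  # the client requires a key even if the server ignores it
openai.api_base = 'http://localhost:8080/v1'

models = openai.Model.list().data
assert len(models) == 1, f"expected 1 model, got {len(models)}"
assert models[0].id == 'tinyllama-2', f"model id {models[0].id} != tinyllama-2"
print("models endpoint OK:", [m.id for m in models])
```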
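
behave's default `parse` matcher binds `{n_model}` and `{i_model}` as strings, which is why the step bodies above cast with `int(...)`. As a hedged alternative, not what this diff does: `parse` also accepts type hints such as `:d`, which would move the conversion into the pattern itself:

```python
# Sketch of the same steps using behave's typed parse expressions;
# names and bodies mirror the diff but the :d conversion is an alternative.
from behave import step


@step('{n_model:d} models are supported')  # :d binds n_model as int
def step_supported_models(context, n_model):
    assert len(context.models) == n_model


@step('model {i_model:d} is {model_alias}')  # :d binds i_model as int
def step_supported_model(context, i_model, model_alias):
    model = context.models[i_model]
    assert model.id == model_alias, f"model id {model.id} != {model_alias}"
```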
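
The last hunk appears to only normalize the comment spacing on the CI timeout line; the surrounding `wait_for_health_status` helper is a poll loop over the server's `/health` endpoint with a 0.5 s interval. A simplified sketch of that polling pattern, with illustrative names and an assumed `{"status": "ok"}` response shape rather than the exact steps.py logic:

```python
# Minimal sketch of the health-polling pattern around the touched line.
# The endpoint path and response shape are assumptions for illustration.
import asyncio
import aiohttp


async def wait_for_health(base_url, expected_status='ok', timeout=10, interval=0.5):
    async with aiohttp.ClientSession() as session:
        elapsed = 0.0
        while elapsed < timeout:
            async with session.get(f'{base_url}/health') as response:
                if response.status == 200:
                    health = await response.json()
                    if health['status'] == expected_status:
                        return True
            await asyncio.sleep(interval)
            elapsed += interval
        return False


# Example: asyncio.run(wait_for_health('http://localhost:8080'))
```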