server: tests: add models endpoint scenario
parent cb5e8f7fc4
commit 73a7e42692

2 changed files with 27 additions and 2 deletions
@@ -75,10 +75,14 @@ Feature: llama.cpp server
    When an OAI compatible embeddings computation request for multiple inputs
    Then embeddings are generated

  Scenario: Tokenize / Detokenize
    When tokenizing:
      """
      What is the capital of France ?
      """
    Then tokens can be detokenize

  Scenario: Models available
    Given available models
    Then 1 models are supported
    Then model 0 is tinyllama-2
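The new scenario exercises the server's OpenAI-compatible model listing endpoint. As a minimal sketch of the same request made directly over HTTP, outside the behave harness, assuming a llama.cpp server listening on http://localhost:8080 and the dummy 'nope' API key that the step definitions below also use:

import requests

# Assumed server address; the actual port depends on how the server was started.
base_url = "http://localhost:8080"

# Placeholder bearer token mirroring the test steps; whether it is checked
# depends on whether the server was started with an API key configured.
response = requests.get(f"{base_url}/v1/models",
                        headers={"Authorization": "Bearer nope"})
response.raise_for_status()

models = response.json()["data"]
print(f"{len(models)} model(s) served")
for model in models:
    # The scenario expects a single model whose id is the alias the server
    # was started with, e.g. tinyllama-2.
    print(model["id"])

Each step of the scenario maps onto one of the step definitions added to steps.py below: "available models" fetches the list, "{n_model} models are supported" checks its length, and "model {i_model} is {model_alias}" checks an individual id.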
@@ -472,6 +472,27 @@ async def step_prometheus_metrics_exported(context):
    assert metric_exported, "No metrics exported"


@step(u'available models')
def step_available_models(context):
    # openai client always expects an api_key
    openai.api_key = context.user_api_key if context.user_api_key is not None else 'nope'
    openai.api_base = f'{context.base_url}/v1'
    context.models = openai.Model.list().data


@step(u'{n_model} models are supported')
def step_supported_models(context, n_model):
    if context.debug:
        print("server models available:", context.models)
    assert len(context.models) == int(n_model)


@step(u'model {i_model} is {model_alias}')
def step_supported_models(context, i_model, model_alias):
    model = context.models[int(i_model)]
    assert model.id == model_alias, f"model id {model.id} == {model_alias}"


async def concurrent_requests(context, f_completion, *args, **kwargs):
    n_prompts = len(context.prompts)
    if context.debug:
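step_available_models uses the pre-1.0 openai Python client: it configures the module-level openai.api_key and openai.api_base and lists models with openai.Model.list(). Note that behave registers each step by the pattern passed to @step(...) when the decorator runs, so reusing the Python function name step_supported_models for two different patterns does not affect step dispatch. For readers on openai>=1.0, a rough equivalent of the listing call, as a sketch that is not part of the commit and where the base URL and dummy key are assumptions mirroring the step above:

from openai import OpenAI

# Assumed address of a locally running llama.cpp server.
client = OpenAI(base_url="http://localhost:8080/v1", api_key="nope")
models = client.models.list().data
print([model.id for model in models])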
@@ -724,7 +745,7 @@ async def wait_for_health_status(context,
        print(f"Starting checking for health for expected_health_status={expected_health_status}")
    timeout = 3 # seconds
    if expected_health_status == 'ok':
        timeout = 10 # CI slow inference
    interval = 0.5
    counter = 0
    async with aiohttp.ClientSession() as session:
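wait_for_health_status, only partially visible in this last hunk, polls the server's /health route until it reports the expected status or a timeout expires, with a longer timeout when 'ok' is expected to tolerate slow inference on CI. A condensed sketch of that polling pattern, with the URL, response field, and helper name assumed from the visible context rather than copied from the commit:

import asyncio
import aiohttp

async def poll_health(base_url, expected_health_status, timeout=10, interval=0.5):
    # Poll GET {base_url}/health every `interval` seconds until the reported
    # status matches `expected_health_status` or `timeout` seconds elapse.
    elapsed = 0.0
    async with aiohttp.ClientSession() as session:
        while elapsed < timeout:
            async with session.get(f"{base_url}/health") as response:
                if response.status == 200:
                    health = await response.json()
                    if health.get("status") == expected_health_status:
                        return True
            await asyncio.sleep(interval)
            elapsed += interval
    return False

asyncio.run(poll_health("http://localhost:8080", "ok")) would then approximate the readiness wait the test helper performs before running scenarios such as the new models check.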