This commit is contained in:
Vinesh Janarthanan 2024-11-25 21:10:25 -06:00 committed by GitHub
commit 8930e8850e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 38 additions and 2 deletions

View file

@ -230,6 +230,8 @@ struct server_slot {
if (params.n_predict != -1) {
    n_remaining = params.n_predict - n_decoded;
} else if (global_params.n_predict == -2) {
    n_remaining = n_ctx - n_past - 1;
} else if (global_params.n_predict != -1) {
    n_remaining = global_params.n_predict - n_decoded;
}

View file

@ -0,0 +1,32 @@
@llama.cpp
@n_predict
Feature: llama.cpp server
Background: Server startup
Given a server listening on localhost:8080
And a model file test-model.gguf
And a model alias tinyllama-2
And 42 as server seed
And 64 KV cache size
Scenario: Generate N tokens
And 12 max tokens to predict
Then the server is starting
Then the server is healthy
Given a prompt:
"""
Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.
"""
And a completion request with no api error
Then 12 tokens are predicted
Scenario: Generate tokens until context is full
And -2 server max tokens to predict
Then the server is starting
Then the server is healthy
Given a prompt:
"""
Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.
"""
And a completion request with no api error
Then 11 tokens are predicted

View file

@ -159,8 +159,10 @@ def step_n_slots(context, n_slots: int):
@step('{n_predict:d} server max tokens to predict')
def step_server_n_predict(context, n_predict: int):
    """Record the server-wide max-tokens-to-predict setting on the behave context.

    Positive values and the sentinels -1 (no limit) and -2 (fill the
    remaining context) are kept; anything else is treated as "unset".
    """
    accepted = n_predict > 0 or n_predict in (-1, -2)
    context.n_server_predict = n_predict if accepted else None
@step('{slot_save_path} as slot save path')
def step_slot_save_path(context, slot_save_path: str):