Merge 3c8b10560a
into 0eb4e12bee
This commit is contained in:
commit
8930e8850e
3 changed files with 38 additions and 2 deletions
|
@ -230,6 +230,8 @@ struct server_slot {
|
|||
|
||||
if (params.n_predict != -1) {
|
||||
n_remaining = params.n_predict - n_decoded;
|
||||
} else if (global_params.n_predict == -2) {
|
||||
n_remaining = n_ctx - n_past - 1;
|
||||
} else if (global_params.n_predict != -1) {
|
||||
n_remaining = global_params.n_predict - n_decoded;
|
||||
}
|
||||
|
|
32
examples/server/tests/features/n_predict.feature
Normal file
32
examples/server/tests/features/n_predict.feature
Normal file
|
@ -0,0 +1,32 @@
|
|||
@llama.cpp
@n_predict
Feature: llama.cpp server

  Background: Server startup
    Given a server listening on localhost:8080
    And a model file test-model.gguf
    And a model alias tinyllama-2
    And 42 as server seed
    And 64 KV cache size

  Scenario: Generate N tokens
    And 12 max tokens to predict
    Then the server is starting
    Then the server is healthy
    Given a prompt:
    """
    Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.
    """
    And a completion request with no api error
    Then 12 tokens are predicted

  Scenario: Generate tokens until context is full
    And -2 server max tokens to predict
    Then the server is starting
    Then the server is healthy
    Given a prompt:
    """
    Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua.
    """
    And a completion request with no api error
    Then 11 tokens are predicted
|
|
@ -159,8 +159,10 @@ def step_n_slots(context, n_slots: int):
|
|||
|
||||
@step('{n_predict:d} server max tokens to predict')
def step_server_n_predict(context, n_predict: int):
    """Record the server-wide max-tokens-to-predict setting on the test context.

    Positive values cap generation at n_predict tokens. The sentinels must be
    forwarded as-is rather than discarded: -1 means "no limit" and -2 means
    "generate until the context is full". Any other non-positive value leaves
    the server default in place (None).
    """
    if n_predict > 0 or n_predict in (-1, -2):
        context.n_server_predict = n_predict
    else:
        context.n_server_predict = None
|
||||
|
||||
@step('{slot_save_path} as slot save path')
|
||||
def step_slot_save_path(context, slot_save_path: str):
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue