diff --git a/examples/server/README.md b/examples/server/README.md
index 824fd5194..fbd2be8d4 100644
--- a/examples/server/README.md
+++ b/examples/server/README.md
@@ -154,7 +154,7 @@ node .
 
     `ignore_eos`: Ignore end of stream token and continue generating (default: false).
 
-    `logit_bias`: Modify the likelihood of a token appearing in the generated text completion. For example, use `logit-bias: [[15043,1]]` to increase the likelihood of the token 'Hello', or `logit-bias: [[15043,-1]]` to decrease its likelihood. Setting the value to false, `logit-bias: [[15043,false]]` ensures that the token `Hello` is never produced (default: []).
+    `logit_bias`: Modify the likelihood of a token appearing in the generated text completion. For example, use `"logit_bias": [[15043,1.0]]` to increase the likelihood of the token 'Hello', or `"logit_bias": [[15043,-1.0]]` to decrease its likelihood. Setting the value to false, `"logit_bias": [[15043,false]]` ensures that the token `Hello` is never produced (default: []).
 
 - **POST** `/tokenize`: Tokenize a given text.
 
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index f28308267..8a1ad0086 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -700,7 +700,7 @@ static void parse_options_completion(const json & body, llama_server_context & l
             if (el.is_array() && el.size() == 2 && el[0].is_number_integer()) {
                 llama_token tok = el[0].get<llama_token>();
                 if (tok >= 0 && tok < n_vocab) {
-                    if (el[1].is_number_float()) {
+                    if (el[1].is_number()) {
                         llama.params.logit_bias[tok] = el[1].get<float>();
                     } else if (el[1].is_boolean() && !el[1].get<bool>()) {
                         llama.params.logit_bias[tok] = -INFINITY;
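
For reference, here is a minimal standalone sketch (not part of the patch; it assumes only nlohmann/json, which `server.cpp` uses) of why the predicate had to change: an integer bias such as `[[15043, 1]]` fails `is_number_float()` and was silently ignored, while `is_number()` accepts both integers and floats, and `get<float>()` converts either to the `float` stored in `logit_bias`.

```cpp
// Sketch only: demonstrates is_number_float() vs is_number() on typical
// logit_bias payloads. Compile against nlohmann/json.
#include <cstdio>
#include <nlohmann/json.hpp>

using json = nlohmann::json;

int main() {
    // A client may send the bias as an integer, a float, or `false`.
    const json body = json::parse(
        R"({"logit_bias": [[15043, 1], [15043, -1.0], [15043, false]]})");

    for (const auto & el : body["logit_bias"]) {
        const auto & val = el[1];
        // Before the fix: only -1.0 passes is_number_float(); the integer 1 is dropped.
        // After the fix: is_number() accepts both 1 and -1.0.
        std::printf("is_number_float=%d is_number=%d\n",
                    (int) val.is_number_float(), (int) val.is_number());
        if (val.is_number()) {
            std::printf("  bias = %f\n", val.get<float>()); // integers convert to float
        } else if (val.is_boolean() && !val.get<bool>()) {
            std::printf("  bias = -inf (token banned)\n");
        }
    }
    return 0;
}
```

With this change, `"logit_bias": [[15043,2]]` and `"logit_bias": [[15043,2.0]]` behave identically, matching the updated README wording.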