Spaces to 4 and other code style cleanup. Notes in README.
parent ccd85e0a6b
commit a9c34779f6
2 changed files with 842 additions and 842 deletions
@@ -23,6 +23,8 @@ Command line options:
 
 ## Quick Start
 
+**Note:** The server is not built by default. Make sure to add `LLAMA_BUILD_SERVER=ON` to your CMake command.
+
 To get started right away, run the following command, making sure to use the correct path for the model you have:
 
 ### Unix-based systems (Linux, macOS, etc.):
@@ -99,7 +101,7 @@ node .
 
 `top_p`: Limit the next token selection to a subset of tokens with a cumulative probability above a threshold P (default: 0.9).
 
-`n_predict`: Set the number of tokens to predict when generating text (default: 128, -1 = infinity).
+`n_predict`: Set the number of tokens to predict when generating text. **Note:** May exceed the limit slightly if the last token is a partial multibyte character. (default: 128, -1 = infinity).
 
 `n_keep`: Specify the number of tokens from the initial prompt to retain when the model resets its internal context.
 By default, this value is set to 0 (meaning no tokens are kept). Use `-1` to retain all tokens from the initial prompt.
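The multibyte caveat added to `n_predict` comes from how generation stops: the server counts tokens, but a token can end partway through a UTF-8 character, so a few extra tokens may be emitted to finish it. Below is a minimal sketch of how one might detect an incomplete trailing UTF-8 sequence; the helper name and logic are illustrative, not the server's actual code.

```cpp
#include <string>

// Returns true if `s` ends in the middle of a UTF-8 multibyte sequence,
// i.e. the last lead byte announces more continuation bytes than follow it.
// Illustrative helper, not the server's implementation.
static bool ends_with_partial_utf8(const std::string & s) {
    // Walk back over trailing continuation bytes (10xxxxxx).
    size_t cont = 0;
    size_t i = s.size();
    while (i > 0 && (static_cast<unsigned char>(s[i - 1]) & 0xC0) == 0x80) {
        --i;
        ++cont;
    }
    if (i == 0) {
        return cont > 0; // continuation bytes with no lead byte
    }
    const unsigned char lead = static_cast<unsigned char>(s[i - 1]);
    size_t expected = 0;
    if ((lead & 0x80) == 0x00) expected = 0;       // ASCII, complete
    else if ((lead & 0xE0) == 0xC0) expected = 1;  // 2-byte sequence
    else if ((lead & 0xF0) == 0xE0) expected = 2;  // 3-byte sequence
    else if ((lead & 0xF8) == 0xF0) expected = 3;  // 4-byte sequence
    return cont < expected; // fewer continuation bytes than announced
}
```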
@@ -282,24 +282,18 @@ struct llama_server_context
             {
                 // Greedy sampling
                 id = llama_sample_token_greedy(ctx, &candidates_p);
-            }
-            else
-            {
+            } else {
                 if (mirostat == 1)
                 {
                     static float mirostat_mu = 2.0f * mirostat_tau;
                     const int mirostat_m = 100;
                     llama_sample_temperature(ctx, &candidates_p, temp);
                     id = llama_sample_token_mirostat(ctx, &candidates_p, mirostat_tau, mirostat_eta, mirostat_m, &mirostat_mu);
-                }
-                else if (mirostat == 2)
-                {
+                } else if (mirostat == 2) {
                     static float mirostat_mu = 2.0f * mirostat_tau;
                     llama_sample_temperature(ctx, &candidates_p, temp);
                     id = llama_sample_token_mirostat_v2(ctx, &candidates_p, mirostat_tau, mirostat_eta, &mirostat_mu);
-                }
-                else
-                {
+                } else {
                     // Temperature sampling
                     llama_sample_tail_free(ctx, &candidates_p, tfs_z, 1);
                     llama_sample_typical(ctx, &candidates_p, typical_p, 1);
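For readers skimming the hunk above: the sampling path picks one of four strategies. Here is a condensed restatement of that control flow, assuming the llama.h sampling API of this period. The `temp <= 0` guard is inferred from the "Greedy sampling" comment, and the tail of the default branch (top-k, top-p, and the final draw) is cut off in the hunk and shown here as it plausibly continues; this is a sketch, not a drop-in patch.

```cpp
#include "llama.h"

// Condensed view of the sampling dispatch in the hunk above.
static llama_token pick_token(llama_context * ctx, llama_token_data_array * cands,
                              float temp, int mirostat, float tau, float eta,
                              float tfs_z, float typical_p, int top_k, float top_p) {
    if (temp <= 0) {
        return llama_sample_token_greedy(ctx, cands);   // deterministic argmax
    }
    if (mirostat == 1) {
        static float mu = 2.0f * tau;                   // running surprise target, persists across calls
        llama_sample_temperature(ctx, cands, temp);
        return llama_sample_token_mirostat(ctx, cands, tau, eta, /*m=*/100, &mu);
    }
    if (mirostat == 2) {
        static float mu = 2.0f * tau;
        llama_sample_temperature(ctx, cands, temp);
        return llama_sample_token_mirostat_v2(ctx, cands, tau, eta, &mu);
    }
    // Default pipeline: filter the candidate set, then sample with temperature.
    llama_sample_tail_free(ctx, cands, tfs_z, 1);
    llama_sample_typical(ctx, cands, typical_p, 1);
    llama_sample_top_k(ctx, cands, top_k, 1);
    llama_sample_top_p(ctx, cands, top_p, 1);
    llama_sample_temperature(ctx, cands, temp);
    return llama_sample_token(ctx, cands);
}
```

Note the `static float mirostat_mu` in the original: because it is a function-local static, the adaptive mu value carries over between requests rather than resetting per generation.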
@@ -343,7 +337,8 @@ struct llama_server_context
                 const size_t tmp = word.size() + last_token_size;
                 const size_t from_pos = text.size() > tmp ? text.size() - tmp : 0;
                 pos = text.find(word, from_pos);
-            } else {
+            }
+            else {
                 pos = find_partial_stop_string(word, text);
             }
             if (pos != std::string::npos &&
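The branch above distinguishes a full stop-string search (`text.find`) from a partial one used while streaming: if the text ends with only the first few bytes of a stop word, the server must hold those bytes back instead of sending them, since the next token might complete the stop string. The commit does not show the body of `find_partial_stop_string`; the following is a sketch of the contract such a helper plausibly satisfies.

```cpp
#include <algorithm>
#include <string>

// Where does a strict prefix of `stop` begin at the tail of `text`?
// Returns the index of that prefix, or std::string::npos.  Full matches
// are handled by the text.find branch above.  Sketch only.
static size_t find_partial_stop_string(const std::string & stop, const std::string & text) {
    if (text.empty() || stop.empty()) {
        return std::string::npos;
    }
    // Try the longest candidate prefix first, shrinking one byte at a time.
    const size_t max_len = std::min(stop.size() - 1, text.size());
    for (size_t len = max_len; len > 0; --len) {
        // Does `text` end with the first `len` bytes of `stop`?
        if (text.compare(text.size() - len, len, stop, 0, len) == 0) {
            return text.size() - len;
        }
    }
    return std::string::npos;
}
```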
@@ -855,7 +850,8 @@ int main(int argc, char **argv)
             res.set_content(
                 data.dump(llama.json_indent, ' ', false, json::error_handler_t::replace),
                 "application/json");
-        } else {
+        }
+        else {
             const auto chunked_content_provider = [&](size_t, DataSink& sink) {
                 size_t sent_count = 0;
 
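The `chunked_content_provider` lambda above is where streamed completions are written out through cpp-httplib's `DataSink`. A self-contained sketch of that pattern follows, using `set_chunked_content_provider` as found in recent cpp-httplib versions; the endpoint and payload are made up for illustration.

```cpp
#include "httplib.h"
#include <cstring>

int main() {
    httplib::Server svr;

    // Toy endpoint; the real server pushes generated tokens through the
    // sink as they are produced, not a fixed list of strings.
    svr.Get("/stream", [](const httplib::Request &, httplib::Response & res) {
        res.set_chunked_content_provider(
            "text/plain",
            [](size_t /*offset*/, httplib::DataSink & sink) {
                const char * chunks[] = {"Hello", ", ", "world\n"};
                for (const char * c : chunks) {
                    sink.write(c, std::strlen(c));  // one HTTP chunk per write
                }
                sink.done();  // terminates the chunked response
                return true;  // no error occurred
            });
    });

    svr.listen("127.0.0.1", 8080);
}
```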
@@ -955,9 +951,11 @@ int main(int argc, char **argv)
         char buf[BUFSIZ];
         try {
             std::rethrow_exception(std::move(ep));
-        } catch (std::exception &e) {
+        }
+        catch (std::exception& e) {
             snprintf(buf, sizeof(buf), fmt, e.what());
-        } catch (...) {
+        }
+        catch (...) {
             snprintf(buf, sizeof(buf), fmt, "Unknown Exception");
         }
         res.set_content(buf, "text/plain");
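The `try { std::rethrow_exception(...) }` shape above is the standard way to recover a printable message from an opaque `std::exception_ptr` (here handed to the server's error handler). A standalone illustration of the pattern; the format string is an assumption for demonstration.

```cpp
#include <cstdio>
#include <exception>
#include <stdexcept>
#include <string>

// Turn a std::exception_ptr back into a message by rethrowing it and
// catching the concrete type, mirroring the hunk above.
static std::string describe(std::exception_ptr ep) {
    char buf[256];
    const char * fmt = "500 Internal Server Error\n%s";  // assumed format
    try {
        std::rethrow_exception(std::move(ep));
    }
    catch (std::exception& e) {
        snprintf(buf, sizeof(buf), fmt, e.what());
    }
    catch (...) {
        snprintf(buf, sizeof(buf), fmt, "Unknown Exception");
    }
    return buf;
}

int main() {
    try {
        throw std::runtime_error("model file not found");
    } catch (...) {
        std::printf("%s\n", describe(std::current_exception()).c_str());
    }
}
```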