Spaces to 4 and other code style cleanup. Notes in README.
This commit is contained in:
parent `ccd85e0a6b` · commit `a9c34779f6`
2 changed files with 842 additions and 842 deletions
README.md

```diff
@@ -23,6 +23,8 @@ Command line options:
 
 ## Quick Start
 
+**Note:** The server is not built by default. Make sure to add `LLAMA_BUILD_SERVER=ON` to your CMake command.
+
 To get started right away, run the following command, making sure to use the correct path for the model you have:
 
 ### Unix-based systems (Linux, macOS, etc.):
```
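For reference, `LLAMA_BUILD_SERVER` is an ordinary CMake option, so in a typical out-of-tree build it can be passed as `cmake .. -DLLAMA_BUILD_SERVER=ON` before compiling; the exact invocation depends on how the rest of your build is configured.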
```diff
@@ -99,7 +101,7 @@ node .
 
 `top_p`: Limit the next token selection to a subset of tokens with a cumulative probability above a threshold P (default: 0.9).
 
-`n_predict`: Set the number of tokens to predict when generating text (default: 128, -1 = infinity).
+`n_predict`: Set the number of tokens to predict when generating text. **Note:** May exceed the limit slightly if the last token is a partial multibyte character. (default: 128, -1 = infinity).
 
 `n_keep`: Specify the number of tokens from the initial prompt to retain when the model resets its internal context.
 By default, this value is set to 0 (meaning no tokens are kept). Use `-1` to retain all tokens from the initial prompt.
```
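The sampling parameters documented here correspond to the sampler calls visible in the server source below. As a minimal sketch, assuming the llama.cpp C API of this era (the `llama_sample_*` functions all operate on a `llama_token_data_array`), a plain non-mirostat chain looks roughly like this:

```cpp
// Minimal sketch, not taken from this commit: how top_k/top_p/temp chain
// before a token is picked. candidates_p is a llama_token_data_array
// built from the current logits (see the sketch after the next hunk).
llama_sample_top_k(ctx, &candidates_p, top_k, 1);              // keep the k most likely tokens
llama_sample_top_p(ctx, &candidates_p, top_p, 1);              // then cut to cumulative probability p
llama_sample_temperature(ctx, &candidates_p, temp);            // rescale the surviving logits
const llama_token id = llama_sample_token(ctx, &candidates_p); // sample from what is left
```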
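The multibyte caveat added to `n_predict` above exists because a single UTF-8 character can be split across several tokens, so the server may emit slightly past the limit rather than cut a character in half. A self-contained sketch of detecting an unfinished trailing sequence (hypothetical helper, not part of this commit):

```cpp
#include <string>

// Hypothetical helper: how many bytes at the end of `text` belong to an
// unfinished UTF-8 sequence (0 if the text ends on a complete character).
static size_t incomplete_utf8_suffix(const std::string & text) {
    size_t cont = 0;
    // Walk backwards over continuation bytes (bit pattern 10xxxxxx).
    while (cont < text.size() &&
           (static_cast<unsigned char>(text[text.size() - 1 - cont]) & 0xC0) == 0x80) {
        cont++;
    }
    if (cont == text.size()) {
        return 0; // no lead byte found; nothing to hold back
    }
    const unsigned char lead = text[text.size() - 1 - cont];
    // The lead byte announces how long the whole sequence should be.
    const size_t expect = (lead & 0x80) == 0x00 ? 1
                        : (lead & 0xE0) == 0xC0 ? 2
                        : (lead & 0xF0) == 0xE0 ? 3
                        : (lead & 0xF8) == 0xF0 ? 4 : 1;
    const size_t have = cont + 1;
    return have < expect ? have : 0; // bytes to withhold until the next token
}
```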
server.cpp

```diff
@@ -282,24 +282,18 @@ struct llama_server_context
         {
             // Greedy sampling
             id = llama_sample_token_greedy(ctx, &candidates_p);
-        }
-        else
-        {
+        } else {
             if (mirostat == 1)
             {
                 static float mirostat_mu = 2.0f * mirostat_tau;
                 const int mirostat_m = 100;
                 llama_sample_temperature(ctx, &candidates_p, temp);
                 id = llama_sample_token_mirostat(ctx, &candidates_p, mirostat_tau, mirostat_eta, mirostat_m, &mirostat_mu);
-            }
-            else if (mirostat == 2)
-            {
+            } else if (mirostat == 2) {
                 static float mirostat_mu = 2.0f * mirostat_tau;
                 llama_sample_temperature(ctx, &candidates_p, temp);
                 id = llama_sample_token_mirostat_v2(ctx, &candidates_p, mirostat_tau, mirostat_eta, &mirostat_mu);
-            }
-            else
-            {
+            } else {
                 // Temperature sampling
                 llama_sample_tail_free(ctx, &candidates_p, tfs_z, 1);
                 llama_sample_typical(ctx, &candidates_p, typical_p, 1);
```
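For context on the sampling block above: `candidates_p` is the `llama_token_data_array` that every `llama_sample_*` call mutates in place. A minimal sketch, again assuming the llama.cpp C API of this era, of how it is typically built from the logits before any sampler runs:

```cpp
#include <vector>

// Sketch only, not part of this diff: build the candidate array that the
// sampling block above consumes.
const float * logits  = llama_get_logits(ctx);
const int     n_vocab = llama_n_vocab(ctx);

std::vector<llama_token_data> candidates;
candidates.reserve(n_vocab);
for (llama_token token_id = 0; token_id < n_vocab; token_id++) {
    // Each entry carries {id, logit, p}; p gets filled in by the samplers.
    candidates.emplace_back(llama_token_data{token_id, logits[token_id], 0.0f});
}
llama_token_data_array candidates_p = { candidates.data(), candidates.size(), false };
```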
```diff
@@ -343,7 +337,8 @@ struct llama_server_context
                 const size_t tmp = word.size() + last_token_size;
                 const size_t from_pos = text.size() > tmp ? text.size() - tmp : 0;
                 pos = text.find(word, from_pos);
-            } else {
+            }
+            else {
                 pos = find_partial_stop_string(word, text);
             }
             if (pos != std::string::npos &&
```
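The `else` branch above covers the case where the end of the generated text might be the *beginning* of a stop word, so those bytes have to be held back instead of streamed out. A hypothetical sketch of what such a helper can look like (the real `find_partial_stop_string` lives elsewhere in this file and may differ):

```cpp
#include <string>

// Hypothetical sketch: return the position where a prefix of `stop` starts
// at the very end of `text`, or npos if the text cannot be mid-stop-word.
static size_t find_partial_stop_string(const std::string & stop, const std::string & text) {
    if (!text.empty() && !stop.empty()) {
        const char text_last = text.back();
        // Try the longest prefix of `stop` first, shrinking one char at a time.
        for (size_t len = stop.size(); len > 0; len--) {
            if (stop[len - 1] != text_last) {
                continue; // this prefix cannot end at the last char of text
            }
            if (text.size() >= len &&
                text.compare(text.size() - len, len, stop, 0, len) == 0) {
                return text.size() - len;
            }
        }
    }
    return std::string::npos;
}
```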
```diff
@@ -855,7 +850,8 @@ int main(int argc, char **argv)
             res.set_content(
                 data.dump(llama.json_indent, ' ', false, json::error_handler_t::replace),
                 "application/json");
-        } else {
+        }
+        else {
             const auto chunked_content_provider = [&](size_t, DataSink& sink) {
                 size_t sent_count = 0;
 
```
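The `chunked_content_provider` lambda above follows cpp-httplib's streaming interface: the provider receives a `DataSink`, writes whatever is ready, and returns `true` to keep the connection alive. A rough sketch of the shape (the content type and payload here are placeholders, not taken from this commit):

```cpp
// Rough sketch of the httplib streaming shape used above.
const auto chunked_content_provider = [&](size_t /*offset*/, httplib::DataSink & sink) {
    const std::string chunk = "data: {\"content\": \"...\"}\n\n"; // placeholder payload
    sink.write(chunk.data(), chunk.size()); // push one chunk to the client
    sink.done();                            // signal that the stream is finished
    return true;                            // sink is still healthy
};
res.set_chunked_content_provider("text/event-stream", chunked_content_provider);
```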
```diff
@@ -955,9 +951,11 @@ int main(int argc, char **argv)
         char buf[BUFSIZ];
         try {
             std::rethrow_exception(std::move(ep));
-        } catch (std::exception &e) {
+        }
+        catch (std::exception& e) {
             snprintf(buf, sizeof(buf), fmt, e.what());
-        } catch (...) {
+        }
+        catch (...) {
             snprintf(buf, sizeof(buf), fmt, "Unknown Exception");
         }
         res.set_content(buf, "text/plain");
```
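The try/rethrow pattern in this last hunk is the standard way to inspect a captured `std::exception_ptr`: the pointer itself is opaque, so the handler rethrows it and lets ordinary catch clauses dispatch on the concrete type. Restated as a self-contained sketch (function and message names are illustrative):

```cpp
#include <cstdio>
#include <exception>

// Illustrative sketch of the idiom above: rethrow a captured exception_ptr
// so that catch clauses can recover a printable message from it.
static void format_exception(std::exception_ptr ep, char * buf, size_t len) {
    try {
        std::rethrow_exception(ep); // the only portable way to look inside ep
    }
    catch (std::exception & e) {
        snprintf(buf, len, "error: %s", e.what());
    }
    catch (...) {
        snprintf(buf, len, "error: %s", "Unknown Exception");
    }
}
```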