Spaces to 4 and other code style cleanup. Notes in README.
parent ccd85e0a6b
commit a9c34779f6
2 changed files with 842 additions and 842 deletions
@@ -23,6 +23,8 @@ Command line options:
 
 ## Quick Start
 
+**Note:** The server is not built by default. Make sure to add `LLAMA_BUILD_SERVER=ON` to your CMake command.
+
 To get started right away, run the following command, making sure to use the correct path for the model you have:
 
 ### Unix-based systems (Linux, macOS, etc.):
@@ -99,7 +101,7 @@ node .
 
 `top_p`: Limit the next token selection to a subset of tokens with a cumulative probability above a threshold P (default: 0.9).
 
-`n_predict`: Set the number of tokens to predict when generating text (default: 128, -1 = infinity).
+`n_predict`: Set the number of tokens to predict when generating text. **Note:** May exceed the limit slightly if the last token is a partial multibyte character. (default: 128, -1 = infinity).
 
 `n_keep`: Specify the number of tokens from the initial prompt to retain when the model resets its internal context.
 By default, this value is set to 0 (meaning no tokens are kept). Use `-1` to retain all tokens from the initial prompt.
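The multibyte caveat added to `n_predict` comes from how generation stops: the server counts tokens, but a token can end partway through a UTF-8 character, so a few extra tokens may be emitted to finish it. Below is a minimal sketch of how one might detect an incomplete trailing UTF-8 sequence; the helper name and logic are illustrative, not the server's actual code.

```cpp
#include <string>

// Returns true if `s` ends in the middle of a UTF-8 multibyte sequence,
// i.e. the last lead byte announces more continuation bytes than follow it.
// Illustrative helper, not the server's implementation.
static bool ends_with_partial_utf8(const std::string & s) {
    // Walk back over trailing continuation bytes (10xxxxxx).
    size_t cont = 0;
    size_t i = s.size();
    while (i > 0 && (static_cast<unsigned char>(s[i - 1]) & 0xC0) == 0x80) {
        --i;
        ++cont;
    }
    if (i == 0) {
        return cont > 0; // continuation bytes with no lead byte
    }
    const unsigned char lead = static_cast<unsigned char>(s[i - 1]);
    size_t expected = 0;
    if ((lead & 0x80) == 0x00) expected = 0;       // ASCII, complete
    else if ((lead & 0xE0) == 0xC0) expected = 1;  // 2-byte sequence
    else if ((lead & 0xF0) == 0xE0) expected = 2;  // 3-byte sequence
    else if ((lead & 0xF8) == 0xF0) expected = 3;  // 4-byte sequence
    return cont < expected; // fewer continuation bytes than announced
}
```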
@@ -282,24 +282,18 @@ struct llama_server_context
             {
                 // Greedy sampling
                 id = llama_sample_token_greedy(ctx, &candidates_p);
-            }
-            else
-            {
+            } else {
                 if (mirostat == 1)
                 {
                     static float mirostat_mu = 2.0f * mirostat_tau;
                     const int mirostat_m = 100;
                     llama_sample_temperature(ctx, &candidates_p, temp);
                     id = llama_sample_token_mirostat(ctx, &candidates_p, mirostat_tau, mirostat_eta, mirostat_m, &mirostat_mu);
-                }
-                else if (mirostat == 2)
-                {
+                } else if (mirostat == 2) {
                     static float mirostat_mu = 2.0f * mirostat_tau;
                     llama_sample_temperature(ctx, &candidates_p, temp);
                     id = llama_sample_token_mirostat_v2(ctx, &candidates_p, mirostat_tau, mirostat_eta, &mirostat_mu);
-                }
-                else
-                {
+                } else {
                     // Temperature sampling
                     llama_sample_tail_free(ctx, &candidates_p, tfs_z, 1);
                     llama_sample_typical(ctx, &candidates_p, typical_p, 1);
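For readers skimming the hunk above: the sampling path picks one of four strategies. Here is a condensed restatement of that control flow, assuming the llama.h sampling API of this period. The `temp <= 0` guard is inferred from the "Greedy sampling" comment, and the tail of the default branch (top-k, top-p, and the final draw) is cut off in the hunk and shown here as it plausibly continues; this is a sketch, not a drop-in patch.

```cpp
#include "llama.h"

// Condensed view of the sampling dispatch in the hunk above.
static llama_token pick_token(llama_context * ctx, llama_token_data_array * cands,
                              float temp, int mirostat, float tau, float eta,
                              float tfs_z, float typical_p, int top_k, float top_p) {
    if (temp <= 0) {
        return llama_sample_token_greedy(ctx, cands);   // deterministic argmax
    }
    if (mirostat == 1) {
        static float mu = 2.0f * tau;                   // running surprise target, persists across calls
        llama_sample_temperature(ctx, cands, temp);
        return llama_sample_token_mirostat(ctx, cands, tau, eta, /*m=*/100, &mu);
    }
    if (mirostat == 2) {
        static float mu = 2.0f * tau;
        llama_sample_temperature(ctx, cands, temp);
        return llama_sample_token_mirostat_v2(ctx, cands, tau, eta, &mu);
    }
    // Default pipeline: filter the candidate set, then sample with temperature.
    llama_sample_tail_free(ctx, cands, tfs_z, 1);
    llama_sample_typical(ctx, cands, typical_p, 1);
    llama_sample_top_k(ctx, cands, top_k, 1);
    llama_sample_top_p(ctx, cands, top_p, 1);
    llama_sample_temperature(ctx, cands, temp);
    return llama_sample_token(ctx, cands);
}
```

Note the `static float mirostat_mu` in the original: because it is a function-local static, the adaptive mu value carries over between requests rather than resetting per generation.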
@@ -343,7 +337,8 @@ struct llama_server_context
                 const size_t tmp = word.size() + last_token_size;
                 const size_t from_pos = text.size() > tmp ? text.size() - tmp : 0;
                 pos = text.find(word, from_pos);
-            } else {
+            }
+            else {
                 pos = find_partial_stop_string(word, text);
             }
             if (pos != std::string::npos &&
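The branch above distinguishes a full stop-string search (`text.find`) from a partial one used while streaming: if the text ends with only the first few bytes of a stop word, the server must hold those bytes back instead of sending them, since the next token might complete the stop string. The commit does not show the body of `find_partial_stop_string`; the following is a sketch of the contract such a helper plausibly satisfies.

```cpp
#include <algorithm>
#include <string>

// Where does a strict prefix of `stop` begin at the tail of `text`?
// Returns the index of that prefix, or std::string::npos.  Full matches
// are handled by the text.find branch above.  Sketch only.
static size_t find_partial_stop_string(const std::string & stop, const std::string & text) {
    if (text.empty() || stop.empty()) {
        return std::string::npos;
    }
    // Try the longest candidate prefix first, shrinking one byte at a time.
    const size_t max_len = std::min(stop.size() - 1, text.size());
    for (size_t len = max_len; len > 0; --len) {
        // Does `text` end with the first `len` bytes of `stop`?
        if (text.compare(text.size() - len, len, stop, 0, len) == 0) {
            return text.size() - len;
        }
    }
    return std::string::npos;
}
```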
@@ -855,7 +850,8 @@ int main(int argc, char **argv)
             res.set_content(
                 data.dump(llama.json_indent, ' ', false, json::error_handler_t::replace),
                 "application/json");
-        } else {
+        }
+        else {
             const auto chunked_content_provider = [&](size_t, DataSink& sink) {
                 size_t sent_count = 0;
 
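The `chunked_content_provider` lambda above is where streamed completions are written out through cpp-httplib's `DataSink`. A self-contained sketch of that pattern follows, using `set_chunked_content_provider` as found in recent cpp-httplib versions; the endpoint and payload are made up for illustration.

```cpp
#include "httplib.h"
#include <cstring>

int main() {
    httplib::Server svr;

    // Toy endpoint; the real server pushes generated tokens through the
    // sink as they are produced, not a fixed list of strings.
    svr.Get("/stream", [](const httplib::Request &, httplib::Response & res) {
        res.set_chunked_content_provider(
            "text/plain",
            [](size_t /*offset*/, httplib::DataSink & sink) {
                const char * chunks[] = {"Hello", ", ", "world\n"};
                for (const char * c : chunks) {
                    sink.write(c, std::strlen(c));  // one HTTP chunk per write
                }
                sink.done();  // terminates the chunked response
                return true;  // no error occurred
            });
    });

    svr.listen("127.0.0.1", 8080);
}
```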
@@ -955,9 +951,11 @@ int main(int argc, char **argv)
         char buf[BUFSIZ];
         try {
             std::rethrow_exception(std::move(ep));
-        } catch (std::exception &e) {
+        }
+        catch (std::exception& e) {
             snprintf(buf, sizeof(buf), fmt, e.what());
-        } catch (...) {
+        }
+        catch (...) {
             snprintf(buf, sizeof(buf), fmt, "Unknown Exception");
         }
         res.set_content(buf, "text/plain");
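The `try { std::rethrow_exception(...) }` shape above is the standard way to recover a printable message from an opaque `std::exception_ptr` (here handed to the server's error handler). A standalone illustration of the pattern; the format string is an assumption for demonstration.

```cpp
#include <cstdio>
#include <exception>
#include <stdexcept>
#include <string>

// Turn a std::exception_ptr back into a message by rethrowing it and
// catching the concrete type, mirroring the hunk above.
static std::string describe(std::exception_ptr ep) {
    char buf[256];
    const char * fmt = "500 Internal Server Error\n%s";  // assumed format
    try {
        std::rethrow_exception(std::move(ep));
    }
    catch (std::exception& e) {
        snprintf(buf, sizeof(buf), fmt, e.what());
    }
    catch (...) {
        snprintf(buf, sizeof(buf), fmt, "Unknown Exception");
    }
    return buf;
}

int main() {
    try {
        throw std::runtime_error("model file not found");
    } catch (...) {
        std::printf("%s\n", describe(std::current_exception()).c_str());
    }
}
```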