Eliminate the /tokenizes POST endpoint, add option "special" to /tokenize

Eliminate tab compression from modified files.
Troy Beukema 2023-10-23 11:48:37 -04:00
parent 5872e4f4da
commit 4150e74d04
2 changed files with 1762 additions and 1776 deletions

examples/server/README.md

@@ -222,18 +222,12 @@ node index.js
     *Options:*
+    `special`: Boolean indicating if special tokens should be tokenized (default false)
     `content`: Set the text to tokenize.
     Note that the special `BOS` token is not added in front of the text and also a space character is not inserted automatically as it is for `/completion`.
-- **POST** `/tokenizes`: Tokenize a given text with special tokens.
-    *Options:*
-    `content`: Set the text to tokenize with special tokens.
-    Note that the special `BOS` token is not added in front of the text and also a space character is not inserted automatically as it is for `/completion`.
 - **POST** `/detokenize`: Convert tokens to text.
     *Options:*

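With `/tokenizes` removed, both behaviors go through the single `/tokenize` endpoint. A minimal client sketch using the same cpp-httplib and nlohmann::json the server itself builds on; the host, port, and sample content are illustrative assumptions, not part of this commit:

```cpp
// Hypothetical client: POST /tokenize with the new "special" flag.
// Assumes a server listening on localhost:8080 (adjust as needed).
#include "httplib.h"
#include "json.hpp"
#include <cstdio>

using json = nlohmann::json;

int main() {
    httplib::Client cli("localhost", 8080);

    // "special": true replaces the old POST /tokenizes behavior;
    // omitting it (default false) matches the old POST /tokenize.
    const json body = {
        {"content", "</s>Hello"},
        {"special", true}
    };

    auto res = cli.Post("/tokenize", body.dump(), "application/json");
    if (res && res->status == 200) {
        printf("%s\n", res->body.c_str()); // e.g. {"tokens":[...]}
    }
    return 0;
}
```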
examples/server/server.cpp

@@ -176,6 +176,7 @@ struct slot_params
 {
     bool stream = true;
     bool cache_prompt = false; // remember the prompt to avoid reprocessing all prompt
+    bool special = false; // use in tokenizer call to tokenize special tokens
     uint32_t seed = -1; // RNG seed
     int32_t n_keep = 0; // number of tokens to keep from initial prompt
@@ -747,6 +748,7 @@ struct llama_server_context
     slot->params.stream = json_value(data, "stream", false);
     slot->params.cache_prompt = json_value(data, "cache_prompt", false);
+    slot->params.special = json_value(data, "special", false);
     slot->params.n_predict = json_value(data, "n_predict", default_params.n_predict);
     slot->sparams.top_k = json_value(data, "top_k", default_sparams.top_k);
     slot->sparams.top_p = json_value(data, "top_p", default_sparams.top_p);
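For reference, these assignments go through server.cpp's `json_value` helper, which returns the supplied default when the key is absent. A minimal sketch of its semantics (illustrative, not the verbatim definition from this codebase):

```cpp
// Sketch of the json_value fallback used above: read `key` from the request
// body, returning `default_value` when the key is missing or null.
#include <string>
#include "json.hpp"

using json = nlohmann::json;

template <typename T>
static T json_value(const json & body, const std::string & key, const T & default_value) {
    return body.contains(key) && !body.at(key).is_null()
        ? body.value(key, default_value)
        : default_value;
}
```

This fallback is why a request that omits "special" tokenizes exactly as it did before this change.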
@@ -2494,22 +2496,12 @@ int main(int argc, char **argv)
     svr.Post("/tokenize", [&llama](const httplib::Request &req, httplib::Response &res)
             {
                 const json body = json::parse(req.body);
-                std::vector<llama_token> tokens;
-                if (body.count("content") != 0)
-                {
-                    tokens = llama.tokenize(body["content"], false);
-                }
-                const json data = format_tokenizer_response(tokens);
-                return res.set_content(data.dump(), "application/json");
-            });
+                bool special = json_value(body, "special", false);
 
-    svr.Post("/tokenizes", [&llama](const httplib::Request &req, httplib::Response &res)
-            {
-                const json body = json::parse(req.body);
                 std::vector<llama_token> tokens;
                 if (body.count("content") != 0)
                 {
-                    tokens = llama.tokenize(body["content"], false, true);
+                    tokens = llama.tokenize(body["content"], false, special);
                 }
                 const json data = format_tokenizer_response(tokens);
                 return res.set_content(data.dump(), "application/json");
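What the flag changes downstream: with special-token parsing enabled, marker strings such as `</s>` are matched to their single token ids instead of being split into literal characters. A sketch against the llama.cpp API of this era (`llama_tokenize(ctx, text, add_bos, special)` from common.h); the model path and sample text are assumptions:

```cpp
// Compare tokenization with and without special-token parsing.
// Assumes the llama.cpp headers of this era; pass a GGUF model path.
#include "common.h"
#include "llama.h"
#include <cstdio>
#include <string>

int main(int argc, char ** argv) {
    if (argc < 2) {
        fprintf(stderr, "usage: %s model.gguf\n", argv[0]);
        return 1;
    }

    llama_backend_init(false);
    llama_model * model = llama_load_model_from_file(argv[1], llama_model_default_params());
    if (model == nullptr) {
        return 1;
    }
    llama_context * ctx = llama_new_context_with_model(model, llama_context_default_params());

    const std::string text = "</s>Hello";

    // special == false: "</s>" is tokenized as literal characters
    const auto plain  = llama_tokenize(ctx, text, false, false);
    // special == true: "</s>" collapses to the single EOS token id
    const auto parsed = llama_tokenize(ctx, text, false, true);

    printf("plain: %zu tokens, special: %zu tokens\n", plain.size(), parsed.size());

    llama_free(ctx);
    llama_free_model(model);
    llama_backend_free();
    return 0;
}
```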