From eb082012278036b020f117b69545b85b5fa12870 Mon Sep 17 00:00:00 2001
From: FSSRepo
Date: Fri, 13 Oct 2023 14:28:06 -0400
Subject: [PATCH] add changes to README.md

---
 examples/server/README.md  | 8 ++++++++
 examples/server/server.cpp | 8 ++++----
 2 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/examples/server/README.md b/examples/server/README.md
index 8a079ae26..e28e5845b 100644
--- a/examples/server/README.md
+++ b/examples/server/README.md
@@ -24,6 +24,8 @@ Command line options:
 - `--port`: Set the port to listen. Default: `8080`.
 - `--path`: path from which to serve static files (default examples/server/public)
 - `--embedding`: Enable embedding extraction, Default: disabled.
+- `-np N`, `--parallel N`: Set the number of slots for processing requests (default: 1)
+- `-cb`, `--cont-batching`: enable continuous batching (a.k.a. dynamic batching) (default: disabled)
 
 ## Build
 
@@ -158,6 +160,12 @@ node index.js
 
     `n_probs`: If greater than 0, the response also contains the probabilities of top N tokens for each generated token (default: 0)
 
+    `slot_id`: Assign the completion task to a specific slot. If set to -1, the task will be assigned to an idle slot (default: -1)
+
+    `cache_prompt`: Save the prompt and generation state to avoid reprocessing the entire prompt if a part of it has not changed (default: false)
+
+    `system_prompt`: Change the system prompt (initial prompt of all slots); this is useful for chat applications.
+
 - **POST** `/tokenize`: Tokenize a given text.
 
     *Options:*
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index d839d6ce5..0741ebf11 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -78,8 +78,8 @@ enum slot_command {
 
 struct slot_params {
     bool stream = true;
-    uint32_t seed = -1; // RNG seed
-    int32_t n_predict = -1; // new tokens to predict
+    uint32_t seed      = -1; // RNG seed
+    int32_t n_predict  = -1; // new tokens to predict
     std::string grammar = ""; // optional BNF-like grammar to constrain sampling
     bool cache_prompt = false; // remember a the prompt to avoid reprocessing all prompt
     std::vector<std::string> antiprompt;
@@ -563,7 +563,7 @@ struct llama_server_context
     }
 
     void processSystemPromptData(json sys_props) {
-        system_prompt = sys_props.value("system_prompt", "");
+        system_prompt = sys_props.value("prompt", "");
         user_name = sys_props.value("anti_prompt", "");
         assistant_name = sys_props.value("assistant_name", "");
         notifySystemPromptChanged();
@@ -872,7 +872,7 @@ struct llama_server_context
             return true;
         }
 
-        // context shift
+        // context shift takes effect only when there is a single slot
         if(slots.size() == 1) {
            llama_client_slot slot = slots[0];
            if (slot.cache_tokens.size() >= (size_t)n_ctx)
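
The options documented above can be exercised together. The following is a minimal sketch, not part of the patch: it assumes the `server` binary and `/completion` endpoint that this README already documents, the default port `8080`, and a placeholder model path and context size; `-np`, `-cb`, `slot_id`, and `cache_prompt` are the additions described in the diff.

```sh
# Start the server with 4 slots and continuous batching enabled
# (model path and context size are placeholders)
./server -m models/7B/ggml-model.gguf -c 4096 -np 4 -cb

# Pin a completion to slot 0 and cache the prompt, so a follow-up request
# that extends the same prompt only reprocesses the newly added part
curl --request POST \
    --url http://localhost:8080/completion \
    --header "Content-Type: application/json" \
    --data '{
        "prompt": "Building a website can be done in 10 simple steps:",
        "n_predict": 64,
        "slot_id": 0,
        "cache_prompt": true
    }'
```

Per the `processSystemPromptData` change in the server.cpp hunk, the `system_prompt` field is read as an object whose `prompt`, `anti_prompt`, and `assistant_name` keys set the shared initial prompt, user name, and assistant name for all slots, which is how a chat front end would reset the system prompt at runtime.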