From b3afd6c86a324026a6206fce6d8a890b8696e5cf Mon Sep 17 00:00:00 2001
From: HanishKVC
Date: Fri, 24 May 2024 23:16:55 +0530
Subject: [PATCH] SimpleChat: Add n_predict (equivalent of max_tokens) for
 llama.cpp server

The /completions endpoint of examples/server doesn't take max_tokens;
instead it takes the internal n_predict. For now, send the same from the
client side; maybe later max_tokens handling can be added to the
/completions endpoint on the server side.
---
 examples/server/public_simplechat/readme.md     | 4 ++++
 examples/server/public_simplechat/simplechat.js | 1 +
 2 files changed, 5 insertions(+)

diff --git a/examples/server/public_simplechat/readme.md b/examples/server/public_simplechat/readme.md
index 0a1c28131..585ece888 100644
--- a/examples/server/public_simplechat/readme.md
+++ b/examples/server/public_simplechat/readme.md
@@ -174,6 +174,10 @@ Set max_tokens to 1024, so that a relatively large previous reponse doesnt eat u
 available wrt next query-response. However dont forget that the server when started should
 also be started with a model context size of 1k or more, to be on safe side.
 
+  The /completions endpoint of examples/server doesn't take max_tokens; instead it takes the
+  internal n_predict. For now the same is added here on the client side; maybe later max_tokens
+  handling can be added to the /completions endpoint code on the server side.
+
 Frequency and presence penalty fields are set to 1.2 in the set of fields sent to server
 along with the user query. So that the model is partly set to try avoid repeating text in
 its response.
diff --git a/examples/server/public_simplechat/simplechat.js b/examples/server/public_simplechat/simplechat.js
index 973f6046e..0c48da879 100644
--- a/examples/server/public_simplechat/simplechat.js
+++ b/examples/server/public_simplechat/simplechat.js
@@ -578,6 +578,7 @@ class Me {
             "max_tokens": 1024,
             "frequency_penalty": 1.2,
             "presence_penalty": 1.2,
+            "n_predict": 1024
         };
     }
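
Note (not part of the patch): the sketch below illustrates why the client keeps
both fields in one request-options object, a minimal sketch rather than the
actual SimpleChat code. It assumes a locally running examples/server instance on
its default port 8080, a Node 18+ (or browser) environment with a global fetch,
and that each endpoint simply ignores request fields it does not know about;
adjust the base URL and field values for your setup.

    // Post a prompt to the /completions endpoint of examples/server.
    // max_tokens is carried along for the OpenAI-compatible chat/completions
    // style endpoint, while /completions itself honours n_predict.
    const baseUrl = "http://127.0.0.1:8080";   // assumed default server address

    async function queryCompletions(prompt) {
        const body = {
            "prompt": prompt,
            "max_tokens": 1024,        // used by chat/completions style endpoints
            "n_predict": 1024,         // used by the /completions endpoint
            "frequency_penalty": 1.2,
            "presence_penalty": 1.2,
        };
        const resp = await fetch(`${baseUrl}/completions`, {
            method: "POST",
            headers: { "Content-Type": "application/json" },
            body: JSON.stringify(body),
        });
        const data = await resp.json();
        // Non-streaming responses from /completions carry the generated text
        // in the "content" field.
        return data.content;
    }

    queryCompletions("Hello, ").then(console.log).catch(console.error);

Sending both max_tokens and n_predict lets the same options object be reused
against /completions and the chat endpoint without branching on the client.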