fix unexpected behavior when multiple requests are canceled

FSSRepo 2023-10-09 08:14:42 -04:00
parent 8d3681ddbe
commit 6a2e064d68
2 changed files with 2 additions and 4 deletions

@@ -131,7 +131,6 @@ function updateView() {
 async function call_llama(options) {
   try {
     controller = new AbortController();
-    signal = controller.signal;
     const response = await fetch("/completion", {
       method: "POST",
       body: JSON.stringify(options),
@@ -139,7 +138,7 @@ async function call_llama(options) {
         "Content-Type": "application/json",
         Accept: "text/event-stream",
       },
-      signal: signal
+      signal: controller.signal
     });
     const reader = response.body.getReader();
     const decoder = new TextDecoder();

@@ -310,6 +310,7 @@ struct server_parallel_context {
                 slot.command = NONE;
                 slot.n_prompt = 0;
                 slot.n_tokens_predicted = 0;
+                slot.sampled_tokens.clear();
                 continue;
             }
@@ -346,8 +347,6 @@ struct server_parallel_context {
                 // do not prepend BOS because we have a system prompt!
                 std::vector<llama_token> tokens_prompt;
                 tokens_prompt = ::llama_tokenize(ctx, slot.prompt, false);
-                slot.n_tokens_predicted = 0;
-                slot.sampled_tokens.clear();
                 for (size_t i = 0; i < tokens_prompt.size(); ++i) {
                     batch.token [batch.n_tokens] = tokens_prompt[i];
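On the server side, the per-slot bookkeeping (`n_tokens_predicted`, `sampled_tokens`) is now cleared on the cancellation path (the `slot.command = NONE` branch) rather than when the next prompt is tokenized, so a canceled generation cannot leave stale sampled tokens behind on the slot. A minimal sketch of the idea, using a hypothetical slot struct reduced to the fields visible in this diff (int32_t stands in for llama_token; this is illustrative, not the actual server_parallel_context code):

#include <cstdint>
#include <cstdio>
#include <vector>

enum slot_command { NONE, LOAD_PROMPT, RELEASE };

struct client_slot {
    slot_command command = NONE;
    int n_prompt = 0;
    int n_tokens_predicted = 0;
    std::vector<int32_t> sampled_tokens;   // tokens generated so far for this request

    // Called when a request is canceled/released: reset all per-request state here,
    // so nothing from the canceled generation leaks into the next request on this slot.
    void release() {
        command = NONE;
        n_prompt = 0;
        n_tokens_predicted = 0;
        sampled_tokens.clear();            // the clear this commit moves to the cancel path
    }
};

int main() {
    client_slot slot;
    slot.sampled_tokens = {1, 2, 3};       // pretend a generation was in progress
    slot.n_tokens_predicted = 3;

    slot.release();                        // request canceled

    // A new prompt assigned to this slot now starts from a clean state.
    std::printf("predicted=%d sampled=%zu\n", slot.n_tokens_predicted, slot.sampled_tokens.size());
    return 0;
}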