fix unexpected behavior when multiple requests are canceled
parent 8d3681ddbe
commit 6a2e064d68
2 changed files with 2 additions and 4 deletions
```diff
@@ -131,7 +131,6 @@ function updateView() {
 async function call_llama(options) {
     try {
         controller = new AbortController();
-        signal = controller.signal;
         const response = await fetch("/completion", {
             method: "POST",
             body: JSON.stringify(options),
@@ -139,7 +138,7 @@ async function call_llama(options) {
                 "Content-Type": "application/json",
                 Accept: "text/event-stream",
             },
-            signal: signal
+            signal: controller.signal
         });
         const reader = response.body.getReader();
         const decoder = new TextDecoder();
```
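In the client-side hunks above, the functional change is that the fetch options now read the abort signal directly from `controller.signal` instead of going through a separate `signal` binding. Since neither variable is declared with `let`/`const` in the excerpt, `signal` behaves as a shared binding that a later request can overwrite, which would explain the unexpected behavior when several requests are canceled. The sketch below illustrates the general per-request pattern; it is not the project's actual client code, and the `inflight` map, `startRequest`, and `cancelRequest` names are made up for illustration.

```js
// Sketch only (hypothetical helpers, not the project's code): keep one
// AbortController per in-flight request so that cancelling one request
// cannot abort, or fail to abort, another.
const inflight = new Map(); // request id -> AbortController

async function startRequest(id, options) {
    const controller = new AbortController();
    inflight.set(id, controller);
    try {
        const response = await fetch("/completion", {
            method: "POST",
            body: JSON.stringify(options),
            headers: {
                "Content-Type": "application/json",
                Accept: "text/event-stream",
            },
            // taken from this request's own controller, never a shared binding
            signal: controller.signal,
        });
        return await response.text();
    } finally {
        inflight.delete(id); // forget the controller once the request settles
    }
}

function cancelRequest(id) {
    const controller = inflight.get(id);
    if (controller) controller.abort();
}
```

Passing `controller.signal` directly into the fetch options, as the patch does, achieves the same effect in a more minimal way: there is no intermediate `signal` variable left around for a concurrent request to clobber.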
```diff
@@ -310,6 +310,7 @@ struct server_parallel_context {
                 slot.command = NONE;
                 slot.n_prompt = 0;
                 slot.n_tokens_predicted = 0;
+                slot.sampled_tokens.clear();
                 continue;
             }

@@ -346,8 +347,6 @@ struct server_parallel_context {
                 // do not prepend BOS because we have a system prompt!
                 std::vector<llama_token> tokens_prompt;
                 tokens_prompt = ::llama_tokenize(ctx, slot.prompt, false);
-                slot.n_tokens_predicted = 0;
-                slot.sampled_tokens.clear();

                 for (size_t i = 0; i < tokens_prompt.size(); ++i) {
                     batch.token [batch.n_tokens] = tokens_prompt[i];
```
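The server-side hunks read as moving the reset of a slot's generation state (`n_tokens_predicted`, `sampled_tokens`) out of the prompt-tokenization path and into the branch that handles a released slot, so a canceled slot does not carry stale counters or tokens until it happens to receive another prompt. The sketch below is a simplified illustration under that reading, not the actual `server_parallel_context`; the `RELEASE` enum value, the `reset()` helper, and `process_slots()` are assumptions for illustration.

```cpp
// Simplified sketch of the apparent intent: reset per-request state when a
// slot is released/canceled, not only when the next prompt is assigned.
// Only `command`, `n_prompt`, `n_tokens_predicted`, and `sampled_tokens`
// appear in the diff; everything else here is illustrative.
#include <cstdint>
#include <vector>

using llama_token = std::int32_t; // stand-in for the real typedef

enum slot_command { NONE, LOAD_PROMPT, RELEASE }; // RELEASE is assumed

struct slot {
    slot_command command = NONE;
    int n_prompt = 0;
    int n_tokens_predicted = 0;
    std::vector<llama_token> sampled_tokens;

    // Clear everything describing the previous request so a canceled slot
    // cannot leak counters or sampled tokens into the next request.
    void reset() {
        command = NONE;
        n_prompt = 0;
        n_tokens_predicted = 0;
        sampled_tokens.clear();
    }
};

void process_slots(std::vector<slot> & slots) {
    for (auto & s : slots) {
        if (s.command == RELEASE) {
            s.reset(); // state is reset at cancel time, as in the diff
            continue;
        }
        // ... tokenize the slot's prompt and add it to the batch ...
    }
}
```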