diff --git a/examples/server-parallel/README.md b/examples/server-parallel/README.md index 3ac8439b5..1ffd089b0 100644 --- a/examples/server-parallel/README.md +++ b/examples/server-parallel/README.md @@ -20,4 +20,4 @@ server-parallel.exe -m models\7B\ggml-model.gguf --ctx_size 2048 -t 4 -ngl 33 -- The above command will start a server that by default listens on `127.0.0.1:8080`. You can consume the endpoints with Postman or NodeJS with axios library. You can visit the web front end at the same url. -# This example is a Proof of Concept, have bugs and unexpected behaivors \ No newline at end of file +# This example is a Proof of Concept, have bugs and unexpected behaivors diff --git a/examples/server-parallel/index.h b/examples/server-parallel/index.h index dfe62a327..f3a160292 100644 --- a/examples/server-parallel/index.h +++ b/examples/server-parallel/index.h @@ -152,4 +152,4 @@ const auto index_html = R"( -)"; \ No newline at end of file +)"; diff --git a/examples/server-parallel/server.cpp b/examples/server-parallel/server.cpp index fb1b28af6..d99849576 100644 --- a/examples/server-parallel/server.cpp +++ b/examples/server-parallel/server.cpp @@ -249,7 +249,7 @@ struct server_parallel_context { request_clean_kv = false; LOG_TEE("%s: clearing the KV cache\n", __func__); } - + // assign workload to the slots if (params.cont_batching || batch.n_tokens == 0) { for (llama_client_slot & slot : slots) { @@ -287,7 +287,7 @@ struct server_parallel_context { } } } - + if (batch.n_tokens == 0) { return true; } @@ -806,4 +806,4 @@ int main(int argc, char **argv) { return 1; } -} \ No newline at end of file +}