diff --git a/examples/server/README.md b/examples/server/README.md index 9a4b454ce..1c1181869 100644 --- a/examples/server/README.md +++ b/examples/server/README.md @@ -1,12 +1,15 @@ # llama.cpp/example/server This example demonstrates a simple HTTP API server and a simple web front end to interact with llama.cpp. -It is set only to run on the local machine using http://127.0.0.1:8080 but it can serve a local network or a public network if the router allows port forwarding. -To make the server accessible to other machines on the local or public network change the server username to '0.0.0.0'. + +It is set by default to run only on the local machine using http://127.0.0.1:8080, but it can serve a local network by using **--host "0.0.0.0"**. It will then run on http://0.0.0.0:8080, and to access the server on the host machine the URL must be http://IPaddress:8080. So, for example, http://192.168.1.42:8080 will have the same effect as localhost:8080 when the server host is 127.0.0.1. -Command line options: +**Server Graceful Shut Down** + +To ensure that all processes terminate gracefully with memory deallocation, always shut the server down with **Ctrl+C** and wait for the message **ggml-metal-free: deallocating**. + # LLaMA.cpp HTTP Server Fast, lightweight, pure C/C++ HTTP server based on [httplib](https://github.com/yhirose/cpp-httplib), [nlohmann::json](https://github.com/nlohmann/json) and **llama.cpp**. 
diff --git a/examples/server/server.cpp b/examples/server/server.cpp index 1dccb2194..46660ac6d 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -386,7 +386,7 @@ struct llama_metrics { // requires just `slots` and `params.n_ctx` as parameters static void kvgraphics(std::vector& slots) { - int max_length = 128; + int max_length = 144; int num_blocks = slots.size(); size_t slot_cache_size = slots[0].n_ctx; bool cls_flag = true; // this flag only prevents repeated cls inside one call @@ -411,7 +411,7 @@ static void kvgraphics(std::vector& slots) { printf("\033[2J"); cls_flag = false; } - printf("\033[1;0H\033[K**************************\n\033[KKVcache occupancy by slot:\n\033[K**************************\n"); + printf("\033[1;0H\033[K***************************************\n\033[KLLAMA SERVER KVcache occupancy by slot:\n\033[K***************************************\n"); // we can know and control how many lines of output we are printing so just start below that and fix the graphics location printf("\033[%d;0H", 5); @@ -3198,7 +3198,7 @@ int main(int argc, char **argv) if (received_api_key != cut_api) { LOG("%s != %s and length left = %zu, length right = %zu\n", received_api_key.c_str(), cut_api.c_str(),received_api_key.size(), cut_api.size()); } else if (received_api_key == cut_api) { - LOG("%s = %s FOUND IT!!!\n", received_api_key.c_str(), cut_api.c_str()); + LOG("%s = %s Found matching api key.\n", received_api_key.c_str(), cut_api.c_str()); return true; } }