Server header and README.md
This commit is contained in:
parent
465ced3808
commit
02702d975d
2 changed files with 9 additions and 6 deletions
|
@ -1,12 +1,15 @@
|
|||
# llama.cpp/example/server
|
||||
|
||||
This example demonstrates a simple HTTP API server and a simple web front end to interact with llama.cpp.
|
||||
It is set only to run on the local machine using http://127.0.0.1:8080 but it can serve a local network or a public network if the router allows port forwarding.
|
||||
To make the server accessible to other machines on the local or public network, change the server hostname to '0.0.0.0'.
|
||||
|
||||
It is set by default to run only on the local machine using http://127.0.0.1:8080, but it can serve a local network by using **--host "0.0.0.0"**.
|
||||
It will then run on http://0.0.0.0:8080, and to access the server on the host machine the URL must be http://IPaddress:8080, where IPaddress is the IP address of the host machine.
|
||||
So, for example, http://192.168.1.42:8080 will have the same effect as localhost:8080 has when the server host is 127.0.0.1.
|
||||
|
||||
Command line options:
|
||||
**Server Graceful Shut Down**
|
||||
|
||||
To ensure that all processes terminate gracefully with memory deallocation, always shut the server down with **Ctrl+C** and wait for the message **ggml-metal-free: deallocating**.
|
||||
|
||||
# LLaMA.cpp HTTP Server
|
||||
|
||||
Fast, lightweight, pure C/C++ HTTP server based on [httplib](https://github.com/yhirose/cpp-httplib), [nlohmann::json](https://github.com/nlohmann/json) and **llama.cpp**.
|
||||
|
|
|
@ -386,7 +386,7 @@ struct llama_metrics {
|
|||
// requires just `slots` and `params.n_ctx` as parameters
|
||||
static void kvgraphics(std::vector<llama_client_slot>& slots) {
|
||||
|
||||
int max_length = 128;
|
||||
int max_length = 144;
|
||||
int num_blocks = slots.size();
|
||||
size_t slot_cache_size = slots[0].n_ctx;
|
||||
bool cls_flag = true; // this flag only prevents repeated cls inside one call
|
||||
|
@ -411,7 +411,7 @@ static void kvgraphics(std::vector<llama_client_slot>& slots) {
|
|||
printf("\033[2J");
|
||||
cls_flag = false;
|
||||
}
|
||||
printf("\033[1;0H\033[K**************************\n\033[KKVcache occupancy by slot:\n\033[K**************************\n");
|
||||
printf("\033[1;0H\033[K***************************************\n\033[KLLAMA SERVER KVcache occupancy by slot:\n\033[K***************************************\n");
|
||||
|
||||
// we can know and control how many lines of output we are printing so just start below that and fix the graphics location
|
||||
printf("\033[%d;0H", 5);
|
||||
|
@ -3198,7 +3198,7 @@ int main(int argc, char **argv)
|
|||
if (received_api_key != cut_api) {
|
||||
LOG("%s != %s and length left = %zu, length right = %zu\n", received_api_key.c_str(), cut_api.c_str(),received_api_key.size(), cut_api.size());
|
||||
} else if (received_api_key == cut_api) {
|
||||
LOG("%s = %s FOUND IT!!!\n", received_api_key.c_str(), cut_api.c_str());
|
||||
LOG("%s = %s Found matching api key.\n", received_api_key.c_str(), cut_api.c_str());
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue