diff --git a/Llamaserver.py b/Llamaserver.py index dd5dba287..4969a2410 100644 --- a/Llamaserver.py +++ b/Llamaserver.py @@ -70,7 +70,7 @@ def send_request(q, question, event, count, num_requests): bar = make_progress_bar(bar, count, num_requests) q.task_done() elif response.status_code == 429 and not q.empty(): - event.set() + # event.set() print("Server return too many requests; back off!! Reset event.") else: print(f"Server responded with code {response.status_code}\n") diff --git a/examples/server/server.cpp b/examples/server/server.cpp index bb0c99587..7e0e571b7 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -324,7 +324,7 @@ static void kvgraphics(std::vector& slots, int cache_size) { std::string slot_symbol3 = ""; #ifdef DEBUG - return; // do not display graphics when in debug build + return; // do not display graphics when in debug build (doesn't seem to work) #endif // return if empty @@ -374,7 +374,7 @@ static void kvgraphics(std::vector& slots, int cache_size) { } printf(" %4zu/%5zu %2d %s %s %s\n", slots[i].cache_tokens.size(), slot_cache_size, slots[i].id, slot_symbol1.c_str(), slot_symbol2.c_str(), slot_symbol3.c_str()); } - printf("\n\033[%dJ", 0); + printf("\n\033[%dJ", num_blocks); // move cursor to end of cache display } struct llama_server_context