From 7f0d8987ebeea309714155a04a80c4d36c51466f Mon Sep 17 00:00:00 2001
From: pudepiedj
Date: Mon, 19 Feb 2024 12:14:23 +0000
Subject: [PATCH] minor updates and TCPshellscript

---
 Llamaserver.py                           |  9 +++--
 examples/CMakeLists.txt                  |  1 +
 examples/cmap-example/CMakeLists.txt     |  4 +--
 examples/cmap-example/TCPshellscript.cpp | 46 ++++++++++++++++++++++++
 examples/server/server.cpp               | 17 ++++++---
 examples/server/utils.hpp                | 16 +++++---
 6 files changed, 77 insertions(+), 16 deletions(-)
 create mode 100644 examples/cmap-example/TCPshellscript.cpp

diff --git a/Llamaserver.py b/Llamaserver.py
index 2dba932f4..dd5dba287 100644
--- a/Llamaserver.py
+++ b/Llamaserver.py
@@ -9,11 +9,14 @@ def print_dict(data):
     for k, v in data.items():
         if isinstance(v, dict):
             print_dict(v)
-        elif isinstance(v, list):
+        elif k == "content":
+            print(f"Key: {k:>30}: {v}")
+            return
+        elif isinstance(data, list):
             for entry in v:
                 print_dict(entry)
-        elif k == "content":
-            print(f"Key: {k:>30}: {v}")
+        elif isinstance(data, str):
+            print(f"Incoming string is {data}.\n")
     return
 
 def print_response(text):
diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt
index 653abc73a..7fb2e1e92 100644
--- a/examples/CMakeLists.txt
+++ b/examples/CMakeLists.txt
@@ -17,6 +17,7 @@ else()
     add_subdirectory(batched-bench)
     add_subdirectory(beam-search)
     add_subdirectory(benchmark)
+    add_subdirectory(cmap-example)
    add_subdirectory(convert-llama2c-to-ggml)
     add_subdirectory(embedding)
     add_subdirectory(finetune)
diff --git a/examples/cmap-example/CMakeLists.txt b/examples/cmap-example/CMakeLists.txt
index 6298b2c7e..d62ca26cf 100644
--- a/examples/cmap-example/CMakeLists.txt
+++ b/examples/cmap-example/CMakeLists.txt
@@ -1,6 +1,6 @@
-set(TARGET kvcacheviz)
+set(TARGET TCPshellscript)
 include_directories(${CMAKE_CURRENT_SOURCE_DIR})
-add_executable(${TARGET} kvcacheviz.cpp)
+add_executable(${TARGET} TCPshellscript.cpp)
 install(TARGETS ${TARGET} RUNTIME)
 target_link_libraries(${TARGET} PRIVATE common llava ${CMAKE_THREAD_LIBS_INIT})
 if (WIN32)
diff --git a/examples/cmap-example/TCPshellscript.cpp b/examples/cmap-example/TCPshellscript.cpp
new file mode 100644
index 000000000..12b46ec9b
--- /dev/null
+++ b/examples/cmap-example/TCPshellscript.cpp
@@ -0,0 +1,46 @@
+// Code to run the terminal shell command `lsof -i :8080` from C++
+
+#include <cstdio>
+#include <cstdlib>
+#include <iostream>
+#include <stdexcept>
+#include <string>
+
+using namespace std;
+
+static string get_port_usage(int port) {
+    // Build the command string
+    string command = "lsof -i :" + to_string(port);
+
+    // Create a pipe for capturing output
+    FILE *pipe = popen(command.c_str(), "r");
+    if (!pipe) {
+        cerr << "Error opening pipe" << endl;
+        return "";
+    }
+
+    // Read the output from the pipe
+    string output;
+    char buffer[128];
+    while (fgets(buffer, sizeof(buffer), pipe) != nullptr) {
+        output += buffer;
+    }
+
+    // Close the pipe
+    pclose(pipe);
+
+    return output;
+}
+
+int main() {
+    int port = 8080;
+    string output = get_port_usage(port);
+
+    if (output.empty()) {
+        cerr << "Error getting port " << port << " usage" << endl;
+    } else {
+        cout << "Port " << port << " usage:" << endl << output << endl;
+    }
+
+    return 0;
+}
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index ec586e2ed..bb0c99587 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -508,7 +508,7 @@ struct llama_server_context
         default_generation_settings_for_props = get_formatted_generation(slots.front());
         default_generation_settings_for_props["seed"] = -1;
 
-        batch = llama_batch_init(n_ctx_slot, 0, params.n_parallel); // this works fine with the slot context and saves VRAM
+        batch = llama_batch_init(n_ctx, 0, params.n_parallel); // this works fine with the slot context and saves VRAM
     }
 
     std::vector<llama_token> tokenize(const json & json_prompt, bool add_bos) const
@@ -567,9 +567,13 @@ struct llama_server_context
 
         for (llama_client_slot & slot : slots)
         {
+            if (slot.state == IDLE && slot.command != LOAD_PROMPT) {
+                LOG_TEE("Hijacking the first available slot %d\n", slot.id);
+                return &slot;
+            }
             if (slot.id == id && slot.available())
             {
-                LOG_TEE("Using available slot called by id: %d", slot.id);
+                LOG_TEE("Using if-based available slot called by id: %d", slot.id);
                 return &slot;
             }
 
@@ -577,7 +581,7 @@ struct llama_server_context
             {
                 last_used = &slot;
                 t_last = slot.t_last_used;
-                LOG_TEE("reusing earliest released slot id: %d\n", slot.id);
+                LOG_TEE("Using time-based slot id: %d\n", slot.id);
                 break;
             }
         }
@@ -1441,7 +1445,8 @@ struct llama_server_context
         switch (task.type)
         {
             case TASK_TYPE_COMPLETION: {
-                llama_client_slot *slot = get_slot(json_value(task.data, "slot_id", -1));
+                printf("Task data %d.\n", task.id);
+                llama_client_slot *slot = get_slot(json_value(task.data, "slot_id", -1)); // returns nullptr if no slot available
                 if (slot == nullptr)
                 {
                     // if no slot is available, we defer this task for processing later
@@ -2006,6 +2011,7 @@ static void server_print_usage(const char *argv0, const gpt_params &params,
     printf("  --mmproj MMPROJ_FILE      path to a multimodal projector file for LLaVA.\n");
     printf("  --log-disable             disables logging to a file.\n");
     printf("  --slots-endpoint-disable  disables slots monitoring endpoint.\n");
+    printf("  -skvg, --show-graphics    enable graphics displaying kvcache occupancy (default: false)\n");
     printf("\n");
     printf("  -n, --n-predict           maximum tokens to predict (default: %d)\n", params.n_predict);
     printf("  --override-kv KEY=TYPE:VALUE\n");
@@ -2716,7 +2722,7 @@ int main(int argc, char **argv)
     svr.Get("/slots", [&](const httplib::Request&, httplib::Response& res) {
         json slots;
         for (llama_client_slot & slot : llama.slots) {
-            json slot_data = llama.get_formated_generation(slot);
+            json slot_data = llama.get_formatted_generation(slot);
             slot_data["id"] = slot.id;
             slot_data["task_id"] = slot.task_id;
             slot_data["state"] = slot.state;
@@ -2902,6 +2908,7 @@ int main(int argc, char **argv)
         json data = json::parse(req.body);
         const int task_id = llama.queue_tasks.get_new_id();
         llama.queue_results.add_waiting_task_id(task_id);
+        LOG_TEE("Initiated new task %d.\n", task_id);
         llama.request_completion(task_id, data, false, false, -1);
         if (!json_value(data, "stream", false)) {
             std::string completion_text;
diff --git a/examples/server/utils.hpp b/examples/server/utils.hpp
index 0ee670dba..f016221a6 100644
--- a/examples/server/utils.hpp
+++ b/examples/server/utils.hpp
@@ -53,7 +53,7 @@ enum task_type {
 };
 
 struct task_server {
-    int id = -1; // to be filled by llama_server_queue
+    int id = -1; // for any instance, task id is not assigned yet; to be filled by llama_server_queue
     int target_id;
     task_type type;
     json data;
@@ -162,6 +162,9 @@
 
 template <typename T>
 static T json_value(const json &body, const std::string &key, const T &default_value) {
     // Fallback null to default value
+    if (body.contains(key) && !body.at(key).is_null()) {
+        LOG_TEE("Body at %s in %d\n", key.c_str(), int(body.at(key)));
+    }
     return body.contains(key) && !body.at(key).is_null() ?
        body.value(key, default_value) : default_value;
@@ -238,6 +241,7 @@ struct llama_server_queue {
             task.id = id++;
         }
         queue_tasks.push_back(std::move(task));
+        //LOG_TEE("Queue now has %2zu members.\n", queue_tasks.size());
         condition_tasks.notify_one();
         return task.id;
     }
@@ -246,11 +250,13 @@ struct llama_server_queue {
     // Add a new task, but defer until one slot is available
     void defer(task_server task) {
         std::unique_lock<std::mutex> lock(mutex_tasks);
         queue_tasks_deferred.push_back(std::move(task));
+        LOG_TEE("Deferred task queue now has %3zu members.\n", queue_tasks_deferred.size());
     }
 
-    // Get the next id for creating anew task
+    // Get the next id for creating a new task
     int get_new_id() {
         std::unique_lock<std::mutex> lock(mutex_tasks);
+        LOG_TEE("New task id returned with value %d.\n", id);
         return id++;
     }
@@ -293,7 +299,7 @@ struct llama_server_queue {
         running = true;
         while (true) {
             // new task arrived
-            LOG_VERBOSE("have new task", {});
+            LOG_VERBOSE("have new task number %d.\n", {});
             {
                 while (true)
                 {
@@ -305,10 +311,8 @@ struct llama_server_queue {
                     task_server task = queue_tasks.front();
                     queue_tasks.erase(queue_tasks.begin());
                     lock.unlock();
-                    LOG_VERBOSE("callback_new_task", {});
                     callback_new_task(task);
                 }
-                LOG_VERBOSE("callback_all_task_finished", {});
                 // process and update all the multitasks
                 auto queue_iterator = queue_multitasks.begin();
                 while (queue_iterator != queue_multitasks.end())
@@ -326,7 +330,7 @@ struct llama_server_queue {
                     ++queue_iterator;
                 }
             }
-            // all tasks in the current loop is finished
+            // all tasks in the current loop are finished
            callback_all_task_finished();
         }
         LOG_VERBOSE("wait for new task", {});
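
Usage note: the helper added in examples/cmap-example/TCPshellscript.cpp shells out to `lsof -i :<port>` via popen() and returns whatever lsof prints. Below is a minimal sketch (not part of the patch) of how the same idea could be turned into a yes/no check before binding the server port; the name is_port_in_use() is illustrative only, and it assumes a POSIX system where popen()/pclose() and the lsof utility are available.

// Sketch only: reuses the popen()/lsof approach from TCPshellscript.cpp as a
// boolean port check. is_port_in_use() is a hypothetical helper, not part of the patch.
#include <cstdio>
#include <iostream>
#include <string>

static bool is_port_in_use(int port) {
    // Any output from `lsof -i :<port>` means some process owns the port.
    std::string command = "lsof -i :" + std::to_string(port);
    FILE *pipe = popen(command.c_str(), "r");
    if (!pipe) {
        return false; // could not run the check; treat the port as free
    }
    char buffer[128];
    bool in_use = false;
    while (fgets(buffer, sizeof(buffer), pipe) != nullptr) {
        in_use = true;
    }
    pclose(pipe);
    return in_use;
}

int main() {
    const int port = 8080; // same default port the server example listens on
    std::cout << "Port " << port << (is_port_in_use(port) ? " is in use" : " appears free") << std::endl;
    return 0;
}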