minor updates and TCPshellscript
This commit is contained in:
parent
d2f97227ba
commit
7f0d8987eb
6 changed files with 77 additions and 16 deletions
|
@ -9,12 +9,15 @@ def print_dict(data):
|
||||||
for k, v in data.items():
|
for k, v in data.items():
|
||||||
if isinstance(v, dict):
|
if isinstance(v, dict):
|
||||||
print_dict(v)
|
print_dict(v)
|
||||||
elif isinstance(v, list):
|
|
||||||
for entry in v:
|
|
||||||
print_dict(entry)
|
|
||||||
elif k == "content":
|
elif k == "content":
|
||||||
print(f"Key: {k:>30}: {v}")
|
print(f"Key: {k:>30}: {v}")
|
||||||
return
|
return
|
||||||
|
elif isinstance(data, list):
|
||||||
|
for entry in v:
|
||||||
|
print_dict(entry)
|
||||||
|
elif isinstance(data, str):
|
||||||
|
print(f"Incoming string is {data}.\n")
|
||||||
|
return
|
||||||
|
|
||||||
def print_response(text):
|
def print_response(text):
|
||||||
print(text)
|
print(text)
|
||||||
|
|
|
@ -17,6 +17,7 @@ else()
|
||||||
add_subdirectory(batched-bench)
|
add_subdirectory(batched-bench)
|
||||||
add_subdirectory(beam-search)
|
add_subdirectory(beam-search)
|
||||||
add_subdirectory(benchmark)
|
add_subdirectory(benchmark)
|
||||||
|
add_subdirectory(cmap-example)
|
||||||
add_subdirectory(convert-llama2c-to-ggml)
|
add_subdirectory(convert-llama2c-to-ggml)
|
||||||
add_subdirectory(embedding)
|
add_subdirectory(embedding)
|
||||||
add_subdirectory(finetune)
|
add_subdirectory(finetune)
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
set(TARGET kvcacheviz)
|
set(TARGET TCPshellscript)
|
||||||
include_directories(${CMAKE_CURRENT_SOURCE_DIR})
|
include_directories(${CMAKE_CURRENT_SOURCE_DIR})
|
||||||
add_executable(${TARGET} kvcacheviz.cpp)
|
add_executable(${TARGET} TCPshellscript.cpp)
|
||||||
install(TARGETS ${TARGET} RUNTIME)
|
install(TARGETS ${TARGET} RUNTIME)
|
||||||
target_link_libraries(${TARGET} PRIVATE common llava ${CMAKE_THREAD_LIBS_INIT})
|
target_link_libraries(${TARGET} PRIVATE common llava ${CMAKE_THREAD_LIBS_INIT})
|
||||||
if (WIN32)
|
if (WIN32)
|
||||||
|
|
46
examples/cmap-example/TCPshellscript.cpp
Normal file
46
examples/cmap-example/TCPshellscript.cpp
Normal file
|
@ -0,0 +1,46 @@
|
||||||
|
// Code to run the terminal shell command `lsof -i :8080` from C++
|
||||||
|
|
||||||
|
#include <array>
#include <cstdio>
#include <fstream>
#include <iostream>
#include <memory>
#include <sstream>
#include <string>
#include <vector>
|
||||||
|
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
|
// Runs `lsof -i :<port>` and returns everything the command wrote to stdout.
//
// @param port  TCP/UDP port number to query (interpolated into the command).
// @return the captured lsof output; an empty string when the pipe could not
//         be opened. NOTE(review): an empty string is ALSO returned when lsof
//         prints nothing (i.e. nothing is listening on the port) — callers
//         cannot distinguish the two cases through this interface.
static string get_port_usage(int port) {
    // Build the shell command string.
    const string command = "lsof -i :" + to_string(port);

    // popen() gives a read-side pipe over the child's stdout. Owning it via
    // unique_ptr with pclose as the deleter guarantees the pipe is closed on
    // every exit path (RAII), instead of relying on a manual pclose call.
    unique_ptr<FILE, decltype(&pclose)> pipe(popen(command.c_str(), "r"), &pclose);
    if (!pipe) {
        cerr << "Error opening pipe" << endl;
        return "";
    }

    // Drain the pipe line-by-line into the output string.
    string output;
    array<char, 256> buffer;
    while (fgets(buffer.data(), static_cast<int>(buffer.size()), pipe.get()) != nullptr) {
        output += buffer.data();
    }

    // pipe's destructor pclose()s here.
    return output;
}
|
||||||
|
|
||||||
|
int main() {
|
||||||
|
int port = 8080;
|
||||||
|
string output = get_port_usage(port);
|
||||||
|
|
||||||
|
if (output.empty()) {
|
||||||
|
cerr << "Error getting port " << port << " usage" << endl;
|
||||||
|
} else {
|
||||||
|
cout << "Port " << port << " usage:" << endl << output << endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
|
@ -508,7 +508,7 @@ struct llama_server_context
|
||||||
default_generation_settings_for_props = get_formatted_generation(slots.front());
|
default_generation_settings_for_props = get_formatted_generation(slots.front());
|
||||||
default_generation_settings_for_props["seed"] = -1;
|
default_generation_settings_for_props["seed"] = -1;
|
||||||
|
|
||||||
batch = llama_batch_init(n_ctx_slot, 0, params.n_parallel); // this works fine with the slot context and saves VRAM
|
batch = llama_batch_init(n_ctx, 0, params.n_parallel); // this works fine with the slot context and saves VRAM
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<llama_token> tokenize(const json & json_prompt, bool add_bos) const
|
std::vector<llama_token> tokenize(const json & json_prompt, bool add_bos) const
|
||||||
|
@ -567,9 +567,13 @@ struct llama_server_context
|
||||||
|
|
||||||
for (llama_client_slot & slot : slots)
|
for (llama_client_slot & slot : slots)
|
||||||
{
|
{
|
||||||
|
if (slot.state == IDLE && slot.command != LOAD_PROMPT) {
|
||||||
|
LOG_TEE("Hijacking the first available slot %d\n", slot.id);
|
||||||
|
return &slot;
|
||||||
|
}
|
||||||
if (slot.id == id && slot.available())
|
if (slot.id == id && slot.available())
|
||||||
{
|
{
|
||||||
LOG_TEE("Using available slot called by id: %d", slot.id);
|
LOG_TEE("Using if-based available slot called by id: %d", slot.id);
|
||||||
return &slot;
|
return &slot;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -577,7 +581,7 @@ struct llama_server_context
|
||||||
{
|
{
|
||||||
last_used = &slot;
|
last_used = &slot;
|
||||||
t_last = slot.t_last_used;
|
t_last = slot.t_last_used;
|
||||||
LOG_TEE("reusing earliest released slot id: %d\n", slot.id);
|
LOG_TEE("Using time-based slot id: %d\n", slot.id);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1441,7 +1445,8 @@ struct llama_server_context
|
||||||
switch (task.type)
|
switch (task.type)
|
||||||
{
|
{
|
||||||
case TASK_TYPE_COMPLETION: {
|
case TASK_TYPE_COMPLETION: {
|
||||||
llama_client_slot *slot = get_slot(json_value(task.data, "slot_id", -1));
|
printf("Task data %d.\n", task.id);
|
||||||
|
llama_client_slot *slot = get_slot(json_value(task.data, "slot_id", -1)); // returns nullptr if no slot available
|
||||||
if (slot == nullptr)
|
if (slot == nullptr)
|
||||||
{
|
{
|
||||||
// if no slot is available, we defer this task for processing later
|
// if no slot is available, we defer this task for processing later
|
||||||
|
@ -2006,6 +2011,7 @@ static void server_print_usage(const char *argv0, const gpt_params ¶ms,
|
||||||
printf(" --mmproj MMPROJ_FILE path to a multimodal projector file for LLaVA.\n");
|
printf(" --mmproj MMPROJ_FILE path to a multimodal projector file for LLaVA.\n");
|
||||||
printf(" --log-disable disables logging to a file.\n");
|
printf(" --log-disable disables logging to a file.\n");
|
||||||
printf(" --slots-endpoint-disable disables slots monitoring endpoint.\n");
|
printf(" --slots-endpoint-disable disables slots monitoring endpoint.\n");
|
||||||
|
printf(" -skvg, --show-graphics enable graphics displaying kvcache occupancy (default: false)");
|
||||||
printf("\n");
|
printf("\n");
|
||||||
printf(" -n, --n-predict maximum tokens to predict (default: %d)\n", params.n_predict);
|
printf(" -n, --n-predict maximum tokens to predict (default: %d)\n", params.n_predict);
|
||||||
printf(" --override-kv KEY=TYPE:VALUE\n");
|
printf(" --override-kv KEY=TYPE:VALUE\n");
|
||||||
|
@ -2716,7 +2722,7 @@ int main(int argc, char **argv)
|
||||||
svr.Get("/slots", [&](const httplib::Request&, httplib::Response& res) {
|
svr.Get("/slots", [&](const httplib::Request&, httplib::Response& res) {
|
||||||
json slots;
|
json slots;
|
||||||
for (llama_client_slot & slot : llama.slots) {
|
for (llama_client_slot & slot : llama.slots) {
|
||||||
json slot_data = llama.get_formated_generation(slot);
|
json slot_data = llama.get_formatted_generation(slot);
|
||||||
slot_data["id"] = slot.id;
|
slot_data["id"] = slot.id;
|
||||||
slot_data["task_id"] = slot.task_id;
|
slot_data["task_id"] = slot.task_id;
|
||||||
slot_data["state"] = slot.state;
|
slot_data["state"] = slot.state;
|
||||||
|
@ -2902,6 +2908,7 @@ int main(int argc, char **argv)
|
||||||
json data = json::parse(req.body);
|
json data = json::parse(req.body);
|
||||||
const int task_id = llama.queue_tasks.get_new_id();
|
const int task_id = llama.queue_tasks.get_new_id();
|
||||||
llama.queue_results.add_waiting_task_id(task_id);
|
llama.queue_results.add_waiting_task_id(task_id);
|
||||||
|
LOG_TEE("Initiated new task %d.\n", task_id);
|
||||||
llama.request_completion(task_id, data, false, false, -1);
|
llama.request_completion(task_id, data, false, false, -1);
|
||||||
if (!json_value(data, "stream", false)) {
|
if (!json_value(data, "stream", false)) {
|
||||||
std::string completion_text;
|
std::string completion_text;
|
||||||
|
|
|
@ -53,7 +53,7 @@ enum task_type {
|
||||||
};
|
};
|
||||||
|
|
||||||
struct task_server {
|
struct task_server {
|
||||||
int id = -1; // to be filled by llama_server_queue
|
int id = -1; // for any instance, task id is not assigned yet; to be filled by llama_server_queue
|
||||||
int target_id;
|
int target_id;
|
||||||
task_type type;
|
task_type type;
|
||||||
json data;
|
json data;
|
||||||
|
@ -162,6 +162,9 @@ template <typename T>
|
||||||
static T json_value(const json &body, const std::string &key, const T &default_value)
|
static T json_value(const json &body, const std::string &key, const T &default_value)
|
||||||
{
|
{
|
||||||
// Fallback null to default value
|
// Fallback null to default value
|
||||||
|
if (body.contains(key) && !body.at(key).is_null()) {
|
||||||
|
LOG_TEE("Body at %s in %d\n", key.c_str(), int(body.at(key)));
|
||||||
|
}
|
||||||
return body.contains(key) && !body.at(key).is_null()
|
return body.contains(key) && !body.at(key).is_null()
|
||||||
? body.value(key, default_value)
|
? body.value(key, default_value)
|
||||||
: default_value;
|
: default_value;
|
||||||
|
@ -238,6 +241,7 @@ struct llama_server_queue {
|
||||||
task.id = id++;
|
task.id = id++;
|
||||||
}
|
}
|
||||||
queue_tasks.push_back(std::move(task));
|
queue_tasks.push_back(std::move(task));
|
||||||
|
//LOG_TEE("Queue now has %2zu members.\n", queue_tasks.size());
|
||||||
condition_tasks.notify_one();
|
condition_tasks.notify_one();
|
||||||
return task.id;
|
return task.id;
|
||||||
}
|
}
|
||||||
|
@ -246,11 +250,13 @@ struct llama_server_queue {
|
||||||
void defer(task_server task) {
|
void defer(task_server task) {
|
||||||
std::unique_lock<std::mutex> lock(mutex_tasks);
|
std::unique_lock<std::mutex> lock(mutex_tasks);
|
||||||
queue_tasks_deferred.push_back(std::move(task));
|
queue_tasks_deferred.push_back(std::move(task));
|
||||||
|
LOG_TEE("Deferred task queue now has %3zu members.\n", queue_tasks_deferred.size());
|
||||||
}
|
}
|
||||||
|
|
||||||
// Get the next id for creating a new task
|
// Get the next id for creating a new task
|
||||||
int get_new_id() {
|
int get_new_id() {
|
||||||
std::unique_lock<std::mutex> lock(mutex_tasks);
|
std::unique_lock<std::mutex> lock(mutex_tasks);
|
||||||
|
LOG_TEE("New task id returned with value %d.\n", id);
|
||||||
return id++;
|
return id++;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -293,7 +299,7 @@ struct llama_server_queue {
|
||||||
running = true;
|
running = true;
|
||||||
while (true) {
|
while (true) {
|
||||||
// new task arrived
|
// new task arrived
|
||||||
LOG_VERBOSE("have new task", {});
|
LOG_VERBOSE("have new task number %d.\n", {});
|
||||||
{
|
{
|
||||||
while (true)
|
while (true)
|
||||||
{
|
{
|
||||||
|
@ -305,10 +311,8 @@ struct llama_server_queue {
|
||||||
task_server task = queue_tasks.front();
|
task_server task = queue_tasks.front();
|
||||||
queue_tasks.erase(queue_tasks.begin());
|
queue_tasks.erase(queue_tasks.begin());
|
||||||
lock.unlock();
|
lock.unlock();
|
||||||
LOG_VERBOSE("callback_new_task", {});
|
|
||||||
callback_new_task(task);
|
callback_new_task(task);
|
||||||
}
|
}
|
||||||
LOG_VERBOSE("callback_all_task_finished", {});
|
|
||||||
// process and update all the multitasks
|
// process and update all the multitasks
|
||||||
auto queue_iterator = queue_multitasks.begin();
|
auto queue_iterator = queue_multitasks.begin();
|
||||||
while (queue_iterator != queue_multitasks.end())
|
while (queue_iterator != queue_multitasks.end())
|
||||||
|
@ -326,7 +330,7 @@ struct llama_server_queue {
|
||||||
++queue_iterator;
|
++queue_iterator;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// all tasks in the current loop is finished
|
// all tasks in the current loop are finished
|
||||||
callback_all_task_finished();
|
callback_all_task_finished();
|
||||||
}
|
}
|
||||||
LOG_VERBOSE("wait for new task", {});
|
LOG_VERBOSE("wait for new task", {});
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue