Renaming some vars
parent f51554180a
commit 5d61ae8d2a
2 changed files with 9 additions and 52 deletions
@@ -253,8 +253,8 @@ struct server_slot {
         std::string stdout_reset;
         std::string stderr_target;
         std::string stderr_reset;
-        double t_token = t_prompt_processing / num_prompt_tokens_processed;
-        double n_tokens_second = 1e3 / t_prompt_processing * num_prompt_tokens_processed;
+        double t_token = t_prompt_processing / n_prompt_tokens_processed;
+        double n_tokens_second = 1e3 / t_prompt_processing * n_prompt_tokens_processed;
         //printf("\033[72;0H]");
         sprintf(buffer, "prompt eval time = %10.2f ms / %5d tokens (%8.2f ms per token, %8.2f tokens per second)",
                 t_prompt_processing, n_prompt_tokens_processed,
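Aside: the renamed locals encode a simple rate conversion. Milliseconds per token is total prompt time divided by token count, and tokens per second is the same ratio inverted and scaled by 1000. A self-contained sketch with made-up numbers (variable names follow the diff):

    #include <cstdio>

    int main() {
        double t_prompt_processing       = 250.0; // total prompt eval time in ms (made-up)
        int    n_prompt_tokens_processed = 50;    // tokens evaluated (made-up)

        double t_token = t_prompt_processing / n_prompt_tokens_processed;               // 5.00 ms/token
        double n_tokens_second = 1e3 / t_prompt_processing * n_prompt_tokens_processed; // 200.00 tokens/s
        printf("%8.2f ms per token, %8.2f tokens per second\n", t_token, n_tokens_second);
        return 0;
    }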
@@ -328,7 +328,7 @@ struct server_metrics {

     // experimental/diagnostic graphic to show kvcache status
     // requires just `slots` and `params.n_ctx` as parameters
-    static void kvgraphics(std::vector<llama_client_slot>& slots) {
+    static void kvgraphics(std::vector<server_slot>& slots) {

         int max_length = 144;
         int num_blocks = slots.size();
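The comment above describes a terminal graphic for KV-cache status, but the function body is not part of this hunk. The following is only a sketch of the likely idea, one fill bar per slot; slot_view and its n_cached/n_ctx fields are stand-ins, not names from the codebase:

    #include <cstdio>
    #include <vector>

    // Minimal stand-in for the real slot type; `n_cached` and `n_ctx` are
    // assumed fields, not confirmed by the diff.
    struct slot_view {
        int id;
        int n_cached; // tokens currently held in this slot's KV cache
        int n_ctx;    // context capacity available to this slot
    };

    // One usage bar per slot, scaled to a fixed number of terminal columns,
    // mirroring the `max_length` constant visible in the hunk.
    void kvgraphics_sketch(const std::vector<slot_view> & slots) {
        const int max_length = 144;
        for (const auto & s : slots) {
            const int filled = s.n_ctx > 0 ? max_length * s.n_cached / s.n_ctx : 0;
            printf("slot %2d [", s.id);
            for (int i = 0; i < max_length; i++) {
                putchar(i < filled ? '#' : '.');
            }
            printf("] %d/%d\n", s.n_cached, s.n_ctx);
        }
    }

    int main() {
        std::vector<slot_view> slots = { {0, 96, 512}, {1, 400, 512} };
        kvgraphics_sketch(slots);
        return 0;
    }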
@@ -604,7 +604,7 @@ struct llama_server_context
     // the logic seems wrong in this function
     // why should there be an id in a task matching a slot.id before a slot has been assigned?
     // most commonly id = -1 so we deal with that first rather than the specified id > 0
-    llama_client_slot* get_slot(int id) {
+    server_slot* get_slot(int id) {
         int64_t t_last = ggml_time_us();
         server_slot *last_used = nullptr;

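The comments argue that the common id == -1 case (no specific slot requested) should be handled first. A self-contained sketch of that ordering, using stub types; the real function presumably uses the t_last/last_used variables visible above for the least-recently-used scan:

    #include <cstdint>
    #include <vector>

    // Minimal stand-in for the real server_slot; only what the sketch needs.
    struct slot_stub {
        int     id;
        int64_t t_last_used; // last-use timestamp (the real code uses ggml_time_us())
        bool    available;
    };

    // Sketch: pick the least-recently-used available slot when id == -1,
    // and only then fall back to looking up an explicitly requested id.
    slot_stub * get_slot_sketch(std::vector<slot_stub> & slots, int id) {
        if (id == -1) {
            slot_stub * last_used = nullptr;
            int64_t t_last = INT64_MAX; // the real code seeds this with the current time
            for (auto & s : slots) {
                if (s.available && s.t_last_used < t_last) {
                    t_last    = s.t_last_used;
                    last_used = &s;
                }
            }
            return last_used;
        }
        for (auto & s : slots) {
            if (s.id == id && s.available) {
                return &s;
            }
        }
        return nullptr; // requested slot is busy or does not exist
    }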
@@ -1168,7 +1168,7 @@ struct llama_server_context
         queue_results.send(res);
     }

-    json get_formatted_generation(llama_client_slot &slot)
+    json get_formatted_generation(server_slot &slot)
     {
         const auto eos_bias = slot.sparams.logit_bias.find(llama_token_eos(model));
         const bool ignore_eos = eos_bias != slot.sparams.logit_bias.end() &&
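The condition on the last line is truncated in this view. A sketch of the pattern it starts, with a plain map standing in for slot.sparams.logit_bias; the completion after the && (testing for a negative-infinity bias on EOS) is an assumption, not taken from the diff:

    #include <cmath>
    #include <cstdio>
    #include <unordered_map>

    using llama_token = int; // stand-in for the real typedef

    int main() {
        const llama_token token_eos = 2; // made-up EOS token id
        std::unordered_map<llama_token, float> logit_bias;
        logit_bias[token_eos] = -INFINITY; // a -inf bias disables the token

        const auto eos_bias = logit_bias.find(token_eos);
        // Assumed completion of the truncated condition: an EOS entry whose
        // bias is -inf is read as "ignore EOS".
        const bool ignore_eos = eos_bias != logit_bias.end() &&
                                eos_bias->second == -INFINITY;
        printf("ignore_eos = %s\n", ignore_eos ? "true" : "false");
        return 0;
    }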
@@ -1270,7 +1270,7 @@ struct llama_server_context
             {"model", params.model_alias},
             {"tokens_predicted", slot.n_decoded},
             {"tokens_evaluated", slot.n_prompt_tokens},
-            {"generation_settings", get_formated_generation(slot)},
+            {"generation_settings", get_formatted_generation(slot)},
             {"prompt", slot.prompt},
             {"truncated", slot.truncated},
             {"stopped_eos", slot.stopped_eos},
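For reference, the initializer this hunk edits assembles the per-request result object. A hedged sample of its shape, restricted to the keys visible here, with invented values:

    #include <iostream>
    #include <nlohmann/json.hpp>

    using json = nlohmann::json;

    int main() {
        // Made-up values; only the keys visible in this hunk are included.
        const json data = {
            {"model",               "some-model-alias"},
            {"tokens_predicted",    128},
            {"tokens_evaluated",    50},
            {"generation_settings", json::object()}, // from get_formatted_generation()
            {"prompt",              "Hello"},
            {"truncated",           false},
            {"stopped_eos",         true},
        };
        std::cout << data.dump(2) << std::endl;
        return 0;
    }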
@@ -1559,7 +1559,7 @@ struct llama_server_context
         int n_idle_slots = 0;
         int n_processing_slots = 0;

-        for (llama_client_slot &slot: slots) {
+        for (server_slot &slot: slots) {
             json slot_data = get_formatted_generation(slot);
             slot_data["id"] = slot.id;
             slot_data["task_id"] = slot.task_id;
@@ -137,48 +137,6 @@ struct task_multi {
     std::vector<task_result> results{};
 };

-// TODO: can become bool if we can't find use of more states; MAYBE there is a case for RESERVED to keep slots dedicated to chats?
-enum slot_state
-{
-    IDLE,
-    PROCESSING,
-};
-
-enum slot_command
-{
-    NONE,
-    LOAD_PROMPT,
-    RELEASE,
-};
-
-struct slot_params
-{
-    bool stream = true;
-    bool cache_prompt = false; // remember the prompt to avoid reprocessing all prompt
-
-    uint32_t seed = -1; // RNG seed
-    int32_t n_keep = 0; // number of tokens to keep from initial prompt
-    int32_t n_predict = -1; // new tokens to predict
-
-    std::vector<std::string> antiprompt;
-
-    json input_prefix;
-    json input_suffix;
-};
-
-struct slot_image
-{
-    int32_t id;
-
-    bool request_encode_image = false;
-    float * image_embedding = nullptr;
-    int32_t image_tokens = 0;
-
-    clip_image_u8 * img_data;
-
-    std::string prefix_prompt; // before of this image
-};
-
 // completion token output with probabilities
 struct completion_token_output {
     struct token_prob
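The removed TODO wonders whether slot_state could shrink to a bool or instead grow a RESERVED state that pins a slot to one chat. A sketch of the second idea; RESERVED and slot_selectable are hypothetical, nothing like them exists in the code:

    #include <cstdio>

    // Hypothetical extension from the removed TODO: a RESERVED state that
    // keeps a slot dedicated to one chat even while it is idle.
    enum slot_state {
        IDLE,
        PROCESSING,
        RESERVED, // not in the codebase; sketched from the TODO comment
    };

    // A reserved slot is only selectable by the task holding the reservation.
    bool slot_selectable(slot_state state, bool task_matches_reservation) {
        switch (state) {
            case IDLE:       return true;
            case PROCESSING: return false;
            case RESERVED:   return task_matches_reservation;
        }
        return false;
    }

    int main() {
        printf("%d %d %d\n",
               slot_selectable(IDLE, false),
               slot_selectable(RESERVED, false),
               slot_selectable(RESERVED, true)); // prints: 1 0 1
        return 0;
    }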
@@ -198,7 +156,6 @@ struct token_translator {
     std::string operator()(const completion_token_output &cto) const { return (*this)(cto.tok); }
 };

-static inline void server_log(const char *level, const char *function, int line, const char *message, const nlohmann::ordered_json &extra) {
+static inline void server_log(
+    const char *level,
+    const char *function,
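The new multi-line declaration is cut off above, but the removed one-liner shows the full parameter list, so a matching definition and call site can still be sketched; the stub body here is illustrative, not the server's implementation:

    #include <cstdio>
    #include <nlohmann/json.hpp>

    // Stub with the parameter list from the removed one-line declaration;
    // the body is a placeholder, not the server's logging logic.
    static inline void server_log(
        const char *level,
        const char *function,
        int line,
        const char *message,
        const nlohmann::ordered_json &extra) {
        printf("[%s] %s:%d %s %s\n",
               level, function, line, message, extra.dump().c_str());
    }

    int main() {
        server_log("INFO", __func__, __LINE__, "slot released", {{"slot_id", 0}});
        return 0;
    }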