fix spacing
This commit is contained in:
parent
e2992ea332
commit
51381f8f5d
1 changed files with 51 additions and 45 deletions
|
@ -33,8 +33,7 @@
|
||||||
|
|
||||||
using json = nlohmann::json;
|
using json = nlohmann::json;
|
||||||
|
|
||||||
struct server_params
|
struct server_params {
|
||||||
{
|
|
||||||
std::string hostname = "127.0.0.1";
|
std::string hostname = "127.0.0.1";
|
||||||
std::vector<std::string> api_keys;
|
std::vector<std::string> api_keys;
|
||||||
std::string public_path = "examples/server/public";
|
std::string public_path = "examples/server/public";
|
||||||
|
@ -177,8 +176,7 @@ struct server_slot {
|
||||||
|
|
||||||
generated_token_probs.clear();
|
generated_token_probs.clear();
|
||||||
|
|
||||||
for (slot_image & img : images)
|
for (slot_image & img : images) {
|
||||||
{
|
|
||||||
free(img.image_embedding);
|
free(img.image_embedding);
|
||||||
if (img.img_data) {
|
if (img.img_data) {
|
||||||
clip_image_u8_free(img.img_data);
|
clip_image_u8_free(img.img_data);
|
||||||
|
@ -190,19 +188,15 @@ struct server_slot {
|
||||||
}
|
}
|
||||||
|
|
||||||
bool has_budget(gpt_params &global_params) {
|
bool has_budget(gpt_params &global_params) {
|
||||||
if (params.n_predict == -1 && global_params.n_predict == -1)
|
if (params.n_predict == -1 && global_params.n_predict == -1) {
|
||||||
{
|
|
||||||
return true; // limitless
|
return true; // limitless
|
||||||
}
|
}
|
||||||
|
|
||||||
n_remaining = -1;
|
n_remaining = -1;
|
||||||
|
|
||||||
if (params.n_predict != -1)
|
if (params.n_predict != -1) {
|
||||||
{
|
|
||||||
n_remaining = params.n_predict - n_decoded;
|
n_remaining = params.n_predict - n_decoded;
|
||||||
}
|
} else if (global_params.n_predict != -1) {
|
||||||
else if (global_params.n_predict != -1)
|
|
||||||
{
|
|
||||||
n_remaining = global_params.n_predict - n_decoded;
|
n_remaining = global_params.n_predict - n_decoded;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -218,8 +212,7 @@ struct server_slot {
|
||||||
}
|
}
|
||||||
|
|
||||||
void add_token_string(const completion_token_output &token) {
|
void add_token_string(const completion_token_output &token) {
|
||||||
if (command == RELEASE)
|
if (command == RELEASE) {
|
||||||
{
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
cache_tokens.push_back(token.tok);
|
cache_tokens.push_back(token.tok);
|
||||||
|
@ -1227,9 +1220,7 @@ struct llama_server_context
|
||||||
const int n_embd = llama_n_embd(model);
|
const int n_embd = llama_n_embd(model);
|
||||||
if (!params.embedding)
|
if (!params.embedding)
|
||||||
{
|
{
|
||||||
LOG_WARNING("embedding disabled", {
|
LOG_WARNING("embedding disabled", {{"params.embedding", params.embedding}});
|
||||||
{"params.embedding", params.embedding},
|
|
||||||
});
|
|
||||||
res.result_json = json
|
res.result_json = json
|
||||||
{
|
{
|
||||||
{"embedding", std::vector<float>(n_embd, 0.0f)},
|
{"embedding", std::vector<float>(n_embd, 0.0f)},
|
||||||
|
@ -1329,7 +1320,17 @@ struct llama_server_context
|
||||||
}
|
}
|
||||||
|
|
||||||
const int n_embd = llama_n_embd(model);
|
const int n_embd = llama_n_embd(model);
|
||||||
llama_batch batch_img = { n_eval, nullptr, (img.image_embedding + i * n_embd), nullptr, nullptr, nullptr, nullptr, slot.n_past, 1, 0, };
|
llama_batch batch_img = {
|
||||||
|
n_eval,
|
||||||
|
nullptr,
|
||||||
|
(img.image_embedding + i * n_embd),
|
||||||
|
nullptr,
|
||||||
|
nullptr,
|
||||||
|
nullptr,
|
||||||
|
nullptr,
|
||||||
|
slot.n_past,
|
||||||
|
1, 0
|
||||||
|
};
|
||||||
if (llama_decode(ctx, batch_img))
|
if (llama_decode(ctx, batch_img))
|
||||||
{
|
{
|
||||||
LOG_TEE("%s : failed to eval image\n", __func__);
|
LOG_TEE("%s : failed to eval image\n", __func__);
|
||||||
|
@ -1714,8 +1715,13 @@ struct llama_server_context
|
||||||
const int n_block_size = n_left / 2;
|
const int n_block_size = n_left / 2;
|
||||||
const int erased_blocks = (slot.n_prompt_tokens - slot.params.n_keep - n_block_size) / n_block_size;
|
const int erased_blocks = (slot.n_prompt_tokens - slot.params.n_keep - n_block_size) / n_block_size;
|
||||||
|
|
||||||
std::vector<llama_token> new_tokens(prompt_tokens.begin(), prompt_tokens.begin() + slot.params.n_keep);
|
std::vector<llama_token> new_tokens(
|
||||||
new_tokens.insert(new_tokens.end(), prompt_tokens.begin() + slot.params.n_keep + erased_blocks * n_block_size, prompt_tokens.end());
|
prompt_tokens.begin(),
|
||||||
|
prompt_tokens.begin() + slot.params.n_keep);
|
||||||
|
new_tokens.insert(
|
||||||
|
new_tokens.end(),
|
||||||
|
prompt_tokens.begin() + slot.params.n_keep + erased_blocks * n_block_size,
|
||||||
|
prompt_tokens.end());
|
||||||
|
|
||||||
LOG_VERBOSE("input truncated", {
|
LOG_VERBOSE("input truncated", {
|
||||||
{"n_ctx", slot.n_ctx},
|
{"n_ctx", slot.n_ctx},
|
||||||
|
@ -1843,8 +1849,8 @@ struct llama_server_context
|
||||||
if (has_images && !ingest_images(slot, n_batch))
|
if (has_images && !ingest_images(slot, n_batch))
|
||||||
{
|
{
|
||||||
LOG_ERROR("failed processing images", {
|
LOG_ERROR("failed processing images", {
|
||||||
"slot_id", slot.id,
|
{"slot_id", slot.id},
|
||||||
"task_id", slot.task_id,
|
{"task_id", slot.task_id},
|
||||||
});
|
});
|
||||||
// FIXME @phymbert: to be properly tested
|
// FIXME @phymbert: to be properly tested
|
||||||
// early returning without changing the slot state will block the slot for ever
|
// early returning without changing the slot state will block the slot for ever
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue