Merge branch 'ggerganov:master' into master
This commit is contained in:
commit
eaf938889c
4 changed files with 8 additions and 9 deletions
|
@ -1090,22 +1090,21 @@ struct server_context {
|
||||||
size_t pos = std::min(slot.n_sent_text, slot.generated_text.size());
|
size_t pos = std::min(slot.n_sent_text, slot.generated_text.size());
|
||||||
|
|
||||||
const std::string str_test = slot.generated_text.substr(pos);
|
const std::string str_test = slot.generated_text.substr(pos);
|
||||||
bool is_stop_full = false;
|
bool send_text = true;
|
||||||
|
|
||||||
size_t stop_pos = slot.find_stopping_strings(str_test, token_str.size(), STOP_TYPE_FULL);
|
size_t stop_pos = slot.find_stopping_strings(str_test, token_str.size(), STOP_TYPE_FULL);
|
||||||
if (stop_pos != std::string::npos) {
|
if (stop_pos != std::string::npos) {
|
||||||
is_stop_full = true;
|
|
||||||
slot.generated_text.erase(
|
slot.generated_text.erase(
|
||||||
slot.generated_text.begin() + pos + stop_pos,
|
slot.generated_text.begin() + pos + stop_pos,
|
||||||
slot.generated_text.end());
|
slot.generated_text.end());
|
||||||
pos = std::min(slot.n_sent_text, slot.generated_text.size());
|
pos = std::min(slot.n_sent_text, slot.generated_text.size());
|
||||||
} else {
|
} else if (slot.has_next_token) {
|
||||||
is_stop_full = false;
|
|
||||||
stop_pos = slot.find_stopping_strings(str_test, token_str.size(), STOP_TYPE_PARTIAL);
|
stop_pos = slot.find_stopping_strings(str_test, token_str.size(), STOP_TYPE_PARTIAL);
|
||||||
|
send_text = stop_pos == std::string::npos;
|
||||||
}
|
}
|
||||||
|
|
||||||
// check if there is any token to predict
|
// check if there is any token to predict
|
||||||
if (stop_pos == std::string::npos || (!slot.has_next_token && !is_stop_full && stop_pos > 0)) {
|
if (send_text) {
|
||||||
// no send the stop word in the response
|
// no send the stop word in the response
|
||||||
result.text_to_send = slot.generated_text.substr(pos, std::string::npos);
|
result.text_to_send = slot.generated_text.substr(pos, std::string::npos);
|
||||||
slot.n_sent_text += result.text_to_send.size();
|
slot.n_sent_text += result.text_to_send.size();
|
||||||
|
|
|
@ -348,7 +348,6 @@ struct tensor_alloc {
|
||||||
};
|
};
|
||||||
|
|
||||||
struct leaf_alloc {
|
struct leaf_alloc {
|
||||||
int buffer_id;
|
|
||||||
struct tensor_alloc leaf;
|
struct tensor_alloc leaf;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -740,7 +739,6 @@ bool ggml_gallocr_reserve_n(ggml_gallocr_t galloc, struct ggml_cgraph * graph, c
|
||||||
for (int i = 0; i < graph->n_leafs; i++) {
|
for (int i = 0; i < graph->n_leafs; i++) {
|
||||||
struct ggml_tensor * leaf = graph->leafs[i];
|
struct ggml_tensor * leaf = graph->leafs[i];
|
||||||
struct hash_node * hn = ggml_gallocr_hash_get(galloc, leaf);
|
struct hash_node * hn = ggml_gallocr_hash_get(galloc, leaf);
|
||||||
galloc->leaf_allocs[i].buffer_id = hn->buffer_id;
|
|
||||||
if (leaf->view_src || leaf->data) {
|
if (leaf->view_src || leaf->data) {
|
||||||
galloc->leaf_allocs[i].leaf.buffer_id = -1;
|
galloc->leaf_allocs[i].leaf.buffer_id = -1;
|
||||||
galloc->leaf_allocs[i].leaf.offset = SIZE_MAX;
|
galloc->leaf_allocs[i].leaf.offset = SIZE_MAX;
|
||||||
|
|
|
@ -1 +1 @@
|
||||||
564f42082f858f9674b2a2e06e9e779d9ed2c754
|
2327bda7a55ac6b72614ac5ebd5c5a5e02553b9b
|
||||||
|
|
|
@ -16095,9 +16095,11 @@ struct llm_build_context {
|
||||||
cur = ggml_get_rows(ctx0, cur, inp_out_ids);
|
cur = ggml_get_rows(ctx0, cur, inp_out_ids);
|
||||||
|
|
||||||
cur = llm_build_norm(ctx0, cur, hparams, model.output_norm, model.output_norm_b, LLM_NORM, cb, -1);
|
cur = llm_build_norm(ctx0, cur, hparams, model.output_norm, model.output_norm_b, LLM_NORM, cb, -1);
|
||||||
cur = llm_build_lora_mm(lctx, ctx0, model.output, cur);
|
cb(cur, "result_norm", -1);
|
||||||
|
|
||||||
|
cur = llm_build_lora_mm(lctx, ctx0, model.output, cur);
|
||||||
cb(cur, "result_output", -1);
|
cb(cur, "result_output", -1);
|
||||||
|
|
||||||
ggml_build_forward_expand(gf, cur);
|
ggml_build_forward_expand(gf, cur);
|
||||||
|
|
||||||
return gf;
|
return gf;
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue