Merge branch 'master' into xsn/fix_lora
This commit is contained in:
commit
e68344cb06
79 changed files with 3369 additions and 411 deletions
|
@ -1,3 +1,7 @@
|
|||
#if defined(_MSC_VER)
|
||||
#define _SILENCE_CXX17_CODECVT_HEADER_DEPRECATION_WARNING
|
||||
#endif
|
||||
|
||||
#include "common.h"
|
||||
// Change JSON_ASSERT from assert() to GGML_ASSERT:
|
||||
#define JSON_ASSERT GGML_ASSERT
|
||||
|
|
|
@ -630,7 +630,7 @@ inline std::string LOG_TOKENS_TOSTR_PRETTY(const C & ctx, const T & tokens)
|
|||
buf << "[ ";
|
||||
|
||||
bool first = true;
|
||||
for (const auto &token : tokens)
|
||||
for (const auto & token : tokens)
|
||||
{
|
||||
if (!first) {
|
||||
buf << ", ";
|
||||
|
|
|
@ -282,8 +282,6 @@ static llama_token llama_sampling_sample_impl(
|
|||
GGML_ASSERT(!original_logits.empty());
|
||||
}
|
||||
llama_token id = 0;
|
||||
// Get a pointer to the logits
|
||||
float * logits = llama_get_logits_ith(ctx_main, idx);
|
||||
|
||||
if (temp < 0.0) {
|
||||
// greedy sampling, with probs
|
||||
|
@ -324,6 +322,9 @@ static llama_token llama_sampling_sample_impl(
|
|||
}
|
||||
|
||||
if (ctx_sampling->grammar != NULL && !is_resampling) {
|
||||
// Get a pointer to the logits
|
||||
float * logits = llama_get_logits_ith(ctx_main, idx);
|
||||
|
||||
// Create an array with a single token data element for the sampled id
|
||||
llama_token_data single_token_data = {id, logits[id], 0.0f};
|
||||
llama_token_data_array single_token_data_array = { &single_token_data, 1, false };
|
||||
|
@ -377,7 +378,7 @@ static llama_token_data_array llama_sampling_prepare_impl(
|
|||
if (ctx_sampling->grammar != NULL && !apply_grammar) {
|
||||
GGML_ASSERT(original_logits != NULL);
|
||||
// Only make a copy of the original logits if we are not applying grammar checks, not sure if I actually have to do this.
|
||||
*original_logits = {logits, logits + llama_n_vocab(llama_get_model(ctx_main))};
|
||||
*original_logits = {logits, logits + n_vocab};
|
||||
}
|
||||
|
||||
// apply params.logit_bias map
|
||||
|
@ -390,10 +391,10 @@ static llama_token_data_array llama_sampling_prepare_impl(
|
|||
llama_sample_apply_guidance(ctx_main, logits, logits_guidance, params.cfg_scale);
|
||||
}
|
||||
|
||||
cur.clear();
|
||||
cur.resize(n_vocab);
|
||||
|
||||
for (llama_token token_id = 0; token_id < n_vocab; token_id++) {
|
||||
cur.emplace_back(llama_token_data{token_id, logits[token_id], 0.0f});
|
||||
cur[token_id] = llama_token_data{token_id, logits[token_id], 0.0f};
|
||||
}
|
||||
|
||||
llama_token_data_array cur_p = { cur.data(), cur.size(), false };
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue