finetune bug fixes to compile with merged in code from master
This commit is contained in:
parent daedc6f419
commit 5ce92aed37
1 changed file with 54 additions and 54 deletions
@@ -17,8 +17,6 @@
 #pragma warning(disable: 4244 4267) // possible loss of data
 #endif
 
-static const float rms_norm_eps = LLAMA_DEFAULT_RMS_EPS;
-
 struct random_normal_distribution {
     std::mt19937 gen;
     std::normal_distribution<float> rd;
@@ -195,11 +193,13 @@ struct my_llama_hparams {
     uint32_t n_vocab = 32000;
     uint32_t n_ctx   = 512;   // this is provided as user input?
     uint32_t n_embd  = 4096;
-    uint32_t n_mult  = 4;
+    uint32_t n_ff    = 11008;
     uint32_t n_head  = 32;
     uint32_t n_layer = 32;
     uint32_t n_rot   = 64;
 
+    float f_rms_norm_eps = 1e-5f;
+
     bool operator!=(const my_llama_hparams& other) const {
         return memcmp(this, &other, sizeof(other));
     }
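Note: the two additions in this hunk replace global or derived state with explicit hyperparameters. n_ff is now stored directly (11008 is the LLaMA-7B feed-forward width), and f_rms_norm_eps carries per-model what used to be the file-scope rms_norm_eps constant deleted in the first hunk. As a minimal standalone sketch of where that epsilon ends up, this is the computation ggml_rms_norm applies tensor-wide:

    #include <cmath>
    #include <cstddef>

    // Reference RMSNorm: x_i -> x_i / sqrt(mean(x^2) + eps). The eps, here
    // hparams.f_rms_norm_eps, keeps the scale finite when the activations
    // are near zero.
    void rms_norm_ref(float * x, size_t n, float eps) {
        double sumsq = 0.0;
        for (size_t i = 0; i < n; ++i) {
            sumsq += (double) x[i] * x[i];
        }
        const float scale = 1.0f / sqrtf((float)(sumsq / n) + eps);
        for (size_t i = 0; i < n; ++i) {
            x[i] *= scale;
        }
    }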
@@ -304,18 +304,12 @@ struct my_llama_lora {
     uint32_t train_tokens = 0;
 };
 
-uint32_t get_n_ff(const struct my_llama_hparams* hparams) {
-    const uint32_t n_ff = ((2*(4*hparams->n_embd)/3 + hparams->n_mult - 1)/hparams->n_mult)*hparams->n_mult;
-    return n_ff;
-}
-
 void print_params(struct my_llama_hparams * params) {
     printf("%s: n_vocab: %u\n", __func__, params->n_vocab);
     printf("%s: n_ctx:   %u\n", __func__, params->n_ctx);
     printf("%s: n_embd:  %u\n", __func__, params->n_embd);
-    printf("%s: n_mult:  %u\n", __func__, params->n_mult);
+    printf("%s: n_ff:    %u\n", __func__, params->n_ff);
     printf("%s: n_head:  %u\n", __func__, params->n_head);
-    printf("%s: n_ff:    %u\n", __func__, get_n_ff(params));
     printf("%s: n_layer: %u\n", __func__, params->n_layer);
     printf("%s: n_rot:   %u\n", __func__, params->n_rot);
 }
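Note: the deleted get_n_ff() was the legacy rule that derived the feed-forward width from n_mult for the original LLaMA checkpoints; GGUF-era models report n_ff directly, so the helper is dead code. For reference, a standalone sketch of the old derivation (n_mult = 256 is the multiple the LLaMA-7B release used, which is exactly what produces the 11008 default above):

    #include <cstdint>
    #include <cstdio>

    // Legacy rule from the deleted helper: take 2/3 of 4*n_embd and round it
    // up to a multiple of n_mult.
    uint32_t get_n_ff_legacy(uint32_t n_embd, uint32_t n_mult) {
        return ((2*(4*n_embd)/3 + n_mult - 1)/n_mult)*n_mult;
    }

    int main() {
        printf("%u\n", get_n_ff_legacy(4096, 256)); // prints 11008
    }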
@@ -338,19 +332,18 @@ void print_lora_params(struct my_llama_lora_hparams * params) {
 void init_model(struct llama_model * input, struct my_llama_model * model, uint32_t n_ctx) {
     auto & hparams = model->hparams;
 
-    hparams.n_vocab = llama_n_vocab_from_model(input);
+    hparams.n_vocab = llama_model_n_vocab(input);
     hparams.n_ctx   = n_ctx;
-    hparams.n_embd  = llama_n_embd_from_model(input);
-    hparams.n_mult  = llama_n_mult_from_model(input);
-    hparams.n_head  = llama_n_head_from_model(input);
-    hparams.n_layer = llama_n_layer_from_model(input);
-    hparams.n_rot   = llama_n_rot_from_model(input);
+    hparams.n_embd  = llama_model_n_embd(input);
+    hparams.n_ff    = llama_model_n_ff(input);
+    hparams.n_head  = llama_model_n_head(input);
+    hparams.n_layer = llama_model_n_layer(input);
+    hparams.n_rot   = llama_model_n_rot(input);
 
     const uint32_t n_embd  = hparams.n_embd;
     const uint32_t n_layer = hparams.n_layer;
     const uint32_t n_vocab = hparams.n_vocab;
-
-    const uint32_t n_ff = get_n_ff(&hparams);
+    const uint32_t n_ff    = hparams.n_ff;
 
     model->tok_embeddings = llama_get_model_tensor(input, "tok_embeddings.weight");
     model->norm           = llama_get_model_tensor(input, "norm.weight");
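Note: the change here is mechanical: each llama_n_X_from_model(input) accessor became llama_model_n_X(input) in the merged master, and n_mult gives way to n_ff. A hedged usage sketch of the updated entry point; fn_model_base and llama_params stand in for the caller's setup, and cleanup is omitted:

    // Sketch only: mirrors how main() further down drives init_model().
    void load_base_model(const char * fn_model_base, struct llama_context_params llama_params) {
        struct llama_model * lmodel = llama_load_model_from_file(fn_model_base, llama_params);
        struct my_llama_model model;
        init_model(lmodel, &model, /*n_ctx*/ 512); // n_ctx stays user input, not read from the model
    }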
@@ -398,7 +391,7 @@ void init_lora(const struct my_llama_model * model, struct my_llama_lora * lora)
     const uint32_t n_embd  = model->hparams.n_embd;
     const uint32_t n_layer = model->hparams.n_layer;
     const uint32_t n_vocab = model->hparams.n_vocab;
-    const uint32_t n_ff    = get_n_ff(&model->hparams);
+    const uint32_t n_ff    = model->hparams.n_ff;
 
     struct ggml_context * ctx = lora->ctx;
 
@@ -603,6 +596,8 @@ struct ggml_tensor * forward(
     const int n_head  = hparams.n_head;
     const int n_rot   = hparams.n_rot;
 
+    const float rms_norm_eps = hparams.f_rms_norm_eps;
+
     GGML_ASSERT(n_layer == lora->layers.size());
 
     struct ggml_tensor * tokens = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, N);
@@ -1082,7 +1077,8 @@ struct ggml_tensor * llama_build_lora_finetune_graphs(
     const int n_layer = hparams.n_layer;
     const int n_head  = hparams.n_head;
     const int n_rot   = hparams.n_rot;
-    const int n_ff    = get_n_ff(&hparams);
+    const int n_ff    = hparams.n_ff;
+    const float rms_norm_eps = hparams.f_rms_norm_eps;
     const int rope_mode = 0;
 
     GGML_ASSERT(n_layer == lora->layers.size());
@@ -1317,7 +1313,7 @@ void print_matrix(struct ggml_tensor * probs) {
 
 
 void print_token(struct llama_context * ctx, llama_token token) {
-    printf("%s", llama_token_to_str(ctx, token));
+    printf("%s", llama_token_get_text(ctx, token));
 }
 
 void print_tokens(struct llama_context* ctx, struct ggml_tensor * tokens) {
@@ -1351,7 +1347,7 @@ void print_tokens_batch(struct llama_context* ctx, struct ggml_tensor * tokens)
     }
 }
 
-void get_example_targets(const int * train_samples, size_t n_train_samples, const llama_token * train_data, size_t n_train_data, int example_id, struct ggml_tensor * tokens_input, struct ggml_tensor * target_logits, struct ggml_tensor * target_probs) {
+void get_example_targets(struct llama_context * lctx, const int * train_samples, size_t n_train_samples, const llama_token * train_data, size_t n_train_data, int example_id, struct ggml_tensor * tokens_input, struct ggml_tensor * target_logits, struct ggml_tensor * target_probs) {
     int n_tokens = tokens_input->ne[0];
     int n_vocab  = target_logits->ne[0];
 
@@ -1360,7 +1356,7 @@ void get_example_targets(const int * train_samples, size_t n_train_samples, const llama_token * train_data, size_t n_train_data, int example_id, struct ggml_tensor * tokens_input, struct ggml_tensor * target_logits, struct ggml_tensor * target_probs) {
 
     ggml_set_f32(target_logits, -1.0f/n_vocab);
     ggml_set_f32(target_probs,  0.0f);
-    ggml_set_i32_1d(tokens_input, 0, llama_token_bos());
+    ggml_set_i32_1d(tokens_input, 0, llama_token_bos(lctx));
     for (int i=1; i<n_tokens+1; ++i) {
         int token = clamp(train_data[sample+i-1], 0, n_vocab-1);
         set_f32_2d(target_logits, token, i-1, +1.0f);
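Note: the surrounding lines build the training targets that this hunk's BOS lookup feeds: every position gets a baseline of -1/n_vocab in target_logits, then +1 is written at the observed next token. A standalone illustration with plain vectors in place of ggml tensors:

    #include <vector>

    // One target column as get_example_targets fills it: a one-hot at the
    // observed token written over a small negative baseline.
    std::vector<float> target_column(int n_vocab, int token) {
        std::vector<float> col(n_vocab, -1.0f / n_vocab);
        col[token] = +1.0f;
        return col;
    }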
@@ -1371,7 +1367,7 @@ void get_example_targets(const int * train_samples, size_t n_train_samples, const llama_token * train_data, size_t n_train_data, int example_id, struct ggml_tensor * tokens_input, struct ggml_tensor * target_logits, struct ggml_tensor * target_probs) {
     }
 }
 
-void get_example_targets_batch(const int * train_samples, size_t n_train_samples, const llama_token * train_data, size_t n_train_data, int example_id, struct ggml_tensor * tokens_input, struct ggml_tensor * target_logits, struct ggml_tensor * target_probs) {
+void get_example_targets_batch(struct llama_context* lctx, const int * train_samples, size_t n_train_samples, const llama_token * train_data, size_t n_train_data, int example_id, struct ggml_tensor * tokens_input, struct ggml_tensor * target_logits, struct ggml_tensor * target_probs) {
     GGML_ASSERT(tokens_input->n_dims  == 2);
     GGML_ASSERT(target_logits->n_dims == 3);
     GGML_ASSERT(target_probs->n_dims  == 3);
@@ -1394,7 +1390,7 @@ void get_example_targets_batch(const int * train_samples, size_t n_train_samples, const llama_token * train_data, size_t n_train_data, int example_id, struct ggml_tensor * tokens_input, struct ggml_tensor * target_logits, struct ggml_tensor * target_probs) {
         // printf("%s: sample_idx=%zu sample=%zu\n", __func__, sample_idx, sample);
         GGML_ASSERT(sample+n_tokens-1 < n_train_data);
 
-        set_i32_2d(tokens_input, 0, k, llama_token_bos());
+        set_i32_2d(tokens_input, 0, k, llama_token_bos(lctx));
         for (int i=1; i<n_tokens+1; ++i) {
             int token = clamp(train_data[sample+i-1], 0, n_vocab-1);
             set_f32_3d(target_logits, token, i-1, k, +1.0f);
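Note: these hunks are why the get_example_targets* signatures grew an lctx parameter: after the merge, special-token ids are queried on the context rather than via free functions. A minimal sketch of the convention, assuming the signatures this branch's llama.h exposes:

    #include "llama.h"

    // Any helper that emits a BOS token (or queries NL, as sample() does
    // below) now needs the context threaded through to it.
    llama_token begin_of_sequence(struct llama_context * lctx) {
        return llama_token_bos(lctx); // was: llama_token_bos()
    }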
@@ -1544,7 +1540,7 @@ int tokenize_file(struct llama_context * lctx, const char * filename, std::vecto
     const char * in  = buf.data();
     const char * end = buf.data() + buf.size();
     for (int i = 0; i < (int) out.size(); ++i) {
-        const char * s = llama_token_to_str(lctx, out[i]);
+        const char * s = llama_token_get_text(lctx, out[i]);
         int len = strlen(s);
         if (in >= end) {
             printf("%s: unexpected end of original text.\n", __func__);
@@ -1617,7 +1613,7 @@ void init_sampler(struct my_llama_sampler * sampler, struct llama_context * ctx)
     sampler->mirostat_mu = 2.0f * sampler->params.mirostat_tau;
 }
 
-llama_token sample(struct my_llama_sampler * sampler, float * logits, const llama_token * last_tokens, int n_last_tokens) {
+llama_token sample(struct llama_context * lctx, struct my_llama_sampler * sampler, float * logits, const llama_token * last_tokens, int n_last_tokens) {
     GGML_ASSERT(sampler->ctx != NULL);
 
     struct llama_context * ctx = sampler->ctx;
@@ -1638,7 +1634,7 @@ llama_token sample(struct my_llama_sampler * sampler, float * logits, const llama_token * last_tokens, int n_last_tokens) {
     const auto params = sampler->params;
 
     // Apply penalties
-    const float nl_logit = logits[llama_token_nl()];
+    const float nl_logit = logits[llama_token_nl(lctx)];
 
     const int n_last = std::min(std::min(n_last_tokens, params.repeat_last_n), sampler->n_ctx);
 
@@ -1657,7 +1653,7 @@ llama_token sample(struct my_llama_sampler * sampler, float * logits, const llama_token * last_tokens, int n_last_tokens) {
         params.presence_penalty);
 
     if (!params.penalize_nl) {
-        logits[llama_token_nl()] = nl_logit;
+        logits[llama_token_nl(lctx)] = nl_logit;
     }
 
     llama_token token = 0;
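Note: taken together, the three sample()-related hunks show why the function gained an lctx parameter at all: the newline token id is needed to save, and optionally restore, the NL logit around the repetition penalties. A condensed sketch of that pattern (llama_token_nl as used in this branch; the penalty calls themselves are elided):

    #include "llama.h"

    // Save the newline logit before applying penalties, then restore it when
    // newlines should not be penalized, as sample() does above.
    void protect_nl_logit(struct llama_context * lctx, float * logits, bool penalize_nl) {
        const float nl_logit = logits[llama_token_nl(lctx)];
        // ... repetition/frequency/presence penalties would run here ...
        if (!penalize_nl) {
            logits[llama_token_nl(lctx)] = nl_logit;
        }
    }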
@@ -1884,7 +1880,7 @@ void save_checkpoint(struct my_llama_model * model, struct my_llama_lora * lora,
     file.write_u32(lora->train_tokens);
     file.write_u32(model->hparams.n_vocab);
     file.write_u32(model->hparams.n_embd);
-    file.write_u32(model->hparams.n_mult);
+    //file.write_u32(model->hparams.n_mult);
     file.write_u32(model->hparams.n_head);
     file.write_u32(model->hparams.n_layer);
     file.write_u32(model->hparams.n_rot);
@@ -1961,7 +1957,7 @@ bool load_checkpoint(struct my_llama_model * model, struct my_llama_lora * lora,
     uint32_t n_rot   = file.read_u32();
     GGML_ASSERT(n_vocab == model->hparams.n_vocab);
     GGML_ASSERT(n_embd  == model->hparams.n_embd);
-    GGML_ASSERT(n_mult  == model->hparams.n_mult);
+    //GGML_ASSERT(n_mult  == model->hparams.n_mult);
     GGML_ASSERT(n_head  == model->hparams.n_head);
     GGML_ASSERT(n_layer == model->hparams.n_layer);
     GGML_ASSERT(n_rot   == model->hparams.n_rot);
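Note: commenting out the n_mult write and its matching assert silently changes the checkpoint layout by one u32. A hypothetical sketch, not part of this commit, of making such a removal explicit by gating on the header version (the cutoff value is an assumption for illustration):

    #include <cstdint>

    // Hypothetical: record field removals against a header version instead
    // of leaving commented-out asserts behind.
    bool checkpoint_has_n_mult(uint32_t file_version) {
        const uint32_t n_mult_removed_in = 1; // assumed cutoff, illustration only
        return file_version < n_mult_removed_in;
    }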
@@ -2042,8 +2038,9 @@ void save_as_llama_lora(struct my_llama_lora * lora, const char * filename, cons
         return;
     }
 
+    uint32_t LLAMA_FILE_MAGIC_LORA = 0x67676C61; // 'ggla'
     // write_magic
-    file.write_u32(LLAMA_FILE_MAGIC_GGLA); // magic
+    file.write_u32(LLAMA_FILE_MAGIC_LORA); // magic
     file.write_u32(1); // version
     // write_hparams
     file.write_u32(lora->hparams.lora_r);
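Note: the new local constant pins down the LoRA file magic the build no longer provides: 0x67676C61 is simply the bytes 'g','g','l','a'. A self-contained check:

    #include <cstdint>
    #include <cstdio>

    int main() {
        const uint32_t magic = 0x67676C61; // 'ggla'
        // Print the four bytes from most to least significant: g g l a
        for (int shift = 24; shift >= 0; shift -= 8) {
            putchar((int)((magic >> shift) & 0xff));
        }
        putchar('\n');
    }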
@@ -2667,6 +2664,7 @@ struct opt_callback_data {
     struct ggml_opt_context * opt;
     struct my_llama_model * model;
     struct my_llama_lora * lora;
+    struct llama_context * lctx;
     int last_save_iter;
     llama_token * tokens_data;
     size_t tokens_size;
@@ -2728,6 +2726,7 @@ void opt_callback(void * vdata, float * sched) {
     }
 
     get_example_targets_batch(
+        data->lctx,
         data->samples_data,
         data->samples_size,
         data->tokens_data,
@@ -2760,24 +2759,24 @@ int main(int argc, char ** argv) {
     struct llama_model * lmodel = llama_load_model_from_file(params.fn_model_base, llama_params);
     struct llama_context * lctx = llama_new_context_with_model(lmodel, llama_params);
 
-    struct llama_vocab vocab;
-    {
-        std::vector<const char *> strings;
-        std::vector<float> scores;
-        int n_vocab = llama_n_vocab(lctx);
-        strings.resize(n_vocab, NULL);
-        scores.resize(n_vocab, 0);
-        n_vocab = llama_get_vocab(lctx, strings.data(), scores.data(), n_vocab);
-        GGML_ASSERT(n_vocab == llama_n_vocab(lctx));
-        vocab.id_to_token.resize(n_vocab);
-        for (int i=0; i<n_vocab; ++i) {
-            std::string tok = std::string(strings[i]);
-            float score = scores[i];
-            vocab.id_to_token[i].tok = tok;
-            vocab.id_to_token[i].score = score;
-            vocab.token_to_id.emplace(tok, i);
-        }
-    }
+    //struct llama_vocab vocab;
+    //{
+    //    std::vector<const char *> strings;
+    //    std::vector<float> scores;
+    //    int n_vocab = llama_n_vocab(lctx);
+    //    strings.resize(n_vocab, NULL);
+    //    scores.resize(n_vocab, 0);
+    //    n_vocab = llama_get_vocab(lctx, strings.data(), scores.data(), n_vocab);
+    //    GGML_ASSERT(n_vocab == llama_n_vocab(lctx));
+    //    vocab.id_to_token.resize(n_vocab);
+    //    for (int i=0; i<n_vocab; ++i) {
+    //        std::string tok = std::string(strings[i]);
+    //        float score = scores[i];
+    //        vocab.id_to_token[i].tok = tok;
+    //        vocab.id_to_token[i].score = score;
+    //        vocab.token_to_id.emplace(tok, i);
+    //    }
+    //}
 
     printf("%s: tokenize training data\n", __func__);
     std::vector<llama_token> train_tokens;
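Note: the commented-out block dates from when llama_vocab was part of the public surface; after the gguf merge the struct is internal, and this file already reads token text through llama_token_get_text (see the print_token and tokenize_file hunks). A hedged sketch of the accessor-based equivalent; llama_token_get_score is assumed to exist alongside llama_token_get_text in this branch's llama.h:

    #include "llama.h"
    #include <cstdio>

    // Walk the vocab through accessors instead of copying it into a local
    // llama_vocab as the dead code above did.
    void dump_vocab(struct llama_context * lctx) {
        const int n_vocab = llama_n_vocab(lctx);
        for (llama_token i = 0; i < n_vocab; ++i) {
            printf("%d: %s (%f)\n", i, llama_token_get_text(lctx, i), llama_token_get_score(lctx, i));
        }
    }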
@@ -2911,7 +2910,7 @@ int main(int argc, char ** argv) {
     std::vector<int> train_samples;
     train_samples.push_back(0);
     for (int i = 1; i < (int) train_tokens.size() - n_tokens; ++i) {
-        if (!params.samples_start_after_nl || (train_tokens[i-1] == llama_token_nl())) {
+        if (!params.samples_start_after_nl || (train_tokens[i-1] == llama_token_nl(lctx))) {
             train_samples.push_back(i);
         }
     }
@@ -2929,6 +2928,7 @@ int main(int argc, char ** argv) {
     opt_cb_data.opt = opt;
     opt_cb_data.model = &model;
     opt_cb_data.lora = &lora;
+    opt_cb_data.lctx = lctx;
     opt_cb_data.last_save_iter = opt->iter;
     opt_cb_data.tokens_data = train_tokens.data();
     opt_cb_data.tokens_size = train_tokens.size();
@@ -3031,7 +3031,7 @@ int main(int argc, char ** argv) {
     for (int i=0; i<n_batch; ++i) {
         init_sampler(&sampler, lctx);
         for (int k=0; k<n_tokens; ++k) {
-            int32_t token = sample(&sampler,
+            int32_t token = sample(lctx, &sampler,
                 (float *) ((char *) logits->data + i*logits->nb[2] + k*logits->nb[1]),
                 (llama_token *) ((char *) tokens_input->data + i*tokens_input->nb[1]),
                 k);
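Note: the call indexes a 3-D logits tensor by raw byte strides: nb[1] steps between token positions, nb[2] between batch rows. A standalone sketch of the same arithmetic, assuming a contiguous float tensor so that nb[0] = sizeof(float), nb[1] = nb[0]*n_vocab, nb[2] = nb[1]*n_tokens:

    #include <cstddef>

    // Start of row (token k, batch i) in a [n_vocab, n_tokens, n_batch]
    // float tensor addressed by byte strides, as in the sample() call above.
    float * logits_row(char * data, size_t nb1, size_t nb2, int i, int k) {
        return (float *)(data + (size_t) i * nb2 + (size_t) k * nb1);
    }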
@@ -3101,7 +3101,7 @@ int main(int argc, char ** argv) {
     struct ggml_tensor * target_logits = ggml_new_tensor_2d(lora.ctx, GGML_TYPE_F32, n_vocab, n_tokens);
     struct ggml_tensor * target_probs  = ggml_new_tensor_2d(lora.ctx, GGML_TYPE_F32, n_vocab, n_tokens);
 
-    get_example_targets(train_samples.data(), train_samples.size(), train_tokens.data(), train_tokens.size(), rand()%train_samples.size(), tokens_input, target_logits, target_probs);
+    get_example_targets(lctx, train_samples.data(), train_samples.size(), train_tokens.data(), train_tokens.size(), rand()%train_samples.size(), tokens_input, target_logits, target_probs);
     for (int i=sample_ctx; i<n_tokens; ++i) {
         ggml_set_i32_1d(tokens_input, i, n_vocab/2);
     }
@@ -3131,7 +3131,7 @@ int main(int argc, char ** argv) {
     //struct ggml_tensor * probs = ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_vocab, sample_ctx);
 
     // set_logits_masked(logits, token_notavail, -1e9);
-    int token = sample(&sampler,
+    int token = sample(lctx, &sampler,
         (float *) ((char *) logits->data + (sample_ctx-1)*logits->nb[1]),
         (llama_token *) tokens_input->data,
         sample_ctx-1);