minor : fix compile warnings + minor style changes
parent 6b7487d104
commit e829421eda

3 changed files with 39 additions and 41 deletions
@@ -953,7 +953,7 @@ struct ggml_tensor * forward_batch_wo_cache(
     const int N = n_tokens;

     const auto & hparams = model->hparams;
-    const int n_ctx = hparams.n_ctx;
+    //const int n_ctx = hparams.n_ctx;
     const int n_vocab = hparams.n_vocab;
     const int n_embd = hparams.n_embd;
     const int n_layer = hparams.n_layer;
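Review note: the pattern in this hunk recurs throughout the commit. Locals that are assigned but never read are commented out rather than deleted, which silences -Wunused-variable while keeping the name visible for readers. A minimal sketch of the warning class (hypothetical code, not from this commit; `(void) n_ctx;` would be the other common idiom):

    struct hparams_t { int n_ctx; int n_vocab; };

    int forward_sketch(const hparams_t & hparams) {
        const int n_ctx   = hparams.n_ctx;   // -Wunused-variable fires here
        const int n_vocab = hparams.n_vocab; // read below: no warning
        return n_vocab;
    }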
@@ -1181,7 +1181,7 @@ struct ggml_tensor * forward_batch_wo_cache_flash_attn(
     const int N = n_tokens;

     const auto & hparams = model->hparams;
-    const int n_ctx = hparams.n_ctx;
+    //const int n_ctx = hparams.n_ctx;
     const int n_vocab = hparams.n_vocab;
     const int n_embd = hparams.n_embd;
     const int n_layer = hparams.n_layer;
@@ -1368,7 +1368,7 @@ struct ggml_tensor * forward_batch_wo_cache_flash_attn_train(
     gf->work = NULL;

     const auto & hparams = model->hparams;
-    const int n_ctx = hparams.n_ctx;
+    //const int n_ctx = hparams.n_ctx;
     const int n_vocab = hparams.n_vocab;
     const int n_embd = hparams.n_embd;
     const int n_layer = hparams.n_layer;
@@ -1894,7 +1894,7 @@ void print_tokens(struct llama_context* ctx, struct ggml_tensor * tokens) {

 void print_tokens_batch(struct llama_context* ctx, struct ggml_tensor * tokens) {
     for (int i1=0; i1<tokens->ne[1]; ++i1) {
-        int num_newline = 0;
+        //int num_newline = 0;
         for (int i0=0; i0<tokens->ne[0]; ++i0) {
             int token = get_i32_2d(tokens, i0, i1);
             print_token(ctx, token);
@@ -1920,7 +1920,7 @@ void get_example_targets(const int * train_samples, size_t n_train_samples, cons
     int n_tokens = tokens_input->ne[0];
     int n_vocab = target_logits->ne[0];

-    int sample = train_samples[example_id % n_train_samples];
+    size_t sample = train_samples[example_id % n_train_samples];
     GGML_ASSERT(sample+n_tokens-1 < n_train_data);

     ggml_set_f32(target_logits, -1.0f/n_vocab);
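Review note (a sketch, assuming -Wsign-compare): `n_train_data` is a size_t, so with `int sample` the assert above compared signed with unsigned; the signed operand is converted to unsigned, and a negative value would wrap to a huge number and pass the bounds check. Widening `sample` to size_t makes both sides unsigned:

    #include <cassert>
    #include <cstddef>

    void bounds_check(const int * train_samples, int n_tokens, size_t n_train_data) {
        // before: int sample = ...; sample + n_tokens - 1 < n_train_data  // -Wsign-compare
        size_t sample = train_samples[0];             // unsigned index
        assert(sample + n_tokens - 1 < n_train_data); // unsigned vs unsigned: no warning
    }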
@@ -1936,7 +1936,7 @@ void get_example_targets(const int * train_samples, size_t n_train_samples, cons
     }
 }

-void get_example_targets_batch(struct llama_context * lctx, const int * train_samples, size_t n_train_samples, const llama_token * train_data, size_t n_train_data, int example_id, struct ggml_tensor * tokens_input, struct ggml_tensor * target_logits, struct ggml_tensor * target_probs) {
+void get_example_targets_batch(struct llama_context * /*lctx*/, const int * train_samples, size_t n_train_samples, const llama_token * train_data, size_t n_train_data, int example_id, struct ggml_tensor * tokens_input, struct ggml_tensor * target_logits, struct ggml_tensor * target_probs) {
     GGML_ASSERT(tokens_input->n_dims == 2);
     GGML_ASSERT(target_logits->n_dims == 3);
     GGML_ASSERT(target_probs->n_dims == 3);
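The `/*lctx*/` change above uses the C++ idiom for unused parameters: leaving the parameter unnamed (here with the name kept as a comment) silences -Wunused-parameter without changing the signature. A minimal sketch with hypothetical names:

    struct llama_context;

    // before: void f(struct llama_context * lctx, ...)  // warning: unused parameter 'lctx'
    void targets_sketch(struct llama_context * /*lctx*/, int example_id) {
        (void) example_id; // the other common idiom: an explicit void cast
    }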
@@ -1953,7 +1953,7 @@ void get_example_targets_batch(struct llama_context * lctx, const int * train_sa
     ggml_set_f32(target_probs, 0.0f);
     for (int k=0; k<n_batch; ++k) {
         // printf("%s: batch %d\n", __func__, k);
-        int sample = train_samples[(example_id*n_batch + k) % n_train_samples];
+        size_t sample = train_samples[(example_id*n_batch + k) % n_train_samples];
         GGML_ASSERT(sample+n_tokens-1 < n_train_data);

         set_i32_2d(tokens_input, 0, k, llama_token_bos());
@@ -2120,7 +2120,7 @@ int tokenize_file(struct llama_context * lctx, const char * filename, std::vecto
     if (verify) {
         const char * in = buf.data();
         const char * end = buf.data() + buf.size();
-        for (int i=0; i < out.size(); ++i) {
+        for (int i = 0; i < (int) out.size(); ++i) {
             const char * s = llama_token_to_str(lctx, out[i]);
             int len = strlen(s);
             if (in >= end) {
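Several loops in this commit get the same treatment as the one above: std::vector's size() returns size_t, so `i < out.size()` with an int counter is a signed/unsigned comparison. Casting the size to int is safe while the container is known to fit in an int; a size_t counter would be the other fix. Sketch, assuming -Wsign-compare:

    #include <vector>

    void walk(const std::vector<int> & out) {
        // before: for (int i=0; i < out.size(); ++i)   // -Wsign-compare
        for (int i = 0; i < (int) out.size(); ++i) {
            (void) out[i];
        }
    }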
@@ -2264,7 +2264,7 @@ llama_token sample(struct my_llama_sampler * sampler, float * logits, const llam
 }

 void set_logits_masked(struct ggml_tensor * logits, std::vector<bool>& mask, float value) {
-    GGML_ASSERT(logits->ne[0] == mask.size());
+    GGML_ASSERT(logits->ne[0] == (int64_t) mask.size());
     for (int i2 = 0; i2 < logits->ne[2]; ++i2) {
         for (int i1 = 0; i1 < logits->ne[1]; ++i1) {
             for (int i0 = 0; i0 < logits->ne[0]; ++i0) {
@@ -2301,7 +2301,7 @@ void write_tensor(struct llama_file * file, struct ggml_tensor * tensor) {
 }

 void read_tensor(struct llama_file * file, struct ggml_tensor * tensor) {
-    uint32_t nd = file->read_u32();
+    int32_t nd = file->read_u32();
     GGML_ASSERT(nd == tensor->n_dims);

     uint32_t name_len = file->read_u32();
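In ggml at this point, `ggml_tensor::n_dims` is a signed int, so holding the value read from the file in a uint32_t made `nd == tensor->n_dims` a mixed-sign comparison. Reading into an int32_t keeps both sides signed. A sketch with hypothetical stand-ins for the file API:

    #include <cassert>
    #include <cstdint>

    void check_dims(uint32_t value_from_file, int n_dims) {
        int32_t nd = (int32_t) value_from_file; // was uint32_t: 'nd == n_dims' warned
        assert(nd == n_dims);
    }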
@@ -3003,7 +3003,7 @@ int main(int argc, char ** argv) {
     if (tokenize_file(lctx, params.fn_train_data, train_tokens) < 0) {
         fprintf(stderr, "%s: failed to tokenize file '%s'\n", __func__, params.fn_train_data);
     }
-    printf("%s: number of training tokens: %d\n", __func__, train_tokens.size());
+    printf("%s: number of training tokens: %d\n", __func__, (int) train_tokens.size());

     struct my_llama_model model;
     model.hparams.n_vocab = llama_n_vocab(lctx);
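The printf fix above addresses -Wformat: `%d` expects an int, but size() returns size_t, which may be wider, and the mismatch is undefined behavior on platforms where the widths differ. The commit casts to int; `%zu` is the standard-conforming alternative. Sketch:

    #include <cstdio>
    #include <vector>

    void report(const std::vector<int> & train_tokens) {
        printf("tokens: %d\n", (int) train_tokens.size()); // the commit's fix
        printf("tokens: %zu\n", train_tokens.size());      // C99/C++11 alternative
    }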
@@ -3020,7 +3020,7 @@ int main(int argc, char ** argv) {
     std::vector<bool> token_notavail;
     token_noccurs.resize(model.hparams.n_vocab, 0);
     token_notavail.resize(model.hparams.n_vocab, true);
-    for (int i=0; i<train_tokens.size(); ++i) {
+    for (int i = 0; i < (int) train_tokens.size(); ++i) {
         ++token_noccurs[train_tokens[i]];
         token_notavail[train_tokens[i]] = false;
     }
@@ -3028,7 +3028,7 @@ int main(int argc, char ** argv) {
     std::vector<float> token_freq;
     token_freq.resize(model.hparams.n_vocab, 0);
     int n_unique_tokens = 0;
-    for (int i=0; i<token_noccurs.size(); ++i) {
+    for (int i = 0; i < (int) token_noccurs.size(); ++i) {
         token_freq[i] = (float) token_noccurs[i] / (float) train_tokens.size();
         n_unique_tokens += (token_noccurs[i] > 0) ? 1 : 0;
    }
@@ -3104,26 +3104,26 @@ int main(int argc, char ** argv) {
     uint8_t * compute_buf_1 = new uint8_t[size_buf_1];
     uint8_t * compute_buf_2 = new uint8_t[size_buf_2];

-    GGML_ASSERT(train_tokens.size() > n_tokens);;
+    GGML_ASSERT(n_tokens < (int) train_tokens.size());
     std::vector<int> train_samples;
     train_samples.push_back(0);
-    for (int i=1; i<train_tokens.size()-n_tokens; ++i) {
+    for (int i = 1; i < (int) train_tokens.size() - n_tokens; ++i) {
         if (!params.samples_start_after_nl || (train_tokens[i-1] == llama_token_nl())) {
             train_samples.push_back(i);
         }
     }
     shuffle_ints(train_samples.data(), train_samples.data() + train_samples.size());
-    for (int i=0; i<train_samples.size(); ++i) {
-        GGML_ASSERT(train_samples[i]+n_tokens-1 < train_tokens.size());
+    for (int i = 0; i < (int) train_samples.size(); ++i) {
+        GGML_ASSERT(train_samples[i]+n_tokens-1 < (int) train_tokens.size());
     }

     printf("%s: begin training\n", __func__);

-    for (int ex=0; ex<params.n_examples; ++ex) {
-        if (ex*n_batch >= train_samples.size()) {
+    for (int ex = 0; ex < params.n_examples; ++ex) {
+        if (ex*n_batch >= (int) train_samples.size()) {
             shuffle_ints(train_samples.data(), train_samples.data() + train_samples.size());
-            for (int i=0; i<train_samples.size(); ++i) {
-                GGML_ASSERT(train_samples[i]+n_tokens-1 < train_tokens.size());
+            for (int i = 0; i < (int) train_samples.size(); ++i) {
+                GGML_ASSERT(train_samples[i]+n_tokens-1 < (int) train_tokens.size());
             }
         }

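The first assert in the hunk above fixes two things at once: the stray second semicolon (an empty statement, flagged by clang's -Wextra-semi-stmt under stricter warning sets) and the unsigned-vs-signed `size() > n_tokens` comparison, rewritten with the int on the left and an explicit cast. A sketch of the rewrite:

    #include <vector>

    bool enough_tokens(const std::vector<int> & train_tokens, int n_tokens) {
        // before: GGML_ASSERT(train_tokens.size() > n_tokens);;  // sign-compare + extra ';'
        return n_tokens < (int) train_tokens.size();
    }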
@@ -3134,11 +3134,11 @@ int main(int argc, char ** argv) {
         };
         struct ggml_context * ctx0 = ggml_init(cparams);

-        struct ggml_tensor * after_opt_best_samples = ggml_new_tensor_2d(ctx0, GGML_TYPE_I32, n_tokens, n_batch);
-        struct ggml_tensor * after_opt_probs = ggml_new_tensor_3d(ctx0, GGML_TYPE_F32, n_vocab, n_tokens, n_batch);
-        struct ggml_tensor * tokens_input = ggml_new_tensor_2d(ctx0, GGML_TYPE_I32, n_tokens, n_batch);
-        struct ggml_tensor * target_logits = ggml_new_tensor_3d(ctx0, GGML_TYPE_F32, n_vocab, n_tokens, n_batch);
-        struct ggml_tensor * target_probs = ggml_new_tensor_3d(ctx0, GGML_TYPE_F32, n_vocab, n_tokens, n_batch);
+        struct ggml_tensor * after_opt_best_samples = ggml_new_tensor_2d(ctx0, GGML_TYPE_I32, n_tokens, n_batch);
+        //struct ggml_tensor * after_opt_probs = ggml_new_tensor_3d(ctx0, GGML_TYPE_F32, n_vocab, n_tokens, n_batch);
+        struct ggml_tensor * tokens_input = ggml_new_tensor_2d(ctx0, GGML_TYPE_I32, n_tokens, n_batch);
+        struct ggml_tensor * target_logits = ggml_new_tensor_3d(ctx0, GGML_TYPE_F32, n_vocab, n_tokens, n_batch);
+        struct ggml_tensor * target_probs = ggml_new_tensor_3d(ctx0, GGML_TYPE_F32, n_vocab, n_tokens, n_batch);

         int n_past = 0;
@@ -3293,8 +3293,8 @@ int main(int argc, char ** argv) {
         ggml_build_forward_expand(&gf, logits);
         ggml_graph_compute(ctx0, &gf);

-        struct ggml_tensor * best_samples = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, sample_ctx);
-        struct ggml_tensor * probs = ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_vocab, sample_ctx);
+        //struct ggml_tensor * best_samples = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, sample_ctx);
+        //struct ggml_tensor * probs = ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_vocab, sample_ctx);

         // set_logits_masked(logits, token_notavail, -1e9);
         int token = sample(&sampler,
18 ggml.c
@@ -10498,7 +10498,7 @@ static void ggml_compute_forward_out_prod_f32(
     const int64_t ne03 = src0->ne[3];

     const int64_t ne10 = src1->ne[0];
-    const int64_t ne11 = src1->ne[1];
+    //const int64_t ne11 = src1->ne[1];
     const int64_t ne12 = src1->ne[2];
     const int64_t ne13 = src1->ne[3];

@@ -10587,11 +10587,10 @@ static void ggml_compute_forward_out_prod_f32(
         const int64_t i02 = i2;
         const int64_t i03 = i3;

-        const int64_t i10 = i1;
+        //const int64_t i10 = i1;
         const int64_t i12 = i2;
         const int64_t i13 = i3;
-

         for (int64_t i01 = 0; i01 < ne01; ++i01) {
             const int64_t i11 = i01;
@@ -13956,8 +13955,7 @@ static void ggml_compute_forward_cross_entropy_loss_f32(
         return;
     }

-    const float eps = 1e-9f;
-
+    const double eps = 1e-9;

     // rows per thread
     const int dr = (nr + nth - 1)/nth;
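The float-to-double change above is more than style: 1e-9 is far below half an ulp of 1.0f (about 6e-8), so in float arithmetic `1.0f - eps` rounds back to exactly 1.0f and the epsilon had no effect on the rescaling factor used below. As a double the offset survives until the final narrowing. A check one can compile (a sketch, not from the commit):

    #include <cstdio>

    int main() {
        const float  eps_f = 1e-9f;
        const double eps_d = 1e-9;
        printf("%d\n", (1.0f - eps_f) == 1.0f); // 1: eps vanishes in float
        printf("%d\n", (1.0  - eps_d) == 1.0 ); // 0: preserved in double
    }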
@@ -14002,7 +14000,7 @@ static void ggml_compute_forward_cross_entropy_loss_f32(
             // sum = 1.0/sum;
         }
         // avoid log(0) by rescaling from [0..1] to [eps..1]
-        sum = (1.0f - eps) / sum;
+        sum = (1.0 - eps) / sum;
         ggml_vec_scale_f32(nc, st, sum);
         ggml_vec_add1_f32(nc, st, st, eps);
         ggml_vec_log_f32(nc, st, st);
@@ -14054,8 +14052,6 @@ static void ggml_compute_forward_cross_entropy_loss_back_f32(
     const int64_t ith = params->ith;
     const int64_t nth = params->nth;

-    float * sums = (float *) params->wdata;
-
     if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
         return;
     }
@@ -14090,6 +14086,8 @@ static void ggml_compute_forward_cross_entropy_loss_back_f32(
 #endif
     // step by step explanation:
+    {
+        //float * sums = (float *) params->wdata;

     // forward pass with annotated gradients from backward pass
     // (built by going in reverse operation order, adding to gradients of current operation args)
     // st0 = exp(s0-max(s0))      grad[st0] = grad[st1]*(1.0 - eps)/sum
@@ -14162,10 +14160,10 @@ static void ggml_compute_forward_cross_entropy_loss_back_f32(
         float dot_st1_dst1 = 0;
         ggml_vec_scale_f32(nc, sm, sum);
         ggml_vec_cpy_f32  (nc, ds0, sm);
-        ggml_vec_scale_f32(nc, ds0, (1.0 - eps));
+        ggml_vec_scale_f32(nc, ds0, (1.0f - eps));
         ggml_vec_add1_f32 (nc, ds0, ds0, eps);
         ggml_vec_div_f32  (nc, ds0, s1, ds0);
-        ggml_vec_scale_f32(nc, ds0, -(1.0 - eps)*d[0]);
+        ggml_vec_scale_f32(nc, ds0, -(1.0f - eps)*d[0]);
         ggml_vec_dot_f32  (nc, &dot_st1_dst1, sm, ds0);
         ggml_vec_acc1_f32 (nc, ds0, -dot_st1_dst1);
         ggml_vec_mul_f32  (nc, ds0, ds0, sm);
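The two suffix changes above keep the gradient scale factors in float: with `eps` a float, `(1.0 - eps)` promotes the whole expression to double, which is then narrowed back to float at the `ggml_vec_scale_f32` call, a conversion that -Wconversion (clang: -Wimplicit-float-conversion) reports. Writing `1.0f` keeps the arithmetic in float throughout. Sketch with a stand-in for the vector kernel:

    // stand-in for ggml_vec_scale_f32 (same shape of signature)
    static void scale_f32(int n, float * x, float v) {
        for (int i = 0; i < n; ++i) x[i] *= v;
    }

    void backward_scale(int nc, float * ds0, float eps, float d0) {
        scale_f32(nc, ds0, (1.0f - eps));     // was (1.0 - eps): double -> float narrowing
        scale_f32(nc, ds0, -(1.0f - eps)*d0); // same fix for the d[0]-scaled term
    }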
6 llama.h
@@ -193,9 +193,9 @@ extern "C" {
     // Returns number of results.
     LLAMA_API int llama_get_vocab(
             const struct llama_context * ctx,
-            const char * * strings,
-            float * scores,
-            int capacity);
+                          const char * * strings,
+                                 float * scores,
+                                     int   capacity);

     // Token logits obtained from the last call to llama_eval()
     // The logits for the last token are stored in the last row