simplify code

Author: xaedes
Date:   2023-05-22 20:53:57 +02:00
parent 0651679302
commit d3acbf644e


@@ -1144,58 +1144,9 @@ struct ggml_tensor * forward_batch_wo_cache(
     return inpL;
 }
 
-void print_row(struct ggml_tensor * probs, int i) {
-    for (int k = 0; k < probs->ne[0]; ++k) {
-        float p = ggml_get_f32_1d(probs, i*probs->ne[0] + k);
-        printf(" %.2f", p);
-    }
-    printf("\n");
-}
-
-void print_matrix(struct ggml_tensor * probs) {
-    assert(probs->n_dims == 2);
-    for (int i = 0; i < probs->ne[1]; ++i) {
-        for (int k = 0; k < probs->ne[0]; ++k) {
-            float p = ggml_get_f32_1d(probs, i*probs->ne[0] + k);
-            printf(" %.2f", p);
-        }
-        printf("\n");
-    }
-}
-
-void print_token(struct llama_context * ctx, llama_token token) {
-    printf("%s", llama_token_to_str(ctx, token));
-}
-
-void print_tokens(struct llama_context* ctx, struct ggml_tensor * tokens) {
-    for (int i=0; i<tokens->ne[0]; ++i) {
-        int token = ggml_get_i32_1d(tokens, i);
-        print_token(ctx, token);
-    }
-}
-
-void print_tokens_batch(struct llama_context* ctx, struct ggml_tensor * tokens) {
-    for (int i1=0; i1<tokens->ne[1]; ++i1) {
-        int num_newline = 0;
-        for (int i0=0; i0<tokens->ne[0]; ++i0) {
-            int token = ggml_get_i32_1d(tokens, i0 + i1*tokens->ne[0]);
-            bool isnl = (token == llama_token_nl());
-            if (isnl) {
-                ++num_newline;
-            }
-            if (isnl) {
-                if (num_newline < 2) {
-                    print_token(ctx, token);
-                } else {
-                    printf("\\n");
-                }
-            } else {
-                print_token(ctx, token);
-            }
-        }
-        printf("\n--\n");
-    }
-}
+void set_f32_3d(struct ggml_tensor * tensor, int64_t i0, int64_t i1, int64_t i2, float value) {
+    float * ptr = (float *) ((char *) tensor->data + i0*tensor->nb[0] + i1*tensor->nb[1] + i2*tensor->nb[2]);
+    *ptr = value;
+}
 
 void set_f32_2d(struct ggml_tensor * tensor, int64_t i0, int64_t i1, float value) {
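A note on the stride arithmetic in the new set_f32_3d helper: ggml tensors carry byte strides in nb[], so element (i0, i1, i2) lives at data + i0*nb[0] + i1*nb[1] + i2*nb[2]. Here is a minimal standalone sketch of the same indexing; mock_tensor is a hypothetical stand-in, not the real ggml_tensor:

    #include <cstdint>
    #include <cstdio>

    // Hypothetical stand-in for ggml_tensor: ne[] holds element counts,
    // nb[] holds byte strides per dimension.
    struct mock_tensor {
        int64_t ne[3];
        size_t  nb[3];
        void *  data;
    };

    static void set_f32_3d(struct mock_tensor * tensor, int64_t i0, int64_t i1, int64_t i2, float value) {
        float * ptr = (float *) ((char *) tensor->data + i0*tensor->nb[0] + i1*tensor->nb[1] + i2*tensor->nb[2]);
        *ptr = value;
    }

    int main() {
        float buf[2*3*4] = {0};
        struct mock_tensor t;
        t.ne[0] = 4; t.ne[1] = 3; t.ne[2] = 2;
        t.nb[0] = sizeof(float);        // contiguous f32: 4 bytes per element
        t.nb[1] = t.ne[0]*t.nb[0];      // 16 bytes per row
        t.nb[2] = t.ne[1]*t.nb[1];      // 48 bytes per 4x3 matrix
        t.data  = buf;

        set_f32_3d(&t, 1, 2, 1, 42.0f);
        printf("%.1f\n", buf[1 + 2*4 + 1*12]);  // prints 42.0
        return 0;
    }

For a contiguous f32 tensor, nb[0] is sizeof(float) and each higher stride is the previous one times the extent, which is exactly what the mock sets up.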
@@ -1218,13 +1169,65 @@ int32_t get_i32_2d(struct ggml_tensor * tensor, int64_t i0, int64_t i1) {
     return *ptr;
 }
 
+void print_row(struct ggml_tensor * probs, int i) {
+    for (int k = 0; k < probs->ne[0]; ++k) {
+        float p = get_f32_2d(probs, k, i);
+        printf(" %.2f", p);
+    }
+    printf("\n");
+}
+
+void print_matrix(struct ggml_tensor * probs) {
+    assert(probs->n_dims == 2);
+    for (int i = 0; i < probs->ne[1]; ++i) {
+        for (int k = 0; k < probs->ne[0]; ++k) {
+            float p = get_f32_2d(probs, k, i);
+            printf(" %.2f", p);
+        }
+        printf("\n");
+    }
+}
+
+void print_token(struct llama_context * ctx, llama_token token) {
+    printf("%s", llama_token_to_str(ctx, token));
+}
+
+void print_tokens(struct llama_context* ctx, struct ggml_tensor * tokens) {
+    for (int i=0; i<tokens->ne[0]; ++i) {
+        int token = ggml_get_i32_1d(tokens, i);
+        print_token(ctx, token);
+    }
+}
+
+void print_tokens_batch(struct llama_context* ctx, struct ggml_tensor * tokens) {
+    for (int i1=0; i1<tokens->ne[1]; ++i1) {
+        int num_newline = 0;
+        for (int i0=0; i0<tokens->ne[0]; ++i0) {
+            int token = get_i32_2d(tokens, i0, i1);
+            print_token(ctx, token);
+            // bool isnl = (token == llama_token_nl());
+            // if (isnl) {
+            //     ++num_newline;
+            // }
+            // if (isnl) {
+            //     if (num_newline < 2) {
+            //         print_token(ctx, token);
+            //     } else {
+            //         printf("\\n");
+            //     }
+            // } else {
+            //     print_token(ctx, token);
+            // }
+        }
+        printf("\n--\n");
+    }
+}
+
 void get_example_targets(const int * train_samples, size_t n_train_samples, const llama_token * train_data, size_t n_train_data, int example_id, struct ggml_tensor * tokens_input, struct ggml_tensor * target_logits, struct ggml_tensor * target_probs) {
     int n_tokens = tokens_input->ne[0];
     int n_vocab = target_logits->ne[0];
-    const float eps = 1e-6f;
-    const float target_prob = 1.0f;
 
     int sample = train_samples[example_id % n_train_samples];
     GGML_ASSERT(sample+n_tokens-1 < n_train_data);
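The relocated print helpers now read through get_f32_2d/get_i32_2d instead of computing flat indices for ggml_get_f32_1d. For contiguous tensors the two addressings are byte-for-byte equivalent; a small self-contained check of that equivalence, assuming contiguous f32 strides:

    #include <cassert>
    #include <cstdint>

    int main() {
        const int64_t ne0 = 4;              // row length
        const size_t  nb0 = sizeof(float);  // contiguous f32 strides
        const size_t  nb1 = ne0*nb0;
        for (int64_t i = 0; i < 3; ++i) {
            for (int64_t k = 0; k < ne0; ++k) {
                // strided (k, i) access lands on the same byte offset
                // as the old flat index i*ne0 + k
                assert(k*nb0 + i*nb1 == (i*ne0 + k)*sizeof(float));
            }
        }
        return 0;
    }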
@@ -1241,38 +1244,42 @@ void get_example_targets(const int * train_samples, size_t n_train_samples, cons
     }
 }
 
-void get_example_targets_batch(struct ggml_context * ctx, const int * train_samples, size_t n_train_samples, const llama_token * train_data, size_t n_train_data, int example_id, struct ggml_tensor * tokens_input, struct ggml_tensor * target_logits, struct ggml_tensor * target_probs) {
+void get_example_targets_batch(struct llama_context * lctx, const int * train_samples, size_t n_train_samples, const llama_token * train_data, size_t n_train_data, int example_id, struct ggml_tensor * tokens_input, struct ggml_tensor * target_logits, struct ggml_tensor * target_probs) {
     GGML_ASSERT(tokens_input->n_dims == 2);
     GGML_ASSERT(target_logits->n_dims == 3);
     GGML_ASSERT(target_probs->n_dims == 3);
+    int n_vocab = target_logits->ne[0];
     int n_tokens = tokens_input->ne[0];
     int n_batch = tokens_input->ne[1];
     GGML_ASSERT(n_tokens == target_logits->ne[1]);
     GGML_ASSERT(n_batch == target_logits->ne[2]);
+    GGML_ASSERT(n_vocab == target_probs->ne[0]);
     GGML_ASSERT(n_tokens == target_probs->ne[1]);
     GGML_ASSERT(n_batch == target_probs->ne[2]);
 
+    ggml_set_f32(target_logits, -1.0f/n_vocab);
+    ggml_set_f32(target_probs, 0.0f);
     for (int k=0; k<n_batch; ++k) {
-        struct ggml_tensor * tokens_input_k = ggml_view_1d(ctx,
-                                                tokens_input,
-                                                tokens_input->ne[0],
-                                                k*tokens_input->nb[1]);
-        struct ggml_tensor * target_logits_k = ggml_view_2d(ctx,
-                                                target_logits,
-                                                target_logits->ne[0],
-                                                target_logits->ne[1],
-                                                target_logits->nb[1],
-                                                k*target_logits->nb[2]);
-
-        struct ggml_tensor * target_probs_k = ggml_view_2d(ctx,
-                                                target_probs,
-                                                target_probs->ne[0],
-                                                target_probs->ne[1],
-                                                target_probs->nb[1],
-                                                k*target_probs->nb[2]);
-
-        get_example_targets(train_samples, n_train_samples, train_data, n_train_data,
-            example_id*n_batch + k, tokens_input_k, target_logits_k, target_probs_k);
+        // printf("%s: batch %d\n", __func__, k);
+        int sample = train_samples[(example_id*n_batch + k) % n_train_samples];
+        GGML_ASSERT(sample+n_tokens-1 < n_train_data);
+
+        set_i32_2d(tokens_input, 0, k, llama_token_bos());
+        for (int i=1; i<n_tokens+1; ++i) {
+            int token = clamp(train_data[sample+i-1], 0, n_vocab-1);
+            // print_token(lctx, token);
+            set_f32_3d(target_logits, token, i-1, k, +1.0f);
+            set_f32_3d(target_probs, token, i-1, k, -1.0f);
+            if (i<n_tokens) {
+                set_i32_2d(tokens_input, i, k, token);
+            }
+        }
+        // printf("\n=\n");
+        // for (int i=0; i<n_tokens; ++i) {
+        //     int token = get_i32_2d(tokens_input, i, k);
+        //     print_token(lctx, token);
+        // }
+        // printf("\n-\n");
     }
 }
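The loop above fills the targets in place rather than through per-batch tensor views: ggml_set_f32 writes a baseline into every cell, then set_f32_3d marks only the observed next token at (token, position, batch), while the same token is also fed as the next input. A sketch of that fill pattern over a flat array; names and the clamp stand-in are illustrative, not the ggml API, and the baseline/mark values follow the target_logits side of the hunk:

    #include <cstdio>
    #include <vector>

    // Illustrative stand-ins; the real code uses ggml tensors and llama tokens.
    static int clamp(int v, int lo, int hi) { return v < lo ? lo : (v > hi ? hi : v); }

    int main() {
        const int n_vocab = 8, n_tokens = 4, n_batch = 2, bos = 1;
        std::vector<int> train_data = {3, 5, 2, 7, 4, 6, 3, 2};

        // target_logits[(k*n_tokens + pos)*n_vocab + v], baseline -1/n_vocab
        std::vector<float> target_logits(n_batch*n_tokens*n_vocab, -1.0f/n_vocab);
        std::vector<int>   tokens_input(n_batch*n_tokens, 0);

        for (int k = 0; k < n_batch; ++k) {
            int sample = (k*n_tokens) % (int) train_data.size();
            tokens_input[k*n_tokens + 0] = bos;          // sequence starts with BOS
            for (int i = 1; i < n_tokens+1; ++i) {
                int token = clamp(train_data[sample+i-1], 0, n_vocab-1);
                // mark the observed next token for position i-1 of batch k
                target_logits[(k*n_tokens + (i-1))*n_vocab + token] = +1.0f;
                if (i < n_tokens) {
                    tokens_input[k*n_tokens + i] = token; // also feed it as input
                }
            }
        }
        printf("batch 0 inputs: %d %d %d %d\n",
               tokens_input[0], tokens_input[1], tokens_input[2], tokens_input[3]);
        return 0;
    }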
@@ -1423,11 +1430,30 @@ int tokenize_file(struct llama_context * lctx, const char * filename, std::vecto
     out.resize(buf.size());
 
     int n_tokens = llama_tokenize(lctx, buf.data(), out.data(), buf.size(), false);
     if (n_tokens >= 0) {
         out.resize(n_tokens);
     }
 
+    bool verify = false;
+    if (verify) {
+        const char * in = buf.data();
+        const char * end = buf.data() + buf.size();
+        for (int i=0; i < out.size(); ++i) {
+            const char * s = llama_token_to_str(lctx, out[i]);
+            int len = strlen(s);
+            if (in >= end) {
+                printf("%s: unexpected end of original text.\n", __func__);
+                break;
+            }
+            const bool matches = (strncmp(in, s, len) == 0);
+            if (matches) {
+                in += len;
+            } else {
+                printf("%s: mismatch: expected '%s', but got '%s'\n", __func__, std::string(in, len).c_str(), s);
+            }
+        }
+    }
+
     return n_tokens;
 }
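The verify block above checks the tokenizer round trip: each token is detokenized and matched against the next bytes of the original file buffer. The same idea in a self-contained form, with a stub vocabulary and token_to_str standing in for llama_token_to_str:

    #include <cstdio>
    #include <cstring>
    #include <string>
    #include <vector>

    // Stub standing in for llama_token_to_str().
    static const char * token_to_str(int token) {
        static const char * vocab[] = {"hel", "lo", " wor", "ld"};
        return vocab[token];
    }

    int main() {
        std::string buf = "hello world";
        std::vector<int> out = {0, 1, 2, 3};   // pretend tokenization of buf

        const char * in  = buf.data();
        const char * end = buf.data() + buf.size();
        for (size_t i = 0; i < out.size(); ++i) {
            const char * s = token_to_str(out[i]);
            size_t len = strlen(s);
            if (in >= end) {
                printf("unexpected end of original text.\n");
                break;
            }
            if (strncmp(in, s, len) == 0) {
                in += len;   // token matches the next bytes; advance
            } else {
                printf("mismatch: expected '%s', but got '%s'\n",
                       std::string(in, len).c_str(), s);
            }
        }
        printf("round-trip %s\n", in == end ? "ok" : "incomplete");
        return 0;
    }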
@@ -1841,9 +1867,9 @@ float cosine_decay_restart(int decay_steps, const float alpha, int step, float r
 }
 
 int main(int argc, char ** argv) {
     const char * default_model = "ggml-vic7b-uncensored-q4_0.bin";
     const char * default_train = "shakespeare.txt";
     const char * default_chkpt_in = "checkpoint.bin";
     const char * default_chkpt_out = "checkpoint.bin";
     const char * default_argv[5] = {argv[0], default_model, default_train, default_chkpt_in, default_chkpt_out};
@@ -1890,6 +1916,7 @@ int main(int argc, char ** argv) {
         ++token_noccurs[train_tokens[i]];
         token_notavail[train_tokens[i]] = false;
     }
+
     std::vector<float> token_freq;
     token_freq.resize(model.hparams.n_vocab, 0);
     int n_unique_tokens = 0;
@@ -1901,10 +1928,9 @@ int main(int argc, char ** argv) {
     struct my_llama_kv_cache kv_self;
 
-    int n_batch = 32;
 
     struct ggml_init_params lcparams;
-    lcparams.mem_size = 1024ll*1024ll*1024ll*8ll;
+    lcparams.mem_size = 1024ll*1024ll*1024ll*2ll;
     lcparams.mem_buffer = NULL;
     lcparams.no_alloc = false;
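On the mem_size values: 1024ll*1024ll*1024ll is one GiB, so this context shrinks from 8 GiB to 2 GiB, and the ll suffixes keep the product in 64-bit arithmetic where a plain int multiplication would overflow. A quick check:

    #include <cstdio>

    int main() {
        // 1024^3 bytes = 1 GiB; 1024*1024*1024*8 without ll suffixes
        // would overflow a 32-bit int (2^33 > 2^31 - 1).
        long long mem_size = 1024ll*1024ll*1024ll*2ll;
        printf("%lld bytes = %lld GiB\n", mem_size, mem_size >> 30);
        return 0;
    }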
@@ -1913,15 +1939,21 @@ int main(int argc, char ** argv) {
     my_llama_sampler sampler;
 
-    int n_threads = 6;
+    int n_threads = 6;
+    int n_batch = 32;
+    int n_examples = 32;
+
+    bool samples_start_after_nl = false;
     bool use_adam = true;
     int warmup = 100;
     int cos_decay_steps = 1000;
     float cos_decay_restart = 1.1f;
     float cos_decay_alpha = 0.0f;
 
+    int n_tokens = model.hparams.n_ctx;
+    int n_vocab = model.hparams.n_vocab;
+
     struct ggml_opt_context * opt = (struct ggml_opt_context *) alloca(sizeof(struct ggml_opt_context));
     memset(opt, 0, sizeof(struct ggml_opt_context));
@@ -1965,12 +1997,7 @@ int main(int argc, char ** argv) {
     size_t compute_size = 1024ll*1024ll*1024ll*32ll;
     uint8_t * compute_addr = new uint8_t[compute_size];
 
-    int n_examples = 256;
-    int n_tokens = model.hparams.n_ctx;
-    int n_vocab = model.hparams.n_vocab;
-    bool samples_start_after_nl = false;
-
     std::vector<int> train_samples;
     train_samples.push_back(0);
@@ -2012,18 +2039,14 @@ int main(int argc, char ** argv) {
         ggml_cgraph gf = {};
         gf.n_threads = n_threads;
 
-        get_example_targets_batch(ctx0, train_samples.data(), train_samples.size(), train_tokens.data(), train_tokens.size(), ex, tokens_input, target_logits, target_probs);
+        get_example_targets_batch(lctx, train_samples.data(), train_samples.size(), train_tokens.data(), train_tokens.size(), ex, tokens_input, target_logits, target_probs);
 
         struct ggml_tensor * logits =
             (n_past == 0)
             ? forward_batch_wo_cache(&model, ctx0, &gf, tokens_input, n_tokens, n_batch)
             : forward_batch(&model, &kv_self, ctx0, &gf, tokens_input, n_tokens, n_past, n_batch);
 
-        // struct ggml_tensor * se = square_error_loss(ctx0, logits, target_logits);
-        struct ggml_tensor * ce = cross_entropy_loss(ctx0, logits, target_probs);
-        // struct ggml_tensor * e = ggml_add(ctx0, se, ce);
-        struct ggml_tensor * e = ce;
-        // struct ggml_tensor * e = se;
+        struct ggml_tensor * e = cross_entropy_loss(ctx0, logits, target_probs);
 
         ggml_build_forward_expand(&gf, e);
         ggml_graph_compute(ctx0, &gf);
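With the squared-error experiment dropped, the training objective above is cross entropy alone. For reference, here is a standalone softmax cross entropy for a single position; the cross_entropy_loss used in the code is a ggml graph op whose exact normalization may differ, so treat this only as a sketch of the underlying formula:

    #include <algorithm>
    #include <cmath>
    #include <cstdio>
    #include <vector>

    // Softmax cross entropy for one position: -log softmax(logits)[target].
    static float cross_entropy(const std::vector<float> & logits, int target) {
        float maxl = logits[0];
        for (float l : logits) maxl = std::max(maxl, l);  // subtract max for stability
        float sum = 0.0f;
        for (float l : logits) sum += std::exp(l - maxl);
        return -(logits[target] - maxl - std::log(sum));
    }

    int main() {
        std::vector<float> logits = {2.0f, 0.5f, -1.0f};
        printf("loss = %f\n", cross_entropy(logits, 0));  // ~0.24
        return 0;
    }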
@@ -2043,9 +2066,8 @@ int main(int argc, char ** argv) {
         size_t used_mem_after_opt = ggml_used_mem(ctx0);
 
         model.train_its = opt->iter;
-        // model.train_its += use_adam ? opt_params_adam.adam.n_iter : opt_params_lbfgs.lbfgs.n_iter;
         model.train_samples += n_batch;
         model.train_tokens += n_batch * n_tokens;
 
         ggml_build_forward_expand(&gf, e);
         ggml_graph_compute(ctx0, &gf);