train : fix compile warnings

Georgi Gerganov 2023-08-28 22:26:10 +03:00
parent f6828cba9e
commit 93535a460a
5 changed files with 33 additions and 29 deletions

View file

@@ -15,6 +15,7 @@
 #include <string>
 #include <unordered_set>
 #include <vector>
+#include <cinttypes>

 #if defined(__APPLE__) && defined(__MACH__)
 #include <sys/types.h>
@@ -938,8 +939,8 @@ std::string get_sortable_timestamp() {
     const int64_t ns = std::chrono::duration_cast<std::chrono::nanoseconds>(
         current_time.time_since_epoch() % 1000000000).count();
-    char timestamp_ns[10];
-    snprintf(timestamp_ns, 11, "%09ld", ns);
+    char timestamp_ns[11];
+    snprintf(timestamp_ns, 11, "%09" PRId64, ns);

     return std::string(timestamp_no_ns) + "." + std::string(timestamp_ns);
 }
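
Aside (not part of the commit): %ld assumes int64_t is long, which is false on LLP64 targets such as 64-bit Windows, so -Wformat warns; the PRId64 macro from <cinttypes> always expands to the matching conversion specifier. A minimal, self-contained sketch of the pattern (illustrative values, not code from the repo):

    // sketch: printing an int64_t portably with snprintf and PRId64
    #include <cinttypes>   // PRId64
    #include <cstdint>
    #include <cstdio>

    int main() {
        const int64_t ns = 123456789;
        char timestamp_ns[11];                                        // 9 zero-padded digits + '\0'
        snprintf(timestamp_ns, sizeof(timestamp_ns), "%09" PRId64, ns);
        printf("%s\n", timestamp_ns);                                 // prints 123456789
        return 0;
    }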

View file

@@ -681,7 +681,6 @@ void save_as_llama_model(struct llama_vocab * vocab, struct my_llama_model * mod
     // for rms-att-weight
     int row_length = model->hparams.n_embd;
-    const auto & hparams = model->hparams;
     int n_ff = model->hparams.n_ff;

     for (uint32_t i = 0; i < model->hparams.n_layer; ++i){

View file

@@ -314,15 +314,13 @@ void init_model(struct my_llama_model * model) {
     model->train_samples = 0;
     model->train_tokens = 0;

-    const char * arch = "llama";
-
     std::vector<char> tn_buf;
     tn_buf.resize(GGML_MAX_NAME);
-    auto tn = [arch, &tn_buf](const char * key) -> const char * {
+    auto tn = [&tn_buf](const char * key) -> const char * {
         snprintf(tn_buf.data(), tn_buf.size(), "%s.weight", key);
         return tn_buf.data();
     };
-    auto tni = [arch, &tn_buf](const char * key, int bid) -> const char * {
+    auto tni = [&tn_buf](const char * key, int bid) -> const char * {
         snprintf(tn_buf.data(), tn_buf.size(), key, bid);
         std::string s = tn_buf.data();
         snprintf(tn_buf.data(), tn_buf.size(), "%s.weight", s.c_str());
@@ -470,7 +468,7 @@ static size_t hash_find(void * hash_table[], void * p) {
 }

 static bool hash_insert(void * hash_table[], void * p) {
-    size_t h = hash(p);
+    //size_t h = hash(p);
     size_t i = hash_find(hash_table, p);

     GGML_ASSERT(i < GGML_GRAPH_HASHTABLE_SIZE); // assert that not full
@@ -494,7 +492,7 @@ struct hash_map {
     void * keys[GGML_GRAPH_HASHTABLE_SIZE];
     void * vals[GGML_GRAPH_HASHTABLE_SIZE];
 };

-static const size_t HASH_MAP_SIZE = sizeof(struct hash_map);
+//static const size_t HASH_MAP_SIZE = sizeof(struct hash_map);

 struct hash_map * new_hash_map() {
     struct hash_map * result = new struct hash_map;
@@ -677,7 +675,6 @@ struct ggml_tensor * llama_build_train_graphs(
     const float f_norm_rms_eps = hparams.f_norm_rms_eps;
     const float rope_freq_base = hparams.rope_freq_base;
     const float rope_freq_scale = hparams.rope_freq_scale;
-    const int rope_mode = 0;

     auto set_name = [](struct ggml_tensor * t, const char * n) {
         ggml_set_name(t, n);
@@ -687,8 +684,12 @@
     };

     // rope has so much parameters that we make a custom function for it
-    auto rope = [ctx, n_past, n_rot, rope_mode, n_ctx, rope_freq_base, rope_freq_scale]
+    auto rope = [ctx, n_rot, n_ctx, rope_freq_base, rope_freq_scale]
         (struct ggml_tensor * t) -> struct ggml_tensor * {
+        // not capturing these, to silcence warnings
+        const int n_past = 0;
+        const int rope_mode = 0;
+
         return ggml_rope_custom(ctx,
             t, n_past, n_rot, rope_mode, n_ctx,
             rope_freq_base, rope_freq_scale);
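
Aside (illustrative, not from the commit): clang's -Wunused-lambda-capture fires when a listed capture is not actually required, e.g. a const int with a constant initializer that the body can read without capturing; hence n_past and rope_mode are dropped from the capture list and declared inside the lambda instead. A self-contained sketch of the pattern:

    // sketch: avoid -Wunused-lambda-capture by declaring constants inside the lambda
    #include <cstdio>

    int main() {
        // const int n_past = 0;  // capturing a const like this would warn: capture not required
        auto rope_like = [](int n_rot) -> int {
            const int n_past = 0;     // local constants instead of captures
            const int rope_mode = 0;
            return n_past + rope_mode + n_rot;
        };
        printf("%d\n", rope_like(64));  // prints 64
        return 0;
    }
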
@@ -803,14 +804,14 @@ struct ggml_tensor * llama_build_train_graphs(
     }

     // allocating checkpoints in one block to reduce memory fragmentation
     // note: they will be freed in reverse order
-    for (int i = 0; i < checkpoints.size(); ++i) {
+    for (int i = 0; i < (int) checkpoints.size(); ++i) {
         if (checkpoints[i]->data == NULL && !ggml_is_view(checkpoints[i])) {
             ggml_allocr_alloc(alloc, checkpoints[i]);
         }
     }

-    int n_leafs_after = gb->n_leafs;
-    int n_nodes_after = gb->n_nodes;
+    //int n_leafs_after = gb->n_leafs;
+    //int n_nodes_after = gb->n_nodes;

     ggml_allocr_alloc_graph(alloc, gb);
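
Aside (illustrative, not from the commit): checkpoints.size() returns size_t, so comparing it against a signed int index triggers -Wsign-compare; the explicit (int) cast used above is one fix, an unsigned loop index is another:

    // sketch: two ways to avoid -Wsign-compare against a container size
    #include <cstdio>
    #include <vector>

    int main() {
        std::vector<int> checkpoints = {10, 20, 30};

        for (int i = 0; i < (int) checkpoints.size(); ++i) {   // explicit cast, as in the diff
            printf("%d ", checkpoints[i]);
        }
        for (size_t i = 0; i < checkpoints.size(); ++i) {      // or use an unsigned index
            printf("%d ", checkpoints[i]);
        }
        printf("\n");
        return 0;
    }
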
@@ -1061,6 +1062,8 @@ bool are_same_layout(struct ggml_tensor * a, struct ggml_tensor * b) {
     GGML_ASSERT(a->type == b->type);
     GGML_ASSERT(ggml_are_same_shape(a, b));
     GGML_ASSERT(ggml_is_contiguous(a) && ggml_is_contiguous(b));
+
+    return true;
 }

 void read_tensor_by_name(struct ggml_tensor * dst, struct ggml_context * ctx, const char * name) {
@@ -1217,11 +1220,11 @@ void load_llama_model_gguf(struct gguf_context * fctx, struct ggml_context * f_g
     std::vector<char> tn_buf;
     tn_buf.resize(GGML_MAX_NAME);
-    auto tn = [&arch, &tn_buf](const char * key) -> const char * {
+    auto tn = [&tn_buf](const char * key) -> const char * {
         snprintf(tn_buf.data(), tn_buf.size(), "%s.weight", key);
         return tn_buf.data();
     };
-    auto tni = [&arch, &tn_buf](const char * key, int bid) -> const char * {
+    auto tni = [&tn_buf](const char * key, int bid) -> const char * {
         snprintf(tn_buf.data(), tn_buf.size(), key, bid);
         std::string s = tn_buf.data();
         snprintf(tn_buf.data(), tn_buf.size(), "%s.weight", s.c_str());
@@ -2194,7 +2197,7 @@ int main(int argc, char ** argv) {
         ggml_set_no_alloc(ctx0, false);

         // don't use alloc for input tensors, so we can safely fill them with data
-        struct ggml_tensor * after_opt_best_samples = ggml_new_tensor_2d(ctx0, GGML_TYPE_I32, n_tokens, n_batch);
+        //struct ggml_tensor * after_opt_best_samples = ggml_new_tensor_2d(ctx0, GGML_TYPE_I32, n_tokens, n_batch);
         //struct ggml_tensor * after_opt_probs = ggml_new_tensor_3d(ctx0, GGML_TYPE_F32, n_vocab, n_tokens, n_batch);
         struct ggml_tensor * tokens_input = ggml_new_tensor_2d(ctx0, GGML_TYPE_I32, n_tokens, n_batch);
         struct ggml_tensor * target_logits = ggml_new_tensor_3d(ctx0, GGML_TYPE_F32, n_vocab, n_tokens, n_batch);

ggml.c
View file

@@ -9448,6 +9448,8 @@ static void ggml_compute_forward_div_f32(
 #ifdef GGML_USE_ACCELERATE
+            UNUSED(ggml_vec_div_f32);
+
             vDSP_vdiv(
                     (float *) ((char *) src1->data + i3*nb13 + i2*nb12 + i1*nb11), 1,
                     (float *) ((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01), 1,
@@ -13936,7 +13938,7 @@ static void ggml_compute_forward_flash_attn_f32(
                 vvexpf(S, S, &Mup);
                 ggml_vec_sum_f32(Mup, &sum, S);
 #else
-                uint16_t scvt[GGML_SOFT_MAX_UNROLL];
+                uint16_t scvt[GGML_SOFT_MAX_UNROLL]; UNUSED(scvt);
                 ggml_float sump[GGML_SOFT_MAX_UNROLL] = { 0.0 };

                 for (int i = 0; i < Mup; i += GGML_SOFT_MAX_UNROLL) {
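
Aside (illustrative): scvt is only referenced on some build configurations of this code path, so other configurations warn about an unused variable. ggml silences that with its UNUSED macro, essentially a cast to void; a minimal sketch of the idiom, with the macro written out locally:

    // sketch: the (void)-cast idiom behind an UNUSED-style macro
    #include <cstdint>
    #include <cstdio>

    #define UNUSED(x) (void)(x)   // marks x as used without doing anything, silencing -Wunused-variable

    int main() {
        uint16_t scvt[4]; UNUSED(scvt);   // same shape as the lines changed in the diff
        printf("ok\n");
        return 0;
    }
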
@@ -14530,7 +14532,7 @@ static void ggml_compute_forward_flash_attn_back_f32(
                 vvexpf(SM, SM, &Mup);
                 ggml_vec_sum_f32(Mup, &sum, SM);
 #else
-                uint16_t scvt[GGML_SOFT_MAX_UNROLL];
+                uint16_t scvt[GGML_SOFT_MAX_UNROLL]; UNUSED(scvt);
                 ggml_float sump[GGML_SOFT_MAX_UNROLL] = { 0.0 };

                 for (int i = 0; i < Mup; i += GGML_SOFT_MAX_UNROLL) {
@@ -15330,7 +15332,7 @@ static void ggml_compute_forward_cross_entropy_loss_f32(
             float max = -INFINITY;
             ggml_vec_max_f32(nc, &max, s0);

-            uint16_t scvt;
+            uint16_t scvt; UNUSED(scvt);
             for (int i = 0; i < nc; i++) {
                 if (s0[i] == -INFINITY) {
                     st[i] = 0.0f;
@@ -15410,7 +15412,7 @@ static void ggml_compute_forward_cross_entropy_loss_back_f32(
         return;
     }

-    const double eps = 1e-9f;
+    const double eps = 1e-9;

     // TODO: handle transposed/permuted matrices
     const int64_t nc = src0->ne[0];
@@ -15444,7 +15446,7 @@ static void ggml_compute_forward_cross_entropy_loss_back_f32(
             float max = -INFINITY;
             ggml_vec_max_f32(nc, &max, s0);

-            uint16_t scvt;
+            uint16_t scvt; UNUSED(scvt);
             for (int i = 0; i < nc; i++) {
                 if (s0[i] == -INFINITY) {
                     ds0[i] = 0.0f;
@@ -18495,7 +18497,7 @@ static enum ggml_opt_result ggml_opt_adam(
                 const int64_t ne = ggml_nelements(ps[p]);
                 for (int64_t j = 0; j < ne; ++j) {
                     float g = ggml_get_f32_1d(ps[p]->grad, j);
-                    sum += g*g;
+                    sum += (ggml_float)(g*g);
                 }
             }
             ggml_float norm = sqrt(sum);
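
Aside (illustrative): sum is a ggml_float, which is a double in ggml.c, so adding the float expression g*g to it widens implicitly; with -Wdouble-promotion style warnings enabled the compiler flags that, and the explicit cast documents the intended widening. A sketch under that assumption:

    // sketch: make the float -> double widening explicit
    #include <cstdio>

    typedef double ggml_float;        // assumption: matches ggml.c's typedef

    int main() {
        float g = 0.5f;
        ggml_float sum = 0.0;
        sum += (ggml_float)(g * g);   // explicit cast, as in the diff
        printf("%f\n", sum);          // prints 0.250000
        return 0;
    }
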
@@ -18508,7 +18510,7 @@ static enum ggml_opt_result ggml_opt_adam(
             int64_t i = 0;
             for (int p = 0; p < np; ++p) {
                 const int64_t ne = ggml_nelements(ps[p]);
-                const float p_decay = ((ps[p]->n_dims >= decay_min_ndim) ? decay : 0.0) * sched;
+                const float p_decay = ((ps[p]->n_dims >= decay_min_ndim) ? decay : 0.0f) * sched;
                 for (int64_t j = 0; j < ne; ++j) {
                     float x = ggml_get_f32_1d(ps[p], j);
                     float g = ggml_get_f32_1d(ps[p]->grad, j)*gnorm;
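
Aside (illustrative, not from the commit): the 1e-9 and 0.0f literal changes both match the literal to the destination type. eps is a double, so the float-suffixed 1e-9f triggered a promotion warning; p_decay is a float, so the double literal 0.0 pushed the ternary and the multiply up to double before converting back down. A compact sketch of the rule:

    // sketch: keep literal suffixes consistent with the variable's type
    int main() {
        const double eps     = 1e-9;                          // double variable, unsuffixed double literal
        const float  decay   = 0.1f;
        const float  sched   = 1.0f;
        const bool   use_it  = true;
        const float  p_decay = (use_it ? decay : 0.0f) * sched; // 0.0f keeps the whole expression in float
        return (eps > 0 && p_decay >= 0) ? 0 : 1;
    }
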

View file

@@ -6248,7 +6248,6 @@ const char * llama_print_system_info(void) {
 }

 void llama_dump_timing_info_yaml(FILE * stream, const llama_context * ctx) {
     fprintf(stream, "\n");
     fprintf(stream, "###########\n");
     fprintf(stream, "# Timings #\n");
@@ -6264,10 +6263,10 @@ void llama_dump_timing_info_yaml(FILE * stream, const llama_context * ctx) {
     fprintf(stream, "n_eval: %d # number of tokens generated (excluding the first one)\n", ctx->n_eval);
     fprintf(stream, "n_p_eval: %d # number of tokens processed in batches at the beginning\n", ctx->n_p_eval);
     fprintf(stream, "n_sample: %d # number of sampled tokens\n", ctx->n_sample);
-    fprintf(stream, "t_eval_us: %ld # total microseconds spent generating tokens\n", ctx->t_eval_us);
-    fprintf(stream, "t_load_us: %ld # total microseconds spent loading the model\n", ctx->t_load_us);
-    fprintf(stream, "t_p_eval_us: %ld # total microseconds spent prompt processing\n", ctx->t_p_eval_us);
-    fprintf(stream, "t_sample_us: %ld # total microseconds spent sampling\n", ctx->t_sample_us);
+    fprintf(stream, "t_eval_us: %" PRId64 " # total microseconds spent generating tokens\n", ctx->t_eval_us);
+    fprintf(stream, "t_load_us: %" PRId64 " # total microseconds spent loading the model\n", ctx->t_load_us);
+    fprintf(stream, "t_p_eval_us: %" PRId64 " # total microseconds spent prompt processing\n", ctx->t_p_eval_us);
+    fprintf(stream, "t_sample_us: %" PRId64 " # total microseconds spent sampling\n", ctx->t_sample_us);
     fprintf(stream, "ts_eval: %.2f # tokens / second during generation\n",
         1.0e6 * ctx->n_eval / ctx->t_eval_us);
     fprintf(stream, "ts_p_eval: %.2f # tokens / second during prompt processing\n",