train : fix compile warnings

commit 93535a460a
parent f6828cba9e

5 changed files with 33 additions and 29 deletions
@@ -15,6 +15,7 @@
 #include <string>
 #include <unordered_set>
 #include <vector>
+#include <cinttypes>

 #if defined(__APPLE__) && defined(__MACH__)
 #include <sys/types.h>
@@ -938,8 +939,8 @@ std::string get_sortable_timestamp() {

     const int64_t ns = std::chrono::duration_cast<std::chrono::nanoseconds>(
         current_time.time_since_epoch() % 1000000000).count();
-    char timestamp_ns[10];
-    snprintf(timestamp_ns, 11, "%09ld", ns);
+    char timestamp_ns[11];
+    snprintf(timestamp_ns, 11, "%09" PRId64, ns);

     return std::string(timestamp_no_ns) + "." + std::string(timestamp_ns);
 }
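The pattern behind this hunk, as a standalone sketch (hypothetical code, not from the repository): "%ld" does not match int64_t on platforms where long is 32 bits, and the old char[10] buffer was one byte smaller than the size argument passed to snprintf.

    #include <cinttypes> // PRId64
    #include <cstdint>
    #include <cstdio>

    int main() {
        const int64_t ns = 123456789;
        char buf[11]; // now matches the snprintf size argument (9 digits + NUL fit)
        // "%09" PRId64 expands to the correct conversion for int64_t on every platform
        snprintf(buf, sizeof(buf), "%09" PRId64, ns);
        printf("%s\n", buf);
        return 0;
    }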
@@ -681,7 +681,6 @@ void save_as_llama_model(struct llama_vocab * vocab, struct my_llama_model * mod

     // for rms-att-weight
-    int row_length = model->hparams.n_embd;
-    int n_ff = model->hparams.n_ff;
+    const auto & hparams = model->hparams;

     for (uint32_t i = 0; i < model->hparams.n_layer; ++i){
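Both deleted locals were initialized but never read afterwards. A minimal reproduction of that warning class (hypothetical, not repository code):

    // g++ -Wall -c demo.cpp
    void demo(int n_embd, int n_ff) {
        int row_length = n_embd; // warning: unused variable 'row_length' [-Wunused-variable]
        int width      = n_ff;   // likewise; the commit deletes such locals outright
    }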
@@ -314,15 +314,13 @@ void init_model(struct my_llama_model * model) {
     model->train_samples = 0;
     model->train_tokens = 0;

-    const char * arch = "llama";
-
     std::vector<char> tn_buf;
     tn_buf.resize(GGML_MAX_NAME);
-    auto tn = [arch, &tn_buf](const char * key) -> const char * {
+    auto tn = [&tn_buf](const char * key) -> const char * {
         snprintf(tn_buf.data(), tn_buf.size(), "%s.weight", key);
         return tn_buf.data();
     };
-    auto tni = [arch, &tn_buf](const char * key, int bid) -> const char * {
+    auto tni = [&tn_buf](const char * key, int bid) -> const char * {
         snprintf(tn_buf.data(), tn_buf.size(), key, bid);
         std::string s = tn_buf.data();
         snprintf(tn_buf.data(), tn_buf.size(), "%s.weight", s.c_str());
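Here arch was captured but never used inside the lambda bodies, which clang reports as -Wunused-lambda-capture; once the captures are gone, the arch local itself becomes dead and is deleted too. A hedged sketch of the same fix (not repository code):

    #include <cstdio>
    #include <vector>

    void demo() {
        const char * arch = "llama";
        std::vector<char> buf(64);
        // clang: lambda capture 'arch' is not used [-Wunused-lambda-capture]
        auto tn_bad  = [arch, &buf](const char * key) { snprintf(buf.data(), buf.size(), "%s.weight", key); };
        // fixed: capture only what the body actually reads
        auto tn_good = [&buf](const char * key) { snprintf(buf.data(), buf.size(), "%s.weight", key); };
        tn_bad("tok_embd");
        tn_good("output_norm");
    }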
@@ -470,7 +468,7 @@ static size_t hash_find(void * hash_table[], void * p) {
 }

 static bool hash_insert(void * hash_table[], void * p) {
-    size_t h = hash(p);
+    //size_t h = hash(p);
     size_t i = hash_find(hash_table, p);

     GGML_ASSERT(i < GGML_GRAPH_HASHTABLE_SIZE); // assert that not full
@@ -494,7 +492,7 @@ struct hash_map {
     void * keys[GGML_GRAPH_HASHTABLE_SIZE];
     void * vals[GGML_GRAPH_HASHTABLE_SIZE];
 };
-static const size_t HASH_MAP_SIZE = sizeof(struct hash_map);
+//static const size_t HASH_MAP_SIZE = sizeof(struct hash_map);

 struct hash_map * new_hash_map() {
     struct hash_map * result = new struct hash_map;
@@ -677,7 +675,6 @@ struct ggml_tensor * llama_build_train_graphs(
     const float f_norm_rms_eps = hparams.f_norm_rms_eps;
     const float rope_freq_base = hparams.rope_freq_base;
     const float rope_freq_scale = hparams.rope_freq_scale;
-    const int rope_mode = 0;

     auto set_name = [](struct ggml_tensor * t, const char * n) {
         ggml_set_name(t, n);
@@ -687,8 +684,12 @@ struct ggml_tensor * llama_build_train_graphs(
     };

     // rope has so many parameters that we make a custom function for it
-    auto rope = [ctx, n_past, n_rot, rope_mode, n_ctx, rope_freq_base, rope_freq_scale]
+    auto rope = [ctx, n_rot, n_ctx, rope_freq_base, rope_freq_scale]
         (struct ggml_tensor * t) -> struct ggml_tensor * {
+        // not capturing these, to silence warnings
+        const int n_past    = 0;
+        const int rope_mode = 0;
+
         return ggml_rope_custom(ctx,
             t, n_past, n_rot, rope_mode, n_ctx,
             rope_freq_base, rope_freq_scale);
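Since n_past and rope_mode are compile-time constants here, the commit re-declares them inside the lambda body instead of capturing them, which shrinks the capture list and sidesteps the unused-capture warning. A hedged standalone sketch:

    #include <cstdio>

    int main() {
        auto rope_like = [](int t) -> int {
            const int n_past    = 0; // constants declared in the body rather than captured
            const int rope_mode = 0;
            return t + n_past + rope_mode;
        };
        printf("%d\n", rope_like(5));
        return 0;
    }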
@@ -803,14 +804,14 @@ struct ggml_tensor * llama_build_train_graphs(
     }
     // allocating checkpoints in one block to reduce memory fragmentation
     // note: they will be freed in reverse order
-    for (int i = 0; i < checkpoints.size(); ++i) {
+    for (int i = 0; i < (int) checkpoints.size(); ++i) {
         if (checkpoints[i]->data == NULL && !ggml_is_view(checkpoints[i])) {
             ggml_allocr_alloc(alloc, checkpoints[i]);
         }
     }

-    int n_leafs_after = gb->n_leafs;
-    int n_nodes_after = gb->n_nodes;
+    //int n_leafs_after = gb->n_leafs;
+    //int n_nodes_after = gb->n_nodes;

     ggml_allocr_alloc_graph(alloc, gb);
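checkpoints.size() is unsigned, so comparing it with an int index triggers -Wsign-compare; the (int) cast makes both sides of the comparison signed. Standalone sketch (hypothetical, not repository code):

    #include <vector>

    int count_nonzero(const std::vector<int> & v) {
        int n = 0;
        // without the cast: comparison of integers of different signedness
        // ('int' and 'std::vector<int>::size_type') [-Wsign-compare]
        for (int i = 0; i < (int) v.size(); ++i) {
            if (v[i] != 0) { ++n; }
        }
        return n;
    }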
@@ -1061,6 +1062,8 @@ bool are_same_layout(struct ggml_tensor * a, struct ggml_tensor * b) {
     GGML_ASSERT(a->type == b->type);
+    GGML_ASSERT(ggml_are_same_shape(a, b));
+    GGML_ASSERT(ggml_is_contiguous(a) && ggml_is_contiguous(b));

     return true;
 }

 void read_tensor_by_name(struct ggml_tensor * dst, struct ggml_context * ctx, const char * name) {
@@ -1217,11 +1220,11 @@ void load_llama_model_gguf(struct gguf_context * fctx, struct ggml_context * f_g

     std::vector<char> tn_buf;
     tn_buf.resize(GGML_MAX_NAME);
-    auto tn = [&arch, &tn_buf](const char * key) -> const char * {
+    auto tn = [&tn_buf](const char * key) -> const char * {
         snprintf(tn_buf.data(), tn_buf.size(), "%s.weight", key);
         return tn_buf.data();
     };
-    auto tni = [&arch, &tn_buf](const char * key, int bid) -> const char * {
+    auto tni = [&tn_buf](const char * key, int bid) -> const char * {
         snprintf(tn_buf.data(), tn_buf.size(), key, bid);
         std::string s = tn_buf.data();
         snprintf(tn_buf.data(), tn_buf.size(), "%s.weight", s.c_str());
@@ -2194,7 +2197,7 @@ int main(int argc, char ** argv) {
     ggml_set_no_alloc(ctx0, false);

     // don't use alloc for input tensors, so we can safely fill them with data
-    struct ggml_tensor * after_opt_best_samples = ggml_new_tensor_2d(ctx0, GGML_TYPE_I32, n_tokens, n_batch);
+    //struct ggml_tensor * after_opt_best_samples = ggml_new_tensor_2d(ctx0, GGML_TYPE_I32, n_tokens, n_batch);
     //struct ggml_tensor * after_opt_probs = ggml_new_tensor_3d(ctx0, GGML_TYPE_F32, n_vocab, n_tokens, n_batch);
     struct ggml_tensor * tokens_input = ggml_new_tensor_2d(ctx0, GGML_TYPE_I32, n_tokens, n_batch);
     struct ggml_tensor * target_logits = ggml_new_tensor_3d(ctx0, GGML_TYPE_F32, n_vocab, n_tokens, n_batch);
ggml.c (16 changed lines)
@@ -9448,6 +9448,8 @@ static void ggml_compute_forward_div_f32(


 #ifdef GGML_USE_ACCELERATE
+            UNUSED(ggml_vec_div_f32);
+
             vDSP_vdiv(
                     (float *) ((char *) src1->data + i3*nb13 + i2*nb12 + i1*nb11), 1,
                     (float *) ((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01), 1,
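UNUSED in ggml is the usual (void) cast; applied to a function, the reference counts as a use, which silences -Wunused-function in builds where an #ifdef path (here Accelerate's vDSP_vdiv) replaces the call. A hedged sketch with a hypothetical fast path:

    #define UNUSED(x) (void)(x)

    static void vec_div(float * z, const float * x, const float * y, int n) {
        for (int i = 0; i < n; ++i) { z[i] = x[i] / y[i]; }
    }

    void forward_div(float * z, const float * x, const float * y, int n) {
    #ifdef USE_FAST_PATH
        UNUSED(vec_div);      // referenced so -Wunused-function stays quiet
        fast_div(z, x, y, n); // hypothetical accelerated routine
    #else
        vec_div(z, x, y, n);
    #endif
    }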
@@ -13936,7 +13938,7 @@ static void ggml_compute_forward_flash_attn_f32(
                 vvexpf(S, S, &Mup);
                 ggml_vec_sum_f32(Mup, &sum, S);
 #else
-                uint16_t scvt[GGML_SOFT_MAX_UNROLL];
+                uint16_t scvt[GGML_SOFT_MAX_UNROLL]; UNUSED(scvt);
                 ggml_float sump[GGML_SOFT_MAX_UNROLL] = { 0.0 };

                 for (int i = 0; i < Mup; i += GGML_SOFT_MAX_UNROLL) {
@@ -14530,7 +14532,7 @@ static void ggml_compute_forward_flash_attn_back_f32(
                 vvexpf(SM, SM, &Mup);
                 ggml_vec_sum_f32(Mup, &sum, SM);
 #else
-                uint16_t scvt[GGML_SOFT_MAX_UNROLL];
+                uint16_t scvt[GGML_SOFT_MAX_UNROLL]; UNUSED(scvt);
                 ggml_float sump[GGML_SOFT_MAX_UNROLL] = { 0.0 };

                 for (int i = 0; i < Mup; i += GGML_SOFT_MAX_UNROLL) {
@@ -15330,7 +15332,7 @@ static void ggml_compute_forward_cross_entropy_loss_f32(
             float max = -INFINITY;
             ggml_vec_max_f32(nc, &max, s0);

-            uint16_t scvt;
+            uint16_t scvt; UNUSED(scvt);
             for (int i = 0; i < nc; i++) {
                 if (s0[i] == -INFINITY) {
                     st[i] = 0.0f;
@@ -15410,7 +15412,7 @@ static void ggml_compute_forward_cross_entropy_loss_back_f32(
         return;
     }

-    const double eps = 1e-9f;
+    const double eps = 1e-9;

     // TODO: handle transposed/permuted matrices
     const int64_t nc = src0->ne[0];
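The f suffix made 1e-9 a float literal that is then implicitly widened to double, which -Wdouble-promotion reports (and the stored value is the float rounding of 1e-9, not the double one). A two-line sketch:

    const double eps_bad  = 1e-9f; // float literal promoted to double: -Wdouble-promotion
    const double eps_good = 1e-9;  // a double literal to begin with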
@@ -15444,7 +15446,7 @@ static void ggml_compute_forward_cross_entropy_loss_back_f32(
         float max = -INFINITY;
         ggml_vec_max_f32(nc, &max, s0);

-        uint16_t scvt;
+        uint16_t scvt; UNUSED(scvt);
         for (int i = 0; i < nc; i++) {
             if (s0[i] == -INFINITY) {
                 ds0[i] = 0.0f;
@@ -18495,7 +18497,7 @@ static enum ggml_opt_result ggml_opt_adam(
             const int64_t ne = ggml_nelements(ps[p]);
             for (int64_t j = 0; j < ne; ++j) {
                 float g = ggml_get_f32_1d(ps[p]->grad, j);
-                sum += g*g;
+                sum += (ggml_float)(g*g);
             }
         }
         ggml_float norm = sqrt(sum);
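sum is a ggml_float (a double by default in ggml), so adding the float product promoted it implicitly; the cast states the widening explicitly and keeps -Wdouble-promotion quiet. Sketch under that assumption:

    typedef double ggml_float; // ggml's default definition

    ggml_float sum_squares(const float * g, int n) {
        ggml_float sum = 0.0;
        for (int i = 0; i < n; ++i) {
            sum += (ggml_float)(g[i]*g[i]); // explicit, intentional float -> double widening
        }
        return sum;
    }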
@@ -18508,7 +18510,7 @@ static enum ggml_opt_result ggml_opt_adam(
         int64_t i = 0;
         for (int p = 0; p < np; ++p) {
             const int64_t ne = ggml_nelements(ps[p]);
-            const float p_decay = ((ps[p]->n_dims >= decay_min_ndim) ? decay : 0.0) * sched;
+            const float p_decay = ((ps[p]->n_dims >= decay_min_ndim) ? decay : 0.0f) * sched;
             for (int64_t j = 0; j < ne; ++j) {
                 float x = ggml_get_f32_1d(ps[p], j);
                 float g = ggml_get_f32_1d(ps[p]->grad, j)*gnorm;
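The mirror image of the eps fix above: with decay a float, the ternary's 0.0 arm dragged the whole expression to double before truncating back to float on assignment. Sketch (hypothetical helper):

    float p_decay_of(float decay, float sched, int n_dims, int decay_min_ndim) {
        // with 0.0 the ternary and multiply are computed in double, then truncated:
        // -Wdouble-promotion / -Wfloat-conversion; 0.0f keeps everything in float
        return ((n_dims >= decay_min_ndim) ? decay : 0.0f) * sched;
    }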
@@ -6248,7 +6248,6 @@ const char * llama_print_system_info(void) {
 }

 void llama_dump_timing_info_yaml(FILE * stream, const llama_context * ctx) {
-
     fprintf(stream, "\n");
     fprintf(stream, "###########\n");
     fprintf(stream, "# Timings #\n");
@@ -6264,10 +6263,10 @@ void llama_dump_timing_info_yaml(FILE * stream, const llama_context * ctx) {
     fprintf(stream, "n_eval: %d # number of tokens generated (excluding the first one)\n", ctx->n_eval);
     fprintf(stream, "n_p_eval: %d # number of tokens processed in batches at the beginning\n", ctx->n_p_eval);
     fprintf(stream, "n_sample: %d # number of sampled tokens\n", ctx->n_sample);
-    fprintf(stream, "t_eval_us: %ld # total microseconds spent generating tokens\n", ctx->t_eval_us);
-    fprintf(stream, "t_load_us: %ld # total microseconds spent loading the model\n", ctx->t_load_us);
-    fprintf(stream, "t_p_eval_us: %ld # total microseconds spent prompt processing\n", ctx->t_p_eval_us);
-    fprintf(stream, "t_sample_us: %ld # total microseconds spent sampling\n", ctx->t_sample_us);
+    fprintf(stream, "t_eval_us: %" PRId64 " # total microseconds spent generating tokens\n", ctx->t_eval_us);
+    fprintf(stream, "t_load_us: %" PRId64 " # total microseconds spent loading the model\n", ctx->t_load_us);
+    fprintf(stream, "t_p_eval_us: %" PRId64 " # total microseconds spent prompt processing\n", ctx->t_p_eval_us);
+    fprintf(stream, "t_sample_us: %" PRId64 " # total microseconds spent sampling\n", ctx->t_sample_us);
     fprintf(stream, "ts_eval: %.2f # tokens / second during generation\n",
             1.0e6 * ctx->n_eval / ctx->t_eval_us);
     fprintf(stream, "ts_p_eval: %.2f # tokens / second during prompt processing\n",