minor : fix compiler warnings + indentation style

This commit is contained in:
Georgi Gerganov 2023-05-13 09:55:17 +03:00
parent b9ef08ccab
commit f977243ded
No known key found for this signature in database
GPG key ID: 449E073F9DC10735
2 changed files with 63 additions and 56 deletions

View file

@ -134,7 +134,7 @@ struct llama_hparams {
}; };
// get_n_ff: computes the value 2*(4*n_embd)/3 from hparams and rounds it up to
// a multiple of hparams->n_mult using the (x + m - 1)/m*m pattern, then returns it.
// NOTE(review): rounding relies on integer division; the field types of
// llama_hparams are not visible here -- confirm they are integral.
// NOTE(review): each line below appears to hold two diff columns fused together;
// the second copy differs only by declaring n_ff as const.
uint32_t get_n_ff(const struct llama_hparams* hparams) { uint32_t get_n_ff(const struct llama_hparams* hparams) {
uint32_t n_ff = ((2*(4*hparams->n_embd)/3 + hparams->n_mult - 1)/hparams->n_mult)*hparams->n_mult; const uint32_t n_ff = ((2*(4*hparams->n_embd)/3 + hparams->n_mult - 1)/hparams->n_mult)*hparams->n_mult;
return n_ff; return n_ff;
} }
@ -241,7 +241,7 @@ void init_model(struct llama_model * model) {
const uint32_t n_layer = hparams.n_layer; const uint32_t n_layer = hparams.n_layer;
const uint32_t n_vocab = hparams.n_vocab; const uint32_t n_vocab = hparams.n_vocab;
uint32_t n_ff = get_n_ff(&hparams); const uint32_t n_ff = get_n_ff(&hparams);
struct ggml_context * ctx = model->ctx; struct ggml_context * ctx = model->ctx;
@ -275,11 +275,12 @@ void init_model_lora(struct llama_model_lora * model) {
const auto & hparams = model->hparams; const auto & hparams = model->hparams;
const uint32_t n_embd = hparams.n_embd; const uint32_t n_embd = hparams.n_embd;
const uint32_t n_mult = hparams.n_mult;
const uint32_t n_layer = hparams.n_layer; const uint32_t n_layer = hparams.n_layer;
const uint32_t n_vocab = hparams.n_vocab; const uint32_t n_vocab = hparams.n_vocab;
const uint32_t n_lora = hparams.n_lora; const uint32_t n_lora = hparams.n_lora;
uint32_t n_ff = ((2*(4*hparams.n_embd)/3 + hparams.n_mult - 1)/hparams.n_mult)*hparams.n_mult; const uint32_t n_ff = ((2*(4*n_embd)/3 + n_mult - 1)/n_mult)*n_mult;
struct ggml_context * ctx = model->ctx; struct ggml_context * ctx = model->ctx;
@ -315,7 +316,9 @@ void init_model_lora(struct llama_model_lora * model) {
void set_param_model(struct llama_model * model) { void set_param_model(struct llama_model * model) {
const auto& hparams = model->hparams; const auto& hparams = model->hparams;
const uint32_t n_layer = hparams.n_layer; const uint32_t n_layer = hparams.n_layer;
struct ggml_context* ctx = model->ctx; struct ggml_context* ctx = model->ctx;
ggml_set_param(ctx, model->tok_embeddings); ggml_set_param(ctx, model->tok_embeddings);
@ -339,7 +342,9 @@ void set_param_model(struct llama_model * model) {
void set_param_model_lora(struct llama_model_lora * model) { void set_param_model_lora(struct llama_model_lora * model) {
const auto& hparams = model->hparams; const auto& hparams = model->hparams;
const uint32_t n_layer = hparams.n_layer; const uint32_t n_layer = hparams.n_layer;
struct ggml_context* ctx = model->ctx; struct ggml_context* ctx = model->ctx;
ggml_set_param(ctx, model->tok_embeddings); ggml_set_param(ctx, model->tok_embeddings);
@ -369,11 +374,7 @@ void set_param_model_lora(struct llama_model_lora * model) {
void randomize_model(struct llama_model * model, int seed, float mean, float std, float min, float max) { void randomize_model(struct llama_model * model, int seed, float mean, float std, float min, float max) {
const auto & hparams = model->hparams; const auto & hparams = model->hparams;
const uint32_t n_embd = hparams.n_embd;
const uint32_t n_layer = hparams.n_layer; const uint32_t n_layer = hparams.n_layer;
const uint32_t n_vocab = hparams.n_vocab;
uint32_t n_ff = ((2*(4*hparams.n_embd)/3 + hparams.n_mult - 1)/hparams.n_mult)*hparams.n_mult;
struct random_normal_distribution rnd; struct random_normal_distribution rnd;
init_random_normal_distribution(&rnd, seed, mean, std, min, max); init_random_normal_distribution(&rnd, seed, mean, std, min, max);
@ -402,11 +403,7 @@ void randomize_model(struct llama_model * model, int seed, float mean, float std
void randomize_model_lora(struct llama_model_lora * model, int seed, float mean, float std, float min, float max) { void randomize_model_lora(struct llama_model_lora * model, int seed, float mean, float std, float min, float max) {
const auto & hparams = model->hparams; const auto & hparams = model->hparams;
const uint32_t n_embd = hparams.n_embd;
const uint32_t n_layer = hparams.n_layer; const uint32_t n_layer = hparams.n_layer;
const uint32_t n_vocab = hparams.n_vocab;
uint32_t n_ff = ((2*(4*hparams.n_embd)/3 + hparams.n_mult - 1)/hparams.n_mult)*hparams.n_mult;
struct random_normal_distribution rnd; struct random_normal_distribution rnd;
init_random_normal_distribution(&rnd, seed, mean, std, min, max); init_random_normal_distribution(&rnd, seed, mean, std, min, max);
@ -438,9 +435,10 @@ void randomize_model_lora(struct llama_model_lora * model, int seed, float mean,
bool init_kv_cache(struct llama_kv_cache* cache, struct llama_model * model, int n_batch) { bool init_kv_cache(struct llama_kv_cache* cache, struct llama_model * model, int n_batch) {
const auto & hparams = model->hparams; const auto & hparams = model->hparams;
const int n_ctx = hparams.n_ctx;
const int n_embd = hparams.n_embd; const uint32_t n_ctx = hparams.n_ctx;
const int n_layer = hparams.n_layer; const uint32_t n_embd = hparams.n_embd;
const uint32_t n_layer = hparams.n_layer;
const int64_t n_mem = n_layer*n_ctx*n_batch; const int64_t n_mem = n_layer*n_ctx*n_batch;
const int64_t n_elements = n_embd*n_mem; const int64_t n_elements = n_embd*n_mem;
@ -473,9 +471,10 @@ bool init_kv_cache(struct llama_kv_cache* cache, struct llama_model * model, int
bool init_kv_cache_lora(struct llama_kv_cache* cache, struct llama_model_lora * model, int n_batch) { bool init_kv_cache_lora(struct llama_kv_cache* cache, struct llama_model_lora * model, int n_batch) {
const auto & hparams = model->hparams; const auto & hparams = model->hparams;
const int n_ctx = hparams.n_ctx;
const int n_embd = hparams.n_embd; const uint32_t n_ctx = hparams.n_ctx;
const int n_layer = hparams.n_layer; const uint32_t n_embd = hparams.n_embd;
const uint32_t n_layer = hparams.n_layer;
const int64_t n_mem = n_layer*n_ctx*n_batch; const int64_t n_mem = n_layer*n_ctx*n_batch;
const int64_t n_elements = n_embd*n_mem; const int64_t n_elements = n_embd*n_mem;
@ -1062,12 +1061,12 @@ struct ggml_tensor * forward_lora(
struct llama_kv_cache& kv_self = *cache; struct llama_kv_cache& kv_self = *cache;
const auto & hparams = model->hparams; const auto & hparams = model->hparams;
const int n_ctx = hparams.n_ctx; const int n_ctx = hparams.n_ctx;
const int n_embd = hparams.n_embd; const int n_embd = hparams.n_embd;
const int n_layer = hparams.n_layer; const int n_layer = hparams.n_layer;
const int n_head = hparams.n_head; const int n_head = hparams.n_head;
const int n_rot = hparams.n_rot; const int n_rot = hparams.n_rot;
const int n_lora = hparams.n_lora;
struct ggml_tensor * tokens = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, N); struct ggml_tensor * tokens = ggml_new_tensor_1d(ctx0, GGML_TYPE_I32, N);
memcpy(tokens->data, tokens_input->data, N*ggml_element_size(tokens)); memcpy(tokens->data, tokens_input->data, N*ggml_element_size(tokens));
@ -1431,7 +1430,6 @@ void get_example_targets_batch(struct ggml_context * ctx, int example_id, struct
GGML_ASSERT( targets->n_dims == 3); GGML_ASSERT( targets->n_dims == 3);
int n_tokens = tokens_input->ne[0]; int n_tokens = tokens_input->ne[0];
int n_batch = tokens_input->ne[1]; int n_batch = tokens_input->ne[1];
int n_vocab = targets->ne[0];
GGML_ASSERT(n_tokens == targets->ne[1]); GGML_ASSERT(n_tokens == targets->ne[1]);
GGML_ASSERT(n_batch == targets->ne[2]); GGML_ASSERT(n_batch == targets->ne[2]);
@ -1481,6 +1479,12 @@ struct ggml_tensor * cross_entropy_loss(struct ggml_context * ctx, struct ggml_t
} }
int main(int argc, char ** argv) { int main(int argc, char ** argv) {
if (argc < 1) {
fprintf(stderr, "usage: %s\n", argv[0]);
return 1;
}
struct ggml_init_params lcparams; struct ggml_init_params lcparams;
lcparams.mem_size = 1024ll*1024ll*1024ll; lcparams.mem_size = 1024ll*1024ll*1024ll;
lcparams.mem_buffer = NULL; lcparams.mem_buffer = NULL;
@ -1565,7 +1569,6 @@ int main(int argc, char ** argv) {
struct ggml_context * ctx0 = ggml_init(params); struct ggml_context * ctx0 = ggml_init(params);
struct ggml_tensor * after_opt_best_samples = ggml_new_tensor_2d(ctx0, GGML_TYPE_I32, n_tokens, n_batch); struct ggml_tensor * after_opt_best_samples = ggml_new_tensor_2d(ctx0, GGML_TYPE_I32, n_tokens, n_batch);
struct ggml_tensor * after_opt_probs = ggml_new_tensor_3d(ctx0, GGML_TYPE_F32, n_vocab, n_tokens, n_batch); struct ggml_tensor * after_opt_probs = ggml_new_tensor_3d(ctx0, GGML_TYPE_F32, n_vocab, n_tokens, n_batch);
struct ggml_tensor * tokens_input = ggml_new_tensor_2d(ctx0, GGML_TYPE_I32, n_tokens, n_batch); struct ggml_tensor * tokens_input = ggml_new_tensor_2d(ctx0, GGML_TYPE_I32, n_tokens, n_batch);

38
ggml.c
View file

@ -3978,12 +3978,12 @@ inline static float ggml_silu_f32(float x) {
return x/(1.0f + expf(-x)); return x/(1.0f + expf(-x));
} }
// ggml_vec_silu_f16: for each of the n elements, reads x[i] through a uint16_t
// pointer and uses that 16-bit value as an index into table_silu_f16, storing
// the looked-up entry in y[i].
// The trailing `//` text on every line is the same function in commented-out form.
inline static void ggml_vec_silu_f16(const int n, ggml_fp16_t * y, const ggml_fp16_t * x) { //inline static void ggml_vec_silu_f16(const int n, ggml_fp16_t * y, const ggml_fp16_t * x) {
const uint16_t * i16 = (const uint16_t *) x; // const uint16_t * i16 = (const uint16_t *) x;
for (int i = 0; i < n; ++i) { // for (int i = 0; i < n; ++i) {
y[i] = table_silu_f16[i16[i]]; // y[i] = table_silu_f16[i16[i]];
} // }
} //}
#ifdef GGML_SILU_FP16 #ifdef GGML_SILU_FP16
inline static void ggml_vec_silu_f32(const int n, float * y, const float * x) { inline static void ggml_vec_silu_f32(const int n, float * y, const float * x) {
@ -4512,9 +4512,9 @@ static inline int ggml_up32(int n) {
return (n + 31) & ~31; return (n + 31) & ~31;
} }
// ggml_up64: rounds n up to a multiple of 64 by adding 63 and clearing the low
// six bits; a value that is already a multiple of 64 is returned unchanged.
// The trailing `//` text on every line is the same function in commented-out form.
static inline int ggml_up64(int n) { //static inline int ggml_up64(int n) {
return (n + 63) & ~63; // return (n + 63) & ~63;
} //}
static inline int ggml_up(int n, int m) { static inline int ggml_up(int n, int m) {
// assert m is a power of 2 // assert m is a power of 2
@ -8165,6 +8165,8 @@ static void ggml_compute_forward_add1_f32(
const int i1 = (ir - i3*ne2*ne1 - i2*ne1); const int i1 = (ir - i3*ne2*ne1 - i2*ne1);
#ifdef GGML_USE_ACCELERATE #ifdef GGML_USE_ACCELERATE
UNUSED(ggml_vec_add1_f32);
vDSP_vadd( vDSP_vadd(
(float *) ((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01), 1, (float *) ((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01), 1,
(float *) ((char *) src1->data), 0, (float *) ((char *) src1->data), 0,
@ -8680,6 +8682,8 @@ static void ggml_compute_forward_mul_f32(
#ifdef GGML_USE_ACCELERATE #ifdef GGML_USE_ACCELERATE
UNUSED(ggml_vec_mul_f32);
vDSP_vmul( vDSP_vmul(
(float *) ((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01), 1, (float *) ((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01), 1,
(float *) ((char *) src1->data + i3*nb13 + i2*nb12 + i1*nb11), 1, (float *) ((char *) src1->data + i3*nb13 + i2*nb12 + i1*nb11), 1,
@ -9831,15 +9835,15 @@ static void ggml_compute_forward_rms_norm_back_f32(
sum_xdz += (ggml_float)(x[i00] * dz[i00]); sum_xdz += (ggml_float)(x[i00] * dz[i00]);
} }
const ggml_float mean = sum_xx/ne00; //const float mean = (float)(sum_xx)/ne00;
const ggml_float mean_eps = sum_xx/ne00 + eps; const float mean_eps = (float)(sum_xx)/ne00 + eps;
const ggml_float sum_eps = sum_xx + eps*ne00; const float sum_eps = (float)(sum_xx) + eps*ne00;
const ggml_float mean_xdz = sum_xdz/ne00; //const float mean_xdz = (float)(sum_xdz)/ne00;
// we could cache rms from forward pass to improve performance. // we could cache rms from forward pass to improve performance.
// to do this implement ggml_rms and compose ggml_rms_norm using ggml_rms. // to do this implement ggml_rms and compose ggml_rms_norm using ggml_rms.
const ggml_float rms = sqrtf(mean_eps); //const float rms = sqrtf(mean_eps);
const ggml_float rrms = 1.0f / sqrtf(mean_eps); const float rrms = 1.0f / sqrtf(mean_eps);
const ggml_float scale = -rrms/(ne00 * mean_eps); // -1/(n*rms**3) //const float scale = -rrms/(ne00 * mean_eps); // -1/(n*rms**3)
{ {
// z = rms_norm(x) // z = rms_norm(x)
@ -9939,7 +9943,7 @@ static void ggml_compute_forward_rms_norm_back_f32(
ggml_vec_cpy_f32 (ne00, dx, x); ggml_vec_cpy_f32 (ne00, dx, x);
// ggml_vec_scale_f32(ne00, dx, -mean_xdz/mean_eps); // ggml_vec_scale_f32(ne00, dx, -mean_xdz/mean_eps);
ggml_vec_scale_f32(ne00, dx, -sum_xdz/sum_eps); ggml_vec_scale_f32(ne00, dx, (float)(-sum_xdz)/sum_eps);
ggml_vec_acc_f32 (ne00, dx, dz); ggml_vec_acc_f32 (ne00, dx, dz);
ggml_vec_scale_f32(ne00, dx, rrms); ggml_vec_scale_f32(ne00, dx, rrms);
} }