diff --git a/examples/baby-llama/baby-llama.cpp b/examples/baby-llama/baby-llama.cpp
index 9ddb080af..8b7063376 100644
--- a/examples/baby-llama/baby-llama.cpp
+++ b/examples/baby-llama/baby-llama.cpp
@@ -381,7 +381,7 @@ void randomize_model(struct llama_model * model, int seed, float mean, float std
     randomize_tensor_normal(model->tok_embeddings, model->tok_embeddings->n_dims, model->tok_embeddings->ne, &rnd);
     randomize_tensor_normal(model->norm, model->norm->n_dims, model->norm->ne, &rnd);
     randomize_tensor_normal(model->output, model->output->n_dims, model->output->ne, &rnd);
-
+
     for (uint32_t i = 0; i < n_layer; ++i) {
         auto & layer = model->layers[i];
         randomize_tensor_normal(layer.attention_norm, layer.attention_norm->n_dims, layer.attention_norm->ne, &rnd);
@@ -415,7 +415,7 @@ void randomize_model_lora(struct llama_model_lora * model, int seed, float mean,
     randomize_tensor_normal(model->norm, model->norm->n_dims, model->norm->ne, &rnd);
     randomize_tensor_normal(model->outputa, model->outputa->n_dims, model->outputa->ne, &rnd);
     randomize_tensor_normal(model->outputb, model->outputb->n_dims, model->outputb->ne, &rnd);
-
+
     for (uint32_t i = 0; i < n_layer; ++i) {
         auto & layer = model->layers[i];
         randomize_tensor_normal(layer.attention_norm, layer.attention_norm->n_dims, layer.attention_norm->ne, &rnd);
@@ -508,14 +508,14 @@ bool init_kv_cache_lora(struct llama_kv_cache* cache, struct llama_model_lora *
 }
 
 struct ggml_tensor * forward(
-        struct llama_model * model,
-        struct llama_kv_cache * cache,
+        struct llama_model * model,
+        struct llama_kv_cache * cache,
         struct ggml_context * ctx0,
         struct ggml_cgraph * gf,
         struct ggml_tensor * tokens_input,
         const int n_tokens,
         const int n_past) {
-
+
     const int N = n_tokens;
 
     struct llama_kv_cache& kv_self = *cache;
@@ -569,11 +569,11 @@ struct ggml_tensor * forward(
             // Vcur shape [n_embd, N, 1, 1]
             struct ggml_tensor * Vcur = ggml_cont(ctx0, ggml_transpose(ctx0, ggml_reshape_2d(ctx0, ggml_mul_mat(ctx0, model->layers[il].wv, cur), n_embd, N)));
 
-            // kv_self.k shape [n_embd * n_ctx * n_layer, 1]
-            // kv_self.v shape [n_embd * n_ctx * n_layer, 1]
+            // kv_self.k shape [n_embd * n_ctx * n_layer, 1]
+            // kv_self.v shape [n_embd * n_ctx * n_layer, 1]
             // k shape [n_embd * N, 1] == kv_self.k[:,n_past:n_past+N,il,0]
             // v shape [N, n_embd, 1, 1] == kv_self.v[:,n_past:n_past+N,il,0]
-
+
             /* {
                 struct ggml_tensor * k = ggml_view_1d(ctx0, kv_self.k, N*n_embd, (ggml_element_size(kv_self.k)*n_embd)*(il*n_ctx + n_past));
                 struct ggml_tensor * v = ggml_view_2d(ctx0, kv_self.v, N, n_embd,
@@ -597,7 +597,7 @@ struct ggml_tensor * forward(
                         Qcur,
                         0, 2, 1, 3);
 
-            // kv_self.k shape [n_embd * n_ctx * n_layer, 1]
+            // kv_self.k shape [n_embd * n_ctx * n_layer, 1]
             // K shape [n_embd/n_head, n_past + N, n_head, 1]
             struct ggml_tensor * K =
                 ggml_permute(ctx0,
@@ -641,7 +641,7 @@ struct ggml_tensor * forward(
             // KQV_merged = KQV.permute(0, 2, 1, 3)
             // KQV_merged shape [n_embd/n_head, n_head, N, 1]
             struct ggml_tensor * KQV_merged = ggml_permute(ctx0, KQV, 0, 2, 1, 3);
-            // KQV_merged shape
+            // KQV_merged shape
 
             // cur = KQV_merged.contiguous().view(n_embd, N)
             // cur shape [n_embd,N,1,1]
@@ -734,14 +734,14 @@ struct ggml_tensor * forward(
 
 struct ggml_tensor * forward_lora(
-        struct llama_model_lora * model,
-        struct llama_kv_cache * cache,
+        struct llama_model_lora * model,
+        struct llama_kv_cache * cache,
         struct ggml_context * ctx0,
         struct ggml_cgraph * gf,
         struct ggml_tensor * tokens_input,
         const int n_tokens,
         const int n_past) {
-
+
     const int N = n_tokens;
 
     struct llama_kv_cache& kv_self = *cache;
@@ -784,23 +784,23 @@ struct ggml_tensor * forward_lora(
             // wk shape [n_embd, n_embd, 1, 1]
             // Qcur shape [n_embd/n_head, n_head, N, 1]
             // Kcur shape [n_embd/n_head, n_head, N, 1]
-            struct ggml_tensor * Qcur = ggml_rope(ctx0,
-                ggml_reshape_3d(ctx0,
-                    ggml_mul_mat(ctx0,
-                        model->layers[il].wqa,
-                        ggml_mul_mat(ctx0,
-                            model->layers[il].wqb,
-                            cur)),
-                    n_embd/n_head, n_head, N),
+            struct ggml_tensor * Qcur = ggml_rope(ctx0,
+                ggml_reshape_3d(ctx0,
+                    ggml_mul_mat(ctx0,
+                        model->layers[il].wqa,
+                        ggml_mul_mat(ctx0,
+                            model->layers[il].wqb,
+                            cur)),
+                    n_embd/n_head, n_head, N),
                 n_past, n_rot, 0);
-            struct ggml_tensor * Kcur = ggml_rope(ctx0,
-                ggml_reshape_3d(ctx0,
-                    ggml_mul_mat(ctx0,
-                        model->layers[il].wka,
-                        ggml_mul_mat(ctx0,
-                            model->layers[il].wkb,
-                            cur)),
-                    n_embd/n_head, n_head, N),
+            struct ggml_tensor * Kcur = ggml_rope(ctx0,
+                ggml_reshape_3d(ctx0,
+                    ggml_mul_mat(ctx0,
+                        model->layers[il].wka,
+                        ggml_mul_mat(ctx0,
+                            model->layers[il].wkb,
+                            cur)),
+                    n_embd/n_head, n_head, N),
                 n_past, n_rot, 0);
 
             // store key and value to memory
@@ -808,21 +808,21 @@ struct ggml_tensor * forward_lora(
             // compute the transposed [N, n_embd] V matrix
             // wv shape [n_embd, n_embd, 1, 1]
             // Vcur shape [n_embd, N, 1, 1]
-            struct ggml_tensor * Vcur = ggml_cont(ctx0,
-                ggml_transpose(ctx0,
-                    ggml_reshape_2d(ctx0,
-                        ggml_mul_mat(ctx0,
-                            model->layers[il].wva,
-                            ggml_mul_mat(ctx0,
-                                model->layers[il].wvb,
-                                cur)),
+            struct ggml_tensor * Vcur = ggml_cont(ctx0,
+                ggml_transpose(ctx0,
+                    ggml_reshape_2d(ctx0,
+                        ggml_mul_mat(ctx0,
+                            model->layers[il].wva,
+                            ggml_mul_mat(ctx0,
+                                model->layers[il].wvb,
+                                cur)),
                         n_embd, N)));
 
-            // kv_self.k shape [n_embd * n_ctx * n_layer, 1]
-            // kv_self.v shape [n_embd * n_ctx * n_layer, 1]
+            // kv_self.k shape [n_embd * n_ctx * n_layer, 1]
+            // kv_self.v shape [n_embd * n_ctx * n_layer, 1]
             // k shape [n_embd * N, 1] == kv_self.k[:,n_past:n_past+N,il,0]
             // v shape [N, n_embd, 1, 1] == kv_self.v[:,n_past:n_past+N,il,0]
-
+
             /* {
                 struct ggml_tensor * k = ggml_view_1d(ctx0, kv_self.k, N*n_embd, (ggml_element_size(kv_self.k)*n_embd)*(il*n_ctx + n_past));
                 struct ggml_tensor * v = ggml_view_2d(ctx0, kv_self.v, N, n_embd,
@@ -846,7 +846,7 @@ struct ggml_tensor * forward_lora(
                         Qcur,
                         0, 2, 1, 3);
 
-            // kv_self.k shape [n_embd * n_ctx * n_layer, 1]
+            // kv_self.k shape [n_embd * n_ctx * n_layer, 1]
             // K shape [n_embd/n_head, n_past + N, n_head, 1]
             struct ggml_tensor * K =
                 ggml_permute(ctx0,
@@ -890,7 +890,7 @@ struct ggml_tensor * forward_lora(
             // KQV_merged = KQV.permute(0, 2, 1, 3)
             // KQV_merged shape [n_embd/n_head, n_head, N, 1]
             struct ggml_tensor * KQV_merged = ggml_permute(ctx0, KQV, 0, 2, 1, 3);
-            // KQV_merged shape
+            // KQV_merged shape
 
             // cur = KQV_merged.contiguous().view(n_embd, N)
             // cur shape [n_embd,N,1,1]
@@ -974,10 +974,10 @@ struct ggml_tensor * forward_lora(
 
     // lm_head
     // inpL shape [n_vocab,N,1,1]
-    inpL = ggml_mul_mat(ctx0,
-        model->outputa,
-        ggml_mul_mat(ctx0,
-            model->outputb,
+    inpL = ggml_mul_mat(ctx0,
+        model->outputa,
+        ggml_mul_mat(ctx0,
+            model->outputb,
             inpL));
 
     // ggml_set_scratch(ctx0, { 0, 0, nullptr, });
@@ -1094,12 +1094,12 @@ struct ggml_tensor * square_error_loss(struct ggml_context * ctx, struct ggml_te
 
 struct ggml_tensor * cross_entropy_loss(struct ggml_context * ctx, struct ggml_tensor * a, struct ggml_tensor * b) {
     const float eps = 1e-3;
-    return
-        ggml_sum(ctx,
-            ggml_neg(ctx,
-                ggml_sum_rows(ctx,
-                    ggml_mul(ctx,
-                        ggml_soft_max(ctx, a),
+    return
+        ggml_sum(ctx,
+            ggml_neg(ctx,
+                ggml_sum_rows(ctx,
+                    ggml_mul(ctx,
+                        ggml_soft_max(ctx, a),
                         ggml_log(ctx,
                             ggml_add1(ctx,
                                 ggml_soft_max(ctx, b),
@@ -1169,7 +1169,7 @@ int main(int argc, char ** argv) {
     */
 
     // key + value cache for the self attention
-    struct llama_kv_cache kv_self;
+    struct llama_kv_cache kv_self;
     printf("init_kv_cache\n");
     kv_self.ctx = model.ctx;
     init_kv_cache(&kv_self, &model);
@@ -1221,17 +1221,17 @@ int main(int argc, char ** argv) {
         struct ggml_tensor * logits2 = forward(&model, &kv_self, ctx0, &gf, tokens_input2, n_tokens, n_past);
         // struct ggml_tensor * logits3 = forward(&model, &kv_self, ctx0, &gf, tokens_input3, n_tokens, n_past);
         // struct ggml_tensor * logits4 = forward(&model, &kv_self, ctx0, &gf, tokens_input4, n_tokens, n_past);
-
+
         // struct ggml_tensor * e = cross_entropy_loss(ctx0, targets1, logits1);
         // struct ggml_tensor * e = square_error_loss(ctx0, targets1, logits1);
-
+
         struct ggml_tensor * e = ggml_add(ctx0,
             square_error_loss(ctx0, targets1, logits1),
             square_error_loss(ctx0, targets2, logits2));
         // struct ggml_tensor * e = ggml_add(ctx0,
         //     cross_entropy_loss(ctx0, targets1, logits1),
         //     cross_entropy_loss(ctx0, targets2, logits2));
-        // struct ggml_tensor * e = ggml_add(ctx0,
+        // struct ggml_tensor * e = ggml_add(ctx0,
         //     ggml_add(ctx0,
         //         cross_entropy_loss(ctx0, targets1, logits1),
         //         cross_entropy_loss(ctx0, targets2, logits2)),
@@ -1260,7 +1260,7 @@ int main(int argc, char ** argv) {
         opt_params_lbfgs.lbfgs.n_iter = 16;
         // ggml_opt(ctx0, opt_params_adam, e);
         ggml_opt(ctx0, opt_params_lbfgs, e);
-        //
+        //
 
         ggml_build_forward_expand(&gf, e);
         ggml_graph_compute(ctx0, &gf);
@@ -1292,7 +1292,7 @@ int main(int argc, char ** argv) {
         struct ggml_tensor * tokens_input = ggml_new_tensor_1d(model.ctx, GGML_TYPE_I32, n_tokens);
         struct ggml_tensor * targets = ggml_new_tensor_2d(model.ctx, GGML_TYPE_F32, n_vocab, n_tokens);
-
+
         get_example_targets(137, tokens_input, targets);
         for (int i=sample_ctx; itype]); } -}
 
 static void ggml_compute_forward_dup_f16(
         const struct ggml_compute_params * params,
         const struct ggml_tensor * src0,
@@ -7818,7 +7818,7 @@ static void ggml_compute_forward_add_f32(
             vDSP_vadd(
                     (float *) ((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01), 1,
                     (float *) ((char *) src1->data + i3*nb13 + i2*nb12 + i1*nb11), 1,
-                    (float *) ((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 ), 1,
+                    (float *) ((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 ), 1,
                     ne0);
 #else
             ggml_vec_add_f32(ne0,
@@ -8177,7 +8177,7 @@ static void ggml_compute_forward_add1_f32(
             vDSP_vadd(
                     (float *) ((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01), 1,
                     (float *) ((char *) src1->data), 0,
-                    (float *) ((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 ), 1,
+                    (float *) ((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 ), 1,
                     ne0);
 #else
             ggml_vec_add1_f32(ne0,
@@ -8438,17 +8438,17 @@ static void ggml_compute_forward_acc_f32(
         struct ggml_tensor * dst) {
     GGML_ASSERT(ggml_are_same_shape(src0, dst));
     GGML_ASSERT(ggml_is_contiguous(dst) && ggml_is_contiguous(src0));
-
+
     GGML_ASSERT(opt0->type == GGML_TYPE_I32);
     GGML_ASSERT(ggml_nelements(opt0) == 5);
 
-    // view src0 and dst with these strides and data offset inbytes during acc
+    // view src0 and dst with these strides and data offset inbytes during acc
     // nb0 is implicitely element_size because src0 and dst are contiguous
-    size_t nb1 = ((int32_t *) opt0->data)[0];
-    size_t nb2 = ((int32_t *) opt0->data)[1];
-    size_t nb3 = ((int32_t *) opt0->data)[2];
+    size_t nb1 = ((int32_t *) opt0->data)[0];
+    size_t nb2 = ((int32_t *) opt0->data)[1];
+    size_t nb3 = ((int32_t *) opt0->data)[2];
     size_t offset = ((int32_t *) opt0->data)[3];
-    bool inplace = (bool) ((int32_t *) opt0->data)[4];
+    bool inplace = (bool) ((int32_t *) opt0->data)[4];
 
     if (!inplace && (params->type == GGML_TASK_INIT)) {
         // memcpy needs to be synchronized across threads to avoid race conditions.
@@ -8596,7 +8596,7 @@ static void ggml_compute_forward_sub_f32(
             vDSP_vsub(
                     (float *) ((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01), 1,
                     (float *) ((char *) src1->data + i3*nb13 + i2*nb12 + i1*nb11), 1,
-                    (float *) ((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 ), 1,
+                    (float *) ((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 ), 1,
                     ne0);
 #else
             ggml_vec_sub_f32(ne0,
@@ -8692,7 +8692,7 @@ static void ggml_compute_forward_mul_f32(
             vDSP_vmul(
                     (float *) ((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01), 1,
                     (float *) ((char *) src1->data + i3*nb13 + i2*nb12 + i1*nb11), 1,
-                    (float *) ((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 ), 1,
+                    (float *) ((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 ), 1,
                     ne0);
 #else
             ggml_vec_mul_f32(ne0,
@@ -8788,7 +8788,7 @@ static void ggml_compute_forward_div_f32(
             vDSP_vdiv(
                     (float *) ((char *) src0->data + i3*nb03 + i2*nb02 + i1*nb01), 1,
                     (float *) ((char *) src1->data + i3*nb13 + i2*nb12 + i1*nb11), 1,
-                    (float *) ((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 ), 1,
+                    (float *) ((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 ), 1,
                     ne0);
 #else
             ggml_vec_div_f32(ne0,
@@ -9189,9 +9189,9 @@ static void ggml_compute_forward_repeat_f32(
     const size_t nb01 = src0->nb[1];
     const size_t nb02 = src0->nb[2];
     const size_t nb03 = src0->nb[3];
-
+
     // guaranteed to be an integer due to the check in ggml_can_repeat
-    const int nr0 = (int)(ne0/ne00);
+    const int nr0 = (int)(ne0/ne00);
     const int nr1 = (int)(ne1/ne01);
     const int nr2 = (int)(ne2/ne02);
     const int nr3 = (int)(ne3/ne03);
@@ -9850,12 +9850,12 @@ static void ggml_compute_forward_rms_norm_back_f32(
 
         {
             // z = rms_norm(x)
-            //
-            // rms_norm(src0) =
+            //
+            // rms_norm(src0) =
             //     scale(
-            //         src0,
+            //         src0,
             //         div(
-            //             1,
+            //             1,
             //             sqrt(
             //                 add(
             //                     scale(
@@ -9868,17 +9868,17 @@ static void ggml_compute_forward_rms_norm_back_f32(
             // postorder:
            // ## op args grad
             // 00 param src0 grad[#00]
-            // 01 const 1
+            // 01 const 1
             // 02 sqr (#00) grad[#02]
             // 03 sum (#02) grad[#03]
-            // 04 const 1/N
+            // 04 const 1/N
             // 05 scale (#03, #04) grad[#05]
-            // 06 const eps
+            // 06 const eps
             // 07 add (#05, #06) grad[#07]
             // 08 sqrt (#07) grad[#08]
             // 09 div (#01,#08) grad[#09]
             // 10 scale (#00,#09) grad[#10]
-            //
+            //
             // backward pass, given grad[#10]
             // #10: scale
             // grad[#00] += scale(grad[#10],#09)
@@ -9893,7 +9893,7 @@ static void ggml_compute_forward_rms_norm_back_f32(
             // grad[#03] += scale(grad[#05],#04)
             // #03: sum
             // grad[#02] += repeat(grad[#03], #02)
-            // #02:
+            // #02:
             // grad[#00] += scale(mul(#00, grad[#02]), 2.0)
             //
             // substitute and simplify:
@@ -10716,17 +10716,17 @@ static void ggml_compute_forward_set_f32(
         struct ggml_tensor * dst) {
     GGML_ASSERT(ggml_are_same_shape(src0, dst));
     GGML_ASSERT(ggml_is_contiguous(dst) && ggml_is_contiguous(src0));
-
+
     GGML_ASSERT(opt0->type == GGML_TYPE_I32);
     GGML_ASSERT(ggml_nelements(opt0) == 5);
 
-    // view src0 and dst with these strides and data offset inbytes during set
+    // view src0 and dst with these strides and data offset inbytes during set
     // nb0 is implicitely element_size because src0 and dst are contiguous
-    size_t nb1 = ((int32_t *) opt0->data)[0];
-    size_t nb2 = ((int32_t *) opt0->data)[1];
-    size_t nb3 = ((int32_t *) opt0->data)[2];
+    size_t nb1 = ((int32_t *) opt0->data)[0];
+    size_t nb2 = ((int32_t *) opt0->data)[1];
+    size_t nb3 = ((int32_t *) opt0->data)[2];
     size_t offset = ((int32_t *) opt0->data)[3];
-    bool inplace = (bool) ((int32_t *) opt0->data)[4];
+    bool inplace = (bool) ((int32_t *) opt0->data)[4];
 
     if (!inplace && (params->type == GGML_TASK_INIT)) {
         // memcpy needs to be synchronized across threads to avoid race conditions.
@@ -13420,7 +13420,7 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
         case GGML_OP_ROPE_BACK:
             {
                 ggml_compute_forward_rope_back(params, tensor->src0, tensor->src1, tensor);
-            } break;
+            } break;
         case GGML_OP_ALIBI:
             {
                 ggml_compute_forward_alibi(params, tensor->src0, tensor->src1, tensor);
@@ -13521,7 +13521,7 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
                         src1->grad->ne[2],
                         src1->grad->ne[3],
                         nb1, nb2, nb3, offset);
-
+
                 src1->grad =
                     ggml_add_impl(ctx,
                         src1->grad,
@@ -13664,7 +13664,7 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
                 // transpose [nc0*nr0,1,1]
                 // reshape [nc0,nr0,1,1] reshape_1d or reshape_2d
                 // add to src0->grad
-
+
                 int64_t ne[4] = {nc0,ncr,nr0,nrr};
 
                 struct ggml_tensor* F00 = tensor->grad;
@@ -13846,7 +13846,7 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
                 const size_t offset = (( int32_t * ) tensor->opt[0]->data)[3];
 
                 struct ggml_tensor * tensor_grad_view = NULL;
-
+
                 if (src0->grad || src1->grad) {
                     GGML_ASSERT(src0->type == tensor->type);
                     GGML_ASSERT(tensor->grad->type == tensor->type);
@@ -13862,10 +13862,10 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
                 }
 
                 if (src0->grad) {
-                    src0->grad = ggml_add_impl(ctx,
-                        src0->grad,
+                    src0->grad = ggml_add_impl(ctx,
+                        src0->grad,
                         ggml_acc_impl(ctx,
-                            tensor->grad,
+                            tensor->grad,
                             ggml_neg(ctx, tensor_grad_view),
                             nb1, nb2, nb3, offset, false),
                         inplace);
@@ -13944,7 +13944,7 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
                     nb2 = (nb2 / n0) * ng;
                     nb3 = (nb3 / n0) * ng;
                 }
-
+
                 src0->grad = ggml_acc_impl(ctx, src0->grad, tensor->grad, nb1, nb2, nb3, offset, inplace);
             }
         } break;
@@ -14040,18 +14040,18 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
             // necessary for llama
             if (src0->grad) {
                 // y = softmax(x)
-                //
+                //
                 // Jii = yi - yi*yi
                 // Jij = -yi*yj
                 // J = diag(y)-y.*y
                 // dx = J * dy
                 // dxk = sum(Jkj * dyk)
-
-                int64_t ne2[4] = {
-                    tensor->ne[0],
-                    1,
-                    tensor->ne[1]*tensor->ne[2],
-                    tensor->ne[3]
+
+                int64_t ne2[4] = {
+                    tensor->ne[0],
+                    1,
+                    tensor->ne[1]*tensor->ne[2],
+                    tensor->ne[3]
                 };
                 struct ggml_tensor * tensor2 = ggml_cont(ctx,
                     ggml_reshape_4d(ctx,
diff --git a/ggml.h b/ggml.h
index 3df640674..2aeff15ff 100644
--- a/ggml.h
+++ b/ggml.h
@@ -649,7 +649,7 @@ extern "C" {
             struct ggml_context * ctx,
             struct ggml_tensor * a,
             struct ggml_tensor * b);
-
+
     // in-place, returns view(a)
     GGML_API struct ggml_tensor * ggml_scale_inplace(
             struct ggml_context * ctx,
             struct ggml_tensor * a,
@@ -787,7 +787,7 @@ extern "C" {
             int64_t ne3,
             size_t nb1, // row stride in bytes
             size_t nb2, // slice stride in bytes
-            size_t nb3,
+            size_t nb3,
             size_t offset);
 
     GGML_API struct ggml_tensor * ggml_permute(
@@ -862,7 +862,7 @@ extern "C" {
             int n_dims,
             int mode);
 
-    // in-place, returns view(a)
+    // in-place, returns view(a)
     GGML_API struct ggml_tensor * ggml_rope_inplace(
             struct ggml_context * ctx,
             struct ggml_tensor * a,
diff --git a/tests/test-grad0.c b/tests/test-grad0.c
index 64f40a02e..f1d20340c 100644
--- a/tests/test-grad0.c
+++ b/tests/test-grad0.c
@@ -156,7 +156,7 @@ struct ggml_tensor * get_random_tensor_int(
 float get_element(const struct ggml_tensor * t, int idx) {
     if (t->type == GGML_TYPE_F32) {
         return ((float *)t->data)[idx];
-    } else if (t->type == GGML_TYPE_I32) {
+    } else if (t->type == GGML_TYPE_I32) {
         return ((int32_t *)t->data)[idx];
     } else {
         assert(false);
@@ -591,9 +591,9 @@ int main(int argc, const char ** argv) {
 
 #ifdef GGML_SILU_FP16
             // due to GGML_SILU_FP16 the finite difference method will be slightly wrong -> increase error bounds.
-            check_gradient("silu", ctx0, x, f, ndims, nargs, 1e-3f, 0.5, INFINITY);
+            check_gradient("silu", ctx0, x, f, ndims, nargs, 1e-3f, 0.5, INFINITY);
 #else
-            check_gradient("silu", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
+            check_gradient("silu", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
 #endif
         }
     }
@@ -610,7 +610,7 @@ int main(int argc, const char ** argv) {
 
             struct ggml_tensor * f = ggml_sum(ctx0, ggml_rms_norm(ctx0, x[0]));
 
-            check_gradient("rms_norm", ctx0, x, f, ndims, nargs, 1e-4f, 1.0f, INFINITY);
+            check_gradient("rms_norm", ctx0, x, f, ndims, nargs, 1e-4f, 1.0f, INFINITY);
         }
     }
@@ -630,7 +630,7 @@ int main(int argc, const char ** argv) {
 
             struct ggml_tensor * f = ggml_sum(ctx0, ggml_scale(ctx0, x[0], x[1]));
 
-            check_gradient("scale", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
+            check_gradient("scale", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
         }
     }
@@ -975,10 +975,10 @@ int main(int argc, const char ** argv) {
         int64_t ne2[4];
 
         const int nargs = 1;
-        for (int ndims = 1; ndims <= 4; ++ndims)
+        for (int ndims = 1; ndims <= 4; ++ndims)
         {
             // ggml_permute will set axes of dimensions below n_dims to 1.
-            // to make ggml_permute work correctly on all axes,
+            // to make ggml_permute work correctly on all axes,
             // the input tensor needs maximal n_dim of 4.
             for (int i=0; i
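
Note on the cross_entropy_loss hunk above: the change is only re-indentation, but the expression it touches builds, out of ggml ops, the scalar L = -sum over rows of softmax(a) . log(softmax(b) + eps). The following is a minimal plain-C sketch of the per-row computation; the helper names, the single-row scope, and the caller-provided scratch buffers are illustrative assumptions, not part of the patch.

#include <math.h>
#include <stddef.h>

// Numerically stable softmax over one row, matching what ggml_soft_max computes per row.
static void softmax_row(const float * x, float * y, size_t n) {
    float max = x[0];
    for (size_t i = 1; i < n; ++i) { if (x[i] > max) { max = x[i]; } }
    float sum = 0.0f;
    for (size_t i = 0; i < n; ++i) { y[i] = expf(x[i] - max); sum += y[i]; }
    for (size_t i = 0; i < n; ++i) { y[i] /= sum; }
}

// L = -sum_i softmax(a)_i * log(softmax(b)_i + eps): cross entropy between the
// target distribution softmax(a) and the predicted distribution softmax(b).
// p and q are caller-provided scratch buffers of length n (hypothetical helpers).
static float cross_entropy_row(const float * a, const float * b,
                               float * p, float * q, size_t n, float eps) {
    softmax_row(a, p, n);
    softmax_row(b, q, n);
    float loss = 0.0f;
    for (size_t i = 0; i < n; ++i) {
        loss -= p[i] * logf(q[i] + eps); // eps (1e-3 in the patch) keeps log() finite
    }
    return loss;
}

In the ggml graph, ggml_sum_rows produces these per-row values and ggml_sum reduces them to the single scalar returned by cross_entropy_loss.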