diff --git a/ggml.c b/ggml.c
index 5b88cbbe8..af0730816 100644
--- a/ggml.c
+++ b/ggml.c
@@ -13335,7 +13335,7 @@ static void ggml_compute_forward_soft_max_f32(
         const float slope = (max_bias > 0.0f) ? h < n_head_log2 ? powf(m0, h + 1) : powf(m1, 2*(h - n_head_log2) + 1) : 1.0f;
 
         float * sp = (float *)((char *) src0->data + i1*src0->nb[1]);
-        float * dp = (float *)((char *) dst->data + i1*dst->nb[1]);
+        float * dp = (float *)((char *) dst->data + i1*dst->nb[1]); // broadcast the mask across rows
 
         ggml_fp16_t * mp_f16 = src1 ? (ggml_fp16_t *)((char *) src1->data) + (i1%ne01)*ne00 : NULL;
diff --git a/llama.cpp b/llama.cpp
index 8c3b54110..dbfb6e2b7 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -6403,7 +6403,7 @@ static struct ggml_tensor * llm_build_ffn(
           llm_ffn_gate_type   type_gate,
          const llm_build_cb & cb,
                         int   il) {
-    struct ggml_tensor * tmp = up ? ggml_mul_mat(ctx, up, cur): cur;
+    struct ggml_tensor * tmp = up ? ggml_mul_mat(ctx, up, cur) : cur;
     cb(tmp, "ffn_up", il);
 
     if (up_b) {
@@ -8372,7 +8372,6 @@ struct llm_build_context {
             // output layer norm
             cur = llm_build_norm(ctx0, cur, hparams, model.layers[il].layer_out_norm, model.layers[il].layer_out_norm_b, LLM_NORM, cb, il);
 
-            // input for next layer
            inpL = cur;
        }
@@ -12806,8 +12805,6 @@ static std::vector<llama_vocab::id> llama_tokenize_internal(const llama_vocab &
                    }
                }
 
-                //GGML_ASSERT(vocab.special_add_eos != 1);
-                //TODO: Check this, why this tokenizer does not add </s> at the end, why not leaving up to the `gguf` exporter?
                if (add_special && vocab.special_add_eos == 1) {
                    GGML_ASSERT(vocab.special_add_eos != -1);
                    output.push_back(vocab.special_eos_id);
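A note on the first hunk: the broadcast that the restored comment refers to is carried by the offset `(i1%ne01)*ne00`, which reuses the same `ne01` mask rows for every head processed by `ggml_compute_forward_soft_max_f32`. The sketch below is a standalone C illustration of that addressing only, with made-up sizes `NE00`/`NE01`/`NE02`; it is not the ggml kernel itself.

```c
#include <stdio.h>

// Hypothetical sizes for the sketch, not taken from ggml:
// row length, number of mask rows, number of heads.
enum { NE00 = 4, NE01 = 3, NE02 = 2 };

int main(void) {
    // One 2D mask of NE01 rows, shared by all NE02 heads.
    float mask[NE01 * NE00];
    for (int i = 0; i < NE01 * NE00; i++) {
        mask[i] = (float) i;
    }

    // The softmax input has NE01*NE02 rows (one block of NE01 rows per head).
    // Each input row i1 selects its mask row with the same offset as the diff,
    // (i1 % ne01) * ne00, so a single mask is broadcast across the heads.
    for (int i1 = 0; i1 < NE01 * NE02; i1++) {
        const float * mp = mask + (i1 % NE01) * NE00;
        printf("input row %d -> mask row %d (first mask value %.1f)\n",
               i1, i1 % NE01, mp[0]);
    }
    return 0;
}
```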
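The second hunk only adjusts spacing, but the line it touches is the one that makes the `up` projection optional in `llm_build_ffn`: when `up` is NULL the input tensor is passed through unchanged. A minimal sketch of that pattern against the ggml C API follows; the context size and tensor shapes are invented for the example, and no graph is actually computed.

```c
#include "ggml.h"
#include <stdio.h>

int main(void) {
    // Small scratch context; the size here is arbitrary for the sketch.
    struct ggml_init_params params = {
        .mem_size   = 16 * 1024 * 1024,
        .mem_buffer = NULL,
        .no_alloc   = false,
    };
    struct ggml_context * ctx = ggml_init(params);

    // cur: [n_embd, n_tokens]; an up weight would be [n_embd, n_ff] if present.
    struct ggml_tensor * cur = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 8, 4);
    struct ggml_tensor * up  = NULL; // this layer has no up projection

    // Same pattern as the line fixed in llm_build_ffn: only build the matmul
    // node when the weight exists, otherwise reuse the input tensor directly.
    struct ggml_tensor * tmp = up ? ggml_mul_mat(ctx, up, cur) : cur;

    printf("ffn_up shape: [%lld, %lld]\n",
           (long long) tmp->ne[0], (long long) tmp->ne[1]);

    ggml_free(ctx);
    return 0;
}
```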