diff --git a/ggml.c b/ggml.c
index dbef99312..d241b9da6 100644
--- a/ggml.c
+++ b/ggml.c
@@ -3802,7 +3802,7 @@ static inline int ggml_up(int n, int m) {
 
 ////////////////////////////////////////////////////////////////////////////////
 
-struct ggml_context * ggml_init(struct ggml_init_params params) {
+struct ggml_context * ggml_init(const struct ggml_init_params params) {
     // make this function thread safe
     ggml_critical_section_start();
 
@@ -3936,7 +3936,7 @@ size_t ggml_used_mem(const struct ggml_context * ctx) {
     return ctx->objects_end == NULL ? 0 : ctx->objects_end->offs + ctx->objects_end->size;
 }
 
-size_t ggml_set_scratch(struct ggml_context * ctx, struct ggml_scratch scratch) {
+size_t ggml_set_scratch(struct ggml_context * ctx, const struct ggml_scratch scratch) {
     const size_t result = ctx->scratch.data ? ctx->scratch.offs : 0;
 
     ctx->scratch = scratch;
@@ -6458,7 +6458,7 @@ void ggml_set_param(
 static void ggml_compute_forward_dup_same_cont(
         const struct ggml_compute_params * params,
         const struct ggml_tensor * src0,
-        struct ggml_tensor * dst) {
+        const struct ggml_tensor * dst) {
     GGML_ASSERT(ggml_nelements(dst) == ggml_nelements(src0));
     GGML_ASSERT(ggml_is_contiguous(dst) && ggml_is_contiguous(src0));
     GGML_ASSERT(src0->type == dst->type);
@@ -7064,7 +7064,7 @@ static void ggml_compute_forward_dup_f32(
 static void ggml_compute_forward_dup(
         const struct ggml_compute_params * params,
         const struct ggml_tensor * src0,
-        struct ggml_tensor * dst) {
+        const struct ggml_tensor * dst) {
     if (ggml_is_contiguous(src0) && ggml_is_contiguous(dst) && src0->type == dst->type) {
         ggml_compute_forward_dup_same_cont(params, src0, dst);
         return;
@@ -7710,7 +7710,7 @@ static void ggml_compute_forward_add1(
         const struct ggml_compute_params * params,
         const struct ggml_tensor * src0,
         const struct ggml_tensor * src1,
-        struct ggml_tensor * dst) {
+        const struct ggml_tensor * dst) {
     switch (src0->type) {
         case GGML_TYPE_F32:
             {
@@ -7842,7 +7842,7 @@ static void ggml_compute_forward_acc(
         const struct ggml_tensor * src0,
         const struct ggml_tensor * src1,
         const struct ggml_tensor * opt0,
-        struct ggml_tensor * dst) {
+        const struct ggml_tensor * dst) {
 
     switch (src0->type) {
         case GGML_TYPE_F32:
@@ -7946,7 +7946,7 @@ static void ggml_compute_forward_sub(
         const struct ggml_compute_params * params,
         const struct ggml_tensor * src0,
         const struct ggml_tensor * src1,
-        struct ggml_tensor * dst) {
+        const struct ggml_tensor * dst) {
     switch (src0->type) {
         case GGML_TYPE_F32:
            {
@@ -8045,7 +8045,7 @@ static void ggml_compute_forward_mul(
         const struct ggml_compute_params * params,
         const struct ggml_tensor * src0,
         const struct ggml_tensor * src1,
-        struct ggml_tensor * dst) {
+        const struct ggml_tensor * dst) {
     switch (src0->type) {
         case GGML_TYPE_F32:
             {
@@ -8141,7 +8141,7 @@ static void ggml_compute_forward_div(
         const struct ggml_compute_params * params,
         const struct ggml_tensor * src0,
         const struct ggml_tensor * src1,
-        struct ggml_tensor * dst) {
+        const struct ggml_tensor * dst) {
     switch (src0->type) {
         case GGML_TYPE_F32:
             {
@@ -8183,7 +8183,7 @@ static void ggml_compute_forward_sqr_f32(
 static void ggml_compute_forward_sqr(
         const struct ggml_compute_params * params,
         const struct ggml_tensor * src0,
-        struct ggml_tensor * dst) {
+        const struct ggml_tensor * dst) {
     switch (src0->type) {
         case GGML_TYPE_F32:
             {
@@ -8225,7 +8225,7 @@ static void ggml_compute_forward_sqrt_f32(
 static void ggml_compute_forward_sqrt(
         const struct ggml_compute_params * params,
         const struct ggml_tensor * src0,
-        struct ggml_tensor * dst) {
+        const struct ggml_tensor * dst) {
     switch (src0->type) {
         case GGML_TYPE_F32:
             {
@@ -8268,7 +8268,7 @@ static void ggml_compute_forward_log_f32(
 static void ggml_compute_forward_log(
         const struct ggml_compute_params * params,
         const struct ggml_tensor * src0,
-        struct ggml_tensor * dst) {
+        const struct ggml_tensor * dst) {
     switch (src0->type) {
         case GGML_TYPE_F32:
             {
@@ -8286,7 +8286,7 @@ static void ggml_compute_forward_log(
 static void ggml_compute_forward_sum_f32(
         const struct ggml_compute_params * params,
         const struct ggml_tensor * src0,
-        struct ggml_tensor * dst) {
+        const struct ggml_tensor * dst) {
     assert(params->ith == 0);
     assert(ggml_is_scalar(dst));
 
@@ -8325,7 +8325,7 @@ static void ggml_compute_forward_sum_f32(
 static void ggml_compute_forward_sum(
         const struct ggml_compute_params * params,
         const struct ggml_tensor * src0,
-        struct ggml_tensor * dst) {
+        const struct ggml_tensor * dst) {
     switch (src0->type) {
         case GGML_TYPE_F32:
             {
@@ -8343,7 +8343,7 @@ static void ggml_compute_forward_sum(
 static void ggml_compute_forward_sum_rows_f32(
         const struct ggml_compute_params * params,
         const struct ggml_tensor * src0,
-        struct ggml_tensor * dst) {
+        const struct ggml_tensor * dst) {
     GGML_ASSERT(params->ith == 0);
 
     if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
@@ -8392,7 +8392,7 @@ static void ggml_compute_forward_sum_rows_f32(
 static void ggml_compute_forward_sum_rows(
         const struct ggml_compute_params * params,
         const struct ggml_tensor * src0,
-        struct ggml_tensor * dst) {
+        const struct ggml_tensor * dst) {
     switch (src0->type) {
         case GGML_TYPE_F32:
             {
@@ -8463,7 +8463,7 @@ static void ggml_compute_forward_mean_f32(
 static void ggml_compute_forward_mean(
         const struct ggml_compute_params * params,
         const struct ggml_tensor * src0,
-        struct ggml_tensor * dst) {
+        const struct ggml_tensor * dst) {
     switch (src0->type) {
         case GGML_TYPE_F32:
             {
@@ -8481,7 +8481,7 @@ static void ggml_compute_forward_mean(
 static void ggml_compute_forward_repeat_f32(
         const struct ggml_compute_params * params,
         const struct ggml_tensor * src0,
-        struct ggml_tensor * dst) {
+        const struct ggml_tensor * dst) {
     GGML_ASSERT(params->ith == 0);
     GGML_ASSERT(ggml_can_repeat(src0, dst));
 
@@ -8542,7 +8542,7 @@ static void ggml_compute_forward_repeat_f32(
 static void ggml_compute_forward_repeat(
         const struct ggml_compute_params * params,
         const struct ggml_tensor * src0,
-        struct ggml_tensor * dst) {
+        const struct ggml_tensor * dst) {
     switch (src0->type) {
         case GGML_TYPE_F32:
             {
@@ -8560,7 +8560,7 @@ static void ggml_compute_forward_repeat(
 static void ggml_compute_forward_abs_f32(
         const struct ggml_compute_params * params,
         const struct ggml_tensor * src0,
-        struct ggml_tensor * dst) {
+        const struct ggml_tensor * dst) {
     assert(params->ith == 0);
     assert(ggml_are_same_shape(src0, dst));
 
@@ -8584,7 +8584,7 @@ static void ggml_compute_forward_abs_f32(
 static void ggml_compute_forward_abs(
         const struct ggml_compute_params * params,
         const struct ggml_tensor * src0,
-        struct ggml_tensor * dst) {
+        const struct ggml_tensor * dst) {
     switch (src0->type) {
         case GGML_TYPE_F32:
             {
@@ -8626,7 +8626,7 @@ static void ggml_compute_forward_sgn_f32(
 static void ggml_compute_forward_sgn(
         const struct ggml_compute_params * params,
         const struct ggml_tensor * src0,
-        struct ggml_tensor * dst) {
+        const struct ggml_tensor * dst) {
     switch (src0->type) {
         case GGML_TYPE_F32:
             {
@@ -8668,7 +8668,7 @@ static void ggml_compute_forward_neg_f32(
 static void ggml_compute_forward_neg(
         const struct ggml_compute_params * params,
         const struct ggml_tensor * src0,
-        struct ggml_tensor * dst) {
+        const struct ggml_tensor * dst) {
     switch (src0->type) {
         case GGML_TYPE_F32:
             {
@@ -8710,7 +8710,7 @@ static void ggml_compute_forward_step_f32(
 static void ggml_compute_forward_step(
         const struct ggml_compute_params * params,
         const struct ggml_tensor * src0,
-        struct ggml_tensor * dst) {
+        const struct ggml_tensor * dst) {
     switch (src0->type) {
         case GGML_TYPE_F32:
             {
@@ -8752,7 +8752,7 @@ static void ggml_compute_forward_relu_f32(
 static void ggml_compute_forward_relu(
         const struct ggml_compute_params * params,
         const struct ggml_tensor * src0,
-        struct ggml_tensor * dst) {
+        const struct ggml_tensor * dst) {
     switch (src0->type) {
         case GGML_TYPE_F32:
             {
@@ -8811,7 +8811,7 @@ static void ggml_compute_forward_gelu_f32(
 static void ggml_compute_forward_gelu(
         const struct ggml_compute_params * params,
         const struct ggml_tensor * src0,
-        struct ggml_tensor * dst) {
+        const struct ggml_tensor * dst) {
     switch (src0->type) {
         case GGML_TYPE_F32:
             {
@@ -8872,7 +8872,7 @@ static void ggml_compute_forward_silu_f32(
 static void ggml_compute_forward_silu(
         const struct ggml_compute_params * params,
         const struct ggml_tensor * src0,
-        struct ggml_tensor * dst) {
+        const struct ggml_tensor * dst) {
     switch (src0->type) {
         case GGML_TYPE_F32:
             {
@@ -8937,7 +8937,7 @@ static void ggml_compute_forward_silu_back(
         const struct ggml_compute_params * params,
         const struct ggml_tensor * src0,
         const struct ggml_tensor * grad,
-        struct ggml_tensor * dst) {
+        const struct ggml_tensor * dst) {
     switch (src0->type) {
         case GGML_TYPE_F32:
             {
@@ -9016,7 +9016,7 @@ static void ggml_compute_forward_norm_f32(
 static void ggml_compute_forward_norm(
         const struct ggml_compute_params * params,
         const struct ggml_tensor * src0,
-        struct ggml_tensor * dst) {
+        const struct ggml_tensor * dst) {
     switch (src0->type) {
         case GGML_TYPE_F32:
             {
@@ -9090,7 +9090,7 @@ static void ggml_compute_forward_rms_norm_f32(
 static void ggml_compute_forward_rms_norm(
         const struct ggml_compute_params * params,
         const struct ggml_tensor * src0,
-        struct ggml_tensor * dst) {
+        const struct ggml_tensor * dst) {
     switch (src0->type) {
         case GGML_TYPE_F32:
             {
@@ -9279,7 +9279,7 @@ static void ggml_compute_forward_rms_norm_back(
         const struct ggml_compute_params * params,
         const struct ggml_tensor * src0,
         const struct ggml_tensor * src1,
-        struct ggml_tensor * dst) {
+        const struct ggml_tensor * dst) {
     switch (src0->type) {
         case GGML_TYPE_F32:
             {
@@ -9937,7 +9937,7 @@ static void ggml_compute_forward_mul_mat(
         const struct ggml_compute_params * params,
         const struct ggml_tensor * src0,
         const struct ggml_tensor * src1,
-        struct ggml_tensor * dst) {
+        const struct ggml_tensor * dst) {
     switch (src0->type) {
         case GGML_TYPE_Q4_0:
         case GGML_TYPE_Q4_1:
@@ -10013,7 +10013,7 @@ static void ggml_compute_forward_scale(
         const struct ggml_compute_params * params,
         const struct ggml_tensor * src0,
         const struct ggml_tensor * src1,
-        struct ggml_tensor * dst) {
+        const struct ggml_tensor * dst) {
     switch (src0->type) {
         case GGML_TYPE_F32:
             {
@@ -10114,7 +10114,7 @@ static void ggml_compute_forward_set(
         const struct ggml_tensor * src0,
         const struct ggml_tensor * src1,
         const struct ggml_tensor * opt0,
-        struct ggml_tensor * dst) {
+        const struct ggml_tensor * dst) {
 
     switch (src0->type) {
         case GGML_TYPE_F32:
@@ -10140,7 +10140,7 @@ static void ggml_compute_forward_set(
 static void ggml_compute_forward_cpy(
         const struct ggml_compute_params * params,
         const struct ggml_tensor * src0,
-        struct ggml_tensor * dst) {
+        const struct ggml_tensor * dst) {
     ggml_compute_forward_dup(params, src0, dst);
 }
 
@@ -10149,7 +10149,7 @@ static void ggml_compute_forward_cpy(
 static void ggml_compute_forward_cont(
         const struct ggml_compute_params * params,
         const struct ggml_tensor * src0,
-        struct ggml_tensor * dst) {
+        const struct ggml_tensor * dst) {
     ggml_compute_forward_dup(params, src0, dst);
 }
 
@@ -10158,7 +10158,7 @@ static void ggml_compute_forward_cont(
 static void ggml_compute_forward_reshape(
         const struct ggml_compute_params * params,
         const struct ggml_tensor * src0,
-        struct ggml_tensor * dst) {
+        const struct ggml_tensor * dst) {
     // NOP
     UNUSED(params);
     UNUSED(src0);
@@ -10285,7 +10285,7 @@ static void ggml_compute_forward_get_rows(
         const struct ggml_compute_params * params,
         const struct ggml_tensor * src0,
         const struct ggml_tensor * src1,
-        struct ggml_tensor * dst) {
+        const struct ggml_tensor * dst) {
     switch (src0->type) {
         case GGML_TYPE_Q4_0:
         case GGML_TYPE_Q4_1:
@@ -10403,7 +10403,7 @@ static void ggml_compute_forward_get_rows_back(
         const struct ggml_tensor * src0,
         const struct ggml_tensor * src1,
         const struct ggml_tensor * opt0,
-        struct ggml_tensor * dst) {
+        const struct ggml_tensor * dst) {
     switch (src0->type) {
         case GGML_TYPE_F16:
             {
@@ -10498,7 +10498,7 @@ static void ggml_compute_forward_diag_f32(
 static void ggml_compute_forward_diag(
         const struct ggml_compute_params * params,
         const struct ggml_tensor * src0,
-        struct ggml_tensor * dst) {
+        const struct ggml_tensor * dst) {
     switch (src0->type) {
         case GGML_TYPE_F32:
             {
@@ -10517,7 +10517,7 @@ static void ggml_compute_forward_diag_mask_f32(
         const struct ggml_compute_params * params,
         const struct ggml_tensor * src0,
         const struct ggml_tensor * src1,
-        struct ggml_tensor * dst,
+        const struct ggml_tensor * dst,
         const float value) {
     assert(src1->type == GGML_TYPE_I32);
     assert(ggml_nelements(src1) == 2);
@@ -10569,7 +10569,7 @@ static void ggml_compute_forward_diag_mask_inf(
         const struct ggml_compute_params * params,
         const struct ggml_tensor * src0,
         const struct ggml_tensor * src1,
-        struct ggml_tensor * dst) {
+        const struct ggml_tensor * dst) {
     switch (src0->type) {
         case GGML_TYPE_F32:
             {
@@ -10586,7 +10586,7 @@ static void ggml_compute_forward_diag_mask_zero(
         const struct ggml_compute_params * params,
         const struct ggml_tensor * src0,
         const struct ggml_tensor * src1,
-        struct ggml_tensor * dst) {
+        const struct ggml_tensor * dst) {
     switch (src0->type) {
         case GGML_TYPE_F32:
             {
@@ -10675,7 +10675,7 @@ static void ggml_compute_forward_soft_max_f32(
 static void ggml_compute_forward_soft_max(
         const struct ggml_compute_params * params,
         const struct ggml_tensor * src0,
-        struct ggml_tensor * dst) {
+        const struct ggml_tensor * dst) {
     switch (src0->type) {
         case GGML_TYPE_F32:
             {
@@ -10694,7 +10694,7 @@ static void ggml_compute_forward_alibi_f32(
         const struct ggml_compute_params * params,
         const struct ggml_tensor * src0,
         const struct ggml_tensor * src1,
-        struct ggml_tensor * dst) {
+        const struct ggml_tensor * dst) {
     assert(params->ith == 0);
     assert(src1->type == GGML_TYPE_I32);
     assert(ggml_nelements(src1) == 2);
@@ -10757,7 +10757,7 @@ static void ggml_compute_forward_alibi_f16(
         const struct ggml_compute_params * params,
         const struct ggml_tensor * src0,
         const struct ggml_tensor * src1,
-        struct ggml_tensor * dst) {
+        const struct ggml_tensor * dst) {
     assert(params->ith == 0);
     assert(src1->type == GGML_TYPE_I32);
     assert(ggml_nelements(src1) == 2);
@@ -10820,7 +10820,7 @@ static void ggml_compute_forward_alibi(
         const struct ggml_compute_params * params,
         const struct ggml_tensor * src0,
         const struct ggml_tensor * src1,
-        struct ggml_tensor * dst) {
+        const struct ggml_tensor * dst) {
     switch (src0->type) {
         case GGML_TYPE_F16:
             {
@@ -10852,7 +10852,7 @@ static void ggml_compute_forward_rope_f32(
         const struct ggml_compute_params * params,
         const struct ggml_tensor * src0,
         const struct ggml_tensor * src1,
-        struct ggml_tensor * dst) {
+        const struct ggml_tensor * dst) {
     GGML_ASSERT(src1->type == GGML_TYPE_I32);
     GGML_ASSERT(ggml_nelements(src1) == 3);
 
@@ -10965,7 +10965,7 @@ static void ggml_compute_forward_rope_f16(
         const struct ggml_compute_params * params,
         const struct ggml_tensor * src0,
         const struct ggml_tensor * src1,
-        struct ggml_tensor * dst) {
+        const struct ggml_tensor * dst) {
     GGML_ASSERT(src1->type == GGML_TYPE_I32);
     GGML_ASSERT(ggml_nelements(src1) == 3);
 
@@ -11078,7 +11078,7 @@ static void ggml_compute_forward_rope(
         const struct ggml_compute_params * params,
         const struct ggml_tensor * src0,
         const struct ggml_tensor * src1,
-        struct ggml_tensor * dst) {
+        const struct ggml_tensor * dst) {
     switch (src0->type) {
         case GGML_TYPE_F16:
             {
@@ -11327,7 +11327,7 @@ static void ggml_compute_forward_rope_back(
         const struct ggml_compute_params * params,
         const struct ggml_tensor * src0,
         const struct ggml_tensor * src1,
-        struct ggml_tensor * dst) {
+        const struct ggml_tensor * dst) {
     switch (src0->type) {
         case GGML_TYPE_F16:
             {
@@ -11350,7 +11350,7 @@ static void ggml_compute_forward_conv_1d_1s_f16_f32(
         const struct ggml_compute_params * params,
         const struct ggml_tensor * src0,
         const struct ggml_tensor * src1,
-        struct ggml_tensor * dst) {
+        const struct ggml_tensor * dst) {
     GGML_ASSERT(src0->type == GGML_TYPE_F16);
     GGML_ASSERT(src1->type == GGML_TYPE_F32);
     GGML_ASSERT( dst->type == GGML_TYPE_F32);
@@ -11470,7 +11470,7 @@ static void ggml_compute_forward_conv_1d_1s_f32(
         const struct ggml_compute_params * params,
         const struct ggml_tensor * src0,
         const struct ggml_tensor * src1,
-        struct ggml_tensor * dst) {
+        const struct ggml_tensor * dst) {
     GGML_ASSERT(src0->type == GGML_TYPE_F32);
     GGML_ASSERT(src1->type == GGML_TYPE_F32);
     GGML_ASSERT( dst->type == GGML_TYPE_F32);
@@ -11590,7 +11590,7 @@ static void ggml_compute_forward_conv_1d_1s(
         const struct ggml_compute_params * params,
         const struct ggml_tensor * src0,
         const struct ggml_tensor * src1,
-        struct ggml_tensor * dst) {
+        const struct ggml_tensor * dst) {
     switch (src0->type) {
         case GGML_TYPE_F16:
             {
@@ -11613,7 +11613,7 @@ static void ggml_compute_forward_conv_1d_2s_f16_f32(
         const struct ggml_compute_params * params,
         const struct ggml_tensor * src0,
         const struct ggml_tensor * src1,
-        struct ggml_tensor * dst) {
+        const struct ggml_tensor * dst) {
     GGML_ASSERT(src0->type == GGML_TYPE_F16);
     GGML_ASSERT(src1->type == GGML_TYPE_F32);
     GGML_ASSERT( dst->type == GGML_TYPE_F32);
@@ -11733,7 +11733,7 @@ static void ggml_compute_forward_conv_1d_2s_f32(
         const struct ggml_compute_params * params,
         const struct ggml_tensor * src0,
         const struct ggml_tensor * src1,
-        struct ggml_tensor * dst) {
+        const struct ggml_tensor * dst) {
     GGML_ASSERT(src0->type == GGML_TYPE_F32);
     GGML_ASSERT(src1->type == GGML_TYPE_F32);
     GGML_ASSERT( dst->type == GGML_TYPE_F32);
@@ -11853,7 +11853,7 @@ static void ggml_compute_forward_conv_1d_2s(
         const struct ggml_compute_params * params,
         const struct ggml_tensor * src0,
         const struct ggml_tensor * src1,
-        struct ggml_tensor * dst) {
+        const struct ggml_tensor * dst) {
     switch (src0->type) {
         case GGML_TYPE_F16:
             {
@@ -11878,7 +11878,7 @@ static void ggml_compute_forward_flash_attn_f32(
         const struct ggml_tensor * k,
         const struct ggml_tensor * v,
         const bool masked,
-        struct ggml_tensor * dst) {
+        const struct ggml_tensor * dst) {
     int64_t t0 = ggml_perf_time_us();
     UNUSED(t0);
 
@@ -12087,7 +12087,7 @@ static void ggml_compute_forward_flash_attn_f16(
         const struct ggml_tensor * k,
         const struct ggml_tensor * v,
         const bool masked,
-        struct ggml_tensor * dst) {
+        const struct ggml_tensor * dst) {
     int64_t t0 = ggml_perf_time_us();
     UNUSED(t0);
 
@@ -12333,7 +12333,7 @@ static void ggml_compute_forward_flash_attn(
         const struct ggml_tensor * k,
         const struct ggml_tensor * v,
         const bool masked,
-        struct ggml_tensor * dst) {
+        const struct ggml_tensor * dst) {
     switch (q->type) {
         case GGML_TYPE_F16:
             {
@@ -12539,7 +12539,7 @@ static void ggml_compute_forward_flash_ff(
         const struct ggml_tensor * b1,
         const struct ggml_tensor * c0,
         const struct ggml_tensor * c1,
-        struct ggml_tensor * dst) {
+        const struct ggml_tensor * dst) {
     switch (b0->type) {
         case GGML_TYPE_F16:
             {
@@ -12586,7 +12586,7 @@ static void ggml_compute_forward_map_unary_f32(
 static void ggml_compute_forward_map_unary(
         const struct ggml_compute_params * params,
         const struct ggml_tensor * src0,
-        struct ggml_tensor * dst,
+        const struct ggml_tensor * dst,
         const ggml_unary_op_f32_t fun) {
     switch (src0->type) {
         case GGML_TYPE_F32:
@@ -12606,7 +12606,7 @@ static void ggml_compute_forward_map_binary_f32(
         const struct ggml_compute_params * params,
         const struct ggml_tensor * src0,
         const struct ggml_tensor * src1,
-        struct ggml_tensor * dst,
+        const struct ggml_tensor * dst,
         const ggml_binary_op_f32_t fun) {
     assert(params->ith == 0);
     assert(ggml_are_same_shape(src0, src1) && ggml_are_same_shape(src0, dst));
@@ -12635,7 +12635,7 @@ static void ggml_compute_forward_map_binary(
         const struct ggml_compute_params * params,
         const struct ggml_tensor * src0,
         const struct ggml_tensor * src1,
-        struct ggml_tensor * dst,
+        const struct ggml_tensor * dst,
         const ggml_binary_op_f32_t fun) {
     switch (src0->type) {
         case GGML_TYPE_F32:
@@ -12651,7 +12651,7 @@ static void ggml_compute_forward_map_binary(
 
 /////////////////////////////////
 
-static void ggml_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor) {
+static void ggml_compute_forward(const struct ggml_compute_params * params, const struct ggml_tensor * tensor) {
     GGML_ASSERT(params);
 
     switch (tensor->op) {
@@ -14405,7 +14405,7 @@ void ggml_graph_dump_dot(const struct ggml_cgraph * gb, const struct ggml_cgraph
                     "label=\"",
                 (void *) node, color);
 
-        if (strlen(node->name) > 0) {
+        if (node->name[0] != '\0') {
             fprintf(fp, "%s |", node->name);
         }
 
@@ -14430,7 +14430,7 @@ void ggml_graph_dump_dot(const struct ggml_cgraph * gb, const struct ggml_cgraph
                     "label=\"",
                 (void *) node, color);
 
-        if (strlen(node->name) > 0) {
+        if (node->name[0] != '\0') {
             fprintf(fp, "%s | ", node->name);
         }
         if (ggml_nelements(node) == 1) {
@@ -14543,7 +14543,7 @@ static void ggml_opt_get_grad(int np, struct ggml_tensor * const ps[], float * g
 
 static enum ggml_opt_result ggml_opt_adam(
         struct ggml_context * ctx,
-        struct ggml_opt_params params,
+        const struct ggml_opt_params params,
         struct ggml_tensor * f,
         struct ggml_cgraph * gf,
         struct ggml_cgraph * gb) {
@@ -15120,7 +15120,7 @@ struct ggml_opt_params ggml_opt_default_params(enum ggml_opt_type type) {
 
 enum ggml_opt_result ggml_opt(
         struct ggml_context * ctx,
-        struct ggml_opt_params params,
+        const struct ggml_opt_params params,
         struct ggml_tensor * f) {
     bool free_ctx = false;
     if (ctx == NULL) {
diff --git a/ggml.h b/ggml.h
index 255541d02..cc332dfdb 100644
--- a/ggml.h
+++ b/ggml.h
@@ -442,7 +442,7 @@ extern "C" {
 
     GGML_API size_t ggml_used_mem(const struct ggml_context * ctx);
 
-    GGML_API size_t ggml_set_scratch(struct ggml_context * ctx, struct ggml_scratch scratch);
+    GGML_API size_t ggml_set_scratch(struct ggml_context * ctx, const struct ggml_scratch scratch);
 
     GGML_API struct ggml_tensor * ggml_new_tensor(
             struct ggml_context * ctx,
@@ -1061,7 +1061,7 @@ extern "C" {
     // optimize the function defined by the tensor f
     GGML_API enum ggml_opt_result ggml_opt(
             struct ggml_context * ctx,
-            struct ggml_opt_params params,
+            const struct ggml_opt_params params,
             struct ggml_tensor * f);
 
     //
diff --git a/llama.cpp b/llama.cpp
index 1f9d37844..ad2b2d923 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -592,6 +592,9 @@ struct llama_model_loader {
         auto * first_file = new llama_file_loader(fname_base.c_str(), 0, tensors_map);
         file_loaders.emplace_back(first_file);
         uint32_t n_parts = vocab_only ? 1 : guess_n_parts();
+        if (n_parts != 1) {
+            file_loaders.reserve(n_parts - 1);
+        }
         for (uint32_t i = 1; i < n_parts; i++) {
             std::string fname = fname_base + "." + std::to_string(i);
             auto * ith_file = new llama_file_loader(fname.c_str(), i, tensors_map);
@@ -891,10 +894,11 @@ static void llama_model_load_internal(
 
     std::unique_ptr<llama_model_loader> ml(new llama_model_loader(fname, use_mmap, vocab_only));
 
-    lctx.vocab = std::move(ml->file_loaders.at(0)->vocab);
+    const auto & loader = ml->file_loaders.at(0);
+    lctx.vocab = std::move(loader->vocab);
     auto & model = lctx.model;
-    model.hparams = ml->file_loaders.at(0)->hparams;
-    llama_file_version file_version = ml->file_loaders.at(0)->file_version;
+    model.hparams = loader->hparams;
+    llama_file_version file_version = loader->file_version;
     auto & hparams = model.hparams;
 
     uint32_t n_ff = ((2*(4*hparams.n_embd)/3 + hparams.n_mult - 1)/hparams.n_mult)*hparams.n_mult;
@@ -1019,7 +1023,8 @@ static void llama_model_load_internal(
     ml->done_getting_tensors();
 
     // populate `tensors_by_name`
-    for (llama_load_tensor & lt : ml->tensors_map.tensors) {
+    model.tensors_by_name.reserve(ml->tensors_map.tensors.size());
+    for (const auto & lt : ml->tensors_map.tensors) {
         model.tensors_by_name.emplace_back(lt.name, lt.ggml_tensor);
     }
 
@@ -1143,6 +1148,8 @@ static bool llama_eval_internal(
     struct ggml_tensor * inpL = ggml_get_rows(ctx0, model.tok_embeddings, embd);
 
     for (int il = 0; il < n_layer; ++il) {
+        const auto & layer = model.layers[il];
+
         struct ggml_tensor * inpSA = inpL;
 
         struct ggml_tensor * cur;
@@ -1155,22 +1162,22 @@ static bool llama_eval_internal(
 
             // cur = attention_norm*cur
             cur = ggml_mul(ctx0,
-                        ggml_repeat(ctx0, model.layers[il].attention_norm, cur),
+                        ggml_repeat(ctx0, layer.attention_norm, cur),
                         cur);
         }
 
         // self-attention
         {
             // compute Q and K and RoPE them
-            struct ggml_tensor * Qcur = ggml_rope_inplace(ctx0, ggml_reshape_3d(ctx0, ggml_mul_mat(ctx0, model.layers[il].wq, cur), n_embd/n_head, n_head, N), n_past, n_rot, 0);
-            struct ggml_tensor * Kcur = ggml_rope_inplace(ctx0, ggml_reshape_3d(ctx0, ggml_mul_mat(ctx0, model.layers[il].wk, cur), n_embd/n_head, n_head, N), n_past, n_rot, 0);
+            struct ggml_tensor * Qcur = ggml_rope_inplace(ctx0, ggml_reshape_3d(ctx0, ggml_mul_mat(ctx0, layer.wq, cur), n_embd/n_head, n_head, N), n_past, n_rot, 0);
+            struct ggml_tensor * Kcur = ggml_rope_inplace(ctx0, ggml_reshape_3d(ctx0, ggml_mul_mat(ctx0, layer.wk, cur), n_embd/n_head, n_head, N), n_past, n_rot, 0);
             ggml_set_name(Qcur, "Qcur");
             ggml_set_name(Kcur, "Kcur");
 
             // store key and value to memory
             {
                 // compute the transposed [N, n_embd] V matrix
-                struct ggml_tensor * Vcur = ggml_transpose(ctx0, ggml_reshape_2d(ctx0, ggml_mul_mat(ctx0, model.layers[il].wv, cur), n_embd, N));
+                struct ggml_tensor * Vcur = ggml_transpose(ctx0, ggml_reshape_2d(ctx0, ggml_mul_mat(ctx0, layer.wv, cur), n_embd, N));
 
                 struct ggml_tensor * k = ggml_view_1d(ctx0, kv_self.k, N*n_embd, (ggml_element_size(kv_self.k)*n_embd)*(il*n_ctx + n_past));
                 struct ggml_tensor * v = ggml_view_2d(ctx0, kv_self.v, N, n_embd,
@@ -1249,7 +1256,7 @@ static bool llama_eval_internal(
 
             // projection (no bias)
             cur = ggml_mul_mat(ctx0,
-                    model.layers[il].wo,
+                    layer.wo,
                     cur);
         }
 
@@ -1265,16 +1272,16 @@ static bool llama_eval_internal(
 
                 // cur = ffn_norm*cur
                 cur = ggml_mul(ctx0,
-                        ggml_repeat(ctx0, model.layers[il].ffn_norm, cur),
+                        ggml_repeat(ctx0, layer.ffn_norm, cur),
                         cur);
             }
 
             struct ggml_tensor * tmp = ggml_mul_mat(ctx0,
-                    model.layers[il].w3,
+                    layer.w3,
                     cur);
 
             cur = ggml_mul_mat(ctx0,
-                    model.layers[il].w1,
+                    layer.w1,
                     cur);
 
             // SILU activation
@@ -1283,7 +1290,7 @@ static bool llama_eval_internal(
             cur = ggml_mul(ctx0, cur, tmp);
 
             cur = ggml_mul_mat(ctx0,
-                    model.layers[il].w2,
+                    layer.w2,
                     cur);
         }
 
@@ -1450,7 +1457,7 @@ struct llama_tokenizer {
 
         // keep substituting the highest frequency pairs for as long as we can.
         while (!work_queue_.empty()) {
-            auto bigram = work_queue_.top();
+            const auto& bigram = work_queue_.top();
             work_queue_.pop();
 
             auto & left_sym = symbols_[bigram.left];
@@ -1485,6 +1492,7 @@ struct llama_tokenizer {
 
             if (token == vocab_.token_to_id.end()) {
                 // output any symbols that did not form tokens as bytes.
+                output.reserve(symbol.n);
                 for (int j = 0; j < (int) symbol.n; ++j) {
                     llama_vocab::id token_id = static_cast<uint8_t>(symbol.text[j]) + 3;
                     output.push_back(token_id);
@@ -1703,8 +1711,9 @@ void llama_sample_typical(struct llama_context * ctx, llama_token_data_array * c
 
     // Compute the absolute difference between negative log probability and entropy for each candidate
    std::vector<float> shifted_scores;
+    shifted_scores.reserve(candidates->size);
     for (size_t i = 0; i < candidates->size; ++i) {
-        float shifted_score = fabsf(-logf(candidates->data[i].p) - entropy);
+        const float shifted_score = fabsf(-logf(candidates->data[i].p) - entropy);
         shifted_scores.push_back(shifted_score);
     }
 
@@ -1733,6 +1742,7 @@ void llama_sample_typical(struct llama_context * ctx, llama_token_data_array * c
 
     // Resize the output vector to keep only the locally typical tokens
     std::vector<llama_token_data> new_candidates;
+    new_candidates.reserve(last_idx);
     for (size_t i = 0; i < last_idx; ++i) {
         size_t idx = indices[i];
         new_candidates.push_back(candidates->data[idx]);
@@ -2258,7 +2268,8 @@ int llama_apply_lora_from_file_internal(struct llama_context * ctx, const char *
 
     // create a name -> tensor map of the model to accelerate lookups
     std::unordered_map<std::string, struct ggml_tensor*> model_tensors;
-    for (auto & kv: model.tensors_by_name) {
+    model_tensors.reserve(model.tensors_by_name.size());
+    for (const auto & kv: model.tensors_by_name) {
         model_tensors.insert(kv);
     }
 
@@ -2374,12 +2385,13 @@ int llama_apply_lora_from_file_internal(struct llama_context * ctx, const char *
         ggml_tensor * base_t;
         if (model_loader) {
             // load from base model
-            if (model_loader->tensors_map.name_to_idx.find(base_name) == model_loader->tensors_map.name_to_idx.end()) {
+            auto & tmap = model_loader->tensors_map;
+            if (tmap.name_to_idx.find(base_name) == tmap.name_to_idx.end()) {
                 fprintf(stderr, "%s: error: tensor '%s' not found in base model\n", __func__, base_name.c_str());
                 return 1;
             }
-            size_t idx = model_loader->tensors_map.name_to_idx[base_name];
-            llama_load_tensor & lt = model_loader->tensors_map.tensors[idx];
+            size_t idx = tmap.name_to_idx[base_name];
+            llama_load_tensor & lt = tmap.tensors[idx];
             base_t = model_loader->get_tensor(base_name, { (uint32_t)dest_t->ne[0], (uint32_t)dest_t->ne[1] });
             lt.data = (uint8_t *) lt.ggml_tensor->data;
             model_loader->load_data_for(lt);
@@ -2513,11 +2525,12 @@ size_t llama_copy_state_data(struct llama_context * ctx, uint8_t * dst) {
         std::stringstream rng_ss;
         rng_ss << ctx->rng;
 
-        const size_t rng_size = rng_ss.str().size();
+        const auto & rng = rng_ss.str();
+        const size_t rng_size = rng.size();
 
         char rng_buf[LLAMA_MAX_RNG_STATE];
         memset(&rng_buf[0], 0, LLAMA_MAX_RNG_STATE);
-        memcpy(&rng_buf[0], rng_ss.str().data(), rng_ss.str().size());
+        memcpy(&rng_buf[0], rng.data(), rng.size());
 
         memcpy(out, &rng_size, sizeof(rng_size)); out += sizeof(rng_size);
         memcpy(out, &rng_buf[0], LLAMA_MAX_RNG_STATE); out += LLAMA_MAX_RNG_STATE;
@@ -2901,7 +2914,7 @@ void llama_reset_timings(struct llama_context * ctx) {
 const char * llama_print_system_info(void) {
     static std::string s;
 
-    s = "";
+    s.clear();
     s += "AVX = " + std::to_string(ggml_cpu_has_avx()) + " | ";
     s += "AVX2 = " + std::to_string(ggml_cpu_has_avx2()) + " | ";
     s += "AVX512 = " + std::to_string(ggml_cpu_has_avx512()) + " | ";