Code refactor and optimize using reserve
This commit is contained in:
parent 5ea4339273
commit 5f2c9ce21e
3 changed files with 106 additions and 93 deletions
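The headline optimization is to call std::vector::reserve() before loops that append a known number of elements, so the vector allocates its storage once instead of growing and copying several times while it fills up. A minimal sketch of the pattern, with illustrative names rather than code from this commit:

    #include <cstddef>
    #include <string>
    #include <vector>

    // Sketch of the reserve-before-push_back pattern; build_labels and count
    // are illustrative names, not identifiers from llama.cpp.
    std::vector<std::string> build_labels(std::size_t count) {
        std::vector<std::string> labels;
        labels.reserve(count);                                 // one allocation up front
        for (std::size_t i = 0; i < count; ++i) {
            labels.push_back("label-" + std::to_string(i));    // no reallocation inside the loop
        }
        return labels;
    }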
ggml.c (138 changed lines)

@@ -3802,7 +3802,7 @@ static inline int ggml_up(int n, int m) {
-struct ggml_context * ggml_init(struct ggml_init_params params) {
+struct ggml_context * ggml_init(const struct ggml_init_params params) {
@@ -3936,7 +3936,7 @@ size_t ggml_used_mem(const struct ggml_context * ctx) {
-size_t ggml_set_scratch(struct ggml_context * ctx, struct ggml_scratch scratch) {
+size_t ggml_set_scratch(struct ggml_context * ctx, const struct ggml_scratch scratch) {
@@ -6458,7 +6458,7 @@ void ggml_set_param(
 static void ggml_compute_forward_dup_same_cont(
-        struct ggml_tensor * dst) {
+        const struct ggml_tensor * dst) {
@@ -7064,7 +7064,7 @@ static void ggml_compute_forward_dup_f32(
 static void ggml_compute_forward_dup(
-        struct ggml_tensor * dst) {
+        const struct ggml_tensor * dst) {
@@ -7710,7 +7710,7 @@ static void ggml_compute_forward_add1(
-        struct ggml_tensor * dst) {
+        const struct ggml_tensor * dst) {
@@ -7842,7 +7842,7 @@ static void ggml_compute_forward_acc(
-        struct ggml_tensor * dst) {
+        const struct ggml_tensor * dst) {
@@ -7946,7 +7946,7 @@ static void ggml_compute_forward_sub(
-        struct ggml_tensor * dst) {
+        const struct ggml_tensor * dst) {
@@ -8045,7 +8045,7 @@ static void ggml_compute_forward_mul(
-        struct ggml_tensor * dst) {
+        const struct ggml_tensor * dst) {
@@ -8141,7 +8141,7 @@ static void ggml_compute_forward_div(
-        struct ggml_tensor * dst) {
+        const struct ggml_tensor * dst) {
@@ -8183,7 +8183,7 @@ static void ggml_compute_forward_sqr_f32(
 static void ggml_compute_forward_sqr(
-        struct ggml_tensor * dst) {
+        const struct ggml_tensor * dst) {
@@ -8225,7 +8225,7 @@ static void ggml_compute_forward_sqrt_f32(
 static void ggml_compute_forward_sqrt(
-        struct ggml_tensor * dst) {
+        const struct ggml_tensor * dst) {
@@ -8268,7 +8268,7 @@ static void ggml_compute_forward_log_f32(
 static void ggml_compute_forward_log(
-        struct ggml_tensor * dst) {
+        const struct ggml_tensor * dst) {
@@ -8286,7 +8286,7 @@ static void ggml_compute_forward_log(
 static void ggml_compute_forward_sum_f32(
-        struct ggml_tensor * dst) {
+        const struct ggml_tensor * dst) {
@@ -8325,7 +8325,7 @@ static void ggml_compute_forward_sum_f32(
 static void ggml_compute_forward_sum(
-        struct ggml_tensor * dst) {
+        const struct ggml_tensor * dst) {
@@ -8343,7 +8343,7 @@ static void ggml_compute_forward_sum(
 static void ggml_compute_forward_sum_rows_f32(
-        struct ggml_tensor * dst) {
+        const struct ggml_tensor * dst) {
@@ -8392,7 +8392,7 @@ static void ggml_compute_forward_sum_rows_f32(
 static void ggml_compute_forward_sum_rows(
-        struct ggml_tensor * dst) {
+        const struct ggml_tensor * dst) {
@@ -8463,7 +8463,7 @@ static void ggml_compute_forward_mean_f32(
 static void ggml_compute_forward_mean(
-        struct ggml_tensor * dst) {
+        const struct ggml_tensor * dst) {
@@ -8481,7 +8481,7 @@ static void ggml_compute_forward_mean(
 static void ggml_compute_forward_repeat_f32(
-        struct ggml_tensor * dst) {
+        const struct ggml_tensor * dst) {
@@ -8542,7 +8542,7 @@ static void ggml_compute_forward_repeat_f32(
 static void ggml_compute_forward_repeat(
-        struct ggml_tensor * dst) {
+        const struct ggml_tensor * dst) {
@@ -8560,7 +8560,7 @@ static void ggml_compute_forward_repeat(
 static void ggml_compute_forward_abs_f32(
-        struct ggml_tensor * dst) {
+        const struct ggml_tensor * dst) {
@@ -8584,7 +8584,7 @@ static void ggml_compute_forward_abs_f32(
 static void ggml_compute_forward_abs(
-        struct ggml_tensor * dst) {
+        const struct ggml_tensor * dst) {
@@ -8626,7 +8626,7 @@ static void ggml_compute_forward_sgn_f32(
 static void ggml_compute_forward_sgn(
-        struct ggml_tensor * dst) {
+        const struct ggml_tensor * dst) {
@@ -8668,7 +8668,7 @@ static void ggml_compute_forward_neg_f32(
 static void ggml_compute_forward_neg(
-        struct ggml_tensor * dst) {
+        const struct ggml_tensor * dst) {
@@ -8710,7 +8710,7 @@ static void ggml_compute_forward_step_f32(
 static void ggml_compute_forward_step(
-        struct ggml_tensor * dst) {
+        const struct ggml_tensor * dst) {
@@ -8752,7 +8752,7 @@ static void ggml_compute_forward_relu_f32(
 static void ggml_compute_forward_relu(
-        struct ggml_tensor * dst) {
+        const struct ggml_tensor * dst) {
@@ -8811,7 +8811,7 @@ static void ggml_compute_forward_gelu_f32(
 static void ggml_compute_forward_gelu(
-        struct ggml_tensor * dst) {
+        const struct ggml_tensor * dst) {
@@ -8872,7 +8872,7 @@ static void ggml_compute_forward_silu_f32(
 static void ggml_compute_forward_silu(
-        struct ggml_tensor * dst) {
+        const struct ggml_tensor * dst) {
@@ -8937,7 +8937,7 @@ static void ggml_compute_forward_silu_back(
-        struct ggml_tensor * dst) {
+        const struct ggml_tensor * dst) {
@@ -9016,7 +9016,7 @@ static void ggml_compute_forward_norm_f32(
 static void ggml_compute_forward_norm(
-        struct ggml_tensor * dst) {
+        const struct ggml_tensor * dst) {
@@ -9090,7 +9090,7 @@ static void ggml_compute_forward_rms_norm_f32(
 static void ggml_compute_forward_rms_norm(
-        struct ggml_tensor * dst) {
+        const struct ggml_tensor * dst) {
@@ -9279,7 +9279,7 @@ static void ggml_compute_forward_rms_norm_back(
-        struct ggml_tensor * dst) {
+        const struct ggml_tensor * dst) {
@@ -9937,7 +9937,7 @@ static void ggml_compute_forward_mul_mat(
-        struct ggml_tensor * dst) {
+        const struct ggml_tensor * dst) {
@@ -10013,7 +10013,7 @@ static void ggml_compute_forward_scale(
-        struct ggml_tensor * dst) {
+        const struct ggml_tensor * dst) {
@@ -10114,7 +10114,7 @@ static void ggml_compute_forward_set(
-        struct ggml_tensor * dst) {
+        const struct ggml_tensor * dst) {
@@ -10140,7 +10140,7 @@ static void ggml_compute_forward_set(
 static void ggml_compute_forward_cpy(
-        struct ggml_tensor * dst) {
+        const struct ggml_tensor * dst) {
@@ -10149,7 +10149,7 @@ static void ggml_compute_forward_cpy(
 static void ggml_compute_forward_cont(
-        struct ggml_tensor * dst) {
+        const struct ggml_tensor * dst) {
@@ -10158,7 +10158,7 @@ static void ggml_compute_forward_cont(
 static void ggml_compute_forward_reshape(
-        struct ggml_tensor * dst) {
+        const struct ggml_tensor * dst) {
@@ -10285,7 +10285,7 @@ static void ggml_compute_forward_get_rows(
-        struct ggml_tensor * dst) {
+        const struct ggml_tensor * dst) {
@@ -10403,7 +10403,7 @@ static void ggml_compute_forward_get_rows_back(
-        struct ggml_tensor * dst) {
+        const struct ggml_tensor * dst) {
@@ -10498,7 +10498,7 @@ static void ggml_compute_forward_diag_f32(
 static void ggml_compute_forward_diag(
-        struct ggml_tensor * dst) {
+        const struct ggml_tensor * dst) {
@@ -10517,7 +10517,7 @@ static void ggml_compute_forward_diag_mask_f32(
-        struct ggml_tensor * dst,
+        const struct ggml_tensor * dst,
@@ -10569,7 +10569,7 @@ static void ggml_compute_forward_diag_mask_inf(
-        struct ggml_tensor * dst) {
+        const struct ggml_tensor * dst) {
@@ -10586,7 +10586,7 @@ static void ggml_compute_forward_diag_mask_zero(
-        struct ggml_tensor * dst) {
+        const struct ggml_tensor * dst) {
@@ -10675,7 +10675,7 @@ static void ggml_compute_forward_soft_max_f32(
 static void ggml_compute_forward_soft_max(
-        struct ggml_tensor * dst) {
+        const struct ggml_tensor * dst) {
@@ -10694,7 +10694,7 @@ static void ggml_compute_forward_alibi_f32(
-        struct ggml_tensor * dst) {
+        const struct ggml_tensor * dst) {
@@ -10757,7 +10757,7 @@ static void ggml_compute_forward_alibi_f16(
-        struct ggml_tensor * dst) {
+        const struct ggml_tensor * dst) {
@@ -10820,7 +10820,7 @@ static void ggml_compute_forward_alibi(
-        struct ggml_tensor * dst) {
+        const struct ggml_tensor * dst) {
@@ -10852,7 +10852,7 @@ static void ggml_compute_forward_rope_f32(
-        struct ggml_tensor * dst) {
+        const struct ggml_tensor * dst) {
@@ -10965,7 +10965,7 @@ static void ggml_compute_forward_rope_f16(
-        struct ggml_tensor * dst) {
+        const struct ggml_tensor * dst) {
@@ -11078,7 +11078,7 @@ static void ggml_compute_forward_rope(
-        struct ggml_tensor * dst) {
+        const struct ggml_tensor * dst) {
@@ -11327,7 +11327,7 @@ static void ggml_compute_forward_rope_back(
-        struct ggml_tensor * dst) {
+        const struct ggml_tensor * dst) {
@@ -11350,7 +11350,7 @@ static void ggml_compute_forward_conv_1d_1s_f16_f32(
-        struct ggml_tensor * dst) {
+        const struct ggml_tensor * dst) {
@@ -11470,7 +11470,7 @@ static void ggml_compute_forward_conv_1d_1s_f32(
-        struct ggml_tensor * dst) {
+        const struct ggml_tensor * dst) {
@@ -11590,7 +11590,7 @@ static void ggml_compute_forward_conv_1d_1s(
-        struct ggml_tensor * dst) {
+        const struct ggml_tensor * dst) {
@@ -11613,7 +11613,7 @@ static void ggml_compute_forward_conv_1d_2s_f16_f32(
-        struct ggml_tensor * dst) {
+        const struct ggml_tensor * dst) {
@@ -11733,7 +11733,7 @@ static void ggml_compute_forward_conv_1d_2s_f32(
-        struct ggml_tensor * dst) {
+        const struct ggml_tensor * dst) {
@@ -11853,7 +11853,7 @@ static void ggml_compute_forward_conv_1d_2s(
-        struct ggml_tensor * dst) {
+        const struct ggml_tensor * dst) {
@@ -11878,7 +11878,7 @@ static void ggml_compute_forward_flash_attn_f32(
-        struct ggml_tensor * dst) {
+        const struct ggml_tensor * dst) {
@@ -12087,7 +12087,7 @@ static void ggml_compute_forward_flash_attn_f16(
-        struct ggml_tensor * dst) {
+        const struct ggml_tensor * dst) {
@@ -12333,7 +12333,7 @@ static void ggml_compute_forward_flash_attn(
-        struct ggml_tensor * dst) {
+        const struct ggml_tensor * dst) {
@@ -12539,7 +12539,7 @@ static void ggml_compute_forward_flash_ff(
-        struct ggml_tensor * dst) {
+        const struct ggml_tensor * dst) {
@@ -12586,7 +12586,7 @@ static void ggml_compute_forward_map_unary_f32(
 static void ggml_compute_forward_map_unary(
-        struct ggml_tensor * dst,
+        const struct ggml_tensor * dst,
@@ -12606,7 +12606,7 @@ static void ggml_compute_forward_map_binary_f32(
-        struct ggml_tensor * dst,
+        const struct ggml_tensor * dst,
@@ -12635,7 +12635,7 @@ static void ggml_compute_forward_map_binary(
-        struct ggml_tensor * dst,
+        const struct ggml_tensor * dst,
@@ -12651,7 +12651,7 @@ static void ggml_compute_forward_map_binary(
-static void ggml_compute_forward(struct ggml_compute_params * params, struct ggml_tensor * tensor) {
+static void ggml_compute_forward(const struct ggml_compute_params * params, const struct ggml_tensor * tensor) {
@@ -14405,7 +14405,7 @@ void ggml_graph_dump_dot(const struct ggml_cgraph * gb, const struct ggml_cgraph
-        if (strlen(node->name) > 0) {
+        if (node->name[0] != '\0') {
@@ -14430,7 +14430,7 @@ void ggml_graph_dump_dot(const struct ggml_cgraph * gb, const struct ggml_cgraph
-        if (strlen(node->name) > 0) {
+        if (node->name[0] != '\0') {
@@ -14543,7 +14543,7 @@ static void ggml_opt_get_grad(int np, struct ggml_tensor * const ps[], float * g
 static enum ggml_opt_result ggml_opt_adam(
-        struct ggml_opt_params params,
+        const struct ggml_opt_params params,
@@ -15120,7 +15120,7 @@ struct ggml_opt_params ggml_opt_default_params(enum ggml_opt_type type) {
 enum ggml_opt_result ggml_opt(
-        struct ggml_opt_params params,
+        const struct ggml_opt_params params,
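Nearly all of the ggml.c hunks above make the same change: the `struct ggml_tensor *` parameters of the forward-compute helpers (and a few by-value params structs) gain a `const` qualifier. For a pointer parameter this only promises that the function will not modify the tensor's metadata through that pointer; the payload reached via the non-const `data` member can still be written, which is why `dst` can be `const` even though the kernels fill it. A minimal sketch of that distinction, using a simplified stand-in struct rather than ggml's real definition:

    #include <cstddef>

    // Simplified stand-in for a ggml-style tensor (illustrative only).
    struct tensor {
        std::size_t n;     // number of elements
        void *      data;  // payload buffer
    };

    // 'const tensor * dst' forbids writing the struct's own members, but reading
    // dst->data still yields a plain 'void *', so the buffer it points to may be filled.
    static void fill_ones(const tensor * dst) {
        float * out = static_cast<float *>(dst->data);
        for (std::size_t i = 0; i < dst->n; ++i) {
            out[i] = 1.0f;
        }
    }

The two ggml_graph_dump_dot hunks also swap `strlen(node->name) > 0` for `node->name[0] != '\0'`, which answers "is the name non-empty?" without scanning the whole buffer.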
ggml.h (4 changed lines)

@@ -442,7 +442,7 @@ extern "C" {

     GGML_API size_t ggml_used_mem(const struct ggml_context * ctx);

-    GGML_API size_t ggml_set_scratch(struct ggml_context * ctx, struct ggml_scratch scratch);
+    GGML_API size_t ggml_set_scratch(struct ggml_context * ctx, const struct ggml_scratch scratch);

     GGML_API struct ggml_tensor * ggml_new_tensor(
             struct ggml_context * ctx,
@@ -1061,7 +1061,7 @@ extern "C" {
     // optimize the function defined by the tensor f
     GGML_API enum ggml_opt_result ggml_opt(
             struct ggml_context * ctx,
-            struct ggml_opt_params params,
+            const struct ggml_opt_params params,
             struct ggml_tensor * f);

     //
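The header change mirrors the .c file: parameters passed by value (`ggml_scratch`, `ggml_opt_params`) are declared `const`. For a by-value parameter this is purely an implementation-side promise, since the caller's object is copied either way; it only keeps the function body from reassigning its local copy and does not change the signature as far as callers are concerned. A small sketch of the idea (types and names are illustrative):

    struct options { int n_threads; };

    // 'const' on a by-value parameter never affects the caller; it only stops
    // the body from mutating its own copy of the argument.
    int run(const options opts) {
        // opts.n_threads = 1;   // would not compile
        return opts.n_threads;
    }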
llama.cpp (57 changed lines)

@@ -592,6 +592,9 @@ struct llama_model_loader {
         auto * first_file = new llama_file_loader(fname_base.c_str(), 0, tensors_map);
         file_loaders.emplace_back(first_file);
         uint32_t n_parts = vocab_only ? 1 : guess_n_parts();
+        if (n_parts != 1) {
+            file_loaders.reserve(n_parts - 1);
+        }
         for (uint32_t i = 1; i < n_parts; i++) {
             std::string fname = fname_base + "." + std::to_string(i);
             auto * ith_file = new llama_file_loader(fname.c_str(), i, tensors_map);
@@ -891,10 +894,11 @@ static void llama_model_load_internal(

     std::unique_ptr<llama_model_loader> ml(new llama_model_loader(fname, use_mmap, vocab_only));

-    lctx.vocab = std::move(ml->file_loaders.at(0)->vocab);
+    const auto & loader = ml->file_loaders.at(0);
+    lctx.vocab = std::move(loader->vocab);
     auto & model = lctx.model;
-    model.hparams = ml->file_loaders.at(0)->hparams;
-    llama_file_version file_version = ml->file_loaders.at(0)->file_version;
+    model.hparams = loader->hparams;
+    llama_file_version file_version = loader->file_version;
     auto & hparams = model.hparams;
     uint32_t n_ff = ((2*(4*hparams.n_embd)/3 + hparams.n_mult - 1)/hparams.n_mult)*hparams.n_mult;
@@ -1019,7 +1023,8 @@ static void llama_model_load_internal(
     ml->done_getting_tensors();

     // populate `tensors_by_name`
-    for (llama_load_tensor & lt : ml->tensors_map.tensors) {
+    model.tensors_by_name.reserve(ml->tensors_map.tensors.size());
+    for (const auto & lt : ml->tensors_map.tensors) {
         model.tensors_by_name.emplace_back(lt.name, lt.ggml_tensor);
     }
@@ -1143,6 +1148,8 @@ static bool llama_eval_internal(
     struct ggml_tensor * inpL = ggml_get_rows(ctx0, model.tok_embeddings, embd);

     for (int il = 0; il < n_layer; ++il) {
+        const auto & layer = model.layers[il];
+
         struct ggml_tensor * inpSA = inpL;

         struct ggml_tensor * cur;
@@ -1155,22 +1162,22 @@ static bool llama_eval_internal(

             // cur = attention_norm*cur
             cur = ggml_mul(ctx0,
-                        ggml_repeat(ctx0, model.layers[il].attention_norm, cur),
+                        ggml_repeat(ctx0, layer.attention_norm, cur),
                         cur);
         }

         // self-attention
         {
             // compute Q and K and RoPE them
-            struct ggml_tensor * Qcur = ggml_rope_inplace(ctx0, ggml_reshape_3d(ctx0, ggml_mul_mat(ctx0, model.layers[il].wq, cur), n_embd/n_head, n_head, N), n_past, n_rot, 0);
-            struct ggml_tensor * Kcur = ggml_rope_inplace(ctx0, ggml_reshape_3d(ctx0, ggml_mul_mat(ctx0, model.layers[il].wk, cur), n_embd/n_head, n_head, N), n_past, n_rot, 0);
+            struct ggml_tensor * Qcur = ggml_rope_inplace(ctx0, ggml_reshape_3d(ctx0, ggml_mul_mat(ctx0, layer.wq, cur), n_embd/n_head, n_head, N), n_past, n_rot, 0);
+            struct ggml_tensor * Kcur = ggml_rope_inplace(ctx0, ggml_reshape_3d(ctx0, ggml_mul_mat(ctx0, layer.wk, cur), n_embd/n_head, n_head, N), n_past, n_rot, 0);
             ggml_set_name(Qcur, "Qcur");
             ggml_set_name(Kcur, "Kcur");

             // store key and value to memory
             {
                 // compute the transposed [N, n_embd] V matrix
-                struct ggml_tensor * Vcur = ggml_transpose(ctx0, ggml_reshape_2d(ctx0, ggml_mul_mat(ctx0, model.layers[il].wv, cur), n_embd, N));
+                struct ggml_tensor * Vcur = ggml_transpose(ctx0, ggml_reshape_2d(ctx0, ggml_mul_mat(ctx0, layer.wv, cur), n_embd, N));

                 struct ggml_tensor * k = ggml_view_1d(ctx0, kv_self.k, N*n_embd, (ggml_element_size(kv_self.k)*n_embd)*(il*n_ctx + n_past));
                 struct ggml_tensor * v = ggml_view_2d(ctx0, kv_self.v, N, n_embd,
@@ -1249,7 +1256,7 @@ static bool llama_eval_internal(

             // projection (no bias)
             cur = ggml_mul_mat(ctx0,
-                    model.layers[il].wo,
+                    layer.wo,
                     cur);
         }
@@ -1265,16 +1272,16 @@ static bool llama_eval_internal(

             // cur = ffn_norm*cur
             cur = ggml_mul(ctx0,
-                    ggml_repeat(ctx0, model.layers[il].ffn_norm, cur),
+                    ggml_repeat(ctx0, layer.ffn_norm, cur),
                     cur);
         }

         struct ggml_tensor * tmp = ggml_mul_mat(ctx0,
-                model.layers[il].w3,
+                layer.w3,
                 cur);

         cur = ggml_mul_mat(ctx0,
-                model.layers[il].w1,
+                layer.w1,
                 cur);

         // SILU activation
@@ -1283,7 +1290,7 @@ static bool llama_eval_internal(
         cur = ggml_mul(ctx0, cur, tmp);

         cur = ggml_mul_mat(ctx0,
-                model.layers[il].w2,
+                layer.w2,
                 cur);
     }
@@ -1450,7 +1457,7 @@ struct llama_tokenizer {

         // keep substituting the highest frequency pairs for as long as we can.
         while (!work_queue_.empty()) {
-            auto bigram = work_queue_.top();
+            const auto& bigram = work_queue_.top();
             work_queue_.pop();

             auto & left_sym = symbols_[bigram.left];
@@ -1485,6 +1492,7 @@ struct llama_tokenizer {

             if (token == vocab_.token_to_id.end()) {
                 // output any symbols that did not form tokens as bytes.
+                output.reserve(symbol.n);
                 for (int j = 0; j < (int) symbol.n; ++j) {
                     llama_vocab::id token_id = static_cast<uint8_t>(symbol.text[j]) + 3;
                     output.push_back(token_id);
@@ -1703,8 +1711,9 @@ void llama_sample_typical(struct llama_context * ctx, llama_token_data_array * c

    // Compute the absolute difference between negative log probability and entropy for each candidate
    std::vector<float> shifted_scores;
+   shifted_scores.reserve(candidates->size);
    for (size_t i = 0; i < candidates->size; ++i) {
-       float shifted_score = fabsf(-logf(candidates->data[i].p) - entropy);
+       const float shifted_score = fabsf(-logf(candidates->data[i].p) - entropy);
        shifted_scores.push_back(shifted_score);
    }
@@ -1733,6 +1742,7 @@ void llama_sample_typical(struct llama_context * ctx, llama_token_data_array * c

    // Resize the output vector to keep only the locally typical tokens
    std::vector<llama_token_data> new_candidates;
+   new_candidates.reserve(last_idx);
    for (size_t i = 0; i < last_idx; ++i) {
        size_t idx = indices[i];
        new_candidates.push_back(candidates->data[idx]);
@@ -2258,7 +2268,8 @@ int llama_apply_lora_from_file_internal(struct llama_context * ctx, const char *

    // create a name -> tensor map of the model to accelerate lookups
    std::unordered_map<std::string, struct ggml_tensor*> model_tensors;
-   for (auto & kv: model.tensors_by_name) {
+   model_tensors.reserve(model.tensors_by_name.size());
+   for (const auto & kv: model.tensors_by_name) {
        model_tensors.insert(kv);
    }
@@ -2374,12 +2385,13 @@ int llama_apply_lora_from_file_internal(struct llama_context * ctx, const char *
        ggml_tensor * base_t;
        if (model_loader) {
            // load from base model
-           if (model_loader->tensors_map.name_to_idx.find(base_name) == model_loader->tensors_map.name_to_idx.end()) {
+           auto & tmap = model_loader->tensors_map;
+           if (tmap.name_to_idx.find(base_name) == tmap.name_to_idx.end()) {
                fprintf(stderr, "%s: error: tensor '%s' not found in base model\n", __func__, base_name.c_str());
                return 1;
            }
-           size_t idx = model_loader->tensors_map.name_to_idx[base_name];
-           llama_load_tensor & lt = model_loader->tensors_map.tensors[idx];
+           size_t idx = tmap.name_to_idx[base_name];
+           llama_load_tensor & lt = tmap.tensors[idx];
            base_t = model_loader->get_tensor(base_name, { (uint32_t)dest_t->ne[0], (uint32_t)dest_t->ne[1] });
            lt.data = (uint8_t *) lt.ggml_tensor->data;
            model_loader->load_data_for(lt);
@@ -2513,11 +2525,12 @@ size_t llama_copy_state_data(struct llama_context * ctx, uint8_t * dst) {
    std::stringstream rng_ss;
    rng_ss << ctx->rng;

-   const size_t rng_size = rng_ss.str().size();
+   const auto & rng = rng_ss.str();
+   const size_t rng_size = rng.size();
    char rng_buf[LLAMA_MAX_RNG_STATE];

    memset(&rng_buf[0], 0, LLAMA_MAX_RNG_STATE);
-   memcpy(&rng_buf[0], rng_ss.str().data(), rng_ss.str().size());
+   memcpy(&rng_buf[0], rng.data(), rng.size());

    memcpy(out, &rng_size, sizeof(rng_size)); out += sizeof(rng_size);
    memcpy(out, &rng_buf[0], LLAMA_MAX_RNG_STATE); out += LLAMA_MAX_RNG_STATE;
@@ -2901,7 +2914,7 @@ void llama_reset_timings(struct llama_context * ctx) {
 const char * llama_print_system_info(void) {
     static std::string s;

-    s = "";
+    s.clear();
     s += "AVX = " + std::to_string(ggml_cpu_has_avx()) + " | ";
     s += "AVX2 = " + std::to_string(ggml_cpu_has_avx2()) + " | ";
     s += "AVX512 = " + std::to_string(ggml_cpu_has_avx512()) + " | ";
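Beyond the reserve() calls, the llama.cpp hunks hoist repeated lookups into a single named reference: `ml->file_loaders.at(0)` becomes `loader`, `model_loader->tensors_map` becomes `tmap`, `model.layers[il]` becomes `layer`, and the serialized RNG state is read out of the stringstream once instead of calling `str()` (which builds a fresh std::string each time) three times. A small sketch of that last case, with illustrative names and none of llama.cpp's own types:

    #include <cstring>
    #include <sstream>
    #include <string>

    // Copy a stream's contents into a fixed-size buffer, materializing str() once.
    std::size_t copy_stream_state(const std::stringstream & ss, char * out, std::size_t cap) {
        const std::string & s = ss.str();   // one temporary, lifetime-extended by the reference
        const std::size_t n = s.size() < cap ? s.size() : cap;
        std::memcpy(out, s.data(), n);
        return n;
    }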