ggml : sync latest ggml_mul_mat_id
This commit is contained in:
parent
a3eefe95a8
commit
861cd67899
4 changed files with 114 additions and 75 deletions
61
ggml.c
61
ggml.c
|
@ -4083,7 +4083,9 @@ struct ggml_tensor * ggml_mul_mat_id(
|
|||
int64_t n_as = ids->ne[0];
|
||||
|
||||
GGML_ASSERT(ids->type == GGML_TYPE_I32);
|
||||
GGML_ASSERT(ggml_is_vector(ids));
|
||||
GGML_ASSERT(ids->ne[2] == 1 && ids->ne[3] == 1);
|
||||
GGML_ASSERT(ids->ne[1] == b->ne[1]);
|
||||
GGML_ASSERT(ids->ne[2] == b->ne[2] && ids->ne[3] == b->ne[3]);
|
||||
GGML_ASSERT(n_as > 0 && n_as <= GGML_MAX_SRC - 2);
|
||||
GGML_ASSERT(id >= 0 && id < n_as);
|
||||
|
||||
|
@ -9519,11 +9521,16 @@ static bool ggml_compute_forward_mul_mat_use_blas(
|
|||
}
|
||||
#endif
|
||||
|
||||
// off1 = offset in i11 and i1
|
||||
// cne1 = ne11 and ne1
|
||||
// in a normal matrix multiplication, off1 = 0 and cne1 = ne1
|
||||
// during GGML_TASK_INIT, the full src1 is converted regardless of off1 and cne1
|
||||
static void ggml_compute_forward_mul_mat(
|
||||
const struct ggml_compute_params * params,
|
||||
const struct ggml_tensor * src0,
|
||||
const struct ggml_tensor * src1,
|
||||
struct ggml_tensor * dst) {
|
||||
struct ggml_tensor * dst,
|
||||
int64_t off1, int64_t cne1) {
|
||||
int64_t t0 = ggml_perf_time_us();
|
||||
UNUSED(t0);
|
||||
|
||||
|
@ -9591,10 +9598,9 @@ static void ggml_compute_forward_mul_mat(
|
|||
const int64_t i03 = i13/r3;
|
||||
const int64_t i02 = i12/r2;
|
||||
|
||||
const void * x = (char *) src0->data + i02*nb02 + i03*nb03;
|
||||
const float * y = (float *) ((char *) src1->data + i12*nb12 + i13*nb13);
|
||||
|
||||
float * d = (float *) ((char *) dst->data + i12*nb2 + i13*nb3);
|
||||
const void * x = (char *) src0->data + i02*nb02 + i03*nb03;
|
||||
const float * y = (float *) ((char *) src1->data + off1*nb11 + i12*nb12 + i13*nb13);
|
||||
float * d = (float *) ((char *) dst->data + off1*nb1 + i12*nb2 + i13*nb3);
|
||||
|
||||
if (type != GGML_TYPE_F32) {
|
||||
float * const wdata = params->wdata;
|
||||
|
@ -9611,10 +9617,10 @@ static void ggml_compute_forward_mul_mat(
|
|||
}
|
||||
|
||||
cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasTrans,
|
||||
ne11, ne01, ne10,
|
||||
1.0f, y, ne10,
|
||||
x, ne00,
|
||||
0.0f, d, ne01);
|
||||
cne1, ne01, ne10,
|
||||
1.0f, y, ne10,
|
||||
x, ne00,
|
||||
0.0f, d, ne01);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -9630,6 +9636,7 @@ static void ggml_compute_forward_mul_mat(
|
|||
const size_t row_size = ne10*ggml_type_size(vec_dot_type)/ggml_blck_size(vec_dot_type);
|
||||
|
||||
assert(params->wsize >= ne11*ne12*ne13*row_size);
|
||||
assert(src1->type == GGML_TYPE_F32);
|
||||
|
||||
for (int64_t i13 = 0; i13 < ne13; ++i13) {
|
||||
for (int64_t i12 = 0; i12 < ne12; ++i12) {
|
||||
|
@ -9652,7 +9659,7 @@ static void ggml_compute_forward_mul_mat(
|
|||
const size_t row_size = ne10*ggml_type_size(vec_dot_type)/ggml_blck_size(vec_dot_type);
|
||||
|
||||
const int64_t nr0 = ne01; // src0 rows
|
||||
const int64_t nr1 = ne11*ne12*ne13; // src1 rows
|
||||
const int64_t nr1 = cne1*ne12*ne13; // src1 rows
|
||||
|
||||
//printf("nr0 = %lld, nr1 = %lld\n", nr0, nr1);
|
||||
|
||||
|
@ -9694,9 +9701,9 @@ static void ggml_compute_forward_mul_mat(
|
|||
for (int64_t iir1 = ir110; iir1 < ir111; iir1 += blck_1) {
|
||||
for (int64_t iir0 = ir010; iir0 < ir011; iir0 += blck_0) {
|
||||
for (int64_t ir1 = iir1; ir1 < iir1 + blck_1 && ir1 < ir111; ++ir1) {
|
||||
const int64_t i13 = (ir1/(ne12*ne11));
|
||||
const int64_t i12 = (ir1 - i13*ne12*ne11)/ne11;
|
||||
const int64_t i11 = (ir1 - i13*ne12*ne11 - i12*ne11);
|
||||
const int64_t i13 = (ir1/(ne12*cne1));
|
||||
const int64_t i12 = (ir1 - i13*ne12*cne1)/cne1;
|
||||
const int64_t i11 = (ir1 - i13*ne12*cne1 - i12*cne1) + off1;
|
||||
|
||||
// broadcast src0 into src1
|
||||
const int64_t i03 = i13/r3;
|
||||
|
@ -9736,20 +9743,26 @@ static void ggml_compute_forward_mul_mat(
|
|||
|
||||
static void ggml_compute_forward_mul_mat_id(
|
||||
const struct ggml_compute_params * params,
|
||||
const struct ggml_tensor * src0,
|
||||
const struct ggml_tensor * src1,
|
||||
struct ggml_tensor * dst) {
|
||||
|
||||
const struct ggml_tensor * ids = dst->src[0];
|
||||
const struct ggml_tensor * src1 = dst->src[1];
|
||||
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
|
||||
// during GGML_TASK_INIT the entire src1 is converted to vec_dot_type
|
||||
ggml_compute_forward_mul_mat(params, dst->src[2], src1, dst, 0, dst->ne[1]);
|
||||
return;
|
||||
}
|
||||
|
||||
const struct ggml_tensor * ids = src0;
|
||||
const int id = ggml_get_op_params_i32(dst, 0);
|
||||
|
||||
const int a_id = ((int32_t *)ids->data)[id];
|
||||
for (int64_t i01 = 0; i01 < ids->ne[1]; i01++) {
|
||||
const int32_t row_id = *(const int32_t *) ((const char *) ids->data + i01*ids->nb[1] + id*ids->nb[0]);
|
||||
GGML_ASSERT(row_id >= 0 && row_id < ids->ne[0]);
|
||||
|
||||
GGML_ASSERT(a_id >= 0 && a_id < ids->ne[0]);
|
||||
|
||||
const struct ggml_tensor * src0 = dst->src[a_id + 2];
|
||||
|
||||
ggml_compute_forward_mul_mat(params, src0, src1, dst);
|
||||
const struct ggml_tensor * src0_row = dst->src[row_id + 2];
|
||||
ggml_compute_forward_mul_mat(params, src0_row, src1, dst, i01, 1);
|
||||
}
|
||||
}
|
||||
|
||||
// ggml_compute_forward_out_prod
|
||||
|
@ -14037,11 +14050,11 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
|
|||
} break;
|
||||
case GGML_OP_MUL_MAT:
|
||||
{
|
||||
ggml_compute_forward_mul_mat(params, tensor->src[0], tensor->src[1], tensor);
|
||||
ggml_compute_forward_mul_mat(params, tensor->src[0], tensor->src[1], tensor, 0, tensor->ne[1]);
|
||||
} break;
|
||||
case GGML_OP_MUL_MAT_ID:
|
||||
{
|
||||
ggml_compute_forward_mul_mat_id(params, tensor);
|
||||
ggml_compute_forward_mul_mat_id(params, tensor->src[0], tensor->src[1], tensor);
|
||||
} break;
|
||||
case GGML_OP_OUT_PROD:
|
||||
{
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue