ggml : sync (mem align to header + conv_transpose_2d fixes)

ggml-ci
This commit is contained in:
Georgi Gerganov 2023-08-28 13:52:41 +03:00
parent dd0dc366da
commit 93497ac66b
No known key found for this signature in database
GPG key ID: 449E073F9DC10735
2 changed files with 21 additions and 19 deletions

22
ggml.c
View file

@ -157,12 +157,6 @@ typedef void * thread_ret_t;
//#define GGML_SOFT_MAX_ACCELERATE //#define GGML_SOFT_MAX_ACCELERATE
#endif #endif
#if UINTPTR_MAX == 0xFFFFFFFF
#define GGML_MEM_ALIGN 4
#else
#define GGML_MEM_ALIGN 16
#endif
// //
// logging // logging
// //
@ -7098,11 +7092,13 @@ struct ggml_tensor * ggml_conv_transpose_2d_p0(
}; };
struct ggml_tensor* result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne); struct ggml_tensor* result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
ggml_set_op_params_i32(result, 0, stride);
result->op = GGML_OP_CONV_TRANSPOSE_2D; result->op = GGML_OP_CONV_TRANSPOSE_2D;
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL; result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
result->src[0] = a; result->src[0] = a;
result->src[1] = b; result->src[1] = b;
result->src[2] = ggml_new_i32(ctx, stride);
return result; return result;
} }
@ -13498,7 +13494,6 @@ static void ggml_compute_forward_conv_transpose_2d(
const struct ggml_compute_params * params, const struct ggml_compute_params * params,
const struct ggml_tensor * src0, const struct ggml_tensor * src0,
const struct ggml_tensor * src1, const struct ggml_tensor * src1,
const struct ggml_tensor * opt0,
struct ggml_tensor * dst) { struct ggml_tensor * dst) {
GGML_ASSERT(src0->type == GGML_TYPE_F16); GGML_ASSERT(src0->type == GGML_TYPE_F16);
GGML_ASSERT(src1->type == GGML_TYPE_F32); GGML_ASSERT(src1->type == GGML_TYPE_F32);
@ -13558,7 +13553,7 @@ static void ggml_compute_forward_conv_transpose_2d(
return; return;
} }
const int32_t stride = ((const int32_t*)(opt0->data))[0]; const int32_t stride = ggml_get_op_params_i32(dst, 0);
// total patches in dst // total patches in dst
const int np = ne2; const int np = ne2;
@ -13571,7 +13566,7 @@ static void ggml_compute_forward_conv_transpose_2d(
const int ip1 = MIN(ip0 + dp, np); const int ip1 = MIN(ip0 + dp, np);
ggml_fp16_t * const wdata = (ggml_fp16_t *) params->wdata + 0; ggml_fp16_t * const wdata = (ggml_fp16_t *) params->wdata + 0;
ggml_fp16_t * const wdata_src = (ggml_fp16_t *) params->wdata + nk; ggml_fp16_t * const wdata_src = wdata + nk;
for (int i2 = ip0; i2 < ip1; i2++) { // Cout for (int i2 = ip0; i2 < ip1; i2++) { // Cout
float * dst_data = (float *)((char *) dst->data + i2*nb2); float * dst_data = (float *)((char *) dst->data + i2*nb2);
@ -13583,9 +13578,8 @@ static void ggml_compute_forward_conv_transpose_2d(
for (int i00 = 0; i00 < ne00; i00++) { for (int i00 = 0; i00 < ne00; i00++) {
float v = 0; float v = 0;
ggml_vec_dot_f16(ne03, &v, ggml_vec_dot_f16(ne03, &v,
(ggml_fp16_t *) wdata_src + i1n, wdata_src + i1n,
(ggml_fp16_t *) wdata_kernel + i01*ne00*ne03 + i00*ne03); wdata_kernel + i01*ne00*ne03 + i00*ne03);
dst_data[(i11*stride + i01)*ne0 + i10*stride + i00] += v; dst_data[(i11*stride + i01)*ne0 + i10*stride + i00] += v;
} }
} }
@ -15732,7 +15726,7 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
} break; } break;
case GGML_OP_CONV_TRANSPOSE_2D: case GGML_OP_CONV_TRANSPOSE_2D:
{ {
ggml_compute_forward_conv_transpose_2d(params, tensor->src[0], tensor->src[1], tensor->src[2], tensor); ggml_compute_forward_conv_transpose_2d(params, tensor->src[0], tensor->src[1], tensor);
} break; } break;
case GGML_OP_POOL_1D: case GGML_OP_POOL_1D:
{ {

18
ggml.h
View file

@ -130,13 +130,16 @@
// The data of the tensor is accessed via the "data" pointer. For example: // The data of the tensor is accessed via the "data" pointer. For example:
// //
// { // {
// struct ggml_tensor * a = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 2, 3); // const int nx = 2;
// const int ny = 3;
// //
// // a[2, 1] = 1.0f; // struct ggml_tensor * a = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, nx, ny);
// *(float *) ((char *) a->data + 2*a->nb[1] + 1*a->nb[0]) = 1.0f;
// //
// // a[0, 2] = 2.0f; // for (int y = 0; y < ny; y++) {
// *(float *) ((char *) a->data + 0*a->nb[1] + 2*a->nb[0]) = 2.0f; // for (int x = 0; x < nx; x++) {
// *(float *) ((char *) a->data + y*a->nb[1] + x*a->nb[0]) = x + y;
// }
// }
// //
// ... // ...
// } // }
@ -211,6 +214,11 @@
#define GGML_MAX_OP_PARAMS 32 #define GGML_MAX_OP_PARAMS 32
#define GGML_DEFAULT_N_THREADS 4 #define GGML_DEFAULT_N_THREADS 4
// GGML_MEM_ALIGN: alignment constant selected by the width of uintptr_t.
//   - UINTPTR_MAX == 0xFFFFFFFF (32-bit uintptr_t) -> 4
//   - anything else                                -> 16
// NOTE(review): presumably a byte alignment for ggml-managed memory - confirm against its users.
// NOTE(review): UINTPTR_MAX comes from <stdint.h>; if that header is not included before this
// point the preprocessor treats the undefined name as 0 and silently picks 16 - confirm the include.
#if UINTPTR_MAX == 0xFFFFFFFF
#define GGML_MEM_ALIGN 4
#else
#define GGML_MEM_ALIGN 16
#endif
#define GGML_EXIT_SUCCESS 0 #define GGML_EXIT_SUCCESS 0
#define GGML_EXIT_ABORTED 1 #define GGML_EXIT_ABORTED 1