Merge branch 'master' into concedo_experimental

# Conflicts:
#	tests/test-grad0.c
#	tests/test-opt.c

Commit 8a9b40840b
5 changed files with 472 additions and 487 deletions
@@ -3954,18 +3954,23 @@ bool ggml_cuda_compute_forward(struct ggml_compute_params * params, struct ggml_
             }
             func = ggml_cuda_mul;
             break;
-        case GGML_OP_GELU:
+        case GGML_OP_UNARY:
+            switch (ggml_get_unary_op(tensor)) {
+                case GGML_UNARY_OP_GELU:
                     if (!any_on_device) {
                         return false;
                     }
                     func = ggml_cuda_gelu;
                     break;
-        case GGML_OP_SILU:
+                case GGML_UNARY_OP_SILU:
                     if (!any_on_device) {
                         return false;
                     }
                     func = ggml_cuda_silu;
                     break;
+                default:
+                    return false;
+            } break;
         case GGML_OP_NORM:
             if (!any_on_device) {
                 return false;
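The hunk above collapses the separate GGML_OP_GELU / GGML_OP_SILU cases into a single GGML_OP_UNARY case with an inner switch on the tensor's unary kind; anything the backend does not handle drops into the new default and returns false, so the op falls back to the CPU path. A minimal C sketch of that dispatch shape, with placeholder kernels standing in for the real CUDA functions:

#include "ggml.h"

typedef void (*unary_kernel_t)(const float * src, float * dst, int n);

// Stand-ins for ggml_cuda_gelu / ggml_cuda_silu; not the real kernels.
static void kernel_gelu(const float * src, float * dst, int n) { (void)src; (void)dst; (void)n; }
static void kernel_silu(const float * src, float * dst, int n) { (void)src; (void)dst; (void)n; }

// Mirrors the dispatch introduced above: pick a kernel for a unary node,
// or return false so the caller can fall back to the CPU implementation.
static bool pick_unary_kernel(const struct ggml_tensor * t, unary_kernel_t * out) {
    if (t->op != GGML_OP_UNARY) {
        return false;
    }
    switch (ggml_get_unary_op(t)) {
        case GGML_UNARY_OP_GELU: *out = kernel_gelu; return true;
        case GGML_UNARY_OP_SILU: *out = kernel_silu; return true;
        default:                 return false; // e.g. TANH, ELU: not offloaded here
    }
}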
ggml-metal.m (16 changes)
@@ -519,7 +519,9 @@ void ggml_metal_graph_compute(
 
                     [encoder dispatchThreadgroups:MTLSizeMake(n, 1, 1) threadsPerThreadgroup:MTLSizeMake(1, 1, 1)];
                 } break;
-            case GGML_OP_SILU:
+            case GGML_OP_UNARY:
+                switch (ggml_get_unary_op(gf->nodes[i])) {
+                    case GGML_UNARY_OP_SILU:
                 {
                     if (encoder == nil) {
                         encoder = [command_buffer computeCommandEncoder];
@@ -533,7 +535,7 @@ void ggml_metal_graph_compute(
 
                     [encoder dispatchThreadgroups:MTLSizeMake(n, 1, 1) threadsPerThreadgroup:MTLSizeMake(1, 1, 1)];
                 } break;
-            case GGML_OP_RELU:
+                    case GGML_UNARY_OP_RELU:
                 {
                     if (encoder == nil) {
                         encoder = [command_buffer computeCommandEncoder];
@@ -547,7 +549,7 @@ void ggml_metal_graph_compute(
 
                     [encoder dispatchThreadgroups:MTLSizeMake(n, 1, 1) threadsPerThreadgroup:MTLSizeMake(1, 1, 1)];
                 } break;
-            case GGML_OP_GELU:
+                    case GGML_UNARY_OP_GELU:
                 {
                     if (encoder == nil) {
                         encoder = [command_buffer computeCommandEncoder];
@@ -561,6 +563,12 @@ void ggml_metal_graph_compute(
 
                     [encoder dispatchThreadgroups:MTLSizeMake(n, 1, 1) threadsPerThreadgroup:MTLSizeMake(1, 1, 1)];
                 } break;
+                    default:
+                        {
+                            fprintf(stderr, "%s: node %3d, op = %8s not implemented\n", __func__, i, ggml_op_name(dst->op));
+                            GGML_ASSERT(false);
+                        }
+                } break;
             case GGML_OP_SOFT_MAX:
                 {
                     if (encoder == nil) {
@@ -979,10 +987,12 @@ void ggml_metal_graph_compute(
                     [encoder dispatchThreadgroups:MTLSizeMake(ne01, ne02, ne03) threadsPerThreadgroup:MTLSizeMake(nth, 1, 1)];
                 } break;
             default:
+                {
                     fprintf(stderr, "%s: node %3d, op = %8s not implemented\n", __func__, i, ggml_op_name(dst->op));
                     GGML_ASSERT(false);
                 }
         }
+    }
 
     if (encoder != nil) {
         [encoder endEncoding];
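The Metal hunks make the same regrouping: one GGML_OP_UNARY case whose inner switch keys on ggml_get_unary_op(gf->nodes[i]), with a default that now asserts instead of silently skipping an activation that has no Metal kernel. What makes the getter work is that the unary kind travels on the node itself. A plausible sketch of that round trip, assuming the kind sits at offset 0 of the tensor's op_params (the real accessors live in ggml.c and are not part of this diff):

#include "ggml.h"
#include <string.h>

// Assumption: the unary kind occupies the first 4 bytes of op_params.
static void set_unary_kind(struct ggml_tensor * t, enum ggml_unary_op op) {
    const int32_t v = (int32_t) op;
    memcpy(t->op_params, &v, sizeof(v));
}

static enum ggml_unary_op get_unary_kind(const struct ggml_tensor * t) {
    GGML_ASSERT(t->op == GGML_OP_UNARY);
    int32_t v;
    memcpy(&v, t->op_params, sizeof(v));
    return (enum ggml_unary_op) v;
}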
ggml.h (60 changes)
@@ -329,16 +329,6 @@ extern "C" {
         GGML_OP_ARGMAX,
         GGML_OP_REPEAT,
         GGML_OP_REPEAT_BACK,
-        GGML_OP_ABS,
-        GGML_OP_SGN,
-        GGML_OP_NEG,
-        GGML_OP_STEP,
-        GGML_OP_TANH,
-        GGML_OP_ELU,
-        GGML_OP_RELU,
-        GGML_OP_GELU,
-        GGML_OP_GELU_QUICK,
-        GGML_OP_SILU,
         GGML_OP_SILU_BACK,
         GGML_OP_NORM, // normalize
         GGML_OP_RMS_NORM,
@@ -377,6 +367,8 @@ extern "C" {
         GGML_OP_WIN_PART,
         GGML_OP_WIN_UNPART,
 
+        GGML_OP_UNARY,
+
         GGML_OP_MAP_UNARY,
         GGML_OP_MAP_BINARY,
 
@@ -390,6 +382,18 @@ extern "C" {
         GGML_OP_COUNT,
     };
 
+    enum ggml_unary_op {
+        GGML_UNARY_OP_ABS,
+        GGML_UNARY_OP_SGN,
+        GGML_UNARY_OP_NEG,
+        GGML_UNARY_OP_STEP,
+        GGML_UNARY_OP_TANH,
+        GGML_UNARY_OP_ELU,
+        GGML_UNARY_OP_RELU,
+        GGML_UNARY_OP_GELU,
+        GGML_UNARY_OP_GELU_QUICK,
+        GGML_UNARY_OP_SILU,
+    };
+
     // ggml object
     struct ggml_object {
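With the enum in place, the ten removed GGML_OP_* activation values map one-to-one onto GGML_UNARY_OP_*; the per-activation builders keep their signatures but now produce GGML_OP_UNARY nodes, and the kind is read back through the getter declared further down. A small sketch of what that means for a caller inspecting a graph node (hypothetical check, assuming ggml_relu keeps its pre-existing declaration):

#include "ggml.h"

static void check_relu_node(struct ggml_context * ctx, struct ggml_tensor * a) {
    struct ggml_tensor * r = ggml_relu(ctx, a);
    GGML_ASSERT(r->op == GGML_OP_UNARY);                     // was GGML_OP_RELU before this change
    GGML_ASSERT(ggml_get_unary_op(r) == GGML_UNARY_OP_RELU); // kind now lives on the tensor
}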
@@ -534,6 +538,7 @@ extern "C" {
 
     GGML_API const char * ggml_type_name(enum ggml_type type);
     GGML_API const char * ggml_op_name  (enum ggml_op   op);
+    GGML_API const char * ggml_op_symbol(enum ggml_op   op);
 
     GGML_API size_t ggml_element_size(const struct ggml_tensor * tensor);
 
@@ -557,6 +562,7 @@ extern "C" {
     GGML_API size_t ggml_used_mem(const struct ggml_context * ctx);
 
     GGML_API size_t ggml_set_scratch (struct ggml_context * ctx, struct ggml_scratch scratch);
+    GGML_API bool   ggml_get_no_alloc(struct ggml_context * ctx);
     GGML_API void   ggml_set_no_alloc(struct ggml_context * ctx, bool no_alloc);
 
     GGML_API void * ggml_get_mem_buffer (const struct ggml_context * ctx);
@@ -616,9 +622,11 @@ extern "C" {
     GGML_API void *  ggml_get_data    (const struct ggml_tensor * tensor);
     GGML_API float * ggml_get_data_f32(const struct ggml_tensor * tensor);
 
-    GGML_API const char * ggml_get_name(const struct ggml_tensor * tensor);
-    GGML_API struct ggml_tensor * ggml_set_name(struct ggml_tensor * tensor, const char * name);
-    GGML_API struct ggml_tensor * ggml_format_name(struct ggml_tensor * tensor, const char * fmt, ...);
+    GGML_API enum ggml_unary_op ggml_get_unary_op(const struct ggml_tensor * tensor);
+
+    GGML_API const char *         ggml_get_name   (const struct ggml_tensor * tensor);
+    GGML_API struct ggml_tensor * ggml_set_name   (      struct ggml_tensor * tensor, const char * name);
+    GGML_API struct ggml_tensor * ggml_format_name(      struct ggml_tensor * tensor, const char * fmt, ...);
 
     //
     // operations on tensors with backpropagation
@@ -628,6 +636,11 @@ extern "C" {
             struct ggml_context * ctx,
             struct ggml_tensor * a);
 
+    // in-place, returns view(a)
+    GGML_API struct ggml_tensor * ggml_dup_inplace(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a);
+
     GGML_API struct ggml_tensor * ggml_add(
             struct ggml_context * ctx,
             struct ggml_tensor * a,
@@ -951,11 +964,22 @@ extern "C" {
             struct ggml_tensor * a,
             struct ggml_tensor * b);
 
+    // a -> b, in-place, return view(b)
+    GGML_API struct ggml_tensor * ggml_cpy_inplace(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a,
+            struct ggml_tensor * b);
+
     // make contiguous
     GGML_API struct ggml_tensor * ggml_cont(
             struct ggml_context * ctx,
             struct ggml_tensor * a);
 
+    // make contiguous, in-place
+    GGML_API struct ggml_tensor * ggml_cont_inplace(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a);
+
     // return view(a), b specifies the new shape
     // TODO: when we start computing gradient, make a copy instead of view
     GGML_API struct ggml_tensor * ggml_reshape(
@@ -1267,6 +1291,16 @@ extern "C" {
     typedef void (*ggml_custom2_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *);
     typedef void (*ggml_custom3_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *);
 
+    GGML_API struct ggml_tensor * ggml_unary(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a,
+            enum ggml_unary_op op);
+
+    GGML_API struct ggml_tensor * ggml_unary_inplace(
+            struct ggml_context * ctx,
+            struct ggml_tensor * a,
+            enum ggml_unary_op op);
+
     GGML_API struct ggml_tensor * ggml_map_unary_f32(
             struct ggml_context * ctx,
             struct ggml_tensor * a,
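The new ggml_unary / ggml_unary_inplace entry points declared above make the activation a runtime argument instead of a compile-time choice of builder. A minimal usage sketch (apply_activation is a hypothetical helper, not part of the API):

#include "ggml.h"

// Pick the activation at run time; ggml_unary builds one GGML_OP_UNARY node.
static struct ggml_tensor * apply_activation(struct ggml_context * ctx,
                                             struct ggml_tensor  * x,
                                             enum ggml_unary_op    op) {
    // ggml_unary_inplace(ctx, x, op) would return a view of x instead
    return ggml_unary(ctx, x, op);
}

// e.g. apply_activation(ctx, x, GGML_UNARY_OP_SILU) behaves like ggml_silu(ctx, x).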
@@ -3297,8 +3297,7 @@ void ggml_vec_dot_q5_K_q8_K(const int n, float * restrict s, const void * restri
 
 #else
 
-    uint8_t aux8[QK_K];
+    int8_t aux8[QK_K];
     int16_t aux16[16];
     float   sums [8];
     memset(sums, 0, 8*sizeof(float));
@@ -3308,7 +3307,7 @@ void ggml_vec_dot_q5_K_q8_K(const int n, float * restrict s, const void * restri
         const uint8_t * restrict q4 = x[i].qs;
         const uint8_t * restrict hm = x[i].qh;
         const int8_t * restrict q8 = y[i].qs;
-        uint8_t * restrict a = aux8;
+        int8_t * restrict a = aux8;
         for (int l = 0; l < 32; ++l) {
            a[l+ 0] = q4[l] & 0xF;
            a[l+32] = q4[l] >> 4;
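These last two hunks flip the scratch buffer in ggml_vec_dot_q5_K_q8_K from uint8_t to int8_t, so the array's declared type agrees with the int8_t pointer that walks it and with the signed 8-bit products taken against the q8 quants; the unpacked values (0..31) fit in int8_t either way, and a mismatched signed/unsigned pointer assignment would draw a compiler diagnostic. A toy dot product in the same shape, illustrative only and not the ggml code:

#include <stdint.h>
#include <stdio.h>

int main(void) {
    int8_t  aux8[4] = { 3, 17, 28, 5 };   // unpacked quants, range 0..31
    int8_t  q8[4]   = { -7, 12, -1, 90 }; // signed 8-bit values
    int32_t sum = 0;
    for (int l = 0; l < 4; ++l) {
        // both operands promote to int, so the product is exact
        int16_t aux16 = (int16_t)(q8[l] * aux8[l]);
        sum += aux16;
    }
    printf("dot = %d\n", sum); // -21 + 204 - 28 + 450 = 605
    return 0;
}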