ggml : introduce GGML_CALL function annotation (#4850)
This change makes it possible to build ggml-cuda.cu and ggml-metal.m as independent dynamic shared objects that may be conditionally linked at runtime in a multiplatform binary. It introduces a GGML_CALL annotation that documents which functions have a cyclic call relationship between the application code and the GPU modules. This change does nothing unless the build defines -DGGML_MULTIPLATFORM, which causes back-references and function pointers to conform to the MS ABI, a convention supported by NVCC, ROCm, Xcode, GCC, and Clang across platforms.
commit a0b3ac8c48 (parent d75c232e1d)
9 changed files with 244 additions and 235 deletions
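For orientation, here is a minimal sketch of how an annotation like GGML_CALL can be defined. The exact definition is not part of this excerpt, so treat the following as an assumption consistent with the commit message: GCC and Clang expose the Microsoft calling convention on x86-64 as __attribute__((ms_abi)), and on Windows that convention is already the default.

// Hypothetical sketch, not the verbatim ggml.h definition: with
// -DGGML_MULTIPLATFORM, every annotated function and function pointer uses
// the MS calling convention, so the host binary and independently built GPU
// modules agree on a single ABI.
#ifdef GGML_MULTIPLATFORM
#    if defined(_WIN32)
#        define GGML_CALL                              // MS ABI is already the platform default
#    else
#        define GGML_CALL __attribute__((__ms_abi__))  // force MS ABI on SysV platforms
#    endif
#else
#    define GGML_CALL                                  // no-op in single-platform builds
#endif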
ggml.c: 32 changed lines
@@ -1990,19 +1990,19 @@ void ggml_print_objects(const struct ggml_context * ctx) {
     GGML_PRINT("%s: --- end ---\n", __func__);
 }
 
-int64_t ggml_nelements(const struct ggml_tensor * tensor) {
+GGML_CALL int64_t ggml_nelements(const struct ggml_tensor * tensor) {
     static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
 
     return tensor->ne[0]*tensor->ne[1]*tensor->ne[2]*tensor->ne[3];
 }
 
-int64_t ggml_nrows(const struct ggml_tensor * tensor) {
+GGML_CALL int64_t ggml_nrows(const struct ggml_tensor * tensor) {
     static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
 
     return tensor->ne[1]*tensor->ne[2]*tensor->ne[3];
 }
 
-size_t ggml_nbytes(const struct ggml_tensor * tensor) {
+GGML_CALL size_t ggml_nbytes(const struct ggml_tensor * tensor) {
     size_t nbytes;
     size_t blck_size = ggml_blck_size(tensor->type);
     if (blck_size == 1) {
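The getters in this hunk are among the functions that GPU modules call back into the host, which is why they pick up the annotation. The same annotation has to appear on any function pointer that crosses the module boundary, or the two sides could disagree on the calling convention. A small illustrative sketch follows; the typedef and struct names are hypothetical, not from this commit:

// Hypothetical callback slot that a dynamically loaded GPU module could hold.
// GGML_CALL on the pointer type keeps caller and callee on one calling
// convention under -DGGML_MULTIPLATFORM.
typedef size_t (*GGML_CALL ggml_nbytes_fn)(const struct ggml_tensor * tensor);

struct host_callbacks {
    ggml_nbytes_fn nbytes;   // the host sets this to ggml_nbytes before handing off
};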
@@ -2025,15 +2025,15 @@ size_t ggml_nbytes_pad(const struct ggml_tensor * tensor) {
     return GGML_PAD(ggml_nbytes(tensor), GGML_MEM_ALIGN);
 }
 
-int ggml_blck_size(enum ggml_type type) {
+GGML_CALL int ggml_blck_size(enum ggml_type type) {
     return type_traits[type].blck_size;
 }
 
-size_t ggml_type_size(enum ggml_type type) {
+GGML_CALL size_t ggml_type_size(enum ggml_type type) {
     return type_traits[type].type_size;
 }
 
-size_t ggml_row_size(enum ggml_type type, int64_t ne) {
+GGML_CALL size_t ggml_row_size(enum ggml_type type, int64_t ne) {
     assert(ne % ggml_blck_size(type) == 0);
     return ggml_type_size(type)*ne/ggml_blck_size(type);
 }
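As a concrete instance of the ggml_row_size arithmetic above, assuming the Q4_0 quantized layout (blocks of 32 elements stored in 18 bytes: one fp16 scale plus 16 bytes of packed 4-bit weights):

// ggml_row_size(GGML_TYPE_Q4_0, 4096)
//     == ggml_type_size(Q4_0) * 4096 / ggml_blck_size(Q4_0)
//     == 18 * 4096 / 32
//     == 2304 bytes per row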
@@ -2042,15 +2042,15 @@ double ggml_type_sizef(enum ggml_type type) {
     return ((double)(type_traits[type].type_size))/type_traits[type].blck_size;
 }
 
-const char * ggml_type_name(enum ggml_type type) {
+GGML_CALL const char * ggml_type_name(enum ggml_type type) {
     return type_traits[type].type_name;
 }
 
-bool ggml_is_quantized(enum ggml_type type) {
+GGML_CALL bool ggml_is_quantized(enum ggml_type type) {
     return type_traits[type].is_quantized;
 }
 
-const char * ggml_op_name(enum ggml_op op) {
+GGML_CALL const char * ggml_op_name(enum ggml_op op) {
     return GGML_OP_NAME[op];
 }
 
@@ -2062,7 +2062,7 @@ const char * ggml_unary_op_name(enum ggml_unary_op op) {
     return GGML_UNARY_OP_NAME[op];
 }
 
-const char * ggml_op_desc(const struct ggml_tensor * t) {
+GGML_CALL const char * ggml_op_desc(const struct ggml_tensor * t) {
     if (t->op == GGML_OP_UNARY) {
         enum ggml_unary_op uop = ggml_get_unary_op(t);
         return ggml_unary_op_name(uop);
@@ -2072,7 +2072,7 @@ const char * ggml_op_desc(const struct ggml_tensor * t) {
     }
 }
 
-size_t ggml_element_size(const struct ggml_tensor * tensor) {
+GGML_CALL size_t ggml_element_size(const struct ggml_tensor * tensor) {
     return ggml_type_size(tensor->type);
 }
 
@@ -2154,11 +2154,11 @@ size_t ggml_tensor_overhead(void) {
     return GGML_OBJECT_SIZE + GGML_TENSOR_SIZE;
 }
 
-bool ggml_is_transposed(const struct ggml_tensor * tensor) {
+GGML_CALL bool ggml_is_transposed(const struct ggml_tensor * tensor) {
     return tensor->nb[0] > tensor->nb[1];
 }
 
-bool ggml_is_contiguous(const struct ggml_tensor * tensor) {
+GGML_CALL bool ggml_is_contiguous(const struct ggml_tensor * tensor) {
     static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
 
     return
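For readers new to ggml's layout conventions: ne[] holds the extent of each dimension and nb[] holds byte strides, which is what the predicates in this hunk inspect. A rough sketch of the invariant (hedging on the quantized case, which divides by the block size):

// Contiguity invariant, approximately as the checks above use it:
//     nb[0] == ggml_type_size(type)
//     nb[1] == nb[0] * (ne[0] / ggml_blck_size(type))
//     nb[i] == nb[i-1] * ne[i-1]          for i = 2, 3
// ggml_is_transposed() then just tests nb[0] > nb[1], and ggml_is_permuted()
// flags any out-of-order pair of strides.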
@@ -2177,7 +2177,7 @@ static inline bool ggml_is_contiguous_except_dim_1(const struct ggml_tensor * te
         tensor->nb[3] == tensor->nb[2]*tensor->ne[2];
 }
 
-bool ggml_is_permuted(const struct ggml_tensor * tensor) {
+GGML_CALL bool ggml_is_permuted(const struct ggml_tensor * tensor) {
     static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
 
     return tensor->nb[0] > tensor->nb[1] || tensor->nb[1] > tensor->nb[2] || tensor->nb[2] > tensor->nb[3];
@@ -3079,7 +3079,7 @@ float * ggml_get_data_f32(const struct ggml_tensor * tensor) {
     return (float *)(tensor->data);
 }
 
-enum ggml_unary_op ggml_get_unary_op(const struct ggml_tensor * tensor) {
+GGML_CALL enum ggml_unary_op ggml_get_unary_op(const struct ggml_tensor * tensor) {
     GGML_ASSERT(tensor->op == GGML_OP_UNARY);
     return (enum ggml_unary_op) ggml_get_op_params_i32(tensor, 0);
 }
@@ -11653,7 +11653,7 @@ static void ggml_rope_cache_init(
     }
 }
 
-void ggml_rope_yarn_corr_dims(
+GGML_CALL void ggml_rope_yarn_corr_dims(
     int n_dims, int n_orig_ctx, float freq_base, float beta_fast, float beta_slow, float dims[2]
 ) {
     // start and end correction dims
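Finally, a usage-level sketch of the conditional runtime linking that the commit message describes, for POSIX systems. The module path, entry-point name, and its signature below are assumptions chosen for illustration, not taken from this diff:

// Hypothetical sketch: load a GPU backend as a shared object at runtime and
// resolve one entry point. Compile with: cc demo.c -ldl
#include <dlfcn.h>
#include <stdio.h>

#ifndef GGML_CALL
#define GGML_CALL   // empty here; a -DGGML_MULTIPLATFORM build defines it as the MS-ABI attribute
#endif

typedef void * (*GGML_CALL backend_init_fn)(int device);    // hypothetical entry-point shape

int main(void) {
    void * mod = dlopen("./ggml-cuda.so", RTLD_NOW);        // hypothetical module name
    if (mod == NULL) {
        fprintf(stderr, "GPU module not available: %s\n", dlerror());
        return 1;                                           // a real app would fall back to CPU
    }
    // The resolved function and the pointer holding it must share one calling
    // convention; GGML_CALL is what pins that down across compilers.
    backend_init_fn init = (backend_init_fn) dlsym(mod, "ggml_backend_cuda_init");
    printf("resolved backend init at %p\n", (void *) init);
    dlclose(mod);
    return 0;
}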