Merge branch 'master' into gg/imatrix-gpu-4931

ggml-ci
This commit is contained in:
Georgi Gerganov 2024-01-17 18:41:47 +02:00
commit 2917e6b528
No known key found for this signature in database
GPG key ID: 449E073F9DC10735
73 changed files with 4569 additions and 2234 deletions

60
ggml.c
View file

@ -1984,19 +1984,19 @@ void ggml_print_objects(const struct ggml_context * ctx) {
GGML_PRINT("%s: --- end ---\n", __func__);
}
int64_t ggml_nelements(const struct ggml_tensor * tensor) {
GGML_CALL int64_t ggml_nelements(const struct ggml_tensor * tensor) {
static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
return tensor->ne[0]*tensor->ne[1]*tensor->ne[2]*tensor->ne[3];
}
int64_t ggml_nrows(const struct ggml_tensor * tensor) {
GGML_CALL int64_t ggml_nrows(const struct ggml_tensor * tensor) {
static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
return tensor->ne[1]*tensor->ne[2]*tensor->ne[3];
}
size_t ggml_nbytes(const struct ggml_tensor * tensor) {
GGML_CALL size_t ggml_nbytes(const struct ggml_tensor * tensor) {
size_t nbytes;
size_t blck_size = ggml_blck_size(tensor->type);
if (blck_size == 1) {
@ -2019,15 +2019,15 @@ size_t ggml_nbytes_pad(const struct ggml_tensor * tensor) {
return GGML_PAD(ggml_nbytes(tensor), GGML_MEM_ALIGN);
}
int ggml_blck_size(enum ggml_type type) {
GGML_CALL int ggml_blck_size(enum ggml_type type) {
return type_traits[type].blck_size;
}
size_t ggml_type_size(enum ggml_type type) {
GGML_CALL size_t ggml_type_size(enum ggml_type type) {
return type_traits[type].type_size;
}
size_t ggml_row_size(enum ggml_type type, int64_t ne) {
GGML_CALL size_t ggml_row_size(enum ggml_type type, int64_t ne) {
assert(ne % ggml_blck_size(type) == 0);
return ggml_type_size(type)*ne/ggml_blck_size(type);
}
@ -2036,15 +2036,15 @@ double ggml_type_sizef(enum ggml_type type) {
return ((double)(type_traits[type].type_size))/type_traits[type].blck_size;
}
const char * ggml_type_name(enum ggml_type type) {
GGML_CALL const char * ggml_type_name(enum ggml_type type) {
return type_traits[type].type_name;
}
bool ggml_is_quantized(enum ggml_type type) {
GGML_CALL bool ggml_is_quantized(enum ggml_type type) {
return type_traits[type].is_quantized;
}
const char * ggml_op_name(enum ggml_op op) {
GGML_CALL const char * ggml_op_name(enum ggml_op op) {
return GGML_OP_NAME[op];
}
@ -2056,7 +2056,7 @@ const char * ggml_unary_op_name(enum ggml_unary_op op) {
return GGML_UNARY_OP_NAME[op];
}
const char * ggml_op_desc(const struct ggml_tensor * t) {
GGML_CALL const char * ggml_op_desc(const struct ggml_tensor * t) {
if (t->op == GGML_OP_UNARY) {
enum ggml_unary_op uop = ggml_get_unary_op(t);
return ggml_unary_op_name(uop);
@ -2066,7 +2066,7 @@ const char * ggml_op_desc(const struct ggml_tensor * t) {
}
}
size_t ggml_element_size(const struct ggml_tensor * tensor) {
GGML_CALL size_t ggml_element_size(const struct ggml_tensor * tensor) {
return ggml_type_size(tensor->type);
}
@ -2148,11 +2148,11 @@ size_t ggml_tensor_overhead(void) {
return GGML_OBJECT_SIZE + GGML_TENSOR_SIZE;
}
bool ggml_is_transposed(const struct ggml_tensor * tensor) {
GGML_CALL bool ggml_is_transposed(const struct ggml_tensor * tensor) {
return tensor->nb[0] > tensor->nb[1];
}
bool ggml_is_contiguous(const struct ggml_tensor * tensor) {
GGML_CALL bool ggml_is_contiguous(const struct ggml_tensor * tensor) {
static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
return
@ -2171,7 +2171,7 @@ static inline bool ggml_is_contiguous_except_dim_1(const struct ggml_tensor * te
tensor->nb[3] == tensor->nb[2]*tensor->ne[2];
}
bool ggml_is_permuted(const struct ggml_tensor * tensor) {
GGML_CALL bool ggml_is_permuted(const struct ggml_tensor * tensor) {
static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
return tensor->nb[0] > tensor->nb[1] || tensor->nb[1] > tensor->nb[2] || tensor->nb[2] > tensor->nb[3];
@ -3073,7 +3073,7 @@ float * ggml_get_data_f32(const struct ggml_tensor * tensor) {
return (float *)(tensor->data);
}
enum ggml_unary_op ggml_get_unary_op(const struct ggml_tensor * tensor) {
GGML_CALL enum ggml_unary_op ggml_get_unary_op(const struct ggml_tensor * tensor) {
GGML_ASSERT(tensor->op == GGML_OP_UNARY);
return (enum ggml_unary_op) ggml_get_op_params_i32(tensor, 0);
}
@ -11639,7 +11639,7 @@ static void ggml_rope_cache_init(
}
}
void ggml_rope_yarn_corr_dims(
GGML_CALL void ggml_rope_yarn_corr_dims(
int n_dims, int n_orig_ctx, float freq_base, float beta_fast, float beta_slow, float dims[2]
) {
// start and end correction dims
@ -18660,26 +18660,38 @@ size_t ggml_quantize_chunk(enum ggml_type type, const float * src, void * dst, i
case GGML_TYPE_Q4_0:
{
GGML_ASSERT(start % QK4_0 == 0);
block_q4_0 * block = (block_q4_0*)dst + start / QK4_0;
result = ggml_quantize_q4_0(src + start, block, n, n, hist);
GGML_ASSERT(start % n_per_row == 0);
size_t start_row = start / n_per_row;
size_t row_size = ggml_row_size(type, n_per_row);
result = quantize_q4_0(src + start, (char *)dst + start_row * row_size, nrows, n_per_row, hist, imatrix);
GGML_ASSERT(result == row_size * nrows);
} break;
case GGML_TYPE_Q4_1:
{
GGML_ASSERT(start % QK4_1 == 0);
block_q4_1 * block = (block_q4_1*)dst + start / QK4_1;
result = ggml_quantize_q4_1(src + start, block, n, n, hist);
GGML_ASSERT(start % n_per_row == 0);
size_t start_row = start / n_per_row;
size_t row_size = ggml_row_size(type, n_per_row);
result = quantize_q4_1(src + start, (char *)dst + start_row * row_size, nrows, n_per_row, hist, imatrix);
GGML_ASSERT(result == row_size * nrows);
} break;
case GGML_TYPE_Q5_0:
{
GGML_ASSERT(start % QK5_0 == 0);
block_q5_0 * block = (block_q5_0*)dst + start / QK5_0;
result = ggml_quantize_q5_0(src + start, block, n, n, hist);
GGML_ASSERT(start % n_per_row == 0);
size_t start_row = start / n_per_row;
size_t row_size = ggml_row_size(type, n_per_row);
result = quantize_q5_0(src + start, (char *)dst + start_row * row_size, nrows, n_per_row, hist, imatrix);
GGML_ASSERT(result == row_size * nrows);
} break;
case GGML_TYPE_Q5_1:
{
GGML_ASSERT(start % QK5_1 == 0);
block_q5_1 * block = (block_q5_1*)dst + start / QK5_1;
result = ggml_quantize_q5_1(src + start, block, n, n, hist);
GGML_ASSERT(start % n_per_row == 0);
size_t start_row = start / n_per_row;
size_t row_size = ggml_row_size(type, n_per_row);
result = quantize_q5_1(src + start, (char *)dst + start_row * row_size, nrows, n_per_row, hist, imatrix);
GGML_ASSERT(result == row_size * nrows);
} break;
case GGML_TYPE_Q8_0:
{