ggml : refactor online repacking (#10446)
* rename ggml-cpu-aarch64.c to .cpp * reformat extra cpu backend. - clean Q4_0_N_M and IQ4_0_N_M - remove from "file" tensor type - allow only with dynamic repack - extract cpu extra bufts and convert to C++ - hbm - "aarch64" - more generic use of extra buffer - generalise extra_supports_op - new API for "cpu-accel": - amx - aarch64 * clang-format * Clean Q4_0_N_M ref Enable restrict on C++ * add op GGML_OP_MUL_MAT_ID for Q4_0_N_M with runtime repack * added/corrected control on tensor size for Q4 repacking. * Update ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> * Update ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> * add debug logs on repacks. --------- Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
This commit is contained in:
parent
c2a16c0bdb
commit
19d8762ab6
33 changed files with 1136 additions and 1049 deletions
|
@ -384,15 +384,15 @@ extern "C" {
|
|||
GGML_TYPE_F64 = 28,
|
||||
GGML_TYPE_IQ1_M = 29,
|
||||
GGML_TYPE_BF16 = 30,
|
||||
GGML_TYPE_Q4_0_4_4 = 31,
|
||||
GGML_TYPE_Q4_0_4_8 = 32,
|
||||
GGML_TYPE_Q4_0_8_8 = 33,
|
||||
// GGML_TYPE_Q4_0_4_4 = 31, support has been removed from gguf files
|
||||
// GGML_TYPE_Q4_0_4_8 = 32,
|
||||
// GGML_TYPE_Q4_0_8_8 = 33,
|
||||
GGML_TYPE_TQ1_0 = 34,
|
||||
GGML_TYPE_TQ2_0 = 35,
|
||||
GGML_TYPE_IQ4_NL_4_4 = 36,
|
||||
// GGML_TYPE_IQ4_NL_4_4 = 36,
|
||||
// GGML_TYPE_IQ4_NL_4_8 = 37,
|
||||
// GGML_TYPE_IQ4_NL_8_8 = 38,
|
||||
GGML_TYPE_COUNT,
|
||||
GGML_TYPE_COUNT = 39,
|
||||
};
|
||||
|
||||
// precision
|
||||
|
@ -433,9 +433,6 @@ extern "C" {
|
|||
GGML_FTYPE_MOSTLY_IQ4_XS = 22, // except 1d tensors
|
||||
GGML_FTYPE_MOSTLY_IQ1_M = 23, // except 1d tensors
|
||||
GGML_FTYPE_MOSTLY_BF16 = 24, // except 1d tensors
|
||||
GGML_FTYPE_MOSTLY_Q4_0_4_4 = 25, // except 1d tensors
|
||||
GGML_FTYPE_MOSTLY_Q4_0_4_8 = 26, // except 1d tensors
|
||||
GGML_FTYPE_MOSTLY_Q4_0_8_8 = 27, // except 1d tensors
|
||||
};
|
||||
|
||||
// available tensor operations:
|
||||
|
@ -2205,11 +2202,19 @@ extern "C" {
|
|||
GGML_API size_t gguf_get_meta_size(const struct gguf_context * ctx);
|
||||
GGML_API void gguf_get_meta_data(const struct gguf_context * ctx, void * data);
|
||||
|
||||
#ifdef __cplusplus
|
||||
// restrict not standard in C++
|
||||
#define GGML_RESTRICT
|
||||
#ifdef __cplusplus
|
||||
// restrict not standard in C++
|
||||
# if defined(__GNUC__)
|
||||
# define GGML_RESTRICT __restrict__
|
||||
# elif defined(__clang__)
|
||||
# define GGML_RESTRICT __restrict
|
||||
# elif defined(_MSC_VER)
|
||||
# define GGML_RESTRICT __restrict
|
||||
# else
|
||||
# define GGML_RESTRICT
|
||||
# endif
|
||||
#else
|
||||
#define GGML_RESTRICT restrict
|
||||
# define GGML_RESTRICT restrict
|
||||
#endif
|
||||
typedef void (*ggml_to_float_t) (const void * GGML_RESTRICT x, float * GGML_RESTRICT y, int64_t k);
|
||||
typedef void (*ggml_from_float_t)(const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue