imatrix : offload to GPU support (#4957)
* backend : add eval callback ggml-ci * backend : group nodes in a single compute when the user doesn't need them * backend : clean-up the implementation ggml-ci * simple : do not perform tensor data copy if not needed * simple : fix * imatrix : offload to GPU support * imatrix : fix ggml_mul_mat_id handling ggml-ci * ci : add imatrix test ggml-ci * ci : rearrange output ggml-ci
This commit is contained in:
parent
44a1a4a41a
commit
ba69bbc84c
4 changed files with 129 additions and 54 deletions
6
ggml.h
6
ggml.h
|
@ -2085,12 +2085,6 @@ extern "C" {
|
|||
GGML_API void ggml_init_iq2_quantization(enum ggml_type type);
|
||||
GGML_API void ggml_deinit_iq2_quantization(enum ggml_type type);
|
||||
|
||||
//
|
||||
// Importance matrix
|
||||
//
|
||||
typedef void(*ggml_collect_imatrix_t)(const struct ggml_tensor * src0, const struct ggml_tensor * src1);
|
||||
GGML_API void ggml_set_imatrix_collection(ggml_collect_imatrix_t imatrix_collect);
|
||||
|
||||
//
|
||||
// gguf
|
||||
//
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue