OpenCL Token Generation Acceleration (#1459)
* Move back to C++ for OpenCL
* Refactor OpenCL code to work more like the CUDA code, add missing functions
* Deduplicate dequant kernels
* Add OpenCL compile options
* Use compile args for preprocessing constants
* Restore default platform + device selection by id behavior

---------

Co-authored-by: Johannes Gäßler <johannesg@5d6.de>
Co-authored-by: Henri Vasserman <henv@hot.ee>
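The "compile args for preprocessing constants" item refers to passing constants to the OpenCL kernel compiler as -D build options instead of pasting them into the kernel source string. Below is a minimal sketch of that technique, assuming an already-created cl_program and cl_device_id; the helper name and the exact option string are illustrative, not taken from this commit (QK4_0 = 32 is ggml's Q4_0 quantization block size).

// Sketch only: expose preprocessor constants via OpenCL compile options.
#define CL_TARGET_OPENCL_VERSION 110
#include <CL/cl.h>
#include <string>

static cl_int build_with_constants(cl_program program, cl_device_id device) {
    // Each -D define becomes a preprocessor symbol visible to every kernel,
    // so one source string can be specialized without text substitution.
    const std::string options = "-DQK4_0=32 -cl-std=CL1.1";
    return clBuildProgram(program, 1, &device, options.c_str(), nullptr, nullptr);
}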
parent 7e4ea5beff
commit 2e6cd4b025
8 changed files with 1113 additions and 536 deletions
@@ -1,23 +1,21 @@
#pragma once

#include "ggml.h"

#ifdef __cplusplus
extern "C" {
#endif

void ggml_cl_init(void);

enum ggml_blas_order {
    GGML_BLAS_ORDER_ROW_MAJOR = 101,
    GGML_BLAS_ORDER_COLUMN_MAJOR = 102,
};
bool ggml_cl_can_mul_mat(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst);
size_t ggml_cl_mul_mat_get_wsize(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst);
void ggml_cl_mul_mat(const struct ggml_tensor * src0, const struct ggml_tensor * src1, struct ggml_tensor * dst, void * wdata, size_t wsize);

enum ggml_blas_op {
    GGML_BLAS_OP_N = 111,
    GGML_BLAS_OP_T = 112,
    GGML_BLAS_OP_C = 113,
};
void * ggml_cl_host_malloc(size_t size);
void ggml_cl_host_free(void * ptr);

void ggml_cl_sgemm_wrapper(const enum ggml_blas_order order, const enum ggml_blas_op trans_a, const enum ggml_blas_op trans_b, const int m, const int n, const int k, const float alpha, const void *host_a, const int lda, const float *host_b, const int ldb, const float beta, float *host_c, const int ldc, const int btype);
void ggml_cl_transform_tensor(struct ggml_tensor * tensor);

#ifdef __cplusplus
}
#endif
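For orientation, here is a minimal sketch of how the ggml_cl_* declarations above fit together on the host side: probe whether the op is supported, ask the backend how much scratch space it needs, then dispatch. The helper name, the std::vector scratch buffer, and the "ggml-opencl.h" include name are illustrative assumptions, not code from this commit; ggml_cl_init() is expected to have been called once beforehand to select the platform and device.

#include <vector>
#include "ggml.h"
// Assumed name of the header shown in the diff above.
#include "ggml-opencl.h"

// Try to run the mul_mat of src0 and src1 into dst on the OpenCL device;
// returns false when unsupported so the caller can fall back to the CPU path.
static bool try_mul_mat_on_cl(const struct ggml_tensor * src0,
                              const struct ggml_tensor * src1,
                              struct ggml_tensor * dst) {
    if (!ggml_cl_can_mul_mat(src0, src1, dst)) {
        return false;
    }
    // Scratch buffer sized by the backend, e.g. for dequantized input data.
    const size_t wsize = ggml_cl_mul_mat_get_wsize(src0, src1, dst);
    std::vector<char> wdata(wsize);
    ggml_cl_mul_mat(src0, src1, dst, wdata.data(), wsize);
    return true;
}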