Improve cuBLAS performance by dequantizing on the GPU (#1065)
This commit is contained in:
parent
834695fe3a
commit
02d6988121
5 changed files with 221 additions and 41 deletions
11
ggml-cuda.h
Normal file
11
ggml-cuda.h
Normal file
|
@@ -0,0 +1,11 @@
|
|||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// Prototype only; the definition is not part of this header. Returns nothing.
// Parameters, as declared:
//   vx     - untyped read-only input pointer
//   y      - float output pointer
//   k      - int count
//   stream - CUDA stream handle
// NOTE(review): judging by the name, this presumably dequantizes Q4_0 data from `vx`
//   into floats at `y`, with the work issued on `stream` — confirm against the definition.
// NOTE(review): `k` is presumably the number of output elements; whether `vx`/`y` must be
//   device pointers, and whether the call is asynchronous, is not shown here — verify.
// NOTE(review): no #include is visible in this header, so includers presumably have to
//   bring in the declaration of cudaStream_t (e.g. <cuda_runtime.h>) first — TODO confirm.
void dequantize_row_q4_0_cuda(const void * vx, float * y, int k, cudaStream_t stream);
|
||||
// Prototype only; the definition is not part of this header. Returns nothing.
// Parameters, as declared:
//   vx     - untyped read-only input pointer
//   y      - float output pointer
//   k      - int count
//   stream - CUDA stream handle
// NOTE(review): judging by the name, this presumably dequantizes Q4_1 data from `vx`
//   into floats at `y`, with the work issued on `stream` — confirm against the definition.
// NOTE(review): `k` is presumably the number of output elements; whether `vx`/`y` must be
//   device pointers, and whether the call is asynchronous, is not shown here — verify.
// NOTE(review): no #include is visible in this header, so includers presumably have to
//   bring in the declaration of cudaStream_t (e.g. <cuda_runtime.h>) first — TODO confirm.
void dequantize_row_q4_1_cuda(const void * vx, float * y, int k, cudaStream_t stream);
|
||||
// Prototype only; the definition is not part of this header. Returns nothing.
// Parameters, as declared:
//   vx     - untyped read-only input pointer
//   y      - float output pointer
//   k      - int count
//   stream - CUDA stream handle
// NOTE(review): judging by the name, this presumably dequantizes Q4_2 data from `vx`
//   into floats at `y`, with the work issued on `stream` — confirm against the definition.
// NOTE(review): `k` is presumably the number of output elements; whether `vx`/`y` must be
//   device pointers, and whether the call is asynchronous, is not shown here — verify.
// NOTE(review): no #include is visible in this header, so includers presumably have to
//   bring in the declaration of cudaStream_t (e.g. <cuda_runtime.h>) first — TODO confirm.
void dequantize_row_q4_2_cuda(const void * vx, float * y, int k, cudaStream_t stream);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
Loading…
Add table
Add a link
Reference in a new issue