Improve cuBLAS performance by dequantizing on the GPU (#1065)

This commit is contained in:
slaren 2023-04-20 03:14:14 +02:00 committed by GitHub
parent 834695fe3a
commit 02d6988121
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 221 additions and 41 deletions

11
ggml-cuda.h Normal file
View file

@@ -0,0 +1,11 @@
// Declarations of the CUDA row-dequantization entry points, one per
// quantization format named in the function suffix (q4_0, q4_1, q4_2).
//
// NOTE(review): this header uses cudaStream_t but no #include is visible in
// this view; presumably the including file must pull in <cuda_runtime.h>
// first -- confirm, or make the header self-contained.
//
// When compiled as C++, give the declarations C linkage so the same symbols
// can be referenced from C translation units.
#ifdef __cplusplus
extern "C" {
#endif
// All three functions share one signature:
//   vx     - untyped, read-only input pointer (presumably the quantized
//            blocks of the matching format -- TODO confirm layout and
//            whether this must be device memory)
//   y      - float output pointer (presumably receives the dequantized
//            values -- TODO confirm memory space)
//   k      - int count (presumably the number of elements in the row;
//            verify any block-size divisibility requirement at the definition)
//   stream - CUDA stream handle (presumably the stream the work is issued
//            on; whether the call returns before completion is not visible
//            here -- confirm)
void dequantize_row_q4_0_cuda(const void * vx, float * y, int k, cudaStream_t stream);
void dequantize_row_q4_1_cuda(const void * vx, float * y, int k, cudaStream_t stream);
void dequantize_row_q4_2_cuda(const void * vx, float * y, int k, cudaStream_t stream);
// Close the extern "C" block opened above (C++ only).
#ifdef __cplusplus
}
#endif