Improve cuBLAS performance by dequantizing on the GPU (#1065)

2023-04-20 03:14:14 +02:00 · 2023-04-20 03:14:14 +02:00 · 02d6988121
commit 02d6988121
parent 834695fe3a
5 changed files with 221 additions and 41 deletions
--- a/ggml-cuda.h
+++ b/ggml-cuda.h
@@ -0,0 +1,11 @@
+#ifdef  __cplusplus // when compiled as C++, give the declarations below C linkage
+extern "C" {
+#endif
+// NOTE(review): cudaStream_t is used below but this header includes nothing; the includer must bring in the CUDA runtime declarations first.
+void dequantize_row_q4_0_cuda(const void * vx, float * y, int k, cudaStream_t stream); // presumably vx = quantized input, y = float output, k = element count -- TODO confirm against the definition
+void dequantize_row_q4_1_cuda(const void * vx, float * y, int k, cudaStream_t stream); // same signature as the q4_0 variant; presumably the q4_1 format -- confirm
+void dequantize_row_q4_2_cuda(const void * vx, float * y, int k, cudaStream_t stream); // same signature as the q4_0 variant; presumably the q4_2 format -- confirm
+
+#ifdef  __cplusplus // closes the C-linkage block opened at the top of the header
+}
+#endif