From 504a47abf89ca00cbdcdb8c9dd2d678a9c3c305a Mon Sep 17 00:00:00 2001 From: Aidan Date: Tue, 2 Jul 2024 12:38:17 +0100 Subject: [PATCH] Vec load quantized values --- ggml/src/ggml-sycl/common.hpp | 6 ++++++ ggml/src/ggml-sycl/dequantize.hpp | 7 +++---- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/ggml/src/ggml-sycl/common.hpp b/ggml/src/ggml-sycl/common.hpp index dfd4a7c2c..476d847ca 100644 --- a/ggml/src/ggml-sycl/common.hpp +++ b/ggml/src/ggml-sycl/common.hpp @@ -351,4 +351,10 @@ static __dpct_inline__ float warp_reduce_max(float x, return x; } +// Helper for vec loading aligned data +template +inline sycl::vec vec_aligned_load(const Tp* aligned_ptr) { + return *reinterpret_cast*>(aligned_ptr); +} + #endif // GGML_SYCL_COMMON_HPP diff --git a/ggml/src/ggml-sycl/dequantize.hpp b/ggml/src/ggml-sycl/dequantize.hpp index d8a9f954c..ed8ad098b 100644 --- a/ggml/src/ggml-sycl/dequantize.hpp +++ b/ggml/src/ggml-sycl/dequantize.hpp @@ -327,8 +327,6 @@ static void dequantize_block_q4_K(const void * __restrict__ vx, dst_t * __restri scales_local[tid] = x[i].scales[tid]; item_ct1.barrier(sycl::access::fence_space::local_space); - const uint8_t * q = x[i].qs + 32*il + n*ir; - uint8_t sc, m; get_scale_min_k4(is + 0, scales_local, sc, m); const float d1 = dall * sc; @@ -337,9 +335,10 @@ static void dequantize_block_q4_K(const void * __restrict__ vx, dst_t * __restri const float d2 = dall * sc; const float m2 = dmin * m; + sycl::vec q_vec = vec_aligned_load(x[i].qs + 32*il + n*ir); for (int l = 0; l < n; ++l) { - y[l + 0] = d1 * (q[l] & 0xF) - m1; - y[l +32] = d2 * (q[l] >> 4) - m2; + y[l + 0] = d1 * (q_vec[l] & 0xF) - m1; + y[l +32] = d2 * (q_vec[l] >> 4) - m2; } #else const int tid = item_ct1.get_local_id(2);