Dequant improvements rebase (#8255)

* Single load for half2

* Store scales in local mem

* Vec load quantized values
This commit is contained in:
AidanBeltonS 2024-07-03 02:55:34 +01:00 committed by GitHub
parent a27152b602
commit fadde67135
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 30 additions and 13 deletions

View file

@ -351,4 +351,10 @@ static __dpct_inline__ float warp_reduce_max(float x,
return x;
}
// Loads `n` consecutive `Tp` elements starting at `aligned_ptr` and returns
// them as one sycl::vec<Tp, n> value.
// The address is reinterpreted as a pointer to the vector type and then
// dereferenced, so the caller is expected to pass a pointer that is suitably
// aligned for sycl::vec<Tp, n> and has at least `n` readable elements.
template <typename Tp, int n>
inline sycl::vec<Tp, n> vec_aligned_load(const Tp* aligned_ptr) {
    using vec_t = sycl::vec<Tp, n>;
    const vec_t* vec_ptr = reinterpret_cast<const vec_t*>(aligned_ptr);
    return *vec_ptr;
}
#endif // GGML_SYCL_COMMON_HPP