From cce6e67f44b946b355ac9c4dc0c4762d491ccdb5 Mon Sep 17 00:00:00 2001
From: Concedo <39025047+LostRuins@users.noreply.github.com>
Date: Tue, 20 Jun 2023 22:45:16 +0800
Subject: [PATCH] Add __global address space to k-quant kernel pointers

---
 ggml-opencl.cpp | 44 ++++++++++++++++++++++----------------------
 1 file changed, 22 insertions(+), 22 deletions(-)

diff --git a/ggml-opencl.cpp b/ggml-opencl.cpp
index 35019819f..28fe6d15b 100644
--- a/ggml-opencl.cpp
+++ b/ggml-opencl.cpp
@@ -362,13 +362,13 @@ __kernel void dequantize_mul_mat_vec_q2_K(__global const struct block_q2_K * xx,
 
     for (int i = ix; i < num_blocks_per_row; i += K_QUANTS_PER_ITERATION) {
 
-        const float   * y = yy + i * QK_K + y_offset;
-        const uint8_t * q = x[i].qs + q_offset;
+        __global const float   * y = yy + i * QK_K + y_offset;
+        __global const uint8_t * q = x[i].qs + q_offset;
 
         const float dall = vload_half(0, &x[i].d);
         const float dmin = vload_half(0, &x[i].dmin);
 
-        const uint32_t * a = (const uint32_t *)(x[i].scales + s_offset);
+        __global const uint32_t * a = (__global const uint32_t *)(x[i].scales + s_offset);
         aux[0] = a[0] & 0x0f0f0f0f;
         aux[1] = a[1] & 0x0f0f0f0f;
         aux[2] = (a[0] >> 4) & 0x0f0f0f0f;
@@ -439,11 +439,11 @@ __kernel void dequantize_mul_mat_vec_q3_K(__global const struct block_q3_K * xx,
 
     for (int i = ix; i < num_blocks_per_row; i += K_QUANTS_PER_ITERATION) {
 
-        const float   * y  = yy + i * QK_K + y_offset;
-        const uint8_t * q = x[i].qs + q_offset;
-        const uint8_t * h = x[i].hmask + l0;
+        __global const float   * y  = yy + i * QK_K + y_offset;
+        __global const uint8_t * q = x[i].qs + q_offset;
+        __global const uint8_t * h = x[i].hmask + l0;
 
-        const uint16_t * a = (const uint16_t *)x[i].scales;
+        __global const uint16_t * a = (__global const uint16_t *)x[i].scales;
         utmp[0] = ((a[0] >> s_shift) & kmask2) | (((a[4] >> (s_shift + 0)) & kmask1) << 4);
         utmp[1] = ((a[1] >> s_shift) & kmask2) | (((a[5] >> (s_shift + 0)) & kmask1) << 4);
         utmp[2] = ((a[2] >> s_shift) & kmask2) | (((a[4] >> (s_shift + 2)) & kmask1) << 4);
@@ -515,15 +515,15 @@ __kernel void dequantize_mul_mat_vec_q4_K(__global const struct block_q4_K * xx,
 
     for (int i = ix; i < num_blocks_per_row; i += K_QUANTS_PER_ITERATION) {
 
-        const uint8_t * q1 = x[i].qs + q_offset;
-        const uint8_t * q2 = q1 + 64;
-        const float   * y1 = yy + i*QK_K + y_offset;
-        const float   * y2 = y1 + 128;
+        __global const uint8_t * q1 = x[i].qs + q_offset;
+        __global const uint8_t * q2 = q1 + 64;
+        __global const float   * y1 = yy + i*QK_K + y_offset;
+        __global const float   * y2 = y1 + 128;
 
         const float dall = vload_half(0, &x[i].d);
         const float dmin = vload_half(0, &x[i].dmin);
 
-        const uint16_t * a = (const uint16_t *)x[i].scales;
+        __global const uint16_t * a = (__global const uint16_t *)x[i].scales;
         aux[0] = a[im+0] & kmask1;
         aux[1] = a[im+2] & kmask1;
         aux[2] = ((a[im+4] >> 0) & kmask2) | ((a[im+0] & kmask3) >> 2);
@@ -589,16 +589,16 @@ __kernel void dequantize_mul_mat_vec_q5_K(__global const struct block_q5_K * xx,
 
     for (int i = ix; i < num_blocks_per_row; i += 2) {
 
-        const uint8_t * ql1 = x[i].qs + q_offset;
-        const uint8_t * ql2 = ql1 + 64;
-        const uint8_t * qh  = x[i].qh + l0;
-        const float   * y1  = yy + i*QK_K + y_offset;
-        const float   * y2  = y1 + 128;
+        __global const uint8_t * ql1 = x[i].qs + q_offset;
+        __global const uint8_t * ql2 = ql1 + 64;
+        __global const uint8_t * qh  = x[i].qh + l0;
+        __global const float   * y1  = yy + i*QK_K + y_offset;
+        __global const float   * y2  = y1 + 128;
 
         const float dall = vload_half(0, &x[i].d);
         const float dmin = vload_half(0, &x[i].dmin);
 
-        const uint16_t * a = (const uint16_t *)x[i].scales;
+        __global const uint16_t * a = (__global const uint16_t *)x[i].scales;
         aux[0] = a[im+0] & kmask1;
         aux[1] = a[im+2] & kmask1;
         aux[2] = ((a[im+4] >> 0) & kmask2) | ((a[im+0] & kmask3) >> 2);
@@ -668,10 +668,10 @@ __kernel void dequantize_mul_mat_vec_q6_K(__global const struct block_q6_K * xx,
 
     for (int i = ix; i < num_blocks_per_row; i += K_QUANTS_PER_ITERATION) {
 
-        const float   * y  = yy + i * QK_K + y_offset;
-        const uint8_t * ql = x[i].ql + ql_offset;
-        const uint8_t * qh = x[i].qh + qh_offset;
-        const int8_t  * s  = x[i].scales + s_offset;
+        __global const float   * y  = yy + i * QK_K + y_offset;
+        __global const uint8_t * ql = x[i].ql + ql_offset;
+        __global const uint8_t * qh = x[i].qh + qh_offset;
+        __global const int8_t  * s  = x[i].scales + s_offset;
 
         const float d = vload_half(0, &x[i].d);