Merge branch 'concedo-opencl-dev' into concedo_experimental

# Conflicts:
#	ggml-opencl.cpp
2023-06-04 18:07:27 +08:00 · 2023-06-04 18:07:27 +08:00 · 1ddbb9acd9
commit 1ddbb9acd9
parent dd4b5c64b8 64e3e74556
1 changed file with 3 additions and 1 deletion
--- a/ggml-opencl.cpp
+++ b/ggml-opencl.cpp
@@ -4,6 +4,7 @@
 #include <atomic>
 #include <sstream>
 #include <vector>
+#include <limits>

 #define CL_TARGET_OPENCL_VERSION 110
 #include <clblast.h>
@@ -616,7 +617,7 @@ static cl_mem ggml_cl_pool_malloc(size_t size, size_t * actual_size, cl_mem_flag
    scoped_spin_lock lock(g_cl_pool_lock);
    cl_int err;

-    int best_i = -1, best_size = (size_t)-1; //smallest unused buffer that fits our needs
+    int best_i = -1, best_size = std::numeric_limits<size_t>::max(); //smallest unused buffer that fits our needs
    int worst_i = -1, worst_size = 0; //largest unused buffer seen so far
    for (int i = 0; i < MAX_CL_BUFFERS; ++i) {
        cl_buffer &b = g_cl_buffer_pool[i];
@@ -725,6 +726,7 @@ static void ggml_cl_mul_f32(const ggml_tensor * src0, const ggml_tensor * src1,
    cl_mem d_Y = (cl_mem) src1->data; // src1 is already on device, broadcasted.
    cl_mem d_D = ggml_cl_pool_malloc(ne0 * sizeof(float), &d_size, CL_MEM_READ_WRITE); // dst

+
    for (int64_t i03 = 0; i03 < ne03; i03++) {
        for (int64_t i02 = 0; i02 < ne02; i02++) {
            const int i0 = i03*ne02 + i02;