switch to upstream implementation of pool malloc

2023-08-07 15:16:37 +08:00 · 2023-08-07 15:16:37 +08:00 · 9f16a4c4ef
commit 9f16a4c4ef
parent 6659652c9f
1 changed file with 36 additions and 28 deletions
--- a/ggml-cuda.cu
+++ b/ggml-cuda.cu
@@ -4293,45 +4293,53 @@ static void * ggml_cuda_pool_malloc(size_t size, size_t * actual_size) {
    scoped_spin_lock lock(g_cuda_pool_lock);
    int id;
    CUDA_CHECK(cudaGetDevice(&id));
-
+#ifdef DEBUG_CUDA_MALLOC
-    int best_i = -1;
+    int nnz = 0;
-    size_t best_size = std::numeric_limits<size_t>::max(); //smallest unused buffer that fits our needs
+    size_t max_size = 0, tot_size = 0;
-    int worst_i = -1;
+#endif
-    size_t worst_size = 0; //largest unused buffer seen so far
+    size_t best_diff = 1ull << 36;
-
+    int ibest = -1;
    for (int i = 0; i < MAX_CUDA_BUFFERS; ++i) {
        cuda_buffer& b = g_cuda_buffer_pool[id][i];
-        if (b.size > 0 && b.size >= size && b.size < best_size)
+        if (b.ptr != nullptr) {
-        {
+#ifdef DEBUG_CUDA_MALLOC
-            best_i = i;
+            ++nnz;
-            best_size = b.size;
+            tot_size += b.size;
-        }
+            if (b.size > max_size) max_size = b.size;
-        if (b.size > 0 && b.size > worst_size)
+#endif
-        {
+            if (b.size >= size) {
-            worst_i = i;
+                size_t diff = b.size - size;
-            worst_size = b.size;
+                if (diff < best_diff) {
+                    best_diff = diff;
+                    ibest = i;
+                    if (!best_diff) {
+                        void * ptr = b.ptr;
+                        *actual_size = b.size;
+                        b.ptr = nullptr;
+                        b.size = 0;
+                        return ptr;
+                    }
+                }
+            }
        }
    }
-    if(best_i!=-1) //found the smallest buffer that fits our needs
+    if (ibest >= 0) {
-    {
+        cuda_buffer& b = g_cuda_buffer_pool[id][ibest];
-        cuda_buffer& b = g_cuda_buffer_pool[id][best_i];
        void * ptr = b.ptr;
        *actual_size = b.size;
        b.ptr = nullptr;
        b.size = 0;
        return ptr;
    }
-    if(worst_i!=-1) //no buffer that fits our needs, resize largest one to save memory
+#ifdef DEBUG_CUDA_MALLOC
-    {
+    fprintf(stderr, "%s: %d buffers, max_size = %u MB, tot_size = %u MB, requested %u MB\n", __func__, nnz,
-        cuda_buffer& b = g_cuda_buffer_pool[id][worst_i];
+            (uint32_t)(max_size/1024/1024), (uint32_t)(tot_size/1024/1024), (uint32_t)(size/1024/1024));
-        b.size = 0;
+#endif
-        void * ptr = b.ptr;
-        cudaFree(ptr);
-        b.ptr = ptr = nullptr;
-    }
    void * ptr;
-    CUDA_CHECK(cudaMalloc((void **) &ptr, size));
+    size_t look_ahead_size = (size_t) (1.05 * size);
-    *actual_size = size;
+    look_ahead_size = 256 * ((look_ahead_size + 255)/256);
+    CUDA_CHECK(cudaMalloc((void **) &ptr, look_ahead_size));
+    *actual_size = look_ahead_size;
    return ptr;
 }