diff --git a/ggml/src/ggml-opencl2/CMakeLists.txt b/ggml/src/ggml-opencl2/CMakeLists.txt
index 6f0db48f6..6965cf026 100644
--- a/ggml/src/ggml-opencl2/CMakeLists.txt
+++ b/ggml/src/ggml-opencl2/CMakeLists.txt
@@ -26,12 +26,6 @@ if (OpenCL_FOUND)
 
     add_compile_definitions(GGML_OPENCL_SOA_Q)
 
-    if (GGML_OPENCL_SMALL_ALLOC)
-        message(STATUS "OpenCL will allocate a separate buffer for each tensor. "
-            "The default behavior allocates a large buffer to hold multiple tensors.")
-        add_compile_definitions(GGML_OPENCL_SMALL_ALLOC)
-    endif ()
-
     if (GGML_OPENCL_USE_ADRENO_KERNELS)
         message(STATUS "OpenCL will use matmul kernels optimized for Adreno")
         add_compile_definitions(GGML_OPENCL_USE_ADRENO_KERNELS)
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 2c5e84681..2d3ea0994 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -17,15 +17,6 @@ add_library(llama
             unicode-data.cpp
             )
 
-# TODO: This is intrusive. We intend to remove SMALL_ALLOC path once the we fully
-# migrate to the non SMALL_ALLOC path.
-if (GGML_OPENCL)
-    add_compile_definitions(GGML_USE_OPENCL)
-    if (GGML_OPENCL_SMALL_ALLOC)
-        add_compile_definitions(GGML_OPENCL_SMALL_ALLOC)
-    endif ()
-endif ()
-
 target_include_directories(llama PUBLIC . ../include)
 target_compile_features   (llama PUBLIC cxx_std_17) # don't bump
 
diff --git a/tests/test-backend-ops.cpp b/tests/test-backend-ops.cpp
index 0770e7cd6..9dd41260a 100644
--- a/tests/test-backend-ops.cpp
+++ b/tests/test-backend-ops.cpp
@@ -465,6 +465,7 @@ struct test_case {
         // post-graph sentinel
         add_sentinel(ctx);
 
+        // allocate
         ggml_backend_buffer_t buf = ggml_backend_alloc_ctx_tensors(ctx, backend1);
         if (buf == NULL) {
             printf("failed to allocate tensors [%s] ", ggml_backend_name(backend1));