diff --git a/llama.cpp b/llama.cpp
index 006620965..a9a7794ae 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -762,6 +762,7 @@ struct llama_model_loader {
         // allocate temp buffer if not using mmap
         if (!use_mmap && lt.data == NULL) {
+            GGML_ASSERT(lt.ggml_tensor->backend != GGML_BACKEND_CPU);
             lt.data = (uint8_t*)malloc(ggml_nbytes(lt.ggml_tensor));
         }