From 61726bd9421e2b9c2720cd1349d2d4119b151eaa Mon Sep 17 00:00:00 2001
From: Howard Su <howard0su@gmail.com>
Date: Mon, 12 Jun 2023 20:19:26 +0800
Subject: [PATCH] Add assert to make sure we only allocate temp buffer for
 non-CPU backend tensors
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-authored-by: Johannes Gäßler <johannesg@5d6.de>
---
 llama.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llama.cpp b/llama.cpp
index 006620965..a9a7794ae 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -762,6 +762,7 @@ struct llama_model_loader {
 
             // allocate temp buffer if not using mmap
             if (!use_mmap && lt.data == NULL) {
+                GGML_ASSERT(lt.ggml_tensor->backend != GGML_BACKEND_CPU);
                 lt.data = (uint8_t*)malloc(ggml_nbytes(lt.ggml_tensor));
             }