diff --git a/ggml-metal.m b/ggml-metal.m
index 836e6e42c..adcb3b0f0 100644
--- a/ggml-metal.m
+++ b/ggml-metal.m
@@ -750,19 +750,18 @@ void ggml_metal_graph_compute(
                 id<MTLBuffer> id_dst  = dst  ? ggml_metal_get_buffer(ctx, dst,  &offs_dst)  : nil;
 
                 //GGML_METAL_LOG_INFO("%s: op - %s\n", __func__, ggml_op_name(dst->op));
-                if (src0) {
-                    GGML_METAL_LOG_INFO("%s: src0 - %4s [%5lld, %5lld, %5lld], %d, %s\n", __func__, ggml_type_name(src0t), ne00, ne01, ne02,
-                            ggml_is_contiguous(src0), src0->name);
-                }
-                if (src1) {
-                    GGML_METAL_LOG_INFO("%s: src1 - %4s [%5lld, %5lld, %5lld], %d, %s\n", __func__, ggml_type_name(src1t), ne10, ne11, ne12,
-                            ggml_is_contiguous(src1), src1->name);
-                }
-                if (dst) {
-                    GGML_METAL_LOG_INFO("%s: dst  - %4s [%5lld, %5lld, %5lld], 1, %s\n",  __func__, ggml_type_name(dstt),  ne0,  ne1,  ne2,
-                            dst->name);
-                }
-*/
+                //if (src0) {
+                //    GGML_METAL_LOG_INFO("%s: src0 - %4s [%5lld, %5lld, %5lld], %d, %s\n", __func__, ggml_type_name(src0t), ne00, ne01, ne02,
+                //            ggml_is_contiguous(src0), src0->name);
+                //}
+                //if (src1) {
+                //    GGML_METAL_LOG_INFO("%s: src1 - %4s [%5lld, %5lld, %5lld], %d, %s\n", __func__, ggml_type_name(src1t), ne10, ne11, ne12,
+                //            ggml_is_contiguous(src1), src1->name);
+                //}
+                //if (dst) {
+                //    GGML_METAL_LOG_INFO("%s: dst  - %4s [%5lld, %5lld, %5lld], 1, %s\n",  __func__, ggml_type_name(dstt),  ne0,  ne1,  ne2,
+                //            dst->name);
+                //}
 
                 switch (dst->op) {
                     case GGML_OP_NONE:
@@ -775,8 +774,6 @@ void ggml_metal_graph_compute(
                         } break;
                     case GGML_OP_CONCAT:
                         {
-                            GGML_ASSERT(ggml_is_contiguous(src0));
-                            GGML_ASSERT(ggml_is_contiguous(src1));
 
                             int64_t nb = ne00;
                             [encoder setComputePipelineState:ctx->pipeline_concat];
@@ -951,6 +948,8 @@ void ggml_metal_graph_compute(
                         } break;
                     case GGML_OP_SQR:
                         {
+                            GGML_ASSERT(ggml_is_contiguous(src0));
+
                             [encoder setComputePipelineState:ctx->pipeline_sqr];
                             [encoder setBuffer:id_src0 offset:offs_src0 atIndex:0];
                             [encoder setBuffer:id_dst offset:offs_dst atIndex:1];
diff --git a/llama.cpp b/llama.cpp
index db2865219..0f276e7f7 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -3037,11 +3037,9 @@ static struct ggml_cgraph * llm_build_baichaun(
     struct ggml_init_params params = {
         /*.mem_size   =*/ buf_compute.size,
         /*.mem_buffer =*/ buf_compute.data,
-        /*.no_alloc   =*/ false,
+        /*.no_alloc   =*/ true,
     };
 
-    params.no_alloc = true;
-
     struct ggml_context * ctx0 = ggml_init(params);
 
     ggml_cgraph * gf = ggml_new_graph(ctx0);
@@ -3445,11 +3443,9 @@ static struct ggml_cgraph * llm_build_falcon(
     struct ggml_init_params params = {
         /*.mem_size   =*/ buf_compute.size,
         /*.mem_buffer =*/ buf_compute.data,
-        /*.no_alloc   =*/ false,
+        /*.no_alloc   =*/ true,
     };
 
-    params.no_alloc = true;
-
     struct ggml_context * ctx0 = ggml_init(params);
 
     ggml_cgraph * gf = ggml_new_graph(ctx0);
@@ -3805,11 +3801,9 @@ static struct ggml_cgraph * llm_build_starcoder(
     struct ggml_init_params params = {
         /*.mem_size   =*/ buf_compute.size,
         /*.mem_buffer =*/ buf_compute.data,
-        /*.no_alloc   =*/ false,
+        /*.no_alloc   =*/ true,
     };
 
-    params.no_alloc = true;
-
     struct ggml_context * ctx0 = ggml_init(params);
 
     ggml_cgraph * gf = ggml_new_graph(ctx0);