From cd4d3df8207c16bc3f44d0688ffce8fd89016957 Mon Sep 17 00:00:00 2001
From: Phillip Kravtsov
Date: Mon, 2 Oct 2023 10:26:39 -0700
Subject: [PATCH] Formatting changes

---
 ggml-metal.m         |  5 ++---
 gguf-py/gguf/gguf.py |  4 ++--
 llama.cpp            | 11 +++++------
 3 files changed, 9 insertions(+), 11 deletions(-)

diff --git a/ggml-metal.m b/ggml-metal.m
index adcb3b0f0..9f754fe62 100644
--- a/ggml-metal.m
+++ b/ggml-metal.m
@@ -870,8 +870,8 @@ void ggml_metal_graph_compute(
                     } break;
                 case GGML_OP_MUL:
                     {
-                        GGML_ASSERT(ggml_is_contiguous(src1));
                         GGML_ASSERT(ggml_is_contiguous(src0));
+                        GGML_ASSERT(ggml_is_contiguous(src1));
 
                         // utilize float4
                         GGML_ASSERT(ne00 % 4 == 0);
@@ -952,9 +952,8 @@ void ggml_metal_graph_compute(
 
                         [encoder setComputePipelineState:ctx->pipeline_sqr];
                         [encoder setBuffer:id_src0 offset:offs_src0 atIndex:0];
-                        [encoder setBuffer:id_dst  offset:offs_dst  atIndex:1];
+                        [encoder setBuffer:id_dst offset:offs_dst atIndex:1];
 
-                        //GGML_METAL_LOG_WARN("%s: node %3d, op = %8s dispatching \n", __func__, i, ggml_op_name(dst->op));
                         const int64_t n = ggml_nelements(dst);
                         [encoder dispatchThreadgroups:MTLSizeMake(n, 1, 1) threadsPerThreadgroup:MTLSizeMake(1, 1, 1)];
                     } break;
diff --git a/gguf-py/gguf/gguf.py b/gguf-py/gguf/gguf.py
index 882b96bc6..56b309125 100644
--- a/gguf-py/gguf/gguf.py
+++ b/gguf-py/gguf/gguf.py
@@ -85,7 +85,7 @@ class MODEL_ARCH(IntEnum):
     GPTNEOX   : int = auto()
     MPT       : int = auto()
     STARCODER : int = auto()
-    PERSIMMON : int = auto()
+    PERSIMMON : int = auto()
 
 
 class MODEL_TENSOR(IntEnum):
@@ -190,7 +190,7 @@ MODEL_TENSOR_NAMES: dict[MODEL_ARCH, dict[MODEL_TENSOR, str]] = {
         MODEL_TENSOR.FFN_UP: "blk.{bid}.ffn_up",
     },
     MODEL_ARCH.PERSIMMON: {
-        MODEL_TENSOR.TOKEN_EMBD: "token_embd",
+        MODEL_TENSOR.TOKEN_EMBD: "token_embd",
         MODEL_TENSOR.OUTPUT: "output",
         MODEL_TENSOR.OUTPUT_NORM: "output_norm",
         MODEL_TENSOR.ATTN_NORM: "blk.{bid}.attn_norm",
diff --git a/llama.cpp b/llama.cpp
index 0f276e7f7..c2d10d597 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -2473,7 +2473,7 @@ static void llm_load_tensors(
                 } break;
             case LLM_ARCH_PERSIMMON:
                 {
-                    model.tok_embeddings = ml.create_tensor(ctx, tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, GGML_BACKEND_CPU);
+                    model.tok_embeddings = ml.create_tensor(ctx, tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, GGML_BACKEND_CPU);
                     model.output_norm = ml.create_tensor(ctx, tn(LLM_TENSOR_OUTPUT_NORM, "weight"), {n_embd}, GGML_BACKEND_CPU);
                     model.output_norm_b = ml.create_tensor(ctx, tn(LLM_TENSOR_OUTPUT_NORM, "bias"), {n_embd}, GGML_BACKEND_CPU);
                     model.output = ml.create_tensor(ctx, tn(LLM_TENSOR_OUTPUT, "weight"), {n_embd, n_vocab}, GGML_BACKEND_CPU);
@@ -2612,8 +2612,8 @@ static bool llama_model_load(
 }
 
 static struct ggml_cgraph * llm_build_llama(
-    llama_context & lctx,
-    const llama_batch & batch) {
+         llama_context & lctx,
+     const llama_batch & batch) {
     const auto & model = lctx.model;
     const auto & hparams = model.hparams;
     const auto & cparams = lctx.cparams;
@@ -4016,9 +4016,8 @@ static struct ggml_cgraph * llm_build_starcoder(
 
 
 static struct ggml_cgraph * llm_build_persimmon(
-    llama_context & lctx,
-    const llama_batch & batch
-) {
+         llama_context & lctx,
+     const llama_batch & batch) {
     const auto & model = lctx.model;
     const auto & hparams = model.hparams;