Formatting changes

2023-10-02 10:26:39 -07:00 · 2023-10-02 10:26:39 -07:00 · cd4d3df820
commit cd4d3df820
parent e6bf87f785
3 changed files with 9 additions and 11 deletions
--- a/ggml-metal.m
+++ b/ggml-metal.m
@@ -870,8 +870,8 @@ void ggml_metal_graph_compute(
                        } break;
                    case GGML_OP_MUL:
                        {
-                            GGML_ASSERT(ggml_is_contiguous(src1));
                            GGML_ASSERT(ggml_is_contiguous(src0));
+                            GGML_ASSERT(ggml_is_contiguous(src1));

                            // utilize float4
                            GGML_ASSERT(ne00 % 4 == 0);
@@ -952,9 +952,8 @@ void ggml_metal_graph_compute(

                            [encoder setComputePipelineState:ctx->pipeline_sqr];
                            [encoder setBuffer:id_src0 offset:offs_src0 atIndex:0];
-                            [encoder setBuffer:id_dst offset:offs_dst atIndex:1];
+                            [encoder setBuffer:id_dst  offset:offs_dst atIndex:1];

-                            //GGML_METAL_LOG_WARN("%s: node %3d, op = %8s dispatching \n", __func__, i, ggml_op_name(dst->op));
                            const int64_t n = ggml_nelements(dst);
                            [encoder dispatchThreadgroups:MTLSizeMake(n, 1, 1) threadsPerThreadgroup:MTLSizeMake(1, 1, 1)];
                        } break;
--- a/gguf-py/gguf/gguf.py
+++ b/gguf-py/gguf/gguf.py
@@ -85,7 +85,7 @@ class MODEL_ARCH(IntEnum):
    GPTNEOX       : int = auto()
    MPT           : int = auto()
    STARCODER     : int = auto()
-    PERSIMMON         : int = auto()
+    PERSIMMON     : int = auto()


 class MODEL_TENSOR(IntEnum):
@@ -190,7 +190,7 @@ MODEL_TENSOR_NAMES: dict[MODEL_ARCH, dict[MODEL_TENSOR, str]] = {
        MODEL_TENSOR.FFN_UP:        "blk.{bid}.ffn_up",
    },
    MODEL_ARCH.PERSIMMON: {
-        MODEL_TENSOR.TOKEN_EMBD:   "token_embd",
+        MODEL_TENSOR.TOKEN_EMBD:    "token_embd",
        MODEL_TENSOR.OUTPUT:        "output",
        MODEL_TENSOR.OUTPUT_NORM:   "output_norm",
        MODEL_TENSOR.ATTN_NORM:     "blk.{bid}.attn_norm",
--- a/llama.cpp
+++ b/llama.cpp
@@ -2473,7 +2473,7 @@ static void llm_load_tensors(
                } break;
            case LLM_ARCH_PERSIMMON:
                { 
-                    model.tok_embeddings = ml.create_tensor(ctx, tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, GGML_BACKEND_CPU);
+                    model.tok_embeddings = ml.create_tensor(ctx, tn(LLM_TENSOR_TOKEN_EMBD, "weight"),  {n_embd, n_vocab}, GGML_BACKEND_CPU);
                    model.output_norm    = ml.create_tensor(ctx, tn(LLM_TENSOR_OUTPUT_NORM, "weight"), {n_embd},          GGML_BACKEND_CPU);
                    model.output_norm_b  = ml.create_tensor(ctx, tn(LLM_TENSOR_OUTPUT_NORM, "bias"),   {n_embd},          GGML_BACKEND_CPU);
                    model.output         = ml.create_tensor(ctx, tn(LLM_TENSOR_OUTPUT,      "weight"), {n_embd, n_vocab}, GGML_BACKEND_CPU);
@@ -2612,8 +2612,8 @@ static bool llama_model_load(
 }

 static struct ggml_cgraph * llm_build_llama(
-        llama_context & lctx,
-        const llama_batch & batch) {
+    llama_context & lctx,
+    const llama_batch & batch) {
    const auto & model   = lctx.model;
    const auto & hparams = model.hparams;
    const auto & cparams = lctx.cparams;
@@ -4016,9 +4016,8 @@ static struct ggml_cgraph * llm_build_starcoder(


 static struct ggml_cgraph * llm_build_persimmon(
-    llama_context & lctx,
-    const llama_batch & batch
-) {
+         llama_context & lctx,
+     const llama_batch & batch) {
    const auto & model = lctx.model;
    const auto & hparams = model.hparams;