From c90ed9f16b3c6551d3080a617a143d38909ae30d Mon Sep 17 00:00:00 2001 From: Phillip Kravtsov Date: Tue, 3 Oct 2023 13:18:23 -0700 Subject: [PATCH] Fix editorconfig formatting --- convert-persimmon-to-gguf.py | 6 +++--- ggml-metal.metal | 2 +- llama.cpp | 20 ++++++++++---------- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/convert-persimmon-to-gguf.py b/convert-persimmon-to-gguf.py index 307610428..e022ffe46 100644 --- a/convert-persimmon-to-gguf.py +++ b/convert-persimmon-to-gguf.py @@ -24,7 +24,7 @@ def _flatten_dict(dct, tensors, prefix=None): def _get_sentencepiece_tokenizer_info(dir_model: Path): tokenizer_path = dir_model / 'adept_vocab.model' print('gguf: getting sentencepiece tokenizer from', tokenizer_path) - tokenizer = SentencePieceProcessor(str(tokenizer_path)) + tokenizer = SentencePieceProcessor(str(tokenizer_path)) print('gguf: adding tokens') tokens: list[bytes] = [] scores: list[float] = [] @@ -70,7 +70,7 @@ def main(): arch = gguf.MODEL_ARCH.PERSIMMON gguf_writer = gguf.GGUFWriter(args.outfile, gguf.MODEL_ARCH_NAMES[arch]) - + block_count = hparams.num_layers head_count = hparams.num_attention_heads head_count_kv = head_count @@ -127,4 +127,4 @@ def main(): if __name__ == '__main__': - main() \ No newline at end of file + main() diff --git a/ggml-metal.metal b/ggml-metal.metal index b7038aa70..2ed1964fe 100644 --- a/ggml-metal.metal +++ b/ggml-metal.metal @@ -1129,7 +1129,7 @@ kernel void kernel_concat( uint3 tgpig[[threadgroup_position_in_grid]], uint3 tpitg[[thread_position_in_threadgroup]], uint3 ntg[[threads_per_threadgroup]]) { - + const int64_t i03 = tgpig.z; const int64_t i02 = tgpig.y; const int64_t i01 = tgpig.x; diff --git a/llama.cpp b/llama.cpp index 7e3c663cc..150ba152b 100644 --- a/llama.cpp +++ b/llama.cpp @@ -2476,7 +2476,7 @@ static void llm_load_tensors( } } break; case LLM_ARCH_PERSIMMON: - { + { model.tok_embeddings = ml.create_tensor(ctx, tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, GGML_BACKEND_CPU); model.output_norm = ml.create_tensor(ctx, tn(LLM_TENSOR_OUTPUT_NORM, "weight"), {n_embd}, GGML_BACKEND_CPU); model.output_norm_b = ml.create_tensor(ctx, tn(LLM_TENSOR_OUTPUT_NORM, "bias"), {n_embd}, GGML_BACKEND_CPU); @@ -4162,8 +4162,8 @@ static struct ggml_cgraph * llm_build_persimmon( cur = ggml_add(ctx0, cur, model.layers[il].attn_norm_b); offload_func(cur); ggml_format_name(cur, "input_layernorm_%d", il); - } - // self attention + } + // self attention { cur = ggml_mul_mat(ctx0, model.layers[il].wqkv, cur); offload_func_kq(cur); @@ -4206,7 +4206,7 @@ static struct ggml_cgraph * llm_build_persimmon( offload_func_v(tmpk); tmpk = ggml_add(ctx0, tmpk, model.layers[il].attn_k_norm_b); offload_func_v(tmpk); - + // RoPE the first n_rot of q/k, pass the other half, and concat. struct ggml_tensor * qrot = ggml_view_3d( ctx0, tmpq, n_rot, n_head, n_tokens, @@ -4227,7 +4227,7 @@ static struct ggml_cgraph * llm_build_persimmon( // get the second half of tmpq, e.g tmpq[n_rot:, :, :] struct ggml_tensor * qpass = ggml_view_3d( - ctx0, tmpq, n_rot, n_head, n_tokens, + ctx0, tmpq, n_rot, n_head, n_tokens, ggml_element_size(tmpq) * n_embd_head, ggml_element_size(tmpq) * n_embd_head * n_head, ggml_element_size(tmpq) * n_rot @@ -4328,9 +4328,9 @@ static struct ggml_cgraph * llm_build_persimmon( offload_func_kq(KQ_soft_max); ggml_set_name(KQ_soft_max, "KQ_soft_max"); - struct ggml_tensor * V = + struct ggml_tensor * V = ggml_view_3d(ctx0, kv_self.v, - n_kv, n_embd_head, n_head_kv, + n_kv, n_embd_head, n_head_kv, ggml_element_size(kv_self.v)*n_ctx, ggml_element_size(kv_self.v)*n_ctx*n_embd_head, ggml_element_size(kv_self.v)*n_ctx*n_embd_gqa*il); @@ -4361,11 +4361,11 @@ static struct ggml_cgraph * llm_build_persimmon( ggml_set_name(inpFF, "inpFF"); { // MLP - { + { // Norm cur = ggml_norm(ctx0, inpFF, norm_eps); offload_func(cur); - cur = ggml_add(ctx0, + cur = ggml_add(ctx0, ggml_mul(ctx0, cur, model.layers[il].ffn_norm), model.layers[il].ffn_norm_b ); @@ -4386,7 +4386,7 @@ static struct ggml_cgraph * llm_build_persimmon( cur = ggml_mul_mat(ctx0, model.layers[il].w2, cur); offload_func(cur); - cur = ggml_add(ctx0, + cur = ggml_add(ctx0, cur, model.layers[il].b2); offload_func(cur);