From 5f81588780339fd58c172591aa8273a198a20bca Mon Sep 17 00:00:00 2001 From: Zack Li Date: Tue, 10 Sep 2024 20:50:54 +0000 Subject: [PATCH] support ggml --- CMakeLists.txt | 12 ++++---- Makefile | 14 +++++----- examples/gguf-hash/CMakeLists.txt | 2 +- examples/gguf/CMakeLists.txt | 2 +- examples/llava/CMakeLists.txt | 4 +-- examples/rpc/CMakeLists.txt | 2 +- ggml/src/kompute | 1 - {ggml => ggml_llama}/.gitignore | 0 {ggml => ggml_llama}/CMakeLists.txt | 10 +++---- {ggml => ggml_llama}/cmake/FindSIMD.cmake | 0 {ggml => ggml_llama}/include/ggml-alloc.h | 0 {ggml => ggml_llama}/include/ggml-backend.h | 0 {ggml => ggml_llama}/include/ggml-blas.h | 0 {ggml => ggml_llama}/include/ggml-cann.h | 0 {ggml => ggml_llama}/include/ggml-cuda.h | 0 {ggml => ggml_llama}/include/ggml-kompute.h | 0 {ggml => ggml_llama}/include/ggml-metal.h | 0 {ggml => ggml_llama}/include/ggml-rpc.h | 0 {ggml => ggml_llama}/include/ggml-sycl.h | 0 {ggml => ggml_llama}/include/ggml-vulkan.h | 0 {ggml => ggml_llama}/include/ggml.h | 0 {ggml => ggml_llama}/src/CMakeLists.txt | 28 +++++++++---------- {ggml => ggml_llama}/src/ggml-aarch64.c | 0 {ggml => ggml_llama}/src/ggml-aarch64.h | 0 {ggml => ggml_llama}/src/ggml-alloc.c | 0 {ggml => ggml_llama}/src/ggml-backend-impl.h | 0 {ggml => ggml_llama}/src/ggml-backend.c | 0 {ggml => ggml_llama}/src/ggml-blas.cpp | 0 {ggml => ggml_llama}/src/ggml-cann.cpp | 0 .../src/ggml-cann/.clang-format | 0 {ggml => ggml_llama}/src/ggml-cann/Doxyfile | 0 .../src/ggml-cann/acl_tensor.cpp | 0 .../src/ggml-cann/acl_tensor.h | 0 .../src/ggml-cann/aclnn_ops.cpp | 0 .../src/ggml-cann/aclnn_ops.h | 0 {ggml => ggml_llama}/src/ggml-cann/common.h | 0 .../src/ggml-cann/kernels/CMakeLists.txt | 0 .../src/ggml-cann/kernels/ascendc_kernels.h | 0 .../src/ggml-cann/kernels/dup.cpp | 0 .../src/ggml-cann/kernels/get_row_f16.cpp | 0 .../src/ggml-cann/kernels/get_row_f32.cpp | 0 .../src/ggml-cann/kernels/get_row_q4_0.cpp | 0 .../src/ggml-cann/kernels/get_row_q8_0.cpp | 0 .../ggml-cann/kernels/quantize_f16_q8_0.cpp | 0 .../ggml-cann/kernels/quantize_f32_q8_0.cpp | 0 .../kernels/quantize_float_to_q4_0.cpp | 0 {ggml => ggml_llama}/src/ggml-common.h | 0 {ggml => ggml_llama}/src/ggml-cuda.cu | 0 {ggml => ggml_llama}/src/ggml-cuda/acc.cu | 0 {ggml => ggml_llama}/src/ggml-cuda/acc.cuh | 0 {ggml => ggml_llama}/src/ggml-cuda/arange.cu | 0 {ggml => ggml_llama}/src/ggml-cuda/arange.cuh | 0 {ggml => ggml_llama}/src/ggml-cuda/argsort.cu | 0 .../src/ggml-cuda/argsort.cuh | 0 .../src/ggml-cuda/binbcast.cu | 0 .../src/ggml-cuda/binbcast.cuh | 0 {ggml => ggml_llama}/src/ggml-cuda/clamp.cu | 0 {ggml => ggml_llama}/src/ggml-cuda/clamp.cuh | 0 {ggml => ggml_llama}/src/ggml-cuda/common.cuh | 0 {ggml => ggml_llama}/src/ggml-cuda/concat.cu | 0 {ggml => ggml_llama}/src/ggml-cuda/concat.cuh | 0 .../src/ggml-cuda/conv-transpose-1d.cu | 0 .../src/ggml-cuda/conv-transpose-1d.cuh | 0 {ggml => ggml_llama}/src/ggml-cuda/convert.cu | 0 .../src/ggml-cuda/convert.cuh | 0 {ggml => ggml_llama}/src/ggml-cuda/cpy.cu | 0 {ggml => ggml_llama}/src/ggml-cuda/cpy.cuh | 0 .../src/ggml-cuda/cross-entropy-loss.cu | 0 .../src/ggml-cuda/cross-entropy-loss.cuh | 0 .../src/ggml-cuda/dequantize.cuh | 0 .../src/ggml-cuda/diagmask.cu | 0 .../src/ggml-cuda/diagmask.cuh | 0 {ggml => ggml_llama}/src/ggml-cuda/dmmv.cu | 0 {ggml => ggml_llama}/src/ggml-cuda/dmmv.cuh | 0 .../src/ggml-cuda/fattn-common.cuh | 0 .../src/ggml-cuda/fattn-tile-f16.cu | 0 .../src/ggml-cuda/fattn-tile-f16.cuh | 0 .../src/ggml-cuda/fattn-tile-f32.cu | 0 .../src/ggml-cuda/fattn-tile-f32.cuh | 0 .../src/ggml-cuda/fattn-vec-f16.cuh | 0 .../src/ggml-cuda/fattn-vec-f32.cuh | 0 .../src/ggml-cuda/fattn-wmma-f16.cuh | 0 {ggml => ggml_llama}/src/ggml-cuda/fattn.cu | 0 {ggml => ggml_llama}/src/ggml-cuda/fattn.cuh | 0 {ggml => ggml_llama}/src/ggml-cuda/getrows.cu | 0 .../src/ggml-cuda/getrows.cuh | 0 {ggml => ggml_llama}/src/ggml-cuda/im2col.cu | 0 {ggml => ggml_llama}/src/ggml-cuda/im2col.cuh | 0 {ggml => ggml_llama}/src/ggml-cuda/mma.cuh | 0 {ggml => ggml_llama}/src/ggml-cuda/mmq.cu | 0 {ggml => ggml_llama}/src/ggml-cuda/mmq.cuh | 0 {ggml => ggml_llama}/src/ggml-cuda/mmvq.cu | 0 {ggml => ggml_llama}/src/ggml-cuda/mmvq.cuh | 0 {ggml => ggml_llama}/src/ggml-cuda/norm.cu | 0 {ggml => ggml_llama}/src/ggml-cuda/norm.cuh | 0 {ggml => ggml_llama}/src/ggml-cuda/pad.cu | 0 {ggml => ggml_llama}/src/ggml-cuda/pad.cuh | 0 {ggml => ggml_llama}/src/ggml-cuda/pool2d.cu | 0 {ggml => ggml_llama}/src/ggml-cuda/pool2d.cuh | 0 .../src/ggml-cuda/quantize.cu | 0 .../src/ggml-cuda/quantize.cuh | 0 {ggml => ggml_llama}/src/ggml-cuda/rope.cu | 0 {ggml => ggml_llama}/src/ggml-cuda/rope.cuh | 0 {ggml => ggml_llama}/src/ggml-cuda/scale.cu | 0 {ggml => ggml_llama}/src/ggml-cuda/scale.cuh | 0 {ggml => ggml_llama}/src/ggml-cuda/softmax.cu | 0 .../src/ggml-cuda/softmax.cuh | 0 {ggml => ggml_llama}/src/ggml-cuda/sumrows.cu | 0 .../src/ggml-cuda/sumrows.cuh | 0 .../fattn-vec-f16-instance-hs128-f16-f16.cu | 0 .../fattn-vec-f16-instance-hs128-f16-q4_0.cu | 0 .../fattn-vec-f16-instance-hs128-f16-q4_1.cu | 0 .../fattn-vec-f16-instance-hs128-f16-q5_0.cu | 0 .../fattn-vec-f16-instance-hs128-f16-q5_1.cu | 0 .../fattn-vec-f16-instance-hs128-f16-q8_0.cu | 0 .../fattn-vec-f16-instance-hs128-q4_0-f16.cu | 0 .../fattn-vec-f16-instance-hs128-q4_0-q4_0.cu | 0 .../fattn-vec-f16-instance-hs128-q4_0-q4_1.cu | 0 .../fattn-vec-f16-instance-hs128-q4_0-q5_0.cu | 0 .../fattn-vec-f16-instance-hs128-q4_0-q5_1.cu | 0 .../fattn-vec-f16-instance-hs128-q4_0-q8_0.cu | 0 .../fattn-vec-f16-instance-hs128-q4_1-f16.cu | 0 .../fattn-vec-f16-instance-hs128-q4_1-q4_0.cu | 0 .../fattn-vec-f16-instance-hs128-q4_1-q4_1.cu | 0 .../fattn-vec-f16-instance-hs128-q4_1-q5_0.cu | 0 .../fattn-vec-f16-instance-hs128-q4_1-q5_1.cu | 0 .../fattn-vec-f16-instance-hs128-q4_1-q8_0.cu | 0 .../fattn-vec-f16-instance-hs128-q5_0-f16.cu | 0 .../fattn-vec-f16-instance-hs128-q5_0-q4_0.cu | 0 .../fattn-vec-f16-instance-hs128-q5_0-q4_1.cu | 0 .../fattn-vec-f16-instance-hs128-q5_0-q5_0.cu | 0 .../fattn-vec-f16-instance-hs128-q5_0-q5_1.cu | 0 .../fattn-vec-f16-instance-hs128-q5_0-q8_0.cu | 0 .../fattn-vec-f16-instance-hs128-q5_1-f16.cu | 0 .../fattn-vec-f16-instance-hs128-q5_1-q4_0.cu | 0 .../fattn-vec-f16-instance-hs128-q5_1-q4_1.cu | 0 .../fattn-vec-f16-instance-hs128-q5_1-q5_0.cu | 0 .../fattn-vec-f16-instance-hs128-q5_1-q5_1.cu | 0 .../fattn-vec-f16-instance-hs128-q5_1-q8_0.cu | 0 .../fattn-vec-f16-instance-hs128-q8_0-f16.cu | 0 .../fattn-vec-f16-instance-hs128-q8_0-q4_0.cu | 0 .../fattn-vec-f16-instance-hs128-q8_0-q4_1.cu | 0 .../fattn-vec-f16-instance-hs128-q8_0-q5_0.cu | 0 .../fattn-vec-f16-instance-hs128-q8_0-q5_1.cu | 0 .../fattn-vec-f16-instance-hs128-q8_0-q8_0.cu | 0 .../fattn-vec-f16-instance-hs256-f16-f16.cu | 0 .../fattn-vec-f16-instance-hs64-f16-f16.cu | 0 .../fattn-vec-f16-instance-hs64-f16-q4_0.cu | 0 .../fattn-vec-f16-instance-hs64-f16-q4_1.cu | 0 .../fattn-vec-f16-instance-hs64-f16-q5_0.cu | 0 .../fattn-vec-f16-instance-hs64-f16-q5_1.cu | 0 .../fattn-vec-f16-instance-hs64-f16-q8_0.cu | 0 .../fattn-vec-f32-instance-hs128-f16-f16.cu | 0 .../fattn-vec-f32-instance-hs128-f16-q4_0.cu | 0 .../fattn-vec-f32-instance-hs128-f16-q4_1.cu | 0 .../fattn-vec-f32-instance-hs128-f16-q5_0.cu | 0 .../fattn-vec-f32-instance-hs128-f16-q5_1.cu | 0 .../fattn-vec-f32-instance-hs128-f16-q8_0.cu | 0 .../fattn-vec-f32-instance-hs128-q4_0-f16.cu | 0 .../fattn-vec-f32-instance-hs128-q4_0-q4_0.cu | 0 .../fattn-vec-f32-instance-hs128-q4_0-q4_1.cu | 0 .../fattn-vec-f32-instance-hs128-q4_0-q5_0.cu | 0 .../fattn-vec-f32-instance-hs128-q4_0-q5_1.cu | 0 .../fattn-vec-f32-instance-hs128-q4_0-q8_0.cu | 0 .../fattn-vec-f32-instance-hs128-q4_1-f16.cu | 0 .../fattn-vec-f32-instance-hs128-q4_1-q4_0.cu | 0 .../fattn-vec-f32-instance-hs128-q4_1-q4_1.cu | 0 .../fattn-vec-f32-instance-hs128-q4_1-q5_0.cu | 0 .../fattn-vec-f32-instance-hs128-q4_1-q5_1.cu | 0 .../fattn-vec-f32-instance-hs128-q4_1-q8_0.cu | 0 .../fattn-vec-f32-instance-hs128-q5_0-f16.cu | 0 .../fattn-vec-f32-instance-hs128-q5_0-q4_0.cu | 0 .../fattn-vec-f32-instance-hs128-q5_0-q4_1.cu | 0 .../fattn-vec-f32-instance-hs128-q5_0-q5_0.cu | 0 .../fattn-vec-f32-instance-hs128-q5_0-q5_1.cu | 0 .../fattn-vec-f32-instance-hs128-q5_0-q8_0.cu | 0 .../fattn-vec-f32-instance-hs128-q5_1-f16.cu | 0 .../fattn-vec-f32-instance-hs128-q5_1-q4_0.cu | 0 .../fattn-vec-f32-instance-hs128-q5_1-q4_1.cu | 0 .../fattn-vec-f32-instance-hs128-q5_1-q5_0.cu | 0 .../fattn-vec-f32-instance-hs128-q5_1-q5_1.cu | 0 .../fattn-vec-f32-instance-hs128-q5_1-q8_0.cu | 0 .../fattn-vec-f32-instance-hs128-q8_0-f16.cu | 0 .../fattn-vec-f32-instance-hs128-q8_0-q4_0.cu | 0 .../fattn-vec-f32-instance-hs128-q8_0-q4_1.cu | 0 .../fattn-vec-f32-instance-hs128-q8_0-q5_0.cu | 0 .../fattn-vec-f32-instance-hs128-q8_0-q5_1.cu | 0 .../fattn-vec-f32-instance-hs128-q8_0-q8_0.cu | 0 .../fattn-vec-f32-instance-hs256-f16-f16.cu | 0 .../fattn-vec-f32-instance-hs64-f16-f16.cu | 0 .../fattn-vec-f32-instance-hs64-f16-q4_0.cu | 0 .../fattn-vec-f32-instance-hs64-f16-q4_1.cu | 0 .../fattn-vec-f32-instance-hs64-f16-q5_0.cu | 0 .../fattn-vec-f32-instance-hs64-f16-q5_1.cu | 0 .../fattn-vec-f32-instance-hs64-f16-q8_0.cu | 0 .../fattn-wmma-f16-instance-kqfloat-cpb16.cu | 0 .../fattn-wmma-f16-instance-kqfloat-cpb32.cu | 0 .../fattn-wmma-f16-instance-kqhalf-cpb16.cu | 0 .../fattn-wmma-f16-instance-kqhalf-cpb32.cu | 0 .../fattn-wmma-f16-instance-kqhalf-cpb8.cu | 0 .../template-instances/generate_cu_files.py | 0 .../template-instances/mmq-instance-iq1_s.cu | 0 .../template-instances/mmq-instance-iq2_s.cu | 0 .../template-instances/mmq-instance-iq2_xs.cu | 0 .../mmq-instance-iq2_xxs.cu | 0 .../template-instances/mmq-instance-iq3_s.cu | 0 .../mmq-instance-iq3_xxs.cu | 0 .../template-instances/mmq-instance-iq4_nl.cu | 0 .../template-instances/mmq-instance-iq4_xs.cu | 0 .../template-instances/mmq-instance-q2_k.cu | 0 .../template-instances/mmq-instance-q3_k.cu | 0 .../template-instances/mmq-instance-q4_0.cu | 0 .../template-instances/mmq-instance-q4_1.cu | 0 .../template-instances/mmq-instance-q4_k.cu | 0 .../template-instances/mmq-instance-q5_0.cu | 0 .../template-instances/mmq-instance-q5_1.cu | 0 .../template-instances/mmq-instance-q5_k.cu | 0 .../template-instances/mmq-instance-q6_k.cu | 0 .../template-instances/mmq-instance-q8_0.cu | 0 {ggml => ggml_llama}/src/ggml-cuda/tsembd.cu | 0 {ggml => ggml_llama}/src/ggml-cuda/tsembd.cuh | 0 {ggml => ggml_llama}/src/ggml-cuda/unary.cu | 0 {ggml => ggml_llama}/src/ggml-cuda/unary.cuh | 0 {ggml => ggml_llama}/src/ggml-cuda/upscale.cu | 0 .../src/ggml-cuda/upscale.cuh | 0 .../src/ggml-cuda/vecdotq.cuh | 0 .../src/ggml-cuda/vendors/cuda.h | 0 .../src/ggml-cuda/vendors/hip.h | 0 .../src/ggml-cuda/vendors/musa.h | 0 {ggml => ggml_llama}/src/ggml-impl.h | 0 {ggml => ggml_llama}/src/ggml-kompute.cpp | 0 {ggml => ggml_llama}/src/ggml-metal.m | 0 {ggml => ggml_llama}/src/ggml-metal.metal | 0 {ggml => ggml_llama}/src/ggml-quants.c | 0 {ggml => ggml_llama}/src/ggml-quants.h | 0 {ggml => ggml_llama}/src/ggml-rpc.cpp | 0 {ggml => ggml_llama}/src/ggml-sycl.cpp | 0 .../src/ggml-sycl/backend.hpp | 0 {ggml => ggml_llama}/src/ggml-sycl/common.cpp | 0 {ggml => ggml_llama}/src/ggml-sycl/common.hpp | 0 {ggml => ggml_llama}/src/ggml-sycl/concat.cpp | 0 {ggml => ggml_llama}/src/ggml-sycl/concat.hpp | 0 {ggml => ggml_llama}/src/ggml-sycl/conv.cpp | 0 {ggml => ggml_llama}/src/ggml-sycl/conv.hpp | 0 .../src/ggml-sycl/convert.cpp | 0 .../src/ggml-sycl/convert.hpp | 0 .../src/ggml-sycl/dequantize.hpp | 0 {ggml => ggml_llama}/src/ggml-sycl/dmmv.cpp | 0 {ggml => ggml_llama}/src/ggml-sycl/dmmv.hpp | 0 .../src/ggml-sycl/dpct/helper.hpp | 0 {ggml => ggml_llama}/src/ggml-sycl/gemm.hpp | 0 {ggml => ggml_llama}/src/ggml-sycl/im2col.cpp | 0 {ggml => ggml_llama}/src/ggml-sycl/im2col.hpp | 0 {ggml => ggml_llama}/src/ggml-sycl/mmq.cpp | 0 {ggml => ggml_llama}/src/ggml-sycl/mmq.hpp | 0 {ggml => ggml_llama}/src/ggml-sycl/mmvq.cpp | 0 {ggml => ggml_llama}/src/ggml-sycl/mmvq.hpp | 0 {ggml => ggml_llama}/src/ggml-sycl/norm.cpp | 0 {ggml => ggml_llama}/src/ggml-sycl/norm.hpp | 0 .../src/ggml-sycl/presets.hpp | 0 {ggml => ggml_llama}/src/ggml-sycl/rope.cpp | 0 {ggml => ggml_llama}/src/ggml-sycl/rope.hpp | 0 .../src/ggml-sycl/softmax.cpp | 0 .../src/ggml-sycl/softmax.hpp | 0 {ggml => ggml_llama}/src/ggml-sycl/tsembd.cpp | 0 {ggml => ggml_llama}/src/ggml-sycl/tsembd.hpp | 0 .../src/ggml-sycl/vecdotq.hpp | 0 {ggml => ggml_llama}/src/ggml-vulkan.cpp | 0 {ggml => ggml_llama}/src/ggml.c | 0 .../src/kompute-shaders/common.comp | 0 .../src/kompute-shaders/op_add.comp | 0 .../src/kompute-shaders/op_addrow.comp | 0 .../src/kompute-shaders/op_cpy_f16_f16.comp | 0 .../src/kompute-shaders/op_cpy_f16_f32.comp | 0 .../src/kompute-shaders/op_cpy_f32_f16.comp | 0 .../src/kompute-shaders/op_cpy_f32_f32.comp | 0 .../src/kompute-shaders/op_diagmask.comp | 0 .../src/kompute-shaders/op_gelu.comp | 0 .../src/kompute-shaders/op_getrows.comp | 0 .../src/kompute-shaders/op_getrows_f16.comp | 0 .../src/kompute-shaders/op_getrows_f32.comp | 0 .../src/kompute-shaders/op_getrows_q4_0.comp | 0 .../src/kompute-shaders/op_getrows_q4_1.comp | 0 .../src/kompute-shaders/op_getrows_q6_k.comp | 0 .../src/kompute-shaders/op_mul.comp | 0 .../src/kompute-shaders/op_mul_mat_f16.comp | 0 .../kompute-shaders/op_mul_mat_mat_f32.comp | 0 .../src/kompute-shaders/op_mul_mat_q4_0.comp | 0 .../src/kompute-shaders/op_mul_mat_q4_1.comp | 0 .../src/kompute-shaders/op_mul_mat_q6_k.comp | 0 .../src/kompute-shaders/op_mul_mat_q8_0.comp | 0 .../src/kompute-shaders/op_mul_mv_q_n.comp | 0 .../kompute-shaders/op_mul_mv_q_n_pre.comp | 0 .../src/kompute-shaders/op_norm.comp | 0 .../src/kompute-shaders/op_relu.comp | 0 .../src/kompute-shaders/op_rmsnorm.comp | 0 .../src/kompute-shaders/op_rope_f16.comp | 0 .../src/kompute-shaders/op_rope_f32.comp | 0 .../src/kompute-shaders/op_scale.comp | 0 .../src/kompute-shaders/op_scale_8.comp | 0 .../src/kompute-shaders/op_silu.comp | 0 .../src/kompute-shaders/op_softmax.comp | 0 .../src/kompute-shaders/rope_common.comp | 0 {ggml => ggml_llama}/src/llamafile/sgemm.cpp | 0 {ggml => ggml_llama}/src/llamafile/sgemm.h | 0 .../src/vulkan-shaders/CMakeLists.txt | 0 .../src/vulkan-shaders/acc.comp | 0 .../src/vulkan-shaders/add.comp | 0 .../src/vulkan-shaders/argsort.comp | 0 .../src/vulkan-shaders/clamp.comp | 0 .../src/vulkan-shaders/concat.comp | 0 .../src/vulkan-shaders/copy.comp | 0 .../src/vulkan-shaders/cos.comp | 0 .../src/vulkan-shaders/dequant_f32.comp | 0 .../src/vulkan-shaders/dequant_funcs.comp | 0 .../src/vulkan-shaders/dequant_head.comp | 0 .../src/vulkan-shaders/dequant_iq4_nl.comp | 0 .../src/vulkan-shaders/dequant_q2_k.comp | 0 .../src/vulkan-shaders/dequant_q3_k.comp | 0 .../src/vulkan-shaders/dequant_q4_0.comp | 0 .../src/vulkan-shaders/dequant_q4_1.comp | 0 .../src/vulkan-shaders/dequant_q4_k.comp | 0 .../src/vulkan-shaders/dequant_q5_0.comp | 0 .../src/vulkan-shaders/dequant_q5_1.comp | 0 .../src/vulkan-shaders/dequant_q5_k.comp | 0 .../src/vulkan-shaders/dequant_q6_k.comp | 0 .../src/vulkan-shaders/dequant_q8_0.comp | 0 .../src/vulkan-shaders/diag_mask_inf.comp | 0 .../src/vulkan-shaders/div.comp | 0 .../src/vulkan-shaders/gelu.comp | 0 .../src/vulkan-shaders/gelu_quick.comp | 0 .../vulkan-shaders/generic_binary_head.comp | 0 .../src/vulkan-shaders/generic_head.comp | 0 .../vulkan-shaders/generic_unary_head.comp | 0 .../src/vulkan-shaders/get_rows.comp | 0 .../src/vulkan-shaders/get_rows_quant.comp | 0 .../src/vulkan-shaders/group_norm.comp | 0 .../src/vulkan-shaders/im2col.comp | 0 .../src/vulkan-shaders/leaky_relu.comp | 0 .../src/vulkan-shaders/mul.comp | 0 .../mul_mat_split_k_reduce.comp | 0 .../src/vulkan-shaders/mul_mat_vec.comp | 0 .../src/vulkan-shaders/mul_mat_vec_base.comp | 0 .../src/vulkan-shaders/mul_mat_vec_nc.comp | 0 .../src/vulkan-shaders/mul_mat_vec_p021.comp | 0 .../src/vulkan-shaders/mul_mat_vec_q2_k.comp | 0 .../src/vulkan-shaders/mul_mat_vec_q3_k.comp | 0 .../src/vulkan-shaders/mul_mat_vec_q4_k.comp | 0 .../src/vulkan-shaders/mul_mat_vec_q5_k.comp | 0 .../src/vulkan-shaders/mul_mat_vec_q6_k.comp | 0 .../src/vulkan-shaders/mul_mm.comp | 0 .../src/vulkan-shaders/norm.comp | 0 .../src/vulkan-shaders/pad.comp | 0 .../src/vulkan-shaders/relu.comp | 0 .../src/vulkan-shaders/repeat.comp | 0 .../src/vulkan-shaders/rms_norm.comp | 0 .../src/vulkan-shaders/rope_head.comp | 0 .../src/vulkan-shaders/rope_neox.comp | 0 .../src/vulkan-shaders/rope_norm.comp | 0 .../src/vulkan-shaders/scale.comp | 0 .../src/vulkan-shaders/silu.comp | 0 .../src/vulkan-shaders/sin.comp | 0 .../src/vulkan-shaders/soft_max.comp | 0 .../src/vulkan-shaders/square.comp | 0 .../src/vulkan-shaders/sum_rows.comp | 0 .../src/vulkan-shaders/tanh.comp | 0 .../vulkan-shaders/timestep_embedding.comp | 0 .../src/vulkan-shaders/types.comp | 0 .../src/vulkan-shaders/upscale.comp | 0 .../src/vulkan-shaders/vulkan-shaders-gen.cpp | 0 src/CMakeLists.txt | 2 +- 371 files changed, 38 insertions(+), 39 deletions(-) delete mode 160000 ggml/src/kompute rename {ggml => ggml_llama}/.gitignore (100%) rename {ggml => ggml_llama}/CMakeLists.txt (96%) rename {ggml => ggml_llama}/cmake/FindSIMD.cmake (100%) rename {ggml => ggml_llama}/include/ggml-alloc.h (100%) rename {ggml => ggml_llama}/include/ggml-backend.h (100%) rename {ggml => ggml_llama}/include/ggml-blas.h (100%) rename {ggml => ggml_llama}/include/ggml-cann.h (100%) rename {ggml => ggml_llama}/include/ggml-cuda.h (100%) rename {ggml => ggml_llama}/include/ggml-kompute.h (100%) rename {ggml => ggml_llama}/include/ggml-metal.h (100%) rename {ggml => ggml_llama}/include/ggml-rpc.h (100%) rename {ggml => ggml_llama}/include/ggml-sycl.h (100%) rename {ggml => ggml_llama}/include/ggml-vulkan.h (100%) rename {ggml => ggml_llama}/include/ggml.h (100%) rename {ggml => ggml_llama}/src/CMakeLists.txt (98%) rename {ggml => ggml_llama}/src/ggml-aarch64.c (100%) rename {ggml => ggml_llama}/src/ggml-aarch64.h (100%) rename {ggml => ggml_llama}/src/ggml-alloc.c (100%) rename {ggml => ggml_llama}/src/ggml-backend-impl.h (100%) rename {ggml => ggml_llama}/src/ggml-backend.c (100%) rename {ggml => ggml_llama}/src/ggml-blas.cpp (100%) rename {ggml => ggml_llama}/src/ggml-cann.cpp (100%) rename {ggml => ggml_llama}/src/ggml-cann/.clang-format (100%) rename {ggml => ggml_llama}/src/ggml-cann/Doxyfile (100%) rename {ggml => ggml_llama}/src/ggml-cann/acl_tensor.cpp (100%) rename {ggml => ggml_llama}/src/ggml-cann/acl_tensor.h (100%) rename {ggml => ggml_llama}/src/ggml-cann/aclnn_ops.cpp (100%) rename {ggml => ggml_llama}/src/ggml-cann/aclnn_ops.h (100%) rename {ggml => ggml_llama}/src/ggml-cann/common.h (100%) rename {ggml => ggml_llama}/src/ggml-cann/kernels/CMakeLists.txt (100%) rename {ggml => ggml_llama}/src/ggml-cann/kernels/ascendc_kernels.h (100%) rename {ggml => ggml_llama}/src/ggml-cann/kernels/dup.cpp (100%) rename {ggml => ggml_llama}/src/ggml-cann/kernels/get_row_f16.cpp (100%) rename {ggml => ggml_llama}/src/ggml-cann/kernels/get_row_f32.cpp (100%) rename {ggml => ggml_llama}/src/ggml-cann/kernels/get_row_q4_0.cpp (100%) rename {ggml => ggml_llama}/src/ggml-cann/kernels/get_row_q8_0.cpp (100%) rename {ggml => ggml_llama}/src/ggml-cann/kernels/quantize_f16_q8_0.cpp (100%) rename {ggml => ggml_llama}/src/ggml-cann/kernels/quantize_f32_q8_0.cpp (100%) rename {ggml => ggml_llama}/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp (100%) rename {ggml => ggml_llama}/src/ggml-common.h (100%) rename {ggml => ggml_llama}/src/ggml-cuda.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/acc.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/acc.cuh (100%) rename {ggml => ggml_llama}/src/ggml-cuda/arange.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/arange.cuh (100%) rename {ggml => ggml_llama}/src/ggml-cuda/argsort.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/argsort.cuh (100%) rename {ggml => ggml_llama}/src/ggml-cuda/binbcast.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/binbcast.cuh (100%) rename {ggml => ggml_llama}/src/ggml-cuda/clamp.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/clamp.cuh (100%) rename {ggml => ggml_llama}/src/ggml-cuda/common.cuh (100%) rename {ggml => ggml_llama}/src/ggml-cuda/concat.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/concat.cuh (100%) rename {ggml => ggml_llama}/src/ggml-cuda/conv-transpose-1d.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/conv-transpose-1d.cuh (100%) rename {ggml => ggml_llama}/src/ggml-cuda/convert.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/convert.cuh (100%) rename {ggml => ggml_llama}/src/ggml-cuda/cpy.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/cpy.cuh (100%) rename {ggml => ggml_llama}/src/ggml-cuda/cross-entropy-loss.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/cross-entropy-loss.cuh (100%) rename {ggml => ggml_llama}/src/ggml-cuda/dequantize.cuh (100%) rename {ggml => ggml_llama}/src/ggml-cuda/diagmask.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/diagmask.cuh (100%) rename {ggml => ggml_llama}/src/ggml-cuda/dmmv.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/dmmv.cuh (100%) rename {ggml => ggml_llama}/src/ggml-cuda/fattn-common.cuh (100%) rename {ggml => ggml_llama}/src/ggml-cuda/fattn-tile-f16.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/fattn-tile-f16.cuh (100%) rename {ggml => ggml_llama}/src/ggml-cuda/fattn-tile-f32.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/fattn-tile-f32.cuh (100%) rename {ggml => ggml_llama}/src/ggml-cuda/fattn-vec-f16.cuh (100%) rename {ggml => ggml_llama}/src/ggml-cuda/fattn-vec-f32.cuh (100%) rename {ggml => ggml_llama}/src/ggml-cuda/fattn-wmma-f16.cuh (100%) rename {ggml => ggml_llama}/src/ggml-cuda/fattn.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/fattn.cuh (100%) rename {ggml => ggml_llama}/src/ggml-cuda/getrows.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/getrows.cuh (100%) rename {ggml => ggml_llama}/src/ggml-cuda/im2col.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/im2col.cuh (100%) rename {ggml => ggml_llama}/src/ggml-cuda/mma.cuh (100%) rename {ggml => ggml_llama}/src/ggml-cuda/mmq.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/mmq.cuh (100%) rename {ggml => ggml_llama}/src/ggml-cuda/mmvq.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/mmvq.cuh (100%) rename {ggml => ggml_llama}/src/ggml-cuda/norm.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/norm.cuh (100%) rename {ggml => ggml_llama}/src/ggml-cuda/pad.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/pad.cuh (100%) rename {ggml => ggml_llama}/src/ggml-cuda/pool2d.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/pool2d.cuh (100%) rename {ggml => ggml_llama}/src/ggml-cuda/quantize.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/quantize.cuh (100%) rename {ggml => ggml_llama}/src/ggml-cuda/rope.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/rope.cuh (100%) rename {ggml => ggml_llama}/src/ggml-cuda/scale.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/scale.cuh (100%) rename {ggml => ggml_llama}/src/ggml-cuda/softmax.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/softmax.cuh (100%) rename {ggml => ggml_llama}/src/ggml-cuda/sumrows.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/sumrows.cuh (100%) rename {ggml => ggml_llama}/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-f16.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_0.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_1.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_0.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_1.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q8_0.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-f16.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_0.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_1.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_0.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_1.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q8_0.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-f16.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_0.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_1.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_0.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_1.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q8_0.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-f16.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_0.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_1.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_0.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_1.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q8_0.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-f16.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_0.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_1.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_0.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_1.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q8_0.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-f16.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_0.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_1.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_0.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_1.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q8_0.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs256-f16-f16.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-f16.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_0.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_1.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_0.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_1.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q8_0.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-f16.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_0.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_1.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_0.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_1.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q8_0.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-f16.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_0.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_1.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_0.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_1.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q8_0.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-f16.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_0.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_1.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_0.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_1.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q8_0.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-f16.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_0.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_1.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_0.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_1.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q8_0.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-f16.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_0.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_1.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_0.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_1.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q8_0.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-f16.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_0.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_1.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_0.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_1.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q8_0.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs256-f16-f16.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-f16.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_0.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_1.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_0.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_1.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q8_0.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb16.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb32.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb16.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb32.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb8.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/template-instances/generate_cu_files.py (100%) rename {ggml => ggml_llama}/src/ggml-cuda/template-instances/mmq-instance-iq1_s.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/template-instances/mmq-instance-iq2_s.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/template-instances/mmq-instance-iq2_xs.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/template-instances/mmq-instance-iq2_xxs.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/template-instances/mmq-instance-iq3_s.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/template-instances/mmq-instance-iq3_xxs.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/template-instances/mmq-instance-iq4_nl.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/template-instances/mmq-instance-iq4_xs.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/template-instances/mmq-instance-q2_k.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/template-instances/mmq-instance-q3_k.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/template-instances/mmq-instance-q4_0.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/template-instances/mmq-instance-q4_1.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/template-instances/mmq-instance-q4_k.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/template-instances/mmq-instance-q5_0.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/template-instances/mmq-instance-q5_1.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/template-instances/mmq-instance-q5_k.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/template-instances/mmq-instance-q6_k.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/template-instances/mmq-instance-q8_0.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/tsembd.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/tsembd.cuh (100%) rename {ggml => ggml_llama}/src/ggml-cuda/unary.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/unary.cuh (100%) rename {ggml => ggml_llama}/src/ggml-cuda/upscale.cu (100%) rename {ggml => ggml_llama}/src/ggml-cuda/upscale.cuh (100%) rename {ggml => ggml_llama}/src/ggml-cuda/vecdotq.cuh (100%) rename {ggml => ggml_llama}/src/ggml-cuda/vendors/cuda.h (100%) rename {ggml => ggml_llama}/src/ggml-cuda/vendors/hip.h (100%) rename {ggml => ggml_llama}/src/ggml-cuda/vendors/musa.h (100%) rename {ggml => ggml_llama}/src/ggml-impl.h (100%) rename {ggml => ggml_llama}/src/ggml-kompute.cpp (100%) rename {ggml => ggml_llama}/src/ggml-metal.m (100%) rename {ggml => ggml_llama}/src/ggml-metal.metal (100%) rename {ggml => ggml_llama}/src/ggml-quants.c (100%) rename {ggml => ggml_llama}/src/ggml-quants.h (100%) rename {ggml => ggml_llama}/src/ggml-rpc.cpp (100%) rename {ggml => ggml_llama}/src/ggml-sycl.cpp (100%) rename {ggml => ggml_llama}/src/ggml-sycl/backend.hpp (100%) rename {ggml => ggml_llama}/src/ggml-sycl/common.cpp (100%) rename {ggml => ggml_llama}/src/ggml-sycl/common.hpp (100%) rename {ggml => ggml_llama}/src/ggml-sycl/concat.cpp (100%) rename {ggml => ggml_llama}/src/ggml-sycl/concat.hpp (100%) rename {ggml => ggml_llama}/src/ggml-sycl/conv.cpp (100%) rename {ggml => ggml_llama}/src/ggml-sycl/conv.hpp (100%) rename {ggml => ggml_llama}/src/ggml-sycl/convert.cpp (100%) rename {ggml => ggml_llama}/src/ggml-sycl/convert.hpp (100%) rename {ggml => ggml_llama}/src/ggml-sycl/dequantize.hpp (100%) rename {ggml => ggml_llama}/src/ggml-sycl/dmmv.cpp (100%) rename {ggml => ggml_llama}/src/ggml-sycl/dmmv.hpp (100%) rename {ggml => ggml_llama}/src/ggml-sycl/dpct/helper.hpp (100%) rename {ggml => ggml_llama}/src/ggml-sycl/gemm.hpp (100%) rename {ggml => ggml_llama}/src/ggml-sycl/im2col.cpp (100%) rename {ggml => ggml_llama}/src/ggml-sycl/im2col.hpp (100%) rename {ggml => ggml_llama}/src/ggml-sycl/mmq.cpp (100%) rename {ggml => ggml_llama}/src/ggml-sycl/mmq.hpp (100%) rename {ggml => ggml_llama}/src/ggml-sycl/mmvq.cpp (100%) rename {ggml => ggml_llama}/src/ggml-sycl/mmvq.hpp (100%) rename {ggml => ggml_llama}/src/ggml-sycl/norm.cpp (100%) rename {ggml => ggml_llama}/src/ggml-sycl/norm.hpp (100%) rename {ggml => ggml_llama}/src/ggml-sycl/presets.hpp (100%) rename {ggml => ggml_llama}/src/ggml-sycl/rope.cpp (100%) rename {ggml => ggml_llama}/src/ggml-sycl/rope.hpp (100%) rename {ggml => ggml_llama}/src/ggml-sycl/softmax.cpp (100%) rename {ggml => ggml_llama}/src/ggml-sycl/softmax.hpp (100%) rename {ggml => ggml_llama}/src/ggml-sycl/tsembd.cpp (100%) rename {ggml => ggml_llama}/src/ggml-sycl/tsembd.hpp (100%) rename {ggml => ggml_llama}/src/ggml-sycl/vecdotq.hpp (100%) rename {ggml => ggml_llama}/src/ggml-vulkan.cpp (100%) rename {ggml => ggml_llama}/src/ggml.c (100%) rename {ggml => ggml_llama}/src/kompute-shaders/common.comp (100%) rename {ggml => ggml_llama}/src/kompute-shaders/op_add.comp (100%) rename {ggml => ggml_llama}/src/kompute-shaders/op_addrow.comp (100%) rename {ggml => ggml_llama}/src/kompute-shaders/op_cpy_f16_f16.comp (100%) rename {ggml => ggml_llama}/src/kompute-shaders/op_cpy_f16_f32.comp (100%) rename {ggml => ggml_llama}/src/kompute-shaders/op_cpy_f32_f16.comp (100%) rename {ggml => ggml_llama}/src/kompute-shaders/op_cpy_f32_f32.comp (100%) rename {ggml => ggml_llama}/src/kompute-shaders/op_diagmask.comp (100%) rename {ggml => ggml_llama}/src/kompute-shaders/op_gelu.comp (100%) rename {ggml => ggml_llama}/src/kompute-shaders/op_getrows.comp (100%) rename {ggml => ggml_llama}/src/kompute-shaders/op_getrows_f16.comp (100%) rename {ggml => ggml_llama}/src/kompute-shaders/op_getrows_f32.comp (100%) rename {ggml => ggml_llama}/src/kompute-shaders/op_getrows_q4_0.comp (100%) rename {ggml => ggml_llama}/src/kompute-shaders/op_getrows_q4_1.comp (100%) rename {ggml => ggml_llama}/src/kompute-shaders/op_getrows_q6_k.comp (100%) rename {ggml => ggml_llama}/src/kompute-shaders/op_mul.comp (100%) rename {ggml => ggml_llama}/src/kompute-shaders/op_mul_mat_f16.comp (100%) rename {ggml => ggml_llama}/src/kompute-shaders/op_mul_mat_mat_f32.comp (100%) rename {ggml => ggml_llama}/src/kompute-shaders/op_mul_mat_q4_0.comp (100%) rename {ggml => ggml_llama}/src/kompute-shaders/op_mul_mat_q4_1.comp (100%) rename {ggml => ggml_llama}/src/kompute-shaders/op_mul_mat_q6_k.comp (100%) rename {ggml => ggml_llama}/src/kompute-shaders/op_mul_mat_q8_0.comp (100%) rename {ggml => ggml_llama}/src/kompute-shaders/op_mul_mv_q_n.comp (100%) rename {ggml => ggml_llama}/src/kompute-shaders/op_mul_mv_q_n_pre.comp (100%) rename {ggml => ggml_llama}/src/kompute-shaders/op_norm.comp (100%) rename {ggml => ggml_llama}/src/kompute-shaders/op_relu.comp (100%) rename {ggml => ggml_llama}/src/kompute-shaders/op_rmsnorm.comp (100%) rename {ggml => ggml_llama}/src/kompute-shaders/op_rope_f16.comp (100%) rename {ggml => ggml_llama}/src/kompute-shaders/op_rope_f32.comp (100%) rename {ggml => ggml_llama}/src/kompute-shaders/op_scale.comp (100%) rename {ggml => ggml_llama}/src/kompute-shaders/op_scale_8.comp (100%) rename {ggml => ggml_llama}/src/kompute-shaders/op_silu.comp (100%) rename {ggml => ggml_llama}/src/kompute-shaders/op_softmax.comp (100%) rename {ggml => ggml_llama}/src/kompute-shaders/rope_common.comp (100%) rename {ggml => ggml_llama}/src/llamafile/sgemm.cpp (100%) rename {ggml => ggml_llama}/src/llamafile/sgemm.h (100%) rename {ggml => ggml_llama}/src/vulkan-shaders/CMakeLists.txt (100%) rename {ggml => ggml_llama}/src/vulkan-shaders/acc.comp (100%) rename {ggml => ggml_llama}/src/vulkan-shaders/add.comp (100%) rename {ggml => ggml_llama}/src/vulkan-shaders/argsort.comp (100%) rename {ggml => ggml_llama}/src/vulkan-shaders/clamp.comp (100%) rename {ggml => ggml_llama}/src/vulkan-shaders/concat.comp (100%) rename {ggml => ggml_llama}/src/vulkan-shaders/copy.comp (100%) rename {ggml => ggml_llama}/src/vulkan-shaders/cos.comp (100%) rename {ggml => ggml_llama}/src/vulkan-shaders/dequant_f32.comp (100%) rename {ggml => ggml_llama}/src/vulkan-shaders/dequant_funcs.comp (100%) rename {ggml => ggml_llama}/src/vulkan-shaders/dequant_head.comp (100%) rename {ggml => ggml_llama}/src/vulkan-shaders/dequant_iq4_nl.comp (100%) rename {ggml => ggml_llama}/src/vulkan-shaders/dequant_q2_k.comp (100%) rename {ggml => ggml_llama}/src/vulkan-shaders/dequant_q3_k.comp (100%) rename {ggml => ggml_llama}/src/vulkan-shaders/dequant_q4_0.comp (100%) rename {ggml => ggml_llama}/src/vulkan-shaders/dequant_q4_1.comp (100%) rename {ggml => ggml_llama}/src/vulkan-shaders/dequant_q4_k.comp (100%) rename {ggml => ggml_llama}/src/vulkan-shaders/dequant_q5_0.comp (100%) rename {ggml => ggml_llama}/src/vulkan-shaders/dequant_q5_1.comp (100%) rename {ggml => ggml_llama}/src/vulkan-shaders/dequant_q5_k.comp (100%) rename {ggml => ggml_llama}/src/vulkan-shaders/dequant_q6_k.comp (100%) rename {ggml => ggml_llama}/src/vulkan-shaders/dequant_q8_0.comp (100%) rename {ggml => ggml_llama}/src/vulkan-shaders/diag_mask_inf.comp (100%) rename {ggml => ggml_llama}/src/vulkan-shaders/div.comp (100%) rename {ggml => ggml_llama}/src/vulkan-shaders/gelu.comp (100%) rename {ggml => ggml_llama}/src/vulkan-shaders/gelu_quick.comp (100%) rename {ggml => ggml_llama}/src/vulkan-shaders/generic_binary_head.comp (100%) rename {ggml => ggml_llama}/src/vulkan-shaders/generic_head.comp (100%) rename {ggml => ggml_llama}/src/vulkan-shaders/generic_unary_head.comp (100%) rename {ggml => ggml_llama}/src/vulkan-shaders/get_rows.comp (100%) rename {ggml => ggml_llama}/src/vulkan-shaders/get_rows_quant.comp (100%) rename {ggml => ggml_llama}/src/vulkan-shaders/group_norm.comp (100%) rename {ggml => ggml_llama}/src/vulkan-shaders/im2col.comp (100%) rename {ggml => ggml_llama}/src/vulkan-shaders/leaky_relu.comp (100%) rename {ggml => ggml_llama}/src/vulkan-shaders/mul.comp (100%) rename {ggml => ggml_llama}/src/vulkan-shaders/mul_mat_split_k_reduce.comp (100%) rename {ggml => ggml_llama}/src/vulkan-shaders/mul_mat_vec.comp (100%) rename {ggml => ggml_llama}/src/vulkan-shaders/mul_mat_vec_base.comp (100%) rename {ggml => ggml_llama}/src/vulkan-shaders/mul_mat_vec_nc.comp (100%) rename {ggml => ggml_llama}/src/vulkan-shaders/mul_mat_vec_p021.comp (100%) rename {ggml => ggml_llama}/src/vulkan-shaders/mul_mat_vec_q2_k.comp (100%) rename {ggml => ggml_llama}/src/vulkan-shaders/mul_mat_vec_q3_k.comp (100%) rename {ggml => ggml_llama}/src/vulkan-shaders/mul_mat_vec_q4_k.comp (100%) rename {ggml => ggml_llama}/src/vulkan-shaders/mul_mat_vec_q5_k.comp (100%) rename {ggml => ggml_llama}/src/vulkan-shaders/mul_mat_vec_q6_k.comp (100%) rename {ggml => ggml_llama}/src/vulkan-shaders/mul_mm.comp (100%) rename {ggml => ggml_llama}/src/vulkan-shaders/norm.comp (100%) rename {ggml => ggml_llama}/src/vulkan-shaders/pad.comp (100%) rename {ggml => ggml_llama}/src/vulkan-shaders/relu.comp (100%) rename {ggml => ggml_llama}/src/vulkan-shaders/repeat.comp (100%) rename {ggml => ggml_llama}/src/vulkan-shaders/rms_norm.comp (100%) rename {ggml => ggml_llama}/src/vulkan-shaders/rope_head.comp (100%) rename {ggml => ggml_llama}/src/vulkan-shaders/rope_neox.comp (100%) rename {ggml => ggml_llama}/src/vulkan-shaders/rope_norm.comp (100%) rename {ggml => ggml_llama}/src/vulkan-shaders/scale.comp (100%) rename {ggml => ggml_llama}/src/vulkan-shaders/silu.comp (100%) rename {ggml => ggml_llama}/src/vulkan-shaders/sin.comp (100%) rename {ggml => ggml_llama}/src/vulkan-shaders/soft_max.comp (100%) rename {ggml => ggml_llama}/src/vulkan-shaders/square.comp (100%) rename {ggml => ggml_llama}/src/vulkan-shaders/sum_rows.comp (100%) rename {ggml => ggml_llama}/src/vulkan-shaders/tanh.comp (100%) rename {ggml => ggml_llama}/src/vulkan-shaders/timestep_embedding.comp (100%) rename {ggml => ggml_llama}/src/vulkan-shaders/types.comp (100%) rename {ggml => ggml_llama}/src/vulkan-shaders/upscale.comp (100%) rename {ggml => ggml_llama}/src/vulkan-shaders/vulkan-shaders-gen.cpp (100%) diff --git a/CMakeLists.txt b/CMakeLists.txt index a31320635..cf4a48f43 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -112,9 +112,9 @@ llama_option_depr(WARNING LLAMA_CANN GGML_CANN) # build the library # -if (NOT TARGET ggml) - add_subdirectory(ggml) - # ... otherwise assume ggml is added by a parent CMakeLists.txt +if (NOT TARGET ggml_llama) + add_subdirectory(ggml_llama) + # ... otherwise assume ggml_llama is added by a parent CMakeLists.txt endif() add_subdirectory(src) @@ -139,11 +139,11 @@ set(LLAMA_BIN_INSTALL_DIR ${CMAKE_INSTALL_BINDIR} CACHE PATH "Location o # determining _precisely_ which defines are necessary for the llama-config # package. # -get_target_property(GGML_DIRECTORY ggml SOURCE_DIR) +get_target_property(GGML_DIRECTORY ggml_llama SOURCE_DIR) get_directory_property(GGML_DIR_DEFINES DIRECTORY ${GGML_DIRECTORY} COMPILE_DEFINITIONS) -get_target_property(GGML_TARGET_DEFINES ggml COMPILE_DEFINITIONS) +get_target_property(GGML_TARGET_DEFINES ggml_llama COMPILE_DEFINITIONS) set(GGML_TRANSIENT_DEFINES ${GGML_TARGET_DEFINES} ${GGML_DIR_DEFINES}) -get_target_property(GGML_LINK_LIBRARIES ggml LINK_LIBRARIES) +get_target_property(GGML_LINK_LIBRARIES ggml_llama LINK_LIBRARIES) set_target_properties(llama PROPERTIES PUBLIC_HEADER ${CMAKE_CURRENT_SOURCE_DIR}/include/llama.h) install(TARGETS llama LIBRARY PUBLIC_HEADER) diff --git a/Makefile b/Makefile index 332496cfc..90364d0b9 100644 --- a/Makefile +++ b/Makefile @@ -876,7 +876,7 @@ ifdef GGML_METAL_NDEBUG endif ifdef GGML_METAL_EMBED_LIBRARY MK_CPPFLAGS += -DGGML_METAL_EMBED_LIBRARY - OBJ_GGML += ggml/src/ggml-metal-embed.o + OBJ_GGML += ggml/src/ggml-metal-embed-llama.o endif endif # GGML_METAL @@ -888,20 +888,20 @@ ggml/src/ggml-metal.o: \ $(CC) $(CFLAGS) -c $< -o $@ ifdef GGML_METAL_EMBED_LIBRARY -ggml/src/ggml-metal-embed.o: \ +ggml/src/ggml-metal-embed-llama.o: \ ggml/src/ggml-metal.metal \ ggml/src/ggml-common.h @echo "Embedding Metal library" - @sed -e '/#include "ggml-common.h"/r ggml/src/ggml-common.h' -e '/#include "ggml-common.h"/d' < ggml/src/ggml-metal.metal > ggml/src/ggml-metal-embed.metal + @sed -e '/#include "ggml-common.h"/r ggml/src/ggml-common.h' -e '/#include "ggml-common.h"/d' < ggml/src/ggml-metal.metal > ggml/src/ggml-metal-embed-llama.metal $(eval TEMP_ASSEMBLY=$(shell mktemp -d)) @echo ".section __DATA, __ggml_metallib" > $(TEMP_ASSEMBLY)/ggml-metal-embed.s @echo ".globl _ggml_metallib_start" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s @echo "_ggml_metallib_start:" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s - @echo ".incbin \"ggml/src/ggml-metal-embed.metal\"" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s + @echo ".incbin \"ggml/src/ggml-metal-embed-llama.metal\"" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s @echo ".globl _ggml_metallib_end" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s @echo "_ggml_metallib_end:" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s - $(CC) $(CFLAGS) -c $(TEMP_ASSEMBLY)/ggml-metal-embed.s -o $@ - @rm -f ${TEMP_ASSEMBLY}/ggml-metal-embed.s + $(CC) $(CFLAGS) -c $(TEMP_ASSEMBLY)/ggml-metal-embed-llama.s -o $@ + @rm -f ${TEMP_ASSEMBLY}/ggml-metal-embed-llama.s @rmdir ${TEMP_ASSEMBLY} endif endif # GGML_METAL @@ -1213,7 +1213,7 @@ clean: rm -vrf ggml/src/*.o rm -rvf ggml/src/llamafile/*.o rm -rvf common/build-info.cpp - rm -vrf ggml/src/ggml-metal-embed.metal + rm -vrf ggml/src/ggml-metal-embed-llama.metal rm -vrf ggml/src/ggml-cuda/*.o rm -vrf ggml/src/ggml-cuda/template-instances/*.o rm -rvf $(BUILD_TARGETS) diff --git a/examples/gguf-hash/CMakeLists.txt b/examples/gguf-hash/CMakeLists.txt index 633f45535..6ef93bcfb 100644 --- a/examples/gguf-hash/CMakeLists.txt +++ b/examples/gguf-hash/CMakeLists.txt @@ -11,5 +11,5 @@ target_link_libraries(${TARGET} PRIVATE sha1) add_library(sha256 OBJECT deps/sha256/sha256.c deps/sha256/sha256.h) target_link_libraries(${TARGET} PRIVATE sha256) -target_link_libraries(${TARGET} PRIVATE ggml ${CMAKE_THREAD_LIBS_INIT}) +target_link_libraries(${TARGET} PRIVATE ggml_llama ${CMAKE_THREAD_LIBS_INIT}) target_compile_features(${TARGET} PRIVATE cxx_std_11) diff --git a/examples/gguf/CMakeLists.txt b/examples/gguf/CMakeLists.txt index a9569b411..dcdbb0f14 100644 --- a/examples/gguf/CMakeLists.txt +++ b/examples/gguf/CMakeLists.txt @@ -1,5 +1,5 @@ set(TARGET llama-gguf) add_executable(${TARGET} gguf.cpp) install(TARGETS ${TARGET} RUNTIME) -target_link_libraries(${TARGET} PRIVATE ggml ${CMAKE_THREAD_LIBS_INIT}) +target_link_libraries(${TARGET} PRIVATE ggml_llama ${CMAKE_THREAD_LIBS_INIT}) target_compile_features(${TARGET} PRIVATE cxx_std_11) diff --git a/examples/llava/CMakeLists.txt b/examples/llava/CMakeLists.txt index bbf5fec58..ab992e047 100644 --- a/examples/llava/CMakeLists.txt +++ b/examples/llava/CMakeLists.txt @@ -5,7 +5,7 @@ add_library(llava OBJECT clip.h ) -target_link_libraries(llava PRIVATE ggml llama ${CMAKE_THREAD_LIBS_INIT}) +target_link_libraries(llava PRIVATE ggml_llama llama ${CMAKE_THREAD_LIBS_INIT}) target_include_directories(llava PUBLIC .) target_include_directories(llava PUBLIC ../..) @@ -18,7 +18,7 @@ if (BUILD_SHARED_LIBS) set_target_properties(llava PROPERTIES POSITION_INDEPENDENT_CODE ON) target_compile_definitions(llava PRIVATE LLAMA_SHARED LLAMA_BUILD) add_library(llava_shared SHARED $) - target_link_libraries(llava_shared PRIVATE ggml llama ${CMAKE_THREAD_LIBS_INIT}) + target_link_libraries(llava_shared PRIVATE ggml_llama llama ${CMAKE_THREAD_LIBS_INIT}) install(TARGETS llava_shared LIBRARY) endif() diff --git a/examples/rpc/CMakeLists.txt b/examples/rpc/CMakeLists.txt index ae48fb98d..aa1f0175b 100644 --- a/examples/rpc/CMakeLists.txt +++ b/examples/rpc/CMakeLists.txt @@ -1,2 +1,2 @@ add_executable(rpc-server rpc-server.cpp) -target_link_libraries(rpc-server PRIVATE ggml llama) +target_link_libraries(rpc-server PRIVATE ggml_llama llama) diff --git a/ggml/src/kompute b/ggml/src/kompute deleted file mode 160000 index 4565194ed..000000000 --- a/ggml/src/kompute +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 4565194ed7c32d1d2efa32ceab4d3c6cae006306 diff --git a/ggml/.gitignore b/ggml_llama/.gitignore similarity index 100% rename from ggml/.gitignore rename to ggml_llama/.gitignore diff --git a/ggml/CMakeLists.txt b/ggml_llama/CMakeLists.txt similarity index 96% rename from ggml/CMakeLists.txt rename to ggml_llama/CMakeLists.txt index cc1685884..df37a54cc 100644 --- a/ggml/CMakeLists.txt +++ b/ggml_llama/CMakeLists.txt @@ -1,5 +1,5 @@ cmake_minimum_required(VERSION 3.14) # for add_link_options and implicit target directories. -project("ggml" C CXX) +project("ggml_llama" C CXX) include(CheckIncludeFileCXX) set(CMAKE_EXPORT_COMPILE_COMMANDS ON) @@ -216,14 +216,14 @@ set(GGML_PUBLIC_HEADERS include/ggml-sycl.h include/ggml-vulkan.h) -set_target_properties(ggml PROPERTIES PUBLIC_HEADER "${GGML_PUBLIC_HEADERS}") +set_target_properties(ggml_llama PROPERTIES PUBLIC_HEADER "${GGML_PUBLIC_HEADERS}") #if (GGML_METAL) -# set_target_properties(ggml PROPERTIES RESOURCE "${CMAKE_CURRENT_SOURCE_DIR}/src/ggml-metal.metal") +# set_target_properties(ggml_llama PROPERTIES RESOURCE "${CMAKE_CURRENT_SOURCE_DIR}/src/ggml-metal.metal") #endif() -install(TARGETS ggml PUBLIC_HEADER) +install(TARGETS ggml_llama PUBLIC_HEADER) if (BUILD_SHARED_LIBS) - install(TARGETS ggml LIBRARY) + install(TARGETS ggml_llama LIBRARY) endif() if (GGML_METAL) diff --git a/ggml/cmake/FindSIMD.cmake b/ggml_llama/cmake/FindSIMD.cmake similarity index 100% rename from ggml/cmake/FindSIMD.cmake rename to ggml_llama/cmake/FindSIMD.cmake diff --git a/ggml/include/ggml-alloc.h b/ggml_llama/include/ggml-alloc.h similarity index 100% rename from ggml/include/ggml-alloc.h rename to ggml_llama/include/ggml-alloc.h diff --git a/ggml/include/ggml-backend.h b/ggml_llama/include/ggml-backend.h similarity index 100% rename from ggml/include/ggml-backend.h rename to ggml_llama/include/ggml-backend.h diff --git a/ggml/include/ggml-blas.h b/ggml_llama/include/ggml-blas.h similarity index 100% rename from ggml/include/ggml-blas.h rename to ggml_llama/include/ggml-blas.h diff --git a/ggml/include/ggml-cann.h b/ggml_llama/include/ggml-cann.h similarity index 100% rename from ggml/include/ggml-cann.h rename to ggml_llama/include/ggml-cann.h diff --git a/ggml/include/ggml-cuda.h b/ggml_llama/include/ggml-cuda.h similarity index 100% rename from ggml/include/ggml-cuda.h rename to ggml_llama/include/ggml-cuda.h diff --git a/ggml/include/ggml-kompute.h b/ggml_llama/include/ggml-kompute.h similarity index 100% rename from ggml/include/ggml-kompute.h rename to ggml_llama/include/ggml-kompute.h diff --git a/ggml/include/ggml-metal.h b/ggml_llama/include/ggml-metal.h similarity index 100% rename from ggml/include/ggml-metal.h rename to ggml_llama/include/ggml-metal.h diff --git a/ggml/include/ggml-rpc.h b/ggml_llama/include/ggml-rpc.h similarity index 100% rename from ggml/include/ggml-rpc.h rename to ggml_llama/include/ggml-rpc.h diff --git a/ggml/include/ggml-sycl.h b/ggml_llama/include/ggml-sycl.h similarity index 100% rename from ggml/include/ggml-sycl.h rename to ggml_llama/include/ggml-sycl.h diff --git a/ggml/include/ggml-vulkan.h b/ggml_llama/include/ggml-vulkan.h similarity index 100% rename from ggml/include/ggml-vulkan.h rename to ggml_llama/include/ggml-vulkan.h diff --git a/ggml/include/ggml.h b/ggml_llama/include/ggml.h similarity index 100% rename from ggml/include/ggml.h rename to ggml_llama/include/ggml.h diff --git a/ggml/src/CMakeLists.txt b/ggml_llama/src/CMakeLists.txt similarity index 98% rename from ggml/src/CMakeLists.txt rename to ggml_llama/src/CMakeLists.txt index ff84b9bb5..e867c7d74 100644 --- a/ggml/src/CMakeLists.txt +++ b/ggml_llama/src/CMakeLists.txt @@ -70,8 +70,8 @@ if (GGML_METAL) file(MAKE_DIRECTORY "${CMAKE_BINARY_DIR}/autogenerated") # merge ggml-common.h and ggml-metal.metal into a single file - set(METALLIB_EMBED_ASM "${CMAKE_BINARY_DIR}/autogenerated/ggml-metal-embed.s") - set(METALLIB_SOURCE_EMBED "${CMAKE_BINARY_DIR}/autogenerated/ggml-metal-embed.metal") + set(METALLIB_EMBED_ASM "${CMAKE_BINARY_DIR}/autogenerated/ggml-metal-embed-llama.s") + set(METALLIB_SOURCE_EMBED "${CMAKE_BINARY_DIR}/autogenerated/ggml-metal-embed-llama.metal") add_custom_command( OUTPUT ${METALLIB_EMBED_ASM} @@ -827,7 +827,7 @@ if (GGML_CPU_HBM) add_compile_definitions(GGML_USE_CPU_HBM) - target_link_libraries(ggml PUBLIC memkind) + target_link_libraries(ggml_llama PUBLIC memkind) endif() if (GGML_CANN) @@ -1291,7 +1291,7 @@ endif() # ggml -add_library(ggml +add_library(ggml_llama ../include/ggml.h ../include/ggml-alloc.h ../include/ggml-backend.h @@ -1315,25 +1315,25 @@ add_library(ggml ) if (EMSCRIPTEN) - set_target_properties(ggml PROPERTIES COMPILE_FLAGS "-msimd128") + set_target_properties(ggml_llama PROPERTIES COMPILE_FLAGS "-msimd128") endif() -target_compile_definitions(ggml PUBLIC ${GGML_CDEF_PUBLIC}) -target_include_directories(ggml PUBLIC ../include) -target_include_directories(ggml PRIVATE . ${GGML_EXTRA_INCLUDES}) -target_link_directories(ggml PRIVATE ${GGML_EXTRA_LIBDIRS}) -target_compile_features (ggml PRIVATE c_std_11) # don't bump +target_compile_definitions(ggml_llama PUBLIC ${GGML_CDEF_PUBLIC}) +target_include_directories(ggml_llama PUBLIC ../include) +target_include_directories(ggml_llama PRIVATE . ${GGML_EXTRA_INCLUDES}) +target_link_directories(ggml_llama PRIVATE ${GGML_EXTRA_LIBDIRS}) +target_compile_features (ggml_llama PRIVATE c_std_11) # don't bump -target_link_libraries(ggml PRIVATE Threads::Threads ${GGML_EXTRA_LIBS}) +target_link_libraries(ggml_llama PRIVATE Threads::Threads ${GGML_EXTRA_LIBS}) find_library(MATH_LIBRARY m) if (MATH_LIBRARY) if (NOT WIN32 OR NOT GGML_SYCL) - target_link_libraries(ggml PRIVATE ${MATH_LIBRARY}) + target_link_libraries(ggml_llama PRIVATE ${MATH_LIBRARY}) endif() endif() if (BUILD_SHARED_LIBS) - set_target_properties(ggml PROPERTIES POSITION_INDEPENDENT_CODE ON) - target_compile_definitions(ggml PRIVATE GGML_SHARED GGML_BUILD) + set_target_properties(ggml_llama PROPERTIES POSITION_INDEPENDENT_CODE ON) + target_compile_definitions(ggml_llama PRIVATE GGML_SHARED GGML_BUILD) endif() diff --git a/ggml/src/ggml-aarch64.c b/ggml_llama/src/ggml-aarch64.c similarity index 100% rename from ggml/src/ggml-aarch64.c rename to ggml_llama/src/ggml-aarch64.c diff --git a/ggml/src/ggml-aarch64.h b/ggml_llama/src/ggml-aarch64.h similarity index 100% rename from ggml/src/ggml-aarch64.h rename to ggml_llama/src/ggml-aarch64.h diff --git a/ggml/src/ggml-alloc.c b/ggml_llama/src/ggml-alloc.c similarity index 100% rename from ggml/src/ggml-alloc.c rename to ggml_llama/src/ggml-alloc.c diff --git a/ggml/src/ggml-backend-impl.h b/ggml_llama/src/ggml-backend-impl.h similarity index 100% rename from ggml/src/ggml-backend-impl.h rename to ggml_llama/src/ggml-backend-impl.h diff --git a/ggml/src/ggml-backend.c b/ggml_llama/src/ggml-backend.c similarity index 100% rename from ggml/src/ggml-backend.c rename to ggml_llama/src/ggml-backend.c diff --git a/ggml/src/ggml-blas.cpp b/ggml_llama/src/ggml-blas.cpp similarity index 100% rename from ggml/src/ggml-blas.cpp rename to ggml_llama/src/ggml-blas.cpp diff --git a/ggml/src/ggml-cann.cpp b/ggml_llama/src/ggml-cann.cpp similarity index 100% rename from ggml/src/ggml-cann.cpp rename to ggml_llama/src/ggml-cann.cpp diff --git a/ggml/src/ggml-cann/.clang-format b/ggml_llama/src/ggml-cann/.clang-format similarity index 100% rename from ggml/src/ggml-cann/.clang-format rename to ggml_llama/src/ggml-cann/.clang-format diff --git a/ggml/src/ggml-cann/Doxyfile b/ggml_llama/src/ggml-cann/Doxyfile similarity index 100% rename from ggml/src/ggml-cann/Doxyfile rename to ggml_llama/src/ggml-cann/Doxyfile diff --git a/ggml/src/ggml-cann/acl_tensor.cpp b/ggml_llama/src/ggml-cann/acl_tensor.cpp similarity index 100% rename from ggml/src/ggml-cann/acl_tensor.cpp rename to ggml_llama/src/ggml-cann/acl_tensor.cpp diff --git a/ggml/src/ggml-cann/acl_tensor.h b/ggml_llama/src/ggml-cann/acl_tensor.h similarity index 100% rename from ggml/src/ggml-cann/acl_tensor.h rename to ggml_llama/src/ggml-cann/acl_tensor.h diff --git a/ggml/src/ggml-cann/aclnn_ops.cpp b/ggml_llama/src/ggml-cann/aclnn_ops.cpp similarity index 100% rename from ggml/src/ggml-cann/aclnn_ops.cpp rename to ggml_llama/src/ggml-cann/aclnn_ops.cpp diff --git a/ggml/src/ggml-cann/aclnn_ops.h b/ggml_llama/src/ggml-cann/aclnn_ops.h similarity index 100% rename from ggml/src/ggml-cann/aclnn_ops.h rename to ggml_llama/src/ggml-cann/aclnn_ops.h diff --git a/ggml/src/ggml-cann/common.h b/ggml_llama/src/ggml-cann/common.h similarity index 100% rename from ggml/src/ggml-cann/common.h rename to ggml_llama/src/ggml-cann/common.h diff --git a/ggml/src/ggml-cann/kernels/CMakeLists.txt b/ggml_llama/src/ggml-cann/kernels/CMakeLists.txt similarity index 100% rename from ggml/src/ggml-cann/kernels/CMakeLists.txt rename to ggml_llama/src/ggml-cann/kernels/CMakeLists.txt diff --git a/ggml/src/ggml-cann/kernels/ascendc_kernels.h b/ggml_llama/src/ggml-cann/kernels/ascendc_kernels.h similarity index 100% rename from ggml/src/ggml-cann/kernels/ascendc_kernels.h rename to ggml_llama/src/ggml-cann/kernels/ascendc_kernels.h diff --git a/ggml/src/ggml-cann/kernels/dup.cpp b/ggml_llama/src/ggml-cann/kernels/dup.cpp similarity index 100% rename from ggml/src/ggml-cann/kernels/dup.cpp rename to ggml_llama/src/ggml-cann/kernels/dup.cpp diff --git a/ggml/src/ggml-cann/kernels/get_row_f16.cpp b/ggml_llama/src/ggml-cann/kernels/get_row_f16.cpp similarity index 100% rename from ggml/src/ggml-cann/kernels/get_row_f16.cpp rename to ggml_llama/src/ggml-cann/kernels/get_row_f16.cpp diff --git a/ggml/src/ggml-cann/kernels/get_row_f32.cpp b/ggml_llama/src/ggml-cann/kernels/get_row_f32.cpp similarity index 100% rename from ggml/src/ggml-cann/kernels/get_row_f32.cpp rename to ggml_llama/src/ggml-cann/kernels/get_row_f32.cpp diff --git a/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp b/ggml_llama/src/ggml-cann/kernels/get_row_q4_0.cpp similarity index 100% rename from ggml/src/ggml-cann/kernels/get_row_q4_0.cpp rename to ggml_llama/src/ggml-cann/kernels/get_row_q4_0.cpp diff --git a/ggml/src/ggml-cann/kernels/get_row_q8_0.cpp b/ggml_llama/src/ggml-cann/kernels/get_row_q8_0.cpp similarity index 100% rename from ggml/src/ggml-cann/kernels/get_row_q8_0.cpp rename to ggml_llama/src/ggml-cann/kernels/get_row_q8_0.cpp diff --git a/ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp b/ggml_llama/src/ggml-cann/kernels/quantize_f16_q8_0.cpp similarity index 100% rename from ggml/src/ggml-cann/kernels/quantize_f16_q8_0.cpp rename to ggml_llama/src/ggml-cann/kernels/quantize_f16_q8_0.cpp diff --git a/ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp b/ggml_llama/src/ggml-cann/kernels/quantize_f32_q8_0.cpp similarity index 100% rename from ggml/src/ggml-cann/kernels/quantize_f32_q8_0.cpp rename to ggml_llama/src/ggml-cann/kernels/quantize_f32_q8_0.cpp diff --git a/ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp b/ggml_llama/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp similarity index 100% rename from ggml/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp rename to ggml_llama/src/ggml-cann/kernels/quantize_float_to_q4_0.cpp diff --git a/ggml/src/ggml-common.h b/ggml_llama/src/ggml-common.h similarity index 100% rename from ggml/src/ggml-common.h rename to ggml_llama/src/ggml-common.h diff --git a/ggml/src/ggml-cuda.cu b/ggml_llama/src/ggml-cuda.cu similarity index 100% rename from ggml/src/ggml-cuda.cu rename to ggml_llama/src/ggml-cuda.cu diff --git a/ggml/src/ggml-cuda/acc.cu b/ggml_llama/src/ggml-cuda/acc.cu similarity index 100% rename from ggml/src/ggml-cuda/acc.cu rename to ggml_llama/src/ggml-cuda/acc.cu diff --git a/ggml/src/ggml-cuda/acc.cuh b/ggml_llama/src/ggml-cuda/acc.cuh similarity index 100% rename from ggml/src/ggml-cuda/acc.cuh rename to ggml_llama/src/ggml-cuda/acc.cuh diff --git a/ggml/src/ggml-cuda/arange.cu b/ggml_llama/src/ggml-cuda/arange.cu similarity index 100% rename from ggml/src/ggml-cuda/arange.cu rename to ggml_llama/src/ggml-cuda/arange.cu diff --git a/ggml/src/ggml-cuda/arange.cuh b/ggml_llama/src/ggml-cuda/arange.cuh similarity index 100% rename from ggml/src/ggml-cuda/arange.cuh rename to ggml_llama/src/ggml-cuda/arange.cuh diff --git a/ggml/src/ggml-cuda/argsort.cu b/ggml_llama/src/ggml-cuda/argsort.cu similarity index 100% rename from ggml/src/ggml-cuda/argsort.cu rename to ggml_llama/src/ggml-cuda/argsort.cu diff --git a/ggml/src/ggml-cuda/argsort.cuh b/ggml_llama/src/ggml-cuda/argsort.cuh similarity index 100% rename from ggml/src/ggml-cuda/argsort.cuh rename to ggml_llama/src/ggml-cuda/argsort.cuh diff --git a/ggml/src/ggml-cuda/binbcast.cu b/ggml_llama/src/ggml-cuda/binbcast.cu similarity index 100% rename from ggml/src/ggml-cuda/binbcast.cu rename to ggml_llama/src/ggml-cuda/binbcast.cu diff --git a/ggml/src/ggml-cuda/binbcast.cuh b/ggml_llama/src/ggml-cuda/binbcast.cuh similarity index 100% rename from ggml/src/ggml-cuda/binbcast.cuh rename to ggml_llama/src/ggml-cuda/binbcast.cuh diff --git a/ggml/src/ggml-cuda/clamp.cu b/ggml_llama/src/ggml-cuda/clamp.cu similarity index 100% rename from ggml/src/ggml-cuda/clamp.cu rename to ggml_llama/src/ggml-cuda/clamp.cu diff --git a/ggml/src/ggml-cuda/clamp.cuh b/ggml_llama/src/ggml-cuda/clamp.cuh similarity index 100% rename from ggml/src/ggml-cuda/clamp.cuh rename to ggml_llama/src/ggml-cuda/clamp.cuh diff --git a/ggml/src/ggml-cuda/common.cuh b/ggml_llama/src/ggml-cuda/common.cuh similarity index 100% rename from ggml/src/ggml-cuda/common.cuh rename to ggml_llama/src/ggml-cuda/common.cuh diff --git a/ggml/src/ggml-cuda/concat.cu b/ggml_llama/src/ggml-cuda/concat.cu similarity index 100% rename from ggml/src/ggml-cuda/concat.cu rename to ggml_llama/src/ggml-cuda/concat.cu diff --git a/ggml/src/ggml-cuda/concat.cuh b/ggml_llama/src/ggml-cuda/concat.cuh similarity index 100% rename from ggml/src/ggml-cuda/concat.cuh rename to ggml_llama/src/ggml-cuda/concat.cuh diff --git a/ggml/src/ggml-cuda/conv-transpose-1d.cu b/ggml_llama/src/ggml-cuda/conv-transpose-1d.cu similarity index 100% rename from ggml/src/ggml-cuda/conv-transpose-1d.cu rename to ggml_llama/src/ggml-cuda/conv-transpose-1d.cu diff --git a/ggml/src/ggml-cuda/conv-transpose-1d.cuh b/ggml_llama/src/ggml-cuda/conv-transpose-1d.cuh similarity index 100% rename from ggml/src/ggml-cuda/conv-transpose-1d.cuh rename to ggml_llama/src/ggml-cuda/conv-transpose-1d.cuh diff --git a/ggml/src/ggml-cuda/convert.cu b/ggml_llama/src/ggml-cuda/convert.cu similarity index 100% rename from ggml/src/ggml-cuda/convert.cu rename to ggml_llama/src/ggml-cuda/convert.cu diff --git a/ggml/src/ggml-cuda/convert.cuh b/ggml_llama/src/ggml-cuda/convert.cuh similarity index 100% rename from ggml/src/ggml-cuda/convert.cuh rename to ggml_llama/src/ggml-cuda/convert.cuh diff --git a/ggml/src/ggml-cuda/cpy.cu b/ggml_llama/src/ggml-cuda/cpy.cu similarity index 100% rename from ggml/src/ggml-cuda/cpy.cu rename to ggml_llama/src/ggml-cuda/cpy.cu diff --git a/ggml/src/ggml-cuda/cpy.cuh b/ggml_llama/src/ggml-cuda/cpy.cuh similarity index 100% rename from ggml/src/ggml-cuda/cpy.cuh rename to ggml_llama/src/ggml-cuda/cpy.cuh diff --git a/ggml/src/ggml-cuda/cross-entropy-loss.cu b/ggml_llama/src/ggml-cuda/cross-entropy-loss.cu similarity index 100% rename from ggml/src/ggml-cuda/cross-entropy-loss.cu rename to ggml_llama/src/ggml-cuda/cross-entropy-loss.cu diff --git a/ggml/src/ggml-cuda/cross-entropy-loss.cuh b/ggml_llama/src/ggml-cuda/cross-entropy-loss.cuh similarity index 100% rename from ggml/src/ggml-cuda/cross-entropy-loss.cuh rename to ggml_llama/src/ggml-cuda/cross-entropy-loss.cuh diff --git a/ggml/src/ggml-cuda/dequantize.cuh b/ggml_llama/src/ggml-cuda/dequantize.cuh similarity index 100% rename from ggml/src/ggml-cuda/dequantize.cuh rename to ggml_llama/src/ggml-cuda/dequantize.cuh diff --git a/ggml/src/ggml-cuda/diagmask.cu b/ggml_llama/src/ggml-cuda/diagmask.cu similarity index 100% rename from ggml/src/ggml-cuda/diagmask.cu rename to ggml_llama/src/ggml-cuda/diagmask.cu diff --git a/ggml/src/ggml-cuda/diagmask.cuh b/ggml_llama/src/ggml-cuda/diagmask.cuh similarity index 100% rename from ggml/src/ggml-cuda/diagmask.cuh rename to ggml_llama/src/ggml-cuda/diagmask.cuh diff --git a/ggml/src/ggml-cuda/dmmv.cu b/ggml_llama/src/ggml-cuda/dmmv.cu similarity index 100% rename from ggml/src/ggml-cuda/dmmv.cu rename to ggml_llama/src/ggml-cuda/dmmv.cu diff --git a/ggml/src/ggml-cuda/dmmv.cuh b/ggml_llama/src/ggml-cuda/dmmv.cuh similarity index 100% rename from ggml/src/ggml-cuda/dmmv.cuh rename to ggml_llama/src/ggml-cuda/dmmv.cuh diff --git a/ggml/src/ggml-cuda/fattn-common.cuh b/ggml_llama/src/ggml-cuda/fattn-common.cuh similarity index 100% rename from ggml/src/ggml-cuda/fattn-common.cuh rename to ggml_llama/src/ggml-cuda/fattn-common.cuh diff --git a/ggml/src/ggml-cuda/fattn-tile-f16.cu b/ggml_llama/src/ggml-cuda/fattn-tile-f16.cu similarity index 100% rename from ggml/src/ggml-cuda/fattn-tile-f16.cu rename to ggml_llama/src/ggml-cuda/fattn-tile-f16.cu diff --git a/ggml/src/ggml-cuda/fattn-tile-f16.cuh b/ggml_llama/src/ggml-cuda/fattn-tile-f16.cuh similarity index 100% rename from ggml/src/ggml-cuda/fattn-tile-f16.cuh rename to ggml_llama/src/ggml-cuda/fattn-tile-f16.cuh diff --git a/ggml/src/ggml-cuda/fattn-tile-f32.cu b/ggml_llama/src/ggml-cuda/fattn-tile-f32.cu similarity index 100% rename from ggml/src/ggml-cuda/fattn-tile-f32.cu rename to ggml_llama/src/ggml-cuda/fattn-tile-f32.cu diff --git a/ggml/src/ggml-cuda/fattn-tile-f32.cuh b/ggml_llama/src/ggml-cuda/fattn-tile-f32.cuh similarity index 100% rename from ggml/src/ggml-cuda/fattn-tile-f32.cuh rename to ggml_llama/src/ggml-cuda/fattn-tile-f32.cuh diff --git a/ggml/src/ggml-cuda/fattn-vec-f16.cuh b/ggml_llama/src/ggml-cuda/fattn-vec-f16.cuh similarity index 100% rename from ggml/src/ggml-cuda/fattn-vec-f16.cuh rename to ggml_llama/src/ggml-cuda/fattn-vec-f16.cuh diff --git a/ggml/src/ggml-cuda/fattn-vec-f32.cuh b/ggml_llama/src/ggml-cuda/fattn-vec-f32.cuh similarity index 100% rename from ggml/src/ggml-cuda/fattn-vec-f32.cuh rename to ggml_llama/src/ggml-cuda/fattn-vec-f32.cuh diff --git a/ggml/src/ggml-cuda/fattn-wmma-f16.cuh b/ggml_llama/src/ggml-cuda/fattn-wmma-f16.cuh similarity index 100% rename from ggml/src/ggml-cuda/fattn-wmma-f16.cuh rename to ggml_llama/src/ggml-cuda/fattn-wmma-f16.cuh diff --git a/ggml/src/ggml-cuda/fattn.cu b/ggml_llama/src/ggml-cuda/fattn.cu similarity index 100% rename from ggml/src/ggml-cuda/fattn.cu rename to ggml_llama/src/ggml-cuda/fattn.cu diff --git a/ggml/src/ggml-cuda/fattn.cuh b/ggml_llama/src/ggml-cuda/fattn.cuh similarity index 100% rename from ggml/src/ggml-cuda/fattn.cuh rename to ggml_llama/src/ggml-cuda/fattn.cuh diff --git a/ggml/src/ggml-cuda/getrows.cu b/ggml_llama/src/ggml-cuda/getrows.cu similarity index 100% rename from ggml/src/ggml-cuda/getrows.cu rename to ggml_llama/src/ggml-cuda/getrows.cu diff --git a/ggml/src/ggml-cuda/getrows.cuh b/ggml_llama/src/ggml-cuda/getrows.cuh similarity index 100% rename from ggml/src/ggml-cuda/getrows.cuh rename to ggml_llama/src/ggml-cuda/getrows.cuh diff --git a/ggml/src/ggml-cuda/im2col.cu b/ggml_llama/src/ggml-cuda/im2col.cu similarity index 100% rename from ggml/src/ggml-cuda/im2col.cu rename to ggml_llama/src/ggml-cuda/im2col.cu diff --git a/ggml/src/ggml-cuda/im2col.cuh b/ggml_llama/src/ggml-cuda/im2col.cuh similarity index 100% rename from ggml/src/ggml-cuda/im2col.cuh rename to ggml_llama/src/ggml-cuda/im2col.cuh diff --git a/ggml/src/ggml-cuda/mma.cuh b/ggml_llama/src/ggml-cuda/mma.cuh similarity index 100% rename from ggml/src/ggml-cuda/mma.cuh rename to ggml_llama/src/ggml-cuda/mma.cuh diff --git a/ggml/src/ggml-cuda/mmq.cu b/ggml_llama/src/ggml-cuda/mmq.cu similarity index 100% rename from ggml/src/ggml-cuda/mmq.cu rename to ggml_llama/src/ggml-cuda/mmq.cu diff --git a/ggml/src/ggml-cuda/mmq.cuh b/ggml_llama/src/ggml-cuda/mmq.cuh similarity index 100% rename from ggml/src/ggml-cuda/mmq.cuh rename to ggml_llama/src/ggml-cuda/mmq.cuh diff --git a/ggml/src/ggml-cuda/mmvq.cu b/ggml_llama/src/ggml-cuda/mmvq.cu similarity index 100% rename from ggml/src/ggml-cuda/mmvq.cu rename to ggml_llama/src/ggml-cuda/mmvq.cu diff --git a/ggml/src/ggml-cuda/mmvq.cuh b/ggml_llama/src/ggml-cuda/mmvq.cuh similarity index 100% rename from ggml/src/ggml-cuda/mmvq.cuh rename to ggml_llama/src/ggml-cuda/mmvq.cuh diff --git a/ggml/src/ggml-cuda/norm.cu b/ggml_llama/src/ggml-cuda/norm.cu similarity index 100% rename from ggml/src/ggml-cuda/norm.cu rename to ggml_llama/src/ggml-cuda/norm.cu diff --git a/ggml/src/ggml-cuda/norm.cuh b/ggml_llama/src/ggml-cuda/norm.cuh similarity index 100% rename from ggml/src/ggml-cuda/norm.cuh rename to ggml_llama/src/ggml-cuda/norm.cuh diff --git a/ggml/src/ggml-cuda/pad.cu b/ggml_llama/src/ggml-cuda/pad.cu similarity index 100% rename from ggml/src/ggml-cuda/pad.cu rename to ggml_llama/src/ggml-cuda/pad.cu diff --git a/ggml/src/ggml-cuda/pad.cuh b/ggml_llama/src/ggml-cuda/pad.cuh similarity index 100% rename from ggml/src/ggml-cuda/pad.cuh rename to ggml_llama/src/ggml-cuda/pad.cuh diff --git a/ggml/src/ggml-cuda/pool2d.cu b/ggml_llama/src/ggml-cuda/pool2d.cu similarity index 100% rename from ggml/src/ggml-cuda/pool2d.cu rename to ggml_llama/src/ggml-cuda/pool2d.cu diff --git a/ggml/src/ggml-cuda/pool2d.cuh b/ggml_llama/src/ggml-cuda/pool2d.cuh similarity index 100% rename from ggml/src/ggml-cuda/pool2d.cuh rename to ggml_llama/src/ggml-cuda/pool2d.cuh diff --git a/ggml/src/ggml-cuda/quantize.cu b/ggml_llama/src/ggml-cuda/quantize.cu similarity index 100% rename from ggml/src/ggml-cuda/quantize.cu rename to ggml_llama/src/ggml-cuda/quantize.cu diff --git a/ggml/src/ggml-cuda/quantize.cuh b/ggml_llama/src/ggml-cuda/quantize.cuh similarity index 100% rename from ggml/src/ggml-cuda/quantize.cuh rename to ggml_llama/src/ggml-cuda/quantize.cuh diff --git a/ggml/src/ggml-cuda/rope.cu b/ggml_llama/src/ggml-cuda/rope.cu similarity index 100% rename from ggml/src/ggml-cuda/rope.cu rename to ggml_llama/src/ggml-cuda/rope.cu diff --git a/ggml/src/ggml-cuda/rope.cuh b/ggml_llama/src/ggml-cuda/rope.cuh similarity index 100% rename from ggml/src/ggml-cuda/rope.cuh rename to ggml_llama/src/ggml-cuda/rope.cuh diff --git a/ggml/src/ggml-cuda/scale.cu b/ggml_llama/src/ggml-cuda/scale.cu similarity index 100% rename from ggml/src/ggml-cuda/scale.cu rename to ggml_llama/src/ggml-cuda/scale.cu diff --git a/ggml/src/ggml-cuda/scale.cuh b/ggml_llama/src/ggml-cuda/scale.cuh similarity index 100% rename from ggml/src/ggml-cuda/scale.cuh rename to ggml_llama/src/ggml-cuda/scale.cuh diff --git a/ggml/src/ggml-cuda/softmax.cu b/ggml_llama/src/ggml-cuda/softmax.cu similarity index 100% rename from ggml/src/ggml-cuda/softmax.cu rename to ggml_llama/src/ggml-cuda/softmax.cu diff --git a/ggml/src/ggml-cuda/softmax.cuh b/ggml_llama/src/ggml-cuda/softmax.cuh similarity index 100% rename from ggml/src/ggml-cuda/softmax.cuh rename to ggml_llama/src/ggml-cuda/softmax.cuh diff --git a/ggml/src/ggml-cuda/sumrows.cu b/ggml_llama/src/ggml-cuda/sumrows.cu similarity index 100% rename from ggml/src/ggml-cuda/sumrows.cu rename to ggml_llama/src/ggml-cuda/sumrows.cu diff --git a/ggml/src/ggml-cuda/sumrows.cuh b/ggml_llama/src/ggml-cuda/sumrows.cuh similarity index 100% rename from ggml/src/ggml-cuda/sumrows.cuh rename to ggml_llama/src/ggml-cuda/sumrows.cuh diff --git a/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-f16.cu b/ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-f16.cu similarity index 100% rename from ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-f16.cu rename to ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-f16.cu diff --git a/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_0.cu b/ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_0.cu similarity index 100% rename from ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_0.cu rename to ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_0.cu diff --git a/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_1.cu b/ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_1.cu similarity index 100% rename from ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_1.cu rename to ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q4_1.cu diff --git a/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_0.cu b/ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_0.cu similarity index 100% rename from ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_0.cu rename to ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_0.cu diff --git a/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_1.cu b/ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_1.cu similarity index 100% rename from ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_1.cu rename to ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q5_1.cu diff --git a/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q8_0.cu b/ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q8_0.cu similarity index 100% rename from ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q8_0.cu rename to ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-f16-q8_0.cu diff --git a/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-f16.cu b/ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-f16.cu similarity index 100% rename from ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-f16.cu rename to ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-f16.cu diff --git a/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_0.cu b/ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_0.cu similarity index 100% rename from ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_0.cu rename to ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_0.cu diff --git a/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_1.cu b/ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_1.cu similarity index 100% rename from ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_1.cu rename to ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q4_1.cu diff --git a/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_0.cu b/ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_0.cu similarity index 100% rename from ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_0.cu rename to ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_0.cu diff --git a/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_1.cu b/ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_1.cu similarity index 100% rename from ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_1.cu rename to ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q5_1.cu diff --git a/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q8_0.cu b/ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q8_0.cu similarity index 100% rename from ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q8_0.cu rename to ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_0-q8_0.cu diff --git a/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-f16.cu b/ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-f16.cu similarity index 100% rename from ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-f16.cu rename to ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-f16.cu diff --git a/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_0.cu b/ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_0.cu similarity index 100% rename from ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_0.cu rename to ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_0.cu diff --git a/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_1.cu b/ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_1.cu similarity index 100% rename from ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_1.cu rename to ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q4_1.cu diff --git a/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_0.cu b/ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_0.cu similarity index 100% rename from ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_0.cu rename to ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_0.cu diff --git a/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_1.cu b/ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_1.cu similarity index 100% rename from ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_1.cu rename to ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q5_1.cu diff --git a/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q8_0.cu b/ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q8_0.cu similarity index 100% rename from ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q8_0.cu rename to ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q4_1-q8_0.cu diff --git a/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-f16.cu b/ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-f16.cu similarity index 100% rename from ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-f16.cu rename to ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-f16.cu diff --git a/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_0.cu b/ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_0.cu similarity index 100% rename from ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_0.cu rename to ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_0.cu diff --git a/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_1.cu b/ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_1.cu similarity index 100% rename from ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_1.cu rename to ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q4_1.cu diff --git a/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_0.cu b/ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_0.cu similarity index 100% rename from ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_0.cu rename to ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_0.cu diff --git a/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_1.cu b/ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_1.cu similarity index 100% rename from ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_1.cu rename to ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q5_1.cu diff --git a/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q8_0.cu b/ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q8_0.cu similarity index 100% rename from ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q8_0.cu rename to ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_0-q8_0.cu diff --git a/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-f16.cu b/ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-f16.cu similarity index 100% rename from ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-f16.cu rename to ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-f16.cu diff --git a/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_0.cu b/ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_0.cu similarity index 100% rename from ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_0.cu rename to ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_0.cu diff --git a/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_1.cu b/ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_1.cu similarity index 100% rename from ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_1.cu rename to ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q4_1.cu diff --git a/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_0.cu b/ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_0.cu similarity index 100% rename from ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_0.cu rename to ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_0.cu diff --git a/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_1.cu b/ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_1.cu similarity index 100% rename from ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_1.cu rename to ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q5_1.cu diff --git a/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q8_0.cu b/ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q8_0.cu similarity index 100% rename from ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q8_0.cu rename to ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q5_1-q8_0.cu diff --git a/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-f16.cu b/ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-f16.cu similarity index 100% rename from ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-f16.cu rename to ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-f16.cu diff --git a/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_0.cu b/ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_0.cu similarity index 100% rename from ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_0.cu rename to ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_0.cu diff --git a/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_1.cu b/ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_1.cu similarity index 100% rename from ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_1.cu rename to ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q4_1.cu diff --git a/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_0.cu b/ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_0.cu similarity index 100% rename from ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_0.cu rename to ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_0.cu diff --git a/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_1.cu b/ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_1.cu similarity index 100% rename from ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_1.cu rename to ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q5_1.cu diff --git a/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q8_0.cu b/ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q8_0.cu similarity index 100% rename from ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q8_0.cu rename to ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs128-q8_0-q8_0.cu diff --git a/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs256-f16-f16.cu b/ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs256-f16-f16.cu similarity index 100% rename from ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs256-f16-f16.cu rename to ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs256-f16-f16.cu diff --git a/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-f16.cu b/ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-f16.cu similarity index 100% rename from ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-f16.cu rename to ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-f16.cu diff --git a/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_0.cu b/ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_0.cu similarity index 100% rename from ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_0.cu rename to ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_0.cu diff --git a/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_1.cu b/ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_1.cu similarity index 100% rename from ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_1.cu rename to ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q4_1.cu diff --git a/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_0.cu b/ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_0.cu similarity index 100% rename from ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_0.cu rename to ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_0.cu diff --git a/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_1.cu b/ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_1.cu similarity index 100% rename from ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_1.cu rename to ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q5_1.cu diff --git a/ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q8_0.cu b/ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q8_0.cu similarity index 100% rename from ggml/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q8_0.cu rename to ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f16-instance-hs64-f16-q8_0.cu diff --git a/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-f16.cu b/ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-f16.cu similarity index 100% rename from ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-f16.cu rename to ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-f16.cu diff --git a/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_0.cu b/ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_0.cu similarity index 100% rename from ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_0.cu rename to ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_0.cu diff --git a/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_1.cu b/ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_1.cu similarity index 100% rename from ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_1.cu rename to ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q4_1.cu diff --git a/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_0.cu b/ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_0.cu similarity index 100% rename from ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_0.cu rename to ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_0.cu diff --git a/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_1.cu b/ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_1.cu similarity index 100% rename from ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_1.cu rename to ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q5_1.cu diff --git a/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q8_0.cu b/ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q8_0.cu similarity index 100% rename from ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q8_0.cu rename to ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-f16-q8_0.cu diff --git a/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-f16.cu b/ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-f16.cu similarity index 100% rename from ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-f16.cu rename to ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-f16.cu diff --git a/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_0.cu b/ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_0.cu similarity index 100% rename from ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_0.cu rename to ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_0.cu diff --git a/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_1.cu b/ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_1.cu similarity index 100% rename from ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_1.cu rename to ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q4_1.cu diff --git a/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_0.cu b/ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_0.cu similarity index 100% rename from ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_0.cu rename to ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_0.cu diff --git a/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_1.cu b/ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_1.cu similarity index 100% rename from ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_1.cu rename to ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q5_1.cu diff --git a/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q8_0.cu b/ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q8_0.cu similarity index 100% rename from ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q8_0.cu rename to ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_0-q8_0.cu diff --git a/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-f16.cu b/ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-f16.cu similarity index 100% rename from ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-f16.cu rename to ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-f16.cu diff --git a/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_0.cu b/ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_0.cu similarity index 100% rename from ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_0.cu rename to ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_0.cu diff --git a/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_1.cu b/ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_1.cu similarity index 100% rename from ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_1.cu rename to ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q4_1.cu diff --git a/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_0.cu b/ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_0.cu similarity index 100% rename from ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_0.cu rename to ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_0.cu diff --git a/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_1.cu b/ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_1.cu similarity index 100% rename from ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_1.cu rename to ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q5_1.cu diff --git a/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q8_0.cu b/ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q8_0.cu similarity index 100% rename from ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q8_0.cu rename to ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q4_1-q8_0.cu diff --git a/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-f16.cu b/ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-f16.cu similarity index 100% rename from ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-f16.cu rename to ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-f16.cu diff --git a/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_0.cu b/ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_0.cu similarity index 100% rename from ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_0.cu rename to ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_0.cu diff --git a/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_1.cu b/ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_1.cu similarity index 100% rename from ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_1.cu rename to ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q4_1.cu diff --git a/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_0.cu b/ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_0.cu similarity index 100% rename from ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_0.cu rename to ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_0.cu diff --git a/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_1.cu b/ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_1.cu similarity index 100% rename from ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_1.cu rename to ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q5_1.cu diff --git a/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q8_0.cu b/ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q8_0.cu similarity index 100% rename from ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q8_0.cu rename to ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_0-q8_0.cu diff --git a/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-f16.cu b/ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-f16.cu similarity index 100% rename from ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-f16.cu rename to ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-f16.cu diff --git a/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_0.cu b/ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_0.cu similarity index 100% rename from ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_0.cu rename to ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_0.cu diff --git a/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_1.cu b/ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_1.cu similarity index 100% rename from ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_1.cu rename to ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q4_1.cu diff --git a/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_0.cu b/ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_0.cu similarity index 100% rename from ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_0.cu rename to ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_0.cu diff --git a/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_1.cu b/ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_1.cu similarity index 100% rename from ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_1.cu rename to ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q5_1.cu diff --git a/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q8_0.cu b/ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q8_0.cu similarity index 100% rename from ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q8_0.cu rename to ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q5_1-q8_0.cu diff --git a/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-f16.cu b/ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-f16.cu similarity index 100% rename from ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-f16.cu rename to ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-f16.cu diff --git a/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_0.cu b/ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_0.cu similarity index 100% rename from ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_0.cu rename to ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_0.cu diff --git a/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_1.cu b/ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_1.cu similarity index 100% rename from ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_1.cu rename to ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q4_1.cu diff --git a/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_0.cu b/ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_0.cu similarity index 100% rename from ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_0.cu rename to ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_0.cu diff --git a/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_1.cu b/ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_1.cu similarity index 100% rename from ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_1.cu rename to ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q5_1.cu diff --git a/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q8_0.cu b/ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q8_0.cu similarity index 100% rename from ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q8_0.cu rename to ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs128-q8_0-q8_0.cu diff --git a/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs256-f16-f16.cu b/ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs256-f16-f16.cu similarity index 100% rename from ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs256-f16-f16.cu rename to ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs256-f16-f16.cu diff --git a/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-f16.cu b/ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-f16.cu similarity index 100% rename from ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-f16.cu rename to ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-f16.cu diff --git a/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_0.cu b/ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_0.cu similarity index 100% rename from ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_0.cu rename to ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_0.cu diff --git a/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_1.cu b/ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_1.cu similarity index 100% rename from ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_1.cu rename to ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q4_1.cu diff --git a/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_0.cu b/ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_0.cu similarity index 100% rename from ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_0.cu rename to ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_0.cu diff --git a/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_1.cu b/ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_1.cu similarity index 100% rename from ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_1.cu rename to ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q5_1.cu diff --git a/ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q8_0.cu b/ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q8_0.cu similarity index 100% rename from ggml/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q8_0.cu rename to ggml_llama/src/ggml-cuda/template-instances/fattn-vec-f32-instance-hs64-f16-q8_0.cu diff --git a/ggml/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb16.cu b/ggml_llama/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb16.cu similarity index 100% rename from ggml/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb16.cu rename to ggml_llama/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb16.cu diff --git a/ggml/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb32.cu b/ggml_llama/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb32.cu similarity index 100% rename from ggml/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb32.cu rename to ggml_llama/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqfloat-cpb32.cu diff --git a/ggml/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb16.cu b/ggml_llama/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb16.cu similarity index 100% rename from ggml/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb16.cu rename to ggml_llama/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb16.cu diff --git a/ggml/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb32.cu b/ggml_llama/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb32.cu similarity index 100% rename from ggml/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb32.cu rename to ggml_llama/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb32.cu diff --git a/ggml/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb8.cu b/ggml_llama/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb8.cu similarity index 100% rename from ggml/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb8.cu rename to ggml_llama/src/ggml-cuda/template-instances/fattn-wmma-f16-instance-kqhalf-cpb8.cu diff --git a/ggml/src/ggml-cuda/template-instances/generate_cu_files.py b/ggml_llama/src/ggml-cuda/template-instances/generate_cu_files.py similarity index 100% rename from ggml/src/ggml-cuda/template-instances/generate_cu_files.py rename to ggml_llama/src/ggml-cuda/template-instances/generate_cu_files.py diff --git a/ggml/src/ggml-cuda/template-instances/mmq-instance-iq1_s.cu b/ggml_llama/src/ggml-cuda/template-instances/mmq-instance-iq1_s.cu similarity index 100% rename from ggml/src/ggml-cuda/template-instances/mmq-instance-iq1_s.cu rename to ggml_llama/src/ggml-cuda/template-instances/mmq-instance-iq1_s.cu diff --git a/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_s.cu b/ggml_llama/src/ggml-cuda/template-instances/mmq-instance-iq2_s.cu similarity index 100% rename from ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_s.cu rename to ggml_llama/src/ggml-cuda/template-instances/mmq-instance-iq2_s.cu diff --git a/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_xs.cu b/ggml_llama/src/ggml-cuda/template-instances/mmq-instance-iq2_xs.cu similarity index 100% rename from ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_xs.cu rename to ggml_llama/src/ggml-cuda/template-instances/mmq-instance-iq2_xs.cu diff --git a/ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_xxs.cu b/ggml_llama/src/ggml-cuda/template-instances/mmq-instance-iq2_xxs.cu similarity index 100% rename from ggml/src/ggml-cuda/template-instances/mmq-instance-iq2_xxs.cu rename to ggml_llama/src/ggml-cuda/template-instances/mmq-instance-iq2_xxs.cu diff --git a/ggml/src/ggml-cuda/template-instances/mmq-instance-iq3_s.cu b/ggml_llama/src/ggml-cuda/template-instances/mmq-instance-iq3_s.cu similarity index 100% rename from ggml/src/ggml-cuda/template-instances/mmq-instance-iq3_s.cu rename to ggml_llama/src/ggml-cuda/template-instances/mmq-instance-iq3_s.cu diff --git a/ggml/src/ggml-cuda/template-instances/mmq-instance-iq3_xxs.cu b/ggml_llama/src/ggml-cuda/template-instances/mmq-instance-iq3_xxs.cu similarity index 100% rename from ggml/src/ggml-cuda/template-instances/mmq-instance-iq3_xxs.cu rename to ggml_llama/src/ggml-cuda/template-instances/mmq-instance-iq3_xxs.cu diff --git a/ggml/src/ggml-cuda/template-instances/mmq-instance-iq4_nl.cu b/ggml_llama/src/ggml-cuda/template-instances/mmq-instance-iq4_nl.cu similarity index 100% rename from ggml/src/ggml-cuda/template-instances/mmq-instance-iq4_nl.cu rename to ggml_llama/src/ggml-cuda/template-instances/mmq-instance-iq4_nl.cu diff --git a/ggml/src/ggml-cuda/template-instances/mmq-instance-iq4_xs.cu b/ggml_llama/src/ggml-cuda/template-instances/mmq-instance-iq4_xs.cu similarity index 100% rename from ggml/src/ggml-cuda/template-instances/mmq-instance-iq4_xs.cu rename to ggml_llama/src/ggml-cuda/template-instances/mmq-instance-iq4_xs.cu diff --git a/ggml/src/ggml-cuda/template-instances/mmq-instance-q2_k.cu b/ggml_llama/src/ggml-cuda/template-instances/mmq-instance-q2_k.cu similarity index 100% rename from ggml/src/ggml-cuda/template-instances/mmq-instance-q2_k.cu rename to ggml_llama/src/ggml-cuda/template-instances/mmq-instance-q2_k.cu diff --git a/ggml/src/ggml-cuda/template-instances/mmq-instance-q3_k.cu b/ggml_llama/src/ggml-cuda/template-instances/mmq-instance-q3_k.cu similarity index 100% rename from ggml/src/ggml-cuda/template-instances/mmq-instance-q3_k.cu rename to ggml_llama/src/ggml-cuda/template-instances/mmq-instance-q3_k.cu diff --git a/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_0.cu b/ggml_llama/src/ggml-cuda/template-instances/mmq-instance-q4_0.cu similarity index 100% rename from ggml/src/ggml-cuda/template-instances/mmq-instance-q4_0.cu rename to ggml_llama/src/ggml-cuda/template-instances/mmq-instance-q4_0.cu diff --git a/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_1.cu b/ggml_llama/src/ggml-cuda/template-instances/mmq-instance-q4_1.cu similarity index 100% rename from ggml/src/ggml-cuda/template-instances/mmq-instance-q4_1.cu rename to ggml_llama/src/ggml-cuda/template-instances/mmq-instance-q4_1.cu diff --git a/ggml/src/ggml-cuda/template-instances/mmq-instance-q4_k.cu b/ggml_llama/src/ggml-cuda/template-instances/mmq-instance-q4_k.cu similarity index 100% rename from ggml/src/ggml-cuda/template-instances/mmq-instance-q4_k.cu rename to ggml_llama/src/ggml-cuda/template-instances/mmq-instance-q4_k.cu diff --git a/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_0.cu b/ggml_llama/src/ggml-cuda/template-instances/mmq-instance-q5_0.cu similarity index 100% rename from ggml/src/ggml-cuda/template-instances/mmq-instance-q5_0.cu rename to ggml_llama/src/ggml-cuda/template-instances/mmq-instance-q5_0.cu diff --git a/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_1.cu b/ggml_llama/src/ggml-cuda/template-instances/mmq-instance-q5_1.cu similarity index 100% rename from ggml/src/ggml-cuda/template-instances/mmq-instance-q5_1.cu rename to ggml_llama/src/ggml-cuda/template-instances/mmq-instance-q5_1.cu diff --git a/ggml/src/ggml-cuda/template-instances/mmq-instance-q5_k.cu b/ggml_llama/src/ggml-cuda/template-instances/mmq-instance-q5_k.cu similarity index 100% rename from ggml/src/ggml-cuda/template-instances/mmq-instance-q5_k.cu rename to ggml_llama/src/ggml-cuda/template-instances/mmq-instance-q5_k.cu diff --git a/ggml/src/ggml-cuda/template-instances/mmq-instance-q6_k.cu b/ggml_llama/src/ggml-cuda/template-instances/mmq-instance-q6_k.cu similarity index 100% rename from ggml/src/ggml-cuda/template-instances/mmq-instance-q6_k.cu rename to ggml_llama/src/ggml-cuda/template-instances/mmq-instance-q6_k.cu diff --git a/ggml/src/ggml-cuda/template-instances/mmq-instance-q8_0.cu b/ggml_llama/src/ggml-cuda/template-instances/mmq-instance-q8_0.cu similarity index 100% rename from ggml/src/ggml-cuda/template-instances/mmq-instance-q8_0.cu rename to ggml_llama/src/ggml-cuda/template-instances/mmq-instance-q8_0.cu diff --git a/ggml/src/ggml-cuda/tsembd.cu b/ggml_llama/src/ggml-cuda/tsembd.cu similarity index 100% rename from ggml/src/ggml-cuda/tsembd.cu rename to ggml_llama/src/ggml-cuda/tsembd.cu diff --git a/ggml/src/ggml-cuda/tsembd.cuh b/ggml_llama/src/ggml-cuda/tsembd.cuh similarity index 100% rename from ggml/src/ggml-cuda/tsembd.cuh rename to ggml_llama/src/ggml-cuda/tsembd.cuh diff --git a/ggml/src/ggml-cuda/unary.cu b/ggml_llama/src/ggml-cuda/unary.cu similarity index 100% rename from ggml/src/ggml-cuda/unary.cu rename to ggml_llama/src/ggml-cuda/unary.cu diff --git a/ggml/src/ggml-cuda/unary.cuh b/ggml_llama/src/ggml-cuda/unary.cuh similarity index 100% rename from ggml/src/ggml-cuda/unary.cuh rename to ggml_llama/src/ggml-cuda/unary.cuh diff --git a/ggml/src/ggml-cuda/upscale.cu b/ggml_llama/src/ggml-cuda/upscale.cu similarity index 100% rename from ggml/src/ggml-cuda/upscale.cu rename to ggml_llama/src/ggml-cuda/upscale.cu diff --git a/ggml/src/ggml-cuda/upscale.cuh b/ggml_llama/src/ggml-cuda/upscale.cuh similarity index 100% rename from ggml/src/ggml-cuda/upscale.cuh rename to ggml_llama/src/ggml-cuda/upscale.cuh diff --git a/ggml/src/ggml-cuda/vecdotq.cuh b/ggml_llama/src/ggml-cuda/vecdotq.cuh similarity index 100% rename from ggml/src/ggml-cuda/vecdotq.cuh rename to ggml_llama/src/ggml-cuda/vecdotq.cuh diff --git a/ggml/src/ggml-cuda/vendors/cuda.h b/ggml_llama/src/ggml-cuda/vendors/cuda.h similarity index 100% rename from ggml/src/ggml-cuda/vendors/cuda.h rename to ggml_llama/src/ggml-cuda/vendors/cuda.h diff --git a/ggml/src/ggml-cuda/vendors/hip.h b/ggml_llama/src/ggml-cuda/vendors/hip.h similarity index 100% rename from ggml/src/ggml-cuda/vendors/hip.h rename to ggml_llama/src/ggml-cuda/vendors/hip.h diff --git a/ggml/src/ggml-cuda/vendors/musa.h b/ggml_llama/src/ggml-cuda/vendors/musa.h similarity index 100% rename from ggml/src/ggml-cuda/vendors/musa.h rename to ggml_llama/src/ggml-cuda/vendors/musa.h diff --git a/ggml/src/ggml-impl.h b/ggml_llama/src/ggml-impl.h similarity index 100% rename from ggml/src/ggml-impl.h rename to ggml_llama/src/ggml-impl.h diff --git a/ggml/src/ggml-kompute.cpp b/ggml_llama/src/ggml-kompute.cpp similarity index 100% rename from ggml/src/ggml-kompute.cpp rename to ggml_llama/src/ggml-kompute.cpp diff --git a/ggml/src/ggml-metal.m b/ggml_llama/src/ggml-metal.m similarity index 100% rename from ggml/src/ggml-metal.m rename to ggml_llama/src/ggml-metal.m diff --git a/ggml/src/ggml-metal.metal b/ggml_llama/src/ggml-metal.metal similarity index 100% rename from ggml/src/ggml-metal.metal rename to ggml_llama/src/ggml-metal.metal diff --git a/ggml/src/ggml-quants.c b/ggml_llama/src/ggml-quants.c similarity index 100% rename from ggml/src/ggml-quants.c rename to ggml_llama/src/ggml-quants.c diff --git a/ggml/src/ggml-quants.h b/ggml_llama/src/ggml-quants.h similarity index 100% rename from ggml/src/ggml-quants.h rename to ggml_llama/src/ggml-quants.h diff --git a/ggml/src/ggml-rpc.cpp b/ggml_llama/src/ggml-rpc.cpp similarity index 100% rename from ggml/src/ggml-rpc.cpp rename to ggml_llama/src/ggml-rpc.cpp diff --git a/ggml/src/ggml-sycl.cpp b/ggml_llama/src/ggml-sycl.cpp similarity index 100% rename from ggml/src/ggml-sycl.cpp rename to ggml_llama/src/ggml-sycl.cpp diff --git a/ggml/src/ggml-sycl/backend.hpp b/ggml_llama/src/ggml-sycl/backend.hpp similarity index 100% rename from ggml/src/ggml-sycl/backend.hpp rename to ggml_llama/src/ggml-sycl/backend.hpp diff --git a/ggml/src/ggml-sycl/common.cpp b/ggml_llama/src/ggml-sycl/common.cpp similarity index 100% rename from ggml/src/ggml-sycl/common.cpp rename to ggml_llama/src/ggml-sycl/common.cpp diff --git a/ggml/src/ggml-sycl/common.hpp b/ggml_llama/src/ggml-sycl/common.hpp similarity index 100% rename from ggml/src/ggml-sycl/common.hpp rename to ggml_llama/src/ggml-sycl/common.hpp diff --git a/ggml/src/ggml-sycl/concat.cpp b/ggml_llama/src/ggml-sycl/concat.cpp similarity index 100% rename from ggml/src/ggml-sycl/concat.cpp rename to ggml_llama/src/ggml-sycl/concat.cpp diff --git a/ggml/src/ggml-sycl/concat.hpp b/ggml_llama/src/ggml-sycl/concat.hpp similarity index 100% rename from ggml/src/ggml-sycl/concat.hpp rename to ggml_llama/src/ggml-sycl/concat.hpp diff --git a/ggml/src/ggml-sycl/conv.cpp b/ggml_llama/src/ggml-sycl/conv.cpp similarity index 100% rename from ggml/src/ggml-sycl/conv.cpp rename to ggml_llama/src/ggml-sycl/conv.cpp diff --git a/ggml/src/ggml-sycl/conv.hpp b/ggml_llama/src/ggml-sycl/conv.hpp similarity index 100% rename from ggml/src/ggml-sycl/conv.hpp rename to ggml_llama/src/ggml-sycl/conv.hpp diff --git a/ggml/src/ggml-sycl/convert.cpp b/ggml_llama/src/ggml-sycl/convert.cpp similarity index 100% rename from ggml/src/ggml-sycl/convert.cpp rename to ggml_llama/src/ggml-sycl/convert.cpp diff --git a/ggml/src/ggml-sycl/convert.hpp b/ggml_llama/src/ggml-sycl/convert.hpp similarity index 100% rename from ggml/src/ggml-sycl/convert.hpp rename to ggml_llama/src/ggml-sycl/convert.hpp diff --git a/ggml/src/ggml-sycl/dequantize.hpp b/ggml_llama/src/ggml-sycl/dequantize.hpp similarity index 100% rename from ggml/src/ggml-sycl/dequantize.hpp rename to ggml_llama/src/ggml-sycl/dequantize.hpp diff --git a/ggml/src/ggml-sycl/dmmv.cpp b/ggml_llama/src/ggml-sycl/dmmv.cpp similarity index 100% rename from ggml/src/ggml-sycl/dmmv.cpp rename to ggml_llama/src/ggml-sycl/dmmv.cpp diff --git a/ggml/src/ggml-sycl/dmmv.hpp b/ggml_llama/src/ggml-sycl/dmmv.hpp similarity index 100% rename from ggml/src/ggml-sycl/dmmv.hpp rename to ggml_llama/src/ggml-sycl/dmmv.hpp diff --git a/ggml/src/ggml-sycl/dpct/helper.hpp b/ggml_llama/src/ggml-sycl/dpct/helper.hpp similarity index 100% rename from ggml/src/ggml-sycl/dpct/helper.hpp rename to ggml_llama/src/ggml-sycl/dpct/helper.hpp diff --git a/ggml/src/ggml-sycl/gemm.hpp b/ggml_llama/src/ggml-sycl/gemm.hpp similarity index 100% rename from ggml/src/ggml-sycl/gemm.hpp rename to ggml_llama/src/ggml-sycl/gemm.hpp diff --git a/ggml/src/ggml-sycl/im2col.cpp b/ggml_llama/src/ggml-sycl/im2col.cpp similarity index 100% rename from ggml/src/ggml-sycl/im2col.cpp rename to ggml_llama/src/ggml-sycl/im2col.cpp diff --git a/ggml/src/ggml-sycl/im2col.hpp b/ggml_llama/src/ggml-sycl/im2col.hpp similarity index 100% rename from ggml/src/ggml-sycl/im2col.hpp rename to ggml_llama/src/ggml-sycl/im2col.hpp diff --git a/ggml/src/ggml-sycl/mmq.cpp b/ggml_llama/src/ggml-sycl/mmq.cpp similarity index 100% rename from ggml/src/ggml-sycl/mmq.cpp rename to ggml_llama/src/ggml-sycl/mmq.cpp diff --git a/ggml/src/ggml-sycl/mmq.hpp b/ggml_llama/src/ggml-sycl/mmq.hpp similarity index 100% rename from ggml/src/ggml-sycl/mmq.hpp rename to ggml_llama/src/ggml-sycl/mmq.hpp diff --git a/ggml/src/ggml-sycl/mmvq.cpp b/ggml_llama/src/ggml-sycl/mmvq.cpp similarity index 100% rename from ggml/src/ggml-sycl/mmvq.cpp rename to ggml_llama/src/ggml-sycl/mmvq.cpp diff --git a/ggml/src/ggml-sycl/mmvq.hpp b/ggml_llama/src/ggml-sycl/mmvq.hpp similarity index 100% rename from ggml/src/ggml-sycl/mmvq.hpp rename to ggml_llama/src/ggml-sycl/mmvq.hpp diff --git a/ggml/src/ggml-sycl/norm.cpp b/ggml_llama/src/ggml-sycl/norm.cpp similarity index 100% rename from ggml/src/ggml-sycl/norm.cpp rename to ggml_llama/src/ggml-sycl/norm.cpp diff --git a/ggml/src/ggml-sycl/norm.hpp b/ggml_llama/src/ggml-sycl/norm.hpp similarity index 100% rename from ggml/src/ggml-sycl/norm.hpp rename to ggml_llama/src/ggml-sycl/norm.hpp diff --git a/ggml/src/ggml-sycl/presets.hpp b/ggml_llama/src/ggml-sycl/presets.hpp similarity index 100% rename from ggml/src/ggml-sycl/presets.hpp rename to ggml_llama/src/ggml-sycl/presets.hpp diff --git a/ggml/src/ggml-sycl/rope.cpp b/ggml_llama/src/ggml-sycl/rope.cpp similarity index 100% rename from ggml/src/ggml-sycl/rope.cpp rename to ggml_llama/src/ggml-sycl/rope.cpp diff --git a/ggml/src/ggml-sycl/rope.hpp b/ggml_llama/src/ggml-sycl/rope.hpp similarity index 100% rename from ggml/src/ggml-sycl/rope.hpp rename to ggml_llama/src/ggml-sycl/rope.hpp diff --git a/ggml/src/ggml-sycl/softmax.cpp b/ggml_llama/src/ggml-sycl/softmax.cpp similarity index 100% rename from ggml/src/ggml-sycl/softmax.cpp rename to ggml_llama/src/ggml-sycl/softmax.cpp diff --git a/ggml/src/ggml-sycl/softmax.hpp b/ggml_llama/src/ggml-sycl/softmax.hpp similarity index 100% rename from ggml/src/ggml-sycl/softmax.hpp rename to ggml_llama/src/ggml-sycl/softmax.hpp diff --git a/ggml/src/ggml-sycl/tsembd.cpp b/ggml_llama/src/ggml-sycl/tsembd.cpp similarity index 100% rename from ggml/src/ggml-sycl/tsembd.cpp rename to ggml_llama/src/ggml-sycl/tsembd.cpp diff --git a/ggml/src/ggml-sycl/tsembd.hpp b/ggml_llama/src/ggml-sycl/tsembd.hpp similarity index 100% rename from ggml/src/ggml-sycl/tsembd.hpp rename to ggml_llama/src/ggml-sycl/tsembd.hpp diff --git a/ggml/src/ggml-sycl/vecdotq.hpp b/ggml_llama/src/ggml-sycl/vecdotq.hpp similarity index 100% rename from ggml/src/ggml-sycl/vecdotq.hpp rename to ggml_llama/src/ggml-sycl/vecdotq.hpp diff --git a/ggml/src/ggml-vulkan.cpp b/ggml_llama/src/ggml-vulkan.cpp similarity index 100% rename from ggml/src/ggml-vulkan.cpp rename to ggml_llama/src/ggml-vulkan.cpp diff --git a/ggml/src/ggml.c b/ggml_llama/src/ggml.c similarity index 100% rename from ggml/src/ggml.c rename to ggml_llama/src/ggml.c diff --git a/ggml/src/kompute-shaders/common.comp b/ggml_llama/src/kompute-shaders/common.comp similarity index 100% rename from ggml/src/kompute-shaders/common.comp rename to ggml_llama/src/kompute-shaders/common.comp diff --git a/ggml/src/kompute-shaders/op_add.comp b/ggml_llama/src/kompute-shaders/op_add.comp similarity index 100% rename from ggml/src/kompute-shaders/op_add.comp rename to ggml_llama/src/kompute-shaders/op_add.comp diff --git a/ggml/src/kompute-shaders/op_addrow.comp b/ggml_llama/src/kompute-shaders/op_addrow.comp similarity index 100% rename from ggml/src/kompute-shaders/op_addrow.comp rename to ggml_llama/src/kompute-shaders/op_addrow.comp diff --git a/ggml/src/kompute-shaders/op_cpy_f16_f16.comp b/ggml_llama/src/kompute-shaders/op_cpy_f16_f16.comp similarity index 100% rename from ggml/src/kompute-shaders/op_cpy_f16_f16.comp rename to ggml_llama/src/kompute-shaders/op_cpy_f16_f16.comp diff --git a/ggml/src/kompute-shaders/op_cpy_f16_f32.comp b/ggml_llama/src/kompute-shaders/op_cpy_f16_f32.comp similarity index 100% rename from ggml/src/kompute-shaders/op_cpy_f16_f32.comp rename to ggml_llama/src/kompute-shaders/op_cpy_f16_f32.comp diff --git a/ggml/src/kompute-shaders/op_cpy_f32_f16.comp b/ggml_llama/src/kompute-shaders/op_cpy_f32_f16.comp similarity index 100% rename from ggml/src/kompute-shaders/op_cpy_f32_f16.comp rename to ggml_llama/src/kompute-shaders/op_cpy_f32_f16.comp diff --git a/ggml/src/kompute-shaders/op_cpy_f32_f32.comp b/ggml_llama/src/kompute-shaders/op_cpy_f32_f32.comp similarity index 100% rename from ggml/src/kompute-shaders/op_cpy_f32_f32.comp rename to ggml_llama/src/kompute-shaders/op_cpy_f32_f32.comp diff --git a/ggml/src/kompute-shaders/op_diagmask.comp b/ggml_llama/src/kompute-shaders/op_diagmask.comp similarity index 100% rename from ggml/src/kompute-shaders/op_diagmask.comp rename to ggml_llama/src/kompute-shaders/op_diagmask.comp diff --git a/ggml/src/kompute-shaders/op_gelu.comp b/ggml_llama/src/kompute-shaders/op_gelu.comp similarity index 100% rename from ggml/src/kompute-shaders/op_gelu.comp rename to ggml_llama/src/kompute-shaders/op_gelu.comp diff --git a/ggml/src/kompute-shaders/op_getrows.comp b/ggml_llama/src/kompute-shaders/op_getrows.comp similarity index 100% rename from ggml/src/kompute-shaders/op_getrows.comp rename to ggml_llama/src/kompute-shaders/op_getrows.comp diff --git a/ggml/src/kompute-shaders/op_getrows_f16.comp b/ggml_llama/src/kompute-shaders/op_getrows_f16.comp similarity index 100% rename from ggml/src/kompute-shaders/op_getrows_f16.comp rename to ggml_llama/src/kompute-shaders/op_getrows_f16.comp diff --git a/ggml/src/kompute-shaders/op_getrows_f32.comp b/ggml_llama/src/kompute-shaders/op_getrows_f32.comp similarity index 100% rename from ggml/src/kompute-shaders/op_getrows_f32.comp rename to ggml_llama/src/kompute-shaders/op_getrows_f32.comp diff --git a/ggml/src/kompute-shaders/op_getrows_q4_0.comp b/ggml_llama/src/kompute-shaders/op_getrows_q4_0.comp similarity index 100% rename from ggml/src/kompute-shaders/op_getrows_q4_0.comp rename to ggml_llama/src/kompute-shaders/op_getrows_q4_0.comp diff --git a/ggml/src/kompute-shaders/op_getrows_q4_1.comp b/ggml_llama/src/kompute-shaders/op_getrows_q4_1.comp similarity index 100% rename from ggml/src/kompute-shaders/op_getrows_q4_1.comp rename to ggml_llama/src/kompute-shaders/op_getrows_q4_1.comp diff --git a/ggml/src/kompute-shaders/op_getrows_q6_k.comp b/ggml_llama/src/kompute-shaders/op_getrows_q6_k.comp similarity index 100% rename from ggml/src/kompute-shaders/op_getrows_q6_k.comp rename to ggml_llama/src/kompute-shaders/op_getrows_q6_k.comp diff --git a/ggml/src/kompute-shaders/op_mul.comp b/ggml_llama/src/kompute-shaders/op_mul.comp similarity index 100% rename from ggml/src/kompute-shaders/op_mul.comp rename to ggml_llama/src/kompute-shaders/op_mul.comp diff --git a/ggml/src/kompute-shaders/op_mul_mat_f16.comp b/ggml_llama/src/kompute-shaders/op_mul_mat_f16.comp similarity index 100% rename from ggml/src/kompute-shaders/op_mul_mat_f16.comp rename to ggml_llama/src/kompute-shaders/op_mul_mat_f16.comp diff --git a/ggml/src/kompute-shaders/op_mul_mat_mat_f32.comp b/ggml_llama/src/kompute-shaders/op_mul_mat_mat_f32.comp similarity index 100% rename from ggml/src/kompute-shaders/op_mul_mat_mat_f32.comp rename to ggml_llama/src/kompute-shaders/op_mul_mat_mat_f32.comp diff --git a/ggml/src/kompute-shaders/op_mul_mat_q4_0.comp b/ggml_llama/src/kompute-shaders/op_mul_mat_q4_0.comp similarity index 100% rename from ggml/src/kompute-shaders/op_mul_mat_q4_0.comp rename to ggml_llama/src/kompute-shaders/op_mul_mat_q4_0.comp diff --git a/ggml/src/kompute-shaders/op_mul_mat_q4_1.comp b/ggml_llama/src/kompute-shaders/op_mul_mat_q4_1.comp similarity index 100% rename from ggml/src/kompute-shaders/op_mul_mat_q4_1.comp rename to ggml_llama/src/kompute-shaders/op_mul_mat_q4_1.comp diff --git a/ggml/src/kompute-shaders/op_mul_mat_q6_k.comp b/ggml_llama/src/kompute-shaders/op_mul_mat_q6_k.comp similarity index 100% rename from ggml/src/kompute-shaders/op_mul_mat_q6_k.comp rename to ggml_llama/src/kompute-shaders/op_mul_mat_q6_k.comp diff --git a/ggml/src/kompute-shaders/op_mul_mat_q8_0.comp b/ggml_llama/src/kompute-shaders/op_mul_mat_q8_0.comp similarity index 100% rename from ggml/src/kompute-shaders/op_mul_mat_q8_0.comp rename to ggml_llama/src/kompute-shaders/op_mul_mat_q8_0.comp diff --git a/ggml/src/kompute-shaders/op_mul_mv_q_n.comp b/ggml_llama/src/kompute-shaders/op_mul_mv_q_n.comp similarity index 100% rename from ggml/src/kompute-shaders/op_mul_mv_q_n.comp rename to ggml_llama/src/kompute-shaders/op_mul_mv_q_n.comp diff --git a/ggml/src/kompute-shaders/op_mul_mv_q_n_pre.comp b/ggml_llama/src/kompute-shaders/op_mul_mv_q_n_pre.comp similarity index 100% rename from ggml/src/kompute-shaders/op_mul_mv_q_n_pre.comp rename to ggml_llama/src/kompute-shaders/op_mul_mv_q_n_pre.comp diff --git a/ggml/src/kompute-shaders/op_norm.comp b/ggml_llama/src/kompute-shaders/op_norm.comp similarity index 100% rename from ggml/src/kompute-shaders/op_norm.comp rename to ggml_llama/src/kompute-shaders/op_norm.comp diff --git a/ggml/src/kompute-shaders/op_relu.comp b/ggml_llama/src/kompute-shaders/op_relu.comp similarity index 100% rename from ggml/src/kompute-shaders/op_relu.comp rename to ggml_llama/src/kompute-shaders/op_relu.comp diff --git a/ggml/src/kompute-shaders/op_rmsnorm.comp b/ggml_llama/src/kompute-shaders/op_rmsnorm.comp similarity index 100% rename from ggml/src/kompute-shaders/op_rmsnorm.comp rename to ggml_llama/src/kompute-shaders/op_rmsnorm.comp diff --git a/ggml/src/kompute-shaders/op_rope_f16.comp b/ggml_llama/src/kompute-shaders/op_rope_f16.comp similarity index 100% rename from ggml/src/kompute-shaders/op_rope_f16.comp rename to ggml_llama/src/kompute-shaders/op_rope_f16.comp diff --git a/ggml/src/kompute-shaders/op_rope_f32.comp b/ggml_llama/src/kompute-shaders/op_rope_f32.comp similarity index 100% rename from ggml/src/kompute-shaders/op_rope_f32.comp rename to ggml_llama/src/kompute-shaders/op_rope_f32.comp diff --git a/ggml/src/kompute-shaders/op_scale.comp b/ggml_llama/src/kompute-shaders/op_scale.comp similarity index 100% rename from ggml/src/kompute-shaders/op_scale.comp rename to ggml_llama/src/kompute-shaders/op_scale.comp diff --git a/ggml/src/kompute-shaders/op_scale_8.comp b/ggml_llama/src/kompute-shaders/op_scale_8.comp similarity index 100% rename from ggml/src/kompute-shaders/op_scale_8.comp rename to ggml_llama/src/kompute-shaders/op_scale_8.comp diff --git a/ggml/src/kompute-shaders/op_silu.comp b/ggml_llama/src/kompute-shaders/op_silu.comp similarity index 100% rename from ggml/src/kompute-shaders/op_silu.comp rename to ggml_llama/src/kompute-shaders/op_silu.comp diff --git a/ggml/src/kompute-shaders/op_softmax.comp b/ggml_llama/src/kompute-shaders/op_softmax.comp similarity index 100% rename from ggml/src/kompute-shaders/op_softmax.comp rename to ggml_llama/src/kompute-shaders/op_softmax.comp diff --git a/ggml/src/kompute-shaders/rope_common.comp b/ggml_llama/src/kompute-shaders/rope_common.comp similarity index 100% rename from ggml/src/kompute-shaders/rope_common.comp rename to ggml_llama/src/kompute-shaders/rope_common.comp diff --git a/ggml/src/llamafile/sgemm.cpp b/ggml_llama/src/llamafile/sgemm.cpp similarity index 100% rename from ggml/src/llamafile/sgemm.cpp rename to ggml_llama/src/llamafile/sgemm.cpp diff --git a/ggml/src/llamafile/sgemm.h b/ggml_llama/src/llamafile/sgemm.h similarity index 100% rename from ggml/src/llamafile/sgemm.h rename to ggml_llama/src/llamafile/sgemm.h diff --git a/ggml/src/vulkan-shaders/CMakeLists.txt b/ggml_llama/src/vulkan-shaders/CMakeLists.txt similarity index 100% rename from ggml/src/vulkan-shaders/CMakeLists.txt rename to ggml_llama/src/vulkan-shaders/CMakeLists.txt diff --git a/ggml/src/vulkan-shaders/acc.comp b/ggml_llama/src/vulkan-shaders/acc.comp similarity index 100% rename from ggml/src/vulkan-shaders/acc.comp rename to ggml_llama/src/vulkan-shaders/acc.comp diff --git a/ggml/src/vulkan-shaders/add.comp b/ggml_llama/src/vulkan-shaders/add.comp similarity index 100% rename from ggml/src/vulkan-shaders/add.comp rename to ggml_llama/src/vulkan-shaders/add.comp diff --git a/ggml/src/vulkan-shaders/argsort.comp b/ggml_llama/src/vulkan-shaders/argsort.comp similarity index 100% rename from ggml/src/vulkan-shaders/argsort.comp rename to ggml_llama/src/vulkan-shaders/argsort.comp diff --git a/ggml/src/vulkan-shaders/clamp.comp b/ggml_llama/src/vulkan-shaders/clamp.comp similarity index 100% rename from ggml/src/vulkan-shaders/clamp.comp rename to ggml_llama/src/vulkan-shaders/clamp.comp diff --git a/ggml/src/vulkan-shaders/concat.comp b/ggml_llama/src/vulkan-shaders/concat.comp similarity index 100% rename from ggml/src/vulkan-shaders/concat.comp rename to ggml_llama/src/vulkan-shaders/concat.comp diff --git a/ggml/src/vulkan-shaders/copy.comp b/ggml_llama/src/vulkan-shaders/copy.comp similarity index 100% rename from ggml/src/vulkan-shaders/copy.comp rename to ggml_llama/src/vulkan-shaders/copy.comp diff --git a/ggml/src/vulkan-shaders/cos.comp b/ggml_llama/src/vulkan-shaders/cos.comp similarity index 100% rename from ggml/src/vulkan-shaders/cos.comp rename to ggml_llama/src/vulkan-shaders/cos.comp diff --git a/ggml/src/vulkan-shaders/dequant_f32.comp b/ggml_llama/src/vulkan-shaders/dequant_f32.comp similarity index 100% rename from ggml/src/vulkan-shaders/dequant_f32.comp rename to ggml_llama/src/vulkan-shaders/dequant_f32.comp diff --git a/ggml/src/vulkan-shaders/dequant_funcs.comp b/ggml_llama/src/vulkan-shaders/dequant_funcs.comp similarity index 100% rename from ggml/src/vulkan-shaders/dequant_funcs.comp rename to ggml_llama/src/vulkan-shaders/dequant_funcs.comp diff --git a/ggml/src/vulkan-shaders/dequant_head.comp b/ggml_llama/src/vulkan-shaders/dequant_head.comp similarity index 100% rename from ggml/src/vulkan-shaders/dequant_head.comp rename to ggml_llama/src/vulkan-shaders/dequant_head.comp diff --git a/ggml/src/vulkan-shaders/dequant_iq4_nl.comp b/ggml_llama/src/vulkan-shaders/dequant_iq4_nl.comp similarity index 100% rename from ggml/src/vulkan-shaders/dequant_iq4_nl.comp rename to ggml_llama/src/vulkan-shaders/dequant_iq4_nl.comp diff --git a/ggml/src/vulkan-shaders/dequant_q2_k.comp b/ggml_llama/src/vulkan-shaders/dequant_q2_k.comp similarity index 100% rename from ggml/src/vulkan-shaders/dequant_q2_k.comp rename to ggml_llama/src/vulkan-shaders/dequant_q2_k.comp diff --git a/ggml/src/vulkan-shaders/dequant_q3_k.comp b/ggml_llama/src/vulkan-shaders/dequant_q3_k.comp similarity index 100% rename from ggml/src/vulkan-shaders/dequant_q3_k.comp rename to ggml_llama/src/vulkan-shaders/dequant_q3_k.comp diff --git a/ggml/src/vulkan-shaders/dequant_q4_0.comp b/ggml_llama/src/vulkan-shaders/dequant_q4_0.comp similarity index 100% rename from ggml/src/vulkan-shaders/dequant_q4_0.comp rename to ggml_llama/src/vulkan-shaders/dequant_q4_0.comp diff --git a/ggml/src/vulkan-shaders/dequant_q4_1.comp b/ggml_llama/src/vulkan-shaders/dequant_q4_1.comp similarity index 100% rename from ggml/src/vulkan-shaders/dequant_q4_1.comp rename to ggml_llama/src/vulkan-shaders/dequant_q4_1.comp diff --git a/ggml/src/vulkan-shaders/dequant_q4_k.comp b/ggml_llama/src/vulkan-shaders/dequant_q4_k.comp similarity index 100% rename from ggml/src/vulkan-shaders/dequant_q4_k.comp rename to ggml_llama/src/vulkan-shaders/dequant_q4_k.comp diff --git a/ggml/src/vulkan-shaders/dequant_q5_0.comp b/ggml_llama/src/vulkan-shaders/dequant_q5_0.comp similarity index 100% rename from ggml/src/vulkan-shaders/dequant_q5_0.comp rename to ggml_llama/src/vulkan-shaders/dequant_q5_0.comp diff --git a/ggml/src/vulkan-shaders/dequant_q5_1.comp b/ggml_llama/src/vulkan-shaders/dequant_q5_1.comp similarity index 100% rename from ggml/src/vulkan-shaders/dequant_q5_1.comp rename to ggml_llama/src/vulkan-shaders/dequant_q5_1.comp diff --git a/ggml/src/vulkan-shaders/dequant_q5_k.comp b/ggml_llama/src/vulkan-shaders/dequant_q5_k.comp similarity index 100% rename from ggml/src/vulkan-shaders/dequant_q5_k.comp rename to ggml_llama/src/vulkan-shaders/dequant_q5_k.comp diff --git a/ggml/src/vulkan-shaders/dequant_q6_k.comp b/ggml_llama/src/vulkan-shaders/dequant_q6_k.comp similarity index 100% rename from ggml/src/vulkan-shaders/dequant_q6_k.comp rename to ggml_llama/src/vulkan-shaders/dequant_q6_k.comp diff --git a/ggml/src/vulkan-shaders/dequant_q8_0.comp b/ggml_llama/src/vulkan-shaders/dequant_q8_0.comp similarity index 100% rename from ggml/src/vulkan-shaders/dequant_q8_0.comp rename to ggml_llama/src/vulkan-shaders/dequant_q8_0.comp diff --git a/ggml/src/vulkan-shaders/diag_mask_inf.comp b/ggml_llama/src/vulkan-shaders/diag_mask_inf.comp similarity index 100% rename from ggml/src/vulkan-shaders/diag_mask_inf.comp rename to ggml_llama/src/vulkan-shaders/diag_mask_inf.comp diff --git a/ggml/src/vulkan-shaders/div.comp b/ggml_llama/src/vulkan-shaders/div.comp similarity index 100% rename from ggml/src/vulkan-shaders/div.comp rename to ggml_llama/src/vulkan-shaders/div.comp diff --git a/ggml/src/vulkan-shaders/gelu.comp b/ggml_llama/src/vulkan-shaders/gelu.comp similarity index 100% rename from ggml/src/vulkan-shaders/gelu.comp rename to ggml_llama/src/vulkan-shaders/gelu.comp diff --git a/ggml/src/vulkan-shaders/gelu_quick.comp b/ggml_llama/src/vulkan-shaders/gelu_quick.comp similarity index 100% rename from ggml/src/vulkan-shaders/gelu_quick.comp rename to ggml_llama/src/vulkan-shaders/gelu_quick.comp diff --git a/ggml/src/vulkan-shaders/generic_binary_head.comp b/ggml_llama/src/vulkan-shaders/generic_binary_head.comp similarity index 100% rename from ggml/src/vulkan-shaders/generic_binary_head.comp rename to ggml_llama/src/vulkan-shaders/generic_binary_head.comp diff --git a/ggml/src/vulkan-shaders/generic_head.comp b/ggml_llama/src/vulkan-shaders/generic_head.comp similarity index 100% rename from ggml/src/vulkan-shaders/generic_head.comp rename to ggml_llama/src/vulkan-shaders/generic_head.comp diff --git a/ggml/src/vulkan-shaders/generic_unary_head.comp b/ggml_llama/src/vulkan-shaders/generic_unary_head.comp similarity index 100% rename from ggml/src/vulkan-shaders/generic_unary_head.comp rename to ggml_llama/src/vulkan-shaders/generic_unary_head.comp diff --git a/ggml/src/vulkan-shaders/get_rows.comp b/ggml_llama/src/vulkan-shaders/get_rows.comp similarity index 100% rename from ggml/src/vulkan-shaders/get_rows.comp rename to ggml_llama/src/vulkan-shaders/get_rows.comp diff --git a/ggml/src/vulkan-shaders/get_rows_quant.comp b/ggml_llama/src/vulkan-shaders/get_rows_quant.comp similarity index 100% rename from ggml/src/vulkan-shaders/get_rows_quant.comp rename to ggml_llama/src/vulkan-shaders/get_rows_quant.comp diff --git a/ggml/src/vulkan-shaders/group_norm.comp b/ggml_llama/src/vulkan-shaders/group_norm.comp similarity index 100% rename from ggml/src/vulkan-shaders/group_norm.comp rename to ggml_llama/src/vulkan-shaders/group_norm.comp diff --git a/ggml/src/vulkan-shaders/im2col.comp b/ggml_llama/src/vulkan-shaders/im2col.comp similarity index 100% rename from ggml/src/vulkan-shaders/im2col.comp rename to ggml_llama/src/vulkan-shaders/im2col.comp diff --git a/ggml/src/vulkan-shaders/leaky_relu.comp b/ggml_llama/src/vulkan-shaders/leaky_relu.comp similarity index 100% rename from ggml/src/vulkan-shaders/leaky_relu.comp rename to ggml_llama/src/vulkan-shaders/leaky_relu.comp diff --git a/ggml/src/vulkan-shaders/mul.comp b/ggml_llama/src/vulkan-shaders/mul.comp similarity index 100% rename from ggml/src/vulkan-shaders/mul.comp rename to ggml_llama/src/vulkan-shaders/mul.comp diff --git a/ggml/src/vulkan-shaders/mul_mat_split_k_reduce.comp b/ggml_llama/src/vulkan-shaders/mul_mat_split_k_reduce.comp similarity index 100% rename from ggml/src/vulkan-shaders/mul_mat_split_k_reduce.comp rename to ggml_llama/src/vulkan-shaders/mul_mat_split_k_reduce.comp diff --git a/ggml/src/vulkan-shaders/mul_mat_vec.comp b/ggml_llama/src/vulkan-shaders/mul_mat_vec.comp similarity index 100% rename from ggml/src/vulkan-shaders/mul_mat_vec.comp rename to ggml_llama/src/vulkan-shaders/mul_mat_vec.comp diff --git a/ggml/src/vulkan-shaders/mul_mat_vec_base.comp b/ggml_llama/src/vulkan-shaders/mul_mat_vec_base.comp similarity index 100% rename from ggml/src/vulkan-shaders/mul_mat_vec_base.comp rename to ggml_llama/src/vulkan-shaders/mul_mat_vec_base.comp diff --git a/ggml/src/vulkan-shaders/mul_mat_vec_nc.comp b/ggml_llama/src/vulkan-shaders/mul_mat_vec_nc.comp similarity index 100% rename from ggml/src/vulkan-shaders/mul_mat_vec_nc.comp rename to ggml_llama/src/vulkan-shaders/mul_mat_vec_nc.comp diff --git a/ggml/src/vulkan-shaders/mul_mat_vec_p021.comp b/ggml_llama/src/vulkan-shaders/mul_mat_vec_p021.comp similarity index 100% rename from ggml/src/vulkan-shaders/mul_mat_vec_p021.comp rename to ggml_llama/src/vulkan-shaders/mul_mat_vec_p021.comp diff --git a/ggml/src/vulkan-shaders/mul_mat_vec_q2_k.comp b/ggml_llama/src/vulkan-shaders/mul_mat_vec_q2_k.comp similarity index 100% rename from ggml/src/vulkan-shaders/mul_mat_vec_q2_k.comp rename to ggml_llama/src/vulkan-shaders/mul_mat_vec_q2_k.comp diff --git a/ggml/src/vulkan-shaders/mul_mat_vec_q3_k.comp b/ggml_llama/src/vulkan-shaders/mul_mat_vec_q3_k.comp similarity index 100% rename from ggml/src/vulkan-shaders/mul_mat_vec_q3_k.comp rename to ggml_llama/src/vulkan-shaders/mul_mat_vec_q3_k.comp diff --git a/ggml/src/vulkan-shaders/mul_mat_vec_q4_k.comp b/ggml_llama/src/vulkan-shaders/mul_mat_vec_q4_k.comp similarity index 100% rename from ggml/src/vulkan-shaders/mul_mat_vec_q4_k.comp rename to ggml_llama/src/vulkan-shaders/mul_mat_vec_q4_k.comp diff --git a/ggml/src/vulkan-shaders/mul_mat_vec_q5_k.comp b/ggml_llama/src/vulkan-shaders/mul_mat_vec_q5_k.comp similarity index 100% rename from ggml/src/vulkan-shaders/mul_mat_vec_q5_k.comp rename to ggml_llama/src/vulkan-shaders/mul_mat_vec_q5_k.comp diff --git a/ggml/src/vulkan-shaders/mul_mat_vec_q6_k.comp b/ggml_llama/src/vulkan-shaders/mul_mat_vec_q6_k.comp similarity index 100% rename from ggml/src/vulkan-shaders/mul_mat_vec_q6_k.comp rename to ggml_llama/src/vulkan-shaders/mul_mat_vec_q6_k.comp diff --git a/ggml/src/vulkan-shaders/mul_mm.comp b/ggml_llama/src/vulkan-shaders/mul_mm.comp similarity index 100% rename from ggml/src/vulkan-shaders/mul_mm.comp rename to ggml_llama/src/vulkan-shaders/mul_mm.comp diff --git a/ggml/src/vulkan-shaders/norm.comp b/ggml_llama/src/vulkan-shaders/norm.comp similarity index 100% rename from ggml/src/vulkan-shaders/norm.comp rename to ggml_llama/src/vulkan-shaders/norm.comp diff --git a/ggml/src/vulkan-shaders/pad.comp b/ggml_llama/src/vulkan-shaders/pad.comp similarity index 100% rename from ggml/src/vulkan-shaders/pad.comp rename to ggml_llama/src/vulkan-shaders/pad.comp diff --git a/ggml/src/vulkan-shaders/relu.comp b/ggml_llama/src/vulkan-shaders/relu.comp similarity index 100% rename from ggml/src/vulkan-shaders/relu.comp rename to ggml_llama/src/vulkan-shaders/relu.comp diff --git a/ggml/src/vulkan-shaders/repeat.comp b/ggml_llama/src/vulkan-shaders/repeat.comp similarity index 100% rename from ggml/src/vulkan-shaders/repeat.comp rename to ggml_llama/src/vulkan-shaders/repeat.comp diff --git a/ggml/src/vulkan-shaders/rms_norm.comp b/ggml_llama/src/vulkan-shaders/rms_norm.comp similarity index 100% rename from ggml/src/vulkan-shaders/rms_norm.comp rename to ggml_llama/src/vulkan-shaders/rms_norm.comp diff --git a/ggml/src/vulkan-shaders/rope_head.comp b/ggml_llama/src/vulkan-shaders/rope_head.comp similarity index 100% rename from ggml/src/vulkan-shaders/rope_head.comp rename to ggml_llama/src/vulkan-shaders/rope_head.comp diff --git a/ggml/src/vulkan-shaders/rope_neox.comp b/ggml_llama/src/vulkan-shaders/rope_neox.comp similarity index 100% rename from ggml/src/vulkan-shaders/rope_neox.comp rename to ggml_llama/src/vulkan-shaders/rope_neox.comp diff --git a/ggml/src/vulkan-shaders/rope_norm.comp b/ggml_llama/src/vulkan-shaders/rope_norm.comp similarity index 100% rename from ggml/src/vulkan-shaders/rope_norm.comp rename to ggml_llama/src/vulkan-shaders/rope_norm.comp diff --git a/ggml/src/vulkan-shaders/scale.comp b/ggml_llama/src/vulkan-shaders/scale.comp similarity index 100% rename from ggml/src/vulkan-shaders/scale.comp rename to ggml_llama/src/vulkan-shaders/scale.comp diff --git a/ggml/src/vulkan-shaders/silu.comp b/ggml_llama/src/vulkan-shaders/silu.comp similarity index 100% rename from ggml/src/vulkan-shaders/silu.comp rename to ggml_llama/src/vulkan-shaders/silu.comp diff --git a/ggml/src/vulkan-shaders/sin.comp b/ggml_llama/src/vulkan-shaders/sin.comp similarity index 100% rename from ggml/src/vulkan-shaders/sin.comp rename to ggml_llama/src/vulkan-shaders/sin.comp diff --git a/ggml/src/vulkan-shaders/soft_max.comp b/ggml_llama/src/vulkan-shaders/soft_max.comp similarity index 100% rename from ggml/src/vulkan-shaders/soft_max.comp rename to ggml_llama/src/vulkan-shaders/soft_max.comp diff --git a/ggml/src/vulkan-shaders/square.comp b/ggml_llama/src/vulkan-shaders/square.comp similarity index 100% rename from ggml/src/vulkan-shaders/square.comp rename to ggml_llama/src/vulkan-shaders/square.comp diff --git a/ggml/src/vulkan-shaders/sum_rows.comp b/ggml_llama/src/vulkan-shaders/sum_rows.comp similarity index 100% rename from ggml/src/vulkan-shaders/sum_rows.comp rename to ggml_llama/src/vulkan-shaders/sum_rows.comp diff --git a/ggml/src/vulkan-shaders/tanh.comp b/ggml_llama/src/vulkan-shaders/tanh.comp similarity index 100% rename from ggml/src/vulkan-shaders/tanh.comp rename to ggml_llama/src/vulkan-shaders/tanh.comp diff --git a/ggml/src/vulkan-shaders/timestep_embedding.comp b/ggml_llama/src/vulkan-shaders/timestep_embedding.comp similarity index 100% rename from ggml/src/vulkan-shaders/timestep_embedding.comp rename to ggml_llama/src/vulkan-shaders/timestep_embedding.comp diff --git a/ggml/src/vulkan-shaders/types.comp b/ggml_llama/src/vulkan-shaders/types.comp similarity index 100% rename from ggml/src/vulkan-shaders/types.comp rename to ggml_llama/src/vulkan-shaders/types.comp diff --git a/ggml/src/vulkan-shaders/upscale.comp b/ggml_llama/src/vulkan-shaders/upscale.comp similarity index 100% rename from ggml/src/vulkan-shaders/upscale.comp rename to ggml_llama/src/vulkan-shaders/upscale.comp diff --git a/ggml/src/vulkan-shaders/vulkan-shaders-gen.cpp b/ggml_llama/src/vulkan-shaders/vulkan-shaders-gen.cpp similarity index 100% rename from ggml/src/vulkan-shaders/vulkan-shaders-gen.cpp rename to ggml_llama/src/vulkan-shaders/vulkan-shaders-gen.cpp diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 46a6ad562..caf0a334b 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -25,7 +25,7 @@ add_library(llama target_include_directories(llama PUBLIC . ../include) target_compile_features (llama PUBLIC cxx_std_11) # don't bump -target_link_libraries(llama PUBLIC ggml) +target_link_libraries(llama PUBLIC ggml_llama) if (BUILD_SHARED_LIBS) set_target_properties(llama PROPERTIES POSITION_INDEPENDENT_CODE ON)