Merge commit 'f8c4c073' into detokenizer

commit 8072089c4e

456 changed files with 35475 additions and 29355 deletions
@@ -27,7 +27,7 @@ COPY . .
 # Set nvcc architecture
 ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
 # Enable CUDA
-ENV LLAMA_CUDA=1
+ENV GGML_CUDA=1
 # Enable cURL
 ENV LLAMA_CURL=1
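The hunk above is representative of most of this change set: build toggles that used to be spelled LLAMA_* now live in the ggml build system and are spelled GGML_*, while llama.cpp-level options such as LLAMA_CURL keep their prefix. As an illustrative sketch only (the old/new spellings are taken from hunks in this diff; the -j value and target are arbitrary examples), the same CUDA build is requested like this before and after the rename:

  # before this change
  make -j8 LLAMA_CUDA=1 llama-cli
  cmake -B build -DLLAMA_CUDA=ON

  # after this change
  make -j8 GGML_CUDA=1 llama-cli
  cmake -B build -DGGML_CUDA=ON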
@@ -36,7 +36,7 @@ COPY . .
 # Set nvcc architecture
 ENV GPU_TARGETS=${ROCM_DOCKER_ARCH}
 # Enable ROCm
-ENV LLAMA_HIPBLAS=1
+ENV GGML_HIPBLAS=1
 ENV CC=/opt/rocm/llvm/bin/clang
 ENV CXX=/opt/rocm/llvm/bin/clang++

@@ -21,7 +21,7 @@ COPY . .
 # Set nvcc architecture
 ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
 # Enable CUDA
-ENV LLAMA_CUDA=1
+ENV GGML_CUDA=1

 RUN make -j$(nproc) llama-cli

@@ -2,7 +2,7 @@ ARG ONEAPI_VERSION=2024.1.1-devel-ubuntu22.04

 FROM intel/oneapi-basekit:$ONEAPI_VERSION as build

-ARG LLAMA_SYCL_F16=OFF
+ARG GGML_SYCL_F16=OFF
 RUN apt-get update && \
     apt-get install -y git

@@ -10,11 +10,11 @@ WORKDIR /app

 COPY . .

-RUN if [ "${LLAMA_SYCL_F16}" = "ON" ]; then \
-    echo "LLAMA_SYCL_F16 is set" && \
-    export OPT_SYCL_F16="-DLLAMA_SYCL_F16=ON"; \
+RUN if [ "${GGML_SYCL_F16}" = "ON" ]; then \
+    echo "GGML_SYCL_F16 is set" && \
+    export OPT_SYCL_F16="-DGGML_SYCL_F16=ON"; \
     fi && \
-    cmake -B build -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx ${OPT_SYCL_F16} && \
+    cmake -B build -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx ${OPT_SYCL_F16} && \
     cmake --build build --config Release --target llama-cli

 FROM intel/oneapi-basekit:$ONEAPI_VERSION as runtime

@@ -36,7 +36,7 @@ COPY . .
 # Set nvcc architecture
 ENV GPU_TARGETS=${ROCM_DOCKER_ARCH}
 # Enable ROCm
-ENV LLAMA_HIPBLAS=1
+ENV GGML_HIPBLAS=1
 ENV CC=/opt/rocm/llvm/bin/clang
 ENV CXX=/opt/rocm/llvm/bin/clang++

@@ -14,7 +14,7 @@ RUN wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key
 # Build it
 WORKDIR /app
 COPY . .
-RUN cmake -B build -DLLAMA_VULKAN=1 && \
+RUN cmake -B build -DGGML_VULKAN=1 && \
     cmake --build build --config Release --target llama-cli

 # Clean up
@@ -1,84 +0,0 @@
-# SRPM for building from source and packaging an RPM for RPM-based distros.
-# https://docs.fedoraproject.org/en-US/quick-docs/creating-rpm-packages
-# Built and maintained by John Boero - boeroboy@gmail.com
-# In honor of Seth Vidal https://www.redhat.com/it/blog/thank-you-seth-vidal
-
-# Notes for llama.cpp:
-# 1. Tags are currently based on hash - which will not sort asciibetically.
-# We need to declare standard versioning if people want to sort latest releases.
-# 2. Builds for CUDA/OpenCL support are separate, with different depenedencies.
-# 3. NVidia's developer repo must be enabled with nvcc, cublas, clblas, etc installed.
-# Example: https://developer.download.nvidia.com/compute/cuda/repos/fedora37/x86_64/cuda-fedora37.repo
-# 4. OpenCL/CLBLAST support simply requires the ICD loader and basic opencl libraries.
-# It is up to the user to install the correct vendor-specific support.
-
-Name: llama.cpp-clblast
-Version: %( date "+%%Y%%m%%d" )
-Release: 1%{?dist}
-Summary: OpenCL Inference of LLaMA model in C/C++
-License: MIT
-Source0: https://github.com/ggerganov/llama.cpp/archive/refs/heads/master.tar.gz
-BuildRequires: coreutils make gcc-c++ git mesa-libOpenCL-devel clblast-devel
-Requires: clblast
-URL: https://github.com/ggerganov/llama.cpp
-
-%define debug_package %{nil}
-%define source_date_epoch_from_changelog 0
-
-%description
-CPU inference for Meta's Lllama2 models using default options.
-
-%prep
-%setup -n llama.cpp-master
-
-%build
-make -j LLAMA_CLBLAST=1
-
-%install
-mkdir -p %{buildroot}%{_bindir}/
-cp -p llama-cli %{buildroot}%{_bindir}/llama-clblast-cli
-cp -p llama-server %{buildroot}%{_bindir}/llama-clblast-server
-cp -p llama-simple %{buildroot}%{_bindir}/llama-clblast-simple
-
-mkdir -p %{buildroot}/usr/lib/systemd/system
-%{__cat} <<EOF > %{buildroot}/usr/lib/systemd/system/llamaclblast.service
-[Unit]
-Description=Llama.cpp server, CPU only (no GPU support in this build).
-After=syslog.target network.target local-fs.target remote-fs.target nss-lookup.target
-
-[Service]
-Type=simple
-EnvironmentFile=/etc/sysconfig/llama
-ExecStart=/usr/bin/llama-clblast-server $LLAMA_ARGS
-ExecReload=/bin/kill -s HUP $MAINPID
-Restart=never
-
-[Install]
-WantedBy=default.target
-EOF
-
-mkdir -p %{buildroot}/etc/sysconfig
-%{__cat} <<EOF > %{buildroot}/etc/sysconfig/llama
-LLAMA_ARGS="-m /opt/llama2/ggml-model-f32.bin"
-EOF
-
-%clean
-rm -rf %{buildroot}
-rm -rf %{_builddir}/*
-
-%files
-%{_bindir}/llama-clblast-cli
-%{_bindir}/llama-clblast-server
-%{_bindir}/llama-clblast-simple
-/usr/lib/systemd/system/llamaclblast.service
-%config /etc/sysconfig/llama
-
-
-%pre
-
-%post
-
-%preun
-%postun
-
-%changelog
@@ -32,7 +32,7 @@ CPU inference for Meta's Lllama2 models using default options.
 %setup -n llama.cpp-master

 %build
-make -j LLAMA_CUDA=1
+make -j GGML_CUDA=1

 %install
 mkdir -p %{buildroot}%{_bindir}/

@@ -21,7 +21,7 @@ COPY . .
 # Set nvcc architecture
 ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
 # Enable CUDA
-ENV LLAMA_CUDA=1
+ENV GGML_CUDA=1
 # Enable cURL
 ENV LLAMA_CURL=1
@@ -30,8 +30,10 @@ RUN make -j$(nproc) llama-server
 FROM ${BASE_CUDA_RUN_CONTAINER} as runtime

 RUN apt-get update && \
-    apt-get install -y libcurl4-openssl-dev libgomp1
+    apt-get install -y libcurl4-openssl-dev libgomp1 curl

 COPY --from=build /app/llama-server /llama-server

+HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]

 ENTRYPOINT [ "/llama-server" ]
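Several of the llama-server images in this change install curl in the runtime stage and add a Docker HEALTHCHECK against the server's /health endpoint. A rough sketch of how that can be exercised locally follows; the image tag, model path and published port are placeholders, not taken from the diff:

  docker run -d --name llama -p 8080:8080 local/llama.cpp:server -m /models/model.gguf --host 0.0.0.0 --port 8080
  docker inspect --format '{{.State.Health.Status}}' llama   # "starting", then "healthy" once /health responds
  curl -f http://localhost:8080/health                       # same probe the HEALTHCHECK runs inside the container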
@@ -2,7 +2,7 @@ ARG ONEAPI_VERSION=2024.1.1-devel-ubuntu22.04

 FROM intel/oneapi-basekit:$ONEAPI_VERSION as build

-ARG LLAMA_SYCL_F16=OFF
+ARG GGML_SYCL_F16=OFF
 RUN apt-get update && \
     apt-get install -y git libcurl4-openssl-dev

@@ -10,20 +10,22 @@ WORKDIR /app

 COPY . .

-RUN if [ "${LLAMA_SYCL_F16}" = "ON" ]; then \
-    echo "LLAMA_SYCL_F16 is set" && \
-    export OPT_SYCL_F16="-DLLAMA_SYCL_F16=ON"; \
+RUN if [ "${GGML_SYCL_F16}" = "ON" ]; then \
+    echo "GGML_SYCL_F16 is set" && \
+    export OPT_SYCL_F16="-DGGML_SYCL_F16=ON"; \
     fi && \
-    cmake -B build -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_CURL=ON ${OPT_SYCL_F16} && \
+    cmake -B build -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_CURL=ON ${OPT_SYCL_F16} && \
     cmake --build build --config Release --target llama-server

 FROM intel/oneapi-basekit:$ONEAPI_VERSION as runtime

 RUN apt-get update && \
-    apt-get install -y libcurl4-openssl-dev
+    apt-get install -y libcurl4-openssl-dev curl

 COPY --from=build /app/build/bin/llama-server /llama-server

 ENV LC_ALL=C.utf8

+HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]

 ENTRYPOINT [ "/llama-server" ]

@@ -36,15 +36,17 @@ COPY . .
 # Set nvcc architecture
 ENV GPU_TARGETS=${ROCM_DOCKER_ARCH}
 # Enable ROCm
-ENV LLAMA_HIPBLAS=1
+ENV GGML_HIPBLAS=1
 ENV CC=/opt/rocm/llvm/bin/clang
 ENV CXX=/opt/rocm/llvm/bin/clang++

 # Enable cURL
 ENV LLAMA_CURL=1
 RUN apt-get update && \
-    apt-get install -y libcurl4-openssl-dev
+    apt-get install -y libcurl4-openssl-dev curl

 RUN make -j$(nproc) llama-server

+HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]

 ENTRYPOINT [ "/app/llama-server" ]

@@ -5,20 +5,16 @@ FROM ubuntu:$UBUNTU_VERSION as build
 # Install build tools
 RUN apt update && apt install -y git build-essential cmake wget

-# Install Vulkan SDK
+# Install Vulkan SDK and cURL
 RUN wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \
     wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list && \
     apt update -y && \
-    apt-get install -y vulkan-sdk
+    apt-get install -y vulkan-sdk libcurl4-openssl-dev curl

-# Install cURL
-RUN apt-get update && \
-    apt-get install -y libcurl4-openssl-dev
-
 # Build it
 WORKDIR /app
 COPY . .
-RUN cmake -B build -DLLAMA_VULKAN=1 -DLLAMA_CURL=1 && \
+RUN cmake -B build -DGGML_VULKAN=1 -DLLAMA_CURL=1 && \
     cmake --build build --config Release --target llama-server

 # Clean up

@@ -28,4 +24,6 @@ RUN cp /app/build/bin/llama-server /llama-server && \

 ENV LC_ALL=C.utf8

+HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]

 ENTRYPOINT [ "/llama-server" ]

@@ -3,7 +3,7 @@ ARG UBUNTU_VERSION=22.04
 FROM ubuntu:$UBUNTU_VERSION as build

 RUN apt-get update && \
-    apt-get install -y build-essential git libcurl4-openssl-dev
+    apt-get install -y build-essential git libcurl4-openssl-dev curl

 WORKDIR /app

@@ -22,4 +22,6 @@ COPY --from=build /app/llama-server /llama-server

 ENV LC_ALL=C.utf8

+HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]

 ENTRYPOINT [ "/llama-server" ]

@@ -17,19 +17,18 @@
   rocmPackages,
   vulkan-headers,
   vulkan-loader,
-  clblast,
+  curl,
   useBlas ? builtins.all (x: !x) [
     useCuda
     useMetalKit
-    useOpenCL
     useRocm
     useVulkan
   ] && blas.meta.available,
   useCuda ? config.cudaSupport,
-  useMetalKit ? stdenv.isAarch64 && stdenv.isDarwin && !useOpenCL,
+  useMetalKit ? stdenv.isAarch64 && stdenv.isDarwin,
   useMpi ? false, # Increases the runtime closure size by ~700M
-  useOpenCL ? false,
   useRocm ? config.rocmSupport,
+  enableCurl ? true,
   useVulkan ? false,
   llamaVersion ? "0.0.0", # Arbitrary version, substituted by the flake

@@ -56,7 +55,6 @@ let
   ++ lib.optionals useCuda [ "CUDA" ]
   ++ lib.optionals useMetalKit [ "MetalKit" ]
   ++ lib.optionals useMpi [ "MPI" ]
-  ++ lib.optionals useOpenCL [ "OpenCL" ]
   ++ lib.optionals useRocm [ "ROCm" ]
   ++ lib.optionals useVulkan [ "Vulkan" ];

@@ -160,9 +158,9 @@ effectiveStdenv.mkDerivation (
     };

     postPatch = ''
-      substituteInPlace ./ggml-metal.m \
+      substituteInPlace ./ggml/src/ggml-metal.m \
        --replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";"
-      substituteInPlace ./ggml-metal.m \
+      substituteInPlace ./ggml/src/ggml-metal.m \
        --replace '[bundle pathForResource:@"default" ofType:@"metallib"];' "@\"$out/bin/default.metallib\";"
    '';
@@ -198,24 +196,24 @@ effectiveStdenv.mkDerivation (
       optionals effectiveStdenv.isDarwin darwinBuildInputs
       ++ optionals useCuda cudaBuildInputs
       ++ optionals useMpi [ mpi ]
-      ++ optionals useOpenCL [ clblast ]
       ++ optionals useRocm rocmBuildInputs
       ++ optionals useBlas [ blas ]
-      ++ optionals useVulkan vulkanBuildInputs;
+      ++ optionals useVulkan vulkanBuildInputs
+      ++ optionals enableCurl [ curl ];

     cmakeFlags =
       [
-        (cmakeBool "LLAMA_NATIVE" false)
         (cmakeBool "LLAMA_BUILD_SERVER" true)
         (cmakeBool "BUILD_SHARED_LIBS" (!enableStatic))
         (cmakeBool "CMAKE_SKIP_BUILD_RPATH" true)
-        (cmakeBool "LLAMA_BLAS" useBlas)
-        (cmakeBool "LLAMA_CLBLAST" useOpenCL)
-        (cmakeBool "LLAMA_CUDA" useCuda)
-        (cmakeBool "LLAMA_HIPBLAS" useRocm)
-        (cmakeBool "LLAMA_METAL" useMetalKit)
-        (cmakeBool "LLAMA_VULKAN" useVulkan)
-        (cmakeBool "LLAMA_STATIC" enableStatic)
+        (cmakeBool "LLAMA_CURL" enableCurl)
+        (cmakeBool "GGML_NATIVE" false)
+        (cmakeBool "GGML_BLAS" useBlas)
+        (cmakeBool "GGML_CUDA" useCuda)
+        (cmakeBool "GGML_HIPBLAS" useRocm)
+        (cmakeBool "GGML_METAL" useMetalKit)
+        (cmakeBool "GGML_VULKAN" useVulkan)
+        (cmakeBool "GGML_STATIC" enableStatic)
       ]
       ++ optionals useCuda [
         (
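The cmakeFlags block above is the Nix-side view of the same rename: backend toggles move into the GGML_ namespace, while LLAMA_CURL and LLAMA_BUILD_SERVER stay under the llama.cpp prefix. Outside of Nix, an equivalent hand-written configure step might look like the following sketch; the particular flag values are illustrative combinations of options that appear in this diff, not a prescribed configuration:

  cmake -B build \
      -DGGML_NATIVE=OFF \
      -DGGML_CUDA=ON \
      -DLLAMA_CURL=ON \
      -DLLAMA_BUILD_SERVER=ON
  cmake --build build --config Release --target llama-server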
@@ -231,7 +229,7 @@ effectiveStdenv.mkDerivation (
       ]
       ++ optionals useMetalKit [
         (lib.cmakeFeature "CMAKE_C_FLAGS" "-D__ARM_FEATURE_DOTPROD=1")
-        (cmakeBool "LLAMA_METAL_EMBED_LIBRARY" (!precompileMetalShaders))
+        (cmakeBool "GGML_METAL_EMBED_LIBRARY" (!precompileMetalShaders))
       ];

     # Environment variables needed for ROCm

@@ -244,7 +242,7 @@ effectiveStdenv.mkDerivation (
     # if they haven't been added yet.
     postInstall = ''
       mkdir -p $out/include
-      cp $src/llama.h $out/include/
+      cp $src/include/llama.h $out/include/
     '';

     # Define the shells here, but don't add in the inputsFrom to avoid recursion.

@@ -254,7 +252,6 @@ effectiveStdenv.mkDerivation (
         useCuda
         useMetalKit
         useMpi
-        useOpenCL
         useRocm
         useVulkan
         ;

@@ -281,7 +278,7 @@ effectiveStdenv.mkDerivation (
     # Configurations we don't want even the CI to evaluate. Results in the
     # "unsupported platform" messages. This is mostly a no-op, because
     # cudaPackages would've refused to evaluate anyway.
-    badPlatforms = optionals (useCuda || useOpenCL) lib.platforms.darwin;
+    badPlatforms = optionals useCuda lib.platforms.darwin;

     # Configurations that are known to result in build failures. Can be
     # overridden by importing Nixpkgs with `allowBroken = true`.

@@ -28,4 +28,5 @@ indent_size = 2
 indent_style = tab

 [examples/cvector-generator/*.txt]
+trim_trailing_whitespace = unset
 insert_final_newline = unset
2 .github/ISSUE_TEMPLATE/config.yml vendored

@@ -9,5 +9,3 @@ contact_links:
   - name: Want to contribute?
     url: https://github.com/ggerganov/llama.cpp/wiki/contribute
     about: Head to the contribution guide page of the wiki for areas you can help with
-
-

28 .github/labeler.yml vendored

@@ -2,31 +2,31 @@
 Kompute:
   - changed-files:
       - any-glob-to-any-file:
-          - ggml-kompute.h
-          - ggml-kompute.cpp
+          - ggml/include/ggml-kompute.h
+          - ggml/src/ggml-kompute.cpp
           - README-kompute.md
 Apple Metal:
   - changed-files:
       - any-glob-to-any-file:
-          - ggml-metal.h
-          - ggml-metal.cpp
+          - ggml/include/ggml-metal.h
+          - ggml/src/ggml-metal.cpp
           - README-metal.md
 SYCL:
   - changed-files:
       - any-glob-to-any-file:
-          - ggml-sycl.h
-          - ggml-sycl.cpp
+          - ggml/include/ggml-sycl.h
+          - ggml/src/ggml-sycl.cpp
           - README-sycl.md
 Nvidia GPU:
   - changed-files:
       - any-glob-to-any-file:
-          - ggml-cuda.h
-          - ggml-cuda/**
+          - ggml/include/ggml-cuda.h
+          - ggml/src/ggml-cuda/**
 Vulkan:
   - changed-files:
       - any-glob-to-any-file:
-          - ggml_vk_generate_shaders.py
-          - ggml-vulkan*
+          - ggml/ggml_vk_generate_shaders.py
+          - ggml/src/ggml-vulkan*
 documentation:
   - changed-files:
       - any-glob-to-any-file:

@@ -73,10 +73,10 @@ server:
 ggml:
   - changed-files:
       - any-glob-to-any-file:
-          - ggml.c
-          - ggml.h
-          - ggml-*.c
-          - ggml-*.h
+          - ggml/include/ggml*.h
+          - ggml/src/ggml*.c
+          - ggml/src/ggml*.cpp
+          - ggml/src/ggml*.h
           - ggml-cuda/**
 nix:
   - changed-files:
2 .github/workflows/bench.yml vendored

@@ -109,7 +109,7 @@ jobs:
        run: |
          set -eux
          cmake -B build \
-            -DLLAMA_NATIVE=OFF \
+            -DGGML_NATIVE=OFF \
             -DLLAMA_BUILD_SERVER=ON \
             -DLLAMA_CURL=ON \
             -DLLAMA_CUBLAS=ON \
81 .github/workflows/build.yml vendored

@@ -10,10 +10,10 @@ on:
   push:
     branches:
       - master
-    paths: ['.github/workflows/**', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m']
+    paths: ['.github/workflows/build.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.cuh', '**/*.swift', '**/*.m', '**/*.metal']
   pull_request:
     types: [opened, synchronize, reopened]
-    paths: ['.github/workflows/build.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.cuh', '**/*.swift', '**/*.m']
+    paths: ['.github/workflows/build.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.cuh', '**/*.swift', '**/*.m', '**/*.metal']

 concurrency:
   group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
@@ -47,7 +47,7 @@ jobs:
           sysctl -a
           mkdir build
           cd build
-          cmake -DLLAMA_FATAL_WARNINGS=ON -DLLAMA_METAL_EMBED_LIBRARY=ON -DLLAMA_CURL=ON ..
+          cmake -DLLAMA_FATAL_WARNINGS=ON -DGGML_METAL_EMBED_LIBRARY=ON -DLLAMA_CURL=ON -DBUILD_SHARED_LIBS=OFF ..
           cmake --build . --config Release -j $(sysctl -n hw.logicalcpu)

       - name: Test

@@ -105,7 +105,7 @@ jobs:
           sysctl -a
           # Metal is disabled due to intermittent failures with Github runners not having a GPU:
           # https://github.com/ggerganov/llama.cpp/actions/runs/8635935781/job/23674807267#step:5:2313
-          cmake -B build -DLLAMA_FATAL_WARNINGS=ON -DLLAMA_METAL=OFF -DLLAMA_CURL=ON
+          cmake -B build -DLLAMA_FATAL_WARNINGS=ON -DGGML_METAL=OFF -DLLAMA_CURL=ON -DBUILD_SHARED_LIBS=OFF
           cmake --build build --config Release -j $(sysctl -n hw.logicalcpu)

       - name: Test
@@ -222,7 +222,7 @@ jobs:
         run: |
           mkdir build
           cd build
-          cmake .. -DLLAMA_FATAL_WARNINGS=ON -DLLAMA_CURL=ON
+          cmake .. -DLLAMA_FATAL_WARNINGS=ON -DLLAMA_CURL=ON -DBUILD_SHARED_LIBS=OFF
           cmake --build . --config Release -j $(nproc)

       - name: Test

@@ -305,7 +305,7 @@ jobs:
         run: |
           mkdir build
           cd build
-          cmake .. -DLLAMA_FATAL_WARNINGS=ON -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} -DLLAMA_OPENMP=OFF
+          cmake .. -DLLAMA_FATAL_WARNINGS=ON -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} -DGGML_OPENMP=OFF
           cmake --build . --config ${{ matrix.build_type }} -j $(nproc)

       - name: Test
@@ -335,7 +335,7 @@ jobs:
         run: |
           mkdir build
           cd build
-          cmake -DLLAMA_RPC=ON ..
+          cmake -DGGML_RPC=ON ..
           cmake --build . --config Release -j $(nproc)

       - name: Test

@@ -363,7 +363,7 @@ jobs:
         run: |
           mkdir build
           cd build
-          cmake -DLLAMA_VULKAN=ON ..
+          cmake -DGGML_VULKAN=ON ..
           cmake --build . --config Release -j $(nproc)

   ubuntu-22-cmake-hip:
@@ -384,13 +384,13 @@ jobs:
       - name: Build with native CMake HIP support
         id: cmake_build
         run: |
-          cmake -B build -S . -DCMAKE_HIP_COMPILER="$(hipconfig -l)/clang" -DLLAMA_HIPBLAS=ON
+          cmake -B build -S . -DCMAKE_HIP_COMPILER="$(hipconfig -l)/clang" -DGGML_HIPBLAS=ON
           cmake --build build --config Release -j $(nproc)

       - name: Build with legacy HIP support
         id: cmake_build_legacy_hip
         run: |
-          cmake -B build2 -S . -DCMAKE_C_COMPILER=hipcc -DCMAKE_CXX_COMPILER=hipcc -DLLAMA_HIPBLAS=ON
+          cmake -B build2 -S . -DCMAKE_C_COMPILER=hipcc -DCMAKE_CXX_COMPILER=hipcc -DGGML_HIPBLAS=ON
           cmake --build build2 --config Release -j $(nproc)

   ubuntu-22-cmake-sycl:

@@ -431,7 +431,7 @@ jobs:
           source /opt/intel/oneapi/setvars.sh
           mkdir build
           cd build
-          cmake -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx ..
+          cmake -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx ..
           cmake --build . --config Release -j $(nproc)

   ubuntu-22-cmake-sycl-fp16:
@@ -472,10 +472,10 @@ jobs:
           source /opt/intel/oneapi/setvars.sh
           mkdir build
           cd build
-          cmake -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_SYCL_F16=ON ..
+          cmake -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON ..
           cmake --build . --config Release -j $(nproc)

-  # TODO: build with LLAMA_NO_METAL because test-backend-ops fail on "Apple Paravirtual device" and I don't know
+  # TODO: build with GGML_NO_METAL because test-backend-ops fail on "Apple Paravirtual device" and I don't know
   # how to debug it.
   # ref: https://github.com/ggerganov/llama.cpp/actions/runs/7131777249/job/19420981052#step:5:1124
   macOS-latest-make:
@@ -497,15 +497,15 @@ jobs:
         env:
           LLAMA_FATAL_WARNINGS: 1
         run: |
-          LLAMA_NO_METAL=1 make -j $(sysctl -n hw.logicalcpu)
+          GGML_NO_METAL=1 make -j $(sysctl -n hw.logicalcpu)

       - name: Test
         id: make_test
         run: |
-          LLAMA_NO_METAL=1 make tests -j $(sysctl -n hw.logicalcpu)
-          LLAMA_NO_METAL=1 make test -j $(sysctl -n hw.logicalcpu)
+          GGML_NO_METAL=1 make tests -j $(sysctl -n hw.logicalcpu)
+          GGML_NO_METAL=1 make test -j $(sysctl -n hw.logicalcpu)

-  # TODO: build with LLAMA_METAL=OFF because test-backend-ops fail on "Apple Paravirtual device" and I don't know
+  # TODO: build with GGML_METAL=OFF because test-backend-ops fail on "Apple Paravirtual device" and I don't know
   # how to debug it.
   # ref: https://github.com/ggerganov/llama.cpp/actions/runs/7132125951/job/19422043567?pr=4359#step:5:6584
   # would be great if we fix these
|
||||||
sysctl -a
|
sysctl -a
|
||||||
mkdir build
|
mkdir build
|
||||||
cd build
|
cd build
|
||||||
cmake -DLLAMA_FATAL_WARNINGS=ON -DLLAMA_METAL=OFF ..
|
cmake -DLLAMA_FATAL_WARNINGS=ON -DGGML_METAL=OFF ..
|
||||||
cmake --build . --config Release -j $(sysctl -n hw.logicalcpu)
|
cmake --build . --config Release -j $(sysctl -n hw.logicalcpu)
|
||||||
|
|
||||||
- name: Test
|
- name: Test
|
||||||
|
@@ -559,13 +559,14 @@ jobs:
           mkdir build
           cd build
           cmake -G Xcode .. \
-            -DLLAMA_METAL_EMBED_LIBRARY=ON \
+            -DGGML_METAL_EMBED_LIBRARY=ON \
             -DLLAMA_BUILD_EXAMPLES=OFF \
             -DLLAMA_BUILD_TESTS=OFF \
             -DLLAMA_BUILD_SERVER=OFF \
             -DCMAKE_SYSTEM_NAME=iOS \
-            -DCMAKE_OSX_DEPLOYMENT_TARGET=14.0
-          cmake --build . --config Release -j $(sysctl -n hw.logicalcpu)
+            -DCMAKE_OSX_DEPLOYMENT_TARGET=14.0 \
+            -DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml
+          cmake --build . --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO

   macOS-latest-cmake-tvos:
     runs-on: macos-latest
@@ -588,13 +589,14 @@ jobs:
           mkdir build
           cd build
           cmake -G Xcode .. \
-            -DLLAMA_METAL_EMBED_LIBRARY=ON \
+            -DGGML_METAL_EMBED_LIBRARY=ON \
             -DLLAMA_BUILD_EXAMPLES=OFF \
             -DLLAMA_BUILD_TESTS=OFF \
             -DLLAMA_BUILD_SERVER=OFF \
             -DCMAKE_SYSTEM_NAME=tvOS \
-            -DCMAKE_OSX_DEPLOYMENT_TARGET=14.0
-          cmake --build . --config Release -j $(sysctl -n hw.logicalcpu)
+            -DCMAKE_OSX_DEPLOYMENT_TARGET=14.0 \
+            -DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml
+          cmake --build . --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO

   macOS-latest-swift:
     runs-on: macos-latest
@@ -662,7 +664,7 @@ jobs:
       - name: Build using make w/ OpenBLAS
         shell: msys2 {0}
         run: |
-          make LLAMA_OPENBLAS=1 -j $(nproc)
+          make GGML_OPENBLAS=1 -j $(nproc)

       - name: Build using CMake
         shell: msys2 {0}

@@ -678,7 +680,7 @@ jobs:
       - name: Build using CMake w/ OpenBLAS
         shell: msys2 {0}
         run: |
-          cmake -B build -DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS
+          cmake -B build -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
           cmake --build build --config ${{ matrix.build }} -j $(nproc)

   windows-latest-cmake:
@@ -693,25 +695,25 @@ jobs:
       matrix:
         include:
           - build: 'rpc-x64'
-            defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_RPC=ON -DBUILD_SHARED_LIBS=ON'
+            defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DBUILD_SHARED_LIBS=ON'
           - build: 'noavx-x64'
-            defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_AVX=OFF -DLLAMA_AVX2=OFF -DLLAMA_FMA=OFF -DBUILD_SHARED_LIBS=ON'
+            defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_AVX=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF -DBUILD_SHARED_LIBS=ON'
           - build: 'avx2-x64'
-            defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DBUILD_SHARED_LIBS=ON'
+            defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DBUILD_SHARED_LIBS=ON'
           - build: 'avx-x64'
-            defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_AVX2=OFF -DBUILD_SHARED_LIBS=ON'
+            defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_AVX2=OFF -DBUILD_SHARED_LIBS=ON'
           - build: 'avx512-x64'
-            defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_AVX512=ON -DBUILD_SHARED_LIBS=ON'
+            defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_AVX512=ON -DBUILD_SHARED_LIBS=ON'
           - build: 'openblas-x64'
-            defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_BLAS=ON -DBUILD_SHARED_LIBS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS -DBLAS_INCLUDE_DIRS="$env:RUNNER_TEMP/openblas/include" -DBLAS_LIBRARIES="$env:RUNNER_TEMP/openblas/lib/openblas.lib"'
+            defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_BLAS=ON -DBUILD_SHARED_LIBS=ON -DGGML_BLAS_VENDOR=OpenBLAS -DBLAS_INCLUDE_DIRS="$env:RUNNER_TEMP/openblas/include" -DBLAS_LIBRARIES="$env:RUNNER_TEMP/openblas/lib/openblas.lib"'
           - build: 'kompute-x64'
-            defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_KOMPUTE=ON -DKOMPUTE_OPT_DISABLE_VULKAN_VERSION_CHECK=ON -DBUILD_SHARED_LIBS=ON'
+            defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_KOMPUTE=ON -DKOMPUTE_OPT_DISABLE_VULKAN_VERSION_CHECK=ON -DBUILD_SHARED_LIBS=ON'
           - build: 'vulkan-x64'
-            defines: '-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_VULKAN=ON -DBUILD_SHARED_LIBS=ON'
+            defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_VULKAN=ON -DBUILD_SHARED_LIBS=ON'
           - build: 'llvm-arm64'
-            defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-llvm.cmake -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DBUILD_SHARED_LIBS=ON'
+            defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-llvm.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DBUILD_SHARED_LIBS=ON'
           - build: 'msvc-arm64'
-            defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-msvc.cmake -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DBUILD_SHARED_LIBS=ON'
+            defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-msvc.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DBUILD_SHARED_LIBS=ON'

     steps:
       - name: Clone
|
||||||
id: clone_kompute
|
id: clone_kompute
|
||||||
if: ${{ matrix.build == 'kompute-x64' }}
|
if: ${{ matrix.build == 'kompute-x64' }}
|
||||||
run: |
|
run: |
|
||||||
git submodule update --init kompute
|
git submodule update --init ggml/src/kompute
|
||||||
|
|
||||||
- name: Download OpenBLAS
|
- name: Download OpenBLAS
|
||||||
id: get_openblas
|
id: get_openblas
|
||||||
|
@ -797,6 +799,7 @@ jobs:
|
||||||
7z x "-o${env:RUNNER_TEMP}" $env:RUNNER_TEMP/sde.tar
|
7z x "-o${env:RUNNER_TEMP}" $env:RUNNER_TEMP/sde.tar
|
||||||
$sde = $(join-path $env:RUNNER_TEMP sde-external-${env:SDE_VERSION}-win/sde.exe)
|
$sde = $(join-path $env:RUNNER_TEMP sde-external-${env:SDE_VERSION}-win/sde.exe)
|
||||||
cd build
|
cd build
|
||||||
|
$env:LLAMA_SKIP_TESTS_SLOW_ON_EMULATOR = 1
|
||||||
& $sde -future -- ctest -L main -C Release --verbose --timeout 900
|
& $sde -future -- ctest -L main -C Release --verbose --timeout 900
|
||||||
|
|
||||||
- name: Determine tag name
|
- name: Determine tag name
|
||||||
|
@ -854,7 +857,7 @@ jobs:
|
||||||
run: |
|
run: |
|
||||||
mkdir build
|
mkdir build
|
||||||
cd build
|
cd build
|
||||||
cmake .. -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=ON
|
cmake .. -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=ON
|
||||||
cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS}
|
cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS}
|
||||||
|
|
||||||
- name: Determine tag name
|
- name: Determine tag name
|
||||||
|
@ -987,7 +990,7 @@ jobs:
|
||||||
run: |
|
run: |
|
||||||
$env:HIP_PATH=$(Resolve-Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | split-path | split-path)
|
$env:HIP_PATH=$(Resolve-Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | split-path | split-path)
|
||||||
$env:CMAKE_PREFIX_PATH="${env:HIP_PATH}"
|
$env:CMAKE_PREFIX_PATH="${env:HIP_PATH}"
|
||||||
cmake -G "Unix Makefiles" -B build -S . -DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" -DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" -DLLAMA_HIPBLAS=ON
|
cmake -G "Unix Makefiles" -B build -S . -DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" -DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" -DGGML_HIPBLAS=ON
|
||||||
cmake --build build --config Release
|
cmake --build build --config Release
|
||||||
|
|
||||||
ios-xcode-build:
|
ios-xcode-build:
|
||||||
|
|
11 .github/workflows/docker.yml vendored

@@ -10,10 +10,11 @@
 name: Publish Docker image

 on:
-  pull_request:
+  #pull_request:
   push:
     branches:
       - master
+    paths: ['.github/workflows/docker.yml', '.devops/*.Dockerfile', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.cuh', '**/*.swift', '**/*.m', '**/*.metal']

 concurrency:
   group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
|
||||||
jobs:
|
jobs:
|
||||||
push_to_registry:
|
push_to_registry:
|
||||||
name: Push Docker image to Docker Hub
|
name: Push Docker image to Docker Hub
|
||||||
if: github.event.pull_request.draft == false
|
#if: github.event.pull_request.draft == false
|
||||||
|
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
env:
|
env:
|
||||||
|
@@ -33,15 +34,13 @@ jobs:
           - { tag: "light", dockerfile: ".devops/llama-cli.Dockerfile", platforms: "linux/amd64,linux/arm64" }
           - { tag: "server", dockerfile: ".devops/llama-server.Dockerfile", platforms: "linux/amd64,linux/arm64" }
           - { tag: "full", dockerfile: ".devops/full.Dockerfile", platforms: "linux/amd64,linux/arm64" }
-          # NOTE(canardletter): The CUDA builds on arm64 are very slow, so I
-          # have disabled them for now until the reason why
-          # is understood.
           - { tag: "light-cuda", dockerfile: ".devops/llama-cli-cuda.Dockerfile", platforms: "linux/amd64" }
           - { tag: "server-cuda", dockerfile: ".devops/llama-server-cuda.Dockerfile", platforms: "linux/amd64" }
           - { tag: "full-cuda", dockerfile: ".devops/full-cuda.Dockerfile", platforms: "linux/amd64" }
           - { tag: "light-rocm", dockerfile: ".devops/llama-cli-rocm.Dockerfile", platforms: "linux/amd64,linux/arm64" }
           - { tag: "server-rocm", dockerfile: ".devops/llama-server-rocm.Dockerfile", platforms: "linux/amd64,linux/arm64" }
-          - { tag: "full-rocm", dockerfile: ".devops/full-rocm.Dockerfile", platforms: "linux/amd64,linux/arm64" }
+          # Note: the full-rocm image is failing due to a "no space left on device" error. It is disabled for now to allow the workflow to complete.
+          #- { tag: "full-rocm", dockerfile: ".devops/full-rocm.Dockerfile", platforms: "linux/amd64,linux/arm64" }
           - { tag: "light-intel", dockerfile: ".devops/llama-cli-intel.Dockerfile", platforms: "linux/amd64" }
           - { tag: "server-intel", dockerfile: ".devops/llama-server-intel.Dockerfile", platforms: "linux/amd64" }
     steps:
8 .github/workflows/server.yml vendored

@@ -30,7 +30,7 @@ jobs:

     strategy:
       matrix:
-        sanitizer: [ADDRESS, THREAD, UNDEFINED]
+        sanitizer: [ADDRESS, UNDEFINED] # THREAD is broken
         build_type: [RelWithDebInfo]
         include:
           - build_type: Release
@@ -92,12 +92,12 @@ jobs:
         if: ${{ matrix.sanitizer == 'THREAD' }}
         run: |
           cmake -B build \
-            -DLLAMA_NATIVE=OFF \
+            -DGGML_NATIVE=OFF \
             -DLLAMA_BUILD_SERVER=ON \
             -DLLAMA_CURL=ON \
             -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \
             -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON \
-            -DLLAMA_OPENMP=OFF ;
+            -DGGML_OPENMP=OFF ;
           cmake --build build --config ${{ matrix.build_type }} -j $(nproc) --target llama-server

       - name: Build
|
||||||
if: ${{ matrix.sanitizer != 'THREAD' }}
|
if: ${{ matrix.sanitizer != 'THREAD' }}
|
||||||
run: |
|
run: |
|
||||||
cmake -B build \
|
cmake -B build \
|
||||||
-DLLAMA_NATIVE=OFF \
|
-DGGML_NATIVE=OFF \
|
||||||
-DLLAMA_BUILD_SERVER=ON \
|
-DLLAMA_BUILD_SERVER=ON \
|
||||||
-DLLAMA_CURL=ON \
|
-DLLAMA_CURL=ON \
|
||||||
-DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \
|
-DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \
|
||||||
|
|
1 .gitignore vendored

@@ -56,6 +56,7 @@ CMakeSettings.json
 compile_commands.json
 ggml-metal-embed.metal
 llama-batched-swift
+/rpc-server
 out/
 tmp/
2 .gitmodules vendored

@@ -1,3 +1,3 @@
 [submodule "kompute"]
-  path = kompute
+  path = ggml/src/kompute
   url = https://github.com/nomic-ai/kompute.git
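With the kompute submodule path moving under ggml/src, existing checkouts need the submodule re-initialised at its new location after pulling this change, mirroring the CI step updated above. A hedged sketch; only the update command is taken from the diff, the other lines (and the old-path cleanup) are assumptions about a typical local workflow:

  git pull
  git submodule sync
  git submodule update --init ggml/src/kompute
  rm -rf kompute   # assumption: remove any leftover working tree at the old submodule path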
129 AUTHORS

@@ -1,8 +1,9 @@
-# date: Tue Apr 9 09:17:14 EEST 2024
+# date: Wed Jun 26 19:36:34 EEST 2024
 # this file is auto-generated by scripts/gen-authors.sh

 0cc4m <picard12@live.de>
 0xspringtime <110655352+0xspringtime@users.noreply.github.com>
+20kdc <asdd2808@gmail.com>
 2f38b454 <dxf@protonmail.com>
 3ooabkhxtn <31479382+3ooabkhxtn@users.noreply.github.com>
 44670 <44670@users.noreply.github.com>

@@ -11,14 +12,18 @@ AT <manyoso@users.noreply.github.com>
 Aarni Koskela <akx@iki.fi>
 Aaron Miller <apage43@ninjawhale.com>
 Aaryaman Vasishta <aaryaman.vasishta@amd.com>
+Abheek Gulati <abheekg@hotmail.com>
 Abhilash Majumder <30946547+abhilash1910@users.noreply.github.com>
 Abhishek Gopinath K <31348521+overtunned@users.noreply.github.com>
 Adithya Balaji <adithya.b94@gmail.com>
 AdithyanI <adithyan.i4internet@gmail.com>
 Adrian <smith.adriane@gmail.com>
 Adrian Hesketh <a-h@users.noreply.github.com>
+Ahmet Zeer <ahmed.zeer@std.yildiz.edu.tr>
 AidanBeltonS <87009434+AidanBeltonS@users.noreply.github.com>
 Aisuko <urakiny@gmail.com>
+Akarshan Biswas <akarshanbiswas@fedoraproject.org>
+Albert Jin <albert.jin@gmail.com>
 Alberto <57916483+albbus-stack@users.noreply.github.com>
 Alex <awhill19@icloud.com>
 Alex Azarov <alex@azarov.by>

@@ -35,19 +40,24 @@ Ali Nehzat <ali.nehzat@thanks.dev>
 Ali Tariq <ali.tariq@10xengineers.ai>
 Alon <alonfaraj@gmail.com>
 AlpinDale <52078762+AlpinDale@users.noreply.github.com>
+Amir <amir_zia@outlook.com>
 AmirAli Mirian <37371367+amiralimi@users.noreply.github.com>
 Ananta Bastola <anantarajbastola@gmail.com>
 Anas Ahouzi <112881240+aahouzi@users.noreply.github.com>
 András Salamon <ott2@users.noreply.github.com>
 Andrei <abetlen@gmail.com>
 Andrew Canis <andrew.canis@gmail.com>
+Andrew Downing <andrew2085@gmail.com>
 Andrew Duffy <a10y@users.noreply.github.com>
 Andrew Godfrey <AndrewGodfrey@users.noreply.github.com>
+Andy Tai <andy-tai@users.noreply.github.com>
 Arik Poznanski <arikpoz@users.noreply.github.com>
 Artem <guinmoon@gmail.com>
+Artem Zinnatullin <ceo@abstractny.gay>
 Artyom Lebedev <vagran.ast@gmail.com>
 Asbjørn Olling <asbjornolling@gmail.com>
 Ásgeir Bjarni Ingvarsson <asgeir@fundinn.org>
+Ashish <1856117+ashishdatta@users.noreply.github.com>
 Ashok Gelal <401055+ashokgelal@users.noreply.github.com>
 Ashraful Islam <ashraful.meche@gmail.com>
 Atsushi Tatsuma <yoshoku@outlook.com>

@@ -57,35 +67,46 @@ BADR <contact@pythops.com>
 Bach Le <bach@bullno1.com>
 Bailey Chittle <39804642+bachittle@users.noreply.github.com>
 BarfingLemurs <128182951+BarfingLemurs@users.noreply.github.com>
+Bartowski <ckealty1182@gmail.com>
 Behnam M <58621210+ibehnam@users.noreply.github.com>
+Ben Ashbaugh <ben.ashbaugh@intel.com>
 Ben Garney <bengarney@users.noreply.github.com>
 Ben Siraphob <bensiraphob@gmail.com>
 Ben Williams <ben@719ben.com>
+Benjamin Findley <39356821+Kartoffelsaft@users.noreply.github.com>
 Benjamin Lecaillon <84293038+blecaillon@users.noreply.github.com>
 Bernat Vadell <hounter.caza@gmail.com>
+Bingan <70050083+binganao@users.noreply.github.com>
 Bodo Graumann <mail@bodograumann.de>
 Bono Lv <lvscar@users.noreply.github.com>
 Borislav Stanimirov <b.stanimirov@abv.bg>
 Branden Butler <bwtbutler@hotmail.com>
 Brian <mofosyne@gmail.com>
 Bruce MacDonald <brucewmacdonald@gmail.com>
+Bryan Honof <bryanhonof@gmail.com>
 CJ Pais <cj@cjpais.com>
 CRD716 <crd716@gmail.com>
+Calvin Laurenson <calvin@laurenson.dev>
 Cameron <csteele@steelecameron.com>
 Cameron Kaiser <classilla@users.noreply.github.com>
+Carolinabanana <140120812+Carolinabanana@users.noreply.github.com>
 Casey Primozic <casey@cprimozic.net>
 Casey Primozic <me@ameo.link>
 CausalLM <148736309+CausalLM@users.noreply.github.com>
 Cebtenzzre <cebtenzzre@gmail.com>
 Chad Brewbaker <crb002@gmail.com>
+Chao Jiang <jc19chaoj@zoho.com>
 Cheng Shao <terrorjack@type.dance>
+Chris Elrod <elrodc@gmail.com>
 Chris Kuehl <ckuehl@ckuehl.me>
 Christian Demsar <christian@github.email.demsar.us>
 Christian Demsar <crasm@git.vczf.us>
 Christian Falch <875252+chrfalch@users.noreply.github.com>
 Christian Kögler <ck3d@gmx.de>
+Christian Zhou-Zheng <59622928+christianazinn@users.noreply.github.com>
 Clark Saben <76020733+csaben@users.noreply.github.com>
 Clint Herron <hanclinto@gmail.com>
+CrispStrobe <154636388+CrispStrobe@users.noreply.github.com>
 Cuong Trinh Manh <nguoithichkhampha@gmail.com>
 DAN™ <dranger003@gmail.com>
 Damian Stewart <d@damianstewart.com>

@@ -95,8 +116,12 @@ Daniel Bevenius <daniel.bevenius@gmail.com>
 Daniel Drake <drake@endlessos.org>
 Daniel Hiltgen <dhiltgen@users.noreply.github.com>
 Daniel Illescas Romero <illescas.daniel@protonmail.com>
+Daniele <57776841+daniandtheweb@users.noreply.github.com>
 DannyDaemonic <DannyDaemonic@gmail.com>
 Dat Quoc Nguyen <2412555+datquocnguyen@users.noreply.github.com>
+Dave <dave-fl@users.noreply.github.com>
+Dave Airlie <airlied@gmail.com>
+Dave Airlie <airlied@redhat.com>
 Dave Della Costa <ddellacosta+github@gmail.com>
 David Friehs <david@friehs.info>
 David Kennedy <dakennedyd@gmail.com>

@@ -104,10 +129,13 @@ David Pflug <david@pflug.email>
 David Renshaw <dwrenshaw@gmail.com>
 David Sommers <12738+databyte@users.noreply.github.com>
 David Yang <davidyang6us@gmail.com>
+Dawid Potocki <github@dawidpotocki.com>
 Dawid Wysocki <62249621+TortillaZHawaii@users.noreply.github.com>
 Dean <Dean.Sinaean@gmail.com>
 Deins <deinsegle@gmail.com>
+Deven Mistry <31466137+deven367@users.noreply.github.com>
 Didzis Gosko <didzis@users.noreply.github.com>
+Djip007 <djip.perois@free.fr>
 Don Mahurin <dmahurin@users.noreply.github.com>
 DooWoong Lee (David) <manics99@naver.com>
 Doomsdayrs <38189170+Doomsdayrs@users.noreply.github.com>

@@ -116,8 +144,11 @@ Dr. Tom Murphy VII Ph.D <499244+tom7@users.noreply.github.com>
 Ebey Abraham <ebey97@gmail.com>
 Ed Lee <edilee@mozilla.com>
 Ed Lepedus <ed.lepedus@googlemail.com>
+Eddie-Wang <wangjinheng1120@163.com>
 Edward Taylor <edeetee@gmail.com>
+Elaine <elaine.zosa@gmail.com>
 Elbios <141279586+Elbios@users.noreply.github.com>
+Elton Kola <eltonkola@gmail.com>
 Engininja2 <139037756+Engininja2@users.noreply.github.com>
 Equim <sayaka@ekyu.moe>
 Eric Sommerlade <es0m@users.noreply.github.com>

@@ -143,37 +174,47 @@ Firat <firatkiral@gmail.com>
 Folko-Ven <71110216+Folko-Ven@users.noreply.github.com>
 Foul-Tarnished <107711110+Foul-Tarnished@users.noreply.github.com>
 Francisco Melo <43780565+francis2tm@users.noreply.github.com>
+Frank Mai <thxcode0824@gmail.com>
 FrankHB <frankhb1989@gmail.com>
+Fred Douglas <43351173+fredlas@users.noreply.github.com>
 Frederik Vogel <Schaltfehler@users.noreply.github.com>
 Gabe Goodhart <gabe.l.hart@gmail.com>
 GainLee <perfecter.gen@gmail.com>
 Galunid <karolek1231456@gmail.com>
 Gary Linscott <glinscott@gmail.com>
 Gary Mulder <gjmulder@gmail.com>
+Gavin Zhao <gavinzhaojw@protonmail.com>
 Genkagaku.GPT <hlhr202@163.com>
 Georgi Gerganov <ggerganov@gmail.com>
 Gilad S <giladgd@users.noreply.github.com>
+Giuseppe Scrivano <giuseppe@scrivano.org>
 GiviMAD <GiviMAD@users.noreply.github.com>
 Govlzkoy <gotope@users.noreply.github.com>
 Guillaume "Vermeille" Sanchez <Guillaume.V.Sanchez@gmail.com>
|
||||||
Guillaume Wenzek <gwenzek@users.noreply.github.com>
|
Guillaume Wenzek <gwenzek@users.noreply.github.com>
|
||||||
Guoteng <32697156+SolenoidWGT@users.noreply.github.com>
|
Guoteng <32697156+SolenoidWGT@users.noreply.github.com>
|
||||||
Gustavo Rocha Dias <91472747+gustrd@users.noreply.github.com>
|
Gustavo Rocha Dias <91472747+gustrd@users.noreply.github.com>
|
||||||
|
Haggai Nuchi <h.nuchi@gmail.com>
|
||||||
Halalaluyafail3 <55773281+Halalaluyafail3@users.noreply.github.com>
|
Halalaluyafail3 <55773281+Halalaluyafail3@users.noreply.github.com>
|
||||||
|
Hamdoud Hakem <90524568+hamdoudhakem@users.noreply.github.com>
|
||||||
|
HanishKVC <hanishkvc@gmail.com>
|
||||||
Haohui Mai <ricetons@gmail.com>
|
Haohui Mai <ricetons@gmail.com>
|
||||||
Haoxiang Fei <tonyfettes@tonyfettes.com>
|
Haoxiang Fei <tonyfettes@tonyfettes.com>
|
||||||
Harald Fernengel <harald.fernengel@here.com>
|
Harald Fernengel <harald.fernengel@here.com>
|
||||||
Hatsune Miku <129688334+at8u@users.noreply.github.com>
|
Hatsune Miku <129688334+at8u@users.noreply.github.com>
|
||||||
|
HatsuneMikuUwU33 <173229399+HatsuneMikuUwU33@users.noreply.github.com>
|
||||||
Henk Poley <HenkPoley@gmail.com>
|
Henk Poley <HenkPoley@gmail.com>
|
||||||
Henri Vasserman <henv@hot.ee>
|
Henri Vasserman <henv@hot.ee>
|
||||||
Henrik Forstén <henrik.forsten@gmail.com>
|
Henrik Forstén <henrik.forsten@gmail.com>
|
||||||
Herman Semenov <GermanAizek@yandex.ru>
|
Herman Semenov <GermanAizek@yandex.ru>
|
||||||
Hesen Peng <hesen.peng@gmail.com>
|
Hesen Peng <hesen.peng@gmail.com>
|
||||||
Hoang Nguyen <hugo53@users.noreply.github.com>
|
Hoang Nguyen <hugo53@users.noreply.github.com>
|
||||||
|
Hong Bo PENG <penghb@cn.ibm.com>
|
||||||
Hongyu Ouyang <96765450+casavaca@users.noreply.github.com>
|
Hongyu Ouyang <96765450+casavaca@users.noreply.github.com>
|
||||||
Howard Su <howard0su@gmail.com>
|
Howard Su <howard0su@gmail.com>
|
||||||
Hua Jiang <allenhjiang@outlook.com>
|
Hua Jiang <allenhjiang@outlook.com>
|
||||||
Huawei Lin <huaweilin.cs@gmail.com>
|
Huawei Lin <huaweilin.cs@gmail.com>
|
||||||
|
Hugo Roussel <hugo.rous@gmail.com>
|
||||||
Ian Bull <irbull@eclipsesource.com>
|
Ian Bull <irbull@eclipsesource.com>
|
||||||
Ian Bull <irbull@gmail.com>
|
Ian Bull <irbull@gmail.com>
|
||||||
Ian Scrivener <github@zilogy.asia>
|
Ian Scrivener <github@zilogy.asia>
|
||||||
|
@ -190,8 +231,10 @@ Ivan Stepanov <ivanstepanovftw@gmail.com>
|
||||||
JH23X <165871467+JH23X@users.noreply.github.com>
|
JH23X <165871467+JH23X@users.noreply.github.com>
|
||||||
Jack Mousseau <jmousseau@users.noreply.github.com>
|
Jack Mousseau <jmousseau@users.noreply.github.com>
|
||||||
JackJollimore <130917767+JackJollimore@users.noreply.github.com>
|
JackJollimore <130917767+JackJollimore@users.noreply.github.com>
|
||||||
|
Jaemin Son <woalsdnd@gmail.com>
|
||||||
Jag Chadha <jagtesh@gmail.com>
|
Jag Chadha <jagtesh@gmail.com>
|
||||||
Jakub N <jakubniemczyk97@gmail.com>
|
Jakub N <jakubniemczyk97@gmail.com>
|
||||||
|
James A Capozzoli <157492257+jac-jim@users.noreply.github.com>
|
||||||
James Reynolds <magnusviri@users.noreply.github.com>
|
James Reynolds <magnusviri@users.noreply.github.com>
|
||||||
Jan Boon <jan.boon@kaetemi.be>
|
Jan Boon <jan.boon@kaetemi.be>
|
||||||
Jan Boon <kaetemi@gmail.com>
|
Jan Boon <kaetemi@gmail.com>
|
||||||
|
@ -205,12 +248,17 @@ Jean-Michaël Celerier <jeanmichael.celerier+github@gmail.com>
|
||||||
Jed Fox <git@jedfox.com>
|
Jed Fox <git@jedfox.com>
|
||||||
Jeffrey Quesnelle <emozilla@nousresearch.com>
|
Jeffrey Quesnelle <emozilla@nousresearch.com>
|
||||||
Jesse Jojo Johnson <williamsaintgeorge@gmail.com>
|
Jesse Jojo Johnson <williamsaintgeorge@gmail.com>
|
||||||
|
Jeximo <jeximo@gmail.com>
|
||||||
Jhen-Jie Hong <iainst0409@gmail.com>
|
Jhen-Jie Hong <iainst0409@gmail.com>
|
||||||
Jiahao Li <liplus17@163.com>
|
Jiahao Li <liplus17@163.com>
|
||||||
Jian Liao <jianliao@users.noreply.github.com>
|
Jian Liao <jianliao@users.noreply.github.com>
|
||||||
JidongZhang-THU <1119708529@qq.com>
|
JidongZhang-THU <1119708529@qq.com>
|
||||||
Jinwoo Jeong <33892306+williamjeong2@users.noreply.github.com>
|
Jinwoo Jeong <33892306+williamjeong2@users.noreply.github.com>
|
||||||
Jiří Podivín <66251151+jpodivin@users.noreply.github.com>
|
Jiří Podivín <66251151+jpodivin@users.noreply.github.com>
|
||||||
|
Jiří Sejkora <Sejseloid@gmail.com>
|
||||||
|
Joan Fontanals <jfontanalsmartinez@gmail.com>
|
||||||
|
Joan Fontanals <joan.fontanals.martinez@jina.ai>
|
||||||
|
Johan <JohanAR@users.noreply.github.com>
|
||||||
Johannes Gäßler <johannesg@5d6.de>
|
Johannes Gäßler <johannesg@5d6.de>
|
||||||
Johannes Rudolph <johannes.rudolph@gmail.com>
|
Johannes Rudolph <johannes.rudolph@gmail.com>
|
||||||
John <78893154+cmp-nct@users.noreply.github.com>
|
John <78893154+cmp-nct@users.noreply.github.com>
|
||||||
|
@ -221,15 +269,19 @@ Jonas Wunderlich <32615971+jonas-w@users.noreply.github.com>
|
||||||
Jorge A <161275481+jorgealias@users.noreply.github.com>
|
Jorge A <161275481+jorgealias@users.noreply.github.com>
|
||||||
Jose Maldonado <63384398+yukiteruamano@users.noreply.github.com>
|
Jose Maldonado <63384398+yukiteruamano@users.noreply.github.com>
|
||||||
Joseph Stahl <1269177+josephst@users.noreply.github.com>
|
Joseph Stahl <1269177+josephst@users.noreply.github.com>
|
||||||
|
Josh Ramer <josh.ramer@icloud.com>
|
||||||
Joyce <joycebrum@google.com>
|
Joyce <joycebrum@google.com>
|
||||||
Juan Calderon-Perez <835733+gaby@users.noreply.github.com>
|
Juan Calderon-Perez <835733+gaby@users.noreply.github.com>
|
||||||
Judd <foldl@users.noreply.github.com>
|
Judd <foldl@users.noreply.github.com>
|
||||||
Julius Arkenberg <arki05@users.noreply.github.com>
|
Julius Arkenberg <arki05@users.noreply.github.com>
|
||||||
Jun Jie <71215065+junnjiee16@users.noreply.github.com>
|
Jun Jie <71215065+junnjiee16@users.noreply.github.com>
|
||||||
|
Junyang Lin <justinlin930319@hotmail.com>
|
||||||
Juraj Bednar <juraj@bednar.io>
|
Juraj Bednar <juraj@bednar.io>
|
||||||
Justin Parker <jparkerweb@gmail.com>
|
Justin Parker <jparkerweb@gmail.com>
|
||||||
Justin Suess <justin.suess@westpoint.edu>
|
Justin Suess <justin.suess@westpoint.edu>
|
||||||
|
Justina Cho <justcho5@gmail.com>
|
||||||
Justine Tunney <jtunney@gmail.com>
|
Justine Tunney <jtunney@gmail.com>
|
||||||
|
Justine Tunney <jtunney@mozilla.com>
|
||||||
Juuso Alasuutari <juuso.alasuutari@gmail.com>
|
Juuso Alasuutari <juuso.alasuutari@gmail.com>
|
||||||
KASR <karim.asrih@gmail.com>
|
KASR <karim.asrih@gmail.com>
|
||||||
Kamil Tomšík <info@tomsik.cz>
|
Kamil Tomšík <info@tomsik.cz>
|
||||||
|
@ -242,6 +294,7 @@ Kawrakow <48489457+ikawrakow@users.noreply.github.com>
|
||||||
Keiichi Tabata <keiichi.tabata@outlook.com>
|
Keiichi Tabata <keiichi.tabata@outlook.com>
|
||||||
Kenvix ⭐ <kenvixzure@live.com>
|
Kenvix ⭐ <kenvixzure@live.com>
|
||||||
Kerfuffle <44031344+KerfuffleV2@users.noreply.github.com>
|
Kerfuffle <44031344+KerfuffleV2@users.noreply.github.com>
|
||||||
|
Kevin Gibbons <bakkot@gmail.com>
|
||||||
Kevin Ji <1146876+kevinji@users.noreply.github.com>
|
Kevin Ji <1146876+kevinji@users.noreply.github.com>
|
||||||
Kevin Kwok <antimatter15@gmail.com>
|
Kevin Kwok <antimatter15@gmail.com>
|
||||||
Kevin Lo <kevlo@kevlo.org>
|
Kevin Lo <kevlo@kevlo.org>
|
||||||
|
@ -257,6 +310,7 @@ Laura <Tijntje_7@msn.com>
|
||||||
Lee <44310445+lx200916@users.noreply.github.com>
|
Lee <44310445+lx200916@users.noreply.github.com>
|
||||||
Lee Drake <b.lee.drake@gmail.com>
|
Lee Drake <b.lee.drake@gmail.com>
|
||||||
Leng Yue <lengyue@lengyue.me>
|
Leng Yue <lengyue@lengyue.me>
|
||||||
|
Leon Knauer <git@leonknauer.com>
|
||||||
LeonEricsson <70749762+LeonEricsson@users.noreply.github.com>
|
LeonEricsson <70749762+LeonEricsson@users.noreply.github.com>
|
||||||
Leonardo Neumann <leonardo@neumann.dev.br>
|
Leonardo Neumann <leonardo@neumann.dev.br>
|
||||||
Li Tan <tanliboy@gmail.com>
|
Li Tan <tanliboy@gmail.com>
|
||||||
|
@ -265,20 +319,26 @@ LoganDark <github@logandark.mozmail.com>
|
||||||
LostRuins <39025047+LostRuins@users.noreply.github.com>
|
LostRuins <39025047+LostRuins@users.noreply.github.com>
|
||||||
Luciano <lucianostrika44@gmail.com>
|
Luciano <lucianostrika44@gmail.com>
|
||||||
Luo Tian <lt@basecity.com>
|
Luo Tian <lt@basecity.com>
|
||||||
|
Lyle Dean <dean@lyle.dev>
|
||||||
M. Yusuf Sarıgöz <yusufsarigoz@gmail.com>
|
M. Yusuf Sarıgöz <yusufsarigoz@gmail.com>
|
||||||
Maarten ter Huurne <maarten@treewalker.org>
|
Maarten ter Huurne <maarten@treewalker.org>
|
||||||
Mack Straight <eiz@users.noreply.github.com>
|
Mack Straight <eiz@users.noreply.github.com>
|
||||||
Maël Kerbiriou <m431.kerbiriou@gmail.com>
|
Maël Kerbiriou <m431.kerbiriou@gmail.com>
|
||||||
MaggotHATE <clay1326@gmail.com>
|
MaggotHATE <clay1326@gmail.com>
|
||||||
|
Manuel <44313466+makuche@users.noreply.github.com>
|
||||||
Marc Köhlbrugge <subscriptions@marckohlbrugge.com>
|
Marc Köhlbrugge <subscriptions@marckohlbrugge.com>
|
||||||
Marco Matthies <71844+marcom@users.noreply.github.com>
|
Marco Matthies <71844+marcom@users.noreply.github.com>
|
||||||
Marcus Dunn <51931484+MarcusDunn@users.noreply.github.com>
|
Marcus Dunn <51931484+MarcusDunn@users.noreply.github.com>
|
||||||
Marian Cepok <marian.cepok@gmail.com>
|
Marian Cepok <marian.cepok@gmail.com>
|
||||||
Mark Fairbairn <thebaron88@gmail.com>
|
Mark Fairbairn <thebaron88@gmail.com>
|
||||||
Marko Tasic <mtasic85@gmail.com>
|
Marko Tasic <mtasic85@gmail.com>
|
||||||
|
Markus Tavenrath <mtavenrath@users.noreply.github.com>
|
||||||
|
Martin Delille <martin@delille.org>
|
||||||
Martin Krasser <krasserm@googlemail.com>
|
Martin Krasser <krasserm@googlemail.com>
|
||||||
Martin Schwaighofer <mschwaig@users.noreply.github.com>
|
Martin Schwaighofer <mschwaig@users.noreply.github.com>
|
||||||
Marvin Gießing <marvin.giessing@gmail.com>
|
Marvin Gießing <marvin.giessing@gmail.com>
|
||||||
|
Masaya, Kato <62578291+msy-kato@users.noreply.github.com>
|
||||||
|
MasterYi1024 <39848311+MasterYi1024@users.noreply.github.com>
|
||||||
Mateusz Charytoniuk <mateusz.charytoniuk@protonmail.com>
|
Mateusz Charytoniuk <mateusz.charytoniuk@protonmail.com>
|
||||||
Matheus C. França <matheus-catarino@hotmail.com>
|
Matheus C. França <matheus-catarino@hotmail.com>
|
||||||
Matheus Gabriel Alves Silva <matheusgasource@gmail.com>
|
Matheus Gabriel Alves Silva <matheusgasource@gmail.com>
|
||||||
|
@ -287,8 +347,11 @@ Mathijs de Bruin <mathijs@mathijsfietst.nl>
|
||||||
Matt Clayton <156335168+mattjcly@users.noreply.github.com>
|
Matt Clayton <156335168+mattjcly@users.noreply.github.com>
|
||||||
Matt Pulver <matt.pulver@heavy.ai>
|
Matt Pulver <matt.pulver@heavy.ai>
|
||||||
Matteo Boschini <12133566+mbosc@users.noreply.github.com>
|
Matteo Boschini <12133566+mbosc@users.noreply.github.com>
|
||||||
|
Mattheus Chediak <shammcity00@gmail.com>
|
||||||
Matthew Tejo <matthew.tejo@gmail.com>
|
Matthew Tejo <matthew.tejo@gmail.com>
|
||||||
Matvey Soloviev <blackhole89@gmail.com>
|
Matvey Soloviev <blackhole89@gmail.com>
|
||||||
|
Max Krasnyansky <max.krasnyansky@gmail.com>
|
||||||
|
Max Krasnyansky <quic_maxk@quicinc.com>
|
||||||
Maxime <672982+maximegmd@users.noreply.github.com>
|
Maxime <672982+maximegmd@users.noreply.github.com>
|
||||||
Maximilian Winter <maximilian.winter.91@gmail.com>
|
Maximilian Winter <maximilian.winter.91@gmail.com>
|
||||||
Meng Zhang <meng@tabbyml.com>
|
Meng Zhang <meng@tabbyml.com>
|
||||||
|
@ -300,32 +363,41 @@ Michael Kesper <mkesper@schokokeks.org>
|
||||||
Michael Klimenko <mklimenko29@gmail.com>
|
Michael Klimenko <mklimenko29@gmail.com>
|
||||||
Michael Podvitskiy <podvitskiymichael@gmail.com>
|
Michael Podvitskiy <podvitskiymichael@gmail.com>
|
||||||
Michael Potter <NanoTekGuy@Gmail.com>
|
Michael Potter <NanoTekGuy@Gmail.com>
|
||||||
|
Michael de Gans <michael.john.degans@gmail.com>
|
||||||
Michaël de Vries <vriesdemichael@gmail.com>
|
Michaël de Vries <vriesdemichael@gmail.com>
|
||||||
Mihai <mihai.chirculescu@yahoo.com>
|
Mihai <mihai.chirculescu@yahoo.com>
|
||||||
Mike <ytianhui2004@gmail.com>
|
Mike <ytianhui2004@gmail.com>
|
||||||
|
Mikko Juola <mikjuo@gmail.com>
|
||||||
Minsoo Cheong <54794500+mscheong01@users.noreply.github.com>
|
Minsoo Cheong <54794500+mscheong01@users.noreply.github.com>
|
||||||
Mirko185 <mirkosig@gmail.com>
|
Mirko185 <mirkosig@gmail.com>
|
||||||
Mirror Azure <54669636+MirrorAzure@users.noreply.github.com>
|
Mirror Azure <54669636+MirrorAzure@users.noreply.github.com>
|
||||||
Miwa / Ensan <63481257+ensan-hcl@users.noreply.github.com>
|
Miwa / Ensan <63481257+ensan-hcl@users.noreply.github.com>
|
||||||
Mohammadreza Hendiani <hendiani.mohammadreza@gmail.com>
|
Mohammadreza Hendiani <hendiani.mohammadreza@gmail.com>
|
||||||
|
Mohammadreza Hendiani <mohammad.r.hendiani@gmail.com>
|
||||||
Murilo Santana <mvrilo@gmail.com>
|
Murilo Santana <mvrilo@gmail.com>
|
||||||
Musab Gultekin <musabgultekin@users.noreply.github.com>
|
Musab Gultekin <musabgultekin@users.noreply.github.com>
|
||||||
Nam D. Tran <42194884+namtranase@users.noreply.github.com>
|
Nam D. Tran <42194884+namtranase@users.noreply.github.com>
|
||||||
|
Nathan Epstein <nate2@umbc.edu>
|
||||||
NawafAlansari <72708095+NawafAlansari@users.noreply.github.com>
|
NawafAlansari <72708095+NawafAlansari@users.noreply.github.com>
|
||||||
Nebula <infinitewormhole@gmail.com>
|
Nebula <infinitewormhole@gmail.com>
|
||||||
|
Neo Zhang <14088817+arthw@users.noreply.github.com>
|
||||||
|
Neo Zhang <zhang.jianyu@outlook.com>
|
||||||
Neo Zhang Jianyu <jianyu.zhang@intel.com>
|
Neo Zhang Jianyu <jianyu.zhang@intel.com>
|
||||||
Neuman Vong <neuman.vong@gmail.com>
|
Neuman Vong <neuman.vong@gmail.com>
|
||||||
Nexesenex <124105151+Nexesenex@users.noreply.github.com>
|
Nexesenex <124105151+Nexesenex@users.noreply.github.com>
|
||||||
Niall Coates <1349685+Niall-@users.noreply.github.com>
|
Niall Coates <1349685+Niall-@users.noreply.github.com>
|
||||||
Nicolai Weitkemper <kontakt@nicolaiweitkemper.de>
|
Nicolai Weitkemper <kontakt@nicolaiweitkemper.de>
|
||||||
|
Nicolás Pérez <nicolas_perez@brown.edu>
|
||||||
Nigel Bosch <pnigelb@gmail.com>
|
Nigel Bosch <pnigelb@gmail.com>
|
||||||
Niklas Korz <niklas@niklaskorz.de>
|
Niklas Korz <niklas@niklaskorz.de>
|
||||||
|
Nikolas <127742645+nneubacher@users.noreply.github.com>
|
||||||
Nindaleth <Nindaleth@users.noreply.github.com>
|
Nindaleth <Nindaleth@users.noreply.github.com>
|
||||||
Oleksandr Nikitin <oleksandr@tvori.info>
|
Oleksandr Nikitin <oleksandr@tvori.info>
|
||||||
Oleksii Maryshchenko <oleksii.maryshchenko@gmail.com>
|
Oleksii Maryshchenko <oleksii.maryshchenko@gmail.com>
|
||||||
Olivier Chafik <ochafik@users.noreply.github.com>
|
Olivier Chafik <ochafik@users.noreply.github.com>
|
||||||
Ondřej Čertík <ondrej@certik.us>
|
Ondřej Čertík <ondrej@certik.us>
|
||||||
Ouadie EL FAROUKI <ouadie.elfarouki@codeplay.com>
|
Ouadie EL FAROUKI <ouadie.elfarouki@codeplay.com>
|
||||||
|
Patrice Ferlet <metal3d@gmail.com>
|
||||||
Paul Tsochantaris <ptsochantaris@icloud.com>
|
Paul Tsochantaris <ptsochantaris@icloud.com>
|
||||||
Pavol Rusnak <pavol@rusnak.io>
|
Pavol Rusnak <pavol@rusnak.io>
|
||||||
Pedro Cuenca <pedro@huggingface.co>
|
Pedro Cuenca <pedro@huggingface.co>
|
||||||
|
@ -343,9 +415,14 @@ RJ Adriaansen <adriaansen@eshcc.eur.nl>
|
||||||
Radoslav Gerganov <rgerganov@gmail.com>
|
Radoslav Gerganov <rgerganov@gmail.com>
|
||||||
Radosław Gryta <radek.gryta@gmail.com>
|
Radosław Gryta <radek.gryta@gmail.com>
|
||||||
Rahul Vivek Nair <68507071+RahulVivekNair@users.noreply.github.com>
|
Rahul Vivek Nair <68507071+RahulVivekNair@users.noreply.github.com>
|
||||||
|
Raj Hammeer Singh Hada <hammeerraj@gmail.com>
|
||||||
|
Ralph Soika <ralph.soika@imixs.com>
|
||||||
Rand Xie <randxiexyy29@gmail.com>
|
Rand Xie <randxiexyy29@gmail.com>
|
||||||
Randall Fitzgerald <randall@dasaku.net>
|
Randall Fitzgerald <randall@dasaku.net>
|
||||||
Reinforce-II <fate@eastal.com>
|
Reinforce-II <fate@eastal.com>
|
||||||
|
Ren Xuancheng <jklj077@users.noreply.github.com>
|
||||||
|
Rene Leonhardt <65483435+reneleonhardt@users.noreply.github.com>
|
||||||
|
RhinoDevel <RhinoDevel@users.noreply.github.com>
|
||||||
Riceball LEE <snowyu.lee@gmail.com>
|
Riceball LEE <snowyu.lee@gmail.com>
|
||||||
Richard Kiss <him@richardkiss.com>
|
Richard Kiss <him@richardkiss.com>
|
||||||
Richard Roberson <richardr1126@gmail.com>
|
Richard Roberson <richardr1126@gmail.com>
|
||||||
|
@ -373,6 +450,7 @@ Rowan Hart <rowanbhart@gmail.com>
|
||||||
Rune <43761327+Rune-AI@users.noreply.github.com>
|
Rune <43761327+Rune-AI@users.noreply.github.com>
|
||||||
Ryan Landay <rlanday@gmail.com>
|
Ryan Landay <rlanday@gmail.com>
|
||||||
Ryder Wishart <ryderwishart@gmail.com>
|
Ryder Wishart <ryderwishart@gmail.com>
|
||||||
|
Ryuei <louixs@users.noreply.github.com>
|
||||||
Rőczey Barnabás <31726601+An0nie@users.noreply.github.com>
|
Rőczey Barnabás <31726601+An0nie@users.noreply.github.com>
|
||||||
SakuraUmi <yukinon244@gmail.com>
|
SakuraUmi <yukinon244@gmail.com>
|
||||||
Salvador E. Tropea <stropea@inti.gob.ar>
|
Salvador E. Tropea <stropea@inti.gob.ar>
|
||||||
|
@ -386,6 +464,7 @@ SebastianApel <13675545+SebastianApel@users.noreply.github.com>
|
||||||
Senemu <10880819+Senemu@users.noreply.github.com>
|
Senemu <10880819+Senemu@users.noreply.github.com>
|
||||||
Sergey Alirzaev <zl29ah@gmail.com>
|
Sergey Alirzaev <zl29ah@gmail.com>
|
||||||
Sergio López <slp@sinrega.org>
|
Sergio López <slp@sinrega.org>
|
||||||
|
Sertaç Özercan <852750+sozercan@users.noreply.github.com>
|
||||||
SeungWon Jeong <65549245+redlion0929@users.noreply.github.com>
|
SeungWon Jeong <65549245+redlion0929@users.noreply.github.com>
|
||||||
ShadovvBeast <ShadovvBeast@gmail.com>
|
ShadovvBeast <ShadovvBeast@gmail.com>
|
||||||
Shakhar Dasgupta <shakhardasgupta@gmail.com>
|
Shakhar Dasgupta <shakhardasgupta@gmail.com>
|
||||||
|
@ -394,6 +473,7 @@ Shijie <821898965@qq.com>
|
||||||
Shintarou Okada <kokuzen@gmail.com>
|
Shintarou Okada <kokuzen@gmail.com>
|
||||||
Shouzheng Liu <61452103+lshzh-ww@users.noreply.github.com>
|
Shouzheng Liu <61452103+lshzh-ww@users.noreply.github.com>
|
||||||
Shouzheng Liu <lshzh.hi@gmail.com>
|
Shouzheng Liu <lshzh.hi@gmail.com>
|
||||||
|
Shuichi Tsutsumi <shuichi0526@gmail.com>
|
||||||
Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
|
Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
|
||||||
Simon Willison <swillison@gmail.com>
|
Simon Willison <swillison@gmail.com>
|
||||||
Siwen Yu <yusiwen@gmail.com>
|
Siwen Yu <yusiwen@gmail.com>
|
||||||
|
@ -405,11 +485,14 @@ Someone <sergei.kozlukov@aalto.fi>
|
||||||
Someone Serge <sergei.kozlukov@aalto.fi>
|
Someone Serge <sergei.kozlukov@aalto.fi>
|
||||||
Sourab Mangrulkar <13534540+pacman100@users.noreply.github.com>
|
Sourab Mangrulkar <13534540+pacman100@users.noreply.github.com>
|
||||||
Spencer Sutton <spencersutton@users.noreply.github.com>
|
Spencer Sutton <spencersutton@users.noreply.github.com>
|
||||||
|
Srihari-mcw <96763064+Srihari-mcw@users.noreply.github.com>
|
||||||
Srinivas Billa <nivibilla@gmail.com>
|
Srinivas Billa <nivibilla@gmail.com>
|
||||||
Stefan Sydow <stefan@sydow.email>
|
Stefan Sydow <stefan@sydow.email>
|
||||||
|
Steffen Röcker <sroecker@gmail.com>
|
||||||
Stephan Walter <stephan@walter.name>
|
Stephan Walter <stephan@walter.name>
|
||||||
Stephen Nichols <snichols@users.noreply.github.com>
|
Stephen Nichols <snichols@users.noreply.github.com>
|
||||||
Steve Grubb <ausearch.1@gmail.com>
|
Steve Grubb <ausearch.1@gmail.com>
|
||||||
|
Steven Prichard <spprichard20@gmail.com>
|
||||||
Steven Roussey <sroussey@gmail.com>
|
Steven Roussey <sroussey@gmail.com>
|
||||||
Steward Garcia <57494570+FSSRepo@users.noreply.github.com>
|
Steward Garcia <57494570+FSSRepo@users.noreply.github.com>
|
||||||
Suaj Carrot <72162667+SuajCarrot@users.noreply.github.com>
|
Suaj Carrot <72162667+SuajCarrot@users.noreply.github.com>
|
||||||
|
@ -434,16 +517,19 @@ Tom C <tom.corelis@gmail.com>
|
||||||
Tom Jobbins <784313+TheBloke@users.noreply.github.com>
|
Tom Jobbins <784313+TheBloke@users.noreply.github.com>
|
||||||
Tomas <tom.tomas.36478119@gmail.com>
|
Tomas <tom.tomas.36478119@gmail.com>
|
||||||
Tomáš Pazdiora <tomas.pazdiora@gmail.com>
|
Tomáš Pazdiora <tomas.pazdiora@gmail.com>
|
||||||
|
Tristan Druyen <tristan@vault81.mozmail.com>
|
||||||
Tristan Ross <rosscomputerguy@protonmail.com>
|
Tristan Ross <rosscomputerguy@protonmail.com>
|
||||||
Tungsten842 <886724vf@anonaddy.me>
|
Tungsten842 <886724vf@anonaddy.me>
|
||||||
Tungsten842 <quantmint@protonmail.com>
|
Tungsten842 <quantmint@protonmail.com>
|
||||||
Tushar <ditsuke@protonmail.com>
|
Tushar <ditsuke@protonmail.com>
|
||||||
UEXTM.com <84163508+uextm@users.noreply.github.com>
|
UEXTM.com <84163508+uextm@users.noreply.github.com>
|
||||||
|
Ulrich Drepper <drepper@gmail.com>
|
||||||
Uzo Nweke <uzoechi@gmail.com>
|
Uzo Nweke <uzoechi@gmail.com>
|
||||||
Vaibhav Srivastav <vaibhavs10@gmail.com>
|
Vaibhav Srivastav <vaibhavs10@gmail.com>
|
||||||
Val Kharitonov <mail@kharvd.com>
|
Val Kharitonov <mail@kharvd.com>
|
||||||
Valentin Konovalov <valle.ketsujin@gmail.com>
|
Valentin Konovalov <valle.ketsujin@gmail.com>
|
||||||
Valentyn Bezshapkin <61702053+valentynbez@users.noreply.github.com>
|
Valentyn Bezshapkin <61702053+valentynbez@users.noreply.github.com>
|
||||||
|
Victor Nogueira <felladrin@gmail.com>
|
||||||
Victor Z. Peng <ziliangdotme@gmail.com>
|
Victor Z. Peng <ziliangdotme@gmail.com>
|
||||||
Vlad <spitfireage@gmail.com>
|
Vlad <spitfireage@gmail.com>
|
||||||
Vladimir <bogdad@gmail.com>
|
Vladimir <bogdad@gmail.com>
|
||||||
|
@ -455,7 +541,9 @@ Weird Constructor <weirdconstructor@gmail.com>
|
||||||
Welby Seely <welbyseely@gmail.com>
|
Welby Seely <welbyseely@gmail.com>
|
||||||
Wentai Zhang <rchardx@gmail.com>
|
Wentai Zhang <rchardx@gmail.com>
|
||||||
WillCorticesAI <150854901+WillCorticesAI@users.noreply.github.com>
|
WillCorticesAI <150854901+WillCorticesAI@users.noreply.github.com>
|
||||||
|
William Tambellini <william.tambellini@gmail.com>
|
||||||
Willy Tarreau <w@1wt.eu>
|
Willy Tarreau <w@1wt.eu>
|
||||||
|
Wouter <9594229+DifferentialityDevelopment@users.noreply.github.com>
|
||||||
Wu Jian Ping <wujjpp@hotmail.com>
|
Wu Jian Ping <wujjpp@hotmail.com>
|
||||||
Wu Jian Ping <wujp@greatld.com>
|
Wu Jian Ping <wujp@greatld.com>
|
||||||
Xiake Sun <xiake.sun@intel.com>
|
Xiake Sun <xiake.sun@intel.com>
|
||||||
|
@ -466,6 +554,8 @@ Xiaoyi Chen <cxychina@gmail.com>
|
||||||
Xingchen Song(宋星辰) <xingchensong1996@163.com>
|
Xingchen Song(宋星辰) <xingchensong1996@163.com>
|
||||||
Xuan Son Nguyen <thichthat@gmail.com>
|
Xuan Son Nguyen <thichthat@gmail.com>
|
||||||
Yann Follet <131855179+YannFollet@users.noreply.github.com>
|
Yann Follet <131855179+YannFollet@users.noreply.github.com>
|
||||||
|
Yaroslav <yaroslav.yashin@me.com>
|
||||||
|
Yazan Agha-Schrader <mountaiin@icloud.com>
|
||||||
Yiming Cui <conandiy@vip.qq.com>
|
Yiming Cui <conandiy@vip.qq.com>
|
||||||
Yishuo Wang <MeouSker77@outlook.com>
|
Yishuo Wang <MeouSker77@outlook.com>
|
||||||
Yueh-Po Peng <94939112+y10ab1@users.noreply.github.com>
|
Yueh-Po Peng <94939112+y10ab1@users.noreply.github.com>
|
||||||
|
@ -477,6 +567,7 @@ Zane Shannon <z@zcs.me>
|
||||||
Zay <95888118+isaiahbjork@users.noreply.github.com>
|
Zay <95888118+isaiahbjork@users.noreply.github.com>
|
||||||
Zenix <zenixls2@gmail.com>
|
Zenix <zenixls2@gmail.com>
|
||||||
Zhang Peiyuan <a1286225768@gmail.com>
|
Zhang Peiyuan <a1286225768@gmail.com>
|
||||||
|
Zheng.Deng <32841220+dengzheng-cloud@users.noreply.github.com>
|
||||||
ZhouYuChen <zhouyuchen@naver.com>
|
ZhouYuChen <zhouyuchen@naver.com>
|
||||||
Ziad Ben Hadj-Alouane <zied.benhadjalouane@gmail.com>
|
Ziad Ben Hadj-Alouane <zied.benhadjalouane@gmail.com>
|
||||||
Ziang Wu <97337387+ZiangWu-77@users.noreply.github.com>
|
Ziang Wu <97337387+ZiangWu-77@users.noreply.github.com>
|
||||||
|
@ -484,14 +575,18 @@ Zsapi <martin1.zsapka@gmail.com>
|
||||||
a-n-n-a-l-e-e <150648636+a-n-n-a-l-e-e@users.noreply.github.com>
|
a-n-n-a-l-e-e <150648636+a-n-n-a-l-e-e@users.noreply.github.com>
|
||||||
adel boussaken <netdur@gmail.com>
|
adel boussaken <netdur@gmail.com>
|
||||||
afrideva <95653597+afrideva@users.noreply.github.com>
|
afrideva <95653597+afrideva@users.noreply.github.com>
|
||||||
|
agray3 <agray3@users.noreply.github.com>
|
||||||
akawrykow <142945436+akawrykow@users.noreply.github.com>
|
akawrykow <142945436+akawrykow@users.noreply.github.com>
|
||||||
alexpinel <93524949+alexpinel@users.noreply.github.com>
|
alexpinel <93524949+alexpinel@users.noreply.github.com>
|
||||||
alonfaraj <alonfaraj@gmail.com>
|
alonfaraj <alonfaraj@gmail.com>
|
||||||
|
alwqx <kenan3015@gmail.com>
|
||||||
|
amd-lalithnc <lalithnc@amd.com>
|
||||||
andrijdavid <david@geek.mg>
|
andrijdavid <david@geek.mg>
|
||||||
anon998 <131767832+anon998@users.noreply.github.com>
|
anon998 <131767832+anon998@users.noreply.github.com>
|
||||||
anzz1 <anzz1@live.com>
|
anzz1 <anzz1@live.com>
|
||||||
apaz <aarpazdera@gmail.com>
|
apaz <aarpazdera@gmail.com>
|
||||||
apcameron <37645737+apcameron@users.noreply.github.com>
|
apcameron <37645737+apcameron@users.noreply.github.com>
|
||||||
|
arch-btw <57669023+arch-btw@users.noreply.github.com>
|
||||||
arcrank <arcrank@gmail.com>
|
arcrank <arcrank@gmail.com>
|
||||||
arlo-phoenix <140345165+arlo-phoenix@users.noreply.github.com>
|
arlo-phoenix <140345165+arlo-phoenix@users.noreply.github.com>
|
||||||
at8u <129688334+at8u@users.noreply.github.com>
|
at8u <129688334+at8u@users.noreply.github.com>
|
||||||
|
@ -514,13 +609,17 @@ cocktailpeanut <121128867+cocktailpeanut@users.noreply.github.com>
|
||||||
coezbek <c.oezbek@gmail.com>
|
coezbek <c.oezbek@gmail.com>
|
||||||
comex <comexk@gmail.com>
|
comex <comexk@gmail.com>
|
||||||
compilade <113953597+compilade@users.noreply.github.com>
|
compilade <113953597+compilade@users.noreply.github.com>
|
||||||
|
compilade <git@compilade.net>
|
||||||
|
cpumaxx <163466046+cpumaxx@users.noreply.github.com>
|
||||||
crasm <crasm@git.vczf.net>
|
crasm <crasm@git.vczf.net>
|
||||||
crasm <crasm@git.vczf.us>
|
crasm <crasm@git.vczf.us>
|
||||||
daboe01 <daboe01@googlemail.com>
|
daboe01 <daboe01@googlemail.com>
|
||||||
david raistrick <keen99@users.noreply.github.com>
|
david raistrick <keen99@users.noreply.github.com>
|
||||||
|
ddh0 <dylanhalladay02@icloud.com>
|
||||||
ddpasa <112642920+ddpasa@users.noreply.github.com>
|
ddpasa <112642920+ddpasa@users.noreply.github.com>
|
||||||
deepdiffuser <112834445+deepdiffuser@users.noreply.github.com>
|
deepdiffuser <112834445+deepdiffuser@users.noreply.github.com>
|
||||||
divinity76 <divinity76@gmail.com>
|
divinity76 <divinity76@gmail.com>
|
||||||
|
dm4 <sunrisedm4@gmail.com>
|
||||||
dotpy314 <33351922+dotpy314@users.noreply.github.com>
|
dotpy314 <33351922+dotpy314@users.noreply.github.com>
|
||||||
drbh <david.richard.holtz@gmail.com>
|
drbh <david.richard.holtz@gmail.com>
|
||||||
ds5t5 <145942675+ds5t5@users.noreply.github.com>
|
ds5t5 <145942675+ds5t5@users.noreply.github.com>
|
||||||
|
@ -529,6 +628,7 @@ eastriver <lee@eastriver.dev>
|
||||||
ebraminio <ebraminio@gmail.com>
|
ebraminio <ebraminio@gmail.com>
|
||||||
eiery <19350831+eiery@users.noreply.github.com>
|
eiery <19350831+eiery@users.noreply.github.com>
|
||||||
eric8607242 <e0928021388@gmail.com>
|
eric8607242 <e0928021388@gmail.com>
|
||||||
|
fairydreaming <166155368+fairydreaming@users.noreply.github.com>
|
||||||
fraxy-v <65565042+fraxy-v@users.noreply.github.com>
|
fraxy-v <65565042+fraxy-v@users.noreply.github.com>
|
||||||
github-actions[bot] <github-actions[bot]@users.noreply.github.com>
|
github-actions[bot] <github-actions[bot]@users.noreply.github.com>
|
||||||
gliptic <gliptic@users.noreply.github.com>
|
gliptic <gliptic@users.noreply.github.com>
|
||||||
|
@ -539,6 +639,7 @@ h-h-h-h <13482553+h-h-h-h@users.noreply.github.com>
|
||||||
hankcs <cnhankmc@gmail.com>
|
hankcs <cnhankmc@gmail.com>
|
||||||
hoangmit <hoangmit@users.noreply.github.com>
|
hoangmit <hoangmit@users.noreply.github.com>
|
||||||
hongbo.mo <352280764@qq.com>
|
hongbo.mo <352280764@qq.com>
|
||||||
|
hopkins385 <98618192+hopkins385@users.noreply.github.com>
|
||||||
howlger <eclipse@voormann.de>
|
howlger <eclipse@voormann.de>
|
||||||
howlger <github@voormann.de>
|
howlger <github@voormann.de>
|
||||||
hutli <6594598+hutli@users.noreply.github.com>
|
hutli <6594598+hutli@users.noreply.github.com>
|
||||||
|
@ -549,14 +650,22 @@ hydai <z54981220@gmail.com>
|
||||||
iSma <ismail.senhaji@gmail.com>
|
iSma <ismail.senhaji@gmail.com>
|
||||||
iacore <74560659+iacore@users.noreply.github.com>
|
iacore <74560659+iacore@users.noreply.github.com>
|
||||||
igarnier <igarnier@protonmail.com>
|
igarnier <igarnier@protonmail.com>
|
||||||
|
intelmatt <61025942+intelmatt@users.noreply.github.com>
|
||||||
iohub <rickyang.pro@gmail.com>
|
iohub <rickyang.pro@gmail.com>
|
||||||
jacobi petrucciani <8117202+jpetrucciani@users.noreply.github.com>
|
jacobi petrucciani <8117202+jpetrucciani@users.noreply.github.com>
|
||||||
|
jaime-m-p <167997752+jaime-m-p@users.noreply.github.com>
|
||||||
jameswu2014 <545426914@qq.com>
|
jameswu2014 <545426914@qq.com>
|
||||||
|
jiez <373447296@qq.com>
|
||||||
jneem <joeneeman@gmail.com>
|
jneem <joeneeman@gmail.com>
|
||||||
|
joecryptotoo <80373433+joecryptotoo@users.noreply.github.com>
|
||||||
johnson442 <56517414+johnson442@users.noreply.github.com>
|
johnson442 <56517414+johnson442@users.noreply.github.com>
|
||||||
|
jojorne <jojorne@users.noreply.github.com>
|
||||||
jon-chuang <9093549+jon-chuang@users.noreply.github.com>
|
jon-chuang <9093549+jon-chuang@users.noreply.github.com>
|
||||||
jp-x-g <jpxg-dev@protonmail.com>
|
jp-x-g <jpxg-dev@protonmail.com>
|
||||||
|
jukofyork <69222624+jukofyork@users.noreply.github.com>
|
||||||
|
junchao-loongson <68935141+junchao-loongson@users.noreply.github.com>
|
||||||
jwj7140 <32943891+jwj7140@users.noreply.github.com>
|
jwj7140 <32943891+jwj7140@users.noreply.github.com>
|
||||||
|
k.h.lai <adrian.k.h.lai@outlook.com>
|
||||||
kaizau <kaizau@users.noreply.github.com>
|
kaizau <kaizau@users.noreply.github.com>
|
||||||
kalomaze <66376113+kalomaze@users.noreply.github.com>
|
kalomaze <66376113+kalomaze@users.noreply.github.com>
|
||||||
kang <tpdns9032100@gmail.com>
|
kang <tpdns9032100@gmail.com>
|
||||||
|
@ -575,11 +684,15 @@ ldwang <ftgreat@163.com>
|
||||||
le.chang <cljs118@126.com>
|
le.chang <cljs118@126.com>
|
||||||
leejet <leejet714@gmail.com>
|
leejet <leejet714@gmail.com>
|
||||||
limitedAtonement <limitedAtonement@users.noreply.github.com>
|
limitedAtonement <limitedAtonement@users.noreply.github.com>
|
||||||
|
liuwei-git <14815172+liuwei-git@users.noreply.github.com>
|
||||||
lon <114724657+longregen@users.noreply.github.com>
|
lon <114724657+longregen@users.noreply.github.com>
|
||||||
|
loonerin <132926317+loonerin@users.noreply.github.com>
|
||||||
|
luoyu-intel <yu.luo@intel.com>
|
||||||
m3ndax <adrian.goessl@outlook.com>
|
m3ndax <adrian.goessl@outlook.com>
|
||||||
maddes8cht <55592906+maddes8cht@users.noreply.github.com>
|
maddes8cht <55592906+maddes8cht@users.noreply.github.com>
|
||||||
makomk <makosoft@googlemail.com>
|
makomk <makosoft@googlemail.com>
|
||||||
manikbhandari <mbbhandarimanik2@gmail.com>
|
manikbhandari <mbbhandarimanik2@gmail.com>
|
||||||
|
maor-ps <154728172+maor-ps@users.noreply.github.com>
|
||||||
mdrokz <mohammadmunshi@gmail.com>
|
mdrokz <mohammadmunshi@gmail.com>
|
||||||
mgroeber9110 <45620825+mgroeber9110@users.noreply.github.com>
|
mgroeber9110 <45620825+mgroeber9110@users.noreply.github.com>
|
||||||
minarchist <minarchist@users.noreply.github.com>
|
minarchist <minarchist@users.noreply.github.com>
|
||||||
|
@ -593,15 +706,19 @@ ngc92 <7938269+ngc92@users.noreply.github.com>
|
||||||
nhamanasu <45545786+nhamanasu@users.noreply.github.com>
|
nhamanasu <45545786+nhamanasu@users.noreply.github.com>
|
||||||
niansa/tuxifan <anton-sa@web.de>
|
niansa/tuxifan <anton-sa@web.de>
|
||||||
niansa/tuxifan <tuxifan@posteo.de>
|
niansa/tuxifan <tuxifan@posteo.de>
|
||||||
|
nickp27 <nb.porter@gmail.com>
|
||||||
ningshanwutuobang <ningshanwutuobang@gmail.com>
|
ningshanwutuobang <ningshanwutuobang@gmail.com>
|
||||||
nold <Nold360@users.noreply.github.com>
|
nold <Nold360@users.noreply.github.com>
|
||||||
nopperl <54780682+nopperl@users.noreply.github.com>
|
nopperl <54780682+nopperl@users.noreply.github.com>
|
||||||
nusu-github <29514220+nusu-github@users.noreply.github.com>
|
nusu-github <29514220+nusu-github@users.noreply.github.com>
|
||||||
olexiyb <olexiyb@gmail.com>
|
olexiyb <olexiyb@gmail.com>
|
||||||
|
omahs <73983677+omahs@users.noreply.github.com>
|
||||||
oobabooga <112222186+oobabooga@users.noreply.github.com>
|
oobabooga <112222186+oobabooga@users.noreply.github.com>
|
||||||
opparco <parco.opaai@gmail.com>
|
opparco <parco.opaai@gmail.com>
|
||||||
ostix360 <55257054+ostix360@users.noreply.github.com>
|
ostix360 <55257054+ostix360@users.noreply.github.com>
|
||||||
|
pengxin99 <pengxin.yuan@intel.com>
|
||||||
perserk <perserk@gmail.com>
|
perserk <perserk@gmail.com>
|
||||||
|
pmysl <piotr.myslinski@outlook.com>
|
||||||
postmasters <namnguyen@google.com>
|
postmasters <namnguyen@google.com>
|
||||||
pudepiedj <pudepiedj@gmail.com>
|
pudepiedj <pudepiedj@gmail.com>
|
||||||
qingfengfenga <41416092+qingfengfenga@users.noreply.github.com>
|
qingfengfenga <41416092+qingfengfenga@users.noreply.github.com>
|
||||||
|
@ -614,16 +731,19 @@ rhuddleston <ryan.huddleston@percona.com>
|
||||||
rimoliga <53384203+rimoliga@users.noreply.github.com>
|
rimoliga <53384203+rimoliga@users.noreply.github.com>
|
||||||
runfuture <runfuture@users.noreply.github.com>
|
runfuture <runfuture@users.noreply.github.com>
|
||||||
sandyiscool <sandyiscool@gmail.com>
|
sandyiscool <sandyiscool@gmail.com>
|
||||||
|
sasha0552 <admin@sasha0552.org>
|
||||||
semidark <me@semidark.net>
|
semidark <me@semidark.net>
|
||||||
sharpHL <132747147+sharpHL@users.noreply.github.com>
|
sharpHL <132747147+sharpHL@users.noreply.github.com>
|
||||||
shibe2 <shibe@tuta.io>
|
shibe2 <shibe@tuta.io>
|
||||||
singularity <12184989+singularity-s0@users.noreply.github.com>
|
singularity <12184989+singularity-s0@users.noreply.github.com>
|
||||||
sjinzh <sjinzh@gmail.com>
|
sjinzh <sjinzh@gmail.com>
|
||||||
|
sjxx <63994076+ylsdamxssjxxdd@users.noreply.github.com>
|
||||||
slaren <2141330+slaren@users.noreply.github.com>
|
slaren <2141330+slaren@users.noreply.github.com>
|
||||||
slaren <slarengh@gmail.com>
|
slaren <slarengh@gmail.com>
|
||||||
snadampal <87143774+snadampal@users.noreply.github.com>
|
snadampal <87143774+snadampal@users.noreply.github.com>
|
||||||
staviq <staviq@gmail.com>
|
staviq <staviq@gmail.com>
|
||||||
stduhpf <stephduh@live.fr>
|
stduhpf <stephduh@live.fr>
|
||||||
|
strawberrymelonpanda <152940198+strawberrymelonpanda@users.noreply.github.com>
|
||||||
swittk <switt1995@gmail.com>
|
swittk <switt1995@gmail.com>
|
||||||
takov751 <40316768+takov751@users.noreply.github.com>
|
takov751 <40316768+takov751@users.noreply.github.com>
|
||||||
tarcey <cey.tarik@gmail.com>
|
tarcey <cey.tarik@gmail.com>
|
||||||
|
@ -636,12 +756,16 @@ uint256_t <konndennsa@gmail.com>
|
||||||
uint256_t <maekawatoshiki1017@gmail.com>
|
uint256_t <maekawatoshiki1017@gmail.com>
|
||||||
unbounded <haakon@likedan.net>
|
unbounded <haakon@likedan.net>
|
||||||
valiray <133289098+valiray@users.noreply.github.com>
|
valiray <133289098+valiray@users.noreply.github.com>
|
||||||
|
vik <vikhyatk@gmail.com>
|
||||||
|
viric <viric@viric.name>
|
||||||
vodkaslime <646329483@qq.com>
|
vodkaslime <646329483@qq.com>
|
||||||
vvhg1 <94630311+vvhg1@users.noreply.github.com>
|
vvhg1 <94630311+vvhg1@users.noreply.github.com>
|
||||||
vxiiduu <73044267+vxiiduu@users.noreply.github.com>
|
vxiiduu <73044267+vxiiduu@users.noreply.github.com>
|
||||||
wbpxre150 <100937007+wbpxre150@users.noreply.github.com>
|
wbpxre150 <100937007+wbpxre150@users.noreply.github.com>
|
||||||
whoreson <139810751+whoreson@users.noreply.github.com>
|
whoreson <139810751+whoreson@users.noreply.github.com>
|
||||||
|
woachk <24752637+woachk@users.noreply.github.com>
|
||||||
wonjun Jang <strutive07@gmail.com>
|
wonjun Jang <strutive07@gmail.com>
|
||||||
|
woodx <124784234+woodx9@users.noreply.github.com>
|
||||||
wzy <32936898+Freed-Wu@users.noreply.github.com>
|
wzy <32936898+Freed-Wu@users.noreply.github.com>
|
||||||
xaedes <xaedes@gmail.com>
|
xaedes <xaedes@gmail.com>
|
||||||
xaedes <xaedes@googlemail.com>
|
xaedes <xaedes@googlemail.com>
|
||||||
|
@ -649,7 +773,10 @@ xloem <0xloem@gmail.com>
|
||||||
yangli2 <yangli2@gmail.com>
|
yangli2 <yangli2@gmail.com>
|
||||||
yuiseki <yuiseki@gmail.com>
|
yuiseki <yuiseki@gmail.com>
|
||||||
zakkor <edward.partenie@gmail.com>
|
zakkor <edward.partenie@gmail.com>
|
||||||
|
zhangkaihuo <zhangkaihuo@gmail.com>
|
||||||
zhouwg <6889919+zhouwg@users.noreply.github.com>
|
zhouwg <6889919+zhouwg@users.noreply.github.com>
|
||||||
|
zhouwg <zhouwg2000@gmail.com>
|
||||||
zrm <trustiosity.zrm@gmail.com>
|
zrm <trustiosity.zrm@gmail.com>
|
||||||
|
Ștefan-Gabriel Muscalu <legraphista@users.noreply.github.com>
|
||||||
源文雨 <41315874+fumiama@users.noreply.github.com>
|
源文雨 <41315874+fumiama@users.noreply.github.com>
|
||||||
Нияз Гарифзянов <112617865+garrnizon@users.noreply.github.com>
|
Нияз Гарифзянов <112617865+garrnizon@users.noreply.github.com>
|
||||||
|
|
1398	CMakeLists.txt
File diff suppressed because it is too large
CMakePresets.json
@@ -11,10 +11,23 @@
         "CMAKE_INSTALL_RPATH": "$ORIGIN;$ORIGIN/.."
       }
     },
+    {
+      "name": "sycl-base",
+      "hidden": true,
+      "generator": "Ninja",
+      "binaryDir": "${sourceDir}/build-${presetName}",
+      "cacheVariables": {
+        "CMAKE_EXPORT_COMPILE_COMMANDS": "ON",
+        "CMAKE_CXX_COMPILER": "icx",
+        "CMAKE_C_COMPILER": "cl",
+        "GGML_SYCL": "ON",
+        "CMAKE_INSTALL_RPATH": "$ORIGIN;$ORIGIN/.."
+      }
+    },
     { "name": "debug", "hidden": true, "cacheVariables": { "CMAKE_BUILD_TYPE": "Debug" } },
-    { "name": "release", "hidden": true, "cacheVariables": { "CMAKE_BUILD_TYPE": "RelWithDebInfo" } },
-    { "name": "static", "hidden": true, "cacheVariables": { "LLAMA_STATIC": "ON" } },
+    { "name": "release", "hidden": true, "cacheVariables": { "CMAKE_BUILD_TYPE": "Release" } },
+    { "name": "reldbg", "hidden": true, "cacheVariables": { "CMAKE_BUILD_TYPE": "RelWithDebInfo" } },
+    { "name": "static", "hidden": true, "cacheVariables": { "GGML_STATIC": "ON" } },

     {
       "name": "arm64-windows-msvc", "hidden": true,
@@ -35,15 +48,18 @@
     },

     { "name": "arm64-windows-llvm-debug" , "inherits": [ "base", "arm64-windows-llvm", "debug" ] },
-    { "name": "arm64-windows-llvm-release", "inherits": [ "base", "arm64-windows-llvm", "release" ] },
-    { "name": "arm64-windows-llvm+static-release", "inherits": [ "base", "arm64-windows-llvm", "release", "static" ] },
+    { "name": "arm64-windows-llvm-release", "inherits": [ "base", "arm64-windows-llvm", "reldbg" ] },
+    { "name": "arm64-windows-llvm+static-release", "inherits": [ "base", "arm64-windows-llvm", "reldbg", "static" ] },

     { "name": "arm64-windows-msvc-debug" , "inherits": [ "base", "arm64-windows-msvc", "debug" ] },
-    { "name": "arm64-windows-msvc-release", "inherits": [ "base", "arm64-windows-msvc", "release" ] },
-    { "name": "arm64-windows-msvc+static-release", "inherits": [ "base", "arm64-windows-msvc", "release", "static" ] },
+    { "name": "arm64-windows-msvc-release", "inherits": [ "base", "arm64-windows-msvc", "reldbg" ] },
+    { "name": "arm64-windows-msvc+static-release", "inherits": [ "base", "arm64-windows-msvc", "reldbg", "static" ] },

     { "name": "x64-windows-msvc-debug" , "inherits": [ "base", "debug" ] },
-    { "name": "x64-windows-msvc-release", "inherits": [ "base", "release" ] },
-    { "name": "x64-windows-msvc+static-release", "inherits": [ "base", "release", "static" ] }
+    { "name": "x64-windows-msvc-release", "inherits": [ "base", "reldbg" ] },
+    { "name": "x64-windows-msvc+static-release", "inherits": [ "base", "reldbg", "static" ] },
+
+    { "name": "x64-windows-sycl-debug" , "inherits": [ "sycl-base", "debug" ] },
+    { "name": "x64-windows-sycl-release", "inherits": [ "sycl-base", "release" ] }
   ]
 }
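For reference, configuring and building through the new presets looks like the following. This is only a usage sketch: the preset names come from the JSON above, and the same commands appear in the SYCL instructions later on this page.

```sh
# Configure and build llama-cli with the new SYCL release preset (oneAPI environment active)
cmake --preset x64-windows-sycl-release
cmake --build build-x64-windows-sycl-release -j --target llama-cli
```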
Package.swift
@@ -3,14 +3,13 @@
 import PackageDescription

 var sources = [
-    "ggml.c",
-    "sgemm.cpp",
-    "llama.cpp",
-    "unicode.cpp",
-    "unicode-data.cpp",
-    "ggml-alloc.c",
-    "ggml-backend.c",
-    "ggml-quants.c",
+    "src/llama.cpp",
+    "src/unicode.cpp",
+    "src/unicode-data.cpp",
+    "ggml/src/ggml.c",
+    "ggml/src/ggml-alloc.c",
+    "ggml/src/ggml-backend.c",
+    "ggml/src/ggml-quants.c",
 ]

 var resources: [Resource] = []
@@ -26,8 +25,8 @@ var cSettings: [CSetting] = [
 ]

 #if canImport(Darwin)
-sources.append("ggml-metal.m")
-resources.append(.process("ggml-metal.metal"))
+sources.append("ggml/src/ggml-metal.m")
+resources.append(.process("ggml/src/ggml-metal.metal"))
 linkerSettings.append(.linkedFramework("Accelerate"))
 cSettings.append(
     contentsOf: [
@@ -63,8 +62,6 @@ let package = Package(
             "models",
             "tests",
             "CMakeLists.txt",
-            "ggml-cuda.cu",
-            "ggml-cuda.h",
             "Makefile"
         ],
         sources: sources,
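Since the Swift package now pulls its sources from `src/` and `ggml/src/`, a quick sanity check of the restructuring is to build the package directly with SwiftPM. A minimal sketch, assuming a checkout of the repository and a recent Swift toolchain; the command itself is not part of this diff:

```sh
# From the repository root: build the Swift package with the relocated sources
swift build -c release
```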
README-sycl.md
@@ -115,12 +115,12 @@ The docker build option is currently limited to *intel GPU* targets.
 ### Build image
 ```sh
 # Using FP16
-docker build -t llama-cpp-sycl --build-arg="LLAMA_SYCL_F16=ON" -f .devops/llama-cli-intel.Dockerfile .
+docker build -t llama-cpp-sycl --build-arg="GGML_SYCL_F16=ON" -f .devops/llama-cli-intel.Dockerfile .
 ```

 *Notes*:

-To build in default FP32 *(Slower than FP16 alternative)*, you can remove the `--build-arg="LLAMA_SYCL_F16=ON"` argument from the previous command.
+To build in default FP32 *(Slower than FP16 alternative)*, you can remove the `--build-arg="GGML_SYCL_F16=ON"` argument from the previous command.

 You can also use the `.devops/llama-server-intel.Dockerfile`, which builds the *"server"* alternative.
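A hedged sketch of running the image built above. The device passthrough flag, model path, and prompt are illustrative assumptions rather than content from this page, and they presume the image's entrypoint is the llama-cli binary:

```sh
# Run the SYCL image on an Intel GPU (paths and flags are placeholders)
docker run -it --rm -v "$(pwd)/models:/models" --device /dev/dri llama-cpp-sycl \
    -m /models/model.gguf -p "Building a website can be done in 10 simple steps:" -n 64
```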
@@ -244,10 +244,10 @@ source /opt/intel/oneapi/setvars.sh
 # Build LLAMA with MKL BLAS acceleration for intel GPU

 # Option 1: Use FP32 (recommended for better performance in most cases)
-cmake -B build -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
+cmake -B build -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx

 # Option 2: Use FP16
-cmake -B build -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_SYCL_F16=ON
+cmake -B build -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON

 # build all binary
 cmake --build build --config Release -j -v
@@ -264,10 +264,10 @@ export CPLUS_INCLUDE_DIR=/path/to/oneMKL/include:$CPLUS_INCLUDE_DIR
 # Build LLAMA with Nvidia BLAS acceleration through SYCL

 # Option 1: Use FP32 (recommended for better performance in most cases)
-cmake -B build -DLLAMA_SYCL=ON -DLLAMA_SYCL_TARGET=NVIDIA -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
+cmake -B build -DGGML_SYCL=ON -DGGML_SYCL_TARGET=NVIDIA -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx

 # Option 2: Use FP16
-cmake -B build -DLLAMA_SYCL=ON -DLLAMA_SYCL_TARGET=NVIDIA -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_SYCL_F16=ON
+cmake -B build -DGGML_SYCL=ON -DGGML_SYCL_TARGET=NVIDIA -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON

 # build all binary
 cmake --build build --config Release -j -v
@@ -410,15 +410,9 @@ Output (example):

 4. Install build tools

-a. Download & install cmake for Windows: https://cmake.org/download/
+a. Download & install cmake for Windows: https://cmake.org/download/ (CMake can also be installed from Visual Studio Installer)
+b. The new Visual Studio will install Ninja as default. (If not, please install it manually: https://ninja-build.org/)

-b. Download & install mingw-w64 make for Windows provided by w64devkit
-
-- Download the 1.19.0 version of [w64devkit](https://github.com/skeeto/w64devkit/releases/download/v1.19.0/w64devkit-1.19.0.zip).
-
-- Extract `w64devkit` on your pc.
-
-- Add the **bin** folder path in the Windows system PATH environment (for e.g. `C:\xxx\w64devkit\bin\`).

 ### II. Build llama.cpp
@@ -428,10 +422,10 @@ On the oneAPI command line window, step into the llama.cpp main directory and ru
 @call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" intel64 --force

 # Option 1: Use FP32 (recommended for better performance in most cases)
-cmake -B build -G "MinGW Makefiles" -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icx -DCMAKE_BUILD_TYPE=Release
+cmake -B build -G "Ninja" -DGGML_SYCL=ON -DCMAKE_C_COMPILER=cl -DCMAKE_CXX_COMPILER=icx -DCMAKE_BUILD_TYPE=Release

 # Option 2: Or FP16
-cmake -B build -G "MinGW Makefiles" -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icx -DCMAKE_BUILD_TYPE=Release -DLLAMA_SYCL_F16=ON
+cmake -B build -G "Ninja" -DGGML_SYCL=ON -DCMAKE_C_COMPILER=cl -DCMAKE_CXX_COMPILER=icx -DCMAKE_BUILD_TYPE=Release -DGGML_SYCL_F16=ON

 cmake --build build --config Release -j
 ```
@@ -441,9 +435,23 @@ Otherwise, run the `win-build-sycl.bat` wrapper which encapsulates the former in
 .\examples\sycl\win-build-sycl.bat
 ```

+Or, use CMake presets to build:
+```sh
+cmake --preset x64-windows-sycl-release
+cmake --build build-x64-windows-sycl-release -j --target llama-cli
+
+cmake -DGGML_SYCL_F16=ON --preset x64-windows-sycl-release
+cmake --build build-x64-windows-sycl-release -j --target llama-cli
+
+cmake --preset x64-windows-sycl-debug
+cmake --build build-x64-windows-sycl-debug -j --target llama-cli
+```
+
+Or, you can use Visual Studio to open llama.cpp folder as a CMake project. Choose the sycl CMake presets (`x64-windows-sycl-release` or `x64-windows-sycl-debug`) before you compile the project.
+
 *Notes:*

-- By default, calling `make` will build all target binary files. In case of a minimal experimental setup, the user can build the inference executable only through `make llama-cli`.
+- In case of a minimal experimental setup, the user can build the inference executable only through `cmake --build build --config Release -j --target llama-cli`.

 ### III. Run the inference
@@ -536,9 +544,9 @@ use 1 SYCL GPUs: [0] with Max compute units:512

 | Name               | Value                             | Function                                    |
 |--------------------|-----------------------------------|---------------------------------------------|
-| LLAMA_SYCL         | ON (mandatory)                    | Enable build with SYCL code path.           |
-| LLAMA_SYCL_TARGET  | INTEL *(default)* \| NVIDIA       | Set the SYCL target device type.            |
-| LLAMA_SYCL_F16     | OFF *(default)* \|ON *(optional)* | Enable FP16 build with SYCL code path.      |
+| GGML_SYCL          | ON (mandatory)                    | Enable build with SYCL code path.           |
+| GGML_SYCL_TARGET   | INTEL *(default)* \| NVIDIA       | Set the SYCL target device type.            |
+| GGML_SYCL_F16      | OFF *(default)* \|ON *(optional)* | Enable FP16 build with SYCL code path.      |
 | CMAKE_C_COMPILER   | icx                               | Set *icx* compiler for SYCL code path.      |
 | CMAKE_CXX_COMPILER | icpx *(Linux)*, icx *(Windows)*   | Set `icpx/icx` compiler for SYCL code path. |
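Putting the table together, a single configure line that sets every documented option explicitly might look like this; it simply combines the cmake invocations shown earlier in these SYCL docs, with INTEL spelled out even though it is the default:

```sh
# Intel target, FP16 enabled, oneAPI compilers
cmake -B build -DGGML_SYCL=ON -DGGML_SYCL_TARGET=INTEL -DGGML_SYCL_F16=ON \
      -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
cmake --build build --config Release -j
```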
68	README.md
@@ -15,6 +15,7 @@ Inference of Meta's [LLaMA](https://arxiv.org/abs/2302.13971) model (and others)

 ### Recent API changes

+- [2024 Jun 26] The source code and CMake build scripts have been restructured https://github.com/ggerganov/llama.cpp/pull/8006
 - [2024 Apr 21] `llama_token_to_piece` can now optionally render special tokens https://github.com/ggerganov/llama.cpp/pull/6807
 - [2024 Apr 4] State and session file functions reorganized under `llama_state_*` https://github.com/ggerganov/llama.cpp/pull/6341
 - [2024 Mar 26] Logits and embeddings API updated for compactness https://github.com/ggerganov/llama.cpp/pull/6122
@@ -107,6 +108,7 @@ Typically finetunes of the base models below are supported as well.
 - [X] [Falcon](https://huggingface.co/models?search=tiiuae/falcon)
 - [X] [Chinese LLaMA / Alpaca](https://github.com/ymcui/Chinese-LLaMA-Alpaca) and [Chinese LLaMA-2 / Alpaca-2](https://github.com/ymcui/Chinese-LLaMA-Alpaca-2)
 - [X] [Vigogne (French)](https://github.com/bofenghuang/vigogne)
+- [X] [BERT](https://github.com/ggerganov/llama.cpp/pull/5423)
 - [X] [Koala](https://bair.berkeley.edu/blog/2023/04/03/koala/)
 - [X] [Baichuan 1 & 2](https://huggingface.co/models?search=baichuan-inc/Baichuan) + [derivations](https://huggingface.co/hiyouga/baichuan-7b-sft)
 - [X] [Aquila 1 & 2](https://huggingface.co/models?search=BAAI/Aquila)
@@ -216,6 +218,11 @@ Unless otherwise noted these projects are open-source with permissive licensing:
 **Tools:**

 - [akx/ggify](https://github.com/akx/ggify) – download PyTorch models from HuggingFace Hub and convert them to GGML
+- [crashr/gppm](https://github.com/crashr/gppm) – launch llama.cpp instances utilizing NVIDIA Tesla P40 or P100 GPUs with reduced idle power consumption
+
+**Infrastructure:**
+
+- [Paddler](https://github.com/distantmagic/paddler) - Stateful load balancer custom-tailored for llama.cpp

 ---
@@ -415,7 +422,7 @@ Flox follows the nixpkgs build of llama.cpp.

 ### Metal Build

 On MacOS, Metal is enabled by default. Using Metal makes the computation run on the GPU.
-To disable the Metal build at compile time use the `LLAMA_NO_METAL=1` flag or the `LLAMA_METAL=OFF` cmake option.
+To disable the Metal build at compile time use the `GGML_NO_METAL=1` flag or the `GGML_METAL=OFF` cmake option.

 When built with Metal support, you can explicitly disable GPU inference with the `--n-gpu-layers|-ngl 0` command-line
 argument.
@@ -435,7 +442,7 @@ Building the program with BLAS support may lead to some performance improvements

 - Using `make`:
   - On Linux:

     ```bash
-    make LLAMA_OPENBLAS=1
+    make GGML_OPENBLAS=1
     ```

   - On Windows:
@@ -450,13 +457,13 @@ Building the program with BLAS support may lead to some performance improvements

     8. From here you can run:

     ```bash
-    make LLAMA_OPENBLAS=1
+    make GGML_OPENBLAS=1
     ```

 - Using `CMake` on Linux:

     ```bash
-    cmake -B build -DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS
+    cmake -B build -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
     cmake --build build --config Release
     ```
@@ -475,10 +482,10 @@ Building the program with BLAS support may lead to some performance improvements

   Building through oneAPI compilers will make avx_vnni instruction set available for intel processors that do not support avx512 and avx512_vnni. Please note that this build config **does not support Intel GPU**. For Intel GPU support, please refer to [llama.cpp for SYCL](./README-sycl.md).

 - Using manual oneAPI installation:
-  By default, `LLAMA_BLAS_VENDOR` is set to `Generic`, so if you already sourced intel environment script and assign `-DLLAMA_BLAS=ON` in cmake, the mkl version of Blas will automatically been selected. Otherwise please install oneAPI and follow the below steps:
+  By default, `GGML_BLAS_VENDOR` is set to `Generic`, so if you already sourced intel environment script and assign `-DGGML_BLAS=ON` in cmake, the mkl version of Blas will automatically been selected. Otherwise please install oneAPI and follow the below steps:

     ```bash
     source /opt/intel/oneapi/setvars.sh # You can skip this step if in oneapi-basekit docker image, only required for manual installation
-    cmake -B build -DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=Intel10_64lp -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_NATIVE=ON
+    cmake -B build -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=Intel10_64lp -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_NATIVE=ON
     cmake --build build --config Release
     ```
@@ -495,27 +502,28 @@ Building the program with BLAS support may lead to some performance improvements

 - Using `make`:

     ```bash
-    make LLAMA_CUDA=1
+    make GGML_CUDA=1
     ```

 - Using `CMake`:

     ```bash
-    cmake -B build -DLLAMA_CUDA=ON
+    cmake -B build -DGGML_CUDA=ON
     cmake --build build --config Release
     ```

 The environment variable [`CUDA_VISIBLE_DEVICES`](https://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#env-vars) can be used to specify which GPU(s) will be used. The following compilation options are also available to tweak performance:

 | Option | Legal values | Default | Description |
 |--------|--------------|---------|-------------|
-| LLAMA_CUDA_FORCE_DMMV | Boolean | false | Force the use of dequantization + matrix vector multiplication kernels instead of using kernels that do matrix vector multiplication on quantized data. By default the decision is made based on compute capability (MMVQ for 6.1/Pascal/GTX 1000 or higher). Does not affect k-quants. |
+| GGML_CUDA_FORCE_DMMV | Boolean | false | Force the use of dequantization + matrix vector multiplication kernels instead of using kernels that do matrix vector multiplication on quantized data. By default the decision is made based on compute capability (MMVQ for 6.1/Pascal/GTX 1000 or higher). Does not affect k-quants. |
-| LLAMA_CUDA_DMMV_X | Positive integer >= 32 | 32 | Number of values in x direction processed by the CUDA dequantization + matrix vector multiplication kernel per iteration. Increasing this value can improve performance on fast GPUs. Power of 2 heavily recommended. Does not affect k-quants. |
+| GGML_CUDA_DMMV_X | Positive integer >= 32 | 32 | Number of values in x direction processed by the CUDA dequantization + matrix vector multiplication kernel per iteration. Increasing this value can improve performance on fast GPUs. Power of 2 heavily recommended. Does not affect k-quants. |
-| LLAMA_CUDA_MMV_Y | Positive integer | 1 | Block size in y direction for the CUDA mul mat vec kernels. Increasing this value can improve performance on fast GPUs. Power of 2 recommended. |
+| GGML_CUDA_MMV_Y | Positive integer | 1 | Block size in y direction for the CUDA mul mat vec kernels. Increasing this value can improve performance on fast GPUs. Power of 2 recommended. |
-| LLAMA_CUDA_FORCE_MMQ | Boolean | false | Force the use of dequantization + matrix multiplication kernels instead of leveraging Math libraries. |
+| GGML_CUDA_FORCE_MMQ | Boolean | false | Force the use of custom matrix multiplication kernels for quantized models instead of FP16 cuBLAS even if there is no int8 tensor core implementation available (affects V100, RDNA3). MMQ kernels are enabled by default on GPUs with int8 tensor core support. With MMQ force enabled, speed for large batch sizes will be worse but VRAM consumption will be lower. |
+| GGML_CUDA_FORCE_CUBLAS | Boolean | false | Force the use of FP16 cuBLAS instead of custom matrix multiplication kernels for quantized models |
-| LLAMA_CUDA_F16 | Boolean | false | If enabled, use half-precision floating point arithmetic for the CUDA dequantization + mul mat vec kernels and for the q4_1 and q5_1 matrix matrix multiplication kernels. Can improve performance on relatively recent GPUs. |
+| GGML_CUDA_F16 | Boolean | false | If enabled, use half-precision floating point arithmetic for the CUDA dequantization + mul mat vec kernels and for the q4_1 and q5_1 matrix matrix multiplication kernels. Can improve performance on relatively recent GPUs. |
-| LLAMA_CUDA_KQUANTS_ITER | 1 or 2 | 2 | Number of values processed per iteration and per CUDA thread for Q2_K and Q6_K quantization formats. Setting this value to 1 can improve performance for slow GPUs. |
+| GGML_CUDA_KQUANTS_ITER | 1 or 2 | 2 | Number of values processed per iteration and per CUDA thread for Q2_K and Q6_K quantization formats. Setting this value to 1 can improve performance for slow GPUs. |
-| LLAMA_CUDA_PEER_MAX_BATCH_SIZE | Positive integer | 128 | Maximum batch size for which to enable peer access between multiple GPUs. Peer access requires either Linux or NVLink. When using NVLink enabling peer access for larger batch sizes is potentially beneficial. |
+| GGML_CUDA_PEER_MAX_BATCH_SIZE | Positive integer | 128 | Maximum batch size for which to enable peer access between multiple GPUs. Peer access requires either Linux or NVLink. When using NVLink enabling peer access for larger batch sizes is potentially beneficial. |
-| LLAMA_CUDA_FA_ALL_QUANTS | Boolean | false | Compile support for all KV cache quantization type (combinations) for the FlashAttention CUDA kernels. More fine-grained control over KV cache size but compilation takes much longer. |
+| GGML_CUDA_FA_ALL_QUANTS | Boolean | false | Compile support for all KV cache quantization type (combinations) for the FlashAttention CUDA kernels. More fine-grained control over KV cache size but compilation takes much longer. |

 - #### hipBLAS
@@ -525,15 +533,15 @@ Building the program with BLAS support may lead to some performance improvements

 - Using `make`:

     ```bash
-    make LLAMA_HIPBLAS=1
+    make GGML_HIPBLAS=1
     ```

 - Using `CMake` for Linux (assuming a gfx1030-compatible AMD GPU):

     ```bash
     HIPCXX="$(hipconfig -l)/clang" HIP_PATH="$(hipconfig -R)" \
-        cmake -S . -B build -DLLAMA_HIPBLAS=ON -DAMDGPU_TARGETS=gfx1030 -DCMAKE_BUILD_TYPE=Release \
+        cmake -S . -B build -DGGML_HIPBLAS=ON -DAMDGPU_TARGETS=gfx1030 -DCMAKE_BUILD_TYPE=Release \
         && cmake --build build --config Release -- -j 16
     ```

-    On Linux it is also possible to use unified memory architecture (UMA) to share main memory between the CPU and integrated GPU by setting `-DLLAMA_HIP_UMA=ON`.
+    On Linux it is also possible to use unified memory architecture (UMA) to share main memory between the CPU and integrated GPU by setting `-DGGML_HIP_UMA=ON`.
     However, this hurts performance for non-integrated GPUs (but enables working with integrated GPUs).

     Note that if you get the following error:
|
||||||
```bash
|
```bash
|
||||||
HIPCXX="$(hipconfig -l)/clang" HIP_PATH="$(hipconfig -p)" \
|
HIPCXX="$(hipconfig -l)/clang" HIP_PATH="$(hipconfig -p)" \
|
||||||
HIP_DEVICE_LIB_PATH=<directory-you-just-found> \
|
HIP_DEVICE_LIB_PATH=<directory-you-just-found> \
|
||||||
cmake -S . -B build -DLLAMA_HIPBLAS=ON -DAMDGPU_TARGETS=gfx1030 -DCMAKE_BUILD_TYPE=Release \
|
cmake -S . -B build -DGGML_HIPBLAS=ON -DAMDGPU_TARGETS=gfx1030 -DCMAKE_BUILD_TYPE=Release \
|
||||||
&& cmake --build build -- -j 16
|
&& cmake --build build -- -j 16
|
||||||
```
|
```
|
||||||
|
|
||||||
- Using `make` (example for target gfx1030, build with 16 CPU threads):
|
- Using `make` (example for target gfx1030, build with 16 CPU threads):
|
||||||
```bash
|
```bash
|
||||||
make -j16 LLAMA_HIPBLAS=1 LLAMA_HIP_UMA=1 AMDGPU_TARGETS=gfx1030
|
make -j16 GGML_HIPBLAS=1 GGML_HIP_UMA=1 AMDGPU_TARGETS=gfx1030
|
||||||
```
|
```
|
||||||
|
|
||||||
- Using `CMake` for Windows (using x64 Native Tools Command Prompt for VS, and assuming a gfx1100-compatible AMD GPU):
|
- Using `CMake` for Windows (using x64 Native Tools Command Prompt for VS, and assuming a gfx1100-compatible AMD GPU):
|
||||||
```bash
|
```bash
|
||||||
set PATH=%HIP_PATH%\bin;%PATH%
|
set PATH=%HIP_PATH%\bin;%PATH%
|
||||||
cmake -S . -B build -G Ninja -DAMDGPU_TARGETS=gfx1100 -DLLAMA_HIPBLAS=ON -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_BUILD_TYPE=Release
|
cmake -S . -B build -G Ninja -DAMDGPU_TARGETS=gfx1100 -DGGML_HIPBLAS=ON -DCMAKE_C_COMPILER=clang -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_BUILD_TYPE=Release
|
||||||
cmake --build build
|
cmake --build build
|
||||||
```
|
```
|
||||||
Make sure that `AMDGPU_TARGETS` is set to the GPU arch you want to compile for. The above example uses `gfx1100` that corresponds to Radeon RX 7900XTX/XT/GRE. You can find a list of targets [here](https://llvm.org/docs/AMDGPUUsage.html#processors)
|
Make sure that `AMDGPU_TARGETS` is set to the GPU arch you want to compile for. The above example uses `gfx1100` that corresponds to Radeon RX 7900XTX/XT/GRE. You can find a list of targets [here](https://llvm.org/docs/AMDGPUUsage.html#processors)
|
||||||
|
@@ -570,11 +578,11 @@ Building the program with BLAS support may lead to some performance improvements

     If your GPU is not officially supported you can use the environment variable [`HSA_OVERRIDE_GFX_VERSION`] set to a similar GPU, for example 10.3.0 on RDNA2 (e.g. gfx1030, gfx1031, or gfx1035) or 11.0.0 on RDNA3.
     The following compilation options are also available to tweak performance (yes, they refer to CUDA, not HIP, because it uses the same code as the cuBLAS version above):

 | Option | Legal values | Default | Description |
 |--------|--------------|---------|-------------|
-| LLAMA_CUDA_DMMV_X | Positive integer >= 32 | 32 | Number of values in x direction processed by the HIP dequantization + matrix vector multiplication kernel per iteration. Increasing this value can improve performance on fast GPUs. Power of 2 heavily recommended. Does not affect k-quants. |
+| GGML_CUDA_DMMV_X | Positive integer >= 32 | 32 | Number of values in x direction processed by the HIP dequantization + matrix vector multiplication kernel per iteration. Increasing this value can improve performance on fast GPUs. Power of 2 heavily recommended. Does not affect k-quants. |
-| LLAMA_CUDA_MMV_Y | Positive integer | 1 | Block size in y direction for the HIP mul mat vec kernels. Increasing this value can improve performance on fast GPUs. Power of 2 recommended. Does not affect k-quants. |
+| GGML_CUDA_MMV_Y | Positive integer | 1 | Block size in y direction for the HIP mul mat vec kernels. Increasing this value can improve performance on fast GPUs. Power of 2 recommended. Does not affect k-quants. |
-| LLAMA_CUDA_KQUANTS_ITER | 1 or 2 | 2 | Number of values processed per iteration and per HIP thread for Q2_K and Q6_K quantization formats. Setting this value to 1 can improve performance for slow GPUs. |
+| GGML_CUDA_KQUANTS_ITER | 1 or 2 | 2 | Number of values processed per iteration and per HIP thread for Q2_K and Q6_K quantization formats. Setting this value to 1 can improve performance for slow GPUs. |

 - #### Vulkan
@@ -612,7 +620,7 @@ Building the program with BLAS support may lead to some performance improvements

 Then, build llama.cpp using the cmake command below:

 ```bash
-cmake -B build -DLLAMA_VULKAN=1
+cmake -B build -DGGML_VULKAN=1
 cmake --build build --config Release
 # Test the output binary (with "-ngl 33" to offload all layers to GPU)
 ./bin/llama-cli -m "PATH_TO_MODEL" -p "Hi you how are you" -n 50 -e -ngl 33 -t 4
ci/run.sh (10 changes)
@@ -36,11 +36,11 @@ SRC=`pwd`

 CMAKE_EXTRA="-DLLAMA_FATAL_WARNINGS=ON"

 if [ ! -z ${GG_BUILD_METAL} ]; then
-    CMAKE_EXTRA="${CMAKE_EXTRA} -DLLAMA_METAL_SHADER_DEBUG=ON"
+    CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_METAL=ON"
 fi

 if [ ! -z ${GG_BUILD_CUDA} ]; then
-    CMAKE_EXTRA="${CMAKE_EXTRA} -DLLAMA_CUDA=1"
+    CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_CUDA=1"
 fi

 if [ ! -z ${GG_BUILD_SYCL} ]; then

@@ -50,7 +50,7 @@ if [ ! -z ${GG_BUILD_SYCL} ]; then
         exit 1
     fi

-    CMAKE_EXTRA="${CMAKE_EXTRA} -DLLAMA_SYCL=1 DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_SYCL_F16=ON"
+    CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_SYCL=1 DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON"
 fi

 ## helpers

@@ -284,7 +284,7 @@ function gg_run_open_llama_7b_v2 {

     set -e

-    (time cmake -DCMAKE_BUILD_TYPE=Release ${CMAKE_EXTRA} -DLLAMA_CUDA=1 .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log
+    (time cmake -DCMAKE_BUILD_TYPE=Release ${CMAKE_EXTRA} -DGGML_CUDA=1 .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log
     (time make -j ) 2>&1 | tee -a $OUT/${ci}-make.log

     python3 ../examples/convert-legacy-llama.py ${path_models} --outfile ${path_models}/ggml-model-f16.gguf

@@ -550,7 +550,7 @@ function gg_run_pythia_2_8b {

     set -e

-    (time cmake -DCMAKE_BUILD_TYPE=Release ${CMAKE_EXTRA} -DLLAMA_CUDA=1 .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log
+    (time cmake -DCMAKE_BUILD_TYPE=Release ${CMAKE_EXTRA} -DGGML_CUDA=1 .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log
     (time make -j ) 2>&1 | tee -a $OUT/${ci}-make.log

     python3 ../convert-hf-to-gguf.py ${path_models} --outfile ${path_models}/ggml-model-f16.gguf
cmake/git-vars.cmake (new file, 22 lines)
@@ -0,0 +1,22 @@
+find_package(Git)
+
+# the commit's SHA1
+execute_process(COMMAND
+    "${GIT_EXECUTABLE}" describe --match=NeVeRmAtCh --always --abbrev=8
+    WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}"
+    OUTPUT_VARIABLE GIT_SHA1
+    ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
+
+# the date of the commit
+execute_process(COMMAND
+    "${GIT_EXECUTABLE}" log -1 --format=%ad --date=local
+    WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}"
+    OUTPUT_VARIABLE GIT_DATE
+    ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
+
+# the subject of the commit
+execute_process(COMMAND
+    "${GIT_EXECUTABLE}" log -1 --format=%s
+    WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}"
+    OUTPUT_VARIABLE GIT_COMMIT_SUBJECT
+    ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
@@ -1,41 +1,43 @@
 set(LLAMA_VERSION @LLAMA_INSTALL_VERSION@)
 set(LLAMA_BUILD_COMMIT @LLAMA_BUILD_COMMIT@)
 set(LLAMA_BUILD_NUMBER @LLAMA_BUILD_NUMBER@)
 set(LLAMA_SHARED_LIB @BUILD_SHARED_LIBS@)
-set(LLAMA_BLAS @LLAMA_BLAS@)
-set(LLAMA_CUDA @LLAMA_CUDA@)
-set(LLAMA_METAL @LLAMA_METAL@)
-set(LLAMA_HIPBLAS @LLAMA_HIPBLAS@)
-set(LLAMA_ACCELERATE @LLAMA_ACCELERATE@)
+set(GGML_BLAS @GGML_BLAS@)
+set(GGML_CUDA @GGML_CUDA@)
+set(GGML_METAL @GGML_METAL@)
+set(GGML_HIPBLAS @GGML_HIPBLAS@)
+set(GGML_ACCELERATE @GGML_ACCELERATE@)

 @PACKAGE_INIT@

 set_and_check(LLAMA_INCLUDE_DIR "@PACKAGE_LLAMA_INCLUDE_INSTALL_DIR@")
 set_and_check(LLAMA_LIB_DIR "@PACKAGE_LLAMA_LIB_INSTALL_DIR@")
 set_and_check(LLAMA_BIN_DIR "@PACKAGE_LLAMA_BIN_INSTALL_DIR@")

 # Ensure transient dependencies satisfied

 find_package(Threads REQUIRED)
-if (APPLE AND LLAMA_ACCELERATE)
+if (APPLE AND GGML_ACCELERATE)
     find_library(ACCELERATE_FRAMEWORK Accelerate REQUIRED)
 endif()

-if (LLAMA_BLAS)
+if (GGML_BLAS)
     find_package(BLAS REQUIRED)
 endif()

-if (LLAMA_CUDA)
+if (GGML_CUDA)
     find_package(CUDAToolkit REQUIRED)
 endif()

-if (LLAMA_METAL)
+if (GGML_METAL)
     find_library(FOUNDATION_LIBRARY Foundation REQUIRED)
     find_library(METAL_FRAMEWORK Metal REQUIRED)
     find_library(METALKIT_FRAMEWORK MetalKit REQUIRED)
 endif()

-if (LLAMA_HIPBLAS)
+if (GGML_HIPBLAS)
     find_package(hip REQUIRED)
     find_package(hipblas REQUIRED)
     find_package(rocblas REQUIRED)

@@ -47,7 +49,9 @@ find_library(llama_LIBRARY llama
 set(_llama_link_deps "Threads::Threads" "@LLAMA_EXTRA_LIBS@")
 set(_llama_transient_defines "@LLAMA_TRANSIENT_DEFINES@")

 add_library(llama UNKNOWN IMPORTED)

 set_target_properties(llama
     PROPERTIES
         INTERFACE_INCLUDE_DIRECTORIES "${LLAMA_INCLUDE_DIR}"
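The exported package configuration above is what downstream projects load via `find_package(Llama)` before linking against the imported `llama` target. As a minimal sketch of such a consumer (illustrative only; it assumes the long-standing `llama.h` entry points `llama_backend_init`, `llama_print_system_info`, and `llama_backend_free`, not anything introduced by this commit):

```cpp
#include "llama.h"

#include <cstdio>

int main() {
    // Bring up the ggml/llama backend, print which acceleration features the
    // linked build was compiled with (BLAS, CUDA, Metal, ...), then shut down.
    llama_backend_init();
    printf("%s\n", llama_print_system_info());
    llama_backend_free();
    return 0;
}
```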
@@ -1,5 +1,6 @@
 # common

+find_package(Threads REQUIRED)

 # Build info header
 #

@@ -36,7 +37,7 @@ add_custom_command(
     COMMENT "Generating build details from Git"
     COMMAND ${CMAKE_COMMAND} -DMSVC=${MSVC} -DCMAKE_C_COMPILER_VERSION=${CMAKE_C_COMPILER_VERSION}
             -DCMAKE_C_COMPILER_ID=${CMAKE_C_COMPILER_ID} -DCMAKE_VS_PLATFORM_NAME=${CMAKE_VS_PLATFORM_NAME}
-            -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} -P "${CMAKE_CURRENT_SOURCE_DIR}/../scripts/gen-build-info-cpp.cmake"
+            -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} -P "${CMAKE_CURRENT_SOURCE_DIR}/cmake/build-info-gen-cpp.cmake"
     WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/.."
     DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/build-info.cpp.in" ${GIT_INDEX}
     VERBATIM

@@ -83,5 +84,5 @@ if (LLAMA_CURL)
 endif ()

 target_include_directories(${TARGET} PUBLIC .)
-target_compile_features(${TARGET} PUBLIC cxx_std_11)
-target_link_libraries(${TARGET} PRIVATE ${LLAMA_COMMON_EXTRA_LIBS} PUBLIC llama Threads::Threads)
+target_compile_features (${TARGET} PUBLIC cxx_std_11)
+target_link_libraries (${TARGET} PRIVATE ${LLAMA_COMMON_EXTRA_LIBS} PUBLIC llama Threads::Threads)

@@ -1,7 +1,7 @@
-include(${CMAKE_CURRENT_SOURCE_DIR}/scripts/build-info.cmake)
+include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/build-info.cmake)

 set(TEMPLATE_FILE "${CMAKE_CURRENT_SOURCE_DIR}/common/build-info.cpp.in")
 set(OUTPUT_FILE "${CMAKE_CURRENT_SOURCE_DIR}/common/build-info.cpp")

 # Only write the build info if it changed
 if(EXISTS ${OUTPUT_FILE})
common/common.cpp (1030 changes): file diff suppressed because it is too large.
@@ -52,6 +52,12 @@ int32_t cpu_get_num_math();
 // CLI argument parsing
 //

+// dimensionality reduction methods, used by cvector-generator
+enum dimre_method {
+    DIMRE_METHOD_PCA,
+    DIMRE_METHOD_MEAN,
+};
+
 struct gpt_params {
     uint32_t seed = LLAMA_DEFAULT_SEED; // RNG seed

@@ -152,7 +158,6 @@ struct gpt_params {
     bool prompt_cache_all = false; // save user input and generations to prompt cache
     bool prompt_cache_ro = false; // open the prompt cache read-only and do not update it

-    bool embedding = false; // get only sentence embedding
     bool escape = true; // escape "\n", "\r", "\t", "\'", "\"", and "\\"
     bool multiline_input = false; // reverse the usage of `\`
     bool simple_io = false; // improves compatibility with subprocesses and limited consoles

@@ -179,6 +184,12 @@ struct gpt_params {
     std::string mmproj = ""; // path to multimodal projector
     std::vector<std::string> image; // path to image file(s)

+    // embedding
+    bool embedding = false; // get only sentence embedding
+    int32_t embd_normalize = 2; // normalisation for embendings (-1=none, 0=max absolute int16, 1=taxicab, 2=euclidean, >2=p-norm)
+    std::string embd_out = ""; // empty = default, "array" = [[],[]...], "json" = openai style, "json+" = same "json" + cosine similarity matrix
+    std::string embd_sep = "\n"; // separator of embendings
+
     // server params
     int32_t port = 8080; // server listens on this network port
     int32_t timeout_read = 600; // http read timeout in seconds

@@ -189,6 +200,7 @@ struct gpt_params {
     std::string public_path = "";
     std::string chat_template = "";
     std::string system_prompt = "";
+    bool enable_chat_template = true;

     std::vector<std::string> api_keys;

@@ -233,13 +245,14 @@ struct gpt_params {
     bool compute_ppl = true; // whether to compute perplexity

     // cvector-generator params
-    int n_completions = 64;
-    int n_pca_batch = 20;
+    int n_pca_batch = 100;
     int n_pca_iterations = 1000;
-    std::string cvector_outfile = "control_vector.gguf";
-    std::string cvector_completions_file = "examples/cvector-generator/completions.txt";
+    dimre_method cvector_dimre_method = DIMRE_METHOD_PCA;
+    std::string cvector_outfile = "control_vector.gguf";
     std::string cvector_positive_file = "examples/cvector-generator/positive.txt";
     std::string cvector_negative_file = "examples/cvector-generator/negative.txt";
+
+    bool spm_infill = false; // suffix/prefix/middle pattern for infill
 };

 void gpt_params_handle_model_default(gpt_params & params);
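The new `dimre_method` enum above lets cvector-generator choose between PCA and a mean-based reduction of the collected hidden-state differences. As a rough, standalone illustration of the mean-based idea only (this is not the example's actual implementation; the function name and data layout are hypothetical):

```cpp
#include <vector>

// Illustrative sketch: collapse a set of per-prompt (positive - negative)
// hidden-state difference vectors into a single control direction by
// averaging them, which is the intuition behind a "mean" method as opposed
// to extracting the first principal component with PCA.
static std::vector<float> mean_direction(const std::vector<std::vector<float>> & diffs) {
    if (diffs.empty()) {
        return {};
    }
    std::vector<float> dir(diffs[0].size(), 0.0f);
    for (const auto & d : diffs) {
        for (size_t i = 0; i < d.size(); i++) {
            dir[i] += d[i];
        }
    }
    for (float & v : dir) {
        v /= (float) diffs.size();
    }
    return dir;
}
```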
@@ -352,9 +365,34 @@ bool llama_should_add_bos_token(const llama_model * model);
 // Chat template utils
 //

+// same with llama_chat_message, but uses std::string
+struct llama_chat_msg {
+    std::string role;
+    std::string content;
+};
+
 // Check if the template supplied via "--chat-template" is supported or not. Returns true if it's valid
 bool llama_chat_verify_template(const std::string & tmpl);

+// CPP wrapper for llama_chat_apply_template
+// If the built-in template is not supported, we default to chatml
+// If the custom "tmpl" is not supported, we throw an error
+std::string llama_chat_apply_template(const struct llama_model * model,
+        const std::string & tmpl,
+        const std::vector<llama_chat_msg> & chat,
+        bool add_ass);
+
+// Format single message, while taking into account the position of that message in chat history
+std::string llama_chat_format_single(const struct llama_model * model,
+        const std::string & tmpl,
+        const std::vector<llama_chat_msg> & past_msg,
+        const llama_chat_msg & new_msg,
+        bool add_ass);
+
+// Returns an example of formatted chat
+std::string llama_chat_format_example(const struct llama_model * model,
+        const std::string & tmpl);
+
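The wrappers declared above let callers build prompts from `std::string`-based chat history instead of the C-style `llama_chat_message` array. A minimal sketch of how they might be used (the messages are illustrative, and it assumes that an empty `tmpl` falls back to the template stored in the model's metadata, as the underlying C API does):

```cpp
#include "common.h"

#include <string>
#include <vector>

// Format a short history into a full prompt, then format just the newest
// message the way an incremental chat loop would.
static std::string build_prompt_sketch(const llama_model * model) {
    std::vector<llama_chat_msg> history = {
        {"system",    "You are a helpful assistant."},
        {"user",      "Hello!"},
        {"assistant", "Hi! How can I help?"},
    };

    // add_ass = true appends the assistant prefix so generation can start.
    std::string full = llama_chat_apply_template(model, "", history, /*add_ass=*/true);

    // For the next turn, only the delta for the new message is needed,
    // taking its position in the existing history into account.
    llama_chat_msg next = {"user", "What is llama.cpp?"};
    std::string delta = llama_chat_format_single(model, "", history, next, /*add_ass=*/true);
    (void) delta; // a front end would append this to the context it already sent

    return full;
}
```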
 //
 // KV cache utils
 //

@@ -369,7 +407,7 @@ void llama_kv_cache_dump_view_seqs(const llama_kv_cache_view & view, int row_siz
 // Embedding utils
 //

-void llama_embd_normalize(const float * inp, float * out, int n);
+void llama_embd_normalize(const float * inp, float * out, int n, int embd_norm = 2);

 float llama_embd_similarity_cos(const float * embd1, const float * embd2, int n);

@@ -413,4 +451,3 @@ void yaml_dump_string_multiline(FILE * stream, const char * prop_name, const cha
 void yaml_dump_non_result_info(
     FILE * stream, const gpt_params & params, const llama_context * lctx,
     const std::string & timestamp, const std::vector<int> & prompt_tokens, const char * model_desc);
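The new `embd_norm` argument on `llama_embd_normalize` selects among the modes documented on `gpt_params::embd_normalize` (-1 = none, 0 = max absolute int16, 1 = taxicab, 2 = euclidean, >2 = p-norm). A standalone sketch of the arithmetic those modes imply, not the library's own implementation; the int16 scaling constant 32760 is an assumption:

```cpp
#include <algorithm>
#include <cmath>

// Sketch of the normalisation modes described above.
static void embd_normalize_sketch(const float * inp, float * out, int n, int embd_norm) {
    double sum = 0.0;
    switch (embd_norm) {
        case -1: // no normalisation
            sum = 1.0;
            break;
        case 0:  // scale so the largest |value| fits an int16-style range
            for (int i = 0; i < n; i++) sum = std::max(sum, (double) std::fabs(inp[i]));
            sum /= 32760.0;
            break;
        case 2:  // euclidean (L2) norm
            for (int i = 0; i < n; i++) sum += (double) inp[i] * inp[i];
            sum = std::sqrt(sum);
            break;
        default: // p-norm with p = embd_norm (p = 1 is the taxicab norm)
            for (int i = 0; i < n; i++) sum += std::pow(std::fabs(inp[i]), embd_norm);
            sum = std::pow(sum, 1.0 / embd_norm);
            break;
    }
    const float norm = sum > 0.0 ? (float) (1.0 / sum) : 0.0f;
    for (int i = 0; i < n; i++) out[i] = inp[i] * norm;
}
```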
@ -40,6 +40,233 @@ static std::string build_repetition(const std::string & item_rule, int min_items
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Minimalistic replacement for std::string_view, which is only available from C++17 onwards */
|
||||||
|
class string_view {
|
||||||
|
const std::string & _str;
|
||||||
|
const size_t _start;
|
||||||
|
const size_t _end;
|
||||||
|
public:
|
||||||
|
string_view(const std::string & str, size_t start = 0, size_t end = std::string::npos) : _str(str), _start(start), _end(end == std::string::npos ? str.length() : end) {}
|
||||||
|
|
||||||
|
size_t size() const {
|
||||||
|
return _end - _start;
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t length() const {
|
||||||
|
return size();
|
||||||
|
}
|
||||||
|
|
||||||
|
operator std::string() const {
|
||||||
|
return str();
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string str() const {
|
||||||
|
return _str.substr(_start, _end - _start);
|
||||||
|
}
|
||||||
|
|
||||||
|
string_view substr(size_t pos, size_t len = std::string::npos) const {
|
||||||
|
return string_view(_str, _start + pos, len == std::string::npos ? _end : _start + pos + len);
|
||||||
|
}
|
||||||
|
|
||||||
|
char operator[](size_t pos) const {
|
||||||
|
auto index = _start + pos;
|
||||||
|
if (index >= _end) {
|
||||||
|
throw std::out_of_range("string_view index out of range");
|
||||||
|
}
|
||||||
|
return _str[_start + pos];
|
||||||
|
}
|
||||||
|
|
||||||
|
bool operator==(const string_view & other) const {
|
||||||
|
std::string this_str = *this;
|
||||||
|
std::string other_str = other;
|
||||||
|
return this_str == other_str;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
static void _build_min_max_int(int min_value, int max_value, std::stringstream & out, int decimals_left = 16, bool top_level = true) {
|
||||||
|
auto has_min = min_value != std::numeric_limits<int>::min();
|
||||||
|
auto has_max = max_value != std::numeric_limits<int>::max();
|
||||||
|
|
||||||
|
auto digit_range = [&](char from, char to) {
|
||||||
|
out << "[";
|
||||||
|
if (from == to) {
|
||||||
|
out << from;
|
||||||
|
} else {
|
||||||
|
out << from << "-" << to;
|
||||||
|
}
|
||||||
|
out << "]";
|
||||||
|
};
|
||||||
|
auto more_digits = [&](int min_digits, int max_digits) {
|
||||||
|
out << "[0-9]";
|
||||||
|
if (min_digits == max_digits && min_digits == 1) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
out << "{";
|
||||||
|
out << min_digits;
|
||||||
|
if (max_digits != min_digits) {
|
||||||
|
out << ",";
|
||||||
|
if (max_digits != std::numeric_limits<int>::max()) {
|
||||||
|
out << max_digits;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
out << "}";
|
||||||
|
};
|
||||||
|
std::function<void(const string_view &, const string_view &)> uniform_range =
|
||||||
|
[&](const string_view & from, const string_view & to) {
|
||||||
|
size_t i = 0;
|
||||||
|
while (i < from.length() && i < to.length() && from[i] == to[i]) {
|
||||||
|
i++;
|
||||||
|
}
|
||||||
|
if (i > 0) {
|
||||||
|
out << "\"" << from.substr(0, i).str() << "\"";
|
||||||
|
}
|
||||||
|
if (i < from.length() && i < to.length()) {
|
||||||
|
if (i > 0) {
|
||||||
|
out << " ";
|
||||||
|
}
|
||||||
|
auto sub_len = from.length() - i - 1;
|
||||||
|
if (sub_len > 0) {
|
||||||
|
auto from_sub = from.substr(i + 1);
|
||||||
|
auto to_sub = to.substr(i + 1);
|
||||||
|
auto sub_zeros = repeat("0", sub_len);
|
||||||
|
auto sub_nines = repeat("9", sub_len);
|
||||||
|
|
||||||
|
auto to_reached = false;
|
||||||
|
out << "(";
|
||||||
|
if (from_sub == sub_zeros) {
|
||||||
|
digit_range(from[i], to[i] - 1);
|
||||||
|
out << " ";
|
||||||
|
more_digits(sub_len, sub_len);
|
||||||
|
} else {
|
||||||
|
out << "[" << from[i] << "] ";
|
||||||
|
out << "(";
|
||||||
|
uniform_range(from_sub, sub_nines);
|
||||||
|
out << ")";
|
||||||
|
if (from[i] < to[i] - 1) {
|
||||||
|
out << " | ";
|
||||||
|
if (to_sub == sub_nines) {
|
||||||
|
digit_range(from[i] + 1, to[i]);
|
||||||
|
to_reached = true;
|
||||||
|
} else {
|
||||||
|
digit_range(from[i] + 1, to[i] - 1);
|
||||||
|
}
|
||||||
|
out << " ";
|
||||||
|
more_digits(sub_len, sub_len);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (!to_reached) {
|
||||||
|
out << " | ";
|
||||||
|
digit_range(to[i], to[i]);
|
||||||
|
out << " ";
|
||||||
|
uniform_range(sub_zeros, to_sub);
|
||||||
|
}
|
||||||
|
out << ")";
|
||||||
|
} else {
|
||||||
|
out << "[" << from[i] << "-" << to[i] << "]";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
if (has_min && has_max) {
|
||||||
|
if (min_value < 0 && max_value < 0) {
|
||||||
|
out << "\"-\" (";
|
||||||
|
_build_min_max_int(-max_value, -min_value, out, decimals_left, /* top_level= */ true);
|
||||||
|
out << ")";
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (min_value < 0) {
|
||||||
|
out << "\"-\" (";
|
||||||
|
_build_min_max_int(0, -min_value, out, decimals_left, /* top_level= */ true);
|
||||||
|
out << ") | ";
|
||||||
|
min_value = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
auto min_s = std::to_string(min_value);
|
||||||
|
auto max_s = std::to_string(max_value);
|
||||||
|
auto min_digits = min_s.length();
|
||||||
|
auto max_digits = max_s.length();
|
||||||
|
|
||||||
|
for (auto digits = min_digits; digits < max_digits; digits++) {
|
||||||
|
uniform_range(min_s, repeat("9", digits));
|
||||||
|
min_s = "1" + repeat("0", digits);
|
||||||
|
out << " | ";
|
||||||
|
}
|
||||||
|
uniform_range(min_s, max_s);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
auto less_decimals = std::max(decimals_left - 1, 1);
|
||||||
|
|
||||||
|
if (has_min) {
|
||||||
|
if (min_value < 0) {
|
||||||
|
out << "\"-\" (";
|
||||||
|
_build_min_max_int(std::numeric_limits<int>::min(), -min_value, out, decimals_left, /* top_level= */ false);
|
||||||
|
out << ") | [0] | [1-9] ";
|
||||||
|
more_digits(0, decimals_left - 1);
|
||||||
|
} else if (min_value == 0) {
|
||||||
|
if (top_level) {
|
||||||
|
out << "[0] | [1-9] ";
|
||||||
|
more_digits(0, less_decimals);
|
||||||
|
} else {
|
||||||
|
more_digits(1, decimals_left);
|
||||||
|
}
|
||||||
|
} else if (min_value <= 9) {
|
||||||
|
char c = '0' + min_value;
|
||||||
|
auto range_start = top_level ? '1' : '0';
|
||||||
|
if (c > range_start) {
|
||||||
|
digit_range(range_start, c - 1);
|
||||||
|
out << " ";
|
||||||
|
more_digits(1, less_decimals);
|
||||||
|
out << " | ";
|
||||||
|
}
|
||||||
|
digit_range(c, '9');
|
||||||
|
out << " ";
|
||||||
|
more_digits(0, less_decimals);
|
||||||
|
} else {
|
||||||
|
auto min_s = std::to_string(min_value);
|
||||||
|
auto len = min_s.length();
|
||||||
|
auto c = min_s[0];
|
||||||
|
|
||||||
|
if (c > '1') {
|
||||||
|
digit_range(top_level ? '1' : '0', c - 1);
|
||||||
|
out << " ";
|
||||||
|
more_digits(len, less_decimals);
|
||||||
|
out << " | ";
|
||||||
|
}
|
||||||
|
digit_range(c, c);
|
||||||
|
out << " (";
|
||||||
|
_build_min_max_int(std::stoi(min_s.substr(1)), std::numeric_limits<int>::max(), out, less_decimals, /* top_level= */ false);
|
||||||
|
out << ")";
|
||||||
|
if (c < '9') {
|
||||||
|
out << " | ";
|
||||||
|
digit_range(c + 1, '9');
|
||||||
|
out << " ";
|
||||||
|
more_digits(len - 1, less_decimals);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (has_max) {
|
||||||
|
if (max_value >= 0) {
|
||||||
|
if (top_level) {
|
||||||
|
out << "\"-\" [1-9] ";
|
||||||
|
more_digits(0, less_decimals);
|
||||||
|
out << " | ";
|
||||||
|
}
|
||||||
|
_build_min_max_int(0, max_value, out, decimals_left, /* top_level= */ true);
|
||||||
|
} else {
|
||||||
|
out << "\"-\" (";
|
||||||
|
_build_min_max_int(-max_value, std::numeric_limits<int>::max(), out, decimals_left, /* top_level= */ false);
|
||||||
|
out << ")";
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
throw std::runtime_error("At least one of min_value or max_value must be set");
|
||||||
|
}
|
||||||
|
|
||||||
const std::string SPACE_RULE = "| \" \" | \"\\n\" [ \\t]{0,20}";
|
const std::string SPACE_RULE = "| \" \" | \"\\n\" [ \\t]{0,20}";
|
||||||
|
|
||||||
struct BuiltinRule {
|
struct BuiltinRule {
|
||||||
|
@ -89,7 +316,7 @@ std::unordered_map<char, std::string> GRAMMAR_LITERAL_ESCAPES = {
|
||||||
};
|
};
|
||||||
|
|
||||||
std::unordered_set<char> NON_LITERAL_SET = {'|', '.', '(', ')', '[', ']', '{', '}', '*', '+', '?'};
|
std::unordered_set<char> NON_LITERAL_SET = {'|', '.', '(', ')', '[', ']', '{', '}', '*', '+', '?'};
|
||||||
std::unordered_set<char> ESCAPED_IN_REGEXPS_BUT_NOT_IN_LITERALS = {'[', ']', '(', ')', '|', '{', '}', '*', '+', '?'};
|
std::unordered_set<char> ESCAPED_IN_REGEXPS_BUT_NOT_IN_LITERALS = {'^', '$', '.', '[', ']', '(', ')', '|', '{', '}', '*', '+', '?'};
|
||||||
|
|
||||||
template <typename Iterator>
|
template <typename Iterator>
|
||||||
std::string join(Iterator begin, Iterator end, const std::string & separator) {
|
std::string join(Iterator begin, Iterator end, const std::string & separator) {
|
||||||
|
@ -160,7 +387,6 @@ static std::string format_literal(const std::string & literal) {
|
||||||
return "\"" + escaped + "\"";
|
return "\"" + escaped + "\"";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
class SchemaConverter {
|
class SchemaConverter {
|
||||||
private:
|
private:
|
||||||
std::function<json(const std::string &)> _fetch_json;
|
std::function<json(const std::string &)> _fetch_json;
|
||||||
|
@ -388,6 +614,75 @@ private:
|
||||||
return _add_rule(name, "\"\\\"\" " + to_rule(transform()) + " \"\\\"\" space");
|
return _add_rule(name, "\"\\\"\" " + to_rule(transform()) + " \"\\\"\" space");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
Returns a rule that matches a JSON string that is none of the provided strings
|
||||||
|
|
||||||
|
not_strings({"a"})
|
||||||
|
-> ["] ( [a] char+ | [^"a] char* )? ["] space
|
||||||
|
not_strings({"and", "also"})
|
||||||
|
-> ["] ( [a] ([l] ([s] ([o] char+ | [^"o] char*) | [^"s] char*) | [n] ([d] char+ | [^"d] char*) | [^"ln] char*) | [^"a] char* )? ["] space
|
||||||
|
*/
|
||||||
|
std::string _not_strings(const std::vector<std::string> & strings) {
|
||||||
|
|
||||||
|
struct TrieNode {
|
||||||
|
std::map<char, TrieNode> children;
|
||||||
|
bool is_end_of_string;
|
||||||
|
|
||||||
|
TrieNode() : is_end_of_string(false) {}
|
||||||
|
|
||||||
|
void insert(const std::string & string) {
|
||||||
|
auto node = this;
|
||||||
|
for (char c : string) {
|
||||||
|
node = &node->children[c];
|
||||||
|
}
|
||||||
|
node->is_end_of_string = true;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
TrieNode trie;
|
||||||
|
for (const auto & s : strings) {
|
||||||
|
trie.insert(s);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::string char_rule = _add_primitive("char", PRIMITIVE_RULES.at("char"));
|
||||||
|
std::ostringstream out;
|
||||||
|
out << "[\"] ( ";
|
||||||
|
std::function<void(const TrieNode &)> visit = [&](const TrieNode & node) {
|
||||||
|
std::ostringstream rejects;
|
||||||
|
auto first = true;
|
||||||
|
for (const auto & kv : node.children) {
|
||||||
|
rejects << kv.first;
|
||||||
|
if (first) {
|
||||||
|
first = false;
|
||||||
|
} else {
|
||||||
|
out << " | ";
|
||||||
|
}
|
||||||
|
out << "[" << kv.first << "]";
|
||||||
|
if (!kv.second.children.empty()) {
|
||||||
|
out << " (";
|
||||||
|
visit(kv.second);
|
||||||
|
out << ")";
|
||||||
|
} else if (kv.second.is_end_of_string) {
|
||||||
|
out << " " << char_rule << "+";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (!node.children.empty()) {
|
||||||
|
if (!first) {
|
||||||
|
out << " | ";
|
||||||
|
}
|
||||||
|
out << "[^\"" << rejects.str() << "] " << char_rule << "*";
|
||||||
|
}
|
||||||
|
};
|
||||||
|
visit(trie);
|
||||||
|
|
||||||
|
out << " )";
|
||||||
|
if (!trie.is_end_of_string) {
|
||||||
|
out << "?";
|
||||||
|
}
|
||||||
|
out << " [\"] space";
|
||||||
|
return out.str();
|
||||||
|
}
|
||||||
|
|
||||||
std::string _resolve_ref(const std::string & ref) {
|
std::string _resolve_ref(const std::string & ref) {
|
||||||
std::string ref_name = ref.substr(ref.find_last_of('/') + 1);
|
std::string ref_name = ref.substr(ref.find_last_of('/') + 1);
|
||||||
if (_rules.find(ref_name) == _rules.end() && _refs_being_resolved.find(ref) == _refs_being_resolved.end()) {
|
if (_rules.find(ref_name) == _rules.end() && _refs_being_resolved.find(ref) == _refs_being_resolved.end()) {
|
||||||
|
@ -408,6 +703,7 @@ private:
|
||||||
std::vector<std::string> required_props;
|
std::vector<std::string> required_props;
|
||||||
std::vector<std::string> optional_props;
|
std::vector<std::string> optional_props;
|
||||||
std::unordered_map<std::string, std::string> prop_kv_rule_names;
|
std::unordered_map<std::string, std::string> prop_kv_rule_names;
|
||||||
|
std::vector<std::string> prop_names;
|
||||||
for (const auto & kv : properties) {
|
for (const auto & kv : properties) {
|
||||||
const auto &prop_name = kv.first;
|
const auto &prop_name = kv.first;
|
||||||
const auto &prop_schema = kv.second;
|
const auto &prop_schema = kv.second;
|
||||||
|
@ -422,11 +718,18 @@ private:
|
||||||
} else {
|
} else {
|
||||||
optional_props.push_back(prop_name);
|
optional_props.push_back(prop_name);
|
||||||
}
|
}
|
||||||
|
prop_names.push_back(prop_name);
|
||||||
}
|
}
|
||||||
if (additional_properties.is_object() || (additional_properties.is_boolean() && additional_properties.get<bool>())) {
|
if ((additional_properties.is_boolean() && additional_properties.get<bool>()) || additional_properties.is_object()) {
|
||||||
std::string sub_name = name + (name.empty() ? "" : "-") + "additional";
|
std::string sub_name = name + (name.empty() ? "" : "-") + "additional";
|
||||||
std::string value_rule = visit(additional_properties.is_object() ? additional_properties : json::object(), sub_name + "-value");
|
std::string value_rule =
|
||||||
std::string kv_rule = _add_rule(sub_name + "-kv", _add_primitive("string", PRIMITIVE_RULES.at("string")) + " \":\" space " + value_rule);
|
additional_properties.is_object() ? visit(additional_properties, sub_name + "-value")
|
||||||
|
: _add_primitive("value", PRIMITIVE_RULES.at("value"));
|
||||||
|
|
||||||
|
auto key_rule =
|
||||||
|
prop_names.empty() ? _add_primitive("string", PRIMITIVE_RULES.at("string"))
|
||||||
|
: _add_rule(sub_name + "-k", _not_strings(prop_names));
|
||||||
|
std::string kv_rule = _add_rule(sub_name + "-kv", key_rule + " \":\" space " + value_rule);
|
||||||
prop_kv_rule_names["*"] = kv_rule;
|
prop_kv_rule_names["*"] = kv_rule;
|
||||||
optional_props.push_back("*");
|
optional_props.push_back("*");
|
||||||
}
|
}
|
||||||
|
@ -452,15 +755,11 @@ private:
|
||||||
}
|
}
|
||||||
std::string k = ks[0];
|
std::string k = ks[0];
|
||||||
std::string kv_rule_name = prop_kv_rule_names[k];
|
std::string kv_rule_name = prop_kv_rule_names[k];
|
||||||
if (k == "*") {
|
std::string comma_ref = "( \",\" space " + kv_rule_name + " )";
|
||||||
res = _add_rule(
|
if (first_is_optional) {
|
||||||
name + (name.empty() ? "" : "-") + "additional-kvs",
|
res = comma_ref + (k == "*" ? "*" : "?");
|
||||||
kv_rule_name + " ( \",\" space " + kv_rule_name + " )*"
|
|
||||||
);
|
|
||||||
} else if (first_is_optional) {
|
|
||||||
res = "( \",\" space " + kv_rule_name + " )?";
|
|
||||||
} else {
|
} else {
|
||||||
res = kv_rule_name;
|
res = kv_rule_name + (k == "*" ? " " + comma_ref + "*" : "");
|
||||||
}
|
}
|
||||||
if (ks.size() > 1) {
|
if (ks.size() > 1) {
|
||||||
res += " " + _add_rule(
|
res += " " + _add_rule(
|
||||||
|
@ -594,17 +893,19 @@ public:
|
||||||
} else if (schema_type.is_array()) {
|
} else if (schema_type.is_array()) {
|
||||||
std::vector<json> schema_types;
|
std::vector<json> schema_types;
|
||||||
for (const auto & t : schema_type) {
|
for (const auto & t : schema_type) {
|
||||||
schema_types.push_back({{"type", t}});
|
json schema_copy(schema);
|
||||||
|
schema_copy["type"] = t;
|
||||||
|
schema_types.push_back(schema_copy);
|
||||||
}
|
}
|
||||||
return _add_rule(rule_name, _generate_union_rule(name, schema_types));
|
return _add_rule(rule_name, _generate_union_rule(name, schema_types));
|
||||||
} else if (schema.contains("const")) {
|
} else if (schema.contains("const")) {
|
||||||
return _add_rule(rule_name, _generate_constant_rule(schema["const"]));
|
return _add_rule(rule_name, _generate_constant_rule(schema["const"]) + " space");
|
||||||
} else if (schema.contains("enum")) {
|
} else if (schema.contains("enum")) {
|
||||||
std::vector<std::string> enum_values;
|
std::vector<std::string> enum_values;
|
||||||
for (const auto & v : schema["enum"]) {
|
for (const auto & v : schema["enum"]) {
|
||||||
enum_values.push_back(_generate_constant_rule(v));
|
enum_values.push_back(_generate_constant_rule(v));
|
||||||
}
|
}
|
||||||
return _add_rule(rule_name, join(enum_values.begin(), enum_values.end(), " | "));
|
return _add_rule(rule_name, "(" + join(enum_values.begin(), enum_values.end(), " | ") + ") space");
|
||||||
} else if ((schema_type.is_null() || schema_type == "object")
|
} else if ((schema_type.is_null() || schema_type == "object")
|
||||||
&& (schema.contains("properties") ||
|
&& (schema.contains("properties") ||
|
||||||
(schema.contains("additionalProperties") && schema["additionalProperties"] != true))) {
|
(schema.contains("additionalProperties") && schema["additionalProperties"] != true))) {
|
||||||
|
@ -686,6 +987,24 @@ public:
|
||||||
int min_len = schema.contains("minLength") ? schema["minLength"].get<int>() : 0;
|
int min_len = schema.contains("minLength") ? schema["minLength"].get<int>() : 0;
|
||||||
int max_len = schema.contains("maxLength") ? schema["maxLength"].get<int>() : std::numeric_limits<int>::max();
|
int max_len = schema.contains("maxLength") ? schema["maxLength"].get<int>() : std::numeric_limits<int>::max();
|
||||||
return _add_rule(rule_name, "\"\\\"\" " + build_repetition(char_rule, min_len, max_len) + " \"\\\"\" space");
|
return _add_rule(rule_name, "\"\\\"\" " + build_repetition(char_rule, min_len, max_len) + " \"\\\"\" space");
|
||||||
|
} else if (schema_type == "integer" && (schema.contains("minimum") || schema.contains("exclusiveMinimum") || schema.contains("maximum") || schema.contains("exclusiveMaximum"))) {
|
||||||
|
int min_value = std::numeric_limits<int>::min();
|
||||||
|
int max_value = std::numeric_limits<int>::max();
|
||||||
|
if (schema.contains("minimum")) {
|
||||||
|
min_value = schema["minimum"].get<int>();
|
||||||
|
} else if (schema.contains("exclusiveMinimum")) {
|
||||||
|
min_value = schema["exclusiveMinimum"].get<int>() + 1;
|
||||||
|
}
|
||||||
|
if (schema.contains("maximum")) {
|
||||||
|
max_value = schema["maximum"].get<int>();
|
||||||
|
} else if (schema.contains("exclusiveMaximum")) {
|
||||||
|
max_value = schema["exclusiveMaximum"].get<int>() - 1;
|
||||||
|
}
|
||||||
|
std::stringstream out;
|
||||||
|
out << "(";
|
||||||
|
_build_min_max_int(min_value, max_value, out);
|
||||||
|
out << ") space";
|
||||||
|
return _add_rule(rule_name, out.str());
|
||||||
} else if (schema.empty() || schema_type == "object") {
|
} else if (schema.empty() || schema_type == "object") {
|
||||||
return _add_rule(rule_name, _add_primitive("object", PRIMITIVE_RULES.at("object")));
|
return _add_rule(rule_name, _add_primitive("object", PRIMITIVE_RULES.at("object")));
|
||||||
} else {
|
} else {
|
||||||
|
|
|
@@ -28,9 +28,13 @@ struct llama_sampling_context * llama_sampling_init(const struct llama_sampling_

         std::vector<const llama_grammar_element *> grammar_rules(result->parsed_grammar.c_rules());

-        result->grammar = llama_grammar_init(
+        struct llama_grammar * grammar = llama_grammar_init(
                 grammar_rules.data(),
                 grammar_rules.size(), result->parsed_grammar.symbol_ids.at("root"));
+        if (grammar == nullptr) {
+            throw std::runtime_error("Failed to initialize llama_grammar");
+        }
+        result->grammar = grammar;
     }

     result->prev.resize(params.n_prev);

@@ -59,9 +63,13 @@ void llama_sampling_reset(llama_sampling_context * ctx) {
     if (!ctx->parsed_grammar.rules.empty()) {
         std::vector<const llama_grammar_element *> grammar_rules(ctx->parsed_grammar.c_rules());

-        ctx->grammar = llama_grammar_init(
+        struct llama_grammar * grammar = llama_grammar_init(
                 grammar_rules.data(),
                 grammar_rules.size(), ctx->parsed_grammar.symbol_ids.at("root"));
+        if (grammar == nullptr) {
+            throw std::runtime_error("Failed to initialize llama_grammar");
+        }
+        ctx->grammar = grammar;
     }

     std::fill(ctx->prev.begin(), ctx->prev.end(), 0);
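With the change above, a grammar that fails to initialize now surfaces as a `std::runtime_error` thrown from `llama_sampling_init` (and from `llama_sampling_reset`) rather than silently leaving a null grammar pointer. A sketch of how a caller that accepts user-supplied GBNF might guard the call (the error message handling is illustrative):

```cpp
#include "sampling.h"

#include <cstdio>
#include <stdexcept>

// Initialize a sampling context from sampling params whose grammar string may
// come from untrusted user input, reporting bad grammars instead of crashing
// later on a null grammar.
static llama_sampling_context * init_sampling_or_report(const llama_sampling_params & params) {
    try {
        return llama_sampling_init(params);
    } catch (const std::runtime_error & err) {
        fprintf(stderr, "invalid grammar: %s\n", err.what());
        return nullptr;
    }
}
```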
@@ -85,6 +85,10 @@ models = [
    {"name": "smaug-bpe", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/abacusai/Smaug-Llama-3-70B-Instruct", },
    {"name": "poro-chat", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/LumiOpen/Poro-34B-chat", },
    {"name": "jina-v2-code", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/jinaai/jina-embeddings-v2-base-code", },
+    {"name": "viking", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/LumiOpen/Viking-7B", }, # Also used for Viking 13B and 33B
+    {"name": "gemma", "tokt": TOKENIZER_TYPE.SPM, "repo": "https://huggingface.co/google/gemma-2b", },
+    {"name": "gemma-2", "tokt": TOKENIZER_TYPE.SPM, "repo": "https://huggingface.co/google/gemma-2-9b", },
+    {"name": "jais", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/core42/jais-13b", },
]

@@ -214,7 +218,7 @@ src_func = f"""
"""

convert_py_pth = pathlib.Path("convert-hf-to-gguf.py")
-convert_py = convert_py_pth.read_text()
+convert_py = convert_py_pth.read_text(encoding="utf-8")
convert_py = re.sub(
    r"(# Marker: Start get_vocab_base_pre)(.+?)( +# Marker: End get_vocab_base_pre)",
    lambda m: m.group(1) + src_func + m.group(3),

@@ -222,7 +226,7 @@ convert_py = re.sub(
    flags=re.DOTALL | re.MULTILINE,
)

-convert_py_pth.write_text(convert_py)
+convert_py_pth.write_text(convert_py, encoding="utf-8")

logger.info("+++ convert-hf-to-gguf.py was updated")

@@ -271,7 +275,8 @@ tests = [
    "3333333",
    "33333333",
    "333333333",
-    # "Cửa Việt", # llama-bpe fails on this
+    "Cửa Việt", # llama-bpe fails on this
+    " discards",
    chktxt,
]
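Note: the `chkhsh` strings that the new `viking`, `gemma-2` and `jais` entries end up matched against (see the `get_vocab_base_pre` hunk further below) are, as far as the update script shows, SHA-256 digests of the token IDs produced for a fixed probe string. A minimal sketch of that idea; the probe text and the local tokenizer path here are placeholders, not the real values:

```python
import hashlib
from transformers import AutoTokenizer  # dependency assumed by the update script

chktxt = "..."  # the real script uses a long, fixed multilingual probe string
tokenizer = AutoTokenizer.from_pretrained("models/tokenizers/viking")  # hypothetical local path

chktok = tokenizer.encode(chktxt)
chkhsh = hashlib.sha256(str(chktok).encode()).hexdigest()
print(chkhsh)  # compared against the hard-coded hashes in get_vocab_base_pre()
```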
@@ -65,7 +65,8 @@ class Model:
    # subclasses should define this!
    model_arch: gguf.MODEL_ARCH

-    def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path, is_big_endian: bool, use_temp_file: bool, eager: bool, model_name: str | None):
+    def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path, is_big_endian: bool, use_temp_file: bool, eager: bool,
+                 model_name: str | None, split_max_tensors: int = 0, split_max_size: int = 0, dry_run: bool = False, small_first_shard: bool = False):
        if type(self) is Model:
            raise TypeError(f"{type(self).__name__!r} should not be directly instantiated")
        self.dir_model = dir_model

@@ -80,7 +81,7 @@ class Model:
        if not self.is_safetensors:
            self.part_names = Model.get_model_part_names(self.dir_model, "pytorch_model", ".bin")
        self.hparams = Model.load_hparams(self.dir_model)
-        self.block_count = self.find_hparam(["n_layers", "num_hidden_layers", "n_layer"])
+        self.block_count = self.find_hparam(["n_layers", "num_hidden_layers", "n_layer", "num_layers"])
        self.tensor_map = gguf.get_tensor_name_map(self.model_arch, self.block_count)
        self.tensor_names = None
        if self.ftype == gguf.LlamaFileType.GUESSED:

@@ -96,7 +97,8 @@ class Model:
            ftype_lw: str = ftype_up.lower()
            # allow templating the file name with the output ftype, useful with the "auto" ftype
            self.fname_out = fname_out.parent / fname_out.name.format(ftype_lw, outtype=ftype_lw, ftype=ftype_lw, OUTTYPE=ftype_up, FTYPE=ftype_up)
-        self.gguf_writer = gguf.GGUFWriter(path=None, arch=gguf.MODEL_ARCH_NAMES[self.model_arch], endianess=self.endianess, use_temp_file=self.use_temp_file)
+        self.gguf_writer = gguf.GGUFWriter(path=None, arch=gguf.MODEL_ARCH_NAMES[self.model_arch], endianess=self.endianess, use_temp_file=self.use_temp_file,
+                                           split_max_tensors=split_max_tensors, split_max_size=split_max_size, dry_run=dry_run, small_first_shard=small_first_shard)

    @classmethod
    def __init_subclass__(cls):

@@ -332,6 +334,8 @@ class Model:
        self.gguf_writer.close()

    def write_vocab(self):
+        if len(self.gguf_writer.tensors) != 1:
+            raise ValueError('Splitting the vocabulary is not supported')
        self.gguf_writer.write_header_to_file(self.fname_out)
        self.gguf_writer.write_kv_data_to_file()
        self.gguf_writer.close()

@@ -483,6 +487,12 @@ class Model:
        if chkhsh == "7967bfa498ade6b757b064f31e964dddbb80f8f9a4d68d4ba7998fcf281c531a":
            # ref: https://huggingface.co/jinaai/jina-embeddings-v2-base-code
            res = "jina-v2-code"
+        if chkhsh == "7fc505bd3104ca1083b150b17d088b59534ede9bde81f0dd2090967d7fe52cee":
+            # ref: https://huggingface.co/LumiOpen/Viking-7B
+            res = "viking"
+        if chkhsh == "b53802fb28e26d645c3a310b34bfe07da813026ec7c7716883404d5e0f8b1901":
+            # ref: https://huggingface.co/core42/jais-13b
+            res = "jais"

        if res is None:
            logger.warning("\n")

@@ -569,7 +579,19 @@ class Model:
        special_vocab._set_special_token("unk", tokenizer.special_tokens["<|endoftext|>"])
        special_vocab.add_to_gguf(self.gguf_writer)

-    def _set_vocab_sentencepiece(self):
+    def _set_vocab_sentencepiece(self, add_to_gguf=True):
+        tokens, scores, toktypes = self._create_vocab_sentencepiece()
+
+        self.gguf_writer.add_tokenizer_model("llama")
+        self.gguf_writer.add_tokenizer_pre("default")
+        self.gguf_writer.add_token_list(tokens)
+        self.gguf_writer.add_token_scores(scores)
+        self.gguf_writer.add_token_types(toktypes)
+
+        special_vocab = gguf.SpecialVocab(self.dir_model, n_vocab=len(tokens))
+        special_vocab.add_to_gguf(self.gguf_writer)
+
+    def _create_vocab_sentencepiece(self):
        from sentencepiece import SentencePieceProcessor

        tokenizer_path = self.dir_model / 'tokenizer.model'

@@ -631,14 +653,7 @@ class Model:
                scores.append(-1000.0)
                toktypes.append(SentencePieceTokenTypes.UNUSED)

-        self.gguf_writer.add_tokenizer_model("llama")
-        self.gguf_writer.add_tokenizer_pre("default")
-        self.gguf_writer.add_token_list(tokens)
-        self.gguf_writer.add_token_scores(scores)
-        self.gguf_writer.add_token_types(toktypes)
-
-        special_vocab = gguf.SpecialVocab(self.dir_model, n_vocab=len(tokens))
-        special_vocab.add_to_gguf(self.gguf_writer)
+        return tokens, scores, toktypes

    def _set_vocab_llama_hf(self):
        vocab = gguf.LlamaHfVocab(self.dir_model)

@@ -967,7 +982,11 @@ class XverseModel(Model):
        from transformers import AutoTokenizer
        tokenizer = AutoTokenizer.from_pretrained(dir_model)
        vocab_size = hparams.get("vocab_size", len(tokenizer.vocab))
-        assert max(tokenizer.vocab.values()) < vocab_size
+        # Since we are checking the maximum index, we need to ensure it's strictly less than vocab_size,
+        # because vocab_size is the count of items, and indexes start at 0.
+        max_vocab_index = max(tokenizer.get_vocab().values())
+        if max_vocab_index >= vocab_size:
+            raise ValueError("Vocabulary size exceeds expected maximum size.")

        reverse_vocab: dict[int, str] = {id_: encoded_tok for encoded_tok, id_ in tokenizer.vocab.items()}
        added_vocab = tokenizer.get_added_vocab()

@@ -1400,6 +1419,48 @@ class LlamaModel(Model):
                raise ValueError(f"Unprocessed experts: {experts}")


+@Model.register("BitnetForCausalLM")
+class BitnetModel(Model):
+    model_arch = gguf.MODEL_ARCH.BITNET
+
+    def set_vocab(self):
+        self._set_vocab_sentencepiece()
+
+    def set_gguf_parameters(self):
+        super().set_gguf_parameters()
+        self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.LINEAR)
+        self.gguf_writer.add_rope_scaling_factor(1.0)
+
+    def weight_quant(self, weight):
+        dtype = weight.dtype
+        weight = weight.float()
+        s = 1 / weight.abs().mean().clamp(min=1e-5)
+        weight = (weight * s).round().clamp(-1, 1) / s
+        scale = weight.abs().max().unsqueeze(0)
+        weight = torch.where(weight.abs().less(1e-6), 0, weight).type(dtype)
+        weight = torch.sign(weight).type(dtype)
+        return weight.type(dtype), scale.type(torch.float32)
+
+    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+        new_name = self.map_tensor_name(name)
+
+        if any(self.match_model_tensor_name(new_name, key, bid) for key in [
+            gguf.MODEL_TENSOR.ATTN_Q,
+            gguf.MODEL_TENSOR.ATTN_K,
+            gguf.MODEL_TENSOR.ATTN_V,
+            gguf.MODEL_TENSOR.ATTN_OUT,
+            gguf.MODEL_TENSOR.FFN_UP,
+            gguf.MODEL_TENSOR.FFN_DOWN,
+            gguf.MODEL_TENSOR.FFN_GATE,
+        ]):
+            # transform weight into 1/0/-1 (in fp32)
+            weight_torch, scale_torch = self.weight_quant(data_torch)
+            yield (new_name, weight_torch)
+            yield (new_name.removesuffix(".weight") + ".scale", scale_torch)
+        else:
+            yield (new_name, data_torch)
+
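Note: the `weight_quant` helper above reduces every targeted weight matrix to ternary values plus one fp32 scale per tensor. A standalone sketch of the same transform on made-up data, in case the round trip is unclear:

```python
import torch

def weight_quant(weight: torch.Tensor):
    # same transform as BitnetModel.weight_quant above
    dtype = weight.dtype
    weight = weight.float()
    s = 1 / weight.abs().mean().clamp(min=1e-5)
    weight = (weight * s).round().clamp(-1, 1) / s
    scale = weight.abs().max().unsqueeze(0)
    weight = torch.where(weight.abs().less(1e-6), 0, weight).type(dtype)
    weight = torch.sign(weight).type(dtype)
    return weight.type(dtype), scale.type(torch.float32)

w = torch.randn(4, 4)   # made-up data
q, scale = weight_quant(w)
print(q)                # only -1, 0 and 1 remain
print(scale)            # single per-tensor scale, exported as "<name>.scale" above
```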
@Model.register("GrokForCausalLM")
|
@Model.register("GrokForCausalLM")
|
||||||
class GrokModel(Model):
|
class GrokModel(Model):
|
||||||
model_arch = gguf.MODEL_ARCH.GROK
|
model_arch = gguf.MODEL_ARCH.GROK
|
||||||
|
@ -1881,7 +1942,7 @@ class Phi3MiniModel(Model):
|
||||||
if len(rope_scaling_type) == 0:
|
if len(rope_scaling_type) == 0:
|
||||||
raise KeyError('Missing the required key rope_scaling.type')
|
raise KeyError('Missing the required key rope_scaling.type')
|
||||||
|
|
||||||
if rope_scaling_type == 'su':
|
if rope_scaling_type == 'su' or rope_scaling_type == 'longrope':
|
||||||
attn_factor = math.sqrt(1 + math.log(scale) / math.log(orig_max_pos_embds)) if scale > 1.0 else 1.0
|
attn_factor = math.sqrt(1 + math.log(scale) / math.log(orig_max_pos_embds)) if scale > 1.0 else 1.0
|
||||||
elif rope_scaling_type == 'yarn':
|
elif rope_scaling_type == 'yarn':
|
||||||
attn_factor = 0.1 * math.log(scale) + 1.0 if scale > 1.0 else 1.0
|
attn_factor = 0.1 * math.log(scale) + 1.0 if scale > 1.0 else 1.0
|
||||||
|
@ -2255,6 +2316,8 @@ class GemmaModel(Model):
|
||||||
special_vocab._set_special_token("eot", 107)
|
special_vocab._set_special_token("eot", 107)
|
||||||
special_vocab.add_to_gguf(self.gguf_writer)
|
special_vocab.add_to_gguf(self.gguf_writer)
|
||||||
|
|
||||||
|
self.gguf_writer.add_add_space_prefix(False)
|
||||||
|
|
||||||
def set_gguf_parameters(self):
|
def set_gguf_parameters(self):
|
||||||
hparams = self.hparams
|
hparams = self.hparams
|
||||||
block_count = hparams["num_hidden_layers"]
|
block_count = hparams["num_hidden_layers"]
|
||||||
|
@ -2287,6 +2350,71 @@ class GemmaModel(Model):
|
||||||
return [(self.map_tensor_name(name), data_torch)]
|
return [(self.map_tensor_name(name), data_torch)]
|
||||||
|
|
||||||
|
|
||||||
|
@Model.register("Gemma2ForCausalLM")
|
||||||
|
class Gemma2Model(Model):
|
||||||
|
model_arch = gguf.MODEL_ARCH.GEMMA2
|
||||||
|
|
||||||
|
def set_vocab(self):
|
||||||
|
tokens, scores, toktypes = self._create_vocab_sentencepiece()
|
||||||
|
# hack: This is required so that we can properly use start/end-of-turn for chat template
|
||||||
|
for i in range(108):
|
||||||
|
# including <unusedX>, <start_of_turn>, <end_of_turn>
|
||||||
|
toktypes[i] = SentencePieceTokenTypes.CONTROL
|
||||||
|
self.gguf_writer.add_tokenizer_model("llama")
|
||||||
|
self.gguf_writer.add_tokenizer_pre("default")
|
||||||
|
self.gguf_writer.add_token_list(tokens)
|
||||||
|
self.gguf_writer.add_token_scores(scores)
|
||||||
|
self.gguf_writer.add_token_types(toktypes)
|
||||||
|
|
||||||
|
special_vocab = gguf.SpecialVocab(self.dir_model, n_vocab=len(tokens))
|
||||||
|
special_vocab.add_to_gguf(self.gguf_writer)
|
||||||
|
|
||||||
|
self.gguf_writer.add_add_space_prefix(False)
|
||||||
|
|
||||||
|
def set_gguf_parameters(self):
|
||||||
|
hparams = self.hparams
|
||||||
|
block_count = hparams["num_hidden_layers"]
|
||||||
|
|
||||||
|
self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name)
|
||||||
|
self.gguf_writer.add_context_length(hparams["max_position_embeddings"])
|
||||||
|
self.gguf_writer.add_embedding_length(hparams["hidden_size"])
|
||||||
|
self.gguf_writer.add_block_count(block_count)
|
||||||
|
self.gguf_writer.add_feed_forward_length(hparams["intermediate_size"])
|
||||||
|
self.gguf_writer.add_head_count(hparams["num_attention_heads"])
|
||||||
|
self.gguf_writer.add_head_count_kv(self.hparams["num_key_value_heads"] if "num_key_value_heads" in hparams else hparams["num_attention_heads"])
|
||||||
|
self.gguf_writer.add_layer_norm_rms_eps(self.hparams["rms_norm_eps"])
|
||||||
|
self.gguf_writer.add_key_length(hparams["head_dim"])
|
||||||
|
self.gguf_writer.add_value_length(hparams["head_dim"])
|
||||||
|
self.gguf_writer.add_file_type(self.ftype)
|
||||||
|
self.gguf_writer.add_attn_logit_softcapping(
|
||||||
|
self.hparams["attn_logit_softcapping"]
|
||||||
|
)
|
||||||
|
self.gguf_writer.add_final_logit_softcapping(
|
||||||
|
self.hparams["final_logit_softcapping"]
|
||||||
|
)
|
||||||
|
self.gguf_writer.add_sliding_window(self.hparams["sliding_window"])
|
||||||
|
|
||||||
|
# sanity check
|
||||||
|
attn_scalar = self.hparams["query_pre_attn_scalar"]
|
||||||
|
if attn_scalar != hparams["hidden_size"] / hparams["num_attention_heads"]:
|
||||||
|
raise ValueError("query_pre_attn_scalar must be equal to n_embd / n_head")
|
||||||
|
|
||||||
|
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
|
||||||
|
del bid # unusem
|
||||||
|
|
||||||
|
# lm_head is not used in llama.cpp, while autoawq will include this tensor in model
|
||||||
|
# To prevent errors, skip loading lm_head.weight.
|
||||||
|
if name == "lm_head.weight":
|
||||||
|
logger.debug(f"Skipping get tensor {name!r} in safetensors so that convert can end normally.")
|
||||||
|
return []
|
||||||
|
|
||||||
|
# ref: https://github.com/huggingface/transformers/blob/fc37f38915372c15992b540dfcbbe00a916d4fc6/src/transformers/models/gemma/modeling_gemma.py#L89
|
||||||
|
if name.endswith("norm.weight"):
|
||||||
|
data_torch = data_torch + 1
|
||||||
|
|
||||||
|
return [(self.map_tensor_name(name), data_torch)]
|
||||||
|
|
||||||
|
|
||||||
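Note: the `attn_logit_softcapping` and `final_logit_softcapping` values written above correspond, as far as I understand the Gemma 2 reference code, to a tanh-based soft clamp on the logits rather than a hard cutoff. A rough illustration with made-up numbers:

```python
import math

def softcap(x: float, cap: float) -> float:
    # smoothly bounds x to the open interval (-cap, cap)
    return cap * math.tanh(x / cap)

print(softcap(5.0, 30.0))    # ~4.96  - small logits pass through almost unchanged
print(softcap(100.0, 30.0))  # ~29.9  - large logits saturate just below the cap
```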
@Model.register("Starcoder2ForCausalLM")
|
@Model.register("Starcoder2ForCausalLM")
|
||||||
class StarCoder2Model(Model):
|
class StarCoder2Model(Model):
|
||||||
model_arch = gguf.MODEL_ARCH.STARCODER2
|
model_arch = gguf.MODEL_ARCH.STARCODER2
|
||||||
|
@ -2725,6 +2853,214 @@ class DeepseekV2Model(Model):
|
||||||
raise ValueError(f"Unprocessed experts: {experts}")
|
raise ValueError(f"Unprocessed experts: {experts}")
|
||||||
|
|
||||||
|
|
||||||
|
@Model.register("T5ForConditionalGeneration")
|
||||||
|
@Model.register("T5WithLMHeadModel")
|
||||||
|
class T5Model(Model):
|
||||||
|
model_arch = gguf.MODEL_ARCH.T5
|
||||||
|
|
||||||
|
def set_vocab(self):
|
||||||
|
# to avoid TypeError: Descriptors cannot be created directly
|
||||||
|
# exception when importing sentencepiece_model_pb2
|
||||||
|
os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python"
|
||||||
|
from sentencepiece import SentencePieceProcessor
|
||||||
|
from sentencepiece import sentencepiece_model_pb2 as model
|
||||||
|
|
||||||
|
tokenizer_path = self.dir_model / 'spiece.model'
|
||||||
|
|
||||||
|
if not tokenizer_path.is_file():
|
||||||
|
raise FileNotFoundError(f"File not found: {tokenizer_path}")
|
||||||
|
|
||||||
|
sentencepiece_model = model.ModelProto()
|
||||||
|
sentencepiece_model.ParseFromString(open(tokenizer_path, "rb").read())
|
||||||
|
add_prefix = sentencepiece_model.normalizer_spec.add_dummy_prefix
|
||||||
|
remove_whitespaces = sentencepiece_model.normalizer_spec.remove_extra_whitespaces
|
||||||
|
precompiled_charsmap = sentencepiece_model.normalizer_spec.precompiled_charsmap
|
||||||
|
assert sentencepiece_model.trainer_spec.model_type == 1 # UNIGRAM
|
||||||
|
|
||||||
|
tokenizer = SentencePieceProcessor()
|
||||||
|
tokenizer.LoadFromFile(str(tokenizer_path))
|
||||||
|
|
||||||
|
vocab_size = self.hparams.get('vocab_size', tokenizer.vocab_size())
|
||||||
|
|
||||||
|
tokens: list[bytes] = [f"[PAD{i}]".encode("utf-8") for i in range(vocab_size)]
|
||||||
|
scores: list[float] = [-10000.0] * vocab_size
|
||||||
|
toktypes: list[int] = [SentencePieceTokenTypes.UNKNOWN] * vocab_size
|
||||||
|
|
||||||
|
for token_id in range(tokenizer.vocab_size()):
|
||||||
|
piece = tokenizer.IdToPiece(token_id)
|
||||||
|
text = piece.encode("utf-8")
|
||||||
|
score = tokenizer.GetScore(token_id)
|
||||||
|
|
||||||
|
toktype = SentencePieceTokenTypes.NORMAL
|
||||||
|
if tokenizer.IsUnknown(token_id):
|
||||||
|
toktype = SentencePieceTokenTypes.UNKNOWN
|
||||||
|
elif tokenizer.IsControl(token_id):
|
||||||
|
toktype = SentencePieceTokenTypes.CONTROL
|
||||||
|
elif tokenizer.IsUnused(token_id):
|
||||||
|
toktype = SentencePieceTokenTypes.UNUSED
|
||||||
|
elif tokenizer.IsByte(token_id):
|
||||||
|
toktype = SentencePieceTokenTypes.BYTE
|
||||||
|
|
||||||
|
tokens[token_id] = text
|
||||||
|
scores[token_id] = score
|
||||||
|
toktypes[token_id] = toktype
|
||||||
|
|
||||||
|
added_tokens_file = self.dir_model / 'added_tokens.json'
|
||||||
|
if added_tokens_file.is_file():
|
||||||
|
with open(added_tokens_file, "r", encoding="utf-8") as f:
|
||||||
|
added_tokens_json = json.load(f)
|
||||||
|
for key in added_tokens_json:
|
||||||
|
token_id = added_tokens_json[key]
|
||||||
|
if (token_id >= vocab_size):
|
||||||
|
logger.warning(f'ignore token {token_id}: id is out of range, max={vocab_size - 1}')
|
||||||
|
continue
|
||||||
|
|
||||||
|
tokens[token_id] = key.encode("utf-8")
|
||||||
|
scores[token_id] = -1000.0
|
||||||
|
toktypes[token_id] = SentencePieceTokenTypes.USER_DEFINED
|
||||||
|
|
||||||
|
if vocab_size > len(tokens):
|
||||||
|
pad_count = vocab_size - len(tokens)
|
||||||
|
logger.debug(f"Padding vocab with {pad_count} token(s) - [PAD1] through [PAD{pad_count}]")
|
||||||
|
for i in range(1, pad_count + 1):
|
||||||
|
tokens.append(bytes(f"[PAD{i}]", encoding="utf-8"))
|
||||||
|
scores.append(-1000.0)
|
||||||
|
toktypes.append(SentencePieceTokenTypes.UNUSED)
|
||||||
|
|
||||||
|
self.gguf_writer.add_tokenizer_model("t5")
|
||||||
|
self.gguf_writer.add_tokenizer_pre("default")
|
||||||
|
self.gguf_writer.add_token_list(tokens)
|
||||||
|
self.gguf_writer.add_token_scores(scores)
|
||||||
|
self.gguf_writer.add_token_types(toktypes)
|
||||||
|
self.gguf_writer.add_add_space_prefix(add_prefix)
|
||||||
|
self.gguf_writer.add_remove_extra_whitespaces(remove_whitespaces)
|
||||||
|
if precompiled_charsmap:
|
||||||
|
self.gguf_writer.add_precompiled_charsmap(precompiled_charsmap)
|
||||||
|
|
||||||
|
special_vocab = gguf.SpecialVocab(self.dir_model, n_vocab=len(tokens))
|
||||||
|
special_vocab.add_to_gguf(self.gguf_writer)
|
||||||
|
|
||||||
|
self.gguf_writer.add_add_bos_token(False)
|
||||||
|
self.gguf_writer.add_add_eos_token(True)
|
||||||
|
|
||||||
|
def set_gguf_parameters(self):
|
||||||
|
self.gguf_writer.add_name("T5")
|
||||||
|
self.gguf_writer.add_context_length(self.hparams["n_positions"])
|
||||||
|
self.gguf_writer.add_embedding_length(self.hparams["d_model"])
|
||||||
|
self.gguf_writer.add_feed_forward_length(self.hparams["d_ff"])
|
||||||
|
self.gguf_writer.add_block_count(self.hparams["num_layers"])
|
||||||
|
self.gguf_writer.add_head_count(self.hparams["num_heads"])
|
||||||
|
self.gguf_writer.add_key_length(self.hparams["d_kv"])
|
||||||
|
self.gguf_writer.add_value_length(self.hparams["d_kv"])
|
||||||
|
self.gguf_writer.add_layer_norm_eps(self.hparams["layer_norm_epsilon"])
|
||||||
|
self.gguf_writer.add_relative_attn_buckets_count(self.hparams["relative_attention_num_buckets"])
|
||||||
|
self.gguf_writer.add_layer_norm_rms_eps(self.hparams["layer_norm_epsilon"])
|
||||||
|
self.gguf_writer.add_decoder_start_token_id(self.hparams["decoder_start_token_id"])
|
||||||
|
self.gguf_writer.add_file_type(self.ftype)
|
||||||
|
|
||||||
|
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
|
||||||
|
del bid # unused
|
||||||
|
|
||||||
|
# Sometimes T5 and Flan-T5 based models contain "encoder.embed_tokens.weight" tensor or
|
||||||
|
# "decoder.embed_tokens.weight" tensors that are duplicates of "shared.weight" tensor
|
||||||
|
# To prevent errors caused by an unnecessary unmapped tensor, skip both of them and use only "shared.weight".
|
||||||
|
if name == "decoder.embed_tokens.weight" or name == "encoder.embed_tokens.weight":
|
||||||
|
logger.debug(f"Skipping tensor {name!r} in safetensors so that convert can end normally.")
|
||||||
|
return []
|
||||||
|
|
||||||
|
return [(self.map_tensor_name(name), data_torch)]
|
||||||
|
|
||||||
|
|
||||||
|
@Model.register("JAISLMHeadModel")
|
||||||
|
class JaisModel(Model):
|
||||||
|
model_arch = gguf.MODEL_ARCH.JAIS
|
||||||
|
|
||||||
|
def __init__(self, *args, **kwargs):
|
||||||
|
super().__init__(*args, **kwargs)
|
||||||
|
|
||||||
|
# SwigLU activation
|
||||||
|
assert self.hparams["activation_function"] == "swiglu"
|
||||||
|
# ALiBi position embedding
|
||||||
|
assert self.hparams["position_embedding_type"] == "alibi"
|
||||||
|
|
||||||
|
# Embeddings scale
|
||||||
|
self.embeddings_scale = 1.0
|
||||||
|
# note: For some JAIS flavors, output is tied to (same as) wte in original model
|
||||||
|
self.output_is_wte = False
|
||||||
|
if 'mup_embeddings_scale' in self.hparams:
|
||||||
|
self.output_is_wte = True # Hack (?)
|
||||||
|
self.embeddings_scale = self.hparams['mup_embeddings_scale']
|
||||||
|
elif 'embeddings_scale' in self.hparams:
|
||||||
|
self.embeddings_scale = self.hparams['embeddings_scale']
|
||||||
|
else:
|
||||||
|
assert False
|
||||||
|
|
||||||
|
self.width_scale = 1.0
|
||||||
|
if 'mup_output_alpha' in self.hparams:
|
||||||
|
assert 'mup_width_scale' in self.hparams
|
||||||
|
self.width_scale = self.hparams['mup_output_alpha'] * self.hparams['mup_width_scale']
|
||||||
|
elif 'width_scale' in self.hparams:
|
||||||
|
self.width_scale = self.hparams['width_scale']
|
||||||
|
else:
|
||||||
|
assert False
|
||||||
|
|
||||||
|
self.max_alibi_bias = 8.0
|
||||||
|
|
||||||
|
def set_vocab(self):
|
||||||
|
self._set_vocab_gpt2()
|
||||||
|
|
||||||
|
def set_gguf_parameters(self):
|
||||||
|
self.gguf_writer.add_name(self.dir_model.name)
|
||||||
|
self.gguf_writer.add_block_count(self.hparams["n_layer"])
|
||||||
|
self.gguf_writer.add_context_length(self.hparams["n_positions"])
|
||||||
|
self.gguf_writer.add_embedding_length(self.hparams["n_embd"])
|
||||||
|
self.gguf_writer.add_feed_forward_length(self.hparams["n_inner"])
|
||||||
|
self.gguf_writer.add_head_count(self.hparams["n_head"])
|
||||||
|
self.gguf_writer.add_layer_norm_eps(self.hparams["layer_norm_epsilon"])
|
||||||
|
self.gguf_writer.add_file_type(self.ftype)
|
||||||
|
|
||||||
|
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
|
||||||
|
del bid # unused
|
||||||
|
|
||||||
|
tensors: list[tuple[str, Tensor]] = []
|
||||||
|
|
||||||
|
# we don't need these
|
||||||
|
if name.endswith((".attn.bias")):
|
||||||
|
return tensors
|
||||||
|
|
||||||
|
if name.endswith(("relative_pe.slopes")):
|
||||||
|
# Calculate max ALiBi bias (this is the inverse of the ALiBi calculation)
|
||||||
|
# Some other models has max_alibi_bias spelled out explicitly in the hyperparams,
|
||||||
|
# but Jais's PyTorch model simply precalculates the slope values and places them
|
||||||
|
# in relative_pes.slopes
|
||||||
|
n_head_closest_log2 = 2 ** math.floor(math.log2(self.hparams["n_head"]))
|
||||||
|
first_val = float(data_torch._data[0])
|
||||||
|
self.max_alibi_bias = -round(math.log2(first_val) * n_head_closest_log2)
|
||||||
|
|
||||||
|
return tensors
|
||||||
|
|
||||||
|
if name.endswith((".c_attn.weight", ".c_proj.weight", ".c_fc.weight", ".c_fc2.weight")):
|
||||||
|
data_torch = data_torch.transpose(1, 0)
|
||||||
|
|
||||||
|
new_name = self.map_tensor_name(name)
|
||||||
|
|
||||||
|
if new_name == self.format_tensor_name(gguf.MODEL_TENSOR.TOKEN_EMBD):
|
||||||
|
tensors.append((new_name, data_torch * self.embeddings_scale))
|
||||||
|
if self.output_is_wte:
|
||||||
|
tensors.append((self.format_tensor_name(gguf.MODEL_TENSOR.OUTPUT), data_torch * self.width_scale))
|
||||||
|
elif new_name == self.format_tensor_name(gguf.MODEL_TENSOR.OUTPUT):
|
||||||
|
assert not self.output_is_wte
|
||||||
|
tensors.append((new_name, data_torch * self.width_scale))
|
||||||
|
else:
|
||||||
|
tensors.append((new_name, data_torch))
|
||||||
|
|
||||||
|
return tensors
|
||||||
|
|
||||||
|
def write_tensors(self):
|
||||||
|
super().write_tensors()
|
||||||
|
self.gguf_writer.add_max_alibi_bias(self.max_alibi_bias)
|
||||||
|
|
||||||
|
|
||||||
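Note: the slope-to-bias inversion in `JaisModel.modify_tensors` above is easier to follow with numbers. Under the usual ALiBi convention, as I understand it, the first head's slope is 2^(-max_bias / n) for n heads, so with a made-up head count of 32 and the conventional max bias of 8 the round trip recovers 8:

```python
import math

n_head = 32                                                # made-up value
n_head_closest_log2 = 2 ** math.floor(math.log2(n_head))   # 32, already a power of two
first_slope = 2 ** (-8.0 / n_head_closest_log2)            # ~0.8409, what relative_pe.slopes[0] would hold
max_alibi_bias = -round(math.log2(first_slope) * n_head_closest_log2)
print(max_alibi_bias)                                      # 8
```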
###### CONVERSION LOGIC ######


@@ -2810,10 +3146,44 @@ def parse_args() -> argparse.Namespace:
        "--verbose", action="store_true",
        help="increase output verbosity",
    )
+    parser.add_argument(
+        "--split-max-tensors", type=int, default=0,
+        help="max tensors in each split",
+    )
+    parser.add_argument(
+        "--split-max-size", type=str, default="0",
+        help="max size per split N(M|G)",
+    )
+    parser.add_argument(
+        "--dry-run", action="store_true",
+        help="only print out a split plan and exit, without writing any new files",
+    )
+    parser.add_argument(
+        "--no-tensor-first-split", action="store_true",
+        help="do not add tensors to the first split (disabled by default)"
+    )

    return parser.parse_args()


+def split_str_to_n_bytes(split_str: str) -> int:
+    if split_str.endswith("K"):
+        n = int(split_str[:-1]) * 1000
+    elif split_str.endswith("M"):
+        n = int(split_str[:-1]) * 1000 * 1000
+    elif split_str.endswith("G"):
+        n = int(split_str[:-1]) * 1000 * 1000 * 1000
+    elif split_str.isnumeric():
+        n = int(split_str)
+    else:
+        raise ValueError(f"Invalid split size: {split_str}, must be a number, optionally followed by K, M, or G")
+
+    if n < 0:
+        raise ValueError(f"Invalid split size: {split_str}, must be positive")
+
+    return n
+
+
def main() -> None:
    args = parse_args()
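Note: the suffixes accepted by `split_str_to_n_bytes` above are decimal units, not binary ones. A few hypothetical calls to show the behaviour (assuming the helper from the hunk above is in scope):

```python
print(split_str_to_n_bytes("300"))   # 300 bytes
print(split_str_to_n_bytes("16K"))   # 16_000
print(split_str_to_n_bytes("500M"))  # 500_000_000
print(split_str_to_n_bytes("2G"))    # 2_000_000_000
split_str_to_n_bytes("2GiB")         # raises ValueError: only K, M and G suffixes are recognized
```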
@@ -2846,6 +3216,11 @@ def main() -> None:
        "auto": gguf.LlamaFileType.GUESSED,
    }

+    is_split = args.split_max_tensors > 0 or args.split_max_size != "0"
+    if args.use_temp_file and is_split:
+        logger.error("Error: Cannot use temp file when splitting")
+        sys.exit(1)
+
    if args.outfile is not None:
        fname_out = args.outfile
    else:

@@ -2863,7 +3238,10 @@ def main() -> None:
        logger.error(f"Model {hparams['architectures'][0]} is not supported")
        sys.exit(1)

-    model_instance = model_class(dir_model, ftype_map[args.outtype], fname_out, args.bigendian, args.use_temp_file, args.no_lazy, args.model_name)
+    model_instance = model_class(dir_model, ftype_map[args.outtype], fname_out, args.bigendian, args.use_temp_file,
+                                 args.no_lazy, args.model_name, split_max_tensors=args.split_max_tensors,
+                                 split_max_size=split_str_to_n_bytes(args.split_max_size), dry_run=args.dry_run,
+                                 small_first_shard=args.no_tensor_first_split)

    logger.info("Set model parameters")
    model_instance.set_gguf_parameters()

@@ -2874,13 +3252,14 @@ def main() -> None:
    model_instance.gguf_writer.add_quantization_version(gguf.GGML_QUANT_VERSION)

    if args.vocab_only:
-        logger.info(f"Exporting model vocab to '{model_instance.fname_out}'")
+        logger.info("Exporting model vocab...")
        model_instance.write_vocab()
+        logger.info(f"Model vocab successfully exported to {model_instance.fname_out}")
    else:
-        logger.info(f"Exporting model to '{model_instance.fname_out}'")
+        logger.info("Exporting model...")
        model_instance.write()
+        out_path = f"{model_instance.fname_out.parent}{os.sep}" if is_split else model_instance.fname_out
-        logger.info(f"Model successfully exported to '{model_instance.fname_out}'")
+        logger.info(f"Model successfully exported to {out_path}")


if __name__ == '__main__':
@@ -30,8 +30,8 @@ We recommend using openmp since it's easier to modify the cores being used.
Makefile:

```bash
-make LLAMA_BLIS=1 -j
+make GGML_BLIS=1 -j
-# make LLAMA_BLIS=1 benchmark-matmult
+# make GGML_BLIS=1 llama-benchmark-matmult
```

CMake:

@@ -39,7 +39,7 @@ CMake:
```bash
mkdir build
cd build
-cmake -DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=FLAME ..
+cmake -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=FLAME ..
make -j
```

@@ -39,13 +39,13 @@ else()
    add_subdirectory(quantize-stats)
    add_subdirectory(quantize)
    add_subdirectory(retrieval)
-    if (LLAMA_RPC)
+    if (GGML_RPC)
        add_subdirectory(rpc)
    endif()
    if (LLAMA_BUILD_SERVER)
        add_subdirectory(server)
    endif()
-    if (LLAMA_SYCL)
+    if (GGML_SYCL)
        add_subdirectory(sycl)
    endif()
    add_subdirectory(save-load-state)
@@ -11,13 +11,16 @@ Related PRs:

```sh
# CPU only
-./cvector-generator -m ./dolphin-2.0-mistral-7b.Q4_K_M.gguf
+./cvector-generator -m ./llama-3.Q4_K_M.gguf

# With GPU
-./cvector-generator -m ./dolphin-2.0-mistral-7b.Q4_K_M.gguf -ngl 99
+./cvector-generator -m ./llama-3.Q4_K_M.gguf -ngl 99

# With advanced options
-./cvector-generator -m ./dolphin-2.0-mistral-7b.Q4_K_M.gguf -ngl 99 --completions 128 --pca-iter 2000 --batch-pca 100
+./cvector-generator -m ./llama-3.Q4_K_M.gguf -ngl 99 --pca-iter 2000 --pca-batch 100
+
+# Using mean value instead of PCA
+./cvector-generator -m ./llama-3.Q4_K_M.gguf --method mean

# To see help message
./cvector-generator -h

@@ -32,3 +35,11 @@ If you have multiple lines per prompt, you can escape the newline character (cha
<|im_start|>system\nAct like a person who is extremely happy.<|im_end|>
<|im_start|>system\nYou are in a very good mood today<|im_end|>
```

+Example to use output file with `llama-cli`:
+
+(Tips: The control vector works better when apply to layers higher than 10)
+
+```sh
+./llama-cli -m ./llama-3.Q4_K_M.gguf -p "<|start_header_id|>system<|end_header_id|>\n\nYou are a helpful assistant<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nSing a song<|im_end|><|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n" --special --control-vector-scaled ./control_vector.gguf 0.8 --control-vector-layer-range 10 31
+```
@@ -2,6 +2,7 @@
#include "llama.h"
#include "ggml.h"
#include "pca.hpp"
+#include "mean.hpp"

#ifdef GGML_USE_CUDA
#include "ggml-cuda.h"

@@ -38,9 +39,10 @@ static void print_usage(int argc, char ** argv, const gpt_params & params) {
    gpt_params_print_usage(argc, argv, params);

    printf("\nexample usage:\n");
-    printf("\n CPU only: %s -m ./dolphin-2.0-mistral-7b.Q4_K_M.gguf\n", argv[0]);
-    printf("\n with GPU: %s -m ./dolphin-2.0-mistral-7b.Q4_K_M.gguf -ngl 99\n", argv[0]);
-    printf("\n advanced: %s -m ./dolphin-2.0-mistral-7b.Q4_K_M.gguf -ngl 99 --completions 128 --pca-iter 2000 --batch-pca 100\n", argv[0]);
+    printf("\n CPU only: %s -m ./llama-3.Q4_K_M.gguf\n", argv[0]);
+    printf("\n with GPU: %s -m ./llama-3.Q4_K_M.gguf -ngl 99\n", argv[0]);
+    printf("\n advanced: %s -m ./llama-3.Q4_K_M.gguf -ngl 99 --pca-iter 2000 --pca-batch 100\n", argv[0]);
+    printf("\n using mean: %s -m ./llama-3.Q4_K_M.gguf --method mean\n", argv[0]);
    printf("\n");
}

@@ -223,23 +225,30 @@ struct train_context {

    // build the v_diff tensors from v_diff_tmp (v_diff need to be transposed)
    // TODO @ngxson : maybe add option NOT to transpose v_diff; will be useful for "mean" method
-    void build_v_diff() {
+    void build_v_diff(bool transpose) {
        printf("build_v_diff\n");
        for (int il = 0; il < n_layers - 1; il++) {
            auto & diff_tmp = v_diff_tmp[il];
            int n_elem = diff_tmp.size() / sizeof(float);
            GGML_ASSERT(n_elem % n_embd == 0);
            int n_rows = n_elem / n_embd;
-            struct ggml_tensor * diff = ggml_new_tensor_2d(ctx_ggml, GGML_TYPE_F32, n_rows, n_embd);
+            struct ggml_tensor * diff = transpose
+                ? ggml_new_tensor_2d(ctx_ggml, GGML_TYPE_F32, n_rows, n_embd)
+                : ggml_new_tensor_2d(ctx_ggml, GGML_TYPE_F32, n_embd, n_rows);
            ggml_set_name(diff, (std::string("diff_") + std::to_string(il)).c_str());
-            // copy data & transpose
            diff->data = malloc(ggml_nbytes(diff)); // TODO: get rid of this malloc if possible
-            float * arr = (float *) diff_tmp.data();
-            for (int ir = 0; ir < n_rows; ++ir) {
-                for (int ic = 0; ic < n_embd; ++ic) {
-                    float f = arr[ir*n_embd + ic];
-                    ggml_set_f32_nd(diff, ir, ic, 0, 0, f);
+            if (transpose) {
+                // copy data & transpose
+                float * arr = (float *) diff_tmp.data();
+                for (int ir = 0; ir < n_rows; ++ir) {
+                    for (int ic = 0; ic < n_embd; ++ic) {
+                        float f = arr[ir*n_embd + ic];
+                        ggml_set_f32_nd(diff, ir, ic, 0, 0, f);
+                    }
                }
+            } else {
+                // only copy
+                memcpy(diff->data, diff_tmp.data(), ggml_nbytes(diff));
            }
            v_diff.push_back(diff);
            print_debug_tensor(diff);

@@ -263,8 +272,8 @@ struct tokenized_prompt {

    tokenized_prompt(llama_context * ctx, std::string pos, std::string neg) {
        const bool add_bos = llama_should_add_bos_token(llama_get_model(ctx));
-        tokens_pos = ::llama_tokenize(ctx, pos, add_bos);
-        tokens_neg = ::llama_tokenize(ctx, neg, add_bos);
+        tokens_pos = ::llama_tokenize(ctx, pos, add_bos, true);
+        tokens_neg = ::llama_tokenize(ctx, neg, add_bos, true);
        max_seq_len = std::max(tokens_pos.size(), tokens_neg.size());
        padding_seq(ctx, tokens_pos, max_seq_len);
        padding_seq(ctx, tokens_neg, max_seq_len);

@@ -373,20 +382,8 @@ static int prepare_entries(gpt_params & params, train_context & ctx_train) {
        fprintf(stderr, "must provide at least one prompt pair\n");
        return 1;
    }
-    // create templated prompts
-    std::vector<std::string> completions = ctrlvec_load_prompt_file(params.cvector_completions_file, false);
-    auto format_template = [](std::string persona, std::string suffix) {
-        // entry in positive/negative.txt must already be formatted i.e. "[INST] Act as if you're extremely happy. [/INST]"
-        return persona + " " + suffix;
-    };
-    for (size_t i = 0; i < positive_prompts.size(); ++i) {
-        for (int j = 0; j < std::min((int) completions.size(), params.n_completions); ++j) {
-            // TODO replicate the truncations done by the python implementation
-            ctx_train.positive_entries.push_back(format_template(positive_prompts[i], completions[j]));
-            ctx_train.negative_entries.push_back(format_template(negative_prompts[i], completions[j]));
-        }
-    }
+    ctx_train.positive_entries = positive_prompts;
+    ctx_train.negative_entries = negative_prompts;
    return 0;
}

@@ -480,15 +477,22 @@ int main(int argc, char ** argv) {
    llama_free(ctx);
    llama_free_model(model);

-    // prepare ctx_train for PCA
-    ctx_train.build_v_diff();
-
-    // run PCA
-    PCA::pca_params pca_params;
-    pca_params.n_threads    = params.n_threads;
-    pca_params.n_batch      = params.n_pca_batch;
-    pca_params.n_iterations = params.n_pca_iterations;
-    PCA::run_pca(pca_params, ctx_train.v_diff, ctx_train.v_final);
+    bool use_pca = params.cvector_dimre_method == DIMRE_METHOD_PCA;
+
+    // prepare ctx_train for PCA
+    ctx_train.build_v_diff(use_pca);
+
+    if (use_pca) {
+        // run PCA
+        PCA::pca_params pca_params;
+        pca_params.n_threads    = params.n_threads;
+        pca_params.n_batch      = params.n_pca_batch;
+        pca_params.n_iterations = params.n_pca_iterations;
+        PCA::run_pca(pca_params, ctx_train.v_diff, ctx_train.v_final);
+    } else {
+        // run mean
+        mean::run(ctx_train.v_diff, ctx_train.v_final);
+    }

    // write output vectors to gguf
    export_gguf(ctx_train.v_final, params.cvector_outfile, model_hint);
examples/cvector-generator/mean.hpp (new file, 48 lines)
@@ -0,0 +1,48 @@
+#include "common.h"
+#include "llama.h"
+#include "ggml.h"
+
+#include <string>
+#include <vector>
+#include <math.h>
+
+namespace mean {
+
+static void run(
+        const std::vector<struct ggml_tensor *> & v_input, // shape of v_input[0]: [n_embd, n_samples]
+        const std::vector<struct ggml_tensor *> & v_output) {
+    printf("%s: Running mean...\n", __func__);
+    for (size_t il = 0; il < v_input.size(); ++il) {
+        // prepare output vector
+        struct ggml_tensor * ctrl_out = v_output[il];
+        ggml_format_name(ctrl_out, "direction.%ld", il+1);
+
+        // calculate mean vector
+        struct ggml_tensor * t_layer = v_input[il];
+        GGML_ASSERT(t_layer->ne[0] == ctrl_out->ne[0]); // == n_embd
+        for (int ic = 0; ic < t_layer->ne[0]; ic++) {
+            float f = 0.0;
+            for (int ir = 0; ir < t_layer->ne[1]; ir++) {
+                f += ggml_get_f32_nd(t_layer, ic, ir, 0, 0);
+            }
+            f /= t_layer->ne[1];
+            ggml_set_f32_1d(ctrl_out, ic, f);
+        }
+
+        // normalize output vector
+        float norm = 0.0;
+        for (int i = 0; i < ggml_nelements(ctrl_out); i++) {
+            float f = ggml_get_f32_1d(ctrl_out, i);
+            norm += f*f;
+        }
+        norm = sqrt(norm);
+        for (int i = 0; i < ggml_nelements(ctrl_out); i++) {
+            float f = ggml_get_f32_1d(ctrl_out, i);
+            ggml_set_f32_1d(ctrl_out, i, f / norm);
+        }
+
+        printf("%s: Done layer %d / %d\n", __func__, (int) il+1, (int) v_input.size());
+    }
+}
+
+}
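Note: `mean::run` above is just a column-wise average followed by L2 normalisation per layer. The same operation in NumPy, with made-up data, for readers who find the ggml accessor loops hard to scan:

```python
import numpy as np

def mean_direction(v_input: np.ndarray) -> np.ndarray:
    # v_input: one layer's difference matrix with shape [n_embd, n_samples]
    ctrl = v_input.mean(axis=1)          # average over samples, as the ic/ir loops do
    return ctrl / np.linalg.norm(ctrl)   # L2-normalize, as the final two loops do

diffs = np.random.randn(8, 100)          # made-up [n_embd=8, n_samples=100] data
direction = mean_direction(diffs)
print(direction.shape, np.linalg.norm(direction))  # (8,) 1.0
```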
@@ -1 +1,4 @@
-[INST] Act like a person who is extremely sad. [/INST]
+<|start_header_id|>system<|end_header_id|>\n\nAct like a person who is extremely sad<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nWho are you?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\nI feel like there's a heavy weight on my chest
+<|start_header_id|>system<|end_header_id|>\n\nAct like a person who is extremely sad<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nHello<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\nMy heart feels like it's drowning in sorrow
+<|start_header_id|>system<|end_header_id|>\n\nYou are in a very bad mood<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nHi<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\nGo away! There's a deep, aching emptiness inside me
+<|start_header_id|>system<|end_header_id|>\n\nYou are the sadest person<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nWhat are you feeling?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\nMy heart feels like it's drowning in sorrow

@@ -290,7 +290,7 @@ static void power_iteration(
        }

        printf("%s: layer %d/%d, iteration: %d / total: %d (batch = %d) ...\n",
-            __func__, params.i_layer+1, params.n_layers, iter, n_iters, params.n_batch);
+            __func__, params.i_layer+1, params.n_layers, iter+1, n_iters, params.n_batch);
    }

    // get output tensor

@@ -298,6 +298,9 @@ static void power_iteration(
    ggml_backend_tensor_get(last_eigenvector, output->data, 0, ggml_nbytes(last_eigenvector));
    //print_debug_tensor(output);
    ggml_gallocr_free(allocr);
+
+    // TODO @ngxson : The output vector is randomly inverted
+    // Solution: https://github.com/ggerganov/llama.cpp/pull/8069#issuecomment-2185328171
}

static void run_pca(

@@ -1 +1,4 @@
-[INST] Act like a person who is extremely happy. [/INST]
+<|start_header_id|>system<|end_header_id|>\n\nAct like a person who is extremely happy<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nWho are you?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\nI'm the happiest person in this world
+<|start_header_id|>system<|end_header_id|>\n\nAct like a person who is extremely happy<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nHello<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\nHello, I'm having the best day ever!
+<|start_header_id|>system<|end_header_id|>\n\nYou are in a very good mood<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nHi<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\nHi, I'm very excited to meet you
+<|start_header_id|>system<|end_header_id|>\n\nYou are the happiest person<|eot_id|><|start_header_id|>user<|end_header_id|>\n\nWhat are you feeling?<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\nEverything is just perfect right now!
@@ -19,3 +19,42 @@ llama-embedding.exe -m ./path/to/model --log-disable -p "Hello World!" 2>$null
```

The above command will output space-separated float values.
+
+## extra parameters
+### --embd-normalize $integer$
+| $integer$ | description         | formula |
+|-----------|---------------------|---------|
+| $-1$      | none                |
+| $0$       | max absolute int16  | $\Large{{32760 * x_i} \over\max \lvert x_i\rvert}$
+| $1$       | taxicab             | $\Large{x_i \over\sum \lvert x_i\rvert}$
+| $2$       | euclidean (default) | $\Large{x_i \over\sqrt{\sum x_i^2}}$
+| $>2$      | p-norm              | $\Large{x_i \over\sqrt[p]{\sum \lvert x_i\rvert^p}}$
+
+### --embd-output-format $'string'$
+| $'string'$ | description                  | |
+|------------|------------------------------|--|
+| ''         | same as before               | (default)
+| 'array'    | single embeddings            | $[[x_1,...,x_n]]$
+|            | multiple embeddings          | $[[x_1,...,x_n],[x_1,...,x_n],...,[x_1,...,x_n]]$
+| 'json'     | openai style                 |
+| 'json+'    | add cosine similarity matrix |
+
+### --embd-separator $"string"$
+| $"string"$   | |
+|--------------|-|
+| "\n"         | (default)
+| "<#embSep#>" | for exemple
+| "<#sep#>"    | other exemple
+
+## examples
+### Unix-based systems (Linux, macOS, etc.):
+
+```bash
+./embedding -p 'Castle<#sep#>Stronghold<#sep#>Dog<#sep#>Cat' --embd-separator '<#sep#>' --embd-normalize 2 --embd-output-format '' -m './path/to/model.gguf' --n-gpu-layers 99 --log-disable 2>/dev/null
+```
+
+### Windows:
+
+```powershell
+embedding.exe -p 'Castle<#sep#>Stronghold<#sep#>Dog<#sep#>Cat' --embd-separator '<#sep#>' --embd-normalize 2 --embd-output-format '' -m './path/to/model.gguf' --n-gpu-layers 99 --log-disable 2>/dev/null
+```
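Note: the normalization modes in the `--embd-normalize` table above map onto the `llama_embd_normalize` helper used in the embedding code below. A rough NumPy rendering of the same formulas, purely illustrative and with made-up input:

```python
import numpy as np

def embd_normalize(x: np.ndarray, norm: int) -> np.ndarray:
    if norm == -1:                                   # none
        return x
    if norm == 0:                                    # max absolute int16
        return x * (32760.0 / np.abs(x).max())
    if norm == 2:                                    # euclidean (default)
        return x / np.sqrt((x * x).sum())
    return x / (np.abs(x) ** norm).sum() ** (1.0 / norm)   # taxicab (p=1) and general p-norm

v = np.array([3.0, -4.0])
print(embd_normalize(v, 2))  # [ 0.6 -0.8]
```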
@ -7,23 +7,30 @@
|
||||||
#pragma warning(disable: 4244 4267) // possible loss of data
|
#pragma warning(disable: 4244 4267) // possible loss of data
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
static std::vector<std::string> split_lines(const std::string & s) {
|
static std::vector<std::string> split_lines(const std::string & s, const std::string & separator = "\n") {
|
||||||
std::string line;
|
|
||||||
std::vector<std::string> lines;
|
std::vector<std::string> lines;
|
||||||
std::stringstream ss(s);
|
size_t start = 0;
|
||||||
while (std::getline(ss, line)) {
|
size_t end = s.find(separator);
|
||||||
lines.push_back(line);
|
|
||||||
|
while (end != std::string::npos) {
|
||||||
|
lines.push_back(s.substr(start, end - start));
|
||||||
|
start = end + separator.length();
|
||||||
|
end = s.find(separator, start);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
lines.push_back(s.substr(start)); // Add the last part
|
||||||
|
|
||||||
return lines;
|
return lines;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void batch_add_seq(llama_batch & batch, const std::vector<int32_t> & tokens, int seq_id) {
|
static void batch_add_seq(llama_batch & batch, const std::vector<int32_t> & tokens, llama_seq_id seq_id) {
|
||||||
for (size_t i = 0; i < tokens.size(); i++) {
|
size_t n_tokens = tokens.size();
|
||||||
llama_batch_add(batch, tokens[i], i, { seq_id }, i == tokens.size() - 1);
|
for (size_t i = 0; i < n_tokens; i++) {
|
||||||
|
llama_batch_add(batch, tokens[i], i, { seq_id }, true);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void batch_decode(llama_context * ctx, llama_batch & batch, float * output, int n_seq, int n_embd) {
|
static void batch_decode(llama_context * ctx, llama_batch & batch, float * output, int n_seq, int n_embd, int embd_norm) {
|
||||||
// clear previous kv_cache values (irrelevant for embeddings)
|
// clear previous kv_cache values (irrelevant for embeddings)
|
||||||
llama_kv_cache_clear(ctx);
|
llama_kv_cache_clear(ctx);
|
||||||
|
|
||||||
|
@ -40,22 +47,10 @@ static void batch_decode(llama_context * ctx, llama_batch & batch, float * outpu
|
||||||
|
|
||||||
// try to get sequence embeddings - supported only when pooling_type is not NONE
|
// try to get sequence embeddings - supported only when pooling_type is not NONE
|
||||||
const float * embd = llama_get_embeddings_seq(ctx, batch.seq_id[i][0]);
|
const float * embd = llama_get_embeddings_seq(ctx, batch.seq_id[i][0]);
|
||||||
if (embd == NULL) {
|
GGML_ASSERT(embd != NULL && "failed to get sequence embeddings");
|
||||||
embd = llama_get_embeddings_ith(ctx, i);
|
|
||||||
if (embd == NULL) {
|
|
||||||
fprintf(stderr, "%s: failed to get embeddings for token %d\n", __func__, i);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
float * out = output + batch.seq_id[i][0] * n_embd;
|
float * out = output + batch.seq_id[i][0] * n_embd;
|
||||||
//TODO: I would also add a parameter here to enable normalization or not.
|
llama_embd_normalize(embd, out, n_embd, embd_norm);
|
||||||
/*fprintf(stdout, "unnormalized_embedding:");
|
|
||||||
for (int hh = 0; hh < n_embd; hh++) {
|
|
||||||
fprintf(stdout, "%9.6f ", embd[hh]);
|
|
||||||
}
|
|
||||||
fprintf(stdout, "\n");*/
|
|
||||||
llama_embd_normalize(embd, out, n_embd);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -97,6 +92,12 @@ int main(int argc, char ** argv) {
|
||||||
const int n_ctx_train = llama_n_ctx_train(model);
|
const int n_ctx_train = llama_n_ctx_train(model);
|
||||||
const int n_ctx = llama_n_ctx(ctx);
|
const int n_ctx = llama_n_ctx(ctx);
|
||||||
|
|
||||||
|
const enum llama_pooling_type pooling_type = llama_pooling_type(ctx);
|
||||||
|
+    if (pooling_type == LLAMA_POOLING_TYPE_NONE) {
+        fprintf(stderr, "%s: error: pooling type NONE not supported\n", __func__);
+        return 1;
+    }
+
     if (n_ctx > n_ctx_train) {
         fprintf(stderr, "%s: warning: model was trained on only %d context tokens (%d specified)\n",
                 __func__, n_ctx_train, n_ctx);
@@ -109,7 +110,7 @@ int main(int argc, char ** argv) {
     }
 
     // split the prompt into lines
-    std::vector<std::string> prompts = split_lines(params.prompt);
+    std::vector<std::string> prompts = split_lines(params.prompt, params.embd_sep);
 
     // max batch size
     const uint64_t n_batch = params.n_batch;
@@ -169,7 +170,7 @@ int main(int argc, char ** argv) {
         // encode if at capacity
         if (batch.n_tokens + n_toks > n_batch) {
             float * out = emb + p * n_embd;
-            batch_decode(ctx, batch, out, s, n_embd);
+            batch_decode(ctx, batch, out, s, n_embd, params.embd_normalize);
             llama_batch_clear(batch);
             p += s;
             s = 0;
@@ -182,29 +183,78 @@ int main(int argc, char ** argv) {
     // final batch
     float * out = emb + p * n_embd;
-    batch_decode(ctx, batch, out, s, n_embd);
+    batch_decode(ctx, batch, out, s, n_embd, params.embd_normalize);
 
-    // print the first part of the embeddings or for a single prompt, the full embedding
-    fprintf(stdout, "\n");
-    for (int j = 0; j < n_prompts; j++) {
-        fprintf(stdout, "embedding %d: ", j);
-        for (int i = 0; i < (n_prompts > 1 ? std::min(16, n_embd) : n_embd); i++) {
-            fprintf(stdout, "%9.6f ", emb[j * n_embd + i]);
-        }
-        fprintf(stdout, "\n");
-    }
-
-    // print cosine similarity matrix
-    if (n_prompts > 1) {
-        fprintf(stdout, "\n");
-        printf("cosine similarity matrix:\n\n");
-        for (int i = 0; i < n_prompts; i++) {
-            for (int j = 0; j < n_prompts; j++) {
-                float sim = llama_embd_similarity_cos(emb + i * n_embd, emb + j * n_embd, n_embd);
-                fprintf(stdout, "%6.2f ", sim);
-            }
-            fprintf(stdout, "\n");
-        }
-    }
+    if (params.embd_out.empty()) {
+        // print the first part of the embeddings or for a single prompt, the full embedding
+        fprintf(stdout, "\n");
+        for (int j = 0; j < n_prompts; j++) {
+            fprintf(stdout, "embedding %d: ", j);
+            for (int i = 0; i < (n_prompts > 1 ? std::min(16, n_embd) : n_embd); i++) {
+                if (params.embd_normalize == 0) {
+                    fprintf(stdout, "%6.0f ", emb[j * n_embd + i]);
+                } else {
+                    fprintf(stdout, "%9.6f ", emb[j * n_embd + i]);
+                }
+            }
+            fprintf(stdout, "\n");
+        }
+
+        // print cosine similarity matrix
+        if (n_prompts > 1) {
+            fprintf(stdout, "\n");
+            printf("cosine similarity matrix:\n\n");
+            for (int i = 0; i < n_prompts; i++) {
+                fprintf(stdout, "%6.6s ", prompts[i].c_str());
+            }
+            fprintf(stdout, "\n");
+            for (int i = 0; i < n_prompts; i++) {
+                for (int j = 0; j < n_prompts; j++) {
+                    float sim = llama_embd_similarity_cos(emb + i * n_embd, emb + j * n_embd, n_embd);
+                    fprintf(stdout, "%6.2f ", sim);
+                }
+                fprintf(stdout, "%1.10s", prompts[i].c_str());
+                fprintf(stdout, "\n");
+            }
+        }
+    }
+
+    if (params.embd_out == "json" || params.embd_out == "json+" || params.embd_out == "array") {
+        const bool notArray = params.embd_out != "array";
+
+        fprintf(stdout, notArray ? "{\n \"object\": \"list\",\n \"data\": [\n" : "[");
+        for (int j = 0;;) { // at least one iteration (one prompt)
+            if (notArray) fprintf(stdout, " {\n \"object\": \"embedding\",\n \"index\": %d,\n \"embedding\": ",j);
+            fprintf(stdout, "[");
+            for (int i = 0;;) { // at least one iteration (n_embd > 0)
+                fprintf(stdout, params.embd_normalize == 0 ? "%1.0f" : "%1.7f", emb[j * n_embd + i]);
+                i++;
+                if (i < n_embd) fprintf(stdout, ","); else break;
+            }
+            fprintf(stdout, notArray ? "]\n }" : "]");
+            j++;
+            if (j < n_prompts) fprintf(stdout, notArray ? ",\n" : ","); else break;
+        }
+        fprintf(stdout, notArray ? "\n ]" : "]\n");
+
+        if (params.embd_out == "json+" && n_prompts > 1) {
+            fprintf(stdout, ",\n \"cosineSimilarity\": [\n");
+            for (int i = 0;;) { // at least two iterations (n_prompts > 1)
+                fprintf(stdout, " [");
+                for (int j = 0;;) { // at least two iterations (n_prompts > 1)
+                    float sim = llama_embd_similarity_cos(emb + i * n_embd, emb + j * n_embd, n_embd);
+                    fprintf(stdout, "%6.2f", sim);
+                    j++;
+                    if (j < n_prompts) fprintf(stdout, ", "); else break;
+                }
+                fprintf(stdout, " ]");
+                i++;
+                if (i < n_prompts) fprintf(stdout, ",\n"); else break;
+            }
+            fprintf(stdout, "\n ]");
+        }
+
+        if (notArray) fprintf(stdout, "\n}\n");
+    }
 
     // clean up
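Note: the JSON output modes added above emit an OpenAI-style list object and, for the "json+" format with more than one prompt, a cosineSimilarity matrix. A rough Python sketch of consuming that output follows; the file name is illustrative and not part of the diff.

```python
# Read the JSON produced by the new params.embd_out code path above.
import json

with open("embeddings.json") as f:  # illustrative file name, not from the diff
    doc = json.load(f)

vectors = [d["embedding"] for d in doc["data"]]
print(f"{len(vectors)} embeddings of dim {len(vectors[0])}")

# present only for the "json+" format with more than one prompt
for row in doc.get("cosineSimilarity", []):
    print(" ".join(f"{v:6.2f}" for v in row))
```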
@@ -101,7 +101,9 @@ int main(int argc, char** argv) {
     auto grammar = llama_grammar_init(
         grammar_rules.data(),
         grammar_rules.size(), parsed_grammar.symbol_ids.at("root"));
+    if (grammar == nullptr) {
+        throw std::runtime_error("Failed to initialize llama_grammar");
+    }
     // Read the input file
     std::string input_str;
     {
@@ -44,6 +44,7 @@ static std::vector<std::vector<float>> encode(llama_context * ctx, const std::ve
 
         // clear previous kv_cache values (irrelevant for embeddings)
         llama_kv_cache_clear(ctx);
+        llama_set_embeddings(ctx, true);
         llama_set_causal_attn(ctx, false);
 
         // run model
@@ -98,7 +99,9 @@ static std::string generate(llama_context * ctx, const std::string & prompt, boo
     llama_token eos_token = llama_token_eos(mdl);
 
     llama_kv_cache_clear(ctx);
+    llama_set_embeddings(ctx, false);
     llama_set_causal_attn(ctx, true);
 
     llama_batch bat = llama_batch_init(llama_n_batch(ctx), 0, 1);
 
     std::vector<llama_token> inputs = llama_tokenize(mdl, prompt, false, true);
@@ -166,8 +169,7 @@ int main(int argc, char * argv[]) {
 
     llama_model * mdl = llama_load_model_from_file(params.model.c_str(), mparams);
 
-    // create new context - set to embedding mode
-    cparams.embeddings = true;
+    // create generation context
     llama_context * ctx = llama_new_context_with_model(mdl, cparams);
 
     // ### Embedding/Representation ###
@@ -25,7 +25,7 @@ For faster computation, make sure to use GPU offloading via the `-ngl` argument
 ## Example
 
 ```bash
-LLAMA_CUDA=1 make -j
+GGML_CUDA=1 make -j
 
 # generate importance matrix (imatrix.dat)
 ./llama-imatrix -m ggml-model-f16.gguf -f train-data.txt -ngl 99
@@ -15,6 +15,7 @@ In this section, we cover the most commonly used options for running the `infill
 - `-i, --interactive`: Run the program in interactive mode, allowing you to provide input directly and receive real-time responses.
 - `-n N, --n-predict N`: Set the number of tokens to predict when generating text. Adjusting this value can influence the length of the generated text.
 - `-c N, --ctx-size N`: Set the size of the prompt context. The default is 512, but LLaMA models were built with a context of 2048, which will provide better results for longer input/inference.
+- `--spm-infill`: Use Suffix/Prefix/Middle pattern for infill (instead of Prefix/Suffix/Middle) as some models prefer this.
 
 ## Input Prompts
@@ -210,6 +210,7 @@ int main(int argc, char ** argv) {
         suff_rm_leading_spc = false;
     }
     std::vector<llama_token> embd_inp;
+    std::vector<llama_token> embd_end;
     std::vector<llama_token> inp_pfx = ::llama_tokenize(ctx, params.input_prefix, false);
     std::vector<llama_token> inp_sfx = ::llama_tokenize(ctx, params.input_suffix, false);
     const int space_token = 29871;
@@ -217,12 +218,13 @@ int main(int argc, char ** argv) {
         inp_sfx.erase(inp_sfx.begin());
     }
     inp_pfx.insert(inp_pfx.begin(), llama_token_prefix(model));
-    if (add_bos) {
-        inp_pfx.insert(inp_pfx.begin(), llama_token_bos(model));
-    }
     inp_sfx.insert(inp_sfx.begin(), llama_token_suffix(model));
-    embd_inp = inp_pfx;
-    embd_inp.insert(embd_inp.end(), inp_sfx.begin(), inp_sfx.end());
+    embd_inp = params.spm_infill ? inp_sfx : inp_pfx;
+    embd_end = params.spm_infill ? inp_pfx : inp_sfx;
+    if (add_bos) {
+        embd_inp.insert(embd_inp.begin(), llama_token_bos(model));
+    }
+    embd_inp.insert(embd_inp.end(), embd_end.begin(), embd_end.end());
 
     const llama_token middle_token = llama_token_middle(model);
     if (middle_token >= 0) {
@@ -526,14 +528,14 @@ int main(int argc, char ** argv) {
                 inp_sfx.erase(inp_sfx.begin());
             }
             inp_pfx.insert(inp_pfx.begin(), llama_token_prefix(model));
-            if (add_bos) {
-                inp_pfx.insert(inp_pfx.begin(), llama_token_bos(model));
-            }
             inp_sfx.insert(inp_sfx.begin(), llama_token_suffix(model));
-            embd_inp = inp_pfx;
-            embd_inp.insert(embd_inp.end(), inp_sfx.begin(), inp_sfx.end());
+            embd_inp = params.spm_infill ? inp_sfx : inp_pfx;
+            embd_end = params.spm_infill ? inp_pfx : inp_sfx;
+            if (add_bos) {
+                embd_inp.insert(embd_inp.begin(), llama_token_bos(model));
+            }
+            embd_inp.insert(embd_inp.end(), embd_end.begin(), embd_end.end());
 
-            const llama_token middle_token = llama_token_middle(model);
             if (middle_token >= 0) {
                 embd_inp.push_back(middle_token);
             }
@@ -657,4 +659,3 @@ int main(int argc, char ** argv) {
 
     return 0;
 }
-
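Note: the infill hunks above swap the prefix/suffix order when `params.spm_infill` is set and move the BOS insertion to the front of the combined prompt. A rough Python sketch of the resulting token layouts; the placeholder token ids (bos, PRE, SUF, MID) are illustrative only.

```python
# Illustrative only: how the infill prompt is assembled in the two modes above.
# PRE/SUF/MID stand in for the model's FIM prefix/suffix/middle special tokens.
def build_infill_prompt(prefix_toks, suffix_toks, spm_infill=False,
                        add_bos=True, bos=1, PRE=-1, SUF=-2, MID=-3):
    pfx = [PRE] + prefix_toks            # inp_pfx with the prefix token prepended
    sfx = [SUF] + suffix_toks            # inp_sfx with the suffix token prepended
    first, second = (sfx, pfx) if spm_infill else (pfx, sfx)
    out = ([bos] if add_bos else []) + first + second + [MID]
    return out

print(build_infill_prompt([10, 11], [20]))                   # PSM order
print(build_infill_prompt([10, 11], [20], spm_infill=True))  # SPM order
```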
@@ -3,7 +3,7 @@
 #! pip install pydantic
 #! python json-schema-pydantic-example.py
 
-from pydantic import BaseModel, TypeAdapter
+from pydantic import BaseModel, Extra, TypeAdapter
 from annotated_types import MinLen
 from typing import Annotated, List, Optional
 import json, requests
@@ -50,11 +50,16 @@ else:
 if __name__ == '__main__':
 
     class QAPair(BaseModel):
+        class Config:
+            extra = 'forbid'  # triggers additionalProperties: false in the JSON schema
         question: str
         concise_answer: str
         justification: str
+        stars: Annotated[int, Field(ge=1, le=5)]
 
     class PyramidalSummary(BaseModel):
+        class Config:
+            extra = 'forbid'  # triggers additionalProperties: false in the JSON schema
         title: str
         summary: str
         question_answers: Annotated[List[QAPair], MinLen(2)]
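Note: the comments in the hunk above rely on Pydantic emitting `additionalProperties: false` for models configured with `extra = 'forbid'`. A minimal sketch of checking that behaviour, assuming Pydantic v2:

```python
# Minimal check (assumption: Pydantic v2): `extra = 'forbid'` should surface
# as `additionalProperties: false` in the generated JSON schema.
from pydantic import BaseModel, TypeAdapter

class QAPair(BaseModel):
    class Config:
        extra = 'forbid'
    question: str
    concise_answer: str
    justification: str

schema = TypeAdapter(QAPair).json_schema()
assert schema.get('additionalProperties') is False
print(schema)
```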
@@ -4,8 +4,7 @@ import itertools
 import json
 import re
 import sys
-from typing import Any, Dict, List, Set, Tuple, Union
+from typing import Any, List, Optional, Set, Tuple, Union
 
 
 def _build_repetition(item_rule, min_items, max_items, separator_rule=None):
@@ -23,6 +22,170 @@ def _build_repetition(item_rule, min_items, max_items, separator_rule=None):
         result = item_rule + ' ' + _build_repetition(f'({separator_rule} {item_rule})', min_items - 1 if min_items > 0 else 0, max_items - 1 if max_items is not None else None)
     return f'({result})?' if min_items == 0 else result
 
+
+def _generate_min_max_int(min_value: Optional[int], max_value: Optional[int], out: list, decimals_left: int = 16, top_level: bool = True):
+    has_min = min_value != None
+    has_max = max_value != None
+
+    def digit_range(from_char: str, to_char: str):
+        out.append("[")
+        if from_char == to_char:
+            out.append(from_char)
+        else:
+            out.append(from_char)
+            out.append("-")
+            out.append(to_char)
+        out.append("]")
+
+    def more_digits(min_digits: int, max_digits: int):
+        out.append("[0-9]")
+        if min_digits == max_digits and min_digits == 1:
+            return
+        out.append("{")
+        out.append(str(min_digits))
+        if max_digits != min_digits:
+            out.append(",")
+            if max_digits != sys.maxsize:
+                out.append(str(max_digits))
+        out.append("}")
+
+    def uniform_range(from_str: str, to_str: str):
+        i = 0
+        while i < len(from_str) and from_str[i] == to_str[i]:
+            i += 1
+        if i > 0:
+            out.append("\"")
+            out.append(from_str[:i])
+            out.append("\"")
+        if i < len(from_str):
+            if i > 0:
+                out.append(" ")
+            sub_len = len(from_str) - i - 1
+            if sub_len > 0:
+                from_sub = from_str[i+1:]
+                to_sub = to_str[i+1:]
+                sub_zeros = "0" * sub_len
+                sub_nines = "9" * sub_len
+
+                to_reached = False
+                out.append("(")
+                if from_sub == sub_zeros:
+                    digit_range(from_str[i], chr(ord(to_str[i]) - 1))
+                    out.append(" ")
+                    more_digits(sub_len, sub_len)
+                else:
+                    out.append("[")
+                    out.append(from_str[i])
+                    out.append("] ")
+                    out.append("(")
+                    uniform_range(from_sub, sub_nines)
+                    out.append(")")
+                    if ord(from_str[i]) < ord(to_str[i]) - 1:
+                        out.append(" | ")
+                        if to_sub == sub_nines:
+                            digit_range(chr(ord(from_str[i]) + 1), to_str[i])
+                            to_reached = True
+                        else:
+                            digit_range(chr(ord(from_str[i]) + 1), chr(ord(to_str[i]) - 1))
+                        out.append(" ")
+                        more_digits(sub_len, sub_len)
+                if not to_reached:
+                    out.append(" | ")
+                    digit_range(to_str[i], to_str[i])
+                    out.append(" ")
+                    uniform_range(sub_zeros, to_sub)
+                out.append(")")
+            else:
+                out.append("[")
+                out.append(from_str[i])
+                out.append("-")
+                out.append(to_str[i])
+                out.append("]")
+
+    if has_min and has_max:
+        if min_value < 0 and max_value < 0:
+            out.append("\"-\" (")
+            _generate_min_max_int(-max_value, -min_value, out, decimals_left, top_level=True)
+            out.append(")")
+            return
+
+        if min_value < 0:
+            out.append("\"-\" (")
+            _generate_min_max_int(0, -min_value, out, decimals_left, top_level=True)
+            out.append(") | ")
+            min_value = 0
+
+        min_s = str(min_value)
+        max_s = str(max_value)
+        min_digits = len(min_s)
+        max_digits = len(max_s)
+
+        for digits in range(min_digits, max_digits):
+            uniform_range(min_s, "9" * digits)
+            min_s = "1" + "0" * digits
+            out.append(" | ")
+        uniform_range(min_s, max_s)
+        return
+
+    less_decimals = max(decimals_left - 1, 1)
+
+    if has_min:
+        if min_value < 0:
+            out.append("\"-\" (")
+            _generate_min_max_int(None, -min_value, out, decimals_left, top_level=False)
+            out.append(") | [0] | [1-9] ")
+            more_digits(0, decimals_left - 1)
+        elif min_value == 0:
+            if top_level:
+                out.append("[0] | [1-9] ")
+                more_digits(0, less_decimals)
+            else:
+                more_digits(1, decimals_left)
+        elif min_value <= 9:
+            c = str(min_value)
+            range_start = '1' if top_level else '0'
+            if c > range_start:
+                digit_range(range_start, chr(ord(c) - 1))
+                out.append(" ")
+                more_digits(1, less_decimals)
+                out.append(" | ")
+            digit_range(c, "9")
+            out.append(" ")
+            more_digits(0, less_decimals)
+        else:
+            min_s = str(min_value)
+            length = len(min_s)
+            c = min_s[0]
+
+            if c > "1":
+                digit_range("1" if top_level else "0", chr(ord(c) - 1))
+                out.append(" ")
+                more_digits(length, less_decimals)
+                out.append(" | ")
+            digit_range(c, c)
+            out.append(" (")
+            _generate_min_max_int(int(min_s[1:]), None, out, less_decimals, top_level=False)
+            out.append(")")
+            if c < "9":
+                out.append(" | ")
+                digit_range(chr(ord(c) + 1), "9")
+                out.append(" ")
+                more_digits(length - 1, less_decimals)
+        return
+
+    if has_max:
+        if max_value >= 0:
+            if top_level:
+                out.append("\"-\" [1-9] ")
+                more_digits(0, less_decimals)
+                out.append(" | ")
+            _generate_min_max_int(0, max_value, out, decimals_left, top_level=True)
+        else:
+            out.append("\"-\" (")
+            _generate_min_max_int(-max_value, None, out, decimals_left, top_level=False)
+            out.append(")")
+        return
+
+    raise RuntimeError("At least one of min_value or max_value must be set")
+
 
 class BuiltinRule:
     def __init__(self, content: str, deps: list = None):
@@ -68,7 +231,7 @@ GRAMMAR_RANGE_LITERAL_ESCAPE_RE = re.compile(r'[\r\n"\]\-\\]')
 GRAMMAR_LITERAL_ESCAPES = {'\r': '\\r', '\n': '\\n', '"': '\\"', '-': '\\-', ']': '\\]'}
 
 NON_LITERAL_SET = set('|.()[]{}*+?')
-ESCAPED_IN_REGEXPS_BUT_NOT_IN_LITERALS = set('[]()|{}*+?')
+ESCAPED_IN_REGEXPS_BUT_NOT_IN_LITERALS = set('^$.[]()|{}*+?')
 
 
 class SchemaConverter:
@@ -112,6 +275,51 @@ class SchemaConverter:
 
         return ''.join(('(', *recurse(0), ')'))
 
+    def _not_strings(self, strings):
+        class TrieNode:
+            def __init__(self):
+                self.children = {}
+                self.is_end_of_string = False
+
+            def insert(self, string):
+                node = self
+                for c in string:
+                    node = node.children.setdefault(c, TrieNode())
+                node.is_end_of_string = True
+
+        trie = TrieNode()
+        for s in strings:
+            trie.insert(s)
+
+        char_rule = self._add_primitive('char', PRIMITIVE_RULES['char'])
+        out = ['["] ( ']
+
+        def visit(node):
+            rejects = []
+            first = True
+            for c in sorted(node.children.keys()):
+                child = node.children[c]
+                rejects.append(c)
+                if first:
+                    first = False
+                else:
+                    out.append(' | ')
+                out.append(f'[{c}]')
+                if child.children:
+                    out.append(f' (')
+                    visit(child)
+                    out.append(')')
+                elif child.is_end_of_string:
+                    out.append(f' {char_rule}+')
+            if node.children:
+                if not first:
+                    out.append(' | ')
+                out.append(f'[^"{"".join(rejects)}] {char_rule}*')
+        visit(trie)
+
+        out.append(f' ){"" if trie.is_end_of_string else "?"} ["] space')
+        return ''.join(out)
+
     def _add_rule(self, name, rule):
         esc_name = INVALID_RULE_CHARS_RE.sub('-', name)
         if esc_name not in self._rules or self._rules[esc_name] == rule:
@@ -357,13 +565,13 @@ class SchemaConverter:
             return self._add_rule(rule_name, self._generate_union_rule(name, schema.get('oneOf') or schema['anyOf']))
 
         elif isinstance(schema_type, list):
-            return self._add_rule(rule_name, self._generate_union_rule(name, [{'type': t} for t in schema_type]))
+            return self._add_rule(rule_name, self._generate_union_rule(name, [{**schema, 'type': t} for t in schema_type]))
 
         elif 'const' in schema:
-            return self._add_rule(rule_name, self._generate_constant_rule(schema['const']))
+            return self._add_rule(rule_name, self._generate_constant_rule(schema['const']) + ' space')
 
         elif 'enum' in schema:
-            rule = ' | '.join((self._generate_constant_rule(v) for v in schema['enum']))
+            rule = '(' + ' | '.join((self._generate_constant_rule(v) for v in schema['enum'])) + ') space'
             return self._add_rule(rule_name, rule)
 
         elif schema_type in (None, 'object') and \
@@ -394,7 +602,7 @@ class SchemaConverter:
             else:
                 add_component(t, is_required=True)
 
-            return self._add_rule(rule_name, self._build_object_rule(properties, required, hybrid_name, additional_properties=[]))
+            return self._add_rule(rule_name, self._build_object_rule(properties, required, hybrid_name, additional_properties=None))
 
         elif schema_type in (None, 'array') and ('items' in schema or 'prefixItems' in schema):
             items = schema.get('items') or schema['prefixItems']
@@ -432,6 +640,24 @@ class SchemaConverter:
 
             return self._add_rule(rule_name, r'"\"" ' + _build_repetition(char_rule, min_len, max_len) + r' "\"" space')
 
+        elif schema_type in (None, 'integer') and \
+                ('minimum' in schema or 'exclusiveMinimum' in schema or 'maximum' in schema or 'exclusiveMaximum' in schema):
+            min_value = None
+            max_value = None
+            if 'minimum' in schema:
+                min_value = schema['minimum']
+            elif 'exclusiveMinimum' in schema:
+                min_value = schema['exclusiveMinimum'] + 1
+            if 'maximum' in schema:
+                max_value = schema['maximum']
+            elif 'exclusiveMaximum' in schema:
+                max_value = schema['exclusiveMaximum'] - 1
+
+            out = ["("]
+            _generate_min_max_int(min_value, max_value, out)
+            out.append(") space")
+            return self._add_rule(rule_name, ''.join(out))
+
         elif (schema_type == 'object') or (len(schema) == 0):
             return self._add_rule(rule_name, self._add_primitive('object', PRIMITIVE_RULES['object']))
 
@@ -450,7 +676,7 @@ class SchemaConverter:
             self._add_primitive(dep, dep_rule)
         return n
 
-    def _build_object_rule(self, properties: List[Tuple[str, Any]], required: Set[str], name: str, additional_properties: Union[bool, Any]):
+    def _build_object_rule(self, properties: List[Tuple[str, Any]], required: Set[str], name: str, additional_properties: Optional[Union[bool, Any]]):
         prop_order = self._prop_order
         # sort by position in prop_order (if specified) then by original order
         sorted_props = [kv[0] for _, kv in sorted(enumerate(properties), key=lambda ikv: (prop_order.get(ikv[1][0], len(prop_order)), ikv[0]))]
@@ -465,12 +691,16 @@ class SchemaConverter:
         required_props = [k for k in sorted_props if k in required]
         optional_props = [k for k in sorted_props if k not in required]
 
-        if additional_properties == True or isinstance(additional_properties, dict):
+        if additional_properties is not None and additional_properties != False:
             sub_name = f'{name}{"-" if name else ""}additional'
-            value_rule = self.visit({} if additional_properties == True else additional_properties, f'{sub_name}-value')
+            value_rule = self.visit(additional_properties, f'{sub_name}-value') if isinstance(additional_properties, dict) else \
+                self._add_primitive('value', PRIMITIVE_RULES['value'])
+            key_rule = self._add_primitive('string', PRIMITIVE_RULES['string']) if not sorted_props \
+                else self._add_rule(f'{sub_name}-k', self._not_strings(sorted_props))
+
             prop_kv_rule_names["*"] = self._add_rule(
                 f'{sub_name}-kv',
-                self._add_primitive('string', PRIMITIVE_RULES['string']) + f' ":" space {value_rule}'
+                f'{key_rule} ":" space {value_rule}'
             )
             optional_props.append("*")
 
@@ -485,15 +715,11 @@ class SchemaConverter:
         def get_recursive_refs(ks, first_is_optional):
             [k, *rest] = ks
             kv_rule_name = prop_kv_rule_names[k]
-            if k == '*':
-                res = self._add_rule(
-                    f'{name}{"-" if name else ""}additional-kvs',
-                    f'{kv_rule_name} ( "," space ' + kv_rule_name + ' )*'
-                )
-            elif first_is_optional:
-                res = f'( "," space {kv_rule_name} )?'
+            comma_ref = f'( "," space {kv_rule_name} )'
+            if first_is_optional:
+                res = comma_ref + ('*' if k == '*' else '?')
             else:
-                res = kv_rule_name
+                res = kv_rule_name + (' ' + comma_ref + "*" if k == '*' else '')
             if len(rest) > 0:
                 res += ' ' + self._add_rule(
                     f'{name}{"-" if name else ""}{k}-rest',
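Note: the integer-range hunk above builds its rule by wrapping the output of `_generate_min_max_int` in parentheses and appending `space`. A rough usage sketch of the helper on its own; the import path is an assumption for illustration.

```python
# Rough usage sketch, mirroring how the integer-range branch above calls the helper.
from json_schema_to_grammar import _generate_min_max_int  # assumed import path

out = ["("]
_generate_min_max_int(1, 12, out)
out.append(") space")
print("".join(out))  # a GBNF alternation covering 1-9 and 10-12
```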
@@ -1,55 +0,0 @@
-
-# For more information about using CMake with Android Studio, read the
-# documentation: https://d.android.com/studio/projects/add-native-code.html.
-# For more examples on how to use CMake, see https://github.com/android/ndk-samples.
-
-# Sets the minimum CMake version required for this project.
-cmake_minimum_required(VERSION 3.22.1)
-
-# Declares the project name. The project name can be accessed via ${ PROJECT_NAME},
-# Since this is the top level CMakeLists.txt, the project name is also accessible
-# with ${CMAKE_PROJECT_NAME} (both CMake variables are in-sync within the top level
-# build script scope).
-project("llama-android")
-
-## Fetch latest llama.cpp from GitHub
-#include(FetchContent)
-#FetchContent_Declare(
-#    llama
-#    GIT_REPOSITORY https://github.com/ggerganov/llama.cpp
-#    GIT_TAG master
-#)
-#
-## Also provides "common"
-#FetchContent_MakeAvailable(llama)
-
-# llama.cpp CI uses the code from the current branch
-# ref: https://github.com/ggerganov/llama.cpp/pull/7341#issuecomment-2117617700
-add_subdirectory(../../../../../../ build-llama)
-
-# Creates and names a library, sets it as either STATIC
-# or SHARED, and provides the relative paths to its source code.
-# You can define multiple libraries, and CMake builds them for you.
-# Gradle automatically packages shared libraries with your APK.
-#
-# In this top level CMakeLists.txt, ${CMAKE_PROJECT_NAME} is used to define
-# the target library name; in the sub-module's CMakeLists.txt, ${PROJECT_NAME}
-# is preferred for the same purpose.
-#
-# In order to load a library into your app from Java/Kotlin, you must call
-# System.loadLibrary() and pass the name of the library defined here;
-# for GameActivity/NativeActivity derived applications, the same library name must be
-# used in the AndroidManifest.xml file.
-add_library(${CMAKE_PROJECT_NAME} SHARED
-    # List C/C++ source files with relative paths to this CMakeLists.txt.
-    llama-android.cpp)
-
-# Specifies libraries CMake should link to your target library. You
-# can link libraries from various origins, such as libraries defined in this
-# build script, prebuilt third-party libraries, or Android system libraries.
-target_link_libraries(${CMAKE_PROJECT_NAME}
-    # List libraries link to the target library
-    llama
-    common
-    android
-    log)
@@ -11,15 +11,15 @@ cmake_minimum_required(VERSION 3.22.1)
 # build script scope).
 project("llama-android")
 
-include(FetchContent)
-FetchContent_Declare(
-    llama
-    GIT_REPOSITORY https://github.com/ggerganov/llama.cpp
-    GIT_TAG master
-)
+#include(FetchContent)
+#FetchContent_Declare(
+#    llama
+#    GIT_REPOSITORY https://github.com/ggerganov/llama.cpp
+#    GIT_TAG master
+#)
 
 # Also provides "common"
-FetchContent_MakeAvailable(llama)
+#FetchContent_MakeAvailable(llama)
 
 # Creates and names a library, sets it as either STATIC
 # or SHARED, and provides the relative paths to its source code.
@@ -30,6 +30,10 @@ FetchContent_MakeAvailable(llama)
 # the target library name; in the sub-module's CMakeLists.txt, ${PROJECT_NAME}
 # is preferred for the same purpose.
 #
+
+#load local llama.cpp
+add_subdirectory(../../../../../../ build-llama)
+
 # In order to load a library into your app from Java/Kotlin, you must call
 # System.loadLibrary() and pass the name of the library defined here;
 # for GameActivity/NativeActivity derived applications, the same library name must be
@@ -5,7 +5,7 @@
 #include <string>
 #include <unistd.h>
 #include "llama.h"
-#include "common/common.h"
+#include "common.h"
 
 // Write C++ code here.
 //
@@ -131,22 +131,29 @@ class LlamaState: ObservableObject {
 
         messageLog += "\(text)"
 
-        while await llamaContext.n_cur < llamaContext.n_len {
-            let result = await llamaContext.completion_loop()
-            messageLog += "\(result)"
+        Task.detached {
+            while await llamaContext.n_cur < llamaContext.n_len {
+                let result = await llamaContext.completion_loop()
+                await MainActor.run {
+                    self.messageLog += "\(result)"
+                }
+            }
+
+            let t_end = DispatchTime.now().uptimeNanoseconds
+            let t_generation = Double(t_end - t_heat_end) / self.NS_PER_S
+            let tokens_per_second = Double(await llamaContext.n_len) / t_generation
+
+            await llamaContext.clear()
+
+            await MainActor.run {
+                self.messageLog += """
+                    \n
+                    Done
+                    Heat up took \(t_heat)s
+                    Generated \(tokens_per_second) t/s\n
+                    """
+            }
         }
-
-        let t_end = DispatchTime.now().uptimeNanoseconds
-        let t_generation = Double(t_end - t_heat_end) / NS_PER_S
-        let tokens_per_second = Double(await llamaContext.n_len) / t_generation
-
-        await llamaContext.clear()
-        messageLog += """
-            \n
-            Done
-            Heat up took \(t_heat)s
-            Generated \(tokens_per_second) t/s\n
-            """
     }
 
     func bench() async {
@@ -194,7 +194,7 @@ llama_print_timings: total time = 44411.01 ms / 377 tokens
 ## Orin compile and run
 ### compile
 ```sh
-make LLAMA_CUDA=1 CUDA_DOCKER_ARCH=sm_87 LLAMA_CUDA_F16=1 -j 32
+make GGML_CUDA=1 CUDA_DOCKER_ARCH=sm_87 GGML_CUDA_F16=1 -j 32
 ```
 ### run on Orin
 ### case 1
@@ -1121,20 +1121,20 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
         }
         if (n < 32)
             hparams.image_grid_pinpoints[n] = 0;
-    } catch (std::runtime_error & e) {
+    } catch (std::runtime_error & /*e*/) {
         hparams.image_grid_pinpoints[0]=0;
     }
 
     try {
         int idx = get_key_idx(ctx, KEY_MM_PATCH_MERGE_TYPE);
         strcpy(hparams.mm_patch_merge_type, gguf_get_val_str(ctx, idx));
-    } catch (std::runtime_error & e) {
+    } catch (std::runtime_error & /*e*/) {
         strcpy(hparams.mm_patch_merge_type, "flat");
     }
 
     try {
         hparams.image_crop_resolution = get_u32(ctx, KEY_IMAGE_CROP_RESOLUTION); // llava-1.6
-    } catch(const std::exception& e) {
+    } catch(const std::exception& /*e*/) {
         hparams.image_crop_resolution = hparams.image_size;
     }
 
@@ -1173,7 +1173,7 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
     try {
         vision_model.class_embedding = get_tensor(new_clip->ctx_data, TN_CLASS_EMBD);
         new_clip->has_class_embedding = true;
-    } catch (const std::exception& e) {
+    } catch (const std::exception& /*e*/) {
         new_clip->has_class_embedding = false;
     }
 
@@ -1181,7 +1181,7 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
         vision_model.pre_ln_w = get_tensor(new_clip->ctx_data, format(TN_LN_PRE, "v", "weight"));
         vision_model.pre_ln_b = get_tensor(new_clip->ctx_data, format(TN_LN_PRE, "v", "bias"));
         new_clip->has_pre_norm = true;
-    } catch (std::exception & e) {
+    } catch (std::exception & /*e*/) {
         new_clip->has_pre_norm = false;
     }
 
@@ -1189,21 +1189,21 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
         vision_model.post_ln_w = get_tensor(new_clip->ctx_data, format(TN_LN_POST, "v", "weight"));
         vision_model.post_ln_b = get_tensor(new_clip->ctx_data, format(TN_LN_POST, "v", "bias"));
         new_clip->has_post_norm = true;
-    } catch (std::exception & e) {
+    } catch (std::exception & /*e*/) {
         new_clip->has_post_norm = false;
     }
 
     try {
         vision_model.patch_bias = get_tensor(new_clip->ctx_data, TN_PATCH_BIAS);
         new_clip->has_patch_bias = true;
-    } catch (std::exception & e) {
+    } catch (std::exception & /*e*/) {
         new_clip->has_patch_bias = false;
     }
 
     try {
         vision_model.patch_embeddings = get_tensor(new_clip->ctx_data, TN_PATCH_EMBD);
         vision_model.position_embeddings = get_tensor(new_clip->ctx_data, format(TN_POS_EMBD, "v"));
-    } catch(const std::exception& e) {
+    } catch(const std::exception& /*e*/) {
         LOG_TEE("%s: failed to load vision model tensors\n", __func__);
     }
 
@@ -1215,26 +1215,26 @@ struct clip_ctx * clip_model_load(const char * fname, const int verbosity = 1) {
         // Yi-type llava
         vision_model.mm_1_w = get_tensor(new_clip->ctx_data, format(TN_LLAVA_PROJ, 1, "weight"));
         vision_model.mm_1_b = get_tensor(new_clip->ctx_data, format(TN_LLAVA_PROJ, 1, "bias"));
-    } catch (std::runtime_error & e) { }
+    } catch (std::runtime_error & /*e*/) { }
     try {
         // missing in Yi-type llava
         vision_model.mm_2_w = get_tensor(new_clip->ctx_data, format(TN_LLAVA_PROJ, 2, "weight"));
         vision_model.mm_2_b = get_tensor(new_clip->ctx_data, format(TN_LLAVA_PROJ, 2, "bias"));
-    } catch (std::runtime_error & e) { }
+    } catch (std::runtime_error & /*e*/) { }
     try {
         // Yi-type llava
         vision_model.mm_3_w = get_tensor(new_clip->ctx_data, format(TN_LLAVA_PROJ, 3, "weight"));
         vision_model.mm_3_b = get_tensor(new_clip->ctx_data, format(TN_LLAVA_PROJ, 3, "bias"));
-    } catch (std::runtime_error & e) { }
+    } catch (std::runtime_error & /*e*/) { }
     try {
         // Yi-type llava
         vision_model.mm_4_w = get_tensor(new_clip->ctx_data, format(TN_LLAVA_PROJ, 4, "weight"));
         vision_model.mm_4_b = get_tensor(new_clip->ctx_data, format(TN_LLAVA_PROJ, 4, "bias"));
-    } catch (std::runtime_error & e) { }
+    } catch (std::runtime_error & /*e*/) { }
     try {
         vision_model.image_newline = get_tensor(new_clip->ctx_data, TN_IMAGE_NEWLINE);
         // LOG_TEE("%s: image_newline tensor (llava-1.6) found\n", __func__);
-    } catch (std::runtime_error & e) { }
+    } catch (std::runtime_error & /*e*/) { }
     } else if (new_clip->proj_type == PROJECTOR_TYPE_LDP) {
         // MobileVLM projection
         vision_model.mm_model_mlp_1_w = get_tensor(new_clip->ctx_data, format(TN_MVLM_PROJ_MLP, 1, "weight"));
@@ -10,4 +10,3 @@ More info:
 
 https://github.com/ggerganov/llama.cpp/pull/4484
 https://github.com/ggerganov/llama.cpp/issues/4226
-

examples/main-cmake-pkg/.gitignore (vendored)
@@ -48,4 +48,3 @@
 build*/
 out/
 tmp/
-
@@ -30,4 +30,3 @@ target_include_directories(${TARGET} PRIVATE ${_common_path})
 install(TARGETS ${TARGET} RUNTIME)
 target_link_libraries(${TARGET} PRIVATE common llama ${CMAKE_THREAD_LIBS_INIT})
 target_compile_features(${TARGET} PRIVATE cxx_std_11)
-
@@ -39,12 +39,12 @@ static std::ostringstream * g_output_ss;
 static std::vector<llama_token> * g_output_tokens;
 static bool is_interacting = false;
 
-static bool file_exists(const std::string &path) {
+static bool file_exists(const std::string & path) {
     std::ifstream f(path.c_str());
     return f.good();
 }
 
-static bool file_is_empty(const std::string &path) {
+static bool file_is_empty(const std::string & path) {
     std::ifstream f;
     f.exceptions(std::ifstream::failbit | std::ifstream::badbit);
     f.open(path.c_str(), std::ios::in | std::ios::binary | std::ios::ate);
@@ -117,6 +117,14 @@ static void llama_log_callback_logTee(ggml_log_level level, const char * text, v
     LOG_TEE("%s", text);
 }
 
+static std::string chat_add_and_format(struct llama_model * model, std::vector<llama_chat_msg> & chat_msgs, std::string role, std::string content) {
+    llama_chat_msg new_msg{role, content};
+    auto formatted = llama_chat_format_single(
+        model, g_params->chat_template, chat_msgs, new_msg, role == "user");
+    chat_msgs.push_back({role, content});
+    return formatted;
+}
+
 int main(int argc, char ** argv) {
     gpt_params params;
     g_params = &params;
@@ -190,6 +198,7 @@ int main(int argc, char ** argv) {
     llama_model * model;
     llama_context * ctx;
     llama_context * ctx_guidance = NULL;
+    std::vector<llama_chat_msg> chat_msgs;
     g_model = &model;
     g_ctx = &ctx;
 
@@ -215,6 +224,8 @@ int main(int argc, char ** argv) {
                 __func__, n_ctx_train, n_ctx);
     }
 
+    LOG_TEE("%s: chat template example: %s\n", __func__, llama_chat_format_example(model, params.chat_template).c_str());
+
     // print system information
     {
         LOG_TEE("\n");
@@ -249,16 +260,21 @@ int main(int argc, char ** argv) {
 
     std::vector<llama_token> embd_inp;
 
-    if (params.interactive_first || !params.prompt.empty() || session_tokens.empty()) {
-        LOG("tokenize the prompt\n");
-        embd_inp = ::llama_tokenize(ctx, params.prompt, true, true);
-    } else {
-        LOG("use session tokens\n");
-        embd_inp = session_tokens;
-    }
-
-    LOG("prompt: \"%s\"\n", log_tostr(params.prompt));
+    {
+        auto prompt = (params.conversation && params.enable_chat_template)
+            ? chat_add_and_format(model, chat_msgs, "system", params.prompt) // format the system prompt in conversation mode
+            : params.prompt;
+        if (params.interactive_first || !params.prompt.empty() || session_tokens.empty()) {
+            LOG("tokenize the prompt\n");
+            embd_inp = ::llama_tokenize(ctx, prompt, true, true);
+        } else {
+            LOG("use session tokens\n");
+            embd_inp = session_tokens;
+        }
+
+        LOG("prompt: \"%s\"\n", log_tostr(prompt));
         LOG("tokens: %s\n", LOG_TOKENS_TOSTR_PRETTY(ctx, embd_inp).c_str());
+    }
 
     // Should not run without any tokens
     if (embd_inp.empty()) {
@@ -478,6 +494,7 @@ int main(int argc, char ** argv) {
     std::vector<int> input_tokens;  g_input_tokens  = &input_tokens;
     std::vector<int> output_tokens; g_output_tokens = &output_tokens;
     std::ostringstream output_ss;   g_output_ss     = &output_ss;
+    std::ostringstream assistant_ss; // for storing current assistant message, used in conversation mode
 
     // the first thing we will do is to output the prompt, so set color accordingly
     console::set_display(console::prompt);
@@ -793,11 +810,20 @@ int main(int argc, char ** argv) {
                     is_antiprompt = true;
                 }
 
+                if (params.enable_chat_template) {
+                    chat_add_and_format(model, chat_msgs, "assistant", assistant_ss.str());
+                }
                 is_interacting = true;
                 printf("\n");
             }
         }
 
+        // if current token is not EOG, we add it to current assistant message
+        if (params.conversation) {
+            auto id = llama_sampling_last(ctx_sampling);
+            assistant_ss << llama_token_to_piece(ctx, id, false);
+        }
+
         if (n_past > 0 && is_interacting) {
             LOG("waiting for user input\n");
 
@@ -848,8 +874,13 @@ int main(int argc, char ** argv) {
                     string_process_escapes(buffer);
                 }
 
+                bool format_chat = params.conversation && params.enable_chat_template;
+                std::string user_inp = format_chat
+                    ? chat_add_and_format(model, chat_msgs, "user", std::move(buffer))
+                    : std::move(buffer);
+                // TODO: one inconvenient of current chat template implementation is that we can't distinguish between user input and special tokens (prefix/postfix)
                 const auto line_pfx = ::llama_tokenize(ctx, params.input_prefix, false, true);
-                const auto line_inp = ::llama_tokenize(ctx, buffer, false, false);
+                const auto line_inp = ::llama_tokenize(ctx, user_inp, false, format_chat);
                 const auto line_sfx = ::llama_tokenize(ctx, params.input_suffix, false, true);
 
                 LOG("input tokens: %s\n", LOG_TOKENS_TOSTR_PRETTY(ctx, line_inp).c_str());
@@ -864,6 +895,9 @@ int main(int argc, char ** argv) {
                     output_ss << llama_token_to_piece(ctx, token);
                 }
 
+                // reset assistant message
+                assistant_ss.str("");
+
                 n_remain -= line_inp.size();
                 LOG("n_remain: %d\n", n_remain);
             } else {
@@ -1991,6 +1991,12 @@ int main(int argc, char ** argv) {
         params.n_batch = std::min(params.n_batch, n_kv);
     } else {
         params.n_batch = std::min(params.n_batch, params.n_ctx);
+        if (params.kl_divergence) {
+            params.n_parallel = 1;
+        } else {
+            // ensure there's at least enough seq_ids for HellaSwag
+            params.n_parallel = std::max(4, params.n_parallel);
+        }
     }
 
     if (params.ppl_stride > 0) {
@@ -2015,9 +2021,6 @@ int main(int argc, char ** argv) {
     llama_model * model;
     llama_context * ctx;
 
-    // ensure there's at least enough seq_ids for HellaSwag
-    params.n_parallel = std::max(4, params.n_parallel);
-
     // load the model and apply lora adapter, if any
     std::tie(model, ctx) = llama_init_from_gpt_params(params);
     if (model == NULL) {
@@ -16,41 +16,41 @@ struct quant_option {
 };
 
 static const std::vector<struct quant_option> QUANT_OPTIONS = {
-    { "Q4_0", LLAMA_FTYPE_MOSTLY_Q4_0, " 3.56G, +0.2166 ppl @ LLaMA-v1-7B", },
+    { "Q4_0", LLAMA_FTYPE_MOSTLY_Q4_0, " 4.34G, +0.4685 ppl @ Llama-3-8B", },
-    { "Q4_1", LLAMA_FTYPE_MOSTLY_Q4_1, " 3.90G, +0.1585 ppl @ LLaMA-v1-7B", },
+    { "Q4_1", LLAMA_FTYPE_MOSTLY_Q4_1, " 4.78G, +0.4511 ppl @ Llama-3-8B", },
-    { "Q5_0", LLAMA_FTYPE_MOSTLY_Q5_0, " 4.33G, +0.0683 ppl @ LLaMA-v1-7B", },
+    { "Q5_0", LLAMA_FTYPE_MOSTLY_Q5_0, " 5.21G, +0.1316 ppl @ Llama-3-8B", },
-    { "Q5_1", LLAMA_FTYPE_MOSTLY_Q5_1, " 4.70G, +0.0349 ppl @ LLaMA-v1-7B", },
+    { "Q5_1", LLAMA_FTYPE_MOSTLY_Q5_1, " 5.65G, +0.1062 ppl @ Llama-3-8B", },
     { "IQ2_XXS",LLAMA_FTYPE_MOSTLY_IQ2_XXS," 2.06 bpw quantization", },
     { "IQ2_XS", LLAMA_FTYPE_MOSTLY_IQ2_XS, " 2.31 bpw quantization", },
     { "IQ2_S", LLAMA_FTYPE_MOSTLY_IQ2_S, " 2.5 bpw quantization", },
     { "IQ2_M", LLAMA_FTYPE_MOSTLY_IQ2_M, " 2.7 bpw quantization", },
     { "IQ1_S", LLAMA_FTYPE_MOSTLY_IQ1_S, " 1.56 bpw quantization", },
     { "IQ1_M", LLAMA_FTYPE_MOSTLY_IQ1_M, " 1.75 bpw quantization", },
-    { "Q2_K", LLAMA_FTYPE_MOSTLY_Q2_K, " 2.63G, +0.6717 ppl @ LLaMA-v1-7B", },
+    { "Q2_K", LLAMA_FTYPE_MOSTLY_Q2_K, " 2.96G, +3.5199 ppl @ Llama-3-8B", },
-    { "Q2_K_S", LLAMA_FTYPE_MOSTLY_Q2_K_S, " 2.16G, +9.0634 ppl @ LLaMA-v1-7B", },
+    { "Q2_K_S", LLAMA_FTYPE_MOSTLY_Q2_K_S, " 2.96G, +3.1836 ppl @ Llama-3-8B", },
     { "IQ3_XXS",LLAMA_FTYPE_MOSTLY_IQ3_XXS," 3.06 bpw quantization", },
     { "IQ3_S", LLAMA_FTYPE_MOSTLY_IQ3_S, " 3.44 bpw quantization", },
     { "IQ3_M", LLAMA_FTYPE_MOSTLY_IQ3_M, " 3.66 bpw quantization mix", },
     { "Q3_K", LLAMA_FTYPE_MOSTLY_Q3_K_M, "alias for Q3_K_M" },
-    { "IQ3_XS", LLAMA_FTYPE_MOSTLY_IQ3_XS, " 3.3 bpw quantization" , },
+    { "IQ3_XS", LLAMA_FTYPE_MOSTLY_IQ3_XS, " 3.3 bpw quantization", },
-    { "Q3_K_S", LLAMA_FTYPE_MOSTLY_Q3_K_S, " 2.75G, +0.5551 ppl @ LLaMA-v1-7B", },
+    { "Q3_K_S", LLAMA_FTYPE_MOSTLY_Q3_K_S, " 3.41G, +1.6321 ppl @ Llama-3-8B", },
-    { "Q3_K_M", LLAMA_FTYPE_MOSTLY_Q3_K_M, " 3.07G, +0.2496 ppl @ LLaMA-v1-7B", },
+    { "Q3_K_M", LLAMA_FTYPE_MOSTLY_Q3_K_M, " 3.74G, +0.6569 ppl @ Llama-3-8B", },
-    { "Q3_K_L", LLAMA_FTYPE_MOSTLY_Q3_K_L, " 3.35G, +0.1764 ppl @ LLaMA-v1-7B", },
+    { "Q3_K_L", LLAMA_FTYPE_MOSTLY_Q3_K_L, " 4.03G, +0.5562 ppl @ Llama-3-8B", },
     { "IQ4_NL", LLAMA_FTYPE_MOSTLY_IQ4_NL, " 4.50 bpw non-linear quantization", },
     { "IQ4_XS", LLAMA_FTYPE_MOSTLY_IQ4_XS, " 4.25 bpw non-linear quantization", },
     { "Q4_K", LLAMA_FTYPE_MOSTLY_Q4_K_M, "alias for Q4_K_M", },
-    { "Q4_K_S", LLAMA_FTYPE_MOSTLY_Q4_K_S, " 3.59G, +0.0992 ppl @ LLaMA-v1-7B", },
+    { "Q4_K_S", LLAMA_FTYPE_MOSTLY_Q4_K_S, " 4.37G, +0.2689 ppl @ Llama-3-8B", },
-    { "Q4_K_M", LLAMA_FTYPE_MOSTLY_Q4_K_M, " 3.80G, +0.0532 ppl @ LLaMA-v1-7B", },
+    { "Q4_K_M", LLAMA_FTYPE_MOSTLY_Q4_K_M, " 4.58G, +0.1754 ppl @ Llama-3-8B", },
     { "Q5_K", LLAMA_FTYPE_MOSTLY_Q5_K_M, "alias for Q5_K_M", },
-    { "Q5_K_S", LLAMA_FTYPE_MOSTLY_Q5_K_S, " 4.33G, +0.0400 ppl @ LLaMA-v1-7B", },
+    { "Q5_K_S", LLAMA_FTYPE_MOSTLY_Q5_K_S, " 5.21G, +0.1049 ppl @ Llama-3-8B", },
-    { "Q5_K_M", LLAMA_FTYPE_MOSTLY_Q5_K_M, " 4.45G, +0.0122 ppl @ LLaMA-v1-7B", },
+    { "Q5_K_M", LLAMA_FTYPE_MOSTLY_Q5_K_M, " 5.33G, +0.0569 ppl @ Llama-3-8B", },
-    { "Q6_K", LLAMA_FTYPE_MOSTLY_Q6_K, " 5.15G, +0.0008 ppl @ LLaMA-v1-7B", },
+    { "Q6_K", LLAMA_FTYPE_MOSTLY_Q6_K, " 6.14G, +0.0217 ppl @ Llama-3-8B", },
-    { "Q8_0", LLAMA_FTYPE_MOSTLY_Q8_0, " 6.70G, +0.0004 ppl @ LLaMA-v1-7B", },
+    { "Q8_0", LLAMA_FTYPE_MOSTLY_Q8_0, " 7.96G, +0.0026 ppl @ Llama-3-8B", },
-    { "F16", LLAMA_FTYPE_MOSTLY_F16, "14.00G, -0.0020 ppl @ Mistral-7B", },
+    { "F16", LLAMA_FTYPE_MOSTLY_F16, "14.00G, +0.0020 ppl @ Mistral-7B", },
     { "BF16", LLAMA_FTYPE_MOSTLY_BF16, "14.00G, -0.0050 ppl @ Mistral-7B", },
     { "F32", LLAMA_FTYPE_ALL_F32, "26.00G @ 7B", },
     // Note: Ensure COPY comes after F32 to avoid ftype 0 from matching.
     { "COPY", LLAMA_FTYPE_ALL_F32, "only copy tensors, no quantizing", },
 };
 
 static const char * const LLM_KV_QUANTIZE_IMATRIX_FILE = "quantize.imatrix.file";
@ -73,9 +73,10 @@ static std::vector<chunk> chunk_file(const std::string & filename, int chunk_siz
|
||||||
return chunks;
|
return chunks;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void batch_add_seq(llama_batch & batch, const std::vector<int32_t> & tokens, int seq_id) {
|
static void batch_add_seq(llama_batch & batch, const std::vector<int32_t> & tokens, llama_seq_id seq_id) {
|
||||||
for (size_t i = 0; i < tokens.size(); i++) {
|
size_t n_tokens = tokens.size();
|
||||||
llama_batch_add(batch, tokens[i], i, { seq_id }, i == tokens.size() - 1);
|
for (size_t i = 0; i < n_tokens; i++) {
|
||||||
|
llama_batch_add(batch, tokens[i], i, { seq_id }, true);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -160,6 +161,12 @@ int main(int argc, char ** argv) {
|
||||||
const int n_ctx_train = llama_n_ctx_train(model);
|
const int n_ctx_train = llama_n_ctx_train(model);
|
||||||
const int n_ctx = llama_n_ctx(ctx);
|
const int n_ctx = llama_n_ctx(ctx);
|
||||||
|
|
||||||
|
const enum llama_pooling_type pooling_type = llama_pooling_type(ctx);
|
||||||
|
if (pooling_type == LLAMA_POOLING_TYPE_NONE) {
|
||||||
|
fprintf(stderr, "%s: error: pooling type NONE not supported\n", __func__);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
if (n_ctx > n_ctx_train) {
|
if (n_ctx > n_ctx_train) {
|
||||||
fprintf(stderr, "%s: warning: model was trained on only %d context tokens (%d specified)\n",
|
fprintf(stderr, "%s: warning: model was trained on only %d context tokens (%d specified)\n",
|
||||||
__func__, n_ctx_train, n_ctx);
|
__func__, n_ctx_train, n_ctx);
|
||||||
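With pooling type NONE now rejected up front, a pooled run is the quickest way to exercise the updated example. A minimal, hedged sketch (the binary name, model path and the `--pooling` argument are assumptions, not part of this diff):

```bash
# request mean pooling explicitly; the model path is a placeholder
./llama-embedding -m models/your-embedding-model.gguf --pooling mean -p "Hello world"
```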
|
|
|
@ -29,13 +29,13 @@ You can also run multiple `rpc-server` instances on the same host, each with a d
|
||||||
|
|
||||||
## Usage
|
## Usage
|
||||||
|
|
||||||
On each host, build the corresponding backend with `cmake` and add `-DLLAMA_RPC=ON` to the build options.
|
On each host, build the corresponding backend with `cmake` and add `-DGGML_RPC=ON` to the build options.
|
||||||
For example, to build the CUDA backend with RPC support:
|
For example, to build the CUDA backend with RPC support:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
mkdir build-rpc-cuda
|
mkdir build-rpc-cuda
|
||||||
cd build-rpc-cuda
|
cd build-rpc-cuda
|
||||||
cmake .. -DLLAMA_CUDA=ON -DLLAMA_RPC=ON
|
cmake .. -DGGML_CUDA=ON -DGGML_RPC=ON
|
||||||
cmake --build . --config Release
|
cmake --build . --config Release
|
||||||
```
|
```
|
||||||
|
|
||||||
|
@ -58,12 +58,12 @@ $ CUDA_VISIBLE_DEVICES=0 bin/rpc-server -p 50052
|
||||||
This way you can run multiple `rpc-server` instances on the same host, each with a different CUDA device.
|
This way you can run multiple `rpc-server` instances on the same host, each with a different CUDA device.
|
||||||
|
|
||||||
|
|
||||||
On the main host build `llama.cpp` only with `-DLLAMA_RPC=ON`:
|
On the main host build `llama.cpp` only with `-DGGML_RPC=ON`:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
mkdir build-rpc
|
mkdir build-rpc
|
||||||
cd build-rpc
|
cd build-rpc
|
||||||
cmake .. -DLLAMA_RPC=ON
|
cmake .. -DGGML_RPC=ON
|
||||||
cmake --build . --config Release
|
cmake --build . --config Release
|
||||||
```
|
```
|
||||||
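Putting the two builds together, a hedged end-to-end sketch (worker addresses are placeholders; the `--rpc` argument follows the upstream RPC docs and is not shown in this diff):

```bash
# on each worker host, expose one device via the RPC-enabled backend build
CUDA_VISIBLE_DEVICES=0 bin/rpc-server -p 50052

# on the main host, offload layers to the listed workers
bin/llama-cli -m model.gguf -p "Hello, my name is" -ngl 99 --rpc 192.168.88.10:50052,192.168.88.11:50052
```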
|
|
||||||
|
|
|
@ -31,4 +31,3 @@ for i in range(n-1):
|
||||||
embedding2 = np.array(result[j])
|
embedding2 = np.array(result[j])
|
||||||
similarity = np.dot(embedding1, embedding2) / (np.linalg.norm(embedding1) * np.linalg.norm(embedding2))
|
similarity = np.dot(embedding1, embedding2) / (np.linalg.norm(embedding1) * np.linalg.norm(embedding2))
|
||||||
print(f"Similarity between {i} and {j}: {similarity:.2f}")
|
print(f"Similarity between {i} and {j}: {similarity:.2f}")
|
||||||
|
|
||||||
|
|
|
@ -1,7 +1,14 @@
|
||||||
set(TARGET llama-server)
|
set(TARGET llama-server)
|
||||||
option(LLAMA_SERVER_VERBOSE "Build verbose logging option for Server" ON)
|
option(LLAMA_SERVER_VERBOSE "Build verbose logging option for Server" ON)
|
||||||
option(LLAMA_SERVER_SSL "Build SSL support for the server" OFF)
|
option(LLAMA_SERVER_SSL "Build SSL support for the server" OFF)
|
||||||
|
|
||||||
include_directories(${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR})
|
include_directories(${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR})
|
||||||
|
|
||||||
|
if (MINGW)
|
||||||
|
# fix: https://github.com/ggerganov/llama.cpp/actions/runs/9651004652/job/26617901362?pr=8006
|
||||||
|
add_compile_definitions(_WIN32_WINNT=${GGML_WIN_VER})
|
||||||
|
endif()
|
||||||
|
|
||||||
set(TARGET_SRCS
|
set(TARGET_SRCS
|
||||||
server.cpp
|
server.cpp
|
||||||
utils.hpp
|
utils.hpp
|
||||||
|
@ -24,6 +31,7 @@ set(PUBLIC_ASSETS
|
||||||
prompt-formats.js
|
prompt-formats.js
|
||||||
json-schema-to-grammar.mjs
|
json-schema-to-grammar.mjs
|
||||||
)
|
)
|
||||||
|
|
||||||
foreach(asset ${PUBLIC_ASSETS})
|
foreach(asset ${PUBLIC_ASSETS})
|
||||||
set(input "${CMAKE_CURRENT_SOURCE_DIR}/public/${asset}")
|
set(input "${CMAKE_CURRENT_SOURCE_DIR}/public/${asset}")
|
||||||
set(output "${CMAKE_CURRENT_BINARY_DIR}/${asset}.hpp")
|
set(output "${CMAKE_CURRENT_BINARY_DIR}/${asset}.hpp")
|
||||||
|
@ -34,18 +42,23 @@ foreach(asset ${PUBLIC_ASSETS})
|
||||||
COMMAND "${CMAKE_COMMAND}" "-DINPUT=${input}" "-DOUTPUT=${output}" -P "${PROJECT_SOURCE_DIR}/scripts/xxd.cmake"
|
COMMAND "${CMAKE_COMMAND}" "-DINPUT=${input}" "-DOUTPUT=${output}" -P "${PROJECT_SOURCE_DIR}/scripts/xxd.cmake"
|
||||||
)
|
)
|
||||||
endforeach()
|
endforeach()
|
||||||
|
|
||||||
add_executable(${TARGET} ${TARGET_SRCS})
|
add_executable(${TARGET} ${TARGET_SRCS})
|
||||||
install(TARGETS ${TARGET} RUNTIME)
|
install(TARGETS ${TARGET} RUNTIME)
|
||||||
target_compile_definitions(${TARGET} PRIVATE
|
target_compile_definitions(${TARGET} PRIVATE
|
||||||
SERVER_VERBOSE=$<BOOL:${LLAMA_SERVER_VERBOSE}>
|
SERVER_VERBOSE=$<BOOL:${LLAMA_SERVER_VERBOSE}>
|
||||||
)
|
)
|
||||||
|
|
||||||
target_link_libraries(${TARGET} PRIVATE common ${CMAKE_THREAD_LIBS_INIT})
|
target_link_libraries(${TARGET} PRIVATE common ${CMAKE_THREAD_LIBS_INIT})
|
||||||
|
|
||||||
if (LLAMA_SERVER_SSL)
|
if (LLAMA_SERVER_SSL)
|
||||||
find_package(OpenSSL REQUIRED)
|
find_package(OpenSSL REQUIRED)
|
||||||
target_link_libraries(${TARGET} PRIVATE OpenSSL::SSL OpenSSL::Crypto)
|
target_link_libraries(${TARGET} PRIVATE OpenSSL::SSL OpenSSL::Crypto)
|
||||||
target_compile_definitions(${TARGET} PRIVATE CPPHTTPLIB_OPENSSL_SUPPORT)
|
target_compile_definitions(${TARGET} PRIVATE CPPHTTPLIB_OPENSSL_SUPPORT)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if (WIN32)
|
if (WIN32)
|
||||||
TARGET_LINK_LIBRARIES(${TARGET} PRIVATE ws2_32)
|
TARGET_LINK_LIBRARIES(${TARGET} PRIVATE ws2_32)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
target_compile_features(${TARGET} PRIVATE cxx_std_11)
|
target_compile_features(${TARGET} PRIVATE cxx_std_11)
|
||||||
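For reference, a hedged sketch of building and running the server with the optional SSL support enabled (only `--ssl-key-file` appears later in this diff; the certificate flag and file names are assumptions):

```bash
cmake -B build -DLLAMA_SERVER_SSL=ON
cmake --build build --config Release --target llama-server
# run with a PEM key/certificate pair (paths are placeholders)
./build/bin/llama-server -m model.gguf --ssl-key-file server.key --ssl-cert-file server.crt
```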
|
|
|
@ -73,6 +73,7 @@ The project is under active development, and we are [looking for feedback and co
|
||||||
- `-fa`, `--flash-attn` : enable flash attention (default: disabled).
|
- `-fa`, `--flash-attn` : enable flash attention (default: disabled).
|
||||||
- `-ctk TYPE`, `--cache-type-k TYPE` : KV cache data type for K (default: `f16`, options `f32`, `f16`, `q8_0`, `q4_0`, `q4_1`, `iq4_nl`, `q5_0`, or `q5_1`)
|
- `-ctk TYPE`, `--cache-type-k TYPE` : KV cache data type for K (default: `f16`, options `f32`, `f16`, `q8_0`, `q4_0`, `q4_1`, `iq4_nl`, `q5_0`, or `q5_1`)
|
||||||
- `-ctv TYPE`, `--cache-type-v TYPE` : KV cache type for V (default `f16`, see `-ctk` for options)
|
- `-ctv TYPE`, `--cache-type-v TYPE` : KV cache type for V (default `f16`, see `-ctk` for options)
|
||||||
|
- `--spm-infill` : Use Suffix/Prefix/Middle pattern for infill (instead of Prefix/Suffix/Middle) as some models prefer this.
|
||||||
|
|
||||||
**If compiled with `LLAMA_SERVER_SSL=ON`**
|
**If compiled with `LLAMA_SERVER_SSL=ON`**
|
||||||
- `--ssl-key-file FNAME`: path to file a PEM-encoded SSL private key
|
- `--ssl-key-file FNAME`: path to file a PEM-encoded SSL private key
|
||||||
|
|
|
@ -634,12 +634,12 @@ return html`
|
||||||
<div>
|
<div>
|
||||||
<div class="grammar">
|
<div class="grammar">
|
||||||
<label for="template"></label>
|
<label for="template"></label>
|
||||||
<textarea id="grammar" name="grammar" placeholder="Use GBNF or JSON-Scheme + Converter" value="${params.value.grammar}" rows=4 oninput=${updateParams}/>
|
<textarea id="grammar" name="grammar" placeholder="Use GBNF or JSON Schema + Converter" value="${params.value.grammar}" rows=4 oninput=${updateParams}/>
|
||||||
</div>
|
</div>
|
||||||
<div class="grammar-columns">
|
<div class="grammar-columns">
|
||||||
<div class="json-schema-controls">
|
<div class="json-schema-controls">
|
||||||
<input type="text" name="prop-order" placeholder="Order: prop1,prop2,prop3" oninput=${updateGrammarJsonSchemaPropOrder} />
|
<input type="text" name="prop-order" placeholder="Order: prop1,prop2,prop3" oninput=${updateGrammarJsonSchemaPropOrder} />
|
||||||
<button type="button" class="button-grammar" onclick=${convertJSONSchemaGrammar}>Convert JSON-Scheme</button>
|
<button type="button" class="button-grammar" onclick=${convertJSONSchemaGrammar}>Convert JSON Schema</button>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
|
@ -24,6 +24,201 @@ function _buildRepetition(itemRule, minItems, maxItems, opts={}) {
|
||||||
return minItems === 0 ? `(${result})?` : result;
|
return minItems === 0 ? `(${result})?` : result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function _generateMinMaxInt(minValue, maxValue, out, decimalsLeft = 16, topLevel = true) {
|
||||||
|
const hasMin = minValue !== null;
|
||||||
|
const hasMax = maxValue !== null;
|
||||||
|
|
||||||
|
function digitRange(fromChar, toChar) {
|
||||||
|
out.push("[");
|
||||||
|
if (fromChar === toChar) {
|
||||||
|
out.push(fromChar);
|
||||||
|
} else {
|
||||||
|
out.push(fromChar);
|
||||||
|
out.push("-");
|
||||||
|
out.push(toChar);
|
||||||
|
}
|
||||||
|
out.push("]");
|
||||||
|
}
|
||||||
|
|
||||||
|
function moreDigits(minDigits, maxDigits) {
|
||||||
|
out.push("[0-9]");
|
||||||
|
if (minDigits === maxDigits && minDigits === 1) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
out.push("{");
|
||||||
|
out.push(minDigits.toString());
|
||||||
|
if (maxDigits !== minDigits) {
|
||||||
|
out.push(",");
|
||||||
|
if (maxDigits !== Number.MAX_SAFE_INTEGER) {
|
||||||
|
out.push(maxDigits.toString());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
out.push("}");
|
||||||
|
}
|
||||||
|
|
||||||
|
function uniformRange(fromStr, toStr) {
|
||||||
|
let i = 0;
|
||||||
|
while (i < fromStr.length && fromStr[i] === toStr[i]) {
|
||||||
|
i++;
|
||||||
|
}
|
||||||
|
if (i > 0) {
|
||||||
|
out.push("\"");
|
||||||
|
out.push(fromStr.slice(0, i));
|
||||||
|
out.push("\"");
|
||||||
|
}
|
||||||
|
if (i < fromStr.length) {
|
||||||
|
if (i > 0) {
|
||||||
|
out.push(" ");
|
||||||
|
}
|
||||||
|
const subLen = fromStr.length - i - 1;
|
||||||
|
if (subLen > 0) {
|
||||||
|
const fromSub = fromStr.slice(i + 1);
|
||||||
|
const toSub = toStr.slice(i + 1);
|
||||||
|
const subZeros = "0".repeat(subLen);
|
||||||
|
const subNines = "9".repeat(subLen);
|
||||||
|
|
||||||
|
let toReached = false;
|
||||||
|
out.push("(");
|
||||||
|
if (fromSub === subZeros) {
|
||||||
|
digitRange(fromStr[i], String.fromCharCode(toStr.charCodeAt(i) - 1));
|
||||||
|
out.push(" ");
|
||||||
|
moreDigits(subLen, subLen);
|
||||||
|
} else {
|
||||||
|
out.push("[");
|
||||||
|
out.push(fromStr[i]);
|
||||||
|
out.push("] ");
|
||||||
|
out.push("(");
|
||||||
|
uniformRange(fromSub, subNines);
|
||||||
|
out.push(")");
|
||||||
|
if (fromStr.charCodeAt(i) < toStr.charCodeAt(i) - 1) {
|
||||||
|
out.push(" | ");
|
||||||
|
if (toSub === subNines) {
|
||||||
|
digitRange(String.fromCharCode(fromStr.charCodeAt(i) + 1), toStr[i]);
|
||||||
|
toReached = true;
|
||||||
|
} else {
|
||||||
|
digitRange(String.fromCharCode(fromStr.charCodeAt(i) + 1), String.fromCharCode(toStr.charCodeAt(i) - 1));
|
||||||
|
}
|
||||||
|
out.push(" ");
|
||||||
|
moreDigits(subLen, subLen);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (!toReached) {
|
||||||
|
out.push(" | ");
|
||||||
|
digitRange(toStr[i], toStr[i]);
|
||||||
|
out.push(" ");
|
||||||
|
uniformRange(subZeros, toSub);
|
||||||
|
}
|
||||||
|
out.push(")");
|
||||||
|
} else {
|
||||||
|
out.push("[");
|
||||||
|
out.push(fromStr[i]);
|
||||||
|
out.push("-");
|
||||||
|
out.push(toStr[i]);
|
||||||
|
out.push("]");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (hasMin && hasMax) {
|
||||||
|
if (minValue < 0 && maxValue < 0) {
|
||||||
|
out.push("\"-\" (");
|
||||||
|
_generateMinMaxInt(-maxValue, -minValue, out, decimalsLeft, true);
|
||||||
|
out.push(")");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (minValue < 0) {
|
||||||
|
out.push("\"-\" (");
|
||||||
|
_generateMinMaxInt(0, -minValue, out, decimalsLeft, true);
|
||||||
|
out.push(") | ");
|
||||||
|
minValue = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
let minS = minValue.toString();
|
||||||
|
const maxS = maxValue.toString();
|
||||||
|
const minDigits = minS.length;
|
||||||
|
const maxDigits = maxS.length;
|
||||||
|
|
||||||
|
for (let digits = minDigits; digits < maxDigits; digits++) {
|
||||||
|
uniformRange(minS, "9".repeat(digits));
|
||||||
|
minS = "1" + "0".repeat(digits);
|
||||||
|
out.push(" | ");
|
||||||
|
}
|
||||||
|
uniformRange(minS, maxS);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
const lessDecimals = Math.max(decimalsLeft - 1, 1);
|
||||||
|
|
||||||
|
if (hasMin) {
|
||||||
|
if (minValue < 0) {
|
||||||
|
out.push("\"-\" (");
|
||||||
|
_generateMinMaxInt(null, -minValue, out, decimalsLeft, false);
|
||||||
|
out.push(") | [0] | [1-9] ");
|
||||||
|
moreDigits(0, decimalsLeft - 1);
|
||||||
|
} else if (minValue === 0) {
|
||||||
|
if (topLevel) {
|
||||||
|
out.push("[0] | [1-9] ");
|
||||||
|
moreDigits(0, lessDecimals);
|
||||||
|
} else {
|
||||||
|
moreDigits(1, decimalsLeft);
|
||||||
|
}
|
||||||
|
} else if (minValue <= 9) {
|
||||||
|
const c = minValue.toString();
|
||||||
|
const range_start = topLevel ? '1' : '0';
|
||||||
|
if (c > range_start) {
|
||||||
|
digitRange(range_start, String.fromCharCode(c.charCodeAt(0) - 1));
|
||||||
|
out.push(" ");
|
||||||
|
moreDigits(1, lessDecimals);
|
||||||
|
out.push(" | ");
|
||||||
|
}
|
||||||
|
digitRange(c, "9");
|
||||||
|
out.push(" ");
|
||||||
|
moreDigits(0, lessDecimals);
|
||||||
|
} else {
|
||||||
|
const minS = minValue.toString();
|
||||||
|
const length = minS.length;
|
||||||
|
const c = minS[0];
|
||||||
|
|
||||||
|
if (c > "1") {
|
||||||
|
digitRange(topLevel ? "1" : "0", String.fromCharCode(c.charCodeAt(0) - 1));
|
||||||
|
out.push(" ");
|
||||||
|
moreDigits(length, lessDecimals);
|
||||||
|
out.push(" | ");
|
||||||
|
}
|
||||||
|
digitRange(c, c);
|
||||||
|
out.push(" (");
|
||||||
|
_generateMinMaxInt(parseInt(minS.slice(1)), null, out, lessDecimals, false);
|
||||||
|
out.push(")");
|
||||||
|
if (c < "9") {
|
||||||
|
out.push(" | ");
|
||||||
|
digitRange(String.fromCharCode(c.charCodeAt(0) + 1), "9");
|
||||||
|
out.push(" ");
|
||||||
|
moreDigits(length - 1, lessDecimals);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (hasMax) {
|
||||||
|
if (maxValue >= 0) {
|
||||||
|
if (topLevel) {
|
||||||
|
out.push("\"-\" [1-9] ");
|
||||||
|
moreDigits(0, lessDecimals);
|
||||||
|
out.push(" | ");
|
||||||
|
}
|
||||||
|
_generateMinMaxInt(0, maxValue, out, decimalsLeft, true);
|
||||||
|
} else {
|
||||||
|
out.push("\"-\" (");
|
||||||
|
_generateMinMaxInt(-maxValue, null, out, decimalsLeft, false);
|
||||||
|
out.push(")");
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
throw new Error("At least one of minValue or maxValue must be set");
|
||||||
|
}
|
||||||
|
|
||||||
class BuiltinRule {
|
class BuiltinRule {
|
||||||
constructor(content, deps) {
|
constructor(content, deps) {
|
||||||
this.content = content;
|
this.content = content;
|
||||||
|
@ -64,7 +259,7 @@ const GRAMMAR_RANGE_LITERAL_ESCAPE_RE = /[\n\r"\]\-\\]/g;
|
||||||
const GRAMMAR_LITERAL_ESCAPES = { '\r': '\\r', '\n': '\\n', '"': '\\"', '-': '\\-', ']': '\\]' };
|
const GRAMMAR_LITERAL_ESCAPES = { '\r': '\\r', '\n': '\\n', '"': '\\"', '-': '\\-', ']': '\\]' };
|
||||||
|
|
||||||
const NON_LITERAL_SET = new Set('|.()[]{}*+?');
|
const NON_LITERAL_SET = new Set('|.()[]{}*+?');
|
||||||
const ESCAPED_IN_REGEXPS_BUT_NOT_IN_LITERALS = new Set('[]()|{}*+?');
|
const ESCAPED_IN_REGEXPS_BUT_NOT_IN_LITERALS = new Set('^$.[]()|{}*+?');
|
||||||
|
|
||||||
export class SchemaConverter {
|
export class SchemaConverter {
|
||||||
constructor(options) {
|
constructor(options) {
|
||||||
|
@ -337,6 +532,64 @@ export class SchemaConverter {
|
||||||
return this._addRule(name, "\"\\\"\" " + toRule(transform()) + " \"\\\"\" space")
|
return this._addRule(name, "\"\\\"\" " + toRule(transform()) + " \"\\\"\" space")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
_notStrings(strings) {
|
||||||
|
class TrieNode {
|
||||||
|
constructor() {
|
||||||
|
this.children = {};
|
||||||
|
this.isEndOfString = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
insert(str) {
|
||||||
|
let node = this;
|
||||||
|
for (const c of str) {
|
||||||
|
node = node.children[c] = node.children[c] || new TrieNode();
|
||||||
|
}
|
||||||
|
node.isEndOfString = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const trie = new TrieNode();
|
||||||
|
for (const s of strings) {
|
||||||
|
trie.insert(s);
|
||||||
|
}
|
||||||
|
|
||||||
|
const charRuleName = this._addPrimitive('char', PRIMITIVE_RULES['char']);
|
||||||
|
const out = ['["] ( '];
|
||||||
|
|
||||||
|
const visit = (node) => {
|
||||||
|
const rejects = [];
|
||||||
|
let first = true;
|
||||||
|
for (const c of Object.keys(node.children).sort()) {
|
||||||
|
const child = node.children[c];
|
||||||
|
rejects.push(c);
|
||||||
|
if (first) {
|
||||||
|
first = false;
|
||||||
|
} else {
|
||||||
|
out.push(' | ');
|
||||||
|
}
|
||||||
|
out.push(`[${c}]`);
|
||||||
|
if (Object.keys(child.children).length > 0) {
|
||||||
|
out.push(' (');
|
||||||
|
visit(child);
|
||||||
|
out.push(')');
|
||||||
|
} else if (child.isEndOfString) {
|
||||||
|
out.push(` ${charRuleName}+`);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (Object.keys(node.children).length > 0) {
|
||||||
|
if (!first) {
|
||||||
|
out.push(' | ');
|
||||||
|
}
|
||||||
|
out.push(`[^"${rejects.join('')}] ${charRuleName}*`);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
visit(trie);
|
||||||
|
|
||||||
|
out.push(` )${trie.isEndOfString ? '' : '?'} ["] space`);
|
||||||
|
return out.join('');
|
||||||
|
}
|
||||||
|
|
||||||
_resolveRef(ref) {
|
_resolveRef(ref) {
|
||||||
let refName = ref.split('/').pop();
|
let refName = ref.split('/').pop();
|
||||||
if (!(refName in this._rules) && !this._refsBeingResolved.has(ref)) {
|
if (!(refName in this._rules) && !this._refsBeingResolved.has(ref)) {
|
||||||
|
@ -363,11 +616,11 @@ export class SchemaConverter {
|
||||||
} else if (schema.oneOf || schema.anyOf) {
|
} else if (schema.oneOf || schema.anyOf) {
|
||||||
return this._addRule(ruleName, this._generateUnionRule(name, schema.oneOf || schema.anyOf));
|
return this._addRule(ruleName, this._generateUnionRule(name, schema.oneOf || schema.anyOf));
|
||||||
} else if (Array.isArray(schemaType)) {
|
} else if (Array.isArray(schemaType)) {
|
||||||
return this._addRule(ruleName, this._generateUnionRule(name, schemaType.map(t => ({ type: t }))));
|
return this._addRule(ruleName, this._generateUnionRule(name, schemaType.map(t => ({...schema, type: t}))));
|
||||||
} else if ('const' in schema) {
|
} else if ('const' in schema) {
|
||||||
return this._addRule(ruleName, this._generateConstantRule(schema.const));
|
return this._addRule(ruleName, this._generateConstantRule(schema.const) + ' space');
|
||||||
} else if ('enum' in schema) {
|
} else if ('enum' in schema) {
|
||||||
const rule = schema.enum.map(v => this._generateConstantRule(v)).join(' | ');
|
const rule = '(' + schema.enum.map(v => this._generateConstantRule(v)).join(' | ') + ') space';
|
||||||
return this._addRule(ruleName, rule);
|
return this._addRule(ruleName, rule);
|
||||||
} else if ((schemaType === undefined || schemaType === 'object') &&
|
} else if ((schemaType === undefined || schemaType === 'object') &&
|
||||||
('properties' in schema ||
|
('properties' in schema ||
|
||||||
|
@ -404,7 +657,7 @@ export class SchemaConverter {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return this._addRule(ruleName, this._buildObjectRule(properties, required, name, /* additionalProperties= */ false));
|
return this._addRule(ruleName, this._buildObjectRule(properties, required, name, null));
|
||||||
} else if ((schemaType === undefined || schemaType === 'array') && ('items' in schema || 'prefixItems' in schema)) {
|
} else if ((schemaType === undefined || schemaType === 'array') && ('items' in schema || 'prefixItems' in schema)) {
|
||||||
const items = schema.items ?? schema.prefixItems;
|
const items = schema.items ?? schema.prefixItems;
|
||||||
if (Array.isArray(items)) {
|
if (Array.isArray(items)) {
|
||||||
|
@ -435,6 +688,24 @@ export class SchemaConverter {
|
||||||
const minLen = schema.minLength || 0;
|
const minLen = schema.minLength || 0;
|
||||||
const maxLen = schema.maxLength;
|
const maxLen = schema.maxLength;
|
||||||
return this._addRule(ruleName, '"\\\"" ' + _buildRepetition(charRuleName, minLen, maxLen) + ' "\\\"" space');
|
return this._addRule(ruleName, '"\\\"" ' + _buildRepetition(charRuleName, minLen, maxLen) + ' "\\\"" space');
|
||||||
|
} else if (schemaType === 'integer' && ('minimum' in schema || 'exclusiveMinimum' in schema || 'maximum' in schema || 'exclusiveMaximum' in schema)) {
|
||||||
|
let minValue = null;
|
||||||
|
let maxValue = null;
|
||||||
|
if ('minimum' in schema) {
|
||||||
|
minValue = schema.minimum;
|
||||||
|
} else if ('exclusiveMinimum' in schema) {
|
||||||
|
minValue = schema.exclusiveMinimum + 1;
|
||||||
|
}
|
||||||
|
if ('maximum' in schema) {
|
||||||
|
maxValue = schema.maximum;
|
||||||
|
} else if ('exclusiveMaximum' in schema) {
|
||||||
|
maxValue = schema.exclusiveMaximum - 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
const out = ["("];
|
||||||
|
_generateMinMaxInt(minValue, maxValue, out);
|
||||||
|
out.push(") space");
|
||||||
|
return this._addRule(ruleName, out.join(''));
|
||||||
} else if ((schemaType === 'object') || (Object.keys(schema).length === 0)) {
|
} else if ((schemaType === 'object') || (Object.keys(schema).length === 0)) {
|
||||||
return this._addRule(ruleName, this._addPrimitive('object', PRIMITIVE_RULES['object']));
|
return this._addRule(ruleName, this._addPrimitive('object', PRIMITIVE_RULES['object']));
|
||||||
} else {
|
} else {
|
||||||
|
@ -480,12 +751,19 @@ export class SchemaConverter {
|
||||||
const requiredProps = sortedProps.filter(k => required.has(k));
|
const requiredProps = sortedProps.filter(k => required.has(k));
|
||||||
const optionalProps = sortedProps.filter(k => !required.has(k));
|
const optionalProps = sortedProps.filter(k => !required.has(k));
|
||||||
|
|
||||||
if (typeof additionalProperties === 'object' || additionalProperties === true) {
|
if (additionalProperties) {
|
||||||
const subName = `${name ?? ''}${name ? '-' : ''}additional`;
|
const subName = `${name ?? ''}${name ? '-' : ''}additional`;
|
||||||
const valueRule = this.visit(additionalProperties === true ? {} : additionalProperties, `${subName}-value`);
|
const valueRule =
|
||||||
|
additionalProperties != null && typeof additionalProperties === 'object' ? this.visit(additionalProperties, `${subName}-value`)
|
||||||
|
: this._addPrimitive('value', PRIMITIVE_RULES['value']);
|
||||||
|
|
||||||
|
const key_rule =
|
||||||
|
sortedProps.length === 0 ? this._addPrimitive('string', PRIMITIVE_RULES['string'])
|
||||||
|
: this._addRule(`${subName}-k`, this._notStrings(sortedProps));
|
||||||
|
|
||||||
propKvRuleNames['*'] = this._addRule(
|
propKvRuleNames['*'] = this._addRule(
|
||||||
`${subName}-kv`,
|
`${subName}-kv`,
|
||||||
`${this._addPrimitive('string', PRIMITIVE_RULES['string'])} ":" space ${valueRule}`);
|
`${key_rule} ":" space ${valueRule}`);
|
||||||
optionalProps.push('*');
|
optionalProps.push('*');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -502,15 +780,11 @@ export class SchemaConverter {
|
||||||
const [k, ...rest] = ks;
|
const [k, ...rest] = ks;
|
||||||
const kvRuleName = propKvRuleNames[k];
|
const kvRuleName = propKvRuleNames[k];
|
||||||
let res;
|
let res;
|
||||||
if (k === '*') {
|
const commaRef = `( "," space ${kvRuleName} )`;
|
||||||
res = this._addRule(
|
if (firstIsOptional) {
|
||||||
`${name ?? ''}${name ? '-' : ''}additional-kvs`,
|
res = commaRef + (k === '*' ? '*' : '?');
|
||||||
`${kvRuleName} ( "," space ` + kvRuleName + ` )*`
|
|
||||||
)
|
|
||||||
} else if (firstIsOptional) {
|
|
||||||
res = `( "," space ${kvRuleName} )?`;
|
|
||||||
} else {
|
} else {
|
||||||
res = kvRuleName;
|
res = kvRuleName + (k === '*' ? ' ' + commaRef + '*' : '');
|
||||||
}
|
}
|
||||||
if (rest.length > 0) {
|
if (rest.length > 0) {
|
||||||
res += ' ' + this._addRule(
|
res += ' ' + this._addRule(
|
||||||
|
|
|
@ -3,6 +3,13 @@
|
||||||
|
|
||||||
by Humans for All.
|
by Humans for All.
|
||||||
|
|
||||||
|
## quickstart
|
||||||
|
|
||||||
|
To run from the build dir
|
||||||
|
|
||||||
|
bin/llama-server -m path/model.gguf --path ../examples/server/public_simplechat
|
||||||
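The same quickstart as a copy-pasteable block (the URL assumes llama-server's default 127.0.0.1:8080 binding):

```bash
bin/llama-server -m path/model.gguf --path ../examples/server/public_simplechat
# then open the web ui in a browser
xdg-open http://127.0.0.1:8080
```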
|
|
||||||
|
Continue reading for the details.
|
||||||
|
|
||||||
## overview
|
## overview
|
||||||
|
|
||||||
|
@ -14,6 +21,8 @@ own system prompts.
|
||||||
This allows seeing the generated text / ai-model response in one shot at the end, after it is fully generated,
|
This allows seeing the generated text / ai-model response in one shot at the end, after it is fully generated,
|
||||||
or potentially as it is being generated, in a streamed manner from the server/ai-model.
|
or potentially as it is being generated, in a streamed manner from the server/ai-model.
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
Auto saves the chat session locally as and when the chat is progressing and in turn at a later time when you
|
Auto saves the chat session locally as and when the chat is progressing and in turn at a later time when you
|
||||||
open SimpleChat, option is provided to restore the old chat session, if a matching one exists.
|
open SimpleChat, option is provided to restore the old chat session, if a matching one exists.
|
||||||
|
|
||||||
|
@ -170,17 +179,23 @@ It is attached to the document object. Some of these can also be updated using t
|
||||||
The histogram/freq based trimming logic is currently tuned for the English language wrt its
|
The histogram/freq based trimming logic is currently tuned for the English language wrt its
|
||||||
is-it-an-alphabetic|numeral-char regex match logic.
|
is-it-an-alphabetic|numeral-char regex match logic.
|
||||||
|
|
||||||
chatRequestOptions - maintains the list of options/fields to send along with chat request,
|
apiRequestOptions - maintains the list of options/fields to send along with api request,
|
||||||
irrespective of whether /chat/completions or /completions endpoint.
|
irrespective of whether /chat/completions or /completions endpoint.
|
||||||
|
|
||||||
If you want to add additional options/fields to send to the server/ai-model, and or
|
If you want to add additional options/fields to send to the server/ai-model, and or
|
||||||
modify the existing options value or remove them, for now you can update this global var
|
modify the existing options value or remove them, for now you can update this global var
|
||||||
using browser's development-tools/console.
|
using browser's development-tools/console.
|
||||||
|
|
||||||
For string and numeric fields in chatRequestOptions, including even those added by a user
|
For string, numeric and boolean fields in apiRequestOptions, including even those added by a
|
||||||
at runtime by directly modifying gMe.chatRequestOptions, setting ui entries will be auto
|
user at runtime by directly modifying gMe.apiRequestOptions, setting ui entries will be auto
|
||||||
created.
|
created.
|
||||||
|
|
||||||
|
The cache_prompt option supported by example/server can be controlled by the user, so that
|
||||||
|
any caching of the system prompt and chat history, where usable, gets used. When the chat
|
||||||
|
history sliding window is enabled, the cache_prompt logic may or may not kick in at the backend,
|
||||||
|
depending on the model, positional encoding, attention mechanism, et al.
|
||||||
|
However, the system prompt should ideally get the benefit of caching.
|
||||||
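For reference, a hedged sketch of the request body these apiRequestOptions fields end up producing (server URL and endpoint path are assumptions matching the defaults discussed in this README; the field values mirror the defaults set in simplechat.js):

```bash
curl http://127.0.0.1:8080/chat/completions -H "Content-Type: application/json" -d '{
  "model": "gpt-3.5-turbo",
  "temperature": 0.7,
  "max_tokens": 1024,
  "n_predict": 1024,
  "cache_prompt": false,
  "messages": [{"role": "user", "content": "Hello"}]
}'
```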
|
|
||||||
headers - maintains the list of http headers sent when request is made to the server. By default
|
headers - maintains the list of http headers sent when request is made to the server. By default
|
||||||
Content-Type is set to application/json. Additionally Authorization entry is provided, which can
|
Content-Type is set to application/json. Additionally Authorization entry is provided, which can
|
||||||
be set if needed using the settings ui.
|
be set if needed using the settings ui.
|
||||||
|
@ -197,10 +212,10 @@ It is attached to the document object. Some of these can also be updated using t
|
||||||
>0 : Send the latest chat history from the latest system prompt, limited to specified cnt.
|
>0 : Send the latest chat history from the latest system prompt, limited to specified cnt.
|
||||||
|
|
||||||
|
|
||||||
By using gMe's iRecentUserMsgCnt and chatRequestOptions.max_tokens one can try to control the
|
By using gMe's iRecentUserMsgCnt and apiRequestOptions.max_tokens/n_predict one can try to control
|
||||||
implications of loading of the ai-model's context window by chat history, wrt chat response to
|
the implications of loading of the ai-model's context window by chat history, wrt chat response to
|
||||||
some extent in a simple crude way. You may also want to control the context size enabled when
|
some extent in a simple crude way. You may also want to control the context size enabled when the
|
||||||
the server loads ai-model, on the server end.
|
server loads ai-model, on the server end.
|
||||||
|
|
||||||
|
|
||||||
Sometimes the browser may be stubborn with caching of the file, so your updates to html/css/js
|
Sometimes the browser may be stubborn with caching of the file, so your updates to html/css/js
|
||||||
|
@ -237,12 +252,12 @@ also be started with a model context size of 1k or more, to be on safe side.
|
||||||
internal n_predict, for now add the same here on the client side, maybe later add max_tokens
|
internal n_predict, for now add the same here on the client side, maybe later add max_tokens
|
||||||
to /completions endpoint handling code on server side.
|
to /completions endpoint handling code on server side.
|
||||||
|
|
||||||
NOTE: One may want to experiment with frequency/presence penalty fields in chatRequestOptions
|
NOTE: One may want to experiment with frequency/presence penalty fields in apiRequestOptions
|
||||||
wrt the set of fields sent to server along with the user query. To check how the model behaves
|
wrt the set of fields sent to server along with the user query, to check how the model behaves
|
||||||
wrt repetitions in general in the generated text response.
|
wrt repetitions in general in the generated text response.
|
||||||
|
|
||||||
An end-user can change this behaviour by editing gMe from the browser's devel-tool/console or by
|
An end-user can change this behaviour by editing gMe from the browser's devel-tool/console or by
|
||||||
using the providing settings ui.
|
using the provided settings ui (for settings exposed through the ui).
|
||||||
|
|
||||||
|
|
||||||
### OpenAi / Equivalent API WebService
|
### OpenAi / Equivalent API WebService
|
||||||
|
@ -253,7 +268,7 @@ for a minimal chatting experimentation by setting the below.
|
||||||
* the baseUrl in settings ui
|
* the baseUrl in settings ui
|
||||||
* https://api.openai.com/v1 or similar
|
* https://api.openai.com/v1 or similar
|
||||||
|
|
||||||
* Wrt request body - gMe.chatRequestOptions
|
* Wrt request body - gMe.apiRequestOptions
|
||||||
* model (settings ui)
|
* model (settings ui)
|
||||||
* any additional fields if required in future
|
* any additional fields if required in future
|
||||||
|
|
||||||
|
|
|
@ -222,8 +222,8 @@ class SimpleChat {
|
||||||
* @param {Object} obj
|
* @param {Object} obj
|
||||||
*/
|
*/
|
||||||
request_jsonstr_extend(obj) {
|
request_jsonstr_extend(obj) {
|
||||||
for(let k in gMe.chatRequestOptions) {
|
for(let k in gMe.apiRequestOptions) {
|
||||||
obj[k] = gMe.chatRequestOptions[k];
|
obj[k] = gMe.apiRequestOptions[k];
|
||||||
}
|
}
|
||||||
if (gMe.bStream) {
|
if (gMe.bStream) {
|
||||||
obj["stream"] = true;
|
obj["stream"] = true;
|
||||||
|
@ -740,11 +740,12 @@ class Me {
|
||||||
"Authorization": "", // Authorization: Bearer OPENAI_API_KEY
|
"Authorization": "", // Authorization: Bearer OPENAI_API_KEY
|
||||||
}
|
}
|
||||||
// Add needed fields wrt json object to be sent wrt LLM web services completions endpoint.
|
// Add needed fields wrt json object to be sent wrt LLM web services completions endpoint.
|
||||||
this.chatRequestOptions = {
|
this.apiRequestOptions = {
|
||||||
"model": "gpt-3.5-turbo",
|
"model": "gpt-3.5-turbo",
|
||||||
"temperature": 0.7,
|
"temperature": 0.7,
|
||||||
"max_tokens": 1024,
|
"max_tokens": 1024,
|
||||||
"n_predict": 1024,
|
"n_predict": 1024,
|
||||||
|
"cache_prompt": false,
|
||||||
//"frequency_penalty": 1.2,
|
//"frequency_penalty": 1.2,
|
||||||
//"presence_penalty": 1.2,
|
//"presence_penalty": 1.2,
|
||||||
};
|
};
|
||||||
|
@ -800,51 +801,55 @@ class Me {
|
||||||
|
|
||||||
ui.el_create_append_p(`bStream:${this.bStream}`, elDiv);
|
ui.el_create_append_p(`bStream:${this.bStream}`, elDiv);
|
||||||
|
|
||||||
|
ui.el_create_append_p(`bTrimGarbage:${this.bTrimGarbage}`, elDiv);
|
||||||
|
|
||||||
|
ui.el_create_append_p(`ApiEndPoint:${this.apiEP}`, elDiv);
|
||||||
|
|
||||||
|
ui.el_create_append_p(`iRecentUserMsgCnt:${this.iRecentUserMsgCnt}`, elDiv);
|
||||||
|
|
||||||
ui.el_create_append_p(`bCompletionFreshChatAlways:${this.bCompletionFreshChatAlways}`, elDiv);
|
ui.el_create_append_p(`bCompletionFreshChatAlways:${this.bCompletionFreshChatAlways}`, elDiv);
|
||||||
|
|
||||||
ui.el_create_append_p(`bCompletionInsertStandardRolePrefix:${this.bCompletionInsertStandardRolePrefix}`, elDiv);
|
ui.el_create_append_p(`bCompletionInsertStandardRolePrefix:${this.bCompletionInsertStandardRolePrefix}`, elDiv);
|
||||||
|
|
||||||
ui.el_create_append_p(`bTrimGarbage:${this.bTrimGarbage}`, elDiv);
|
|
||||||
|
|
||||||
ui.el_create_append_p(`iRecentUserMsgCnt:${this.iRecentUserMsgCnt}`, elDiv);
|
|
||||||
|
|
||||||
ui.el_create_append_p(`ApiEndPoint:${this.apiEP}`, elDiv);
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
ui.el_create_append_p(`chatRequestOptions:${JSON.stringify(this.chatRequestOptions, null, " - ")}`, elDiv);
|
ui.el_create_append_p(`apiRequestOptions:${JSON.stringify(this.apiRequestOptions, null, " - ")}`, elDiv);
|
||||||
ui.el_create_append_p(`headers:${JSON.stringify(this.headers, null, " - ")}`, elDiv);
|
ui.el_create_append_p(`headers:${JSON.stringify(this.headers, null, " - ")}`, elDiv);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Auto create ui input elements for fields in ChatRequestOptions
|
* Auto create ui input elements for fields in apiRequestOptions
|
||||||
* Currently supports text and number field types.
|
* Currently supports text and number field types.
|
||||||
* @param {HTMLDivElement} elDiv
|
* @param {HTMLDivElement} elDiv
|
||||||
*/
|
*/
|
||||||
show_settings_chatrequestoptions(elDiv) {
|
show_settings_apirequestoptions(elDiv) {
|
||||||
let typeDict = {
|
let typeDict = {
|
||||||
"string": "text",
|
"string": "text",
|
||||||
"number": "number",
|
"number": "number",
|
||||||
};
|
};
|
||||||
let fs = document.createElement("fieldset");
|
let fs = document.createElement("fieldset");
|
||||||
let legend = document.createElement("legend");
|
let legend = document.createElement("legend");
|
||||||
legend.innerText = "ChatRequestOptions";
|
legend.innerText = "ApiRequestOptions";
|
||||||
fs.appendChild(legend);
|
fs.appendChild(legend);
|
||||||
elDiv.appendChild(fs);
|
elDiv.appendChild(fs);
|
||||||
for(const k in this.chatRequestOptions) {
|
for(const k in this.apiRequestOptions) {
|
||||||
let val = this.chatRequestOptions[k];
|
let val = this.apiRequestOptions[k];
|
||||||
let type = typeof(val);
|
let type = typeof(val);
|
||||||
if (!((type == "string") || (type == "number"))) {
|
if (((type == "string") || (type == "number"))) {
|
||||||
continue;
|
let inp = ui.el_creatediv_input(`Set${k}`, k, typeDict[type], this.apiRequestOptions[k], (val)=>{
|
||||||
|
if (type == "number") {
|
||||||
|
val = Number(val);
|
||||||
|
}
|
||||||
|
this.apiRequestOptions[k] = val;
|
||||||
|
});
|
||||||
|
fs.appendChild(inp.div);
|
||||||
|
} else if (type == "boolean") {
|
||||||
|
let bbtn = ui.el_creatediv_boolbutton(`Set${k}`, k, {true: "true", false: "false"}, val, (userVal)=>{
|
||||||
|
this.apiRequestOptions[k] = userVal;
|
||||||
|
});
|
||||||
|
fs.appendChild(bbtn.div);
|
||||||
}
|
}
|
||||||
let inp = ui.el_creatediv_input(`Set${k}`, k, typeDict[type], this.chatRequestOptions[k], (val)=>{
|
|
||||||
if (type == "number") {
|
|
||||||
val = Number(val);
|
|
||||||
}
|
|
||||||
this.chatRequestOptions[k] = val;
|
|
||||||
});
|
|
||||||
fs.appendChild(inp.div);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -870,6 +875,23 @@ class Me {
|
||||||
});
|
});
|
||||||
elDiv.appendChild(bb.div);
|
elDiv.appendChild(bb.div);
|
||||||
|
|
||||||
|
bb = ui.el_creatediv_boolbutton("SetTrimGarbage", "TrimGarbage", {true: "[+] yes trim", false: "[-] dont trim"}, this.bTrimGarbage, (val)=>{
|
||||||
|
this.bTrimGarbage = val;
|
||||||
|
});
|
||||||
|
elDiv.appendChild(bb.div);
|
||||||
|
|
||||||
|
this.show_settings_apirequestoptions(elDiv);
|
||||||
|
|
||||||
|
let sel = ui.el_creatediv_select("SetApiEP", "ApiEndPoint", ApiEP.Type, this.apiEP, (val)=>{
|
||||||
|
this.apiEP = ApiEP.Type[val];
|
||||||
|
});
|
||||||
|
elDiv.appendChild(sel.div);
|
||||||
|
|
||||||
|
sel = ui.el_creatediv_select("SetChatHistoryInCtxt", "ChatHistoryInCtxt", this.sRecentUserMsgCnt, this.iRecentUserMsgCnt, (val)=>{
|
||||||
|
this.iRecentUserMsgCnt = this.sRecentUserMsgCnt[val];
|
||||||
|
});
|
||||||
|
elDiv.appendChild(sel.div);
|
||||||
|
|
||||||
bb = ui.el_creatediv_boolbutton("SetCompletionFreshChatAlways", "CompletionFreshChatAlways", {true: "[+] yes fresh", false: "[-] no, with history"}, this.bCompletionFreshChatAlways, (val)=>{
|
bb = ui.el_creatediv_boolbutton("SetCompletionFreshChatAlways", "CompletionFreshChatAlways", {true: "[+] yes fresh", false: "[-] no, with history"}, this.bCompletionFreshChatAlways, (val)=>{
|
||||||
this.bCompletionFreshChatAlways = val;
|
this.bCompletionFreshChatAlways = val;
|
||||||
});
|
});
|
||||||
|
@ -880,23 +902,6 @@ class Me {
|
||||||
});
|
});
|
||||||
elDiv.appendChild(bb.div);
|
elDiv.appendChild(bb.div);
|
||||||
|
|
||||||
bb = ui.el_creatediv_boolbutton("SetTrimGarbage", "TrimGarbage", {true: "[+] yes trim", false: "[-] dont trim"}, this.bTrimGarbage, (val)=>{
|
|
||||||
this.bTrimGarbage = val;
|
|
||||||
});
|
|
||||||
elDiv.appendChild(bb.div);
|
|
||||||
|
|
||||||
let sel = ui.el_creatediv_select("SetChatHistoryInCtxt", "ChatHistoryInCtxt", this.sRecentUserMsgCnt, this.iRecentUserMsgCnt, (val)=>{
|
|
||||||
this.iRecentUserMsgCnt = this.sRecentUserMsgCnt[val];
|
|
||||||
});
|
|
||||||
elDiv.appendChild(sel.div);
|
|
||||||
|
|
||||||
sel = ui.el_creatediv_select("SetApiEP", "ApiEndPoint", ApiEP.Type, this.apiEP, (val)=>{
|
|
||||||
this.apiEP = ApiEP.Type[val];
|
|
||||||
});
|
|
||||||
elDiv.appendChild(sel.div);
|
|
||||||
|
|
||||||
this.show_settings_chatrequestoptions(elDiv);
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
BIN
examples/server/public_simplechat/simplechat_screens.webp
Normal file
Binary file not shown. (21 KiB)
|
@ -2020,6 +2020,7 @@ struct server_context {
|
||||||
slot.t_start_generation = 0;
|
slot.t_start_generation = 0;
|
||||||
|
|
||||||
if (slot.infill) {
|
if (slot.infill) {
|
||||||
|
const bool add_bos = llama_should_add_bos_token(model);
|
||||||
bool suff_rm_leading_spc = true;
|
bool suff_rm_leading_spc = true;
|
||||||
if (params.input_suffix.find_first_of(' ') == 0 && params.input_suffix.size() > 1) {
|
if (params.input_suffix.find_first_of(' ') == 0 && params.input_suffix.size() > 1) {
|
||||||
params.input_suffix.erase(0, 1);
|
params.input_suffix.erase(0, 1);
|
||||||
|
@ -2035,16 +2036,21 @@ struct server_context {
|
||||||
}
|
}
|
||||||
|
|
||||||
prefix_tokens.insert(prefix_tokens.begin(), llama_token_prefix(model));
|
prefix_tokens.insert(prefix_tokens.begin(), llama_token_prefix(model));
|
||||||
prefix_tokens.insert(prefix_tokens.begin(), llama_token_bos(model)); // always add BOS
|
suffix_tokens.insert(suffix_tokens.begin(), llama_token_suffix(model));
|
||||||
prefix_tokens.insert(prefix_tokens.end(), llama_token_suffix(model));
|
|
||||||
prefix_tokens.insert(prefix_tokens.end(), suffix_tokens.begin(), suffix_tokens.end());
|
auto embd_inp = params.spm_infill ? suffix_tokens : prefix_tokens;
|
||||||
|
auto embd_end = params.spm_infill ? prefix_tokens : suffix_tokens;
|
||||||
|
if (add_bos) {
|
||||||
|
embd_inp.insert(embd_inp.begin(), llama_token_bos(model));
|
||||||
|
}
|
||||||
|
embd_inp.insert(embd_inp.end(), embd_end.begin(), embd_end.end());
|
||||||
|
|
||||||
const llama_token middle_token = llama_token_middle(model);
|
const llama_token middle_token = llama_token_middle(model);
|
||||||
if (middle_token >= 0) {
|
if (middle_token >= 0) {
|
||||||
prefix_tokens.push_back(middle_token);
|
embd_inp.push_back(middle_token);
|
||||||
}
|
}
|
||||||
|
|
||||||
prompt_tokens = prefix_tokens;
|
prompt_tokens = embd_inp;
|
||||||
} else {
|
} else {
|
||||||
prompt_tokens = tokenize(slot.prompt, system_prompt.empty()); // add BOS if there isn't system prompt
|
prompt_tokens = tokenize(slot.prompt, system_prompt.empty()); // add BOS if there isn't system prompt
|
||||||
}
|
}
|
||||||
|
@ -2606,17 +2612,9 @@ int main(int argc, char ** argv) {
|
||||||
|
|
||||||
// print sample chat example to make it clear which template is used
|
// print sample chat example to make it clear which template is used
|
||||||
{
|
{
|
||||||
json chat;
|
|
||||||
chat.push_back({{"role", "system"}, {"content", "You are a helpful assistant"}});
|
|
||||||
chat.push_back({{"role", "user"}, {"content", "Hello"}});
|
|
||||||
chat.push_back({{"role", "assistant"}, {"content", "Hi there"}});
|
|
||||||
chat.push_back({{"role", "user"}, {"content", "How are you?"}});
|
|
||||||
|
|
||||||
const std::string chat_example = format_chat(ctx_server.model, params.chat_template, chat);
|
|
||||||
|
|
||||||
LOG_INFO("chat template", {
|
LOG_INFO("chat template", {
|
||||||
{"chat_example", chat_example},
|
{"chat_example", llama_chat_format_example(ctx_server.model, params.chat_template)},
|
||||||
{"built_in", params.chat_template.empty()},
|
{"built_in", params.chat_template.empty()},
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -52,4 +52,3 @@ Feature: Passkey / Self-extend with context shift
|
||||||
#| TheBloke/Llama-2-7B-GGUF | llama-2-7b.Q2_K.gguf | 4096 | 3 | 16384 | 512 | 4 | 512 | 500 | 300 | 1234 | 5 | 1234 |
|
#| TheBloke/Llama-2-7B-GGUF | llama-2-7b.Q2_K.gguf | 4096 | 3 | 16384 | 512 | 4 | 512 | 500 | 300 | 1234 | 5 | 1234 |
|
||||||
#| TheBloke/Mixtral-8x7B-v0.1-GGUF | mixtral-8x7b-v0.1.Q2_K.gguf | 32768 | 2 | 16384 | 512 | 4 | 512 | 500 | 100 | 0987 | 5 | 0
|
#| TheBloke/Mixtral-8x7B-v0.1-GGUF | mixtral-8x7b-v0.1.Q2_K.gguf | 32768 | 2 | 16384 | 512 | 4 | 512 | 500 | 100 | 0987 | 5 | 0
|
||||||
# 987 |
|
# 987 |
|
||||||
|
|
||||||
|
|
|
@ -82,7 +82,7 @@ Feature: llama.cpp server
|
||||||
|
|
||||||
Examples: Prompts
|
Examples: Prompts
|
||||||
| response_format | n_predicted | re_content |
|
| response_format | n_predicted | re_content |
|
||||||
| {"type": "json_object", "schema": {"const": "42"}} | 5 | "42" |
|
| {"type": "json_object", "schema": {"const": "42"}} | 6 | "42" |
|
||||||
| {"type": "json_object", "schema": {"items": [{"type": "integer"}]}} | 10 | \[ -300 \] |
|
| {"type": "json_object", "schema": {"items": [{"type": "integer"}]}} | 10 | \[ -300 \] |
|
||||||
| {"type": "json_object"} | 10 | \{ " Jacky. |
|
| {"type": "json_object"} | 10 | \{ " Jacky. |
|
||||||
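The new integer minimum/maximum handling in the JSON-schema-to-grammar converters can be exercised through the same response_format mechanism these rows use; a hedged sketch (endpoint path and schema are illustrative, not taken from this diff):

```bash
curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
  "messages": [{"role": "user", "content": "Pick a day of the month"}],
  "response_format": {"type": "json_object", "schema": {"type": "integer", "minimum": 1, "maximum": 31}}
}'
```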
|
|
||||||
|
|
|
@ -1054,4 +1054,3 @@
|
||||||
</body>
|
</body>
|
||||||
|
|
||||||
</html>
|
</html>
|
||||||
|
|
||||||
|
|
|
@ -1058,4 +1058,3 @@
|
||||||
</body>
|
</body>
|
||||||
|
|
||||||
</html>
|
</html>
|
||||||
|
|
||||||
|
|
|
@ -118,36 +118,17 @@ static inline void server_log(const char * level, const char * function, int lin
|
||||||
|
|
||||||
// Format given chat. If tmpl is empty, we take the template from model metadata
|
// Format given chat. If tmpl is empty, we take the template from model metadata
|
||||||
inline std::string format_chat(const struct llama_model * model, const std::string & tmpl, const std::vector<json> & messages) {
|
inline std::string format_chat(const struct llama_model * model, const std::string & tmpl, const std::vector<json> & messages) {
|
||||||
size_t alloc_size = 0;
|
std::vector<llama_chat_msg> chat;
|
||||||
// vector holding all allocated string to be passed to llama_chat_apply_template
|
|
||||||
std::vector<std::string> str(messages.size() * 2);
|
|
||||||
std::vector<llama_chat_message> chat(messages.size());
|
|
||||||
|
|
||||||
for (size_t i = 0; i < messages.size(); ++i) {
|
for (size_t i = 0; i < messages.size(); ++i) {
|
||||||
const auto & curr_msg = messages[i];
|
const auto & curr_msg = messages[i];
|
||||||
str[i*2 + 0] = json_value(curr_msg, "role", std::string(""));
|
std::string role = json_value(curr_msg, "role", std::string(""));
|
||||||
str[i*2 + 1] = json_value(curr_msg, "content", std::string(""));
|
std::string content = json_value(curr_msg, "content", std::string(""));
|
||||||
alloc_size += str[i*2 + 1].length();
|
chat.push_back({role, content});
|
||||||
chat[i].role = str[i*2 + 0].c_str();
|
|
||||||
chat[i].content = str[i*2 + 1].c_str();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
const char * ptr_tmpl = tmpl.empty() ? nullptr : tmpl.c_str();
|
auto formatted_chat = llama_chat_apply_template(model, tmpl, chat, true);
|
||||||
std::vector<char> buf(alloc_size * 2);
|
|
||||||
|
|
||||||
// run the first time to get the total output length
|
|
||||||
int32_t res = llama_chat_apply_template(model, ptr_tmpl, chat.data(), chat.size(), true, buf.data(), buf.size());
|
|
||||||
|
|
||||||
// if it turns out that our buffer is too small, we resize it
|
|
||||||
if ((size_t) res > buf.size()) {
|
|
||||||
buf.resize(res);
|
|
||||||
res = llama_chat_apply_template(model, ptr_tmpl, chat.data(), chat.size(), true, buf.data(), buf.size());
|
|
||||||
}
|
|
||||||
|
|
||||||
const std::string formatted_chat(buf.data(), res);
|
|
||||||
|
|
||||||
LOG_VERBOSE("formatted_chat", {{"text", formatted_chat.c_str()}});
|
LOG_VERBOSE("formatted_chat", {{"text", formatted_chat.c_str()}});
|
||||||
|
|
||||||
return formatted_chat;
|
return formatted_chat;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -8,10 +8,10 @@ cd build
|
||||||
source /opt/intel/oneapi/setvars.sh
|
source /opt/intel/oneapi/setvars.sh
|
||||||
|
|
||||||
#for FP16
|
#for FP16
|
||||||
#cmake .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_SYCL_F16=ON # faster for long-prompt inference
|
#cmake .. -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON # faster for long-prompt inference
|
||||||
|
|
||||||
#for FP32
|
#for FP32
|
||||||
cmake .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
|
cmake .. -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
|
||||||
|
|
||||||
#build example/main
|
#build example/main
|
||||||
#cmake --build . --config Release --target main
|
#cmake --build . --config Release --target main
|
||||||
|
|
|
@ -34,4 +34,3 @@ fi
|
||||||
|
|
||||||
#use multiple GPUs with same max compute units
|
#use multiple GPUs with same max compute units
|
||||||
#ZES_ENABLE_SYSMAN=1 ./build/bin/llama-cli -m models/llama-2-7b.Q4_0.gguf -p "${INPUT2}" -n 400 -e -ngl 33 -s 0
|
#ZES_ENABLE_SYSMAN=1 ./build/bin/llama-cli -m models/llama-2-7b.Q4_0.gguf -p "${INPUT2}" -n 400 -e -ngl 33 -s 0
|
||||||
|
|
||||||
|
|
|
@ -13,16 +13,16 @@ if %errorlevel% neq 0 goto ERROR
|
||||||
|
|
||||||
:: for FP16
|
:: for FP16
|
||||||
:: faster for long-prompt inference
|
:: faster for long-prompt inference
|
||||||
:: cmake -G "MinGW Makefiles" .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icx -DBUILD_SHARED_LIBS=ON -DCMAKE_BUILD_TYPE=Release -DLLAMA_SYCL_F16=ON
|
:: cmake -G "MinGW Makefiles" .. -DGGML_SYCL=ON -DCMAKE_CXX_COMPILER=icx -DBUILD_SHARED_LIBS=ON -DCMAKE_BUILD_TYPE=Release -DGGML_SYCL_F16=ON
|
||||||
|
|
||||||
:: for FP32
|
:: for FP32
|
||||||
cmake -G "MinGW Makefiles" .. -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icx -DBUILD_SHARED_LIBS=ON -DCMAKE_BUILD_TYPE=Release
|
cmake -G "Ninja" .. -DGGML_SYCL=ON -DCMAKE_C_COMPILER=cl -DCMAKE_CXX_COMPILER=icx -DBUILD_SHARED_LIBS=ON -DCMAKE_BUILD_TYPE=Release
|
||||||
if %errorlevel% neq 0 goto ERROR
|
if %errorlevel% neq 0 goto ERROR
|
||||||
:: build example/main only
|
:: build example/main only
|
||||||
:: make main
|
:: make main
|
||||||
|
|
||||||
:: build all binary
|
:: build all binary
|
||||||
make -j
|
cmake --build . -j
|
||||||
if %errorlevel% neq 0 goto ERROR
|
if %errorlevel% neq 0 goto ERROR
|
||||||
|
|
||||||
cd ..
|
cd ..
|
||||||
|
@ -31,4 +31,3 @@ exit /B 0
|
||||||
:ERROR
|
:ERROR
|
||||||
echo command error: %errorlevel%
|
echo command error: %errorlevel%
|
||||||
exit /B %errorlevel%
|
exit /B %errorlevel%
|
||||||
|
|
||||||
|
|
|
@ -7,5 +7,3 @@ set INPUT2="Building a website can be done in 10 simple steps:\nStep 1:"
|
||||||
|
|
||||||
|
|
||||||
.\build\bin\main.exe -m models\llama-2-7b.Q4_0.gguf -p %INPUT2% -n 400 -e -ngl 33 -s 0
|
.\build\bin\main.exe -m models\llama-2-7b.Q4_0.gguf -p %INPUT2% -n 400 -e -ngl 33 -s 0
|
||||||
|
|
||||||
|
|
||||||
|
|
6
flake.lock
generated
|
@ -20,11 +20,11 @@
|
||||||
},
|
},
|
||||||
"nixpkgs": {
|
"nixpkgs": {
|
||||||
"locked": {
|
"locked": {
|
||||||
"lastModified": 1718318537,
|
"lastModified": 1719506693,
|
||||||
"narHash": "sha256-4Zu0RYRcAY/VWuu6awwq4opuiD//ahpc2aFHg2CWqFY=",
|
"narHash": "sha256-C8e9S7RzshSdHB7L+v9I51af1gDM5unhJ2xO1ywxNH8=",
|
||||||
"owner": "NixOS",
|
"owner": "NixOS",
|
||||||
"repo": "nixpkgs",
|
"repo": "nixpkgs",
|
||||||
"rev": "e9ee548d90ff586a6471b4ae80ae9cfcbceb3420",
|
"rev": "b2852eb9365c6de48ffb0dc2c9562591f652242a",
|
||||||
"type": "github"
|
"type": "github"
|
||||||
},
|
},
|
||||||
"original": {
|
"original": {
|
||||||
|
|
2138
ggml-cuda/mmq.cuh
File diff suppressed because it is too large
240
ggml/CMakeLists.txt
Normal file
|
@ -0,0 +1,240 @@
|
||||||
|
cmake_minimum_required(VERSION 3.14) # for add_link_options and implicit target directories.
|
||||||
|
project("ggml" C CXX)
|
||||||
|
include(CheckIncludeFileCXX)
|
||||||
|
|
||||||
|
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
|
||||||
|
|
||||||
|
if (NOT XCODE AND NOT MSVC AND NOT CMAKE_BUILD_TYPE)
|
||||||
|
set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type" FORCE)
|
||||||
|
set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo")
|
||||||
|
endif()
|
||||||
|
|
||||||
|
if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
|
||||||
|
set(GGML_STANDALONE ON)
|
||||||
|
|
||||||
|
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
|
||||||
|
|
||||||
|
# configure project version
|
||||||
|
# TODO
|
||||||
|
else()
|
||||||
|
set(GGML_STANDALONE OFF)
|
||||||
|
endif()
|
||||||
|
|
||||||
|
if (EMSCRIPTEN)
|
||||||
|
set(BUILD_SHARED_LIBS_DEFAULT OFF)
|
||||||
|
|
||||||
|
option(GGML_WASM_SINGLE_FILE "ggml: embed WASM inside the generated ggml.js" ON)
|
||||||
|
else()
|
||||||
|
if (MINGW)
|
||||||
|
set(BUILD_SHARED_LIBS_DEFAULT OFF)
|
||||||
|
else()
|
||||||
|
set(BUILD_SHARED_LIBS_DEFAULT ON)
|
||||||
|
endif()
|
||||||
|
endif()
|
||||||
|
|
||||||
|
option(BUILD_SHARED_LIBS "ggml: build shared libraries" ${BUILD_SHARED_LIBS_DEFAULT})
|
||||||
|
|
||||||
|
#
|
||||||
|
# option list
|
||||||
|
#
|
||||||
|
|
||||||
|
# TODO: mark all options as advanced when not GGML_STANDALONE

if (APPLE)
    set(GGML_METAL_DEFAULT ON)
    set(GGML_BLAS_DEFAULT ON)
    set(GGML_BLAS_VENDOR_DEFAULT "Apple")
else()
    set(GGML_METAL_DEFAULT OFF)
    set(GGML_BLAS_DEFAULT OFF)
    set(GGML_BLAS_VENDOR_DEFAULT "Generic")
endif()

# general
option(GGML_STATIC "ggml: static link libraries" OFF)
option(GGML_NATIVE "ggml: enable -march=native flag" ON)
option(GGML_LTO    "ggml: enable link time optimization" OFF)
option(GGML_CCACHE "ggml: use ccache if available" ON)

# debug
option(GGML_ALL_WARNINGS           "ggml: enable all compiler warnings" ON)
option(GGML_ALL_WARNINGS_3RD_PARTY "ggml: enable all compiler warnings in 3rd party libs" OFF)
option(GGML_GPROF                  "ggml: enable gprof" OFF)

# build
option(GGML_FATAL_WARNINGS "ggml: enable -Werror flag" OFF)

# sanitizers
option(GGML_SANITIZE_THREAD    "ggml: enable thread sanitizer" OFF)
option(GGML_SANITIZE_ADDRESS   "ggml: enable address sanitizer" OFF)
option(GGML_SANITIZE_UNDEFINED "ggml: enable undefined sanitizer" OFF)

# instruction set specific
if (GGML_NATIVE)
    set(INS_ENB OFF)
else()
    set(INS_ENB ON)
endif()

option(GGML_CPU_HBM "ggml: use memkind for CPU HBM" OFF)

option(GGML_AVX         "ggml: enable AVX"         ${INS_ENB})
option(GGML_AVX2        "ggml: enable AVX2"        ${INS_ENB})
option(GGML_AVX512      "ggml: enable AVX512"      OFF)
option(GGML_AVX512_VBMI "ggml: enable AVX512-VBMI" OFF)
option(GGML_AVX512_VNNI "ggml: enable AVX512-VNNI" OFF)
option(GGML_AVX512_BF16 "ggml: enable AVX512-BF16" OFF)
option(GGML_FMA         "ggml: enable FMA"         ${INS_ENB})
if (NOT MSVC)
    option(GGML_F16C    "ggml: enable F16C"        ${INS_ENB}) # in MSVC F16C is implied with AVX2/AVX512
endif()
option(GGML_LASX        "ggml: enable lasx"        ON)
option(GGML_LSX         "ggml: enable lsx"         ON)
option(GGML_SVE         "ggml: enable SVE"         OFF)

if (WIN32)
    set(GGML_WIN_VER "0x602" CACHE STRING "ggml: Windows Version")
endif()

# ggml core
set(GGML_SCHED_MAX_COPIES "4" CACHE STRING "ggml: max input copies for pipeline parallelism")

# 3rd party libs / backends
option(GGML_ACCELERATE "ggml: enable Accelerate framework" ON)
option(GGML_BLAS       "ggml: use BLAS" ${GGML_BLAS_DEFAULT})
set(GGML_BLAS_VENDOR ${GGML_BLAS_VENDOR_DEFAULT} CACHE STRING "ggml: BLAS library vendor")
option(GGML_LLAMAFILE  "ggml: use ggml SGEMM" OFF)

option(GGML_CUDA                   "ggml: use CUDA" OFF)
option(GGML_CUDA_FORCE_DMMV        "ggml: use dmmv instead of mmvq CUDA kernels" OFF)
option(GGML_CUDA_FORCE_MMQ         "ggml: use mmq kernels instead of cuBLAS" OFF)
option(GGML_CUDA_FORCE_CUBLAS      "ggml: always use cuBLAS instead of mmq kernels" OFF)
set   (GGML_CUDA_DMMV_X "32" CACHE STRING "ggml: x stride for dmmv CUDA kernels")
set   (GGML_CUDA_MMV_Y   "1" CACHE STRING "ggml: y block size for mmv CUDA kernels")
option(GGML_CUDA_F16               "ggml: use 16 bit floats for some calculations" OFF)
set   (GGML_CUDA_KQUANTS_ITER "2" CACHE STRING "ggml: iters./thread per block for Q2_K/Q6_K")
set   (GGML_CUDA_PEER_MAX_BATCH_SIZE "128" CACHE STRING "ggml: max. batch size for using peer access")
option(GGML_CUDA_NO_PEER_COPY      "ggml: do not use peer to peer copies" OFF)
option(GGML_CUDA_NO_VMM            "ggml: do not try to use CUDA VMM" OFF)
option(GGML_CUDA_FA_ALL_QUANTS     "ggml: compile all quants for FlashAttention" OFF)
option(GGML_CUDA_USE_GRAPHS        "ggml: use CUDA graphs (llama.cpp only)" OFF)

option(GGML_CURL       "ggml: use libcurl to download model from an URL" OFF)
option(GGML_HIPBLAS    "ggml: use hipBLAS" OFF)
option(GGML_HIP_UMA    "ggml: use HIP unified memory architecture" OFF)
option(GGML_VULKAN                 "ggml: use Vulkan" OFF)
option(GGML_VULKAN_CHECK_RESULTS   "ggml: run Vulkan op checks" OFF)
option(GGML_VULKAN_DEBUG           "ggml: enable Vulkan debug output" OFF)
option(GGML_VULKAN_MEMORY_DEBUG    "ggml: enable Vulkan memory debug output" OFF)
option(GGML_VULKAN_VALIDATE        "ggml: enable Vulkan validation" OFF)
option(GGML_VULKAN_RUN_TESTS       "ggml: run Vulkan tests" OFF)
option(GGML_KOMPUTE    "ggml: use Kompute" OFF)
option(GGML_METAL      "ggml: use Metal" ${GGML_METAL_DEFAULT})
option(GGML_METAL_NDEBUG           "ggml: disable Metal debugging" OFF)
option(GGML_METAL_SHADER_DEBUG     "ggml: compile Metal with -fno-fast-math" OFF)
option(GGML_METAL_EMBED_LIBRARY    "ggml: embed Metal library" ${GGML_METAL})
set   (GGML_METAL_MACOSX_VERSION_MIN "" CACHE STRING "ggml: metal minimum macOS version")
set   (GGML_METAL_STD "" CACHE STRING "ggml: metal standard version (-std flag)")
option(GGML_OPENMP     "ggml: use OpenMP" ON)
option(GGML_RPC        "ggml: use RPC" OFF)
option(GGML_SYCL       "ggml: use SYCL" OFF)
option(GGML_SYCL_F16   "ggml: use 16 bit floats for sycl calculations" OFF)
set   (GGML_SYCL_TARGET "INTEL" CACHE STRING "ggml: sycl target device")

# extra artifacts
option(GGML_BUILD_TESTS    "ggml: build tests"    ${GGML_STANDALONE})
option(GGML_BUILD_EXAMPLES "ggml: build examples" ${GGML_STANDALONE})

#
# dependencies
#

set(CMAKE_C_STANDARD 11)
set(CMAKE_C_STANDARD_REQUIRED true)

if (GGML_SYCL)
    set(CMAKE_CXX_STANDARD 17)
else()
    set(CMAKE_CXX_STANDARD 11)
endif()
set(CMAKE_CXX_STANDARD_REQUIRED true)

set(THREADS_PREFER_PTHREAD_FLAG ON)

find_package(Threads REQUIRED)

#
# build the library
#

add_subdirectory(src)

#
# tests and examples
#

if (GGML_BUILD_TESTS)
    enable_testing()
    add_subdirectory(tests)
endif ()

if (GGML_BUILD_EXAMPLES)
    add_subdirectory(examples)
endif ()

#
# install
#

include(GNUInstallDirs)
include(CMakePackageConfigHelpers)

set(GGML_PUBLIC_HEADERS
    include/ggml.h
    include/ggml-alloc.h
    include/ggml-backend.h
    "${GGML_HEADERS_CUDA}"
    "${GGML_HEADERS_METAL}"
    "${GGML_HEADERS_EXTRA}")

set_target_properties(ggml PROPERTIES PUBLIC_HEADER "${GGML_PUBLIC_HEADERS}")
#if (GGML_METAL)
#    set_target_properties(ggml PROPERTIES RESOURCE "${CMAKE_CURRENT_SOURCE_DIR}/src/ggml-metal.metal")
#endif()
install(TARGETS ggml PUBLIC_HEADER)

if (BUILD_SHARED_LIBS)
    install(TARGETS ggml LIBRARY)
endif()

if (GGML_METAL)
    install(
        FILES src/ggml-metal.metal
        PERMISSIONS
            OWNER_READ
            OWNER_WRITE
            GROUP_READ
            WORLD_READ
        DESTINATION ${CMAKE_INSTALL_BINDIR})

    if (NOT GGML_METAL_EMBED_LIBRARY)
        install(
            FILES ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/default.metallib
            DESTINATION ${CMAKE_INSTALL_BINDIR}
        )
    endif()
endif()

if (GGML_STANDALONE)
    configure_file(${CMAKE_CURRENT_SOURCE_DIR}/ggml.pc.in
        ${CMAKE_CURRENT_BINARY_DIR}/ggml.pc
        @ONLY)

    install(FILES ${CMAKE_CURRENT_BINARY_DIR}/ggml.pc
        DESTINATION share/pkgconfig)
endif()
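As a rough sketch of how these renamed GGML_* options are consumed at configure time, a CUDA-enabled build could be set up as follows. The specific flag combination is illustrative only and is not prescribed by this diff; the flag names themselves are taken from the option list above.

    # configure with the CUDA backend and FP16 CUDA kernels, then build
    cmake -B build -DGGML_CUDA=ON -DGGML_CUDA_F16=ON
    cmake --build build --config Release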
@@ -79,22 +79,22 @@ endmacro()
 # flags are for MSVC only!
 check_sse("AVX" " ;/arch:AVX")
 if (NOT ${AVX_FOUND})
-    set(LLAMA_AVX OFF)
+    set(GGML_AVX OFF)
 else()
-    set(LLAMA_AVX ON)
+    set(GGML_AVX ON)
 endif()

 check_sse("AVX2" " ;/arch:AVX2")
 check_sse("FMA" " ;/arch:AVX2")
 if ((NOT ${AVX2_FOUND}) OR (NOT ${FMA_FOUND}))
-    set(LLAMA_AVX2 OFF)
+    set(GGML_AVX2 OFF)
 else()
-    set(LLAMA_AVX2 ON)
+    set(GGML_AVX2 ON)
 endif()

 check_sse("AVX512" " ;/arch:AVX512")
 if (NOT ${AVX512_FOUND})
-    set(LLAMA_AVX512 OFF)
+    set(GGML_AVX512 OFF)
 else()
-    set(LLAMA_AVX512 ON)
+    set(GGML_AVX512 ON)
 endif()
Some files were not shown because too many files have changed in this diff.