diff --git a/.clang-format b/.clang-format deleted file mode 100644 index 45232b80e..000000000 --- a/.clang-format +++ /dev/null @@ -1,161 +0,0 @@ ---- -Language: Cpp -AlignAfterOpenBracket: Align -AlignArrayOfStructures: Left -AlignConsecutiveAssignments: AcrossComments -AlignConsecutiveBitFields: AcrossComments -AlignConsecutiveDeclarations: AcrossComments -AlignConsecutiveMacros: AcrossComments -# AlignConsecutiveShortCaseStatements: AcrossComments -AlignEscapedNewlines: Left # LeftWithLastLine -AlignOperands: Align -AlignTrailingComments: - Kind: Always - OverEmptyLines: 1 -AllowAllArgumentsOnNextLine: true -AllowAllParametersOfDeclarationOnNextLine: false -# AllowBreakBeforeNoexceptSpecifier: OnlyWithParen -AllowShortBlocksOnASingleLine: Never -AllowShortCaseLabelsOnASingleLine: false -AllowShortFunctionsOnASingleLine: Inline -AllowShortIfStatementsOnASingleLine: Never -AllowShortLambdasOnASingleLine: Inline -AllowShortLoopsOnASingleLine: false -AlwaysBreakBeforeMultilineStrings: true -BinPackArguments: true -BinPackParameters: true # OnePerLine -BitFieldColonSpacing: Both -BreakBeforeBraces: Custom # Attach -BraceWrapping: - AfterCaseLabel: true - AfterClass: false - AfterControlStatement: false - AfterEnum: false - AfterFunction: false - AfterNamespace: false - AfterObjCDeclaration: false - AfterStruct: false - AfterUnion: false - AfterExternBlock: false - BeforeCatch: false - BeforeElse: false - BeforeLambdaBody: false - BeforeWhile: false - IndentBraces: false - SplitEmptyFunction: false - SplitEmptyRecord: false - SplitEmptyNamespace: false -# BreakAdjacentStringLiterals: true -BreakAfterAttributes: Never -BreakBeforeBinaryOperators: None -BreakBeforeInlineASMColon: OnlyMultiline -BreakBeforeTernaryOperators: false -# BreakBinaryOperations: Never -BreakConstructorInitializers: AfterColon -# BreakFunctionDefinitionParameters: false -BreakInheritanceList: AfterComma -BreakStringLiterals: true -# BreakTemplateDeclarations: Yes -ColumnLimit: 120 -CommentPragmas: '^ IWYU pragma:' -CompactNamespaces: false -ConstructorInitializerIndentWidth: 4 -ContinuationIndentWidth: 4 -Cpp11BracedListStyle: false -DerivePointerAlignment: false -DisableFormat: false -EmptyLineBeforeAccessModifier: Leave -EmptyLineAfterAccessModifier: Never -ExperimentalAutoDetectBinPacking: false -FixNamespaceComments: true -IncludeBlocks: Regroup -IncludeCategories: - - Regex: '^<.*\.h>' - Priority: 1 - SortPriority: 0 - - Regex: '^<.*' - Priority: 2 - SortPriority: 0 - - Regex: '.*' - Priority: 3 - SortPriority: 0 -IncludeIsMainRegex: '([-_](test|unittest))?$' -IncludeIsMainSourceRegex: '' -IndentAccessModifiers: false -IndentCaseBlocks: true -IndentCaseLabels: true -IndentExternBlock: NoIndent -IndentGotoLabels: false -IndentPPDirectives: AfterHash -IndentWidth: 4 -IndentWrappedFunctionNames: false -InsertBraces: true # NOTE: may lead to incorrect formatting -InsertNewlineAtEOF: true -JavaScriptQuotes: Leave -JavaScriptWrapImports: true -KeepEmptyLinesAtTheStartOfBlocks: false -LambdaBodyIndentation: Signature -LineEnding: LF -MacroBlockBegin: '' -MacroBlockEnd: '' -MaxEmptyLinesToKeep: 1 -NamespaceIndentation: None -ObjCBinPackProtocolList: Auto -ObjCBlockIndentWidth: 4 -ObjCSpaceAfterProperty: true -ObjCSpaceBeforeProtocolList: true -PPIndentWidth: -1 -PackConstructorInitializers: CurrentLine -PenaltyBreakAssignment: 2 -PenaltyBreakBeforeFirstCallParameter: 1 -PenaltyBreakComment: 300 -PenaltyBreakFirstLessLess: 120 -PenaltyBreakString: 1000 -PenaltyBreakTemplateDeclaration: 10 -PenaltyExcessCharacter: 1000000 
-PenaltyReturnTypeOnItsOwnLine: 200 -PointerAlignment: Middle -QualifierAlignment: Left -#QualifierOrder: ['static', 'inline', 'friend', 'constexpr', 'const', 'volatile', 'type', 'restrict'] -RawStringFormats: - - Language: Cpp - Delimiters: - - cc - - CC - - cpp - - Cpp - - CPP - - 'c++' - - 'C++' - CanonicalDelimiter: '' -ReferenceAlignment: Middle -ReflowComments: false # IndentOnly -SeparateDefinitionBlocks: Always -SortIncludes: CaseInsensitive -SortUsingDeclarations: LexicographicNumeric -SpaceAfterCStyleCast: true -SpaceAfterLogicalNot: false -SpaceAfterTemplateKeyword: true -SpaceBeforeAssignmentOperators: true -SpaceBeforeCpp11BracedList: false -SpaceBeforeCtorInitializerColon: true -SpaceBeforeInheritanceColon: true -SpaceBeforeParens: ControlStatements -SpaceBeforeRangeBasedForLoopColon: true -SpaceInEmptyBlock: false -SpaceInEmptyParentheses: false -SpacesBeforeTrailingComments: 2 -SpacesInAngles: Never -SpacesInContainerLiterals: true -SpacesInLineCommentPrefix: - Minimum: 1 - Maximum: -1 -SpacesInParentheses: false -SpacesInSquareBrackets: false -SpaceBeforeSquareBrackets: false -Standard: c++17 -TabWidth: 4 -UseTab: Never -WhitespaceSensitiveMacros: ['STRINGIZE'] -... - diff --git a/.clang-tidy b/.clang-tidy deleted file mode 100644 index 310c3d182..000000000 --- a/.clang-tidy +++ /dev/null @@ -1,26 +0,0 @@ ---- -Checks: > - bugprone-*, - -bugprone-easily-swappable-parameters, - -bugprone-implicit-widening-of-multiplication-result, - -bugprone-misplaced-widening-cast, - -bugprone-narrowing-conversions, - readability-*, - -readability-avoid-unconditional-preprocessor-if, - -readability-function-cognitive-complexity, - -readability-identifier-length, - -readability-implicit-bool-conversion, - -readability-magic-numbers, - -readability-uppercase-literal-suffix, - -readability-simplify-boolean-expr, - clang-analyzer-*, - -clang-analyzer-security.insecureAPI.DeprecatedOrUnsafeBufferHandling, - performance-*, - portability-*, - -portability-simd-intrinsics, - misc-*, - -misc-const-correctness, - -misc-non-private-member-variables-in-classes, - -misc-no-recursion, - -misc-use-anonymous-namespace, -FormatStyle: none diff --git a/.devops/cloud-v-pipeline b/.devops/cloud-v-pipeline deleted file mode 100644 index af8c0cea6..000000000 --- a/.devops/cloud-v-pipeline +++ /dev/null @@ -1,22 +0,0 @@ -node('x86_runner1'){ // Running on x86 runner containing latest vector qemu, latest vector gcc and all the necessary libraries - stage('Cleanup'){ - cleanWs() // Cleaning previous CI build in workspace - } - stage('checkout repo'){ - retry(5){ // Retry if the cloning fails due to some reason - checkout scm // Clone the repo on Runner - } - } - stage('Compiling llama.cpp'){ - sh'''#!/bin/bash - make RISCV=1 RISCV_CROSS_COMPILE=1 # Compiling llama for RISC-V - ''' - } - stage('Running llama.cpp'){ - sh'''#!/bin/bash - module load gnu-bin2/0.1 # loading latest versions of vector qemu and vector gcc - qemu-riscv64 -L /softwares/gnu-bin2/sysroot -cpu rv64,v=true,vlen=256,elen=64,vext_spec=v1.0 ./llama-cli -m /home/alitariq/codellama-7b.Q4_K_M.gguf -p "Anything" -n 9 > llama_log.txt # Running llama.cpp on vector qemu-riscv64 - cat llama_log.txt # Printing results - ''' - } -} diff --git a/.devops/cpu.Dockerfile b/.devops/cpu.Dockerfile deleted file mode 100644 index 522ee8147..000000000 --- a/.devops/cpu.Dockerfile +++ /dev/null @@ -1,92 +0,0 @@ -ARG UBUNTU_VERSION=22.04 - -FROM ubuntu:$UBUNTU_VERSION AS build - -ARG TARGETARCH - -ARG GGML_CPU_ARM_ARCH=armv8-a - -RUN apt-get update && \ - apt-get 
install -y build-essential git cmake libcurl4-openssl-dev - -WORKDIR /app - -COPY . . - -RUN if [ "$TARGETARCH" = "amd64" ]; then \ - cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -DLLAMA_CURL=ON -DGGML_NATIVE=OFF -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON; \ - elif [ "$TARGETARCH" = "arm64" ]; then \ - cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -DLLAMA_CURL=ON -DGGML_NATIVE=OFF -DGGML_CPU_ARM_ARCH=${GGML_CPU_ARM_ARCH}; \ - else \ - echo "Unsupported architecture"; \ - exit 1; \ - fi && \ - cmake --build build -j $(nproc) - -RUN mkdir -p /app/lib && \ - find build -name "*.so" -exec cp {} /app/lib \; - -RUN mkdir -p /app/full \ - && cp build/bin/* /app/full \ - && cp *.py /app/full \ - && cp -r gguf-py /app/full \ - && cp -r requirements /app/full \ - && cp requirements.txt /app/full \ - && cp .devops/tools.sh /app/full/tools.sh - -## Base image -FROM ubuntu:$UBUNTU_VERSION AS base - -RUN apt-get update \ - && apt-get install -y libgomp1 curl\ - && apt autoremove -y \ - && apt clean -y \ - && rm -rf /tmp/* /var/tmp/* \ - && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \ - && find /var/cache -type f -delete - -COPY --from=build /app/lib/ /app - -### Full -FROM base AS full - -COPY --from=build /app/full /app - -WORKDIR /app - -RUN apt-get update \ - && apt-get install -y \ - git \ - python3 \ - python3-pip \ - && pip install --upgrade pip setuptools wheel \ - && pip install -r requirements.txt \ - && apt autoremove -y \ - && apt clean -y \ - && rm -rf /tmp/* /var/tmp/* \ - && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \ - && find /var/cache -type f -delete - -ENTRYPOINT ["/app/tools.sh"] - -### Light, CLI only -FROM base AS light - -COPY --from=build /app/full/llama-cli /app - -WORKDIR /app - -ENTRYPOINT [ "/app/llama-cli" ] - -### Server, Server only -FROM base AS server - -ENV LLAMA_ARG_HOST=0.0.0.0 - -COPY --from=build /app/full/llama-server /app - -WORKDIR /app - -HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ] - -ENTRYPOINT [ "/app/llama-server" ] diff --git a/.devops/cuda.Dockerfile b/.devops/cuda.Dockerfile deleted file mode 100644 index 974dd78a8..000000000 --- a/.devops/cuda.Dockerfile +++ /dev/null @@ -1,94 +0,0 @@ -ARG UBUNTU_VERSION=22.04 -# This needs to generally match the container host's environment. -ARG CUDA_VERSION=12.6.0 -# Target the CUDA build image -ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION} - -ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION} - -FROM ${BASE_CUDA_DEV_CONTAINER} AS build - -# CUDA architecture to build for (defaults to all supported archs) -ARG CUDA_DOCKER_ARCH=default - -RUN apt-get update && \ - apt-get install -y build-essential cmake python3 python3-pip git libcurl4-openssl-dev libgomp1 - -WORKDIR /app - -COPY . . - -RUN if [ "${CUDA_DOCKER_ARCH}" != "default" ]; then \ - export CMAKE_ARGS="-DCMAKE_CUDA_ARCHITECTURES=${CUDA_DOCKER_ARCH}"; \ - fi && \ - cmake -B build -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DLLAMA_CURL=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . 
&& \ - cmake --build build --config Release -j$(nproc) - -RUN mkdir -p /app/lib && \ - find build -name "*.so" -exec cp {} /app/lib \; - -RUN mkdir -p /app/full \ - && cp build/bin/* /app/full \ - && cp *.py /app/full \ - && cp -r gguf-py /app/full \ - && cp -r requirements /app/full \ - && cp requirements.txt /app/full \ - && cp .devops/tools.sh /app/full/tools.sh - -## Base image -FROM ${BASE_CUDA_RUN_CONTAINER} AS base - -RUN apt-get update \ - && apt-get install -y libgomp1 curl\ - && apt autoremove -y \ - && apt clean -y \ - && rm -rf /tmp/* /var/tmp/* \ - && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \ - && find /var/cache -type f -delete - -COPY --from=build /app/lib/ /app - -### Full -FROM base AS full - -COPY --from=build /app/full /app - -WORKDIR /app - -RUN apt-get update \ - && apt-get install -y \ - git \ - python3 \ - python3-pip \ - && pip install --upgrade pip setuptools wheel \ - && pip install -r requirements.txt \ - && apt autoremove -y \ - && apt clean -y \ - && rm -rf /tmp/* /var/tmp/* \ - && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \ - && find /var/cache -type f -delete - - -ENTRYPOINT ["/app/tools.sh"] - -### Light, CLI only -FROM base AS light - -COPY --from=build /app/full/llama-cli /app - -WORKDIR /app - -ENTRYPOINT [ "/app/llama-cli" ] - -### Server, Server only -FROM base AS server - -ENV LLAMA_ARG_HOST=0.0.0.0 - -COPY --from=build /app/full/llama-server /app - -WORKDIR /app - -HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ] - -ENTRYPOINT [ "/app/llama-server" ] diff --git a/.devops/full.Dockerfile b/.devops/full.Dockerfile new file mode 100644 index 000000000..491d67676 --- /dev/null +++ b/.devops/full.Dockerfile @@ -0,0 +1,19 @@ +ARG UBUNTU_VERSION=22.04 + +FROM ubuntu:$UBUNTU_VERSION as build + +RUN apt-get update && \ + apt-get install -y build-essential python3 python3-pip + +COPY requirements.txt requirements.txt + +RUN pip install --upgrade pip setuptools wheel \ + && pip install -r requirements.txt + +WORKDIR /app + +COPY . . + +RUN make + +ENTRYPOINT ["/app/.devops/tools.sh"] diff --git a/.devops/intel.Dockerfile b/.devops/intel.Dockerfile deleted file mode 100644 index af783f5e9..000000000 --- a/.devops/intel.Dockerfile +++ /dev/null @@ -1,91 +0,0 @@ -ARG ONEAPI_VERSION=2025.0.0-0-devel-ubuntu22.04 - -## Build Image - -FROM intel/oneapi-basekit:$ONEAPI_VERSION AS build - -ARG GGML_SYCL_F16=OFF -RUN apt-get update && \ - apt-get install -y git libcurl4-openssl-dev - -WORKDIR /app - -COPY . . 
- -RUN if [ "${GGML_SYCL_F16}" = "ON" ]; then \ - echo "GGML_SYCL_F16 is set" \ - && export OPT_SYCL_F16="-DGGML_SYCL_F16=ON"; \ - fi && \ - echo "Building with dynamic libs" && \ - cmake -B build -DGGML_NATIVE=OFF -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_CURL=ON ${OPT_SYCL_F16} && \ - cmake --build build --config Release -j$(nproc) - -RUN mkdir -p /app/lib && \ - find build -name "*.so" -exec cp {} /app/lib \; - -RUN mkdir -p /app/full \ - && cp build/bin/* /app/full \ - && cp *.py /app/full \ - && cp -r gguf-py /app/full \ - && cp -r requirements /app/full \ - && cp requirements.txt /app/full \ - && cp .devops/tools.sh /app/full/tools.sh - -FROM intel/oneapi-basekit:$ONEAPI_VERSION AS base - -RUN apt-get update \ - && apt-get install -y libgomp1 curl\ - && apt autoremove -y \ - && apt clean -y \ - && rm -rf /tmp/* /var/tmp/* \ - && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \ - && find /var/cache -type f -delete - -### Full -FROM base AS full - -COPY --from=build /app/lib/ /app -COPY --from=build /app/full /app - -WORKDIR /app - -RUN apt-get update \ - && apt-get install -y \ - git \ - python3 \ - python3-pip \ - && pip install --upgrade pip setuptools wheel \ - && pip install -r requirements.txt \ - && apt autoremove -y \ - && apt clean -y \ - && rm -rf /tmp/* /var/tmp/* \ - && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \ - && find /var/cache -type f -delete - - -ENTRYPOINT ["/app/tools.sh"] - -### Light, CLI only -FROM base AS light - -COPY --from=build /app/lib/ /app -COPY --from=build /app/full/llama-cli /app - -WORKDIR /app - -ENTRYPOINT [ "/app/llama-cli" ] - -### Server, Server only -FROM base AS server - -ENV LLAMA_ARG_HOST=0.0.0.0 - -COPY --from=build /app/lib/ /app -COPY --from=build /app/full/llama-server /app - -WORKDIR /app - -HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ] - -ENTRYPOINT [ "/app/llama-server" ] - diff --git a/.devops/llama-cli-cann.Dockerfile b/.devops/llama-cli-cann.Dockerfile deleted file mode 100644 index 02dce501c..000000000 --- a/.devops/llama-cli-cann.Dockerfile +++ /dev/null @@ -1,44 +0,0 @@ -ARG ASCEND_VERSION=8.0.rc2.alpha003-910b-openeuler22.03-py3.8 - -FROM ascendai/cann:$ASCEND_VERSION AS build - -WORKDIR /app - -COPY . . - -RUN yum install -y gcc g++ cmake make -ENV ASCEND_TOOLKIT_HOME=/usr/local/Ascend/ascend-toolkit/latest -ENV LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:$LIBRARY_PATH -ENV LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:${ASCEND_TOOLKIT_HOME}/lib64/plugin/opskernel:${ASCEND_TOOLKIT_HOME}/lib64/plugin/nnengine:${ASCEND_TOOLKIT_HOME}/opp/built-in/op_impl/ai_core/tbe/op_tiling:${LD_LIBRARY_PATH} -ENV PYTHONPATH=${ASCEND_TOOLKIT_HOME}/python/site-packages:${ASCEND_TOOLKIT_HOME}/opp/built-in/op_impl/ai_core/tbe:${PYTHONPATH} -ENV PATH=${ASCEND_TOOLKIT_HOME}/bin:${ASCEND_TOOLKIT_HOME}/compiler/ccec_compiler/bin:${PATH} -ENV ASCEND_AICPU_PATH=${ASCEND_TOOLKIT_HOME} -ENV ASCEND_OPP_PATH=${ASCEND_TOOLKIT_HOME}/opp -ENV TOOLCHAIN_HOME=${ASCEND_TOOLKIT_HOME}/toolkit -ENV ASCEND_HOME_PATH=${ASCEND_TOOLKIT_HOME} - -# find libascend_hal.so, because the drive hasn`t been mounted. 
-ENV LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/runtime/lib64/stub:$LD_LIBRARY_PATH - -RUN echo "Building with static libs" && \ - source /usr/local/Ascend/ascend-toolkit/set_env.sh --force && \ - cmake -B build -DGGML_NATIVE=OFF -DGGML_CANN=ON -DBUILD_SHARED_LIBS=OFF && \ - cmake --build build --config Release --target llama-cli - -# TODO: use image with NNRT -FROM ascendai/cann:$ASCEND_VERSION AS runtime -COPY --from=build /app/build/bin/llama-cli /llama-cli - -ENV LC_ALL=C.utf8 - -ENV ASCEND_TOOLKIT_HOME=/usr/local/Ascend/ascend-toolkit/latest -ENV LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:$LIBRARY_PATH -ENV LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:${ASCEND_TOOLKIT_HOME}/lib64/plugin/opskernel:${ASCEND_TOOLKIT_HOME}/lib64/plugin/nnengine:${ASCEND_TOOLKIT_HOME}/opp/built-in/op_impl/ai_core/tbe/op_tiling:${LD_LIBRARY_PATH} -ENV PYTHONPATH=${ASCEND_TOOLKIT_HOME}/python/site-packages:${ASCEND_TOOLKIT_HOME}/opp/built-in/op_impl/ai_core/tbe:${PYTHONPATH} -ENV PATH=${ASCEND_TOOLKIT_HOME}/bin:${ASCEND_TOOLKIT_HOME}/compiler/ccec_compiler/bin:${PATH} -ENV ASCEND_AICPU_PATH=${ASCEND_TOOLKIT_HOME} -ENV ASCEND_OPP_PATH=${ASCEND_TOOLKIT_HOME}/opp -ENV TOOLCHAIN_HOME=${ASCEND_TOOLKIT_HOME}/toolkit -ENV ASCEND_HOME_PATH=${ASCEND_TOOLKIT_HOME} - -ENTRYPOINT ["/llama-cli" ] diff --git a/.devops/llama-cpp-cuda.srpm.spec b/.devops/llama-cpp-cuda.srpm.spec deleted file mode 100644 index 7425d3a9d..000000000 --- a/.devops/llama-cpp-cuda.srpm.spec +++ /dev/null @@ -1,83 +0,0 @@ -# SRPM for building from source and packaging an RPM for RPM-based distros. -# https://docs.fedoraproject.org/en-US/quick-docs/creating-rpm-packages -# Built and maintained by John Boero - boeroboy@gmail.com -# In honor of Seth Vidal https://www.redhat.com/it/blog/thank-you-seth-vidal - -# Notes for llama.cpp: -# 1. Tags are currently based on hash - which will not sort asciibetically. -# We need to declare standard versioning if people want to sort latest releases. -# 2. Builds for CUDA/OpenCL support are separate, with different depenedencies. -# 3. NVidia's developer repo must be enabled with nvcc, cublas, clblas, etc installed. -# Example: https://developer.download.nvidia.com/compute/cuda/repos/fedora37/x86_64/cuda-fedora37.repo -# 4. OpenCL/CLBLAST support simply requires the ICD loader and basic opencl libraries. -# It is up to the user to install the correct vendor-specific support. - -Name: llama.cpp-cuda -Version: %( date "+%%Y%%m%%d" ) -Release: 1%{?dist} -Summary: CPU Inference of LLaMA model in pure C/C++ (no CUDA/OpenCL) -License: MIT -Source0: https://github.com/ggerganov/llama.cpp/archive/refs/heads/master.tar.gz -BuildRequires: coreutils make gcc-c++ git cuda-toolkit -Requires: cuda-toolkit -URL: https://github.com/ggerganov/llama.cpp - -%define debug_package %{nil} -%define source_date_epoch_from_changelog 0 - -%description -CPU inference for Meta's Lllama2 models using default options. - -%prep -%setup -n llama.cpp-master - -%build -make -j GGML_CUDA=1 - -%install -mkdir -p %{buildroot}%{_bindir}/ -cp -p llama-cli %{buildroot}%{_bindir}/llama-cuda-cli -cp -p llama-server %{buildroot}%{_bindir}/llama-cuda-server -cp -p llama-simple %{buildroot}%{_bindir}/llama-cuda-simple - -mkdir -p %{buildroot}/usr/lib/systemd/system -%{__cat} < %{buildroot}/usr/lib/systemd/system/llamacuda.service -[Unit] -Description=Llama.cpp server, CPU only (no GPU support in this build). 
-After=syslog.target network.target local-fs.target remote-fs.target nss-lookup.target - -[Service] -Type=simple -EnvironmentFile=/etc/sysconfig/llama -ExecStart=/usr/bin/llama-cuda-server $LLAMA_ARGS -ExecReload=/bin/kill -s HUP $MAINPID -Restart=never - -[Install] -WantedBy=default.target -EOF - -mkdir -p %{buildroot}/etc/sysconfig -%{__cat} < %{buildroot}/etc/sysconfig/llama -LLAMA_ARGS="-m /opt/llama2/ggml-model-f32.bin" -EOF - -%clean -rm -rf %{buildroot} -rm -rf %{_builddir}/* - -%files -%{_bindir}/llama-cuda-cli -%{_bindir}/llama-cuda-server -%{_bindir}/llama-cuda-simple -/usr/lib/systemd/system/llamacuda.service -%config /etc/sysconfig/llama - -%pre - -%post - -%preun -%postun - -%changelog diff --git a/.devops/llama-cpp.srpm.spec b/.devops/llama-cpp.srpm.spec deleted file mode 100644 index 4d5560089..000000000 --- a/.devops/llama-cpp.srpm.spec +++ /dev/null @@ -1,85 +0,0 @@ -# SRPM for building from source and packaging an RPM for RPM-based distros. -# https://docs.fedoraproject.org/en-US/quick-docs/creating-rpm-packages -# Built and maintained by John Boero - boeroboy@gmail.com -# In honor of Seth Vidal https://www.redhat.com/it/blog/thank-you-seth-vidal - -# Notes for llama.cpp: -# 1. Tags are currently based on hash - which will not sort asciibetically. -# We need to declare standard versioning if people want to sort latest releases. -# In the meantime, YYYYMMDD format will be used. -# 2. Builds for CUDA/OpenCL support are separate, with different depenedencies. -# 3. NVidia's developer repo must be enabled with nvcc, cublas, clblas, etc installed. -# Example: https://developer.download.nvidia.com/compute/cuda/repos/fedora37/x86_64/cuda-fedora37.repo -# 4. OpenCL/CLBLAST support simply requires the ICD loader and basic opencl libraries. -# It is up to the user to install the correct vendor-specific support. - -Name: llama.cpp -Version: %( date "+%%Y%%m%%d" ) -Release: 1%{?dist} -Summary: CPU Inference of LLaMA model in pure C/C++ (no CUDA/OpenCL) -License: MIT -Source0: https://github.com/ggerganov/llama.cpp/archive/refs/heads/master.tar.gz -BuildRequires: coreutils make gcc-c++ git libstdc++-devel -Requires: libstdc++ -URL: https://github.com/ggerganov/llama.cpp - -%define debug_package %{nil} -%define source_date_epoch_from_changelog 0 - -%description -CPU inference for Meta's Lllama2 models using default options. -Models are not included in this package and must be downloaded separately. - -%prep -%setup -n llama.cpp-master - -%build -make -j - -%install -mkdir -p %{buildroot}%{_bindir}/ -cp -p llama-cli %{buildroot}%{_bindir}/llama-cli -cp -p llama-server %{buildroot}%{_bindir}/llama-server -cp -p llama-simple %{buildroot}%{_bindir}/llama-simple - -mkdir -p %{buildroot}/usr/lib/systemd/system -%{__cat} < %{buildroot}/usr/lib/systemd/system/llama.service -[Unit] -Description=Llama.cpp server, CPU only (no GPU support in this build). 
-After=syslog.target network.target local-fs.target remote-fs.target nss-lookup.target - -[Service] -Type=simple -EnvironmentFile=/etc/sysconfig/llama -ExecStart=/usr/bin/llama-server $LLAMA_ARGS -ExecReload=/bin/kill -s HUP $MAINPID -Restart=never - -[Install] -WantedBy=default.target -EOF - -mkdir -p %{buildroot}/etc/sysconfig -%{__cat} < %{buildroot}/etc/sysconfig/llama -LLAMA_ARGS="-m /opt/llama2/ggml-model-f32.bin" -EOF - -%clean -rm -rf %{buildroot} -rm -rf %{_builddir}/* - -%files -%{_bindir}/llama-cli -%{_bindir}/llama-server -%{_bindir}/llama-simple -/usr/lib/systemd/system/llama.service -%config /etc/sysconfig/llama - -%pre - -%post - -%preun -%postun - -%changelog diff --git a/.devops/main.Dockerfile b/.devops/main.Dockerfile new file mode 100644 index 000000000..2e629f8ce --- /dev/null +++ b/.devops/main.Dockerfile @@ -0,0 +1,18 @@ +ARG UBUNTU_VERSION=22.04 + +FROM ubuntu:$UBUNTU_VERSION as build + +RUN apt-get update && \ + apt-get install -y build-essential + +WORKDIR /app + +COPY . . + +RUN make + +FROM ubuntu:$UBUNTU_VERSION as runtime + +COPY --from=build /app/main /main + +ENTRYPOINT [ "/main" ] diff --git a/.devops/musa.Dockerfile b/.devops/musa.Dockerfile deleted file mode 100644 index bfd7fc1c1..000000000 --- a/.devops/musa.Dockerfile +++ /dev/null @@ -1,108 +0,0 @@ -ARG UBUNTU_VERSION=22.04 -# This needs to generally match the container host's environment. -ARG MUSA_VERSION=rc3.1.0 -# Target the MUSA build image -ARG BASE_MUSA_DEV_CONTAINER=mthreads/musa:${MUSA_VERSION}-devel-ubuntu${UBUNTU_VERSION} - -ARG BASE_MUSA_RUN_CONTAINER=mthreads/musa:${MUSA_VERSION}-runtime-ubuntu${UBUNTU_VERSION} - -FROM ${BASE_MUSA_DEV_CONTAINER} AS build - -# MUSA architecture to build for (defaults to all supported archs) -ARG MUSA_DOCKER_ARCH=default - -RUN apt-get update && \ - apt-get install -y \ - build-essential \ - cmake \ - python3 \ - python3-pip \ - git \ - libcurl4-openssl-dev \ - libgomp1 - -COPY requirements.txt requirements.txt -COPY requirements requirements - -RUN pip install --upgrade pip setuptools wheel \ - && pip install -r requirements.txt - -WORKDIR /app - -COPY . . - -# Use the default MUSA archs if not specified -RUN if [ "${MUSA_DOCKER_ARCH}" != "default" ]; then \ - export CMAKE_ARGS="-DMUSA_ARCHITECTURES=${MUSA_DOCKER_ARCH}"; \ - fi && \ - cmake -B build -DGGML_NATIVE=OFF -DGGML_MUSA=ON -DLLAMA_CURL=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . 
&& \ - cmake --build build --config Release -j$(nproc) - -RUN mkdir -p /app/lib && \ - find build -name "*.so" -exec cp {} /app/lib \; - -RUN mkdir -p /app/full \ - && cp build/bin/* /app/full \ - && cp *.py /app/full \ - && cp -r gguf-py /app/full \ - && cp -r requirements /app/full \ - && cp requirements.txt /app/full \ - && cp .devops/tools.sh /app/full/tools.sh - -## Base image -FROM ${BASE_MUSA_RUN_CONTAINER} AS base - -RUN apt-get update \ - && apt-get install -y libgomp1 curl\ - && apt autoremove -y \ - && apt clean -y \ - && rm -rf /tmp/* /var/tmp/* \ - && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \ - && find /var/cache -type f -delete - -COPY --from=build /app/lib/ /app - -### Full -FROM base AS full - -COPY --from=build /app/full /app - -WORKDIR /app - -RUN apt-get update \ - && apt-get install -y \ - git \ - python3 \ - python3-pip \ - && pip install --upgrade pip setuptools wheel \ - && pip install -r requirements.txt \ - && apt autoremove -y \ - && apt clean -y \ - && rm -rf /tmp/* /var/tmp/* \ - && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \ - && find /var/cache -type f -delete - - -ENTRYPOINT ["/app/tools.sh"] - -### Light, CLI only -FROM base AS light - -COPY --from=build /app/full/llama-cli /app - -WORKDIR /app - -ENTRYPOINT [ "/app/llama-cli" ] - -### Server, Server only -FROM base AS server - -ENV LLAMA_ARG_HOST=0.0.0.0 - -COPY --from=build /app/full/llama-server /app - -WORKDIR /app - -HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ] - -ENTRYPOINT [ "/app/llama-server" ] diff --git a/.devops/nix/apps.nix b/.devops/nix/apps.nix deleted file mode 100644 index 0ecf19fc5..000000000 --- a/.devops/nix/apps.nix +++ /dev/null @@ -1,21 +0,0 @@ -{ - perSystem = - { config, lib, ... }: - { - apps = - let - inherit (config.packages) default; - binaries = [ - "llama-cli" - "llama-embedding" - "llama-server" - "llama-quantize" - ]; - mkApp = name: { - type = "app"; - program = "${default}/bin/${name}"; - }; - in - lib.genAttrs binaries mkApp; - }; -} diff --git a/.devops/nix/devshells.nix b/.devops/nix/devshells.nix deleted file mode 100644 index bfd304af1..000000000 --- a/.devops/nix/devshells.nix +++ /dev/null @@ -1,52 +0,0 @@ -{ inputs, ... }: - -{ - perSystem = - { - config, - lib, - system, - ... - }: - { - devShells = - let - pkgs = import inputs.nixpkgs { inherit system; }; - stdenv = pkgs.stdenv; - scripts = config.packages.python-scripts; - in - lib.pipe (config.packages) [ - (lib.concatMapAttrs ( - name: package: { - ${name} = pkgs.mkShell { - name = "${name}"; - inputsFrom = [ package ]; - shellHook = '' - echo "Entering ${name} devShell" - ''; - }; - "${name}-extra" = - if (name == "python-scripts") then - null - else - pkgs.mkShell { - name = "${name}-extra"; - inputsFrom = [ - package - scripts - ]; - # Extra packages that *may* be used by some scripts - packages = [ - pkgs.python3Packages.tiktoken - ]; - shellHook = '' - echo "Entering ${name} devShell" - addToSearchPath "LD_LIBRARY_PATH" "${lib.getLib stdenv.cc.cc}/lib" - ''; - }; - } - )) - (lib.filterAttrs (name: value: value != null)) - ]; - }; -} diff --git a/.devops/nix/docker.nix b/.devops/nix/docker.nix deleted file mode 100644 index d607b4575..000000000 --- a/.devops/nix/docker.nix +++ /dev/null @@ -1,37 +0,0 @@ -{ - lib, - dockerTools, - buildEnv, - llama-cpp, - interactive ? 
true, - coreutils, -}: - -# A tar that can be fed into `docker load`: -# -# $ nix build .#llamaPackages.docker -# $ docker load < result - -# For details and variations cf. -# - https://nixos.org/manual/nixpkgs/unstable/#ssec-pkgs-dockerTools-buildLayeredImage -# - https://discourse.nixos.org/t/a-faster-dockertools-buildimage-prototype/16922 -# - https://nixery.dev/ - -# Approximate (compressed) sizes, at the time of writing, are: -# -# .#llamaPackages.docker: 125M; -# .#llamaPackagesCuda.docker: 537M; -# .#legacyPackages.aarch64-linux.llamaPackagesXavier.docker: 415M. - -dockerTools.buildLayeredImage { - name = llama-cpp.pname; - tag = "latest"; - - contents = - [ llama-cpp ] - ++ lib.optionals interactive [ - coreutils - dockerTools.binSh - dockerTools.caCertificates - ]; -} diff --git a/.devops/nix/jetson-support.nix b/.devops/nix/jetson-support.nix deleted file mode 100644 index 78e2e40e0..000000000 --- a/.devops/nix/jetson-support.nix +++ /dev/null @@ -1,39 +0,0 @@ -{ inputs, ... }: -{ - perSystem = - { - config, - system, - lib, - pkgsCuda, - ... - }: - { - legacyPackages = - let - caps.llamaPackagesXavier = "7.2"; - caps.llamaPackagesOrin = "8.7"; - caps.llamaPackagesTX2 = "6.2"; - caps.llamaPackagesNano = "5.3"; - - pkgsFor = - cap: - import inputs.nixpkgs { - inherit system; - config = { - cudaSupport = true; - cudaCapabilities = [ cap ]; - cudaEnableForwardCompat = false; - inherit (pkgsCuda.config) allowUnfreePredicate; - }; - }; - in - builtins.mapAttrs (name: cap: (pkgsFor cap).callPackage ./scope.nix { }) caps; - - packages = lib.optionalAttrs (system == "aarch64-linux") { - jetson-xavier = config.legacyPackages.llamaPackagesXavier.llama-cpp; - jetson-orin = config.legacyPackages.llamaPackagesOrin.llama-cpp; - jetson-nano = config.legacyPackages.llamaPackagesNano.llama-cpp; - }; - }; -} diff --git a/.devops/nix/nixpkgs-instances.nix b/.devops/nix/nixpkgs-instances.nix deleted file mode 100644 index 90d683a71..000000000 --- a/.devops/nix/nixpkgs-instances.nix +++ /dev/null @@ -1,45 +0,0 @@ -{ inputs, ... }: -{ - # The _module.args definitions are passed on to modules as arguments. E.g. - # the module `{ pkgs ... }: { /* config */ }` implicitly uses - # `_module.args.pkgs` (defined in this case by flake-parts). - perSystem = - { system, ... }: - { - _module.args = { - # Note: bringing up https://zimbatm.com/notes/1000-instances-of-nixpkgs - # again, the below creates several nixpkgs instances which the - # flake-centric CLI will be forced to evaluate e.g. on `nix flake show`. - # - # This is currently "slow" and "expensive", on a certain scale. - # This also isn't "right" in that this hinders dependency injection at - # the level of flake inputs. This might get removed in the foreseeable - # future. - # - # Note that you can use these expressions without Nix - # (`pkgs.callPackage ./devops/nix/scope.nix { }` is the entry point). - - pkgsCuda = import inputs.nixpkgs { - inherit system; - # Ensure dependencies use CUDA consistently (e.g. 
that openmpi, ucc, - # and ucx are built with CUDA support) - config.cudaSupport = true; - config.allowUnfreePredicate = - p: - builtins.all ( - license: - license.free - || builtins.elem license.shortName [ - "CUDA EULA" - "cuDNN EULA" - ] - ) (p.meta.licenses or [ p.meta.license ]); - }; - # Ensure dependencies use ROCm consistently - pkgsRocm = import inputs.nixpkgs { - inherit system; - config.rocmSupport = true; - }; - }; - }; -} diff --git a/.devops/nix/package-gguf-py.nix b/.devops/nix/package-gguf-py.nix deleted file mode 100644 index cca2f36a5..000000000 --- a/.devops/nix/package-gguf-py.nix +++ /dev/null @@ -1,36 +0,0 @@ -{ - lib, - llamaVersion, - numpy, - tqdm, - sentencepiece, - pyyaml, - poetry-core, - buildPythonPackage, - pytestCheckHook, -}: - -buildPythonPackage { - pname = "gguf"; - version = llamaVersion; - pyproject = true; - nativeBuildInputs = [ poetry-core ]; - propagatedBuildInputs = [ - numpy - tqdm - sentencepiece - pyyaml - ]; - src = lib.cleanSource ../../gguf-py; - pythonImportsCheck = [ - "numpy" - "gguf" - ]; - nativeCheckInputs = [ pytestCheckHook ]; - doCheck = true; - meta = with lib; { - description = "Python package for writing binary files in the GGUF format"; - license = licenses.mit; - maintainers = [ maintainers.ditsuke ]; - }; -} diff --git a/.devops/nix/package.nix b/.devops/nix/package.nix deleted file mode 100644 index 043c4364b..000000000 --- a/.devops/nix/package.nix +++ /dev/null @@ -1,247 +0,0 @@ -{ - lib, - glibc, - config, - stdenv, - runCommand, - cmake, - ninja, - pkg-config, - git, - mpi, - blas, - cudaPackages, - autoAddDriverRunpath, - darwin, - rocmPackages, - vulkan-headers, - vulkan-loader, - curl, - shaderc, - useBlas ? - builtins.all (x: !x) [ - useCuda - useMetalKit - useRocm - useVulkan - ] - && blas.meta.available, - useCuda ? config.cudaSupport, - useMetalKit ? stdenv.isAarch64 && stdenv.isDarwin, - # Increases the runtime closure size by ~700M - useMpi ? false, - useRocm ? config.rocmSupport, - rocmGpuTargets ? builtins.concatStringsSep ";" rocmPackages.clr.gpuTargets, - enableCurl ? true, - useVulkan ? false, - llamaVersion ? "0.0.0", # Arbitrary version, substituted by the flake - - # It's necessary to consistently use backendStdenv when building with CUDA support, - # otherwise we get libstdc++ errors downstream. - effectiveStdenv ? if useCuda then cudaPackages.backendStdenv else stdenv, - enableStatic ? effectiveStdenv.hostPlatform.isStatic, - precompileMetalShaders ? 
false, -}: - -let - inherit (lib) - cmakeBool - cmakeFeature - optionals - strings - ; - - stdenv = throw "Use effectiveStdenv instead"; - - suffices = - lib.optionals useBlas [ "BLAS" ] - ++ lib.optionals useCuda [ "CUDA" ] - ++ lib.optionals useMetalKit [ "MetalKit" ] - ++ lib.optionals useMpi [ "MPI" ] - ++ lib.optionals useRocm [ "ROCm" ] - ++ lib.optionals useVulkan [ "Vulkan" ]; - - pnameSuffix = - strings.optionalString (suffices != [ ]) - "-${strings.concatMapStringsSep "-" strings.toLower suffices}"; - descriptionSuffix = strings.optionalString ( - suffices != [ ] - ) ", accelerated with ${strings.concatStringsSep ", " suffices}"; - - xcrunHost = runCommand "xcrunHost" { } '' - mkdir -p $out/bin - ln -s /usr/bin/xcrun $out/bin - ''; - - # apple_sdk is supposed to choose sane defaults, no need to handle isAarch64 - # separately - darwinBuildInputs = - with darwin.apple_sdk.frameworks; - [ - Accelerate - CoreVideo - CoreGraphics - ] - ++ optionals useMetalKit [ MetalKit ]; - - cudaBuildInputs = with cudaPackages; [ - cuda_cudart - cuda_cccl # - libcublas - ]; - - rocmBuildInputs = with rocmPackages; [ - clr - hipblas - rocblas - ]; - - vulkanBuildInputs = [ - vulkan-headers - vulkan-loader - shaderc - ]; -in - -effectiveStdenv.mkDerivation (finalAttrs: { - pname = "llama-cpp${pnameSuffix}"; - version = llamaVersion; - - # Note: none of the files discarded here are visible in the sandbox or - # affect the output hash. This also means they can be modified without - # triggering a rebuild. - src = lib.cleanSourceWith { - filter = - name: type: - let - noneOf = builtins.all (x: !x); - baseName = baseNameOf name; - in - noneOf [ - (lib.hasSuffix ".nix" name) # Ignore *.nix files when computing outPaths - (lib.hasSuffix ".md" name) # Ignore *.md changes whe computing outPaths - (lib.hasPrefix "." baseName) # Skip hidden files and directories - (baseName == "flake.lock") - ]; - src = lib.cleanSource ../../.; - }; - - postPatch = '' - substituteInPlace ./ggml/src/ggml-metal/ggml-metal.m \ - --replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";" - substituteInPlace ./ggml/src/ggml-metal/ggml-metal.m \ - --replace '[bundle pathForResource:@"default" ofType:@"metallib"];' "@\"$out/bin/default.metallib\";" - ''; - - # With PR#6015 https://github.com/ggerganov/llama.cpp/pull/6015, - # `default.metallib` may be compiled with Metal compiler from XCode - # and we need to escape sandbox on MacOS to access Metal compiler. 
- # `xcrun` is used find the path of the Metal compiler, which is varible - # and not on $PATH - # see https://github.com/ggerganov/llama.cpp/pull/6118 for discussion - __noChroot = effectiveStdenv.isDarwin && useMetalKit && precompileMetalShaders; - - nativeBuildInputs = - [ - cmake - ninja - pkg-config - git - ] - ++ optionals useCuda [ - cudaPackages.cuda_nvcc - - autoAddDriverRunpath - ] - ++ optionals (effectiveStdenv.hostPlatform.isGnu && enableStatic) [ glibc.static ] - ++ optionals (effectiveStdenv.isDarwin && useMetalKit && precompileMetalShaders) [ xcrunHost ]; - - buildInputs = - optionals effectiveStdenv.isDarwin darwinBuildInputs - ++ optionals useCuda cudaBuildInputs - ++ optionals useMpi [ mpi ] - ++ optionals useRocm rocmBuildInputs - ++ optionals useBlas [ blas ] - ++ optionals useVulkan vulkanBuildInputs - ++ optionals enableCurl [ curl ]; - - cmakeFlags = - [ - (cmakeBool "LLAMA_BUILD_SERVER" true) - (cmakeBool "BUILD_SHARED_LIBS" (!enableStatic)) - (cmakeBool "CMAKE_SKIP_BUILD_RPATH" true) - (cmakeBool "LLAMA_CURL" enableCurl) - (cmakeBool "GGML_NATIVE" false) - (cmakeBool "GGML_BLAS" useBlas) - (cmakeBool "GGML_CUDA" useCuda) - (cmakeBool "GGML_HIP" useRocm) - (cmakeBool "GGML_METAL" useMetalKit) - (cmakeBool "GGML_VULKAN" useVulkan) - (cmakeBool "GGML_STATIC" enableStatic) - ] - ++ optionals useCuda [ - ( - with cudaPackages.flags; - cmakeFeature "CMAKE_CUDA_ARCHITECTURES" ( - builtins.concatStringsSep ";" (map dropDot cudaCapabilities) - ) - ) - ] - ++ optionals useRocm [ - (cmakeFeature "CMAKE_HIP_COMPILER" "${rocmPackages.llvm.clang}/bin/clang") - (cmakeFeature "CMAKE_HIP_ARCHITECTURES" rocmGpuTargets) - ] - ++ optionals useMetalKit [ - (lib.cmakeFeature "CMAKE_C_FLAGS" "-D__ARM_FEATURE_DOTPROD=1") - (cmakeBool "GGML_METAL_EMBED_LIBRARY" (!precompileMetalShaders)) - ]; - - # Environment variables needed for ROCm - env = optionals useRocm { - ROCM_PATH = "${rocmPackages.clr}"; - HIP_DEVICE_LIB_PATH = "${rocmPackages.rocm-device-libs}/amdgcn/bitcode"; - }; - - # TODO(SomeoneSerge): It's better to add proper install targets at the CMake level, - # if they haven't been added yet. - postInstall = '' - mkdir -p $out/include - cp $src/include/llama.h $out/include/ - ''; - - meta = { - # Configurations we don't want even the CI to evaluate. Results in the - # "unsupported platform" messages. This is mostly a no-op, because - # cudaPackages would've refused to evaluate anyway. - badPlatforms = optionals useCuda lib.platforms.darwin; - - # Configurations that are known to result in build failures. Can be - # overridden by importing Nixpkgs with `allowBroken = true`. - broken = (useMetalKit && !effectiveStdenv.isDarwin); - - description = "Inference of LLaMA model in pure C/C++${descriptionSuffix}"; - homepage = "https://github.com/ggerganov/llama.cpp/"; - license = lib.licenses.mit; - - # Accommodates `nix run` and `lib.getExe` - mainProgram = "llama-cli"; - - # These people might respond, on the best effort basis, if you ping them - # in case of Nix-specific regressions or for reviewing Nix-specific PRs. - # Consider adding yourself to this list if you want to ensure this flake - # stays maintained and you're willing to invest your time. Do not add - # other people without their consent. Consider removing people after - # they've been unreachable for long periods of time. 
- - # Note that lib.maintainers is defined in Nixpkgs, but you may just add - # an attrset following the same format as in - # https://github.com/NixOS/nixpkgs/blob/f36a80e54da29775c78d7eff0e628c2b4e34d1d7/maintainers/maintainer-list.nix - maintainers = with lib.maintainers; [ - philiptaron - SomeoneSerge - ]; - - # Extend `badPlatforms` instead - platforms = lib.platforms.all; - }; -}) diff --git a/.devops/nix/python-scripts.nix b/.devops/nix/python-scripts.nix deleted file mode 100644 index 56ea18278..000000000 --- a/.devops/nix/python-scripts.nix +++ /dev/null @@ -1,66 +0,0 @@ -{ - lib, - stdenv, - buildPythonPackage, - poetry-core, - mkShell, - python3Packages, - gguf-py, -}@inputs: - -let - llama-python-deps = with python3Packages; [ - numpy - sentencepiece - transformers - protobuf - torchWithoutCuda - gguf-py - tqdm - - # for scripts/compare-llama-bench.py - gitpython - tabulate - - # for examples/pydantic-models-to-grammar-examples.py - docstring-parser - pydantic - - ]; - - llama-python-test-deps = with python3Packages; [ - # Server bench - matplotlib - - # server tests - openai - pytest - prometheus-client - ]; -in - -buildPythonPackage ({ - pname = "llama-scripts"; - version = "0.0.0"; - pyproject = true; - - # NOTE: The files filtered out here are not visible in the build sandbox, neither - # do they affect the output hash. They can be modified without triggering a rebuild. - src = lib.cleanSourceWith { - filter = - name: type: - let - any = builtins.any (x: x); - baseName = builtins.baseNameOf name; - in - any [ - (lib.hasSuffix ".py" name) - (baseName == "README.md") - (baseName == "pyproject.toml") - ]; - src = lib.cleanSource ../../.; - }; - nativeBuildInputs = [ poetry-core ]; - nativeCheckInputs = llama-python-test-deps; - dependencies = llama-python-deps; -}) diff --git a/.devops/nix/scope.nix b/.devops/nix/scope.nix deleted file mode 100644 index 478e8c422..000000000 --- a/.devops/nix/scope.nix +++ /dev/null @@ -1,41 +0,0 @@ -{ - lib, - newScope, - python3, - llamaVersion ? "0.0.0", -}: - -let - pythonPackages = python3.pkgs; - buildPythonPackage = pythonPackages.buildPythonPackage; - numpy = pythonPackages.numpy; - tqdm = pythonPackages.tqdm; - sentencepiece = pythonPackages.sentencepiece; - pyyaml = pythonPackages.pyyaml; - poetry-core = pythonPackages.poetry-core; - pytestCheckHook = pythonPackages.pytestCheckHook; -in - -# We're using `makeScope` instead of just writing out an attrset -# because it allows users to apply overlays later using `overrideScope'`. -# Cf. https://noogle.dev/f/lib/makeScope - -lib.makeScope newScope (self: { - inherit llamaVersion; - gguf-py = self.callPackage ./package-gguf-py.nix { - inherit - buildPythonPackage - numpy - tqdm - sentencepiece - poetry-core - pyyaml - pytestCheckHook - ; - }; - python-scripts = self.callPackage ./python-scripts.nix { inherit buildPythonPackage poetry-core; }; - llama-cpp = self.callPackage ./package.nix { }; - docker = self.callPackage ./docker.nix { }; - docker-min = self.callPackage ./docker.nix { interactive = false; }; - sif = self.callPackage ./sif.nix { }; -}) diff --git a/.devops/nix/sif.nix b/.devops/nix/sif.nix deleted file mode 100644 index 7a5e1dd0f..000000000 --- a/.devops/nix/sif.nix +++ /dev/null @@ -1,27 +0,0 @@ -{ - lib, - singularity-tools, - llama-cpp, - bashInteractive, - interactive ? 
false, -}: - -let - optionalInt = cond: x: if cond then x else 0; -in -singularity-tools.buildImage rec { - inherit (llama-cpp) name; - contents = [ llama-cpp ] ++ lib.optionals interactive [ bashInteractive ]; - - # These are excessive (but safe) for most variants. Building singularity - # images requires superuser privileges, so we build them inside a VM in a - # writable image of pre-determined size. - # - # ROCm is currently affected by https://github.com/NixOS/nixpkgs/issues/276846 - # - # Expected image sizes: - # - cpu/blas: 150M, - # - cuda, all gencodes: 560M, - diskSize = 4096 + optionalInt llama-cpp.useRocm 16384; - memSize = diskSize; -} diff --git a/.devops/rocm.Dockerfile b/.devops/rocm.Dockerfile deleted file mode 100644 index a8088ea00..000000000 --- a/.devops/rocm.Dockerfile +++ /dev/null @@ -1,113 +0,0 @@ -ARG UBUNTU_VERSION=24.04 - -# This needs to generally match the container host's environment. -ARG ROCM_VERSION=6.3 -ARG AMDGPU_VERSION=6.3 - -# Target the CUDA build image -ARG BASE_ROCM_DEV_CONTAINER=rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete - -### Build image -FROM ${BASE_ROCM_DEV_CONTAINER} AS build - -# Unless otherwise specified, we make a fat build. -# List from https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878 -# This is mostly tied to rocBLAS supported archs. -# gfx803, gfx900, gfx1032, gfx1101, gfx1102,not officialy supported -# gfx906 is deprecated -#check https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.2.4/reference/system-requirements.html - -#ARG ROCM_DOCKER_ARCH='gfx803,gfx900,gfx906,gfx908,gfx90a,gfx942,gfx1010,gfx1030,gfx1032,gfx1100,gfx1101,gfx1102' -ARG ROCM_DOCKER_ARCH=gfx1100 - -# Set nvcc architectured -ENV AMDGPU_TARGETS=${ROCM_DOCKER_ARCH} -# Enable ROCm -# ENV CC=/opt/rocm/llvm/bin/clang -# ENV CXX=/opt/rocm/llvm/bin/clang++ - -RUN apt-get update \ - && apt-get install -y \ - build-essential \ - cmake \ - git \ - libcurl4-openssl-dev \ - curl \ - libgomp1 - -WORKDIR /app - -COPY . . - -RUN HIPCXX="$(hipconfig -l)/clang" HIP_PATH="$(hipconfig -R)" \ - cmake -S . 
-B build -DGGML_HIP=ON -DAMDGPU_TARGETS=$ROCM_DOCKER_ARCH -DCMAKE_BUILD_TYPE=Release -DLLAMA_CURL=ON \ - && cmake --build build --config Release -j$(nproc) - -RUN mkdir -p /app/lib \ - && find build -name "*.so" -exec cp {} /app/lib \; - -RUN mkdir -p /app/full \ - && cp build/bin/* /app/full \ - && cp *.py /app/full \ - && cp -r gguf-py /app/full \ - && cp -r requirements /app/full \ - && cp requirements.txt /app/full \ - && cp .devops/tools.sh /app/full/tools.sh - -## Base image -FROM ${BASE_ROCM_DEV_CONTAINER} AS base - -RUN apt-get update \ - && apt-get install -y libgomp1 curl\ - && apt autoremove -y \ - && apt clean -y \ - && rm -rf /tmp/* /var/tmp/* \ - && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \ - && find /var/cache -type f -delete - -COPY --from=build /app/lib/ /app - -### Full -FROM base AS full - -COPY --from=build /app/full /app - -WORKDIR /app - -RUN apt-get update \ - && apt-get install -y \ - git \ - python3-pip \ - python3 \ - python3-wheel\ - && pip install --break-system-packages --upgrade setuptools \ - && pip install --break-system-packages -r requirements.txt \ - && apt autoremove -y \ - && apt clean -y \ - && rm -rf /tmp/* /var/tmp/* \ - && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \ - && find /var/cache -type f -delete - -ENTRYPOINT ["/app/tools.sh"] - -### Light, CLI only -FROM base AS light - -COPY --from=build /app/full/llama-cli /app - -WORKDIR /app - -ENTRYPOINT [ "/app/llama-cli" ] - -### Server, Server only -FROM base AS server - -ENV LLAMA_ARG_HOST=0.0.0.0 - -COPY --from=build /app/full/llama-server /app - -WORKDIR /app - -HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ] - -ENTRYPOINT [ "/app/llama-server" ] diff --git a/.devops/tools.sh b/.devops/tools.sh index 41a6b1e55..b0196b60d 100755 --- a/.devops/tools.sh +++ b/.devops/tools.sh @@ -7,43 +7,34 @@ arg1="$1" # Shift the arguments to remove the first one shift -if [[ "$arg1" == '--convert' || "$arg1" == '-c' ]]; then - exec python3 ./convert_hf_to_gguf.py "$@" -elif [[ "$arg1" == '--quantize' || "$arg1" == '-q' ]]; then - exec ./llama-quantize "$@" -elif [[ "$arg1" == '--run' || "$arg1" == '-r' ]]; then - exec ./llama-cli "$@" -elif [[ "$arg1" == '--bench' || "$arg1" == '-b' ]]; then - exec ./llama-bench "$@" -elif [[ "$arg1" == '--perplexity' || "$arg1" == '-p' ]]; then - exec ./llama-perplexity "$@" -elif [[ "$arg1" == '--all-in-one' || "$arg1" == '-a' ]]; then +# Join the remaining arguments into a single string +arg2="$@" + +if [[ $arg1 == '--convert' || $arg1 == '-c' ]]; then + python3 ./convert-pth-to-ggml.py $arg2 +elif [[ $arg1 == '--quantize' || $arg1 == '-q' ]]; then + ./quantize $arg2 +elif [[ $arg1 == '--run' || $arg1 == '-r' ]]; then + ./main $arg2 +elif [[ $arg1 == '--all-in-one' || $arg1 == '-a' ]]; then echo "Converting PTH to GGML..." - for i in $(ls $1/$2/ggml-model-f16.bin*); do + for i in `ls $1/$2/ggml-model-f16.bin*`; do if [ -f "${i/f16/q4_0}" ]; then echo "Skip model quantization, it already exists: ${i/f16/q4_0}" else echo "Converting PTH to GGML: $i into ${i/f16/q4_0}..." 
- exec ./llama-quantize "$i" "${i/f16/q4_0}" q4_0 + ./quantize "$i" "${i/f16/q4_0}" 2 fi done -elif [[ "$arg1" == '--server' || "$arg1" == '-s' ]]; then - exec ./llama-server "$@" else echo "Unknown command: $arg1" echo "Available commands: " echo " --run (-r): Run a model previously converted into ggml" echo " ex: -m /models/7B/ggml-model-q4_0.bin -p \"Building a website can be done in 10 simple steps:\" -n 512" - echo " --bench (-b): Benchmark the performance of the inference for various parameters." - echo " ex: -m model.gguf" - echo " --perplexity (-p): Measure the perplexity of a model over a given text." - echo " ex: -m model.gguf -f file.txt" echo " --convert (-c): Convert a llama model into ggml" - echo " ex: --outtype f16 \"/models/7B/\" " + echo " ex: \"/models/7B/\" 1" echo " --quantize (-q): Optimize with quantization process ggml" echo " ex: \"/models/7B/ggml-model-f16.bin\" \"/models/7B/ggml-model-q4_0.bin\" 2" echo " --all-in-one (-a): Execute --convert & --quantize" echo " ex: \"/models/\" 7B" - echo " --server (-s): Run a model on the server" - echo " ex: -m /models/7B/ggml-model-q4_0.bin -c 2048 -ngl 43 -mg 1 --port 8080" fi diff --git a/.devops/vulkan.Dockerfile b/.devops/vulkan.Dockerfile deleted file mode 100644 index 9064f3838..000000000 --- a/.devops/vulkan.Dockerfile +++ /dev/null @@ -1,89 +0,0 @@ -ARG UBUNTU_VERSION=24.04 - -FROM ubuntu:$UBUNTU_VERSION AS build - -# Install build tools -RUN apt update && apt install -y git build-essential cmake wget - -# Install Vulkan SDK and cURL -RUN wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \ - wget -qO /etc/apt/sources.list.d/lunarg-vulkan-noble.list https://packages.lunarg.com/vulkan/lunarg-vulkan-noble.list && \ - apt update -y && \ - apt-get install -y vulkan-sdk libcurl4-openssl-dev curl - -# Build it -WORKDIR /app - -COPY . . 
- -RUN cmake -B build -DGGML_NATIVE=OFF -DGGML_VULKAN=1 -DLLAMA_CURL=1 && \ - cmake --build build --config Release -j$(nproc) - -RUN mkdir -p /app/lib && \ - find build -name "*.so" -exec cp {} /app/lib \; - -RUN mkdir -p /app/full \ - && cp build/bin/* /app/full \ - && cp *.py /app/full \ - && cp -r gguf-py /app/full \ - && cp -r requirements /app/full \ - && cp requirements.txt /app/full \ - && cp .devops/tools.sh /app/full/tools.sh - -## Base image -FROM ubuntu:$UBUNTU_VERSION AS base - -RUN apt-get update \ - && apt-get install -y libgomp1 curl libvulkan-dev \ - && apt autoremove -y \ - && apt clean -y \ - && rm -rf /tmp/* /var/tmp/* \ - && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \ - && find /var/cache -type f -delete - -COPY --from=build /app/lib/ /app - -### Full -FROM base AS full - -COPY --from=build /app/full /app - -WORKDIR /app - -RUN apt-get update \ - && apt-get install -y \ - git \ - python3 \ - python3-pip \ - python3-wheel \ - && pip install --break-system-packages --upgrade setuptools \ - && pip install --break-system-packages -r requirements.txt \ - && apt autoremove -y \ - && apt clean -y \ - && rm -rf /tmp/* /var/tmp/* \ - && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \ - && find /var/cache -type f -delete - -ENTRYPOINT ["/app/tools.sh"] - -### Light, CLI only -FROM base AS light - -COPY --from=build /app/full/llama-cli /app - -WORKDIR /app - -ENTRYPOINT [ "/app/llama-cli" ] - -### Server, Server only -FROM base AS server - -ENV LLAMA_ARG_HOST=0.0.0.0 - -COPY --from=build /app/full/llama-server /app - -WORKDIR /app - -HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ] - -ENTRYPOINT [ "/app/llama-server" ] diff --git a/.dockerignore b/.dockerignore index 064b7c7be..462fac23a 100644 --- a/.dockerignore +++ b/.dockerignore @@ -1,19 +1,23 @@ *.o *.a .cache/ -# Do not ignore .git directory, otherwise the reported build number will always be 0 -.github/ -.gitignore .vs/ .vscode/ .DS_Store -build*/ +build/ +build-em/ +build-debug/ +build-release/ +build-static/ +build-no-accel/ +build-sanitize-addr/ +build-sanitize-thread/ models/* -/llama-cli -/llama-quantize +/main +/quantize arm_neon.h compile_commands.json diff --git a/.ecrc b/.ecrc index c68877ec2..b682057dd 100644 --- a/.ecrc +++ b/.ecrc @@ -1,5 +1,4 @@ { - "Exclude": ["^\\.gitmodules$", "stb_image\\.h"], "Disable": { "IndentSize": true } diff --git a/.editorconfig b/.editorconfig index 5d63d0a51..135a7e4bc 100644 --- a/.editorconfig +++ b/.editorconfig @@ -15,36 +15,5 @@ indent_size = 4 [Makefile] indent_style = tab -[scripts/*.mk] -indent_style = tab - [prompts/*.txt] insert_final_newline = unset - -[examples/server/public/*] -indent_size = 2 - -[examples/server/public/deps_*] -trim_trailing_whitespace = unset -indent_style = unset -indent_size = unset - -[examples/server/deps_*] -trim_trailing_whitespace = unset -indent_style = unset -indent_size = unset - -[examples/llama.swiftui/llama.swiftui.xcodeproj/*] -indent_style = tab - -[examples/cvector-generator/*.txt] -trim_trailing_whitespace = unset -insert_final_newline = unset - -[models/templates/*.jinja] -indent_style = unset -indent_size = unset -end_of_line = unset -charset = unset -trim_trailing_whitespace = unset -insert_final_newline = unset diff --git a/.flake8 b/.flake8 deleted file mode 100644 index d64c2564a..000000000 --- a/.flake8 +++ /dev/null @@ -1,17 +0,0 @@ -[flake8] -max-line-length = 125 -ignore = 
E203,E211,E221,E225,E231,E241,E251,E261,E266,E501,E701,E704,W503 -exclude = - # Do not traverse examples - examples, - # Do not include package initializers - __init__.py, - # No need to traverse our git directory - .git, - # There's no value in checking cache directories - __pycache__, - # No need to include the build path - build, - # This contains builds that we don't want to check - dist # This is generated with `python build .` for package releases -# max-complexity = 10 diff --git a/.github/ISSUE_TEMPLATE/010-bug-compilation.yml b/.github/ISSUE_TEMPLATE/010-bug-compilation.yml deleted file mode 100644 index b85bf5741..000000000 --- a/.github/ISSUE_TEMPLATE/010-bug-compilation.yml +++ /dev/null @@ -1,87 +0,0 @@ -name: Bug (compilation) -description: Something goes wrong when trying to compile llama.cpp. -title: "Compile bug: " -labels: ["bug-unconfirmed", "compilation"] -body: - - type: markdown - attributes: - value: > - Thanks for taking the time to fill out this bug report! - This issue template is intended for bug reports where the compilation of llama.cpp fails. - Before opening an issue, please confirm that the compilation still fails with `-DGGML_CCACHE=OFF`. - If the compilation succeeds with ccache disabled you should be able to permanently fix the issue - by clearing `~/.cache/ccache` (on Linux). - - type: textarea - id: commit - attributes: - label: Git commit - description: Which commit are you trying to compile? - placeholder: | - $git rev-parse HEAD - 84a07a17b1b08cf2b9747c633a2372782848a27f - validations: - required: true - - type: dropdown - id: operating-system - attributes: - label: Operating systems - description: Which operating systems do you know to be affected? - multiple: true - options: - - Linux - - Mac - - Windows - - BSD - - Other? (Please let us know in description) - validations: - required: true - - type: dropdown - id: backends - attributes: - label: GGML backends - description: Which GGML backends do you know to be affected? - options: [AMX, BLAS, CPU, CUDA, HIP, Kompute, Metal, Musa, RPC, SYCL, Vulkan] - multiple: true - validations: - required: true - - type: textarea - id: info - attributes: - label: Problem description & steps to reproduce - description: > - Please give us a summary of the problem and tell us how to reproduce it. - If you can narrow down the bug to specific compile flags, that information would be very much appreciated by us. - placeholder: > - I'm trying to compile llama.cpp with CUDA support on a fresh install of Ubuntu and get error XY. - Here are the exact commands that I used: ... - validations: - required: true - - type: textarea - id: first_bad_commit - attributes: - label: First Bad Commit - description: > - If the bug was not present on an earlier version: when did it start appearing? - If possible, please do a git bisect and identify the exact commit that introduced the bug. - validations: - required: false - - type: textarea - id: command - attributes: - label: Compile command - description: > - Please provide the exact command you used to compile llama.cpp. For example: `cmake -B ...`. - This will be automatically formatted into code, so no need for backticks. - render: shell - validations: - required: true - - type: textarea - id: logs - attributes: - label: Relevant log output - description: > - Please copy and paste any relevant log output, including any generated text. - This will be automatically formatted into code, so no need for backticks. 
- render: shell - validations: - required: true diff --git a/.github/ISSUE_TEMPLATE/011-bug-results.yml b/.github/ISSUE_TEMPLATE/011-bug-results.yml deleted file mode 100644 index 1ccef0793..000000000 --- a/.github/ISSUE_TEMPLATE/011-bug-results.yml +++ /dev/null @@ -1,101 +0,0 @@ -name: Bug (model use) -description: Something goes wrong when using a model (in general, not specific to a single llama.cpp module). -title: "Eval bug: " -labels: ["bug-unconfirmed", "model evaluation"] -body: - - type: markdown - attributes: - value: > - Thanks for taking the time to fill out this bug report! - This issue template is intended for bug reports where the model evaluation results - (i.e. the generated text) are incorrect or llama.cpp crashes during model evaluation. - If you encountered the issue while using an external UI (e.g. ollama), - please reproduce your issue using one of the examples/binaries in this repository. - The `llama-cli` binary can be used for simple and reproducible model inference. - - type: textarea - id: version - attributes: - label: Name and Version - description: Which version of our software are you running? (use `--version` to get a version string) - placeholder: | - $./llama-cli --version - version: 2999 (42b4109e) - built with cc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 for x86_64-linux-gnu - validations: - required: true - - type: dropdown - id: operating-system - attributes: - label: Operating systems - description: Which operating systems do you know to be affected? - multiple: true - options: - - Linux - - Mac - - Windows - - BSD - - Other? (Please let us know in description) - validations: - required: true - - type: dropdown - id: backends - attributes: - label: GGML backends - description: Which GGML backends do you know to be affected? - options: [AMX, BLAS, CPU, CUDA, HIP, Kompute, Metal, Musa, RPC, SYCL, Vulkan] - multiple: true - validations: - required: true - - type: textarea - id: hardware - attributes: - label: Hardware - description: Which CPUs/GPUs are you using? - placeholder: > - e.g. Ryzen 5950X + 2x RTX 4090 - validations: - required: true - - type: textarea - id: model - attributes: - label: Models - description: > - Which model(s) at which quantization were you using when encountering the bug? - If you downloaded a GGUF file off of Huggingface, please provide a link. - placeholder: > - e.g. Meta LLaMA 3.1 Instruct 8b q4_K_M - validations: - required: false - - type: textarea - id: info - attributes: - label: Problem description & steps to reproduce - description: > - Please give us a summary of the problem and tell us how to reproduce it. - If you can narrow down the bug to specific hardware, compile flags, or command line arguments, - that information would be very much appreciated by us. - placeholder: > - e.g. when I run llama-cli with -ngl 99 I get garbled outputs. - When I use -ngl 0 it works correctly. - Here are the exact commands that I used: ... - validations: - required: true - - type: textarea - id: first_bad_commit - attributes: - label: First Bad Commit - description: > - If the bug was not present on an earlier version: when did it start appearing? - If possible, please do a git bisect and identify the exact commit that introduced the bug. - validations: - required: false - - type: textarea - id: logs - attributes: - label: Relevant log output - description: > - Please copy and paste any relevant log output, including the command that you entered and any generated text. 
- This will be automatically formatted into code, so no need for backticks. - render: shell - validations: - required: true diff --git a/.github/ISSUE_TEMPLATE/019-bug-misc.yml b/.github/ISSUE_TEMPLATE/019-bug-misc.yml deleted file mode 100644 index 1904e31fd..000000000 --- a/.github/ISSUE_TEMPLATE/019-bug-misc.yml +++ /dev/null @@ -1,91 +0,0 @@ -name: Bug (misc.) -description: Something is not working the way it should (and it's not covered by any of the above cases). -title: "Misc. bug: " -labels: ["bug-unconfirmed"] -body: - - type: markdown - attributes: - value: > - Thanks for taking the time to fill out this bug report! - This issue template is intended for miscellaneous bugs that don't fit into any other category. - If you encountered the issue while using an external UI (e.g. ollama), - please reproduce your issue using one of the examples/binaries in this repository. - - type: textarea - id: version - attributes: - label: Name and Version - description: Which version of our software is affected? (You can use `--version` to get a version string.) - placeholder: | - $./llama-cli --version - version: 2999 (42b4109e) - built with cc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 for x86_64-linux-gnu - validations: - required: true - - type: dropdown - id: operating-system - attributes: - label: Operating systems - description: Which operating systems do you know to be affected? - multiple: true - options: - - Linux - - Mac - - Windows - - BSD - - Other? (Please let us know in description) - validations: - required: false - - type: dropdown - id: module - attributes: - label: Which llama.cpp modules do you know to be affected? - multiple: true - options: - - Documentation/Github - - libllama (core library) - - llama-cli - - llama-server - - llama-bench - - llama-quantize - - Python/Bash scripts - - Test code - - Other (Please specify in the next section) - validations: - required: false - - type: textarea - id: command - attributes: - label: Command line - description: > - Please provide the exact commands you entered, if applicable. For example: `llama-server -m ... -c ...`, `llama-cli -m ...`, etc. - This will be automatically formatted into code, so no need for backticks. - render: shell - validations: - required: false - - type: textarea - id: info - attributes: - label: Problem description & steps to reproduce - description: > - Please give us a summary of the problem and tell us how to reproduce it (if applicable). - validations: - required: true - - type: textarea - id: first_bad_commit - attributes: - label: First Bad Commit - description: > - If the bug was not present on an earlier version and it's not trivial to track down: when did it start appearing? - If possible, please do a git bisect and identify the exact commit that introduced the bug. - validations: - required: false - - type: textarea - id: logs - attributes: - label: Relevant log output - description: > - If applicable, please copy and paste any relevant log output, including any generated text. - This will be automatically formatted into code, so no need for backticks. - render: shell - validations: - required: false diff --git a/.github/ISSUE_TEMPLATE/020-enhancement.yml b/.github/ISSUE_TEMPLATE/020-enhancement.yml deleted file mode 100644 index 02dd4f575..000000000 --- a/.github/ISSUE_TEMPLATE/020-enhancement.yml +++ /dev/null @@ -1,51 +0,0 @@ -name: Enhancement -description: Used to request enhancements for llama.cpp. 
-title: "Feature Request: " -labels: ["enhancement"] -body: - - type: markdown - attributes: - value: | - [Please post your idea first in Discussion if there is not yet a consensus for this enhancement request. This will help to keep this issue tracker focused on enhancements that the community has agreed needs to be implemented.](https://github.com/ggerganov/llama.cpp/discussions/categories/ideas) - - - type: checkboxes - id: prerequisites - attributes: - label: Prerequisites - description: Please confirm the following before submitting your enhancement request. - options: - - label: I am running the latest code. Mention the version if possible as well. - required: true - - label: I carefully followed the [README.md](https://github.com/ggerganov/llama.cpp/blob/master/README.md). - required: true - - label: I searched using keywords relevant to my issue to make sure that I am creating a new issue that is not already open (or closed). - required: true - - label: I reviewed the [Discussions](https://github.com/ggerganov/llama.cpp/discussions), and have a new and useful enhancement to share. - required: true - - - type: textarea - id: feature-description - attributes: - label: Feature Description - description: Please provide a detailed written description of what you were trying to do, and what you expected `llama.cpp` to do as an enhancement. - placeholder: Detailed description of the enhancement - validations: - required: true - - - type: textarea - id: motivation - attributes: - label: Motivation - description: Please provide a detailed written description of reasons why this feature is necessary and how it is useful to `llama.cpp` users. - placeholder: Explanation of why this feature is needed and its benefits - validations: - required: true - - - type: textarea - id: possible-implementation - attributes: - label: Possible Implementation - description: If you have an idea as to how it can be implemented, please write a detailed description. Feel free to give links to external sources or share visuals that might be helpful to understand the details better. - placeholder: Detailed description of potential implementation - validations: - required: false diff --git a/.github/ISSUE_TEMPLATE/030-research.yml b/.github/ISSUE_TEMPLATE/030-research.yml deleted file mode 100644 index 18975dbbf..000000000 --- a/.github/ISSUE_TEMPLATE/030-research.yml +++ /dev/null @@ -1,52 +0,0 @@ -name: Research -description: Track new technical research area. -title: "Research: " -labels: ["research 🔬"] -body: - - type: markdown - attributes: - value: | - Don't forget to check for any [duplicate research issue tickets](https://github.com/ggerganov/llama.cpp/issues?q=is%3Aopen+is%3Aissue+label%3A%22research+%F0%9F%94%AC%22) - - - type: checkboxes - id: research-stage - attributes: - label: Research Stage - description: Track general state of this research ticket - options: - - label: Background Research (Let's try to avoid reinventing the wheel) - - label: Hypothesis Formed (How do you think this will work and it's effect?) - - label: Strategy / Implementation Forming - - label: Analysis of results - - label: Debrief / Documentation (So people in the future can learn from us) - - - type: textarea - id: background - attributes: - label: Previous existing literature and research - description: Whats the current state of the art and whats the motivation for this research? - - - type: textarea - id: hypothesis - attributes: - label: Hypothesis - description: How do you think this will work and it's effect? 
- - - type: textarea - id: implementation - attributes: - label: Implementation - description: Got an approach? e.g. a PR ready to go? - - - type: textarea - id: analysis - attributes: - label: Analysis - description: How does the proposed implementation behave? - - - type: textarea - id: logs - attributes: - label: Relevant log output - description: Please copy and paste any relevant log output. This will be automatically formatted into code, so no need for backticks. - render: shell diff --git a/.github/ISSUE_TEMPLATE/040-refactor.yml b/.github/ISSUE_TEMPLATE/040-refactor.yml deleted file mode 100644 index b6e6ab36d..000000000 --- a/.github/ISSUE_TEMPLATE/040-refactor.yml +++ /dev/null @@ -1,28 +0,0 @@ -name: Refactor (Maintainers) -description: Used to track refactoring opportunities. -title: "Refactor: " -labels: ["refactor"] -body: - - type: markdown - attributes: - value: | - Don't forget to [check for existing refactor issue tickets](https://github.com/ggerganov/llama.cpp/issues?q=is%3Aopen+is%3Aissue+label%3Arefactoring) in case it's already covered. - Also you may want to check [Pull request refactor label as well](https://github.com/ggerganov/llama.cpp/pulls?q=is%3Aopen+is%3Apr+label%3Arefactoring) for duplicates too. - - - type: textarea - id: background-description - attributes: - label: Background Description - description: Please provide a detailed written description of the pain points you are trying to solve. - placeholder: Detailed description behind your motivation to request refactor - validations: - required: true - - - type: textarea - id: possible-approaches - attributes: - label: Possible Refactor Approaches - description: If you have some idea of possible approaches to solve this problem. You may want to make it a todo list. - placeholder: Your idea of possible refactoring opportunity/approaches - validations: - required: false diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml deleted file mode 100644 index eb8c4b472..000000000 --- a/.github/ISSUE_TEMPLATE/config.yml +++ /dev/null @@ -1,11 +0,0 @@ -blank_issues_enabled: true -contact_links: - - name: Got an idea? - url: https://github.com/ggerganov/llama.cpp/discussions/categories/ideas - about: Pop it there. It may then become an enhancement ticket. - - name: Got a question? - url: https://github.com/ggerganov/llama.cpp/discussions/categories/q-a - about: Ask a question there! - - name: Want to contribute? - url: https://github.com/ggerganov/llama.cpp/wiki/contribute - about: Head to the contribution guide page of the wiki for areas you can help with diff --git a/.github/ISSUE_TEMPLATE/custom.md b/.github/ISSUE_TEMPLATE/custom.md new file mode 100644 index 000000000..8fd955356 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/custom.md @@ -0,0 +1,185 @@ +--- +name: Issue and enhancement template +about: Used to report issues and request enhancements for llama.cpp +title: "[User] Insert summary of your issue or enhancement.." +labels: '' +assignees: '' + +--- + +# Prerequisites + +Please answer the following questions for yourself before submitting an issue. + +- [ ] I am running the latest code. Development is very rapid so there are no tagged versions as of now. +- [ ] I carefully followed the [README.md](https://github.com/ggerganov/llama.cpp/blob/master/README.md). 
+- [ ] I [searched using keywords relevant to my issue](https://docs.github.com/en/issues/tracking-your-work-with-issues/filtering-and-searching-issues-and-pull-requests) to make sure that I am creating a new issue that is not already open (or closed). +- [ ] I reviewed the [Discussions](https://github.com/ggerganov/llama.cpp/discussions), and have a new bug or useful enhancement to share. + +# Expected Behavior + +Please provide a detailed written description of what you were trying to do, and what you expected `llama.cpp` to do. + +# Current Behavior + +Please provide a detailed written description of what `llama.cpp` did, instead. + +# Environment and Context + +Please provide detailed information about your computer setup. This is important in case the issue is not reproducible except for under certain specific conditions. + +* Physical (or virtual) hardware you are using, e.g. for Linux: + +`$ lscpu` + +* Operating System, e.g. for Linux: + +`$ uname -a` + +* SDK version, e.g. for Linux: + +``` +$ python3 --version +$ make --version +$ g++ --version +``` + +# Failure Information (for bugs) + +Please help provide information about the failure if this is a bug. If it is not a bug, please remove the rest of this template. + +# Steps to Reproduce + +Please provide detailed steps for reproducing the issue. We are not sitting in front of your screen, so the more detail the better. + +1. step 1 +2. step 2 +3. step 3 +4. etc. + +# Failure Logs + +Please include any relevant log snippets or files. If it works under one configuration but not under another, please provide logs for both configurations and their corresponding outputs so it is easy to see where behavior changes. + +Also, please try to **avoid using screenshots** if at all possible. Instead, copy/paste the console output and use [Github's markdown](https://docs.github.com/en/get-started/writing-on-github/getting-started-with-writing-and-formatting-on-github/basic-writing-and-formatting-syntax) to cleanly format your logs for easy readability. 
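For instance (excerpted from the longer example run shown below; the exact command and flags are only illustrative), pairing the command you ran with its console output in a fenced block keeps the report readable and searchable:

```sh
$ ./main -m ./models/65B/ggml-model-q4_0.bin -t 16 -n 1024 -p "Please close your issue when it has been answered."
main: seed = 1679149377
llama_model_load: loading model from './models/65B/ggml-model-q4_0.bin' - please wait ...
...
```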
+ +Example environment info: +``` +llama.cpp$ git log | head -1 +commit 2af23d30434a677c6416812eea52ccc0af65119c + +llama.cpp$ lscpu | egrep "AMD|Flags" +Vendor ID: AuthenticAMD +Model name: AMD Ryzen Threadripper 1950X 16-Core Processor +Flags: fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx mmxext fxsr_opt pdpe1gb rdtscp lm constant_tsc rep_good nopl nonstop_tsc cpuid extd_apicid amd_dcm aperfmperf rapl pni pclmulqdq monitor ssse3 fma cx16 sse4_1 sse4_2 movbe popcnt aes xsave avx f16c rdrand lahf_lm cmp_legacy svm extapic cr8_legacy abm sse4a misalignsse 3dnowprefetch osvw skinit wdt tce topoext perfctr_core perfctr_nb bpext perfctr_llc mwaitx cpb hw_pstate ssbd ibpb vmmcall fsgsbase bmi1 avx2 smep bmi2 rdseed adx smap clflushopt sha_ni xsaveopt xsavec xgetbv1 xsaves clzero irperf xsaveerptr arat npt lbrv svm_lock nrip_save tsc_scale vmcb_clean flushbyasid decodeassists pausefilter pfthreshold avic v_vmsave_vmload vgif overflow_recov succor smca sme sev +Virtualization: AMD-V + +llama.cpp$ python3 --version +Python 3.10.9 + +llama.cpp$ pip list | egrep "torch|numpy|sentencepiece" +numpy 1.24.2 +numpydoc 1.5.0 +sentencepiece 0.1.97 +torch 1.13.1 +torchvision 0.14.1 + +llama.cpp$ make --version | head -1 +GNU Make 4.3 + +$ md5sum ./models/65B/ggml-model-q4_0.bin +dbdd682cce80e2d6e93cefc7449df487 ./models/65B/ggml-model-q4_0.bin +``` + +Example run with the Linux command [perf](https://www.brendangregg.com/perf.html) +``` +llama.cpp$ perf stat ./main -m ./models/65B/ggml-model-q4_0.bin -t 16 -n 1024 -p "Please close your issue when it has been answered." +main: seed = 1679149377 +llama_model_load: loading model from './models/65B/ggml-model-q4_0.bin' - please wait ... +llama_model_load: n_vocab = 32000 +llama_model_load: n_ctx = 512 +llama_model_load: n_embd = 8192 +llama_model_load: n_mult = 256 +llama_model_load: n_head = 64 +llama_model_load: n_layer = 80 +llama_model_load: n_rot = 128 +llama_model_load: f16 = 2 +llama_model_load: n_ff = 22016 +llama_model_load: n_parts = 8 +llama_model_load: ggml ctx size = 41477.73 MB +llama_model_load: memory_size = 2560.00 MB, n_mem = 40960 +llama_model_load: loading model part 1/8 from './models/65B/ggml-model-q4_0.bin' +llama_model_load: .......................................................................................... done +llama_model_load: model size = 4869.09 MB / num tensors = 723 +llama_model_load: loading model part 2/8 from './models/65B/ggml-model-q4_0.bin.1' +llama_model_load: .......................................................................................... done +llama_model_load: model size = 4869.09 MB / num tensors = 723 +llama_model_load: loading model part 3/8 from './models/65B/ggml-model-q4_0.bin.2' +llama_model_load: .......................................................................................... done +llama_model_load: model size = 4869.09 MB / num tensors = 723 +llama_model_load: loading model part 4/8 from './models/65B/ggml-model-q4_0.bin.3' +llama_model_load: .......................................................................................... done +llama_model_load: model size = 4869.09 MB / num tensors = 723 +llama_model_load: loading model part 5/8 from './models/65B/ggml-model-q4_0.bin.4' +llama_model_load: .......................................................................................... 
done +llama_model_load: model size = 4869.09 MB / num tensors = 723 +llama_model_load: loading model part 6/8 from './models/65B/ggml-model-q4_0.bin.5' +llama_model_load: .......................................................................................... done +llama_model_load: model size = 4869.09 MB / num tensors = 723 +llama_model_load: loading model part 7/8 from './models/65B/ggml-model-q4_0.bin.6' +llama_model_load: .......................................................................................... done +llama_model_load: model size = 4869.09 MB / num tensors = 723 +llama_model_load: loading model part 8/8 from './models/65B/ggml-model-q4_0.bin.7' +llama_model_load: .......................................................................................... done +llama_model_load: model size = 4869.09 MB / num tensors = 723 + +system_info: n_threads = 16 / 32 | AVX = 1 | AVX2 = 1 | AVX512 = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 0 | SSE3 = 1 | VSX = 0 | + +main: prompt: 'Please close your issue when it has been answered.' +main: number of tokens in prompt = 11 + 1 -> '' + 12148 -> 'Please' + 3802 -> ' close' + 596 -> ' your' + 2228 -> ' issue' + 746 -> ' when' + 372 -> ' it' + 756 -> ' has' + 1063 -> ' been' + 7699 -> ' answered' + 29889 -> '.' + +sampling parameters: temp = 0.800000, top_k = 40, top_p = 0.950000, repeat_last_n = 64, repeat_penalty = 1.300000 + + +Please close your issue when it has been answered. +@duncan-donut: I'm trying to figure out what kind of "support" you need for this script and why, exactly? Is there a question about how the code works that hasn't already been addressed in one or more comments below this ticket, or are we talking something else entirely like some sorta bugfixing job because your server setup is different from mine?? +I can understand if your site needs to be running smoothly and you need help with a fix of sorts but there should really be nothing wrong here that the code itself could not handle. And given that I'm getting reports about how it works perfectly well on some other servers, what exactly are we talking? A detailed report will do wonders in helping us get this resolved for ya quickly so please take your time and describe the issue(s) you see as clearly & concisely as possible!! +@duncan-donut: I'm not sure if you have access to cPanel but you could try these instructions. It is worth a shot! Let me know how it goes (or what error message, exactly!) when/if ya give that code a go? 
[end of text] + + +main: mem per token = 71159620 bytes +main: load time = 19309.95 ms +main: sample time = 168.62 ms +main: predict time = 223895.61 ms / 888.47 ms per token +main: total time = 246406.42 ms + + Performance counter stats for './main -m ./models/65B/ggml-model-q4_0.bin -t 16 -n 1024 -p Please close your issue when it has been answered.': + + 3636882.89 msec task-clock # 14.677 CPUs utilized + 13509 context-switches # 3.714 /sec + 2436 cpu-migrations # 0.670 /sec + 10476679 page-faults # 2.881 K/sec + 13133115082869 cycles # 3.611 GHz (16.77%) + 29314462753 stalled-cycles-frontend # 0.22% frontend cycles idle (16.76%) + 10294402631459 stalled-cycles-backend # 78.39% backend cycles idle (16.74%) + 23479217109614 instructions # 1.79 insn per cycle + # 0.44 stalled cycles per insn (16.76%) + 2353072268027 branches # 647.002 M/sec (16.77%) + 1998682780 branch-misses # 0.08% of all branches (16.76%) + + 247.802177522 seconds time elapsed + + 3618.573072000 seconds user + 18.491698000 seconds sys +``` diff --git a/.github/labeler.yml b/.github/labeler.yml deleted file mode 100644 index 1b47bc968..000000000 --- a/.github/labeler.yml +++ /dev/null @@ -1,86 +0,0 @@ -# https://github.com/actions/labeler -Kompute: - - changed-files: - - any-glob-to-any-file: - - ggml/include/ggml-kompute.h - - ggml/src/ggml-kompute/** - - README-kompute.md -Apple Metal: - - changed-files: - - any-glob-to-any-file: - - ggml/include/ggml-metal.h - - ggml/src/ggml-metal/** - - README-metal.md -SYCL: - - changed-files: - - any-glob-to-any-file: - - ggml/include/ggml-sycl.h - - ggml/src/ggml-sycl/** - - docs/backend/SYCL.md - - examples/sycl/** -Nvidia GPU: - - changed-files: - - any-glob-to-any-file: - - ggml/include/ggml-cuda.h - - ggml/src/ggml-cuda/** -Vulkan: - - changed-files: - - any-glob-to-any-file: - - ggml/include/ggml-vulkan.h - - ggml/src/ggml-vulkan/** -documentation: - - changed-files: - - any-glob-to-any-file: - - docs/** - - media/** -testing: - - changed-files: - - any-glob-to-any-file: - - tests/** -build: - - changed-files: - - any-glob-to-any-file: - - cmake/** - - CMakeLists.txt - - CMakePresets.json -examples: - - changed-files: - - any-glob-to-any-file: examples/** -devops: - - changed-files: - - any-glob-to-any-file: - - .devops/** - - .github/** - - ci/** -python: - - changed-files: - - any-glob-to-any-file: - - "**/*.py" - - requirements/** - - gguf-py/** - - .flake8 -script: - - changed-files: - - any-glob-to-any-file: - - scripts/** -android: - - changed-files: - - any-glob-to-any-file: - - examples/llama.android/** -server: - - changed-files: - - any-glob-to-any-file: - - examples/server/** -ggml: - - changed-files: - - any-glob-to-any-file: - - ggml/** -nix: - - changed-files: - - any-glob-to-any-file: - - "**/*.nix" - - .github/workflows/nix-*.yml - - .devops/nix/nixpkgs-instances.nix -embedding: - - changed-files: - - any-glob-to-any-file: examples/embedding/ diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md deleted file mode 100644 index d9f5bdc23..000000000 --- a/.github/pull_request_template.md +++ /dev/null @@ -1 +0,0 @@ -*Make sure to read the [contributing guidelines](https://github.com/ggerganov/llama.cpp/blob/master/CONTRIBUTING.md) before submitting a PR* diff --git a/.github/workflows/bench.yml.disabled b/.github/workflows/bench.yml.disabled deleted file mode 100644 index 1c8787ef7..000000000 --- a/.github/workflows/bench.yml.disabled +++ /dev/null @@ -1,315 +0,0 @@ -# TODO: there have been some issues with the workflow, so disabling 
for now -# https://github.com/ggerganov/llama.cpp/issues/7893 -# -# Benchmark -name: Benchmark - -on: - workflow_dispatch: - inputs: - gpu-series: - description: 'Azure GPU series to run with' - required: true - type: choice - options: - - Standard_NC4as_T4_v3 - - Standard_NC24ads_A100_v4 - - Standard_NC80adis_H100_v5 - sha: - description: 'Commit SHA1 to build' - required: false - type: string - duration: - description: 'Duration of the bench' - type: string - default: 10m - - push: - branches: - - master - paths: ['llama.cpp', 'ggml.c', 'ggml-backend.cpp', 'ggml-quants.c', '**/*.cu', 'examples/server/*.h*', 'examples/server/*.cpp'] - pull_request_target: - types: [opened, synchronize, reopened] - paths: ['llama.cpp', 'ggml.c', 'ggml-backend.cpp', 'ggml-quants.c', '**/*.cu', 'examples/server/*.h*', 'examples/server/*.cpp'] - schedule: - - cron: '04 2 * * *' - -concurrency: - group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || github.run_id }}-${{ github.event.inputs.sha }} - cancel-in-progress: true - -jobs: - bench-server-baseline: - runs-on: Standard_NC4as_T4_v3 - env: - RUNNER_LABEL: Standard_NC4as_T4_v3 # FIXME Do not find a way to not duplicate it - N_USERS: 8 - DURATION: 10m - - strategy: - matrix: - model: [phi-2] - ftype: [q4_0, q8_0, f16] - include: - - model: phi-2 - ftype: q4_0 - pr_comment_enabled: "true" - - if: | - inputs.gpu-series == 'Standard_NC4as_T4_v3' - || ( - github.event_name == 'schedule' - && github.ref_name == 'master' - && github.repository_owner == 'ggerganov' - ) - || github.event_name == 'pull_request_target' - || ( - github.event_name == 'push' - && github.event.ref == 'refs/heads/master' - && github.repository_owner == 'ggerganov' - ) - steps: - - name: Clone - id: checkout - uses: actions/checkout@v4 - with: - fetch-depth: 0 - ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }} - - - name: Install python env - id: pipenv - run: | - cd examples/server/bench - python3 -m venv venv - source venv/bin/activate - pip install -r requirements.txt - - - name: Prometheus - id: install_prometheus - run: | - wget --quiet https://github.com/prometheus/prometheus/releases/download/v2.51.0/prometheus-2.51.0.linux-amd64.tar.gz - tar xzf prometheus*.tar.gz --strip-components=1 - ./prometheus --config.file=examples/server/bench/prometheus.yml & - while ! 
nc -z localhost 9090; do - sleep 0.1 - done - - - name: Set up Go - uses: actions/setup-go@v5 - with: - go-version: '1.21' - - - name: Install k6 and xk6-sse - id: k6_installation - run: | - cd examples/server/bench - go install go.k6.io/xk6/cmd/xk6@latest - xk6 build master \ - --with github.com/phymbert/xk6-sse - - - name: Build - id: cmake_build - run: | - set -eux - cmake -B build \ - -DGGML_NATIVE=OFF \ - -DLLAMA_BUILD_SERVER=ON \ - -DLLAMA_CURL=ON \ - -DLLAMA_CUBLAS=ON \ - -DCUDAToolkit_ROOT=/usr/local/cuda \ - -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc \ - -DCMAKE_CUDA_ARCHITECTURES=75 \ - -DLLAMA_FATAL_WARNINGS=OFF \ - -DLLAMA_ALL_WARNINGS=OFF \ - -DCMAKE_BUILD_TYPE=Release; - cmake --build build --config Release -j $(nproc) --target llama-server - - - name: Download the dataset - id: download_dataset - run: | - cd examples/server/bench - wget --quiet https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json - - - name: Server bench - id: server_bench - env: - HEAD_REF: ${{ github.head_ref || github.ref_name }} - run: | - set -eux - - cd examples/server/bench - source venv/bin/activate - python bench.py \ - --runner-label ${{ env.RUNNER_LABEL }} \ - --name ${{ github.job }} \ - --branch $HEAD_REF \ - --commit ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha }} \ - --scenario script.js \ - --duration ${{ github.event.inputs.duration || env.DURATION }} \ - --hf-repo ggml-org/models \ - --hf-file ${{ matrix.model }}/ggml-model-${{ matrix.ftype }}.gguf \ - --model-path-prefix /models \ - --parallel ${{ env.N_USERS }} \ - -ngl 33 \ - --batch-size 2048 \ - --ubatch-size 256 \ - --ctx-size 16384 \ - --n-prompts 1000 \ - --max-prompt-tokens 1024 \ - --max-tokens 2048 - - cat results.github.env >> $GITHUB_ENV - - # Remove dataset as we do not want it in the artefact - rm ShareGPT_V3_unfiltered_cleaned_split.json - - - uses: actions/upload-artifact@v4 - with: - name: bench-server-${{ github.job }}-${{ env.RUNNER_LABEL }}-${{ matrix.model }}-${{ matrix.ftype }} - compression-level: 9 - path: | - examples/server/bench/*.jpg - examples/server/bench/*.json - examples/server/bench/*.log - - - name: Commit status - uses: Sibz/github-status-action@v1 - with: - authToken: ${{secrets.GITHUB_TOKEN}} - sha: ${{ inputs.sha || github.event.pull_request.head.sha || github.sha }} - context: bench-server-${{ github.job }}-${{ env.RUNNER_LABEL }}-${{ matrix.model }}-${{ matrix.ftype }} - description: | - ${{ env.BENCH_RESULTS }} - state: 'success' - - - name: Upload benchmark images - uses: devicons/public-upload-to-imgur@v2.2.2 - continue-on-error: true # Important as it looks unstable: 503 - id: imgur_step - with: - client_id: ${{secrets.IMGUR_CLIENT_ID}} - path: | - examples/server/bench/prompt_tokens_seconds.jpg - examples/server/bench/predicted_tokens_seconds.jpg - examples/server/bench/kv_cache_usage_ratio.jpg - examples/server/bench/requests_processing.jpg - - - name: Extract mermaid - id: set_mermaid - run: | - set -eux - - cd examples/server/bench - PROMPT_TOKENS_SECONDS=$(cat prompt_tokens_seconds.mermaid) - echo "PROMPT_TOKENS_SECONDS<<EOF" >> $GITHUB_ENV - echo "$PROMPT_TOKENS_SECONDS" >> $GITHUB_ENV - echo "EOF" >> $GITHUB_ENV - - PREDICTED_TOKENS_SECONDS=$(cat predicted_tokens_seconds.mermaid) - echo "PREDICTED_TOKENS_SECONDS<<EOF" >> $GITHUB_ENV - echo "$PREDICTED_TOKENS_SECONDS" >> $GITHUB_ENV - echo "EOF" >> $GITHUB_ENV - - KV_CACHE_USAGE_RATIO=$(cat kv_cache_usage_ratio.mermaid) - echo
"KV_CACHE_USAGE_RATIO<> $GITHUB_ENV - echo "$KV_CACHE_USAGE_RATIO" >> $GITHUB_ENV - echo "EOF" >> $GITHUB_ENV - - REQUESTS_PROCESSING=$(cat requests_processing.mermaid) - echo "REQUESTS_PROCESSING<> $GITHUB_ENV - echo "$REQUESTS_PROCESSING" >> $GITHUB_ENV - echo "EOF" >> $GITHUB_ENV - - - name: Extract image url - id: extract_image_url - continue-on-error: true - run: | - set -eux - - echo "IMAGE_O=${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[0] }}" >> $GITHUB_ENV - echo "IMAGE_1=${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[1] }}" >> $GITHUB_ENV - echo "IMAGE_2=${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[2] }}" >> $GITHUB_ENV - echo "IMAGE_3=${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[3] }}" >> $GITHUB_ENV - - - name: Comment PR - uses: mshick/add-pr-comment@v2 - id: comment_pr - if: ${{ github.event.pull_request != '' && matrix.pr_comment_enabled == 'true' }} - with: - message-id: bench-server-${{ github.job }}-${{ env.RUNNER_LABEL }}-${{ matrix.model }}-${{ matrix.ftype }} - message: | -

- 📈 **llama.cpp server** for _${{ github.job }}_ on _${{ env.RUNNER_LABEL }}_ for `${{ matrix.model }}`-`${{ matrix.ftype }}`: **${{ env.BENCH_ITERATIONS}} iterations** 🚀
-
- <details>
-
- <summary>Expand details for performance related PR only</summary>
-
- - Concurrent users: ${{ env.N_USERS }}, duration: ${{ github.event.inputs.duration || env.DURATION }}
- - HTTP request : avg=${{ env.HTTP_REQ_DURATION_AVG }}ms p(95)=${{ env.HTTP_REQ_DURATION_P_95_ }}ms fails=${{ env.HTTP_REQ_FAILED_PASSES }}, finish reason: stop=${{ env.LLAMACPP_COMPLETIONS_STOP_RATE_PASSES }} truncated=${{ env.LLAMACPP_COMPLETIONS_TRUNCATED_RATE_PASSES }}
- - Prompt processing (pp): avg=${{ env.LLAMACPP_PROMPT_PROCESSING_SECOND_AVG }}tk/s p(95)=${{ env.LLAMACPP_PROMPT_PROCESSING_SECOND_P_95_ }}tk/s
- - Token generation (tg): avg=${{ env.LLAMACPP_TOKENS_SECOND_AVG }}tk/s p(95)=${{ env.LLAMACPP_TOKENS_SECOND_P_95_ }}tk/s
- - ${{ env.BENCH_GRAPH_XLABEL }}
-
- <img src="${{ env.IMAGE_O }}" alt="prompt_tokens_seconds" />
-
- <details>
- <summary>More</summary>
-
- ```mermaid
- ${{ env.PROMPT_TOKENS_SECONDS }}
- ```
-
- </details>
-
- <img src="${{ env.IMAGE_1 }}" alt="predicted_tokens_seconds" />
-
- <details>
- <summary>More</summary>
-
- ```mermaid
- ${{ env.PREDICTED_TOKENS_SECONDS }}
- ```
-
- </details>
-
- <details>
-
- <summary>Details</summary>
-
- <img src="${{ env.IMAGE_2 }}" alt="kv_cache_usage_ratio" />
-
- <details>
- <summary>More</summary>
-
- ```mermaid
- ${{ env.KV_CACHE_USAGE_RATIO }}
- ```
-
- </details>
-
- <img src="${{ env.IMAGE_3 }}" alt="requests_processing" />
-
- <details>
- <summary>More</summary>
-
- ```mermaid
- ${{ env.REQUESTS_PROCESSING }}
- ```
-
- </details>
-
- </details>
-
- </details>
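The "Extract mermaid" step of this workflow relies on GitHub Actions' multi-line `$GITHUB_ENV` syntax: `KEY<<EOF` opens a value that may span several lines (here, a mermaid graph), a bare `EOF` closes it, and later steps such as the PR comment can then read `${{ env.KEY }}`. A minimal sketch of the pattern, reusing the variable and file names from that step (nothing beyond this mechanism is implied about the workflow):

```sh
# Sketch of the multi-line $GITHUB_ENV heredoc used by the "Extract mermaid" step.
# Assumes prompt_tokens_seconds.mermaid was produced earlier in the same job.
PROMPT_TOKENS_SECONDS=$(cat prompt_tokens_seconds.mermaid)  # multi-line content
echo "PROMPT_TOKENS_SECONDS<<EOF" >> "$GITHUB_ENV"          # open the heredoc-style value
echo "$PROMPT_TOKENS_SECONDS"     >> "$GITHUB_ENV"          # the value itself, any number of lines
echo "EOF"                        >> "$GITHUB_ENV"          # terminator expected by the runner
# Later steps can then reference ${{ env.PROMPT_TOKENS_SECONDS }}.
```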
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 6841ba589..7c40b0c12 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -10,264 +10,22 @@ on: push: branches: - master - paths: ['.github/workflows/build.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.cuh', '**/*.swift', '**/*.m', '**/*.metal', '**/*.comp'] + paths: ['.github/workflows/**', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.c', '**/*.cpp'] pull_request: types: [opened, synchronize, reopened] - paths: ['.github/workflows/build.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.cuh', '**/*.swift', '**/*.m', '**/*.metal', '**/*.comp'] - -concurrency: - group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }} - cancel-in-progress: true - -# Fine-grant permission -# https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token -permissions: - contents: write # for creating release + paths: ['**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.c', '**/*.cpp'] env: - BRANCH_NAME: ${{ github.head_ref || github.ref_name }} - GGML_NLOOP: 3 - GGML_N_THREADS: 1 - LLAMA_LOG_COLORS: 1 - LLAMA_LOG_PREFIX: 1 - LLAMA_LOG_TIMESTAMPS: 1 + BRANCH_NAME: ${{ github.head_ref || github.ref_name }} jobs: - macOS-latest-cmake-arm64: - runs-on: macos-14 - - steps: - - name: Clone - id: checkout - uses: actions/checkout@v4 - with: - fetch-depth: 0 - - - name: ccache - uses: hendrikmuhs/ccache-action@v1.2.16 - with: - key: macOS-latest-cmake-arm64 - evict-old-files: 1d - - - name: Dependencies - id: depends - continue-on-error: true - run: | - brew update - - - name: Build - id: cmake_build - run: | - sysctl -a - cmake -B build \ - -DCMAKE_BUILD_RPATH="@loader_path" \ - -DLLAMA_FATAL_WARNINGS=ON \ - -DLLAMA_CURL=ON \ - -DGGML_METAL_USE_BF16=ON \ - -DGGML_METAL_EMBED_LIBRARY=ON \ - -DGGML_RPC=ON - cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) - - - name: Test - id: cmake_test - run: | - cd build - ctest -L 'main|curl' --verbose --timeout 900 - - - name: Determine tag name - id: tag - shell: bash - run: | - BUILD_NUMBER="$(git rev-list --count HEAD)" - SHORT_HASH="$(git rev-parse --short=7 HEAD)" - if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then - echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT - else - SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-') - echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT - fi - - - name: Pack artifacts - id: pack_artifacts - if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }} - run: | - cp LICENSE ./build/bin/ - cp examples/run/linenoise.cpp/LICENSE ./build/bin/LICENSE.linenoise.cpp - zip -r llama-${{ steps.tag.outputs.name }}-bin-macos-arm64.zip ./build/bin/* - - - name: Upload artifacts - if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }} - uses: actions/upload-artifact@v4 - with: - path: llama-${{ steps.tag.outputs.name }}-bin-macos-arm64.zip - name: llama-bin-macos-arm64.zip - - macOS-latest-cmake-x64: - runs-on: macos-13 - - steps: - - name: Clone - id: checkout - uses: actions/checkout@v4 - with: - fetch-depth: 0 - - - name: ccache - uses: hendrikmuhs/ccache-action@v1.2.16 - with: - key: macOS-latest-cmake-x64 - evict-old-files: 1d - 
- - name: Dependencies - id: depends - continue-on-error: true - run: | - brew update - - - name: Build - id: cmake_build - run: | - sysctl -a - # Metal is disabled due to intermittent failures with Github runners not having a GPU: - # https://github.com/ggerganov/llama.cpp/actions/runs/8635935781/job/23674807267#step:5:2313 - cmake -B build \ - -DCMAKE_BUILD_RPATH="@loader_path" \ - -DLLAMA_FATAL_WARNINGS=ON \ - -DLLAMA_CURL=ON \ - -DGGML_METAL=OFF \ - -DGGML_RPC=ON - cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) - - - name: Test - id: cmake_test - run: | - cd build - ctest -L main --verbose --timeout 900 - - - name: Determine tag name - id: tag - shell: bash - run: | - BUILD_NUMBER="$(git rev-list --count HEAD)" - SHORT_HASH="$(git rev-parse --short=7 HEAD)" - if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then - echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT - else - SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-') - echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT - fi - - - name: Pack artifacts - id: pack_artifacts - if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }} - run: | - cp LICENSE ./build/bin/ - cp examples/run/linenoise.cpp/LICENSE ./build/bin/LICENSE.linenoise.cpp - zip -r llama-${{ steps.tag.outputs.name }}-bin-macos-x64.zip ./build/bin/* - - - name: Upload artifacts - if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }} - uses: actions/upload-artifact@v4 - with: - path: llama-${{ steps.tag.outputs.name }}-bin-macos-x64.zip - name: llama-bin-macos-x64.zip - - ubuntu-cpu-cmake: - runs-on: ubuntu-22.04 - - steps: - - name: Clone - id: checkout - uses: actions/checkout@v4 - with: - fetch-depth: 0 - - - name: ccache - uses: hendrikmuhs/ccache-action@v1.2.16 - with: - key: ubuntu-cpu-cmake - evict-old-files: 1d - - - name: Dependencies - id: depends - run: | - sudo apt-get update - sudo apt-get install build-essential libcurl4-openssl-dev - - - name: Build - id: cmake_build - run: | - cmake -B build \ - -DLLAMA_FATAL_WARNINGS=ON \ - -DLLAMA_CURL=ON \ - -DGGML_RPC=ON - cmake --build build --config Release -j $(nproc) - - - name: Test - id: cmake_test - run: | - cd build - ctest -L 'main|curl' --verbose --timeout 900 - - - name: Test llama2c conversion - id: llama2c_test - run: | - cd build - echo "Fetch tokenizer" - wget https://huggingface.co/karpathy/tinyllamas/resolve/main/stories260K/tok512.bin - echo "Fetch llama2c model" - wget https://huggingface.co/karpathy/tinyllamas/resolve/main/stories260K/stories260K.bin - ./bin/llama-convert-llama2c-to-ggml --copy-vocab-from-model ./tok512.bin --llama2c-model stories260K.bin --llama2c-output-model stories260K.gguf - ./bin/llama-cli -m stories260K.gguf -p "One day, Lily met a Shoggoth" -n 500 -c 256 - - - name: Determine tag name - id: tag - shell: bash - run: | - BUILD_NUMBER="$(git rev-list --count HEAD)" - SHORT_HASH="$(git rev-parse --short=7 HEAD)" - if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then - echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT - else - SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-') - echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT - fi - - - name: Pack artifacts - id: pack_artifacts - if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }} - run: | - cp LICENSE ./build/bin/ - cp 
examples/run/linenoise.cpp/LICENSE ./build/bin/LICENSE.linenoise.cpp - zip -r llama-${{ steps.tag.outputs.name }}-bin-ubuntu-x64.zip ./build/bin/* - - - name: Upload artifacts - if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }} - uses: actions/upload-artifact@v4 - with: - path: llama-${{ steps.tag.outputs.name }}-bin-ubuntu-x64.zip - name: llama-bin-ubuntu-x64.zip - - ubuntu-latest-cmake-sanitizer: + ubuntu-latest-make: runs-on: ubuntu-latest - continue-on-error: true - - strategy: - matrix: - sanitizer: [ADDRESS, THREAD, UNDEFINED] - build_type: [Debug] - steps: - name: Clone id: checkout - uses: actions/checkout@v4 - - - name: ccache - uses: hendrikmuhs/ccache-action@v1.2.16 - with: - key: ubuntu-latest-cmake-sanitizer-${{ matrix.sanitizer }} - evict-old-files: 1d + uses: actions/checkout@v1 - name: Dependencies id: depends @@ -276,39 +34,17 @@ jobs: sudo apt-get install build-essential - name: Build - id: cmake_build - if: ${{ matrix.sanitizer != 'THREAD' }} + id: make_build run: | - cmake -B build \ - -DLLAMA_FATAL_WARNINGS=ON \ - -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON \ - -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} - cmake --build build --config ${{ matrix.build_type }} -j $(nproc) + make - - name: Build (no OpenMP) - id: cmake_build_no_openmp - if: ${{ matrix.sanitizer == 'THREAD' }} - run: | - cmake -B build \ - -DLLAMA_FATAL_WARNINGS=ON \ - -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON \ - -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \ - -DGGML_OPENMP=OFF - cmake --build build --config ${{ matrix.build_type }} -j $(nproc) - - - name: Test - id: cmake_test - run: | - cd build - ctest -L main --verbose --timeout 900 - - ubuntu-latest-llguidance: + ubuntu-latest-cmake: runs-on: ubuntu-latest steps: - name: Clone id: checkout - uses: actions/checkout@v4 + uses: actions/checkout@v1 - name: Dependencies id: depends @@ -321,32 +57,29 @@ jobs: run: | mkdir build cd build - cmake .. \ - -DLLAMA_FATAL_WARNINGS=ON \ - -DLLAMA_LLGUIDANCE=ON - cmake --build . --config Release -j $(nproc) + cmake .. + cmake --build . --config Release - name: Test id: cmake_test run: | cd build - ctest -L main --verbose --timeout 900 + ctest --verbose - ubuntu-latest-cmake-rpc: + ubuntu-latest-cmake-sanitizer: runs-on: ubuntu-latest continue-on-error: true + strategy: + matrix: + sanitizer: [ADDRESS, THREAD, UNDEFINED] + build_type: [Debug, Release] + steps: - name: Clone id: checkout - uses: actions/checkout@v4 - - - name: ccache - uses: hendrikmuhs/ccache-action@v1.2.16 - with: - key: ubuntu-latest-cmake-rpc - evict-old-files: 1d + uses: actions/checkout@v1 - name: Dependencies id: depends @@ -357,493 +90,91 @@ jobs: - name: Build id: cmake_build run: | - cmake -B build \ - -DGGML_RPC=ON - cmake --build build --config Release -j $(nproc) + mkdir build + cd build + cmake .. -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} + cmake --build . 
--config ${{ matrix.build_type }} - name: Test id: cmake_test run: | cd build - ctest -L main --verbose + ctest --verbose - ubuntu-22-cmake-vulkan: - runs-on: ubuntu-22.04 + macOS-latest-make: + runs-on: macos-latest steps: - name: Clone id: checkout - uses: actions/checkout@v4 - - - name: ccache - uses: hendrikmuhs/ccache-action@v1.2.16 - with: - key: ubuntu-22-cmake-vulkan - evict-old-files: 1d + uses: actions/checkout@v1 - name: Dependencies id: depends run: | - wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | sudo apt-key add - - sudo wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list - sudo apt-get update -y - sudo apt-get install -y build-essential mesa-vulkan-drivers vulkan-sdk + brew update + + - name: Build + id: make_build + run: | + make + + macOS-latest-cmake: + runs-on: macOS-latest + + steps: + - name: Clone + id: checkout + uses: actions/checkout@v1 + + - name: Dependencies + id: depends + run: | + brew update - name: Build id: cmake_build run: | - cmake -B build \ - -DGGML_VULKAN=ON - cmake --build build --config Release -j $(nproc) + mkdir build + cd build + cmake -DLLAMA_AVX2=OFF .. + cmake --build . --config Release - name: Test id: cmake_test run: | cd build - # This is using llvmpipe and runs slower than other backends - ctest -L main --verbose --timeout 1800 - - ubuntu-22-cmake-hip: - runs-on: ubuntu-22.04 - container: rocm/dev-ubuntu-22.04:6.0.2 - - steps: - - name: Clone - id: checkout - uses: actions/checkout@v4 - - - name: Dependencies - id: depends - run: | - sudo apt-get update - sudo apt-get install -y build-essential git cmake rocblas-dev hipblas-dev - - - name: ccache - uses: hendrikmuhs/ccache-action@v1.2.16 - with: - key: ubuntu-22-cmake-hip - evict-old-files: 1d - - - name: Build with native CMake HIP support - id: cmake_build - run: | - cmake -B build -S . \ - -DCMAKE_HIP_COMPILER="$(hipconfig -l)/clang" \ - -DGGML_HIP=ON - cmake --build build --config Release -j $(nproc) - - - name: Build with legacy HIP support - id: cmake_build_legacy_hip - run: | - cmake -B build2 -S . \ - -DCMAKE_C_COMPILER=hipcc \ - -DCMAKE_CXX_COMPILER=hipcc \ - -DGGML_HIP=ON - cmake --build build2 --config Release -j $(nproc) - - ubuntu-22-cmake-musa: - runs-on: ubuntu-22.04 - container: mthreads/musa:rc3.1.0-devel-ubuntu22.04 - - steps: - - name: Clone - id: checkout - uses: actions/checkout@v4 - - - name: Dependencies - id: depends - run: | - apt-get update - apt-get install -y build-essential git cmake libcurl4-openssl-dev - - - name: ccache - uses: hendrikmuhs/ccache-action@v1.2.16 - with: - key: ubuntu-22-cmake-musa - evict-old-files: 1d - - - name: Build with native CMake MUSA support - id: cmake_build - run: | - cmake -B build -S . 
\ - -DGGML_MUSA=ON - cmake --build build --config Release -j $(nproc) - - ubuntu-22-cmake-sycl: - runs-on: ubuntu-22.04 - - continue-on-error: true - - steps: - - uses: actions/checkout@v4 - - - name: add oneAPI to apt - shell: bash - run: | - cd /tmp - wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB - sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB - rm GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB - sudo add-apt-repository "deb https://apt.repos.intel.com/oneapi all main" - - - name: install oneAPI dpcpp compiler - shell: bash - run: | - sudo apt update - sudo apt install intel-oneapi-compiler-dpcpp-cpp - - - name: install oneAPI MKL library - shell: bash - run: | - sudo apt install intel-oneapi-mkl-devel - - - name: Clone - id: checkout - uses: actions/checkout@v4 - - - name: ccache - uses: hendrikmuhs/ccache-action@v1.2.16 - with: - key: ubuntu-22-cmake-sycl - evict-old-files: 1d - - - name: Build - id: cmake_build - run: | - source /opt/intel/oneapi/setvars.sh - cmake -B build \ - -DGGML_SYCL=ON \ - -DCMAKE_C_COMPILER=icx \ - -DCMAKE_CXX_COMPILER=icpx - cmake --build build --config Release -j $(nproc) - - ubuntu-22-cmake-sycl-fp16: - runs-on: ubuntu-22.04 - - continue-on-error: true - - steps: - - uses: actions/checkout@v4 - - - name: add oneAPI to apt - shell: bash - run: | - cd /tmp - wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB - sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB - rm GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB - sudo add-apt-repository "deb https://apt.repos.intel.com/oneapi all main" - - - name: install oneAPI dpcpp compiler - shell: bash - run: | - sudo apt update - sudo apt install intel-oneapi-compiler-dpcpp-cpp - - - name: install oneAPI MKL library - shell: bash - run: | - sudo apt install intel-oneapi-mkl-devel - - - name: Clone - id: checkout - uses: actions/checkout@v4 - - - name: ccache - uses: hendrikmuhs/ccache-action@v1.2.16 - with: - key: ubuntu-22-cmake-sycl-fp16 - evict-old-files: 1d - - - name: Build - id: cmake_build - run: | - source /opt/intel/oneapi/setvars.sh - cmake -B build \ - -DGGML_SYCL=ON \ - -DCMAKE_C_COMPILER=icx \ - -DCMAKE_CXX_COMPILER=icpx \ - -DGGML_SYCL_F16=ON - cmake --build build --config Release -j $(nproc) - - macOS-latest-cmake-ios: - runs-on: macos-latest - - steps: - - name: Clone - id: checkout - uses: actions/checkout@v4 - - - name: ccache - uses: hendrikmuhs/ccache-action@v1.2.16 - with: - key: macOS-latest-cmake-ios - evict-old-files: 1d - - - name: Dependencies - id: depends - continue-on-error: true - run: | - brew update - - - name: Build - id: cmake_build - run: | - sysctl -a - cmake -B build -G Xcode \ - -DGGML_METAL_USE_BF16=ON \ - -DGGML_METAL_EMBED_LIBRARY=ON \ - -DLLAMA_BUILD_EXAMPLES=OFF \ - -DLLAMA_BUILD_TESTS=OFF \ - -DLLAMA_BUILD_SERVER=OFF \ - -DCMAKE_SYSTEM_NAME=iOS \ - -DCMAKE_OSX_DEPLOYMENT_TARGET=14.0 \ - -DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml - cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO - - macOS-latest-cmake-tvos: - runs-on: macos-latest - - steps: - - name: Clone - id: checkout - uses: actions/checkout@v4 - - - name: ccache - uses: hendrikmuhs/ccache-action@v1.2.16 - with: - key: macOS-latest-cmake-tvos - evict-old-files: 1d - - - name: Dependencies - id: depends - continue-on-error: true - run: | - brew update - - - name: Build - id: cmake_build - run: | - sysctl -a - cmake -B build -G Xcode \ - -DGGML_METAL_USE_BF16=ON \ - -DGGML_METAL_EMBED_LIBRARY=ON \ - -DLLAMA_BUILD_EXAMPLES=OFF \ - 
-DLLAMA_BUILD_TESTS=OFF \ - -DLLAMA_BUILD_SERVER=OFF \ - -DCMAKE_SYSTEM_NAME=tvOS \ - -DCMAKE_OSX_DEPLOYMENT_TARGET=14.0 \ - -DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml - cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO - - macOS-latest-swift: - runs-on: macos-latest - - strategy: - matrix: - destination: ['generic/platform=macOS', 'generic/platform=iOS', 'generic/platform=tvOS'] - - steps: - - name: Clone - id: checkout - uses: actions/checkout@v4 - - - name: ccache - uses: hendrikmuhs/ccache-action@v1.2.16 - with: - key: macOS-latest-swift - evict-old-files: 1d - - - name: Dependencies - id: depends - continue-on-error: true - run: | - brew update - - - name: Build llama.cpp with CMake - id: cmake_build - run: | - sysctl -a - cmake -B build -G Xcode \ - -DGGML_METAL_USE_BF16=ON \ - -DGGML_METAL_EMBED_LIBRARY=ON \ - -DLLAMA_BUILD_EXAMPLES=OFF \ - -DLLAMA_BUILD_TESTS=OFF \ - -DLLAMA_BUILD_SERVER=OFF \ - -DCMAKE_OSX_ARCHITECTURES="arm64;x86_64" - cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) - sudo cmake --install build --config Release - - - name: xcodebuild for swift package - id: xcodebuild - run: | - xcodebuild -scheme llama-Package -destination "${{ matrix.destination }}" - - windows-msys2: - runs-on: windows-latest - - strategy: - fail-fast: false - matrix: - include: - - { sys: UCRT64, env: ucrt-x86_64, build: Release } - - { sys: CLANG64, env: clang-x86_64, build: Release } - - steps: - - name: Clone - uses: actions/checkout@v4 - - - name: ccache - uses: hendrikmuhs/ccache-action@v1.2.16 - with: - key: windows-msys2 - variant: sccache - evict-old-files: 1d - - - name: Setup ${{ matrix.sys }} - uses: msys2/setup-msys2@v2 - with: - update: true - msystem: ${{matrix.sys}} - install: >- - base-devel - git - mingw-w64-${{matrix.env}}-toolchain - mingw-w64-${{matrix.env}}-cmake - mingw-w64-${{matrix.env}}-openblas - - - name: Build using CMake - shell: msys2 {0} - run: | - cmake -B build - cmake --build build --config ${{ matrix.build }} -j $(nproc) - - - name: Clean after building using CMake - shell: msys2 {0} - run: | - rm -rf build - - - name: Build using CMake w/ OpenBLAS - shell: msys2 {0} - run: | - cmake -B build -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS - cmake --build build --config ${{ matrix.build }} -j $(nproc) + ctest --verbose windows-latest-cmake: runs-on: windows-latest - env: - OPENBLAS_VERSION: 0.3.23 - SDE_VERSION: 9.33.0-2024-01-07 - VULKAN_VERSION: 1.3.261.1 - strategy: matrix: include: - - build: 'noavx-x64' - defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_AVX=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF' - - build: 'avx2-x64' - defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON' - - build: 'avx-x64' - defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_AVX2=OFF' - - build: 'avx512-x64' - defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_AVX512=ON' - - build: 'openblas-x64' - defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS -DBLAS_INCLUDE_DIRS="$env:RUNNER_TEMP/openblas/include" -DBLAS_LIBRARIES="$env:RUNNER_TEMP/openblas/lib/openblas.lib"' - - build: 'kompute-x64' - defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_KOMPUTE=ON -DKOMPUTE_OPT_DISABLE_VULKAN_VERSION_CHECK=ON' - - build: 'vulkan-x64' - defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_VULKAN=ON' - - build: 'llvm-arm64' - defines: '-G "Ninja Multi-Config" -D 
CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-llvm.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON' - - build: 'msvc-arm64' - defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-msvc.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON' - - build: 'llvm-arm64-opencl-adreno' - defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-llvm.cmake -DCMAKE_PREFIX_PATH="$env:RUNNER_TEMP/opencl-arm64-release" -DGGML_OPENCL=ON -DGGML_OPENCL_USE_ADRENO_KERNELS=ON' + - build: 'avx2' + defines: '' + - build: 'avx' + defines: '-DLLAMA_AVX2=OFF' + - build: 'avx512' + defines: '-DLLAMA_AVX512=ON -DBUILD_SHARED_LIBS=ON' steps: - name: Clone id: checkout - uses: actions/checkout@v4 - with: - fetch-depth: 0 - - - name: ccache - uses: hendrikmuhs/ccache-action@v1.2.16 - with: - key: windows-latest-cmake-${{ matrix.build }} - variant: sccache - evict-old-files: 1d - - - name: Clone Kompute submodule - id: clone_kompute - if: ${{ matrix.build == 'kompute-x64' }} - run: | - git submodule update --init ggml/src/ggml-kompute/kompute - - - name: Download OpenBLAS - id: get_openblas - if: ${{ matrix.build == 'openblas-x64' }} - run: | - curl.exe -o $env:RUNNER_TEMP/openblas.zip -L "https://github.com/xianyi/OpenBLAS/releases/download/v${env:OPENBLAS_VERSION}/OpenBLAS-${env:OPENBLAS_VERSION}-x64.zip" - curl.exe -o $env:RUNNER_TEMP/OpenBLAS.LICENSE.txt -L "https://github.com/xianyi/OpenBLAS/raw/v${env:OPENBLAS_VERSION}/LICENSE" - mkdir $env:RUNNER_TEMP/openblas - tar.exe -xvf $env:RUNNER_TEMP/openblas.zip -C $env:RUNNER_TEMP/openblas - $vcdir = $(vswhere -latest -products * -requires Microsoft.VisualStudio.Component.VC.Tools.x86.x64 -property installationPath) - $msvc = $(join-path $vcdir $('VC\Tools\MSVC\'+$(gc -raw $(join-path $vcdir 'VC\Auxiliary\Build\Microsoft.VCToolsVersion.default.txt')).Trim())) - $lib = $(join-path $msvc 'bin\Hostx64\x64\lib.exe') - & $lib /machine:x64 "/def:${env:RUNNER_TEMP}/openblas/lib/libopenblas.def" "/out:${env:RUNNER_TEMP}/openblas/lib/openblas.lib" /name:openblas.dll - - - name: Install Vulkan SDK - id: get_vulkan - if: ${{ matrix.build == 'kompute-x64' || matrix.build == 'vulkan-x64' }} - run: | - curl.exe -o $env:RUNNER_TEMP/VulkanSDK-Installer.exe -L "https://sdk.lunarg.com/sdk/download/${env:VULKAN_VERSION}/windows/VulkanSDK-${env:VULKAN_VERSION}-Installer.exe" - & "$env:RUNNER_TEMP\VulkanSDK-Installer.exe" --accept-licenses --default-answer --confirm-command install - Add-Content $env:GITHUB_ENV "VULKAN_SDK=C:\VulkanSDK\${env:VULKAN_VERSION}" - Add-Content $env:GITHUB_PATH "C:\VulkanSDK\${env:VULKAN_VERSION}\bin" - - - name: Install Ninja - id: install_ninja - run: | - choco install ninja - - - name: Install OpenCL Headers and Libs - id: install_opencl - if: ${{ matrix.build == 'llvm-arm64-opencl-adreno' }} - run: | - git clone https://github.com/KhronosGroup/OpenCL-Headers - cd OpenCL-Headers - cmake -B build ` - -DBUILD_TESTING=OFF ` - -DOPENCL_HEADERS_BUILD_TESTING=OFF ` - -DOPENCL_HEADERS_BUILD_CXX_TESTS=OFF ` - -DCMAKE_INSTALL_PREFIX="$env:RUNNER_TEMP/opencl-arm64-release" - cmake --build build --target install - git clone https://github.com/KhronosGroup/OpenCL-ICD-Loader - cd OpenCL-ICD-Loader - cmake -B build-arm64-release ` - -A arm64 ` - -DCMAKE_PREFIX_PATH="$env:RUNNER_TEMP/opencl-arm64-release" ` - -DCMAKE_INSTALL_PREFIX="$env:RUNNER_TEMP/opencl-arm64-release" - cmake --build build-arm64-release --target install --config release + uses: actions/checkout@v1 - name: Build id: cmake_build run: | - cmake -S . 
-B build ${{ matrix.defines }} - cmake --build build --config Release -j ${env:NUMBER_OF_PROCESSORS} - - - name: Add libopenblas.dll - id: add_libopenblas_dll - if: ${{ matrix.build == 'openblas-x64' }} - run: | - cp $env:RUNNER_TEMP/openblas/bin/libopenblas.dll ./build/bin/Release/openblas.dll - cp $env:RUNNER_TEMP/OpenBLAS.LICENSE.txt ./build/bin/Release/OpenBLAS-${env:OPENBLAS_VERSION}.txt + mkdir build + cd build + cmake .. ${{ matrix.defines }} + cmake --build . --config Release - name: Check AVX512F support id: check_avx512f - if: ${{ matrix.build == 'avx512-x64' }} + if: ${{ matrix.build == 'avx512' }} continue-on-error: true run: | cd build @@ -856,487 +187,28 @@ jobs: - name: Test id: cmake_test - # not all machines have native AVX-512 - if: ${{ matrix.build != 'msvc-arm64' && matrix.build != 'llvm-arm64' && matrix.build != 'llvm-arm64-opencl-adreno' && matrix.build != 'kompute-x64' && matrix.build != 'vulkan-x64' && (matrix.build != 'avx512-x64' || env.HAS_AVX512F == '1') }} + if: ${{ matrix.build != 'avx512' || env.HAS_AVX512F == '1' }} # Test AVX-512 only when possible run: | cd build - ctest -L main -C Release --verbose --timeout 900 + ctest -C Release --verbose - - name: Test (Intel SDE) - id: cmake_test_sde - if: ${{ matrix.build == 'avx512-x64' && env.HAS_AVX512F == '0' }} # use Intel SDE for AVX-512 emulation - run: | - curl.exe -o $env:RUNNER_TEMP/sde.tar.xz -L "https://downloadmirror.intel.com/813591/sde-external-${env:SDE_VERSION}-win.tar.xz" - # for some weird reason windows tar doesn't like sde tar.xz - 7z x "-o${env:RUNNER_TEMP}" $env:RUNNER_TEMP/sde.tar.xz - 7z x "-o${env:RUNNER_TEMP}" $env:RUNNER_TEMP/sde.tar - $sde = $(join-path $env:RUNNER_TEMP sde-external-${env:SDE_VERSION}-win/sde.exe) - cd build - $env:LLAMA_SKIP_TESTS_SLOW_ON_EMULATOR = 1 - & $sde -future -- ctest -L main -C Release --verbose --timeout 900 - - - name: Determine tag name - id: tag - shell: bash - run: | - BUILD_NUMBER="$(git rev-list --count HEAD)" - SHORT_HASH="$(git rev-parse --short=7 HEAD)" - if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then - echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT - else - SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-') - echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT - fi + - name: Get commit hash + id: commit + if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }} + uses: pr-mpt/actions-commit-hash@v2 - name: Pack artifacts id: pack_artifacts if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }} run: | - Copy-Item LICENSE .\build\bin\Release\llama.cpp.txt - Copy-Item .\examples\run\linenoise.cpp\LICENSE .\build\bin\Release\linenoise.cpp.txt - 7z a llama-${{ steps.tag.outputs.name }}-bin-win-${{ matrix.build }}.zip .\build\bin\Release\* + 7z a llama-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-bin-win-${{ matrix.build }}-x64.zip .\build\bin\Release\* - name: Upload artifacts if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }} - uses: actions/upload-artifact@v4 + uses: actions/upload-artifact@v3 with: - path: llama-${{ steps.tag.outputs.name }}-bin-win-${{ matrix.build }}.zip - name: llama-bin-win-${{ matrix.build }}.zip - - ubuntu-latest-cmake-cuda: - runs-on: ubuntu-latest - container: nvidia/cuda:12.6.2-devel-ubuntu24.04 - - steps: - - name: Clone - id: checkout - uses: actions/checkout@v4 - 
with: - fetch-depth: 0 - - - name: Install dependencies - env: - DEBIAN_FRONTEND: noninteractive - run: | - apt update - apt install -y cmake build-essential ninja-build libgomp1 git - - - name: ccache - uses: hendrikmuhs/ccache-action@v1.2.16 - with: - key: ubuntu-latest-cmake-cuda - evict-old-files: 1d - - - name: Build with CMake - run: | - cmake -S . -B build -G Ninja \ - -DCMAKE_BUILD_TYPE=Release \ - -DCMAKE_CUDA_ARCHITECTURES=89-real \ - -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined \ - -DLLAMA_FATAL_WARNINGS=ON \ - -DGGML_NATIVE=OFF \ - -DGGML_CUDA=ON - cmake --build build - - windows-2019-cmake-cuda: - runs-on: windows-2019 - - strategy: - matrix: - cuda: ['12.4', '11.7'] - build: ['cuda'] - - steps: - - name: Clone - id: checkout - uses: actions/checkout@v4 - with: - fetch-depth: 0 - - - name: Install ccache - uses: hendrikmuhs/ccache-action@v1.2.16 - with: - key: ${{ github.job }}-${{ matrix.cuda }}-${{ matrix.build }} - variant: sccache - evict-old-files: 1d - - - name: Install Cuda Toolkit 11.7 - if: ${{ matrix.cuda == '11.7' }} - run: | - mkdir -p "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" - choco install unzip -y - curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_cudart/windows-x86_64/cuda_cudart-windows-x86_64-11.7.99-archive.zip" - curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvcc/windows-x86_64/cuda_nvcc-windows-x86_64-11.7.99-archive.zip" - curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvrtc/windows-x86_64/cuda_nvrtc-windows-x86_64-11.7.99-archive.zip" - curl -O "https://developer.download.nvidia.com/compute/cuda/redist/libcublas/windows-x86_64/libcublas-windows-x86_64-11.7.4.6-archive.zip" - curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvtx/windows-x86_64/cuda_nvtx-windows-x86_64-11.7.91-archive.zip" - curl -O "https://developer.download.nvidia.com/compute/cuda/redist/visual_studio_integration/windows-x86_64/visual_studio_integration-windows-x86_64-11.7.91-archive.zip" - curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvprof/windows-x86_64/cuda_nvprof-windows-x86_64-11.7.101-archive.zip" - curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_cccl/windows-x86_64/cuda_cccl-windows-x86_64-11.7.91-archive.zip" - unzip '*.zip' -d "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" - xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_cudart-windows-x86_64-11.7.99-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y - xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_nvcc-windows-x86_64-11.7.99-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y - xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_nvrtc-windows-x86_64-11.7.99-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y - xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\libcublas-windows-x86_64-11.7.4.6-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y - xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_nvtx-windows-x86_64-11.7.91-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y - xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\visual_studio_integration-windows-x86_64-11.7.91-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y - xcopy "C:\Program Files\NVIDIA GPU 
Computing Toolkit\CUDA\v11.7\cuda_nvprof-windows-x86_64-11.7.101-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y - xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_cccl-windows-x86_64-11.7.91-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y - echo "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append - echo "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\libnvvp" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append - echo "CUDA_PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8 - echo "CUDA_PATH_V11_7=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8 - - - name: Install Cuda Toolkit 12.4 - if: ${{ matrix.cuda == '12.4' }} - run: | - mkdir -p "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" - choco install unzip -y - curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_cudart/windows-x86_64/cuda_cudart-windows-x86_64-12.4.127-archive.zip" - curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvcc/windows-x86_64/cuda_nvcc-windows-x86_64-12.4.131-archive.zip" - curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvrtc/windows-x86_64/cuda_nvrtc-windows-x86_64-12.4.127-archive.zip" - curl -O "https://developer.download.nvidia.com/compute/cuda/redist/libcublas/windows-x86_64/libcublas-windows-x86_64-12.4.5.8-archive.zip" - curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvtx/windows-x86_64/cuda_nvtx-windows-x86_64-12.4.127-archive.zip" - curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_profiler_api/windows-x86_64/cuda_profiler_api-windows-x86_64-12.4.127-archive.zip" - curl -O "https://developer.download.nvidia.com/compute/cuda/redist/visual_studio_integration/windows-x86_64/visual_studio_integration-windows-x86_64-12.4.127-archive.zip" - curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvprof/windows-x86_64/cuda_nvprof-windows-x86_64-12.4.127-archive.zip" - curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_cccl/windows-x86_64/cuda_cccl-windows-x86_64-12.4.127-archive.zip" - unzip '*.zip' -d "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" - xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_cudart-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y - xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_nvcc-windows-x86_64-12.4.131-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y - xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_nvrtc-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y - xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\libcublas-windows-x86_64-12.4.5.8-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y - xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_nvtx-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y - xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_profiler_api-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H 
/Y - xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\visual_studio_integration-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y - xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_nvprof-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y - xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_cccl-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y - echo "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append - echo "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\libnvvp" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append - echo "CUDA_PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8 - echo "CUDA_PATH_V12_4=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8 - - - name: Install Ninja - id: install_ninja - run: | - choco install ninja - - - name: Build - id: cmake_build - shell: cmd - run: | - call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvars64.bat" - cmake -S . -B build -G "Ninja Multi-Config" ^ - -DLLAMA_BUILD_SERVER=ON ^ - -DGGML_NATIVE=OFF ^ - -DGGML_CUDA=ON ^ - -DGGML_RPC=ON - set /A NINJA_JOBS=%NUMBER_OF_PROCESSORS%-1 - cmake --build build --config Release -j %NINJA_JOBS% -t ggml - cmake --build build --config Release - - - name: Determine tag name - id: tag - shell: bash - run: | - BUILD_NUMBER="$(git rev-list --count HEAD)" - SHORT_HASH="$(git rev-parse --short=7 HEAD)" - if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then - echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT - else - SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-') - echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT - fi - - - name: Pack artifacts - id: pack_artifacts - if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }} - run: | - 7z a llama-${{ steps.tag.outputs.name }}-bin-win-${{ matrix.build }}-cu${{ matrix.cuda }}-x64.zip .\build\bin\Release\* - - - name: Upload artifacts - if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }} - uses: actions/upload-artifact@v4 - with: - path: llama-${{ steps.tag.outputs.name }}-bin-win-${{ matrix.build }}-cu${{ matrix.cuda }}-x64.zip - name: llama-bin-win-cu${{ matrix.cuda }}-x64.zip - - - name: Copy and pack Cuda runtime - if: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }} - run: | - echo "Cuda install location: ${{ env.CUDA_PATH }}" - $dst='.\build\bin\cudart\' - robocopy "${{env.CUDA_PATH}}\bin" $dst cudart64_*.dll cublas64_*.dll cublasLt64_*.dll - robocopy "${{env.CUDA_PATH}}\lib" $dst cudart64_*.dll cublas64_*.dll cublasLt64_*.dll - 7z a cudart-llama-bin-win-cu${{ matrix.cuda }}-x64.zip $dst\* - - - name: Upload Cuda runtime - if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }} - uses: actions/upload-artifact@v4 - with: - path: cudart-llama-bin-win-cu${{ matrix.cuda }}-x64.zip - name: cudart-llama-bin-win-cu${{ matrix.cuda }}-x64.zip - - windows-latest-cmake-sycl: - runs-on: windows-latest - - defaults: - run: - shell: 
bash - - env: - WINDOWS_BASEKIT_URL: https://registrationcenter-download.intel.com/akdlm/IRC_NAS/b380d914-366b-4b77-a74a-05e3c38b3514/intel-oneapi-base-toolkit-2025.0.0.882_offline.exe - WINDOWS_DPCPP_MKL: intel.oneapi.win.cpp-dpcpp-common:intel.oneapi.win.mkl.devel:intel.oneapi.win.dnnl:intel.oneapi.win.tbb.devel - ONEAPI_ROOT: "C:/Program Files (x86)/Intel/oneAPI" - steps: - - name: Clone - id: checkout - uses: actions/checkout@v4 - with: - fetch-depth: 0 - - - name: ccache - uses: hendrikmuhs/ccache-action@v1.2.16 - with: - key: windows-latest-cmake-sycl - variant: sccache - evict-old-files: 1d - - - name: Install - run: | - scripts/install-oneapi.bat $WINDOWS_BASEKIT_URL $WINDOWS_DPCPP_MKL - - - name: Build - id: cmake_build - run: examples/sycl/win-build-sycl.bat - - - name: Determine tag name - id: tag - shell: bash - run: | - BUILD_NUMBER="$(git rev-list --count HEAD)" - SHORT_HASH="$(git rev-parse --short=7 HEAD)" - if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then - echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT - else - SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-') - echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT - fi - - - name: Build the release package - id: pack_artifacts - if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }} - run: | - echo "cp oneAPI running time dll files in ${{ env.ONEAPI_ROOT }} to ./build/bin" - - cp "${{ env.ONEAPI_ROOT }}/mkl/latest/bin/mkl_sycl_blas.5.dll" ./build/bin - cp "${{ env.ONEAPI_ROOT }}/mkl/latest/bin/mkl_core.2.dll" ./build/bin - cp "${{ env.ONEAPI_ROOT }}/mkl/latest/bin/mkl_tbb_thread.2.dll" ./build/bin - - cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/ur_adapter_level_zero.dll" ./build/bin - cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/ur_adapter_opencl.dll" ./build/bin - cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/ur_loader.dll" ./build/bin - cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/ur_win_proxy_loader.dll" ./build/bin - - cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/sycl8.dll" ./build/bin - cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/svml_dispmd.dll" ./build/bin - cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/libmmd.dll" ./build/bin - cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/libiomp5md.dll" ./build/bin - - cp "${{ env.ONEAPI_ROOT }}/dnnl/latest/bin/dnnl.dll" ./build/bin - cp "${{ env.ONEAPI_ROOT }}/tbb/latest/bin/tbb12.dll" ./build/bin - - echo "cp oneAPI running time dll files to ./build/bin done" - 7z a llama-${{ steps.tag.outputs.name }}-bin-win-sycl-x64.zip ./build/bin/* - - - name: Upload the release package - if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }} - uses: actions/upload-artifact@v4 - with: - path: llama-${{ steps.tag.outputs.name }}-bin-win-sycl-x64.zip - name: llama-bin-win-sycl-x64.zip - - windows-latest-cmake-hip: - if: ${{ github.event.inputs.create_release != 'true' }} - runs-on: windows-latest - - steps: - - name: Clone - id: checkout - uses: actions/checkout@v4 - - - name: Install - id: depends - run: | - $ErrorActionPreference = "Stop" - write-host "Downloading AMD HIP SDK Installer" - Invoke-WebRequest -Uri "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q3-WinSvr2022-For-HIP.exe" -OutFile "${env:RUNNER_TEMP}\rocm-install.exe" - write-host "Installing AMD HIP SDK" - Start-Process "${env:RUNNER_TEMP}\rocm-install.exe" -ArgumentList '-install' -NoNewWindow -Wait - 
write-host "Completed AMD HIP SDK installation" - - - name: Verify ROCm - id: verify - run: | - & 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' --version - - - name: Install ccache - uses: hendrikmuhs/ccache-action@v1.2.16 - with: - key: ${{ github.job }} - evict-old-files: 1d - - - name: Build - id: cmake_build - run: | - $env:HIP_PATH=$(Resolve-Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | split-path | split-path) - $env:CMAKE_PREFIX_PATH="${env:HIP_PATH}" - cmake -G "Unix Makefiles" -B build -S . ` - -DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" ` - -DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" ` - -DCMAKE_BUILD_TYPE=Release ` - -DGGML_HIP=ON ` - -DGGML_RPC=ON - cmake --build build -j ${env:NUMBER_OF_PROCESSORS} - - windows-latest-cmake-hip-release: - if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }} - runs-on: windows-latest - - strategy: - matrix: - gpu_target: [gfx1100, gfx1101, gfx1030] - - steps: - - name: Clone - id: checkout - uses: actions/checkout@v4 - with: - fetch-depth: 0 - - - name: ccache - uses: hendrikmuhs/ccache-action@v1.2.16 - with: - key: windows-latest-cmake-hip-release - evict-old-files: 1d - - - name: Install - id: depends - run: | - $ErrorActionPreference = "Stop" - write-host "Downloading AMD HIP SDK Installer" - Invoke-WebRequest -Uri "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q3-WinSvr2022-For-HIP.exe" -OutFile "${env:RUNNER_TEMP}\rocm-install.exe" - write-host "Installing AMD HIP SDK" - Start-Process "${env:RUNNER_TEMP}\rocm-install.exe" -ArgumentList '-install' -NoNewWindow -Wait - write-host "Completed AMD HIP SDK installation" - - - name: Verify ROCm - id: verify - run: | - & 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' --version - - - name: Build - id: cmake_build - run: | - $env:HIP_PATH=$(Resolve-Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | split-path | split-path) - $env:CMAKE_PREFIX_PATH="${env:HIP_PATH}" - cmake -G "Unix Makefiles" -B build -S . 
` - -DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" ` - -DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" ` - -DCMAKE_BUILD_TYPE=Release ` - -DAMDGPU_TARGETS=${{ matrix.gpu_target }} ` - -DGGML_HIP=ON ` - -DGGML_RPC=ON - cmake --build build -j ${env:NUMBER_OF_PROCESSORS} - md "build\bin\rocblas\library\" - cp "${env:HIP_PATH}\bin\hipblas.dll" "build\bin\" - cp "${env:HIP_PATH}\bin\rocblas.dll" "build\bin\" - cp "${env:HIP_PATH}\bin\rocblas\library\*" "build\bin\rocblas\library\" - - - name: Determine tag name - id: tag - shell: bash - run: | - BUILD_NUMBER="$(git rev-list --count HEAD)" - SHORT_HASH="$(git rev-parse --short=7 HEAD)" - if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then - echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT - else - SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-') - echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT - fi - - - name: Pack artifacts - id: pack_artifacts - run: | - 7z a llama-${{ steps.tag.outputs.name }}-bin-win-hip-x64-${{ matrix.gpu_target }}.zip .\build\bin\* - - - name: Upload artifacts - uses: actions/upload-artifact@v4 - with: - path: llama-${{ steps.tag.outputs.name }}-bin-win-hip-x64-${{ matrix.gpu_target }}.zip - name: llama-bin-win-hip-x64-${{ matrix.gpu_target }}.zip - - ios-xcode-build: - runs-on: macos-latest - - steps: - - name: Checkout code - uses: actions/checkout@v4 - - - name: Build - id: cmake_build - run: | - sysctl -a - cmake -B build -G Xcode \ - -DGGML_METAL_USE_BF16=ON \ - -DGGML_METAL_EMBED_LIBRARY=ON \ - -DLLAMA_BUILD_EXAMPLES=OFF \ - -DLLAMA_BUILD_TESTS=OFF \ - -DLLAMA_BUILD_SERVER=OFF \ - -DCMAKE_SYSTEM_NAME=iOS \ - -DCMAKE_OSX_DEPLOYMENT_TARGET=14.0 \ - -DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml - cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO - sudo cmake --install build --config Release - - - name: xcodebuild for swift package - id: xcodebuild - run: | - xcodebuild -scheme llama-Package -destination 'generic/platform=iOS' - - - name: Build Xcode project - run: xcodebuild -project examples/llama.swiftui/llama.swiftui.xcodeproj -scheme llama.swiftui -sdk iphoneos CODE_SIGNING_REQUIRED=NO CODE_SIGN_IDENTITY= -destination 'generic/platform=iOS' build - - android-build: - runs-on: ubuntu-latest - - steps: - - name: Clone - uses: actions/checkout@v4 - - - name: ccache - uses: hendrikmuhs/ccache-action@v1.2.16 - with: - key: android-build - evict-old-files: 1d - - - name: Set up JDK - uses: actions/setup-java@v3 - with: - java-version: 17 - distribution: zulu - - - name: Setup Android SDK - uses: android-actions/setup-android@v3 - with: - log-accepted-android-sdk-licenses: false - - - name: Build - run: | - cd examples/llama.android - - ./gradlew build --no-daemon + path: | + llama-${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }}-bin-win-${{ matrix.build }}-x64.zip release: if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }} @@ -1344,56 +216,28 @@ jobs: runs-on: ubuntu-latest needs: - - ubuntu-cpu-cmake + - ubuntu-latest-make + - ubuntu-latest-cmake + - macOS-latest-make + - macOS-latest-cmake - windows-latest-cmake - - windows-2019-cmake-cuda - - windows-latest-cmake-hip-release - - macOS-latest-cmake-arm64 - - macOS-latest-cmake-x64 steps: - - name: Clone - id: checkout - uses: actions/checkout@v4 - with: - fetch-depth: 0 - - - name: ccache - uses: hendrikmuhs/ccache-action@v1.2.16 - with: - key: release - evict-old-files: 1d - - - name: Determine tag 
name - id: tag - shell: bash - run: | - BUILD_NUMBER="$(git rev-list --count HEAD)" - SHORT_HASH="$(git rev-parse --short=7 HEAD)" - if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then - echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT - else - SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-') - echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT - fi - - name: Download artifacts id: download-artifact - uses: actions/download-artifact@v4 - with: - path: ./artifact + uses: actions/download-artifact@v3 - - name: Move artifacts - id: move_artifacts - run: mkdir -p ./artifact/release && mv ./artifact/*/*.zip ./artifact/release + - name: Get commit hash + id: commit + uses: pr-mpt/actions-commit-hash@v2 - name: Create release id: create_release - uses: ggml-org/action-create-release@v1 + uses: anzz1/action-create-release@v1 env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} with: - tag_name: ${{ steps.tag.outputs.name }} + tag_name: ${{ env.BRANCH_NAME }}-${{ steps.commit.outputs.short }} - name: Upload release id: upload_release @@ -1404,7 +248,7 @@ jobs: const path = require('path'); const fs = require('fs'); const release_id = '${{ steps.create_release.outputs.id }}'; - for (let file of await fs.readdirSync('./artifact/release')) { + for (let file of await fs.readdirSync('./artifact')) { if (path.extname(file) === '.zip') { console.log('uploadReleaseAsset', file); await github.repos.uploadReleaseAsset({ @@ -1412,7 +256,7 @@ jobs: repo: context.repo.repo, release_id: release_id, name: file, - data: await fs.readFileSync(`./artifact/release/${file}`) + data: await fs.readFileSync(`./artifact/${file}`) }); } } @@ -1426,7 +270,7 @@ jobs: # # steps: # - name: Clone -# uses: actions/checkout@v4 +# uses: actions/checkout@v1 # # - name: Dependencies # run: | @@ -1450,7 +294,7 @@ jobs: # # steps: # - name: Clone -# uses: actions/checkout@v4 +# uses: actions/checkout@v1 # # - name: Dependencies # run: | @@ -1474,7 +318,7 @@ jobs: # # steps: # - name: Clone -# uses: actions/checkout@v4 +# uses: actions/checkout@v1 # # - name: Dependencies # run: | @@ -1504,7 +348,7 @@ jobs: # # steps: # - name: Clone -# uses: actions/checkout@v4 +# uses: actions/checkout@v1 # # - name: Add msbuild to PATH # uses: microsoft/setup-msbuild@v1 @@ -1520,7 +364,7 @@ jobs: # msbuild ALL_BUILD.vcxproj -t:build -p:configuration=${{ matrix.build }} -p:platform=${{ matrix.arch }} # # - name: Upload binaries -# uses: actions/upload-artifact@v4 +# uses: actions/upload-artifact@v1 # with: # name: llama-bin-${{ matrix.arch }} # path: build/bin/${{ matrix.build }} @@ -1543,7 +387,7 @@ jobs: # # steps: # - name: Clone -# uses: actions/checkout@v4 +# uses: actions/checkout@v1 # # - name: Add msbuild to PATH # uses: microsoft/setup-msbuild@v1 @@ -1575,7 +419,7 @@ jobs: # # - name: Upload binaries # if: matrix.blas == 'ON' -# uses: actions/upload-artifact@v4 +# uses: actions/upload-artifact@v1 # with: # name: llama-blas-bin-${{ matrix.arch }} # path: build/bin/${{ matrix.build }} @@ -1589,7 +433,7 @@ jobs: # # steps: # - name: Clone -# uses: actions/checkout@v4 +# uses: actions/checkout@v1 # # - name: Dependencies # run: | @@ -1609,37 +453,3 @@ jobs: # popd # emcmake cmake . 
-DCMAKE_BUILD_TYPE=${{ matrix.build }} # make - - openEuler-latest-cmake-cann: - if: ${{ github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'Ascend NPU') }} - defaults: - run: - shell: bash -el {0} - runs-on: ubuntu-24.04-arm - strategy: - matrix: - cann: - - '8.0.rc3.beta1-910b-openeuler22.03-py3.10' - device: - - 'ascend910b3' - build: - - 'Release' - container: ascendai/cann:${{ matrix.cann }} - steps: - - name: Checkout - uses: actions/checkout@v4 - - - name: Dependencies - run: | - yum update -y - yum install -y git gcc gcc-c++ make cmake - - - name: Build - run: | - export LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:${ASCEND_TOOLKIT_HOME}/$(uname -m)-linux/devlib/:${LD_LIBRARY_PATH} - - cmake -S . -B build \ - -DCMAKE_BUILD_TYPE=${{ matrix.build }} \ - -DGGML_CANN=on \ - -DSOC_TYPE=${{ matrix.device }} - cmake --build build -j $(nproc) diff --git a/.github/workflows/close-issue.yml b/.github/workflows/close-issue.yml deleted file mode 100644 index 276a217d4..000000000 --- a/.github/workflows/close-issue.yml +++ /dev/null @@ -1,28 +0,0 @@ -name: Close inactive issues -on: - schedule: - - cron: "42 0 * * *" - -# Fine-grant permission -# https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token -permissions: - issues: write - -jobs: - close-issues: - runs-on: ubuntu-latest - permissions: - issues: write - pull-requests: write - steps: - - uses: actions/stale@v5 - with: - exempt-issue-labels: "refactor,help wanted,good first issue,research,bug,roadmap" - days-before-issue-stale: 30 - days-before-issue-close: 14 - stale-issue-label: "stale" - close-issue-message: "This issue was closed because it has been inactive for 14 days since being marked as stale." 
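For orientation, the openEuler CANN job shown above reduces to a short build recipe that can be reproduced locally. The sketch below is illustrative only: it assumes the same `ascendai/cann:8.0.rc3.beta1-910b-openeuler22.03-py3.10` container pinned in the workflow matrix (which provides `ASCEND_TOOLKIT_HOME`), and substitutes the matrix values `Release` and `ascend910b3` directly.

```bash
# Hedged sketch of the CANN (Ascend NPU) build from the workflow above,
# intended to be run inside the ascendai/cann container used by the job.
export LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:${ASCEND_TOOLKIT_HOME}/$(uname -m)-linux/devlib/:${LD_LIBRARY_PATH}

cmake -S . -B build \
    -DCMAKE_BUILD_TYPE=Release \
    -DGGML_CANN=on \
    -DSOC_TYPE=ascend910b3          # matrix.device in the workflow
cmake --build build -j $(nproc)
```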
- days-before-pr-stale: -1 - days-before-pr-close: -1 - operations-per-run: 10000 - repo-token: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/docker.yml b/.github/workflows/docker.yml index 6955a7dc8..379fbd7ad 100644 --- a/.github/workflows/docker.yml +++ b/.github/workflows/docker.yml @@ -10,50 +10,33 @@ name: Publish Docker image on: - workflow_dispatch: # allows manual triggering - schedule: - # Rebuild daily rather than on every push because it is expensive - - cron: '12 4 * * *' - -concurrency: - group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }} - cancel-in-progress: true - -# Fine-grant permission -# https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token -permissions: - packages: write + pull_request: + push: + branches: + - master jobs: push_to_registry: name: Push Docker image to Docker Hub + if: github.event.pull_request.draft == false - runs-on: ubuntu-22.04 + runs-on: ubuntu-latest env: COMMIT_SHA: ${{ github.sha }} strategy: - fail-fast: false matrix: config: - # Multi-stage build - - { tag: "cpu", dockerfile: ".devops/cpu.Dockerfile", platforms: "linux/amd64,linux/arm64", full: true, light: true, server: true, freediskspace: false} - - { tag: "cuda", dockerfile: ".devops/cuda.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, freediskspace: false} - - { tag: "musa", dockerfile: ".devops/musa.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, freediskspace: false} - - { tag: "intel", dockerfile: ".devops/intel.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, freediskspace: false} - - { tag: "vulkan", dockerfile: ".devops/vulkan.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, freediskspace: false} - # Note: the rocm images are failing due to a compiler error and are disabled until this is fixed to allow the workflow to complete - #- {tag: "rocm", dockerfile: ".devops/rocm.Dockerfile", platforms: "linux/amd64,linux/arm64", full: true, light: true, server: true, freediskspace: true } + - { tag: "light", dockerfile: ".devops/main.Dockerfile" } + - { tag: "full", dockerfile: ".devops/full.Dockerfile" } steps: - name: Check out the repo - uses: actions/checkout@v4 - with: - fetch-depth: 0 # preserve git history, so we can determine the build number + uses: actions/checkout@v3 - name: Set up QEMU - uses: docker/setup-qemu-action@v3 + uses: docker/setup-qemu-action@v2 - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v3 + uses: docker/setup-buildx-action@v2 - name: Log in to Docker Hub uses: docker/login-action@v2 @@ -62,112 +45,21 @@ jobs: username: ${{ github.repository_owner }} password: ${{ secrets.GITHUB_TOKEN }} - - name: Determine tag name - id: tag - shell: bash - run: | - BUILD_NUMBER="$(git rev-list --count HEAD)" - SHORT_HASH="$(git rev-parse --short=7 HEAD)" - REPO_OWNER="${GITHUB_REPOSITORY_OWNER@L}" # to lower case - REPO_NAME="${{ github.event.repository.name }}" - - # determine tag name postfix (build number, commit hash) - if [[ "${{ env.GITHUB_BRANCH_NAME }}" == "master" ]]; then - TAG_POSTFIX="-b${BUILD_NUMBER}" - else - SAFE_NAME=$(echo "${{ env.GITHUB_BRANCH_NAME }}" | tr '/' '-') - TAG_POSTFIX="-${SAFE_NAME}-${SHORT_HASH}" - fi - # list all tags possible - if [[ "${{ matrix.config.tag }}" == "cpu" ]]; then - TYPE="" - else - TYPE="-${{ matrix.config.tag }}" - fi - 
PREFIX="ghcr.io/${REPO_OWNER}/${REPO_NAME}:" - FULLTAGS="${PREFIX}full${TYPE},${PREFIX}full${TYPE}${TAG_POSTFIX}" - LIGHTTAGS="${PREFIX}light${TYPE},${PREFIX}light${TYPE}${TAG_POSTFIX}" - SERVERTAGS="${PREFIX}server${TYPE},${PREFIX}server${TYPE}${TAG_POSTFIX}" - echo "full_output_tags=$FULLTAGS" >> $GITHUB_OUTPUT - echo "light_output_tags=$LIGHTTAGS" >> $GITHUB_OUTPUT - echo "server_output_tags=$SERVERTAGS" >> $GITHUB_OUTPUT - echo "full_output_tags=$FULLTAGS" # print out for debugging - echo "light_output_tags=$LIGHTTAGS" # print out for debugging - echo "server_output_tags=$SERVERTAGS" # print out for debugging - env: - GITHUB_BRANCH_NAME: ${{ github.head_ref || github.ref_name }} - GITHUB_REPOSITORY_OWNER: '${{ github.repository_owner }}' - - - name: Free Disk Space (Ubuntu) - if: ${{ matrix.config.free_disk_space == true }} - uses: ggml-org/free-disk-space@v1.3.1 - with: - # this might remove tools that are actually needed, - # if set to "true" but frees about 6 GB - tool-cache: false - - # all of these default to true, but feel free to set to - # "false" if necessary for your workflow - android: true - dotnet: true - haskell: true - large-packages: true - docker-images: true - swap-storage: true - - - name: Build and push Full Docker image (tagged + versioned) - if: ${{ (github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch') && matrix.config.full == true }} - uses: docker/build-push-action@v6 + - name: Build and push Docker image (versioned) + if: github.event_name == 'push' + uses: docker/build-push-action@v4 with: context: . push: true - platforms: ${{ matrix.config.platforms }} - # tag list is generated from step above - tags: ${{ steps.tag.outputs.full_output_tags }} + platforms: linux/amd64,linux/arm64 + tags: "ghcr.io/ggerganov/llama.cpp:${{ matrix.config.tag }}-${{ env.COMMIT_SHA }}" file: ${{ matrix.config.dockerfile }} - target: full - provenance: false - # using github experimental cache - cache-from: type=gha - cache-to: type=gha,mode=max - # return to this if the experimental github cache is having issues - #cache-to: type=local,dest=/tmp/.buildx-cache - #cache-from: type=local,src=/tmp/.buildx-cache - - name: Build and push Light Docker image (tagged + versioned) - if: ${{ (github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch') && matrix.config.light == true }} - uses: docker/build-push-action@v6 + - name: Build and push Docker image (tagged) + uses: docker/build-push-action@v4 with: context: . - push: true - platforms: ${{ matrix.config.platforms }} - # tag list is generated from step above - tags: ${{ steps.tag.outputs.light_output_tags }} + push: ${{ github.event_name == 'push' }} + platforms: linux/amd64,linux/arm64 + tags: "ghcr.io/ggerganov/llama.cpp:${{ matrix.config.tag }}" file: ${{ matrix.config.dockerfile }} - target: light - provenance: false - # using github experimental cache - cache-from: type=gha - cache-to: type=gha,mode=max - # return to this if the experimental github cache is having issues - #cache-to: type=local,dest=/tmp/.buildx-cache - #cache-from: type=local,src=/tmp/.buildx-cache - - - name: Build and push Server Docker image (tagged + versioned) - if: ${{ (github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch') && matrix.config.server == true }} - uses: docker/build-push-action@v6 - with: - context: . 
- push: true - platforms: ${{ matrix.config.platforms }} - # tag list is generated from step above - tags: ${{ steps.tag.outputs.server_output_tags }} - file: ${{ matrix.config.dockerfile }} - target: server - provenance: false - # using github experimental cache - cache-from: type=gha - cache-to: type=gha,mode=max - # return to this if the experimental github cache is having issues - #cache-to: type=local,dest=/tmp/.buildx-cache - #cache-from: type=local,src=/tmp/.buildx-cache diff --git a/.github/workflows/editorconfig.yml b/.github/workflows/editorconfig.yml index f02b7c219..b4e535acf 100644 --- a/.github/workflows/editorconfig.yml +++ b/.github/workflows/editorconfig.yml @@ -1,12 +1,6 @@ name: EditorConfig Checker on: - workflow_dispatch: # allows manual triggering - inputs: - create_release: - description: 'Create new release' - required: true - type: boolean push: branches: - master @@ -14,16 +8,10 @@ on: branches: - master -concurrency: - group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }} - cancel-in-progress: true - jobs: editorconfig: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v4 - - uses: editorconfig-checker/action-editorconfig-checker@v2 - with: - version: v3.0.3 + - uses: actions/checkout@v3 + - uses: editorconfig-checker/action-editorconfig-checker@main - run: editorconfig-checker diff --git a/.github/workflows/gguf-publish.yml b/.github/workflows/gguf-publish.yml deleted file mode 100644 index 3ca4d3058..000000000 --- a/.github/workflows/gguf-publish.yml +++ /dev/null @@ -1,44 +0,0 @@ -# This workflow will upload a Python Package using Twine when a GGUF release is created -# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries - -# See `gguf-py/README.md` for how to make a release. - -# This workflow uses actions that are not certified by GitHub. -# They are provided by a third-party and are governed by -# separate terms of service, privacy policy, and support -# documentation. 
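To make the tag scheme of the Docker publishing workflow above concrete: the deleted version assembles image names from `PREFIX` (`ghcr.io/<owner>/<repo>:`), a target (`full`, `light`, `server`), an optional backend suffix, and a `-b<build number>` postfix derived from `git rev-list --count HEAD` on master. The sketch below is a hypothetical usage example, assuming the upstream `ggerganov/llama.cpp` repository; the build number `b4821` is a made-up placeholder, not a real release.

```bash
# Illustrative only: example pulls matching the tag layout produced by the
# tag-derivation script above (rolling tags plus build-number-pinned tags).
docker pull ghcr.io/ggerganov/llama.cpp:full              # rolling multi-arch CPU image
docker pull ghcr.io/ggerganov/llama.cpp:light-cuda        # CUDA variant of the light image
docker pull ghcr.io/ggerganov/llama.cpp:server-b4821      # server image pinned to a (hypothetical) master build
```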
- -name: Upload Python Package - -on: - workflow_dispatch: - push: - # Pattern matched against refs/tags - tags: - - 'gguf-v*' # Push events to every version tag - - -jobs: - deploy: - - runs-on: ubuntu-latest - - steps: - - uses: actions/checkout@v4 - - name: Set up Python - uses: actions/setup-python@v5 - with: - python-version: '3.9.x' - - name: Install dependencies - run: | - cd gguf-py - python -m pip install poetry - poetry install - - - name: Build package - run: cd gguf-py && poetry build - - name: Publish package - uses: pypa/gh-action-pypi-publish@release/v1 - with: - password: ${{ secrets.PYPI_API_TOKEN }} - packages-dir: gguf-py/dist diff --git a/.github/workflows/labeler.yml b/.github/workflows/labeler.yml deleted file mode 100644 index 368dbdbe5..000000000 --- a/.github/workflows/labeler.yml +++ /dev/null @@ -1,17 +0,0 @@ -name: "Pull Request Labeler" -on: -- pull_request_target - -jobs: - labeler: - permissions: - contents: read - pull-requests: write - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v4 - with: - repository: "ggerganov/llama.cpp" - - uses: actions/labeler@v5 - with: - configuration-path: '.github/labeler.yml' diff --git a/.github/workflows/python-check-requirements.yml b/.github/workflows/python-check-requirements.yml deleted file mode 100644 index 46e80aecd..000000000 --- a/.github/workflows/python-check-requirements.yml +++ /dev/null @@ -1,33 +0,0 @@ -name: Python check requirements.txt - -on: - push: - paths: - - '.github/workflows/python-check-requirements.yml' - - 'scripts/check-requirements.sh' - - 'convert*.py' - - '**/requirements*.txt' - pull_request: - paths: - - '.github/workflows/python-check-requirements.yml' - - 'scripts/check-requirements.sh' - - 'convert*.py' - - '**/requirements*.txt' - -concurrency: - group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }} - cancel-in-progress: true - -jobs: - python-check-requirements: - runs-on: ubuntu-latest - name: check-requirements - steps: - - name: Check out source repository - uses: actions/checkout@v4 - - name: Set up Python environment - uses: actions/setup-python@v5 - with: - python-version: "3.11" - - name: Run check-requirements.sh script - run: bash scripts/check-requirements.sh diff --git a/.github/workflows/python-lint.yml b/.github/workflows/python-lint.yml deleted file mode 100644 index ddfdf73b8..000000000 --- a/.github/workflows/python-lint.yml +++ /dev/null @@ -1,30 +0,0 @@ -name: flake8 Lint - -on: - push: - branches: - - master - paths: ['.github/workflows/python-lint.yml', '**/*.py'] - pull_request: - types: [opened, synchronize, reopened] - paths: ['.github/workflows/python-lint.yml', '**/*.py'] - -concurrency: - group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }} - cancel-in-progress: true - -jobs: - flake8-lint: - runs-on: ubuntu-latest - name: Lint - steps: - - name: Check out source repository - uses: actions/checkout@v4 - - name: Set up Python environment - uses: actions/setup-python@v5 - with: - python-version: "3.11" - - name: flake8 Lint - uses: py-actions/flake8@v2 - with: - plugins: "flake8-no-print" diff --git a/.github/workflows/python-type-check.yml b/.github/workflows/python-type-check.yml deleted file mode 100644 index 373bb6010..000000000 --- a/.github/workflows/python-type-check.yml +++ /dev/null @@ -1,40 +0,0 @@ -name: Python Type-Check - -on: - push: - paths: - - '.github/workflows/python-type-check.yml' - - 'pyrightconfig.json' - - '**.py' - - '**/requirements*.txt' - pull_request: - paths: - 
- '.github/workflows/python-type-check.yml' - - 'pyrightconfig.json' - - '**.py' - - '**/requirements*.txt' - -concurrency: - group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }} - cancel-in-progress: true - -jobs: - python-type-check: - runs-on: ubuntu-latest - name: pyright type-check - steps: - - name: Check out source repository - uses: actions/checkout@v4 - - name: Set up Python environment - uses: actions/setup-python@v5 - with: - python-version: "3.11" - - name: Install Python dependencies - # TODO: use a venv - run: pip install -r requirements/requirements-all.txt - - name: Type-check with Pyright - uses: jakebailey/pyright-action@v2 - with: - version: 1.1.382 - level: warning - warnings: true diff --git a/.github/workflows/server.yml b/.github/workflows/server.yml deleted file mode 100644 index 3a29107d0..000000000 --- a/.github/workflows/server.yml +++ /dev/null @@ -1,239 +0,0 @@ -# Server build and tests -name: Server - -on: - workflow_dispatch: # allows manual triggering - inputs: - sha: - description: 'Commit SHA1 to build' - required: false - type: string - slow_tests: - description: 'Run slow tests' - required: true - type: boolean - push: - branches: - - master - paths: ['.github/workflows/server.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'examples/server/**.*'] - pull_request: - types: [opened, synchronize, reopened] - paths: ['.github/workflows/server.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'examples/server/**.*'] - -env: - LLAMA_LOG_COLORS: 1 - LLAMA_LOG_PREFIX: 1 - LLAMA_LOG_TIMESTAMPS: 1 - LLAMA_LOG_VERBOSITY: 10 - -concurrency: - group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || github.run_id }} - cancel-in-progress: true - -jobs: - server: - runs-on: ubuntu-latest - - strategy: - matrix: - sanitizer: [ADDRESS, UNDEFINED] # THREAD is broken - build_type: [RelWithDebInfo] - include: - - build_type: Release - sanitizer: "" - fail-fast: false # While -DLLAMA_SANITIZE_THREAD=ON is broken - - steps: - - name: Dependencies - id: depends - run: | - sudo apt-get update - sudo apt-get -y install \ - build-essential \ - xxd \ - git \ - cmake \ - curl \ - wget \ - language-pack-en \ - libcurl4-openssl-dev - - - name: Clone - id: checkout - uses: actions/checkout@v4 - with: - fetch-depth: 0 - ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }} - - - name: Python setup - id: setup_python - uses: actions/setup-python@v5 - with: - python-version: '3.11' - - - name: Tests dependencies - id: test_dependencies - run: | - pip install -r examples/server/tests/requirements.txt - - # Setup nodejs (to be used for verifying bundled index.html) - - uses: actions/setup-node@v4 - with: - node-version: '22.11.0' - - - name: WebUI - Install dependencies - id: webui_lint - run: | - cd examples/server/webui - npm ci - - - name: WebUI - Check code format - id: webui_format - run: | - git config --global --add safe.directory $(realpath .) - cd examples/server/webui - git status - - npm run format - git status - modified_files="$(git status -s)" - echo "Modified files: ${modified_files}" - if [ -n "${modified_files}" ]; then - echo "Files do not follow coding style. 
To fix: npm run format" - echo "${modified_files}" - exit 1 - fi - - - name: Verify bundled index.html - id: verify_server_index_html - run: | - git config --global --add safe.directory $(realpath .) - cd examples/server/webui - git status - - npm run build - git status - modified_files="$(git status -s)" - echo "Modified files: ${modified_files}" - if [ -n "${modified_files}" ]; then - echo "Repository is dirty or server/webui is not built as expected" - echo "Hint: You may need to follow Web UI build guide in server/README.md" - echo "${modified_files}" - exit 1 - fi - - - name: Build (no OpenMP) - id: cmake_build_no_openmp - if: ${{ matrix.sanitizer == 'THREAD' }} - run: | - cmake -B build \ - -DGGML_NATIVE=OFF \ - -DLLAMA_BUILD_SERVER=ON \ - -DLLAMA_CURL=ON \ - -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \ - -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON \ - -DGGML_OPENMP=OFF ; - cmake --build build --config ${{ matrix.build_type }} -j $(nproc) --target llama-server - - - name: Build (sanitizers) - id: cmake_build_sanitizers - if: ${{ matrix.sanitizer != '' && matrix.sanitizer != 'THREAD' }} - run: | - cmake -B build \ - -DGGML_NATIVE=OFF \ - -DLLAMA_BUILD_SERVER=ON \ - -DLLAMA_CURL=ON \ - -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \ - -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON ; - cmake --build build --config ${{ matrix.build_type }} -j $(nproc) --target llama-server - - - name: Build (sanitizers) - id: cmake_build - if: ${{ matrix.sanitizer == '' }} - run: | - cmake -B build \ - -DGGML_NATIVE=OFF \ - -DLLAMA_BUILD_SERVER=ON \ - -DLLAMA_CURL=ON \ - -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} ; - cmake --build build --config ${{ matrix.build_type }} -j $(nproc) --target llama-server - - - name: Tests - id: server_integration_tests - if: ${{ matrix.sanitizer == '' }} - run: | - cd examples/server/tests - ./tests.sh - - - name: Tests (sanitizers) - id: server_integration_tests_sanitizers - if: ${{ matrix.sanitizer != '' }} - run: | - cd examples/server/tests - LLAMA_SANITIZE=1 ./tests.sh - - - name: Slow tests - id: server_integration_tests_slow - if: ${{ (github.event.schedule || github.event.inputs.slow_tests == 'true') && matrix.build_type == 'Release' }} - run: | - cd examples/server/tests - SLOW_TESTS=1 ./tests.sh - - - server-windows: - runs-on: windows-2019 - - steps: - - name: Clone - id: checkout - uses: actions/checkout@v4 - with: - fetch-depth: 0 - ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }} - - - name: libCURL - id: get_libcurl - env: - CURL_VERSION: 8.6.0_6 - run: | - curl.exe -o $env:RUNNER_TEMP/curl.zip -L "https://curl.se/windows/dl-${env:CURL_VERSION}/curl-${env:CURL_VERSION}-win64-mingw.zip" - mkdir $env:RUNNER_TEMP/libcurl - tar.exe -xvf $env:RUNNER_TEMP/curl.zip --strip-components=1 -C $env:RUNNER_TEMP/libcurl - - - name: Build - id: cmake_build - run: | - cmake -B build -DLLAMA_CURL=ON -DCURL_LIBRARY="$env:RUNNER_TEMP/libcurl/lib/libcurl.dll.a" -DCURL_INCLUDE_DIR="$env:RUNNER_TEMP/libcurl/include" - cmake --build build --config Release -j ${env:NUMBER_OF_PROCESSORS} --target llama-server - - - name: Python setup - id: setup_python - uses: actions/setup-python@v5 - with: - python-version: '3.11' - - - name: Tests dependencies - id: test_dependencies - run: | - pip install -r examples/server/tests/requirements.txt - - - name: Copy Libcurl - id: prepare_libcurl - run: | - cp $env:RUNNER_TEMP/libcurl/bin/libcurl-x64.dll ./build/bin/Release/libcurl-x64.dll - - - name: Tests - id: 
server_integration_tests - if: ${{ !matrix.disabled_on_pr || !github.event.pull_request }} - run: | - cd examples/server/tests - $env:PYTHONIOENCODING = ":replace" - pytest -v -x -m "not slow" - - - name: Slow tests - id: server_integration_tests_slow - if: ${{ (github.event.schedule || github.event.inputs.slow_tests == 'true') && matrix.build_type == 'Release' }} - run: | - cd examples/server/tests - $env:SLOW_TESTS = "1" - pytest -v -x diff --git a/.gitignore b/.gitignore index 694f36e04..e52d479ee 100644 --- a/.gitignore +++ b/.gitignore @@ -1,145 +1,42 @@ -# Extensions - -*.a -*.bat -*.bin -*.d -*.dll -*.dot -*.etag -*.exe -*.gcda -*.gcno -*.gcov -*.gguf -*.gguf.json -*.lastModified -*.log -*.metallib *.o -*.so -*.swp -*.tmp - -# IDE / OS - -.cache/ -.ccls-cache/ -.direnv/ +*.a .DS_Store +.build/ +.cache/ +.direnv/ .envrc -.idea/ .swiftpm +.venv .vs/ .vscode/ -nppBackup - -# Coverage - -gcovr-report/ -lcov-report/ - -# Build Artifacts - -tags -.build/ -build* -!build-info.cmake -!build-info.cpp.in -!build-info.sh -!build.zig -!docs/build.md -/libllama.so -/llama-* -/vulkan-shaders-gen -android-ndk-* -arm_neon.h -cmake-build-* -CMakeSettings.json -compile_commands.json -ggml-metal-embed.metal -llama-batched-swift -/rpc-server -out/ -tmp/ -autogen-*.md - -# Deprecated - -/main -/server - -# CI - -!.github/workflows/*.yml - -# Models +build/ +build-em/ +build-debug/ +build-release/ +build-static/ +build-no-accel/ +build-sanitize-addr/ +build-sanitize-thread/ models/* -models-mnt -!models/.editorconfig -!models/ggml-vocab-*.gguf* -# Zig +/main +/quantize +/quantize-stats +/result +/perplexity +/embedding +/benchmark-q4_0-matmult +/vdot +/Pipfile + +arm_neon.h +compile_commands.json + +__pycache__ + zig-out/ zig-cache/ -# Logs - ppl-*.txt -qnt-*.txt -perf-*.txt - -# Examples - -examples/jeopardy/results.txt -examples/server/*.css.hpp -examples/server/*.html.hpp -examples/server/*.js.hpp -examples/server/*.mjs.hpp -!build_64.sh -!examples/*.bat -!examples/*/*.kts -!examples/*/*/*.kts -!examples/sycl/*.bat -!examples/sycl/*.sh - -# Server Web UI temporary files -node_modules -examples/server/webui/dist - -# Python - -/.venv -__pycache__/ -*/poetry.lock -poetry.toml - -# Nix -/result - -# Test binaries -/tests/test-backend-ops -/tests/test-double-float -/tests/test-grad0 -/tests/test-grammar-parser -/tests/test-llama-grammar -/tests/test-opt -/tests/test-quantize-fns -/tests/test-quantize-perf -/tests/test-rope -/tests/test-sampling -/tests/test-tokenizer-0 -/tests/test-tokenizer-1-bpe -/tests/test-tokenizer-1-spm - -# Scripts -!/scripts/install-oneapi.bat - -# Test models for lora adapters -/lora-tests - -# Local scripts -/run-vim.sh -/run-chat.sh diff --git a/.gitmodules b/.gitmodules deleted file mode 100644 index 23ce5ff05..000000000 --- a/.gitmodules +++ /dev/null @@ -1,3 +0,0 @@ -[submodule "kompute"] - path = ggml/src/ggml-kompute/kompute - url = https://github.com/nomic-ai/kompute.git diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml deleted file mode 100644 index 91d791628..000000000 --- a/.pre-commit-config.yaml +++ /dev/null @@ -1,16 +0,0 @@ -# See https://pre-commit.com for more information -# See https://pre-commit.com/hooks.html for more hooks -exclude: prompts/.*.txt -repos: -- repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.6.0 - hooks: - - id: trailing-whitespace - - id: end-of-file-fixer - - id: check-yaml - - id: check-added-large-files -- repo: https://github.com/PyCQA/flake8 - rev: 7.0.0 - hooks: - - id: flake8 - additional_dependencies: 
[flake8-no-print] diff --git a/AUTHORS b/AUTHORS deleted file mode 100644 index 6796b2941..000000000 --- a/AUTHORS +++ /dev/null @@ -1,1047 +0,0 @@ -# date: Tue Feb 4 13:04:05 EET 2025 -# this file is auto-generated by scripts/gen-authors.sh - -0cc4m -0xspringtime <110655352+0xspringtime@users.noreply.github.com> -20kdc -2f38b454 -3ooabkhxtn <31479382+3ooabkhxtn@users.noreply.github.com> -44670 <44670@users.noreply.github.com> -65a <10104049+65a@users.noreply.github.com> -AN Long -AT -Aarni Koskela -Aaron Miller -Aaryaman Vasishta -Abheek Gulati -Abhilash Majumder <30946547+abhilash1910@users.noreply.github.com> -Abhishek Gopinath K <31348521+overtunned@users.noreply.github.com> -Adithya Balaji -AdithyanI -Adrian -Adrian Hesketh -Adrien Gallouët -Adrien Gallouët -Ahmad Tameem <113388789+Tameem-10xE@users.noreply.github.com> -Ahmet Zeer -AidanBeltonS <87009434+AidanBeltonS@users.noreply.github.com> -AidanBeltonS -Aisuko -Akarshan Biswas -Akarshan Biswas -Al Mochkin <14274697+amochkin@users.noreply.github.com> -Albert Jin -Alberto <57916483+albbus-stack@users.noreply.github.com> -Alberto Cabrera Pérez -Alberto Cabrera Pérez -Alex -Alex Azarov -Alex Azarov -Alex Klinkhamer -Alex Klinkhamer -Alex Nguyen -Alex O'Connell <35843486+acon96@users.noreply.github.com> -Alex Petenchea -Alex Renda -Alex Tuddenham <61622354+AlexsCode@users.noreply.github.com> -Alex von Gluck IV -Alexey Parfenov -Ali Chraghi <63465728+alichraghi@users.noreply.github.com> -Ali Nehzat -Ali Tariq -Alon -AlpinDale <52078762+AlpinDale@users.noreply.github.com> -Amir -AmirAli Mirian <37371367+amiralimi@users.noreply.github.com> -Ananta Bastola -Anas Ahouzi <112881240+aahouzi@users.noreply.github.com> -András Salamon -Andreas (Andi) Kunar -Andreas Kieslinger <47689530+aendk@users.noreply.github.com> -Andrei -Andrew Canis -Andrew Downing -Andrew Duffy -Andrew Godfrey -Andrew Minh Nguyen <40281306+amqdn@users.noreply.github.com> -Andy Salerno -Andy Tai -Anthony Van de Gejuchte -Antonis Makropoulos -Arik Poznanski -Armen Kaleshian -Artem -Artem Zinnatullin -Artyom Lebedev -Asbjørn Olling -Ásgeir Bjarni Ingvarsson -Asghar Ghorbani -Ashish <1856117+ashishdatta@users.noreply.github.com> -Ashok Gelal <401055+ashokgelal@users.noreply.github.com> -Ashraful Islam -Atsushi Tatsuma -Austin <77757836+teleprint-me@users.noreply.github.com> -AustinMroz -BADR -Bach Le -Bailey Chittle <39804642+bachittle@users.noreply.github.com> -BarfingLemurs <128182951+BarfingLemurs@users.noreply.github.com> -Bartowski -Behnam M <58621210+ibehnam@users.noreply.github.com> -Ben Ashbaugh -Ben Garney -Ben Siraphob -Ben Williams -Benjamin Findley <39356821+Kartoffelsaft@users.noreply.github.com> -Benjamin Lecaillon <84293038+blecaillon@users.noreply.github.com> -Benson Wong -Bernat Vadell -Bernhard M. 
Wiedemann -Bert Wagner -Billel Mokeddem -Bingan <70050083+binganao@users.noreply.github.com> -Bjarke Viksøe <164612031+bviksoe@users.noreply.github.com> -Bodo Graumann -Bono Lv -Borislav Stanimirov -Borislav Stanimirov -Branden Butler -Brandon Squizzato <35474886+bsquizz@users.noreply.github.com> -Brian -Brian Cunnie -Bruce MacDonald -Bryan Honof -CJ Pais -CRD716 -Calvin Laurenson -Cameron -Cameron Kaiser -Carolinabanana <140120812+Carolinabanana@users.noreply.github.com> -CarryFun <76023481+CarryFun@users.noreply.github.com> -Carsten Kragelund Jørgensen -CarterLi999 <664681047@qq.com> -Casey Primozic -Casey Primozic -CausalLM <148736309+CausalLM@users.noreply.github.com> -Cebtenzzre -CentricStorm -Chad Brewbaker -Changyeon Kim -Chao Jiang -Charles Xu <63788048+chaxu01@users.noreply.github.com> -Charles Xu -Chen Xi -Chen Xi -Cheng Shao -Chenguang Li <87689256+noemotiovon@users.noreply.github.com> -Chris Elrod -Chris Kuehl -Christian Demsar -Christian Demsar -Christian Falch <875252+chrfalch@users.noreply.github.com> -Christian Kastner -Christian Kögler -Christian Köhnenkamp -Christian Zhou-Zheng <59622928+christianazinn@users.noreply.github.com> -Christopher Nielsen <62156882+mascguy@users.noreply.github.com> -Clark Saben <76020733+csaben@users.noreply.github.com> -Clint Herron -Conrad Kramer -Corentin REGAL -CrispStrobe <154636388+CrispStrobe@users.noreply.github.com> -Csaba Kecskemeti -Cuong Trinh Manh -DAN™ -Damian Stewart -Dan Johansson <164997844+eddnjjn@users.noreply.github.com> -Dan Johansson -Dane Madsen -DaniAndTheWeb <57776841+DaniAndTheWeb@users.noreply.github.com> -Daniel Bevenius -Daniel Drake -Daniel Hiltgen -Daniel Illescas Romero -Daniel Kleine <53251018+d-kleine@users.noreply.github.com> -Daniele <57776841+daniandtheweb@users.noreply.github.com> -DannyDaemonic -Dat Quoc Nguyen <2412555+datquocnguyen@users.noreply.github.com> -Dave -Dave Airlie -Dave Airlie -Dave Della Costa -David Friehs -David Kennedy -David Pflug -David Renshaw -David Sommers <12738+databyte@users.noreply.github.com> -David Yang -DavidKorczynski -Dawid Potocki -Dawid Wysocki <62249621+TortillaZHawaii@users.noreply.github.com> -Dean -Deins -Denis Spasyuk <34203011+dspasyuk@users.noreply.github.com> -Derrick T. Woolworth -Deven Mistry <31466137+deven367@users.noreply.github.com> -Dibakar Gope -Didzis Gosko -Diego Devesa -Diogo Teles Sant'Anna -Djip007 <3705339+Djip007@users.noreply.github.com> -Djip007 -Don Mahurin -DooWoong Lee (David) -Doomsdayrs <38189170+Doomsdayrs@users.noreply.github.com> -Dou Xinpeng <15529241576@163.com> -Dou Xinpeng <81913537+Dou-Git@users.noreply.github.com> -Douglas Hanley -Dr. Tom Murphy VII Ph.D <499244+tom7@users.noreply.github.com> -Ebey Abraham -Echo Nolan -Ed Lee -Ed Lepedus -Eddie-Wang -Edward Taylor -Elaine -Elbios <141279586+Elbios@users.noreply.github.com> -Elton Kola -Emreerdog <34742675+Emreerdog@users.noreply.github.com> -Engininja2 <139037756+Engininja2@users.noreply.github.com> -Equim -Eric Curtin -Eric Curtin -Eric Sommerlade -Eric Zhang <34133756+EZForever@users.noreply.github.com> -Erik Garrison -Erik Scholz -Esko Toivonen -Ettore Di Giacinto -Evan Jones -Evan Miller -Eve <139727413+netrunnereve@users.noreply.github.com> -Evgeny Kurnevsky -Ewout ter Hoeven -ExtReMLapin <3909752+ExtReMLapin@users.noreply.github.com> -FK -Fabian -Fabio R. 
Sluzala -Faez Shakil -Faisal Zaghloul -Faisal Zaghloul -Fan Shupei -FantasyGmm <16450052+FantasyGmm@users.noreply.github.com> -Farbod Bijary <110523279+farbodbj@users.noreply.github.com> -Fattire <528174+fat-tire@users.noreply.github.com> -Felix -Finn Voorhees -Firat -FirstTimeEZ <179362031+FirstTimeEZ@users.noreply.github.com> -Folko-Ven <71110216+Folko-Ven@users.noreply.github.com> -Foul-Tarnished <107711110+Foul-Tarnished@users.noreply.github.com> -Francisco Melo <43780565+francis2tm@users.noreply.github.com> -Frank Mai -FrankHB -Frankie Robertson -Fred Douglas <43351173+fredlas@users.noreply.github.com> -Frederik Vogel -Gabe Goodhart -Gabe Goodhart -Gaetan Bisson -GainLee -Galunid -Gary Linscott -Gary Mulder -Gavin Zhao -Genkagaku.GPT -Georgi Gerganov -Gilad S -Gilad S. <7817232+giladgd@users.noreply.github.com> -Giuseppe Scrivano -GiviMAD -Govlzkoy -Guillaume "Vermeille" Sanchez -Guillaume Wenzek -Guoliang Hua <32868157+nbcsm@users.noreply.github.com> -Guoteng <32697156+SolenoidWGT@users.noreply.github.com> -Guspan Tanadi <36249910+guspan-tanadi@users.noreply.github.com> -Gustavo Rocha Dias <91472747+gustrd@users.noreply.github.com> -Haggai Nuchi -Halalaluyafail3 <55773281+Halalaluyafail3@users.noreply.github.com> -Hamdoud Hakem <90524568+hamdoudhakem@users.noreply.github.com> -HanishKVC -Haohui Mai -Haoxiang Fei -Harald Fernengel -Hatsune Miku <129688334+at8u@users.noreply.github.com> -HatsuneMikuUwU33 <173229399+HatsuneMikuUwU33@users.noreply.github.com> -Haus1 -Henk Poley -Henri Vasserman -Henrik Forstén -Herman Semenov -Hesen Peng -HimariO -Hoang Nguyen -Hong Bo PENG -Hongyu Ouyang <96765450+casavaca@users.noreply.github.com> -Howard Su -Hua Jiang -Huang Qi -Huawei Lin -Hugo Roussel -Huifeng Ou <79071290+ho2103@users.noreply.github.com> -Ian Bull -Ian Bull -Ian Scrivener -Icecream95 -Ido S -IgnacioFDM -Igor Okulist -Ihar Hrachyshka -Ikko Eltociear Ashimine -Ilya Kurdyukov <59548320+ilyakurdyukov@users.noreply.github.com> -Ionoclast Laboratories -Isaac McFadyen -IsaacDynamo <61521674+IsaacDynamo@users.noreply.github.com> -Ivan -Ivan Filipov <159561759+vanaka11@users.noreply.github.com> -Ivan Komarov -Ivan Stepanov -JFLFY2255 -JH23X <165871467+JH23X@users.noreply.github.com> -Jack Mousseau -Jack Mousseau -JackJollimore <130917767+JackJollimore@users.noreply.github.com> -Jaeden Amero -Jaemin Son -Jafar Uruç -Jag Chadha -Jakub N -James A Capozzoli <157492257+jac-jim@users.noreply.github.com> -James Reynolds -Jan Boon -Jan Boon -Jan Ploski -Jannis Schönleber -Jared Van Bortel -Jared Van Bortel -Jason McCartney -Jason Stillerman -Jean-Christophe Hoelt -Jean-Michaël Celerier -Jed Fox -Jeff Bolz -Jeffrey Morgan -Jeffrey Quesnelle -Jeroen Mostert -Jesse Jojo Johnson -Jett Janiak -Jeximo -Jhen-Jie Hong -Jiahao Li -Jian Liao -JidongZhang-THU <1119708529@qq.com> -Jinwoo Jeong <33892306+williamjeong2@users.noreply.github.com> -Jiří Podivín <66251151+jpodivin@users.noreply.github.com> -Jiří Sejkora -Joan Fontanals -Joan Fontanals -João Dinis Ferreira -Joe Eli McIlvain -Joe Todd -Johan -Johannes Gäßler -Johannes Rudolph -John <78893154+cmp-nct@users.noreply.github.com> -John Balis -John Smith <67539080+kingsidelee@users.noreply.github.com> -JohnnyB -Jonas Wunderlich <32615971+jonas-w@users.noreply.github.com> -Jorge A <161275481+jorgealias@users.noreply.github.com> -Jose Maldonado <63384398+yukiteruamano@users.noreply.github.com> -Joseph Stahl <1269177+josephst@users.noreply.github.com> -Josh Ramer -Joyce -Juan Calderon-Perez <835733+gaby@users.noreply.github.com> -Judd -Juk Armstrong 
<69222624+jukofyork@users.noreply.github.com> -Julius Arkenberg -Jun Hee Yoo -Jun Jie <71215065+junnjiee16@users.noreply.github.com> -Junil Kim -Junyang Lin -Juraj Bednar -Justin Parker -Justin Suess -Justina Cho -Justine Tunney -Justine Tunney -Juuso Alasuutari -KASR -Kamil Tomšík -Karol Kontny <82021046+kkontny@users.noreply.github.com> -Karsten Weiss -Karthick -Karthik Kumar Viswanathan <195178+guilt@users.noreply.github.com> -Karthik Sethuraman -Kasumi <90275229+kasumi-1@users.noreply.github.com> -Kawrakow <48489457+ikawrakow@users.noreply.github.com> -Keiichi Tabata -Keke Han -Kenvix ⭐ -Kerfuffle <44031344+KerfuffleV2@users.noreply.github.com> -Kevin Gibbons -Kevin Ji <1146876+kevinji@users.noreply.github.com> -Kevin Kwok -Kevin Lo -Kevin Wang -Kolen Cheung -Konstantin Herud -Konstantin Zhuravlyov -Kunshang Ji -Kyle Bruene -Kyle Liang -Kyle Mistele -Kylin <56434533+KyL0N@users.noreply.github.com> -Lars Grammel -Laura -Lee <44310445+lx200916@users.noreply.github.com> -Lee Drake -Leng Yue -Leon Knauer -LeonEricsson <70749762+LeonEricsson@users.noreply.github.com> -Leonardo Neumann -Li Tan -Linwei Wang -Liu Jia <109258120+Septa2112@users.noreply.github.com> -Liu Jia -LoganDark -Loïc Carrère -LostRuins <39025047+LostRuins@users.noreply.github.com> -LostRuins Concedo <39025047+LostRuins@users.noreply.github.com> -Luciano -Luo Tian -Lyle Dean -M-A -M. Yusuf Sarıgöz -Ma Mingfei -Maarten ter Huurne -Mack Straight -Maël Kerbiriou -MaggotHATE -Mahesh Madhav <67384846+heshpdx@users.noreply.github.com> -Manuel <44313466+makuche@users.noreply.github.com> -Marc Köhlbrugge -Marco Matthies <71844+marcom@users.noreply.github.com> -Marcus Dunn <51931484+MarcusDunn@users.noreply.github.com> -Marian Cepok -Mark Fairbairn -Mark Zhuang -Marko Tasic -Markus Tavenrath -Martin Delille -Martin Krasser -Martin Schwaighofer -Marvin Gießing -Masaya, Kato <62578291+msy-kato@users.noreply.github.com> -MasterYi1024 <39848311+MasterYi1024@users.noreply.github.com> -Mateusz Charytoniuk -Matheus C. França -Matheus Gabriel Alves Silva -Mathieu Baudier -Mathieu Geli -Mathieu Nayrolles -Mathijs Henquet -Mathijs de Bruin -Matt Clayton <156335168+mattjcly@users.noreply.github.com> -Matt Pulver -Matt Stephenson -Matteo Boschini <12133566+mbosc@users.noreply.github.com> -Matteo Mortari -Mattheus Chediak -Matthew Tejo -Matvey Soloviev -Max Krasnyansky -Max Krasnyansky -Maxime <672982+maximegmd@users.noreply.github.com> -Maximilian Winter -Meng Zhang -Meng, Hengyu -Mengqing Cao -Merrick Christensen -Michael Coppola -Michael Engel -Michael Francis -Michael Hueschen -Michael Kesper -Michael Klimenko -Michael Podvitskiy -Michael Potter -Michael de Gans -Michaël de Vries -Michał Moskal -Michał Tuszyński -Michelle Tan <41475767+MichelleTanPY@users.noreply.github.com> -Mihai -Mike -Mikko Juola -Minsoo Cheong <54794500+mscheong01@users.noreply.github.com> -Minsoo Cheong -Mirko185 -Mirror Azure <54669636+MirrorAzure@users.noreply.github.com> -MistApproach <98988043+MistApproach@users.noreply.github.com> -Miwa / Ensan <63481257+ensan-hcl@users.noreply.github.com> -Mohammadreza Hendiani -Mohammadreza Hendiani -Molly Sophia -MorganRO8 <47795945+MorganRO8@users.noreply.github.com> -Murilo Santana -Musab Gultekin -Nam D. 
Tran <42194884+namtranase@users.noreply.github.com> -Nathan Epstein -Natsu -NawafAlansari <72708095+NawafAlansari@users.noreply.github.com> -Nebula -Neo Zhang <14088817+arthw@users.noreply.github.com> -Neo Zhang -Neo Zhang Jianyu -Neuman Vong -NeverLucky <92274250+nvrxq@users.noreply.github.com> -Nexes the Old <124105151+Nexesenex@users.noreply.github.com> -Nexesenex <124105151+Nexesenex@users.noreply.github.com> -Niall Coates <1349685+Niall-@users.noreply.github.com> -Nicholai Tukanov -Nico Bosshard -Nicolai Weitkemper -Nicolás Pérez -Nicolò Scipione -Nigel Bosch -Nikita Sarychev <42014488+sARY77@users.noreply.github.com> -Niklas Korz -NikolaiLyssogor <59844691+NikolaiLyssogor@users.noreply.github.com> -Nikolaos Pothitos -Nikolas <127742645+nneubacher@users.noreply.github.com> -Nindaleth -Nuno -OSecret <135510162+OLSecret@users.noreply.github.com> -Oleksandr Nikitin -Oleksii Maryshchenko -Olivier Chafik -Ondřej Čertík -Ouadie EL FAROUKI -PAB -Pablo Duboue -Pascal Patry -Patrice Ferlet -Paul Tsochantaris -Pavel Zloi -Pavol Rusnak -Paweł Wodnicki <151604+32bitmicro@users.noreply.github.com> -Pedro Cuenca -Peter -Peter Sugihara -Phil H <5756783+phiharri@users.noreply.github.com> -Philip Taron -Phillip Kravtsov -Pierre Alexandre SCHEMBRI -Pierrick Hymbert -Pieter Ouwerkerk -Plamen Minev -Prashant Vithule <119530321+Vithulep@users.noreply.github.com> -Przemysław Pawełczyk -Qin Yue Chen <71813199+chenqiny@users.noreply.github.com> -Qingyou Meng -Qu Zongfu <43257352+yancaoweidaode@users.noreply.github.com> -R0CKSTAR -R0CKSTAR -RJ Adriaansen -Radoslav Gerganov -Radosław Gryta -Rahul Vivek Nair <68507071+RahulVivekNair@users.noreply.github.com> -Raj Hammeer Singh Hada -Ralph Soika -Rand Xie -Randall Fitzgerald -Random Fly -Reinforce-II -Rémy Oudompheng -Ren Xuancheng -Rene Leonhardt <65483435+reneleonhardt@users.noreply.github.com> -Reza Kakhki -RhinoDevel -Riccardo Orlando -Riceball LEE -Rich Dougherty -Richard Kiss -Richard Roberson -Rick G <26732651+TheFlipbook@users.noreply.github.com> -Rickard Edén -Rickard Hallerbäck -Rickey Bowers Jr -Riley Stewart -Rinne -Rinne -Robert Brisita <986796+rbrisita@users.noreply.github.com> -Robert Collins -Robert Ormandi <52251610+ormandi@users.noreply.github.com> -Robert Sung-wook Shin -Robey Holderith -Robyn -Roger Meier -Roland <14355895+rbur0425@users.noreply.github.com> -Romain Biessy -Romain D <90720+Artefact2@users.noreply.github.com> -Romain Neutron -Roman Parykin -Ron Evans -Ron Jailall -Roni -Ronny Brendel -Ronsor -Rowan Hart -Ruan <47767371+ruanych@users.noreply.github.com> -Ruchira Hasaranga -Rudi Servo -Ruixin Huang <18860020911@163.com> -Rune <43761327+Rune-AI@users.noreply.github.com> -RunningLeon -RunningLeon -Ryan Landay -Ryder Wishart -Ryuei -Rőczey Barnabás <31726601+An0nie@users.noreply.github.com> -SRHMorris <69468379+SRHMorris@users.noreply.github.com> -SXX -SakuraUmi -Salvador E. 
Tropea -Salvatore Mesoraca -Sam Spilsbury -Sami Farin <3876865+Safari77@users.noreply.github.com> -Samuel Maynard -Sang-Kil Park -Seb C <47074056+Sebby37@users.noreply.github.com> -Sebastián A -SebastianApel <13675545+SebastianApel@users.noreply.github.com> -Senemu <10880819+Senemu@users.noreply.github.com> -Sergey Alirzaev -Sergio López -Sergio López -Sertaç Özercan <852750+sozercan@users.noreply.github.com> -SeungWon Jeong <65549245+redlion0929@users.noreply.github.com> -ShadovvBeast -Shakhar Dasgupta -Shane A -Shangning Xu <32517059+xushangning@users.noreply.github.com> -Shankar -Shanshan Shen <467638484@qq.com> -Shijie <821898965@qq.com> -Shintarou Okada -Shouzheng Liu <61452103+lshzh-ww@users.noreply.github.com> -Shouzheng Liu -Shuichi Tsutsumi -Shupei Fan -Sigbjørn Skjæret -Simon Willison -Siwen Yu -Sky Yan -Slaren <2141330+slaren@users.noreply.github.com> -Slava Primenko -Small Grass Forest -SoftwareRenderer <138734813+SoftwareRenderer@users.noreply.github.com> -Someone -Someone Serge -Sourab Mangrulkar <13534540+pacman100@users.noreply.github.com> -Spencer Sutton -Srihari-mcw <96763064+Srihari-mcw@users.noreply.github.com> -Srinivas Billa -Stefan Sydow -Steffen Röcker -Stephan Walter -Stephen Nichols -Steve Bonds -Steve Grubb -Steven Prichard -Steven Roussey -Steward Garcia <57494570+FSSRepo@users.noreply.github.com> -StrangeBytesDev <141275258+StrangeBytesDev@users.noreply.github.com> -Suaj Carrot <72162667+SuajCarrot@users.noreply.github.com> -Sukriti Sharma -SuperUserNameMan -Sutou Kouhei -Tai Duc Nguyen -Taikono-Himazin -Tameem <113388789+AhmadTameem@users.noreply.github.com> -Tamotsu Takahashi -Tei Home -Thái Hoàng Tâm <75922889+RoyalHeart@users.noreply.github.com> -Thatcher Chamberlin -Theia Vogel -Thérence <13496987+Royalphax@users.noreply.github.com> -Thibault Terrasson -Thomas Klausner -Thorsten Sommer -Tim Miller -Tim Wang -Timmy Knight -Timothy Cronin <40186632+4imothy@users.noreply.github.com> -Ting Lou -Ting Lou -Ting Sun -Tobias Lütke -Tom C -Tom Jobbins <784313+TheBloke@users.noreply.github.com> -Tomas -Tomáš Pazdiora -Tony Wasserka <4840017+neobrain@users.noreply.github.com> -Tristan Druyen -Tristan Ross -Trivikram Kamat <16024985+trivikr@users.noreply.github.com> -Tungsten842 <886724vf@anonaddy.me> -Tungsten842 -Tushar -UEXTM.com <84163508+uextm@users.noreply.github.com> -Ujjawal Panchal <31011628+Ujjawal-K-Panchal@users.noreply.github.com> -Ulrich Drepper -Uzo Nweke -Vaibhav Srivastav -Val Kharitonov -Valentin Konovalov -Valentin Mamedov <45292985+Inf1delis@users.noreply.github.com> -Valentyn Bezshapkin <61702053+valentynbez@users.noreply.github.com> -Vali Malinoiu <0x4139@gmail.com> -Victor Nogueira -Victor Z. 
Peng -Viet-Anh NGUYEN (Andrew) -Vinesh Janarthanan <36610342+VJHack@users.noreply.github.com> -Vlad -Vladimir -Vladimir Malyutin -Vladimir Zorin -VoidIsVoid <343750470@qq.com> -Volodymyr Vitvitskyi <72226+signalpillar@users.noreply.github.com> -Wang Qin <37098874+wangqin0@users.noreply.github.com> -Wang Ran (汪然) -WangHaoranRobin <56047610+WangHaoranRobin@users.noreply.github.com> -Weird Constructor -Welby Seely -Wentai Zhang -WillCorticesAI <150854901+WillCorticesAI@users.noreply.github.com> -William Tambellini -William Tambellini -Willy Tarreau -Woof Dog <197125663+woof-dog@users.noreply.github.com> -Wouter <9594229+DifferentialityDevelopment@users.noreply.github.com> -Wu Jian Ping -Wu Jian Ping -Xiake Sun -Xiang (Kevin) Li -Xiao-Yong Jin -XiaotaoChen -Xiaoyi Chen -Xie Yanbo -Xingchen Song(宋星辰) -Xinpeng Dou <81913537+Dou-Git@users.noreply.github.com> -Xuan Son Nguyen -Xuan-Son Nguyen -Yaiko -Yann Follet <131855179+YannFollet@users.noreply.github.com> -Yaroslav -Yazan Agha-Schrader -Yiming Cui -Yishuo Wang -Yoshi Suhara -Yoshi Suhara -Younes Belkada <49240599+younesbelkada@users.noreply.github.com> -Yueh-Po Peng <94939112+y10ab1@users.noreply.github.com> -Yüg -Yui -Yun Dou -Yuri Khrustalev -Yusuf Kağan Hanoğlu -Yuval Peled <31162840+Yuval-Peled@users.noreply.github.com> -ZHAOKAI WANG -Zane Shannon -Zay <95888118+isaiahbjork@users.noreply.github.com> -Zenix -Zhang Peiyuan -Zheng.Deng <32841220+dengzheng-cloud@users.noreply.github.com> -Zhenwei Jin <109658203+kylo5aby@users.noreply.github.com> -Zhiyuan Li -Zhiyuan Li -ZhouYuChen -Ziad Ben Hadj-Alouane -Ziang Wu <97337387+ZiangWu-77@users.noreply.github.com> -Zsapi -a-n-n-a-l-e-e <150648636+a-n-n-a-l-e-e@users.noreply.github.com> -a3sh <38979186+A3shTnT@users.noreply.github.com> -adel boussaken -afrideva <95653597+afrideva@users.noreply.github.com> -ag2s20150909 <19373730+ag2s20150909@users.noreply.github.com> -agray3 -akawrykow <142945436+akawrykow@users.noreply.github.com> -alek3y <44779186+alek3y@users.noreply.github.com> -alexpinel <93524949+alexpinel@users.noreply.github.com> -alonfaraj -alwqx -amd-dwang -amd-lalithnc -amritahs-ibm -andrijdavid -anon998 <131767832+anon998@users.noreply.github.com> -anzz1 -apaz -apcameron <37645737+apcameron@users.noreply.github.com> -arch-btw <57669023+arch-btw@users.noreply.github.com> -arcrank -ardfork <134447697+ardfork@users.noreply.github.com> -arlo-phoenix <140345165+arlo-phoenix@users.noreply.github.com> -aryantandon01 <80969509+aryantandon01@users.noreply.github.com> -at8u <129688334+at8u@users.noreply.github.com> -automaticcat -awatuna <23447591+awatuna@users.noreply.github.com> -b4b4o -bandoti <141645996+bandoti@users.noreply.github.com> -beiller -bhubbb <79117352+bhubbb@users.noreply.github.com> -bmwl -bobqianic <129547291+bobqianic@users.noreply.github.com> -brucepro -bryanSwk <93190252+bryanSwk@users.noreply.github.com> -bsilvereagle -bssrdf -byte-6174 <88070277+byte-6174@users.noreply.github.com> -cduk <19917266+cduk@users.noreply.github.com> -cebtenzzre -chaihahaha -chiranko <96988916+chiranko@users.noreply.github.com> -clibdev <52199778+clibdev@users.noreply.github.com> -clyang -cocktailpeanut <121128867+cocktailpeanut@users.noreply.github.com> -codezjx -coezbek -comex -compilade <113953597+compilade@users.noreply.github.com> -compilade -cpumaxx <163466046+cpumaxx@users.noreply.github.com> -crasm -crasm -daboe01 -daghanerdonmez <44506702+daghanerdonmez@users.noreply.github.com> -daminho <37615795+daminho@users.noreply.github.com> -david raistrick -ddh0 -ddpasa 
<112642920+ddpasa@users.noreply.github.com> -deepdiffuser <112834445+deepdiffuser@users.noreply.github.com> -devojony <61173062+devojony@users.noreply.github.com> -ditsuke -divinity76 -dm4 -dotpy314 <33351922+dotpy314@users.noreply.github.com> -drbh -ds5t5 <145942675+ds5t5@users.noreply.github.com> -dylan -eastriver -ebraminio -ebraminio -eiery <19350831+eiery@users.noreply.github.com> -eric8607242 -fairydreaming <166155368+fairydreaming@users.noreply.github.com> -fengerhu1 <2748250768@qq.com> -fj-y-saito <85871716+fj-y-saito@users.noreply.github.com> -fraxy-v <65565042+fraxy-v@users.noreply.github.com> -github-actions[bot] -gliptic -gn64 -goerch -grahameth <96447521+grahameth@users.noreply.github.com> -gtygo -gwjr <502526+gwjr@users.noreply.github.com> -h-h-h-h <13482553+h-h-h-h@users.noreply.github.com> -hankcs -haopeng <657407891@qq.com> -hipudding -hoangmit -hongbo.mo <352280764@qq.com> -hopkins385 <98618192+hopkins385@users.noreply.github.com> -howlger -howlger -hutli <6594598+hutli@users.noreply.github.com> -hutli -hutli -hxer7963 -hydai -iSma -iacore <74560659+iacore@users.noreply.github.com> -icppWorld <124377669+icppWorld@users.noreply.github.com> -igarnier -intelmatt <61025942+intelmatt@users.noreply.github.com> -iohub -issixx <46835150+issixx@users.noreply.github.com> -jacobi petrucciani <8117202+jpetrucciani@users.noreply.github.com> -jaime-m-p <167997752+jaime-m-p@users.noreply.github.com> -jameswu2014 <545426914@qq.com> -jdomke <28772296+jdomke@users.noreply.github.com> -jiahao su -jiez <373447296@qq.com> -jneem -joecryptotoo <80373433+joecryptotoo@users.noreply.github.com> -johnson442 <56517414+johnson442@users.noreply.github.com> -jojorne -jon-chuang <9093549+jon-chuang@users.noreply.github.com> -jp-x-g -jukofyork <69222624+jukofyork@users.noreply.github.com> -junchao-loongson <68935141+junchao-loongson@users.noreply.github.com> -jwj7140 <32943891+jwj7140@users.noreply.github.com> -k.h.lai -kaizau -kallewoof -kalomaze <66376113+kalomaze@users.noreply.github.com> -kang -katsu560 <118887472+katsu560@users.noreply.github.com> -kchro3 <62481661+kchro3@users.noreply.github.com> -khimaros -kiltyj -klosax <131523366+klosax@users.noreply.github.com> -krystiancha -kunal-vaishnavi <115581922+kunal-vaishnavi@users.noreply.github.com> -kunnis -kuronekosaiko -kustaaya <58045274+kustaaya@users.noreply.github.com> -kuvaus <22169537+kuvaus@users.noreply.github.com> -kwin1412 <42286931+kwin1412@users.noreply.github.com> -l3utterfly -laik -ldwang -le.chang -leejet -leo-pony -lexasub -lhez -limitedAtonement -liuwei-git <14815172+liuwei-git@users.noreply.github.com> -lon <114724657+longregen@users.noreply.github.com> -loonerin <132926317+loonerin@users.noreply.github.com> -ltoniazzi <61414566+ltoniazzi@users.noreply.github.com> -luoyu-intel -m3ndax -maddes8cht <55592906+maddes8cht@users.noreply.github.com> -mahorozte <41834471+mahorozte@users.noreply.github.com> -makomk -manikbhandari -maor-ps <154728172+maor-ps@users.noreply.github.com> -mashdragon <122402293+mashdragon@users.noreply.github.com> -matiaslin <45382001+matiaslin@users.noreply.github.com> -matt23654 -matteo -mdrokz -mgroeber9110 <45620825+mgroeber9110@users.noreply.github.com> -minarchist -mj-shifu <77107165+mj-shifu@users.noreply.github.com> -mmyjona -momonga <115213907+mmnga@users.noreply.github.com> -momonga <146910567+mmngays@users.noreply.github.com> -moritzbrantner <31051084+moritzbrantner@users.noreply.github.com> -musoles <135031143+musoles@users.noreply.github.com> -mzcu -nanahi 
<130121847+na-na-hi@users.noreply.github.com> -ngc92 <7938269+ngc92@users.noreply.github.com> -nhamanasu <45545786+nhamanasu@users.noreply.github.com> -niansa/tuxifan -niansa/tuxifan -nickp27 -ningshanwutuobang -nold -nopperl <54780682+nopperl@users.noreply.github.com> -nusu-github <29514220+nusu-github@users.noreply.github.com> -olexiyb -omahs <73983677+omahs@users.noreply.github.com> -oobabooga <112222186+oobabooga@users.noreply.github.com> -opparco -ostix360 <55257054+ostix360@users.noreply.github.com> -pculliton -peidaqi -pengxin99 -perserk -piDack <104877312+piDack@users.noreply.github.com> -pmysl -postmasters -pudepiedj -qingfengfenga <41416092+qingfengfenga@users.noreply.github.com> -qingy1337 -qouoq -qunash -rabidcopy -rankaiyx -redbeard -rhjdvsgsgks <26178113+rhjdvsgsgks@users.noreply.github.com> -rhuddleston -rimoliga <53384203+rimoliga@users.noreply.github.com> -runfuture -sandyiscool -sasha0552 -semidark -serhii-nakon <57632032+serhii-nakon@users.noreply.github.com> -sharpHL <132747147+sharpHL@users.noreply.github.com> -shibe2 -singularity <12184989+singularity-s0@users.noreply.github.com> -sjinzh -sjxx <63994076+ylsdamxssjxxdd@users.noreply.github.com> -slaren <2141330+slaren@users.noreply.github.com> -slaren -snadampal <87143774+snadampal@users.noreply.github.com> -someone13574 <81528246+someone13574@users.noreply.github.com> -standby24x7 -staviq -stduhpf -strawberrymelonpanda <152940198+strawberrymelonpanda@users.noreply.github.com> -swittk -takov751 <40316768+takov751@users.noreply.github.com> -tarcey -tc-mb <157115220+tc-mb@users.noreply.github.com> -texmex76 <40733439+texmex76@users.noreply.github.com> -thement <40525767+thement@users.noreply.github.com> -thewh1teagle <61390950+thewh1teagle@users.noreply.github.com> -tjohnman -toyer <2042519524@qq.com> -tslmy -ubik2 -uint256_t -uint256_t -unbounded -uvos -uvos -valiray <133289098+valiray@users.noreply.github.com> -vb -vik -viric -vodkaslime <646329483@qq.com> -vvhg1 <94630311+vvhg1@users.noreply.github.com> -vxiiduu <73044267+vxiiduu@users.noreply.github.com> -wangshuai09 <391746016@qq.com> -wbpxre150 <100937007+wbpxre150@users.noreply.github.com> -whoreson <139810751+whoreson@users.noreply.github.com> -woachk <24752637+woachk@users.noreply.github.com> -wonjun Jang -woodx <124784234+woodx9@users.noreply.github.com> -wwoodsTM <104587230+wwoodsTM@users.noreply.github.com> -wzy <32936898+Freed-Wu@users.noreply.github.com> -xaedes -xaedes -xctan -xloem <0xloem@gmail.com> -yangli2 -ymcki <84055651+ymcki@users.noreply.github.com> -yuiseki -yuri@FreeBSD -zakkor -zhangkaihuo -zhentaoyu -zhouwg <6889919+zhouwg@users.noreply.github.com> -zhouwg -zrm -Ștefan-Gabriel Muscalu -杨朱 · Kiki -源文雨 <41315874+fumiama@users.noreply.github.com> -蕭澧邦 <45505768+shou692199@users.noreply.github.com> -谢乃闻 -Нияз Гарифзянов <112617865+garrnizon@users.noreply.github.com> diff --git a/CMakeLists.txt b/CMakeLists.txt index 7b2a1845e..11ebe9eb6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,9 +1,5 @@ -cmake_minimum_required(VERSION 3.14) # for add_link_options and implicit target directories. 
+cmake_minimum_required(VERSION 3.12) # Don't bump this version for no reason project("llama.cpp" C CXX) -include(CheckIncludeFileCXX) - -#set(CMAKE_WARN_DEPRECATED YES) -set(CMAKE_WARN_UNUSED_CLI YES) set(CMAKE_EXPORT_COMPILE_COMMANDS ON) @@ -12,17 +8,11 @@ if (NOT XCODE AND NOT MSVC AND NOT CMAKE_BUILD_TYPE) set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo") endif() -# Add path to modules -list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/") - set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin) -set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin) -if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR) +if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR) set(LLAMA_STANDALONE ON) - include(git-vars) - # configure project version # TODO else() @@ -41,196 +31,324 @@ else() endif() endif() -option(BUILD_SHARED_LIBS "build shared libraries" ${BUILD_SHARED_LIBS_DEFAULT}) - -if (WIN32) - add_compile_definitions(_CRT_SECURE_NO_WARNINGS) -endif() - -if (MSVC) - add_compile_options("$<$:/utf-8>") - add_compile_options("$<$:/utf-8>") - add_compile_options("$<$:/bigobj>") - add_compile_options("$<$:/bigobj>") -endif() # -# option list +# Option list # +# general +option(LLAMA_STATIC "llama: static link libraries" OFF) +option(LLAMA_NATIVE "llama: enable -march=native flag" OFF) +option(LLAMA_LTO "llama: enable link time optimization" OFF) + # debug option(LLAMA_ALL_WARNINGS "llama: enable all compiler warnings" ON) option(LLAMA_ALL_WARNINGS_3RD_PARTY "llama: enable all compiler warnings in 3rd party libs" OFF) - -# build -option(LLAMA_FATAL_WARNINGS "llama: enable -Werror flag" OFF) +option(LLAMA_GPROF "llama: enable gprof" OFF) # sanitizers -option(LLAMA_SANITIZE_THREAD "llama: enable thread sanitizer" OFF) -option(LLAMA_SANITIZE_ADDRESS "llama: enable address sanitizer" OFF) -option(LLAMA_SANITIZE_UNDEFINED "llama: enable undefined sanitizer" OFF) +option(LLAMA_SANITIZE_THREAD "llama: enable thread sanitizer" OFF) +option(LLAMA_SANITIZE_ADDRESS "llama: enable address sanitizer" OFF) +option(LLAMA_SANITIZE_UNDEFINED "llama: enable undefined sanitizer" OFF) -# utils -option(LLAMA_BUILD_COMMON "llama: build common utils library" ${LLAMA_STANDALONE}) - -# extra artifacts -option(LLAMA_BUILD_TESTS "llama: build tests" ${LLAMA_STANDALONE}) -option(LLAMA_BUILD_EXAMPLES "llama: build examples" ${LLAMA_STANDALONE}) -option(LLAMA_BUILD_SERVER "llama: build server example" ${LLAMA_STANDALONE}) +# instruction set specific +option(LLAMA_AVX "llama: enable AVX" ON) +option(LLAMA_AVX2 "llama: enable AVX2" ON) +option(LLAMA_AVX512 "llama: enable AVX512" OFF) +option(LLAMA_AVX512_VBMI "llama: enable AVX512-VBMI" OFF) +option(LLAMA_AVX512_VNNI "llama: enable AVX512-VNNI" OFF) +option(LLAMA_FMA "llama: enable FMA" ON) +# in MSVC F16C is implied with AVX2/AVX512 +if (NOT MSVC) + option(LLAMA_F16C "llama: enable F16C" ON) +endif() # 3rd party libs -option(LLAMA_CURL "llama: use libcurl to download model from an URL" OFF) -option(LLAMA_LLGUIDANCE "llama-common: include LLGuidance library for structured output in common utils" OFF) +option(LLAMA_ACCELERATE "llama: enable Accelerate framework" ON) +option(LLAMA_OPENBLAS "llama: use OpenBLAS" OFF) +option(LLAMA_CUBLAS "llama: use cuBLAS" OFF) -# Required for relocatable CMake package -include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/build-info.cmake) -include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/common.cmake) +option(LLAMA_BUILD_TESTS "llama: build tests" ${LLAMA_STANDALONE}) 
+option(LLAMA_BUILD_EXAMPLES "llama: build examples" ${LLAMA_STANDALONE}) -# override ggml options -set(GGML_ALL_WARNINGS ${LLAMA_ALL_WARNINGS}) -set(GGML_FATAL_WARNINGS ${LLAMA_FATAL_WARNINGS}) +# +# Compile flags +# -# change the default for these ggml options -if (NOT DEFINED GGML_LLAMAFILE) - set(GGML_LLAMAFILE_DEFAULT ON) -endif() - -if (NOT DEFINED GGML_CUDA_GRAPHS) - set(GGML_CUDA_GRAPHS_DEFAULT ON) -endif() - -# transition helpers -function (llama_option_depr TYPE OLD NEW) - if (${OLD}) - message(${TYPE} "${OLD} is deprecated and will be removed in the future.\nUse ${NEW} instead\n") - set(${NEW} ON PARENT_SCOPE) - endif() -endfunction() - -llama_option_depr(FATAL_ERROR LLAMA_CUBLAS GGML_CUDA) -llama_option_depr(WARNING LLAMA_CUDA GGML_CUDA) -llama_option_depr(WARNING LLAMA_KOMPUTE GGML_KOMPUTE) -llama_option_depr(WARNING LLAMA_METAL GGML_METAL) -llama_option_depr(WARNING LLAMA_METAL_EMBED_LIBRARY GGML_METAL_EMBED_LIBRARY) -llama_option_depr(WARNING LLAMA_NATIVE GGML_NATIVE) -llama_option_depr(WARNING LLAMA_RPC GGML_RPC) -llama_option_depr(WARNING LLAMA_SYCL GGML_SYCL) -llama_option_depr(WARNING LLAMA_SYCL_F16 GGML_SYCL_F16) -llama_option_depr(WARNING LLAMA_CANN GGML_CANN) +set(CMAKE_CXX_STANDARD 11) +set(CMAKE_CXX_STANDARD_REQUIRED true) +set(CMAKE_C_STANDARD 11) +set(CMAKE_C_STANDARD_REQUIRED true) +set(THREADS_PREFER_PTHREAD_FLAG ON) +find_package(Threads REQUIRED) if (NOT MSVC) if (LLAMA_SANITIZE_THREAD) - message(STATUS "Using -fsanitize=thread") - add_compile_options(-fsanitize=thread) - link_libraries (-fsanitize=thread) + link_libraries(-fsanitize=thread) endif() if (LLAMA_SANITIZE_ADDRESS) - message(STATUS "Using -fsanitize=address") - add_compile_options(-fsanitize=address -fno-omit-frame-pointer) - link_libraries (-fsanitize=address) + link_libraries(-fsanitize=address) endif() if (LLAMA_SANITIZE_UNDEFINED) - message(STATUS "Using -fsanitize=undefined") - add_compile_options(-fsanitize=undefined) - link_libraries (-fsanitize=undefined) + link_libraries(-fsanitize=undefined) endif() endif() -# -# 3rd-party -# +if (APPLE AND LLAMA_ACCELERATE) + find_library(ACCELERATE_FRAMEWORK Accelerate) + if (ACCELERATE_FRAMEWORK) + message(STATUS "Accelerate framework found") -if (NOT TARGET ggml) - add_subdirectory(ggml) - # ... 
otherwise assume ggml is added by a parent CMakeLists.txt + add_compile_definitions(GGML_USE_ACCELERATE) + set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} ${ACCELERATE_FRAMEWORK}) + else() + message(WARNING "Accelerate framework not found") + endif() +endif() + +if (LLAMA_OPENBLAS) + if (LLAMA_STATIC) + set(BLA_STATIC ON) + endif() + + set(BLA_VENDOR OpenBLAS) + find_package(BLAS) + if (BLAS_FOUND) + message(STATUS "OpenBLAS found") + + add_compile_definitions(GGML_USE_OPENBLAS) + add_link_options(${BLAS_LIBRARIES}) + set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} openblas) + + # find header file + set(OPENBLAS_INCLUDE_SEARCH_PATHS + /usr/include + /usr/include/openblas + /usr/include/openblas-base + /usr/local/include + /usr/local/include/openblas + /usr/local/include/openblas-base + /opt/OpenBLAS/include + $ENV{OpenBLAS_HOME} + $ENV{OpenBLAS_HOME}/include + ) + find_path(OPENBLAS_INC NAMES cblas.h PATHS ${OPENBLAS_INCLUDE_SEARCH_PATHS}) + add_compile_options(-I${OPENBLAS_INC}) + else() + message(WARNING "OpenBLAS not found") + endif() +endif() + +if (LLAMA_CUBLAS) + cmake_minimum_required(VERSION 3.17) + + find_package(CUDAToolkit) + if (CUDAToolkit_FOUND) + message(STATUS "cuBLAS found") + + enable_language(CUDA) + + set(GGML_CUDA_SOURCES ggml-cuda.cu ggml-cuda.h) + + add_compile_definitions(GGML_USE_CUBLAS) + + if (LLAMA_STATIC) + set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} CUDA::cudart_static CUDA::cublas_static CUDA::cublasLt_static) + else() + set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} CUDA::cudart CUDA::cublas CUDA::cublasLt) + endif() + + else() + message(WARNING "cuBLAS not found") + endif() +endif() + +if (LLAMA_ALL_WARNINGS) + if (NOT MSVC) + set(c_flags + -Wall + -Wextra + -Wpedantic + -Wcast-qual + -Wdouble-promotion + -Wshadow + -Wstrict-prototypes + -Wpointer-arith + ) + set(cxx_flags + -Wall + -Wextra + -Wpedantic + -Wcast-qual + -Wno-unused-function + -Wno-multichar + ) + else() + # todo : msvc + endif() + + add_compile_options( + "$<$:${c_flags}>" + "$<$:${cxx_flags}>" + ) + +endif() + +if (MSVC) + add_compile_definitions(_CRT_SECURE_NO_WARNINGS) + + if (BUILD_SHARED_LIBS) + set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON) + endif() +endif() + +if (LLAMA_LTO) + include(CheckIPOSupported) + check_ipo_supported(RESULT result OUTPUT output) + if (result) + set(CMAKE_INTERPROCEDURAL_OPTIMIZATION TRUE) + else() + message(WARNING "IPO is not supported: ${output}") + endif() +endif() + +# Architecture specific +# TODO: probably these flags need to be tweaked on some architectures +# feel free to update the Makefile for your architecture and send a pull request or issue +message(STATUS "CMAKE_SYSTEM_PROCESSOR: ${CMAKE_SYSTEM_PROCESSOR}") +if (NOT MSVC) + if (LLAMA_STATIC) + add_link_options(-static) + if (MINGW) + add_link_options(-static-libgcc -static-libstdc++) + endif() + endif() + if (LLAMA_GPROF) + add_compile_options(-pg) + endif() + if (LLAMA_NATIVE) + add_compile_options(-march=native) + endif() +endif() + +if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm" OR ${CMAKE_SYSTEM_PROCESSOR} MATCHES "aarch64") + message(STATUS "ARM detected") + if (MSVC) + # TODO: arm msvc? 
+ else() + if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "aarch64") + add_compile_options(-mcpu=native) + endif() + # TODO: armv6,7,8 version specific flags + endif() +elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "^(x86_64|i686|AMD64)$") + message(STATUS "x86 detected") + if (MSVC) + if (LLAMA_AVX512) + add_compile_options($<$:/arch:AVX512>) + add_compile_options($<$:/arch:AVX512>) + # MSVC has no compile-time flags enabling specific + # AVX512 extensions, neither it defines the + # macros corresponding to the extensions. + # Do it manually. + if (LLAMA_AVX512_VBMI) + add_compile_definitions($<$:__AVX512VBMI__>) + add_compile_definitions($<$:__AVX512VBMI__>) + endif() + if (LLAMA_AVX512_VNNI) + add_compile_definitions($<$:__AVX512VNNI__>) + add_compile_definitions($<$:__AVX512VNNI__>) + endif() + elseif (LLAMA_AVX2) + add_compile_options($<$:/arch:AVX2>) + add_compile_options($<$:/arch:AVX2>) + elseif (LLAMA_AVX) + add_compile_options($<$:/arch:AVX>) + add_compile_options($<$:/arch:AVX>) + endif() + else() + if (LLAMA_F16C) + add_compile_options(-mf16c) + endif() + if (LLAMA_FMA) + add_compile_options(-mfma) + endif() + if (LLAMA_AVX) + add_compile_options(-mavx) + endif() + if (LLAMA_AVX2) + add_compile_options(-mavx2) + endif() + if (LLAMA_AVX512) + add_compile_options(-mavx512f) + add_compile_options(-mavx512bw) + endif() + if (LLAMA_AVX512_VBMI) + add_compile_options(-mavx512vbmi) + endif() + if (LLAMA_AVX512_VNNI) + add_compile_options(-mavx512vnni) + endif() + endif() +else() + # TODO: support PowerPC + message(STATUS "Unknown architecture") endif() # -# build the library +# Build libraries # -add_subdirectory(src) +add_library(ggml OBJECT + ggml.c + ggml.h + ${GGML_CUDA_SOURCES}) -# -# utils, programs, examples and tests -# +target_include_directories(ggml PUBLIC .) +target_compile_features(ggml PUBLIC c_std_11) # don't bump +target_link_libraries(ggml PUBLIC Threads::Threads ${LLAMA_EXTRA_LIBS}) -if (LLAMA_BUILD_COMMON) - add_subdirectory(common) +if (BUILD_SHARED_LIBS) + set_target_properties(ggml PROPERTIES POSITION_INDEPENDENT_CODE ON) endif() -if (LLAMA_BUILD_COMMON AND LLAMA_BUILD_TESTS AND NOT CMAKE_JS_VERSION) +add_library(llama + llama.cpp + llama.h + llama_util.h) + +target_include_directories(llama PUBLIC .) 
+target_compile_features(llama PUBLIC cxx_std_11) # don't bump +target_link_libraries(llama PRIVATE ggml ${LLAMA_EXTRA_LIBS}) + +if (BUILD_SHARED_LIBS) + set_target_properties(llama PROPERTIES POSITION_INDEPENDENT_CODE ON) + target_compile_definitions(llama PRIVATE LLAMA_SHARED LLAMA_BUILD) +endif() + +if (GGML_CUDA_SOURCES) + message(STATUS "GGML CUDA sources found, configuring CUDA architecture") + set_property(TARGET ggml PROPERTY CUDA_ARCHITECTURES OFF) + set_property(TARGET ggml PROPERTY CUDA_SELECT_NVCC_ARCH_FLAGS "Auto") + set_property(TARGET llama PROPERTY CUDA_ARCHITECTURES OFF) +endif() + + +# +# programs, examples and tests +# + +if (LLAMA_BUILD_TESTS AND NOT CMAKE_JS_VERSION) include(CTest) add_subdirectory(tests) -endif() +endif () -if (LLAMA_BUILD_COMMON AND LLAMA_BUILD_EXAMPLES) +if (LLAMA_BUILD_EXAMPLES) add_subdirectory(examples) add_subdirectory(pocs) endif() - -# -# install -# - -include(GNUInstallDirs) -include(CMakePackageConfigHelpers) - -set(LLAMA_BUILD_NUMBER ${BUILD_NUMBER}) -set(LLAMA_BUILD_COMMIT ${BUILD_COMMIT}) -set(LLAMA_INSTALL_VERSION 0.0.${BUILD_NUMBER}) - -set(LLAMA_INCLUDE_INSTALL_DIR ${CMAKE_INSTALL_INCLUDEDIR} CACHE PATH "Location of header files") -set(LLAMA_LIB_INSTALL_DIR ${CMAKE_INSTALL_LIBDIR} CACHE PATH "Location of library files") -set(LLAMA_BIN_INSTALL_DIR ${CMAKE_INSTALL_BINDIR} CACHE PATH "Location of binary files") - -set(LLAMA_PUBLIC_HEADERS - ${CMAKE_CURRENT_SOURCE_DIR}/include/llama.h - ${CMAKE_CURRENT_SOURCE_DIR}/include/llama-cpp.h) - -set_target_properties(llama - PROPERTIES - PUBLIC_HEADER "${LLAMA_PUBLIC_HEADERS}") - -install(TARGETS llama LIBRARY PUBLIC_HEADER) - -configure_package_config_file( - ${CMAKE_CURRENT_SOURCE_DIR}/cmake/llama-config.cmake.in - ${CMAKE_CURRENT_BINARY_DIR}/llama-config.cmake - INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/llama - PATH_VARS LLAMA_INCLUDE_INSTALL_DIR - LLAMA_LIB_INSTALL_DIR - LLAMA_BIN_INSTALL_DIR ) - -write_basic_package_version_file( - ${CMAKE_CURRENT_BINARY_DIR}/llama-version.cmake - VERSION ${LLAMA_INSTALL_VERSION} - COMPATIBILITY SameMajorVersion) - -install(FILES ${CMAKE_CURRENT_BINARY_DIR}/llama-config.cmake - ${CMAKE_CURRENT_BINARY_DIR}/llama-version.cmake - DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/llama) - -install( - FILES convert_hf_to_gguf.py - PERMISSIONS - OWNER_READ - OWNER_WRITE - OWNER_EXECUTE - GROUP_READ - GROUP_EXECUTE - WORLD_READ - WORLD_EXECUTE - DESTINATION ${CMAKE_INSTALL_BINDIR}) - -configure_file(cmake/llama.pc.in - "${CMAKE_CURRENT_BINARY_DIR}/llama.pc" - @ONLY) - -install(FILES "${CMAKE_CURRENT_BINARY_DIR}/llama.pc" - DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig) diff --git a/CMakePresets.json b/CMakePresets.json deleted file mode 100644 index 13bdd7907..000000000 --- a/CMakePresets.json +++ /dev/null @@ -1,97 +0,0 @@ -{ - "version": 4, - "configurePresets": [ - { - "name": "base", - "hidden": true, - "generator": "Ninja", - "binaryDir": "${sourceDir}/build-${presetName}", - "cacheVariables": { - "CMAKE_EXPORT_COMPILE_COMMANDS": "ON", - "CMAKE_INSTALL_RPATH": "$ORIGIN;$ORIGIN/.." - } - }, - { - "name": "sycl-base", - "hidden": true, - "generator": "Ninja", - "binaryDir": "${sourceDir}/build-${presetName}", - "cacheVariables": { - "CMAKE_EXPORT_COMPILE_COMMANDS": "ON", - "CMAKE_CXX_COMPILER": "icx", - "CMAKE_C_COMPILER": "cl", - "GGML_SYCL": "ON", - "CMAKE_INSTALL_RPATH": "$ORIGIN;$ORIGIN/.." 
- } - }, - { "name": "debug", "hidden": true, "cacheVariables": { "CMAKE_BUILD_TYPE": "Debug" } }, - { "name": "release", "hidden": true, "cacheVariables": { "CMAKE_BUILD_TYPE": "Release" } }, - { "name": "reldbg", "hidden": true, "cacheVariables": { "CMAKE_BUILD_TYPE": "RelWithDebInfo" } }, - { "name": "static", "hidden": true, "cacheVariables": { "GGML_STATIC": "ON" } }, - { "name": "sycl_f16", "hidden": true, "cacheVariables": { "GGML_SYCL_F16": "ON" } }, - { "name": "vulkan", "hidden": true, "cacheVariables": { "GGML_VULKAN": "ON" } }, - - { - "name": "x64-windows-llvm", "hidden": true, - "cacheVariables": { - "CMAKE_TOOLCHAIN_FILE": "${sourceDir}/cmake/x64-windows-llvm.cmake" - } - }, - - { - "name": "arm64-windows-msvc", "hidden": true, - "architecture": { "value": "arm64", "strategy": "external" }, - "toolset": { "value": "host=x64", "strategy": "external" }, - "cacheVariables": { - "CMAKE_TOOLCHAIN_FILE": "${sourceDir}/cmake/arm64-windows-msvc.cmake" - } - }, - - { - "name": "arm64-windows-llvm", "hidden": true, - "architecture": { "value": "arm64", "strategy": "external" }, - "toolset": { "value": "host=x64", "strategy": "external" }, - "cacheVariables": { - "CMAKE_TOOLCHAIN_FILE": "${sourceDir}/cmake/arm64-windows-llvm.cmake" - } - }, - - { - "name": "arm64-apple-clang", "hidden": true, - "architecture": { "value": "arm64", "strategy": "external" }, - "toolset": { "value": "host=x64", "strategy": "external" }, - "cacheVariables": { - "CMAKE_TOOLCHAIN_FILE": "${sourceDir}/cmake/arm64-apple-clang.cmake" - } - }, - - { "name": "arm64-windows-llvm-debug", "inherits": [ "base", "arm64-windows-llvm", "debug" ] }, - { "name": "arm64-windows-llvm-release", "inherits": [ "base", "arm64-windows-llvm", "reldbg" ] }, - { "name": "arm64-windows-llvm+static-release", "inherits": [ "base", "arm64-windows-llvm", "reldbg", "static" ] }, - - { "name": "arm64-apple-clang-debug", "inherits": [ "base", "arm64-apple-clang", "debug" ] }, - { "name": "arm64-apple-clang-release", "inherits": [ "base", "arm64-apple-clang", "reldbg" ] }, - { "name": "arm64-apple-clang+static-release", "inherits": [ "base", "arm64-apple-clang", "reldbg", "static" ] }, - - { "name": "arm64-windows-msvc-debug", "inherits": [ "base", "arm64-windows-msvc", "debug" ] }, - { "name": "arm64-windows-msvc-release", "inherits": [ "base", "arm64-windows-msvc", "reldbg" ] }, - { "name": "arm64-windows-msvc+static-release", "inherits": [ "base", "arm64-windows-msvc", "reldbg", "static" ] }, - - { "name": "x64-windows-llvm-debug", "inherits": [ "base", "x64-windows-llvm", "debug" ] }, - { "name": "x64-windows-llvm-release", "inherits": [ "base", "x64-windows-llvm", "release" ] }, - { "name": "x64-windows-llvm-reldbg", "inherits": [ "base", "x64-windows-llvm", "reldbg" ] }, - { "name": "x64-windows-llvm+static-release", "inherits": [ "base", "x64-windows-llvm", "reldbg", "static" ] }, - - { "name": "x64-windows-msvc-debug", "inherits": [ "base", "debug" ] }, - { "name": "x64-windows-msvc-release", "inherits": [ "base", "reldbg" ] }, - { "name": "x64-windows-msvc+static-release", "inherits": [ "base", "reldbg", "static" ] }, - - { "name": "x64-windows-sycl-debug", "inherits": [ "sycl-base", "debug" ] }, - { "name": "x64-windows-sycl-debug-f16", "inherits": [ "sycl-base", "debug", "sycl_f16" ] }, - { "name": "x64-windows-sycl-release", "inherits": [ "sycl-base", "release" ] }, - { "name": "x64-windows-sycl-release-f16", "inherits": [ "sycl-base", "release", "sycl_f16" ] }, - - { "name": "x64-windows-vulkan-debug", "inherits": [ "base", 
"vulkan", "debug" ] }, - { "name": "x64-windows-vulkan-release", "inherits": [ "base", "vulkan", "release" ] } - ] -} diff --git a/CODEOWNERS b/CODEOWNERS deleted file mode 100644 index 72d594b46..000000000 --- a/CODEOWNERS +++ /dev/null @@ -1,11 +0,0 @@ -# collaborators can optionally add themselves here to indicate their availability for reviewing related PRs - -/ci/ @ggerganov -/.devops/*.Dockerfile @ngxson -/examples/server/ @ngxson -/ggml/src/ggml-cuda/fattn* @JohannesGaessler -/ggml/src/ggml-cuda/mmq.* @JohannesGaessler -/ggml/src/ggml-cuda/mmv.* @JohannesGaessler -/ggml/src/ggml-cuda/mmvq.* @JohannesGaessler -/ggml/src/ggml-opt.cpp @JohannesGaessler -/ggml/src/gguf.cpp @JohannesGaessler diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md deleted file mode 100644 index 8d411982b..000000000 --- a/CONTRIBUTING.md +++ /dev/null @@ -1,125 +0,0 @@ -# Pull requests (for contributors) - -- Test your changes: - - Execute [the full CI locally on your machine](ci/README.md) before publishing - - Verify that the perplexity and the performance are not affected negatively by your changes (use `llama-perplexity` and `llama-bench`) - - If you modified the `ggml` source, run the `test-backend-ops` tool to check whether different backend implementations of the `ggml` operators produce consistent results (this requires access to at least two different `ggml` backends) - - If you modified a `ggml` operator or added a new one, add the corresponding test cases to `test-backend-ops` -- Consider allowing write access to your branch for faster reviews, as reviewers can push commits directly -- If your PR becomes stale, don't hesitate to ping the maintainers in the comments - -# Pull requests (for collaborators) - -- Squash-merge PRs -- Use the following format for the squashed commit title: ` : (#)`. For example: `utils : fix typo in utils.py (#1234)` -- Optionally pick a `` from here: https://github.com/ggerganov/llama.cpp/wiki/Modules -- Consider adding yourself to [CODEOWNERS](CODEOWNERS) - -# Coding guidelines - -- Avoid adding third-party dependencies, extra files, extra headers, etc. -- Always consider cross-compatibility with other operating systems and architectures -- Avoid fancy-looking modern STL constructs, use basic `for` loops, avoid templates, keep it simple -- Vertical alignment makes things more readable and easier to batch edit -- Clean-up any trailing whitespaces, use 4 spaces for indentation, brackets on the same line, `void * ptr`, `int & a` -- Use sized integer types such as `int32_t` in the public API, e.g. `size_t` may also be appropriate for allocation sizes or byte offsets -- Declare structs with `struct foo {}` instead of `typedef struct foo {} foo` - - In C++ code omit optional `struct` and `enum` keyword whenever they are not necessary - ```cpp - // OK - llama_context * ctx; - const llama_rope_type rope_type; - - // not OK - struct llama_context * ctx; - const enum llama_rope_type rope_type; - ``` - - _(NOTE: this guideline is yet to be applied to the `llama.cpp` codebase. New code should follow this guideline.)_ - -- Try to follow the existing patterns in the code (indentation, spaces, etc.). In case of doubt use `clang-format` to format the added code -- For anything not covered in the current guidelines, refer to the [C++ Core Guidelines](https://isocpp.github.io/CppCoreGuidelines/CppCoreGuidelines) -- Tensors store data in row-major order. 
We refer to dimension 0 as columns, 1 as rows, 2 as matrices -- Matrix multiplication is unconventional: [`C = ggml_mul_mat(ctx, A, B)`](https://github.com/ggerganov/llama.cpp/blob/880e352277fc017df4d5794f0c21c44e1eae2b84/ggml.h#L1058-L1064) means $C^T = A B^T \Leftrightarrow C = B A^T.$ - -![matmul](media/matmul.png) - -# Naming guidelines - -- Use `snake_case` for function, variable and type names -- Naming usually optimizes for longest common prefix (see https://github.com/ggerganov/ggml/pull/302#discussion_r1243240963) - - ```cpp - // not OK - int small_number; - int big_number; - - // OK - int number_small; - int number_big; - ``` - -- Enum values are always in upper case and prefixed with the enum name - - ```cpp - enum llama_vocab_type { - LLAMA_VOCAB_TYPE_NONE = 0, - LLAMA_VOCAB_TYPE_SPM = 1, - LLAMA_VOCAB_TYPE_BPE = 2, - LLAMA_VOCAB_TYPE_WPM = 3, - LLAMA_VOCAB_TYPE_UGM = 4, - LLAMA_VOCAB_TYPE_RWKV = 5, - }; - ``` - -- The general naming pattern is `<class>_<method>`, with `<method>` being `<action>_<noun>` - - ```cpp - llama_model_init(); // class: "llama_model", method: "init" - llama_sampler_chain_remove(); // class: "llama_sampler_chain", method: "remove" - llama_sampler_get_seed(); // class: "llama_sampler", method: "get_seed" - llama_set_embeddings(); // class: "llama_context", method: "set_embeddings" - llama_n_threads(); // class: "llama_context", method: "n_threads" - llama_adapter_lora_free(); // class: "llama_adapter_lora", method: "free" - ``` - - - The `get` `<action>` can be omitted - - The `<noun>` can be omitted if not necessary - - The `_context` suffix of the `<class>` is optional. Use it to disambiguate symbols when needed - - Use `init`/`free` for constructor/destructor `<action>` - -- Use the `_t` suffix when a type is supposed to be opaque to the user - it's not relevant to them if it is a struct or anything else - - ```cpp - typedef struct llama_context * llama_context_t; - - enum llama_pooling_type llama_pooling_type(const llama_context_t ctx); - ``` - - _(NOTE: this guideline is yet to be applied to the `llama.cpp` codebase. New code should follow this guideline)_ - -- C/C++ filenames are all lowercase with dashes. Headers use the `.h` extension. Source files use the `.c` or `.cpp` extension -- Python filenames are all lowercase with underscores - -- _(TODO: abbreviations usage)_ - -# Preprocessor directives - -- _(TODO: add guidelines with examples and apply them to the codebase)_ - - ```cpp - #ifdef FOO - #endif // FOO - ``` - -# Documentation - -- Documentation is a community effort -- When you need to look into the source code to figure out how to use an API consider adding a short summary to the header file for future reference -- When you notice incorrect or outdated documentation, please update it - -# Resources - -The Github issues, PRs and discussions contain a lot of information that can be useful to get familiar with the codebase. For convenience, some of the more important information is referenced from Github projects: - -https://github.com/ggerganov/llama.cpp/projects diff --git a/LICENSE b/LICENSE index acb96ce78..76f67efdc 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ MIT License -Copyright (c) 2023-2024 The ggml authors +Copyright (c) 2023 Georgi Gerganov Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal diff --git a/Makefile b/Makefile index dc3de3cb1..b297959c9 100644 --- a/Makefile +++ b/Makefile @@ -1,161 +1,5 @@ -ifndef LLAMA_MAKEFILE -$(error The Makefile build is deprecated.
Use the CMake build instead. For more details, see https://github.com/ggerganov/llama.cpp/blob/master/docs/build.md) -endif - # Define the default target now so that it is always the first target -BUILD_TARGETS = \ - libllava.a \ - llama-batched \ - llama-batched-bench \ - llama-bench \ - llama-cli \ - llama-convert-llama2c-to-ggml \ - llama-embedding \ - llama-eval-callback \ - llama-export-lora \ - llama-gbnf-validator \ - llama-gguf \ - llama-gguf-hash \ - llama-gguf-split \ - llama-gritlm \ - llama-imatrix \ - llama-infill \ - llama-llava-cli \ - llama-minicpmv-cli\ - llama-qwen2vl-cli\ - llama-lookahead \ - llama-lookup \ - llama-lookup-create \ - llama-lookup-merge \ - llama-lookup-stats \ - llama-parallel \ - llama-passkey \ - llama-perplexity \ - llama-q8dot \ - llama-quantize \ - llama-quantize-stats \ - llama-retrieval \ - llama-save-load-state \ - llama-server \ - llama-simple \ - llama-simple-chat \ - llama-run \ - llama-speculative \ - llama-tokenize \ - llama-vdot \ - llama-cvector-generator \ - llama-gen-docs \ - tests/test-c.o - -# Binaries only useful for tests -TEST_TARGETS = \ - tests/test-arg-parser \ - tests/test-autorelease \ - tests/test-backend-ops \ - tests/test-chat \ - tests/test-chat-template \ - tests/test-double-float \ - tests/test-grammar-integration \ - tests/test-grammar-parser \ - tests/test-json-schema-to-grammar \ - tests/test-llama-grammar \ - tests/test-log \ - tests/test-model-load-cancel \ - tests/test-quantize-fns \ - tests/test-quantize-perf \ - tests/test-rope \ - tests/test-sampling \ - tests/test-tokenizer-0 \ - tests/test-tokenizer-1-bpe \ - tests/test-tokenizer-1-spm -# tests/test-opt \ - -# Legacy build targets that were renamed in #7809, but should still be removed when the project is cleaned -LEGACY_TARGETS_CLEAN = main quantize quantize-stats perplexity imatrix embedding vdot q8dot convert-llama2c-to-ggml \ - simple batched batched-bench save-load-state server gguf gguf-split eval-callback llama-bench libllava.a llava-cli baby-llama \ - retrieval speculative infill tokenize parallel export-lora lookahead lookup passkey gritlm - -# Legacy build targets that were renamed in #7809, but we want to build binaries that for them that output a deprecation warning if people try to use them. -# We don't want to clutter things too much, so we only build replacements for the most commonly used binaries. -LEGACY_TARGETS_BUILD = main quantize perplexity embedding server - -# Deprecation aliases -ifdef LLAMA_CUBLAS -$(error LLAMA_CUBLAS is removed. Use GGML_CUDA instead.) 
-endif - -ifdef LLAMA_CUDA -GGML_CUDA := 1 -DEPRECATE_WARNING := 1 -endif - -ifdef LLAMA_KOMPUTE -GGML_KOMPUTE := 1 -DEPRECATE_WARNING := 1 -endif - -ifdef LLAMA_METAL -GGML_METAL := 1 -DEPRECATE_WARNING := 1 -endif - -ifdef LLAMA_RPC -GGML_RPC := 1 -DEPRECATE_WARNING := 1 -endif - -ifdef LLAMA_SYCL -GGML_SYCL := 1 -DEPRECATE_WARNING := 1 -endif - -ifdef LLAMA_SYCL_F16 -GGML_SYCL_F16 := 1 -DEPRECATE_WARNING := 1 -endif - -ifdef LLAMA_OPENBLAS -GGML_OPENBLAS := 1 -DEPRECATE_WARNING := 1 -endif - -ifdef LLAMA_OPENBLAS64 -GGML_OPENBLAS64 := 1 -DEPRECATE_WARNING := 1 -endif - -ifdef LLAMA_BLIS -GGML_BLIS := 1 -DEPRECATE_WARNING := 1 -endif - -ifdef LLAMA_NO_LLAMAFILE -GGML_NO_LLAMAFILE := 1 -DEPRECATE_WARNING := 1 -endif - -ifdef LLAMA_NO_ACCELERATE -GGML_NO_ACCELERATE := 1 -DEPRECATE_WARNING := 1 -endif - -ifdef LLAMA_NO_OPENMP -GGML_NO_OPENMP := 1 -DEPRECATE_WARNING := 1 -endif - -ifdef LLAMA_NO_METAL -GGML_NO_METAL := 1 -DEPRECATE_WARNING := 1 -endif - -ifdef LLAMA_DISABLE_LOGS -REMOVE_WARNING := 1 -endif - -ifdef LLAMA_SERVER_VERBOSE -REMOVE_WARNING := 1 -endif +default: main quantize quantize-stats perplexity embedding vdot ifndef UNAME_S UNAME_S := $(shell uname -s) @@ -169,26 +13,12 @@ ifndef UNAME_M UNAME_M := $(shell uname -m) endif -# In GNU make default CXX is g++ instead of c++. Let's fix that so that users -# of non-gcc compilers don't have to provide g++ alias or wrapper. -DEFCC := cc -DEFCXX := c++ -ifeq ($(origin CC),default) -CC := $(DEFCC) -endif -ifeq ($(origin CXX),default) -CXX := $(DEFCXX) -endif +CCV := $(shell $(CC) --version | head -n 1) +CXXV := $(shell $(CXX) --version | head -n 1) # Mac OS + Arm can report x86_64 # ref: https://github.com/ggerganov/whisper.cpp/issues/66#issuecomment-1282546789 ifeq ($(UNAME_S),Darwin) - ifndef GGML_NO_METAL - GGML_METAL := 1 - endif - - GGML_NO_OPENMP := 1 - ifneq ($(UNAME_P),arm) SYSCTL_M := $(shell sysctl -n hw.optional.arm64 2>/dev/null) ifeq ($(SYSCTL_M),1) @@ -199,830 +29,109 @@ ifeq ($(UNAME_S),Darwin) endif endif -ifdef GGML_METAL - GGML_METAL_EMBED_LIBRARY := 1 -endif - -ifdef GGML_RPC - BUILD_TARGETS += rpc-server -endif - -ifdef GGML_VULKAN - BUILD_TARGETS += vulkan-shaders-gen -endif - -default: $(BUILD_TARGETS) $(LEGACY_TARGETS_BUILD) - -test: $(TEST_TARGETS) - @failures=0; \ - for test_target in $(TEST_TARGETS); do \ - if [ "$$test_target" = "tests/test-tokenizer-0" ]; then \ - ./$$test_target $(CURDIR)/models/ggml-vocab-llama-spm.gguf; \ - ./$$test_target $(CURDIR)/models/ggml-vocab-llama-bpe.gguf; \ - ./$$test_target $(CURDIR)/models/ggml-vocab-phi-3.gguf; \ - ./$$test_target $(CURDIR)/models/ggml-vocab-falcon.gguf; \ - ./$$test_target $(CURDIR)/models/ggml-vocab-bert-bge.gguf; \ - ./$$test_target $(CURDIR)/models/ggml-vocab-starcoder.gguf; \ - ./$$test_target $(CURDIR)/models/ggml-vocab-gpt-2.gguf; \ - ./$$test_target $(CURDIR)/models/ggml-vocab-refact.gguf; \ - elif [ "$$test_target" = "tests/test-tokenizer-1-spm" ]; then \ - continue; \ - elif [ "$$test_target" = "tests/test-tokenizer-1-bpe" ]; then \ - continue; \ - else \ - echo "Running test $$test_target..."; \ - ./$$test_target; \ - fi; \ - if [ $$? -ne 0 ]; then \ - printf 'Test %s FAILED!\n\n' $$test_target; \ - failures=$$(( failures + 1 )); \ - else \ - printf 'Test %s passed.\n\n' $$test_target; \ - fi; \ - done; \ - if [ $$failures -gt 0 ]; then \ - printf '\n%s tests failed.\n' $$failures; \ - exit 1; \ - fi - @echo 'All tests passed.' 
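The deprecation guards above only redirect the removed LLAMA_* Makefile switches to their GGML_* successors; the build they point at goes through CMake. A rough sketch of what that looks like for a project that embeds llama.cpp is shown below — the `my_app` target, the `vendor/llama.cpp` path, and the choice of `GGML_CUDA` are illustrative assumptions, not something defined by this patch:

```cmake
# Sketch: embedding llama.cpp from a parent project instead of the deprecated Makefile build.
# GGML_CUDA is the replacement named by the error message above for the removed LLAMA_CUBLAS
# (the older tree restored by this patch still used LLAMA_CUBLAS directly).
cmake_minimum_required(VERSION 3.14)
project(my_app CXX)

set(GGML_CUDA ON CACHE BOOL "" FORCE)   # was: make LLAMA_CUBLAS=1 / LLAMA_CUDA=1
add_subdirectory(vendor/llama.cpp)      # provides the `llama` library target

add_executable(my_app main.cpp)
target_link_libraries(my_app PRIVATE llama)
```

A standalone checkout would express the same choice on the configure line instead, e.g. `cmake -B build -DGGML_CUDA=ON && cmake --build build`.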
- -all: $(BUILD_TARGETS) $(TEST_TARGETS) $(LEGACY_TARGETS_BUILD) - -ifdef RISCV_CROSS_COMPILE -CC := riscv64-unknown-linux-gnu-gcc -CXX := riscv64-unknown-linux-gnu-g++ -endif - # # Compile flags # -# keep standard at C11 and C++17 -MK_CPPFLAGS = -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon -DGGML_USE_CPU -MK_CFLAGS = -std=c11 -fPIC -MK_CXXFLAGS = -std=c++17 -fPIC -MK_NVCCFLAGS = -std=c++17 - -ifdef LLAMA_NO_CCACHE -GGML_NO_CCACHE := 1 -DEPRECATE_WARNING := 1 -endif - -ifndef GGML_NO_CCACHE -CCACHE := $(shell which ccache) -ifdef CCACHE -export CCACHE_SLOPPINESS = time_macros -$(info I ccache found, compilation results will be cached. Disable with GGML_NO_CCACHE.) -CC := $(CCACHE) $(CC) -CXX := $(CCACHE) $(CXX) -else -$(info I ccache not found. Consider installing it for faster compilation.) -endif # CCACHE -endif # GGML_NO_CCACHE - -# clock_gettime came in POSIX.1b (1993) -# CLOCK_MONOTONIC came in POSIX.1-2001 / SUSv3 as optional -# posix_memalign came in POSIX.1-2001 / SUSv3 -# M_PI is an XSI extension since POSIX.1-2001 / SUSv3, came in XPG1 (1985) -MK_CPPFLAGS += -D_XOPEN_SOURCE=600 - -# Somehow in OpenBSD whenever POSIX conformance is specified -# some string functions rely on locale_t availability, -# which was introduced in POSIX.1-2008, forcing us to go higher -ifeq ($(UNAME_S),OpenBSD) - MK_CPPFLAGS += -U_XOPEN_SOURCE -D_XOPEN_SOURCE=700 -endif - -# Data types, macros and functions related to controlling CPU affinity and -# some memory allocation are available on Linux through GNU extensions in libc -ifeq ($(UNAME_S),Linux) - MK_CPPFLAGS += -D_GNU_SOURCE - MK_LDFLAGS += -ldl -endif - -# RLIMIT_MEMLOCK came in BSD, is not specified in POSIX.1, -# and on macOS its availability depends on enabling Darwin extensions -# similarly on DragonFly, enabling BSD extensions is necessary -ifeq ($(UNAME_S),Darwin) - MK_CPPFLAGS += -D_DARWIN_C_SOURCE -endif -ifeq ($(UNAME_S),DragonFly) - MK_CPPFLAGS += -D__BSD_VISIBLE -endif - -# alloca is a non-standard interface that is not visible on BSDs when -# POSIX conformance is specified, but not all of them provide a clean way -# to enable it in such cases -ifeq ($(UNAME_S),FreeBSD) - MK_CPPFLAGS += -D__BSD_VISIBLE -endif -ifeq ($(UNAME_S),NetBSD) - MK_CPPFLAGS += -D_NETBSD_SOURCE -endif -ifeq ($(UNAME_S),OpenBSD) - MK_CPPFLAGS += -D_BSD_SOURCE -endif - -ifdef GGML_SCHED_MAX_COPIES - MK_CPPFLAGS += -DGGML_SCHED_MAX_COPIES=$(GGML_SCHED_MAX_COPIES) -endif - -ifdef LLAMA_DEBUG - MK_CFLAGS += -O0 -g - MK_CXXFLAGS += -O0 -g - MK_LDFLAGS += -g - MK_NVCCFLAGS += -O0 -g - - ifeq ($(UNAME_S),Linux) - MK_CPPFLAGS += -D_GLIBCXX_ASSERTIONS - endif -else - MK_CPPFLAGS += -DNDEBUG - MK_CFLAGS += -O3 -g - MK_CXXFLAGS += -O3 -g - MK_NVCCFLAGS += -O3 -g -endif - -ifdef LLAMA_SANITIZE_THREAD - MK_CFLAGS += -fsanitize=thread -g - MK_CXXFLAGS += -fsanitize=thread -g - MK_LDFLAGS += -fsanitize=thread -g -endif - -ifdef LLAMA_SANITIZE_ADDRESS - MK_CFLAGS += -fsanitize=address -fno-omit-frame-pointer -g - MK_CXXFLAGS += -fsanitize=address -fno-omit-frame-pointer -g - MK_LDFLAGS += -fsanitize=address -fno-omit-frame-pointer -g -endif - -ifdef LLAMA_SANITIZE_UNDEFINED - MK_CFLAGS += -fsanitize=undefined -g - MK_CXXFLAGS += -fsanitize=undefined -g - MK_LDFLAGS += -fsanitize=undefined -g -endif - -ifdef LLAMA_SERVER_SSL - MK_CPPFLAGS += -DCPPHTTPLIB_OPENSSL_SUPPORT - MK_LDFLAGS += -lssl -lcrypto -endif - -ifndef GGML_NO_CPU_AARCH64 - MK_CPPFLAGS += -DGGML_USE_CPU_AARCH64 -endif +# keep standard at C11 and C++11 +CFLAGS = -I. -O3 -DNDEBUG -std=c11 -fPIC +CXXFLAGS = -I. 
-I./examples -O3 -DNDEBUG -std=c++11 -fPIC +LDFLAGS = # warnings -WARN_FLAGS = \ - -Wall \ - -Wextra \ - -Wpedantic \ - -Wcast-qual \ - -Wno-unused-function - -MK_CFLAGS += \ - $(WARN_FLAGS) \ - -Wshadow \ - -Wstrict-prototypes \ - -Wpointer-arith \ - -Wmissing-prototypes \ - -Werror=implicit-int \ - -Werror=implicit-function-declaration - -MK_CXXFLAGS += \ - $(WARN_FLAGS) \ - -Wmissing-declarations \ - -Wmissing-noreturn - -ifeq ($(LLAMA_FATAL_WARNINGS),1) - MK_CFLAGS += -Werror - MK_CXXFLAGS += -Werror -endif - -# this version of Apple ld64 is buggy -ifneq '' '$(findstring dyld-1015.7,$(shell $(CC) $(LDFLAGS) -Wl,-v 2>&1))' - MK_CPPFLAGS += -DHAVE_BUGGY_APPLE_LINKER -endif +CFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wdouble-promotion -Wshadow -Wstrict-prototypes -Wpointer-arith +CXXFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-multichar # OS specific # TODO: support Windows -ifneq '' '$(filter $(UNAME_S),Linux Darwin FreeBSD NetBSD OpenBSD Haiku)' - MK_CFLAGS += -pthread - MK_CXXFLAGS += -pthread +ifeq ($(UNAME_S),Linux) + CFLAGS += -pthread + CXXFLAGS += -pthread endif - -# detect Windows -ifneq ($(findstring _NT,$(UNAME_S)),) - _WIN32 := 1 +ifeq ($(UNAME_S),Darwin) + CFLAGS += -pthread + CXXFLAGS += -pthread endif - -# library name prefix -ifneq ($(_WIN32),1) - LIB_PRE := lib +ifeq ($(UNAME_S),FreeBSD) + CFLAGS += -pthread + CXXFLAGS += -pthread endif - -# Dynamic Shared Object extension -ifneq ($(_WIN32),1) - DSO_EXT := .so -else - DSO_EXT := .dll +ifeq ($(UNAME_S),NetBSD) + CFLAGS += -pthread + CXXFLAGS += -pthread endif - -# Windows Sockets 2 (Winsock) for network-capable apps -ifeq ($(_WIN32),1) - LWINSOCK2 := -lws2_32 +ifeq ($(UNAME_S),OpenBSD) + CFLAGS += -pthread + CXXFLAGS += -pthread endif - -ifdef LLAMA_GPROF - MK_CFLAGS += -pg - MK_CXXFLAGS += -pg +ifeq ($(UNAME_S),Haiku) + CFLAGS += -pthread + CXXFLAGS += -pthread endif # Architecture specific # TODO: probably these flags need to be tweaked on some architectures # feel free to update the Makefile for your architecture and send a pull request or issue - -ifndef RISCV_CROSS_COMPILE - -ifeq ($(UNAME_M),$(filter $(UNAME_M),x86_64 i686 amd64)) +ifeq ($(UNAME_M),$(filter $(UNAME_M),x86_64 i686)) # Use all CPU extensions that are available: - MK_CFLAGS += -march=native -mtune=native - HOST_CXXFLAGS += -march=native -mtune=native - - # Usage AMX build test - #MK_CFLAGS += -march=graniterapids -mtune=graniterapids - #HOST_CXXFLAGS += -march=graniterapids -mtune=graniterapids + CFLAGS += -march=native -mtune=native + CXXFLAGS += -march=native -mtune=native # Usage AVX-only - #MK_CFLAGS += -mfma -mf16c -mavx - #MK_CXXFLAGS += -mfma -mf16c -mavx - - # Usage SSSE3-only (Not is SSE3!) - #MK_CFLAGS += -mssse3 - #MK_CXXFLAGS += -mssse3 + #CFLAGS += -mfma -mf16c -mavx + #CXXFLAGS += -mfma -mf16c -mavx endif - -ifneq '' '$(findstring mingw,$(shell $(CC) -dumpmachine))' - # The stack is only 16-byte aligned on Windows, so don't let gcc emit aligned moves. - # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=54412 - # https://github.com/ggerganov/llama.cpp/issues/2922 - MK_CFLAGS += -Xassembler -muse-unaligned-vector-move - MK_CXXFLAGS += -Xassembler -muse-unaligned-vector-move - - # Target Windows 8 for PrefetchVirtualMemory - MK_CPPFLAGS += -D_WIN32_WINNT=0x602 -endif - -ifneq ($(filter aarch64%,$(UNAME_M)),) - # Apple M1, M2, etc. 
- # Raspberry Pi 3, 4, Zero 2 (64-bit) - # Nvidia Jetson - MK_CFLAGS += -mcpu=native - MK_CXXFLAGS += -mcpu=native - JETSON_RELEASE_INFO = $(shell jetson_release) - ifdef JETSON_RELEASE_INFO - ifneq ($(filter TX2%,$(JETSON_RELEASE_INFO)),) - JETSON_EOL_MODULE_DETECT = 1 - CC = aarch64-unknown-linux-gnu-gcc - cxx = aarch64-unknown-linux-gnu-g++ - endif - endif -endif - -ifneq ($(filter armv6%,$(UNAME_M)),) - # Raspberry Pi 1, Zero - MK_CFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access - MK_CXXFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -endif - -ifneq ($(filter armv7%,$(UNAME_M)),) - # Raspberry Pi 2 - MK_CFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations - MK_CXXFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations -endif - -ifneq ($(filter armv8%,$(UNAME_M)),) - # Raspberry Pi 3, 4, Zero 2 (32-bit) - MK_CFLAGS += -mfp16-format=ieee -mno-unaligned-access - MK_CXXFLAGS += -mfp16-format=ieee -mno-unaligned-access -endif - ifneq ($(filter ppc64%,$(UNAME_M)),) POWER9_M := $(shell grep "POWER9" /proc/cpuinfo) ifneq (,$(findstring POWER9,$(POWER9_M))) - MK_CFLAGS += -mcpu=power9 - MK_CXXFLAGS += -mcpu=power9 + CFLAGS += -mcpu=power9 + CXXFLAGS += -mcpu=power9 + endif + # Require c++23's std::byteswap for big-endian support. + ifeq ($(UNAME_M),ppc64) + CXXFLAGS += -std=c++23 -DGGML_BIG_ENDIAN endif endif - -ifneq ($(filter ppc64le%,$(UNAME_M)),) - MK_CFLAGS += -mcpu=powerpc64le - MK_CXXFLAGS += -mcpu=powerpc64le - CUDA_POWER_ARCH = 1 -endif - -ifneq ($(filter loongarch64%,$(UNAME_M)),) - MK_CFLAGS += -mlasx - MK_CXXFLAGS += -mlasx -endif - -ifneq ($(filter riscv64%,$(UNAME_M)),) - MK_CFLAGS += -march=rv64gcv -mabi=lp64d - MK_CXXFLAGS += -march=rv64gcv -mabi=lp64d -endif - -else # RISC-V CROSS COMPILATION - MK_CFLAGS += -march=rv64gcv -mabi=lp64d - MK_CXXFLAGS += -march=rv64gcv -mabi=lp64d -endif - -ifndef GGML_NO_ACCELERATE - # Mac OS - include Accelerate framework. - # `-framework Accelerate` works both with Apple Silicon and Mac Intel +ifndef LLAMA_NO_ACCELERATE + # Mac M1 - include Accelerate framework. + # `-framework Accelerate` works on Mac Intel as well, with negliable performance boost (as of the predict time). 
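The `ifndef LLAMA_NO_ACCELERATE` guard opened just above (its `ifeq ($(UNAME_S),Darwin)` body continues below) links the Accelerate framework by default on macOS, and the `LLAMA_OPENBLAS` / `LLAMA_CUBLAS` toggles that follow in this hunk enable the corresponding BLAS backends. A hedged invocation sketch, assuming GNU make; note that `ifdef`/`ifndef` only check whether the variable is defined, not its value:

```sh
# Default macOS build: defines GGML_USE_ACCELERATE and links -framework Accelerate.
make

# Opt out of Accelerate (any non-empty value works; only the definition matters).
make LLAMA_NO_ACCELERATE=1

# Alternatively, enable one of the BLAS paths defined later in this Makefile.
make LLAMA_OPENBLAS=1
make LLAMA_CUBLAS=1
```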
ifeq ($(UNAME_S),Darwin) - MK_CPPFLAGS += -DGGML_USE_ACCELERATE -DGGML_USE_BLAS -DGGML_BLAS_USE_ACCELERATE - MK_CPPFLAGS += -DACCELERATE_NEW_LAPACK - MK_CPPFLAGS += -DACCELERATE_LAPACK_ILP64 - MK_LDFLAGS += -framework Accelerate - OBJ_GGML_EXT += ggml/src/ggml-blas/ggml-blas.o + CFLAGS += -DGGML_USE_ACCELERATE + LDFLAGS += -framework Accelerate endif -endif # GGML_NO_ACCELERATE - -ifndef GGML_NO_OPENMP - MK_CPPFLAGS += -DGGML_USE_OPENMP - MK_CFLAGS += -fopenmp - MK_CXXFLAGS += -fopenmp -endif # GGML_NO_OPENMP - -ifdef GGML_OPENBLAS - MK_CPPFLAGS += -DGGML_USE_BLAS $(shell pkg-config --cflags-only-I openblas) - MK_CFLAGS += $(shell pkg-config --cflags-only-other openblas) - MK_LDFLAGS += $(shell pkg-config --libs openblas) - OBJ_GGML_EXT += ggml/src/ggml-blas/ggml-blas.o -endif # GGML_OPENBLAS - -ifdef GGML_OPENBLAS64 - MK_CPPFLAGS += -DGGML_USE_BLAS $(shell pkg-config --cflags-only-I openblas64) - MK_CFLAGS += $(shell pkg-config --cflags-only-other openblas64) - MK_LDFLAGS += $(shell pkg-config --libs openblas64) - OBJ_GGML_EXT += ggml/src/ggml-blas/ggml-blas.o -endif # GGML_OPENBLAS64 - -ifdef GGML_BLIS - MK_CPPFLAGS += -DGGML_USE_BLAS -DGGML_BLAS_USE_BLIS -I/usr/local/include/blis -I/usr/include/blis - MK_LDFLAGS += -lblis -L/usr/local/lib - OBJ_GGML_EXT += ggml/src/ggml-blas/ggml-blas.o -endif # GGML_BLIS - -ifdef GGML_NVPL - MK_CPPFLAGS += -DGGML_USE_BLAS -DGGML_BLAS_USE_NVPL -DNVPL_ILP64 -I/usr/local/include/nvpl_blas -I/usr/include/nvpl_blas - MK_LDFLAGS += -L/usr/local/lib -lnvpl_blas_core -lnvpl_blas_ilp64_gomp - OBJ_GGML_EXT += ggml/src/ggml-blas/ggml-blas.o -endif # GGML_NVPL - -ifndef GGML_NO_LLAMAFILE - MK_CPPFLAGS += -DGGML_USE_LLAMAFILE - OBJ_GGML_EXT += ggml/src/ggml-cpu/llamafile/sgemm.o endif - -ifndef GGML_NO_AMX - MK_CPPFLAGS += -DGGML_USE_AMX - OBJ_GGML_EXT += ggml/src/ggml-cpu/amx/amx.o ggml/src/ggml-cpu/amx/mmq.o +ifdef LLAMA_OPENBLAS + CFLAGS += -DGGML_USE_OPENBLAS -I/usr/local/include/openblas + LDFLAGS += -lopenblas endif - -# only necessary for the CPU backend files -MK_CPPFLAGS += -Iggml/src/ggml-cpu - -ifdef GGML_RPC - MK_CPPFLAGS += -DGGML_USE_RPC - OBJ_GGML_EXT += ggml/src/ggml-rpc.o -endif # GGML_RPC - -OBJ_CUDA_TMPL = $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/fattn-mma*.cu)) -OBJ_CUDA_TMPL += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/mmq*.cu)) - -ifdef GGML_CUDA_FA_ALL_QUANTS - OBJ_CUDA_TMPL += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/fattn-vec*.cu)) -else - OBJ_CUDA_TMPL += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/fattn-vec*q4_0-q4_0.cu)) - OBJ_CUDA_TMPL += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/fattn-vec*q8_0-q8_0.cu)) - OBJ_CUDA_TMPL += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/fattn-vec*f16-f16.cu)) -endif # GGML_CUDA_FA_ALL_QUANTS - -ifdef GGML_CUDA - ifneq ('', '$(wildcard /opt/cuda)') - CUDA_PATH ?= /opt/cuda - else - CUDA_PATH ?= /usr/local/cuda - endif - - MK_CPPFLAGS += -DGGML_USE_CUDA -DGGML_CUDA_USE_GRAPHS -I$(CUDA_PATH)/include -I$(CUDA_PATH)/targets/$(UNAME_M)-linux/include - MK_LDFLAGS += -lcuda -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L$(CUDA_PATH)/lib64 -L/usr/lib64 -L$(CUDA_PATH)/targets/$(UNAME_M)-linux/lib -L$(CUDA_PATH)/lib64/stubs -L/usr/lib/wsl/lib - MK_NVCCFLAGS += -use_fast_math - - OBJ_GGML_EXT += ggml/src/ggml-cuda/ggml-cuda.o - OBJ_GGML_EXT += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/*.cu)) - OBJ_GGML_EXT += $(OBJ_CUDA_TMPL) - -ifdef 
LLAMA_FATAL_WARNINGS - MK_NVCCFLAGS += -Werror all-warnings -endif # LLAMA_FATAL_WARNINGS - -ifndef JETSON_EOL_MODULE_DETECT - MK_NVCCFLAGS += --forward-unknown-to-host-compiler -endif # JETSON_EOL_MODULE_DETECT - -ifdef LLAMA_DEBUG - MK_NVCCFLAGS += -lineinfo -endif # LLAMA_DEBUG - -ifdef GGML_CUDA_DEBUG - MK_NVCCFLAGS += --device-debug -endif # GGML_CUDA_DEBUG - -ifdef GGML_CUDA_NVCC - NVCC = $(CCACHE) $(GGML_CUDA_NVCC) -else - NVCC = $(CCACHE) nvcc -endif # GGML_CUDA_NVCC - -ifdef CUDA_DOCKER_ARCH - MK_NVCCFLAGS += -Wno-deprecated-gpu-targets -arch=$(CUDA_DOCKER_ARCH) -else ifndef CUDA_POWER_ARCH - MK_NVCCFLAGS += -arch=native -endif # CUDA_DOCKER_ARCH - -ifdef GGML_CUDA_FORCE_MMQ - MK_NVCCFLAGS += -DGGML_CUDA_FORCE_MMQ -endif # GGML_CUDA_FORCE_MMQ - -ifdef GGML_CUDA_FORCE_CUBLAS - MK_NVCCFLAGS += -DGGML_CUDA_FORCE_CUBLAS -endif # GGML_CUDA_FORCE_CUBLAS - -ifdef GGML_CUDA_F16 - MK_NVCCFLAGS += -DGGML_CUDA_F16 -endif # GGML_CUDA_F16 - -ifdef GGML_CUDA_DMMV_F16 - MK_NVCCFLAGS += -DGGML_CUDA_F16 -endif # GGML_CUDA_DMMV_F16 - -ifdef GGML_CUDA_PEER_MAX_BATCH_SIZE - MK_NVCCFLAGS += -DGGML_CUDA_PEER_MAX_BATCH_SIZE=$(GGML_CUDA_PEER_MAX_BATCH_SIZE) -else - MK_NVCCFLAGS += -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -endif # GGML_CUDA_PEER_MAX_BATCH_SIZE - -ifdef GGML_CUDA_NO_PEER_COPY - MK_NVCCFLAGS += -DGGML_CUDA_NO_PEER_COPY -endif # GGML_CUDA_NO_PEER_COPY - -ifdef GGML_CUDA_CCBIN - MK_NVCCFLAGS += -ccbin $(GGML_CUDA_CCBIN) -endif # GGML_CUDA_CCBIN - -ifdef GGML_CUDA_FA_ALL_QUANTS - MK_NVCCFLAGS += -DGGML_CUDA_FA_ALL_QUANTS -endif # GGML_CUDA_FA_ALL_QUANTS - -ifdef JETSON_EOL_MODULE_DETECT -define NVCC_COMPILE - $(NVCC) -I. -Icommon -D_XOPEN_SOURCE=600 -D_GNU_SOURCE -DNDEBUG -DGGML_USE_CUDA -I/usr/local/cuda/include -I/opt/cuda/include -I/usr/local/cuda/targets/aarch64-linux/include -std=c++11 -O3 $(NVCCFLAGS) $(CPPFLAGS) -Xcompiler "$(CUDA_CXXFLAGS)" -c $< -o $@ -endef # NVCC_COMPILE -else -define NVCC_COMPILE - $(NVCC) $(NVCCFLAGS) $(CPPFLAGS) -Xcompiler "$(CUDA_CXXFLAGS)" -c $< -o $@ -endef # NVCC_COMPILE -endif # JETSON_EOL_MODULE_DETECT - -ggml/src/ggml-cuda/%.o: \ - ggml/src/ggml-cuda/%.cu \ - ggml/include/ggml.h \ - ggml/src/ggml-common.h \ - ggml/src/ggml-cuda/common.cuh - $(NVCC_COMPILE) - -ggml/src/ggml-cuda/ggml-cuda.o: \ - ggml/src/ggml-cuda/ggml-cuda.cu \ - ggml/include/ggml-cuda.h \ - ggml/include/ggml.h \ - ggml/include/ggml-backend.h \ - ggml/src/ggml-backend-impl.h \ - ggml/src/ggml-common.h \ - $(wildcard ggml/src/ggml-cuda/*.cuh) - $(NVCC_COMPILE) -endif # GGML_CUDA - -ifdef GGML_VULKAN - MK_CPPFLAGS += -DGGML_USE_VULKAN - MK_LDFLAGS += $(shell pkg-config --libs vulkan) - OBJ_GGML_EXT += ggml/src/ggml-vulkan.o ggml/src/ggml-vulkan-shaders.o - -ifdef GGML_VULKAN_CHECK_RESULTS - MK_CPPFLAGS += -DGGML_VULKAN_CHECK_RESULTS +ifdef LLAMA_CUBLAS + CFLAGS += -DGGML_USE_CUBLAS -I/usr/local/cuda/include + LDFLAGS += -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64 + OBJS += ggml-cuda.o + NVCC = nvcc + NVCCFLAGS = --forward-unknown-to-host-linker -arch=native +ggml-cuda.o: ggml-cuda.cu ggml-cuda.h + $(NVCC) $(NVCCFLAGS) $(CXXFLAGS) -c $< -o $@ endif - -ifdef GGML_VULKAN_DEBUG - MK_CPPFLAGS += -DGGML_VULKAN_DEBUG +ifdef LLAMA_GPROF + CFLAGS += -pg + CXXFLAGS += -pg endif - -ifdef GGML_VULKAN_MEMORY_DEBUG - MK_CPPFLAGS += -DGGML_VULKAN_MEMORY_DEBUG +ifneq ($(filter aarch64%,$(UNAME_M)),) + CFLAGS += -mcpu=native + CXXFLAGS += -mcpu=native endif - -ifdef GGML_VULKAN_PERF - MK_CPPFLAGS += -DGGML_VULKAN_PERF +ifneq ($(filter armv6%,$(UNAME_M)),) + # Raspberry Pi 1, 
2, 3 + CFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access endif - -ifdef GGML_VULKAN_VALIDATE - MK_CPPFLAGS += -DGGML_VULKAN_VALIDATE +ifneq ($(filter armv7%,$(UNAME_M)),) + # Raspberry Pi 4 + CFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations endif - -ifdef GGML_VULKAN_RUN_TESTS - MK_CPPFLAGS += -DGGML_VULKAN_RUN_TESTS -endif - -GLSLC_CMD = glslc -_ggml_vk_genshaders_cmd = $(shell pwd)/vulkan-shaders-gen -_ggml_vk_header = ggml/src/ggml-vulkan-shaders.hpp -_ggml_vk_source = ggml/src/ggml-vulkan-shaders.cpp -_ggml_vk_input_dir = ggml/src/ggml-vulkan/vulkan-shaders -_ggml_vk_shader_deps = $(echo $(_ggml_vk_input_dir)/*.comp) - -ggml/src/ggml-vulkan.o: ggml/src/ggml-vulkan/ggml-vulkan.cpp ggml/include/ggml-vulkan.h $(_ggml_vk_header) $(_ggml_vk_source) - $(CXX) $(CXXFLAGS) $(shell pkg-config --cflags vulkan) -c $< -o $@ - -$(_ggml_vk_header): $(_ggml_vk_source) - -$(_ggml_vk_source): $(_ggml_vk_shader_deps) vulkan-shaders-gen - $(_ggml_vk_genshaders_cmd) \ - --glslc $(GLSLC_CMD) \ - --input-dir $(_ggml_vk_input_dir) \ - --target-hpp $(_ggml_vk_header) \ - --target-cpp $(_ggml_vk_source) - -vulkan-shaders-gen: ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp - $(CXX) $(CXXFLAGS) -o $@ $(LDFLAGS) ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp - -endif # GGML_VULKAN - -ifdef GGML_HIP - ifeq ($(wildcard /opt/rocm),) - ROCM_PATH ?= /usr - AMDGPU_TARGETS ?= $(shell $(shell which amdgpu-arch)) - else - ROCM_PATH ?= /opt/rocm - AMDGPU_TARGETS ?= $(shell $(ROCM_PATH)/llvm/bin/amdgpu-arch) - endif - - MK_CPPFLAGS += -DGGML_USE_HIP -DGGML_USE_CUDA - -ifdef GGML_HIP_UMA - MK_CPPFLAGS += -DGGML_HIP_UMA -endif # GGML_HIP_UMA - - MK_LDFLAGS += -L$(ROCM_PATH)/lib -Wl,-rpath=$(ROCM_PATH)/lib - MK_LDFLAGS += -L$(ROCM_PATH)/lib64 -Wl,-rpath=$(ROCM_PATH)/lib64 - MK_LDFLAGS += -lhipblas -lamdhip64 -lrocblas - - HIPCC ?= $(CCACHE) $(ROCM_PATH)/bin/hipcc - - HIPFLAGS += $(addprefix --offload-arch=,$(AMDGPU_TARGETS)) - -ifdef GGML_CUDA_FORCE_MMQ - HIPFLAGS += -DGGML_CUDA_FORCE_MMQ -endif # GGML_CUDA_FORCE_MMQ - -ifdef GGML_CUDA_FORCE_CUBLAS - HIPFLAGS += -DGGML_CUDA_FORCE_CUBLAS -endif # GGML_CUDA_FORCE_CUBLAS - -ifdef GGML_CUDA_NO_PEER_COPY - HIPFLAGS += -DGGML_CUDA_NO_PEER_COPY -endif # GGML_CUDA_NO_PEER_COPY - - OBJ_GGML_EXT += ggml/src/ggml-cuda/ggml-cuda.o - OBJ_GGML_EXT += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/*.cu)) - OBJ_GGML_EXT += $(OBJ_CUDA_TMPL) - -ggml/src/ggml-cuda/ggml-cuda.o: \ - ggml/src/ggml-cuda/ggml-cuda.cu \ - ggml/include/ggml-cuda.h \ - ggml/include/ggml.h \ - ggml/include/ggml-backend.h \ - ggml/src/ggml-backend-impl.h \ - ggml/src/ggml-common.h \ - $(wildcard ggml/src/ggml-cuda/*.cuh) - $(HIPCC) $(CXXFLAGS) $(HIPFLAGS) -x hip -c -o $@ $< - -ggml/src/ggml-cuda/%.o: \ - ggml/src/ggml-cuda/%.cu \ - ggml/include/ggml.h \ - ggml/src/ggml-common.h \ - ggml/src/ggml-cuda/common.cuh - $(HIPCC) $(CXXFLAGS) $(HIPFLAGS) -x hip -c -o $@ $< -endif # GGML_HIP - -ifdef GGML_MUSA - ifeq ($(wildcard /opt/musa),) - MUSA_PATH ?= /usr/local/musa - else - MUSA_PATH ?= /opt/musa - endif - MUSA_ARCHITECTURES ?= 21;22 - - MK_CPPFLAGS += -DGGML_USE_MUSA -DGGML_USE_CUDA - MK_LDFLAGS += -L$(MUSA_PATH)/lib -Wl,-rpath=$(MUSA_PATH)/lib - MK_LDFLAGS += -lmusa -lmusart -lmublas - - ifndef GGML_NO_OPENMP - # For Ubuntu Focal - MK_CPPFLAGS += -I/usr/lib/llvm-10/include/openmp - MK_LDFLAGS += -L/usr/lib/llvm-10/lib - # For Ubuntu Jammy - MK_CPPFLAGS += -I/usr/lib/llvm-14/lib/clang/14.0.0/include - MK_LDFLAGS += 
-L/usr/lib/llvm-14/lib - endif # GGML_NO_OPENMP - - CC := $(MUSA_PATH)/bin/clang - CXX := $(MUSA_PATH)/bin/clang++ - MCC := $(CCACHE) $(MUSA_PATH)/bin/mcc - - MUSAFLAGS = -x musa -mtgpu - MUSAFLAGS += $(foreach arch,$(subst ;, ,$(MUSA_ARCHITECTURES)),--cuda-gpu-arch=mp_$(arch)) - -ifdef GGML_CUDA_FORCE_MMQ - MUSAFLAGS += -DGGML_CUDA_FORCE_MMQ -endif # GGML_CUDA_FORCE_MMQ - -ifdef GGML_CUDA_FORCE_CUBLAS - MUSAFLAGS += -DGGML_CUDA_FORCE_CUBLAS -endif # GGML_CUDA_FORCE_CUBLAS - -ifdef GGML_CUDA_F16 - MUSAFLAGS += -DGGML_CUDA_F16 -endif # GGML_CUDA_F16 - -ifdef GGML_CUDA_DMMV_F16 - MUSAFLAGS += -DGGML_CUDA_F16 -endif # GGML_CUDA_DMMV_F16 - -ifdef GGML_CUDA_PEER_MAX_BATCH_SIZE - MUSAFLAGS += -DGGML_CUDA_PEER_MAX_BATCH_SIZE=$(GGML_CUDA_PEER_MAX_BATCH_SIZE) -else - MUSAFLAGS += -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -endif # GGML_CUDA_PEER_MAX_BATCH_SIZE - -ifdef GGML_CUDA_NO_PEER_COPY - MUSAFLAGS += -DGGML_CUDA_NO_PEER_COPY -endif # GGML_CUDA_NO_PEER_COPY - -ifdef GGML_CUDA_FA_ALL_QUANTS - MUSAFLAGS += -DGGML_CUDA_FA_ALL_QUANTS -endif # GGML_CUDA_FA_ALL_QUANTS - - OBJ_GGML_EXT += ggml/src/ggml-cuda/ggml-cuda.o - OBJ_GGML_EXT += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/*.cu)) - OBJ_GGML_EXT += $(OBJ_CUDA_TMPL) - -ggml/src/ggml-cuda/ggml-cuda.o: \ - ggml/src/ggml-cuda/ggml-cuda.cu \ - ggml/include/ggml-cuda.h \ - ggml/include/ggml.h \ - ggml/include/ggml-backend.h \ - ggml/src/ggml-backend-impl.h \ - ggml/src/ggml-common.h \ - $(wildcard ggml/src/ggml-cuda/*.cuh) - $(MCC) $(CXXFLAGS) $(MUSAFLAGS) -c -o $@ $< - -ggml/src/ggml-cuda/%.o: \ - ggml/src/ggml-cuda/%.cu \ - ggml/include/ggml.h \ - ggml/src/ggml-common.h \ - ggml/src/ggml-cuda/common.cuh - $(MCC) $(CXXFLAGS) $(MUSAFLAGS) -c -o $@ $< -endif # GGML_MUSA - -ifdef GGML_METAL - MK_CPPFLAGS += -DGGML_USE_METAL - MK_LDFLAGS += -framework Foundation -framework Metal -framework MetalKit - OBJ_GGML_EXT += ggml/src/ggml-metal/ggml-metal.o - -ifdef GGML_METAL_USE_BF16 - MK_CPPFLAGS += -DGGML_METAL_USE_BF16 -endif # GGML_METAL_USE_BF16 -ifdef GGML_METAL_NDEBUG - MK_CPPFLAGS += -DGGML_METAL_NDEBUG -endif -ifdef GGML_METAL_EMBED_LIBRARY - MK_CPPFLAGS += -DGGML_METAL_EMBED_LIBRARY - OBJ_GGML_EXT += ggml/src/ggml-metal-embed.o -endif -endif # GGML_METAL - -ifdef GGML_METAL -ggml/src/ggml-metal/ggml-metal.o: \ - ggml/src/ggml-metal/ggml-metal.m \ - ggml/src/ggml-metal/ggml-metal-impl.h \ - ggml/include/ggml-metal.h \ - ggml/include/ggml.h - $(CC) $(CFLAGS) -c $< -o $@ - -ifdef GGML_METAL_EMBED_LIBRARY -ggml/src/ggml-metal-embed.o: \ - ggml/src/ggml-metal/ggml-metal.metal \ - ggml/src/ggml-metal/ggml-metal-impl.h \ - ggml/src/ggml-common.h - @echo "Embedding Metal library" - @sed -e '/__embed_ggml-common.h__/r ggml/src/ggml-common.h' -e '/__embed_ggml-common.h__/d' < ggml/src/ggml-metal/ggml-metal.metal > ggml/src/ggml-metal/ggml-metal-embed.metal.tmp - @sed -e '/#include "ggml-metal-impl.h"/r ggml/src/ggml-metal/ggml-metal-impl.h' -e '/#include "ggml-metal-impl.h"/d' < ggml/src/ggml-metal/ggml-metal-embed.metal.tmp > ggml/src/ggml-metal/ggml-metal-embed.metal - $(eval TEMP_ASSEMBLY=$(shell mktemp -d)) - @echo ".section __DATA, __ggml_metallib" > $(TEMP_ASSEMBLY)/ggml-metal-embed.s - @echo ".globl _ggml_metallib_start" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s - @echo "_ggml_metallib_start:" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s - @echo ".incbin \"ggml/src/ggml-metal/ggml-metal-embed.metal\"" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s - @echo ".globl _ggml_metallib_end" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s - @echo "_ggml_metallib_end:" >> 
$(TEMP_ASSEMBLY)/ggml-metal-embed.s - $(CC) $(CFLAGS) -c $(TEMP_ASSEMBLY)/ggml-metal-embed.s -o $@ - @rm -f ${TEMP_ASSEMBLY}/ggml-metal-embed.s - @rmdir ${TEMP_ASSEMBLY} -endif -endif # GGML_METAL - -DIR_GGML = ggml -DIR_LLAMA = src -DIR_COMMON = common - -OBJ_GGML = \ - $(DIR_GGML)/src/ggml.o \ - $(DIR_GGML)/src/ggml-alloc.o \ - $(DIR_GGML)/src/ggml-backend.o \ - $(DIR_GGML)/src/ggml-backend-reg.o \ - $(DIR_GGML)/src/ggml-opt.o \ - $(DIR_GGML)/src/ggml-quants.o \ - $(DIR_GGML)/src/ggml-threading.o \ - $(DIR_GGML)/src/ggml-cpu/ggml-cpu.o \ - $(DIR_GGML)/src/ggml-cpu/ggml-cpu_cpp.o \ - $(DIR_GGML)/src/ggml-cpu/ggml-cpu-aarch64.o \ - $(DIR_GGML)/src/ggml-cpu/ggml-cpu-hbm.o \ - $(DIR_GGML)/src/ggml-cpu/ggml-cpu-quants.o \ - $(DIR_GGML)/src/ggml-cpu/ggml-cpu-traits.o \ - $(OBJ_GGML_EXT) - -OBJ_LLAMA = \ - $(DIR_LLAMA)/llama.o \ - $(DIR_LLAMA)/llama-vocab.o \ - $(DIR_LLAMA)/llama-grammar.o \ - $(DIR_LLAMA)/llama-sampling.o \ - $(DIR_LLAMA)/unicode.o \ - $(DIR_LLAMA)/unicode-data.o - -OBJ_COMMON = \ - $(DIR_COMMON)/common.o \ - $(DIR_COMMON)/arg.o \ - $(DIR_COMMON)/log.o \ - $(DIR_COMMON)/console.o \ - $(DIR_COMMON)/ngram-cache.o \ - $(DIR_COMMON)/sampling.o \ - $(DIR_COMMON)/speculative.o \ - $(DIR_COMMON)/chat.o \ - $(DIR_COMMON)/build-info.o \ - $(DIR_COMMON)/json-schema-to-grammar.o - -OBJ_ALL = $(OBJ_GGML) $(OBJ_LLAMA) $(OBJ_COMMON) - -LIB_GGML = $(LIB_PRE)ggml$(DSO_EXT) -LIB_GGML_S = $(LIB_PRE)ggml.a - -LIB_LLAMA = $(LIB_PRE)llama$(DSO_EXT) -LIB_LLAMA_S = $(LIB_PRE)llama.a - -LIB_COMMON = $(LIB_PRE)common$(DSO_EXT) -LIB_COMMON_S = $(LIB_PRE)common.a - -LIB_ALL = $(LIB_GGML) $(LIB_LLAMA) $(LIB_COMMON) -LIB_ALL_S = $(LIB_GGML_S) $(LIB_LLAMA_S) $(LIB_COMMON_S) - -GF_CC := $(CC) -include scripts/get-flags.mk - -# combine build flags with cmdline overrides -override CPPFLAGS := $(MK_CPPFLAGS) $(CPPFLAGS) -override CFLAGS := $(CPPFLAGS) $(MK_CFLAGS) $(GF_CFLAGS) $(CFLAGS) -BASE_CXXFLAGS := $(MK_CXXFLAGS) $(CXXFLAGS) -override CXXFLAGS := $(BASE_CXXFLAGS) $(HOST_CXXFLAGS) $(GF_CXXFLAGS) $(CPPFLAGS) -override NVCCFLAGS := $(MK_NVCCFLAGS) $(NVCCFLAGS) -override LDFLAGS := $(MK_LDFLAGS) $(LDFLAGS) - -# identify CUDA host compiler -ifdef GGML_CUDA -GF_CC := $(NVCC) $(NVCCFLAGS) 2>/dev/null .c -Xcompiler -include scripts/get-flags.mk -CUDA_CXXFLAGS := $(BASE_CXXFLAGS) $(GF_CXXFLAGS) -Wno-pedantic -endif - -ifdef LLAMA_CURL -override CXXFLAGS := $(CXXFLAGS) -DLLAMA_USE_CURL -override LDFLAGS := $(LDFLAGS) -lcurl +ifneq ($(filter armv8%,$(UNAME_M)),) + # Raspberry Pi 4 + CFLAGS += -mfp16-format=ieee -mno-unaligned-access endif # @@ -1030,576 +139,64 @@ endif # $(info I llama.cpp build info: ) -$(info I UNAME_S: $(UNAME_S)) -$(info I UNAME_P: $(UNAME_P)) -$(info I UNAME_M: $(UNAME_M)) -$(info I CFLAGS: $(CFLAGS)) -$(info I CXXFLAGS: $(CXXFLAGS)) -$(info I NVCCFLAGS: $(NVCCFLAGS)) -$(info I LDFLAGS: $(LDFLAGS)) -$(info I CC: $(shell $(CC) --version | head -n 1)) -$(info I CXX: $(shell $(CXX) --version | head -n 1)) -ifdef GGML_CUDA -$(info I NVCC: $(shell $(NVCC) --version | tail -n 1)) -CUDA_VERSION := $(shell $(NVCC) --version | grep -oP 'release (\K[0-9]+\.[0-9])') -ifeq ($(shell awk -v "v=$(CUDA_VERSION)" 'BEGIN { print (v < 11.7) }'),1) - -ifndef CUDA_DOCKER_ARCH -ifndef CUDA_POWER_ARCH -$(error I ERROR: For CUDA versions < 11.7 a target CUDA architecture must be explicitly provided via environment variable CUDA_DOCKER_ARCH, e.g. by running "export CUDA_DOCKER_ARCH=compute_XX" on Unix-like systems, where XX is the minimum compute capability that the code needs to run on. 
A list with compute capabilities can be found here: https://developer.nvidia.com/cuda-gpus ) -endif # CUDA_POWER_ARCH -endif # CUDA_DOCKER_ARCH - -endif # eq ($(shell echo "$(CUDA_VERSION) < 11.7" | bc),1) -endif # GGML_CUDA +$(info I UNAME_S: $(UNAME_S)) +$(info I UNAME_P: $(UNAME_P)) +$(info I UNAME_M: $(UNAME_M)) +$(info I CFLAGS: $(CFLAGS)) +$(info I CXXFLAGS: $(CXXFLAGS)) +$(info I LDFLAGS: $(LDFLAGS)) +$(info I CC: $(CCV)) +$(info I CXX: $(CXXV)) $(info ) -ifdef DEPRECATE_WARNING -$(info !!! DEPRECATION WARNING !!!) -$(info The following LLAMA_ options are deprecated and will be removed in the future. Use the GGML_ prefix instead) -$(info - LLAMA_CUDA) -$(info - LLAMA_METAL) -$(info - LLAMA_METAL_EMBED_LIBRARY) -$(info - LLAMA_OPENMP) -$(info - LLAMA_RPC) -$(info - LLAMA_SYCL) -$(info - LLAMA_SYCL_F16) -$(info - LLAMA_OPENBLAS) -$(info - LLAMA_OPENBLAS64) -$(info - LLAMA_BLIS) -$(info - LLAMA_NO_LLAMAFILE) -$(info - LLAMA_NO_ACCELERATE) -$(info - LLAMA_NO_OPENMP) -$(info - LLAMA_NO_METAL) -$(info - LLAMA_NO_CCACHE) -$(info ) -endif - -ifdef REMOVE_WARNING -$(info !!! REMOVAL WARNING !!!) -$(info The following LLAMA_ options have been removed and are no longer supported) -$(info - LLAMA_DISABLE_LOGS (https://github.com/ggerganov/llama.cpp/pull/9418)) -$(info - LLAMA_SERVER_VERBOSE (https://github.com/ggerganov/llama.cpp/pull/9418)) -$(info ) -endif - # -# Build libraries +# Build library # -# Libraries -LIB_GGML = libggml.so -LIB_GGML_S = libggml.a +ggml.o: ggml.c ggml.h + $(CC) $(CFLAGS) -c $< -o $@ -LIB_LLAMA = libllama.so -LIB_LLAMA_S = libllama.a +llama.o: llama.cpp ggml.h llama.h llama_util.h + $(CXX) $(CXXFLAGS) -c $< -o $@ -LIB_COMMON = libcommon.so -LIB_COMMON_S = libcommon.a +common.o: examples/common.cpp examples/common.h + $(CXX) $(CXXFLAGS) -c $< -o $@ -# Targets -BUILD_TARGETS += $(LIB_GGML) $(LIB_GGML_S) $(LIB_LLAMA) $(LIB_LLAMA_S) $(LIB_COMMON) $(LIB_COMMON_S) +clean: + rm -vf *.o main quantize quantize-stats perplexity embedding benchmark-q4_0-matmult -# Dependency files -DEP_FILES = $(OBJ_GGML:.o=.d) $(OBJ_LLAMA:.o=.d) $(OBJ_COMMON:.o=.d) - -# Default target -all: $(BUILD_TARGETS) - -# force c++ build for source file that have same name as c file -# Note: need this exception because `ggml-cpu.c` and `ggml-cpu.cpp` both produce the same obj/dep files -$(DIR_GGML)/%_cpp.o: $(DIR_GGML)/%.cpp - $(CXX) $(CXXFLAGS) -MMD -c $< -o $@ - -# Rules for building object files -$(DIR_GGML)/%.o: $(DIR_GGML)/%.c - $(CC) $(CFLAGS) -MMD -c $< -o $@ - -$(DIR_GGML)/%.o: $(DIR_GGML)/%.cpp - $(CXX) $(CXXFLAGS) -MMD -c $< -o $@ - -$(DIR_LLAMA)/%.o: $(DIR_LLAMA)/%.cpp - $(CXX) $(CXXFLAGS) -MMD -c $< -o $@ - -$(DIR_COMMON)/%.o: $(DIR_COMMON)/%.cpp - $(CXX) $(CXXFLAGS) -MMD -c $< -o $@ - -# Rules for building libraries -$(LIB_GGML): $(OBJ_GGML) - $(CXX) $(CXXFLAGS) -shared -fPIC -o $@ $^ $(LDFLAGS) - -$(LIB_GGML_S): $(OBJ_GGML) - ar rcs $(LIB_GGML_S) $^ - -$(LIB_LLAMA): $(OBJ_LLAMA) $(LIB_GGML) - $(CXX) $(CXXFLAGS) -shared -fPIC -o $@ $^ $(LDFLAGS) - -$(LIB_LLAMA_S): $(OBJ_LLAMA) - ar rcs $(LIB_LLAMA_S) $^ - -$(LIB_COMMON): $(OBJ_COMMON) $(LIB_LLAMA) $(LIB_GGML) - $(CXX) $(CXXFLAGS) -shared -fPIC -o $@ $^ $(LDFLAGS) - -$(LIB_COMMON_S): $(OBJ_COMMON) - ar rcs $(LIB_COMMON_S) $^ - -# Include dependency files --include $(DEP_FILES) - -# Clean generated server assets -clean-server-assets: - find examples/server -type f -name "*.js.hpp" -delete - find examples/server -type f -name "*.mjs.hpp" -delete - find examples/server -type f -name "*.css.hpp" -delete - find examples/server -type f -name 
"*.html.hpp" -delete - -# Clean rule -clean: clean-server-assets - rm -vrf $(BUILD_TARGETS) $(TEST_TARGETS) - rm -rvf *.a *.dll *.so *.dot - find ggml src common tests examples pocs -type f -name "*.o" -delete - find ggml src common tests examples pocs -type f -name "*.d" -delete - -# -# Examples -# - -# $< is the first prerequisite, i.e. the source file. -# Explicitly compile this to an object file so that it can be cached with ccache. -# The source file is then filtered out from $^ (the list of all prerequisites) and the object file is added instead. - -# Helper function that replaces .c, .cpp, and .cu file endings with .o: -GET_OBJ_FILE = $(patsubst %.c,%.o,$(patsubst %.cpp,%.o,$(patsubst %.cu,%.o,$(1)))) - -llama-cli: examples/main/main.cpp \ - $(OBJ_ALL) - $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) - $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) - @echo - @echo '==== Run ./llama-cli -h for help. ====' - @echo - -llama-infill: examples/infill/infill.cpp \ - $(OBJ_ALL) - $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) - $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) - -llama-run: examples/run/run.cpp \ - $(OBJ_ALL) - $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) - $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) - -llama-simple: examples/simple/simple.cpp \ - $(OBJ_ALL) - $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) - $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) - -llama-simple-chat: examples/simple-chat/simple-chat.cpp \ - $(OBJ_ALL) - $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) - $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) - -llama-tokenize: examples/tokenize/tokenize.cpp \ - $(OBJ_ALL) - $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) - $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) - -llama-batched: examples/batched/batched.cpp \ - $(OBJ_ALL) - $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) - $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) - -llama-batched-bench: examples/batched-bench/batched-bench.cpp \ - $(OBJ_ALL) - $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) - $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) - -llama-quantize: examples/quantize/quantize.cpp \ - $(OBJ_ALL) - $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) - $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) - -llama-quantize-stats: examples/quantize-stats/quantize-stats.cpp \ - $(OBJ_ALL) - $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) - $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) - -llama-perplexity: examples/perplexity/perplexity.cpp \ - $(OBJ_ALL) - $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) - $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) - -llama-imatrix: examples/imatrix/imatrix.cpp \ - $(OBJ_ALL) - $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) - $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) - -llama-embedding: examples/embedding/embedding.cpp \ - $(OBJ_ALL) - $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) - $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) - -llama-gritlm: examples/gritlm/gritlm.cpp \ - $(OBJ_ALL) - $(CXX) 
$(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) - $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) - -llama-save-load-state: examples/save-load-state/save-load-state.cpp \ - $(OBJ_ALL) - $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) - $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) - -llama-gguf: examples/gguf/gguf.cpp \ - $(OBJ_GGML) - $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) - $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) - -examples/gguf-hash/deps/sha1/sha1.o: \ - examples/gguf-hash/deps/sha1/sha1.c - $(CC) $(CFLAGS) -Iexamples/gguf-hash/deps -c $< -o $@ - -examples/gguf-hash/deps/xxhash/xxhash.o: \ - examples/gguf-hash/deps/xxhash/xxhash.c - $(CC) $(CFLAGS) -Iexamples/gguf-hash/deps -c $< -o $@ - -examples/gguf-hash/deps/sha256/sha256.o: \ - examples/gguf-hash/deps/sha256/sha256.c - $(CC) $(CFLAGS) -Iexamples/gguf-hash/deps -c $< -o $@ - -llama-gguf-hash: examples/gguf-hash/gguf-hash.cpp examples/gguf-hash/deps/sha1/sha1.o examples/gguf-hash/deps/xxhash/xxhash.o examples/gguf-hash/deps/sha256/sha256.o\ - $(OBJ_ALL) - $(CXX) $(CXXFLAGS) -Iexamples/gguf-hash/deps -c $< -o $(call GET_OBJ_FILE, $<) - $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) - -llama-gguf-split: examples/gguf-split/gguf-split.cpp \ - $(OBJ_ALL) - $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) - $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) - -llama-eval-callback: examples/eval-callback/eval-callback.cpp \ - $(OBJ_ALL) - $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) - $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) - -llama-cvector-generator: examples/cvector-generator/cvector-generator.cpp \ - $(OBJ_ALL) - $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) - $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) - -llama-convert-llama2c-to-ggml: examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp \ - $(OBJ_ALL) - $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) - $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) - -llama-bench: examples/llama-bench/llama-bench.cpp \ - $(OBJ_ALL) - $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) - $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) - -llama-export-lora: examples/export-lora/export-lora.cpp \ - $(OBJ_ALL) - $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) - $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) - -llama-retrieval: examples/retrieval/retrieval.cpp \ - $(OBJ_ALL) - $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) - $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) - -llama-speculative: examples/speculative/speculative.cpp \ - $(OBJ_ALL) - $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) - $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) - -llama-parallel: examples/parallel/parallel.cpp \ - $(OBJ_ALL) - $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) - $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) - -llama-lookahead: examples/lookahead/lookahead.cpp \ - $(OBJ_ALL) - $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) - $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) - -llama-lookup: 
examples/lookup/lookup.cpp \ - $(OBJ_ALL) - $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) - $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) - -llama-lookup-create: examples/lookup/lookup-create.cpp \ - $(OBJ_ALL) - $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) - $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) - -llama-lookup-merge: examples/lookup/lookup-merge.cpp \ - $(OBJ_ALL) - $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) - $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) - -llama-lookup-stats: examples/lookup/lookup-stats.cpp \ - $(OBJ_ALL) - $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) - $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) - -llama-passkey: examples/passkey/passkey.cpp \ - $(OBJ_ALL) - $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) - $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) - -llama-gbnf-validator: examples/gbnf-validator/gbnf-validator.cpp \ - $(OBJ_ALL) - $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) - $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) - -ifdef GGML_RPC -rpc-server: examples/rpc/rpc-server.cpp \ - $(OBJ_GGML) +main: examples/main/main.cpp ggml.o llama.o common.o $(OBJS) $(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS) -endif # GGML_RPC + @echo + @echo '==== Run ./main -h for help. ====' + @echo -llama-server: \ - examples/server/server.cpp \ - examples/server/utils.hpp \ - examples/server/httplib.h \ - examples/server/index.html.hpp \ - examples/server/loading.html.hpp \ - common/chat.cpp \ - common/chat.hpp \ - common/chat-template.hpp \ - common/json.hpp \ - common/minja.hpp \ - $(OBJ_ALL) - $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) - $(CXX) $(CXXFLAGS) $(filter-out %.h %.hpp $<,$^) -Iexamples/server $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) $(LWINSOCK2) +quantize: examples/quantize/quantize.cpp ggml.o llama.o $(OBJS) + $(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS) -# Portable equivalent of `cd examples/server/public && xxd -i $(notdir $<) ../$(notdir $<).hpp`: -examples/server/%.hpp: examples/server/public/% FORCE Makefile - @( export NAME=$(subst .,_,$(subst -,_,$(notdir $<))) && \ - echo "unsigned char $${NAME}[] = {" && \ - cat $< | od -v -t x1 -An | sed -E 's/([0-9a-fA-F]+)/0x\1, /g' && \ - echo "};" && \ - echo "unsigned int $${NAME}_len = $(shell cat $< | wc -c );" \ - ) > $@ +quantize-stats: examples/quantize-stats/quantize-stats.cpp ggml.o llama.o $(OBJS) + $(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS) -llama-gen-docs: examples/gen-docs/gen-docs.cpp \ - $(OBJ_ALL) - $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) - $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) +perplexity: examples/perplexity/perplexity.cpp ggml.o llama.o common.o $(OBJS) + $(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS) -libllava.a: examples/llava/llava.cpp \ - examples/llava/llava.h \ - examples/llava/clip.cpp \ - examples/llava/clip.h \ - common/stb_image.h \ - common/base64.hpp \ - $(OBJ_ALL) - $(CXX) $(CXXFLAGS) -static -fPIC -c $< -o $@ -Wno-cast-qual +embedding: examples/embedding/embedding.cpp ggml.o llama.o common.o $(OBJS) + $(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS) -llama-llava-cli: examples/llava/llava-cli.cpp \ - examples/llava/llava.cpp \ - examples/llava/llava.h \ - examples/llava/clip.cpp \ - examples/llava/clip.h \ - $(OBJ_ALL) - $(CXX) $(CXXFLAGS) $< $(filter-out %.h $<,$^) -o 
$@ $(LDFLAGS) -Wno-cast-qual +vdot: pocs/vdot/vdot.cpp ggml.o $(OBJS) + $(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS) -llama-minicpmv-cli: examples/llava/minicpmv-cli.cpp \ - examples/llava/llava.cpp \ - examples/llava/llava.h \ - examples/llava/clip.cpp \ - examples/llava/clip.h \ - $(OBJ_ALL) - $(CXX) $(CXXFLAGS) $< $(filter-out %.h $<,$^) -o $@ $(LDFLAGS) -Wno-cast-qual - -llama-qwen2vl-cli: examples/llava/qwen2vl-cli.cpp \ - examples/llava/llava.cpp \ - examples/llava/llava.h \ - examples/llava/clip.cpp \ - examples/llava/clip.h \ - $(OBJ_ALL) - $(CXX) $(CXXFLAGS) $< $(filter-out %.h $<,$^) -o $@ $(LDFLAGS) -Wno-cast-qual - -ifeq ($(UNAME_S),Darwin) -swift: examples/batched.swift - (cd examples/batched.swift; make build) -endif - -common/build-info.cpp: $(wildcard .git/index) scripts/build-info.sh - @sh scripts/build-info.sh "$(CC)" > $@.tmp - @if ! cmp -s $@.tmp $@; then \ - mv $@.tmp $@; \ - else \ - rm $@.tmp; \ - fi - -common/build-info.o: common/build-info.cpp - $(CXX) $(CXXFLAGS) -c $(filter-out %.h,$^) -o $@ +libllama.so: llama.o ggml.o $(OBJS) + $(CXX) $(CXXFLAGS) -shared -fPIC -o $@ $^ $(LDFLAGS) # # Tests # -tests: $(TEST_TARGETS) +benchmark: examples/benchmark/benchmark-q4_0-matmult.c ggml.o $(OBJS) + $(CXX) $(CXXFLAGS) $^ -o benchmark-q4_0-matmult $(LDFLAGS) + ./benchmark-q4_0-matmult -tests/test-arg-parser: tests/test-arg-parser.cpp \ - $(OBJ_ALL) - $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) - $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) - -tests/test-llama-grammar: tests/test-llama-grammar.cpp \ - $(OBJ_ALL) - $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) - $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) - -tests/test-log: tests/test-log.cpp \ - $(OBJ_ALL) - $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) - $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) - -tests/test-grammar-parser: tests/test-grammar-parser.cpp \ - $(OBJ_ALL) - $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) - $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) - -tests/test-grammar-integration: tests/test-grammar-integration.cpp \ - $(OBJ_ALL) - $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) - $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) - -tests/test-double-float: tests/test-double-float.cpp - $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) - $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) - -tests/test-json-schema-to-grammar: tests/test-json-schema-to-grammar.cpp \ - $(OBJ_ALL) - $(CXX) $(CXXFLAGS) -Iexamples/server -c $< -o $(call GET_OBJ_FILE, $<) - $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) - -tests/test-chat: tests/test-chat.cpp \ - $(OBJ_ALL) - $(CXX) $(CXXFLAGS) -Iexamples/server -c $< -o $(call GET_OBJ_FILE, $<) - $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) - -tests/test-opt: tests/test-opt.cpp \ - $(OBJ_GGML) - $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) - $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) - -tests/test-quantize-fns: tests/test-quantize-fns.cpp \ - $(OBJ_GGML) - $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) - $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) - -tests/test-quantize-perf: tests/test-quantize-perf.cpp \ - $(OBJ_GGML) - $(CXX) $(CXXFLAGS) -c $< -o 
$(call GET_OBJ_FILE, $<) - $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) - -tests/test-sampling: tests/test-sampling.cpp \ - $(OBJ_ALL) - $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) - $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) - -tests/test-tokenizer-0: tests/test-tokenizer-0.cpp \ - $(OBJ_ALL) - $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) - $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) - -tests/test-tokenizer-1-bpe: tests/test-tokenizer-1-bpe.cpp \ - $(OBJ_ALL) - $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) - $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) - -tests/test-tokenizer-1-spm: tests/test-tokenizer-1-spm.cpp \ - $(OBJ_ALL) - $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) - $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) - -tests/test-rope: tests/test-rope.cpp ggml/src/ggml.o \ - $(OBJ_GGML) - $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) - $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) - -tests/test-c.o: tests/test-c.c include/llama.h - $(CC) $(CFLAGS) -c $(filter-out %.h,$^) -o $@ - -tests/test-backend-ops: tests/test-backend-ops.cpp \ - $(OBJ_GGML) - $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) - $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) - -tests/test-model-load-cancel: tests/test-model-load-cancel.cpp tests/get-model.cpp \ - $(OBJ_ALL) - $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) - $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) - -tests/test-autorelease: tests/test-autorelease.cpp tests/get-model.cpp \ - $(OBJ_ALL) - $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) - $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) - -tests/test-chat-template: tests/test-chat-template.cpp \ - $(OBJ_ALL) - $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) - $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) - -# -# PoCs -# - -llama-vdot: pocs/vdot/vdot.cpp ggml/src/ggml.o \ - $(OBJ_GGML) - $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) - $(CXX) $(CXXFLAGS) $(filter-out $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) - -llama-q8dot: pocs/vdot/q8dot.cpp ggml/src/ggml.o \ - $(OBJ_GGML) - $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) - $(CXX) $(CXXFLAGS) $(filter-out $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) - -# -# Deprecated binaries that we want to keep around long enough for people to migrate to the new filenames, then these can be removed. -# -# Mark legacy binary targets as .PHONY so that they are always checked. -.PHONY: FORCE main quantize perplexity embedding server - -# Define the object file target -examples/deprecation-warning/deprecation-warning.o: examples/deprecation-warning/deprecation-warning.cpp - $(CXX) $(CXXFLAGS) -c $< -o $@ - -# NOTE: We currently will always build the deprecation-warning `main` and `server` binaries to help users migrate. -# Eventually we will want to remove these target from building all the time. -main: examples/deprecation-warning/deprecation-warning.o - $(CXX) $(CXXFLAGS) $< -o $@ $(LDFLAGS) - @echo "NOTICE: The 'main' binary is deprecated. Please use 'llama-cli' instead." 
- -server: examples/deprecation-warning/deprecation-warning.o - $(CXX) $(CXXFLAGS) $< -o $@ $(LDFLAGS) - @echo "NOTICE: The 'server' binary is deprecated. Please use 'llama-server' instead." - -quantize: examples/deprecation-warning/deprecation-warning.o -ifneq (,$(wildcard quantize)) - $(CXX) $(CXXFLAGS) $< -o $@ $(LDFLAGS) - @echo "#########" - @echo "WARNING: The 'quantize' binary is deprecated. Please use 'llama-quantize' instead." - @echo " Remove the 'quantize' binary to remove this warning." - @echo "#########" -endif - -perplexity: examples/deprecation-warning/deprecation-warning.o -ifneq (,$(wildcard perplexity)) - $(CXX) $(CXXFLAGS) $< -o $@ $(LDFLAGS) - @echo "#########" - @echo "WARNING: The 'perplexity' binary is deprecated. Please use 'llama-perplexity' instead." - @echo " Remove the 'perplexity' binary to remove this warning." - @echo "#########" -endif - -embedding: examples/deprecation-warning/deprecation-warning.o -ifneq (,$(wildcard embedding)) - $(CXX) $(CXXFLAGS) $< -o $@ $(LDFLAGS) - @echo "#########" - @echo "WARNING: The 'embedding' binary is deprecated. Please use 'llama-embedding' instead." - @echo " Remove the 'embedding' binary to remove this warning." - @echo "#########" -endif +.PHONY: tests +tests: + bash ./tests/run-tests.sh diff --git a/Package.swift b/Package.swift index 01c996d24..2c2c147ba 100644 --- a/Package.swift +++ b/Package.swift @@ -1,19 +1,23 @@ -// swift-tools-version:5.5 +// swift-tools-version:5.3 import PackageDescription let package = Package( name: "llama", - platforms: [ - .macOS(.v12), - .iOS(.v14), - .watchOS(.v4), - .tvOS(.v14) - ], products: [ .library(name: "llama", targets: ["llama"]), ], targets: [ - .systemLibrary(name: "llama", pkgConfig: "llama"), - ] + .target( + name: "llama", + path: ".", + sources: ["ggml.c", "llama.cpp"], + publicHeadersPath: "spm-headers", + cSettings: [.unsafeFlags(["-Wno-shorten-64-to-32"]), .define("GGML_USE_ACCELERATE")], + linkerSettings: [ + .linkedFramework("Accelerate") + ] + ), + ], + cxxLanguageStandard: .cxx11 ) diff --git a/README.md b/README.md index 11f3d0286..324d49f07 100644 --- a/README.md +++ b/README.md @@ -2,521 +2,416 @@ ![llama](https://user-images.githubusercontent.com/1991296/230134379-7181e485-c521-4d23-a0d6-f7b3b61ba524.png) +[![Actions Status](https://github.com/ggerganov/llama.cpp/workflows/CI/badge.svg)](https://github.com/ggerganov/llama.cpp/actions) [![License: MIT](https://img.shields.io/badge/license-MIT-blue.svg)](https://opensource.org/licenses/MIT) -[![Server](https://github.com/ggerganov/llama.cpp/actions/workflows/server.yml/badge.svg)](https://github.com/ggerganov/llama.cpp/actions/workflows/server.yml) -[Roadmap](https://github.com/users/ggerganov/projects/7) / [Project status](https://github.com/ggerganov/llama.cpp/discussions/3471) / [Manifesto](https://github.com/ggerganov/llama.cpp/discussions/205) / [ggml](https://github.com/ggerganov/ggml) +Inference of [LLaMA](https://arxiv.org/abs/2302.13971) model in pure C/C++ -Inference of Meta's [LLaMA](https://arxiv.org/abs/2302.13971) model (and others) in pure C/C++ +**Warnings** -## Recent API changes +- `Q4_2` and `Q4_3` are still in development. 
Do not expect any kind of backward compatibility until they are finalized -- [Changelog for `libllama` API](https://github.com/ggerganov/llama.cpp/issues/9289) -- [Changelog for `llama-server` REST API](https://github.com/ggerganov/llama.cpp/issues/9291) +**Hot topics:** -## Hot topics - -- **How to use [MTLResidencySet](https://developer.apple.com/documentation/metal/mtlresidencyset?language=objc) to keep the GPU memory active?** https://github.com/ggerganov/llama.cpp/pull/11427 -- **VS Code extension for FIM completions:** https://github.com/ggml-org/llama.vscode -- Universal tool call support in `llama-server`: https://github.com/ggerganov/llama.cpp/pull/9639 -- Vim/Neovim plugin for FIM completions: https://github.com/ggml-org/llama.vim -- Introducing GGUF-my-LoRA https://github.com/ggerganov/llama.cpp/discussions/10123 -- Hugging Face Inference Endpoints now support GGUF out of the box! https://github.com/ggerganov/llama.cpp/discussions/9669 -- Hugging Face GGUF editor: [discussion](https://github.com/ggerganov/llama.cpp/discussions/9268) | [tool](https://huggingface.co/spaces/CISCai/gguf-editor) - ----- +- [Added LoRA support](https://github.com/ggerganov/llama.cpp/pull/820) +- [Add GPU support to ggml](https://github.com/ggerganov/llama.cpp/discussions/915) +- [Roadmap Apr 2023](https://github.com/ggerganov/llama.cpp/discussions/784) ## Description -The main goal of `llama.cpp` is to enable LLM inference with minimal setup and state-of-the-art performance on a wide -range of hardware - locally and in the cloud. +The main goal of llama.cpp is to run the llama model using 4-bit quantization on a MacBook. -- Plain C/C++ implementation without any dependencies -- Apple silicon is a first-class citizen - optimized via ARM NEON, Accelerate and Metal frameworks -- AVX, AVX2, AVX512 and AMX support for x86 architectures -- 1.5-bit, 2-bit, 3-bit, 4-bit, 5-bit, 6-bit, and 8-bit integer quantization for faster inference and reduced memory use -- Custom CUDA kernels for running LLMs on NVIDIA GPUs (support for AMD GPUs via HIP and Moore Threads MTT GPUs via MUSA) -- Vulkan and SYCL backend support -- CPU+GPU hybrid inference to partially accelerate models larger than the total VRAM capacity +- Plain C/C++ implementation without dependencies +- Apple silicon first-class citizen - optimized via ARM NEON and Accelerate framework +- AVX2 support for x86 architectures +- Mixed F16 / F32 precision +- 4-bit quantization support +- Runs on the CPU -The `llama.cpp` project is the main playground for developing new features for the [ggml](https://github.com/ggerganov/ggml) library. +This was [hacked in an evening](https://github.com/ggerganov/llama.cpp/issues/33#issuecomment-1465108022) - I have no idea if it works correctly. +Please do not make conclusions about the models based on the results from this implementation. +For all I know, it can be completely wrong. This project is for educational purposes. +New features will probably be added mostly through community contributions. -
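As a rough back-of-the-envelope for the 4-bit quantization claim in the description above (not from the original text; exact sizes depend on the quantization format's per-block scales), a 7B-parameter model drops from roughly 13 GiB at 16-bit to roughly 3.3 GiB at 4 bits, which is consistent with the ~4 GiB q4_0 model size reported in the transcript further down this README:

```sh
# Approximate model sizes in MiB for 7B parameters (illustrative arithmetic only).
echo $(( 7000000000 * 2 / 1024 / 1024 ))   # 16-bit weights: ~13351 MiB
echo $(( 7000000000 / 2 / 1024 / 1024 ))   # 4-bit weights:  ~3337 MiB (plus per-block scales)
```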
-Models +**Supported platforms:** -Typically finetunes of the base models below are supported as well. +- [X] Mac OS +- [X] Linux +- [X] Windows (via CMake) +- [X] Docker -Instructions for adding support for new models: [HOWTO-add-model.md](docs/development/HOWTO-add-model.md) - -#### Text-only +**Supported models:** - [X] LLaMA 🦙 -- [x] LLaMA 2 🦙🦙 -- [x] LLaMA 3 🦙🦙🦙 -- [X] [Mistral 7B](https://huggingface.co/mistralai/Mistral-7B-v0.1) -- [x] [Mixtral MoE](https://huggingface.co/models?search=mistral-ai/Mixtral) -- [x] [DBRX](https://huggingface.co/databricks/dbrx-instruct) -- [X] [Falcon](https://huggingface.co/models?search=tiiuae/falcon) -- [X] [Chinese LLaMA / Alpaca](https://github.com/ymcui/Chinese-LLaMA-Alpaca) and [Chinese LLaMA-2 / Alpaca-2](https://github.com/ymcui/Chinese-LLaMA-Alpaca-2) +- [X] [Alpaca](https://github.com/ggerganov/llama.cpp#instruction-mode-with-alpaca) +- [X] [GPT4All](https://github.com/ggerganov/llama.cpp#using-gpt4all) +- [X] [Chinese LLaMA / Alpaca](https://github.com/ymcui/Chinese-LLaMA-Alpaca) - [X] [Vigogne (French)](https://github.com/bofenghuang/vigogne) -- [X] [BERT](https://github.com/ggerganov/llama.cpp/pull/5423) +- [X] [Vicuna](https://github.com/ggerganov/llama.cpp/discussions/643#discussioncomment-5533894) - [X] [Koala](https://bair.berkeley.edu/blog/2023/04/03/koala/) -- [X] [Baichuan 1 & 2](https://huggingface.co/models?search=baichuan-inc/Baichuan) + [derivations](https://huggingface.co/hiyouga/baichuan-7b-sft) -- [X] [Aquila 1 & 2](https://huggingface.co/models?search=BAAI/Aquila) -- [X] [Starcoder models](https://github.com/ggerganov/llama.cpp/pull/3187) -- [X] [Refact](https://huggingface.co/smallcloudai/Refact-1_6B-fim) -- [X] [MPT](https://github.com/ggerganov/llama.cpp/pull/3417) -- [X] [Bloom](https://github.com/ggerganov/llama.cpp/pull/3553) -- [x] [Yi models](https://huggingface.co/models?search=01-ai/Yi) -- [X] [StableLM models](https://huggingface.co/stabilityai) -- [x] [Deepseek models](https://huggingface.co/models?search=deepseek-ai/deepseek) -- [x] [Qwen models](https://huggingface.co/models?search=Qwen/Qwen) -- [x] [PLaMo-13B](https://github.com/ggerganov/llama.cpp/pull/3557) -- [x] [Phi models](https://huggingface.co/models?search=microsoft/phi) -- [x] [PhiMoE](https://github.com/ggerganov/llama.cpp/pull/11003) -- [x] [GPT-2](https://huggingface.co/gpt2) -- [x] [Orion 14B](https://github.com/ggerganov/llama.cpp/pull/5118) -- [x] [InternLM2](https://huggingface.co/models?search=internlm2) -- [x] [CodeShell](https://github.com/WisdomShell/codeshell) -- [x] [Gemma](https://ai.google.dev/gemma) -- [x] [Mamba](https://github.com/state-spaces/mamba) -- [x] [Grok-1](https://huggingface.co/keyfan/grok-1-hf) -- [x] [Xverse](https://huggingface.co/models?search=xverse) -- [x] [Command-R models](https://huggingface.co/models?search=CohereForAI/c4ai-command-r) -- [x] [SEA-LION](https://huggingface.co/models?search=sea-lion) -- [x] [GritLM-7B](https://huggingface.co/GritLM/GritLM-7B) + [GritLM-8x7B](https://huggingface.co/GritLM/GritLM-8x7B) -- [x] [OLMo](https://allenai.org/olmo) -- [x] [OLMo 2](https://allenai.org/olmo) -- [x] [OLMoE](https://huggingface.co/allenai/OLMoE-1B-7B-0924) -- [x] [Granite models](https://huggingface.co/collections/ibm-granite/granite-code-models-6624c5cec322e4c148c8b330) -- [x] [GPT-NeoX](https://github.com/EleutherAI/gpt-neox) + [Pythia](https://github.com/EleutherAI/pythia) -- [x] [Snowflake-Arctic MoE](https://huggingface.co/collections/Snowflake/arctic-66290090abe542894a5ac520) -- [x] 
[Smaug](https://huggingface.co/models?search=Smaug) -- [x] [Poro 34B](https://huggingface.co/LumiOpen/Poro-34B) -- [x] [Bitnet b1.58 models](https://huggingface.co/1bitLLM) -- [x] [Flan T5](https://huggingface.co/models?search=flan-t5) -- [x] [Open Elm models](https://huggingface.co/collections/apple/openelm-instruct-models-6619ad295d7ae9f868b759ca) -- [x] [ChatGLM3-6b](https://huggingface.co/THUDM/chatglm3-6b) + [ChatGLM4-9b](https://huggingface.co/THUDM/glm-4-9b) + [GLMEdge-1.5b](https://huggingface.co/THUDM/glm-edge-1.5b-chat) + [GLMEdge-4b](https://huggingface.co/THUDM/glm-edge-4b-chat) -- [x] [SmolLM](https://huggingface.co/collections/HuggingFaceTB/smollm-6695016cad7167254ce15966) -- [x] [EXAONE-3.0-7.8B-Instruct](https://huggingface.co/LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct) -- [x] [FalconMamba Models](https://huggingface.co/collections/tiiuae/falconmamba-7b-66b9a580324dd1598b0f6d4a) -- [x] [Jais](https://huggingface.co/inceptionai/jais-13b-chat) -- [x] [Bielik-11B-v2.3](https://huggingface.co/collections/speakleash/bielik-11b-v23-66ee813238d9b526a072408a) -- [x] [RWKV-6](https://github.com/BlinkDL/RWKV-LM) -- [x] [QRWKV-6](https://huggingface.co/recursal/QRWKV6-32B-Instruct-Preview-v0.1) -- [x] [GigaChat-20B-A3B](https://huggingface.co/ai-sage/GigaChat-20B-A3B-instruct) -#### Multimodal - -- [x] [LLaVA 1.5 models](https://huggingface.co/collections/liuhaotian/llava-15-653aac15d994e992e2677a7e), [LLaVA 1.6 models](https://huggingface.co/collections/liuhaotian/llava-16-65b9e40155f60fd046a5ccf2) -- [x] [BakLLaVA](https://huggingface.co/models?search=SkunkworksAI/Bakllava) -- [x] [Obsidian](https://huggingface.co/NousResearch/Obsidian-3B-V0.5) -- [x] [ShareGPT4V](https://huggingface.co/models?search=Lin-Chen/ShareGPT4V) -- [x] [MobileVLM 1.7B/3B models](https://huggingface.co/models?search=mobileVLM) -- [x] [Yi-VL](https://huggingface.co/models?search=Yi-VL) -- [x] [Mini CPM](https://huggingface.co/models?search=MiniCPM) -- [x] [Moondream](https://huggingface.co/vikhyatk/moondream2) -- [x] [Bunny](https://github.com/BAAI-DCAI/Bunny) -- [x] [GLM-EDGE](https://huggingface.co/models?search=glm-edge) -- [x] [Qwen2-VL](https://huggingface.co/collections/Qwen/qwen2-vl-66cee7455501d7126940800d) - -
- -
-Bindings +**Bindings:** - Python: [abetlen/llama-cpp-python](https://github.com/abetlen/llama-cpp-python) - Go: [go-skynet/go-llama.cpp](https://github.com/go-skynet/go-llama.cpp) -- Node.js: [withcatai/node-llama-cpp](https://github.com/withcatai/node-llama-cpp) -- JS/TS (llama.cpp server client): [lgrammel/modelfusion](https://modelfusion.dev/integration/model-provider/llamacpp) -- JS/TS (Programmable Prompt Engine CLI): [offline-ai/cli](https://github.com/offline-ai/cli) -- JavaScript/Wasm (works in browser): [tangledgroup/llama-cpp-wasm](https://github.com/tangledgroup/llama-cpp-wasm) -- Typescript/Wasm (nicer API, available on npm): [ngxson/wllama](https://github.com/ngxson/wllama) +- Node.js: [hlhr202/llama-node](https://github.com/hlhr202/llama-node) - Ruby: [yoshoku/llama_cpp.rb](https://github.com/yoshoku/llama_cpp.rb) -- Rust (more features): [edgenai/llama_cpp-rs](https://github.com/edgenai/llama_cpp-rs) -- Rust (nicer API): [mdrokz/rust-llama.cpp](https://github.com/mdrokz/rust-llama.cpp) -- Rust (more direct bindings): [utilityai/llama-cpp-rs](https://github.com/utilityai/llama-cpp-rs) -- Rust (automated build from crates.io): [ShelbyJenkins/llm_client](https://github.com/ShelbyJenkins/llm_client) -- C#/.NET: [SciSharp/LLamaSharp](https://github.com/SciSharp/LLamaSharp) -- C#/VB.NET (more features - community license): [LM-Kit.NET](https://docs.lm-kit.com/lm-kit-net/index.html) -- Scala 3: [donderom/llm4s](https://github.com/donderom/llm4s) -- Clojure: [phronmophobic/llama.clj](https://github.com/phronmophobic/llama.clj) -- React Native: [mybigday/llama.rn](https://github.com/mybigday/llama.rn) -- Java: [kherud/java-llama.cpp](https://github.com/kherud/java-llama.cpp) -- Zig: [deins/llama.cpp.zig](https://github.com/Deins/llama.cpp.zig) -- Flutter/Dart: [netdur/llama_cpp_dart](https://github.com/netdur/llama_cpp_dart) -- Flutter: [xuegao-tzx/Fllama](https://github.com/xuegao-tzx/Fllama) -- PHP (API bindings and features built on top of llama.cpp): [distantmagic/resonance](https://github.com/distantmagic/resonance) [(more info)](https://github.com/ggerganov/llama.cpp/pull/6326) -- Guile Scheme: [guile_llama_cpp](https://savannah.nongnu.org/projects/guile-llama-cpp) -- Swift [srgtuszy/llama-cpp-swift](https://github.com/srgtuszy/llama-cpp-swift) -- Swift [ShenghaiWang/SwiftLlama](https://github.com/ShenghaiWang/SwiftLlama) -
+**UI:** -
-UIs +- [nat/openplayground](https://github.com/nat/openplayground) +- [oobabooga/text-generation-webui](https://github.com/oobabooga/text-generation-webui) -*(to have a project listed here, it should clearly state that it depends on `llama.cpp`)* +--- -- [AI Sublime Text plugin](https://github.com/yaroslavyaroslav/OpenAI-sublime-text) (MIT) -- [cztomsik/ava](https://github.com/cztomsik/ava) (MIT) -- [Dot](https://github.com/alexpinel/Dot) (GPL) -- [eva](https://github.com/ylsdamxssjxxdd/eva) (MIT) -- [iohub/collama](https://github.com/iohub/coLLaMA) (Apache-2.0) -- [janhq/jan](https://github.com/janhq/jan) (AGPL) -- [KanTV](https://github.com/zhouwg/kantv?tab=readme-ov-file) (Apache-2.0) -- [KodiBot](https://github.com/firatkiral/kodibot) (GPL) -- [llama.vim](https://github.com/ggml-org/llama.vim) (MIT) -- [LARS](https://github.com/abgulati/LARS) (AGPL) -- [Llama Assistant](https://github.com/vietanhdev/llama-assistant) (GPL) -- [LLMFarm](https://github.com/guinmoon/LLMFarm?tab=readme-ov-file) (MIT) -- [LLMUnity](https://github.com/undreamai/LLMUnity) (MIT) -- [LMStudio](https://lmstudio.ai/) (proprietary) -- [LocalAI](https://github.com/mudler/LocalAI) (MIT) -- [LostRuins/koboldcpp](https://github.com/LostRuins/koboldcpp) (AGPL) -- [MindMac](https://mindmac.app) (proprietary) -- [MindWorkAI/AI-Studio](https://github.com/MindWorkAI/AI-Studio) (FSL-1.1-MIT) -- [Mobile-Artificial-Intelligence/maid](https://github.com/Mobile-Artificial-Intelligence/maid) (MIT) -- [Mozilla-Ocho/llamafile](https://github.com/Mozilla-Ocho/llamafile) (Apache-2.0) -- [nat/openplayground](https://github.com/nat/openplayground) (MIT) -- [nomic-ai/gpt4all](https://github.com/nomic-ai/gpt4all) (MIT) -- [ollama/ollama](https://github.com/ollama/ollama) (MIT) -- [oobabooga/text-generation-webui](https://github.com/oobabooga/text-generation-webui) (AGPL) -- [PocketPal AI](https://github.com/a-ghorbani/pocketpal-ai) (MIT) -- [psugihara/FreeChat](https://github.com/psugihara/FreeChat) (MIT) -- [ptsochantaris/emeltal](https://github.com/ptsochantaris/emeltal) (MIT) -- [pythops/tenere](https://github.com/pythops/tenere) (AGPL) -- [ramalama](https://github.com/containers/ramalama) (MIT) -- [semperai/amica](https://github.com/semperai/amica) (MIT) -- [withcatai/catai](https://github.com/withcatai/catai) (MIT) -- [Autopen](https://github.com/blackhole89/autopen) (GPL) +Here is a typical run using LLaMA-7B: -
+```java +make -j && ./main -m ./models/7B/ggml-model-q4_0.bin -p "Building a website can be done in 10 simple steps:" -n 512 +I llama.cpp build info: +I UNAME_S: Darwin +I UNAME_P: arm +I UNAME_M: arm64 +I CFLAGS: -I. -O3 -DNDEBUG -std=c11 -fPIC -pthread -DGGML_USE_ACCELERATE +I CXXFLAGS: -I. -I./examples -O3 -DNDEBUG -std=c++11 -fPIC -pthread +I LDFLAGS: -framework Accelerate +I CC: Apple clang version 14.0.0 (clang-1400.0.29.202) +I CXX: Apple clang version 14.0.0 (clang-1400.0.29.202) -
-Tools +make: Nothing to be done for `default'. +main: seed = 1678486056 +llama_model_load: loading model from './models/7B/ggml-model-q4_0.bin' - please wait ... +llama_model_load: n_vocab = 32000 +llama_model_load: n_ctx = 512 +llama_model_load: n_embd = 4096 +llama_model_load: n_mult = 256 +llama_model_load: n_head = 32 +llama_model_load: n_layer = 32 +llama_model_load: n_rot = 128 +llama_model_load: f16 = 2 +llama_model_load: n_ff = 11008 +llama_model_load: ggml ctx size = 4529.34 MB +llama_model_load: memory_size = 512.00 MB, n_mem = 16384 +llama_model_load: .................................... done +llama_model_load: model size = 4017.27 MB / num tensors = 291 -- [akx/ggify](https://github.com/akx/ggify) – download PyTorch models from HuggingFace Hub and convert them to GGML -- [akx/ollama-dl](https://github.com/akx/ollama-dl) – download models from the Ollama library to be used directly with llama.cpp -- [crashr/gppm](https://github.com/crashr/gppm) – launch llama.cpp instances utilizing NVIDIA Tesla P40 or P100 GPUs with reduced idle power consumption -- [gpustack/gguf-parser](https://github.com/gpustack/gguf-parser-go/tree/main/cmd/gguf-parser) - review/check the GGUF file and estimate the memory usage -- [Styled Lines](https://marketplace.unity.com/packages/tools/generative-ai/styled-lines-llama-cpp-model-292902) (proprietary licensed, async wrapper of inference part for game development in Unity3d with pre-built Mobile and Web platform wrappers and a model example) +main: prompt: 'Building a website can be done in 10 simple steps:' +main: number of tokens in prompt = 15 + 1 -> '' + 8893 -> 'Build' + 292 -> 'ing' + 263 -> ' a' + 4700 -> ' website' + 508 -> ' can' + 367 -> ' be' + 2309 -> ' done' + 297 -> ' in' + 29871 -> ' ' + 29896 -> '1' + 29900 -> '0' + 2560 -> ' simple' + 6576 -> ' steps' + 29901 -> ':' -
+sampling parameters: temp = 0.800000, top_k = 40, top_p = 0.950000 -
-Infrastructure -- [Paddler](https://github.com/distantmagic/paddler) - Stateful load balancer custom-tailored for llama.cpp -- [GPUStack](https://github.com/gpustack/gpustack) - Manage GPU clusters for running LLMs -- [llama_cpp_canister](https://github.com/onicai/llama_cpp_canister) - llama.cpp as a smart contract on the Internet Computer, using WebAssembly -- [llama-swap](https://github.com/mostlygeek/llama-swap) - transparent proxy that adds automatic model switching with llama-server -- [Kalavai](https://github.com/kalavai-net/kalavai-client) - Crowdsource end to end LLM deployment at any scale +Building a website can be done in 10 simple steps: +1) Select a domain name and web hosting plan +2) Complete a sitemap +3) List your products +4) Write product descriptions +5) Create a user account +6) Build the template +7) Start building the website +8) Advertise the website +9) Provide email support +10) Submit the website to search engines +A website is a collection of web pages that are formatted with HTML. HTML is the code that defines what the website looks like and how it behaves. +The HTML code is formatted into a template or a format. Once this is done, it is displayed on the user's browser. +The web pages are stored in a web server. The web server is also called a host. When the website is accessed, it is retrieved from the server and displayed on the user's computer. +A website is known as a website when it is hosted. This means that it is displayed on a host. The host is usually a web server. +A website can be displayed on different browsers. The browsers are basically the software that renders the website on the user's screen. +A website can also be viewed on different devices such as desktops, tablets and smartphones. +Hence, to have a website displayed on a browser, the website must be hosted. +A domain name is an address of a website. It is the name of the website. +The website is known as a website when it is hosted. This means that it is displayed on a host. The host is usually a web server. +A website can be displayed on different browsers. The browsers are basically the software that renders the website on the user’s screen. +A website can also be viewed on different devices such as desktops, tablets and smartphones. Hence, to have a website displayed on a browser, the website must be hosted. +A domain name is an address of a website. It is the name of the website. +A website is an address of a website. It is a collection of web pages that are formatted with HTML. HTML is the code that defines what the website looks like and how it behaves. +The HTML code is formatted into a template or a format. Once this is done, it is displayed on the user’s browser. +A website is known as a website when it is hosted -
+main: mem per token = 14434244 bytes +main: load time = 1332.48 ms +main: sample time = 1081.40 ms +main: predict time = 31378.77 ms / 61.41 ms per token +main: total time = 34036.74 ms +``` -
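For a rough sense of scale, the timing lines above imply about 16 tokens generated per second on this machine (1000 ms / 61.41 ms per token), and the roughly 31 seconds of predict time lines up with the 512-token completion requested via `-n 512`.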
-Games +And here is another demo of running both LLaMA-7B and [whisper.cpp](https://github.com/ggerganov/whisper.cpp) on a single M1 Pro MacBook: -- [Lucy's Labyrinth](https://github.com/MorganRO8/Lucys_Labyrinth) - A simple maze game where agents controlled by an AI model will try to trick you. +https://user-images.githubusercontent.com/1991296/224442907-7693d4be-acaa-4e01-8b4f-add84093ffff.mp4 -
+## Usage -## Supported backends +Here are the steps for the LLaMA-7B model. -| Backend | Target devices | -| --- | --- | -| [Metal](docs/build.md#metal-build) | Apple Silicon | -| [BLAS](docs/build.md#blas-build) | All | -| [BLIS](docs/backend/BLIS.md) | All | -| [SYCL](docs/backend/SYCL.md) | Intel and Nvidia GPU | -| [MUSA](docs/build.md#musa) | Moore Threads MTT GPU | -| [CUDA](docs/build.md#cuda) | Nvidia GPU | -| [HIP](docs/build.md#hip) | AMD GPU | -| [Vulkan](docs/build.md#vulkan) | GPU | -| [CANN](docs/build.md#cann) | Ascend NPU | +### Get the Code -## Building the project +```bash +git clone https://github.com/ggerganov/llama.cpp +cd llama.cpp +``` -The main product of this project is the `llama` library. Its C-style interface can be found in [include/llama.h](include/llama.h). -The project also includes many example programs and tools using the `llama` library. The examples range from simple, minimal code snippets to sophisticated sub-projects such as an OpenAI-compatible HTTP server. Possible methods for obtaining the binaries: +### Build -- Clone this repository and build locally, see [how to build](docs/build.md) -- On MacOS or Linux, install `llama.cpp` via [brew, flox or nix](docs/install.md) -- Use a Docker image, see [documentation for Docker](docs/docker.md) -- Download pre-built binaries from [releases](https://github.com/ggerganov/llama.cpp/releases) +Note: For Windows, CMake or Zig can be used. -## Obtaining and quantizing models - -The [Hugging Face](https://huggingface.co) platform hosts a [number of LLMs](https://huggingface.co/models?library=gguf&sort=trending) compatible with `llama.cpp`: - -- [Trending](https://huggingface.co/models?library=gguf&sort=trending) -- [LLaMA](https://huggingface.co/models?sort=trending&search=llama+gguf) - -You can either manually download the GGUF file or directly use any `llama.cpp`-compatible models from Hugging Face by using this CLI argument: `-hf /[:quant]` - -After downloading a model, use the CLI tools to run it locally - see below. - -`llama.cpp` requires the model to be stored in the [GGUF](https://github.com/ggerganov/ggml/blob/master/docs/gguf.md) file format. Models in other data formats can be converted to GGUF using the `convert_*.py` Python scripts in this repo. - -The Hugging Face platform provides a variety of online tools for converting, quantizing and hosting models with `llama.cpp`: - -- Use the [GGUF-my-repo space](https://huggingface.co/spaces/ggml-org/gguf-my-repo) to convert to GGUF format and quantize model weights to smaller sizes -- Use the [GGUF-my-LoRA space](https://huggingface.co/spaces/ggml-org/gguf-my-lora) to convert LoRA adapters to GGUF format (more info: https://github.com/ggerganov/llama.cpp/discussions/10123) -- Use the [GGUF-editor space](https://huggingface.co/spaces/CISCai/gguf-editor) to edit GGUF meta data in the browser (more info: https://github.com/ggerganov/llama.cpp/discussions/9268) -- Use the [Inference Endpoints](https://ui.endpoints.huggingface.co/) to directly host `llama.cpp` in the cloud (more info: https://github.com/ggerganov/llama.cpp/discussions/9669) - -To learn more about model quantization, [read this documentation](examples/quantize/README.md) - -## [`llama-cli`](examples/main) - -#### A CLI tool for accessing and experimenting with most of `llama.cpp`'s functionality. - --
- Run in conversation mode - - Models with a built-in chat template will automatically activate conversation mode. If this doesn't occur, you can manually enable it by adding `-cnv` and specifying a suitable chat template with `--chat-template NAME` +1. Use `make` ```bash - llama-cli -m model.gguf - - # > hi, who are you? - # Hi there! I'm your helpful assistant! I'm an AI-powered chatbot designed to assist and provide information to users like you. I'm here to help answer your questions, provide guidance, and offer support on a wide range of topics. I'm a friendly and knowledgeable AI, and I'm always happy to help with anything you need. What's on your mind, and how can I assist you today? - # - # > what is 1+1? - # Easy peasy! The answer to 1+1 is... 2! + make ``` -
- --
- Run in conversation mode with custom chat template +1. Use CMake ```bash - # use the "chatml" template (use -h to see the list of supported templates) - llama-cli -m model.gguf -cnv --chat-template chatml - - # use a custom template - llama-cli -m model.gguf -cnv --in-prefix 'User: ' --reverse-prompt 'User:' + mkdir build + cd build + cmake .. + cmake --build . --config Release ``` -
- --
- Run simple text completion - - To disable conversation mode explicitly, use `-no-cnv` +1. Use Zig ```bash - llama-cli -m model.gguf -p "I believe the meaning of life is" -n 128 -no-cnv - - # I believe the meaning of life is to find your own truth and to live in accordance with it. For me, this means being true to myself and following my passions, even if they don't align with societal expectations. I think that's what I love about yoga – it's not just a physical practice, but a spiritual one too. It's about connecting with yourself, listening to your inner voice, and honoring your own unique journey. + zig build -Drelease-fast ``` -
+### Prepare Data & Run --
- Constrain the output with a custom grammar +```bash +# obtain the original LLaMA model weights and place them in ./models +ls ./models +65B 30B 13B 7B tokenizer_checklist.chk tokenizer.model - ```bash - llama-cli -m model.gguf -n 256 --grammar-file grammars/json.gbnf -p 'Request: schedule a call at 8pm; Command:' +# install Python dependencies +python3 -m pip install -r requirements.txt - # {"appointmentTime": "8pm", "appointmentDetails": "schedule a a call"} - ``` +# convert the 7B model to ggml FP16 format +python3 convert.py models/7B/ - The [grammars/](grammars/) folder contains a handful of sample grammars. To write your own, check out the [GBNF Guide](grammars/README.md). +# quantize the model to 4-bits (using method 2 = q4_0) +./quantize ./models/7B/ggml-model-f16.bin ./models/7B/ggml-model-q4_0.bin 2 - For authoring more complex JSON grammars, check out https://grammar.intrinsiclabs.ai/ +# run the inference +./main -m ./models/7B/ggml-model-q4_0.bin -n 128 +``` -
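As a quick illustration of the grammar-constrained output shown above, the grammar can also be supplied inline with `--grammar` instead of `--grammar-file`. This is only a sketch with a made-up toy grammar and prompt; adjust the model path and sampling options as needed:

```bash
# force the model to answer with exactly "yes" or "no" using an inline GBNF grammar
llama-cli -m model.gguf -no-cnv -n 8 \
    --grammar 'root ::= "yes" | "no"' \
    -p 'Is the sky blue on a clear day? Answer: '
```

The `--grammar-file` option is also accepted by `llama-server`, as shown further below.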
+When running the larger models, make sure you have enough disk space to store all the intermediate files. +### Memory/Disk Requirements -## [`llama-server`](examples/server) +As the models are currently fully loaded into memory, you will need adequate disk space to save them and sufficient RAM to load them. At the moment, memory and disk requirements are the same. -#### A lightweight, [OpenAI API](https://github.com/openai/openai-openapi) compatible, HTTP server for serving LLMs. +| model | original size | quantized size (4-bit) | +|-------|---------------|------------------------| +| 7B | 13 GB | 3.9 GB | +| 13B | 24 GB | 7.8 GB | +| 30B | 60 GB | 19.5 GB | +| 65B | 120 GB | 38.5 GB | --
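As a rough sanity check on the table above: q4_0 stores weights in 4-bit blocks plus a per-block scale, which works out to roughly 5 bits per weight instead of 16, so the quantized files land at about a third of the f16 size (13 GB vs 3.9 GB for 7B) rather than exactly a quarter.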
- Start a local HTTP server with default configuration on port 8080 +### Interactive mode - ```bash - llama-server -m model.gguf --port 8080 +If you want a more ChatGPT-like experience, you can run in interactive mode by passing `-i` as a parameter. +In this mode, you can always interrupt generation by pressing Ctrl+C and entering one or more lines of text, which will be converted into tokens and appended to the current context. You can also specify a *reverse prompt* with the parameter `-r "reverse prompt string"`. This will result in user input being prompted whenever the exact tokens of the reverse prompt string are encountered in the generation. A typical use is to use a prompt that makes LLaMa emulate a chat between multiple users, say Alice and Bob, and pass `-r "Alice:"`. - # Basic web UI can be accessed via browser: http://localhost:8080 - # Chat completion endpoint: http://localhost:8080/v1/chat/completions - ``` +Here is an example of a few-shot interaction, invoked with the command -
+```bash +# default arguments using a 7B model +./examples/chat.sh --
- Support multiple-users and parallel decoding +# advanced chat with a 13B model +./examples/chat-13B.sh - ```bash - # up to 4 concurrent requests, each with 4096 max context - llama-server -m model.gguf -c 16384 -np 4 - ``` +# custom arguments using a 13B model +./main -m ./models/13B/ggml-model-q4_0.bin -n 256 --repeat_penalty 1.0 --color -i -r "User:" -f prompts/chat-with-bob.txt +``` -
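Returning for a moment to the `llama-server` examples above: once the server is listening, its OpenAI-compatible chat endpoint can be exercised with a plain `curl` request. This is a minimal sketch; the prompt and `max_tokens` value are arbitrary:

```bash
# assumes a server started as shown above, e.g.: llama-server -m model.gguf --port 8080
curl http://localhost:8080/v1/chat/completions \
    -H "Content-Type: application/json" \
    -d '{
          "messages": [
            {"role": "user", "content": "Write a haiku about llamas."}
          ],
          "max_tokens": 64
        }'
```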
+Note the use of `--color` to distinguish between user input and generated text. --
- Enable speculative decoding +![image](https://user-images.githubusercontent.com/1991296/224575029-2af3c7dc-5a65-4f64-a6bb-517a532aea38.png) - ```bash - # the draft.gguf model should be a small variant of the target model.gguf - llama-server -m model.gguf -md draft.gguf - ``` +### Instruction mode with Alpaca -
+1. First, download the `ggml` Alpaca model into the `./models` folder +2. Run the `main` tool like this: --
- Serve an embedding model +``` +./examples/alpaca.sh +``` - ```bash - # use the /embedding endpoint - llama-server -m model.gguf --embedding --pooling cls -ub 8192 - ``` +Sample run: -
+``` +== Running in interactive mode. == + - Press Ctrl+C to interject at any time. + - Press Return to return control to LLaMa. + - If you want to submit another line, end your input in '\'. --
- Serve a reranking model + Below is an instruction that describes a task. Write a response that appropriately completes the request. - ```bash - # use the /reranking endpoint - llama-server -m model.gguf --reranking - ``` +> How many letters are there in the English alphabet? +There 26 letters in the English Alphabet +> What is the most common way of transportation in Amsterdam? +The majority (54%) are using public transit. This includes buses, trams and metros with over 100 lines throughout the city which make it very accessible for tourists to navigate around town as well as locals who commute by tram or metro on a daily basis +> List 5 words that start with "ca". +cadaver, cauliflower, cabbage (vegetable), catalpa (tree) and Cailleach. +> +``` -
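The embedding and reranking endpoints mentioned above can be queried the same way. The JSON bodies below are an assumption on my part (a `content` field for embeddings, `query` plus `documents` for reranking), so treat this as a sketch and check the server README for the exact request shapes:

```bash
# embeddings (server started with --embedding); the "content" field is assumed
curl http://localhost:8080/embedding \
    -H "Content-Type: application/json" \
    -d '{"content": "Some text to embed"}'

# reranking (server started with --reranking); the "query"/"documents" fields are assumed
curl http://localhost:8080/reranking \
    -H "Content-Type: application/json" \
    -d '{"query": "what is a panda?", "documents": ["hi", "it is a bear", "the giant panda is a bear species endemic to China"]}'
```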
+### Using [GPT4All](https://github.com/nomic-ai/gpt4all) --
- Constrain all outputs with a grammar +- Obtain the `gpt4all-lora-quantized.bin` model +- It is distributed in the old `ggml` format, which is now obsoleted +- You have to convert it to the new format using [./convert-gpt4all-to-ggml.py](./convert-gpt4all-to-ggml.py). You may also need to +convert the model from the old format to the new format with [./migrate-ggml-2023-03-30-pr613.py](./migrate-ggml-2023-03-30-pr613.py): - ```bash - # custom grammar - llama-server -m model.gguf --grammar-file grammar.gbnf + ```bash + python3 convert-gpt4all-to-ggml.py models/gpt4all-7B/gpt4all-lora-quantized.bin ./models/tokenizer.model + python3 migrate-ggml-2023-03-30-pr613.py models/gpt4all-7B/gpt4all-lora-quantized.bin models/gpt4all-7B/gpt4all-lora-quantized-new.bin + ``` - # JSON - llama-server -m model.gguf --grammar-file grammars/json.gbnf - ``` +- You can now use the newly generated `gpt4all-lora-quantized-new.bin` model in exactly the same way as all other models +- The original model is saved in the same folder with a suffix `.orig` -
+### Obtaining and verifying the Facebook LLaMA original model and Stanford Alpaca model data +- **Under no circumstances should IPFS, magnet links, or any other links to model downloads be shared anywhere in this repository, including in issues, discussions, or pull requests. They will be immediately deleted.** +- The LLaMA models are officially distributed by Facebook and will **never** be provided through this repository. +- Refer to [Facebook's LLaMA repository](https://github.com/facebookresearch/llama/pull/73/files) if you need to request access to the model data. +- Please verify the [sha256 checksums](SHA256SUMS) of all downloaded model files to confirm that you have the correct model data files before creating an issue relating to your model files. +- The following command will verify if you have all possible latest files in your self-installed `./models` subdirectory: -## [`llama-perplexity`](examples/perplexity) + `sha256sum --ignore-missing -c SHA256SUMS` on Linux -#### A tool for measuring the perplexity [^1][^2] (and other quality metrics) of a model over a given text. + or --
- Measure the perplexity over a text file + `shasum -a 256 --ignore-missing -c SHA256SUMS` on macOS - ```bash - llama-perplexity -m model.gguf -f file.txt +- If your issue is with model generation quality, then please at least scan the following links and papers to understand the limitations of LLaMA models. This is especially important when choosing an appropriate model size and appreciating both the significant and subtle differences between LLaMA models and ChatGPT: +- LLaMA: +- [Introducing LLaMA: A foundational, 65-billion-parameter large language model](https://ai.facebook.com/blog/large-language-model-llama-meta-ai/) +- [LLaMA: Open and Efficient Foundation Language Models](https://arxiv.org/abs/2302.13971) +- GPT-3 +- [Language Models are Few-Shot Learners](https://arxiv.org/abs/2005.14165) +- GPT-3.5 / InstructGPT / ChatGPT: +- [Aligning language models to follow instructions](https://openai.com/research/instruction-following) +- [Training language models to follow instructions with human feedback](https://arxiv.org/abs/2203.02155) - # [1]15.2701,[2]5.4007,[3]5.3073,[4]6.2965,[5]5.8940,[6]5.6096,[7]5.7942,[8]4.9297, ... - # Final estimate: PPL = 5.4007 +/- 0.67339 - ``` +### Perplexity (measuring model quality) -
+You can use the `perplexity` example to measure perplexity over the given prompt. For more background, see [https://huggingface.co/docs/transformers/perplexity](https://huggingface.co/docs/transformers/perplexity). However, in general, lower perplexity is better for LLMs. --
- Measure KL divergence +#### Latest measurements - ```bash - # TODO - ``` +The latest perplexity scores for the various model sizes and quantizations are being tracked in [discussion #406](https://github.com/ggerganov/llama.cpp/discussions/406). `llama.cpp` is measuring very well compared to the baseline implementations. Quantization has a small negative impact on quality, but, as you can see, running +13B at q4_0 beats the 7B f16 model by a significant amount. -
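For reference, the perplexity reported in these comparisons is the exponential of the average negative log-likelihood per token over the evaluation text, i.e. `PPL = exp(-(1/N) * sum_i log p(token_i | preceding tokens))`, which is why lower values mean the model assigns higher probability to the held-out text.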
+All measurements are done against the wikitext2 test dataset (https://paperswithcode.com/dataset/wikitext-2), with default options (512 length context). +Note that changing the context length will have a significant impact on perplexity (longer context = better perplexity). +``` +Perplexity - model options +5.5985 - 13B, q4_0 +5.9565 - 7B, f16 +6.3001 - 7B, q4_1 +6.5949 - 7B, q4_0 +6.5995 - 7B, q4_0, --memory_f16 +``` -[^1]: [examples/perplexity/README.md](./examples/perplexity/README.md) -[^2]: [https://huggingface.co/docs/transformers/perplexity](https://huggingface.co/docs/transformers/perplexity) +#### How to run -## [`llama-bench`](examples/llama-bench) +1. Download/extract: https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-2-raw-v1.zip?ref=salesforce-research +2. Run `./perplexity -m models/7B/ggml-model-q4_0.bin -f wiki.test.raw` +3. Output: +``` +perplexity : calculating perplexity over 655 chunks +24.43 seconds per pass - ETA 4.45 hours +[1]4.5970,[2]5.1807,[3]6.0382,... +``` +And after 4.45 hours, you will have the final perplexity. -#### Benchmark the performance of the inference for various parameters. +### Android --
- Run default benchmark +You can easily run `llama.cpp` on Android device with [termux](https://termux.dev/). +First, obtain the [Android NDK](https://developer.android.com/ndk) and then build with CMake: +``` +$ mkdir build-android +$ cd build-android +$ export NDK= +$ cmake -DCMAKE_TOOLCHAIN_FILE=$NDK/build/cmake/android.toolchain.cmake -DANDROID_ABI=arm64-v8a -DANDROID_PLATFORM=android-23 -DCMAKE_C_FLAGS=-march=armv8.4a+dotprod .. +$ make +``` +Install [termux](https://termux.dev/) on your device and run `termux-setup-storage` to get access to your SD card. +Finally, copy the `llama` binary and the model files to your device storage. Here is a demo of an interactive session running on Pixel 5 phone: - ```bash - llama-bench -m model.gguf +https://user-images.githubusercontent.com/271616/225014776-1d567049-ad71-4ef2-b050-55b0b3b9274c.mp4 - # Output: - # | model | size | params | backend | threads | test | t/s | - # | ------------------- | ---------: | ---------: | ---------- | ------: | ------------: | -------------------: | - # | qwen2 1.5B Q4_0 | 885.97 MiB | 1.54 B | Metal,BLAS | 16 | pp512 | 5765.41 ± 20.55 | - # | qwen2 1.5B Q4_0 | 885.97 MiB | 1.54 B | Metal,BLAS | 16 | tg128 | 197.71 ± 0.81 | - # - # build: 3e0ba0e60 (4229) - ``` +### Docker -
+#### Prerequisites +* Docker must be installed and running on your system. +* Create a folder to store big models & intermediate files (ex. /llama/models) -## [`llama-run`](examples/run) +#### Images +We have two Docker images available for this project: -#### A comprehensive example for running `llama.cpp` models. Useful for inferencing. Used with RamaLama [^3]. +1. `ghcr.io/ggerganov/llama.cpp:full`: This image includes both the main executable file and the tools to convert LLaMA models into ggml and convert into 4-bit quantization. +2. `ghcr.io/ggerganov/llama.cpp:light`: This image only includes the main executable file. --
- Run a model with a specific prompt (by default it's pulled from Ollama registry) +#### Usage - ```bash - llama-run granite-code - ``` +The easiest way to download the models, convert them to ggml and optimize them is with the --all-in-one command which includes the full docker image. -
+Replace `/path/to/models` below with the actual path where you downloaded the models. -[^3]: [RamaLama](https://github.com/containers/ramalama) +```bash +docker run -v /path/to/models:/models ghcr.io/ggerganov/llama.cpp:full --all-in-one "/models/" 7B +``` -## [`llama-simple`](examples/simple) +On completion, you are ready to play! -#### A minimal example for implementing apps with `llama.cpp`. Useful for developers. +```bash +docker run -v /path/to/models:/models ghcr.io/ggerganov/llama.cpp:full --run -m /models/7B/ggml-model-q4_0.bin -p "Building a website can be done in 10 simple steps:" -n 512 +``` --
- Basic text completion +or with a light image: - ```bash - llama-simple -m model.gguf +```bash +docker run -v /path/to/models:/models ghcr.io/ggerganov/llama.cpp:light -m /models/7B/ggml-model-q4_0.bin -p "Building a website can be done in 10 simple steps:" -n 512 +``` - # Hello my name is Kaitlyn and I am a 16 year old girl. I am a junior in high school and I am currently taking a class called "The Art of - ``` - -
- - -## Contributing +### Contributing - Contributors can open PRs - Collaborators can push to branches in the `llama.cpp` repo and merge PRs into the `master` branch - Collaborators will be invited based on contributions -- Any help with managing issues, PRs and projects is very appreciated! -- See [good first issues](https://github.com/ggerganov/llama.cpp/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22) for tasks suitable for first contributions -- Read the [CONTRIBUTING.md](CONTRIBUTING.md) for more information +- Any help with managing issues and PRs is very appreciated! - Make sure to read this: [Inference at the edge](https://github.com/ggerganov/llama.cpp/discussions/205) - A bit of backstory for those who are interested: [Changelog podcast](https://changelog.com/podcast/532) -## Other documentation +### Coding guidelines -- [main (cli)](examples/main/README.md) -- [server](examples/server/README.md) -- [GBNF grammars](grammars/README.md) +- Avoid adding third-party dependencies, extra files, extra headers, etc. +- Always consider cross-compatibility with other operating systems and architectures +- Avoid fancy looking modern STL constructs, use basic `for` loops, avoid templates, keep it simple +- There are no strict rules for the code style, but try to follow the patterns in the code (indentation, spaces, etc.). Vertical alignment makes things more readable and easier to batch edit +- Clean-up any trailing whitespaces, use 4 spaces for indentation, brackets on the same line, `void * ptr`, `int & a` +- See [good first issues](https://github.com/ggerganov/llama.cpp/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22) for tasks suitable for first contributions -#### Development documentation +### Docs -- [How to build](docs/build.md) -- [Running on Docker](docs/docker.md) -- [Build on Android](docs/android.md) -- [Performance troubleshooting](docs/development/token_generation_performance_tips.md) - [GGML tips & tricks](https://github.com/ggerganov/llama.cpp/wiki/GGML-Tips-&-Tricks) - -#### Seminal papers and background on the models - -If your issue is with model generation quality, then please at least scan the following links and papers to understand the limitations of LLaMA models. 
This is especially important when choosing an appropriate model size and appreciating both the significant and subtle differences between LLaMA models and ChatGPT: -- LLaMA: - - [Introducing LLaMA: A foundational, 65-billion-parameter large language model](https://ai.facebook.com/blog/large-language-model-llama-meta-ai/) - - [LLaMA: Open and Efficient Foundation Language Models](https://arxiv.org/abs/2302.13971) -- GPT-3 - - [Language Models are Few-Shot Learners](https://arxiv.org/abs/2005.14165) -- GPT-3.5 / InstructGPT / ChatGPT: - - [Aligning language models to follow instructions](https://openai.com/research/instruction-following) - - [Training language models to follow instructions with human feedback](https://arxiv.org/abs/2203.02155) - -#### References - diff --git a/SECURITY.md b/SECURITY.md deleted file mode 100644 index f4322c6ee..000000000 --- a/SECURITY.md +++ /dev/null @@ -1,67 +0,0 @@ -# Security Policy - - - [**Using llama.cpp securely**](#using-llamacpp-securely) - - [Untrusted models](#untrusted-models) - - [Untrusted inputs](#untrusted-inputs) - - [Data privacy](#data-privacy) - - [Untrusted environments or networks](#untrusted-environments-or-networks) - - [Multi-Tenant environments](#multi-tenant-environments) - - [**Reporting a vulnerability**](#reporting-a-vulnerability) - -## Using llama.cpp securely - -### Untrusted models -Be careful when running untrusted models. This classification includes models created by unknown developers or utilizing data obtained from unknown sources. - -*Always execute untrusted models within a secure, isolated environment such as a sandbox* (e.g., containers, virtual machines). This helps protect your system from potentially malicious code. - -> [!NOTE] -> The trustworthiness of a model is not binary. You must always determine the proper level of caution depending on the specific model and how it matches your use case and risk tolerance. - -### Untrusted inputs - -Some models accept various input formats (text, images, audio, etc.). The libraries converting these inputs have varying security levels, so it's crucial to isolate the model and carefully pre-process inputs to mitigate script injection risks. - -For maximum security when handling untrusted inputs, you may need to employ the following: - -* Sandboxing: Isolate the environment where the inference happens. -* Pre-analysis: Check how the model performs by default when exposed to prompt injection (e.g. using [fuzzing for prompt injection](https://github.com/FonduAI/awesome-prompt-injection?tab=readme-ov-file#tools)). This will give you leads on how hard you will have to work on the next topics. -* Updates: Keep both LLaMA C++ and your libraries updated with the latest security patches. -* Input Sanitation: Before feeding data to the model, sanitize inputs rigorously. This involves techniques such as: - * Validation: Enforce strict rules on allowed characters and data types. - * Filtering: Remove potentially malicious scripts or code fragments. - * Encoding: Convert special characters into safe representations. - * Verification: Run tooling that identifies potential script injections (e.g. [models that detect prompt injection attempts](https://python.langchain.com/docs/guides/safety/hugging_face_prompt_injection)). - -### Data privacy - -To protect sensitive data from potential leaks or unauthorized access, it is crucial to sandbox the model execution. This means running the model in a secure, isolated environment, which helps mitigate many attack vectors. 
- -### Untrusted environments or networks - -If you can't run your models in a secure and isolated environment or if it must be exposed to an untrusted network, make sure to take the following security precautions: -* Confirm the hash of any downloaded artifact (e.g. pre-trained model weights) matches a known-good value -* Encrypt your data if sending it over the network. - -### Multi-Tenant environments - -If you intend to run multiple models in parallel with shared memory, it is your responsibility to ensure the models do not interact or access each other's data. The primary areas of concern are tenant isolation, resource allocation, model sharing and hardware attacks. - -1. Tenant Isolation: Models should run separately with strong isolation methods to prevent unwanted data access. Separating networks is crucial for isolation, as it prevents unauthorized access to data or models and malicious users from sending graphs to execute under another tenant's identity. - -2. Resource Allocation: A denial of service caused by one model can impact the overall system health. Implement safeguards like rate limits, access controls, and health monitoring. - -3. Model Sharing: In a multitenant model sharing design, tenants and users must understand the security risks of running code provided by others. Since there are no reliable methods to detect malicious models, sandboxing the model execution is the recommended approach to mitigate the risk. - -4. Hardware Attacks: GPUs or TPUs can also be attacked. [Research](https://scholar.google.com/scholar?q=gpu+side+channel) has shown that side channel attacks on GPUs are possible, which can cause data to leak from other models or processes running on the same system at the same time. - -## Reporting a vulnerability - -Beware that none of the topics under [Using llama.cpp securely](#using-llamacpp-securely) are considered vulnerabilities of LLaMA C++. - - -However, if you have discovered a security vulnerability in this project, please report it privately. **Do not disclose it as a public issue.** This gives us time to work with you to fix the issue before public exposure, reducing the chance that the exploit will be used before a patch is released. - -Please disclose it as a private [security advisory](https://github.com/ggerganov/llama.cpp/security/advisories/new). - -A team of volunteers on a reasonable-effort basis maintains this project. As such, please give us at least 90 days to work on a fix before public exposure.
diff --git a/SHA256SUMS b/SHA256SUMS new file mode 100644 index 000000000..1d034b371 --- /dev/null +++ b/SHA256SUMS @@ -0,0 +1,40 @@ +700df0d3013b703a806d2ae7f1bfb8e59814e3d06ae78be0c66368a50059f33d models/7B/consolidated.00.pth +666a4bb533b303bdaf89e1b6a3b6f93535d868de31d903afdc20983dc526c847 models/7B/ggml-model-f16.bin +fcb7664c2e69776920b526362a243e912f73c36b1ec892eb354bab940f5edb5a models/7B/ggml-model-q4_0.bin +cc061458339a3eb8bcecbf0a825e9924fb7d1a8150f63cd5d091caa99215aafe models/7B/ggml-model-q4_1.bin +1bc7484c24a87612726d756f1761890e7acf5f412e23378577ce50fbe789b5b8 models/7B/ggml-model-q4_2.bin +3429bf198ec771886cf81a574df45245f3ebf04f0ce0956b73ef5d0ab01ff48b models/7B/ggml-model-q4_3.bin +7e89e242ddc0dd6f060b43ca219ce8b3e8f08959a72cb3c0855df8bb04d46265 models/7B/params.json +745bf4e29a4dd6f411e72976d92b452da1b49168a4f41c951cfcc8051823cf08 models/13B/consolidated.00.pth +d5ccbcc465c71c0de439a5aeffebe8344c68a519bce70bc7f9f92654ee567085 models/13B/consolidated.01.pth +2b206e9b21fb1076f11cafc624e2af97c9e48ea09312a0962153acc20d45f808 models/13B/ggml-model-f16.bin +4b69e4d6b6e3275230955997b90407fceca7e5ab3daf2e63a2c9e7270a8e1e3e models/13B/ggml-model-q4_0.bin +d9581b5b88e5622532fe897c9f9b0e67a317d22dd27a6f90fa4ab8c6d23ccdbb models/13B/ggml-model-q4_1.bin +8d55a2077317ec9a928c7851d6a43e08e51f7e9e08360f2a7a7e1deefea3134f models/13B/ggml-model-q4_2.bin +4208cdec9788ffa48dc1a17af2c36a0299f5bf3eb0e2b87889dda7fad591fca3 models/13B/ggml-model-q4_3.bin +4ab77bec4d4405ccb66a97b282574c89a94417e3c32e5f68f37e2876fc21322f models/13B/params.json +e23294a58552d8cdec5b7e8abb87993b97ea6eced4178ff2697c02472539d067 models/30B/consolidated.00.pth +4e077b7136c7ae2302e954860cf64930458d3076fcde9443f4d0e939e95903ff models/30B/consolidated.01.pth +24a87f01028cbd3a12de551dcedb712346c0b5cbdeff1454e0ddf2df9b675378 models/30B/consolidated.02.pth +1adfcef71420886119544949767f6a56cb6339b4d5fcde755d80fe68b49de93b models/30B/consolidated.03.pth +7e1b524061a9f4b27c22a12d6d2a5bf13b8ebbea73e99f218809351ed9cf7d37 models/30B/ggml-model-f16.bin +7a679908ce31c9d6ae2e38d6059bcd4d0ad3a870cd58cc1c8f7b36f2b2f51c73 models/30B/ggml-model-q4_0.bin +7b75ac615fa369ee593493a7e6ef87542bf0350255db928b22c5a24f6d598bcd models/30B/ggml-model-q4_1.bin +2c82b4954a94a6a284f452f6011c1e4f0d20362c194a0b1eb5737f5fd8a20fb3 models/30B/ggml-model-q4_2.bin +a6188660199dbcb8d5658abe7d89169869e50423494385830d9e6b330ea7fc33 models/30B/ggml-model-q4_3.bin +2c07118ea98d69dbe7810d88520e30288fa994751b337f8fca02b171955f44cb models/30B/params.json +135c563f6b3938114458183afb01adc9a63bef3d8ff7cccc3977e5d3664ecafe models/65B/consolidated.00.pth +9a600b37b19d38c7e43809485f70d17d1dc12206c07efa83bc72bb498a568bde models/65B/consolidated.01.pth +e7babf7c5606f165a3756f527cb0fedc4f83e67ef1290391e52fb1cce5f26770 models/65B/consolidated.02.pth +73176ffb426b40482f2aa67ae1217ef79fbbd1fff5482bae5060cdc5a24ab70e models/65B/consolidated.03.pth +882e6431d0b08a8bc66261a0d3607da21cbaeafa96a24e7e59777632dbdac225 models/65B/consolidated.04.pth +a287c0dfe49081626567c7fe87f74cce5831f58e459b427b5e05567641f47b78 models/65B/consolidated.05.pth +72b4eba67a1a3b18cb67a85b70f8f1640caae9b40033ea943fb166bd80a7b36b models/65B/consolidated.06.pth +d27f5b0677d7ff129ceacd73fd461c4d06910ad7787cf217b249948c3f3bc638 models/65B/consolidated.07.pth +60758f2384d74e423dffddfd020ffed9d3bb186ebc54506f9c4a787d0f5367b0 models/65B/ggml-model-f16.bin +c671fe1bce71499ac732ec999770ebe53ac486623a7891e42c9dfdb6962d2c64 models/65B/ggml-model-q4_0.bin 
+4743a28aac3e5f32a6e838a815f51d3779de44fbbe251d745251e66c23c5950f models/65B/ggml-model-q4_1.bin +4a145a210c56982389b1ed34387e0590c3e0d7325fa9be4f2284fe4d244a3633 models/65B/ggml-model-q4_2.bin +305e91a4608b4f627b9b8ad5b4af75187d2684254bfd76dcb9db571618ef293c models/65B/ggml-model-q4_3.bin +999ed1659b469ccc2a941714c0a9656fa571d17c9f7c8c7589817ca90edef51b models/65B/params.json +9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347 models/tokenizer.model diff --git a/Sources/llama/llama.h b/Sources/llama/llama.h deleted file mode 100644 index 41725880e..000000000 --- a/Sources/llama/llama.h +++ /dev/null @@ -1,4 +0,0 @@ -#pragma once - -#include - diff --git a/Sources/llama/module.modulemap b/Sources/llama/module.modulemap deleted file mode 100644 index d010555b1..000000000 --- a/Sources/llama/module.modulemap +++ /dev/null @@ -1,5 +0,0 @@ -module llama [system] { - header "llama.h" - link "llama" - export * -} diff --git a/build.zig b/build.zig new file mode 100644 index 000000000..306127ffe --- /dev/null +++ b/build.zig @@ -0,0 +1,61 @@ +const std = @import("std"); + +pub fn build(b: *std.build.Builder) void { + const target = b.standardTargetOptions(.{}); + const optimize = b.standardReleaseOptions(); + const want_lto = b.option(bool, "lto", "Want -fLTO"); + + const lib = b.addStaticLibrary("llama", null); + lib.want_lto = want_lto; + lib.setTarget(target); + lib.setBuildMode(optimize); + lib.linkLibCpp(); + lib.addIncludePath("."); + lib.addIncludePath("examples"); + lib.addCSourceFiles(&.{ + "ggml.c", + }, &.{"-std=c11"}); + lib.addCSourceFiles(&.{ + "llama.cpp", + }, &.{"-std=c++11"}); + lib.install(); + + const build_args = .{ .b = b, .lib = lib, .target = target, .optimize = optimize, .want_lto = want_lto }; + + const exe = build_example("main", build_args); + _ = build_example("quantize", build_args); + _ = build_example("perplexity", build_args); + _ = build_example("embedding", build_args); + + // create "zig build run" command for ./main + + const run_cmd = exe.run(); + run_cmd.step.dependOn(b.getInstallStep()); + if (b.args) |args| { + run_cmd.addArgs(args); + } + + const run_step = b.step("run", "Run the app"); + run_step.dependOn(&run_cmd.step); +} + +fn build_example(comptime name: []const u8, args: anytype) *std.build.LibExeObjStep { + const b = args.b; + const lib = args.lib; + const want_lto = args.want_lto; + + const exe = b.addExecutable(name, null); + exe.want_lto = want_lto; + lib.setTarget(args.target); + lib.setBuildMode(args.optimize); + exe.addIncludePath("."); + exe.addIncludePath("examples"); + exe.addCSourceFiles(&.{ + std.fmt.comptimePrint("examples/{s}/{s}.cpp", .{name, name}), + "examples/common.cpp", + }, &.{"-std=c++11"}); + exe.linkLibrary(lib); + exe.install(); + + return exe; +} diff --git a/ci/README.md b/ci/README.md deleted file mode 100644 index 406470519..000000000 --- a/ci/README.md +++ /dev/null @@ -1,29 +0,0 @@ -# CI - -In addition to [Github Actions](https://github.com/ggerganov/llama.cpp/actions) `llama.cpp` uses a custom CI framework: - -https://github.com/ggml-org/ci - -It monitors the `master` branch for new commits and runs the -[ci/run.sh](https://github.com/ggerganov/llama.cpp/blob/master/ci/run.sh) script on dedicated cloud instances. This allows us -to execute heavier workloads compared to just using Github Actions. Also with time, the cloud instances will be scaled -to cover various hardware architectures, including GPU and Apple Silicon instances. 
- -Collaborators can optionally trigger the CI run by adding the `ggml-ci` keyword to their commit message. -Only the branches of this repo are monitored for this keyword. - -It is a good practice, before publishing changes to execute the full CI locally on your machine: - -```bash -mkdir tmp - -# CPU-only build -bash ./ci/run.sh ./tmp/results ./tmp/mnt - -# with CUDA support -GG_BUILD_CUDA=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt - -# with SYCL support -source /opt/intel/oneapi/setvars.sh -GG_BUILD_SYCL=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt -``` diff --git a/ci/run.sh b/ci/run.sh deleted file mode 100755 index 77c32ce00..000000000 --- a/ci/run.sh +++ /dev/null @@ -1,854 +0,0 @@ -#!/bin/bash -# -# sample usage: -# -# mkdir tmp -# -# # CPU-only build -# bash ./ci/run.sh ./tmp/results ./tmp/mnt -# -# # with CUDA support -# GG_BUILD_CUDA=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt -# -# # with SYCL support -# GG_BUILD_SYCL=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt -# -# # with VULKAN support -# GG_BUILD_VULKAN=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt -# - -if [ -z "$2" ]; then - echo "usage: $0 " - exit 1 -fi - -mkdir -p "$1" -mkdir -p "$2" - -OUT=$(realpath "$1") -MNT=$(realpath "$2") - -rm -f "$OUT/*.log" -rm -f "$OUT/*.exit" -rm -f "$OUT/*.md" - -sd=`dirname $0` -cd $sd/../ -SRC=`pwd` - -CMAKE_EXTRA="-DLLAMA_FATAL_WARNINGS=ON" - -if [ ! -z ${GG_BUILD_METAL} ]; then - CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_METAL=ON -DGGML_METAL_USE_BF16=ON" -fi - -if [ ! -z ${GG_BUILD_CUDA} ]; then - CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_CUDA=ON -DCMAKE_CUDA_ARCHITECTURES=native" -fi - -if [ ! -z ${GG_BUILD_SYCL} ]; then - if [ -z ${ONEAPI_ROOT} ]; then - echo "Not detected ONEAPI_ROOT, please install oneAPI base toolkit and enable it by:" - echo "source /opt/intel/oneapi/setvars.sh" - exit 1 - fi - - CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_SYCL=1 -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON" -fi - -if [ ! -z ${GG_BUILD_VULKAN} ]; then - CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_VULKAN=1" -fi -## helpers - -# download a file if it does not exist or if it is outdated -function gg_wget { - local out=$1 - local url=$2 - - local cwd=`pwd` - - mkdir -p $out - cd $out - - # should not re-download if file is the same - wget -nv -N $url - - cd $cwd -} - -function gg_printf { - printf -- "$@" >> $OUT/README.md -} - -function gg_run { - ci=$1 - - set -o pipefail - set -x - - gg_run_$ci | tee $OUT/$ci.log - cur=$? - echo "$cur" > $OUT/$ci.exit - - set +x - set +o pipefail - - gg_sum_$ci - - ret=$((ret | cur)) -} - -## ci - -# ctest_debug - -function gg_run_ctest_debug { - cd ${SRC} - - rm -rf build-ci-debug && mkdir build-ci-debug && cd build-ci-debug - - set -e - - # Check cmake, make and ctest are installed - gg_check_build_requirements - - (time cmake -DCMAKE_BUILD_TYPE=Debug ${CMAKE_EXTRA} .. 
) 2>&1 | tee -a $OUT/${ci}-cmake.log - (time make -j$(nproc) ) 2>&1 | tee -a $OUT/${ci}-make.log - - (time ctest --output-on-failure -L main -E test-opt ) 2>&1 | tee -a $OUT/${ci}-ctest.log - - set +e -} - -function gg_sum_ctest_debug { - gg_printf '### %s\n\n' "${ci}" - - gg_printf 'Runs ctest in debug mode\n' - gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)" - gg_printf '```\n' - gg_printf '%s\n' "$(cat $OUT/${ci}-ctest.log)" - gg_printf '```\n' - gg_printf '\n' -} - -# ctest_release - -function gg_run_ctest_release { - cd ${SRC} - - rm -rf build-ci-release && mkdir build-ci-release && cd build-ci-release - - set -e - - # Check cmake, make and ctest are installed - gg_check_build_requirements - - (time cmake -DCMAKE_BUILD_TYPE=Release ${CMAKE_EXTRA} .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log - (time make -j$(nproc) ) 2>&1 | tee -a $OUT/${ci}-make.log - - if [ -z ${GG_BUILD_LOW_PERF} ]; then - (time ctest --output-on-failure -L main ) 2>&1 | tee -a $OUT/${ci}-ctest.log - else - (time ctest --output-on-failure -L main -E test-opt ) 2>&1 | tee -a $OUT/${ci}-ctest.log - fi - - set +e -} - -function gg_sum_ctest_release { - gg_printf '### %s\n\n' "${ci}" - - gg_printf 'Runs ctest in release mode\n' - gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)" - gg_printf '```\n' - gg_printf '%s\n' "$(cat $OUT/${ci}-ctest.log)" - gg_printf '```\n' -} - -# test_scripts_debug - -function gg_run_test_scripts_debug { - cd ${SRC} - - set -e - - (cd ./examples/gguf-split && time bash tests.sh "$SRC/build-ci-debug/bin" "$MNT/models") 2>&1 | tee -a $OUT/${ci}-scripts.log - (cd ./examples/quantize && time bash tests.sh "$SRC/build-ci-debug/bin" "$MNT/models") 2>&1 | tee -a $OUT/${ci}-scripts.log - - set +e -} - -function gg_sum_test_scripts_debug { - gg_printf '### %s\n\n' "${ci}" - - gg_printf 'Runs test scripts in debug mode\n' - gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)" - gg_printf '```\n' - gg_printf '%s\n' "$(cat $OUT/${ci}-scripts.log)" - gg_printf '```\n' - gg_printf '\n' -} - -# test_scripts_release - -function gg_run_test_scripts_release { - cd ${SRC} - - set -e - - (cd ./examples/gguf-split && time bash tests.sh "$SRC/build-ci-release/bin" "$MNT/models") 2>&1 | tee -a $OUT/${ci}-scripts.log - (cd ./examples/quantize && time bash tests.sh "$SRC/build-ci-release/bin" "$MNT/models") 2>&1 | tee -a $OUT/${ci}-scripts.log - - set +e -} - -function gg_sum_test_scripts_release { - gg_printf '### %s\n\n' "${ci}" - - gg_printf 'Runs test scripts in release mode\n' - gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)" - gg_printf '```\n' - gg_printf '%s\n' "$(cat $OUT/${ci}-scripts.log)" - gg_printf '```\n' - gg_printf '\n' -} - -function gg_get_model { - local gguf_0="$MNT/models/pythia/1.4B/ggml-model-f16.gguf" - local gguf_1="$MNT/models/pythia/2.8B/ggml-model-f16.gguf" - local gguf_2="$MNT/models/open-llama/7B-v2/ggml-model-f16.gguf" - if [[ -s $gguf_0 ]]; then - echo -n "$gguf_0" - elif [[ -s $gguf_1 ]]; then - echo -n "$gguf_1" - elif [[ -s $gguf_2 ]]; then - echo -n "$gguf_2" - else - echo >&2 "No model found. Can't run gg_run_ctest_with_model." - exit 1 - fi -} - -function gg_run_ctest_with_model_debug { - cd ${SRC} - - local model; model=$(gg_get_model) - cd build-ci-debug - set -e - (LLAMACPP_TEST_MODELFILE="$model" time ctest --output-on-failure -L model) 2>&1 | tee -a $OUT/${ci}-ctest.log - set +e - cd .. 
-} - -function gg_run_ctest_with_model_release { - cd ${SRC} - - local model; model=$(gg_get_model) - cd build-ci-release - set -e - (LLAMACPP_TEST_MODELFILE="$model" time ctest --output-on-failure -L model) 2>&1 | tee -a $OUT/${ci}-ctest.log - set +e - cd .. -} - -function gg_sum_ctest_with_model_debug { - gg_printf '### %s\n\n' "${ci}" - - gg_printf 'Runs ctest with model files in debug mode\n' - gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)" - gg_printf '```\n' - gg_printf '%s\n' "$(cat $OUT/${ci}-ctest.log)" - gg_printf '```\n' -} - -function gg_sum_ctest_with_model_release { - gg_printf '### %s\n\n' "${ci}" - - gg_printf 'Runs ctest with model files in release mode\n' - gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)" - gg_printf '```\n' - gg_printf '%s\n' "$(cat $OUT/${ci}-ctest.log)" - gg_printf '```\n' -} - -# open_llama_7b_v2 - -function gg_run_open_llama_7b_v2 { - cd ${SRC} - - gg_wget models-mnt/open-llama/7B-v2/ https://huggingface.co/openlm-research/open_llama_7b_v2/raw/main/config.json - gg_wget models-mnt/open-llama/7B-v2/ https://huggingface.co/openlm-research/open_llama_7b_v2/resolve/main/tokenizer.model - gg_wget models-mnt/open-llama/7B-v2/ https://huggingface.co/openlm-research/open_llama_7b_v2/raw/main/tokenizer_config.json - gg_wget models-mnt/open-llama/7B-v2/ https://huggingface.co/openlm-research/open_llama_7b_v2/raw/main/special_tokens_map.json - gg_wget models-mnt/open-llama/7B-v2/ https://huggingface.co/openlm-research/open_llama_7b_v2/raw/main/pytorch_model.bin.index.json - gg_wget models-mnt/open-llama/7B-v2/ https://huggingface.co/openlm-research/open_llama_7b_v2/resolve/main/pytorch_model-00001-of-00002.bin - gg_wget models-mnt/open-llama/7B-v2/ https://huggingface.co/openlm-research/open_llama_7b_v2/resolve/main/pytorch_model-00002-of-00002.bin - gg_wget models-mnt/open-llama/7B-v2/ https://huggingface.co/openlm-research/open_llama_7b_v2/raw/main/generation_config.json - - gg_wget models-mnt/wikitext/ https://huggingface.co/datasets/ggml-org/ci/resolve/main/wikitext-2-raw-v1.zip - unzip -o models-mnt/wikitext/wikitext-2-raw-v1.zip -d models-mnt/wikitext/ - - path_models="../models-mnt/open-llama/7B-v2" - path_wiki="../models-mnt/wikitext/wikitext-2-raw" - - rm -rf build-ci-release && mkdir build-ci-release && cd build-ci-release - - set -e - - (time cmake -DCMAKE_BUILD_TYPE=Release ${CMAKE_EXTRA} .. 
) 2>&1 | tee -a $OUT/${ci}-cmake.log - (time make -j$(nproc) ) 2>&1 | tee -a $OUT/${ci}-make.log - - python3 ../examples/convert_legacy_llama.py ${path_models} --outfile ${path_models}/ggml-model-f16.gguf - - model_f16="${path_models}/ggml-model-f16.gguf" - model_q8_0="${path_models}/ggml-model-q8_0.gguf" - model_q4_0="${path_models}/ggml-model-q4_0.gguf" - model_q4_1="${path_models}/ggml-model-q4_1.gguf" - model_q5_0="${path_models}/ggml-model-q5_0.gguf" - model_q5_1="${path_models}/ggml-model-q5_1.gguf" - model_q2_k="${path_models}/ggml-model-q2_k.gguf" - model_q3_k="${path_models}/ggml-model-q3_k.gguf" - model_q4_k="${path_models}/ggml-model-q4_k.gguf" - model_q5_k="${path_models}/ggml-model-q5_k.gguf" - model_q6_k="${path_models}/ggml-model-q6_k.gguf" - - wiki_test="${path_wiki}/wiki.test.raw" - - ./bin/llama-quantize ${model_f16} ${model_q8_0} q8_0 - ./bin/llama-quantize ${model_f16} ${model_q4_0} q4_0 - ./bin/llama-quantize ${model_f16} ${model_q4_1} q4_1 - ./bin/llama-quantize ${model_f16} ${model_q5_0} q5_0 - ./bin/llama-quantize ${model_f16} ${model_q5_1} q5_1 - ./bin/llama-quantize ${model_f16} ${model_q2_k} q2_k - ./bin/llama-quantize ${model_f16} ${model_q3_k} q3_k - ./bin/llama-quantize ${model_f16} ${model_q4_k} q4_k - ./bin/llama-quantize ${model_f16} ${model_q5_k} q5_k - ./bin/llama-quantize ${model_f16} ${model_q6_k} q6_k - - (time ./bin/llama-cli -no-cnv --model ${model_f16} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-f16.log - (time ./bin/llama-cli -no-cnv --model ${model_q8_0} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q8_0.log - (time ./bin/llama-cli -no-cnv --model ${model_q4_0} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_0.log - (time ./bin/llama-cli -no-cnv --model ${model_q4_1} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_1.log - (time ./bin/llama-cli -no-cnv --model ${model_q5_0} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_0.log - (time ./bin/llama-cli -no-cnv --model ${model_q5_1} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_1.log - (time ./bin/llama-cli -no-cnv --model ${model_q2_k} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q2_k.log - (time ./bin/llama-cli -no-cnv --model ${model_q3_k} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q3_k.log - (time ./bin/llama-cli -no-cnv --model ${model_q4_k} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_k.log - (time ./bin/llama-cli -no-cnv --model ${model_q5_k} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_k.log - (time ./bin/llama-cli -no-cnv --model ${model_q6_k} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q6_k.log - - (time ./bin/llama-perplexity --model ${model_f16} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-f16.log - (time ./bin/llama-perplexity --model ${model_q8_0} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 
--chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q8_0.log - (time ./bin/llama-perplexity --model ${model_q4_0} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_0.log - (time ./bin/llama-perplexity --model ${model_q4_1} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_1.log - (time ./bin/llama-perplexity --model ${model_q5_0} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_0.log - (time ./bin/llama-perplexity --model ${model_q5_1} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_1.log - (time ./bin/llama-perplexity --model ${model_q2_k} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q2_k.log - (time ./bin/llama-perplexity --model ${model_q3_k} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q3_k.log - (time ./bin/llama-perplexity --model ${model_q4_k} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_k.log - (time ./bin/llama-perplexity --model ${model_q5_k} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_k.log - (time ./bin/llama-perplexity --model ${model_q6_k} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q6_k.log - - (time ./bin/llama-imatrix --model ${model_f16} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-imatrix.log - - (time ./bin/llama-save-load-state--model ${model_q4_0} -ngl 10 -c 0 ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log - (time ./bin/llama-save-load-state--model ${model_q4_0} -ngl 10 -c 0 -fa ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log - (time ./bin/llama-save-load-state--model ${model_q4_0} -ngl 99 -c 0 ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log - (time ./bin/llama-save-load-state--model ${model_q4_0} -ngl 99 -c 0 -fa ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log - - function check_ppl { - qnt="$1" - ppl=$(echo "$2" | grep -oE "[0-9]+\.[0-9]+" | tail -n 1) - - if [ $(echo "$ppl > 20.0" | bc) -eq 1 ]; then - printf ' - %s @ %s (FAIL: ppl > 20.0)\n' "$qnt" "$ppl" - return 20 - fi - - printf ' - %s @ %s OK\n' "$qnt" "$ppl" - return 0 - } - - check_ppl "f16" "$(cat $OUT/${ci}-tg-f16.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log - check_ppl "q8_0" "$(cat $OUT/${ci}-tg-q8_0.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log - check_ppl "q4_0" "$(cat $OUT/${ci}-tg-q4_0.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log - check_ppl "q4_1" "$(cat $OUT/${ci}-tg-q4_1.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log - check_ppl "q5_0" "$(cat $OUT/${ci}-tg-q5_0.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log - check_ppl "q5_1" "$(cat $OUT/${ci}-tg-q5_1.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log - check_ppl "q2_k" "$(cat $OUT/${ci}-tg-q2_k.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log - check_ppl "q3_k" "$(cat $OUT/${ci}-tg-q3_k.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log - check_ppl "q4_k" "$(cat $OUT/${ci}-tg-q4_k.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log - check_ppl "q5_k" "$(cat $OUT/${ci}-tg-q5_k.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log - check_ppl "q6_k" "$(cat $OUT/${ci}-tg-q6_k.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log - - cat $OUT/${ci}-imatrix.log | grep "Final" >> $OUT/${ci}-imatrix-sum.log - - set +e -} - -function gg_sum_open_llama_7b_v2 { - gg_printf '### %s\n\n' "${ci}" - - gg_printf 'OpenLLaMA 7B-v2:\n' - gg_printf '- status: %s\n' "$(cat 
$OUT/${ci}.exit)" - gg_printf '- perplexity:\n%s\n' "$(cat $OUT/${ci}-ppl.log)" - gg_printf '- imatrix:\n```\n%s\n```\n' "$(cat $OUT/${ci}-imatrix-sum.log)" - gg_printf '- f16: \n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-f16.log)" - gg_printf '- q8_0:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q8_0.log)" - gg_printf '- q4_0:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q4_0.log)" - gg_printf '- q4_1:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q4_1.log)" - gg_printf '- q5_0:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q5_0.log)" - gg_printf '- q5_1:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q5_1.log)" - gg_printf '- q2_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q2_k.log)" - gg_printf '- q3_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q3_k.log)" - gg_printf '- q4_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q4_k.log)" - gg_printf '- q5_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q5_k.log)" - gg_printf '- q6_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q6_k.log)" - gg_printf '- save-load-state: \n```\n%s\n```\n' "$(cat $OUT/${ci}-save-load-state.log)" -} - -# pythia_1.4b - -function gg_run_pythia_1_4b { - cd ${SRC} - - gg_wget models-mnt/pythia/1.4B/ https://huggingface.co/EleutherAI/pythia-1.4b/raw/main/config.json - gg_wget models-mnt/pythia/1.4B/ https://huggingface.co/EleutherAI/pythia-1.4b/raw/main/tokenizer.json - gg_wget models-mnt/pythia/1.4B/ https://huggingface.co/EleutherAI/pythia-1.4b/raw/main/tokenizer_config.json - gg_wget models-mnt/pythia/1.4B/ https://huggingface.co/EleutherAI/pythia-1.4b/raw/main/special_tokens_map.json - gg_wget models-mnt/pythia/1.4B/ https://huggingface.co/EleutherAI/pythia-1.4b/resolve/main/pytorch_model.bin - - gg_wget models-mnt/wikitext/ https://huggingface.co/datasets/ggml-org/ci/resolve/main/wikitext-2-raw-v1.zip - unzip -o models-mnt/wikitext/wikitext-2-raw-v1.zip -d models-mnt/wikitext/ - head -n 60 models-mnt/wikitext/wikitext-2-raw/wiki.test.raw > models-mnt/wikitext/wikitext-2-raw/wiki.test-60.raw - - path_models="../models-mnt/pythia/1.4B" - path_wiki="../models-mnt/wikitext/wikitext-2-raw" - - rm -rf build-ci-release && mkdir build-ci-release && cd build-ci-release - - set -e - - (time cmake -DCMAKE_BUILD_TYPE=Release ${CMAKE_EXTRA} .. 
) 2>&1 | tee -a $OUT/${ci}-cmake.log - (time make -j$(nproc) ) 2>&1 | tee -a $OUT/${ci}-make.log - - python3 ../convert_hf_to_gguf.py ${path_models} --outfile ${path_models}/ggml-model-f16.gguf - - model_f16="${path_models}/ggml-model-f16.gguf" - model_q8_0="${path_models}/ggml-model-q8_0.gguf" - model_q4_0="${path_models}/ggml-model-q4_0.gguf" - model_q4_1="${path_models}/ggml-model-q4_1.gguf" - model_q5_0="${path_models}/ggml-model-q5_0.gguf" - model_q5_1="${path_models}/ggml-model-q5_1.gguf" - model_q2_k="${path_models}/ggml-model-q2_k.gguf" - model_q3_k="${path_models}/ggml-model-q3_k.gguf" - model_q4_k="${path_models}/ggml-model-q4_k.gguf" - model_q5_k="${path_models}/ggml-model-q5_k.gguf" - model_q6_k="${path_models}/ggml-model-q6_k.gguf" - - wiki_test_60="${path_wiki}/wiki.test-60.raw" - - ./bin/llama-quantize ${model_f16} ${model_q8_0} q8_0 - ./bin/llama-quantize ${model_f16} ${model_q4_0} q4_0 - ./bin/llama-quantize ${model_f16} ${model_q4_1} q4_1 - ./bin/llama-quantize ${model_f16} ${model_q5_0} q5_0 - ./bin/llama-quantize ${model_f16} ${model_q5_1} q5_1 - ./bin/llama-quantize ${model_f16} ${model_q2_k} q2_k - ./bin/llama-quantize ${model_f16} ${model_q3_k} q3_k - ./bin/llama-quantize ${model_f16} ${model_q4_k} q4_k - ./bin/llama-quantize ${model_f16} ${model_q5_k} q5_k - ./bin/llama-quantize ${model_f16} ${model_q6_k} q6_k - - (time ./bin/llama-cli -no-cnv --model ${model_f16} -ngl 99 -c 0 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-f16.log - (time ./bin/llama-cli -no-cnv --model ${model_q8_0} -ngl 99 -c 0 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q8_0.log - (time ./bin/llama-cli -no-cnv --model ${model_q4_0} -ngl 99 -c 0 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_0.log - (time ./bin/llama-cli -no-cnv --model ${model_q4_1} -ngl 99 -c 0 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_1.log - (time ./bin/llama-cli -no-cnv --model ${model_q5_0} -ngl 99 -c 0 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_0.log - (time ./bin/llama-cli -no-cnv --model ${model_q5_1} -ngl 99 -c 0 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_1.log - (time ./bin/llama-cli -no-cnv --model ${model_q2_k} -ngl 99 -c 0 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q2_k.log - (time ./bin/llama-cli -no-cnv --model ${model_q3_k} -ngl 99 -c 0 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q3_k.log - (time ./bin/llama-cli -no-cnv --model ${model_q4_k} -ngl 99 -c 0 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_k.log - (time ./bin/llama-cli -no-cnv --model ${model_q5_k} -ngl 99 -c 0 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_k.log - (time ./bin/llama-cli -no-cnv --model ${model_q6_k} -ngl 99 -c 0 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q6_k.log - - (time ./bin/llama-perplexity --model ${model_f16} -f ${wiki_test_60} -ngl 99 -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-f16.log - (time ./bin/llama-perplexity --model ${model_q8_0} -f ${wiki_test_60} -ngl 99 -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q8_0.log - (time 
./bin/llama-perplexity --model ${model_q4_0} -f ${wiki_test_60} -ngl 99 -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_0.log - (time ./bin/llama-perplexity --model ${model_q4_1} -f ${wiki_test_60} -ngl 99 -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_1.log - (time ./bin/llama-perplexity --model ${model_q5_0} -f ${wiki_test_60} -ngl 99 -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_0.log - (time ./bin/llama-perplexity --model ${model_q5_1} -f ${wiki_test_60} -ngl 99 -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_1.log - (time ./bin/llama-perplexity --model ${model_q2_k} -f ${wiki_test_60} -ngl 99 -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q2_k.log - (time ./bin/llama-perplexity --model ${model_q3_k} -f ${wiki_test_60} -ngl 99 -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q3_k.log - (time ./bin/llama-perplexity --model ${model_q4_k} -f ${wiki_test_60} -ngl 99 -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_k.log - (time ./bin/llama-perplexity --model ${model_q5_k} -f ${wiki_test_60} -ngl 99 -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_k.log - (time ./bin/llama-perplexity --model ${model_q6_k} -f ${wiki_test_60} -ngl 99 -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q6_k.log - - (time ./bin/llama-imatrix --model ${model_f16} -f ${wiki_test_60} -ngl 99 -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-imatrix.log - - (time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 99 -c 0 ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log - (time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 99 -c 0 -fa ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log - - function check_ppl { - qnt="$1" - ppl=$(echo "$2" | grep -oE "[0-9]+\.[0-9]+" | tail -n 1) - - if [ $(echo "$ppl > 20.0" | bc) -eq 1 ]; then - printf ' - %s @ %s (FAIL: ppl > 20.0)\n' "$qnt" "$ppl" - return 20 - fi - - printf ' - %s @ %s OK\n' "$qnt" "$ppl" - return 0 - } - - check_ppl "f16" "$(cat $OUT/${ci}-tg-f16.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log - check_ppl "q8_0" "$(cat $OUT/${ci}-tg-q8_0.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log - check_ppl "q4_0" "$(cat $OUT/${ci}-tg-q4_0.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log - check_ppl "q4_1" "$(cat $OUT/${ci}-tg-q4_1.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log - check_ppl "q5_0" "$(cat $OUT/${ci}-tg-q5_0.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log - check_ppl "q5_1" "$(cat $OUT/${ci}-tg-q5_1.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log - #check_ppl "q2_k" "$(cat $OUT/${ci}-tg-q2_k.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log # note: ppl > 20.0 for this quant and model - check_ppl "q3_k" "$(cat $OUT/${ci}-tg-q3_k.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log - check_ppl "q4_k" "$(cat $OUT/${ci}-tg-q4_k.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log - check_ppl "q5_k" "$(cat $OUT/${ci}-tg-q5_k.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log - check_ppl "q6_k" "$(cat $OUT/${ci}-tg-q6_k.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log - - cat $OUT/${ci}-imatrix.log | grep "Final" >> $OUT/${ci}-imatrix-sum.log - - set +e -} - -function gg_sum_pythia_1_4b { - gg_printf '### %s\n\n' "${ci}" - - gg_printf 'Pythia 1.4B:\n' - gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)" - gg_printf '- perplexity:\n%s\n' "$(cat $OUT/${ci}-ppl.log)" - gg_printf '- imatrix:\n```\n%s\n```\n' "$(cat $OUT/${ci}-imatrix-sum.log)" - gg_printf '- f16: \n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-f16.log)" - gg_printf '- q8_0:\n```\n%s\n```\n' "$(cat 
$OUT/${ci}-tg-q8_0.log)" - gg_printf '- q4_0:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q4_0.log)" - gg_printf '- q4_1:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q4_1.log)" - gg_printf '- q5_0:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q5_0.log)" - gg_printf '- q5_1:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q5_1.log)" - gg_printf '- q2_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q2_k.log)" - gg_printf '- q3_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q3_k.log)" - gg_printf '- q4_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q4_k.log)" - gg_printf '- q5_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q5_k.log)" - gg_printf '- q6_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q6_k.log)" - gg_printf '- save-load-state: \n```\n%s\n```\n' "$(cat $OUT/${ci}-save-load-state.log)" -} - -# pythia_2_8b - -function gg_run_pythia_2_8b { - cd ${SRC} - - gg_wget models-mnt/pythia/2.8B/ https://huggingface.co/EleutherAI/pythia-2.8b/raw/main/config.json - gg_wget models-mnt/pythia/2.8B/ https://huggingface.co/EleutherAI/pythia-2.8b/raw/main/tokenizer.json - gg_wget models-mnt/pythia/2.8B/ https://huggingface.co/EleutherAI/pythia-2.8b/raw/main/tokenizer_config.json - gg_wget models-mnt/pythia/2.8B/ https://huggingface.co/EleutherAI/pythia-2.8b/raw/main/special_tokens_map.json - gg_wget models-mnt/pythia/2.8B/ https://huggingface.co/EleutherAI/pythia-2.8b/resolve/main/pytorch_model.bin - - gg_wget models-mnt/wikitext/ https://huggingface.co/datasets/ggml-org/ci/resolve/main/wikitext-2-raw-v1.zip - unzip -o models-mnt/wikitext/wikitext-2-raw-v1.zip -d models-mnt/wikitext/ - - path_models="../models-mnt/pythia/2.8B" - path_wiki="../models-mnt/wikitext/wikitext-2-raw" - - rm -rf build-ci-release && mkdir build-ci-release && cd build-ci-release - - set -e - - (time cmake -DCMAKE_BUILD_TYPE=Release ${CMAKE_EXTRA} .. 
) 2>&1 | tee -a $OUT/${ci}-cmake.log - (time make -j$(nproc) ) 2>&1 | tee -a $OUT/${ci}-make.log - - python3 ../convert_hf_to_gguf.py ${path_models} --outfile ${path_models}/ggml-model-f16.gguf - - model_f16="${path_models}/ggml-model-f16.gguf" - model_q8_0="${path_models}/ggml-model-q8_0.gguf" - model_q4_0="${path_models}/ggml-model-q4_0.gguf" - model_q4_1="${path_models}/ggml-model-q4_1.gguf" - model_q5_0="${path_models}/ggml-model-q5_0.gguf" - model_q5_1="${path_models}/ggml-model-q5_1.gguf" - model_q2_k="${path_models}/ggml-model-q2_k.gguf" - model_q3_k="${path_models}/ggml-model-q3_k.gguf" - model_q4_k="${path_models}/ggml-model-q4_k.gguf" - model_q5_k="${path_models}/ggml-model-q5_k.gguf" - model_q6_k="${path_models}/ggml-model-q6_k.gguf" - - wiki_test="${path_wiki}/wiki.test.raw" - - ./bin/llama-quantize ${model_f16} ${model_q8_0} q8_0 - ./bin/llama-quantize ${model_f16} ${model_q4_0} q4_0 - ./bin/llama-quantize ${model_f16} ${model_q4_1} q4_1 - ./bin/llama-quantize ${model_f16} ${model_q5_0} q5_0 - ./bin/llama-quantize ${model_f16} ${model_q5_1} q5_1 - ./bin/llama-quantize ${model_f16} ${model_q2_k} q2_k - ./bin/llama-quantize ${model_f16} ${model_q3_k} q3_k - ./bin/llama-quantize ${model_f16} ${model_q4_k} q4_k - ./bin/llama-quantize ${model_f16} ${model_q5_k} q5_k - ./bin/llama-quantize ${model_f16} ${model_q6_k} q6_k - - (time ./bin/llama-cli -no-cnv --model ${model_f16} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-f16.log - (time ./bin/llama-cli -no-cnv --model ${model_q8_0} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q8_0.log - (time ./bin/llama-cli -no-cnv --model ${model_q4_0} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_0.log - (time ./bin/llama-cli -no-cnv --model ${model_q4_1} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_1.log - (time ./bin/llama-cli -no-cnv --model ${model_q5_0} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_0.log - (time ./bin/llama-cli -no-cnv --model ${model_q5_1} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_1.log - (time ./bin/llama-cli -no-cnv --model ${model_q2_k} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q2_k.log - (time ./bin/llama-cli -no-cnv --model ${model_q3_k} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q3_k.log - (time ./bin/llama-cli -no-cnv --model ${model_q4_k} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_k.log - (time ./bin/llama-cli -no-cnv --model ${model_q5_k} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_k.log - (time ./bin/llama-cli -no-cnv --model ${model_q6_k} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q6_k.log - - (time ./bin/llama-perplexity --model ${model_f16} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-f16.log - (time ./bin/llama-perplexity --model ${model_q8_0} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 
2>&1 | tee -a $OUT/${ci}-tg-q8_0.log - (time ./bin/llama-perplexity --model ${model_q4_0} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_0.log - (time ./bin/llama-perplexity --model ${model_q4_1} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_1.log - (time ./bin/llama-perplexity --model ${model_q5_0} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_0.log - (time ./bin/llama-perplexity --model ${model_q5_1} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_1.log - (time ./bin/llama-perplexity --model ${model_q2_k} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q2_k.log - (time ./bin/llama-perplexity --model ${model_q3_k} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q3_k.log - (time ./bin/llama-perplexity --model ${model_q4_k} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_k.log - (time ./bin/llama-perplexity --model ${model_q5_k} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_k.log - (time ./bin/llama-perplexity --model ${model_q6_k} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q6_k.log - - (time ./bin/llama-imatrix --model ${model_f16} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-imatrix.log - - (time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 10 -c 0 ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log - (time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 10 -c 0 -fa ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log - (time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 99 -c 0 ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log - (time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 99 -c 0 -fa ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log - - function check_ppl { - qnt="$1" - ppl=$(echo "$2" | grep -oE "[0-9]+\.[0-9]+" | tail -n 1) - - if [ $(echo "$ppl > 20.0" | bc) -eq 1 ]; then - printf ' - %s @ %s (FAIL: ppl > 20.0)\n' "$qnt" "$ppl" - return 20 - fi - - printf ' - %s @ %s OK\n' "$qnt" "$ppl" - return 0 - } - - check_ppl "f16" "$(cat $OUT/${ci}-tg-f16.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log - check_ppl "q8_0" "$(cat $OUT/${ci}-tg-q8_0.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log - check_ppl "q4_0" "$(cat $OUT/${ci}-tg-q4_0.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log - check_ppl "q4_1" "$(cat $OUT/${ci}-tg-q4_1.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log - check_ppl "q5_0" "$(cat $OUT/${ci}-tg-q5_0.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log - check_ppl "q5_1" "$(cat $OUT/${ci}-tg-q5_1.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log - #check_ppl "q2_k" "$(cat $OUT/${ci}-tg-q2_k.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log # note: ppl > 20.0 for this quant and model - check_ppl "q3_k" "$(cat $OUT/${ci}-tg-q3_k.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log - check_ppl "q4_k" "$(cat $OUT/${ci}-tg-q4_k.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log - check_ppl "q5_k" "$(cat $OUT/${ci}-tg-q5_k.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log - check_ppl "q6_k" "$(cat $OUT/${ci}-tg-q6_k.log | grep "^\[1\]")" | tee -a $OUT/${ci}-ppl.log - - cat $OUT/${ci}-imatrix.log | grep "Final" >> $OUT/${ci}-imatrix-sum.log - - set +e -} - -function gg_sum_pythia_2_8b { - gg_printf '### %s\n\n' "${ci}" - - gg_printf 'Pythia 2.8B:\n' - gg_printf 
'- status: %s\n' "$(cat $OUT/${ci}.exit)" - gg_printf '- perplexity:\n%s\n' "$(cat $OUT/${ci}-ppl.log)" - gg_printf '- imatrix:\n```\n%s\n```\n' "$(cat $OUT/${ci}-imatrix-sum.log)" - gg_printf '- f16: \n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-f16.log)" - gg_printf '- q8_0:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q8_0.log)" - gg_printf '- q4_0:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q4_0.log)" - gg_printf '- q4_1:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q4_1.log)" - gg_printf '- q5_0:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q5_0.log)" - gg_printf '- q5_1:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q5_1.log)" - gg_printf '- q2_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q2_k.log)" - gg_printf '- q3_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q3_k.log)" - gg_printf '- q4_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q4_k.log)" - gg_printf '- q5_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q5_k.log)" - gg_printf '- q6_k:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q6_k.log)" - gg_printf '- save-load-state: \n```\n%s\n```\n' "$(cat $OUT/${ci}-save-load-state.log)" -} - -# bge-small - -function gg_run_embd_bge_small { - cd ${SRC} - - gg_wget models-mnt/bge-small/ https://huggingface.co/BAAI/bge-small-en-v1.5/raw/main/config.json - gg_wget models-mnt/bge-small/ https://huggingface.co/BAAI/bge-small-en-v1.5/raw/main/tokenizer.json - gg_wget models-mnt/bge-small/ https://huggingface.co/BAAI/bge-small-en-v1.5/raw/main/tokenizer_config.json - gg_wget models-mnt/bge-small/ https://huggingface.co/BAAI/bge-small-en-v1.5/raw/main/special_tokens_map.json - gg_wget models-mnt/bge-small/ https://huggingface.co/BAAI/bge-small-en-v1.5/resolve/main/pytorch_model.bin - gg_wget models-mnt/bge-small/ https://huggingface.co/BAAI/bge-small-en-v1.5/raw/main/sentence_bert_config.json - gg_wget models-mnt/bge-small/ https://huggingface.co/BAAI/bge-small-en-v1.5/raw/main/vocab.txt - gg_wget models-mnt/bge-small/ https://huggingface.co/BAAI/bge-small-en-v1.5/raw/main/modules.json - gg_wget models-mnt/bge-small/ https://huggingface.co/BAAI/bge-small-en-v1.5/raw/main/config.json - - gg_wget models-mnt/bge-small/1_Pooling https://huggingface.co/BAAI/bge-small-en-v1.5/raw/main/1_Pooling/config.json - - path_models="../models-mnt/bge-small" - - rm -rf build-ci-release && mkdir build-ci-release && cd build-ci-release - - set -e - - (time cmake -DCMAKE_BUILD_TYPE=Release ${CMAKE_EXTRA} .. 
) 2>&1 | tee -a $OUT/${ci}-cmake.log - (time make -j$(nproc) ) 2>&1 | tee -a $OUT/${ci}-make.log - - python3 ../convert_hf_to_gguf.py ${path_models} --outfile ${path_models}/ggml-model-f16.gguf - - model_f16="${path_models}/ggml-model-f16.gguf" - model_q8_0="${path_models}/ggml-model-q8_0.gguf" - - ./bin/llama-quantize ${model_f16} ${model_q8_0} q8_0 - - (time ./bin/llama-embedding --model ${model_f16} -p "I believe the meaning of life is" -ngl 99 -c 0 ) 2>&1 | tee -a $OUT/${ci}-tg-f16.log - (time ./bin/llama-embedding --model ${model_q8_0} -p "I believe the meaning of life is" -ngl 99 -c 0 ) 2>&1 | tee -a $OUT/${ci}-tg-q8_0.log - - set +e -} - -function gg_sum_embd_bge_small { - gg_printf '### %s\n\n' "${ci}" - - gg_printf 'BGE Small (BERT):\n' - gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)" - gg_printf '- f16: \n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-f16.log)" - gg_printf '- q8_0:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q8_0.log)" -} - -# rerank_tiny - -function gg_run_rerank_tiny { - cd ${SRC} - - gg_wget models-mnt/rerank-tiny/ https://huggingface.co/jinaai/jina-reranker-v1-tiny-en/raw/main/config.json - gg_wget models-mnt/rerank-tiny/ https://huggingface.co/jinaai/jina-reranker-v1-tiny-en/raw/main/tokenizer.json - gg_wget models-mnt/rerank-tiny/ https://huggingface.co/jinaai/jina-reranker-v1-tiny-en/raw/main/tokenizer_config.json - gg_wget models-mnt/rerank-tiny/ https://huggingface.co/jinaai/jina-reranker-v1-tiny-en/raw/main/special_tokens_map.json - gg_wget models-mnt/rerank-tiny/ https://huggingface.co/jinaai/jina-reranker-v1-tiny-en/resolve/main/pytorch_model.bin - gg_wget models-mnt/rerank-tiny/ https://huggingface.co/jinaai/jina-reranker-v1-tiny-en/raw/main/sentence_bert_config.json - gg_wget models-mnt/rerank-tiny/ https://huggingface.co/jinaai/jina-reranker-v1-tiny-en/raw/main/vocab.txt - gg_wget models-mnt/rerank-tiny/ https://huggingface.co/jinaai/jina-reranker-v1-tiny-en/raw/main/modules.json - gg_wget models-mnt/rerank-tiny/ https://huggingface.co/jinaai/jina-reranker-v1-tiny-en/raw/main/config.json - - gg_wget models-mnt/rerank-tiny/1_Pooling https://huggingface.co/jinaai/jina-reranker-v1-tiny-en/raw/main/1_Pooling/config.json - - path_models="../models-mnt/rerank-tiny" - - rm -rf build-ci-release && mkdir build-ci-release && cd build-ci-release - - set -e - - (time cmake -DCMAKE_BUILD_TYPE=Release ${CMAKE_EXTRA} .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log - (time make -j$(nproc) ) 2>&1 | tee -a $OUT/${ci}-make.log - - python3 ../convert_hf_to_gguf.py ${path_models} --outfile ${path_models}/ggml-model-f16.gguf - - model_f16="${path_models}/ggml-model-f16.gguf" - - # for this model, the SEP token is "" - (time ./bin/llama-embedding --model ${model_f16} -p "what is panda?hi\nwhat is panda?it's a bear\nwhat is panda?The giant panda (Ailuropoda melanoleuca), sometimes called a panda bear or simply panda, is a bear species endemic to China." 
-ngl 99 -c 0 --pooling rank --embd-normalize -1 --verbose-prompt) 2>&1 | tee -a $OUT/${ci}-rk-f16.log - - # sample output - # rerank score 0: 0.029 - # rerank score 1: 0.029 - # rerank score 2: 0.135 - - # check that the score is in the range [$3, $4] - function check_score { - qnt="$1" - score=$(echo "$2" | grep -oE "[0-9]+\.[0-9]+" | tail -n 1) - - if [ $(echo "$score < $3" | bc) -eq 1 ] || [ $(echo "$score > $4" | bc) -eq 1 ]; then - printf ' - %s @ %s (FAIL: score not in range [%s, %s])\n' "$qnt" "$score" "$3" "$4" - return 20 - fi - - printf ' - %s @ %s OK\n' "$qnt" "$score" - return 0 - } - - check_score "rerank score 0" "$(cat $OUT/${ci}-rk-f16.log | grep "rerank score 0")" "0.00" "0.05" | tee -a $OUT/${ci}-rk-f16.log - check_score "rerank score 1" "$(cat $OUT/${ci}-rk-f16.log | grep "rerank score 1")" "0.00" "0.05" | tee -a $OUT/${ci}-rk-f16.log - check_score "rerank score 2" "$(cat $OUT/${ci}-rk-f16.log | grep "rerank score 2")" "0.10" "0.30" | tee -a $OUT/${ci}-rk-f16.log - - set +e -} - -function gg_sum_rerank_tiny { - gg_printf '### %s\n\n' "${ci}" - - gg_printf 'Rerank Tiny (Jina):\n' - gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)" - gg_printf '- f16: \n```\n%s\n```\n' "$(cat $OUT/${ci}-rk-f16.log)" -} - -function gg_check_build_requirements { - if ! command -v cmake &> /dev/null; then - gg_printf 'cmake not found, please install' - fi - - if ! command -v make &> /dev/null; then - gg_printf 'make not found, please install' - fi - - if ! command -v ctest &> /dev/null; then - gg_printf 'ctest not found, please install' - fi -} - -## main - -export LLAMA_LOG_PREFIX=1 -export LLAMA_LOG_TIMESTAMPS=1 - -if [ -z ${GG_BUILD_LOW_PERF} ]; then - # Create symlink: ./llama.cpp/models-mnt -> $MNT/models/models-mnt - rm -rf ${SRC}/models-mnt - mnt_models=${MNT}/models - mkdir -p ${mnt_models} - ln -sfn ${mnt_models} ${SRC}/models-mnt - - # Create a fresh python3 venv and enter it - if ! python3 -m venv "$MNT/venv"; then - echo "Error: Failed to create Python virtual environment at $MNT/venv." 
- exit 1 - fi - source "$MNT/venv/bin/activate" - - pip install -r ${SRC}/requirements.txt --disable-pip-version-check - pip install --editable gguf-py --disable-pip-version-check -fi - -ret=0 - -test $ret -eq 0 && gg_run ctest_debug -test $ret -eq 0 && gg_run ctest_release - -if [ -z ${GG_BUILD_LOW_PERF} ]; then - test $ret -eq 0 && gg_run embd_bge_small - test $ret -eq 0 && gg_run rerank_tiny - - if [ -z ${GG_BUILD_CLOUD} ] || [ ${GG_BUILD_EXTRA_TESTS_0} ]; then - test $ret -eq 0 && gg_run test_scripts_debug - test $ret -eq 0 && gg_run test_scripts_release - fi - - if [ -z ${GG_BUILD_VRAM_GB} ] || [ ${GG_BUILD_VRAM_GB} -ge 8 ]; then - if [ -z ${GG_BUILD_CUDA} ] && [ -z ${GG_BUILD_VULKAN} ]; then - test $ret -eq 0 && gg_run pythia_1_4b - else - test $ret -eq 0 && gg_run pythia_2_8b - #test $ret -eq 0 && gg_run open_llama_7b_v2 - fi - test $ret -eq 0 && gg_run ctest_with_model_debug - test $ret -eq 0 && gg_run ctest_with_model_release - fi -fi - -exit $ret diff --git a/cmake/arm64-apple-clang.cmake b/cmake/arm64-apple-clang.cmake deleted file mode 100644 index 5fcd2882a..000000000 --- a/cmake/arm64-apple-clang.cmake +++ /dev/null @@ -1,16 +0,0 @@ -set( CMAKE_SYSTEM_NAME Darwin ) -set( CMAKE_SYSTEM_PROCESSOR arm64 ) - -set( target arm64-apple-darwin-macho ) - -set( CMAKE_C_COMPILER clang ) -set( CMAKE_CXX_COMPILER clang++ ) - -set( CMAKE_C_COMPILER_TARGET ${target} ) -set( CMAKE_CXX_COMPILER_TARGET ${target} ) - -set( arch_c_flags "-march=armv8.4-a -fvectorize -ffp-model=fast -fno-finite-math-only" ) -set( warn_c_flags "-Wno-format -Wno-unused-variable -Wno-unused-function" ) - -set( CMAKE_C_FLAGS_INIT "${arch_c_flags} ${warn_c_flags}" ) -set( CMAKE_CXX_FLAGS_INIT "${arch_c_flags} ${warn_c_flags}" ) diff --git a/cmake/arm64-windows-llvm.cmake b/cmake/arm64-windows-llvm.cmake deleted file mode 100644 index 802379680..000000000 --- a/cmake/arm64-windows-llvm.cmake +++ /dev/null @@ -1,16 +0,0 @@ -set( CMAKE_SYSTEM_NAME Windows ) -set( CMAKE_SYSTEM_PROCESSOR arm64 ) - -set( target arm64-pc-windows-msvc ) - -set( CMAKE_C_COMPILER clang ) -set( CMAKE_CXX_COMPILER clang++ ) - -set( CMAKE_C_COMPILER_TARGET ${target} ) -set( CMAKE_CXX_COMPILER_TARGET ${target} ) - -set( arch_c_flags "-march=armv8.7-a -fvectorize -ffp-model=fast -fno-finite-math-only" ) -set( warn_c_flags "-Wno-format -Wno-unused-variable -Wno-unused-function -Wno-gnu-zero-variadic-macro-arguments" ) - -set( CMAKE_C_FLAGS_INIT "${arch_c_flags} ${warn_c_flags}" ) -set( CMAKE_CXX_FLAGS_INIT "${arch_c_flags} ${warn_c_flags}" ) diff --git a/cmake/arm64-windows-msvc.cmake b/cmake/arm64-windows-msvc.cmake deleted file mode 100644 index c77631420..000000000 --- a/cmake/arm64-windows-msvc.cmake +++ /dev/null @@ -1,6 +0,0 @@ -set( CMAKE_SYSTEM_NAME Windows ) -set( CMAKE_SYSTEM_PROCESSOR arm64 ) - -set( target arm64-pc-windows-msvc ) -set( CMAKE_C_COMPILER_TARGET ${target} ) -set( CMAKE_CXX_COMPILER_TARGET ${target} ) diff --git a/cmake/build-info.cmake b/cmake/build-info.cmake deleted file mode 100644 index c1a456e17..000000000 --- a/cmake/build-info.cmake +++ /dev/null @@ -1,58 +0,0 @@ -set(BUILD_NUMBER 0) -set(BUILD_COMMIT "unknown") -set(BUILD_COMPILER "unknown") -set(BUILD_TARGET "unknown") - -# Look for git -find_package(Git) -if(NOT Git_FOUND) - find_program(GIT_EXECUTABLE NAMES git git.exe) - if(GIT_EXECUTABLE) - set(Git_FOUND TRUE) - message(STATUS "Found Git: ${GIT_EXECUTABLE}") - else() - message(WARNING "Git not found. 
Build info will not be accurate.") - endif() -endif() - -# Get the commit count and hash -if(Git_FOUND) - execute_process( - COMMAND ${GIT_EXECUTABLE} rev-parse --short HEAD - WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} - OUTPUT_VARIABLE HEAD - OUTPUT_STRIP_TRAILING_WHITESPACE - RESULT_VARIABLE RES - ) - if (RES EQUAL 0) - set(BUILD_COMMIT ${HEAD}) - endif() - execute_process( - COMMAND ${GIT_EXECUTABLE} rev-list --count HEAD - WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} - OUTPUT_VARIABLE COUNT - OUTPUT_STRIP_TRAILING_WHITESPACE - RESULT_VARIABLE RES - ) - if (RES EQUAL 0) - set(BUILD_NUMBER ${COUNT}) - endif() -endif() - -if(MSVC) - set(BUILD_COMPILER "${CMAKE_C_COMPILER_ID} ${CMAKE_C_COMPILER_VERSION}") - set(BUILD_TARGET ${CMAKE_VS_PLATFORM_NAME}) -else() - execute_process( - COMMAND sh -c "\"$@\" --version | head -1" _ ${CMAKE_C_COMPILER} - OUTPUT_VARIABLE OUT - OUTPUT_STRIP_TRAILING_WHITESPACE - ) - set(BUILD_COMPILER ${OUT}) - execute_process( - COMMAND ${CMAKE_C_COMPILER} -dumpmachine - OUTPUT_VARIABLE OUT - OUTPUT_STRIP_TRAILING_WHITESPACE - ) - set(BUILD_TARGET ${OUT}) -endif() diff --git a/cmake/common.cmake b/cmake/common.cmake deleted file mode 100644 index 0f54871e4..000000000 --- a/cmake/common.cmake +++ /dev/null @@ -1,33 +0,0 @@ -function(llama_add_compile_flags) - if (LLAMA_FATAL_WARNINGS) - if (CMAKE_CXX_COMPILER_ID MATCHES "GNU" OR CMAKE_CXX_COMPILER_ID MATCHES "Clang") - list(APPEND C_FLAGS -Werror) - list(APPEND CXX_FLAGS -Werror) - elseif (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") - add_compile_options(/WX) - endif() - endif() - - if (LLAMA_ALL_WARNINGS) - if (NOT MSVC) - list(APPEND C_FLAGS -Wshadow -Wstrict-prototypes -Wpointer-arith -Wmissing-prototypes - -Werror=implicit-int -Werror=implicit-function-declaration) - - list(APPEND CXX_FLAGS -Wmissing-declarations -Wmissing-noreturn) - - list(APPEND WARNING_FLAGS -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function) - - list(APPEND C_FLAGS ${WARNING_FLAGS}) - list(APPEND CXX_FLAGS ${WARNING_FLAGS}) - - ggml_get_flags(${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION}) - - add_compile_options("$<$:${C_FLAGS};${GF_C_FLAGS}>" - "$<$:${CXX_FLAGS};${GF_CXX_FLAGS}>") - else() - # todo : msvc - set(C_FLAGS "" PARENT_SCOPE) - set(CXX_FLAGS "" PARENT_SCOPE) - endif() - endif() -endfunction() diff --git a/cmake/git-vars.cmake b/cmake/git-vars.cmake deleted file mode 100644 index 1a4c24ebf..000000000 --- a/cmake/git-vars.cmake +++ /dev/null @@ -1,22 +0,0 @@ -find_package(Git) - -# the commit's SHA1 -execute_process(COMMAND - "${GIT_EXECUTABLE}" describe --match=NeVeRmAtCh --always --abbrev=8 - WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}" - OUTPUT_VARIABLE GIT_SHA1 - ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) - -# the date of the commit -execute_process(COMMAND - "${GIT_EXECUTABLE}" log -1 --format=%ad --date=local - WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}" - OUTPUT_VARIABLE GIT_DATE - ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) - -# the subject of the commit -execute_process(COMMAND - "${GIT_EXECUTABLE}" log -1 --format=%s - WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}" - OUTPUT_VARIABLE GIT_COMMIT_SUBJECT - ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) diff --git a/cmake/llama-config.cmake.in b/cmake/llama-config.cmake.in deleted file mode 100644 index 90cbec5b6..000000000 --- a/cmake/llama-config.cmake.in +++ /dev/null @@ -1,30 +0,0 @@ -set(LLAMA_VERSION @LLAMA_INSTALL_VERSION@) -set(LLAMA_BUILD_COMMIT @LLAMA_BUILD_COMMIT@) -set(LLAMA_BUILD_NUMBER @LLAMA_BUILD_NUMBER@) -set(LLAMA_SHARED_LIB @BUILD_SHARED_LIBS@) - 
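# Illustrative usage sketch (assumption, not part of the original llama-config.cmake.in):
# a downstream CMake project consuming this package would typically do something along
# these lines, where `my_app` and main.cpp are hypothetical names:
#
#   find_package(Llama REQUIRED)
#   add_executable(my_app main.cpp)
#   target_link_libraries(my_app PRIVATE llama)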
-@PACKAGE_INIT@ - -set_and_check(LLAMA_INCLUDE_DIR "@PACKAGE_LLAMA_INCLUDE_INSTALL_DIR@") -set_and_check(LLAMA_LIB_DIR "@PACKAGE_LLAMA_LIB_INSTALL_DIR@") -set_and_check(LLAMA_BIN_DIR "@PACKAGE_LLAMA_BIN_INSTALL_DIR@") - -find_package(ggml REQUIRED HINTS ${LLAMA_LIB_DIR}/cmake) - -find_library(llama_LIBRARY llama - REQUIRED - HINTS ${LLAMA_LIB_DIR} - NO_CMAKE_FIND_ROOT_PATH -) - -add_library(llama UNKNOWN IMPORTED) -set_target_properties(llama - PROPERTIES - INTERFACE_INCLUDE_DIRECTORIES "${LLAMA_INCLUDE_DIR}" - INTERFACE_LINK_LIBRARIES "ggml::ggml;ggml::ggml-base;" - IMPORTED_LINK_INTERFACE_LANGUAGES "CXX" - IMPORTED_LOCATION "${llama_LIBRARY}" - INTERFACE_COMPILE_FEATURES c_std_90 - POSITION_INDEPENDENT_CODE ON) - -check_required_components(Llama) diff --git a/cmake/llama.pc.in b/cmake/llama.pc.in deleted file mode 100644 index 6fb58b5f6..000000000 --- a/cmake/llama.pc.in +++ /dev/null @@ -1,10 +0,0 @@ -prefix=@CMAKE_INSTALL_PREFIX@ -exec_prefix=@CMAKE_INSTALL_PREFIX@ -libdir=@CMAKE_INSTALL_FULL_LIBDIR@ -includedir=@CMAKE_INSTALL_FULL_INCLUDEDIR@ - -Name: llama -Description: Port of Facebook's LLaMA model in C/C++ -Version: @LLAMA_INSTALL_VERSION@ -Libs: -L${libdir} -lggml -lggml-base -lllama -Cflags: -I${includedir} diff --git a/cmake/x64-windows-llvm.cmake b/cmake/x64-windows-llvm.cmake deleted file mode 100644 index 0603d738f..000000000 --- a/cmake/x64-windows-llvm.cmake +++ /dev/null @@ -1,11 +0,0 @@ -set( CMAKE_SYSTEM_NAME Windows ) -set( CMAKE_SYSTEM_PROCESSOR x86_64 ) - -set( CMAKE_C_COMPILER clang ) -set( CMAKE_CXX_COMPILER clang++ ) - -set( arch_c_flags "-march=native" ) - -set( CMAKE_C_FLAGS_INIT "${arch_c_flags}" ) -set( CMAKE_CXX_FLAGS_INIT "${arch_c_flags}" ) - diff --git a/common/CMakeLists.txt b/common/CMakeLists.txt deleted file mode 100644 index e61015d2a..000000000 --- a/common/CMakeLists.txt +++ /dev/null @@ -1,124 +0,0 @@ -# common - -find_package(Threads REQUIRED) - -llama_add_compile_flags() - -# Build info header -# - -if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/../.git") - set(GIT_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../.git") - - # Is git submodule - if(NOT IS_DIRECTORY "${GIT_DIR}") - file(READ ${GIT_DIR} REAL_GIT_DIR_LINK) - string(REGEX REPLACE "gitdir: (.*)\n$" "\\1" REAL_GIT_DIR ${REAL_GIT_DIR_LINK}) - string(FIND "${REAL_GIT_DIR}" "/" SLASH_POS) - if (SLASH_POS EQUAL 0) - set(GIT_DIR "${REAL_GIT_DIR}") - else() - set(GIT_DIR "${CMAKE_CURRENT_SOURCE_DIR}/../${REAL_GIT_DIR}") - endif() - endif() - - if(EXISTS "${GIT_DIR}/index") - set(GIT_INDEX "${GIT_DIR}/index") - else() - message(WARNING "Git index not found in git repository.") - set(GIT_INDEX "") - endif() -else() - message(WARNING "Git repository not found; to enable automatic generation of build info, make sure Git is installed and the project is a Git repository.") - set(GIT_INDEX "") -endif() - -# Add a custom command to rebuild build-info.cpp when .git/index changes -add_custom_command( - OUTPUT "${CMAKE_CURRENT_SOURCE_DIR}/build-info.cpp" - COMMENT "Generating build details from Git" - COMMAND ${CMAKE_COMMAND} -DMSVC=${MSVC} -DCMAKE_C_COMPILER_VERSION=${CMAKE_C_COMPILER_VERSION} - -DCMAKE_C_COMPILER_ID=${CMAKE_C_COMPILER_ID} -DCMAKE_VS_PLATFORM_NAME=${CMAKE_VS_PLATFORM_NAME} - -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} -P "${CMAKE_CURRENT_SOURCE_DIR}/cmake/build-info-gen-cpp.cmake" - WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/.." 
- DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/build-info.cpp.in" ${GIT_INDEX} - VERBATIM -) -set(TARGET build_info) -add_library(${TARGET} OBJECT build-info.cpp) -if (BUILD_SHARED_LIBS) - set_target_properties(${TARGET} PROPERTIES POSITION_INDEPENDENT_CODE ON) -endif() - -set(TARGET common) - -add_library(${TARGET} STATIC - arg.cpp - arg.h - base64.hpp - chat.cpp - chat.hpp - chat-template.hpp - common.cpp - common.h - console.cpp - console.h - json-schema-to-grammar.cpp - json.hpp - llguidance.cpp - log.cpp - log.h - minja.hpp - ngram-cache.cpp - ngram-cache.h - sampling.cpp - sampling.h - speculative.cpp - speculative.h - ) - -if (BUILD_SHARED_LIBS) - set_target_properties(${TARGET} PROPERTIES POSITION_INDEPENDENT_CODE ON) -endif() - -set(LLAMA_COMMON_EXTRA_LIBS build_info) - -# Use curl to download model url -if (LLAMA_CURL) - find_package(CURL REQUIRED) - target_compile_definitions(${TARGET} PUBLIC LLAMA_USE_CURL) - include_directories(${CURL_INCLUDE_DIRS}) - find_library(CURL_LIBRARY curl REQUIRED) - set(LLAMA_COMMON_EXTRA_LIBS ${LLAMA_COMMON_EXTRA_LIBS} ${CURL_LIBRARY}) -endif () - -if (LLAMA_LLGUIDANCE) - include(ExternalProject) - set(LLGUIDANCE_SRC ${CMAKE_BINARY_DIR}/llguidance/source) - set(LLGUIDANCE_PATH ${LLGUIDANCE_SRC}/target/release) - ExternalProject_Add(llguidance_ext - GIT_REPOSITORY https://github.com/guidance-ai/llguidance - # v0.6.12: - GIT_TAG ced1c9023d47ec194fa977932d35ce65c2ebfc09 - PREFIX ${CMAKE_BINARY_DIR}/llguidance - SOURCE_DIR ${LLGUIDANCE_SRC} - BUILD_IN_SOURCE TRUE - CONFIGURE_COMMAND "" - BUILD_COMMAND cargo build --release - INSTALL_COMMAND "" - BUILD_BYPRODUCTS ${LLGUIDANCE_PATH}/libllguidance.a ${LLGUIDANCE_PATH}/llguidance.h - UPDATE_COMMAND "" - ) - target_compile_definitions(${TARGET} PUBLIC LLAMA_USE_LLGUIDANCE) - - add_library(llguidance STATIC IMPORTED) - set_target_properties(llguidance PROPERTIES IMPORTED_LOCATION ${LLGUIDANCE_PATH}/libllguidance.a) - add_dependencies(llguidance llguidance_ext) - - target_include_directories(${TARGET} PRIVATE ${LLGUIDANCE_PATH}) - set(LLAMA_COMMON_EXTRA_LIBS ${LLAMA_COMMON_EXTRA_LIBS} llguidance) -endif () - -target_include_directories(${TARGET} PUBLIC .) 
-target_compile_features (${TARGET} PUBLIC cxx_std_17) -target_link_libraries (${TARGET} PRIVATE ${LLAMA_COMMON_EXTRA_LIBS} PUBLIC llama Threads::Threads) diff --git a/common/arg.cpp b/common/arg.cpp deleted file mode 100644 index 152f671ab..000000000 --- a/common/arg.cpp +++ /dev/null @@ -1,2370 +0,0 @@ -#include "arg.h" - -#include "log.h" -#include "sampling.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "json-schema-to-grammar.h" - -using json = nlohmann::ordered_json; - -common_arg & common_arg::set_examples(std::initializer_list examples) { - this->examples = std::move(examples); - return *this; -} - -common_arg & common_arg::set_excludes(std::initializer_list excludes) { - this->excludes = std::move(excludes); - return *this; -} - -common_arg & common_arg::set_env(const char * env) { - help = help + "\n(env: " + env + ")"; - this->env = env; - return *this; -} - -common_arg & common_arg::set_sparam() { - is_sparam = true; - return *this; -} - -bool common_arg::in_example(enum llama_example ex) { - return examples.find(ex) != examples.end(); -} - -bool common_arg::is_exclude(enum llama_example ex) { - return excludes.find(ex) != excludes.end(); -} - -bool common_arg::get_value_from_env(std::string & output) { - if (env == nullptr) return false; - char * value = std::getenv(env); - if (value) { - output = value; - return true; - } - return false; -} - -bool common_arg::has_value_from_env() { - return env != nullptr && std::getenv(env); -} - -static std::vector break_str_into_lines(std::string input, size_t max_char_per_line) { - std::vector result; - std::istringstream iss(input); - std::string line; - auto add_line = [&](const std::string& l) { - if (l.length() <= max_char_per_line) { - result.push_back(l); - } else { - std::istringstream line_stream(l); - std::string word, current_line; - while (line_stream >> word) { - if (current_line.length() + !current_line.empty() + word.length() > max_char_per_line) { - if (!current_line.empty()) result.push_back(current_line); - current_line = word; - } else { - current_line += (!current_line.empty() ? " " : "") + word; - } - } - if (!current_line.empty()) result.push_back(current_line); - } - }; - while (std::getline(iss, line)) { - add_line(line); - } - return result; -} - -std::string common_arg::to_string() { - // params for printing to console - const static int n_leading_spaces = 40; - const static int n_char_per_line_help = 70; // TODO: detect this based on current console - std::string leading_spaces(n_leading_spaces, ' '); - - std::ostringstream ss; - for (const auto arg : args) { - if (arg == args.front()) { - if (args.size() == 1) { - ss << arg; - } else { - // first arg is usually abbreviation, we need padding to make it more beautiful - auto tmp = std::string(arg) + ", "; - auto spaces = std::string(std::max(0, 7 - (int)tmp.size()), ' '); - ss << tmp << spaces; - } - } else { - ss << arg << (arg != args.back() ? ", " : ""); - } - } - if (value_hint) ss << " " << value_hint; - if (value_hint_2) ss << " " << value_hint_2; - if (ss.tellp() > n_leading_spaces - 3) { - // current line is too long, add new line - ss << "\n" << leading_spaces; - } else { - // padding between arg and help, same line - ss << std::string(leading_spaces.size() - ss.tellp(), ' '); - } - const auto help_lines = break_str_into_lines(help, n_char_per_line_help); - for (const auto & line : help_lines) { - ss << (&line == &help_lines.front() ? 
"" : leading_spaces) << line << "\n"; - } - return ss.str(); -} - -// -// utils -// - -static void common_params_handle_model_default( - std::string & model, - const std::string & model_url, - std::string & hf_repo, - std::string & hf_file, - const std::string & hf_token, - const std::string & model_default) { - if (!hf_repo.empty()) { - // short-hand to avoid specifying --hf-file -> default it to --model - if (hf_file.empty()) { - if (model.empty()) { - auto auto_detected = common_get_hf_file(hf_repo, hf_token); - if (auto_detected.first.empty() || auto_detected.second.empty()) { - exit(1); // built without CURL, error message already printed - } - hf_repo = auto_detected.first; - hf_file = auto_detected.second; - } else { - hf_file = model; - } - } - // make sure model path is present (for caching purposes) - if (model.empty()) { - // this is to avoid different repo having same file name, or same file name in different subdirs - std::string filename = hf_repo + "_" + hf_file; - // to make sure we don't have any slashes in the filename - string_replace_all(filename, "/", "_"); - model = fs_get_cache_file(filename); - } - } else if (!model_url.empty()) { - if (model.empty()) { - auto f = string_split(model_url, '#').front(); - f = string_split(f, '?').front(); - model = fs_get_cache_file(string_split(f, '/').back()); - } - } else if (model.empty()) { - model = model_default; - } -} - -const std::vector kv_cache_types = { - GGML_TYPE_F32, - GGML_TYPE_F16, - GGML_TYPE_BF16, - GGML_TYPE_Q8_0, - GGML_TYPE_Q4_0, - GGML_TYPE_Q4_1, - GGML_TYPE_IQ4_NL, - GGML_TYPE_Q5_0, - GGML_TYPE_Q5_1, -}; - -static ggml_type kv_cache_type_from_str(const std::string & s) { - for (const auto & type : kv_cache_types) { - if (ggml_type_name(type) == s) { - return type; - } - } - throw std::runtime_error("Unsupported cache type: " + s); -} - -static std::string get_all_kv_cache_types() { - std::ostringstream msg; - for (const auto & type : kv_cache_types) { - msg << ggml_type_name(type) << (&type == &kv_cache_types.back() ? 
"" : ", "); - } - return msg.str(); -} - -// -// CLI argument parsing functions -// - -static bool common_params_parse_ex(int argc, char ** argv, common_params_context & ctx_arg) { - std::string arg; - const std::string arg_prefix = "--"; - common_params & params = ctx_arg.params; - - std::unordered_map arg_to_options; - for (auto & opt : ctx_arg.options) { - for (const auto & arg : opt.args) { - arg_to_options[arg] = &opt; - } - } - - // handle environment variables - for (auto & opt : ctx_arg.options) { - std::string value; - if (opt.get_value_from_env(value)) { - try { - if (opt.handler_void && (value == "1" || value == "true")) { - opt.handler_void(params); - } - if (opt.handler_int) { - opt.handler_int(params, std::stoi(value)); - } - if (opt.handler_string) { - opt.handler_string(params, value); - continue; - } - } catch (std::exception & e) { - throw std::invalid_argument(string_format( - "error while handling environment variable \"%s\": %s\n\n", opt.env, e.what())); - } - } - } - - // handle command line arguments - auto check_arg = [&](int i) { - if (i+1 >= argc) { - throw std::invalid_argument("expected value for argument"); - } - }; - - for (int i = 1; i < argc; i++) { - const std::string arg_prefix = "--"; - - std::string arg = argv[i]; - if (arg.compare(0, arg_prefix.size(), arg_prefix) == 0) { - std::replace(arg.begin(), arg.end(), '_', '-'); - } - if (arg_to_options.find(arg) == arg_to_options.end()) { - throw std::invalid_argument(string_format("error: invalid argument: %s", arg.c_str())); - } - auto opt = *arg_to_options[arg]; - if (opt.has_value_from_env()) { - fprintf(stderr, "warn: %s environment variable is set, but will be overwritten by command line argument %s\n", opt.env, arg.c_str()); - } - try { - if (opt.handler_void) { - opt.handler_void(params); - continue; - } - - // arg with single value - check_arg(i); - std::string val = argv[++i]; - if (opt.handler_int) { - opt.handler_int(params, std::stoi(val)); - continue; - } - if (opt.handler_string) { - opt.handler_string(params, val); - continue; - } - - // arg with 2 values - check_arg(i); - std::string val2 = argv[++i]; - if (opt.handler_str_str) { - opt.handler_str_str(params, val, val2); - continue; - } - } catch (std::exception & e) { - throw std::invalid_argument(string_format( - "error while handling argument \"%s\": %s\n\n" - "usage:\n%s\n\nto show complete usage, run with -h", - arg.c_str(), e.what(), arg_to_options[arg]->to_string().c_str())); - } - } - - postprocess_cpu_params(params.cpuparams, nullptr); - postprocess_cpu_params(params.cpuparams_batch, ¶ms.cpuparams); - - postprocess_cpu_params(params.speculative.cpuparams, ¶ms.cpuparams); - postprocess_cpu_params(params.speculative.cpuparams_batch, ¶ms.cpuparams_batch); - - if (params.prompt_cache_all && (params.interactive || params.interactive_first)) { - throw std::invalid_argument("error: --prompt-cache-all not supported in interactive mode yet\n"); - } - - // TODO: refactor model params in a common struct - common_params_handle_model_default(params.model, params.model_url, params.hf_repo, params.hf_file, params.hf_token, DEFAULT_MODEL_PATH); - common_params_handle_model_default(params.speculative.model, params.speculative.model_url, params.speculative.hf_repo, params.speculative.hf_file, params.hf_token, ""); - common_params_handle_model_default(params.vocoder.model, params.vocoder.model_url, params.vocoder.hf_repo, params.vocoder.hf_file, params.hf_token, ""); - - if (params.escape) { - string_process_escapes(params.prompt); - 
string_process_escapes(params.input_prefix); - string_process_escapes(params.input_suffix); - for (auto & antiprompt : params.antiprompt) { - string_process_escapes(antiprompt); - } - for (auto & seq_breaker : params.sampling.dry_sequence_breakers) { - string_process_escapes(seq_breaker); - } - } - - if (!params.kv_overrides.empty()) { - params.kv_overrides.emplace_back(); - params.kv_overrides.back().key[0] = 0; - } - - if (params.reranking && params.embedding) { - throw std::invalid_argument("error: either --embedding or --reranking can be specified, but not both"); - } - - if (!params.chat_template.empty() && !common_chat_verify_template(params.chat_template, params.use_jinja)) { - throw std::runtime_error(string_format( - "error: the supplied chat template is not supported: %s%s\n", - params.chat_template.c_str(), - params.use_jinja ? "" : "\nnote: llama.cpp was started without --jinja, we only support commonly used templates" - )); - } - - return true; -} - -static void common_params_print_usage(common_params_context & ctx_arg) { - auto print_options = [](std::vector & options) { - for (common_arg * opt : options) { - printf("%s", opt->to_string().c_str()); - } - }; - - std::vector common_options; - std::vector sparam_options; - std::vector specific_options; - for (auto & opt : ctx_arg.options) { - // in case multiple LLAMA_EXAMPLE_* are set, we prioritize the LLAMA_EXAMPLE_* matching current example - if (opt.is_sparam) { - sparam_options.push_back(&opt); - } else if (opt.in_example(ctx_arg.ex)) { - specific_options.push_back(&opt); - } else { - common_options.push_back(&opt); - } - } - printf("----- common params -----\n\n"); - print_options(common_options); - printf("\n\n----- sampling params -----\n\n"); - print_options(sparam_options); - // TODO: maybe convert enum llama_example to string - printf("\n\n----- example-specific params -----\n\n"); - print_options(specific_options); -} - -static std::vector parse_device_list(const std::string & value) { - std::vector devices; - auto dev_names = string_split(value, ','); - if (dev_names.empty()) { - throw std::invalid_argument("no devices specified"); - } - if (dev_names.size() == 1 && dev_names[0] == "none") { - devices.push_back(nullptr); - } else { - for (const auto & device : dev_names) { - auto * dev = ggml_backend_dev_by_name(device.c_str()); - if (!dev || ggml_backend_dev_type(dev) != GGML_BACKEND_DEVICE_TYPE_GPU) { - throw std::invalid_argument(string_format("invalid device: %s", device.c_str())); - } - devices.push_back(dev); - } - devices.push_back(nullptr); - } - return devices; -} - -static void add_rpc_devices(std::string servers) { - auto rpc_servers = string_split(servers, ','); - if (rpc_servers.empty()) { - throw std::invalid_argument("no RPC servers specified"); - } - ggml_backend_reg_t rpc_reg = ggml_backend_reg_by_name("RPC"); - if (!rpc_reg) { - throw std::invalid_argument("failed to find RPC backend"); - } - typedef ggml_backend_dev_t (*ggml_backend_rpc_add_device_t)(const char * endpoint); - ggml_backend_rpc_add_device_t ggml_backend_rpc_add_device_fn = (ggml_backend_rpc_add_device_t) ggml_backend_reg_get_proc_address(rpc_reg, "ggml_backend_rpc_add_device"); - if (!ggml_backend_rpc_add_device_fn) { - throw std::invalid_argument("failed to find RPC device add function"); - } - for (const auto & server : rpc_servers) { - ggml_backend_dev_t dev = ggml_backend_rpc_add_device_fn(server.c_str()); - if (dev) { - ggml_backend_device_register(dev); - } else { - throw std::invalid_argument("failed to register RPC 
device"); - } - } -} - -bool common_params_parse(int argc, char ** argv, common_params & params, llama_example ex, void(*print_usage)(int, char **)) { - auto ctx_arg = common_params_parser_init(params, ex, print_usage); - const common_params params_org = ctx_arg.params; // the example can modify the default params - - try { - if (!common_params_parse_ex(argc, argv, ctx_arg)) { - ctx_arg.params = params_org; - return false; - } - if (ctx_arg.params.usage) { - common_params_print_usage(ctx_arg); - if (ctx_arg.print_usage) { - ctx_arg.print_usage(argc, argv); - } - exit(0); - } - } catch (const std::invalid_argument & ex) { - fprintf(stderr, "%s\n", ex.what()); - ctx_arg.params = params_org; - return false; - } - - return true; -} - -static std::string list_builtin_chat_templates() { - std::vector supported_tmpl; - int32_t res = llama_chat_builtin_templates(nullptr, 0); - supported_tmpl.resize(res); - res = llama_chat_builtin_templates(supported_tmpl.data(), supported_tmpl.size()); - std::ostringstream msg; - for (auto & tmpl : supported_tmpl) { - msg << tmpl << (&tmpl == &supported_tmpl.back() ? "" : ", "); - } - return msg.str(); -} - -common_params_context common_params_parser_init(common_params & params, llama_example ex, void(*print_usage)(int, char **)) { - // load dynamic backends - ggml_backend_load_all(); - - common_params_context ctx_arg(params); - ctx_arg.print_usage = print_usage; - ctx_arg.ex = ex; - - std::string sampler_type_chars; - std::string sampler_type_names; - for (const auto & sampler : params.sampling.samplers) { - sampler_type_chars += common_sampler_type_to_chr(sampler); - sampler_type_names += common_sampler_type_to_str(sampler) + ";"; - } - sampler_type_names.pop_back(); - - - /** - * filter options by example - * rules: - * - all examples inherit options from LLAMA_EXAMPLE_COMMON - * - if LLAMA_EXAMPLE_* is set (other than COMMON), we only show the option in the corresponding example - * - if both {LLAMA_EXAMPLE_COMMON, LLAMA_EXAMPLE_*,} are set, we will prioritize the LLAMA_EXAMPLE_* matching current example - */ - auto add_opt = [&](common_arg arg) { - if ((arg.in_example(ex) || arg.in_example(LLAMA_EXAMPLE_COMMON)) && !arg.is_exclude(ex)) { - ctx_arg.options.push_back(std::move(arg)); - } - }; - - - add_opt(common_arg( - {"-h", "--help", "--usage"}, - "print usage and exit", - [](common_params & params) { - params.usage = true; - } - )); - add_opt(common_arg( - {"--version"}, - "show version and build info", - [](common_params &) { - fprintf(stderr, "version: %d (%s)\n", LLAMA_BUILD_NUMBER, LLAMA_COMMIT); - fprintf(stderr, "built with %s for %s\n", LLAMA_COMPILER, LLAMA_BUILD_TARGET); - exit(0); - } - )); - add_opt(common_arg( - {"--verbose-prompt"}, - string_format("print a verbose prompt before generation (default: %s)", params.verbose_prompt ? "true" : "false"), - [](common_params & params) { - params.verbose_prompt = true; - } - )); - add_opt(common_arg( - {"--no-display-prompt"}, - string_format("don't print prompt at generation (default: %s)", !params.display_prompt ? "true" : "false"), - [](common_params & params) { - params.display_prompt = false; - } - ).set_examples({LLAMA_EXAMPLE_MAIN})); - add_opt(common_arg( - {"-co", "--color"}, - string_format("colorise output to distinguish prompt and user input from generations (default: %s)", params.use_color ? 
"true" : "false"), - [](common_params & params) { - params.use_color = true; - } - ).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_INFILL, LLAMA_EXAMPLE_SPECULATIVE, LLAMA_EXAMPLE_LOOKUP})); - add_opt(common_arg( - {"-t", "--threads"}, "N", - string_format("number of threads to use during generation (default: %d)", params.cpuparams.n_threads), - [](common_params & params, int value) { - params.cpuparams.n_threads = value; - if (params.cpuparams.n_threads <= 0) { - params.cpuparams.n_threads = std::thread::hardware_concurrency(); - } - } - ).set_env("LLAMA_ARG_THREADS")); - add_opt(common_arg( - {"-tb", "--threads-batch"}, "N", - "number of threads to use during batch and prompt processing (default: same as --threads)", - [](common_params & params, int value) { - params.cpuparams_batch.n_threads = value; - if (params.cpuparams_batch.n_threads <= 0) { - params.cpuparams_batch.n_threads = std::thread::hardware_concurrency(); - } - } - )); - add_opt(common_arg( - {"-C", "--cpu-mask"}, "M", - "CPU affinity mask: arbitrarily long hex. Complements cpu-range (default: \"\")", - [](common_params & params, const std::string & mask) { - params.cpuparams.mask_valid = true; - if (!parse_cpu_mask(mask, params.cpuparams.cpumask)) { - throw std::invalid_argument("invalid cpumask"); - } - } - )); - add_opt(common_arg( - {"-Cr", "--cpu-range"}, "lo-hi", - "range of CPUs for affinity. Complements --cpu-mask", - [](common_params & params, const std::string & range) { - params.cpuparams.mask_valid = true; - if (!parse_cpu_range(range, params.cpuparams.cpumask)) { - throw std::invalid_argument("invalid range"); - } - } - )); - add_opt(common_arg( - {"--cpu-strict"}, "<0|1>", - string_format("use strict CPU placement (default: %u)\n", (unsigned) params.cpuparams.strict_cpu), - [](common_params & params, const std::string & value) { - params.cpuparams.strict_cpu = std::stoul(value); - } - )); - add_opt(common_arg( - {"--prio"}, "N", - string_format("set process/thread priority : 0-normal, 1-medium, 2-high, 3-realtime (default: %d)\n", params.cpuparams.priority), - [](common_params & params, int prio) { - if (prio < 0 || prio > 3) { - throw std::invalid_argument("invalid value"); - } - params.cpuparams.priority = (enum ggml_sched_priority) prio; - } - )); - add_opt(common_arg( - {"--poll"}, "<0...100>", - string_format("use polling level to wait for work (0 - no polling, default: %u)\n", (unsigned) params.cpuparams.poll), - [](common_params & params, const std::string & value) { - params.cpuparams.poll = std::stoul(value); - } - )); - add_opt(common_arg( - {"-Cb", "--cpu-mask-batch"}, "M", - "CPU affinity mask: arbitrarily long hex. Complements cpu-range-batch (default: same as --cpu-mask)", - [](common_params & params, const std::string & mask) { - params.cpuparams_batch.mask_valid = true; - if (!parse_cpu_mask(mask, params.cpuparams_batch.cpumask)) { - throw std::invalid_argument("invalid cpumask"); - } - } - )); - add_opt(common_arg( - {"-Crb", "--cpu-range-batch"}, "lo-hi", - "ranges of CPUs for affinity. 
Complements --cpu-mask-batch", - [](common_params & params, const std::string & range) { - params.cpuparams_batch.mask_valid = true; - if (!parse_cpu_range(range, params.cpuparams_batch.cpumask)) { - throw std::invalid_argument("invalid range"); - } - } - )); - add_opt(common_arg( - {"--cpu-strict-batch"}, "<0|1>", - "use strict CPU placement (default: same as --cpu-strict)", - [](common_params & params, int value) { - params.cpuparams_batch.strict_cpu = value; - } - )); - add_opt(common_arg( - {"--prio-batch"}, "N", - string_format("set process/thread priority : 0-normal, 1-medium, 2-high, 3-realtime (default: %d)\n", params.cpuparams_batch.priority), - [](common_params & params, int prio) { - if (prio < 0 || prio > 3) { - throw std::invalid_argument("invalid value"); - } - params.cpuparams_batch.priority = (enum ggml_sched_priority) prio; - } - )); - add_opt(common_arg( - {"--poll-batch"}, "<0|1>", - "use polling to wait for work (default: same as --poll)", - [](common_params & params, int value) { - params.cpuparams_batch.poll = value; - } - )); - add_opt(common_arg( - {"-lcs", "--lookup-cache-static"}, "FNAME", - "path to static lookup cache to use for lookup decoding (not updated by generation)", - [](common_params & params, const std::string & value) { - params.lookup_cache_static = value; - } - ).set_examples({LLAMA_EXAMPLE_LOOKUP})); - add_opt(common_arg( - {"-lcd", "--lookup-cache-dynamic"}, "FNAME", - "path to dynamic lookup cache to use for lookup decoding (updated by generation)", - [](common_params & params, const std::string & value) { - params.lookup_cache_dynamic = value; - } - ).set_examples({LLAMA_EXAMPLE_LOOKUP})); - add_opt(common_arg( - {"-c", "--ctx-size"}, "N", - string_format("size of the prompt context (default: %d, 0 = loaded from model)", params.n_ctx), - [](common_params & params, int value) { - params.n_ctx = value; - } - ).set_env("LLAMA_ARG_CTX_SIZE")); - add_opt(common_arg( - {"-n", "--predict", "--n-predict"}, "N", - string_format("number of tokens to predict (default: %d, -1 = infinity, -2 = until context filled)", params.n_predict), - [](common_params & params, int value) { - params.n_predict = value; - } - ).set_env("LLAMA_ARG_N_PREDICT")); - add_opt(common_arg( - {"-b", "--batch-size"}, "N", - string_format("logical maximum batch size (default: %d)", params.n_batch), - [](common_params & params, int value) { - params.n_batch = value; - } - ).set_env("LLAMA_ARG_BATCH")); - add_opt(common_arg( - {"-ub", "--ubatch-size"}, "N", - string_format("physical maximum batch size (default: %d)", params.n_ubatch), - [](common_params & params, int value) { - params.n_ubatch = value; - } - ).set_env("LLAMA_ARG_UBATCH")); - add_opt(common_arg( - {"--keep"}, "N", - string_format("number of tokens to keep from the initial prompt (default: %d, -1 = all)", params.n_keep), - [](common_params & params, int value) { - params.n_keep = value; - } - )); - add_opt(common_arg( - {"--no-context-shift"}, - string_format("disables context shift on inifinite text generation (default: %s)", params.ctx_shift ? 
"disabled" : "enabled"), - [](common_params & params) { - params.ctx_shift = false; - } - ).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_IMATRIX, LLAMA_EXAMPLE_PERPLEXITY}).set_env("LLAMA_ARG_NO_CONTEXT_SHIFT")); - add_opt(common_arg( - {"--chunks"}, "N", - string_format("max number of chunks to process (default: %d, -1 = all)", params.n_chunks), - [](common_params & params, int value) { - params.n_chunks = value; - } - ).set_examples({LLAMA_EXAMPLE_IMATRIX, LLAMA_EXAMPLE_PERPLEXITY, LLAMA_EXAMPLE_RETRIEVAL})); - add_opt(common_arg( - {"-fa", "--flash-attn"}, - string_format("enable Flash Attention (default: %s)", params.flash_attn ? "enabled" : "disabled"), - [](common_params & params) { - params.flash_attn = true; - } - ).set_env("LLAMA_ARG_FLASH_ATTN")); - add_opt(common_arg( - {"-p", "--prompt"}, "PROMPT", - ex == LLAMA_EXAMPLE_MAIN - ? "prompt to start generation with\nif -cnv is set, this will be used as system prompt" - : "prompt to start generation with", - [](common_params & params, const std::string & value) { - params.prompt = value; - } - ).set_excludes({LLAMA_EXAMPLE_SERVER})); - add_opt(common_arg( - {"--no-perf"}, - string_format("disable internal libllama performance timings (default: %s)", params.no_perf ? "true" : "false"), - [](common_params & params) { - params.no_perf = true; - params.sampling.no_perf = true; - } - ).set_env("LLAMA_ARG_NO_PERF")); - add_opt(common_arg( - {"-f", "--file"}, "FNAME", - "a file containing the prompt (default: none)", - [](common_params & params, const std::string & value) { - std::ifstream file(value); - if (!file) { - throw std::runtime_error(string_format("error: failed to open file '%s'\n", value.c_str())); - } - // store the external file name in params - params.prompt_file = value; - std::copy(std::istreambuf_iterator(file), std::istreambuf_iterator(), back_inserter(params.prompt)); - if (!params.prompt.empty() && params.prompt.back() == '\n') { - params.prompt.pop_back(); - } - } - ).set_excludes({LLAMA_EXAMPLE_SERVER})); - add_opt(common_arg( - {"--in-file"}, "FNAME", - "an input file (repeat to specify multiple files)", - [](common_params & params, const std::string & value) { - std::ifstream file(value); - if (!file) { - throw std::runtime_error(string_format("error: failed to open file '%s'\n", value.c_str())); - } - params.in_files.push_back(value); - } - ).set_examples({LLAMA_EXAMPLE_IMATRIX})); - add_opt(common_arg( - {"-bf", "--binary-file"}, "FNAME", - "binary file containing the prompt (default: none)", - [](common_params & params, const std::string & value) { - std::ifstream file(value, std::ios::binary); - if (!file) { - throw std::runtime_error(string_format("error: failed to open file '%s'\n", value.c_str())); - } - // store the external file name in params - params.prompt_file = value; - std::ostringstream ss; - ss << file.rdbuf(); - params.prompt = ss.str(); - fprintf(stderr, "Read %zu bytes from binary file %s\n", params.prompt.size(), value.c_str()); - } - ).set_excludes({LLAMA_EXAMPLE_SERVER})); - add_opt(common_arg( - {"-e", "--escape"}, - string_format("process escapes sequences (\\n, \\r, \\t, \\', \\\", \\\\) (default: %s)", params.escape ? 
"true" : "false"), - [](common_params & params) { - params.escape = true; - } - )); - add_opt(common_arg( - {"--no-escape"}, - "do not process escape sequences", - [](common_params & params) { - params.escape = false; - } - )); - add_opt(common_arg( - {"-ptc", "--print-token-count"}, "N", - string_format("print token count every N tokens (default: %d)", params.n_print), - [](common_params & params, int value) { - params.n_print = value; - } - ).set_examples({LLAMA_EXAMPLE_MAIN})); - add_opt(common_arg( - {"--prompt-cache"}, "FNAME", - "file to cache prompt state for faster startup (default: none)", - [](common_params & params, const std::string & value) { - params.path_prompt_cache = value; - } - ).set_examples({LLAMA_EXAMPLE_MAIN})); - add_opt(common_arg( - {"--prompt-cache-all"}, - "if specified, saves user input and generations to cache as well\n", - [](common_params & params) { - params.prompt_cache_all = true; - } - ).set_examples({LLAMA_EXAMPLE_MAIN})); - add_opt(common_arg( - {"--prompt-cache-ro"}, - "if specified, uses the prompt cache but does not update it", - [](common_params & params) { - params.prompt_cache_ro = true; - } - ).set_examples({LLAMA_EXAMPLE_MAIN})); - add_opt(common_arg( - {"-r", "--reverse-prompt"}, "PROMPT", - "halt generation at PROMPT, return control in interactive mode\n", - [](common_params & params, const std::string & value) { - params.antiprompt.emplace_back(value); - } - ).set_examples({LLAMA_EXAMPLE_MAIN})); - add_opt(common_arg( - {"-sp", "--special"}, - string_format("special tokens output enabled (default: %s)", params.special ? "true" : "false"), - [](common_params & params) { - params.special = true; - } - ).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_SERVER})); - add_opt(common_arg( - {"-cnv", "--conversation"}, - "run in conversation mode:\n" - "- does not print special tokens and suffix/prefix\n" - "- interactive mode is also enabled\n" - "(default: auto enabled if chat template is available)", - [](common_params & params) { - params.conversation_mode = COMMON_CONVERSATION_MODE_ENABLED; - } - ).set_examples({LLAMA_EXAMPLE_MAIN})); - add_opt(common_arg( - {"-no-cnv", "--no-conversation"}, - "force disable conversation mode (default: false)", - [](common_params & params) { - params.conversation_mode = COMMON_CONVERSATION_MODE_DISABLED; - } - ).set_examples({LLAMA_EXAMPLE_MAIN})); - add_opt(common_arg( - {"-i", "--interactive"}, - string_format("run in interactive mode (default: %s)", params.interactive ? "true" : "false"), - [](common_params & params) { - params.interactive = true; - } - ).set_examples({LLAMA_EXAMPLE_MAIN})); - add_opt(common_arg( - {"-if", "--interactive-first"}, - string_format("run in interactive mode and wait for input right away (default: %s)", params.interactive_first ? 
"true" : "false"), - [](common_params & params) { - params.interactive_first = true; - } - ).set_examples({LLAMA_EXAMPLE_MAIN})); - add_opt(common_arg( - {"-mli", "--multiline-input"}, - "allows you to write or paste multiple lines without ending each in '\\'", - [](common_params & params) { - params.multiline_input = true; - } - ).set_examples({LLAMA_EXAMPLE_MAIN})); - add_opt(common_arg( - {"--in-prefix-bos"}, - "prefix BOS to user inputs, preceding the `--in-prefix` string", - [](common_params & params) { - params.input_prefix_bos = true; - params.enable_chat_template = false; - } - ).set_examples({LLAMA_EXAMPLE_MAIN})); - add_opt(common_arg( - {"--in-prefix"}, "STRING", - "string to prefix user inputs with (default: empty)", - [](common_params & params, const std::string & value) { - params.input_prefix = value; - params.enable_chat_template = false; - } - ).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_INFILL})); - add_opt(common_arg( - {"--in-suffix"}, "STRING", - "string to suffix after user inputs with (default: empty)", - [](common_params & params, const std::string & value) { - params.input_suffix = value; - params.enable_chat_template = false; - } - ).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_INFILL})); - add_opt(common_arg( - {"--no-warmup"}, - "skip warming up the model with an empty run", - [](common_params & params) { - params.warmup = false; - } - ).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_EMBEDDING})); - add_opt(common_arg( - {"--spm-infill"}, - string_format( - "use Suffix/Prefix/Middle pattern for infill (instead of Prefix/Suffix/Middle) as some models prefer this. (default: %s)", - params.spm_infill ? "enabled" : "disabled" - ), - [](common_params & params) { - params.spm_infill = true; - } - ).set_examples({LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_INFILL})); - add_opt(common_arg( - {"--samplers"}, "SAMPLERS", - string_format("samplers that will be used for generation in the order, separated by \';\'\n(default: %s)", sampler_type_names.c_str()), - [](common_params & params, const std::string & value) { - const auto sampler_names = string_split(value, ';'); - params.sampling.samplers = common_sampler_types_from_names(sampler_names, true); - } - ).set_sparam()); - add_opt(common_arg( - {"-s", "--seed"}, "SEED", - string_format("RNG seed (default: %d, use random seed for %d)", params.sampling.seed, LLAMA_DEFAULT_SEED), - [](common_params & params, const std::string & value) { - params.sampling.seed = std::stoul(value); - } - ).set_sparam()); - add_opt(common_arg( - {"--sampling-seq", "--sampler-seq"}, "SEQUENCE", - string_format("simplified sequence for samplers that will be used (default: %s)", sampler_type_chars.c_str()), - [](common_params & params, const std::string & value) { - params.sampling.samplers = common_sampler_types_from_chars(value); - } - ).set_sparam()); - add_opt(common_arg( - {"--ignore-eos"}, - "ignore end of stream token and continue generating (implies --logit-bias EOS-inf)", - [](common_params & params) { - params.sampling.ignore_eos = true; - } - ).set_sparam()); - add_opt(common_arg( - {"--temp"}, "N", - string_format("temperature (default: %.1f)", (double)params.sampling.temp), - [](common_params & params, const std::string & value) { - params.sampling.temp = std::stof(value); - params.sampling.temp = std::max(params.sampling.temp, 0.0f); - } - ).set_sparam()); - add_opt(common_arg( - {"--top-k"}, "N", - string_format("top-k sampling (default: %d, 0 = disabled)", params.sampling.top_k), - [](common_params & params, 
int value) { - params.sampling.top_k = value; - } - ).set_sparam()); - add_opt(common_arg( - {"--top-p"}, "N", - string_format("top-p sampling (default: %.1f, 1.0 = disabled)", (double)params.sampling.top_p), - [](common_params & params, const std::string & value) { - params.sampling.top_p = std::stof(value); - } - ).set_sparam()); - add_opt(common_arg( - {"--min-p"}, "N", - string_format("min-p sampling (default: %.1f, 0.0 = disabled)", (double)params.sampling.min_p), - [](common_params & params, const std::string & value) { - params.sampling.min_p = std::stof(value); - } - ).set_sparam()); - add_opt(common_arg( - {"--xtc-probability"}, "N", - string_format("xtc probability (default: %.1f, 0.0 = disabled)", (double)params.sampling.xtc_probability), - [](common_params & params, const std::string & value) { - params.sampling.xtc_probability = std::stof(value); - } - ).set_sparam()); - add_opt(common_arg( - {"--xtc-threshold"}, "N", - string_format("xtc threshold (default: %.1f, 1.0 = disabled)", (double)params.sampling.xtc_threshold), - [](common_params & params, const std::string & value) { - params.sampling.xtc_threshold = std::stof(value); - } - ).set_sparam()); - add_opt(common_arg( - {"--typical"}, "N", - string_format("locally typical sampling, parameter p (default: %.1f, 1.0 = disabled)", (double)params.sampling.typ_p), - [](common_params & params, const std::string & value) { - params.sampling.typ_p = std::stof(value); - } - ).set_sparam()); - add_opt(common_arg( - {"--repeat-last-n"}, "N", - string_format("last n tokens to consider for penalize (default: %d, 0 = disabled, -1 = ctx_size)", params.sampling.penalty_last_n), - [](common_params & params, int value) { - if (value < -1) { - throw std::runtime_error(string_format("error: invalid repeat-last-n = %d\n", value)); - } - params.sampling.penalty_last_n = value; - params.sampling.n_prev = std::max(params.sampling.n_prev, params.sampling.penalty_last_n); - } - ).set_sparam()); - add_opt(common_arg( - {"--repeat-penalty"}, "N", - string_format("penalize repeat sequence of tokens (default: %.1f, 1.0 = disabled)", (double)params.sampling.penalty_repeat), - [](common_params & params, const std::string & value) { - params.sampling.penalty_repeat = std::stof(value); - } - ).set_sparam()); - add_opt(common_arg( - {"--presence-penalty"}, "N", - string_format("repeat alpha presence penalty (default: %.1f, 0.0 = disabled)", (double)params.sampling.penalty_present), - [](common_params & params, const std::string & value) { - params.sampling.penalty_present = std::stof(value); - } - ).set_sparam()); - add_opt(common_arg( - {"--frequency-penalty"}, "N", - string_format("repeat alpha frequency penalty (default: %.1f, 0.0 = disabled)", (double)params.sampling.penalty_freq), - [](common_params & params, const std::string & value) { - params.sampling.penalty_freq = std::stof(value); - } - ).set_sparam()); - add_opt(common_arg( - {"--dry-multiplier"}, "N", - string_format("set DRY sampling multiplier (default: %.1f, 0.0 = disabled)", (double)params.sampling.dry_multiplier), - [](common_params & params, const std::string & value) { - params.sampling.dry_multiplier = std::stof(value); - } - ).set_sparam()); - add_opt(common_arg( - {"--dry-base"}, "N", - string_format("set DRY sampling base value (default: %.2f)", (double)params.sampling.dry_base), - [](common_params & params, const std::string & value) { - float potential_base = std::stof(value); - if (potential_base >= 1.0f) - { - params.sampling.dry_base = potential_base; - } - } - ).set_sparam()); 
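For orientation, here is a minimal caller-side sketch of how the parser assembled above (common_params_parser_init() / common_params_parse()) is typically driven by an example binary. This is illustrative only and not part of the removed file: the header names and the trivial print_usage callback are assumptions, while common_params, common_params_parse() and LLAMA_EXAMPLE_MAIN are the names defined or used in the code above.

    // illustrative sketch (assumed headers), not part of the original diff
    #include "arg.h"      // assumed to declare common_params_parse()
    #include "common.h"   // assumed to define common_params
    #include <cstdio>

    static void print_usage(int /*argc*/, char ** argv) {
        // hypothetical usage callback; invoked when -h/--help is given
        printf("usage: %s [options]\n", argv[0]);
    }

    int main(int argc, char ** argv) {
        common_params params;
        // fills `params` from argv; prints usage and exits on -h, returns false on invalid input
        if (!common_params_parse(argc, argv, params, LLAMA_EXAMPLE_MAIN, print_usage)) {
            return 1;
        }
        // sampling flags such as --temp / --top-p land in params.sampling,
        // threading flags such as -t land in params.cpuparams.n_threads
        return 0;
    }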
- add_opt(common_arg( - {"--dry-allowed-length"}, "N", - string_format("set allowed length for DRY sampling (default: %d)", params.sampling.dry_allowed_length), - [](common_params & params, int value) { - params.sampling.dry_allowed_length = value; - } - ).set_sparam()); - add_opt(common_arg( - {"--dry-penalty-last-n"}, "N", - string_format("set DRY penalty for the last n tokens (default: %d, 0 = disable, -1 = context size)", params.sampling.dry_penalty_last_n), - [](common_params & params, int value) { - if (value < -1) { - throw std::runtime_error(string_format("error: invalid dry-penalty-last-n = %d\n", value)); - } - params.sampling.dry_penalty_last_n = value; - } - ).set_sparam()); - add_opt(common_arg( - {"--dry-sequence-breaker"}, "STRING", - string_format("add sequence breaker for DRY sampling, clearing out default breakers (%s) in the process; use \"none\" to not use any sequence breakers\n", - params.sampling.dry_sequence_breakers.empty() ? "none" : - std::accumulate(std::next(params.sampling.dry_sequence_breakers.begin()), - params.sampling.dry_sequence_breakers.end(), - std::string("'") + (params.sampling.dry_sequence_breakers[0] == "\n" ? "\\n" : params.sampling.dry_sequence_breakers[0]) + "'", - [](const std::string& a, const std::string& b) { - std::string formatted_b = (b == "\n") ? "\\n" : b; - return a + ", '" + formatted_b + "'"; - }).c_str()), - [](common_params & params, const std::string & value) { - static bool defaults_cleared = false; - - if (!defaults_cleared) { - params.sampling.dry_sequence_breakers.clear(); - defaults_cleared = true; - } - - if (value == "none") { - params.sampling.dry_sequence_breakers.clear(); - } else { - params.sampling.dry_sequence_breakers.emplace_back(value); - } - } - ).set_sparam()); - add_opt(common_arg( - {"--dynatemp-range"}, "N", - string_format("dynamic temperature range (default: %.1f, 0.0 = disabled)", (double)params.sampling.dynatemp_range), - [](common_params & params, const std::string & value) { - params.sampling.dynatemp_range = std::stof(value); - } - ).set_sparam()); - add_opt(common_arg( - {"--dynatemp-exp"}, "N", - string_format("dynamic temperature exponent (default: %.1f)", (double)params.sampling.dynatemp_exponent), - [](common_params & params, const std::string & value) { - params.sampling.dynatemp_exponent = std::stof(value); - } - ).set_sparam()); - add_opt(common_arg( - {"--mirostat"}, "N", - string_format("use Mirostat sampling.\nTop K, Nucleus and Locally Typical samplers are ignored if used.\n" - "(default: %d, 0 = disabled, 1 = Mirostat, 2 = Mirostat 2.0)", params.sampling.mirostat), - [](common_params & params, int value) { - params.sampling.mirostat = value; - } - ).set_sparam()); - add_opt(common_arg( - {"--mirostat-lr"}, "N", - string_format("Mirostat learning rate, parameter eta (default: %.1f)", (double)params.sampling.mirostat_eta), - [](common_params & params, const std::string & value) { - params.sampling.mirostat_eta = std::stof(value); - } - ).set_sparam()); - add_opt(common_arg( - {"--mirostat-ent"}, "N", - string_format("Mirostat target entropy, parameter tau (default: %.1f)", (double)params.sampling.mirostat_tau), - [](common_params & params, const std::string & value) { - params.sampling.mirostat_tau = std::stof(value); - } - ).set_sparam()); - add_opt(common_arg( - {"-l", "--logit-bias"}, "TOKEN_ID(+/-)BIAS", - "modifies the likelihood of token appearing in the completion,\n" - "i.e. 
`--logit-bias 15043+1` to increase likelihood of token ' Hello',\n" - "or `--logit-bias 15043-1` to decrease likelihood of token ' Hello'", - [](common_params & params, const std::string & value) { - std::stringstream ss(value); - llama_token key; - char sign; - std::string value_str; - try { - if (ss >> key && ss >> sign && std::getline(ss, value_str) && (sign == '+' || sign == '-')) { - const float bias = std::stof(value_str) * ((sign == '-') ? -1.0f : 1.0f); - params.sampling.logit_bias.push_back({key, bias}); - } else { - throw std::invalid_argument("invalid input format"); - } - } catch (const std::exception&) { - throw std::invalid_argument("invalid input format"); - } - } - ).set_sparam()); - add_opt(common_arg( - {"--grammar"}, "GRAMMAR", - string_format("BNF-like grammar to constrain generations (see samples in grammars/ dir) (default: '%s')", params.sampling.grammar.c_str()), - [](common_params & params, const std::string & value) { - params.sampling.grammar = value; - } - ).set_sparam()); - add_opt(common_arg( - {"--grammar-file"}, "FNAME", - "file to read grammar from", - [](common_params & params, const std::string & value) { - std::ifstream file(value); - if (!file) { - throw std::runtime_error(string_format("error: failed to open file '%s'\n", value.c_str())); - } - std::copy( - std::istreambuf_iterator(file), - std::istreambuf_iterator(), - std::back_inserter(params.sampling.grammar) - ); - } - ).set_sparam()); - add_opt(common_arg( - {"-j", "--json-schema"}, "SCHEMA", - "JSON schema to constrain generations (https://json-schema.org/), e.g. `{}` for any JSON object\nFor schemas w/ external $refs, use --grammar + example/json_schema_to_grammar.py instead", - [](common_params & params, const std::string & value) { - params.sampling.grammar = json_schema_to_grammar(json::parse(value)); - } - ).set_sparam()); - add_opt(common_arg( - {"--pooling"}, "{none,mean,cls,last,rank}", - "pooling type for embeddings, use model default if unspecified", - [](common_params & params, const std::string & value) { - /**/ if (value == "none") { params.pooling_type = LLAMA_POOLING_TYPE_NONE; } - else if (value == "mean") { params.pooling_type = LLAMA_POOLING_TYPE_MEAN; } - else if (value == "cls") { params.pooling_type = LLAMA_POOLING_TYPE_CLS; } - else if (value == "last") { params.pooling_type = LLAMA_POOLING_TYPE_LAST; } - else if (value == "rank") { params.pooling_type = LLAMA_POOLING_TYPE_RANK; } - else { throw std::invalid_argument("invalid value"); } - } - ).set_examples({LLAMA_EXAMPLE_EMBEDDING, LLAMA_EXAMPLE_RETRIEVAL, LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_POOLING")); - add_opt(common_arg( - {"--attention"}, "{causal,non-causal}", - "attention type for embeddings, use model default if unspecified", - [](common_params & params, const std::string & value) { - /**/ if (value == "causal") { params.attention_type = LLAMA_ATTENTION_TYPE_CAUSAL; } - else if (value == "non-causal") { params.attention_type = LLAMA_ATTENTION_TYPE_NON_CAUSAL; } - else { throw std::invalid_argument("invalid value"); } - } - ).set_examples({LLAMA_EXAMPLE_EMBEDDING})); - add_opt(common_arg( - {"--rope-scaling"}, "{none,linear,yarn}", - "RoPE frequency scaling method, defaults to linear unless specified by the model", - [](common_params & params, const std::string & value) { - /**/ if (value == "none") { params.rope_scaling_type = LLAMA_ROPE_SCALING_TYPE_NONE; } - else if (value == "linear") { params.rope_scaling_type = LLAMA_ROPE_SCALING_TYPE_LINEAR; } - else if (value == "yarn") { params.rope_scaling_type = 
LLAMA_ROPE_SCALING_TYPE_YARN; } - else { throw std::invalid_argument("invalid value"); } - } - ).set_env("LLAMA_ARG_ROPE_SCALING_TYPE")); - add_opt(common_arg( - {"--rope-scale"}, "N", - "RoPE context scaling factor, expands context by a factor of N", - [](common_params & params, const std::string & value) { - params.rope_freq_scale = 1.0f / std::stof(value); - } - ).set_env("LLAMA_ARG_ROPE_SCALE")); - add_opt(common_arg( - {"--rope-freq-base"}, "N", - "RoPE base frequency, used by NTK-aware scaling (default: loaded from model)", - [](common_params & params, const std::string & value) { - params.rope_freq_base = std::stof(value); - } - ).set_env("LLAMA_ARG_ROPE_FREQ_BASE")); - add_opt(common_arg( - {"--rope-freq-scale"}, "N", - "RoPE frequency scaling factor, expands context by a factor of 1/N", - [](common_params & params, const std::string & value) { - params.rope_freq_scale = std::stof(value); - } - ).set_env("LLAMA_ARG_ROPE_FREQ_SCALE")); - add_opt(common_arg( - {"--yarn-orig-ctx"}, "N", - string_format("YaRN: original context size of model (default: %d = model training context size)", params.yarn_orig_ctx), - [](common_params & params, int value) { - params.yarn_orig_ctx = value; - } - ).set_env("LLAMA_ARG_YARN_ORIG_CTX")); - add_opt(common_arg( - {"--yarn-ext-factor"}, "N", - string_format("YaRN: extrapolation mix factor (default: %.1f, 0.0 = full interpolation)", (double)params.yarn_ext_factor), - [](common_params & params, const std::string & value) { - params.yarn_ext_factor = std::stof(value); - } - ).set_env("LLAMA_ARG_YARN_EXT_FACTOR")); - add_opt(common_arg( - {"--yarn-attn-factor"}, "N", - string_format("YaRN: scale sqrt(t) or attention magnitude (default: %.1f)", (double)params.yarn_attn_factor), - [](common_params & params, const std::string & value) { - params.yarn_attn_factor = std::stof(value); - } - ).set_env("LLAMA_ARG_YARN_ATTN_FACTOR")); - add_opt(common_arg( - {"--yarn-beta-slow"}, "N", - string_format("YaRN: high correction dim or alpha (default: %.1f)", (double)params.yarn_beta_slow), - [](common_params & params, const std::string & value) { - params.yarn_beta_slow = std::stof(value); - } - ).set_env("LLAMA_ARG_YARN_BETA_SLOW")); - add_opt(common_arg( - {"--yarn-beta-fast"}, "N", - string_format("YaRN: low correction dim or beta (default: %.1f)", (double)params.yarn_beta_fast), - [](common_params & params, const std::string & value) { - params.yarn_beta_fast = std::stof(value); - } - ).set_env("LLAMA_ARG_YARN_BETA_FAST")); - add_opt(common_arg( - {"-gan", "--grp-attn-n"}, "N", - string_format("group-attention factor (default: %d)", params.grp_attn_n), - [](common_params & params, int value) { - params.grp_attn_n = value; - } - ).set_env("LLAMA_ARG_GRP_ATTN_N").set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_PASSKEY})); - add_opt(common_arg( - {"-gaw", "--grp-attn-w"}, "N", - string_format("group-attention width (default: %d)", params.grp_attn_w), - [](common_params & params, int value) { - params.grp_attn_w = value; - } - ).set_env("LLAMA_ARG_GRP_ATTN_W").set_examples({LLAMA_EXAMPLE_MAIN})); - add_opt(common_arg( - {"-dkvc", "--dump-kv-cache"}, - "verbose print of the KV cache", - [](common_params & params) { - params.dump_kv_cache = true; - } - )); - add_opt(common_arg( - {"-nkvo", "--no-kv-offload"}, - "disable KV offload", - [](common_params & params) { - params.no_kv_offload = true; - } - ).set_env("LLAMA_ARG_NO_KV_OFFLOAD")); - add_opt(common_arg( - {"-ctk", "--cache-type-k"}, "TYPE", - string_format( - "KV cache data type for K\n" - "allowed values: %s\n" - 
"(default: %s)", - get_all_kv_cache_types().c_str(), - ggml_type_name(params.cache_type_k) - ), - [](common_params & params, const std::string & value) { - params.cache_type_k = kv_cache_type_from_str(value); - } - ).set_env("LLAMA_ARG_CACHE_TYPE_K")); - add_opt(common_arg( - {"-ctv", "--cache-type-v"}, "TYPE", - string_format( - "KV cache data type for V\n" - "allowed values: %s\n" - "(default: %s)", - get_all_kv_cache_types().c_str(), - ggml_type_name(params.cache_type_v) - ), - [](common_params & params, const std::string & value) { - params.cache_type_v = kv_cache_type_from_str(value); - } - ).set_env("LLAMA_ARG_CACHE_TYPE_V")); - add_opt(common_arg( - {"--perplexity", "--all-logits"}, - string_format("return logits for all tokens in the batch (default: %s)", params.logits_all ? "true" : "false"), - [](common_params & params) { - params.logits_all = true; - } - ).set_examples({LLAMA_EXAMPLE_PERPLEXITY})); - add_opt(common_arg( - {"--hellaswag"}, - "compute HellaSwag score over random tasks from datafile supplied with -f", - [](common_params & params) { - params.hellaswag = true; - } - ).set_examples({LLAMA_EXAMPLE_PERPLEXITY})); - add_opt(common_arg( - {"--hellaswag-tasks"}, "N", - string_format("number of tasks to use when computing the HellaSwag score (default: %zu)", params.hellaswag_tasks), - [](common_params & params, int value) { - params.hellaswag_tasks = value; - } - ).set_examples({LLAMA_EXAMPLE_PERPLEXITY})); - add_opt(common_arg( - {"--winogrande"}, - "compute Winogrande score over random tasks from datafile supplied with -f", - [](common_params & params) { - params.winogrande = true; - } - ).set_examples({LLAMA_EXAMPLE_PERPLEXITY})); - add_opt(common_arg( - {"--winogrande-tasks"}, "N", - string_format("number of tasks to use when computing the Winogrande score (default: %zu)", params.winogrande_tasks), - [](common_params & params, int value) { - params.winogrande_tasks = value; - } - ).set_examples({LLAMA_EXAMPLE_PERPLEXITY})); - add_opt(common_arg( - {"--multiple-choice"}, - "compute multiple choice score over random tasks from datafile supplied with -f", - [](common_params & params) { - params.multiple_choice = true; - } - ).set_examples({LLAMA_EXAMPLE_PERPLEXITY})); - add_opt(common_arg( - {"--multiple-choice-tasks"}, "N", - string_format("number of tasks to use when computing the multiple choice score (default: %zu)", params.multiple_choice_tasks), - [](common_params & params, int value) { - params.multiple_choice_tasks = value; - } - ).set_examples({LLAMA_EXAMPLE_PERPLEXITY})); - add_opt(common_arg( - {"--kl-divergence"}, - "computes KL-divergence to logits provided via --kl-divergence-base", - [](common_params & params) { - params.kl_divergence = true; - } - ).set_examples({LLAMA_EXAMPLE_PERPLEXITY})); - add_opt(common_arg( - {"--save-all-logits", "--kl-divergence-base"}, "FNAME", - "set logits file", - [](common_params & params, const std::string & value) { - params.logits_file = value; - } - ).set_examples({LLAMA_EXAMPLE_PERPLEXITY})); - add_opt(common_arg( - {"--ppl-stride"}, "N", - string_format("stride for perplexity calculation (default: %d)", params.ppl_stride), - [](common_params & params, int value) { - params.ppl_stride = value; - } - ).set_examples({LLAMA_EXAMPLE_PERPLEXITY})); - add_opt(common_arg( - {"--ppl-output-type"}, "<0|1>", - string_format("output type for perplexity calculation (default: %d)", params.ppl_output_type), - [](common_params & params, int value) { - params.ppl_output_type = value; - } - ).set_examples({LLAMA_EXAMPLE_PERPLEXITY})); - 
add_opt(common_arg( - {"-dt", "--defrag-thold"}, "N", - string_format("KV cache defragmentation threshold (default: %.1f, < 0 - disabled)", (double)params.defrag_thold), - [](common_params & params, const std::string & value) { - params.defrag_thold = std::stof(value); - } - ).set_env("LLAMA_ARG_DEFRAG_THOLD")); - add_opt(common_arg( - {"-np", "--parallel"}, "N", - string_format("number of parallel sequences to decode (default: %d)", params.n_parallel), - [](common_params & params, int value) { - params.n_parallel = value; - } - ).set_env("LLAMA_ARG_N_PARALLEL")); - add_opt(common_arg( - {"-ns", "--sequences"}, "N", - string_format("number of sequences to decode (default: %d)", params.n_sequences), - [](common_params & params, int value) { - params.n_sequences = value; - } - ).set_examples({LLAMA_EXAMPLE_PARALLEL})); - add_opt(common_arg( - {"-cb", "--cont-batching"}, - string_format("enable continuous batching (a.k.a dynamic batching) (default: %s)", params.cont_batching ? "enabled" : "disabled"), - [](common_params & params) { - params.cont_batching = true; - } - ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_CONT_BATCHING")); - add_opt(common_arg( - {"-nocb", "--no-cont-batching"}, - "disable continuous batching", - [](common_params & params) { - params.cont_batching = false; - } - ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_NO_CONT_BATCHING")); - add_opt(common_arg( - {"--mmproj"}, "FILE", - "path to a multimodal projector file for LLaVA. see examples/llava/README.md", - [](common_params & params, const std::string & value) { - params.mmproj = value; - } - ).set_examples({LLAMA_EXAMPLE_LLAVA})); - add_opt(common_arg( - {"--image"}, "FILE", - "path to an image file. use with multimodal models. Specify multiple times for batching", - [](common_params & params, const std::string & value) { - params.image.emplace_back(value); - } - ).set_examples({LLAMA_EXAMPLE_LLAVA})); - if (llama_supports_rpc()) { - add_opt(common_arg( - {"--rpc"}, "SERVERS", - "comma separated list of RPC servers", - [](common_params & params, const std::string & value) { - add_rpc_devices(value); - GGML_UNUSED(params); - } - ).set_env("LLAMA_ARG_RPC")); - } - add_opt(common_arg( - {"--mlock"}, - "force system to keep model in RAM rather than swapping or compressing", - [](common_params & params) { - params.use_mlock = true; - } - ).set_env("LLAMA_ARG_MLOCK")); - add_opt(common_arg( - {"--no-mmap"}, - "do not memory-map model (slower load but may reduce pageouts if not using mlock)", - [](common_params & params) { - params.use_mmap = false; - } - ).set_env("LLAMA_ARG_NO_MMAP")); - add_opt(common_arg( - {"--numa"}, "TYPE", - "attempt optimizations that help on some NUMA systems\n" - "- distribute: spread execution evenly over all nodes\n" - "- isolate: only spawn threads on CPUs on the node that execution started on\n" - "- numactl: use the CPU map provided by numactl\n" - "if run without this previously, it is recommended to drop the system page cache before using this\n" - "see https://github.com/ggerganov/llama.cpp/issues/1437", - [](common_params & params, const std::string & value) { - /**/ if (value == "distribute" || value == "") { params.numa = GGML_NUMA_STRATEGY_DISTRIBUTE; } - else if (value == "isolate") { params.numa = GGML_NUMA_STRATEGY_ISOLATE; } - else if (value == "numactl") { params.numa = GGML_NUMA_STRATEGY_NUMACTL; } - else { throw std::invalid_argument("invalid value"); } - } - ).set_env("LLAMA_ARG_NUMA")); - add_opt(common_arg( - {"-dev", "--device"}, "", - "comma-separated 
list of devices to use for offloading (none = don't offload)\n" - "use --list-devices to see a list of available devices", - [](common_params & params, const std::string & value) { - params.devices = parse_device_list(value); - } - ).set_env("LLAMA_ARG_DEVICE")); - add_opt(common_arg( - {"--list-devices"}, - "print list of available devices and exit", - [](common_params &) { - std::vector rpc_devices; - std::vector all_devices; - for (size_t i = 0; i < ggml_backend_dev_count(); ++i) { - auto * dev = ggml_backend_dev_get(i); - if (ggml_backend_dev_type(dev) == GGML_BACKEND_DEVICE_TYPE_GPU) { - ggml_backend_reg_t reg = ggml_backend_dev_backend_reg(dev); - if (ggml_backend_reg_name(reg) == std::string("RPC")) { - rpc_devices.push_back(dev); - } else { - all_devices.push_back(dev); - } - } - } - // insert RPC devices in front - all_devices.insert(all_devices.begin(), rpc_devices.begin(), rpc_devices.end()); - printf("Available devices:\n"); - for (size_t i = 0; i < all_devices.size(); ++i) { - auto * dev = all_devices[i]; - size_t free, total; - ggml_backend_dev_memory(dev, &free, &total); - printf(" %s: %s (%zu MiB, %zu MiB free)\n", ggml_backend_dev_name(dev), ggml_backend_dev_description(dev), total / 1024 / 1024, free / 1024 / 1024); - } - exit(0); - } - )); - add_opt(common_arg( - {"-ngl", "--gpu-layers", "--n-gpu-layers"}, "N", - "number of layers to store in VRAM", - [](common_params & params, int value) { - params.n_gpu_layers = value; - if (!llama_supports_gpu_offload()) { - fprintf(stderr, "warning: no usable GPU found, --gpu-layers option will be ignored\n"); - fprintf(stderr, "warning: one possible reason is that llama.cpp was compiled without GPU support\n"); - fprintf(stderr, "warning: consult docs/build.md for compilation instructions\n"); - } - } - ).set_env("LLAMA_ARG_N_GPU_LAYERS")); - add_opt(common_arg( - {"-sm", "--split-mode"}, "{none,layer,row}", - "how to split the model across multiple GPUs, one of:\n" - "- none: use one GPU only\n" - "- layer (default): split layers and KV across GPUs\n" - "- row: split rows across GPUs", - [](common_params & params, const std::string & value) { - std::string arg_next = value; - if (arg_next == "none") { - params.split_mode = LLAMA_SPLIT_MODE_NONE; - } else if (arg_next == "layer") { - params.split_mode = LLAMA_SPLIT_MODE_LAYER; - } else if (arg_next == "row") { - params.split_mode = LLAMA_SPLIT_MODE_ROW; - } else { - throw std::invalid_argument("invalid value"); - } - if (!llama_supports_gpu_offload()) { - fprintf(stderr, "warning: llama.cpp was compiled without support for GPU offload. Setting the split mode has no effect.\n"); - } - } - ).set_env("LLAMA_ARG_SPLIT_MODE")); - add_opt(common_arg( - {"-ts", "--tensor-split"}, "N0,N1,N2,...", - "fraction of the model to offload to each GPU, comma-separated list of proportions, e.g. 
3,1", - [](common_params & params, const std::string & value) { - std::string arg_next = value; - - // split string by , and / - const std::regex regex{ R"([,/]+)" }; - std::sregex_token_iterator it{ arg_next.begin(), arg_next.end(), regex, -1 }; - std::vector split_arg{ it, {} }; - if (split_arg.size() >= llama_max_devices()) { - throw std::invalid_argument( - string_format("got %d input configs, but system only has %d devices", (int)split_arg.size(), (int)llama_max_devices()) - ); - } - for (size_t i = 0; i < llama_max_devices(); ++i) { - if (i < split_arg.size()) { - params.tensor_split[i] = std::stof(split_arg[i]); - } else { - params.tensor_split[i] = 0.0f; - } - } - if (!llama_supports_gpu_offload()) { - fprintf(stderr, "warning: llama.cpp was compiled without support for GPU offload. Setting a tensor split has no effect.\n"); - } - } - ).set_env("LLAMA_ARG_TENSOR_SPLIT")); - add_opt(common_arg( - {"-mg", "--main-gpu"}, "INDEX", - string_format("the GPU to use for the model (with split-mode = none), or for intermediate results and KV (with split-mode = row) (default: %d)", params.main_gpu), - [](common_params & params, int value) { - params.main_gpu = value; - if (!llama_supports_gpu_offload()) { - fprintf(stderr, "warning: llama.cpp was compiled without support for GPU offload. Setting the main GPU has no effect.\n"); - } - } - ).set_env("LLAMA_ARG_MAIN_GPU")); - add_opt(common_arg( - {"--check-tensors"}, - string_format("check model tensor data for invalid values (default: %s)", params.check_tensors ? "true" : "false"), - [](common_params & params) { - params.check_tensors = true; - } - )); - add_opt(common_arg( - {"--override-kv"}, "KEY=TYPE:VALUE", - "advanced option to override model metadata by key. may be specified multiple times.\n" - "types: int, float, bool, str. 
example: --override-kv tokenizer.ggml.add_bos_token=bool:false", - [](common_params & params, const std::string & value) { - if (!string_parse_kv_override(value.c_str(), params.kv_overrides)) { - throw std::runtime_error(string_format("error: Invalid type for KV override: %s\n", value.c_str())); - } - } - )); - add_opt(common_arg( - {"--lora"}, "FNAME", - "path to LoRA adapter (can be repeated to use multiple adapters)", - [](common_params & params, const std::string & value) { - params.lora_adapters.push_back({ std::string(value), 1.0, nullptr }); - } - // we define this arg on both COMMON and EXPORT_LORA, so when showing help message of export-lora, it will be categorized as "example-specific" arg - ).set_examples({LLAMA_EXAMPLE_COMMON, LLAMA_EXAMPLE_EXPORT_LORA})); - add_opt(common_arg( - {"--lora-scaled"}, "FNAME", "SCALE", - "path to LoRA adapter with user defined scaling (can be repeated to use multiple adapters)", - [](common_params & params, const std::string & fname, const std::string & scale) { - params.lora_adapters.push_back({ fname, std::stof(scale), nullptr }); - } - // we define this arg on both COMMON and EXPORT_LORA, so when showing help message of export-lora, it will be categorized as "example-specific" arg - ).set_examples({LLAMA_EXAMPLE_COMMON, LLAMA_EXAMPLE_EXPORT_LORA})); - add_opt(common_arg( - {"--control-vector"}, "FNAME", - "add a control vector\nnote: this argument can be repeated to add multiple control vectors", - [](common_params & params, const std::string & value) { - params.control_vectors.push_back({ 1.0f, value, }); - } - )); - add_opt(common_arg( - {"--control-vector-scaled"}, "FNAME", "SCALE", - "add a control vector with user defined scaling SCALE\n" - "note: this argument can be repeated to add multiple scaled control vectors", - [](common_params & params, const std::string & fname, const std::string & scale) { - params.control_vectors.push_back({ std::stof(scale), fname }); - } - )); - add_opt(common_arg( - {"--control-vector-layer-range"}, "START", "END", - "layer range to apply the control vector(s) to, start and end inclusive", - [](common_params & params, const std::string & start, const std::string & end) { - params.control_vector_layer_start = std::stoi(start); - params.control_vector_layer_end = std::stoi(end); - } - )); - add_opt(common_arg( - {"-a", "--alias"}, "STRING", - "set alias for model name (to be used by REST API)", - [](common_params & params, const std::string & value) { - params.model_alias = value; - } - ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_ALIAS")); - add_opt(common_arg( - {"-m", "--model"}, "FNAME", - ex == LLAMA_EXAMPLE_EXPORT_LORA - ? 
std::string("model path from which to load base model") - : string_format( - "model path (default: `models/$filename` with filename from `--hf-file` " - "or `--model-url` if set, otherwise %s)", DEFAULT_MODEL_PATH - ), - [](common_params & params, const std::string & value) { - params.model = value; - } - ).set_examples({LLAMA_EXAMPLE_COMMON, LLAMA_EXAMPLE_EXPORT_LORA}).set_env("LLAMA_ARG_MODEL")); - add_opt(common_arg( - {"-mu", "--model-url"}, "MODEL_URL", - "model download url (default: unused)", - [](common_params & params, const std::string & value) { - params.model_url = value; - } - ).set_env("LLAMA_ARG_MODEL_URL")); - add_opt(common_arg( - {"-hf", "-hfr", "--hf-repo"}, "/[:quant]", - "Hugging Face model repository; quant is optional, case-insensitive, default to Q4_K_M, or falls back to the first file in the repo if Q4_K_M doesn't exist.\n" - "example: unsloth/phi-4-GGUF:q4_k_m\n" - "(default: unused)", - [](common_params & params, const std::string & value) { - params.hf_repo = value; - } - ).set_env("LLAMA_ARG_HF_REPO")); - add_opt(common_arg( - {"-hfd", "-hfrd", "--hf-repo-draft"}, "/[:quant]", - "Same as --hf-repo, but for the draft model (default: unused)", - [](common_params & params, const std::string & value) { - params.speculative.hf_repo = value; - } - ).set_env("LLAMA_ARG_HFD_REPO")); - add_opt(common_arg( - {"-hff", "--hf-file"}, "FILE", - "Hugging Face model file. If specified, it will override the quant in --hf-repo (default: unused)", - [](common_params & params, const std::string & value) { - params.hf_file = value; - } - ).set_env("LLAMA_ARG_HF_FILE")); - add_opt(common_arg( - {"-hfv", "-hfrv", "--hf-repo-v"}, "/[:quant]", - "Hugging Face model repository for the vocoder model (default: unused)", - [](common_params & params, const std::string & value) { - params.vocoder.hf_repo = value; - } - ).set_env("LLAMA_ARG_HF_REPO_V")); - add_opt(common_arg( - {"-hffv", "--hf-file-v"}, "FILE", - "Hugging Face model file for the vocoder model (default: unused)", - [](common_params & params, const std::string & value) { - params.vocoder.hf_file = value; - } - ).set_env("LLAMA_ARG_HF_FILE_V")); - add_opt(common_arg( - {"-hft", "--hf-token"}, "TOKEN", - "Hugging Face access token (default: value from HF_TOKEN environment variable)", - [](common_params & params, const std::string & value) { - params.hf_token = value; - } - ).set_env("HF_TOKEN")); - add_opt(common_arg( - {"--context-file"}, "FNAME", - "file to load context from (repeat to specify multiple files)", - [](common_params & params, const std::string & value) { - std::ifstream file(value, std::ios::binary); - if (!file) { - throw std::runtime_error(string_format("error: failed to open file '%s'\n", value.c_str())); - } - params.context_files.push_back(value); - } - ).set_examples({LLAMA_EXAMPLE_RETRIEVAL})); - add_opt(common_arg( - {"--chunk-size"}, "N", - string_format("minimum length of embedded text chunks (default: %d)", params.chunk_size), - [](common_params & params, int value) { - params.chunk_size = value; - } - ).set_examples({LLAMA_EXAMPLE_RETRIEVAL})); - add_opt(common_arg( - {"--chunk-separator"}, "STRING", - string_format("separator between chunks (default: '%s')", params.chunk_separator.c_str()), - [](common_params & params, const std::string & value) { - params.chunk_separator = value; - } - ).set_examples({LLAMA_EXAMPLE_RETRIEVAL})); - add_opt(common_arg( - {"--junk"}, "N", - string_format("number of times to repeat the junk text (default: %d)", params.n_junk), - [](common_params & params, int value) { - 
params.n_junk = value; - } - ).set_examples({LLAMA_EXAMPLE_PASSKEY})); - add_opt(common_arg( - {"--pos"}, "N", - string_format("position of the passkey in the junk text (default: %d)", params.i_pos), - [](common_params & params, int value) { - params.i_pos = value; - } - ).set_examples({LLAMA_EXAMPLE_PASSKEY})); - add_opt(common_arg( - {"-o", "--output", "--output-file"}, "FNAME", - string_format("output file (default: '%s')", - ex == LLAMA_EXAMPLE_EXPORT_LORA - ? params.lora_outfile.c_str() - : ex == LLAMA_EXAMPLE_CVECTOR_GENERATOR - ? params.cvector_outfile.c_str() - : params.out_file.c_str()), - [](common_params & params, const std::string & value) { - params.out_file = value; - params.cvector_outfile = value; - params.lora_outfile = value; - } - ).set_examples({LLAMA_EXAMPLE_IMATRIX, LLAMA_EXAMPLE_CVECTOR_GENERATOR, LLAMA_EXAMPLE_EXPORT_LORA})); - add_opt(common_arg( - {"-ofreq", "--output-frequency"}, "N", - string_format("output the imatrix every N iterations (default: %d)", params.n_out_freq), - [](common_params & params, int value) { - params.n_out_freq = value; - } - ).set_examples({LLAMA_EXAMPLE_IMATRIX})); - add_opt(common_arg( - {"--save-frequency"}, "N", - string_format("save an imatrix copy every N iterations (default: %d)", params.n_save_freq), - [](common_params & params, int value) { - params.n_save_freq = value; - } - ).set_examples({LLAMA_EXAMPLE_IMATRIX})); - add_opt(common_arg( - {"--process-output"}, - string_format("collect data for the output tensor (default: %s)", params.process_output ? "true" : "false"), - [](common_params & params) { - params.process_output = true; - } - ).set_examples({LLAMA_EXAMPLE_IMATRIX})); - add_opt(common_arg( - {"--no-ppl"}, - string_format("do not compute perplexity (default: %s)", params.compute_ppl ? "true" : "false"), - [](common_params & params) { - params.compute_ppl = false; - } - ).set_examples({LLAMA_EXAMPLE_IMATRIX})); - add_opt(common_arg( - {"--chunk", "--from-chunk"}, "N", - string_format("start processing the input from chunk N (default: %d)", params.i_chunk), - [](common_params & params, int value) { - params.i_chunk = value; - } - ).set_examples({LLAMA_EXAMPLE_IMATRIX})); - add_opt(common_arg( - {"-pps"}, - string_format("is the prompt shared across parallel sequences (default: %s)", params.is_pp_shared ? 
"true" : "false"), - [](common_params & params) { - params.is_pp_shared = true; - } - ).set_examples({LLAMA_EXAMPLE_BENCH})); - add_opt(common_arg( - {"-npp"}, "n0,n1,...", - "number of prompt tokens", - [](common_params & params, const std::string & value) { - auto p = string_split(value, ','); - params.n_pp.insert(params.n_pp.end(), p.begin(), p.end()); - } - ).set_examples({LLAMA_EXAMPLE_BENCH})); - add_opt(common_arg( - {"-ntg"}, "n0,n1,...", - "number of text generation tokens", - [](common_params & params, const std::string & value) { - auto p = string_split(value, ','); - params.n_tg.insert(params.n_tg.end(), p.begin(), p.end()); - } - ).set_examples({LLAMA_EXAMPLE_BENCH})); - add_opt(common_arg( - {"-npl"}, "n0,n1,...", - "number of parallel prompts", - [](common_params & params, const std::string & value) { - auto p = string_split(value, ','); - params.n_pl.insert(params.n_pl.end(), p.begin(), p.end()); - } - ).set_examples({LLAMA_EXAMPLE_BENCH})); - add_opt(common_arg( - {"--embd-normalize"}, "N", - string_format("normalisation for embeddings (default: %d) (-1=none, 0=max absolute int16, 1=taxicab, 2=euclidean, >2=p-norm)", params.embd_normalize), - [](common_params & params, int value) { - params.embd_normalize = value; - } - ).set_examples({LLAMA_EXAMPLE_EMBEDDING})); - add_opt(common_arg( - {"--embd-output-format"}, "FORMAT", - "empty = default, \"array\" = [[],[]...], \"json\" = openai style, \"json+\" = same \"json\" + cosine similarity matrix", - [](common_params & params, const std::string & value) { - params.embd_out = value; - } - ).set_examples({LLAMA_EXAMPLE_EMBEDDING})); - add_opt(common_arg( - {"--embd-separator"}, "STRING", - "separator of embeddings (default \\n) for example \"<#sep#>\"", - [](common_params & params, const std::string & value) { - params.embd_sep = value; - } - ).set_examples({LLAMA_EXAMPLE_EMBEDDING})); - add_opt(common_arg( - {"--host"}, "HOST", - string_format("ip address to listen (default: %s)", params.hostname.c_str()), - [](common_params & params, const std::string & value) { - params.hostname = value; - } - ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_HOST")); - add_opt(common_arg( - {"--port"}, "PORT", - string_format("port to listen (default: %d)", params.port), - [](common_params & params, int value) { - params.port = value; - } - ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_PORT")); - add_opt(common_arg( - {"--path"}, "PATH", - string_format("path to serve static files from (default: %s)", params.public_path.c_str()), - [](common_params & params, const std::string & value) { - params.public_path = value; - } - ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_STATIC_PATH")); - add_opt(common_arg( - {"--no-webui"}, - string_format("Disable the Web UI (default: %s)", params.webui ? "enabled" : "disabled"), - [](common_params & params) { - params.webui = false; - } - ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_NO_WEBUI")); - add_opt(common_arg( - {"--embedding", "--embeddings"}, - string_format("restrict to only support embedding use case; use only with dedicated embedding models (default: %s)", params.embedding ? "enabled" : "disabled"), - [](common_params & params) { - params.embedding = true; - } - ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_EMBEDDINGS")); - add_opt(common_arg( - {"--reranking", "--rerank"}, - string_format("enable reranking endpoint on server (default: %s)", params.reranking ? 
"enabled" : "disabled"), - [](common_params & params) { - params.reranking = true; - } - ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_RERANKING")); - add_opt(common_arg( - {"--api-key"}, "KEY", - "API key to use for authentication (default: none)", - [](common_params & params, const std::string & value) { - params.api_keys.push_back(value); - } - ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_API_KEY")); - add_opt(common_arg( - {"--api-key-file"}, "FNAME", - "path to file containing API keys (default: none)", - [](common_params & params, const std::string & value) { - std::ifstream key_file(value); - if (!key_file) { - throw std::runtime_error(string_format("error: failed to open file '%s'\n", value.c_str())); - } - std::string key; - while (std::getline(key_file, key)) { - if (!key.empty()) { - params.api_keys.push_back(key); - } - } - key_file.close(); - } - ).set_examples({LLAMA_EXAMPLE_SERVER})); - add_opt(common_arg( - {"--ssl-key-file"}, "FNAME", - "path to file a PEM-encoded SSL private key", - [](common_params & params, const std::string & value) { - params.ssl_file_key = value; - } - ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_SSL_KEY_FILE")); - add_opt(common_arg( - {"--ssl-cert-file"}, "FNAME", - "path to file a PEM-encoded SSL certificate", - [](common_params & params, const std::string & value) { - params.ssl_file_cert = value; - } - ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_SSL_CERT_FILE")); - add_opt(common_arg( - {"-to", "--timeout"}, "N", - string_format("server read/write timeout in seconds (default: %d)", params.timeout_read), - [](common_params & params, int value) { - params.timeout_read = value; - params.timeout_write = value; - } - ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_TIMEOUT")); - add_opt(common_arg( - {"--threads-http"}, "N", - string_format("number of threads used to process HTTP requests (default: %d)", params.n_threads_http), - [](common_params & params, int value) { - params.n_threads_http = value; - } - ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_THREADS_HTTP")); - add_opt(common_arg( - {"--cache-reuse"}, "N", - string_format("min chunk size to attempt reusing from the cache via KV shifting (default: %d)", params.n_cache_reuse), - [](common_params & params, int value) { - params.n_cache_reuse = value; - } - ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_CACHE_REUSE")); - add_opt(common_arg( - {"--metrics"}, - string_format("enable prometheus compatible metrics endpoint (default: %s)", params.endpoint_metrics ? "enabled" : "disabled"), - [](common_params & params) { - params.endpoint_metrics = true; - } - ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_ENDPOINT_METRICS")); - add_opt(common_arg( - {"--slots"}, - string_format("enable slots monitoring endpoint (default: %s)", params.endpoint_slots ? "enabled" : "disabled"), - [](common_params & params) { - params.endpoint_slots = true; - } - ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_ENDPOINT_SLOTS")); - add_opt(common_arg( - {"--props"}, - string_format("enable changing global properties via POST /props (default: %s)", params.endpoint_props ? 
"enabled" : "disabled"), - [](common_params & params) { - params.endpoint_props = true; - } - ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_ENDPOINT_PROPS")); - add_opt(common_arg( - {"--no-slots"}, - "disables slots monitoring endpoint", - [](common_params & params) { - params.endpoint_slots = false; - } - ).set_examples({LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_NO_ENDPOINT_SLOTS")); - add_opt(common_arg( - {"--slot-save-path"}, "PATH", - "path to save slot kv cache (default: disabled)", - [](common_params & params, const std::string & value) { - params.slot_save_path = value; - // if doesn't end with DIRECTORY_SEPARATOR, add it - if (!params.slot_save_path.empty() && params.slot_save_path[params.slot_save_path.size() - 1] != DIRECTORY_SEPARATOR) { - params.slot_save_path += DIRECTORY_SEPARATOR; - } - } - ).set_examples({LLAMA_EXAMPLE_SERVER})); - add_opt(common_arg( - {"--jinja"}, - "use jinja template for chat (default: disabled)", - [](common_params & params) { - params.use_jinja = true; - } - ).set_examples({LLAMA_EXAMPLE_SERVER, LLAMA_EXAMPLE_MAIN}).set_env("LLAMA_ARG_JINJA")); - add_opt(common_arg( - {"--chat-template"}, "JINJA_TEMPLATE", - string_format( - "set custom jinja chat template (default: template taken from model's metadata)\n" - "if suffix/prefix are specified, template will be disabled\n" - "only commonly used templates are accepted (unless --jinja is set before this flag):\n" - "list of built-in templates:\n%s", list_builtin_chat_templates().c_str() - ), - [](common_params & params, const std::string & value) { - params.chat_template = value; - } - ).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_CHAT_TEMPLATE")); - add_opt(common_arg( - {"--chat-template-file"}, "JINJA_TEMPLATE_FILE", - string_format( - "set custom jinja chat template file (default: template taken from model's metadata)\n" - "if suffix/prefix are specified, template will be disabled\n" - "only commonly used templates are accepted (unless --jinja is set before this flag):\n" - "list of built-in templates:\n%s", list_builtin_chat_templates().c_str() - ), - [](common_params & params, const std::string & value) { - std::ifstream file(value); - if (!file) { - throw std::runtime_error(string_format("error: failed to open file '%s'\n", value.c_str())); - } - std::copy( - std::istreambuf_iterator(file), - std::istreambuf_iterator(), - std::back_inserter(params.chat_template)); - } - ).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_CHAT_TEMPLATE_FILE")); - add_opt(common_arg( - {"-sps", "--slot-prompt-similarity"}, "SIMILARITY", - string_format("how much the prompt of a request must match the prompt of a slot in order to use that slot (default: %.2f, 0.0 = disabled)\n", params.slot_prompt_similarity), - [](common_params & params, const std::string & value) { - params.slot_prompt_similarity = std::stof(value); - } - ).set_examples({LLAMA_EXAMPLE_SERVER})); - add_opt(common_arg( - {"--lora-init-without-apply"}, - string_format("load LoRA adapters without applying them (apply later via POST /lora-adapters) (default: %s)", params.lora_init_without_apply ? 
"enabled" : "disabled"), - [](common_params & params) { - params.lora_init_without_apply = true; - } - ).set_examples({LLAMA_EXAMPLE_SERVER})); - add_opt(common_arg( - {"--simple-io"}, - "use basic IO for better compatibility in subprocesses and limited consoles", - [](common_params & params) { - params.simple_io = true; - } - ).set_examples({LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_INFILL})); - add_opt(common_arg( - {"--positive-file"}, "FNAME", - string_format("positive prompts file, one prompt per line (default: '%s')", params.cvector_positive_file.c_str()), - [](common_params & params, const std::string & value) { - params.cvector_positive_file = value; - } - ).set_examples({LLAMA_EXAMPLE_CVECTOR_GENERATOR})); - add_opt(common_arg( - {"--negative-file"}, "FNAME", - string_format("negative prompts file, one prompt per line (default: '%s')", params.cvector_negative_file.c_str()), - [](common_params & params, const std::string & value) { - params.cvector_negative_file = value; - } - ).set_examples({LLAMA_EXAMPLE_CVECTOR_GENERATOR})); - add_opt(common_arg( - {"--pca-batch"}, "N", - string_format("batch size used for PCA. Larger batch runs faster, but uses more memory (default: %d)", params.n_pca_batch), - [](common_params & params, int value) { - params.n_pca_batch = value; - } - ).set_examples({LLAMA_EXAMPLE_CVECTOR_GENERATOR})); - add_opt(common_arg( - {"--pca-iter"}, "N", - string_format("number of iterations used for PCA (default: %d)", params.n_pca_iterations), - [](common_params & params, int value) { - params.n_pca_iterations = value; - } - ).set_examples({LLAMA_EXAMPLE_CVECTOR_GENERATOR})); - add_opt(common_arg( - {"--method"}, "{pca, mean}", - "dimensionality reduction method to be used (default: pca)", - [](common_params & params, const std::string & value) { - /**/ if (value == "pca") { params.cvector_dimre_method = DIMRE_METHOD_PCA; } - else if (value == "mean") { params.cvector_dimre_method = DIMRE_METHOD_MEAN; } - else { throw std::invalid_argument("invalid value"); } - } - ).set_examples({LLAMA_EXAMPLE_CVECTOR_GENERATOR})); - add_opt(common_arg( - {"--output-format"}, "{md,jsonl}", - "output format for batched-bench results (default: md)", - [](common_params & params, const std::string & value) { - /**/ if (value == "jsonl") { params.batched_bench_output_jsonl = true; } - else if (value == "md") { params.batched_bench_output_jsonl = false; } - else { std::invalid_argument("invalid value"); } - } - ).set_examples({LLAMA_EXAMPLE_BENCH})); - add_opt(common_arg( - {"--log-disable"}, - "Log disable", - [](common_params &) { - common_log_pause(common_log_main()); - } - )); - add_opt(common_arg( - {"--log-file"}, "FNAME", - "Log to file", - [](common_params &, const std::string & value) { - common_log_set_file(common_log_main(), value.c_str()); - } - )); - add_opt(common_arg( - {"--log-colors"}, - "Enable colored logging", - [](common_params &) { - common_log_set_colors(common_log_main(), true); - } - ).set_env("LLAMA_LOG_COLORS")); - add_opt(common_arg( - {"-v", "--verbose", "--log-verbose"}, - "Set verbosity level to infinity (i.e. log all messages, useful for debugging)", - [](common_params & params) { - params.verbosity = INT_MAX; - common_log_set_verbosity_thold(INT_MAX); - } - )); - add_opt(common_arg( - {"-lv", "--verbosity", "--log-verbosity"}, "N", - "Set the verbosity threshold. 
Messages with a higher verbosity will be ignored.", - [](common_params & params, int value) { - params.verbosity = value; - common_log_set_verbosity_thold(value); - } - ).set_env("LLAMA_LOG_VERBOSITY")); - add_opt(common_arg( - {"--log-prefix"}, - "Enable prefix in log messages", - [](common_params &) { - common_log_set_prefix(common_log_main(), true); - } - ).set_env("LLAMA_LOG_PREFIX")); - add_opt(common_arg( - {"--log-timestamps"}, - "Enable timestamps in log messages", - [](common_params &) { - common_log_set_timestamps(common_log_main(), true); - } - ).set_env("LLAMA_LOG_TIMESTAMPS")); - - // speculative parameters - add_opt(common_arg( - {"-td", "--threads-draft"}, "N", - "number of threads to use during generation (default: same as --threads)", - [](common_params & params, int value) { - params.speculative.cpuparams.n_threads = value; - if (params.speculative.cpuparams.n_threads <= 0) { - params.speculative.cpuparams.n_threads = std::thread::hardware_concurrency(); - } - } - ).set_examples({LLAMA_EXAMPLE_SPECULATIVE})); - add_opt(common_arg( - {"-tbd", "--threads-batch-draft"}, "N", - "number of threads to use during batch and prompt processing (default: same as --threads-draft)", - [](common_params & params, int value) { - params.speculative.cpuparams_batch.n_threads = value; - if (params.speculative.cpuparams_batch.n_threads <= 0) { - params.speculative.cpuparams_batch.n_threads = std::thread::hardware_concurrency(); - } - } - ).set_examples({LLAMA_EXAMPLE_SPECULATIVE})); - add_opt(common_arg( - {"-Cd", "--cpu-mask-draft"}, "M", - "Draft model CPU affinity mask. Complements cpu-range-draft (default: same as --cpu-mask)", - [](common_params & params, const std::string & mask) { - params.speculative.cpuparams.mask_valid = true; - if (!parse_cpu_mask(mask, params.speculative.cpuparams.cpumask)) { - throw std::invalid_argument("invalid cpumask"); - } - } - ).set_examples({LLAMA_EXAMPLE_SPECULATIVE})); - add_opt(common_arg( - {"-Crd", "--cpu-range-draft"}, "lo-hi", - "Ranges of CPUs for affinity. Complements --cpu-mask-draft", - [](common_params & params, const std::string & range) { - params.speculative.cpuparams.mask_valid = true; - if (!parse_cpu_range(range, params.speculative.cpuparams.cpumask)) { - throw std::invalid_argument("invalid range"); - } - } - ).set_examples({LLAMA_EXAMPLE_SPECULATIVE})); - add_opt(common_arg( - {"--cpu-strict-draft"}, "<0|1>", - "Use strict CPU placement for draft model (default: same as --cpu-strict)", - [](common_params & params, int value) { - params.speculative.cpuparams.strict_cpu = value; - } - ).set_examples({LLAMA_EXAMPLE_SPECULATIVE})); - add_opt(common_arg( - {"--prio-draft"}, "N", - string_format("set draft process/thread priority : 0-normal, 1-medium, 2-high, 3-realtime (default: %d)\n", params.speculative.cpuparams.priority), - [](common_params & params, int prio) { - if (prio < 0 || prio > 3) { - throw std::invalid_argument("invalid value"); - } - params.speculative.cpuparams.priority = (enum ggml_sched_priority) prio; - } - ).set_examples({LLAMA_EXAMPLE_SPECULATIVE})); - add_opt(common_arg( - {"--poll-draft"}, "<0|1>", - "Use polling to wait for draft model work (default: same as --poll)", - [](common_params & params, int value) { - params.speculative.cpuparams.poll = value; - } - ).set_examples({LLAMA_EXAMPLE_SPECULATIVE})); - add_opt(common_arg( - {"-Cbd", "--cpu-mask-batch-draft"}, "M", - "Draft model CPU affinity mask. 
Complements cpu-range-draft (default: same as --cpu-mask)", - [](common_params & params, const std::string & mask) { - params.speculative.cpuparams_batch.mask_valid = true; - if (!parse_cpu_mask(mask, params.speculative.cpuparams_batch.cpumask)) { - throw std::invalid_argument("invalid cpumask"); - } - } - ).set_examples({LLAMA_EXAMPLE_SPECULATIVE})); - add_opt(common_arg( - {"-Crbd", "--cpu-range-batch-draft"}, "lo-hi", - "Ranges of CPUs for affinity. Complements --cpu-mask-draft-batch", - [](common_params & params, const std::string & range) { - params.speculative.cpuparams_batch.mask_valid = true; - if (!parse_cpu_range(range, params.speculative.cpuparams_batch.cpumask)) { - throw std::invalid_argument("invalid cpumask"); - } - } - ).set_examples({LLAMA_EXAMPLE_SPECULATIVE})); - add_opt(common_arg( - {"--cpu-strict-batch-draft"}, "<0|1>", - "Use strict CPU placement for draft model (default: --cpu-strict-draft)", - [](common_params & params, int value) { - params.speculative.cpuparams_batch.strict_cpu = value; - } - ).set_examples({LLAMA_EXAMPLE_SPECULATIVE})); - add_opt(common_arg( - {"--prio-batch-draft"}, "N", - string_format("set draft process/thread priority : 0-normal, 1-medium, 2-high, 3-realtime (default: %d)\n", params.speculative.cpuparams_batch.priority), - [](common_params & params, int prio) { - if (prio < 0 || prio > 3) { - throw std::invalid_argument("invalid value"); - } - params.speculative.cpuparams_batch.priority = (enum ggml_sched_priority) prio; - } - ).set_examples({LLAMA_EXAMPLE_SPECULATIVE})); - add_opt(common_arg( - {"--poll-batch-draft"}, "<0|1>", - "Use polling to wait for draft model work (default: --poll-draft)", - [](common_params & params, int value) { - params.speculative.cpuparams_batch.poll = value; - } - ).set_examples({LLAMA_EXAMPLE_SPECULATIVE})); - add_opt(common_arg( - {"--draft-max", "--draft", "--draft-n"}, "N", - string_format("number of tokens to draft for speculative decoding (default: %d)", params.speculative.n_max), - [](common_params & params, int value) { - params.speculative.n_max = value; - } - ).set_examples({LLAMA_EXAMPLE_SPECULATIVE, LLAMA_EXAMPLE_LOOKUP, LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_DRAFT_MAX")); - add_opt(common_arg( - {"--draft-min", "--draft-n-min"}, "N", - string_format("minimum number of draft tokens to use for speculative decoding (default: %d)", params.speculative.n_min), - [](common_params & params, int value) { - params.speculative.n_min = value; - } - ).set_examples({LLAMA_EXAMPLE_SPECULATIVE, LLAMA_EXAMPLE_LOOKUP, LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_DRAFT_MIN")); - add_opt(common_arg( - {"--draft-p-split"}, "P", - string_format("speculative decoding split probability (default: %.1f)", (double)params.speculative.p_split), - [](common_params & params, const std::string & value) { - params.speculative.p_split = std::stof(value); - } - ).set_examples({LLAMA_EXAMPLE_SPECULATIVE}).set_env("LLAMA_ARG_DRAFT_P_SPLIT")); - add_opt(common_arg( - {"--draft-p-min"}, "P", - string_format("minimum speculative decoding probability (greedy) (default: %.1f)", (double)params.speculative.p_min), - [](common_params & params, const std::string & value) { - params.speculative.p_min = std::stof(value); - } - ).set_examples({LLAMA_EXAMPLE_SPECULATIVE, LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_DRAFT_P_MIN")); - add_opt(common_arg( - {"-cd", "--ctx-size-draft"}, "N", - string_format("size of the prompt context for the draft model (default: %d, 0 = loaded from model)", params.speculative.n_ctx), - [](common_params & params, int 
value) { - params.speculative.n_ctx = value; - } - ).set_examples({LLAMA_EXAMPLE_SPECULATIVE, LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_CTX_SIZE_DRAFT")); - add_opt(common_arg( - {"-devd", "--device-draft"}, "", - "comma-separated list of devices to use for offloading the draft model (none = don't offload)\n" - "use --list-devices to see a list of available devices", - [](common_params & params, const std::string & value) { - params.speculative.devices = parse_device_list(value); - } - ).set_examples({LLAMA_EXAMPLE_SPECULATIVE, LLAMA_EXAMPLE_SERVER})); - add_opt(common_arg( - {"-ngld", "--gpu-layers-draft", "--n-gpu-layers-draft"}, "N", - "number of layers to store in VRAM for the draft model", - [](common_params & params, int value) { - params.speculative.n_gpu_layers = value; - if (!llama_supports_gpu_offload()) { - fprintf(stderr, "warning: no usable GPU found, --gpu-layers-draft option will be ignored\n"); - fprintf(stderr, "warning: one possible reason is that llama.cpp was compiled without GPU support\n"); - fprintf(stderr, "warning: consult docs/build.md for compilation instructions\n"); - } - } - ).set_examples({LLAMA_EXAMPLE_SPECULATIVE, LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_N_GPU_LAYERS_DRAFT")); - add_opt(common_arg( - {"-md", "--model-draft"}, "FNAME", - "draft model for speculative decoding (default: unused)", - [](common_params & params, const std::string & value) { - params.speculative.model = value; - } - ).set_examples({LLAMA_EXAMPLE_SPECULATIVE, LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_MODEL_DRAFT")); - - add_opt(common_arg( - {"-mv", "--model-vocoder"}, "FNAME", - "vocoder model for audio generation (default: unused)", - [](common_params & params, const std::string & value) { - params.vocoder.model = value; - } - ).set_examples({LLAMA_EXAMPLE_TTS, LLAMA_EXAMPLE_SERVER})); - add_opt(common_arg( - {"--tts-use-guide-tokens"}, - "Use guide tokens to improve TTS word recall", - [](common_params & params) { - params.vocoder.use_guide_tokens = true; - } - ).set_examples({LLAMA_EXAMPLE_TTS, LLAMA_EXAMPLE_SERVER})); - - // model-specific - add_opt(common_arg( - {"--tts-oute-default"}, - string_format("use default OuteTTS models (note: can download weights from the internet)"), - [](common_params & params) { - params.hf_repo = "OuteAI/OuteTTS-0.2-500M-GGUF"; - params.hf_file = "OuteTTS-0.2-500M-Q8_0.gguf"; - params.vocoder.hf_repo = "ggml-org/WavTokenizer"; - params.vocoder.hf_file = "WavTokenizer-Large-75-F16.gguf"; - } - ).set_examples({LLAMA_EXAMPLE_TTS})); - - add_opt(common_arg( - {"--embd-bge-small-en-default"}, - string_format("use default bge-small-en-v1.5 model (note: can download weights from the internet)"), - [](common_params & params) { - params.hf_repo = "ggml-org/bge-small-en-v1.5-Q8_0-GGUF"; - params.hf_file = "bge-small-en-v1.5-q8_0.gguf"; - params.pooling_type = LLAMA_POOLING_TYPE_NONE; - params.embd_normalize = 2; - params.n_ctx = 512; - params.verbose_prompt = true; - params.embedding = true; - } - ).set_examples({LLAMA_EXAMPLE_EMBEDDING, LLAMA_EXAMPLE_SERVER})); - - add_opt(common_arg( - {"--embd-e5-small-en-default"}, - string_format("use default e5-small-v2 model (note: can download weights from the internet)"), - [](common_params & params) { - params.hf_repo = "ggml-org/e5-small-v2-Q8_0-GGUF"; - params.hf_file = "e5-small-v2-q8_0.gguf"; - params.pooling_type = LLAMA_POOLING_TYPE_NONE; - params.embd_normalize = 2; - params.n_ctx = 512; - params.verbose_prompt = true; - params.embedding = true; - } - ).set_examples({LLAMA_EXAMPLE_EMBEDDING, 
LLAMA_EXAMPLE_SERVER})); - - add_opt(common_arg( - {"--embd-gte-small-default"}, - string_format("use default gte-small model (note: can download weights from the internet)"), - [](common_params & params) { - params.hf_repo = "ggml-org/gte-small-Q8_0-GGUF"; - params.hf_file = "gte-small-q8_0.gguf"; - params.pooling_type = LLAMA_POOLING_TYPE_NONE; - params.embd_normalize = 2; - params.n_ctx = 512; - params.verbose_prompt = true; - params.embedding = true; - } - ).set_examples({LLAMA_EXAMPLE_EMBEDDING, LLAMA_EXAMPLE_SERVER})); - - return ctx_arg; -} diff --git a/common/arg.h b/common/arg.h deleted file mode 100644 index 49ab8667b..000000000 --- a/common/arg.h +++ /dev/null @@ -1,80 +0,0 @@ -#pragma once - -#include "common.h" - -#include -#include -#include - -// -// CLI argument parsing -// - -struct common_arg { - std::set examples = {LLAMA_EXAMPLE_COMMON}; - std::set excludes = {}; - std::vector args; - const char * value_hint = nullptr; // help text or example for arg value - const char * value_hint_2 = nullptr; // for second arg value - const char * env = nullptr; - std::string help; - bool is_sparam = false; // is current arg a sampling param? - void (*handler_void) (common_params & params) = nullptr; - void (*handler_string) (common_params & params, const std::string &) = nullptr; - void (*handler_str_str)(common_params & params, const std::string &, const std::string &) = nullptr; - void (*handler_int) (common_params & params, int) = nullptr; - - common_arg( - const std::initializer_list & args, - const char * value_hint, - const std::string & help, - void (*handler)(common_params & params, const std::string &) - ) : args(args), value_hint(value_hint), help(help), handler_string(handler) {} - - common_arg( - const std::initializer_list & args, - const char * value_hint, - const std::string & help, - void (*handler)(common_params & params, int) - ) : args(args), value_hint(value_hint), help(help), handler_int(handler) {} - - common_arg( - const std::initializer_list & args, - const std::string & help, - void (*handler)(common_params & params) - ) : args(args), help(help), handler_void(handler) {} - - // support 2 values for arg - common_arg( - const std::initializer_list & args, - const char * value_hint, - const char * value_hint_2, - const std::string & help, - void (*handler)(common_params & params, const std::string &, const std::string &) - ) : args(args), value_hint(value_hint), value_hint_2(value_hint_2), help(help), handler_str_str(handler) {} - - common_arg & set_examples(std::initializer_list examples); - common_arg & set_excludes(std::initializer_list excludes); - common_arg & set_env(const char * env); - common_arg & set_sparam(); - bool in_example(enum llama_example ex); - bool is_exclude(enum llama_example ex); - bool get_value_from_env(std::string & output); - bool has_value_from_env(); - std::string to_string(); -}; - -struct common_params_context { - enum llama_example ex = LLAMA_EXAMPLE_COMMON; - common_params & params; - std::vector options; - void(*print_usage)(int, char **) = nullptr; - common_params_context(common_params & params) : params(params) {} -}; - -// parse input arguments from CLI -// if one argument has invalid value, it will automatically display usage of the specific argument (and not the full usage message) -bool common_params_parse(int argc, char ** argv, common_params & params, llama_example ex, void(*print_usage)(int, char **) = nullptr); - -// function to be used by test-arg-parser -common_params_context common_params_parser_init(common_params & 
params, llama_example ex, void(*print_usage)(int, char **) = nullptr); diff --git a/common/base64.hpp b/common/base64.hpp deleted file mode 100644 index 563247a6e..000000000 --- a/common/base64.hpp +++ /dev/null @@ -1,392 +0,0 @@ -/* -This is free and unencumbered software released into the public domain. - -Anyone is free to copy, modify, publish, use, compile, sell, or -distribute this software, either in source code form or as a compiled -binary, for any purpose, commercial or non-commercial, and by any -means. - -In jurisdictions that recognize copyright laws, the author or authors -of this software dedicate any and all copyright interest in the -software to the public domain. We make this dedication for the benefit -of the public at large and to the detriment of our heirs and -successors. We intend this dedication to be an overt act of -relinquishment in perpetuity of all present and future rights to this -software under copyright law. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. -IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR -OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, -ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -OTHER DEALINGS IN THE SOFTWARE. - -For more information, please refer to -*/ - -#ifndef PUBLIC_DOMAIN_BASE64_HPP_ -#define PUBLIC_DOMAIN_BASE64_HPP_ - -#include -#include -#include -#include - -class base64_error : public std::runtime_error -{ -public: - using std::runtime_error::runtime_error; -}; - -class base64 -{ -public: - enum class alphabet - { - /** the alphabet is detected automatically */ - auto_, - /** the standard base64 alphabet is used */ - standard, - /** like `standard` except that the characters `+` and `/` are replaced by `-` and `_` respectively*/ - url_filename_safe - }; - - enum class decoding_behavior - { - /** if the input is not padded, the remaining bits are ignored */ - moderate, - /** if a padding character is encounter decoding is finished */ - loose - }; - - /** - Encodes all the elements from `in_begin` to `in_end` to `out`. - - @warning The source and destination cannot overlap. The destination must be able to hold at least - `required_encode_size(std::distance(in_begin, in_end))`, otherwise the behavior depends on the output iterator. - - @tparam Input_iterator the source; the returned elements are cast to `std::uint8_t` and should not be greater than - 8 bits - @tparam Output_iterator the destination; the elements written to it are from the type `char` - @param in_begin the beginning of the source - @param in_end the ending of the source - @param out the destination iterator - @param alphabet which alphabet should be used - @returns the iterator to the next element past the last element copied - @throws see `Input_iterator` and `Output_iterator` - */ - template - static Output_iterator encode(Input_iterator in_begin, Input_iterator in_end, Output_iterator out, - alphabet alphabet = alphabet::standard) - { - constexpr auto pad = '='; - const char* alpha = alphabet == alphabet::url_filename_safe - ? 
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_" - : "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; - - while (in_begin != in_end) { - std::uint8_t i0 = 0, i1 = 0, i2 = 0; - - // first character - i0 = static_cast(*in_begin); - ++in_begin; - - *out = alpha[i0 >> 2 & 0x3f]; - ++out; - - // part of first character and second - if (in_begin != in_end) { - i1 = static_cast(*in_begin); - ++in_begin; - - *out = alpha[((i0 & 0x3) << 4) | (i1 >> 4 & 0x0f)]; - ++out; - } else { - *out = alpha[(i0 & 0x3) << 4]; - ++out; - - // last padding - *out = pad; - ++out; - - // last padding - *out = pad; - ++out; - - break; - } - - // part of second character and third - if (in_begin != in_end) { - i2 = static_cast(*in_begin); - ++in_begin; - - *out = alpha[((i1 & 0xf) << 2) | (i2 >> 6 & 0x03)]; - ++out; - } else { - *out = alpha[(i1 & 0xf) << 2]; - ++out; - - // last padding - *out = pad; - ++out; - - break; - } - - // rest of third - *out = alpha[i2 & 0x3f]; - ++out; - } - - return out; - } - /** - Encodes a string. - - @param str the string that should be encoded - @param alphabet which alphabet should be used - @returns the encoded base64 string - @throws see base64::encode() - */ - static std::string encode(const std::string& str, alphabet alphabet = alphabet::standard) - { - std::string result; - - result.reserve(required_encode_size(str.length()) + 1); - - encode(str.begin(), str.end(), std::back_inserter(result), alphabet); - - return result; - } - /** - Encodes a char array. - - @param buffer the char array - @param size the size of the array - @param alphabet which alphabet should be used - @returns the encoded string - */ - static std::string encode(const char* buffer, std::size_t size, alphabet alphabet = alphabet::standard) - { - std::string result; - - result.reserve(required_encode_size(size) + 1); - - encode(buffer, buffer + size, std::back_inserter(result), alphabet); - - return result; - } - /** - Decodes all the elements from `in_begin` to `in_end` to `out`. `in_begin` may point to the same location as `out`, - in other words: inplace decoding is possible. - - @warning The destination must be able to hold at least `required_decode_size(std::distance(in_begin, in_end))`, - otherwise the behavior depends on the output iterator. 
- - @tparam Input_iterator the source; the returned elements are cast to `char` - @tparam Output_iterator the destination; the elements written to it are from the type `std::uint8_t` - @param in_begin the beginning of the source - @param in_end the ending of the source - @param out the destination iterator - @param alphabet which alphabet should be used - @param behavior the behavior when an error was detected - @returns the iterator to the next element past the last element copied - @throws base64_error depending on the set behavior - @throws see `Input_iterator` and `Output_iterator` - */ - template - static Output_iterator decode(Input_iterator in_begin, Input_iterator in_end, Output_iterator out, - alphabet alphabet = alphabet::auto_, - decoding_behavior behavior = decoding_behavior::moderate) - { - //constexpr auto pad = '='; - std::uint8_t last = 0; - auto bits = 0; - - while (in_begin != in_end) { - auto c = *in_begin; - ++in_begin; - - if (c == '=') { - break; - } - - auto part = _base64_value(alphabet, c); - - // enough bits for one byte - if (bits + 6 >= 8) { - *out = (last << (8 - bits)) | (part >> (bits - 2)); - ++out; - - bits -= 2; - } else { - bits += 6; - } - - last = part; - } - - // check padding - if (behavior != decoding_behavior::loose) { - while (in_begin != in_end) { - auto c = *in_begin; - ++in_begin; - - if (c != '=') { - throw base64_error("invalid base64 character."); - } - } - } - - return out; - } - /** - Decodes a string. - - @param str the base64 encoded string - @param alphabet which alphabet should be used - @param behavior the behavior when an error was detected - @returns the decoded string - @throws see base64::decode() - */ - static std::string decode(const std::string& str, alphabet alphabet = alphabet::auto_, - decoding_behavior behavior = decoding_behavior::moderate) - { - std::string result; - - result.reserve(max_decode_size(str.length())); - - decode(str.begin(), str.end(), std::back_inserter(result), alphabet, behavior); - - return result; - } - /** - Decodes a string. - - @param buffer the base64 encoded buffer - @param size the size of the buffer - @param alphabet which alphabet should be used - @param behavior the behavior when an error was detected - @returns the decoded string - @throws see base64::decode() - */ - static std::string decode(const char* buffer, std::size_t size, alphabet alphabet = alphabet::auto_, - decoding_behavior behavior = decoding_behavior::moderate) - { - std::string result; - - result.reserve(max_decode_size(size)); - - decode(buffer, buffer + size, std::back_inserter(result), alphabet, behavior); - - return result; - } - /** - Decodes a string inplace. - - @param[in,out] str the base64 encoded string - @param alphabet which alphabet should be used - @param behavior the behavior when an error was detected - @throws base64::decode_inplace() - */ - static void decode_inplace(std::string& str, alphabet alphabet = alphabet::auto_, - decoding_behavior behavior = decoding_behavior::moderate) - { - str.resize(decode(str.begin(), str.end(), str.begin(), alphabet, behavior) - str.begin()); - } - /** - Decodes a char array inplace. 
- - @param[in,out] str the string array - @param size the length of the array - @param alphabet which alphabet should be used - @param behavior the behavior when an error was detected - @returns the pointer to the next element past the last element decoded - @throws base64::decode_inplace() - */ - static char* decode_inplace(char* str, std::size_t size, alphabet alphabet = alphabet::auto_, - decoding_behavior behavior = decoding_behavior::moderate) - { - return decode(str, str + size, str, alphabet, behavior); - } - /** - Returns the required decoding size for a given size. The value is calculated with the following formula: - - $$ - \lceil \frac{size}{4} \rceil \cdot 3 - $$ - - @param size the size of the encoded input - @returns the size of the resulting decoded buffer; this the absolute maximum - */ - static std::size_t max_decode_size(std::size_t size) noexcept - { - return (size / 4 + (size % 4 ? 1 : 0)) * 3; - } - /** - Returns the required encoding size for a given size. The value is calculated with the following formula: - - $$ - \lceil \frac{size}{3} \rceil \cdot 4 - $$ - - @param size the size of the decoded input - @returns the size of the resulting encoded buffer - */ - static std::size_t required_encode_size(std::size_t size) noexcept - { - return (size / 3 + (size % 3 ? 1 : 0)) * 4; - } - -private: - static std::uint8_t _base64_value(alphabet& alphabet, char c) - { - if (c >= 'A' && c <= 'Z') { - return c - 'A'; - } else if (c >= 'a' && c <= 'z') { - return c - 'a' + 26; - } else if (c >= '0' && c <= '9') { - return c - '0' + 52; - } - - // comes down to alphabet - if (alphabet == alphabet::standard) { - if (c == '+') { - return 62; - } else if (c == '/') { - return 63; - } - } else if (alphabet == alphabet::url_filename_safe) { - if (c == '-') { - return 62; - } else if (c == '_') { - return 63; - } - } // auto detect - else { - if (c == '+') { - alphabet = alphabet::standard; - - return 62; - } else if (c == '/') { - alphabet = alphabet::standard; - - return 63; - } else if (c == '-') { - alphabet = alphabet::url_filename_safe; - - return 62; - } else if (c == '_') { - alphabet = alphabet::url_filename_safe; - - return 63; - } - } - - throw base64_error("invalid base64 character."); - } -}; - -#endif // !PUBLIC_DOMAIN_BASE64_HPP_ diff --git a/common/build-info.cpp.in b/common/build-info.cpp.in deleted file mode 100644 index 0b945aa68..000000000 --- a/common/build-info.cpp.in +++ /dev/null @@ -1,4 +0,0 @@ -int LLAMA_BUILD_NUMBER = @BUILD_NUMBER@; -char const *LLAMA_COMMIT = "@BUILD_COMMIT@"; -char const *LLAMA_COMPILER = "@BUILD_COMPILER@"; -char const *LLAMA_BUILD_TARGET = "@BUILD_TARGET@"; diff --git a/common/chat-template.hpp b/common/chat-template.hpp deleted file mode 100644 index 882ba41bd..000000000 --- a/common/chat-template.hpp +++ /dev/null @@ -1,529 +0,0 @@ -/* - Copyright 2024 Google LLC - - Use of this source code is governed by an MIT-style - license that can be found in the LICENSE file or at - https://opensource.org/licenses/MIT. -*/ -// SPDX-License-Identifier: MIT -#pragma once - -#include "minja.hpp" -#include -#include -#include - -using json = nlohmann::ordered_json; - -namespace minja { - -struct chat_template_caps { - bool supports_tools = false; - bool supports_tool_calls = false; - bool supports_tool_responses = false; - bool supports_system_role = false; - bool supports_parallel_tool_calls = false; - bool supports_tool_call_id = false; - // meta-llama/Llama-3.1-8B-Instruct expects arguments to be an object. 
- // Most other templates (and OpenAI's API) expect the arguments object to be stringified. - bool requires_object_arguments = false; - // CohereForAI/c4ai-command-r-plus simple variant - bool requires_non_null_content = false; - // MiniMaxAI/MiniMax-Text-01 special - bool requires_typed_content = false; -}; - -struct chat_template_inputs { - nlohmann::ordered_json messages; - nlohmann::ordered_json tools; - bool add_generation_prompt = true; - nlohmann::ordered_json extra_context; - std::chrono::system_clock::time_point now = std::chrono::system_clock::now(); -}; - -struct chat_template_options { - bool apply_polyfills = true; - bool use_bos_token = true; - bool use_eos_token = true; - bool define_strftime_now = true; - - bool polyfill_tools = true; - bool polyfill_tool_call_examples = true; - bool polyfill_tool_calls = true; - bool polyfill_tool_responses = true; - bool polyfill_system_role = true; - bool polyfill_object_arguments = true; - bool polyfill_typed_content = true; -}; - -class chat_template { - - private: - chat_template_caps caps_; - std::string source_; - std::string bos_token_; - std::string eos_token_; - std::shared_ptr template_root_; - std::string tool_call_example_; - - std::string try_raw_render( - const nlohmann::ordered_json & messages, - const nlohmann::ordered_json & tools, - bool add_generation_prompt, - const nlohmann::ordered_json & extra_context = nlohmann::ordered_json()) const - { - try { - chat_template_inputs inputs; - inputs.messages = messages; - inputs.tools = tools; - inputs.add_generation_prompt = add_generation_prompt; - inputs.extra_context = extra_context; - // Use fixed date for tests - inputs.now = std::chrono::system_clock::from_time_t(0); - - chat_template_options opts; - opts.apply_polyfills = false; - - auto prompt = apply(inputs, opts); - // fprintf(stderr, "try_raw_render: %s\n", prompt.c_str()); - return prompt; - } catch (const std::exception & e) { - // fprintf(stderr, "try_raw_render error: %s\n", e.what()); - return ""; - } - } - - public: - - chat_template(const std::string & source, const std::string & bos_token, const std::string & eos_token) - : source_(source), bos_token_(bos_token), eos_token_(eos_token) - { - template_root_ = minja::Parser::parse(source_, { - /* .trim_blocks = */ true, - /* .lstrip_blocks = */ true, - /* .keep_trailing_newline = */ false, - }); - - auto contains = [](const std::string & haystack, const std::string & needle) { - return haystack.find(needle) != std::string::npos; - }; - - const std::string user_needle = ""; - const std::string sys_needle = ""; - const json dummy_str_user_msg = {{"role", "user"}, {"content", user_needle}}; - const json dummy_typed_user_msg = {{"role", "user"}, {"content", json::array({{{"type", "text"}, {"text", user_needle}}})}}; - - caps_.requires_typed_content = - !contains(try_raw_render(json::array({dummy_str_user_msg}), {}, false), user_needle) - && contains(try_raw_render(json::array({dummy_typed_user_msg}), {}, false), user_needle); - - const auto dummy_user_msg = caps_.requires_typed_content - ? dummy_typed_user_msg - : dummy_str_user_msg; - const json needle_system_msg = { - {"role", "system"}, - {"content", caps_.requires_typed_content ? 
json::array({{{"type", "text"}, {"text", sys_needle}}}) : json(sys_needle)}, - }; - - caps_.supports_system_role = contains(try_raw_render({needle_system_msg, dummy_user_msg,}, {}, false), sys_needle); - - auto out = try_raw_render(json::array({ - dummy_user_msg - }), json::array({ - { - {"name", "some_tool"}, - {"type", "function"}, - {"function", { - {"name", "some_tool"}, - {"description", "Some tool."}, - {"parameters", { - {"type", "object"}, - {"properties", { - {"arg", { - {"type", "string"}, - {"description", "Some argument."}, - }}, - }}, - {"required", json::array({ "arg" })}, - }}, - }}, - }, - }), false); - caps_.supports_tools = contains(out, "some_tool"); - - auto make_tool_calls_msg = [&](const json & tool_calls) { - return json { - {"role", "assistant"}, - {"content", nullptr}, - {"tool_calls", tool_calls}, - }; - }; - auto make_tool_call = [](const std::string & tool_name, const json & arguments) { - return json { - {"id", "call_1___"}, - {"type", "function"}, - {"function", { - {"arguments", arguments}, - {"name", tool_name}, - }}, - }; - }; - const json dummy_args_obj {{"argument_needle", "print('Hello, World!')"}}; - - // Note: the arguments are rendered in both cases, but may be double-escaped, which we don't want. - out = try_raw_render(json::array({ - dummy_user_msg, - make_tool_calls_msg(json::array({make_tool_call("ipython", dummy_args_obj.dump())})), - }), {}, false); - auto tool_call_renders_str_arguments = contains(out, "\"argument_needle\":") || contains(out, "'argument_needle':"); - out = try_raw_render(json::array({ - dummy_user_msg, - make_tool_calls_msg(json::array({make_tool_call("ipython", dummy_args_obj)})), - }), {}, false); - auto tool_call_renders_obj_arguments = contains(out, "\"argument_needle\":") || contains(out, "'argument_needle':"); - - caps_.supports_tool_calls = tool_call_renders_str_arguments || tool_call_renders_obj_arguments; - caps_.requires_object_arguments = !tool_call_renders_str_arguments && tool_call_renders_obj_arguments; - auto out_empty = try_raw_render(json::array({dummy_user_msg, {{"role", "assistant"}, {"content", ""}}}), {}, false); - auto out_null = try_raw_render(json::array({dummy_user_msg, {{"role", "assistant"}, {"content", nullptr}}}), {}, false); - caps_.requires_non_null_content = contains(out_empty, user_needle) && !contains(out_null, user_needle); - - if (caps_.supports_tool_calls) { - auto dummy_args = caps_.requires_object_arguments ? 
dummy_args_obj : json(dummy_args_obj.dump()); - auto tc1 = make_tool_call("test_tool1", dummy_args); - auto tc2 = make_tool_call("test_tool2", dummy_args); - auto out = try_raw_render(json::array({ - dummy_user_msg, - make_tool_calls_msg(json::array({tc1, tc2})), - }), {}, false); - caps_.supports_parallel_tool_calls = contains(out, "test_tool1") && contains(out, "test_tool2"); - - out = try_raw_render(json::array({ - dummy_user_msg, - make_tool_calls_msg(json::array({tc1})), - { - {"role", "tool"}, - {"name", "test_tool1"}, - {"content", "Some response!"}, - {"tool_call_id", "call_911_"}, - } - }), {}, false); - caps_.supports_tool_responses = contains(out, "Some response!"); - caps_.supports_tool_call_id = contains(out, "call_911_"); - } - - try { - if (!caps_.supports_tools) { - const json user_msg { - {"role", "user"}, - {"content", "Hey"}, - }; - const json args { - {"arg1", "some_value"}, - }; - const json tool_call_msg { - {"role", "assistant"}, - {"content", nullptr}, - {"tool_calls", json::array({ - { - // TODO: detect if requires numerical id or fixed length == 6 like Nemo - {"id", "call_1___"}, - {"type", "function"}, - {"function", { - {"name", "tool_name"}, - {"arguments", (caps_.requires_object_arguments ? args : json(minja::Value(args).dump(-1, /* to_json= */ true)))}, - }}, - }, - })}, - }; - std::string prefix, full; - { - chat_template_inputs inputs; - inputs.messages = json::array({user_msg}); - inputs.add_generation_prompt = true; - prefix = apply(inputs); - } - { - chat_template_inputs inputs; - inputs.messages = json::array({user_msg, tool_call_msg}); - inputs.add_generation_prompt = false; - full = apply(inputs); - } - auto eos_pos_last = full.rfind(eos_token_); - if (eos_pos_last == prefix.size() - eos_token_.size() || - (full[full.size() - 1] == '\n' && (eos_pos_last == full.size() - eos_token_.size() - 1))) { - full = full.substr(0, eos_pos_last); - } - size_t common_prefix_length = 0; - for (size_t i = 0; i < prefix.size() && i < full.size(); ++i) { - if (prefix[i] != full[i]) { - break; - } - if (prefix[i] == '<') { - // DeepSeek R1's template (as of 20250209) adds a trailing if add_generation_prompt, - // but it removes thinking tags for past messages. - // The prefix and full strings diverge at vs. <|tool▁calls▁begin|>, we avoid consuming the leading <. 
- continue; - } - common_prefix_length = i + 1; - } - auto example = full.substr(common_prefix_length); - if (example.find("tool_name") == std::string::npos && example.find("some_value") == std::string::npos) { - fprintf(stderr, "Failed to infer a tool call example (possible template bug)\n"); - } else { - tool_call_example_ = example; - } - } - } catch (const std::exception & e) { - fprintf(stderr, "Failed to generate tool call example: %s\n", e.what()); - } - } - - const std::string & source() const { return source_; } - const std::string & bos_token() const { return bos_token_; } - const std::string & eos_token() const { return eos_token_; } - const chat_template_caps & original_caps() const { return caps_; } - - // Deprecated, please use the form with chat_template_inputs and chat_template_options - std::string apply( - const nlohmann::ordered_json & messages, - const nlohmann::ordered_json & tools, - bool add_generation_prompt, - const nlohmann::ordered_json & extra_context = nlohmann::ordered_json(), - bool apply_polyfills = true) - { - fprintf(stderr, "[%s] Deprecated!\n", __func__); - chat_template_inputs inputs; - inputs.messages = messages; - inputs.tools = tools; - inputs.add_generation_prompt = add_generation_prompt; - inputs.extra_context = extra_context; - inputs.now = std::chrono::system_clock::now(); - - chat_template_options opts; - opts.apply_polyfills = apply_polyfills; - - return apply(inputs, opts); - } - - std::string apply( - const chat_template_inputs & inputs, - const chat_template_options & opts = chat_template_options()) const - { - json actual_messages; - - auto has_tools = inputs.tools.is_array() && !inputs.tools.empty(); - auto has_tool_calls = false; - auto has_tool_responses = false; - auto has_string_content = false; - for (const auto & message : inputs.messages) { - if (message.contains("tool_calls") && !message["tool_calls"].is_null()) { - has_tool_calls = true; - } - if (message.contains("role") && message["role"] == "tool") { - has_tool_responses = true; - } - if (message.contains("content") && message["content"].is_string()) { - has_string_content = true; - } - } - - auto polyfill_system_role = opts.polyfill_system_role && !caps_.supports_system_role; - auto polyfill_tools = opts.polyfill_tools && has_tools && !caps_.supports_tools; - auto polyfill_tool_call_example = polyfill_tools && opts.polyfill_tool_call_examples; - auto polyfill_tool_calls = opts.polyfill_tool_calls && has_tool_calls && !caps_.supports_tool_calls; - auto polyfill_tool_responses = opts.polyfill_tool_responses && has_tool_responses && !caps_.supports_tool_responses; - auto polyfill_object_arguments = opts.polyfill_object_arguments && has_tool_calls && caps_.requires_object_arguments; - auto polyfill_typed_content = opts.polyfill_typed_content && has_string_content && caps_.requires_typed_content; - - auto needs_polyfills = opts.apply_polyfills && (false - || polyfill_system_role - || polyfill_tools - || polyfill_tool_calls - || polyfill_tool_responses - || polyfill_object_arguments - || polyfill_typed_content - ); - - if (needs_polyfills) { - actual_messages = json::array(); - - auto add_message = [&](const json & msg) { - if (polyfill_typed_content && msg.contains("content") && !msg.at("content").is_null() && msg.at("content").is_string()) { - actual_messages.push_back({ - {"role", msg.at("role")}, - {"content", {{ - {"type", "text"}, - {"text", msg.at("content")}, - }}}, - }); - } else { - actual_messages.push_back(msg); - } - }; - - std::string pending_system; - auto flush_sys = 
[&]() { - if (!pending_system.empty()) { - add_message({ - {"role", "user"}, - {"content", pending_system}, - }); - pending_system.clear(); - } - }; - - json adjusted_messages; - if (polyfill_tools) { - adjusted_messages = add_system(inputs.messages, - "You can call any of the following tools to satisfy the user's requests: " + minja::Value(inputs.tools).dump(2, /* to_json= */ true) + - (!polyfill_tool_call_example || tool_call_example_.empty() ? "" : "\n\nExample tool call syntax:\n\n" + tool_call_example_ + "\n\n")); - } else { - adjusted_messages = inputs.messages; - } - - for (const auto & message_ : adjusted_messages) { - auto message = message_; - if (!message.contains("role") || !message.contains("content")) { - throw std::runtime_error("message must have 'role' and 'content' fields: " + message.dump()); - } - std::string role = message.at("role"); - - if (message.contains("tool_calls")) { - if (polyfill_object_arguments || polyfill_tool_calls) { - for (auto & tool_call : message.at("tool_calls")) { - if (tool_call["type"] == "function") { - auto & function = tool_call.at("function"); - auto & arguments = function.at("arguments"); - if (arguments.is_string()) { - try { - arguments = json::parse(arguments.get()); - } catch (const std::exception & ecvt) { - fprintf(stderr, "Failed to parse arguments: %s\n", ecvt.what()); - } - } - } - } - } - if (polyfill_tool_calls) { - auto content = message.at("content"); - auto tool_calls = json::array(); - for (const auto & tool_call : message.at("tool_calls")) { - if (tool_call.at("type") != "function") { - continue; - } - const auto & function = tool_call.at("function"); - auto tc = json { - {"name", function.at("name")}, - {"arguments", function.at("arguments")}, - }; - if (tool_call.contains("id")) { - tc["id"] = tool_call["id"]; - } - tool_calls.push_back(tc); - } - auto obj = json { - {"tool_calls", tool_calls}, - }; - if (!content.is_null() && content != "") { - obj["content"] = content; - } - message["content"] = obj.dump(2); - message.erase("tool_calls"); - } - } - if (polyfill_tool_responses && role == "tool") { - message["role"] = "user"; - auto obj = json { - {"tool_response", { - {"content", message.at("content")}, - }}, - }; - if (message.contains("name")) { - obj["tool_response"]["name"] = message.at("name"); - } - if (message.contains("tool_call_id")) { - obj["tool_response"]["tool_call_id"] = message.at("tool_call_id"); - } - message["content"] = obj.dump(2); - message.erase("name"); - } - - if (!message["content"].is_null() && polyfill_system_role) { - std::string content = message.at("content"); - if (role == "system") { - if (!pending_system.empty()) pending_system += "\n"; - pending_system += content; - continue; - } else { - if (role == "user") { - if (!pending_system.empty()) { - message["content"] = pending_system + (content.empty() ? "" : "\n" + content); - pending_system.clear(); - } - } else { - flush_sys(); - } - } - } - add_message(message); - } - flush_sys(); - } else { - actual_messages = inputs.messages; - } - - auto context = minja::Context::make(json({ - {"messages", actual_messages}, - {"add_generation_prompt", inputs.add_generation_prompt}, - })); - context->set("bos_token", opts.use_bos_token ? bos_token_ : ""); - context->set("eos_token", opts.use_eos_token ? 
eos_token_ : ""); - if (opts.define_strftime_now) { - auto now = inputs.now; - context->set("strftime_now", Value::callable([now](const std::shared_ptr &, minja::ArgumentsValue & args) { - args.expectArgs("strftime_now", {1, 1}, {0, 0}); - auto format = args.args[0].get(); - - auto time = std::chrono::system_clock::to_time_t(now); - auto local_time = *std::localtime(&time); - std::ostringstream ss; - ss << std::put_time(&local_time, format.c_str()); - return ss.str(); - })); - } - if (!inputs.tools.is_null()) { - context->set("tools", minja::Value(inputs.tools)); - } - if (!inputs.extra_context.is_null()) { - for (auto & kv : inputs.extra_context.items()) { - context->set(kv.key(), minja::Value(kv.value())); - } - } - - auto ret = template_root_->render(context); - // fprintf(stderr, "actual_messages: %s\n", actual_messages.dump(2).c_str()); - // fprintf(stderr, "apply: %s\n\n", ret.c_str()); - return ret; - } - - static nlohmann::ordered_json add_system(const nlohmann::ordered_json & messages, const std::string & system_prompt) { - json messages_with_system = messages; - - if (messages_with_system.size() > 0 && messages_with_system[0].at("role") == "system") { - std::string existing_system = messages_with_system.at(0).at("content"); - messages_with_system[0] = json { - {"role", "system"}, - {"content", existing_system + "\n\n" + system_prompt}, - }; - } else { - messages_with_system.insert(messages_with_system.begin(), json { - {"role", "system"}, - {"content", system_prompt}, - }); - } - return messages_with_system; - } -}; - -} // namespace minja diff --git a/common/chat.cpp b/common/chat.cpp deleted file mode 100644 index ef1c6fb3d..000000000 --- a/common/chat.cpp +++ /dev/null @@ -1,966 +0,0 @@ -#include "chat.hpp" -#include "chat-template.hpp" -#include "json-schema-to-grammar.h" -#include "log.h" -#include "minja.hpp" - -std::string common_chat_format_name(common_chat_format format) { - switch (format) { - case COMMON_CHAT_FORMAT_CONTENT_ONLY: return "Content-only"; - case COMMON_CHAT_FORMAT_GENERIC: return "Generic"; - case COMMON_CHAT_FORMAT_MISTRAL_NEMO: return "Mistral Nemo"; - case COMMON_CHAT_FORMAT_LLAMA_3_X: return "Llama 3.x"; - case COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS: return "Llama 3.x with builtin tools"; - case COMMON_CHAT_FORMAT_DEEPSEEK_R1: return "DeepSeek R1"; - case COMMON_CHAT_FORMAT_FIREFUNCTION_V2: return "FireFunction v2"; - case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2: return "Functionary v3.2"; - case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1: return "Functionary v3.1 Llama 3.1"; - case COMMON_CHAT_FORMAT_HERMES_2_PRO: return "Hermes 2 Pro"; - case COMMON_CHAT_FORMAT_COMMAND_R7B: return "Command R7B"; - default: - throw std::runtime_error("Unknown chat format"); - } -} - -const common_grammar_options grammar_options { - /* .dotall = */ false, - /* .compact_spaces = */ false, - // /* .compact_spaces = */ true, -}; - -static bool parse_json(std::string::const_iterator & it, const std::string::const_iterator & end, json & out) { - // // https://json.nlohmann.me/features/parsing/sax_interface/ - struct json_error_locator : public nlohmann::json_sax { - std::size_t position; - bool found_error; - - json_error_locator() : position(0), found_error(false) {} - - bool parse_error(std::size_t position, const std::string &, const json::exception &) override { - this->position = position - 1; - this->found_error = true; - return false; - } - bool null() override { return true; } - bool boolean(bool) override { return true; } - bool 
number_integer(number_integer_t) override { return true; } - bool number_unsigned(number_unsigned_t) override { return true; } - bool number_float(number_float_t, const string_t &) override { return true; } - bool string(string_t &) override { return true; } - bool binary(binary_t &) override { return true; } - bool start_object(std::size_t) override { return true; } - bool key(string_t &) override { return true; } - bool end_object() override { return true; } - bool start_array(std::size_t) override { return true; } - bool end_array() override { return true; } - }; - json_error_locator err_loc; - json::sax_parse(it, end, &err_loc); - - std::string::const_iterator temptative_end; - if (err_loc.found_error) { - temptative_end = it + err_loc.position; - } else { - temptative_end = end; - } - std::string json_sub {it, temptative_end}; - try { - out = json::parse(json_sub); - it = temptative_end; - return true; - } catch (const std::exception &) { - return false; - } -} - - -/** - * Takes a prefix regex that must have 1 group to capture the function name, a closing suffix, and expects json parameters in between. - * Aggregates the prefix, suffix and in-between text into the content. - */ -static common_chat_msg parse_json_tool_calls( - const std::string& input, - const std::optional & trigger_opt, - const std::regex & function_regex, - const std::regex & close_regex) { - std::smatch match; - - common_chat_msg result; - result.role = "assistant"; - - - auto end = input.end(); - auto it = input.begin(); - - if (trigger_opt) { - if (!std::regex_search(it, end, match, *trigger_opt)) { - result.content = input; - return result; - } - result.content = match.prefix().str(); - it = match.suffix().first; - } - - while (it != end) { - std::sregex_iterator rend; - std::sregex_iterator rit(it, end, function_regex); - if (rit == rend) { - fprintf(stderr, "No more tool calls found\n"); - result.content += std::string(it, end); - break; - } - auto name = rit->str(1); - result.content += std::string(it, rit->prefix().second); - it = rit->suffix().first; - - json arguments; - if (!parse_json(it, end, arguments)) { - throw std::runtime_error("Failed to parse json tool call arguments"); - } - if (!std::regex_search(it, end, match, close_regex)) { - throw std::runtime_error("Malformed input, missing closing pattern"); - } - it = match.suffix().first; - result.tool_calls.push_back({name, arguments.is_string() ? arguments.get() : arguments.dump(), /* id= */ ""}); - } - return result; -} - -static common_chat_msg parse_prefixed_json_tool_call_array(const std::string& input, const std::string & prefix, size_t rstrip_prefix = 0) { - auto content_end = input.find(prefix); - size_t tc_start = std::string::npos; - - common_chat_msg result; - result.role = "assistant"; - const auto process_tool_calls = [&](const json & tool_calls) { - for (const auto & tool_call : tool_calls) { - const auto & arguments = tool_call["arguments"]; - result.tool_calls.push_back({ - tool_call["name"], - arguments.is_string() ? arguments.get() : arguments.dump(), - tool_call.contains("id") ? 
tool_call["id"] : "", - }); - } - }; - if (content_end == std::string::npos) { - result.content = input; - } else { - tc_start = content_end + prefix.size() - rstrip_prefix; - result.content = input.substr(0, content_end); - auto tool_calls = json::parse(input.substr(tc_start)); - process_tool_calls(tool_calls); - } - return result; -} - -static void foreach_function(const json & tools, const std::function & fn) { - for (const auto & tool : tools) { - if (!tool.contains("type") || tool["type"] != "function" || !tool.contains("function")) { - LOG_INF("Skipping tool without function: %s", tool.dump(2).c_str()); - continue; - } - fn(tool); - } -} - -static std::string apply( - const common_chat_template & tmpl, - const nlohmann::ordered_json & messages, - const nlohmann::ordered_json & tools, - bool add_generation_prompt, - const nlohmann::ordered_json & extra_context = nlohmann::ordered_json()) -{ - minja::chat_template_inputs tmpl_inputs; - tmpl_inputs.messages = messages; - tmpl_inputs.tools = tools; - tmpl_inputs.add_generation_prompt = add_generation_prompt; - tmpl_inputs.extra_context = extra_context; - // TODO: add flag to control date/time, if only for testing purposes. - // tmpl_inputs.now = std::chrono::system_clock::now(); - - minja::chat_template_options tmpl_opts; - tmpl_opts.use_bos_token = false; - tmpl_opts.use_eos_token = false; - - return tmpl.apply(tmpl_inputs, tmpl_opts); -} - -static common_chat_params common_chat_params_init_generic(const common_chat_template & tmpl, const struct common_chat_inputs & inputs) { - common_chat_params data; - - auto tool_call_schemas = json::array(); - foreach_function(inputs.tools, [&](const json & tool) { - const auto & function = tool["function"]; - auto tool_schema = json { - {"type", "object"}, - {"properties", { - {"name", { - {"type", "string"}, - {"const", function["name"]}, - }}, - {"arguments", function["parameters"]}, - }}, - {"required", json::array({"name", "arguments"})}, - }; - if (function.contains("description")) { - tool_schema["description"] = function["description"]; - } - if (inputs.parallel_tool_calls) { - tool_schema["properties"]["id"] = { - {"type", "string"}, - {"minLength", 4}, - }; - tool_schema["required"].push_back("id"); - } - tool_call_schemas.emplace_back(tool_schema); - }); - const auto tool_call = - inputs.parallel_tool_calls - ? json { - {"type", "object"}, - {"properties", { - {"tool_calls", { - {"type", "array"}, - {"items", tool_call_schemas.size() == 1 ? tool_call_schemas[0] : json { - {"anyOf", tool_call_schemas}, - }}, - {"minItems", 1}, - }}, - }}, - {"required", json::array({"tool_calls"})}, - } - : json { - {"type", "object"}, - {"properties", { - {"tool_call", tool_call_schemas.size() == 1 ? tool_call_schemas[0] : json { - {"anyOf", tool_call_schemas}, - }}, - }}, - {"required", json::array({"tool_call"})}, - }; - const auto schema = - inputs.tool_choice != "required" - ? json { - {"anyOf", json::array({ - tool_call, - { - {"type", "object"}, - {"properties", { - {"response", inputs.json_schema.is_null() - ? 
json {{"type", "string"}} - : inputs.json_schema - }, - }}, - {"required", json::array({"response"})}, - }, - })} - } - : tool_call; - - data.grammar_lazy = false; - data.grammar = build_grammar([&](const common_grammar_builder & builder) { - builder.add_schema("root", schema); - }, grammar_options); - - auto tweaked_messages = common_chat_template::add_system( - inputs.messages, - "Respond in JSON format, either with `tool_call` (a request to call tools) or with `response` reply to the user's request"); - - data.prompt = apply(tmpl, tweaked_messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt); - data.format = COMMON_CHAT_FORMAT_GENERIC; - return data; -} -static common_chat_msg common_chat_parse_generic(const std::string & input) { - json data = json::parse(input); - common_chat_msg result; - result.role = "assistant"; - if (data.contains("tool_calls")) { - for (const auto & tool_call : data["tool_calls"]) { - result.tool_calls.push_back({ - tool_call["name"], - tool_call["arguments"].dump(), - tool_call.contains("id") ? tool_call["id"] : "", - }); - } - } else if (data.contains("tool_call")) { - result.tool_calls.push_back({ - data["tool_call"]["name"], - data["tool_call"]["arguments"].dump(), - /* id= */ "", - }); - } else if (data.contains("response")) { - const auto & response = data["response"]; - result.content = response.is_string() ? response.get() : response.dump(2); - } - return result; -} - -static common_chat_params common_chat_params_init_mistral_nemo(const common_chat_template & tmpl, const struct common_chat_inputs & inputs) { - common_chat_params data; - data.grammar_lazy = inputs.tool_choice != "required"; - data.grammar = build_grammar([&](const common_grammar_builder & builder) { - auto schemas = json::array(); - foreach_function(inputs.tools, [&](const json & tool) { - const auto & function = tool["function"]; - schemas.push_back({ - {"type", "object"}, - {"properties", { - // Important note: the model is probably trained to take a JSON stringified arguments value. - // It's hard to constrain that for now (while reusing the JSON schema conversion), so we're just expecting a plain object. - {"name", { - {"type", "string"}, - {"const", function["name"]}, - }}, - {"arguments", function["parameters"]}, - {"id", { - {"type", "string"}, - // Nemo's template expects a 9-character alphanumeric ID. - {"pattern", "^[a-zA-Z0-9]{9}$"}, - }}, - }}, - {"required", json::array({"name", "arguments", "id"})}, - }); - }); - auto schema = json { - {"type", "array"}, - {"items", schemas.size() == 1 ? schemas[0] : json {{"anyOf", schemas}}}, - {"minItems", 1}, - }; - if (!inputs.parallel_tool_calls) { - schema["maxItems"] = 1; - } - builder.add_rule("root", "\"[TOOL_CALLS]\" " + builder.add_schema("tool_calls", schema)); - }, grammar_options); - data.grammar_triggers.push_back({"[TOOL_CALLS]", /* .at_start = */ true}); - data.prompt = apply(tmpl, inputs.messages, inputs.tools.empty() ? 
json() : inputs.tools, inputs.add_generation_prompt); - data.format = COMMON_CHAT_FORMAT_MISTRAL_NEMO; - return data; -} -static common_chat_msg common_chat_parse_mistral_nemo(const std::string & input) { - return parse_prefixed_json_tool_call_array(input, "[TOOL_CALLS]"); -} - -static common_chat_params common_chat_params_init_command_r7b(const common_chat_template & tmpl, const struct common_chat_inputs & inputs) { - common_chat_params data; - data.grammar_lazy = inputs.tool_choice != "required"; - data.grammar = build_grammar([&](const common_grammar_builder & builder) { - auto schemas = json::array(); - foreach_function(inputs.tools, [&](const json & tool) { - const auto & function = tool["function"]; - schemas.push_back({ - {"type", "object"}, - {"properties", { - {"tool_call_id", { - {"type", "string"}, - // Command-R's template expects an integer string. - {"pattern", "^[0-9]{1,10}$"}, - }}, - {"tool_name", { - {"type", "string"}, - {"const", function["name"]}, - }}, - {"parameters", function["parameters"]}, - }}, - {"required", json::array({"tool_call_id", "tool_name", "parameters"})}, - }); - }); - auto schema = json { - {"type", "array"}, - {"items", schemas.size() == 1 ? schemas[0] : json {{"anyOf", schemas}}}, - {"minItems", 1}, - }; - if (!inputs.parallel_tool_calls) { - schema["maxItems"] = 1; - } - builder.add_rule("root", "\"<|START_ACTION|>\" " + builder.add_schema("tool_calls", schema) + " \"<|END_ACTION|>\""); - }, grammar_options); - data.grammar_triggers.push_back({"<|START_ACTION|>", /* .at_start = */ false}); - data.preserved_tokens = { - "<|START_RESPONSE|>", - "<|END_RESPONSE|>", - "<|START_THINKING|>", - "<|END_THINKING|>", - "<|END_ACTION|>", - }; - data.prompt = apply(tmpl, inputs.messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt); - data.format = COMMON_CHAT_FORMAT_COMMAND_R7B; - return data; -} -static common_chat_msg common_chat_parse_command_r7b(const std::string & input) { - static std::regex response_regex("<\\|START_RESPONSE\\|>([\\s\\S\\n\\r]*?)<\\|END_RESPONSE\\|>"); - static std::regex thought_action_regex("<\\|START_THINKING\\|>([\\s\\S\\n\\r]*?)<\\|END_THINKING\\|><\\|START_ACTION\\|>([\\s\\S\\n\\r]*?)<\\|END_ACTION\\|>"); - std::smatch match; - - common_chat_msg result; - result.role = "assistant"; - if (std::regex_match(input, match, response_regex)) { - result.content = match[1].str(); - } else if (std::regex_match(input, match, thought_action_regex)) { - result.tool_plan = match[1].str(); - auto actions_str = match[2].str(); - auto actions = json::parse(actions_str); - for (const auto & action : actions) { - result.tool_calls.push_back({ - /* .name = */ action["tool_name"], - /* .arguments = */ action["parameters"].dump(), - /* .id = */ action["tool_call_id"], - }); - } - } else { - LOG_ERR("Failed to parse command_r output"); - result.content = input; - } - return result; -} - -static void expect_tool_parameters(const std::string & name, const json & parameters, const std::vector & expected_properties) { - if (!parameters.is_object() || !parameters.contains("type") || parameters["type"] != "object" || !parameters.contains("properties") || !parameters.contains("required")) { - throw std::runtime_error("Parameters of tool " + name + " must be an object w/ required properties"); - } - const auto & parameters_properties = parameters.at("properties"); - const auto & parameters_required = parameters.at("required"); - for (const auto & prop : expected_properties) { - if (!parameters_properties.contains(prop)) { - 
throw std::runtime_error("Parameters of tool " + name + " is missing property: " + prop); - } - if (std::find(parameters_required.begin(), parameters_required.end(), json(prop)) == parameters_required.end()) { - throw std::runtime_error("Parameters of tool " + name + " must have property marked as required: " + prop); - } - } - if (parameters_properties.size() != expected_properties.size()) { - throw std::runtime_error("Parameters of tool " + name + " must only have these properties:" + string_join(expected_properties, ", ")); - } -} - -static common_chat_params common_chat_params_init_llama_3_1_tool_calls(const common_chat_template & tmpl, const struct common_chat_inputs & inputs, bool allow_python_tag_builtin_tools) { - auto builtin_tools = json::array(); - common_chat_params data; - data.grammar_lazy = inputs.tool_choice != "required"; - data.grammar = build_grammar([&](const common_grammar_builder & builder) { - std::vector tool_rules; - - auto handle_builtin_tool = [&](const std::string & name, const json & parameters) { - if (name == "wolfram_alpha") { - // https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py - expect_tool_parameters(name, parameters, {"query"}); - } else if (name == "web_search" || name == "brave_search") { - // https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py - expect_tool_parameters(name, parameters, {"query"}); - } else if (name == "python" || name == "code_interpreter") { - // https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/inline/tool_runtime/code_interpreter/code_interpreter.py - expect_tool_parameters(name, parameters, {"code"}); - } else { - return false; - } - - std::vector kvs; - for (const auto & [key, value] : parameters.at("properties").items()) { - kvs.push_back("\"" + key + "=\" " + builder.add_schema(name + "-args-" + key, value)); - } - - tool_rules.push_back( - builder.add_rule( - name + "-call", - "\"<|python_tag|>" + name + ".call(\" " + string_join(kvs, " \", \" ") + " \")\"")); - builtin_tools.push_back(name); - - return true; - }; - - foreach_function(inputs.tools, [&](const json & tool) { - const auto & function = tool["function"]; - std::string name = function["name"]; - auto parameters = function["parameters"]; - builder.resolve_refs(parameters); - - // https://github.com/meta-llama/llama-stack/tree/main/llama_stack/providers/remote/tool_runtime - if (allow_python_tag_builtin_tools) { - handle_builtin_tool(name, parameters); - } - tool_rules.push_back( - builder.add_rule( - name + "-call", - "\"{\" space " - "( \"\\\"type\\\":\" space \"\\\"function\\\",\" space )? 
" - "\"\\\"name\\\": \\\"" + name + "\\\", \\\"parameters\\\": \" " + - builder.add_schema(name + "-args", parameters) + - " \"}\"")); - data.grammar_triggers.push_back({"{\"name\": \"" + name + "\"", /* .at_start = */ true}); - }); - data.grammar_triggers.push_back({"{\"name\":", /* .at_start = */ true}); - data.grammar_triggers.push_back({"{\n \"name\":", /* .at_start = */ true}); - data.grammar_triggers.push_back({"{\n \"name\":", /* .at_start = */ true}); - data.grammar_triggers.push_back({"{\"type\": \"function\"", /* .at_start = */ true}); - data.grammar_triggers.push_back({"{\n \"type\": \"function\"", /* .at_start = */ true}); - data.grammar_triggers.push_back({"{\n \"type\": \"function\"", /* .at_start = */ true}); - if (!builtin_tools.empty()) { - data.grammar_triggers.push_back({"<|python_tag|>", /* .at_start = */ false}); - } - builder.add_rule("root", string_join(tool_rules, " | ")); - }, grammar_options); - data.additional_stops.push_back("<|eom_id|>"); - data.prompt = apply(tmpl, inputs.messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt, { - {"tools_in_user_message", false}, - {"builtin_tools", builtin_tools.empty() ? json() : builtin_tools}, - }); - data.format = allow_python_tag_builtin_tools && !builtin_tools.empty() - ? COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS - : COMMON_CHAT_FORMAT_LLAMA_3_X; - return data; -} -static common_chat_msg common_chat_parse_llama_3_1(const std::string & input, bool with_builtin_tools = false) { - // TODO: tighten & simplify the parser, don't accept leading text context. - static std::regex function_regex("\\{[\\s\\n\\r]*(?:\"type\"[\\s\\n\\r]*:[\\s\\n\\r]*\"function\"[\\s\\n\\r]*,[\\s\\n\\r]*|[\\s\\n\\r]*)\"name\"[\\s\\n\\r]*:[\\s\\n\\r]*\"([^\"]+)\"[\\s\\n\\r]*,[\\s\\n\\r]*\"parameters\": "); - static std::regex close_regex("\\}"); - static std::regex builtin_call_regex("<\\|python_tag\\|>([^.(]+)\\.call\\((.*)\\)"); - - if (with_builtin_tools) { - std::smatch match; - if (std::regex_match(input, match, builtin_call_regex)) { - auto name = match[1].str(); - auto raw_args = match[2].str(); - - // TODO: if/when builtin tools start accepting more than 1 argument, use parse_json for real parsing. 
- auto it_eq = raw_args.find('='); - auto arg_name = raw_args.substr(0, it_eq); - auto arg_value_str = raw_args.substr(it_eq + 1); - auto arg_value = json::parse(arg_value_str); - - return { - /* .role = */ "assistant", - /* .content = */ match.prefix().str(), - /* .tool_calls = */ { - { - /* .name = */ match[1], - /* .arguments = */ (json { - {arg_name, arg_value}, - }).dump(), - /* .id = */ "", - }, - }, - }; - } - } - return parse_json_tool_calls(input, std::nullopt, function_regex, close_regex); -} - -static common_chat_params common_chat_params_init_deepseek_r1(const common_chat_template & tmpl, const struct common_chat_inputs & inputs) { - common_chat_params data; - data.grammar_lazy = inputs.tool_choice != "required"; - data.grammar = build_grammar([&](const common_grammar_builder & builder) { - std::vector tool_rules; - foreach_function(inputs.tools, [&](const json & tool) { - const auto & function = tool["function"]; - std::string name = function["name"]; - auto parameters = function["parameters"]; - auto args_rule = builder.add_schema(name + "-args", parameters); - tool_rules.push_back(builder.add_rule(name + "-call", - "\"<|tool▁call▁begin|>function<|tool▁sep|>" + name + "\\n```json\\n\" " + args_rule + " \"```<|tool▁call▁end|>\"")); - }); - data.grammar_triggers.push_back({"<|tool▁calls▁begin|>", /* .at_start = */ false}); - data.preserved_tokens = { - "<|tool▁sep|>", - "<|tool▁call▁end|>", - }; - builder.add_rule("root", "\"<|tool▁calls▁begin|>\" (" + string_join(tool_rules, " | ") + ")" + (inputs.parallel_tool_calls ? "*" : "") + " space"); - }, grammar_options); - auto prompt = apply(tmpl, inputs.messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt); - data.prompt = prompt; - data.format = COMMON_CHAT_FORMAT_DEEPSEEK_R1; - return data; -} -static common_chat_msg common_chat_parse_deepseek_r1(const std::string & input) { - static std::regex trigger_regex("<|tool▁calls▁begin|>"); - static std::regex function_regex("<|tool▁call▁begin|>function<|tool▁sep|>([^\n]+)\n```json\n"); - static std::regex close_regex("```<|tool▁call▁end|>"); - return parse_json_tool_calls(input, trigger_regex, function_regex, close_regex); -} - -static common_chat_params common_chat_params_init_firefunction_v2(const common_chat_template & tmpl, const struct common_chat_inputs & inputs) { - fprintf(stderr, "%s\n", __func__); - common_chat_params data; - data.prompt = apply(tmpl, inputs.messages, /* tools= */ nullptr, inputs.add_generation_prompt, { - {"datetime", "Jan 29 2025 13:00:00 GMT"}, - {"functions", json(inputs.tools.empty() ? "" : inputs.tools.dump(2))}, - }); - if (!inputs.tools.is_null() && !inputs.tools.empty()) { - data.grammar_lazy = inputs.tool_choice != "required"; - data.grammar = build_grammar([&](const common_grammar_builder & builder) { - auto schemas = json::array(); - foreach_function(inputs.tools, [&](const json & tool) { - const auto & function = tool["function"]; - schemas.push_back({ - {"type", "object"}, - {"properties", { - {"name", { - {"type", "string"}, - {"const", function["name"]}, - }}, - {"arguments", function["parameters"]}, - }}, - {"required", json::array({"name", "arguments", "id"})}, - }); - }); - auto schema = json { - {"type", "array"}, - {"items", schemas.size() == 1 ? schemas[0] : json {{"anyOf", schemas}}}, - {"minItems", 1}, - }; - if (!inputs.parallel_tool_calls) { - schema["maxItems"] = 1; - } - builder.add_rule("root", "\" functools\"? 
" + builder.add_schema("tool_calls", schema)); - }, grammar_options); - data.grammar_triggers.push_back({" functools[", /* .at_start = */ false}); - data.format = COMMON_CHAT_FORMAT_FIREFUNCTION_V2; - } else { - data.format = COMMON_CHAT_FORMAT_CONTENT_ONLY; - } - return data; -} -static common_chat_msg common_chat_parse_firefunction_v2(const std::string & input) { - return parse_prefixed_json_tool_call_array(input, " functools[", /* rstrip_prefix= */ 1); -} - -static common_chat_params common_chat_params_init_functionary_v3_2(const common_chat_template & tmpl, const struct common_chat_inputs & inputs) { - // >>>all\nlet's call functions>>>fn1\n{"arg1": 1...}\n>>>fn2\n{"arg1": 1...}... - // Using ">>>f1\n", ">>>f2\n"... as trigger words for the grammar - common_chat_params data; - data.prompt = apply(tmpl, inputs.messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt); - data.format = COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2; - if (!inputs.tools.is_null() && !inputs.tools.empty()) { - data.grammar_lazy = inputs.tool_choice != "required"; - data.grammar = build_grammar([&](const common_grammar_builder & builder) { - std::vector first_tool_rules; - std::vector subsequent_tool_rules; - foreach_function(inputs.tools, [&](const json & tool) { - const auto & function = tool["function"]; - std::string name = function["name"]; - auto parameters = function["parameters"]; - auto args_rule = builder.add_schema(name + "-args", parameters); - first_tool_rules.push_back(builder.add_rule(name + "-call", "\"" + name + "\\n\" " + args_rule)); - subsequent_tool_rules.push_back(builder.add_rule(name + "-call2", "\">>>" + name + "\\n\" " + args_rule)); - data.grammar_triggers.push_back({name, /* .at_start = */ true}); - data.grammar_triggers.push_back({">>>" + name, /* .at_start = */ false}); - }); - auto first_rule = first_tool_rules.empty() ? "" : builder.add_rule("first_tool_call", string_join(first_tool_rules, " | ")) + " space"; - if (inputs.parallel_tool_calls) { - auto subsequent_rule = builder.add_rule("subsequent_tool_call", string_join(subsequent_tool_rules, " | ")) + " space"; - builder.add_rule("root", first_rule + " (" + subsequent_rule + ")*"); - } else { - builder.add_rule("root", first_rule); - } - - }, grammar_options); - } - return data; -} - -static bool consume(std::string::const_iterator & it, const std::string::const_iterator & end, const std::string & expected) { - auto expected_it = expected.begin(); - auto tmp_it = it; - while (tmp_it != end && expected_it != expected.end() && *tmp_it == *expected_it) { - ++tmp_it; - ++expected_it; - } - if (expected_it == expected.end()) { - it = tmp_it; - return true; - } - return false; -} - -static common_chat_msg common_chat_parse_functionary_v3_2(const std::string & input) { - static std::regex function_regex(R"((?:>>>)?(\w+)\n)"); - static std::regex close_regex(R"($|(?=>>>))"); - - std::string content; - auto it = input.begin(); - const auto end = input.end(); - - if (consume(it, end, "all\n")) { - std::smatch match; - if (std::regex_search(it, end, match, function_regex)) { - auto fun_it = match.prefix().second; - content = std::string(it, fun_it); - it = fun_it; - } else { - common_chat_msg res; - res.role = "assistant"; - res.content = std::string(it, end); - return res; - } - } - // TODO: tighten & simplify. 
- try { - auto res = parse_json_tool_calls(std::string(it, end), std::nullopt, function_regex, close_regex); - res.content = content + res.content; - return res; - } catch (const std::exception & e) { - LOG_ERR("Failed to parse functionary v3.2 input: %s\n", e.what()); - common_chat_msg res; - res.role = "assistant"; - res.content = input; - return res; - } -} - -static common_chat_params common_chat_params_init_functionary_v3_1_llama_3_1(const common_chat_template & tmpl, const struct common_chat_inputs & inputs) { - // https://github.com/MeetKai/functionary/blob/main/tests/prompt_test_v3-llama3.1.txt - common_chat_params data; - json tools = inputs.tools.is_null() ? inputs.tools : json::array(); - std::string python_code_argument_name; - auto has_raw_python = false; - - data.grammar_lazy = inputs.tool_choice != "required"; - data.grammar = build_grammar([&](const common_grammar_builder & builder) { - std::vector tool_rules; - foreach_function(inputs.tools, [&](const json & tool) { - const auto & function = tool["function"]; - const auto & parameters = function["parameters"]; - std::string name = function["name"]; - if (name == "python" || name == "ipython") { - if (!parameters.contains("type")) { - throw std::runtime_error("Missing type in python tool"); - } - has_raw_python = true; - auto type = parameters.at("type"); - if (type == "object") { - auto properties = parameters.at("properties"); - for (auto it = properties.begin(); it != properties.end(); ++it) { - if (it.value().at("type") == "string") { - if (!python_code_argument_name.empty()) { - throw std::runtime_error("Multiple string arguments found in python tool"); - } - python_code_argument_name = it.key(); - } - } - if (python_code_argument_name.empty()) { - throw std::runtime_error("No string argument found in python tool"); - } - } else if (type != "string") { - throw std::runtime_error("Invalid type in python tool: " + type.dump()); - } - } - tool_rules.push_back(builder.add_rule(name + "-call", "\"\" " + builder.add_schema(name + "-args", parameters) + " \"\" space")); - }); - if (has_raw_python) { - tool_rules.push_back(builder.add_rule("python-call", "\"<|python_tag|>\" .*")); - data.grammar_triggers.push_back({"<|python_tag|>", /* .at_start = */ false}); - } - auto tool_call = builder.add_rule("tool_call", string_join(tool_rules, " | ")) + " space"; - builder.add_rule("root", inputs.parallel_tool_calls ? "(" + tool_call + ")+" : tool_call); - data.grammar_triggers.push_back({"([\s\S\n]*)$)"); - std::smatch match; - if (std::regex_search(input, match, python_tag_regex)) { - auto code = match[1].str(); - return { - /* .role = */ "assistant", - /* .content = */ match.prefix().str(), - /* .tool_calls = */ { - { - /* .name = */ "python", - /* .arguments = */ (json {{"code", code}}).dump(), - /* .id = */ "", - }, - } - }; - } - static std::regex function_regex(R"()"); - static std::regex close_regex(R"()"); - // TODO: tighten & simplify. 
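The regex pair above feeds the same parse_json_tool_calls() scan used by the other handlers; as an assumption (the exact markers come from the Functionary v3.1 template and are not reproduced here), a call is expected to look roughly like <function=get_weather>{"location": "Paris"}</function>, while a raw <|python_tag|> completion is wrapped into a single "python" call. A minimal sketch of that wrapping, with hypothetical completion text and an assumed nlohmann include path:

    #include <nlohmann/json.hpp>  // assumed include path for the same JSON library
    #include <iostream>
    #include <string>

    int main() {
        // Hypothetical completion following the <|python_tag|> marker.
        std::string code = "print(1 + 1)";

        nlohmann::json tool_call = {
            { "name",      "python" },
            { "arguments", nlohmann::json({ { "code", code } }).dump() },  // arguments serialized as a JSON string
            { "id",        "" },
        };
        std::cout << tool_call.dump(2) << "\n";
    }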
- return parse_json_tool_calls(input, std::nullopt, function_regex, close_regex); -} - -static common_chat_params common_chat_params_init_hermes_2_pro(const common_chat_template & tmpl, const struct common_chat_inputs & inputs) { - common_chat_params data; - // (content)?({"name": "foo", "arguments": {"a": 1}})* - data.grammar_lazy = inputs.tool_choice != "required"; - data.grammar = build_grammar([&](const common_grammar_builder & builder) { - std::vector tool_rules; - foreach_function(inputs.tools, [&](const json & tool) { - const auto & function = tool["function"]; - std::string name = function["name"]; - auto parameters = function["parameters"]; - builder.resolve_refs(parameters); - tool_rules.push_back(builder.add_schema(name + "-call", { - {"type", "object"}, - {"properties", json { - {"name", json {{"const", name}}}, - {"arguments", parameters}, - }}, - {"required", json::array({"name", "arguments"})}, - })); - }); - auto tool_call = "\"\" space " + builder.add_rule("tool_call", string_join(tool_rules, " | ")) + " \"\" space"; - builder.add_rule("root", inputs.parallel_tool_calls ? "(" + tool_call + ")+" : tool_call); - data.grammar_triggers.push_back({"", /* .at_start = */ false}); - data.preserved_tokens = { "" }; - }, grammar_options); - - data.prompt = apply(tmpl, inputs.messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt); - data.format = COMMON_CHAT_FORMAT_HERMES_2_PRO; - return data; -} -static common_chat_msg common_chat_parse_hermes_2_pro(const std::string & input) { - try { - std::regex start_pattern(R"([\n\s]*)"); - std::regex middle_pattern(R"([\n\s]*[\n\s]*)"); - std::regex end_pattern(R"([\n\s]*[\n\s]*$)"); - - auto end = input.end(); - std::sregex_iterator rend; - std::sregex_iterator rit(input.begin(), end, start_pattern); - if (rit == rend) { - return { - /* .role = */ "assistant", - /* .content = */ input, - /* .tool_calls = */ {}, - }; - } - - common_chat_msg result; - result.role = "assistant"; - result.content = rit->prefix(); - - auto it = rit->suffix().first; - while (it != end) { - json call; - if (!parse_json(it, end, call)) { - throw std::runtime_error("Failed to parse json tool call"); - } - const auto & arguments = call["arguments"]; - result.tool_calls.push_back({ - call["name"], - arguments.dump(), - // arguments.is_string() ? arguments.get() : arguments.dump(), - /* id= */ "", - }); - rit = {it, end, middle_pattern}; - if (rit != rend) { - it = rit->suffix().first; - } else { - rit = {it, end, end_pattern}; - if (rit == rend) { - throw std::runtime_error("Malformed input, missing "); - } - break; - } - } - return result; - } catch (const std::exception & e) { - return { - /* .role = */ "assistant", - /* .content = */ input, - /* .tool_calls = */ {}, - }; - } -} - -static common_chat_params common_chat_params_init_without_tools(const common_chat_template & tmpl, const struct common_chat_inputs & inputs) { - common_chat_params data; - data.prompt = apply(tmpl, inputs.messages, inputs.tools.empty() ? 
json() : inputs.tools, inputs.add_generation_prompt); - data.format = COMMON_CHAT_FORMAT_CONTENT_ONLY; - data.grammar_lazy = false; - if (!inputs.json_schema.is_null()) { - if (!inputs.grammar.empty()) { - throw std::runtime_error("Either \"json_schema\" or \"grammar\" can be specified, but not both"); - } - data.grammar = json_schema_to_grammar(inputs.json_schema); - } else { - data.grammar = inputs.grammar.empty(); - } - return data; -} - -common_chat_params common_chat_params_init(const common_chat_template & tmpl, const struct common_chat_inputs & inputs) { - auto has_tools = !inputs.tools.is_null() && inputs.tool_choice != "none"; - LOG_DBG("[%s] has_tools=%s\n", __func__, has_tools ? "true" : "false"); - - if (has_tools && !inputs.grammar.empty()) { - throw std::runtime_error("Cannot specify grammar with tools"); - } - - const auto & src = tmpl.source(); - if (src.find(">>>all") != std::string::npos) { - // Functionary prepends "all\n" to plain content outputs, so we use the parser no matter when - return common_chat_params_init_functionary_v3_2(tmpl, inputs); - } - if (src.find(" functools[") != std::string::npos) { - // Firefunction v2 requires datetime and functions in the context, even w/o tools. - return common_chat_params_init_firefunction_v2(tmpl, inputs); - } - - if (!has_tools) { - return common_chat_params_init_without_tools(tmpl, inputs); - } - - if (src.find("") != std::string::npos) { - return common_chat_params_init_hermes_2_pro(tmpl, inputs); - } - if (src.find("<|start_header_id|>") != std::string::npos - && src.find("ipython<|end_header_id|>") != std::string::npos) { - auto allow_python_tag_builtin_tools = src.find("<|python_tag|>") != std::string::npos; - return common_chat_params_init_llama_3_1_tool_calls(tmpl, inputs, allow_python_tag_builtin_tools); - } - if (src.find("<|tool▁calls▁begin|>") != std::string::npos) { - return common_chat_params_init_deepseek_r1(tmpl, inputs); - } - if (src.find("[TOOL_CALLS]") != std::string::npos) { - return common_chat_params_init_mistral_nemo(tmpl, inputs); - } - if (src.find("<|END_THINKING|><|START_ACTION|>") != std::string::npos) { - return common_chat_params_init_command_r7b(tmpl, inputs); - } - return common_chat_params_init_generic(tmpl, inputs); -} - -static common_chat_msg common_chat_parse_content_only(const std::string & input) { - return { - /* .role = */ "assistant", - /* .content = */ input, - /* .tool_calls = */ {}, - }; -} - -common_chat_msg common_chat_parse(const std::string & input, common_chat_format format) { - switch (format) { - case COMMON_CHAT_FORMAT_CONTENT_ONLY: - return common_chat_parse_content_only(input); - case COMMON_CHAT_FORMAT_GENERIC: - return common_chat_parse_generic(input); - case COMMON_CHAT_FORMAT_MISTRAL_NEMO: - return common_chat_parse_mistral_nemo(input); - case COMMON_CHAT_FORMAT_LLAMA_3_X: - return common_chat_parse_llama_3_1(input); - case COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS: - return common_chat_parse_llama_3_1(input, /* with_builtin_tools= */ true); - case COMMON_CHAT_FORMAT_DEEPSEEK_R1: - return common_chat_parse_deepseek_r1(input); - case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2: - return common_chat_parse_functionary_v3_2(input); - case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1: - return common_chat_parse_functionary_v3_1_llama_3_1(input); - case COMMON_CHAT_FORMAT_HERMES_2_PRO: - return common_chat_parse_hermes_2_pro(input); - case COMMON_CHAT_FORMAT_FIREFUNCTION_V2: - return common_chat_parse_firefunction_v2(input); - case COMMON_CHAT_FORMAT_COMMAND_R7B: - return 
common_chat_parse_command_r7b(input); - default: - throw std::runtime_error("Unsupported format: " + common_chat_format_name(format)); - } -} diff --git a/common/chat.hpp b/common/chat.hpp deleted file mode 100644 index 33e64a430..000000000 --- a/common/chat.hpp +++ /dev/null @@ -1,52 +0,0 @@ -// Chat support (incl. tool call grammar constraining & output parsing) w/ generic & custom template handlers. - -#pragma once - -#include "common.h" -#include -#include -#include -#include - -using json = nlohmann::ordered_json; - -struct common_chat_inputs { - json messages; - json tools; - json tool_choice; - json json_schema; - bool parallel_tool_calls; - bool stream; - std::string grammar; - bool add_generation_prompt = true; -}; - -enum common_chat_format { - COMMON_CHAT_FORMAT_CONTENT_ONLY, - COMMON_CHAT_FORMAT_GENERIC, - COMMON_CHAT_FORMAT_MISTRAL_NEMO, - COMMON_CHAT_FORMAT_LLAMA_3_X, - COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS, - COMMON_CHAT_FORMAT_DEEPSEEK_R1, - COMMON_CHAT_FORMAT_FIREFUNCTION_V2, - COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2, - COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1, - COMMON_CHAT_FORMAT_HERMES_2_PRO, - COMMON_CHAT_FORMAT_COMMAND_R7B, - - COMMON_CHAT_FORMAT_COUNT, // Not a format, just the # formats -}; - -struct common_chat_params { - common_chat_format format = COMMON_CHAT_FORMAT_CONTENT_ONLY; - json prompt; - std::string grammar; - bool grammar_lazy = false; - std::vector grammar_triggers; - std::vector preserved_tokens; - std::vector additional_stops; -}; - -struct common_chat_params common_chat_params_init(const common_chat_template & tmpl, const struct common_chat_inputs & params); -std::string common_chat_format_name(common_chat_format format); -common_chat_msg common_chat_parse( const std::string & input, common_chat_format format); diff --git a/common/cmake/build-info-gen-cpp.cmake b/common/cmake/build-info-gen-cpp.cmake deleted file mode 100644 index fbc92b52c..000000000 --- a/common/cmake/build-info-gen-cpp.cmake +++ /dev/null @@ -1,24 +0,0 @@ -include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/build-info.cmake) - -set(TEMPLATE_FILE "${CMAKE_CURRENT_SOURCE_DIR}/common/build-info.cpp.in") -set(OUTPUT_FILE "${CMAKE_CURRENT_SOURCE_DIR}/common/build-info.cpp") - -# Only write the build info if it changed -if(EXISTS ${OUTPUT_FILE}) - file(READ ${OUTPUT_FILE} CONTENTS) - string(REGEX MATCH "LLAMA_COMMIT = \"([^\"]*)\";" _ ${CONTENTS}) - set(OLD_COMMIT ${CMAKE_MATCH_1}) - string(REGEX MATCH "LLAMA_COMPILER = \"([^\"]*)\";" _ ${CONTENTS}) - set(OLD_COMPILER ${CMAKE_MATCH_1}) - string(REGEX MATCH "LLAMA_BUILD_TARGET = \"([^\"]*)\";" _ ${CONTENTS}) - set(OLD_TARGET ${CMAKE_MATCH_1}) - if ( - NOT OLD_COMMIT STREQUAL BUILD_COMMIT OR - NOT OLD_COMPILER STREQUAL BUILD_COMPILER OR - NOT OLD_TARGET STREQUAL BUILD_TARGET - ) - configure_file(${TEMPLATE_FILE} ${OUTPUT_FILE}) - endif() -else() - configure_file(${TEMPLATE_FILE} ${OUTPUT_FILE}) -endif() diff --git a/common/common.cpp b/common/common.cpp deleted file mode 100644 index 8661e164a..000000000 --- a/common/common.cpp +++ /dev/null @@ -1,2198 +0,0 @@ -#if defined(_MSC_VER) -#define _SILENCE_CXX17_CODECVT_HEADER_DEPRECATION_WARNING -#endif - -#include "ggml.h" -#include "gguf.h" - -#include "common.h" -#include "log.h" -// Change JSON_ASSERT from assert() to GGML_ASSERT: -#define JSON_ASSERT GGML_ASSERT -#include "json.hpp" -#include "json-schema-to-grammar.h" -#include "llama.h" -#include "chat.hpp" -#include "chat-template.hpp" - -#include -#include -#include -#include -#include -#include -#include -#include -#include 
-#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#if defined(__APPLE__) && defined(__MACH__) -#include -#include -#endif - -#if defined(_WIN32) -#define WIN32_LEAN_AND_MEAN -#ifndef NOMINMAX -# define NOMINMAX -#endif -#include -#include -#include -#include -#else -#include -#include -#include -#endif -#if defined(LLAMA_USE_CURL) -#include -#include -#include -#endif - -#if defined(_MSC_VER) -#pragma warning(disable: 4244 4267) // possible loss of data -#endif - -#if defined(LLAMA_USE_CURL) -#ifdef __linux__ -#include -#elif defined(_WIN32) -# if !defined(PATH_MAX) -# define PATH_MAX MAX_PATH -# endif -#else -#include -#endif -#define LLAMA_CURL_MAX_URL_LENGTH 2084 // Maximum URL Length in Chrome: 2083 - -// -// CURL utils -// - -using curl_ptr = std::unique_ptr; - -// cannot use unique_ptr for curl_slist, because we cannot update without destroying the old one -struct curl_slist_ptr { - struct curl_slist * ptr = nullptr; - ~curl_slist_ptr() { - if (ptr) { - curl_slist_free_all(ptr); - } - } -}; -#endif // LLAMA_USE_CURL - -using json = nlohmann::ordered_json; - -// -// CPU utils -// - -int32_t cpu_get_num_physical_cores() { -#ifdef __linux__ - // enumerate the set of thread siblings, num entries is num cores - std::unordered_set siblings; - for (uint32_t cpu=0; cpu < UINT32_MAX; ++cpu) { - std::ifstream thread_siblings("/sys/devices/system/cpu/cpu" - + std::to_string(cpu) + "/topology/thread_siblings"); - if (!thread_siblings.is_open()) { - break; // no more cpus - } - std::string line; - if (std::getline(thread_siblings, line)) { - siblings.insert(line); - } - } - if (!siblings.empty()) { - return static_cast(siblings.size()); - } -#elif defined(__APPLE__) && defined(__MACH__) - int32_t num_physical_cores; - size_t len = sizeof(num_physical_cores); - int result = sysctlbyname("hw.perflevel0.physicalcpu", &num_physical_cores, &len, NULL, 0); - if (result == 0) { - return num_physical_cores; - } - result = sysctlbyname("hw.physicalcpu", &num_physical_cores, &len, NULL, 0); - if (result == 0) { - return num_physical_cores; - } -#elif defined(_WIN32) && (_WIN32_WINNT >= 0x0601) && !defined(__MINGW64__) // windows 7 and later - // TODO: windows + arm64 + mingw64 - unsigned int n_threads_win = std::thread::hardware_concurrency(); - unsigned int default_threads = n_threads_win > 0 ? (n_threads_win <= 4 ? n_threads_win : n_threads_win / 2) : 4; - - DWORD buffer_size = 0; - if (!GetLogicalProcessorInformationEx(RelationProcessorCore, nullptr, &buffer_size)) { - if (GetLastError() != ERROR_INSUFFICIENT_BUFFER) { - return default_threads; - } - } - - std::vector buffer(buffer_size); - if (!GetLogicalProcessorInformationEx(RelationProcessorCore, reinterpret_cast(buffer.data()), &buffer_size)) { - return default_threads; - } - - int32_t num_physical_cores = 0; - PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX info = reinterpret_cast(buffer.data()); - while (buffer_size > 0) { - if (info->Relationship == RelationProcessorCore) { - num_physical_cores += info->Processor.GroupCount; - } - buffer_size -= info->Size; - info = reinterpret_cast(reinterpret_cast(info) + info->Size); - } - - return num_physical_cores > 0 ? num_physical_cores : default_threads; -#endif - unsigned int n_threads = std::thread::hardware_concurrency(); - return n_threads > 0 ? (n_threads <= 4 ? 
n_threads : n_threads / 2) : 4; -} - -#if defined(__x86_64__) && defined(__linux__) && !defined(__ANDROID__) -#include - -static void cpuid(unsigned leaf, unsigned subleaf, - unsigned *eax, unsigned *ebx, unsigned *ecx, unsigned *edx) { - __asm__("movq\t%%rbx,%%rsi\n\t" - "cpuid\n\t" - "xchgq\t%%rbx,%%rsi" - : "=a"(*eax), "=S"(*ebx), "=c"(*ecx), "=d"(*edx) - : "0"(leaf), "2"(subleaf)); -} - -static int pin_cpu(int cpu) { - cpu_set_t mask; - CPU_ZERO(&mask); - CPU_SET(cpu, &mask); - return pthread_setaffinity_np(pthread_self(), sizeof(mask), &mask); -} - -static bool is_hybrid_cpu(void) { - unsigned eax, ebx, ecx, edx; - cpuid(7, 0, &eax, &ebx, &ecx, &edx); - return !!(edx & (1u << 15)); -} - -static bool is_running_on_efficiency_core(void) { - unsigned eax, ebx, ecx, edx; - cpuid(0x1a, 0, &eax, &ebx, &ecx, &edx); - int intel_atom = 0x20; - int core_type = (eax & 0xff000000u) >> 24; - return core_type == intel_atom; -} - -static int cpu_count_math_cpus(int n_cpu) { - int result = 0; - for (int cpu = 0; cpu < n_cpu; ++cpu) { - if (pin_cpu(cpu)) { - return -1; - } - if (is_running_on_efficiency_core()) { - continue; // efficiency cores harm lockstep threading - } - ++cpu; // hyperthreading isn't useful for linear algebra - ++result; - } - return result; -} - -#endif // __x86_64__ && __linux__ - -/** - * Returns number of CPUs on system that are useful for math. - */ -int32_t cpu_get_num_math() { -#if defined(__x86_64__) && defined(__linux__) && !defined(__ANDROID__) - int n_cpu = sysconf(_SC_NPROCESSORS_ONLN); - if (n_cpu < 1) { - return cpu_get_num_physical_cores(); - } - if (is_hybrid_cpu()) { - cpu_set_t affinity; - if (!pthread_getaffinity_np(pthread_self(), sizeof(affinity), &affinity)) { - int result = cpu_count_math_cpus(n_cpu); - pthread_setaffinity_np(pthread_self(), sizeof(affinity), &affinity); - if (result > 0) { - return result; - } - } - } -#endif - return cpu_get_num_physical_cores(); -} - -// Helper for setting process priority - -#if defined(_WIN32) - -bool set_process_priority(enum ggml_sched_priority prio) { - if (prio == GGML_SCHED_PRIO_NORMAL) { - return true; - } - - DWORD p = NORMAL_PRIORITY_CLASS; - switch (prio) { - case GGML_SCHED_PRIO_NORMAL: p = NORMAL_PRIORITY_CLASS; break; - case GGML_SCHED_PRIO_MEDIUM: p = ABOVE_NORMAL_PRIORITY_CLASS; break; - case GGML_SCHED_PRIO_HIGH: p = HIGH_PRIORITY_CLASS; break; - case GGML_SCHED_PRIO_REALTIME: p = REALTIME_PRIORITY_CLASS; break; - } - - if (!SetPriorityClass(GetCurrentProcess(), p)) { - LOG_WRN("failed to set process priority class %d : (%d)\n", prio, (int) GetLastError()); - return false; - } - - return true; -} - -#else // MacOS and POSIX -#include -#include - -bool set_process_priority(enum ggml_sched_priority prio) { - if (prio == GGML_SCHED_PRIO_NORMAL) { - return true; - } - - int p = 0; - switch (prio) { - case GGML_SCHED_PRIO_NORMAL: p = 0; break; - case GGML_SCHED_PRIO_MEDIUM: p = -5; break; - case GGML_SCHED_PRIO_HIGH: p = -10; break; - case GGML_SCHED_PRIO_REALTIME: p = -20; break; - } - - if (!setpriority(PRIO_PROCESS, 0, p)) { - LOG_WRN("failed to set process priority %d : %s (%d)\n", prio, strerror(errno), errno); - return false; - } - return true; -} - -#endif - -// -// CLI argument parsing -// - - -void postprocess_cpu_params(cpu_params& cpuparams, const cpu_params* role_model) { - int32_t n_set = 0; - - if (cpuparams.n_threads < 0) { - // Assuming everything about cpuparams is invalid - if (role_model != nullptr) { - cpuparams = *role_model; - } else { - cpuparams.n_threads = cpu_get_num_math(); - } - } - 
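// Note: past this point cpuparams.n_threads is always valid (either copied from the role model or defaulted to
// cpu_get_num_math()); the loop below only counts how many cpumask bits were set so that a shortfall versus the
// requested thread count can be reported.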
- for (int32_t i = 0; i < GGML_MAX_N_THREADS; i++) { - if (cpuparams.cpumask[i]) { - n_set++; - } - } - - if (n_set && n_set < cpuparams.n_threads) { - // Not enough set bits, may experience performance issues. - LOG_WRN("Not enough set bits in CPU mask (%d) to satisfy requested thread count: %d\n", n_set, cpuparams.n_threads); - } -} - -bool parse_cpu_range(const std::string & range, bool (&boolmask)[GGML_MAX_N_THREADS]) { - size_t dash_loc = range.find('-'); - if (dash_loc == std::string::npos) { - LOG_ERR("Format of CPU range is invalid! Expected []-[].\n"); - return false; - } - - size_t start_i; - size_t end_i; - - if (dash_loc == 0) { - start_i = 0; - } else { - start_i = std::stoull(range.substr(0, dash_loc)); - if (start_i >= GGML_MAX_N_THREADS) { - LOG_ERR("Start index out of bounds!\n"); - return false; - } - } - - if (dash_loc == range.length() - 1) { - end_i = GGML_MAX_N_THREADS - 1; - } else { - end_i = std::stoull(range.substr(dash_loc + 1)); - if (end_i >= GGML_MAX_N_THREADS) { - LOG_ERR("End index out of bounds!\n"); - return false; - } - } - - for (size_t i = start_i; i <= end_i; i++) { - boolmask[i] = true; - } - - return true; -} - -bool parse_cpu_mask(const std::string & mask, bool (&boolmask)[GGML_MAX_N_THREADS]) { - // Discard potential 0x prefix - size_t start_i = 0; - if (mask.length() >= 2 && mask.substr(0, 2) == "0x") { - start_i = 2; - } - - size_t num_digits = mask.length() - start_i; - if (num_digits > 128) num_digits = 128; - - size_t end_i = num_digits + start_i; - - for (size_t i = start_i, n = (num_digits*4 - 1); i < end_i; i++, n-=4) { - char c = mask.at(i); - int8_t id = c; - - if ((c >= '0' && c <= '9')) { - id -= '0'; - } else if (c >= 'a' && c <= 'f') { - id -= 'a' - 10; - } else if (c >= 'A' && c <= 'F') { - id -= 'A' - 10; - } else { - LOG_ERR("Invalid hex character '%c' at position %d\n", c, int32_t(i)); - return false; - } - - boolmask[ n ] = boolmask[ n ] || ((id & 8) != 0); - boolmask[n - 1] = boolmask[n - 1] || ((id & 4) != 0); - boolmask[n - 2] = boolmask[n - 2] || ((id & 2) != 0); - boolmask[n - 3] = boolmask[n - 3] || ((id & 1) != 0); - } - - return true; -} - -void common_init() { - llama_log_set([](ggml_log_level level, const char * text, void * /*user_data*/) { - if (LOG_DEFAULT_LLAMA <= common_log_verbosity_thold) { - common_log_add(common_log_main(), level, "%s", text); - } - }, NULL); - -#ifdef NDEBUG - const char * build_type = ""; -#else - const char * build_type = " (debug)"; -#endif - - LOG_INF("build: %d (%s) with %s for %s%s\n", LLAMA_BUILD_NUMBER, LLAMA_COMMIT, LLAMA_COMPILER, LLAMA_BUILD_TARGET, build_type); -} - -std::string common_params_get_system_info(const common_params & params) { - std::ostringstream os; - - os << "system_info: n_threads = " << params.cpuparams.n_threads; - if (params.cpuparams_batch.n_threads != -1) { - os << " (n_threads_batch = " << params.cpuparams_batch.n_threads << ")"; - } -#if defined(_WIN32) && (_WIN32_WINNT >= 0x0601) && !defined(__MINGW64__) // windows 7 and later - // TODO: windows + arm64 + mingw64 - DWORD logicalProcessorCount = GetActiveProcessorCount(ALL_PROCESSOR_GROUPS); - os << " / " << logicalProcessorCount << " | " << llama_print_system_info(); -#else - os << " / " << std::thread::hardware_concurrency() << " | " << llama_print_system_info(); -#endif - - return os.str(); -} - -// -// String utils -// - -std::string string_format(const char * fmt, ...) 
{ - va_list ap; - va_list ap2; - va_start(ap, fmt); - va_copy(ap2, ap); - int size = vsnprintf(NULL, 0, fmt, ap); - GGML_ASSERT(size >= 0 && size < INT_MAX); // NOLINT - std::vector buf(size + 1); - int size2 = vsnprintf(buf.data(), size + 1, fmt, ap2); - GGML_ASSERT(size2 == size); - va_end(ap2); - va_end(ap); - return std::string(buf.data(), size); -} - -std::string string_strip(const std::string & str) { - size_t start = 0; - size_t end = str.size(); - while (start < end && std::isspace(str[start])) { - start++; - } - while (end > start && std::isspace(str[end - 1])) { - end--; - } - return str.substr(start, end - start); -} - -std::string string_get_sortable_timestamp() { - using clock = std::chrono::system_clock; - - const clock::time_point current_time = clock::now(); - const time_t as_time_t = clock::to_time_t(current_time); - char timestamp_no_ns[100]; - std::strftime(timestamp_no_ns, 100, "%Y_%m_%d-%H_%M_%S", std::localtime(&as_time_t)); - - const int64_t ns = std::chrono::duration_cast( - current_time.time_since_epoch() % 1000000000).count(); - char timestamp_ns[11]; - snprintf(timestamp_ns, 11, "%09" PRId64, ns); - - return std::string(timestamp_no_ns) + "." + std::string(timestamp_ns); -} - -void string_replace_all(std::string & s, const std::string & search, const std::string & replace) { - if (search.empty()) { - return; - } - std::string builder; - builder.reserve(s.length()); - size_t pos = 0; - size_t last_pos = 0; - while ((pos = s.find(search, last_pos)) != std::string::npos) { - builder.append(s, last_pos, pos - last_pos); - builder.append(replace); - last_pos = pos + search.length(); - } - builder.append(s, last_pos, std::string::npos); - s = std::move(builder); -} - -std::string string_join(const std::vector & values, const std::string & separator) { - std::ostringstream result; - for (size_t i = 0; i < values.size(); ++i) { - if (i > 0) { - result << separator; - } - result << values[i]; - } - return result.str(); -} - -std::vector string_split(const std::string & str, const std::string & delimiter) { - std::vector parts; - size_t start = 0; - size_t end = str.find(delimiter); - - while (end != std::string::npos) { - parts.push_back(str.substr(start, end - start)); - start = end + delimiter.length(); - end = str.find(delimiter, start); - } - - parts.push_back(str.substr(start)); - - return parts; -} - -std::string string_repeat(const std::string & str, size_t n) { - if (n == 0) { - return ""; - } - - std::string result; - result.reserve(str.length() * n); - - for (size_t i = 0; i < n; ++i) { - result += str; - } - - return result; -} - -std::string string_from(bool value) { - return value ? 
"true" : "false"; -} - -std::string string_from(const std::vector & values) { - std::stringstream buf; - - buf << "[ "; - bool first = true; - for (auto e : values) { - if (first) { - first = false; - } else { - buf << ", "; - } - buf << std::to_string(e); - } - buf << " ]"; - - return buf.str(); -} - -std::string string_from(const struct llama_context * ctx, const std::vector & tokens) { - std::stringstream buf; - - buf << "[ "; - - bool first = true; - for (const auto & token : tokens) { - if (!first) { - buf << ", "; - } else { - first = false; - } - - auto detokenized = common_token_to_piece(ctx, token); - - detokenized.erase( - std::remove_if( - detokenized.begin(), - detokenized.end(), - [](const unsigned char c) { return !std::isprint(c); }), - detokenized.end()); - - buf << "'" << detokenized << "'" - << ":" << std::to_string(token); - } - - buf << " ]"; - - return buf.str(); -} - -std::string string_from(const struct llama_context * ctx, const struct llama_batch & batch) { - std::stringstream buf; - - buf << "[ "; - - bool first = true; - for (int i = 0; i < batch.n_tokens; ++i) { - if (!first) { - buf << ", "; - } else { - first = false; - } - - auto detokenized = common_token_to_piece(ctx, batch.token[i]); - - detokenized.erase( - std::remove_if( - detokenized.begin(), - detokenized.end(), - [](const unsigned char c) { return !std::isprint(c); }), - detokenized.end()); - - buf << "\n" << std::to_string(i) - << ", token '" << detokenized << "'" - << ", pos " << std::to_string(batch.pos[i]) - << ", n_seq_id " << std::to_string(batch.n_seq_id[i]) - << ", seq_id " << std::to_string(batch.seq_id[i][0]) - << ", logits " << std::to_string(batch.logits[i]); - } - - buf << " ]"; - - return buf.str(); -} - -void string_process_escapes(std::string & input) { - std::size_t input_len = input.length(); - std::size_t output_idx = 0; - - for (std::size_t input_idx = 0; input_idx < input_len; ++input_idx) { - if (input[input_idx] == '\\' && input_idx + 1 < input_len) { - switch (input[++input_idx]) { - case 'n': input[output_idx++] = '\n'; break; - case 'r': input[output_idx++] = '\r'; break; - case 't': input[output_idx++] = '\t'; break; - case '\'': input[output_idx++] = '\''; break; - case '\"': input[output_idx++] = '\"'; break; - case '\\': input[output_idx++] = '\\'; break; - case 'x': - // Handle \x12, etc - if (input_idx + 2 < input_len) { - const char x[3] = { input[input_idx + 1], input[input_idx + 2], 0 }; - char *err_p = nullptr; - const long val = std::strtol(x, &err_p, 16); - if (err_p == x + 2) { - input_idx += 2; - input[output_idx++] = char(val); - break; - } - } - // fall through - default: input[output_idx++] = '\\'; - input[output_idx++] = input[input_idx]; break; - } - } else { - input[output_idx++] = input[input_idx]; - } - } - - input.resize(output_idx); -} - -bool string_parse_kv_override(const char * data, std::vector & overrides) { - const char * sep = strchr(data, '='); - if (sep == nullptr || sep - data >= 128) { - LOG_ERR("%s: malformed KV override '%s'\n", __func__, data); - return false; - } - llama_model_kv_override kvo; - std::strncpy(kvo.key, data, sep - data); - kvo.key[sep - data] = 0; - sep++; - if (strncmp(sep, "int:", 4) == 0) { - sep += 4; - kvo.tag = LLAMA_KV_OVERRIDE_TYPE_INT; - kvo.val_i64 = std::atol(sep); - } else if (strncmp(sep, "float:", 6) == 0) { - sep += 6; - kvo.tag = LLAMA_KV_OVERRIDE_TYPE_FLOAT; - kvo.val_f64 = std::atof(sep); - } else if (strncmp(sep, "bool:", 5) == 0) { - sep += 5; - kvo.tag = LLAMA_KV_OVERRIDE_TYPE_BOOL; - if 
(std::strcmp(sep, "true") == 0) { - kvo.val_bool = true; - } else if (std::strcmp(sep, "false") == 0) { - kvo.val_bool = false; - } else { - LOG_ERR("%s: invalid boolean value for KV override '%s'\n", __func__, data); - return false; - } - } else if (strncmp(sep, "str:", 4) == 0) { - sep += 4; - kvo.tag = LLAMA_KV_OVERRIDE_TYPE_STR; - if (strlen(sep) > 127) { - LOG_ERR("%s: malformed KV override '%s', value cannot exceed 127 chars\n", __func__, data); - return false; - } - strncpy(kvo.val_str, sep, 127); - kvo.val_str[127] = '\0'; - } else { - LOG_ERR("%s: invalid type for KV override '%s'\n", __func__, data); - return false; - } - overrides.emplace_back(std::move(kvo)); - return true; -} - -// -// Filesystem utils -// - -// Validate if a filename is safe to use -// To validate a full path, split the path by the OS-specific path separator, and validate each part with this function -bool fs_validate_filename(const std::string & filename) { - if (!filename.length()) { - // Empty filename invalid - return false; - } - if (filename.length() > 255) { - // Limit at common largest possible filename on Linux filesystems - // to avoid unnecessary further validation - // (On systems with smaller limits it will be caught by the OS) - return false; - } - - std::u32string filename_utf32; - try { -#if defined(__clang__) - // disable C++17 deprecation warning for std::codecvt_utf8 -# pragma clang diagnostic push -# pragma clang diagnostic ignored "-Wdeprecated-declarations" -#endif - std::wstring_convert, char32_t> converter; - -#if defined(__clang__) -# pragma clang diagnostic pop -#endif - - filename_utf32 = converter.from_bytes(filename); - - // If the reverse conversion mismatches, it means overlong UTF-8 sequences were used, - // or invalid encodings were encountered. Reject such attempts - std::string filename_reencoded = converter.to_bytes(filename_utf32); - if (filename_reencoded != filename) { - return false; - } - } catch (const std::exception &) { - return false; - } - - // Check for forbidden codepoints: - // - Control characters - // - Unicode equivalents of illegal characters - // - UTF-16 surrogate pairs - // - UTF-8 replacement character - // - Byte order mark (BOM) - // - Illegal characters: / \ : * ? " < > | - for (char32_t c : filename_utf32) { - if (c <= 0x1F // Control characters (C0) - || c == 0x7F // Control characters (DEL) - || (c >= 0x80 && c <= 0x9F) // Control characters (C1) - || c == 0xFF0E // Fullwidth Full Stop (period equivalent) - || c == 0x2215 // Division Slash (forward slash equivalent) - || c == 0x2216 // Set Minus (backslash equivalent) - || (c >= 0xD800 && c <= 0xDFFF) // UTF-16 surrogate pairs - || c == 0xFFFD // Replacement Character (UTF-8) - || c == 0xFEFF // Byte Order Mark (BOM) - || c == '/' || c == '\\' || c == ':' || c == '*' // Illegal characters - || c == '?' || c == '"' || c == '<' || c == '>' || c == '|') { - return false; - } - } - - // Reject any leading or trailing ' ', or any trailing '.', these are stripped on Windows and will cause a different filename - // Unicode and other whitespace is not affected, only 0x20 space - if (filename.front() == ' ' || filename.back() == ' ' || filename.back() == '.') { - return false; - } - - // Reject any ".." (currently stricter than necessary, it should be fine to just check for == ".." instead) - if (filename.find("..") != std::string::npos) { - return false; - } - - // Reject "." 
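// Illustrative examples (not from the upstream tests): "model-7B.Q4_K_M.gguf" passes, while ".", "..", "foo.",
// " bar", "a/b" and any name containing control characters are rejected.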
- if (filename == ".") { - return false; - } - - return true; -} - -// returns true if successful, false otherwise -bool fs_create_directory_with_parents(const std::string & path) { -#ifdef _WIN32 - std::wstring_convert> converter; - std::wstring wpath = converter.from_bytes(path); - - // if the path already exists, check whether it's a directory - const DWORD attributes = GetFileAttributesW(wpath.c_str()); - if ((attributes != INVALID_FILE_ATTRIBUTES) && (attributes & FILE_ATTRIBUTE_DIRECTORY)) { - return true; - } - - size_t pos_slash = 0; - - // process path from front to back, procedurally creating directories - while ((pos_slash = path.find('\\', pos_slash)) != std::string::npos) { - const std::wstring subpath = wpath.substr(0, pos_slash); - const wchar_t * test = subpath.c_str(); - - const bool success = CreateDirectoryW(test, NULL); - if (!success) { - const DWORD error = GetLastError(); - - // if the path already exists, ensure that it's a directory - if (error == ERROR_ALREADY_EXISTS) { - const DWORD attributes = GetFileAttributesW(subpath.c_str()); - if (attributes == INVALID_FILE_ATTRIBUTES || !(attributes & FILE_ATTRIBUTE_DIRECTORY)) { - return false; - } - } else { - return false; - } - } - - pos_slash += 1; - } - - return true; -#else - // if the path already exists, check whether it's a directory - struct stat info; - if (stat(path.c_str(), &info) == 0) { - return S_ISDIR(info.st_mode); - } - - size_t pos_slash = 1; // skip leading slashes for directory creation - - // process path from front to back, procedurally creating directories - while ((pos_slash = path.find('/', pos_slash)) != std::string::npos) { - const std::string subpath = path.substr(0, pos_slash); - struct stat info; - - // if the path already exists, ensure that it's a directory - if (stat(subpath.c_str(), &info) == 0) { - if (!S_ISDIR(info.st_mode)) { - return false; - } - } else { - // create parent directories - const int ret = mkdir(subpath.c_str(), 0755); - if (ret != 0) { - return false; - } - } - - pos_slash += 1; - } - - return true; -#endif // _WIN32 -} - -std::string fs_get_cache_directory() { - std::string cache_directory = ""; - auto ensure_trailing_slash = [](std::string p) { - // Make sure to add trailing slash - if (p.back() != DIRECTORY_SEPARATOR) { - p += DIRECTORY_SEPARATOR; - } - return p; - }; - if (getenv("LLAMA_CACHE")) { - cache_directory = std::getenv("LLAMA_CACHE"); - } else { -#ifdef __linux__ - if (std::getenv("XDG_CACHE_HOME")) { - cache_directory = std::getenv("XDG_CACHE_HOME"); - } else { - cache_directory = std::getenv("HOME") + std::string("/.cache/"); - } -#elif defined(__APPLE__) - cache_directory = std::getenv("HOME") + std::string("/Library/Caches/"); -#elif defined(_WIN32) - cache_directory = std::getenv("LOCALAPPDATA"); -#endif // __linux__ - cache_directory = ensure_trailing_slash(cache_directory); - cache_directory += "llama.cpp"; - } - return ensure_trailing_slash(cache_directory); -} - -std::string fs_get_cache_file(const std::string & filename) { - GGML_ASSERT(filename.find(DIRECTORY_SEPARATOR) == std::string::npos); - std::string cache_directory = fs_get_cache_directory(); - const bool success = fs_create_directory_with_parents(cache_directory); - if (!success) { - throw std::runtime_error("failed to create cache directory: " + cache_directory); - } - return cache_directory + filename; -} - - -// -// Model utils -// -struct common_init_result common_init_from_params(common_params & params) { - common_init_result iparams; - auto mparams = 
common_model_params_to_llama(params); - - llama_model * model = nullptr; - - if (!params.hf_repo.empty() && !params.hf_file.empty()) { - model = common_load_model_from_hf(params.hf_repo, params.hf_file, params.model, params.hf_token, mparams); - } else if (!params.model_url.empty()) { - model = common_load_model_from_url(params.model_url, params.model, params.hf_token, mparams); - } else { - model = llama_model_load_from_file(params.model.c_str(), mparams); - } - - if (model == NULL) { - LOG_ERR("%s: failed to load model '%s'\n", __func__, params.model.c_str()); - return iparams; - } - - const llama_vocab * vocab = llama_model_get_vocab(model); - - if (params.reranking) { - bool ok = true; - - if (llama_vocab_bos(vocab) == LLAMA_TOKEN_NULL) { - LOG_WRN("%s: warning: vocab does not have a BOS token, reranking will not work\n", __func__); - ok = false; - } - - if (llama_vocab_eos(vocab) == LLAMA_TOKEN_NULL) { - LOG_WRN("%s: warning: vocab does not have an EOS token, reranking will not work\n", __func__); - ok = false; - } - - if (llama_vocab_sep(vocab) == LLAMA_TOKEN_NULL) { - LOG_WRN("%s: warning: vocab does not have a SEP token, reranking will not work\n", __func__); - ok = false; - } - - if (!ok) { - llama_model_free(model); - - return iparams; - } - } - - auto cparams = common_context_params_to_llama(params); - - llama_context * lctx = llama_init_from_model(model, cparams); - if (lctx == NULL) { - LOG_ERR("%s: failed to create context with model '%s'\n", __func__, params.model.c_str()); - llama_model_free(model); - return iparams; - } - - if (params.ctx_shift && !llama_kv_cache_can_shift(lctx)) { - LOG_WRN("%s: KV cache shifting is not supported for this model, disabling KV cache shifting\n", __func__); - params.ctx_shift = false; - } - - if (!params.control_vectors.empty()) { - if (params.control_vector_layer_start <= 0) params.control_vector_layer_start = 1; - if (params.control_vector_layer_end <= 0) params.control_vector_layer_end = llama_model_n_layer(model); - - const auto cvec = common_control_vector_load(params.control_vectors); - if (cvec.n_embd == -1) { - llama_free(lctx); - llama_model_free(model); - - return iparams; - } - - int err = llama_apply_adapter_cvec( - lctx, - cvec.data.data(), - cvec.data.size(), - cvec.n_embd, - params.control_vector_layer_start, - params.control_vector_layer_end); - if (err) { - llama_free(lctx); - llama_model_free(model); - - return iparams; - } - } - - // load and optionally apply lora adapters - for (auto & la : params.lora_adapters) { - llama_adapter_lora_ptr lora; - lora.reset(llama_adapter_lora_init(model, la.path.c_str())); - if (lora == nullptr) { - LOG_ERR("%s: failed to apply lora adapter '%s'\n", __func__, la.path.c_str()); - llama_free(lctx); - llama_model_free(model); - return iparams; - } - - la.ptr = lora.get(); - iparams.lora.emplace_back(std::move(lora)); // copy to list of loaded adapters - } - - if (!params.lora_init_without_apply) { - common_set_adapter_lora(lctx, params.lora_adapters); - } - - if (params.sampling.ignore_eos && llama_vocab_eos(vocab) == LLAMA_TOKEN_NULL) { - LOG_WRN("%s: warning: vocab does not have an EOS token, ignoring --ignore-eos\n", __func__); - params.sampling.ignore_eos = false; - } - - if (params.sampling.ignore_eos) { - for (llama_token i = 0; i < llama_vocab_n_tokens(vocab); i++) { - if (llama_vocab_is_eog(vocab, i)) { - LOG_INF("%s: added %s logit bias = %f\n", __func__, common_token_to_piece(lctx, i).c_str(), -INFINITY); - params.sampling.logit_bias.push_back({i, -INFINITY}); - } - } - } - - if 
(params.sampling.penalty_last_n == -1) { - LOG_INF("%s: setting penalty_last_n to ctx_size = %d\n", __func__, llama_n_ctx(lctx)); - params.sampling.penalty_last_n = llama_n_ctx(lctx); - } - - if (params.sampling.dry_penalty_last_n == -1) { - LOG_INF("%s: setting dry_penalty_last_n to ctx_size = %d\n", __func__, llama_n_ctx(lctx)); - params.sampling.dry_penalty_last_n = llama_n_ctx(lctx); - } - - if (params.warmup) { - LOG_WRN("%s: warming up the model with an empty run - please wait ... (--no-warmup to disable)\n", __func__); - - std::vector tmp; - llama_token bos = llama_vocab_bos(vocab); - llama_token eos = llama_vocab_eos(vocab); - - // some models (e.g. T5) don't have a BOS token - if (bos != LLAMA_TOKEN_NULL) { - tmp.push_back(bos); - } - if (eos != LLAMA_TOKEN_NULL) { - tmp.push_back(eos); - } - if (tmp.empty()) { - tmp.push_back(0); - } - - if (llama_model_has_encoder(model)) { - llama_encode(lctx, llama_batch_get_one(tmp.data(), tmp.size())); - llama_token decoder_start_token_id = llama_model_decoder_start_token(model); - if (decoder_start_token_id == LLAMA_TOKEN_NULL) { - decoder_start_token_id = bos; - } - tmp.clear(); - tmp.push_back(decoder_start_token_id); - } - if (llama_model_has_decoder(model)) { - llama_decode(lctx, llama_batch_get_one(tmp.data(), std::min(tmp.size(), (size_t) params.n_batch))); - } - llama_kv_cache_clear(lctx); - llama_synchronize(lctx); - llama_perf_context_reset(lctx); - } - - iparams.model.reset(model); - iparams.context.reset(lctx); - - return iparams; -} - -void common_set_adapter_lora(struct llama_context * ctx, std::vector & lora) { - llama_clear_adapter_lora(ctx); - for (auto & la : lora) { - if (la.scale != 0.0f) { - llama_set_adapter_lora(ctx, la.ptr, la.scale); - } - } -} - -struct llama_model_params common_model_params_to_llama(common_params & params) { - auto mparams = llama_model_default_params(); - - if (!params.devices.empty()) { - mparams.devices = params.devices.data(); - } - if (params.n_gpu_layers != -1) { - mparams.n_gpu_layers = params.n_gpu_layers; - } - mparams.main_gpu = params.main_gpu; - mparams.split_mode = params.split_mode; - mparams.tensor_split = params.tensor_split; - mparams.use_mmap = params.use_mmap; - mparams.use_mlock = params.use_mlock; - mparams.check_tensors = params.check_tensors; - if (params.kv_overrides.empty()) { - mparams.kv_overrides = NULL; - } else { - GGML_ASSERT(params.kv_overrides.back().key[0] == 0 && "KV overrides not terminated with empty key"); - mparams.kv_overrides = params.kv_overrides.data(); - } - - return mparams; -} - -struct llama_context_params common_context_params_to_llama(const common_params & params) { - auto cparams = llama_context_default_params(); - - cparams.n_ctx = params.n_ctx; - cparams.n_seq_max = params.n_parallel; - cparams.n_batch = params.n_batch; - cparams.n_ubatch = params.n_ubatch; - cparams.n_threads = params.cpuparams.n_threads; - cparams.n_threads_batch = params.cpuparams_batch.n_threads == -1 ? 
- params.cpuparams.n_threads : params.cpuparams_batch.n_threads; - cparams.logits_all = params.logits_all; - cparams.embeddings = params.embedding; - cparams.rope_scaling_type = params.rope_scaling_type; - cparams.rope_freq_base = params.rope_freq_base; - cparams.rope_freq_scale = params.rope_freq_scale; - cparams.yarn_ext_factor = params.yarn_ext_factor; - cparams.yarn_attn_factor = params.yarn_attn_factor; - cparams.yarn_beta_fast = params.yarn_beta_fast; - cparams.yarn_beta_slow = params.yarn_beta_slow; - cparams.yarn_orig_ctx = params.yarn_orig_ctx; - cparams.pooling_type = params.pooling_type; - cparams.attention_type = params.attention_type; - cparams.defrag_thold = params.defrag_thold; - cparams.cb_eval = params.cb_eval; - cparams.cb_eval_user_data = params.cb_eval_user_data; - cparams.offload_kqv = !params.no_kv_offload; - cparams.flash_attn = params.flash_attn; - cparams.no_perf = params.no_perf; - - if (params.reranking) { - cparams.embeddings = true; - cparams.pooling_type = LLAMA_POOLING_TYPE_RANK; - } - - cparams.type_k = params.cache_type_k; - cparams.type_v = params.cache_type_v; - - return cparams; -} - -struct ggml_threadpool_params ggml_threadpool_params_from_cpu_params(const cpu_params & params) { - struct ggml_threadpool_params tpp; - - ggml_threadpool_params_init(&tpp, params.n_threads); // setup the defaults - - if (params.mask_valid) { - std::memcpy(&tpp.cpumask, ¶ms.cpumask, GGML_MAX_N_THREADS); - } - - tpp.prio = params.priority; - tpp.poll = params.poll; - tpp.strict_cpu = params.strict_cpu; - - return tpp; -} - -#ifdef LLAMA_USE_CURL - -#define CURL_MAX_RETRY 3 -#define CURL_RETRY_DELAY_SECONDS 2 - -static bool curl_perform_with_retry(const std::string & url, CURL * curl, int max_attempts, int retry_delay_seconds) { - int remaining_attempts = max_attempts; - - while (remaining_attempts > 0) { - LOG_INF("%s: Trying to download from %s (attempt %d of %d)...\n", __func__ , url.c_str(), max_attempts - remaining_attempts + 1, max_attempts); - - CURLcode res = curl_easy_perform(curl); - if (res == CURLE_OK) { - return true; - } - - int exponential_backoff_delay = std::pow(retry_delay_seconds, max_attempts - remaining_attempts) * 1000; - LOG_WRN("%s: curl_easy_perform() failed: %s, retrying after %d milliseconds...\n", __func__, curl_easy_strerror(res), exponential_backoff_delay); - - remaining_attempts--; - std::this_thread::sleep_for(std::chrono::milliseconds(exponential_backoff_delay)); - } - - LOG_ERR("%s: curl_easy_perform() failed after %d attempts\n", __func__, max_attempts); - - return false; -} - -static bool common_download_file(const std::string & url, const std::string & path, const std::string & hf_token) { - // Initialize libcurl - curl_ptr curl(curl_easy_init(), &curl_easy_cleanup); - curl_slist_ptr http_headers; - if (!curl) { - LOG_ERR("%s: error initializing libcurl\n", __func__); - return false; - } - - bool force_download = false; - - // Set the URL, allow to follow http redirection - curl_easy_setopt(curl.get(), CURLOPT_URL, url.c_str()); - curl_easy_setopt(curl.get(), CURLOPT_FOLLOWLOCATION, 1L); - - // Check if hf-token or bearer-token was specified - if (!hf_token.empty()) { - std::string auth_header = "Authorization: Bearer " + hf_token; - http_headers.ptr = curl_slist_append(http_headers.ptr, auth_header.c_str()); - curl_easy_setopt(curl.get(), CURLOPT_HTTPHEADER, http_headers.ptr); - } - -#if defined(_WIN32) - // CURLSSLOPT_NATIVE_CA tells libcurl to use standard certificate store of - // operating system. 
Currently implemented under MS-Windows. - curl_easy_setopt(curl.get(), CURLOPT_SSL_OPTIONS, CURLSSLOPT_NATIVE_CA); -#endif - - // Check if the file already exists locally - auto file_exists = std::filesystem::exists(path); - - // If the file exists, check its JSON metadata companion file. - std::string metadata_path = path + ".json"; - nlohmann::json metadata; - std::string etag; - std::string last_modified; - - if (file_exists) { - // Try and read the JSON metadata file (note: stream autoclosed upon exiting this block). - std::ifstream metadata_in(metadata_path); - if (metadata_in.good()) { - try { - metadata_in >> metadata; - LOG_INF("%s: previous metadata file found %s: %s\n", __func__, metadata_path.c_str(), metadata.dump().c_str()); - if (metadata.contains("url") && metadata.at("url").is_string()) { - auto previous_url = metadata.at("url").get(); - if (previous_url != url) { - LOG_ERR("%s: Model URL mismatch: %s != %s\n", __func__, url.c_str(), previous_url.c_str()); - return false; - } - } - if (metadata.contains("etag") && metadata.at("etag").is_string()) { - etag = metadata.at("etag"); - } - if (metadata.contains("lastModified") && metadata.at("lastModified").is_string()) { - last_modified = metadata.at("lastModified"); - } - } catch (const nlohmann::json::exception & e) { - LOG_ERR("%s: error reading metadata file %s: %s\n", __func__, metadata_path.c_str(), e.what()); - return false; - } - } - } else { - LOG_INF("%s: no previous model file found %s\n", __func__, path.c_str()); - } - - // Send a HEAD request to retrieve the etag and last-modified headers - struct common_load_model_from_url_headers { - std::string etag; - std::string last_modified; - }; - - common_load_model_from_url_headers headers; - - { - typedef size_t(*CURLOPT_HEADERFUNCTION_PTR)(char *, size_t, size_t, void *); - auto header_callback = [](char * buffer, size_t /*size*/, size_t n_items, void * userdata) -> size_t { - common_load_model_from_url_headers * headers = (common_load_model_from_url_headers *) userdata; - - static std::regex header_regex("([^:]+): (.*)\r\n"); - static std::regex etag_regex("ETag", std::regex_constants::icase); - static std::regex last_modified_regex("Last-Modified", std::regex_constants::icase); - - std::string header(buffer, n_items); - std::smatch match; - if (std::regex_match(header, match, header_regex)) { - const std::string & key = match[1]; - const std::string & value = match[2]; - if (std::regex_match(key, match, etag_regex)) { - headers->etag = value; - } else if (std::regex_match(key, match, last_modified_regex)) { - headers->last_modified = value; - } - } - return n_items; - }; - - curl_easy_setopt(curl.get(), CURLOPT_NOBODY, 1L); // will trigger the HEAD verb - curl_easy_setopt(curl.get(), CURLOPT_NOPROGRESS, 1L); // hide head request progress - curl_easy_setopt(curl.get(), CURLOPT_HEADERFUNCTION, static_cast(header_callback)); - curl_easy_setopt(curl.get(), CURLOPT_HEADERDATA, &headers); - - bool was_perform_successful = curl_perform_with_retry(url, curl.get(), CURL_MAX_RETRY, CURL_RETRY_DELAY_SECONDS); - if (!was_perform_successful) { - return false; - } - - long http_code = 0; - curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &http_code); - if (http_code != 200) { - // HEAD not supported, we don't know if the file has changed - // force trigger downloading - force_download = true; - LOG_ERR("%s: HEAD invalid http status code received: %ld\n", __func__, http_code); - } - } - - bool should_download = !file_exists || force_download; - if (!should_download) { - if 
(!etag.empty() && etag != headers.etag) { - LOG_WRN("%s: ETag header is different (%s != %s): triggering a new download\n", __func__, etag.c_str(), headers.etag.c_str()); - should_download = true; - } else if (!last_modified.empty() && last_modified != headers.last_modified) { - LOG_WRN("%s: Last-Modified header is different (%s != %s): triggering a new download\n", __func__, last_modified.c_str(), headers.last_modified.c_str()); - should_download = true; - } - } - if (should_download) { - std::string path_temporary = path + ".downloadInProgress"; - if (file_exists) { - LOG_WRN("%s: deleting previous downloaded file: %s\n", __func__, path.c_str()); - if (remove(path.c_str()) != 0) { - LOG_ERR("%s: unable to delete file: %s\n", __func__, path.c_str()); - return false; - } - } - - // Set the output file - - struct FILE_deleter { - void operator()(FILE * f) const { - fclose(f); - } - }; - - std::unique_ptr outfile(fopen(path_temporary.c_str(), "wb")); - if (!outfile) { - LOG_ERR("%s: error opening local file for writing: %s\n", __func__, path.c_str()); - return false; - } - - typedef size_t(*CURLOPT_WRITEFUNCTION_PTR)(void * data, size_t size, size_t nmemb, void * fd); - auto write_callback = [](void * data, size_t size, size_t nmemb, void * fd) -> size_t { - return fwrite(data, size, nmemb, (FILE *)fd); - }; - curl_easy_setopt(curl.get(), CURLOPT_NOBODY, 0L); - curl_easy_setopt(curl.get(), CURLOPT_WRITEFUNCTION, static_cast(write_callback)); - curl_easy_setopt(curl.get(), CURLOPT_WRITEDATA, outfile.get()); - - // display download progress - curl_easy_setopt(curl.get(), CURLOPT_NOPROGRESS, 0L); - - // helper function to hide password in URL - auto llama_download_hide_password_in_url = [](const std::string & url) -> std::string { - std::size_t protocol_pos = url.find("://"); - if (protocol_pos == std::string::npos) { - return url; // Malformed URL - } - - std::size_t at_pos = url.find('@', protocol_pos + 3); - if (at_pos == std::string::npos) { - return url; // No password in URL - } - - return url.substr(0, protocol_pos + 3) + "********" + url.substr(at_pos); - }; - - // start the download - LOG_INF("%s: trying to download model from %s to %s (server_etag:%s, server_last_modified:%s)...\n", __func__, - llama_download_hide_password_in_url(url).c_str(), path.c_str(), headers.etag.c_str(), headers.last_modified.c_str()); - bool was_perform_successful = curl_perform_with_retry(url, curl.get(), CURL_MAX_RETRY, CURL_RETRY_DELAY_SECONDS); - if (!was_perform_successful) { - return false; - } - - long http_code = 0; - curl_easy_getinfo (curl.get(), CURLINFO_RESPONSE_CODE, &http_code); - if (http_code < 200 || http_code >= 400) { - LOG_ERR("%s: invalid http status code received: %ld\n", __func__, http_code); - return false; - } - - // Causes file to be closed explicitly here before we rename it. - outfile.reset(); - - // Write the updated JSON metadata file. 
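Editor's note: common_download_file pairs every downloaded model with a "<model path>.json" companion that caches the source URL and the server's ETag / Last-Modified validators; the re-download checks above compare those cached values against a fresh HEAD request. An illustrative sketch of what the code just below ends up writing (all field values here are made-up placeholders):

#include <fstream>
#include <string>
#include <nlohmann/json.hpp>

static void write_example_metadata(const std::string & model_path) {
    nlohmann::json metadata = {
        { "url",          "https://example.com/model.gguf" },    // placeholder URL
        { "etag",         "\"0123456789abcdef\"" },              // as reported by the server
        { "lastModified", "Thu, 01 Jan 2026 00:00:00 GMT" },     // as reported by the server
    };
    // same layout and indentation as the metadata.update(...) / dump(4) call below
    std::ofstream(model_path + ".json") << metadata.dump(4);
}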
- metadata.update({ - {"url", url}, - {"etag", headers.etag}, - {"lastModified", headers.last_modified} - }); - std::ofstream(metadata_path) << metadata.dump(4); - LOG_INF("%s: file metadata saved: %s\n", __func__, metadata_path.c_str()); - - if (rename(path_temporary.c_str(), path.c_str()) != 0) { - LOG_ERR("%s: unable to rename file: %s to %s\n", __func__, path_temporary.c_str(), path.c_str()); - return false; - } - } - - return true; -} - -struct llama_model * common_load_model_from_url( - const std::string & model_url, - const std::string & local_path, - const std::string & hf_token, - const struct llama_model_params & params) { - // Basic validation of the model_url - if (model_url.empty()) { - LOG_ERR("%s: invalid model_url\n", __func__); - return NULL; - } - - if (!common_download_file(model_url, local_path, hf_token)) { - return NULL; - } - - // check for additional GGUFs split to download - int n_split = 0; - { - struct gguf_init_params gguf_params = { - /*.no_alloc = */ true, - /*.ctx = */ NULL, - }; - auto * ctx_gguf = gguf_init_from_file(local_path.c_str(), gguf_params); - if (!ctx_gguf) { - LOG_ERR("\n%s: failed to load input GGUF from %s\n", __func__, local_path.c_str()); - return NULL; - } - - auto key_n_split = gguf_find_key(ctx_gguf, LLM_KV_SPLIT_COUNT); - if (key_n_split >= 0) { - n_split = gguf_get_val_u16(ctx_gguf, key_n_split); - } - - gguf_free(ctx_gguf); - } - - if (n_split > 1) { - char split_prefix[PATH_MAX] = {0}; - char split_url_prefix[LLAMA_CURL_MAX_URL_LENGTH] = {0}; - - // Verify the first split file format - // and extract split URL and PATH prefixes - { - if (!llama_split_prefix(split_prefix, sizeof(split_prefix), local_path.c_str(), 0, n_split)) { - LOG_ERR("\n%s: unexpected model file name: %s n_split=%d\n", __func__, local_path.c_str(), n_split); - return NULL; - } - - if (!llama_split_prefix(split_url_prefix, sizeof(split_url_prefix), model_url.c_str(), 0, n_split)) { - LOG_ERR("\n%s: unexpected model url: %s n_split=%d\n", __func__, model_url.c_str(), n_split); - return NULL; - } - } - - // Prepare download in parallel - std::vector> futures_download; - for (int idx = 1; idx < n_split; idx++) { - futures_download.push_back(std::async(std::launch::async, [&split_prefix, &split_url_prefix, &n_split, hf_token](int download_idx) -> bool { - char split_path[PATH_MAX] = {0}; - llama_split_path(split_path, sizeof(split_path), split_prefix, download_idx, n_split); - - char split_url[LLAMA_CURL_MAX_URL_LENGTH] = {0}; - llama_split_path(split_url, sizeof(split_url), split_url_prefix, download_idx, n_split); - - return common_download_file(split_url, split_path, hf_token); - }, idx)); - } - - // Wait for all downloads to complete - for (auto & f : futures_download) { - if (!f.get()) { - return NULL; - } - } - } - - return llama_model_load_from_file(local_path.c_str(), params); -} - -struct llama_model * common_load_model_from_hf( - const std::string & repo, - const std::string & remote_path, - const std::string & local_path, - const std::string & hf_token, - const struct llama_model_params & params) { - // construct hugging face model url: - // - // --repo ggml-org/models --file tinyllama-1.1b/ggml-model-f16.gguf - // https://huggingface.co/ggml-org/models/resolve/main/tinyllama-1.1b/ggml-model-f16.gguf - // - // --repo TheBloke/Mixtral-8x7B-v0.1-GGUF --file mixtral-8x7b-v0.1.Q4_K_M.gguf - // https://huggingface.co/TheBloke/Mixtral-8x7B-v0.1-GGUF/resolve/main/mixtral-8x7b-v0.1.Q4_K_M.gguf - // - - std::string model_url = "https://huggingface.co/"; - model_url += 
repo; - model_url += "/resolve/main/"; - model_url += remote_path; - - return common_load_model_from_url(model_url, local_path, hf_token, params); -} - -/** - * Allow getting the HF file from the HF repo with tag (like ollama), for example: - * - bartowski/Llama-3.2-3B-Instruct-GGUF:q4 - * - bartowski/Llama-3.2-3B-Instruct-GGUF:Q4_K_M - * - bartowski/Llama-3.2-3B-Instruct-GGUF:q5_k_s - * Tag is optional, default to "latest" (meaning it checks for Q4_K_M first, then Q4, then if not found, return the first GGUF file in repo) - * - * Return pair of (with "repo" already having tag removed) - * - * Note: we use the Ollama-compatible HF API, but not using the blobId. Instead, we use the special "ggufFile" field which returns the value for "hf_file". This is done to be backward-compatible with existing cache files. - */ -std::pair common_get_hf_file(const std::string & hf_repo_with_tag, const std::string & hf_token) { - auto parts = string_split(hf_repo_with_tag, ':'); - std::string tag = parts.size() > 1 ? parts.back() : "latest"; - std::string hf_repo = parts[0]; - if (string_split(hf_repo, '/').size() != 2) { - throw std::invalid_argument("error: invalid HF repo format, expected /[:quant]\n"); - } - - // fetch model info from Hugging Face Hub API - json model_info; - curl_ptr curl(curl_easy_init(), &curl_easy_cleanup); - curl_slist_ptr http_headers; - std::string res_str; - std::string url = "https://huggingface.co/v2/" + hf_repo + "/manifests/" + tag; - curl_easy_setopt(curl.get(), CURLOPT_URL, url.c_str()); - curl_easy_setopt(curl.get(), CURLOPT_NOPROGRESS, 1L); - typedef size_t(*CURLOPT_WRITEFUNCTION_PTR)(void * ptr, size_t size, size_t nmemb, void * data); - auto write_callback = [](void * ptr, size_t size, size_t nmemb, void * data) -> size_t { - static_cast(data)->append((char * ) ptr, size * nmemb); - return size * nmemb; - }; - curl_easy_setopt(curl.get(), CURLOPT_WRITEFUNCTION, static_cast(write_callback)); - curl_easy_setopt(curl.get(), CURLOPT_WRITEDATA, &res_str); -#if defined(_WIN32) - curl_easy_setopt(curl.get(), CURLOPT_SSL_OPTIONS, CURLSSLOPT_NATIVE_CA); -#endif - if (!hf_token.empty()) { - std::string auth_header = "Authorization: Bearer " + hf_token; - http_headers.ptr = curl_slist_append(http_headers.ptr, auth_header.c_str()); - } - // Important: the User-Agent must be "llama-cpp" to get the "ggufFile" field in the response - http_headers.ptr = curl_slist_append(http_headers.ptr, "User-Agent: llama-cpp"); - http_headers.ptr = curl_slist_append(http_headers.ptr, "Accept: application/json"); - curl_easy_setopt(curl.get(), CURLOPT_HTTPHEADER, http_headers.ptr); - - CURLcode res = curl_easy_perform(curl.get()); - - if (res != CURLE_OK) { - throw std::runtime_error("error: cannot make GET request to HF API"); - } - - long res_code; - curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &res_code); - if (res_code == 200) { - model_info = json::parse(res_str); - } else if (res_code == 401) { - throw std::runtime_error("error: model is private or does not exist; if you are accessing a gated model, please provide a valid HF token"); - } else { - throw std::runtime_error(string_format("error from HF API, response code: %ld, data: %s", res_code, res_str.c_str())); - } - - // check response - if (!model_info.contains("ggufFile")) { - throw std::runtime_error("error: model does not have ggufFile"); - } - json & gguf_file = model_info.at("ggufFile"); - if (!gguf_file.contains("rfilename")) { - throw std::runtime_error("error: ggufFile does not have rfilename"); - } - - return 
std::make_pair(hf_repo, gguf_file.at("rfilename")); -} - -#else - -struct llama_model * common_load_model_from_url( - const std::string & /*model_url*/, - const std::string & /*local_path*/, - const std::string & /*hf_token*/, - const struct llama_model_params & /*params*/) { - LOG_WRN("%s: llama.cpp built without libcurl, downloading from an url not supported.\n", __func__); - return nullptr; -} - -struct llama_model * common_load_model_from_hf( - const std::string & /*repo*/, - const std::string & /*remote_path*/, - const std::string & /*local_path*/, - const std::string & /*hf_token*/, - const struct llama_model_params & /*params*/) { - LOG_WRN("%s: llama.cpp built without libcurl, downloading from Hugging Face not supported.\n", __func__); - return nullptr; -} - -std::pair common_get_hf_file(const std::string &, const std::string &) { - LOG_WRN("%s: llama.cpp built without libcurl, downloading from Hugging Face not supported.\n", __func__); - return std::make_pair("", ""); -} - -#endif // LLAMA_USE_CURL - -// -// Batch utils -// - -void common_batch_clear(struct llama_batch & batch) { - batch.n_tokens = 0; -} - -void common_batch_add( - struct llama_batch & batch, - llama_token id, - llama_pos pos, - const std::vector & seq_ids, - bool logits) { - GGML_ASSERT(batch.seq_id[batch.n_tokens] && "llama_batch size exceeded"); - - batch.token [batch.n_tokens] = id; - batch.pos [batch.n_tokens] = pos; - batch.n_seq_id[batch.n_tokens] = seq_ids.size(); - for (size_t i = 0; i < seq_ids.size(); ++i) { - batch.seq_id[batch.n_tokens][i] = seq_ids[i]; - } - batch.logits [batch.n_tokens] = logits; - - batch.n_tokens++; -} - -// -// Token utils -// - -size_t common_lcp(const llama_tokens & a, const llama_tokens & b) { - size_t i; - for (i = 0; i < a.size() && i < b.size() && a[i] == b[i]; i++) {} - - return i; -} - -size_t common_lcs(const llama_tokens & a, const llama_tokens & b) { - // check for empty sequences - if (a.empty() || b.empty()) { - return 0; - } - - // get the lengths of the input sequences - size_t a_len = a.size(); - size_t b_len = b.size(); - - // initialize the maximum length of the longest common subsequence (LCS) - size_t max_length = 0; - - // use two rows instead of a 2D matrix to optimize space - std::vector prev_row(b_len + 1, 0); - std::vector curr_row(b_len + 1, 0); - - // iterate through the elements of a - for (size_t i = 1; i <= a_len; i++) { - // iterate through the elements of b - for (size_t j = 1; j <= b_len; j++) { - // if elements at the current positions match - if (a[i - 1] == b[j - 1]) { - // if it's the first element of either sequences, set LCS length to 1 - if (i == 1 || j == 1) { - curr_row[j] = 1; - } else { - // increment LCS length by 1 compared to the previous element - curr_row[j] = prev_row[j - 1] + 1; - } - - // update max_length if necessary - if (curr_row[j] > max_length) { - max_length = curr_row[j]; - } - } else { - // reset LCS length if elements don't match - curr_row[j] = 0; - } - } - - // update the previous row for the next iteration - prev_row = curr_row; - } - - // return the maximum length of the LCS - return max_length; -} - -// -// Vocab utils -// - -std::vector common_tokenize( - const struct llama_context * ctx, - const std::string & text, - bool add_special, - bool parse_special) { - const llama_model * model = llama_get_model(ctx); - const llama_vocab * vocab = llama_model_get_vocab(model); - return common_tokenize(vocab, text, add_special, parse_special); -} - -std::vector common_tokenize( - const struct llama_vocab * vocab, - const 
std::string & text, - bool add_special, - bool parse_special) { - // upper limit for the number of tokens - int n_tokens = text.length() + 2 * add_special; - std::vector result(n_tokens); - n_tokens = llama_tokenize(vocab, text.data(), text.length(), result.data(), result.size(), add_special, parse_special); - if (n_tokens < 0) { - result.resize(-n_tokens); - int check = llama_tokenize(vocab, text.data(), text.length(), result.data(), result.size(), add_special, parse_special); - GGML_ASSERT(check == -n_tokens); - } else { - result.resize(n_tokens); - } - return result; -} - -std::string common_token_to_piece(const struct llama_context * ctx, llama_token token, bool special) { - const llama_model * model = llama_get_model(ctx); - const llama_vocab * vocab = llama_model_get_vocab(model); - return common_token_to_piece(vocab, token, special); -} - -std::string common_token_to_piece(const struct llama_vocab * vocab, llama_token token, bool special) { - std::string piece; - piece.resize(piece.capacity()); // using string internal cache, 15 bytes + '\n' - const int n_chars = llama_token_to_piece(vocab, token, &piece[0], piece.size(), 0, special); - if (n_chars < 0) { - piece.resize(-n_chars); - int check = llama_token_to_piece(vocab, token, &piece[0], piece.size(), 0, special); - GGML_ASSERT(check == -n_chars); - } - else { - piece.resize(n_chars); - } - - return piece; -} - -std::string common_detokenize(const struct llama_context * ctx, const std::vector & tokens, bool special) { - const llama_model * model = llama_get_model(ctx); - const llama_vocab * vocab = llama_model_get_vocab(model); - return common_detokenize(vocab, tokens, special); -} - -std::string common_detokenize(const struct llama_vocab * vocab, const std::vector & tokens, bool special) { - std::string text; - text.resize(std::max(text.capacity(), tokens.size())); - int32_t n_chars = llama_detokenize(vocab, tokens.data(), (int32_t)tokens.size(), &text[0], (int32_t)text.size(), false, special); - if (n_chars < 0) { - text.resize(-n_chars); - n_chars = llama_detokenize(vocab, tokens.data(), (int32_t)tokens.size(), &text[0], (int32_t)text.size(), false, special); - GGML_ASSERT(n_chars <= (int32_t)text.size()); // whitespace trimming is performed after per-token detokenization - } - - text.resize(n_chars); - - // NOTE: the original tokenizer decodes bytes after collecting the pieces. 
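Editor's note: the tokenize/detokenize wrappers above all rely on the same llama.cpp convention: when the destination buffer is too small, the C API returns a negative value whose magnitude is the required size, and the wrapper resizes and calls again. A small self-contained sketch of that "grow and retry" pattern against a hypothetical C-style function (not a real llama.cpp API):

#include <algorithm>
#include <cassert>
#include <string>

// hypothetical stand-in for llama_tokenize / llama_token_to_piece / llama_detokenize
static int fill_buffer(char * dst, int dst_size, const std::string & src) {
    if ((int) src.size() > dst_size) {
        return -(int) src.size();            // negative: "I need this many bytes"
    }
    std::copy(src.begin(), src.end(), dst);
    return (int) src.size();
}

static std::string grow_and_retry(const std::string & src) {
    std::string out;
    out.resize(8);                           // optimistic first guess
    int n = fill_buffer(&out[0], (int) out.size(), src);
    if (n < 0) {
        out.resize(-n);                      // grow to the size the callee reported
        n = fill_buffer(&out[0], (int) out.size(), src);
        assert(n == (int) out.size());       // the second call must now fit
    }
    out.resize(n);
    return out;
}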
- return text; -} - -// -// Chat template utils -// - -bool common_chat_verify_template(const std::string & tmpl, bool use_jinja) { - if (use_jinja) { - try { - auto chat_template = common_chat_template(tmpl, "", ""); - common_chat_inputs inputs; - inputs.messages = json::array({{ - {"role", "user"}, - {"content", "test"}, - }}); - common_chat_params_init(chat_template, inputs); - return true; - } catch (const std::exception & e) { - LOG_ERR("%s: failed to apply template: %s\n", __func__, e.what()); - return false; - } - } - llama_chat_message chat[] = {{"user", "test"}}; - const int res = llama_chat_apply_template(tmpl.c_str(), chat, 1, true, nullptr, 0); - return res >= 0; -} - -std::string common_chat_apply_template( - const common_chat_template & tmpl, - const std::vector & msgs, - bool add_ass, - bool use_jinja) { - if (use_jinja) { - auto messages = json::array(); - for (const auto & msg : msgs) { - messages.push_back({{"role", msg.role}, {"content", msg.content}}); - } - common_chat_inputs inputs; - inputs.messages = messages; - inputs.add_generation_prompt = add_ass; - return common_chat_params_init(tmpl, inputs).prompt; - } - - int alloc_size = 0; - std::vector chat; - for (const auto & msg : msgs) { - chat.push_back({msg.role.c_str(), msg.content.c_str()}); - alloc_size += (msg.role.size() + msg.content.size()) * 1.25; - } - - std::vector buf(alloc_size); - - // run the first time to get the total output length - int32_t res = llama_chat_apply_template(tmpl.source().c_str(), chat.data(), chat.size(), add_ass, buf.data(), buf.size()); - - // error: chat template is not supported - if (res < 0) { - // if the custom "tmpl" is not supported, we throw an error - // this is a bit redundant (for good), since we're not sure if user validated the custom template with llama_chat_verify_template() - throw std::runtime_error("this custom template is not supported"); - } - - // if it turns out that our buffer is too small, we resize it - if ((size_t) res > buf.size()) { - buf.resize(res); - res = llama_chat_apply_template(tmpl.source().c_str(), chat.data(), chat.size(), add_ass, buf.data(), buf.size()); - } - - std::string formatted_chat(buf.data(), res); - return formatted_chat; -} - -std::string common_chat_format_single( - const common_chat_template & tmpl, - const std::vector & past_msg, - const common_chat_msg & new_msg, - bool add_ass, - bool use_jinja) { - std::ostringstream ss; - auto fmt_past_msg = past_msg.empty() ? 
"" : common_chat_apply_template(tmpl, past_msg, false, use_jinja); - std::vector chat_new(past_msg); - // if the past_msg ends with a newline, we must preserve it in the formatted version - if (add_ass && !fmt_past_msg.empty() && fmt_past_msg.back() == '\n') { - ss << "\n"; - }; - // format chat with new_msg - chat_new.push_back(new_msg); - auto fmt_new_msg = common_chat_apply_template(tmpl, chat_new, add_ass, use_jinja); - // get the diff part - ss << fmt_new_msg.substr(fmt_past_msg.size(), fmt_new_msg.size() - fmt_past_msg.size()); - return ss.str(); -} - -std::string common_chat_format_example(const common_chat_template & tmpl, bool use_jinja) { - std::vector msgs = { - {"system", "You are a helpful assistant", {}}, - {"user", "Hello", {}}, - {"assistant", "Hi there", {}}, - {"user", "How are you?", {}}, - }; - return common_chat_apply_template(tmpl, msgs, true, use_jinja); -} - -#define CHATML_TEMPLATE_SRC \ - "{%- for message in messages -%}\n" \ - " {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>\n' -}}\n" \ - "{%- endfor -%}\n" \ - "{%- if add_generation_prompt -%}\n" \ - " {{- '<|im_start|>assistant\n' -}}\n" \ - "{%- endif -%}" - -common_chat_templates common_chat_templates_from_model(const struct llama_model * model, const std::string & chat_template_override) -{ - std::string default_template_src; - std::string template_tool_use_src; - - bool has_explicit_template = !chat_template_override.empty(); - if (chat_template_override.empty()) { - auto str = llama_model_chat_template(model, /* name */ nullptr); - if (str) { - default_template_src = str; - has_explicit_template = true; - } - str = llama_model_chat_template(model, /* name */ "tool_use"); - if (str) { - template_tool_use_src = str; - has_explicit_template = true; - } - } else { - default_template_src = chat_template_override; - } - if (default_template_src.empty() || default_template_src == "chatml") { - if (!template_tool_use_src.empty()) { - default_template_src = template_tool_use_src; - } else { - default_template_src = CHATML_TEMPLATE_SRC; - } - } - auto vocab = llama_model_get_vocab(model); - const auto get_token = [&](llama_token token, const char * name, const char * jinja_variable_name) { - if (token == LLAMA_TOKEN_NULL) { - if (default_template_src.find(jinja_variable_name) != std::string::npos - || template_tool_use_src.find(jinja_variable_name) != std::string::npos) { - LOG_WRN("%s: warning: vocab does not have a %s token, jinja template won't work as intended.\n", __func__, name); - } - return std::string(); - } else { - return common_token_to_piece(vocab, token, true); - } - }; - auto token_bos = get_token(llama_vocab_bos(vocab), "BOS", "bos_token"); - auto token_eos = get_token(llama_vocab_eos(vocab), "EOS", "eos_token"); - try { - return { - has_explicit_template, - std::make_unique(default_template_src, token_bos, token_eos), - template_tool_use_src.empty() - ? nullptr - : std::make_unique(template_tool_use_src, token_bos, token_eos), - }; - } catch (const std::exception & e) { - LOG_ERR("%s: failed to parse chat template: %s\n", __func__, e.what()); - return { - has_explicit_template, - std::make_unique(CHATML_TEMPLATE_SRC, token_bos, token_eos), - nullptr, - }; - } -} - -// -// KV cache utils -// - -void common_kv_cache_dump_view(const llama_kv_cache_view & view, int row_size) { - static const char slot_chars[] = ".123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz+"; - - printf("=== Dumping KV cache. 
total cells %d, max sequences per cell %d, populated cells %d, total tokens in cache %d, largest empty slot=%d @ %d", - view.n_cells, view.n_seq_max, view.used_cells, view.token_count, view.max_contiguous, view.max_contiguous_idx); - - llama_kv_cache_view_cell * c_curr = view.cells; - llama_seq_id * cs_curr = view.cells_sequences; - - for (int i = 0; i < view.n_cells; i++, c_curr++, cs_curr += view.n_seq_max) { - if (i % row_size == 0) { - printf("\n%5d: ", i); - } - int seq_count = 0; - for (int j = 0; j < view.n_seq_max; j++) { - if (cs_curr[j] >= 0) { seq_count++; } - } - putchar(slot_chars[std::min(sizeof(slot_chars) - 2, size_t(seq_count))]); - } - - printf("\n=== Done dumping\n"); -} - -void common_kv_cache_dump_view_seqs(const llama_kv_cache_view & view, int row_size) { - static const char slot_chars[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"; - - printf("=== Dumping KV cache. total cells %d, max sequences per cell %d, populated cells %d, total tokens in cache %d, largest empty slot=%d @ %d\n", - view.n_cells, view.n_seq_max, view.used_cells, view.token_count, view.max_contiguous, view.max_contiguous_idx); - - std::unordered_map seqs; - llama_kv_cache_view_cell * c_curr = view.cells; - llama_seq_id * cs_curr = view.cells_sequences; - - for (int i = 0; i < view.n_cells; i++, c_curr++, cs_curr += view.n_seq_max) { - for (int j = 0; j < view.n_seq_max; j++) { - if (cs_curr[j] < 0) { continue; } - if (seqs.find(cs_curr[j]) == seqs.end()) { - if (seqs.size() + 1 >= sizeof(slot_chars)) { break; } - const size_t sz = seqs.size(); - seqs[cs_curr[j]] = sz; - } - } - if (seqs.size() + 1 >= sizeof(slot_chars)) { break; } - } - - printf("=== Sequence legend: "); - for (const auto & it : seqs) { - printf("%zu=%d, ", it.second, it.first); - } - printf("'+'=other sequence ids"); - - c_curr = view.cells; - cs_curr = view.cells_sequences; - for (int i = 0; i < view.n_cells; i++, c_curr++, cs_curr += view.n_seq_max) { - if (i % row_size == 0) { - printf("\n%5d: ", i); - } - for (int j = 0; j < view.n_seq_max; j++) { - if (cs_curr[j] >= 0) { - const auto & it = seqs.find(cs_curr[j]); - putchar(it != seqs.end() ? int(slot_chars[it->second]) : '+'); - } else { - putchar('.'); - } - } - putchar(' '); - } - - printf("\n=== Done dumping\n"); -} - -// -// Embedding utils -// - -void common_embd_normalize(const float * inp, float * out, int n, int embd_norm) { - double sum = 0.0; - - switch (embd_norm) { - case -1: // no normalisation - sum = 1.0; - break; - case 0: // max absolute - for (int i = 0; i < n; i++) { - if (sum < std::abs(inp[i])) { - sum = std::abs(inp[i]); - } - } - sum /= 32760.0; // make an int16 range - break; - case 2: // euclidean - for (int i = 0; i < n; i++) { - sum += inp[i] * inp[i]; - } - sum = std::sqrt(sum); - break; - default: // p-norm (euclidean is p-norm p=2) - for (int i = 0; i < n; i++) { - sum += std::pow(std::abs(inp[i]), embd_norm); - } - sum = std::pow(sum, 1.0 / embd_norm); - break; - } - - const float norm = sum > 0.0 ? 
1.0 / sum : 0.0f; - - for (int i = 0; i < n; i++) { - out[i] = inp[i] * norm; - } -} - -float common_embd_similarity_cos(const float * embd1, const float * embd2, int n){ - double sum = 0.0; - double sum1 = 0.0; - double sum2 = 0.0; - - for (int i = 0; i < n; i++) { - sum += embd1[i] * embd2[i]; - sum1 += embd1[i] * embd1[i]; - sum2 += embd2[i] * embd2[i]; - } - - // Handle the case where one or both vectors are zero vectors - if (sum1 == 0.0 || sum2 == 0.0) { - if (sum1 == 0.0 && sum2 == 0.0) { - return 1.0f; // two zero vectors are similar - } - return 0.0f; - } - - return sum / (sqrt(sum1) * sqrt(sum2)); -} - -// -// Control vector utils -// - -static common_control_vector_data common_control_vector_load_one(const common_control_vector_load_info & load_info) { - common_control_vector_data result = { -1, {} }; - - ggml_context * ctx = nullptr; - struct gguf_init_params meta_gguf_params = { - /* .no_alloc = */ false, - /* .ctx = */ &ctx, - }; - struct gguf_context * ctx_gguf = gguf_init_from_file(load_info.fname.c_str(), meta_gguf_params); - if (!ctx_gguf) { - LOG_ERR("%s: failed to load control vector file from %s\n", __func__, load_info.fname.c_str()); - return result; - } - - int32_t n_tensors = gguf_get_n_tensors(ctx_gguf); - if (n_tensors == 0) { - LOG_WRN("%s: no direction tensors found in %s\n", __func__, load_info.fname.c_str()); - } - - for (int i = 0; i < n_tensors; i++) { - std::string name = gguf_get_tensor_name(ctx_gguf, i); - - int layer_idx = -1; - - // split on '.' - size_t dotpos = name.find('.'); - if (dotpos != std::string::npos && name.substr(0, dotpos) == "direction") { - try { - layer_idx = std::stoi(name.substr(dotpos + 1)); - } catch (...) { - layer_idx = -1; - } - } - if (layer_idx < 0) { - LOG_ERR("%s: invalid/unparsable direction tensor layer index in %s\n", __func__, load_info.fname.c_str()); - result.n_embd = -1; - break; - } else if (layer_idx == 0) { - LOG_ERR("%s: invalid (zero) direction tensor layer index in %s\n", __func__, load_info.fname.c_str()); - result.n_embd = -1; - break; - } - - struct ggml_tensor * tensor = ggml_get_tensor(ctx, name.c_str()); - if (tensor->type != GGML_TYPE_F32) { - LOG_ERR("%s: invalid (non-F32) direction tensor type in %s\n", __func__, load_info.fname.c_str()); - result.n_embd = -1; - break; - } - if (ggml_n_dims(tensor) != 1) { - LOG_ERR("%s: invalid (non-1D) direction tensor shape in %s\n", __func__, load_info.fname.c_str()); - result.n_embd = -1; - break; - } - - if (result.n_embd == -1) { - result.n_embd = ggml_nelements(tensor); - } else if (ggml_nelements(tensor) != result.n_embd) { - LOG_ERR("%s: direction tensor in %s does not match previous dimensions\n", __func__, load_info.fname.c_str()); - result.n_embd = -1; - break; - } - - // extend if necessary - do not store data for layer 0 (it's not used) - result.data.resize(std::max(result.data.size(), static_cast(result.n_embd * layer_idx)), 0.0f); - - const float * src = (const float *) tensor->data; - float * dst = result.data.data() + result.n_embd * (layer_idx - 1); // layer 1 at [0] - for (int j = 0; j < result.n_embd; j++) { - dst[j] += src[j] * load_info.strength; // allows multiple directions for same layer in same file - } - - } - - if (result.n_embd == -1) { - LOG_WRN("%s: skipping %s due to invalid direction tensors\n", __func__, load_info.fname.c_str()); - result.data.clear(); - } - - gguf_free(ctx_gguf); - ggml_free(ctx); - - return result; -} - -common_control_vector_data common_control_vector_load(const std::vector & load_infos) { - 
common_control_vector_data result = { -1, {} }; - - for (const auto & info : load_infos) { - auto cur = common_control_vector_load_one(info); - - if (cur.n_embd == -1) { - result.n_embd = -1; - break; - } - if (result.n_embd != -1 && result.n_embd != cur.n_embd) { - LOG_ERR("%s: control vectors in %s does not match previous dimensions\n", __func__, info.fname.c_str()); - result.n_embd = -1; - break; - } - - if (result.n_embd == -1) { - result = std::move(cur); - } else { - result.data.resize(std::max(result.data.size(), cur.data.size()), 0.0f); // extend if necessary - for (size_t i = 0; i < cur.data.size(); i++) { - result.data[i] += cur.data[i]; - } - } - } - - if (result.n_embd == -1) { - LOG_ERR("%s: no valid control vector files passed\n", __func__); - result.data.clear(); - } - - return result; -} - diff --git a/common/common.h b/common/common.h deleted file mode 100644 index b208d0c7e..000000000 --- a/common/common.h +++ /dev/null @@ -1,717 +0,0 @@ -// Various helper functions and utilities - -#pragma once - -#include "llama-cpp.h" - -#include -#include -#include -#include - -#ifdef _WIN32 -#define DIRECTORY_SEPARATOR '\\' -#else -#define DIRECTORY_SEPARATOR '/' -#endif // _WIN32 - -#define die(msg) do { fputs("error: " msg "\n", stderr); exit(1); } while (0) -#define die_fmt(fmt, ...) do { fprintf(stderr, "error: " fmt "\n", __VA_ARGS__); exit(1); } while (0) - -#define print_build_info() do { \ - fprintf(stderr, "%s: build = %d (%s)\n", __func__, LLAMA_BUILD_NUMBER, LLAMA_COMMIT); \ - fprintf(stderr, "%s: built with %s for %s\n", __func__, LLAMA_COMPILER, LLAMA_BUILD_TARGET); \ -} while(0) - -#define DEFAULT_MODEL_PATH "models/7B/ggml-model-f16.gguf" - -struct common_adapter_lora_info { - std::string path; - float scale; - - struct llama_adapter_lora * ptr; -}; - -using llama_tokens = std::vector; - -// build info -extern int LLAMA_BUILD_NUMBER; -extern const char * LLAMA_COMMIT; -extern const char * LLAMA_COMPILER; -extern const char * LLAMA_BUILD_TARGET; - -struct common_control_vector_load_info; - -// -// CPU utils -// - -struct cpu_params { - int n_threads = -1; - bool cpumask[GGML_MAX_N_THREADS] = {false}; // CPU affinity mask. 
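Editor's note: ggml_threadpool_params_from_cpu_params earlier in this diff copies cpumask into the ggml threadpool parameters only when mask_valid is set. A hedged sketch (illustrative stand-in struct, arbitrary values) of filling those affinity fields to pin work to the first four CPUs:

// illustrative mirror of the fields of this struct; the real type is cpu_params
struct cpu_params_sketch {
    int  n_threads     = -1;
    bool cpumask[512]  = { false };   // sized by GGML_MAX_N_THREADS in the real struct
    bool mask_valid    = false;
};

static cpu_params_sketch make_pinned_params() {
    cpu_params_sketch p;
    p.n_threads = 4;
    for (int i = 0; i < 4; i++) {
        p.cpumask[i] = true;          // pin to CPUs 0-3
    }
    p.mask_valid = true;              // without this flag the mask is ignored downstream
    return p;
}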
- bool mask_valid = false; // Default: any CPU - enum ggml_sched_priority priority = GGML_SCHED_PRIO_NORMAL; // Scheduling prio : (0 - normal, 1 - medium, 2 - high, 3 - realtime) - bool strict_cpu = false; // Use strict CPU placement - uint32_t poll = 50; // Polling (busywait) level (0 - no polling, 100 - mostly polling) -}; - -int32_t cpu_get_num_physical_cores(); -int32_t cpu_get_num_math(); - -// -// Common params -// - -enum llama_example { - LLAMA_EXAMPLE_COMMON, - LLAMA_EXAMPLE_SPECULATIVE, - LLAMA_EXAMPLE_MAIN, - LLAMA_EXAMPLE_INFILL, - LLAMA_EXAMPLE_EMBEDDING, - LLAMA_EXAMPLE_PERPLEXITY, - LLAMA_EXAMPLE_RETRIEVAL, - LLAMA_EXAMPLE_PASSKEY, - LLAMA_EXAMPLE_IMATRIX, - LLAMA_EXAMPLE_BENCH, - LLAMA_EXAMPLE_SERVER, - LLAMA_EXAMPLE_CVECTOR_GENERATOR, - LLAMA_EXAMPLE_EXPORT_LORA, - LLAMA_EXAMPLE_LLAVA, - LLAMA_EXAMPLE_LOOKUP, - LLAMA_EXAMPLE_PARALLEL, - LLAMA_EXAMPLE_TTS, - - LLAMA_EXAMPLE_COUNT, -}; - -enum common_sampler_type { - COMMON_SAMPLER_TYPE_NONE = 0, - COMMON_SAMPLER_TYPE_DRY = 1, - COMMON_SAMPLER_TYPE_TOP_K = 2, - COMMON_SAMPLER_TYPE_TOP_P = 3, - COMMON_SAMPLER_TYPE_MIN_P = 4, - //COMMON_SAMPLER_TYPE_TFS_Z = 5, - COMMON_SAMPLER_TYPE_TYPICAL_P = 6, - COMMON_SAMPLER_TYPE_TEMPERATURE = 7, - COMMON_SAMPLER_TYPE_XTC = 8, - COMMON_SAMPLER_TYPE_INFILL = 9, - COMMON_SAMPLER_TYPE_PENALTIES = 10, -}; - -// dimensionality reduction methods, used by cvector-generator -enum dimre_method { - DIMRE_METHOD_PCA, - DIMRE_METHOD_MEAN, -}; - -enum common_conversation_mode { - COMMON_CONVERSATION_MODE_DISABLED = 0, - COMMON_CONVERSATION_MODE_ENABLED = 1, - COMMON_CONVERSATION_MODE_AUTO = 2, -}; - -struct common_grammar_trigger { - std::string word; - bool at_start; -}; - -// sampling parameters -struct common_params_sampling { - uint32_t seed = LLAMA_DEFAULT_SEED; // the seed used to initialize llama_sampler - - int32_t n_prev = 64; // number of previous tokens to remember - int32_t n_probs = 0; // if greater than 0, output the probabilities of top n_probs tokens. 
- int32_t min_keep = 0; // 0 = disabled, otherwise samplers should return at least min_keep tokens - int32_t top_k = 40; // <= 0 to use vocab size - float top_p = 0.95f; // 1.0 = disabled - float min_p = 0.05f; // 0.0 = disabled - float xtc_probability = 0.00f; // 0.0 = disabled - float xtc_threshold = 0.10f; // > 0.5 disables XTC - float typ_p = 1.00f; // typical_p, 1.0 = disabled - float temp = 0.80f; // <= 0.0 to sample greedily, 0.0 to not output probabilities - float dynatemp_range = 0.00f; // 0.0 = disabled - float dynatemp_exponent = 1.00f; // controls how entropy maps to temperature in dynamic temperature sampler - int32_t penalty_last_n = 64; // last n tokens to penalize (0 = disable penalty, -1 = context size) - float penalty_repeat = 1.00f; // 1.0 = disabled - float penalty_freq = 0.00f; // 0.0 = disabled - float penalty_present = 0.00f; // 0.0 = disabled - float dry_multiplier = 0.0f; // 0.0 = disabled; DRY repetition penalty for tokens extending repetition: - float dry_base = 1.75f; // 0.0 = disabled; multiplier * base ^ (length of sequence before token - allowed length) - int32_t dry_allowed_length = 2; // tokens extending repetitions beyond this receive penalty - int32_t dry_penalty_last_n = -1; // how many tokens to scan for repetitions (0 = disable penalty, -1 = context size) - int32_t mirostat = 0; // 0 = disabled, 1 = mirostat, 2 = mirostat 2.0 - float mirostat_tau = 5.00f; // target entropy - float mirostat_eta = 0.10f; // learning rate - bool ignore_eos = false; - bool no_perf = false; // disable performance metrics - bool timing_per_token = false; - - std::vector dry_sequence_breakers = {"\n", ":", "\"", "*"}; // default sequence breakers for DRY - - - std::vector samplers = { - COMMON_SAMPLER_TYPE_PENALTIES, - COMMON_SAMPLER_TYPE_DRY, - COMMON_SAMPLER_TYPE_TOP_K, - COMMON_SAMPLER_TYPE_TYPICAL_P, - COMMON_SAMPLER_TYPE_TOP_P, - COMMON_SAMPLER_TYPE_MIN_P, - COMMON_SAMPLER_TYPE_XTC, - COMMON_SAMPLER_TYPE_TEMPERATURE, - }; - - std::string grammar; // optional BNF-like grammar to constrain sampling - bool grammar_lazy = false; - std::vector grammar_trigger_words; // optional trigger words to trigger lazy grammar - std::vector grammar_trigger_tokens; // optional trigger tokens to trigger lazy grammar and print trigger special tokens. 
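Editor's note: the DRY fields above document the penalty formula multiplier * base ^ (length of repeated sequence - allowed length). DRY is disabled by default (dry_multiplier = 0.0); the sketch below uses an illustrative multiplier of 0.8 purely to show how fast the penalty grows with the default dry_base = 1.75 and dry_allowed_length = 2:

#include <cmath>
#include <cstdio>

int main() {
    const float dry_multiplier     = 0.8f;   // illustrative only; the default above is 0.0 (disabled)
    const float dry_base           = 1.75f;
    const int   dry_allowed_length = 2;
    for (int len = dry_allowed_length + 1; len <= 6; len++) {
        const float penalty = dry_multiplier * (float) std::pow(dry_base, len - dry_allowed_length);
        std::printf("repetition length %d -> penalty %.2f\n", len, penalty);
    }
    return 0;
}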
- std::set preserved_tokens; - - std::vector logit_bias; // logit biases to apply - - // print the parameters into a string - std::string print() const; -}; - -struct common_params_speculative { - std::vector devices; // devices to use for offloading - - int32_t n_ctx = 0; // draft context size - int32_t n_max = 16; // maximum number of tokens to draft during speculative decoding - int32_t n_min = 5; // minimum number of draft tokens to use for speculative decoding - int32_t n_gpu_layers = -1; // number of layers to store in VRAM for the draft model (-1 - use default) - float p_split = 0.1f; // speculative decoding split probability - float p_min = 0.9f; // minimum speculative decoding probability (greedy) - - struct cpu_params cpuparams; - struct cpu_params cpuparams_batch; - - std::string hf_repo = ""; // HF repo // NOLINT - std::string hf_file = ""; // HF file // NOLINT - - std::string model = ""; // draft model for speculative decoding // NOLINT - std::string model_url = ""; // model url to download // NOLINT -}; - -struct common_params_vocoder { - std::string hf_repo = ""; // HF repo // NOLINT - std::string hf_file = ""; // HF file // NOLINT - - std::string model = ""; // model path // NOLINT - std::string model_url = ""; // model url to download // NOLINT - - bool use_guide_tokens = false; // enable guide tokens to improve TTS accuracy // NOLINT -}; - -struct common_params { - int32_t n_predict = -1; // new tokens to predict - int32_t n_ctx = 4096; // context size - int32_t n_batch = 2048; // logical batch size for prompt processing (must be >=32 to use BLAS) - int32_t n_ubatch = 512; // physical batch size for prompt processing (must be >=32 to use BLAS) - int32_t n_keep = 0; // number of tokens to keep from initial prompt - int32_t n_chunks = -1; // max number of chunks to process (-1 = unlimited) - int32_t n_parallel = 1; // number of parallel sequences to decode - int32_t n_sequences = 1; // number of sequences to decode - int32_t grp_attn_n = 1; // group-attention factor - int32_t grp_attn_w = 512; // group-attention width - int32_t n_print = -1; // print token count every n tokens (-1 = disabled) - float rope_freq_base = 0.0f; // RoPE base frequency - float rope_freq_scale = 0.0f; // RoPE frequency scaling factor - float yarn_ext_factor = -1.0f; // YaRN extrapolation mix factor - float yarn_attn_factor = 1.0f; // YaRN magnitude scaling factor - float yarn_beta_fast = 32.0f; // YaRN low correction dim - float yarn_beta_slow = 1.0f; // YaRN high correction dim - int32_t yarn_orig_ctx = 0; // YaRN original context length - float defrag_thold = 0.1f; // KV cache defragmentation threshold - - // offload params - std::vector devices; // devices to use for offloading - - int32_t n_gpu_layers = -1; // number of layers to store in VRAM (-1 - use default) - int32_t main_gpu = 0; // the GPU that is used for scratch and small tensors - float tensor_split[128] = {0}; // how split tensors should be distributed across GPUs - - enum llama_split_mode split_mode = LLAMA_SPLIT_MODE_LAYER; // how to split the model across GPUs - - struct cpu_params cpuparams; - struct cpu_params cpuparams_batch; - - ggml_backend_sched_eval_callback cb_eval = nullptr; - void * cb_eval_user_data = nullptr; - - ggml_numa_strategy numa = GGML_NUMA_STRATEGY_DISABLED; - - enum llama_rope_scaling_type rope_scaling_type = LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED; - enum llama_pooling_type pooling_type = LLAMA_POOLING_TYPE_UNSPECIFIED; // pooling type for embeddings - enum llama_attention_type attention_type = 
LLAMA_ATTENTION_TYPE_UNSPECIFIED; // attention type for embeddings - - struct common_params_sampling sampling; - struct common_params_speculative speculative; - struct common_params_vocoder vocoder; - - std::string model = ""; // model path // NOLINT - std::string model_alias = ""; // model alias // NOLINT - std::string model_url = ""; // model url to download // NOLINT - std::string hf_token = ""; // HF token // NOLINT - std::string hf_repo = ""; // HF repo // NOLINT - std::string hf_file = ""; // HF file // NOLINT - std::string prompt = ""; // NOLINT - std::string prompt_file = ""; // store the external prompt file name // NOLINT - std::string path_prompt_cache = ""; // path to file for saving/loading prompt eval state // NOLINT - std::string input_prefix = ""; // string to prefix user inputs with // NOLINT - std::string input_suffix = ""; // string to suffix user inputs with // NOLINT - std::string lookup_cache_static = ""; // path of static ngram cache file for lookup decoding // NOLINT - std::string lookup_cache_dynamic = ""; // path of dynamic ngram cache file for lookup decoding // NOLINT - std::string logits_file = ""; // file for saving *all* logits // NOLINT - - std::vector in_files; // all input files - std::vector antiprompt; // strings upon which more user input is prompted (a.k.a. reverse prompts) - std::vector kv_overrides; - - bool lora_init_without_apply = false; // only load lora to memory, but do not apply it to ctx (user can manually apply lora later using llama_adapter_lora_apply) - std::vector lora_adapters; // lora adapter path with user defined scale - - std::vector control_vectors; // control vector with user defined scale - - int32_t verbosity = 0; - int32_t control_vector_layer_start = -1; // layer range for control vector - int32_t control_vector_layer_end = -1; // layer range for control vector - - int32_t ppl_stride = 0; // stride for perplexity calculations. If left at 0, the pre-existing approach will be used. - int32_t ppl_output_type = 0; // = 0 -> ppl output is as usual, = 1 -> ppl output is num_tokens, ppl, one per line - // (which is more convenient to use for plotting) - // - bool hellaswag = false; // compute HellaSwag score over random tasks from datafile supplied in prompt - size_t hellaswag_tasks = 400; // number of tasks to use when computing the HellaSwag score - - bool winogrande = false; // compute Winogrande score over random tasks from datafile supplied in prompt - size_t winogrande_tasks = 0; // number of tasks to use when computing the Winogrande score. If 0, all tasks will be computed - - bool multiple_choice = false; // compute TruthfulQA score over random tasks from datafile supplied in prompt - size_t multiple_choice_tasks = 0; // number of tasks to use when computing the TruthfulQA score. 
If 0, all tasks will be computed - - bool kl_divergence = false; // compute KL divergence - - bool usage = false; // print usage - bool use_color = false; // use color to distinguish generations and inputs - bool special = false; // enable special token output - bool interactive = false; // interactive mode - bool interactive_first = false; // wait for user input immediately - bool prompt_cache_all = false; // save user input and generations to prompt cache - bool prompt_cache_ro = false; // open the prompt cache read-only and do not update it - - bool escape = true; // escape "\n", "\r", "\t", "\'", "\"", and "\\" - bool multiline_input = false; // reverse the usage of `\` - bool simple_io = false; // improves compatibility with subprocesses and limited consoles - bool cont_batching = true; // insert new sequences for decoding on-the-fly - bool flash_attn = false; // flash attention - bool no_perf = false; // disable performance metrics - bool ctx_shift = true; // context shift on inifinite text generation - - bool input_prefix_bos = false; // prefix BOS to user inputs, preceding input_prefix - bool logits_all = false; // return logits for all tokens in the batch - bool use_mmap = true; // use mmap for faster loads - bool use_mlock = false; // use mlock to keep model in memory - bool verbose_prompt = false; // print prompt tokens before generation - bool display_prompt = true; // print prompt before generation - bool dump_kv_cache = false; // dump the KV cache contents for debugging purposes - bool no_kv_offload = false; // disable KV offloading - bool warmup = true; // warmup run - bool check_tensors = false; // validate tensor data - - ggml_type cache_type_k = GGML_TYPE_F16; // KV cache data type for the K - ggml_type cache_type_v = GGML_TYPE_F16; // KV cache data type for the V - - common_conversation_mode conversation_mode = COMMON_CONVERSATION_MODE_AUTO; - - // multimodal models (see examples/llava) - std::string mmproj = ""; // path to multimodal projector // NOLINT - std::vector image; // path to image file(s) - - // embedding - bool embedding = false; // get only sentence embedding - int32_t embd_normalize = 2; // normalisation for embeddings (-1=none, 0=max absolute int16, 1=taxicab, 2=euclidean, >2=p-norm) - std::string embd_out = ""; // empty = default, "array" = [[],[]...], "json" = openai style, "json+" = same "json" + cosine similarity matrix - std::string embd_sep = "\n"; // separator of embeddings - bool reranking = false; // enable reranking support on server - - // server params - int32_t port = 8080; // server listens on this network port - int32_t timeout_read = 600; // http read timeout in seconds - int32_t timeout_write = timeout_read; // http write timeout in seconds - int32_t n_threads_http = -1; // number of threads to process HTTP requests (TODO: support threadpool) - int32_t n_cache_reuse = 0; // min chunk size to reuse from the cache via KV shifting - - std::string hostname = "127.0.0.1"; - std::string public_path = ""; // NOLINT - std::string chat_template = ""; // NOLINT - bool use_jinja = false; // NOLINT - bool enable_chat_template = true; - - std::vector api_keys; - - std::string ssl_file_key = ""; // NOLINT - std::string ssl_file_cert = ""; // NOLINT - - // "advanced" endpoints are disabled by default for better security - bool webui = true; - bool endpoint_slots = false; - bool endpoint_props = false; // only control POST requests, not GET - bool endpoint_metrics = false; - - bool log_json = false; - - std::string slot_save_path; - - float 
slot_prompt_similarity = 0.5f; - - // batched-bench params - bool is_pp_shared = false; - - std::vector n_pp; - std::vector n_tg; - std::vector n_pl; - - // retrieval params - std::vector context_files; // context files to embed - - int32_t chunk_size = 64; // chunk size for context embedding - - std::string chunk_separator = "\n"; // chunk separator for context embedding - - // passkey params - int32_t n_junk = 250; // number of times to repeat the junk text - int32_t i_pos = -1; // position of the passkey in the junk text - - // imatrix params - std::string out_file = "imatrix.dat"; // save the resulting imatrix to this file - - int32_t n_out_freq = 10; // output the imatrix every n_out_freq iterations - int32_t n_save_freq = 0; // save the imatrix every n_save_freq iterations - int32_t i_chunk = 0; // start processing from this chunk - - bool process_output = false; // collect data for the output tensor - bool compute_ppl = true; // whether to compute perplexity - - // cvector-generator params - int n_pca_batch = 100; - int n_pca_iterations = 1000; - dimre_method cvector_dimre_method = DIMRE_METHOD_PCA; - std::string cvector_outfile = "control_vector.gguf"; - std::string cvector_positive_file = "examples/cvector-generator/positive.txt"; - std::string cvector_negative_file = "examples/cvector-generator/negative.txt"; - - bool spm_infill = false; // suffix/prefix/middle pattern for infill - - std::string lora_outfile = "ggml-lora-merged-f16.gguf"; - - // batched-bench params - bool batched_bench_output_jsonl = false; -}; - -// call once at the start of a program if it uses libcommon -// initializes the logging system and prints info about the build -void common_init(); - -std::string common_params_get_system_info(const common_params & params); - -bool parse_cpu_range(const std::string & range, bool(&boolmask)[GGML_MAX_N_THREADS]); -bool parse_cpu_mask(const std::string & mask, bool(&boolmask)[GGML_MAX_N_THREADS]); -void postprocess_cpu_params(cpu_params & cpuparams, const cpu_params * role_model = nullptr); -bool set_process_priority(enum ggml_sched_priority prio); - -// -// String utils -// - -#ifdef __GNUC__ -#ifdef __MINGW32__ -#define LLAMA_COMMON_ATTRIBUTE_FORMAT(...) __attribute__((format(gnu_printf, __VA_ARGS__))) -#else -#define LLAMA_COMMON_ATTRIBUTE_FORMAT(...) __attribute__((format(printf, __VA_ARGS__))) -#endif -#else -#define LLAMA_COMMON_ATTRIBUTE_FORMAT(...) 
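Editor's note: the LLAMA_COMMON_ATTRIBUTE_FORMAT macro defined just above (empty in the non-GCC fallback, closed by the #endif that follows) lets the compiler type-check printf-style arguments of string_format. A minimal sketch of the same idea applied to a hypothetical variadic helper:

#include <cstdarg>
#include <cstdio>

#if defined(__GNUC__)
#    define ATTR_FORMAT(fmt_idx, args_idx) __attribute__((format(printf, fmt_idx, args_idx)))
#else
#    define ATTR_FORMAT(fmt_idx, args_idx)
#endif

ATTR_FORMAT(1, 2)
static void log_line(const char * fmt, ...) {
    va_list args;
    va_start(args, fmt);
    std::vfprintf(stderr, fmt, args);
    va_end(args);
}

// log_line("%s took %d ms\n", "warmup", 42);   // OK
// log_line("%s took %d ms\n", 42, "warmup");   // flagged at compile time with -Wformat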
-#endif - -LLAMA_COMMON_ATTRIBUTE_FORMAT(1, 2) -std::string string_format(const char * fmt, ...); - -std::string string_strip(const std::string & str); -std::string string_get_sortable_timestamp(); - -std::string string_join(const std::vector & values, const std::string & separator); -std::vector string_split(const std::string & str, const std::string & delimiter); -std::string string_repeat(const std::string & str, size_t n); - -void string_replace_all(std::string & s, const std::string & search, const std::string & replace); - -template -static std::vector string_split(const std::string & str, char delim) { - static_assert(!std::is_same::value, "Please use the specialized version for std::string"); - std::vector values; - std::istringstream str_stream(str); - std::string token; - while (std::getline(str_stream, token, delim)) { - T value; - std::istringstream token_stream(token); - token_stream >> value; - values.push_back(value); - } - return values; -} - -template<> -std::vector string_split(const std::string & input, char separator) -{ - std::vector parts; - size_t begin_pos = 0; - size_t separator_pos = input.find(separator); - while (separator_pos != std::string::npos) { - std::string part = input.substr(begin_pos, separator_pos - begin_pos); - parts.emplace_back(part); - begin_pos = separator_pos + 1; - separator_pos = input.find(separator, begin_pos); - } - parts.emplace_back(input.substr(begin_pos, separator_pos - begin_pos)); - return parts; -} - -static bool string_starts_with(const std::string & str, - const std::string & prefix) { // While we wait for C++20's std::string::starts_with... - return str.rfind(prefix, 0) == 0; -} - -static bool string_ends_with(const std::string & str, - const std::string & suffix) { // While we wait for C++20's std::string::ends_with... 
- return str.size() >= suffix.size() && str.compare(str.size()-suffix.size(), suffix.size(), suffix) == 0; -} - -bool string_parse_kv_override(const char * data, std::vector & overrides); -void string_process_escapes(std::string & input); - -std::string string_from(bool value); -std::string string_from(const std::vector & values); -std::string string_from(const struct llama_context * ctx, const std::vector & tokens); -std::string string_from(const struct llama_context * ctx, const struct llama_batch & batch); - -// -// Filesystem utils -// - -bool fs_validate_filename(const std::string & filename); -bool fs_create_directory_with_parents(const std::string & path); - -std::string fs_get_cache_directory(); -std::string fs_get_cache_file(const std::string & filename); - -// -// Model utils -// - -// note: defines object's lifetime -struct common_init_result { - llama_model_ptr model; - llama_context_ptr context; - - std::vector lora; -}; - -struct common_init_result common_init_from_params(common_params & params); - -struct llama_model_params common_model_params_to_llama ( common_params & params); -struct llama_context_params common_context_params_to_llama(const common_params & params); -struct ggml_threadpool_params ggml_threadpool_params_from_cpu_params(const cpu_params & params); - -struct llama_model * common_load_model_from_url( - const std::string & model_url, - const std::string & local_path, - const std::string & hf_token, - const struct llama_model_params & params); - -struct llama_model * common_load_model_from_hf( - const std::string & repo, - const std::string & remote_path, - const std::string & local_path, - const std::string & hf_token, - const struct llama_model_params & params); - -std::pair common_get_hf_file( - const std::string & hf_repo_with_tag, - const std::string & hf_token); - -// clear LoRA adapters from context, then apply new list of adapters -void common_set_adapter_lora(struct llama_context * ctx, std::vector & lora); - -// -// Batch utils -// - -void common_batch_clear(struct llama_batch & batch); - -void common_batch_add( - struct llama_batch & batch, - llama_token id, - llama_pos pos, - const std::vector & seq_ids, - bool logits); - -// -// Token utils -// - -// longest common prefix -size_t common_lcp(const llama_tokens & a, const llama_tokens & b); - -// longet common subsequence -size_t common_lcs(const llama_tokens & a, const llama_tokens & b); - -// -// Vocab utils -// - -// tokenizes a string into a vector of tokens -// should work similar to Python's `tokenizer.encode` -std::vector common_tokenize( - const struct llama_context * ctx, - const std::string & text, - bool add_special, - bool parse_special = false); - -std::vector common_tokenize( - const struct llama_vocab * vocab, - const std::string & text, - bool add_special, - bool parse_special = false); - -// tokenizes a token into a piece, optionally renders special/control tokens -// should work similar to Python's `tokenizer.id_to_piece` -std::string common_token_to_piece( - const struct llama_context * ctx, - llama_token token, - bool special = true); - -std::string common_token_to_piece( - const struct llama_vocab * vocab, - llama_token token, - bool special = true); - -// detokenizes a vector of tokens into a string -// should work similar to Python's `tokenizer.decode` -// optionally renders special/control tokens -std::string common_detokenize( - const struct llama_context * ctx, - const std::vector & tokens, - bool special = true); - -std::string common_detokenize( - const struct llama_vocab * 
vocab, - const std::vector & tokens, - bool special = true); - -// -// Chat template utils -// - -struct common_tool_call { - std::string name; - std::string arguments; - std::string id; -}; - -// same with llama_chat_message, but uses std::string -struct common_chat_msg { - std::string role; - std::string content; - std::vector tool_calls; - std::string tool_plan = ""; -}; - -// Check if the template supplied via "--chat-template" is supported or not. Returns true if it's valid -bool common_chat_verify_template(const std::string & tmpl, bool use_jinja); - -namespace minja { - class chat_template; -} - -typedef minja::chat_template common_chat_template; - -struct common_chat_templates { - bool has_explicit_template; // Model had builtin template or template overridde was specified. - std::unique_ptr template_default; // always set (defaults to chatml) - std::unique_ptr template_tool_use; -}; - -// CPP wrapper for llama_chat_apply_template -// If the built-in template is not supported, we default to chatml -// If the custom "tmpl" is not supported, we throw an error -std::string common_chat_apply_template( - const common_chat_template & tmpl, - const std::vector & chat, - bool add_ass, - bool use_jinja); - -// Format single message, while taking into account the position of that message in chat history -std::string common_chat_format_single( - const common_chat_template & tmpl, - const std::vector & past_msg, - const common_chat_msg & new_msg, - bool add_ass, - bool use_jinja); - -// Returns an example of formatted chat -std::string common_chat_format_example( - const common_chat_template & tmpl, bool use_jinja); - -common_chat_templates common_chat_templates_from_model(const struct llama_model * model, const std::string & chat_template_override); - -// -// KV cache utils -// - -// Dump the KV cache view with the number of sequences per cell. -void common_kv_cache_dump_view(const llama_kv_cache_view & view, int row_size = 80); - -// Dump the KV cache view showing individual sequences in each cell (long output). -void common_kv_cache_dump_view_seqs(const llama_kv_cache_view & view, int row_size = 40); - -// -// Embedding utils -// - -// TODO: repace embd_norm with an enum -void common_embd_normalize(const float * inp, float * out, int n, int embd_norm); - -float common_embd_similarity_cos(const float * embd1, const float * embd2, int n); - -// -// Control vector utils -// - -struct common_control_vector_data { - int n_embd; - - // stores data for layers [1, n_layer] where n_layer = data.size() / n_embd - std::vector data; -}; - -struct common_control_vector_load_info { - float strength; - - std::string fname; -}; - -// Load control vectors, scale each by strength, and add them together. 
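Editor's note: as the struct comment above says, common_control_vector_data stores the directions for layers [1, n_layer] back to back, so the direction for layer l starts at offset (l - 1) * n_embd and layer 0 is never stored. A small sketch of indexing into that layout, using an illustrative stand-in type rather than the real struct:

#include <cstddef>
#include <vector>

// mirrors the layout documented above; stand-in for common_control_vector_data
struct control_vector_data_sketch {
    int n_embd;
    std::vector<float> data;             // layers [1, n_layer], n_embd floats each
};

// return a pointer to the direction for layer `il` (1-based), or nullptr if absent
static const float * layer_direction(const control_vector_data_sketch & cvec, int il) {
    if (cvec.n_embd <= 0 || il < 1) {
        return nullptr;
    }
    const size_t offset = (size_t) (il - 1) * cvec.n_embd;
    if (offset + cvec.n_embd > cvec.data.size()) {
        return nullptr;                  // this layer was not present in the file
    }
    return cvec.data.data() + offset;
}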
-// On error, returns {-1, empty} -common_control_vector_data common_control_vector_load(const std::vector & load_infos); - -// -// Split utils -// - -namespace { - -const char * const LLM_KV_SPLIT_NO = "split.no"; -const char * const LLM_KV_SPLIT_COUNT = "split.count"; -const char * const LLM_KV_SPLIT_TENSORS_COUNT = "split.tensors.count"; - -} diff --git a/common/console.cpp b/common/console.cpp deleted file mode 100644 index 078a8d678..000000000 --- a/common/console.cpp +++ /dev/null @@ -1,504 +0,0 @@ -#include "console.h" -#include -#include - -#if defined(_WIN32) -#define WIN32_LEAN_AND_MEAN -#ifndef NOMINMAX -#define NOMINMAX -#endif -#include -#include -#include -#ifndef ENABLE_VIRTUAL_TERMINAL_PROCESSING -#define ENABLE_VIRTUAL_TERMINAL_PROCESSING 0x0004 -#endif -#else -#include -#include -#include -#include -#include -#include -#include -#include -#endif - -#define ANSI_COLOR_RED "\x1b[31m" -#define ANSI_COLOR_GREEN "\x1b[32m" -#define ANSI_COLOR_YELLOW "\x1b[33m" -#define ANSI_COLOR_BLUE "\x1b[34m" -#define ANSI_COLOR_MAGENTA "\x1b[35m" -#define ANSI_COLOR_CYAN "\x1b[36m" -#define ANSI_COLOR_RESET "\x1b[0m" -#define ANSI_BOLD "\x1b[1m" - -namespace console { - - // - // Console state - // - - static bool advanced_display = false; - static bool simple_io = true; - static display_t current_display = reset; - - static FILE* out = stdout; - -#if defined (_WIN32) - static void* hConsole; -#else - static FILE* tty = nullptr; - static termios initial_state; -#endif - - // - // Init and cleanup - // - - void init(bool use_simple_io, bool use_advanced_display) { - advanced_display = use_advanced_display; - simple_io = use_simple_io; -#if defined(_WIN32) - // Windows-specific console initialization - DWORD dwMode = 0; - hConsole = GetStdHandle(STD_OUTPUT_HANDLE); - if (hConsole == INVALID_HANDLE_VALUE || !GetConsoleMode(hConsole, &dwMode)) { - hConsole = GetStdHandle(STD_ERROR_HANDLE); - if (hConsole != INVALID_HANDLE_VALUE && (!GetConsoleMode(hConsole, &dwMode))) { - hConsole = nullptr; - simple_io = true; - } - } - if (hConsole) { - // Check conditions combined to reduce nesting - if (advanced_display && !(dwMode & ENABLE_VIRTUAL_TERMINAL_PROCESSING) && - !SetConsoleMode(hConsole, dwMode | ENABLE_VIRTUAL_TERMINAL_PROCESSING)) { - advanced_display = false; - } - // Set console output codepage to UTF8 - SetConsoleOutputCP(CP_UTF8); - } - HANDLE hConIn = GetStdHandle(STD_INPUT_HANDLE); - if (hConIn != INVALID_HANDLE_VALUE && GetConsoleMode(hConIn, &dwMode)) { - // Set console input codepage to UTF16 - _setmode(_fileno(stdin), _O_WTEXT); - - // Set ICANON (ENABLE_LINE_INPUT) and ECHO (ENABLE_ECHO_INPUT) - if (simple_io) { - dwMode |= ENABLE_LINE_INPUT | ENABLE_ECHO_INPUT; - } else { - dwMode &= ~(ENABLE_LINE_INPUT | ENABLE_ECHO_INPUT); - } - if (!SetConsoleMode(hConIn, dwMode)) { - simple_io = true; - } - } - if (simple_io) { - _setmode(_fileno(stdin), _O_U8TEXT); - } -#else - // POSIX-specific console initialization - if (!simple_io) { - struct termios new_termios; - tcgetattr(STDIN_FILENO, &initial_state); - new_termios = initial_state; - new_termios.c_lflag &= ~(ICANON | ECHO); - new_termios.c_cc[VMIN] = 1; - new_termios.c_cc[VTIME] = 0; - tcsetattr(STDIN_FILENO, TCSANOW, &new_termios); - - tty = fopen("/dev/tty", "w+"); - if (tty != nullptr) { - out = tty; - } - } - - setlocale(LC_ALL, ""); -#endif - } - - void cleanup() { - // Reset console display - set_display(reset); - -#if !defined(_WIN32) - // Restore settings on POSIX systems - if (!simple_io) { - if (tty != nullptr) { - out = 
stdout; - fclose(tty); - tty = nullptr; - } - tcsetattr(STDIN_FILENO, TCSANOW, &initial_state); - } -#endif - } - - // - // Display and IO - // - - // Keep track of current display and only emit ANSI code if it changes - void set_display(display_t display) { - if (advanced_display && current_display != display) { - fflush(stdout); - switch(display) { - case reset: - fprintf(out, ANSI_COLOR_RESET); - break; - case prompt: - fprintf(out, ANSI_COLOR_YELLOW); - break; - case user_input: - fprintf(out, ANSI_BOLD ANSI_COLOR_GREEN); - break; - case error: - fprintf(out, ANSI_BOLD ANSI_COLOR_RED); - } - current_display = display; - fflush(out); - } - } - - static char32_t getchar32() { -#if defined(_WIN32) - HANDLE hConsole = GetStdHandle(STD_INPUT_HANDLE); - wchar_t high_surrogate = 0; - - while (true) { - INPUT_RECORD record; - DWORD count; - if (!ReadConsoleInputW(hConsole, &record, 1, &count) || count == 0) { - return WEOF; - } - - if (record.EventType == KEY_EVENT && record.Event.KeyEvent.bKeyDown) { - wchar_t wc = record.Event.KeyEvent.uChar.UnicodeChar; - if (wc == 0) { - continue; - } - - if ((wc >= 0xD800) && (wc <= 0xDBFF)) { // Check if wc is a high surrogate - high_surrogate = wc; - continue; - } - if ((wc >= 0xDC00) && (wc <= 0xDFFF)) { // Check if wc is a low surrogate - if (high_surrogate != 0) { // Check if we have a high surrogate - return ((high_surrogate - 0xD800) << 10) + (wc - 0xDC00) + 0x10000; - } - } - - high_surrogate = 0; // Reset the high surrogate - return static_cast(wc); - } - } -#else - wchar_t wc = getwchar(); - if (static_cast(wc) == WEOF) { - return WEOF; - } - -#if WCHAR_MAX == 0xFFFF - if ((wc >= 0xD800) && (wc <= 0xDBFF)) { // Check if wc is a high surrogate - wchar_t low_surrogate = getwchar(); - if ((low_surrogate >= 0xDC00) && (low_surrogate <= 0xDFFF)) { // Check if the next wchar is a low surrogate - return (static_cast(wc & 0x03FF) << 10) + (low_surrogate & 0x03FF) + 0x10000; - } - } - if ((wc >= 0xD800) && (wc <= 0xDFFF)) { // Invalid surrogate pair - return 0xFFFD; // Return the replacement character U+FFFD - } -#endif - - return static_cast(wc); -#endif - } - - static void pop_cursor() { -#if defined(_WIN32) - if (hConsole != NULL) { - CONSOLE_SCREEN_BUFFER_INFO bufferInfo; - GetConsoleScreenBufferInfo(hConsole, &bufferInfo); - - COORD newCursorPosition = bufferInfo.dwCursorPosition; - if (newCursorPosition.X == 0) { - newCursorPosition.X = bufferInfo.dwSize.X - 1; - newCursorPosition.Y -= 1; - } else { - newCursorPosition.X -= 1; - } - - SetConsoleCursorPosition(hConsole, newCursorPosition); - return; - } -#endif - putc('\b', out); - } - - static int estimateWidth(char32_t codepoint) { -#if defined(_WIN32) - (void)codepoint; - return 1; -#else - return wcwidth(codepoint); -#endif - } - - static int put_codepoint(const char* utf8_codepoint, size_t length, int expectedWidth) { -#if defined(_WIN32) - CONSOLE_SCREEN_BUFFER_INFO bufferInfo; - if (!GetConsoleScreenBufferInfo(hConsole, &bufferInfo)) { - // go with the default - return expectedWidth; - } - COORD initialPosition = bufferInfo.dwCursorPosition; - DWORD nNumberOfChars = length; - WriteConsole(hConsole, utf8_codepoint, nNumberOfChars, &nNumberOfChars, NULL); - - CONSOLE_SCREEN_BUFFER_INFO newBufferInfo; - GetConsoleScreenBufferInfo(hConsole, &newBufferInfo); - - // Figure out our real position if we're in the last column - if (utf8_codepoint[0] != 0x09 && initialPosition.X == newBufferInfo.dwSize.X - 1) { - DWORD nNumberOfChars; - WriteConsole(hConsole, &" \b", 2, &nNumberOfChars, NULL); - 
GetConsoleScreenBufferInfo(hConsole, &newBufferInfo); - } - - int width = newBufferInfo.dwCursorPosition.X - initialPosition.X; - if (width < 0) { - width += newBufferInfo.dwSize.X; - } - return width; -#else - // We can trust expectedWidth if we've got one - if (expectedWidth >= 0 || tty == nullptr) { - fwrite(utf8_codepoint, length, 1, out); - return expectedWidth; - } - - fputs("\033[6n", tty); // Query cursor position - int x1; - int y1; - int x2; - int y2; - int results = 0; - results = fscanf(tty, "\033[%d;%dR", &y1, &x1); - - fwrite(utf8_codepoint, length, 1, tty); - - fputs("\033[6n", tty); // Query cursor position - results += fscanf(tty, "\033[%d;%dR", &y2, &x2); - - if (results != 4) { - return expectedWidth; - } - - int width = x2 - x1; - if (width < 0) { - // Calculate the width considering text wrapping - struct winsize w; - ioctl(STDOUT_FILENO, TIOCGWINSZ, &w); - width += w.ws_col; - } - return width; -#endif - } - - static void replace_last(char ch) { -#if defined(_WIN32) - pop_cursor(); - put_codepoint(&ch, 1, 1); -#else - fprintf(out, "\b%c", ch); -#endif - } - - static void append_utf8(char32_t ch, std::string & out) { - if (ch <= 0x7F) { - out.push_back(static_cast(ch)); - } else if (ch <= 0x7FF) { - out.push_back(static_cast(0xC0 | ((ch >> 6) & 0x1F))); - out.push_back(static_cast(0x80 | (ch & 0x3F))); - } else if (ch <= 0xFFFF) { - out.push_back(static_cast(0xE0 | ((ch >> 12) & 0x0F))); - out.push_back(static_cast(0x80 | ((ch >> 6) & 0x3F))); - out.push_back(static_cast(0x80 | (ch & 0x3F))); - } else if (ch <= 0x10FFFF) { - out.push_back(static_cast(0xF0 | ((ch >> 18) & 0x07))); - out.push_back(static_cast(0x80 | ((ch >> 12) & 0x3F))); - out.push_back(static_cast(0x80 | ((ch >> 6) & 0x3F))); - out.push_back(static_cast(0x80 | (ch & 0x3F))); - } else { - // Invalid Unicode code point - } - } - - // Helper function to remove the last UTF-8 character from a string - static void pop_back_utf8_char(std::string & line) { - if (line.empty()) { - return; - } - - size_t pos = line.length() - 1; - - // Find the start of the last UTF-8 character (checking up to 4 bytes back) - for (size_t i = 0; i < 3 && pos > 0; ++i, --pos) { - if ((line[pos] & 0xC0) != 0x80) { - break; // Found the start of the character - } - } - line.erase(pos); - } - - static bool readline_advanced(std::string & line, bool multiline_input) { - if (out != stdout) { - fflush(stdout); - } - - line.clear(); - std::vector widths; - bool is_special_char = false; - bool end_of_stream = false; - - char32_t input_char; - while (true) { - fflush(out); // Ensure all output is displayed before waiting for input - input_char = getchar32(); - - if (input_char == '\r' || input_char == '\n') { - break; - } - - if (input_char == (char32_t) WEOF || input_char == 0x04 /* Ctrl+D*/) { - end_of_stream = true; - break; - } - - if (is_special_char) { - set_display(user_input); - replace_last(line.back()); - is_special_char = false; - } - - if (input_char == '\033') { // Escape sequence - char32_t code = getchar32(); - if (code == '[' || code == 0x1B) { - // Discard the rest of the escape sequence - while ((code = getchar32()) != (char32_t) WEOF) { - if ((code >= 'A' && code <= 'Z') || (code >= 'a' && code <= 'z') || code == '~') { - break; - } - } - } - } else if (input_char == 0x08 || input_char == 0x7F) { // Backspace - if (!widths.empty()) { - int count; - do { - count = widths.back(); - widths.pop_back(); - // Move cursor back, print space, and move cursor back again - for (int i = 0; i < count; i++) { - replace_last(' '); - 
pop_cursor(); - } - pop_back_utf8_char(line); - } while (count == 0 && !widths.empty()); - } - } else { - int offset = line.length(); - append_utf8(input_char, line); - int width = put_codepoint(line.c_str() + offset, line.length() - offset, estimateWidth(input_char)); - if (width < 0) { - width = 0; - } - widths.push_back(width); - } - - if (!line.empty() && (line.back() == '\\' || line.back() == '/')) { - set_display(prompt); - replace_last(line.back()); - is_special_char = true; - } - } - - bool has_more = multiline_input; - if (is_special_char) { - replace_last(' '); - pop_cursor(); - - char last = line.back(); - line.pop_back(); - if (last == '\\') { - line += '\n'; - fputc('\n', out); - has_more = !has_more; - } else { - // llama will just eat the single space, it won't act as a space - if (line.length() == 1 && line.back() == ' ') { - line.clear(); - pop_cursor(); - } - has_more = false; - } - } else { - if (end_of_stream) { - has_more = false; - } else { - line += '\n'; - fputc('\n', out); - } - } - - fflush(out); - return has_more; - } - - static bool readline_simple(std::string & line, bool multiline_input) { -#if defined(_WIN32) - std::wstring wline; - if (!std::getline(std::wcin, wline)) { - // Input stream is bad or EOF received - line.clear(); - GenerateConsoleCtrlEvent(CTRL_C_EVENT, 0); - return false; - } - - int size_needed = WideCharToMultiByte(CP_UTF8, 0, &wline[0], (int)wline.size(), NULL, 0, NULL, NULL); - line.resize(size_needed); - WideCharToMultiByte(CP_UTF8, 0, &wline[0], (int)wline.size(), &line[0], size_needed, NULL, NULL); -#else - if (!std::getline(std::cin, line)) { - // Input stream is bad or EOF received - line.clear(); - return false; - } -#endif - if (!line.empty()) { - char last = line.back(); - if (last == '/') { // Always return control on '/' symbol - line.pop_back(); - return false; - } - if (last == '\\') { // '\\' changes the default action - line.pop_back(); - multiline_input = !multiline_input; - } - } - line += '\n'; - - // By default, continue input if multiline_input is set - return multiline_input; - } - - bool readline(std::string & line, bool multiline_input) { - set_display(user_input); - - if (simple_io) { - return readline_simple(line, multiline_input); - } - return readline_advanced(line, multiline_input); - } - -} diff --git a/common/console.h b/common/console.h deleted file mode 100644 index ec175269b..000000000 --- a/common/console.h +++ /dev/null @@ -1,19 +0,0 @@ -// Console functions - -#pragma once - -#include - -namespace console { - enum display_t { - reset = 0, - prompt, - user_input, - error - }; - - void init(bool use_simple_io, bool use_advanced_display); - void cleanup(); - void set_display(display_t display); - bool readline(std::string & line, bool multiline_input); -} diff --git a/common/json-schema-to-grammar.cpp b/common/json-schema-to-grammar.cpp deleted file mode 100644 index 3ebcc3d9f..000000000 --- a/common/json-schema-to-grammar.cpp +++ /dev/null @@ -1,1025 +0,0 @@ -#include "json-schema-to-grammar.h" -#include "common.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -using json = nlohmann::ordered_json; - -static std::string build_repetition(const std::string & item_rule, int min_items, int max_items, const std::string & separator_rule = "") { - auto has_max = max_items != std::numeric_limits::max(); - - if (min_items == 0 && max_items == 1) { - return item_rule + "?"; - } - - if (separator_rule.empty()) { - if (min_items == 1 && !has_max) { - return item_rule + "+"; 
- } else if (min_items == 0 && !has_max) { - return item_rule + "*"; - } else { - return item_rule + "{" + std::to_string(min_items) + "," + (has_max ? std::to_string(max_items) : "") + "}"; - } - } - - auto result = item_rule + " " + build_repetition("(" + separator_rule + " " + item_rule + ")", min_items == 0 ? 0 : min_items - 1, has_max ? max_items - 1 : max_items); - if (min_items == 0) { - result = "(" + result + ")?"; - } - return result; -} - -/* Minimalistic replacement for std::string_view, which is only available from C++17 onwards */ -class string_view { - const std::string & _str; - const size_t _start; - const size_t _end; -public: - string_view(const std::string & str, size_t start = 0, size_t end = std::string::npos) : _str(str), _start(start), _end(end == std::string::npos ? str.length() : end) {} - - size_t size() const { - return _end - _start; - } - - size_t length() const { - return size(); - } - - operator std::string() const { - return str(); - } - - std::string str() const { - return _str.substr(_start, _end - _start); - } - - string_view substr(size_t pos, size_t len = std::string::npos) const { - return string_view(_str, _start + pos, len == std::string::npos ? _end : _start + pos + len); - } - - char operator[](size_t pos) const { - auto index = _start + pos; - if (index >= _end) { - throw std::out_of_range("string_view index out of range"); - } - return _str[_start + pos]; - } - - bool operator==(const string_view & other) const { - std::string this_str = *this; - std::string other_str = other; - return this_str == other_str; - } -}; - -static void _build_min_max_int(int min_value, int max_value, std::stringstream & out, int decimals_left = 16, bool top_level = true) { - auto has_min = min_value != std::numeric_limits::min(); - auto has_max = max_value != std::numeric_limits::max(); - - auto digit_range = [&](char from, char to) { - out << "["; - if (from == to) { - out << from; - } else { - out << from << "-" << to; - } - out << "]"; - }; - auto more_digits = [&](int min_digits, int max_digits) { - out << "[0-9]"; - if (min_digits == max_digits && min_digits == 1) { - return; - } - out << "{"; - out << min_digits; - if (max_digits != min_digits) { - out << ","; - if (max_digits != std::numeric_limits::max()) { - out << max_digits; - } - } - out << "}"; - }; - std::function uniform_range = - [&](const string_view & from, const string_view & to) { - size_t i = 0; - while (i < from.length() && i < to.length() && from[i] == to[i]) { - i++; - } - if (i > 0) { - out << "\"" << from.substr(0, i).str() << "\""; - } - if (i < from.length() && i < to.length()) { - if (i > 0) { - out << " "; - } - auto sub_len = from.length() - i - 1; - if (sub_len > 0) { - auto from_sub = from.substr(i + 1); - auto to_sub = to.substr(i + 1); - auto sub_zeros = string_repeat("0", sub_len); - auto sub_nines = string_repeat("9", sub_len); - - auto to_reached = false; - out << "("; - if (from_sub == sub_zeros) { - digit_range(from[i], to[i] - 1); - out << " "; - more_digits(sub_len, sub_len); - } else { - out << "[" << from[i] << "] "; - out << "("; - uniform_range(from_sub, sub_nines); - out << ")"; - if (from[i] < to[i] - 1) { - out << " | "; - if (to_sub == sub_nines) { - digit_range(from[i] + 1, to[i]); - to_reached = true; - } else { - digit_range(from[i] + 1, to[i] - 1); - } - out << " "; - more_digits(sub_len, sub_len); - } - } - if (!to_reached) { - out << " | "; - digit_range(to[i], to[i]); - out << " "; - uniform_range(sub_zeros, to_sub); - } - out << ")"; - } else { - out << "[" << 
from[i] << "-" << to[i] << "]"; - } - } - }; - - if (has_min && has_max) { - if (min_value < 0 && max_value < 0) { - out << "\"-\" ("; - _build_min_max_int(-max_value, -min_value, out, decimals_left, /* top_level= */ true); - out << ")"; - return; - } - - if (min_value < 0) { - out << "\"-\" ("; - _build_min_max_int(0, -min_value, out, decimals_left, /* top_level= */ true); - out << ") | "; - min_value = 0; - } - - auto min_s = std::to_string(min_value); - auto max_s = std::to_string(max_value); - auto min_digits = min_s.length(); - auto max_digits = max_s.length(); - - for (auto digits = min_digits; digits < max_digits; digits++) { - uniform_range(min_s, string_repeat("9", digits)); - min_s = "1" + string_repeat("0", digits); - out << " | "; - } - uniform_range(min_s, max_s); - return; - } - - auto less_decimals = std::max(decimals_left - 1, 1); - - if (has_min) { - if (min_value < 0) { - out << "\"-\" ("; - _build_min_max_int(std::numeric_limits::min(), -min_value, out, decimals_left, /* top_level= */ false); - out << ") | [0] | [1-9] "; - more_digits(0, decimals_left - 1); - } else if (min_value == 0) { - if (top_level) { - out << "[0] | [1-9] "; - more_digits(0, less_decimals); - } else { - more_digits(1, decimals_left); - } - } else if (min_value <= 9) { - char c = '0' + min_value; - auto range_start = top_level ? '1' : '0'; - if (c > range_start) { - digit_range(range_start, c - 1); - out << " "; - more_digits(1, less_decimals); - out << " | "; - } - digit_range(c, '9'); - out << " "; - more_digits(0, less_decimals); - } else { - auto min_s = std::to_string(min_value); - auto len = min_s.length(); - auto c = min_s[0]; - - if (c > '1') { - digit_range(top_level ? '1' : '0', c - 1); - out << " "; - more_digits(len, less_decimals); - out << " | "; - } - digit_range(c, c); - out << " ("; - _build_min_max_int(std::stoi(min_s.substr(1)), std::numeric_limits::max(), out, less_decimals, /* top_level= */ false); - out << ")"; - if (c < '9') { - out << " | "; - digit_range(c + 1, '9'); - out << " "; - more_digits(len - 1, less_decimals); - } - } - return; - } - - if (has_max) { - if (max_value >= 0) { - if (top_level) { - out << "\"-\" [1-9] "; - more_digits(0, less_decimals); - out << " | "; - } - _build_min_max_int(0, max_value, out, decimals_left, /* top_level= */ true); - } else { - out << "\"-\" ("; - _build_min_max_int(-max_value, std::numeric_limits::max(), out, decimals_left, /* top_level= */ false); - out << ")"; - } - return; - } - - throw std::runtime_error("At least one of min_value or max_value must be set"); -} - -const std::string SPACE_RULE = "| \" \" | \"\\n\" [ \\t]{0,20}"; - -struct BuiltinRule { - std::string content; - std::vector deps; -}; - -std::unordered_map PRIMITIVE_RULES = { - {"boolean", {"(\"true\" | \"false\") space", {}}}, - {"decimal-part", {"[0-9]{1,16}", {}}}, - {"integral-part", {"[0] | [1-9] [0-9]{0,15}", {}}}, - {"number", {"(\"-\"? integral-part) (\".\" decimal-part)? ([eE] [-+]? integral-part)? space", {"integral-part", "decimal-part"}}}, - {"integer", {"(\"-\"? integral-part) space", {"integral-part"}}}, - {"value", {"object | array | string | number | boolean | null", {"object", "array", "string", "number", "boolean", "null"}}}, - {"object", {"\"{\" space ( string \":\" space value (\",\" space string \":\" space value)* )? \"}\" space", {"string", "value"}}}, - {"array", {"\"[\" space ( value (\",\" space value)* )? 
\"]\" space", {"value"}}}, - {"uuid", {"\"\\\"\" [0-9a-fA-F]{8} \"-\" [0-9a-fA-F]{4} \"-\" [0-9a-fA-F]{4} \"-\" [0-9a-fA-F]{4} \"-\" [0-9a-fA-F]{12} \"\\\"\" space", {}}}, - {"char", {"[^\"\\\\\\x7F\\x00-\\x1F] | [\\\\] ([\"\\\\bfnrt] | \"u\" [0-9a-fA-F]{4})", {}}}, - {"string", {"\"\\\"\" char* \"\\\"\" space", {"char"}}}, - {"null", {"\"null\" space", {}}}, -}; - -std::unordered_map STRING_FORMAT_RULES = { - {"date", {"[0-9]{4} \"-\" ( \"0\" [1-9] | \"1\" [0-2] ) \"-\" ( \"0\" [1-9] | [1-2] [0-9] | \"3\" [0-1] )", {}}}, - {"time", {"([01] [0-9] | \"2\" [0-3]) \":\" [0-5] [0-9] \":\" [0-5] [0-9] ( \".\" [0-9]{3} )? ( \"Z\" | ( \"+\" | \"-\" ) ( [01] [0-9] | \"2\" [0-3] ) \":\" [0-5] [0-9] )", {}}}, - {"date-time", {"date \"T\" time", {"date", "time"}}}, - {"date-string", {"\"\\\"\" date \"\\\"\" space", {"date"}}}, - {"time-string", {"\"\\\"\" time \"\\\"\" space", {"time"}}}, - {"date-time-string", {"\"\\\"\" date-time \"\\\"\" space", {"date-time"}}} -}; - -static bool is_reserved_name(const std::string & name) { - static std::unordered_set RESERVED_NAMES; - if (RESERVED_NAMES.empty()) { - RESERVED_NAMES.insert("root"); - for (const auto &p : PRIMITIVE_RULES) RESERVED_NAMES.insert(p.first); - for (const auto &p : STRING_FORMAT_RULES) RESERVED_NAMES.insert(p.first); - } - return RESERVED_NAMES.find(name) != RESERVED_NAMES.end(); -} - -std::regex INVALID_RULE_CHARS_RE("[^a-zA-Z0-9-]+"); -std::regex GRAMMAR_LITERAL_ESCAPE_RE("[\r\n\"]"); -std::regex GRAMMAR_RANGE_LITERAL_ESCAPE_RE("[\r\n\"\\]\\-\\\\]"); -std::unordered_map GRAMMAR_LITERAL_ESCAPES = { - {'\r', "\\r"}, {'\n', "\\n"}, {'"', "\\\""}, {'-', "\\-"}, {']', "\\]"} -}; - -std::unordered_set NON_LITERAL_SET = {'|', '.', '(', ')', '[', ']', '{', '}', '*', '+', '?'}; -std::unordered_set ESCAPED_IN_REGEXPS_BUT_NOT_IN_LITERALS = {'^', '$', '.', '[', ']', '(', ')', '|', '{', '}', '*', '+', '?'}; - -static std::string replacePattern(const std::string & input, const std::regex & regex, const std::function & replacement) { - std::smatch match; - std::string result; - - std::string::const_iterator searchStart(input.cbegin()); - std::string::const_iterator searchEnd(input.cend()); - - while (std::regex_search(searchStart, searchEnd, match, regex)) { - result.append(searchStart, searchStart + match.position()); - result.append(replacement(match)); - searchStart = match.suffix().first; - } - - result.append(searchStart, searchEnd); - - return result; -} - -static std::string format_literal(const std::string & literal) { - std::string escaped = replacePattern(literal, GRAMMAR_LITERAL_ESCAPE_RE, [&](const std::smatch & match) { - char c = match.str()[0]; - return GRAMMAR_LITERAL_ESCAPES.at(c); - }); - return "\"" + escaped + "\""; -} - -class SchemaConverter { -private: - friend std::string build_grammar(const std::function & cb, const common_grammar_options & options); - std::function _fetch_json; - bool _dotall; - std::map _rules; - std::unordered_map _refs; - std::unordered_set _refs_being_resolved; - std::vector _errors; - std::vector _warnings; - - std::string _add_rule(const std::string & name, const std::string & rule) { - std::string esc_name = regex_replace(name, INVALID_RULE_CHARS_RE, "-"); - if (_rules.find(esc_name) == _rules.end() || _rules[esc_name] == rule) { - _rules[esc_name] = rule; - return esc_name; - } else { - int i = 0; - while (_rules.find(esc_name + std::to_string(i)) != _rules.end() && _rules[esc_name + std::to_string(i)] != rule) { - i++; - } - std::string key = esc_name + std::to_string(i); - _rules[key] = rule; - 
return key; - } - } - - std::string _generate_union_rule(const std::string & name, const std::vector & alt_schemas) { - std::vector rules; - for (size_t i = 0; i < alt_schemas.size(); i++) { - rules.push_back(visit(alt_schemas[i], name + (name.empty() ? "alternative-" : "-") + std::to_string(i))); - } - return string_join(rules, " | "); - } - - std::string _visit_pattern(const std::string & pattern, const std::string & name) { - if (!(pattern.front() == '^' && pattern.back() == '$')) { - _errors.push_back("Pattern must start with '^' and end with '$'"); - return ""; - } - std::string sub_pattern = pattern.substr(1, pattern.length() - 2); - std::unordered_map sub_rule_ids; - - size_t i = 0; - size_t length = sub_pattern.length(); - - using literal_or_rule = std::pair; - auto to_rule = [&](const literal_or_rule & ls) { - auto is_literal = ls.second; - auto s = ls.first; - return is_literal ? "\"" + s + "\"" : s; - }; - std::function transform = [&]() -> literal_or_rule { - size_t start = i; - std::vector seq; - - auto get_dot = [&]() { - std::string rule; - if (_dotall) { - rule = "[\\U00000000-\\U0010FFFF]"; - } else { - rule = "[^\\x0A\\x0D]"; - } - return _add_rule("dot", rule); - }; - - // Joins the sequence, merging consecutive literals together. - auto join_seq = [&]() { - std::vector ret; - - std::string literal; - auto flush_literal = [&]() { - if (literal.empty()) { - return false; - } - ret.emplace_back(literal, true); - literal.clear(); - return true; - }; - - for (const auto & item : seq) { - auto is_literal = item.second; - if (is_literal) { - literal += item.first; - } else { - flush_literal(); - ret.push_back(item); - } - } - flush_literal(); - - std::vector results; - for (const auto & item : ret) { - results.push_back(to_rule(item)); - } - return std::make_pair(string_join(results, " "), false); - }; - - while (i < length) { - char c = sub_pattern[i]; - if (c == '.') { - seq.emplace_back(get_dot(), false); - i++; - } else if (c == '(') { - i++; - if (i < length) { - if (sub_pattern[i] == '?') { - _warnings.push_back("Unsupported pattern syntax"); - } - } - seq.emplace_back("(" + to_rule(transform()) + ")", false); - } else if (c == ')') { - i++; - if (start > 0 && sub_pattern[start - 1] != '(') { - _errors.push_back("Unbalanced parentheses"); - } - return join_seq(); - } else if (c == '[') { - std::string square_brackets = std::string(1, c); - i++; - while (i < length && sub_pattern[i] != ']') { - if (sub_pattern[i] == '\\') { - square_brackets += sub_pattern.substr(i, 2); - i += 2; - } else { - square_brackets += sub_pattern[i]; - i++; - } - } - if (i >= length) { - _errors.push_back("Unbalanced square brackets"); - } - square_brackets += ']'; - i++; - seq.emplace_back(square_brackets, false); - } else if (c == '|') { - seq.emplace_back("|", false); - i++; - } else if (c == '*' || c == '+' || c == '?') { - seq.back() = std::make_pair(to_rule(seq.back()) + c, false); - i++; - } else if (c == '{') { - std::string curly_brackets = std::string(1, c); - i++; - while (i < length && sub_pattern[i] != '}') { - curly_brackets += sub_pattern[i]; - i++; - } - if (i >= length) { - _errors.push_back("Unbalanced curly brackets"); - } - curly_brackets += '}'; - i++; - auto nums = string_split(curly_brackets.substr(1, curly_brackets.length() - 2), ","); - int min_times = 0; - int max_times = std::numeric_limits::max(); - try { - if (nums.size() == 1) { - min_times = max_times = std::stoi(nums[0]); - } else if (nums.size() != 2) { - _errors.push_back("Wrong number of values in curly 
brackets"); - } else { - if (!nums[0].empty()) { - min_times = std::stoi(nums[0]); - } - if (!nums[1].empty()) { - max_times = std::stoi(nums[1]); - } - } - } catch (const std::invalid_argument & e) { - _errors.push_back("Invalid number in curly brackets"); - return std::make_pair("", false); - } - auto &last = seq.back(); - auto &sub = last.first; - auto sub_is_literal = last.second; - - if (!sub_is_literal) { - std::string & sub_id = sub_rule_ids[sub]; - if (sub_id.empty()) { - sub_id = _add_rule(name + "-" + std::to_string(sub_rule_ids.size()), sub); - } - sub = sub_id; - } - seq.back().first = build_repetition( - sub_is_literal ? "\"" + sub + "\"" : sub, - min_times, - max_times, - "" - ); - seq.back().second = false; - } else { - std::string literal; - auto is_non_literal = [&](char c) { - return NON_LITERAL_SET.find(c) != NON_LITERAL_SET.end(); - }; - while (i < length) { - if (sub_pattern[i] == '\\' && i < length - 1) { - char next = sub_pattern[i + 1]; - if (ESCAPED_IN_REGEXPS_BUT_NOT_IN_LITERALS.find(next) != ESCAPED_IN_REGEXPS_BUT_NOT_IN_LITERALS.end()) { - i++; - literal += sub_pattern[i]; - i++; - } else { - literal += sub_pattern.substr(i, 2); - i += 2; - } - } else if (sub_pattern[i] == '"') { - literal += "\\\""; - i++; - } else if (!is_non_literal(sub_pattern[i]) && - (i == length - 1 || literal.empty() || sub_pattern[i + 1] == '.' || !is_non_literal(sub_pattern[i + 1]))) { - literal += sub_pattern[i]; - i++; - } else { - break; - } - } - if (!literal.empty()) { - seq.emplace_back(literal, true); - } - } - } - return join_seq(); - }; - return _add_rule(name, "\"\\\"\" (" + to_rule(transform()) + ") \"\\\"\" space"); - } - - /* - Returns a rule that matches a JSON string that is none of the provided strings - - not_strings({"a"}) - -> ["] ( [a] char+ | [^"a] char* )? ["] space - not_strings({"and", "also"}) - -> ["] ( [a] ([l] ([s] ([o] char+ | [^"o] char*) | [^"s] char*) | [n] ([d] char+ | [^"d] char*) | [^"ln] char*) | [^"a] char* )? 
["] space - */ - std::string _not_strings(const std::vector & strings) { - - struct TrieNode { - std::map children; - bool is_end_of_string; - - TrieNode() : is_end_of_string(false) {} - - void insert(const std::string & string) { - auto node = this; - for (char c : string) { - node = &node->children[c]; - } - node->is_end_of_string = true; - } - }; - - TrieNode trie; - for (const auto & s : strings) { - trie.insert(s); - } - - std::string char_rule = _add_primitive("char", PRIMITIVE_RULES.at("char")); - std::ostringstream out; - out << "[\"] ( "; - std::function visit = [&](const TrieNode & node) { - std::ostringstream rejects; - auto first = true; - for (const auto & kv : node.children) { - rejects << kv.first; - if (first) { - first = false; - } else { - out << " | "; - } - out << "[" << kv.first << "]"; - if (!kv.second.children.empty()) { - out << " ("; - visit(kv.second); - out << ")"; - } else if (kv.second.is_end_of_string) { - out << " " << char_rule << "+"; - } - } - if (!node.children.empty()) { - if (!first) { - out << " | "; - } - out << "[^\"" << rejects.str() << "] " << char_rule << "*"; - } - }; - visit(trie); - - out << " )"; - if (!trie.is_end_of_string) { - out << "?"; - } - out << " [\"] space"; - return out.str(); - } - - std::string _resolve_ref(const std::string & ref) { - std::string ref_name = ref.substr(ref.find_last_of('/') + 1); - if (_rules.find(ref_name) == _rules.end() && _refs_being_resolved.find(ref) == _refs_being_resolved.end()) { - _refs_being_resolved.insert(ref); - json resolved = _refs[ref]; - ref_name = visit(resolved, ref_name); - _refs_being_resolved.erase(ref); - } - return ref_name; - } - - std::string _build_object_rule( - const std::vector> & properties, - const std::unordered_set & required, - const std::string & name, - const json & additional_properties) - { - std::vector required_props; - std::vector optional_props; - std::unordered_map prop_kv_rule_names; - std::vector prop_names; - for (const auto & kv : properties) { - const auto &prop_name = kv.first; - const auto &prop_schema = kv.second; - - std::string prop_rule_name = visit(prop_schema, name + (name.empty() ? "" : "-") + prop_name); - prop_kv_rule_names[prop_name] = _add_rule( - name + (name.empty() ? "" : "-") + prop_name + "-kv", - format_literal(json(prop_name).dump()) + " space \":\" space " + prop_rule_name - ); - if (required.find(prop_name) != required.end()) { - required_props.push_back(prop_name); - } else { - optional_props.push_back(prop_name); - } - prop_names.push_back(prop_name); - } - if ((additional_properties.is_boolean() && additional_properties.get()) || additional_properties.is_object()) { - std::string sub_name = name + (name.empty() ? "" : "-") + "additional"; - std::string value_rule = - additional_properties.is_object() ? visit(additional_properties, sub_name + "-value") - : _add_primitive("value", PRIMITIVE_RULES.at("value")); - - auto key_rule = - prop_names.empty() ? 
_add_primitive("string", PRIMITIVE_RULES.at("string")) - : _add_rule(sub_name + "-k", _not_strings(prop_names)); - std::string kv_rule = _add_rule(sub_name + "-kv", key_rule + " \":\" space " + value_rule); - prop_kv_rule_names["*"] = kv_rule; - optional_props.push_back("*"); - } - - std::string rule = "\"{\" space "; - for (size_t i = 0; i < required_props.size(); i++) { - if (i > 0) { - rule += " \",\" space "; - } - rule += prop_kv_rule_names[required_props[i]]; - } - - if (!optional_props.empty()) { - rule += " ("; - if (!required_props.empty()) { - rule += " \",\" space ( "; - } - - std::function &, bool)> get_recursive_refs = [&](const std::vector & ks, bool first_is_optional) { - std::string res; - if (ks.empty()) { - return res; - } - std::string k = ks[0]; - std::string kv_rule_name = prop_kv_rule_names[k]; - std::string comma_ref = "( \",\" space " + kv_rule_name + " )"; - if (first_is_optional) { - res = comma_ref + (k == "*" ? "*" : "?"); - } else { - res = kv_rule_name + (k == "*" ? " " + comma_ref + "*" : ""); - } - if (ks.size() > 1) { - res += " " + _add_rule( - name + (name.empty() ? "" : "-") + k + "-rest", - get_recursive_refs(std::vector(ks.begin() + 1, ks.end()), true) - ); - } - return res; - }; - - for (size_t i = 0; i < optional_props.size(); i++) { - if (i > 0) { - rule += " | "; - } - rule += get_recursive_refs(std::vector(optional_props.begin() + i, optional_props.end()), false); - } - if (!required_props.empty()) { - rule += " )"; - } - rule += " )?"; - } - - rule += " \"}\" space"; - - return rule; - } - - std::string _add_primitive(const std::string & name, const BuiltinRule & rule) { - auto n = _add_rule(name, rule.content); - for (const auto & dep : rule.deps) { - BuiltinRule dep_rule; - auto it = PRIMITIVE_RULES.find(dep); - if (it == PRIMITIVE_RULES.end()) { - it = STRING_FORMAT_RULES.find(dep); - if (it == STRING_FORMAT_RULES.end()) { - _errors.push_back("Rule " + dep + " not known"); - continue; - } - } - if (_rules.find(dep) == _rules.end()) { - _add_primitive(dep, it->second); - } - } - return n; - } - -public: - SchemaConverter( - const std::function & fetch_json, - bool dotall, - bool compact_spaces) - : _fetch_json(fetch_json), _dotall(dotall) - { - _rules["space"] = compact_spaces ? "\" \"?" : SPACE_RULE; - } - - void resolve_refs(json & schema, const std::string & url) { - /* - * Resolves all $ref fields in the given schema, fetching any remote schemas, - * replacing each $ref with absolute reference URL and populates _refs with the - * respective referenced (sub)schema dictionaries. 
- */ - std::function visit_refs = [&](json & n) { - if (n.is_array()) { - for (auto & x : n) { - visit_refs(x); - } - } else if (n.is_object()) { - if (n.contains("$ref")) { - std::string ref = n["$ref"]; - if (_refs.find(ref) == _refs.end()) { - json target; - if (ref.find("https://") == 0) { - std::string base_url = ref.substr(0, ref.find('#')); - auto it = _refs.find(base_url); - if (it != _refs.end()) { - target = it->second; - } else { - // Fetch the referenced schema and resolve its refs - auto referenced = _fetch_json(ref); - resolve_refs(referenced, base_url); - _refs[base_url] = referenced; - } - if (ref.find('#') == std::string::npos || ref.substr(ref.find('#') + 1).empty()) { - return; - } - } else if (ref.find("#/") == 0) { - target = schema; - n["$ref"] = url + ref; - ref = url + ref; - } else { - _errors.push_back("Unsupported ref: " + ref); - return; - } - std::string pointer = ref.substr(ref.find('#') + 1); - std::vector tokens = string_split(pointer, "/"); - for (size_t i = 1; i < tokens.size(); ++i) { - std::string sel = tokens[i]; - if (target.is_null() || !target.contains(sel)) { - _errors.push_back("Error resolving ref " + ref + ": " + sel + " not in " + target.dump()); - return; - } - target = target[sel]; - } - _refs[ref] = target; - } - } else { - for (auto & kv : n.items()) { - visit_refs(kv.value()); - } - } - } - }; - - visit_refs(schema); - } - - std::string _generate_constant_rule(const json & value) { - return format_literal(value.dump()); - } - - std::string visit(const json & schema, const std::string & name) { - json schema_type = schema.contains("type") ? schema["type"] : json(); - std::string schema_format = schema.contains("format") ? schema["format"].get() : ""; - std::string rule_name = is_reserved_name(name) ? name + "-" : name.empty() ? "root" : name; - - if (schema.contains("$ref")) { - return _add_rule(rule_name, _resolve_ref(schema["$ref"])); - } else if (schema.contains("oneOf") || schema.contains("anyOf")) { - std::vector alt_schemas = schema.contains("oneOf") ? schema["oneOf"].get>() : schema["anyOf"].get>(); - return _add_rule(rule_name, _generate_union_rule(name, alt_schemas)); - } else if (schema_type.is_array()) { - std::vector schema_types; - for (const auto & t : schema_type) { - json schema_copy(schema); - schema_copy["type"] = t; - schema_types.push_back(schema_copy); - } - return _add_rule(rule_name, _generate_union_rule(name, schema_types)); - } else if (schema.contains("const")) { - return _add_rule(rule_name, _generate_constant_rule(schema["const"]) + " space"); - } else if (schema.contains("enum")) { - std::vector enum_values; - for (const auto & v : schema["enum"]) { - enum_values.push_back(_generate_constant_rule(v)); - } - return _add_rule(rule_name, "(" + string_join(enum_values, " | ") + ") space"); - } else if ((schema_type.is_null() || schema_type == "object") - && (schema.contains("properties") || - (schema.contains("additionalProperties") && schema["additionalProperties"] != true))) { - std::unordered_set required; - if (schema.contains("required") && schema["required"].is_array()) { - for (const auto & item : schema["required"]) { - if (item.is_string()) { - required.insert(item.get()); - } - } - } - std::vector> properties; - if (schema.contains("properties")) { - for (const auto & prop : schema["properties"].items()) { - properties.emplace_back(prop.key(), prop.value()); - } - } - return _add_rule(rule_name, - _build_object_rule( - properties, required, name, - schema.contains("additionalProperties") ? 
schema["additionalProperties"] : json())); - } else if ((schema_type.is_null() || schema_type == "object") && schema.contains("allOf")) { - std::unordered_set required; - std::vector> properties; - std::string hybrid_name = name; - std::function add_component = [&](const json & comp_schema, bool is_required) { - if (comp_schema.contains("$ref")) { - add_component(_refs[comp_schema["$ref"]], is_required); - } else if (comp_schema.contains("properties")) { - for (const auto & prop : comp_schema["properties"].items()) { - properties.emplace_back(prop.key(), prop.value()); - if (is_required) { - required.insert(prop.key()); - } - } - } else { - // todo warning - } - }; - for (auto & t : schema["allOf"]) { - if (t.contains("anyOf")) { - for (auto & tt : t["anyOf"]) { - add_component(tt, false); - } - } else { - add_component(t, true); - } - } - return _add_rule(rule_name, _build_object_rule(properties, required, hybrid_name, json())); - } else if ((schema_type.is_null() || schema_type == "array") && (schema.contains("items") || schema.contains("prefixItems"))) { - json items = schema.contains("items") ? schema["items"] : schema["prefixItems"]; - if (items.is_array()) { - std::string rule = "\"[\" space "; - for (size_t i = 0; i < items.size(); i++) { - if (i > 0) { - rule += " \",\" space "; - } - rule += visit(items[i], name + (name.empty() ? "" : "-") + "tuple-" + std::to_string(i)); - } - rule += " \"]\" space"; - return _add_rule(rule_name, rule); - } else { - std::string item_rule_name = visit(items, name + (name.empty() ? "" : "-") + "item"); - int min_items = schema.contains("minItems") ? schema["minItems"].get() : 0; - json max_items_json = schema.contains("maxItems") ? schema["maxItems"] : json(); - int max_items = max_items_json.is_number_integer() ? max_items_json.get() : std::numeric_limits::max(); - - return _add_rule(rule_name, "\"[\" space " + build_repetition(item_rule_name, min_items, max_items, "\",\" space") + " \"]\" space"); - } - } else if ((schema_type.is_null() || schema_type == "string") && schema.contains("pattern")) { - return _visit_pattern(schema["pattern"], rule_name); - } else if ((schema_type.is_null() || schema_type == "string") && std::regex_match(schema_format, std::regex("^uuid[1-5]?$"))) { - return _add_primitive(rule_name == "root" ? "root" : schema_format, PRIMITIVE_RULES.at("uuid")); - } else if ((schema_type.is_null() || schema_type == "string") && STRING_FORMAT_RULES.find(schema_format + "-string") != STRING_FORMAT_RULES.end()) { - auto prim_name = schema_format + "-string"; - return _add_rule(rule_name, _add_primitive(prim_name, STRING_FORMAT_RULES.at(prim_name))); - } else if (schema_type == "string" && (schema.contains("minLength") || schema.contains("maxLength"))) { - std::string char_rule = _add_primitive("char", PRIMITIVE_RULES.at("char")); - int min_len = schema.contains("minLength") ? schema["minLength"].get() : 0; - int max_len = schema.contains("maxLength") ? 
schema["maxLength"].get() : std::numeric_limits::max(); - return _add_rule(rule_name, "\"\\\"\" " + build_repetition(char_rule, min_len, max_len) + " \"\\\"\" space"); - } else if (schema_type == "integer" && (schema.contains("minimum") || schema.contains("exclusiveMinimum") || schema.contains("maximum") || schema.contains("exclusiveMaximum"))) { - int min_value = std::numeric_limits::min(); - int max_value = std::numeric_limits::max(); - if (schema.contains("minimum")) { - min_value = schema["minimum"].get(); - } else if (schema.contains("exclusiveMinimum")) { - min_value = schema["exclusiveMinimum"].get() + 1; - } - if (schema.contains("maximum")) { - max_value = schema["maximum"].get(); - } else if (schema.contains("exclusiveMaximum")) { - max_value = schema["exclusiveMaximum"].get() - 1; - } - std::stringstream out; - out << "("; - _build_min_max_int(min_value, max_value, out); - out << ") space"; - return _add_rule(rule_name, out.str()); - } else if (schema.empty() || schema_type == "object") { - return _add_rule(rule_name, _add_primitive("object", PRIMITIVE_RULES.at("object"))); - } else { - if (!schema_type.is_string() || PRIMITIVE_RULES.find(schema_type.get()) == PRIMITIVE_RULES.end()) { - _errors.push_back("Unrecognized schema: " + schema.dump()); - return ""; - } - // TODO: support minimum, maximum, exclusiveMinimum, exclusiveMaximum at least for zero - return _add_primitive(rule_name == "root" ? "root" : schema_type.get(), PRIMITIVE_RULES.at(schema_type.get())); - } - } - - void check_errors() { - if (!_errors.empty()) { - throw std::runtime_error("JSON schema conversion failed:\n" + string_join(_errors, "\n")); - } - if (!_warnings.empty()) { - fprintf(stderr, "WARNING: JSON schema conversion was incomplete: %s\n", string_join(_warnings, "; ").c_str()); - } - } - - std::string format_grammar() { - std::stringstream ss; - for (const auto & kv : _rules) { - ss << kv.first << " ::= " << kv.second << std::endl; - } - return ss.str(); - } -}; - -std::string json_schema_to_grammar(const json & schema, bool force_gbnf) { -#ifdef LLAMA_USE_LLGUIDANCE - if (!force_gbnf) { - return "%llguidance {}\nstart: %json " + schema.dump(); - } -#else - (void)force_gbnf; -#endif // LLAMA_USE_LLGUIDANCE - return build_grammar([&](const common_grammar_builder & callbacks) { - auto copy = schema; - callbacks.resolve_refs(copy); - callbacks.add_schema("", copy); - }); -} - -std::string build_grammar(const std::function & cb, const common_grammar_options & options) { - SchemaConverter converter([&](const std::string &) { return json(); }, options.dotall, options.compact_spaces); - common_grammar_builder builder { - /* .add_rule = */ [&](const std::string & name, const std::string & rule) { - return converter._add_rule(name, rule); - }, - /* .add_schema = */ [&](const std::string & name, const nlohmann::ordered_json & schema) { - return converter.visit(schema, name == "root" ? 
"" : name); - }, - /* .resolve_refs = */ [&](nlohmann::ordered_json & schema) { - converter.resolve_refs(schema, ""); - } - }; - cb(builder); - converter.check_errors(); - return converter.format_grammar(); -} diff --git a/common/json-schema-to-grammar.h b/common/json-schema-to-grammar.h deleted file mode 100644 index 62a3b0a44..000000000 --- a/common/json-schema-to-grammar.h +++ /dev/null @@ -1,22 +0,0 @@ -#pragma once - -#include "ggml.h" -// Change JSON_ASSERT from assert() to GGML_ASSERT: -#define JSON_ASSERT GGML_ASSERT -#include "json.hpp" - -std::string json_schema_to_grammar(const nlohmann::ordered_json & schema, - bool force_gbnf = false); - -struct common_grammar_builder { - std::function add_rule; - std::function add_schema; - std::function resolve_refs; -}; - -struct common_grammar_options { - bool dotall = false; - bool compact_spaces = false; -}; - -std::string build_grammar(const std::function & cb, const common_grammar_options & options = {}); diff --git a/common/json.hpp b/common/json.hpp deleted file mode 100644 index a858728c4..000000000 --- a/common/json.hpp +++ /dev/null @@ -1,24766 +0,0 @@ -// __ _____ _____ _____ -// __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.11.3 -// |_____|_____|_____|_|___| https://github.com/nlohmann/json -// -// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann -// SPDX-License-Identifier: MIT - -/****************************************************************************\ - * Note on documentation: The source files contain links to the online * - * documentation of the public API at https://json.nlohmann.me. This URL * - * contains the most recent documentation and should also be applicable to * - * previous versions; documentation for deprecated functions is not * - * removed, but marked deprecated. See "Generate documentation" section in * - * file docs/README.md. * -\****************************************************************************/ - -#ifndef INCLUDE_NLOHMANN_JSON_HPP_ -#define INCLUDE_NLOHMANN_JSON_HPP_ - -#include // all_of, find, for_each -#include // nullptr_t, ptrdiff_t, size_t -#include // hash, less -#include // initializer_list -#ifndef JSON_NO_IO - #include // istream, ostream -#endif // JSON_NO_IO -#include // random_access_iterator_tag -#include // unique_ptr -#include // string, stoi, to_string -#include // declval, forward, move, pair, swap -#include // vector - -// #include -// __ _____ _____ _____ -// __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.11.3 -// |_____|_____|_____|_|___| https://github.com/nlohmann/json -// -// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann -// SPDX-License-Identifier: MIT - - - -#include - -// #include -// __ _____ _____ _____ -// __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.11.3 -// |_____|_____|_____|_|___| https://github.com/nlohmann/json -// -// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann -// SPDX-License-Identifier: MIT - - - -// This file contains all macro definitions affecting or depending on the ABI - -#ifndef JSON_SKIP_LIBRARY_VERSION_CHECK - #if defined(NLOHMANN_JSON_VERSION_MAJOR) && defined(NLOHMANN_JSON_VERSION_MINOR) && defined(NLOHMANN_JSON_VERSION_PATCH) - #if NLOHMANN_JSON_VERSION_MAJOR != 3 || NLOHMANN_JSON_VERSION_MINOR != 11 || NLOHMANN_JSON_VERSION_PATCH != 3 - #warning "Already included a different version of the library!" 
- #endif - #endif -#endif - -#define NLOHMANN_JSON_VERSION_MAJOR 3 // NOLINT(modernize-macro-to-enum) -#define NLOHMANN_JSON_VERSION_MINOR 11 // NOLINT(modernize-macro-to-enum) -#define NLOHMANN_JSON_VERSION_PATCH 3 // NOLINT(modernize-macro-to-enum) - -#ifndef JSON_DIAGNOSTICS - #define JSON_DIAGNOSTICS 0 -#endif - -#ifndef JSON_USE_LEGACY_DISCARDED_VALUE_COMPARISON - #define JSON_USE_LEGACY_DISCARDED_VALUE_COMPARISON 0 -#endif - -#if JSON_DIAGNOSTICS - #define NLOHMANN_JSON_ABI_TAG_DIAGNOSTICS _diag -#else - #define NLOHMANN_JSON_ABI_TAG_DIAGNOSTICS -#endif - -#if JSON_USE_LEGACY_DISCARDED_VALUE_COMPARISON - #define NLOHMANN_JSON_ABI_TAG_LEGACY_DISCARDED_VALUE_COMPARISON _ldvcmp -#else - #define NLOHMANN_JSON_ABI_TAG_LEGACY_DISCARDED_VALUE_COMPARISON -#endif - -#ifndef NLOHMANN_JSON_NAMESPACE_NO_VERSION - #define NLOHMANN_JSON_NAMESPACE_NO_VERSION 0 -#endif - -// Construct the namespace ABI tags component -#define NLOHMANN_JSON_ABI_TAGS_CONCAT_EX(a, b) json_abi ## a ## b -#define NLOHMANN_JSON_ABI_TAGS_CONCAT(a, b) \ - NLOHMANN_JSON_ABI_TAGS_CONCAT_EX(a, b) - -#define NLOHMANN_JSON_ABI_TAGS \ - NLOHMANN_JSON_ABI_TAGS_CONCAT( \ - NLOHMANN_JSON_ABI_TAG_DIAGNOSTICS, \ - NLOHMANN_JSON_ABI_TAG_LEGACY_DISCARDED_VALUE_COMPARISON) - -// Construct the namespace version component -#define NLOHMANN_JSON_NAMESPACE_VERSION_CONCAT_EX(major, minor, patch) \ - _v ## major ## _ ## minor ## _ ## patch -#define NLOHMANN_JSON_NAMESPACE_VERSION_CONCAT(major, minor, patch) \ - NLOHMANN_JSON_NAMESPACE_VERSION_CONCAT_EX(major, minor, patch) - -#if NLOHMANN_JSON_NAMESPACE_NO_VERSION -#define NLOHMANN_JSON_NAMESPACE_VERSION -#else -#define NLOHMANN_JSON_NAMESPACE_VERSION \ - NLOHMANN_JSON_NAMESPACE_VERSION_CONCAT(NLOHMANN_JSON_VERSION_MAJOR, \ - NLOHMANN_JSON_VERSION_MINOR, \ - NLOHMANN_JSON_VERSION_PATCH) -#endif - -// Combine namespace components -#define NLOHMANN_JSON_NAMESPACE_CONCAT_EX(a, b) a ## b -#define NLOHMANN_JSON_NAMESPACE_CONCAT(a, b) \ - NLOHMANN_JSON_NAMESPACE_CONCAT_EX(a, b) - -#ifndef NLOHMANN_JSON_NAMESPACE -#define NLOHMANN_JSON_NAMESPACE \ - nlohmann::NLOHMANN_JSON_NAMESPACE_CONCAT( \ - NLOHMANN_JSON_ABI_TAGS, \ - NLOHMANN_JSON_NAMESPACE_VERSION) -#endif - -#ifndef NLOHMANN_JSON_NAMESPACE_BEGIN -#define NLOHMANN_JSON_NAMESPACE_BEGIN \ - namespace nlohmann \ - { \ - inline namespace NLOHMANN_JSON_NAMESPACE_CONCAT( \ - NLOHMANN_JSON_ABI_TAGS, \ - NLOHMANN_JSON_NAMESPACE_VERSION) \ - { -#endif - -#ifndef NLOHMANN_JSON_NAMESPACE_END -#define NLOHMANN_JSON_NAMESPACE_END \ - } /* namespace (inline namespace) NOLINT(readability/namespace) */ \ - } // namespace nlohmann -#endif - -// #include -// __ _____ _____ _____ -// __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.11.3 -// |_____|_____|_____|_|___| https://github.com/nlohmann/json -// -// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann -// SPDX-License-Identifier: MIT - - - -#include // transform -#include // array -#include // forward_list -#include // inserter, front_inserter, end -#include // map -#include // string -#include // tuple, make_tuple -#include // is_arithmetic, is_same, is_enum, underlying_type, is_convertible -#include // unordered_map -#include // pair, declval -#include // valarray - -// #include -// __ _____ _____ _____ -// __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.11.3 -// |_____|_____|_____|_|___| https://github.com/nlohmann/json -// -// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann -// SPDX-License-Identifier: MIT - - - -#include // nullptr_t -#include // exception -#if 
JSON_DIAGNOSTICS - #include // accumulate -#endif -#include // runtime_error -#include // to_string -#include // vector - -// #include -// __ _____ _____ _____ -// __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.11.3 -// |_____|_____|_____|_|___| https://github.com/nlohmann/json -// -// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann -// SPDX-License-Identifier: MIT - - - -#include // array -#include // size_t -#include // uint8_t -#include // string - -// #include -// __ _____ _____ _____ -// __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.11.3 -// |_____|_____|_____|_|___| https://github.com/nlohmann/json -// -// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann -// SPDX-License-Identifier: MIT - - - -#include // declval, pair -// #include -// __ _____ _____ _____ -// __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.11.3 -// |_____|_____|_____|_|___| https://github.com/nlohmann/json -// -// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann -// SPDX-License-Identifier: MIT - - - -#include - -// #include -// __ _____ _____ _____ -// __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.11.3 -// |_____|_____|_____|_|___| https://github.com/nlohmann/json -// -// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann -// SPDX-License-Identifier: MIT - - - -// #include - - -NLOHMANN_JSON_NAMESPACE_BEGIN -namespace detail -{ - -template struct make_void -{ - using type = void; -}; -template using void_t = typename make_void::type; - -} // namespace detail -NLOHMANN_JSON_NAMESPACE_END - - -NLOHMANN_JSON_NAMESPACE_BEGIN -namespace detail -{ - -// https://en.cppreference.com/w/cpp/experimental/is_detected -struct nonesuch -{ - nonesuch() = delete; - ~nonesuch() = delete; - nonesuch(nonesuch const&) = delete; - nonesuch(nonesuch const&&) = delete; - void operator=(nonesuch const&) = delete; - void operator=(nonesuch&&) = delete; -}; - -template class Op, - class... Args> -struct detector -{ - using value_t = std::false_type; - using type = Default; -}; - -template class Op, class... Args> -struct detector>, Op, Args...> -{ - using value_t = std::true_type; - using type = Op; -}; - -template class Op, class... Args> -using is_detected = typename detector::value_t; - -template class Op, class... Args> -struct is_detected_lazy : is_detected { }; - -template class Op, class... Args> -using detected_t = typename detector::type; - -template class Op, class... Args> -using detected_or = detector; - -template class Op, class... Args> -using detected_or_t = typename detected_or::type; - -template class Op, class... Args> -using is_detected_exact = std::is_same>; - -template class Op, class... 
Args> -using is_detected_convertible = - std::is_convertible, To>; - -} // namespace detail -NLOHMANN_JSON_NAMESPACE_END - -// #include - - -// __ _____ _____ _____ -// __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.11.3 -// |_____|_____|_____|_|___| https://github.com/nlohmann/json -// -// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann -// SPDX-FileCopyrightText: 2016-2021 Evan Nemerson -// SPDX-License-Identifier: MIT - -/* Hedley - https://nemequ.github.io/hedley - * Created by Evan Nemerson - */ - -#if !defined(JSON_HEDLEY_VERSION) || (JSON_HEDLEY_VERSION < 15) -#if defined(JSON_HEDLEY_VERSION) - #undef JSON_HEDLEY_VERSION -#endif -#define JSON_HEDLEY_VERSION 15 - -#if defined(JSON_HEDLEY_STRINGIFY_EX) - #undef JSON_HEDLEY_STRINGIFY_EX -#endif -#define JSON_HEDLEY_STRINGIFY_EX(x) #x - -#if defined(JSON_HEDLEY_STRINGIFY) - #undef JSON_HEDLEY_STRINGIFY -#endif -#define JSON_HEDLEY_STRINGIFY(x) JSON_HEDLEY_STRINGIFY_EX(x) - -#if defined(JSON_HEDLEY_CONCAT_EX) - #undef JSON_HEDLEY_CONCAT_EX -#endif -#define JSON_HEDLEY_CONCAT_EX(a,b) a##b - -#if defined(JSON_HEDLEY_CONCAT) - #undef JSON_HEDLEY_CONCAT -#endif -#define JSON_HEDLEY_CONCAT(a,b) JSON_HEDLEY_CONCAT_EX(a,b) - -#if defined(JSON_HEDLEY_CONCAT3_EX) - #undef JSON_HEDLEY_CONCAT3_EX -#endif -#define JSON_HEDLEY_CONCAT3_EX(a,b,c) a##b##c - -#if defined(JSON_HEDLEY_CONCAT3) - #undef JSON_HEDLEY_CONCAT3 -#endif -#define JSON_HEDLEY_CONCAT3(a,b,c) JSON_HEDLEY_CONCAT3_EX(a,b,c) - -#if defined(JSON_HEDLEY_VERSION_ENCODE) - #undef JSON_HEDLEY_VERSION_ENCODE -#endif -#define JSON_HEDLEY_VERSION_ENCODE(major,minor,revision) (((major) * 1000000) + ((minor) * 1000) + (revision)) - -#if defined(JSON_HEDLEY_VERSION_DECODE_MAJOR) - #undef JSON_HEDLEY_VERSION_DECODE_MAJOR -#endif -#define JSON_HEDLEY_VERSION_DECODE_MAJOR(version) ((version) / 1000000) - -#if defined(JSON_HEDLEY_VERSION_DECODE_MINOR) - #undef JSON_HEDLEY_VERSION_DECODE_MINOR -#endif -#define JSON_HEDLEY_VERSION_DECODE_MINOR(version) (((version) % 1000000) / 1000) - -#if defined(JSON_HEDLEY_VERSION_DECODE_REVISION) - #undef JSON_HEDLEY_VERSION_DECODE_REVISION -#endif -#define JSON_HEDLEY_VERSION_DECODE_REVISION(version) ((version) % 1000) - -#if defined(JSON_HEDLEY_GNUC_VERSION) - #undef JSON_HEDLEY_GNUC_VERSION -#endif -#if defined(__GNUC__) && defined(__GNUC_PATCHLEVEL__) - #define JSON_HEDLEY_GNUC_VERSION JSON_HEDLEY_VERSION_ENCODE(__GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__) -#elif defined(__GNUC__) - #define JSON_HEDLEY_GNUC_VERSION JSON_HEDLEY_VERSION_ENCODE(__GNUC__, __GNUC_MINOR__, 0) -#endif - -#if defined(JSON_HEDLEY_GNUC_VERSION_CHECK) - #undef JSON_HEDLEY_GNUC_VERSION_CHECK -#endif -#if defined(JSON_HEDLEY_GNUC_VERSION) - #define JSON_HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_GNUC_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else - #define JSON_HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(JSON_HEDLEY_MSVC_VERSION) - #undef JSON_HEDLEY_MSVC_VERSION -#endif -#if defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 140000000) && !defined(__ICL) - #define JSON_HEDLEY_MSVC_VERSION JSON_HEDLEY_VERSION_ENCODE(_MSC_FULL_VER / 10000000, (_MSC_FULL_VER % 10000000) / 100000, (_MSC_FULL_VER % 100000) / 100) -#elif defined(_MSC_FULL_VER) && !defined(__ICL) - #define JSON_HEDLEY_MSVC_VERSION JSON_HEDLEY_VERSION_ENCODE(_MSC_FULL_VER / 1000000, (_MSC_FULL_VER % 1000000) / 10000, (_MSC_FULL_VER % 10000) / 10) -#elif defined(_MSC_VER) && !defined(__ICL) - #define JSON_HEDLEY_MSVC_VERSION 
JSON_HEDLEY_VERSION_ENCODE(_MSC_VER / 100, _MSC_VER % 100, 0) -#endif - -#if defined(JSON_HEDLEY_MSVC_VERSION_CHECK) - #undef JSON_HEDLEY_MSVC_VERSION_CHECK -#endif -#if !defined(JSON_HEDLEY_MSVC_VERSION) - #define JSON_HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (0) -#elif defined(_MSC_VER) && (_MSC_VER >= 1400) - #define JSON_HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_FULL_VER >= ((major * 10000000) + (minor * 100000) + (patch))) -#elif defined(_MSC_VER) && (_MSC_VER >= 1200) - #define JSON_HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_FULL_VER >= ((major * 1000000) + (minor * 10000) + (patch))) -#else - #define JSON_HEDLEY_MSVC_VERSION_CHECK(major,minor,patch) (_MSC_VER >= ((major * 100) + (minor))) -#endif - -#if defined(JSON_HEDLEY_INTEL_VERSION) - #undef JSON_HEDLEY_INTEL_VERSION -#endif -#if defined(__INTEL_COMPILER) && defined(__INTEL_COMPILER_UPDATE) && !defined(__ICL) - #define JSON_HEDLEY_INTEL_VERSION JSON_HEDLEY_VERSION_ENCODE(__INTEL_COMPILER / 100, __INTEL_COMPILER % 100, __INTEL_COMPILER_UPDATE) -#elif defined(__INTEL_COMPILER) && !defined(__ICL) - #define JSON_HEDLEY_INTEL_VERSION JSON_HEDLEY_VERSION_ENCODE(__INTEL_COMPILER / 100, __INTEL_COMPILER % 100, 0) -#endif - -#if defined(JSON_HEDLEY_INTEL_VERSION_CHECK) - #undef JSON_HEDLEY_INTEL_VERSION_CHECK -#endif -#if defined(JSON_HEDLEY_INTEL_VERSION) - #define JSON_HEDLEY_INTEL_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_INTEL_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else - #define JSON_HEDLEY_INTEL_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(JSON_HEDLEY_INTEL_CL_VERSION) - #undef JSON_HEDLEY_INTEL_CL_VERSION -#endif -#if defined(__INTEL_COMPILER) && defined(__INTEL_COMPILER_UPDATE) && defined(__ICL) - #define JSON_HEDLEY_INTEL_CL_VERSION JSON_HEDLEY_VERSION_ENCODE(__INTEL_COMPILER, __INTEL_COMPILER_UPDATE, 0) -#endif - -#if defined(JSON_HEDLEY_INTEL_CL_VERSION_CHECK) - #undef JSON_HEDLEY_INTEL_CL_VERSION_CHECK -#endif -#if defined(JSON_HEDLEY_INTEL_CL_VERSION) - #define JSON_HEDLEY_INTEL_CL_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_INTEL_CL_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else - #define JSON_HEDLEY_INTEL_CL_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(JSON_HEDLEY_PGI_VERSION) - #undef JSON_HEDLEY_PGI_VERSION -#endif -#if defined(__PGI) && defined(__PGIC__) && defined(__PGIC_MINOR__) && defined(__PGIC_PATCHLEVEL__) - #define JSON_HEDLEY_PGI_VERSION JSON_HEDLEY_VERSION_ENCODE(__PGIC__, __PGIC_MINOR__, __PGIC_PATCHLEVEL__) -#endif - -#if defined(JSON_HEDLEY_PGI_VERSION_CHECK) - #undef JSON_HEDLEY_PGI_VERSION_CHECK -#endif -#if defined(JSON_HEDLEY_PGI_VERSION) - #define JSON_HEDLEY_PGI_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_PGI_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else - #define JSON_HEDLEY_PGI_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(JSON_HEDLEY_SUNPRO_VERSION) - #undef JSON_HEDLEY_SUNPRO_VERSION -#endif -#if defined(__SUNPRO_C) && (__SUNPRO_C > 0x1000) - #define JSON_HEDLEY_SUNPRO_VERSION JSON_HEDLEY_VERSION_ENCODE((((__SUNPRO_C >> 16) & 0xf) * 10) + ((__SUNPRO_C >> 12) & 0xf), (((__SUNPRO_C >> 8) & 0xf) * 10) + ((__SUNPRO_C >> 4) & 0xf), (__SUNPRO_C & 0xf) * 10) -#elif defined(__SUNPRO_C) - #define JSON_HEDLEY_SUNPRO_VERSION JSON_HEDLEY_VERSION_ENCODE((__SUNPRO_C >> 8) & 0xf, (__SUNPRO_C >> 4) & 0xf, (__SUNPRO_C) & 0xf) -#elif defined(__SUNPRO_CC) && (__SUNPRO_CC > 0x1000) - #define JSON_HEDLEY_SUNPRO_VERSION JSON_HEDLEY_VERSION_ENCODE((((__SUNPRO_CC >> 16) & 0xf) * 10) + 
((__SUNPRO_CC >> 12) & 0xf), (((__SUNPRO_CC >> 8) & 0xf) * 10) + ((__SUNPRO_CC >> 4) & 0xf), (__SUNPRO_CC & 0xf) * 10) -#elif defined(__SUNPRO_CC) - #define JSON_HEDLEY_SUNPRO_VERSION JSON_HEDLEY_VERSION_ENCODE((__SUNPRO_CC >> 8) & 0xf, (__SUNPRO_CC >> 4) & 0xf, (__SUNPRO_CC) & 0xf) -#endif - -#if defined(JSON_HEDLEY_SUNPRO_VERSION_CHECK) - #undef JSON_HEDLEY_SUNPRO_VERSION_CHECK -#endif -#if defined(JSON_HEDLEY_SUNPRO_VERSION) - #define JSON_HEDLEY_SUNPRO_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_SUNPRO_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else - #define JSON_HEDLEY_SUNPRO_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(JSON_HEDLEY_EMSCRIPTEN_VERSION) - #undef JSON_HEDLEY_EMSCRIPTEN_VERSION -#endif -#if defined(__EMSCRIPTEN__) - #define JSON_HEDLEY_EMSCRIPTEN_VERSION JSON_HEDLEY_VERSION_ENCODE(__EMSCRIPTEN_major__, __EMSCRIPTEN_minor__, __EMSCRIPTEN_tiny__) -#endif - -#if defined(JSON_HEDLEY_EMSCRIPTEN_VERSION_CHECK) - #undef JSON_HEDLEY_EMSCRIPTEN_VERSION_CHECK -#endif -#if defined(JSON_HEDLEY_EMSCRIPTEN_VERSION) - #define JSON_HEDLEY_EMSCRIPTEN_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_EMSCRIPTEN_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else - #define JSON_HEDLEY_EMSCRIPTEN_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(JSON_HEDLEY_ARM_VERSION) - #undef JSON_HEDLEY_ARM_VERSION -#endif -#if defined(__CC_ARM) && defined(__ARMCOMPILER_VERSION) - #define JSON_HEDLEY_ARM_VERSION JSON_HEDLEY_VERSION_ENCODE(__ARMCOMPILER_VERSION / 1000000, (__ARMCOMPILER_VERSION % 1000000) / 10000, (__ARMCOMPILER_VERSION % 10000) / 100) -#elif defined(__CC_ARM) && defined(__ARMCC_VERSION) - #define JSON_HEDLEY_ARM_VERSION JSON_HEDLEY_VERSION_ENCODE(__ARMCC_VERSION / 1000000, (__ARMCC_VERSION % 1000000) / 10000, (__ARMCC_VERSION % 10000) / 100) -#endif - -#if defined(JSON_HEDLEY_ARM_VERSION_CHECK) - #undef JSON_HEDLEY_ARM_VERSION_CHECK -#endif -#if defined(JSON_HEDLEY_ARM_VERSION) - #define JSON_HEDLEY_ARM_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_ARM_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else - #define JSON_HEDLEY_ARM_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(JSON_HEDLEY_IBM_VERSION) - #undef JSON_HEDLEY_IBM_VERSION -#endif -#if defined(__ibmxl__) - #define JSON_HEDLEY_IBM_VERSION JSON_HEDLEY_VERSION_ENCODE(__ibmxl_version__, __ibmxl_release__, __ibmxl_modification__) -#elif defined(__xlC__) && defined(__xlC_ver__) - #define JSON_HEDLEY_IBM_VERSION JSON_HEDLEY_VERSION_ENCODE(__xlC__ >> 8, __xlC__ & 0xff, (__xlC_ver__ >> 8) & 0xff) -#elif defined(__xlC__) - #define JSON_HEDLEY_IBM_VERSION JSON_HEDLEY_VERSION_ENCODE(__xlC__ >> 8, __xlC__ & 0xff, 0) -#endif - -#if defined(JSON_HEDLEY_IBM_VERSION_CHECK) - #undef JSON_HEDLEY_IBM_VERSION_CHECK -#endif -#if defined(JSON_HEDLEY_IBM_VERSION) - #define JSON_HEDLEY_IBM_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_IBM_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else - #define JSON_HEDLEY_IBM_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(JSON_HEDLEY_TI_VERSION) - #undef JSON_HEDLEY_TI_VERSION -#endif -#if \ - defined(__TI_COMPILER_VERSION__) && \ - ( \ - defined(__TMS470__) || defined(__TI_ARM__) || \ - defined(__MSP430__) || \ - defined(__TMS320C2000__) \ - ) -#if (__TI_COMPILER_VERSION__ >= 16000000) - #define JSON_HEDLEY_TI_VERSION JSON_HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) -#endif -#endif - -#if 
defined(JSON_HEDLEY_TI_VERSION_CHECK) - #undef JSON_HEDLEY_TI_VERSION_CHECK -#endif -#if defined(JSON_HEDLEY_TI_VERSION) - #define JSON_HEDLEY_TI_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_TI_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else - #define JSON_HEDLEY_TI_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(JSON_HEDLEY_TI_CL2000_VERSION) - #undef JSON_HEDLEY_TI_CL2000_VERSION -#endif -#if defined(__TI_COMPILER_VERSION__) && defined(__TMS320C2000__) - #define JSON_HEDLEY_TI_CL2000_VERSION JSON_HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) -#endif - -#if defined(JSON_HEDLEY_TI_CL2000_VERSION_CHECK) - #undef JSON_HEDLEY_TI_CL2000_VERSION_CHECK -#endif -#if defined(JSON_HEDLEY_TI_CL2000_VERSION) - #define JSON_HEDLEY_TI_CL2000_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_TI_CL2000_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else - #define JSON_HEDLEY_TI_CL2000_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(JSON_HEDLEY_TI_CL430_VERSION) - #undef JSON_HEDLEY_TI_CL430_VERSION -#endif -#if defined(__TI_COMPILER_VERSION__) && defined(__MSP430__) - #define JSON_HEDLEY_TI_CL430_VERSION JSON_HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) -#endif - -#if defined(JSON_HEDLEY_TI_CL430_VERSION_CHECK) - #undef JSON_HEDLEY_TI_CL430_VERSION_CHECK -#endif -#if defined(JSON_HEDLEY_TI_CL430_VERSION) - #define JSON_HEDLEY_TI_CL430_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_TI_CL430_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else - #define JSON_HEDLEY_TI_CL430_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(JSON_HEDLEY_TI_ARMCL_VERSION) - #undef JSON_HEDLEY_TI_ARMCL_VERSION -#endif -#if defined(__TI_COMPILER_VERSION__) && (defined(__TMS470__) || defined(__TI_ARM__)) - #define JSON_HEDLEY_TI_ARMCL_VERSION JSON_HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) -#endif - -#if defined(JSON_HEDLEY_TI_ARMCL_VERSION_CHECK) - #undef JSON_HEDLEY_TI_ARMCL_VERSION_CHECK -#endif -#if defined(JSON_HEDLEY_TI_ARMCL_VERSION) - #define JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_TI_ARMCL_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else - #define JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(JSON_HEDLEY_TI_CL6X_VERSION) - #undef JSON_HEDLEY_TI_CL6X_VERSION -#endif -#if defined(__TI_COMPILER_VERSION__) && defined(__TMS320C6X__) - #define JSON_HEDLEY_TI_CL6X_VERSION JSON_HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) -#endif - -#if defined(JSON_HEDLEY_TI_CL6X_VERSION_CHECK) - #undef JSON_HEDLEY_TI_CL6X_VERSION_CHECK -#endif -#if defined(JSON_HEDLEY_TI_CL6X_VERSION) - #define JSON_HEDLEY_TI_CL6X_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_TI_CL6X_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else - #define JSON_HEDLEY_TI_CL6X_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(JSON_HEDLEY_TI_CL7X_VERSION) - #undef JSON_HEDLEY_TI_CL7X_VERSION -#endif -#if defined(__TI_COMPILER_VERSION__) && defined(__C7000__) - #define JSON_HEDLEY_TI_CL7X_VERSION JSON_HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) 
-#endif - -#if defined(JSON_HEDLEY_TI_CL7X_VERSION_CHECK) - #undef JSON_HEDLEY_TI_CL7X_VERSION_CHECK -#endif -#if defined(JSON_HEDLEY_TI_CL7X_VERSION) - #define JSON_HEDLEY_TI_CL7X_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_TI_CL7X_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else - #define JSON_HEDLEY_TI_CL7X_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(JSON_HEDLEY_TI_CLPRU_VERSION) - #undef JSON_HEDLEY_TI_CLPRU_VERSION -#endif -#if defined(__TI_COMPILER_VERSION__) && defined(__PRU__) - #define JSON_HEDLEY_TI_CLPRU_VERSION JSON_HEDLEY_VERSION_ENCODE(__TI_COMPILER_VERSION__ / 1000000, (__TI_COMPILER_VERSION__ % 1000000) / 1000, (__TI_COMPILER_VERSION__ % 1000)) -#endif - -#if defined(JSON_HEDLEY_TI_CLPRU_VERSION_CHECK) - #undef JSON_HEDLEY_TI_CLPRU_VERSION_CHECK -#endif -#if defined(JSON_HEDLEY_TI_CLPRU_VERSION) - #define JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_TI_CLPRU_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else - #define JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(JSON_HEDLEY_CRAY_VERSION) - #undef JSON_HEDLEY_CRAY_VERSION -#endif -#if defined(_CRAYC) - #if defined(_RELEASE_PATCHLEVEL) - #define JSON_HEDLEY_CRAY_VERSION JSON_HEDLEY_VERSION_ENCODE(_RELEASE_MAJOR, _RELEASE_MINOR, _RELEASE_PATCHLEVEL) - #else - #define JSON_HEDLEY_CRAY_VERSION JSON_HEDLEY_VERSION_ENCODE(_RELEASE_MAJOR, _RELEASE_MINOR, 0) - #endif -#endif - -#if defined(JSON_HEDLEY_CRAY_VERSION_CHECK) - #undef JSON_HEDLEY_CRAY_VERSION_CHECK -#endif -#if defined(JSON_HEDLEY_CRAY_VERSION) - #define JSON_HEDLEY_CRAY_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_CRAY_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else - #define JSON_HEDLEY_CRAY_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(JSON_HEDLEY_IAR_VERSION) - #undef JSON_HEDLEY_IAR_VERSION -#endif -#if defined(__IAR_SYSTEMS_ICC__) - #if __VER__ > 1000 - #define JSON_HEDLEY_IAR_VERSION JSON_HEDLEY_VERSION_ENCODE((__VER__ / 1000000), ((__VER__ / 1000) % 1000), (__VER__ % 1000)) - #else - #define JSON_HEDLEY_IAR_VERSION JSON_HEDLEY_VERSION_ENCODE(__VER__ / 100, __VER__ % 100, 0) - #endif -#endif - -#if defined(JSON_HEDLEY_IAR_VERSION_CHECK) - #undef JSON_HEDLEY_IAR_VERSION_CHECK -#endif -#if defined(JSON_HEDLEY_IAR_VERSION) - #define JSON_HEDLEY_IAR_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_IAR_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else - #define JSON_HEDLEY_IAR_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(JSON_HEDLEY_TINYC_VERSION) - #undef JSON_HEDLEY_TINYC_VERSION -#endif -#if defined(__TINYC__) - #define JSON_HEDLEY_TINYC_VERSION JSON_HEDLEY_VERSION_ENCODE(__TINYC__ / 1000, (__TINYC__ / 100) % 10, __TINYC__ % 100) -#endif - -#if defined(JSON_HEDLEY_TINYC_VERSION_CHECK) - #undef JSON_HEDLEY_TINYC_VERSION_CHECK -#endif -#if defined(JSON_HEDLEY_TINYC_VERSION) - #define JSON_HEDLEY_TINYC_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_TINYC_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else - #define JSON_HEDLEY_TINYC_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(JSON_HEDLEY_DMC_VERSION) - #undef JSON_HEDLEY_DMC_VERSION -#endif -#if defined(__DMC__) - #define JSON_HEDLEY_DMC_VERSION JSON_HEDLEY_VERSION_ENCODE(__DMC__ >> 8, (__DMC__ >> 4) & 0xf, __DMC__ & 0xf) -#endif - -#if defined(JSON_HEDLEY_DMC_VERSION_CHECK) - #undef JSON_HEDLEY_DMC_VERSION_CHECK -#endif -#if defined(JSON_HEDLEY_DMC_VERSION) - #define JSON_HEDLEY_DMC_VERSION_CHECK(major,minor,patch) 
(JSON_HEDLEY_DMC_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else - #define JSON_HEDLEY_DMC_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(JSON_HEDLEY_COMPCERT_VERSION) - #undef JSON_HEDLEY_COMPCERT_VERSION -#endif -#if defined(__COMPCERT_VERSION__) - #define JSON_HEDLEY_COMPCERT_VERSION JSON_HEDLEY_VERSION_ENCODE(__COMPCERT_VERSION__ / 10000, (__COMPCERT_VERSION__ / 100) % 100, __COMPCERT_VERSION__ % 100) -#endif - -#if defined(JSON_HEDLEY_COMPCERT_VERSION_CHECK) - #undef JSON_HEDLEY_COMPCERT_VERSION_CHECK -#endif -#if defined(JSON_HEDLEY_COMPCERT_VERSION) - #define JSON_HEDLEY_COMPCERT_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_COMPCERT_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else - #define JSON_HEDLEY_COMPCERT_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(JSON_HEDLEY_PELLES_VERSION) - #undef JSON_HEDLEY_PELLES_VERSION -#endif -#if defined(__POCC__) - #define JSON_HEDLEY_PELLES_VERSION JSON_HEDLEY_VERSION_ENCODE(__POCC__ / 100, __POCC__ % 100, 0) -#endif - -#if defined(JSON_HEDLEY_PELLES_VERSION_CHECK) - #undef JSON_HEDLEY_PELLES_VERSION_CHECK -#endif -#if defined(JSON_HEDLEY_PELLES_VERSION) - #define JSON_HEDLEY_PELLES_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_PELLES_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else - #define JSON_HEDLEY_PELLES_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(JSON_HEDLEY_MCST_LCC_VERSION) - #undef JSON_HEDLEY_MCST_LCC_VERSION -#endif -#if defined(__LCC__) && defined(__LCC_MINOR__) - #define JSON_HEDLEY_MCST_LCC_VERSION JSON_HEDLEY_VERSION_ENCODE(__LCC__ / 100, __LCC__ % 100, __LCC_MINOR__) -#endif - -#if defined(JSON_HEDLEY_MCST_LCC_VERSION_CHECK) - #undef JSON_HEDLEY_MCST_LCC_VERSION_CHECK -#endif -#if defined(JSON_HEDLEY_MCST_LCC_VERSION) - #define JSON_HEDLEY_MCST_LCC_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_MCST_LCC_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else - #define JSON_HEDLEY_MCST_LCC_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(JSON_HEDLEY_GCC_VERSION) - #undef JSON_HEDLEY_GCC_VERSION -#endif -#if \ - defined(JSON_HEDLEY_GNUC_VERSION) && \ - !defined(__clang__) && \ - !defined(JSON_HEDLEY_INTEL_VERSION) && \ - !defined(JSON_HEDLEY_PGI_VERSION) && \ - !defined(JSON_HEDLEY_ARM_VERSION) && \ - !defined(JSON_HEDLEY_CRAY_VERSION) && \ - !defined(JSON_HEDLEY_TI_VERSION) && \ - !defined(JSON_HEDLEY_TI_ARMCL_VERSION) && \ - !defined(JSON_HEDLEY_TI_CL430_VERSION) && \ - !defined(JSON_HEDLEY_TI_CL2000_VERSION) && \ - !defined(JSON_HEDLEY_TI_CL6X_VERSION) && \ - !defined(JSON_HEDLEY_TI_CL7X_VERSION) && \ - !defined(JSON_HEDLEY_TI_CLPRU_VERSION) && \ - !defined(__COMPCERT__) && \ - !defined(JSON_HEDLEY_MCST_LCC_VERSION) - #define JSON_HEDLEY_GCC_VERSION JSON_HEDLEY_GNUC_VERSION -#endif - -#if defined(JSON_HEDLEY_GCC_VERSION_CHECK) - #undef JSON_HEDLEY_GCC_VERSION_CHECK -#endif -#if defined(JSON_HEDLEY_GCC_VERSION) - #define JSON_HEDLEY_GCC_VERSION_CHECK(major,minor,patch) (JSON_HEDLEY_GCC_VERSION >= JSON_HEDLEY_VERSION_ENCODE(major, minor, patch)) -#else - #define JSON_HEDLEY_GCC_VERSION_CHECK(major,minor,patch) (0) -#endif - -#if defined(JSON_HEDLEY_HAS_ATTRIBUTE) - #undef JSON_HEDLEY_HAS_ATTRIBUTE -#endif -#if \ - defined(__has_attribute) && \ - ( \ - (!defined(JSON_HEDLEY_IAR_VERSION) || JSON_HEDLEY_IAR_VERSION_CHECK(8,5,9)) \ - ) -# define JSON_HEDLEY_HAS_ATTRIBUTE(attribute) __has_attribute(attribute) -#else -# define JSON_HEDLEY_HAS_ATTRIBUTE(attribute) (0) -#endif - -#if 
defined(JSON_HEDLEY_GNUC_HAS_ATTRIBUTE) - #undef JSON_HEDLEY_GNUC_HAS_ATTRIBUTE -#endif -#if defined(__has_attribute) - #define JSON_HEDLEY_GNUC_HAS_ATTRIBUTE(attribute,major,minor,patch) JSON_HEDLEY_HAS_ATTRIBUTE(attribute) -#else - #define JSON_HEDLEY_GNUC_HAS_ATTRIBUTE(attribute,major,minor,patch) JSON_HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(JSON_HEDLEY_GCC_HAS_ATTRIBUTE) - #undef JSON_HEDLEY_GCC_HAS_ATTRIBUTE -#endif -#if defined(__has_attribute) - #define JSON_HEDLEY_GCC_HAS_ATTRIBUTE(attribute,major,minor,patch) JSON_HEDLEY_HAS_ATTRIBUTE(attribute) -#else - #define JSON_HEDLEY_GCC_HAS_ATTRIBUTE(attribute,major,minor,patch) JSON_HEDLEY_GCC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(JSON_HEDLEY_HAS_CPP_ATTRIBUTE) - #undef JSON_HEDLEY_HAS_CPP_ATTRIBUTE -#endif -#if \ - defined(__has_cpp_attribute) && \ - defined(__cplusplus) && \ - (!defined(JSON_HEDLEY_SUNPRO_VERSION) || JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,15,0)) - #define JSON_HEDLEY_HAS_CPP_ATTRIBUTE(attribute) __has_cpp_attribute(attribute) -#else - #define JSON_HEDLEY_HAS_CPP_ATTRIBUTE(attribute) (0) -#endif - -#if defined(JSON_HEDLEY_HAS_CPP_ATTRIBUTE_NS) - #undef JSON_HEDLEY_HAS_CPP_ATTRIBUTE_NS -#endif -#if !defined(__cplusplus) || !defined(__has_cpp_attribute) - #define JSON_HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) (0) -#elif \ - !defined(JSON_HEDLEY_PGI_VERSION) && \ - !defined(JSON_HEDLEY_IAR_VERSION) && \ - (!defined(JSON_HEDLEY_SUNPRO_VERSION) || JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,15,0)) && \ - (!defined(JSON_HEDLEY_MSVC_VERSION) || JSON_HEDLEY_MSVC_VERSION_CHECK(19,20,0)) - #define JSON_HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) JSON_HEDLEY_HAS_CPP_ATTRIBUTE(ns::attribute) -#else - #define JSON_HEDLEY_HAS_CPP_ATTRIBUTE_NS(ns,attribute) (0) -#endif - -#if defined(JSON_HEDLEY_GNUC_HAS_CPP_ATTRIBUTE) - #undef JSON_HEDLEY_GNUC_HAS_CPP_ATTRIBUTE -#endif -#if defined(__has_cpp_attribute) && defined(__cplusplus) - #define JSON_HEDLEY_GNUC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) __has_cpp_attribute(attribute) -#else - #define JSON_HEDLEY_GNUC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) JSON_HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(JSON_HEDLEY_GCC_HAS_CPP_ATTRIBUTE) - #undef JSON_HEDLEY_GCC_HAS_CPP_ATTRIBUTE -#endif -#if defined(__has_cpp_attribute) && defined(__cplusplus) - #define JSON_HEDLEY_GCC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) __has_cpp_attribute(attribute) -#else - #define JSON_HEDLEY_GCC_HAS_CPP_ATTRIBUTE(attribute,major,minor,patch) JSON_HEDLEY_GCC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(JSON_HEDLEY_HAS_BUILTIN) - #undef JSON_HEDLEY_HAS_BUILTIN -#endif -#if defined(__has_builtin) - #define JSON_HEDLEY_HAS_BUILTIN(builtin) __has_builtin(builtin) -#else - #define JSON_HEDLEY_HAS_BUILTIN(builtin) (0) -#endif - -#if defined(JSON_HEDLEY_GNUC_HAS_BUILTIN) - #undef JSON_HEDLEY_GNUC_HAS_BUILTIN -#endif -#if defined(__has_builtin) - #define JSON_HEDLEY_GNUC_HAS_BUILTIN(builtin,major,minor,patch) __has_builtin(builtin) -#else - #define JSON_HEDLEY_GNUC_HAS_BUILTIN(builtin,major,minor,patch) JSON_HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(JSON_HEDLEY_GCC_HAS_BUILTIN) - #undef JSON_HEDLEY_GCC_HAS_BUILTIN -#endif -#if defined(__has_builtin) - #define JSON_HEDLEY_GCC_HAS_BUILTIN(builtin,major,minor,patch) __has_builtin(builtin) -#else - #define JSON_HEDLEY_GCC_HAS_BUILTIN(builtin,major,minor,patch) JSON_HEDLEY_GCC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(JSON_HEDLEY_HAS_FEATURE) - #undef 
JSON_HEDLEY_HAS_FEATURE -#endif -#if defined(__has_feature) - #define JSON_HEDLEY_HAS_FEATURE(feature) __has_feature(feature) -#else - #define JSON_HEDLEY_HAS_FEATURE(feature) (0) -#endif - -#if defined(JSON_HEDLEY_GNUC_HAS_FEATURE) - #undef JSON_HEDLEY_GNUC_HAS_FEATURE -#endif -#if defined(__has_feature) - #define JSON_HEDLEY_GNUC_HAS_FEATURE(feature,major,minor,patch) __has_feature(feature) -#else - #define JSON_HEDLEY_GNUC_HAS_FEATURE(feature,major,minor,patch) JSON_HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(JSON_HEDLEY_GCC_HAS_FEATURE) - #undef JSON_HEDLEY_GCC_HAS_FEATURE -#endif -#if defined(__has_feature) - #define JSON_HEDLEY_GCC_HAS_FEATURE(feature,major,minor,patch) __has_feature(feature) -#else - #define JSON_HEDLEY_GCC_HAS_FEATURE(feature,major,minor,patch) JSON_HEDLEY_GCC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(JSON_HEDLEY_HAS_EXTENSION) - #undef JSON_HEDLEY_HAS_EXTENSION -#endif -#if defined(__has_extension) - #define JSON_HEDLEY_HAS_EXTENSION(extension) __has_extension(extension) -#else - #define JSON_HEDLEY_HAS_EXTENSION(extension) (0) -#endif - -#if defined(JSON_HEDLEY_GNUC_HAS_EXTENSION) - #undef JSON_HEDLEY_GNUC_HAS_EXTENSION -#endif -#if defined(__has_extension) - #define JSON_HEDLEY_GNUC_HAS_EXTENSION(extension,major,minor,patch) __has_extension(extension) -#else - #define JSON_HEDLEY_GNUC_HAS_EXTENSION(extension,major,minor,patch) JSON_HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(JSON_HEDLEY_GCC_HAS_EXTENSION) - #undef JSON_HEDLEY_GCC_HAS_EXTENSION -#endif -#if defined(__has_extension) - #define JSON_HEDLEY_GCC_HAS_EXTENSION(extension,major,minor,patch) __has_extension(extension) -#else - #define JSON_HEDLEY_GCC_HAS_EXTENSION(extension,major,minor,patch) JSON_HEDLEY_GCC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(JSON_HEDLEY_HAS_DECLSPEC_ATTRIBUTE) - #undef JSON_HEDLEY_HAS_DECLSPEC_ATTRIBUTE -#endif -#if defined(__has_declspec_attribute) - #define JSON_HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) __has_declspec_attribute(attribute) -#else - #define JSON_HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) (0) -#endif - -#if defined(JSON_HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE) - #undef JSON_HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE -#endif -#if defined(__has_declspec_attribute) - #define JSON_HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) __has_declspec_attribute(attribute) -#else - #define JSON_HEDLEY_GNUC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) JSON_HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(JSON_HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE) - #undef JSON_HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE -#endif -#if defined(__has_declspec_attribute) - #define JSON_HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) __has_declspec_attribute(attribute) -#else - #define JSON_HEDLEY_GCC_HAS_DECLSPEC_ATTRIBUTE(attribute,major,minor,patch) JSON_HEDLEY_GCC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(JSON_HEDLEY_HAS_WARNING) - #undef JSON_HEDLEY_HAS_WARNING -#endif -#if defined(__has_warning) - #define JSON_HEDLEY_HAS_WARNING(warning) __has_warning(warning) -#else - #define JSON_HEDLEY_HAS_WARNING(warning) (0) -#endif - -#if defined(JSON_HEDLEY_GNUC_HAS_WARNING) - #undef JSON_HEDLEY_GNUC_HAS_WARNING -#endif -#if defined(__has_warning) - #define JSON_HEDLEY_GNUC_HAS_WARNING(warning,major,minor,patch) __has_warning(warning) -#else - #define JSON_HEDLEY_GNUC_HAS_WARNING(warning,major,minor,patch) JSON_HEDLEY_GNUC_VERSION_CHECK(major,minor,patch) -#endif - -#if 
defined(JSON_HEDLEY_GCC_HAS_WARNING) - #undef JSON_HEDLEY_GCC_HAS_WARNING -#endif -#if defined(__has_warning) - #define JSON_HEDLEY_GCC_HAS_WARNING(warning,major,minor,patch) __has_warning(warning) -#else - #define JSON_HEDLEY_GCC_HAS_WARNING(warning,major,minor,patch) JSON_HEDLEY_GCC_VERSION_CHECK(major,minor,patch) -#endif - -#if \ - (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) || \ - defined(__clang__) || \ - JSON_HEDLEY_GCC_VERSION_CHECK(3,0,0) || \ - JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - JSON_HEDLEY_IAR_VERSION_CHECK(8,0,0) || \ - JSON_HEDLEY_PGI_VERSION_CHECK(18,4,0) || \ - JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,7,0) || \ - JSON_HEDLEY_TI_CL430_VERSION_CHECK(2,0,1) || \ - JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,1,0) || \ - JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,0,0) || \ - JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - JSON_HEDLEY_CRAY_VERSION_CHECK(5,0,0) || \ - JSON_HEDLEY_TINYC_VERSION_CHECK(0,9,17) || \ - JSON_HEDLEY_SUNPRO_VERSION_CHECK(8,0,0) || \ - (JSON_HEDLEY_IBM_VERSION_CHECK(10,1,0) && defined(__C99_PRAGMA_OPERATOR)) - #define JSON_HEDLEY_PRAGMA(value) _Pragma(#value) -#elif JSON_HEDLEY_MSVC_VERSION_CHECK(15,0,0) - #define JSON_HEDLEY_PRAGMA(value) __pragma(value) -#else - #define JSON_HEDLEY_PRAGMA(value) -#endif - -#if defined(JSON_HEDLEY_DIAGNOSTIC_PUSH) - #undef JSON_HEDLEY_DIAGNOSTIC_PUSH -#endif -#if defined(JSON_HEDLEY_DIAGNOSTIC_POP) - #undef JSON_HEDLEY_DIAGNOSTIC_POP -#endif -#if defined(__clang__) - #define JSON_HEDLEY_DIAGNOSTIC_PUSH _Pragma("clang diagnostic push") - #define JSON_HEDLEY_DIAGNOSTIC_POP _Pragma("clang diagnostic pop") -#elif JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) - #define JSON_HEDLEY_DIAGNOSTIC_PUSH _Pragma("warning(push)") - #define JSON_HEDLEY_DIAGNOSTIC_POP _Pragma("warning(pop)") -#elif JSON_HEDLEY_GCC_VERSION_CHECK(4,6,0) - #define JSON_HEDLEY_DIAGNOSTIC_PUSH _Pragma("GCC diagnostic push") - #define JSON_HEDLEY_DIAGNOSTIC_POP _Pragma("GCC diagnostic pop") -#elif \ - JSON_HEDLEY_MSVC_VERSION_CHECK(15,0,0) || \ - JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) - #define JSON_HEDLEY_DIAGNOSTIC_PUSH __pragma(warning(push)) - #define JSON_HEDLEY_DIAGNOSTIC_POP __pragma(warning(pop)) -#elif JSON_HEDLEY_ARM_VERSION_CHECK(5,6,0) - #define JSON_HEDLEY_DIAGNOSTIC_PUSH _Pragma("push") - #define JSON_HEDLEY_DIAGNOSTIC_POP _Pragma("pop") -#elif \ - JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,4,0) || \ - JSON_HEDLEY_TI_CL6X_VERSION_CHECK(8,1,0) || \ - JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) - #define JSON_HEDLEY_DIAGNOSTIC_PUSH _Pragma("diag_push") - #define JSON_HEDLEY_DIAGNOSTIC_POP _Pragma("diag_pop") -#elif JSON_HEDLEY_PELLES_VERSION_CHECK(2,90,0) - #define JSON_HEDLEY_DIAGNOSTIC_PUSH _Pragma("warning(push)") - #define JSON_HEDLEY_DIAGNOSTIC_POP _Pragma("warning(pop)") -#else - #define JSON_HEDLEY_DIAGNOSTIC_PUSH - #define JSON_HEDLEY_DIAGNOSTIC_POP -#endif - -/* JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_ is for - HEDLEY INTERNAL USE ONLY. API subject to change without notice. 
*/ -#if defined(JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_) - #undef JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_ -#endif -#if defined(__cplusplus) -# if JSON_HEDLEY_HAS_WARNING("-Wc++98-compat") -# if JSON_HEDLEY_HAS_WARNING("-Wc++17-extensions") -# if JSON_HEDLEY_HAS_WARNING("-Wc++1z-extensions") -# define JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ - JSON_HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ - _Pragma("clang diagnostic ignored \"-Wc++17-extensions\"") \ - _Pragma("clang diagnostic ignored \"-Wc++1z-extensions\"") \ - xpr \ - JSON_HEDLEY_DIAGNOSTIC_POP -# else -# define JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ - JSON_HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ - _Pragma("clang diagnostic ignored \"-Wc++17-extensions\"") \ - xpr \ - JSON_HEDLEY_DIAGNOSTIC_POP -# endif -# else -# define JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(xpr) \ - JSON_HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wc++98-compat\"") \ - xpr \ - JSON_HEDLEY_DIAGNOSTIC_POP -# endif -# endif -#endif -#if !defined(JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_) - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(x) x -#endif - -#if defined(JSON_HEDLEY_CONST_CAST) - #undef JSON_HEDLEY_CONST_CAST -#endif -#if defined(__cplusplus) -# define JSON_HEDLEY_CONST_CAST(T, expr) (const_cast<T>(expr)) -#elif \ - JSON_HEDLEY_HAS_WARNING("-Wcast-qual") || \ - JSON_HEDLEY_GCC_VERSION_CHECK(4,6,0) || \ - JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) -# define JSON_HEDLEY_CONST_CAST(T, expr) (__extension__ ({ \ - JSON_HEDLEY_DIAGNOSTIC_PUSH \ - JSON_HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL \ - ((T) (expr)); \ - JSON_HEDLEY_DIAGNOSTIC_POP \ - })) -#else -# define JSON_HEDLEY_CONST_CAST(T, expr) ((T) (expr)) -#endif - -#if defined(JSON_HEDLEY_REINTERPRET_CAST) - #undef JSON_HEDLEY_REINTERPRET_CAST -#endif -#if defined(__cplusplus) - #define JSON_HEDLEY_REINTERPRET_CAST(T, expr) (reinterpret_cast<T>(expr)) -#else - #define JSON_HEDLEY_REINTERPRET_CAST(T, expr) ((T) (expr)) -#endif - -#if defined(JSON_HEDLEY_STATIC_CAST) - #undef JSON_HEDLEY_STATIC_CAST -#endif -#if defined(__cplusplus) - #define JSON_HEDLEY_STATIC_CAST(T, expr) (static_cast<T>(expr)) -#else - #define JSON_HEDLEY_STATIC_CAST(T, expr) ((T) (expr)) -#endif - -#if defined(JSON_HEDLEY_CPP_CAST) - #undef JSON_HEDLEY_CPP_CAST -#endif -#if defined(__cplusplus) -# if JSON_HEDLEY_HAS_WARNING("-Wold-style-cast") -# define JSON_HEDLEY_CPP_CAST(T, expr) \ - JSON_HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wold-style-cast\"") \ - ((T) (expr)) \ - JSON_HEDLEY_DIAGNOSTIC_POP -# elif JSON_HEDLEY_IAR_VERSION_CHECK(8,3,0) -# define JSON_HEDLEY_CPP_CAST(T, expr) \ - JSON_HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("diag_suppress=Pe137") \ - JSON_HEDLEY_DIAGNOSTIC_POP -# else -# define JSON_HEDLEY_CPP_CAST(T, expr) ((T) (expr)) -# endif -#else -# define JSON_HEDLEY_CPP_CAST(T, expr) (expr) -#endif - -#if defined(JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED) - #undef JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED -#endif -#if JSON_HEDLEY_HAS_WARNING("-Wdeprecated-declarations") - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("clang diagnostic ignored \"-Wdeprecated-declarations\"") -#elif JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("warning(disable:1478 1786)") -#elif JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED 
__pragma(warning(disable:1478 1786)) -#elif JSON_HEDLEY_PGI_VERSION_CHECK(20,7,0) - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1216,1444,1445") -#elif JSON_HEDLEY_PGI_VERSION_CHECK(17,10,0) - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1444") -#elif JSON_HEDLEY_GCC_VERSION_CHECK(4,3,0) - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"") -#elif JSON_HEDLEY_MSVC_VERSION_CHECK(15,0,0) - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED __pragma(warning(disable:4996)) -#elif JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1215,1444") -#elif \ - JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - (JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - (JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - (JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - (JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ - JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress 1291,1718") -#elif JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) && !defined(__cplusplus) - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("error_messages(off,E_DEPRECATED_ATT,E_DEPRECATED_ATT_MESS)") -#elif JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) && defined(__cplusplus) - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("error_messages(off,symdeprecated,symdeprecated2)") -#elif JSON_HEDLEY_IAR_VERSION_CHECK(8,0,0) - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("diag_suppress=Pe1444,Pe1215") -#elif JSON_HEDLEY_PELLES_VERSION_CHECK(2,90,0) - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED _Pragma("warn(disable:2241)") -#else - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_DEPRECATED -#endif - -#if defined(JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS) - #undef JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS -#endif -#if JSON_HEDLEY_HAS_WARNING("-Wunknown-pragmas") - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("clang diagnostic ignored \"-Wunknown-pragmas\"") -#elif JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("warning(disable:161)") -#elif JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS __pragma(warning(disable:161)) -#elif JSON_HEDLEY_PGI_VERSION_CHECK(17,10,0) - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 1675") -#elif JSON_HEDLEY_GCC_VERSION_CHECK(4,3,0) - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("GCC diagnostic ignored \"-Wunknown-pragmas\"") -#elif JSON_HEDLEY_MSVC_VERSION_CHECK(15,0,0) - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS __pragma(warning(disable:4068)) -#elif \ - JSON_HEDLEY_TI_VERSION_CHECK(16,9,0) || \ - JSON_HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ - JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,0) - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 163") -#elif 
JSON_HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 163") -#elif JSON_HEDLEY_IAR_VERSION_CHECK(8,0,0) - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress=Pe161") -#elif JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS _Pragma("diag_suppress 161") -#else - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS -#endif - -#if defined(JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES) - #undef JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES -#endif -#if JSON_HEDLEY_HAS_WARNING("-Wunknown-attributes") - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("clang diagnostic ignored \"-Wunknown-attributes\"") -#elif JSON_HEDLEY_GCC_VERSION_CHECK(4,6,0) - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"") -#elif JSON_HEDLEY_INTEL_VERSION_CHECK(17,0,0) - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("warning(disable:1292)") -#elif JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES __pragma(warning(disable:1292)) -#elif JSON_HEDLEY_MSVC_VERSION_CHECK(19,0,0) - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES __pragma(warning(disable:5030)) -#elif JSON_HEDLEY_PGI_VERSION_CHECK(20,7,0) - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097,1098") -#elif JSON_HEDLEY_PGI_VERSION_CHECK(17,10,0) - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097") -#elif JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus) - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("error_messages(off,attrskipunsup)") -#elif \ - JSON_HEDLEY_TI_VERSION_CHECK(18,1,0) || \ - JSON_HEDLEY_TI_CL6X_VERSION_CHECK(8,3,0) || \ - JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1173") -#elif JSON_HEDLEY_IAR_VERSION_CHECK(8,0,0) - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress=Pe1097") -#elif JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES _Pragma("diag_suppress 1097") -#else - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_CPP_ATTRIBUTES -#endif - -#if defined(JSON_HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL) - #undef JSON_HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL -#endif -#if JSON_HEDLEY_HAS_WARNING("-Wcast-qual") - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("clang diagnostic ignored \"-Wcast-qual\"") -#elif JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("warning(disable:2203 2331)") -#elif JSON_HEDLEY_GCC_VERSION_CHECK(3,0,0) - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL _Pragma("GCC diagnostic ignored \"-Wcast-qual\"") -#else - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_CAST_QUAL -#endif - -#if defined(JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION) - #undef JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION -#endif -#if JSON_HEDLEY_HAS_WARNING("-Wunused-function") - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("clang diagnostic ignored \"-Wunused-function\"") -#elif JSON_HEDLEY_GCC_VERSION_CHECK(3,4,0) - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("GCC diagnostic ignored \"-Wunused-function\"") -#elif 
JSON_HEDLEY_MSVC_VERSION_CHECK(1,0,0) - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION __pragma(warning(disable:4505)) -#elif JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION _Pragma("diag_suppress 3142") -#else - #define JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNUSED_FUNCTION -#endif - -#if defined(JSON_HEDLEY_DEPRECATED) - #undef JSON_HEDLEY_DEPRECATED -#endif -#if defined(JSON_HEDLEY_DEPRECATED_FOR) - #undef JSON_HEDLEY_DEPRECATED_FOR -#endif -#if \ - JSON_HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ - JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) - #define JSON_HEDLEY_DEPRECATED(since) __declspec(deprecated("Since " # since)) - #define JSON_HEDLEY_DEPRECATED_FOR(since, replacement) __declspec(deprecated("Since " #since "; use " #replacement)) -#elif \ - (JSON_HEDLEY_HAS_EXTENSION(attribute_deprecated_with_message) && !defined(JSON_HEDLEY_IAR_VERSION)) || \ - JSON_HEDLEY_GCC_VERSION_CHECK(4,5,0) || \ - JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - JSON_HEDLEY_ARM_VERSION_CHECK(5,6,0) || \ - JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,13,0) || \ - JSON_HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ - JSON_HEDLEY_TI_VERSION_CHECK(18,1,0) || \ - JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(18,1,0) || \ - JSON_HEDLEY_TI_CL6X_VERSION_CHECK(8,3,0) || \ - JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,3,0) || \ - JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) - #define JSON_HEDLEY_DEPRECATED(since) __attribute__((__deprecated__("Since " #since))) - #define JSON_HEDLEY_DEPRECATED_FOR(since, replacement) __attribute__((__deprecated__("Since " #since "; use " #replacement))) -#elif defined(__cplusplus) && (__cplusplus >= 201402L) - #define JSON_HEDLEY_DEPRECATED(since) JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[deprecated("Since " #since)]]) - #define JSON_HEDLEY_DEPRECATED_FOR(since, replacement) JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[deprecated("Since " #since "; use " #replacement)]]) -#elif \ - JSON_HEDLEY_HAS_ATTRIBUTE(deprecated) || \ - JSON_HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ - JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - (JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - (JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - (JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - (JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ - JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ - JSON_HEDLEY_IAR_VERSION_CHECK(8,10,0) - #define JSON_HEDLEY_DEPRECATED(since) __attribute__((__deprecated__)) - #define JSON_HEDLEY_DEPRECATED_FOR(since, replacement) __attribute__((__deprecated__)) -#elif \ - JSON_HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ - JSON_HEDLEY_PELLES_VERSION_CHECK(6,50,0) || \ - JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) - #define JSON_HEDLEY_DEPRECATED(since) __declspec(deprecated) - #define JSON_HEDLEY_DEPRECATED_FOR(since, replacement) __declspec(deprecated) -#elif JSON_HEDLEY_IAR_VERSION_CHECK(8,0,0) - #define JSON_HEDLEY_DEPRECATED(since) _Pragma("deprecated") - #define JSON_HEDLEY_DEPRECATED_FOR(since, replacement) 
_Pragma("deprecated") -#else - #define JSON_HEDLEY_DEPRECATED(since) - #define JSON_HEDLEY_DEPRECATED_FOR(since, replacement) -#endif - -#if defined(JSON_HEDLEY_UNAVAILABLE) - #undef JSON_HEDLEY_UNAVAILABLE -#endif -#if \ - JSON_HEDLEY_HAS_ATTRIBUTE(warning) || \ - JSON_HEDLEY_GCC_VERSION_CHECK(4,3,0) || \ - JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) - #define JSON_HEDLEY_UNAVAILABLE(available_since) __attribute__((__warning__("Not available until " #available_since))) -#else - #define JSON_HEDLEY_UNAVAILABLE(available_since) -#endif - -#if defined(JSON_HEDLEY_WARN_UNUSED_RESULT) - #undef JSON_HEDLEY_WARN_UNUSED_RESULT -#endif -#if defined(JSON_HEDLEY_WARN_UNUSED_RESULT_MSG) - #undef JSON_HEDLEY_WARN_UNUSED_RESULT_MSG -#endif -#if \ - JSON_HEDLEY_HAS_ATTRIBUTE(warn_unused_result) || \ - JSON_HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ - JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - (JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - (JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - (JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - (JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ - JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - (JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,15,0) && defined(__cplusplus)) || \ - JSON_HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ - JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) - #define JSON_HEDLEY_WARN_UNUSED_RESULT __attribute__((__warn_unused_result__)) - #define JSON_HEDLEY_WARN_UNUSED_RESULT_MSG(msg) __attribute__((__warn_unused_result__)) -#elif (JSON_HEDLEY_HAS_CPP_ATTRIBUTE(nodiscard) >= 201907L) - #define JSON_HEDLEY_WARN_UNUSED_RESULT JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) - #define JSON_HEDLEY_WARN_UNUSED_RESULT_MSG(msg) JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard(msg)]]) -#elif JSON_HEDLEY_HAS_CPP_ATTRIBUTE(nodiscard) - #define JSON_HEDLEY_WARN_UNUSED_RESULT JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) - #define JSON_HEDLEY_WARN_UNUSED_RESULT_MSG(msg) JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[nodiscard]]) -#elif defined(_Check_return_) /* SAL */ - #define JSON_HEDLEY_WARN_UNUSED_RESULT _Check_return_ - #define JSON_HEDLEY_WARN_UNUSED_RESULT_MSG(msg) _Check_return_ -#else - #define JSON_HEDLEY_WARN_UNUSED_RESULT - #define JSON_HEDLEY_WARN_UNUSED_RESULT_MSG(msg) -#endif - -#if defined(JSON_HEDLEY_SENTINEL) - #undef JSON_HEDLEY_SENTINEL -#endif -#if \ - JSON_HEDLEY_HAS_ATTRIBUTE(sentinel) || \ - JSON_HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ - JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - JSON_HEDLEY_ARM_VERSION_CHECK(5,4,0) || \ - JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) - #define JSON_HEDLEY_SENTINEL(position) __attribute__((__sentinel__(position))) -#else - #define JSON_HEDLEY_SENTINEL(position) -#endif - -#if defined(JSON_HEDLEY_NO_RETURN) - #undef JSON_HEDLEY_NO_RETURN -#endif -#if JSON_HEDLEY_IAR_VERSION_CHECK(8,0,0) - #define JSON_HEDLEY_NO_RETURN __noreturn -#elif \ - JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) - #define JSON_HEDLEY_NO_RETURN 
__attribute__((__noreturn__)) -#elif defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L - #define JSON_HEDLEY_NO_RETURN _Noreturn -#elif defined(__cplusplus) && (__cplusplus >= 201103L) - #define JSON_HEDLEY_NO_RETURN JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[noreturn]]) -#elif \ - JSON_HEDLEY_HAS_ATTRIBUTE(noreturn) || \ - JSON_HEDLEY_GCC_VERSION_CHECK(3,2,0) || \ - JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ - JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - JSON_HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ - JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - (JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - (JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - (JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - (JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ - JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - JSON_HEDLEY_IAR_VERSION_CHECK(8,10,0) - #define JSON_HEDLEY_NO_RETURN __attribute__((__noreturn__)) -#elif JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) - #define JSON_HEDLEY_NO_RETURN _Pragma("does_not_return") -#elif \ - JSON_HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ - JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) - #define JSON_HEDLEY_NO_RETURN __declspec(noreturn) -#elif JSON_HEDLEY_TI_CL6X_VERSION_CHECK(6,0,0) && defined(__cplusplus) - #define JSON_HEDLEY_NO_RETURN _Pragma("FUNC_NEVER_RETURNS;") -#elif JSON_HEDLEY_COMPCERT_VERSION_CHECK(3,2,0) - #define JSON_HEDLEY_NO_RETURN __attribute((noreturn)) -#elif JSON_HEDLEY_PELLES_VERSION_CHECK(9,0,0) - #define JSON_HEDLEY_NO_RETURN __declspec(noreturn) -#else - #define JSON_HEDLEY_NO_RETURN -#endif - -#if defined(JSON_HEDLEY_NO_ESCAPE) - #undef JSON_HEDLEY_NO_ESCAPE -#endif -#if JSON_HEDLEY_HAS_ATTRIBUTE(noescape) - #define JSON_HEDLEY_NO_ESCAPE __attribute__((__noescape__)) -#else - #define JSON_HEDLEY_NO_ESCAPE -#endif - -#if defined(JSON_HEDLEY_UNREACHABLE) - #undef JSON_HEDLEY_UNREACHABLE -#endif -#if defined(JSON_HEDLEY_UNREACHABLE_RETURN) - #undef JSON_HEDLEY_UNREACHABLE_RETURN -#endif -#if defined(JSON_HEDLEY_ASSUME) - #undef JSON_HEDLEY_ASSUME -#endif -#if \ - JSON_HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ - JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) - #define JSON_HEDLEY_ASSUME(expr) __assume(expr) -#elif JSON_HEDLEY_HAS_BUILTIN(__builtin_assume) - #define JSON_HEDLEY_ASSUME(expr) __builtin_assume(expr) -#elif \ - JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ - JSON_HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) - #if defined(__cplusplus) - #define JSON_HEDLEY_ASSUME(expr) std::_nassert(expr) - #else - #define JSON_HEDLEY_ASSUME(expr) _nassert(expr) - #endif -#endif -#if \ - (JSON_HEDLEY_HAS_BUILTIN(__builtin_unreachable) && (!defined(JSON_HEDLEY_ARM_VERSION))) || \ - JSON_HEDLEY_GCC_VERSION_CHECK(4,5,0) || \ - JSON_HEDLEY_PGI_VERSION_CHECK(18,10,0) || \ - JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - JSON_HEDLEY_IBM_VERSION_CHECK(13,1,5) || \ - JSON_HEDLEY_CRAY_VERSION_CHECK(10,0,0) || \ - JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) - #define JSON_HEDLEY_UNREACHABLE() __builtin_unreachable() -#elif defined(JSON_HEDLEY_ASSUME) - #define JSON_HEDLEY_UNREACHABLE() JSON_HEDLEY_ASSUME(0) -#endif -#if 
!defined(JSON_HEDLEY_ASSUME) - #if defined(JSON_HEDLEY_UNREACHABLE) - #define JSON_HEDLEY_ASSUME(expr) JSON_HEDLEY_STATIC_CAST(void, ((expr) ? 1 : (JSON_HEDLEY_UNREACHABLE(), 1))) - #else - #define JSON_HEDLEY_ASSUME(expr) JSON_HEDLEY_STATIC_CAST(void, expr) - #endif -#endif -#if defined(JSON_HEDLEY_UNREACHABLE) - #if \ - JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ - JSON_HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) - #define JSON_HEDLEY_UNREACHABLE_RETURN(value) return (JSON_HEDLEY_STATIC_CAST(void, JSON_HEDLEY_ASSUME(0)), (value)) - #else - #define JSON_HEDLEY_UNREACHABLE_RETURN(value) JSON_HEDLEY_UNREACHABLE() - #endif -#else - #define JSON_HEDLEY_UNREACHABLE_RETURN(value) return (value) -#endif -#if !defined(JSON_HEDLEY_UNREACHABLE) - #define JSON_HEDLEY_UNREACHABLE() JSON_HEDLEY_ASSUME(0) -#endif - -JSON_HEDLEY_DIAGNOSTIC_PUSH -#if JSON_HEDLEY_HAS_WARNING("-Wpedantic") - #pragma clang diagnostic ignored "-Wpedantic" -#endif -#if JSON_HEDLEY_HAS_WARNING("-Wc++98-compat-pedantic") && defined(__cplusplus) - #pragma clang diagnostic ignored "-Wc++98-compat-pedantic" -#endif -#if JSON_HEDLEY_GCC_HAS_WARNING("-Wvariadic-macros",4,0,0) - #if defined(__clang__) - #pragma clang diagnostic ignored "-Wvariadic-macros" - #elif defined(JSON_HEDLEY_GCC_VERSION) - #pragma GCC diagnostic ignored "-Wvariadic-macros" - #endif -#endif -#if defined(JSON_HEDLEY_NON_NULL) - #undef JSON_HEDLEY_NON_NULL -#endif -#if \ - JSON_HEDLEY_HAS_ATTRIBUTE(nonnull) || \ - JSON_HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ - JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) - #define JSON_HEDLEY_NON_NULL(...) __attribute__((__nonnull__(__VA_ARGS__))) -#else - #define JSON_HEDLEY_NON_NULL(...) -#endif -JSON_HEDLEY_DIAGNOSTIC_POP - -#if defined(JSON_HEDLEY_PRINTF_FORMAT) - #undef JSON_HEDLEY_PRINTF_FORMAT -#endif -#if defined(__MINGW32__) && JSON_HEDLEY_GCC_HAS_ATTRIBUTE(format,4,4,0) && !defined(__USE_MINGW_ANSI_STDIO) - #define JSON_HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(ms_printf, string_idx, first_to_check))) -#elif defined(__MINGW32__) && JSON_HEDLEY_GCC_HAS_ATTRIBUTE(format,4,4,0) && defined(__USE_MINGW_ANSI_STDIO) - #define JSON_HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(gnu_printf, string_idx, first_to_check))) -#elif \ - JSON_HEDLEY_HAS_ATTRIBUTE(format) || \ - JSON_HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ - JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - JSON_HEDLEY_ARM_VERSION_CHECK(5,6,0) || \ - JSON_HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ - JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - (JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - (JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - (JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - (JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ - JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) - #define JSON_HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) __attribute__((__format__(__printf__, string_idx, first_to_check))) -#elif JSON_HEDLEY_PELLES_VERSION_CHECK(6,0,0) - #define JSON_HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) 
__declspec(vaformat(printf,string_idx,first_to_check)) -#else - #define JSON_HEDLEY_PRINTF_FORMAT(string_idx,first_to_check) -#endif - -#if defined(JSON_HEDLEY_CONSTEXPR) - #undef JSON_HEDLEY_CONSTEXPR -#endif -#if defined(__cplusplus) - #if __cplusplus >= 201103L - #define JSON_HEDLEY_CONSTEXPR JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(constexpr) - #endif -#endif -#if !defined(JSON_HEDLEY_CONSTEXPR) - #define JSON_HEDLEY_CONSTEXPR -#endif - -#if defined(JSON_HEDLEY_PREDICT) - #undef JSON_HEDLEY_PREDICT -#endif -#if defined(JSON_HEDLEY_LIKELY) - #undef JSON_HEDLEY_LIKELY -#endif -#if defined(JSON_HEDLEY_UNLIKELY) - #undef JSON_HEDLEY_UNLIKELY -#endif -#if defined(JSON_HEDLEY_UNPREDICTABLE) - #undef JSON_HEDLEY_UNPREDICTABLE -#endif -#if JSON_HEDLEY_HAS_BUILTIN(__builtin_unpredictable) - #define JSON_HEDLEY_UNPREDICTABLE(expr) __builtin_unpredictable((expr)) -#endif -#if \ - (JSON_HEDLEY_HAS_BUILTIN(__builtin_expect_with_probability) && !defined(JSON_HEDLEY_PGI_VERSION)) || \ - JSON_HEDLEY_GCC_VERSION_CHECK(9,0,0) || \ - JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define JSON_HEDLEY_PREDICT(expr, value, probability) __builtin_expect_with_probability( (expr), (value), (probability)) -# define JSON_HEDLEY_PREDICT_TRUE(expr, probability) __builtin_expect_with_probability(!!(expr), 1 , (probability)) -# define JSON_HEDLEY_PREDICT_FALSE(expr, probability) __builtin_expect_with_probability(!!(expr), 0 , (probability)) -# define JSON_HEDLEY_LIKELY(expr) __builtin_expect (!!(expr), 1 ) -# define JSON_HEDLEY_UNLIKELY(expr) __builtin_expect (!!(expr), 0 ) -#elif \ - (JSON_HEDLEY_HAS_BUILTIN(__builtin_expect) && !defined(JSON_HEDLEY_INTEL_CL_VERSION)) || \ - JSON_HEDLEY_GCC_VERSION_CHECK(3,0,0) || \ - JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - (JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,15,0) && defined(__cplusplus)) || \ - JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - JSON_HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ - JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,7,0) || \ - JSON_HEDLEY_TI_CL430_VERSION_CHECK(3,1,0) || \ - JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,1,0) || \ - JSON_HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ - JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - JSON_HEDLEY_TINYC_VERSION_CHECK(0,9,27) || \ - JSON_HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ - JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define JSON_HEDLEY_PREDICT(expr, expected, probability) \ - (((probability) >= 0.9) ? __builtin_expect((expr), (expected)) : (JSON_HEDLEY_STATIC_CAST(void, expected), (expr))) -# define JSON_HEDLEY_PREDICT_TRUE(expr, probability) \ - (__extension__ ({ \ - double hedley_probability_ = (probability); \ - ((hedley_probability_ >= 0.9) ? __builtin_expect(!!(expr), 1) : ((hedley_probability_ <= 0.1) ? __builtin_expect(!!(expr), 0) : !!(expr))); \ - })) -# define JSON_HEDLEY_PREDICT_FALSE(expr, probability) \ - (__extension__ ({ \ - double hedley_probability_ = (probability); \ - ((hedley_probability_ >= 0.9) ? __builtin_expect(!!(expr), 0) : ((hedley_probability_ <= 0.1) ? 
__builtin_expect(!!(expr), 1) : !!(expr))); \ - })) -# define JSON_HEDLEY_LIKELY(expr) __builtin_expect(!!(expr), 1) -# define JSON_HEDLEY_UNLIKELY(expr) __builtin_expect(!!(expr), 0) -#else -# define JSON_HEDLEY_PREDICT(expr, expected, probability) (JSON_HEDLEY_STATIC_CAST(void, expected), (expr)) -# define JSON_HEDLEY_PREDICT_TRUE(expr, probability) (!!(expr)) -# define JSON_HEDLEY_PREDICT_FALSE(expr, probability) (!!(expr)) -# define JSON_HEDLEY_LIKELY(expr) (!!(expr)) -# define JSON_HEDLEY_UNLIKELY(expr) (!!(expr)) -#endif -#if !defined(JSON_HEDLEY_UNPREDICTABLE) - #define JSON_HEDLEY_UNPREDICTABLE(expr) JSON_HEDLEY_PREDICT(expr, 1, 0.5) -#endif - -#if defined(JSON_HEDLEY_MALLOC) - #undef JSON_HEDLEY_MALLOC -#endif -#if \ - JSON_HEDLEY_HAS_ATTRIBUTE(malloc) || \ - JSON_HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ - JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ - JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - JSON_HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ - JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - (JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - (JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - (JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - (JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ - JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) - #define JSON_HEDLEY_MALLOC __attribute__((__malloc__)) -#elif JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) - #define JSON_HEDLEY_MALLOC _Pragma("returns_new_memory") -#elif \ - JSON_HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ - JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) - #define JSON_HEDLEY_MALLOC __declspec(restrict) -#else - #define JSON_HEDLEY_MALLOC -#endif - -#if defined(JSON_HEDLEY_PURE) - #undef JSON_HEDLEY_PURE -#endif -#if \ - JSON_HEDLEY_HAS_ATTRIBUTE(pure) || \ - JSON_HEDLEY_GCC_VERSION_CHECK(2,96,0) || \ - JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ - JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - JSON_HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ - JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - (JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - (JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - (JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - (JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ - JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - JSON_HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ - JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define JSON_HEDLEY_PURE __attribute__((__pure__)) -#elif JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) -# define JSON_HEDLEY_PURE _Pragma("does_not_write_global_data") -#elif defined(__cplusplus) && \ - ( \ - JSON_HEDLEY_TI_CL430_VERSION_CHECK(2,0,1) || \ - JSON_HEDLEY_TI_CL6X_VERSION_CHECK(4,0,0) 
|| \ - JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) \ - ) -# define JSON_HEDLEY_PURE _Pragma("FUNC_IS_PURE;") -#else -# define JSON_HEDLEY_PURE -#endif - -#if defined(JSON_HEDLEY_CONST) - #undef JSON_HEDLEY_CONST -#endif -#if \ - JSON_HEDLEY_HAS_ATTRIBUTE(const) || \ - JSON_HEDLEY_GCC_VERSION_CHECK(2,5,0) || \ - JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ - JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - JSON_HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ - JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - (JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - (JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - (JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - (JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ - JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - JSON_HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ - JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) - #define JSON_HEDLEY_CONST __attribute__((__const__)) -#elif \ - JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) - #define JSON_HEDLEY_CONST _Pragma("no_side_effect") -#else - #define JSON_HEDLEY_CONST JSON_HEDLEY_PURE -#endif - -#if defined(JSON_HEDLEY_RESTRICT) - #undef JSON_HEDLEY_RESTRICT -#endif -#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && !defined(__cplusplus) - #define JSON_HEDLEY_RESTRICT restrict -#elif \ - JSON_HEDLEY_GCC_VERSION_CHECK(3,1,0) || \ - JSON_HEDLEY_MSVC_VERSION_CHECK(14,0,0) || \ - JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) || \ - JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - JSON_HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ - JSON_HEDLEY_PGI_VERSION_CHECK(17,10,0) || \ - JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,2,4) || \ - JSON_HEDLEY_TI_CL6X_VERSION_CHECK(8,1,0) || \ - JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - (JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,14,0) && defined(__cplusplus)) || \ - JSON_HEDLEY_IAR_VERSION_CHECK(8,0,0) || \ - defined(__clang__) || \ - JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) - #define JSON_HEDLEY_RESTRICT __restrict -#elif JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,3,0) && !defined(__cplusplus) - #define JSON_HEDLEY_RESTRICT _Restrict -#else - #define JSON_HEDLEY_RESTRICT -#endif - -#if defined(JSON_HEDLEY_INLINE) - #undef JSON_HEDLEY_INLINE -#endif -#if \ - (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) || \ - (defined(__cplusplus) && (__cplusplus >= 199711L)) - #define JSON_HEDLEY_INLINE inline -#elif \ - defined(JSON_HEDLEY_GCC_VERSION) || \ - JSON_HEDLEY_ARM_VERSION_CHECK(6,2,0) - #define JSON_HEDLEY_INLINE __inline__ -#elif \ - JSON_HEDLEY_MSVC_VERSION_CHECK(12,0,0) || \ - JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) || \ - JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,1,0) || \ - JSON_HEDLEY_TI_CL430_VERSION_CHECK(3,1,0) || \ - JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,2,0) || \ - JSON_HEDLEY_TI_CL6X_VERSION_CHECK(8,0,0) || \ - JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) - #define JSON_HEDLEY_INLINE __inline -#else - #define 
JSON_HEDLEY_INLINE -#endif - -#if defined(JSON_HEDLEY_ALWAYS_INLINE) - #undef JSON_HEDLEY_ALWAYS_INLINE -#endif -#if \ - JSON_HEDLEY_HAS_ATTRIBUTE(always_inline) || \ - JSON_HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ - JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ - JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - JSON_HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ - JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - (JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - (JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - (JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - (JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ - JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ - JSON_HEDLEY_IAR_VERSION_CHECK(8,10,0) -# define JSON_HEDLEY_ALWAYS_INLINE __attribute__((__always_inline__)) JSON_HEDLEY_INLINE -#elif \ - JSON_HEDLEY_MSVC_VERSION_CHECK(12,0,0) || \ - JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define JSON_HEDLEY_ALWAYS_INLINE __forceinline -#elif defined(__cplusplus) && \ - ( \ - JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - JSON_HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ - JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) \ - ) -# define JSON_HEDLEY_ALWAYS_INLINE _Pragma("FUNC_ALWAYS_INLINE;") -#elif JSON_HEDLEY_IAR_VERSION_CHECK(8,0,0) -# define JSON_HEDLEY_ALWAYS_INLINE _Pragma("inline=forced") -#else -# define JSON_HEDLEY_ALWAYS_INLINE JSON_HEDLEY_INLINE -#endif - -#if defined(JSON_HEDLEY_NEVER_INLINE) - #undef JSON_HEDLEY_NEVER_INLINE -#endif -#if \ - JSON_HEDLEY_HAS_ATTRIBUTE(noinline) || \ - JSON_HEDLEY_GCC_VERSION_CHECK(4,0,0) || \ - JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ - JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - JSON_HEDLEY_IBM_VERSION_CHECK(10,1,0) || \ - JSON_HEDLEY_TI_VERSION_CHECK(15,12,0) || \ - (JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(4,8,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_ARMCL_VERSION_CHECK(5,2,0) || \ - (JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_CL2000_VERSION_CHECK(6,4,0) || \ - (JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,0,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_CL430_VERSION_CHECK(4,3,0) || \ - (JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) || \ - JSON_HEDLEY_TI_CL7X_VERSION_CHECK(1,2,0) || \ - JSON_HEDLEY_TI_CLPRU_VERSION_CHECK(2,1,0) || \ - JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) || \ - JSON_HEDLEY_IAR_VERSION_CHECK(8,10,0) - #define JSON_HEDLEY_NEVER_INLINE __attribute__((__noinline__)) -#elif \ - JSON_HEDLEY_MSVC_VERSION_CHECK(13,10,0) || \ - JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) - #define JSON_HEDLEY_NEVER_INLINE __declspec(noinline) -#elif JSON_HEDLEY_PGI_VERSION_CHECK(10,2,0) - #define JSON_HEDLEY_NEVER_INLINE _Pragma("noinline") -#elif JSON_HEDLEY_TI_CL6X_VERSION_CHECK(6,0,0) && 
defined(__cplusplus) - #define JSON_HEDLEY_NEVER_INLINE _Pragma("FUNC_CANNOT_INLINE;") -#elif JSON_HEDLEY_IAR_VERSION_CHECK(8,0,0) - #define JSON_HEDLEY_NEVER_INLINE _Pragma("inline=never") -#elif JSON_HEDLEY_COMPCERT_VERSION_CHECK(3,2,0) - #define JSON_HEDLEY_NEVER_INLINE __attribute((noinline)) -#elif JSON_HEDLEY_PELLES_VERSION_CHECK(9,0,0) - #define JSON_HEDLEY_NEVER_INLINE __declspec(noinline) -#else - #define JSON_HEDLEY_NEVER_INLINE -#endif - -#if defined(JSON_HEDLEY_PRIVATE) - #undef JSON_HEDLEY_PRIVATE -#endif -#if defined(JSON_HEDLEY_PUBLIC) - #undef JSON_HEDLEY_PUBLIC -#endif -#if defined(JSON_HEDLEY_IMPORT) - #undef JSON_HEDLEY_IMPORT -#endif -#if defined(_WIN32) || defined(__CYGWIN__) -# define JSON_HEDLEY_PRIVATE -# define JSON_HEDLEY_PUBLIC __declspec(dllexport) -# define JSON_HEDLEY_IMPORT __declspec(dllimport) -#else -# if \ - JSON_HEDLEY_HAS_ATTRIBUTE(visibility) || \ - JSON_HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ - JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,11,0) || \ - JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - JSON_HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ - ( \ - defined(__TI_EABI__) && \ - ( \ - (JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,2,0) && defined(__TI_GNU_ATTRIBUTE_SUPPORT__)) || \ - JSON_HEDLEY_TI_CL6X_VERSION_CHECK(7,5,0) \ - ) \ - ) || \ - JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) -# define JSON_HEDLEY_PRIVATE __attribute__((__visibility__("hidden"))) -# define JSON_HEDLEY_PUBLIC __attribute__((__visibility__("default"))) -# else -# define JSON_HEDLEY_PRIVATE -# define JSON_HEDLEY_PUBLIC -# endif -# define JSON_HEDLEY_IMPORT extern -#endif - -#if defined(JSON_HEDLEY_NO_THROW) - #undef JSON_HEDLEY_NO_THROW -#endif -#if \ - JSON_HEDLEY_HAS_ATTRIBUTE(nothrow) || \ - JSON_HEDLEY_GCC_VERSION_CHECK(3,3,0) || \ - JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) - #define JSON_HEDLEY_NO_THROW __attribute__((__nothrow__)) -#elif \ - JSON_HEDLEY_MSVC_VERSION_CHECK(13,1,0) || \ - JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) || \ - JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) - #define JSON_HEDLEY_NO_THROW __declspec(nothrow) -#else - #define JSON_HEDLEY_NO_THROW -#endif - -#if defined(JSON_HEDLEY_FALL_THROUGH) - #undef JSON_HEDLEY_FALL_THROUGH -#endif -#if \ - JSON_HEDLEY_HAS_ATTRIBUTE(fallthrough) || \ - JSON_HEDLEY_GCC_VERSION_CHECK(7,0,0) || \ - JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) - #define JSON_HEDLEY_FALL_THROUGH __attribute__((__fallthrough__)) -#elif JSON_HEDLEY_HAS_CPP_ATTRIBUTE_NS(clang,fallthrough) - #define JSON_HEDLEY_FALL_THROUGH JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[clang::fallthrough]]) -#elif JSON_HEDLEY_HAS_CPP_ATTRIBUTE(fallthrough) - #define JSON_HEDLEY_FALL_THROUGH JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_([[fallthrough]]) -#elif defined(__fallthrough) /* SAL */ - #define JSON_HEDLEY_FALL_THROUGH __fallthrough -#else - #define JSON_HEDLEY_FALL_THROUGH -#endif - -#if defined(JSON_HEDLEY_RETURNS_NON_NULL) - #undef JSON_HEDLEY_RETURNS_NON_NULL -#endif -#if \ - JSON_HEDLEY_HAS_ATTRIBUTE(returns_nonnull) || \ - JSON_HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ - JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) - #define JSON_HEDLEY_RETURNS_NON_NULL __attribute__((__returns_nonnull__)) -#elif defined(_Ret_notnull_) /* SAL */ - #define JSON_HEDLEY_RETURNS_NON_NULL _Ret_notnull_ -#else - #define JSON_HEDLEY_RETURNS_NON_NULL -#endif - -#if defined(JSON_HEDLEY_ARRAY_PARAM) - #undef JSON_HEDLEY_ARRAY_PARAM -#endif -#if \ - defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) 
&& \ - !defined(__STDC_NO_VLA__) && \ - !defined(__cplusplus) && \ - !defined(JSON_HEDLEY_PGI_VERSION) && \ - !defined(JSON_HEDLEY_TINYC_VERSION) - #define JSON_HEDLEY_ARRAY_PARAM(name) (name) -#else - #define JSON_HEDLEY_ARRAY_PARAM(name) -#endif - -#if defined(JSON_HEDLEY_IS_CONSTANT) - #undef JSON_HEDLEY_IS_CONSTANT -#endif -#if defined(JSON_HEDLEY_REQUIRE_CONSTEXPR) - #undef JSON_HEDLEY_REQUIRE_CONSTEXPR -#endif -/* JSON_HEDLEY_IS_CONSTEXPR_ is for - HEDLEY INTERNAL USE ONLY. API subject to change without notice. */ -#if defined(JSON_HEDLEY_IS_CONSTEXPR_) - #undef JSON_HEDLEY_IS_CONSTEXPR_ -#endif -#if \ - JSON_HEDLEY_HAS_BUILTIN(__builtin_constant_p) || \ - JSON_HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ - JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - JSON_HEDLEY_TINYC_VERSION_CHECK(0,9,19) || \ - JSON_HEDLEY_ARM_VERSION_CHECK(4,1,0) || \ - JSON_HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ - JSON_HEDLEY_TI_CL6X_VERSION_CHECK(6,1,0) || \ - (JSON_HEDLEY_SUNPRO_VERSION_CHECK(5,10,0) && !defined(__cplusplus)) || \ - JSON_HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ - JSON_HEDLEY_MCST_LCC_VERSION_CHECK(1,25,10) - #define JSON_HEDLEY_IS_CONSTANT(expr) __builtin_constant_p(expr) -#endif -#if !defined(__cplusplus) -# if \ - JSON_HEDLEY_HAS_BUILTIN(__builtin_types_compatible_p) || \ - JSON_HEDLEY_GCC_VERSION_CHECK(3,4,0) || \ - JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - JSON_HEDLEY_IBM_VERSION_CHECK(13,1,0) || \ - JSON_HEDLEY_CRAY_VERSION_CHECK(8,1,0) || \ - JSON_HEDLEY_ARM_VERSION_CHECK(5,4,0) || \ - JSON_HEDLEY_TINYC_VERSION_CHECK(0,9,24) -#if defined(__INTPTR_TYPE__) - #define JSON_HEDLEY_IS_CONSTEXPR_(expr) __builtin_types_compatible_p(__typeof__((1 ? (void*) ((__INTPTR_TYPE__) ((expr) * 0)) : (int*) 0)), int*) -#else - #include <stdint.h> - #define JSON_HEDLEY_IS_CONSTEXPR_(expr) __builtin_types_compatible_p(__typeof__((1 ? (void*) ((intptr_t) ((expr) * 0)) : (int*) 0)), int*) -#endif -# elif \ - ( \ - defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) && \ - !defined(JSON_HEDLEY_SUNPRO_VERSION) && \ - !defined(JSON_HEDLEY_PGI_VERSION) && \ - !defined(JSON_HEDLEY_IAR_VERSION)) || \ - (JSON_HEDLEY_HAS_EXTENSION(c_generic_selections) && !defined(JSON_HEDLEY_IAR_VERSION)) || \ - JSON_HEDLEY_GCC_VERSION_CHECK(4,9,0) || \ - JSON_HEDLEY_INTEL_VERSION_CHECK(17,0,0) || \ - JSON_HEDLEY_IBM_VERSION_CHECK(12,1,0) || \ - JSON_HEDLEY_ARM_VERSION_CHECK(5,3,0) -#if defined(__INTPTR_TYPE__) - #define JSON_HEDLEY_IS_CONSTEXPR_(expr) _Generic((1 ? (void*) ((__INTPTR_TYPE__) ((expr) * 0)) : (int*) 0), int*: 1, void*: 0) -#else - #include <stdint.h> - #define JSON_HEDLEY_IS_CONSTEXPR_(expr) _Generic((1 ? (void*) ((intptr_t) ((expr) * 0)) : (int*) 0), int*: 1, void*: 0) -#endif -# elif \ - defined(JSON_HEDLEY_GCC_VERSION) || \ - defined(JSON_HEDLEY_INTEL_VERSION) || \ - defined(JSON_HEDLEY_TINYC_VERSION) || \ - defined(JSON_HEDLEY_TI_ARMCL_VERSION) || \ - JSON_HEDLEY_TI_CL430_VERSION_CHECK(18,12,0) || \ - defined(JSON_HEDLEY_TI_CL2000_VERSION) || \ - defined(JSON_HEDLEY_TI_CL6X_VERSION) || \ - defined(JSON_HEDLEY_TI_CL7X_VERSION) || \ - defined(JSON_HEDLEY_TI_CLPRU_VERSION) || \ - defined(__clang__) -# define JSON_HEDLEY_IS_CONSTEXPR_(expr) ( \ - sizeof(void) != \ - sizeof(*( \ - 1 ? \ - ((void*) ((expr) * 0L) ) : \ -((struct { char v[sizeof(void) * 2]; } *) 1) \ - ) \ - ) \ - ) -# endif -#endif -#if defined(JSON_HEDLEY_IS_CONSTEXPR_) - #if !defined(JSON_HEDLEY_IS_CONSTANT) - #define JSON_HEDLEY_IS_CONSTANT(expr) JSON_HEDLEY_IS_CONSTEXPR_(expr) - #endif - #define JSON_HEDLEY_REQUIRE_CONSTEXPR(expr) (JSON_HEDLEY_IS_CONSTEXPR_(expr) ?
(expr) : (-1)) -#else - #if !defined(JSON_HEDLEY_IS_CONSTANT) - #define JSON_HEDLEY_IS_CONSTANT(expr) (0) - #endif - #define JSON_HEDLEY_REQUIRE_CONSTEXPR(expr) (expr) -#endif - -#if defined(JSON_HEDLEY_BEGIN_C_DECLS) - #undef JSON_HEDLEY_BEGIN_C_DECLS -#endif -#if defined(JSON_HEDLEY_END_C_DECLS) - #undef JSON_HEDLEY_END_C_DECLS -#endif -#if defined(JSON_HEDLEY_C_DECL) - #undef JSON_HEDLEY_C_DECL -#endif -#if defined(__cplusplus) - #define JSON_HEDLEY_BEGIN_C_DECLS extern "C" { - #define JSON_HEDLEY_END_C_DECLS } - #define JSON_HEDLEY_C_DECL extern "C" -#else - #define JSON_HEDLEY_BEGIN_C_DECLS - #define JSON_HEDLEY_END_C_DECLS - #define JSON_HEDLEY_C_DECL -#endif - -#if defined(JSON_HEDLEY_STATIC_ASSERT) - #undef JSON_HEDLEY_STATIC_ASSERT -#endif -#if \ - !defined(__cplusplus) && ( \ - (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)) || \ - (JSON_HEDLEY_HAS_FEATURE(c_static_assert) && !defined(JSON_HEDLEY_INTEL_CL_VERSION)) || \ - JSON_HEDLEY_GCC_VERSION_CHECK(6,0,0) || \ - JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) || \ - defined(_Static_assert) \ - ) -# define JSON_HEDLEY_STATIC_ASSERT(expr, message) _Static_assert(expr, message) -#elif \ - (defined(__cplusplus) && (__cplusplus >= 201103L)) || \ - JSON_HEDLEY_MSVC_VERSION_CHECK(16,0,0) || \ - JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define JSON_HEDLEY_STATIC_ASSERT(expr, message) JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(static_assert(expr, message)) -#else -# define JSON_HEDLEY_STATIC_ASSERT(expr, message) -#endif - -#if defined(JSON_HEDLEY_NULL) - #undef JSON_HEDLEY_NULL -#endif -#if defined(__cplusplus) - #if __cplusplus >= 201103L - #define JSON_HEDLEY_NULL JSON_HEDLEY_DIAGNOSTIC_DISABLE_CPP98_COMPAT_WRAP_(nullptr) - #elif defined(NULL) - #define JSON_HEDLEY_NULL NULL - #else - #define JSON_HEDLEY_NULL JSON_HEDLEY_STATIC_CAST(void*, 0) - #endif -#elif defined(NULL) - #define JSON_HEDLEY_NULL NULL -#else - #define JSON_HEDLEY_NULL ((void*) 0) -#endif - -#if defined(JSON_HEDLEY_MESSAGE) - #undef JSON_HEDLEY_MESSAGE -#endif -#if JSON_HEDLEY_HAS_WARNING("-Wunknown-pragmas") -# define JSON_HEDLEY_MESSAGE(msg) \ - JSON_HEDLEY_DIAGNOSTIC_PUSH \ - JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS \ - JSON_HEDLEY_PRAGMA(message msg) \ - JSON_HEDLEY_DIAGNOSTIC_POP -#elif \ - JSON_HEDLEY_GCC_VERSION_CHECK(4,4,0) || \ - JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) -# define JSON_HEDLEY_MESSAGE(msg) JSON_HEDLEY_PRAGMA(message msg) -#elif JSON_HEDLEY_CRAY_VERSION_CHECK(5,0,0) -# define JSON_HEDLEY_MESSAGE(msg) JSON_HEDLEY_PRAGMA(_CRI message msg) -#elif JSON_HEDLEY_IAR_VERSION_CHECK(8,0,0) -# define JSON_HEDLEY_MESSAGE(msg) JSON_HEDLEY_PRAGMA(message(msg)) -#elif JSON_HEDLEY_PELLES_VERSION_CHECK(2,0,0) -# define JSON_HEDLEY_MESSAGE(msg) JSON_HEDLEY_PRAGMA(message(msg)) -#else -# define JSON_HEDLEY_MESSAGE(msg) -#endif - -#if defined(JSON_HEDLEY_WARNING) - #undef JSON_HEDLEY_WARNING -#endif -#if JSON_HEDLEY_HAS_WARNING("-Wunknown-pragmas") -# define JSON_HEDLEY_WARNING(msg) \ - JSON_HEDLEY_DIAGNOSTIC_PUSH \ - JSON_HEDLEY_DIAGNOSTIC_DISABLE_UNKNOWN_PRAGMAS \ - JSON_HEDLEY_PRAGMA(clang warning msg) \ - JSON_HEDLEY_DIAGNOSTIC_POP -#elif \ - JSON_HEDLEY_GCC_VERSION_CHECK(4,8,0) || \ - JSON_HEDLEY_PGI_VERSION_CHECK(18,4,0) || \ - JSON_HEDLEY_INTEL_VERSION_CHECK(13,0,0) -# define JSON_HEDLEY_WARNING(msg) JSON_HEDLEY_PRAGMA(GCC warning msg) -#elif \ - JSON_HEDLEY_MSVC_VERSION_CHECK(15,0,0) || \ - JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) -# define JSON_HEDLEY_WARNING(msg) JSON_HEDLEY_PRAGMA(message(msg)) -#else -# define 
JSON_HEDLEY_WARNING(msg) JSON_HEDLEY_MESSAGE(msg) -#endif - -#if defined(JSON_HEDLEY_REQUIRE) - #undef JSON_HEDLEY_REQUIRE -#endif -#if defined(JSON_HEDLEY_REQUIRE_MSG) - #undef JSON_HEDLEY_REQUIRE_MSG -#endif -#if JSON_HEDLEY_HAS_ATTRIBUTE(diagnose_if) -# if JSON_HEDLEY_HAS_WARNING("-Wgcc-compat") -# define JSON_HEDLEY_REQUIRE(expr) \ - JSON_HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wgcc-compat\"") \ - __attribute__((diagnose_if(!(expr), #expr, "error"))) \ - JSON_HEDLEY_DIAGNOSTIC_POP -# define JSON_HEDLEY_REQUIRE_MSG(expr,msg) \ - JSON_HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("clang diagnostic ignored \"-Wgcc-compat\"") \ - __attribute__((diagnose_if(!(expr), msg, "error"))) \ - JSON_HEDLEY_DIAGNOSTIC_POP -# else -# define JSON_HEDLEY_REQUIRE(expr) __attribute__((diagnose_if(!(expr), #expr, "error"))) -# define JSON_HEDLEY_REQUIRE_MSG(expr,msg) __attribute__((diagnose_if(!(expr), msg, "error"))) -# endif -#else -# define JSON_HEDLEY_REQUIRE(expr) -# define JSON_HEDLEY_REQUIRE_MSG(expr,msg) -#endif - -#if defined(JSON_HEDLEY_FLAGS) - #undef JSON_HEDLEY_FLAGS -#endif -#if JSON_HEDLEY_HAS_ATTRIBUTE(flag_enum) && (!defined(__cplusplus) || JSON_HEDLEY_HAS_WARNING("-Wbitfield-enum-conversion")) - #define JSON_HEDLEY_FLAGS __attribute__((__flag_enum__)) -#else - #define JSON_HEDLEY_FLAGS -#endif - -#if defined(JSON_HEDLEY_FLAGS_CAST) - #undef JSON_HEDLEY_FLAGS_CAST -#endif -#if JSON_HEDLEY_INTEL_VERSION_CHECK(19,0,0) -# define JSON_HEDLEY_FLAGS_CAST(T, expr) (__extension__ ({ \ - JSON_HEDLEY_DIAGNOSTIC_PUSH \ - _Pragma("warning(disable:188)") \ - ((T) (expr)); \ - JSON_HEDLEY_DIAGNOSTIC_POP \ - })) -#else -# define JSON_HEDLEY_FLAGS_CAST(T, expr) JSON_HEDLEY_STATIC_CAST(T, expr) -#endif - -#if defined(JSON_HEDLEY_EMPTY_BASES) - #undef JSON_HEDLEY_EMPTY_BASES -#endif -#if \ - (JSON_HEDLEY_MSVC_VERSION_CHECK(19,0,23918) && !JSON_HEDLEY_MSVC_VERSION_CHECK(20,0,0)) || \ - JSON_HEDLEY_INTEL_CL_VERSION_CHECK(2021,1,0) - #define JSON_HEDLEY_EMPTY_BASES __declspec(empty_bases) -#else - #define JSON_HEDLEY_EMPTY_BASES -#endif - -/* Remaining macros are deprecated. 
*/ - -#if defined(JSON_HEDLEY_GCC_NOT_CLANG_VERSION_CHECK) - #undef JSON_HEDLEY_GCC_NOT_CLANG_VERSION_CHECK -#endif -#if defined(__clang__) - #define JSON_HEDLEY_GCC_NOT_CLANG_VERSION_CHECK(major,minor,patch) (0) -#else - #define JSON_HEDLEY_GCC_NOT_CLANG_VERSION_CHECK(major,minor,patch) JSON_HEDLEY_GCC_VERSION_CHECK(major,minor,patch) -#endif - -#if defined(JSON_HEDLEY_CLANG_HAS_ATTRIBUTE) - #undef JSON_HEDLEY_CLANG_HAS_ATTRIBUTE -#endif -#define JSON_HEDLEY_CLANG_HAS_ATTRIBUTE(attribute) JSON_HEDLEY_HAS_ATTRIBUTE(attribute) - -#if defined(JSON_HEDLEY_CLANG_HAS_CPP_ATTRIBUTE) - #undef JSON_HEDLEY_CLANG_HAS_CPP_ATTRIBUTE -#endif -#define JSON_HEDLEY_CLANG_HAS_CPP_ATTRIBUTE(attribute) JSON_HEDLEY_HAS_CPP_ATTRIBUTE(attribute) - -#if defined(JSON_HEDLEY_CLANG_HAS_BUILTIN) - #undef JSON_HEDLEY_CLANG_HAS_BUILTIN -#endif -#define JSON_HEDLEY_CLANG_HAS_BUILTIN(builtin) JSON_HEDLEY_HAS_BUILTIN(builtin) - -#if defined(JSON_HEDLEY_CLANG_HAS_FEATURE) - #undef JSON_HEDLEY_CLANG_HAS_FEATURE -#endif -#define JSON_HEDLEY_CLANG_HAS_FEATURE(feature) JSON_HEDLEY_HAS_FEATURE(feature) - -#if defined(JSON_HEDLEY_CLANG_HAS_EXTENSION) - #undef JSON_HEDLEY_CLANG_HAS_EXTENSION -#endif -#define JSON_HEDLEY_CLANG_HAS_EXTENSION(extension) JSON_HEDLEY_HAS_EXTENSION(extension) - -#if defined(JSON_HEDLEY_CLANG_HAS_DECLSPEC_DECLSPEC_ATTRIBUTE) - #undef JSON_HEDLEY_CLANG_HAS_DECLSPEC_DECLSPEC_ATTRIBUTE -#endif -#define JSON_HEDLEY_CLANG_HAS_DECLSPEC_ATTRIBUTE(attribute) JSON_HEDLEY_HAS_DECLSPEC_ATTRIBUTE(attribute) - -#if defined(JSON_HEDLEY_CLANG_HAS_WARNING) - #undef JSON_HEDLEY_CLANG_HAS_WARNING -#endif -#define JSON_HEDLEY_CLANG_HAS_WARNING(warning) JSON_HEDLEY_HAS_WARNING(warning) - -#endif /* !defined(JSON_HEDLEY_VERSION) || (JSON_HEDLEY_VERSION < X) */ - - -// This file contains all internal macro definitions (except those affecting ABI) -// You MUST include macro_unscope.hpp at the end of json.hpp to undef all of them - -// #include - - -// exclude unsupported compilers -#if !defined(JSON_SKIP_UNSUPPORTED_COMPILER_CHECK) - #if defined(__clang__) - #if (__clang_major__ * 10000 + __clang_minor__ * 100 + __clang_patchlevel__) < 30400 - #error "unsupported Clang version - see https://github.com/nlohmann/json#supported-compilers" - #endif - #elif defined(__GNUC__) && !(defined(__ICC) || defined(__INTEL_COMPILER)) - #if (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) < 40800 - #error "unsupported GCC version - see https://github.com/nlohmann/json#supported-compilers" - #endif - #endif -#endif - -// C++ language standard detection -// if the user manually specified the used c++ version this is skipped -#if !defined(JSON_HAS_CPP_20) && !defined(JSON_HAS_CPP_17) && !defined(JSON_HAS_CPP_14) && !defined(JSON_HAS_CPP_11) - #if (defined(__cplusplus) && __cplusplus >= 202002L) || (defined(_MSVC_LANG) && _MSVC_LANG >= 202002L) - #define JSON_HAS_CPP_20 - #define JSON_HAS_CPP_17 - #define JSON_HAS_CPP_14 - #elif (defined(__cplusplus) && __cplusplus >= 201703L) || (defined(_HAS_CXX17) && _HAS_CXX17 == 1) // fix for issue #464 - #define JSON_HAS_CPP_17 - #define JSON_HAS_CPP_14 - #elif (defined(__cplusplus) && __cplusplus >= 201402L) || (defined(_HAS_CXX14) && _HAS_CXX14 == 1) - #define JSON_HAS_CPP_14 - #endif - // the cpp 11 flag is always specified because it is the minimal required version - #define JSON_HAS_CPP_11 -#endif - -#ifdef __has_include - #if __has_include(<version>) - #include <version> - #endif -#endif - -#if !defined(JSON_HAS_FILESYSTEM) && !defined(JSON_HAS_EXPERIMENTAL_FILESYSTEM) - #ifdef JSON_HAS_CPP_17 - #if
defined(__cpp_lib_filesystem) - #define JSON_HAS_FILESYSTEM 1 - #elif defined(__cpp_lib_experimental_filesystem) - #define JSON_HAS_EXPERIMENTAL_FILESYSTEM 1 - #elif !defined(__has_include) - #define JSON_HAS_EXPERIMENTAL_FILESYSTEM 1 - #elif __has_include(<filesystem>) - #define JSON_HAS_FILESYSTEM 1 - #elif __has_include(<experimental/filesystem>) - #define JSON_HAS_EXPERIMENTAL_FILESYSTEM 1 - #endif - - // std::filesystem does not work on MinGW GCC 8: https://sourceforge.net/p/mingw-w64/bugs/737/ - #if defined(__MINGW32__) && defined(__GNUC__) && __GNUC__ == 8 - #undef JSON_HAS_FILESYSTEM - #undef JSON_HAS_EXPERIMENTAL_FILESYSTEM - #endif - - // no filesystem support before GCC 8: https://en.cppreference.com/w/cpp/compiler_support - #if defined(__GNUC__) && !defined(__clang__) && __GNUC__ < 8 - #undef JSON_HAS_FILESYSTEM - #undef JSON_HAS_EXPERIMENTAL_FILESYSTEM - #endif - - // no filesystem support before Clang 7: https://en.cppreference.com/w/cpp/compiler_support - #if defined(__clang_major__) && __clang_major__ < 7 - #undef JSON_HAS_FILESYSTEM - #undef JSON_HAS_EXPERIMENTAL_FILESYSTEM - #endif - - // no filesystem support before MSVC 19.14: https://en.cppreference.com/w/cpp/compiler_support - #if defined(_MSC_VER) && _MSC_VER < 1914 - #undef JSON_HAS_FILESYSTEM - #undef JSON_HAS_EXPERIMENTAL_FILESYSTEM - #endif - - // no filesystem support before iOS 13 - #if defined(__IPHONE_OS_VERSION_MIN_REQUIRED) && __IPHONE_OS_VERSION_MIN_REQUIRED < 130000 - #undef JSON_HAS_FILESYSTEM - #undef JSON_HAS_EXPERIMENTAL_FILESYSTEM - #endif - - // no filesystem support before macOS Catalina - #if defined(__MAC_OS_X_VERSION_MIN_REQUIRED) && __MAC_OS_X_VERSION_MIN_REQUIRED < 101500 - #undef JSON_HAS_FILESYSTEM - #undef JSON_HAS_EXPERIMENTAL_FILESYSTEM - #endif - #endif -#endif - -#ifndef JSON_HAS_EXPERIMENTAL_FILESYSTEM - #define JSON_HAS_EXPERIMENTAL_FILESYSTEM 0 -#endif - -#ifndef JSON_HAS_FILESYSTEM - #define JSON_HAS_FILESYSTEM 0 -#endif - -#ifndef JSON_HAS_THREE_WAY_COMPARISON - #if defined(__cpp_impl_three_way_comparison) && __cpp_impl_three_way_comparison >= 201907L \ - && defined(__cpp_lib_three_way_comparison) && __cpp_lib_three_way_comparison >= 201907L - #define JSON_HAS_THREE_WAY_COMPARISON 1 - #else - #define JSON_HAS_THREE_WAY_COMPARISON 0 - #endif -#endif - -#ifndef JSON_HAS_RANGES - // ranges header shipping in GCC 11.1.0 (released 2021-04-27) has syntax error - #if defined(__GLIBCXX__) && __GLIBCXX__ == 20210427 - #define JSON_HAS_RANGES 0 - #elif defined(__cpp_lib_ranges) - #define JSON_HAS_RANGES 1 - #else - #define JSON_HAS_RANGES 0 - #endif -#endif - -#ifndef JSON_HAS_STATIC_RTTI - #if !defined(_HAS_STATIC_RTTI) || _HAS_STATIC_RTTI != 0 - #define JSON_HAS_STATIC_RTTI 1 - #else - #define JSON_HAS_STATIC_RTTI 0 - #endif -#endif - -#ifdef JSON_HAS_CPP_17 - #define JSON_INLINE_VARIABLE inline -#else - #define JSON_INLINE_VARIABLE -#endif - -#if JSON_HEDLEY_HAS_ATTRIBUTE(no_unique_address) - #define JSON_NO_UNIQUE_ADDRESS [[no_unique_address]] -#else - #define JSON_NO_UNIQUE_ADDRESS -#endif - -// disable documentation warnings on clang -#if defined(__clang__) - #pragma clang diagnostic push - #pragma clang diagnostic ignored "-Wdocumentation" - #pragma clang diagnostic ignored "-Wdocumentation-unknown-command" -#endif - -// allow disabling exceptions -#if (defined(__cpp_exceptions) || defined(__EXCEPTIONS) || defined(_CPPUNWIND)) && !defined(JSON_NOEXCEPTION) - #define JSON_THROW(exception) throw exception - #define JSON_TRY try - #define JSON_CATCH(exception) catch(exception) - #define JSON_INTERNAL_CATCH(exception)
catch(exception) -#else - #include <cstdlib> - #define JSON_THROW(exception) std::abort() - #define JSON_TRY if(true) - #define JSON_CATCH(exception) if(false) - #define JSON_INTERNAL_CATCH(exception) if(false) -#endif - -// override exception macros -#if defined(JSON_THROW_USER) - #undef JSON_THROW - #define JSON_THROW JSON_THROW_USER -#endif -#if defined(JSON_TRY_USER) - #undef JSON_TRY - #define JSON_TRY JSON_TRY_USER -#endif -#if defined(JSON_CATCH_USER) - #undef JSON_CATCH - #define JSON_CATCH JSON_CATCH_USER - #undef JSON_INTERNAL_CATCH - #define JSON_INTERNAL_CATCH JSON_CATCH_USER -#endif -#if defined(JSON_INTERNAL_CATCH_USER) - #undef JSON_INTERNAL_CATCH - #define JSON_INTERNAL_CATCH JSON_INTERNAL_CATCH_USER -#endif - -// allow overriding assert -#if !defined(JSON_ASSERT) - #include <cassert> // assert - #define JSON_ASSERT(x) assert(x) -#endif - -// allow to access some private functions (needed by the test suite) -#if defined(JSON_TESTS_PRIVATE) - #define JSON_PRIVATE_UNLESS_TESTED public -#else - #define JSON_PRIVATE_UNLESS_TESTED private -#endif - -/*! -@brief macro to briefly define a mapping between an enum and JSON -@def NLOHMANN_JSON_SERIALIZE_ENUM -@since version 3.4.0 -*/ -#define NLOHMANN_JSON_SERIALIZE_ENUM(ENUM_TYPE, ...) \ - template<typename BasicJsonType> \ - inline void to_json(BasicJsonType& j, const ENUM_TYPE& e) \ - { \ - static_assert(std::is_enum<ENUM_TYPE>::value, #ENUM_TYPE " must be an enum!"); \ - static const std::pair<ENUM_TYPE, BasicJsonType> m[] = __VA_ARGS__; \ - auto it = std::find_if(std::begin(m), std::end(m), \ - [e](const std::pair<ENUM_TYPE, BasicJsonType>& ej_pair) -> bool \ - { \ - return ej_pair.first == e; \ - }); \ - j = ((it != std::end(m)) ? it : std::begin(m))->second; \ - } \ - template<typename BasicJsonType> \ - inline void from_json(const BasicJsonType& j, ENUM_TYPE& e) \ - { \ - static_assert(std::is_enum<ENUM_TYPE>::value, #ENUM_TYPE " must be an enum!"); \ - static const std::pair<ENUM_TYPE, BasicJsonType> m[] = __VA_ARGS__; \ - auto it = std::find_if(std::begin(m), std::end(m), \ - [&j](const std::pair<ENUM_TYPE, BasicJsonType>& ej_pair) -> bool \ - { \ - return ej_pair.second == j; \ - }); \ - e = ((it != std::end(m)) ? it : std::begin(m))->first; \ - } - -// Ugly macros to avoid uglier copy-paste when specializing basic_json. They -// may be removed in the future once the class is split. - -#define NLOHMANN_BASIC_JSON_TPL_DECLARATION \ - template<template<typename, typename, typename...> class ObjectType, \ - template<typename, typename...> class ArrayType, \ - class StringType, class BooleanType, class NumberIntegerType, \ - class NumberUnsignedType, class NumberFloatType, \ - template<typename> class AllocatorType, \ - template<typename, typename = void> class JSONSerializer, \ - class BinaryType, \ - class CustomBaseClass> - -#define NLOHMANN_BASIC_JSON_TPL \ - basic_json<ObjectType, ArrayType, StringType, BooleanType, NumberIntegerType, NumberUnsignedType, NumberFloatType, AllocatorType, JSONSerializer, BinaryType, CustomBaseClass> - -// Macros to simplify conversion from/to types - -#define NLOHMANN_JSON_EXPAND( x ) x -#define NLOHMANN_JSON_GET_MACRO(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _16, _17, _18, _19, _20, _21, _22, _23, _24, _25, _26, _27, _28, _29, _30, _31, _32, _33, _34, _35, _36, _37, _38, _39, _40, _41, _42, _43, _44, _45, _46, _47, _48, _49, _50, _51, _52, _53, _54, _55, _56, _57, _58, _59, _60, _61, _62, _63, _64, NAME,...) NAME -#define NLOHMANN_JSON_PASTE(...)
NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_GET_MACRO(__VA_ARGS__, \ - NLOHMANN_JSON_PASTE64, \ - NLOHMANN_JSON_PASTE63, \ - NLOHMANN_JSON_PASTE62, \ - NLOHMANN_JSON_PASTE61, \ - NLOHMANN_JSON_PASTE60, \ - NLOHMANN_JSON_PASTE59, \ - NLOHMANN_JSON_PASTE58, \ - NLOHMANN_JSON_PASTE57, \ - NLOHMANN_JSON_PASTE56, \ - NLOHMANN_JSON_PASTE55, \ - NLOHMANN_JSON_PASTE54, \ - NLOHMANN_JSON_PASTE53, \ - NLOHMANN_JSON_PASTE52, \ - NLOHMANN_JSON_PASTE51, \ - NLOHMANN_JSON_PASTE50, \ - NLOHMANN_JSON_PASTE49, \ - NLOHMANN_JSON_PASTE48, \ - NLOHMANN_JSON_PASTE47, \ - NLOHMANN_JSON_PASTE46, \ - NLOHMANN_JSON_PASTE45, \ - NLOHMANN_JSON_PASTE44, \ - NLOHMANN_JSON_PASTE43, \ - NLOHMANN_JSON_PASTE42, \ - NLOHMANN_JSON_PASTE41, \ - NLOHMANN_JSON_PASTE40, \ - NLOHMANN_JSON_PASTE39, \ - NLOHMANN_JSON_PASTE38, \ - NLOHMANN_JSON_PASTE37, \ - NLOHMANN_JSON_PASTE36, \ - NLOHMANN_JSON_PASTE35, \ - NLOHMANN_JSON_PASTE34, \ - NLOHMANN_JSON_PASTE33, \ - NLOHMANN_JSON_PASTE32, \ - NLOHMANN_JSON_PASTE31, \ - NLOHMANN_JSON_PASTE30, \ - NLOHMANN_JSON_PASTE29, \ - NLOHMANN_JSON_PASTE28, \ - NLOHMANN_JSON_PASTE27, \ - NLOHMANN_JSON_PASTE26, \ - NLOHMANN_JSON_PASTE25, \ - NLOHMANN_JSON_PASTE24, \ - NLOHMANN_JSON_PASTE23, \ - NLOHMANN_JSON_PASTE22, \ - NLOHMANN_JSON_PASTE21, \ - NLOHMANN_JSON_PASTE20, \ - NLOHMANN_JSON_PASTE19, \ - NLOHMANN_JSON_PASTE18, \ - NLOHMANN_JSON_PASTE17, \ - NLOHMANN_JSON_PASTE16, \ - NLOHMANN_JSON_PASTE15, \ - NLOHMANN_JSON_PASTE14, \ - NLOHMANN_JSON_PASTE13, \ - NLOHMANN_JSON_PASTE12, \ - NLOHMANN_JSON_PASTE11, \ - NLOHMANN_JSON_PASTE10, \ - NLOHMANN_JSON_PASTE9, \ - NLOHMANN_JSON_PASTE8, \ - NLOHMANN_JSON_PASTE7, \ - NLOHMANN_JSON_PASTE6, \ - NLOHMANN_JSON_PASTE5, \ - NLOHMANN_JSON_PASTE4, \ - NLOHMANN_JSON_PASTE3, \ - NLOHMANN_JSON_PASTE2, \ - NLOHMANN_JSON_PASTE1)(__VA_ARGS__)) -#define NLOHMANN_JSON_PASTE2(func, v1) func(v1) -#define NLOHMANN_JSON_PASTE3(func, v1, v2) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE2(func, v2) -#define NLOHMANN_JSON_PASTE4(func, v1, v2, v3) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE3(func, v2, v3) -#define NLOHMANN_JSON_PASTE5(func, v1, v2, v3, v4) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE4(func, v2, v3, v4) -#define NLOHMANN_JSON_PASTE6(func, v1, v2, v3, v4, v5) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE5(func, v2, v3, v4, v5) -#define NLOHMANN_JSON_PASTE7(func, v1, v2, v3, v4, v5, v6) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE6(func, v2, v3, v4, v5, v6) -#define NLOHMANN_JSON_PASTE8(func, v1, v2, v3, v4, v5, v6, v7) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE7(func, v2, v3, v4, v5, v6, v7) -#define NLOHMANN_JSON_PASTE9(func, v1, v2, v3, v4, v5, v6, v7, v8) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE8(func, v2, v3, v4, v5, v6, v7, v8) -#define NLOHMANN_JSON_PASTE10(func, v1, v2, v3, v4, v5, v6, v7, v8, v9) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE9(func, v2, v3, v4, v5, v6, v7, v8, v9) -#define NLOHMANN_JSON_PASTE11(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE10(func, v2, v3, v4, v5, v6, v7, v8, v9, v10) -#define NLOHMANN_JSON_PASTE12(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE11(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11) -#define NLOHMANN_JSON_PASTE13(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE12(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12) -#define NLOHMANN_JSON_PASTE14(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, 
v13) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE13(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13) -#define NLOHMANN_JSON_PASTE15(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE14(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14) -#define NLOHMANN_JSON_PASTE16(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE15(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15) -#define NLOHMANN_JSON_PASTE17(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE16(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16) -#define NLOHMANN_JSON_PASTE18(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE17(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17) -#define NLOHMANN_JSON_PASTE19(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE18(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18) -#define NLOHMANN_JSON_PASTE20(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE19(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19) -#define NLOHMANN_JSON_PASTE21(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE20(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20) -#define NLOHMANN_JSON_PASTE22(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE21(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21) -#define NLOHMANN_JSON_PASTE23(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE22(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22) -#define NLOHMANN_JSON_PASTE24(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE23(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23) -#define NLOHMANN_JSON_PASTE25(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE24(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24) -#define NLOHMANN_JSON_PASTE26(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE25(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25) -#define NLOHMANN_JSON_PASTE27(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26) NLOHMANN_JSON_PASTE2(func, v1) 
NLOHMANN_JSON_PASTE26(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26) -#define NLOHMANN_JSON_PASTE28(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE27(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27) -#define NLOHMANN_JSON_PASTE29(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE28(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28) -#define NLOHMANN_JSON_PASTE30(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE29(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29) -#define NLOHMANN_JSON_PASTE31(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE30(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30) -#define NLOHMANN_JSON_PASTE32(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE31(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31) -#define NLOHMANN_JSON_PASTE33(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE32(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32) -#define NLOHMANN_JSON_PASTE34(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE33(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33) -#define NLOHMANN_JSON_PASTE35(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE34(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34) -#define NLOHMANN_JSON_PASTE36(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE35(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, 
v26, v27, v28, v29, v30, v31, v32, v33, v34, v35) -#define NLOHMANN_JSON_PASTE37(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE36(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36) -#define NLOHMANN_JSON_PASTE38(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE37(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37) -#define NLOHMANN_JSON_PASTE39(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE38(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38) -#define NLOHMANN_JSON_PASTE40(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE39(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39) -#define NLOHMANN_JSON_PASTE41(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE40(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40) -#define NLOHMANN_JSON_PASTE42(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE41(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41) -#define NLOHMANN_JSON_PASTE43(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE42(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42) -#define NLOHMANN_JSON_PASTE44(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43) 
NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE43(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43) -#define NLOHMANN_JSON_PASTE45(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE44(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44) -#define NLOHMANN_JSON_PASTE46(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE45(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45) -#define NLOHMANN_JSON_PASTE47(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE46(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46) -#define NLOHMANN_JSON_PASTE48(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE47(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47) -#define NLOHMANN_JSON_PASTE49(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE48(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48) -#define NLOHMANN_JSON_PASTE50(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE49(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49) -#define NLOHMANN_JSON_PASTE51(func, v1, v2, v3, v4, v5, 
v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE50(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50) -#define NLOHMANN_JSON_PASTE52(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE51(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51) -#define NLOHMANN_JSON_PASTE53(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE52(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52) -#define NLOHMANN_JSON_PASTE54(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE53(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53) -#define NLOHMANN_JSON_PASTE55(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE54(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54) -#define NLOHMANN_JSON_PASTE56(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE55(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55) -#define 
NLOHMANN_JSON_PASTE57(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE56(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56) -#define NLOHMANN_JSON_PASTE58(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE57(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57) -#define NLOHMANN_JSON_PASTE59(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57, v58) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE58(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57, v58) -#define NLOHMANN_JSON_PASTE60(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57, v58, v59) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE59(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57, v58, v59) -#define NLOHMANN_JSON_PASTE61(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57, v58, v59, v60) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE60(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57, v58, v59, v60) -#define NLOHMANN_JSON_PASTE62(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, 
v54, v55, v56, v57, v58, v59, v60, v61) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE61(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57, v58, v59, v60, v61) -#define NLOHMANN_JSON_PASTE63(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57, v58, v59, v60, v61, v62) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE62(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57, v58, v59, v60, v61, v62) -#define NLOHMANN_JSON_PASTE64(func, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57, v58, v59, v60, v61, v62, v63) NLOHMANN_JSON_PASTE2(func, v1) NLOHMANN_JSON_PASTE63(func, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50, v51, v52, v53, v54, v55, v56, v57, v58, v59, v60, v61, v62, v63) - -#define NLOHMANN_JSON_TO(v1) nlohmann_json_j[#v1] = nlohmann_json_t.v1; -#define NLOHMANN_JSON_FROM(v1) nlohmann_json_j.at(#v1).get_to(nlohmann_json_t.v1); -#define NLOHMANN_JSON_FROM_WITH_DEFAULT(v1) nlohmann_json_t.v1 = nlohmann_json_j.value(#v1, nlohmann_json_default_obj.v1); - -/*! -@brief macro -@def NLOHMANN_DEFINE_TYPE_INTRUSIVE -@since version 3.9.0 -*/ -#define NLOHMANN_DEFINE_TYPE_INTRUSIVE(Type, ...) \ - friend void to_json(nlohmann::json& nlohmann_json_j, const Type& nlohmann_json_t) { NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_TO, __VA_ARGS__)) } \ - friend void from_json(const nlohmann::json& nlohmann_json_j, Type& nlohmann_json_t) { NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_FROM, __VA_ARGS__)) } - -#define NLOHMANN_DEFINE_TYPE_INTRUSIVE_WITH_DEFAULT(Type, ...) \ - friend void to_json(nlohmann::json& nlohmann_json_j, const Type& nlohmann_json_t) { NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_TO, __VA_ARGS__)) } \ - friend void from_json(const nlohmann::json& nlohmann_json_j, Type& nlohmann_json_t) { const Type nlohmann_json_default_obj{}; NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_FROM_WITH_DEFAULT, __VA_ARGS__)) } - -#define NLOHMANN_DEFINE_TYPE_INTRUSIVE_ONLY_SERIALIZE(Type, ...) \ - friend void to_json(nlohmann::json& nlohmann_json_j, const Type& nlohmann_json_t) { NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_TO, __VA_ARGS__)) } - -/*! -@brief macro -@def NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE -@since version 3.9.0 -*/ -#define NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE(Type, ...) 
\ - inline void to_json(nlohmann::json& nlohmann_json_j, const Type& nlohmann_json_t) { NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_TO, __VA_ARGS__)) } \ - inline void from_json(const nlohmann::json& nlohmann_json_j, Type& nlohmann_json_t) { NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_FROM, __VA_ARGS__)) } - -#define NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE_ONLY_SERIALIZE(Type, ...) \ - inline void to_json(nlohmann::json& nlohmann_json_j, const Type& nlohmann_json_t) { NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_TO, __VA_ARGS__)) } - -#define NLOHMANN_DEFINE_TYPE_NON_INTRUSIVE_WITH_DEFAULT(Type, ...) \ - inline void to_json(nlohmann::json& nlohmann_json_j, const Type& nlohmann_json_t) { NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_TO, __VA_ARGS__)) } \ - inline void from_json(const nlohmann::json& nlohmann_json_j, Type& nlohmann_json_t) { const Type nlohmann_json_default_obj{}; NLOHMANN_JSON_EXPAND(NLOHMANN_JSON_PASTE(NLOHMANN_JSON_FROM_WITH_DEFAULT, __VA_ARGS__)) } - -// inspired from https://stackoverflow.com/a/26745591 -// allows to call any std function as if (e.g. with begin): -// using std::begin; begin(x); -// -// it allows using the detected idiom to retrieve the return type -// of such an expression -#define NLOHMANN_CAN_CALL_STD_FUNC_IMPL(std_name) \ - namespace detail { \ - using std::std_name; \ - \ - template \ - using result_of_##std_name = decltype(std_name(std::declval()...)); \ - } \ - \ - namespace detail2 { \ - struct std_name##_tag \ - { \ - }; \ - \ - template \ - std_name##_tag std_name(T&&...); \ - \ - template \ - using result_of_##std_name = decltype(std_name(std::declval()...)); \ - \ - template \ - struct would_call_std_##std_name \ - { \ - static constexpr auto const value = ::nlohmann::detail:: \ - is_detected_exact::value; \ - }; \ - } /* namespace detail2 */ \ - \ - template \ - struct would_call_std_##std_name : detail2::would_call_std_##std_name \ - { \ - } - -#ifndef JSON_USE_IMPLICIT_CONVERSIONS - #define JSON_USE_IMPLICIT_CONVERSIONS 1 -#endif - -#if JSON_USE_IMPLICIT_CONVERSIONS - #define JSON_EXPLICIT -#else - #define JSON_EXPLICIT explicit -#endif - -#ifndef JSON_DISABLE_ENUM_SERIALIZATION - #define JSON_DISABLE_ENUM_SERIALIZATION 0 -#endif - -#ifndef JSON_USE_GLOBAL_UDLS - #define JSON_USE_GLOBAL_UDLS 1 -#endif - -#if JSON_HAS_THREE_WAY_COMPARISON - #include // partial_ordering -#endif - -NLOHMANN_JSON_NAMESPACE_BEGIN -namespace detail -{ - -/////////////////////////// -// JSON type enumeration // -/////////////////////////// - -/*! -@brief the JSON type enumeration - -This enumeration collects the different JSON types. It is internally used to -distinguish the stored values, and the functions @ref basic_json::is_null(), -@ref basic_json::is_object(), @ref basic_json::is_array(), -@ref basic_json::is_string(), @ref basic_json::is_boolean(), -@ref basic_json::is_number() (with @ref basic_json::is_number_integer(), -@ref basic_json::is_number_unsigned(), and @ref basic_json::is_number_float()), -@ref basic_json::is_discarded(), @ref basic_json::is_primitive(), and -@ref basic_json::is_structured() rely on it. 
- -@note There are three enumeration entries (number_integer, number_unsigned, and -number_float), because the library distinguishes these three types for numbers: -@ref basic_json::number_unsigned_t is used for unsigned integers, -@ref basic_json::number_integer_t is used for signed integers, and -@ref basic_json::number_float_t is used for floating-point numbers or to -approximate integers which do not fit in the limits of their respective type. - -@sa see @ref basic_json::basic_json(const value_t value_type) -- create a JSON -value with the default value for a given type - -@since version 1.0.0 -*/ -enum class value_t : std::uint8_t -{ - null, ///< null value - object, ///< object (unordered set of name/value pairs) - array, ///< array (ordered collection of values) - string, ///< string value - boolean, ///< boolean value - number_integer, ///< number value (signed integer) - number_unsigned, ///< number value (unsigned integer) - number_float, ///< number value (floating-point) - binary, ///< binary array (ordered collection of bytes) - discarded ///< discarded by the parser callback function -}; - -/*! -@brief comparison operator for JSON types - -Returns an ordering that is similar to Python: -- order: null < boolean < number < object < array < string < binary -- furthermore, each type is not smaller than itself -- discarded values are not comparable -- binary is represented as a b"" string in python and directly comparable to a - string; however, making a binary array directly comparable with a string would - be surprising behavior in a JSON file. - -@since version 1.0.0 -*/ -#if JSON_HAS_THREE_WAY_COMPARISON - inline std::partial_ordering operator<=>(const value_t lhs, const value_t rhs) noexcept // *NOPAD* -#else - inline bool operator<(const value_t lhs, const value_t rhs) noexcept -#endif -{ - static constexpr std::array order = {{ - 0 /* null */, 3 /* object */, 4 /* array */, 5 /* string */, - 1 /* boolean */, 2 /* integer */, 2 /* unsigned */, 2 /* float */, - 6 /* binary */ - } - }; - - const auto l_index = static_cast(lhs); - const auto r_index = static_cast(rhs); -#if JSON_HAS_THREE_WAY_COMPARISON - if (l_index < order.size() && r_index < order.size()) - { - return order[l_index] <=> order[r_index]; // *NOPAD* - } - return std::partial_ordering::unordered; -#else - return l_index < order.size() && r_index < order.size() && order[l_index] < order[r_index]; -#endif -} - -// GCC selects the built-in operator< over an operator rewritten from -// a user-defined spaceship operator -// Clang, MSVC, and ICC select the rewritten candidate -// (see GCC bug https://gcc.gnu.org/bugzilla/show_bug.cgi?id=105200) -#if JSON_HAS_THREE_WAY_COMPARISON && defined(__GNUC__) -inline bool operator<(const value_t lhs, const value_t rhs) noexcept -{ - return std::is_lt(lhs <=> rhs); // *NOPAD* -} -#endif - -} // namespace detail -NLOHMANN_JSON_NAMESPACE_END - -// #include -// __ _____ _____ _____ -// __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.11.3 -// |_____|_____|_____|_|___| https://github.com/nlohmann/json -// -// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann -// SPDX-License-Identifier: MIT - - - -// #include - - -NLOHMANN_JSON_NAMESPACE_BEGIN -namespace detail -{ - -/*! 
-@brief replace all occurrences of a substring by another string - -@param[in,out] s the string to manipulate; changed so that all - occurrences of @a f are replaced with @a t -@param[in] f the substring to replace with @a t -@param[in] t the string to replace @a f - -@pre The search string @a f must not be empty. **This precondition is -enforced with an assertion.** - -@since version 2.0.0 -*/ -template -inline void replace_substring(StringType& s, const StringType& f, - const StringType& t) -{ - JSON_ASSERT(!f.empty()); - for (auto pos = s.find(f); // find first occurrence of f - pos != StringType::npos; // make sure f was found - s.replace(pos, f.size(), t), // replace with t, and - pos = s.find(f, pos + t.size())) // find next occurrence of f - {} -} - -/*! - * @brief string escaping as described in RFC 6901 (Sect. 4) - * @param[in] s string to escape - * @return escaped string - * - * Note the order of escaping "~" to "~0" and "/" to "~1" is important. - */ -template -inline StringType escape(StringType s) -{ - replace_substring(s, StringType{"~"}, StringType{"~0"}); - replace_substring(s, StringType{"/"}, StringType{"~1"}); - return s; -} - -/*! - * @brief string unescaping as described in RFC 6901 (Sect. 4) - * @param[in] s string to unescape - * @return unescaped string - * - * Note the order of escaping "~1" to "/" and "~0" to "~" is important. - */ -template -static void unescape(StringType& s) -{ - replace_substring(s, StringType{"~1"}, StringType{"/"}); - replace_substring(s, StringType{"~0"}, StringType{"~"}); -} - -} // namespace detail -NLOHMANN_JSON_NAMESPACE_END - -// #include -// __ _____ _____ _____ -// __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.11.3 -// |_____|_____|_____|_|___| https://github.com/nlohmann/json -// -// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann -// SPDX-License-Identifier: MIT - - - -#include // size_t - -// #include - - -NLOHMANN_JSON_NAMESPACE_BEGIN -namespace detail -{ - -/// struct to capture the start position of the current token -struct position_t -{ - /// the total number of characters read - std::size_t chars_read_total = 0; - /// the number of characters read in the current line - std::size_t chars_read_current_line = 0; - /// the number of lines read - std::size_t lines_read = 0; - - /// conversion to size_t to preserve SAX interface - constexpr operator size_t() const - { - return chars_read_total; - } -}; - -} // namespace detail -NLOHMANN_JSON_NAMESPACE_END - -// #include - -// #include -// __ _____ _____ _____ -// __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.11.3 -// |_____|_____|_____|_|___| https://github.com/nlohmann/json -// -// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann -// SPDX-FileCopyrightText: 2018 The Abseil Authors -// SPDX-License-Identifier: MIT - - - -#include // array -#include // size_t -#include // conditional, enable_if, false_type, integral_constant, is_constructible, is_integral, is_same, remove_cv, remove_reference, true_type -#include // index_sequence, make_index_sequence, index_sequence_for - -// #include - - -NLOHMANN_JSON_NAMESPACE_BEGIN -namespace detail -{ - -template -using uncvref_t = typename std::remove_cv::type>::type; - -#ifdef JSON_HAS_CPP_14 - -// the following utilities are natively available in C++14 -using std::enable_if_t; -using std::index_sequence; -using std::make_index_sequence; -using std::index_sequence_for; - -#else - -// alias templates to reduce boilerplate -template -using enable_if_t = typename std::enable_if::type; - -// The 
following code is taken from https://github.com/abseil/abseil-cpp/blob/10cb35e459f5ecca5b2ff107635da0bfa41011b4/absl/utility/utility.h -// which is part of Google Abseil (https://github.com/abseil/abseil-cpp), licensed under the Apache License 2.0. - -//// START OF CODE FROM GOOGLE ABSEIL - -// integer_sequence -// -// Class template representing a compile-time integer sequence. An instantiation -// of `integer_sequence` has a sequence of integers encoded in its -// type through its template arguments (which is a common need when -// working with C++11 variadic templates). `absl::integer_sequence` is designed -// to be a drop-in replacement for C++14's `std::integer_sequence`. -// -// Example: -// -// template< class T, T... Ints > -// void user_function(integer_sequence); -// -// int main() -// { -// // user_function's `T` will be deduced to `int` and `Ints...` -// // will be deduced to `0, 1, 2, 3, 4`. -// user_function(make_integer_sequence()); -// } -template -struct integer_sequence -{ - using value_type = T; - static constexpr std::size_t size() noexcept - { - return sizeof...(Ints); - } -}; - -// index_sequence -// -// A helper template for an `integer_sequence` of `size_t`, -// `absl::index_sequence` is designed to be a drop-in replacement for C++14's -// `std::index_sequence`. -template -using index_sequence = integer_sequence; - -namespace utility_internal -{ - -template -struct Extend; - -// Note that SeqSize == sizeof...(Ints). It's passed explicitly for efficiency. -template -struct Extend, SeqSize, 0> -{ - using type = integer_sequence < T, Ints..., (Ints + SeqSize)... >; -}; - -template -struct Extend, SeqSize, 1> -{ - using type = integer_sequence < T, Ints..., (Ints + SeqSize)..., 2 * SeqSize >; -}; - -// Recursion helper for 'make_integer_sequence'. -// 'Gen::type' is an alias for 'integer_sequence'. -template -struct Gen -{ - using type = - typename Extend < typename Gen < T, N / 2 >::type, N / 2, N % 2 >::type; -}; - -template -struct Gen -{ - using type = integer_sequence; -}; - -} // namespace utility_internal - -// Compile-time sequences of integers - -// make_integer_sequence -// -// This template alias is equivalent to -// `integer_sequence`, and is designed to be a drop-in -// replacement for C++14's `std::make_integer_sequence`. -template -using make_integer_sequence = typename utility_internal::Gen::type; - -// make_index_sequence -// -// This template alias is equivalent to `index_sequence<0, 1, ..., N-1>`, -// and is designed to be a drop-in replacement for C++14's -// `std::make_index_sequence`. -template -using make_index_sequence = make_integer_sequence; - -// index_sequence_for -// -// Converts a typename pack into an index sequence of the same length, and -// is designed to be a drop-in replacement for C++14's -// `std::index_sequence_for()` -template -using index_sequence_for = make_index_sequence; - -//// END OF CODE FROM GOOGLE ABSEIL - -#endif - -// dispatch utility (taken from ranges-v3) -template struct priority_tag : priority_tag < N - 1 > {}; -template<> struct priority_tag<0> {}; - -// taken from ranges-v3 -template -struct static_const -{ - static JSON_INLINE_VARIABLE constexpr T value{}; -}; - -#ifndef JSON_HAS_CPP_17 - template - constexpr T static_const::value; -#endif - -template -inline constexpr std::array make_array(Args&& ... 
args) -{ - return std::array {{static_cast(std::forward(args))...}}; -} - -} // namespace detail -NLOHMANN_JSON_NAMESPACE_END - -// #include -// __ _____ _____ _____ -// __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.11.3 -// |_____|_____|_____|_|___| https://github.com/nlohmann/json -// -// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann -// SPDX-License-Identifier: MIT - - - -#include // numeric_limits -#include // false_type, is_constructible, is_integral, is_same, true_type -#include // declval -#include // tuple -#include // char_traits - -// #include -// __ _____ _____ _____ -// __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.11.3 -// |_____|_____|_____|_|___| https://github.com/nlohmann/json -// -// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann -// SPDX-License-Identifier: MIT - - - -#include // random_access_iterator_tag - -// #include - -// #include - -// #include - - -NLOHMANN_JSON_NAMESPACE_BEGIN -namespace detail -{ - -template -struct iterator_types {}; - -template -struct iterator_types < - It, - void_t> -{ - using difference_type = typename It::difference_type; - using value_type = typename It::value_type; - using pointer = typename It::pointer; - using reference = typename It::reference; - using iterator_category = typename It::iterator_category; -}; - -// This is required as some compilers implement std::iterator_traits in a way that -// doesn't work with SFINAE. See https://github.com/nlohmann/json/issues/1341. -template -struct iterator_traits -{ -}; - -template -struct iterator_traits < T, enable_if_t < !std::is_pointer::value >> - : iterator_types -{ -}; - -template -struct iterator_traits::value>> -{ - using iterator_category = std::random_access_iterator_tag; - using value_type = T; - using difference_type = ptrdiff_t; - using pointer = T*; - using reference = T&; -}; - -} // namespace detail -NLOHMANN_JSON_NAMESPACE_END - -// #include - -// #include -// __ _____ _____ _____ -// __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.11.3 -// |_____|_____|_____|_|___| https://github.com/nlohmann/json -// -// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann -// SPDX-License-Identifier: MIT - - - -// #include - - -NLOHMANN_JSON_NAMESPACE_BEGIN - -NLOHMANN_CAN_CALL_STD_FUNC_IMPL(begin); - -NLOHMANN_JSON_NAMESPACE_END - -// #include -// __ _____ _____ _____ -// __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.11.3 -// |_____|_____|_____|_|___| https://github.com/nlohmann/json -// -// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann -// SPDX-License-Identifier: MIT - - - -// #include - - -NLOHMANN_JSON_NAMESPACE_BEGIN - -NLOHMANN_CAN_CALL_STD_FUNC_IMPL(end); - -NLOHMANN_JSON_NAMESPACE_END - -// #include - -// #include - -// #include -// __ _____ _____ _____ -// __| | __| | | | JSON for Modern C++ -// | | |__ | | | | | | version 3.11.3 -// |_____|_____|_____|_|___| https://github.com/nlohmann/json -// -// SPDX-FileCopyrightText: 2013-2023 Niels Lohmann -// SPDX-License-Identifier: MIT - -#ifndef INCLUDE_NLOHMANN_JSON_FWD_HPP_ - #define INCLUDE_NLOHMANN_JSON_FWD_HPP_ - - #include // int64_t, uint64_t - #include // map - #include // allocator - #include // string - #include // vector - - // #include - - - /*! - @brief namespace for Niels Lohmann - @see https://github.com/nlohmann - @since version 1.0.0 - */ - NLOHMANN_JSON_NAMESPACE_BEGIN - - /*! 
- @brief default JSONSerializer template argument - - This serializer ignores the template arguments and uses ADL - ([argument-dependent lookup](https://en.cppreference.com/w/cpp/language/adl)) - for serialization. - */ - template - struct adl_serializer; - - /// a class to store JSON values - /// @sa https://json.nlohmann.me/api/basic_json/ - template class ObjectType = - std::map, - template class ArrayType = std::vector, - class StringType = std::string, class BooleanType = bool, - class NumberIntegerType = std::int64_t, - class NumberUnsignedType = std::uint64_t, - class NumberFloatType = double, - template class AllocatorType = std::allocator, - template class JSONSerializer = - adl_serializer, - class BinaryType = std::vector, // cppcheck-suppress syntaxError - class CustomBaseClass = void> - class basic_json; - - /// @brief JSON Pointer defines a string syntax for identifying a specific value within a JSON document - /// @sa https://json.nlohmann.me/api/json_pointer/ - template - class json_pointer; - - /*! - @brief default specialization - @sa https://json.nlohmann.me/api/json/ - */ - using json = basic_json<>; - - /// @brief a minimal map-like container that preserves insertion order - /// @sa https://json.nlohmann.me/api/ordered_map/ - template - struct ordered_map; - - /// @brief specialization that maintains the insertion order of object keys - /// @sa https://json.nlohmann.me/api/ordered_json/ - using ordered_json = basic_json; - - NLOHMANN_JSON_NAMESPACE_END - -#endif // INCLUDE_NLOHMANN_JSON_FWD_HPP_ - - -NLOHMANN_JSON_NAMESPACE_BEGIN -/*! -@brief detail namespace with internal helper functions - -This namespace collects functions that should not be exposed, -implementations of some @ref basic_json methods, and meta-programming helpers. - -@since version 2.1.0 -*/ -namespace detail -{ - -///////////// -// helpers // -///////////// - -// Note to maintainers: -// -// Every trait in this file expects a non CV-qualified type. -// The only exceptions are in the 'aliases for detected' section -// (i.e. those of the form: decltype(T::member_function(std::declval()))) -// -// In this case, T has to be properly CV-qualified to constraint the function arguments -// (e.g. 
to_json(BasicJsonType&, const T&)) - -template struct is_basic_json : std::false_type {}; - -NLOHMANN_BASIC_JSON_TPL_DECLARATION -struct is_basic_json : std::true_type {}; - -// used by exceptions create() member functions -// true_type for pointer to possibly cv-qualified basic_json or std::nullptr_t -// false_type otherwise -template -struct is_basic_json_context : - std::integral_constant < bool, - is_basic_json::type>::type>::value - || std::is_same::value > -{}; - -////////////////////// -// json_ref helpers // -////////////////////// - -template -class json_ref; - -template -struct is_json_ref : std::false_type {}; - -template -struct is_json_ref> : std::true_type {}; - -////////////////////////// -// aliases for detected // -////////////////////////// - -template -using mapped_type_t = typename T::mapped_type; - -template -using key_type_t = typename T::key_type; - -template -using value_type_t = typename T::value_type; - -template -using difference_type_t = typename T::difference_type; - -template -using pointer_t = typename T::pointer; - -template -using reference_t = typename T::reference; - -template -using iterator_category_t = typename T::iterator_category; - -template -using to_json_function = decltype(T::to_json(std::declval()...)); - -template -using from_json_function = decltype(T::from_json(std::declval()...)); - -template -using get_template_function = decltype(std::declval().template get()); - -// trait checking if JSONSerializer::from_json(json const&, udt&) exists -template -struct has_from_json : std::false_type {}; - -// trait checking if j.get is valid -// use this trait instead of std::is_constructible or std::is_convertible, -// both rely on, or make use of implicit conversions, and thus fail when T -// has several constructors/operator= (see https://github.com/nlohmann/json/issues/958) -template -struct is_getable -{ - static constexpr bool value = is_detected::value; -}; - -template -struct has_from_json < BasicJsonType, T, enable_if_t < !is_basic_json::value >> -{ - using serializer = typename BasicJsonType::template json_serializer; - - static constexpr bool value = - is_detected_exact::value; -}; - -// This trait checks if JSONSerializer::from_json(json const&) exists -// this overload is used for non-default-constructible user-defined-types -template -struct has_non_default_from_json : std::false_type {}; - -template -struct has_non_default_from_json < BasicJsonType, T, enable_if_t < !is_basic_json::value >> -{ - using serializer = typename BasicJsonType::template json_serializer; - - static constexpr bool value = - is_detected_exact::value; -}; - -// This trait checks if BasicJsonType::json_serializer::to_json exists -// Do not evaluate the trait when T is a basic_json type, to avoid template instantiation infinite recursion. 
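// --- Editor's illustrative sketch (not part of the deleted header) ---------------------
// The detection traits above (has_from_json, is_getable, has_non_default_from_json) and
// has_to_json below are what let basic_json pick up user-provided serializers via ADL.
// A minimal, hypothetical example of the pattern they detect; the `person` type, its
// members, and the standalone-file framing are assumptions for illustration only.
// NLOHMANN_DEFINE_TYPE_INTRUSIVE(person, name, age) from earlier in this header would
// generate an equivalent pair of functions via the NLOHMANN_JSON_TO/FROM macros.

#include <string>
#include <nlohmann/json.hpp>

struct person {
    std::string name;
    int age;
};

// free functions found via ADL; their presence is what has_to_json / has_from_json detect
void to_json(nlohmann::json& j, const person& p) {
    j = nlohmann::json{ { "name", p.name }, { "age", p.age } };
}

void from_json(const nlohmann::json& j, person& p) {
    j.at("name").get_to(p.name);
    j.at("age").get_to(p.age);
}

// usage:
//   nlohmann::json j = person{ "Ada", 36 };   // calls to_json
//   person p = j.get<person>();               // calls from_json
// ----------------------------------------------------------------------------------------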
-template -struct has_to_json : std::false_type {}; - -template -struct has_to_json < BasicJsonType, T, enable_if_t < !is_basic_json::value >> -{ - using serializer = typename BasicJsonType::template json_serializer; - - static constexpr bool value = - is_detected_exact::value; -}; - -template -using detect_key_compare = typename T::key_compare; - -template -struct has_key_compare : std::integral_constant::value> {}; - -// obtains the actual object key comparator -template -struct actual_object_comparator -{ - using object_t = typename BasicJsonType::object_t; - using object_comparator_t = typename BasicJsonType::default_object_comparator_t; - using type = typename std::conditional < has_key_compare::value, - typename object_t::key_compare, object_comparator_t>::type; -}; - -template -using actual_object_comparator_t = typename actual_object_comparator::type; - -///////////////// -// char_traits // -///////////////// - -// Primary template of char_traits calls std char_traits -template -struct char_traits : std::char_traits -{}; - -// Explicitly define char traits for unsigned char since it is not standard -template<> -struct char_traits : std::char_traits -{ - using char_type = unsigned char; - using int_type = uint64_t; - - // Redefine to_int_type function - static int_type to_int_type(char_type c) noexcept - { - return static_cast(c); - } - - static char_type to_char_type(int_type i) noexcept - { - return static_cast(i); - } - - static constexpr int_type eof() noexcept - { - return static_cast(EOF); - } -}; - -// Explicitly define char traits for signed char since it is not standard -template<> -struct char_traits : std::char_traits -{ - using char_type = signed char; - using int_type = uint64_t; - - // Redefine to_int_type function - static int_type to_int_type(char_type c) noexcept - { - return static_cast(c); - } - - static char_type to_char_type(int_type i) noexcept - { - return static_cast(i); - } - - static constexpr int_type eof() noexcept - { - return static_cast(EOF); - } -}; - -/////////////////// -// is_ functions // -/////////////////// - -// https://en.cppreference.com/w/cpp/types/conjunction -template struct conjunction : std::true_type { }; -template struct conjunction : B { }; -template -struct conjunction -: std::conditional(B::value), conjunction, B>::type {}; - -// https://en.cppreference.com/w/cpp/types/negation -template struct negation : std::integral_constant < bool, !B::value > { }; - -// Reimplementation of is_constructible and is_default_constructible, due to them being broken for -// std::pair and std::tuple until LWG 2367 fix (see https://cplusplus.github.io/LWG/lwg-defects.html#2367). -// This causes compile errors in e.g. clang 3.5 or gcc 4.9. 
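// --- Editor's illustrative sketch (not part of the deleted header) ---------------------
// The char_traits<unsigned char> / char_traits<signed char> specializations above appear
// to exist so the parser's input path can consume byte buffers whose element type is not
// plain char (the traits that follow re-implement is_default_constructible for pair/tuple
// because of LWG 2367). A small standalone example against the public API; the buffer
// contents are an assumption for illustration:

#include <cstdint>
#include <vector>
#include <nlohmann/json.hpp>

int main() {
    // JSON text held as raw bytes rather than char
    const std::vector<std::uint8_t> buf = { '{', '"', 'k', '"', ':', '1', '}' };

    // parsing from an iterator range over std::uint8_t is supported by the public API
    const nlohmann::json j = nlohmann::json::parse(buf.begin(), buf.end());

    return j.at("k") == 1 ? 0 : 1;  // exit code 0 on success
}
// ----------------------------------------------------------------------------------------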
-template -struct is_default_constructible : std::is_default_constructible {}; - -template -struct is_default_constructible> - : conjunction, is_default_constructible> {}; - -template -struct is_default_constructible> - : conjunction, is_default_constructible> {}; - -template -struct is_default_constructible> - : conjunction...> {}; - -template -struct is_default_constructible> - : conjunction...> {}; - -template -struct is_constructible : std::is_constructible {}; - -template -struct is_constructible> : is_default_constructible> {}; - -template -struct is_constructible> : is_default_constructible> {}; - -template -struct is_constructible> : is_default_constructible> {}; - -template -struct is_constructible> : is_default_constructible> {}; - -template -struct is_iterator_traits : std::false_type {}; - -template -struct is_iterator_traits> -{ - private: - using traits = iterator_traits; - - public: - static constexpr auto value = - is_detected::value && - is_detected::value && - is_detected::value && - is_detected::value && - is_detected::value; -}; - -template -struct is_range -{ - private: - using t_ref = typename std::add_lvalue_reference::type; - - using iterator = detected_t; - using sentinel = detected_t; - - // to be 100% correct, it should use https://en.cppreference.com/w/cpp/iterator/input_or_output_iterator - // and https://en.cppreference.com/w/cpp/iterator/sentinel_for - // but reimplementing these would be too much work, as a lot of other concepts are used underneath - static constexpr auto is_iterator_begin = - is_iterator_traits>::value; - - public: - static constexpr bool value = !std::is_same::value && !std::is_same::value && is_iterator_begin; -}; - -template -using iterator_t = enable_if_t::value, result_of_begin())>>; - -template -using range_value_t = value_type_t>>; - -// The following implementation of is_complete_type is taken from -// https://blogs.msdn.microsoft.com/vcblog/2015/12/02/partial-support-for-expression-sfinae-in-vs-2015-update-1/ -// and is written by Xiang Fan who agreed to using it in this library. - -template -struct is_complete_type : std::false_type {}; - -template -struct is_complete_type : std::true_type {}; - -template -struct is_compatible_object_type_impl : std::false_type {}; - -template -struct is_compatible_object_type_impl < - BasicJsonType, CompatibleObjectType, - enable_if_t < is_detected::value&& - is_detected::value >> -{ - using object_t = typename BasicJsonType::object_t; - - // macOS's is_constructible does not play well with nonesuch... 
- static constexpr bool value = - is_constructible::value && - is_constructible::value; -}; - -template -struct is_compatible_object_type - : is_compatible_object_type_impl {}; - -template -struct is_constructible_object_type_impl : std::false_type {}; - -template -struct is_constructible_object_type_impl < - BasicJsonType, ConstructibleObjectType, - enable_if_t < is_detected::value&& - is_detected::value >> -{ - using object_t = typename BasicJsonType::object_t; - - static constexpr bool value = - (is_default_constructible::value && - (std::is_move_assignable::value || - std::is_copy_assignable::value) && - (is_constructible::value && - std::is_same < - typename object_t::mapped_type, - typename ConstructibleObjectType::mapped_type >::value)) || - (has_from_json::value || - has_non_default_from_json < - BasicJsonType, - typename ConstructibleObjectType::mapped_type >::value); -}; - -template -struct is_constructible_object_type - : is_constructible_object_type_impl {}; - -template -struct is_compatible_string_type -{ - static constexpr auto value = - is_constructible::value; -}; - -template -struct is_constructible_string_type -{ - // launder type through decltype() to fix compilation failure on ICPC -#ifdef __INTEL_COMPILER - using laundered_type = decltype(std::declval()); -#else - using laundered_type = ConstructibleStringType; -#endif - - static constexpr auto value = - conjunction < - is_constructible, - is_detected_exact>::value; -}; - -template -struct is_compatible_array_type_impl : std::false_type {}; - -template -struct is_compatible_array_type_impl < - BasicJsonType, CompatibleArrayType, - enable_if_t < - is_detected::value&& - is_iterator_traits>>::value&& -// special case for types like std::filesystem::path whose iterator's value_type are themselves -// c.f. https://github.com/nlohmann/json/pull/3073 - !std::is_same>::value >> -{ - static constexpr bool value = - is_constructible>::value; -}; - -template -struct is_compatible_array_type - : is_compatible_array_type_impl {}; - -template -struct is_constructible_array_type_impl : std::false_type {}; - -template -struct is_constructible_array_type_impl < - BasicJsonType, ConstructibleArrayType, - enable_if_t::value >> - : std::true_type {}; - -template -struct is_constructible_array_type_impl < - BasicJsonType, ConstructibleArrayType, - enable_if_t < !std::is_same::value&& - !is_compatible_string_type::value&& - is_default_constructible::value&& -(std::is_move_assignable::value || - std::is_copy_assignable::value)&& -is_detected::value&& -is_iterator_traits>>::value&& -is_detected::value&& -// special case for types like std::filesystem::path whose iterator's value_type are themselves -// c.f. https://github.com/nlohmann/json/pull/3073 -!std::is_same>::value&& - is_complete_type < - detected_t>::value >> -{ - using value_type = range_value_t; - - static constexpr bool value = - std::is_same::value || - has_from_json::value || - has_non_default_from_json < - BasicJsonType, - value_type >::value; -}; - -template -struct is_constructible_array_type - : is_constructible_array_type_impl {}; - -template -struct is_compatible_integer_type_impl : std::false_type {}; - -template -struct is_compatible_integer_type_impl < - RealIntegerType, CompatibleNumberIntegerType, - enable_if_t < std::is_integral::value&& - std::is_integral::value&& - !std::is_same::value >> -{ - // is there an assert somewhere on overflows? 
- using RealLimits = std::numeric_limits; - using CompatibleLimits = std::numeric_limits; - - static constexpr auto value = - is_constructible::value && - CompatibleLimits::is_integer && - RealLimits::is_signed == CompatibleLimits::is_signed; -}; - -template -struct is_compatible_integer_type - : is_compatible_integer_type_impl {}; - -template -struct is_compatible_type_impl: std::false_type {}; - -template -struct is_compatible_type_impl < - BasicJsonType, CompatibleType, - enable_if_t::value >> -{ - static constexpr bool value = - has_to_json::value; -}; - -template -struct is_compatible_type - : is_compatible_type_impl {}; - -template -struct is_constructible_tuple : std::false_type {}; - -template -struct is_constructible_tuple> : conjunction...> {}; - -template -struct is_json_iterator_of : std::false_type {}; - -template -struct is_json_iterator_of : std::true_type {}; - -template -struct is_json_iterator_of : std::true_type -{}; - -// checks if a given type T is a template specialization of Primary -template