Compare commits: master...gg/fix-pyt (7 commits)

Commits:
  117f7adbd9
  91deef4606
  902de8826b
  3e3cc7102f
  c172b322c2
  d8f2da6b9f
  39a41a53b0

904 changed files with 255,472 additions and 215,858 deletions
.clang-format (161 lines)
@@ -1,161 +0,0 @@
---
Language: Cpp
AlignAfterOpenBracket: Align
AlignArrayOfStructures: Left
AlignConsecutiveAssignments: AcrossComments
AlignConsecutiveBitFields: AcrossComments
AlignConsecutiveDeclarations: AcrossComments
AlignConsecutiveMacros: AcrossComments
# AlignConsecutiveShortCaseStatements: AcrossComments
AlignEscapedNewlines: Left # LeftWithLastLine
AlignOperands: Align
AlignTrailingComments:
  Kind: Always
  OverEmptyLines: 1
AllowAllArgumentsOnNextLine: true
AllowAllParametersOfDeclarationOnNextLine: false
# AllowBreakBeforeNoexceptSpecifier: OnlyWithParen
AllowShortBlocksOnASingleLine: Never
AllowShortCaseLabelsOnASingleLine: false
AllowShortFunctionsOnASingleLine: Inline
AllowShortIfStatementsOnASingleLine: Never
AllowShortLambdasOnASingleLine: Inline
AllowShortLoopsOnASingleLine: false
AlwaysBreakBeforeMultilineStrings: true
BinPackArguments: true
BinPackParameters: true # OnePerLine
BitFieldColonSpacing: Both
BreakBeforeBraces: Custom # Attach
BraceWrapping:
  AfterCaseLabel: true
  AfterClass: false
  AfterControlStatement: false
  AfterEnum: false
  AfterFunction: false
  AfterNamespace: false
  AfterObjCDeclaration: false
  AfterStruct: false
  AfterUnion: false
  AfterExternBlock: false
  BeforeCatch: false
  BeforeElse: false
  BeforeLambdaBody: false
  BeforeWhile: false
  IndentBraces: false
  SplitEmptyFunction: false
  SplitEmptyRecord: false
  SplitEmptyNamespace: false
# BreakAdjacentStringLiterals: true
BreakAfterAttributes: Never
BreakBeforeBinaryOperators: None
BreakBeforeInlineASMColon: OnlyMultiline
BreakBeforeTernaryOperators: false
# BreakBinaryOperations: Never
BreakConstructorInitializers: AfterColon
# BreakFunctionDefinitionParameters: false
BreakInheritanceList: AfterComma
BreakStringLiterals: true
# BreakTemplateDeclarations: Yes
ColumnLimit: 120
CommentPragmas: '^ IWYU pragma:'
CompactNamespaces: false
ConstructorInitializerIndentWidth: 4
ContinuationIndentWidth: 4
Cpp11BracedListStyle: false
DerivePointerAlignment: false
DisableFormat: false
EmptyLineBeforeAccessModifier: Leave
EmptyLineAfterAccessModifier: Never
ExperimentalAutoDetectBinPacking: false
FixNamespaceComments: true
IncludeBlocks: Regroup
IncludeCategories:
  - Regex: '^<.*\.h>'
    Priority: 1
    SortPriority: 0
  - Regex: '^<.*'
    Priority: 2
    SortPriority: 0
  - Regex: '.*'
    Priority: 3
    SortPriority: 0
IncludeIsMainRegex: '([-_](test|unittest))?$'
IncludeIsMainSourceRegex: ''
IndentAccessModifiers: false
IndentCaseBlocks: true
IndentCaseLabels: true
IndentExternBlock: NoIndent
IndentGotoLabels: false
IndentPPDirectives: AfterHash
IndentWidth: 4
IndentWrappedFunctionNames: false
InsertBraces: true # NOTE: may lead to incorrect formatting
InsertNewlineAtEOF: true
JavaScriptQuotes: Leave
JavaScriptWrapImports: true
KeepEmptyLinesAtTheStartOfBlocks: false
LambdaBodyIndentation: Signature
LineEnding: LF
MacroBlockBegin: ''
MacroBlockEnd: ''
MaxEmptyLinesToKeep: 1
NamespaceIndentation: None
ObjCBinPackProtocolList: Auto
ObjCBlockIndentWidth: 4
ObjCSpaceAfterProperty: true
ObjCSpaceBeforeProtocolList: true
PPIndentWidth: -1
PackConstructorInitializers: CurrentLine
PenaltyBreakAssignment: 2
PenaltyBreakBeforeFirstCallParameter: 1
PenaltyBreakComment: 300
PenaltyBreakFirstLessLess: 120
PenaltyBreakString: 1000
PenaltyBreakTemplateDeclaration: 10
PenaltyExcessCharacter: 1000000
PenaltyReturnTypeOnItsOwnLine: 200
PointerAlignment: Middle
QualifierAlignment: Left
#QualifierOrder: ['static', 'inline', 'friend', 'constexpr', 'const', 'volatile', 'type', 'restrict']
RawStringFormats:
  - Language: Cpp
    Delimiters:
      - cc
      - CC
      - cpp
      - Cpp
      - CPP
      - 'c++'
      - 'C++'
    CanonicalDelimiter: ''
ReferenceAlignment: Middle
ReflowComments: false # IndentOnly
SeparateDefinitionBlocks: Always
SortIncludes: CaseInsensitive
SortUsingDeclarations: LexicographicNumeric
SpaceAfterCStyleCast: true
SpaceAfterLogicalNot: false
SpaceAfterTemplateKeyword: true
SpaceBeforeAssignmentOperators: true
SpaceBeforeCpp11BracedList: false
SpaceBeforeCtorInitializerColon: true
SpaceBeforeInheritanceColon: true
SpaceBeforeParens: ControlStatements
SpaceBeforeRangeBasedForLoopColon: true
SpaceInEmptyBlock: false
SpaceInEmptyParentheses: false
SpacesBeforeTrailingComments: 2
SpacesInAngles: Never
SpacesInContainerLiterals: true
SpacesInLineCommentPrefix:
  Minimum: 1
  Maximum: -1
SpacesInParentheses: false
SpacesInSquareBrackets: false
SpaceBeforeSquareBrackets: false
Standard: c++17
TabWidth: 4
UseTab: Never
WhitespaceSensitiveMacros: ['STRINGIZE']
...
@@ -17,10 +17,8 @@ Checks: >
     -clang-analyzer-security.insecureAPI.DeprecatedOrUnsafeBufferHandling,
     performance-*,
     portability-*,
-    -portability-simd-intrinsics,
     misc-*,
     -misc-const-correctness,
     -misc-non-private-member-variables-in-classes,
     -misc-no-recursion,
-    -misc-use-anonymous-namespace,
 FormatStyle: none
@@ -1,92 +0,0 @@
ARG UBUNTU_VERSION=22.04

FROM ubuntu:$UBUNTU_VERSION AS build

ARG TARGETARCH

ARG GGML_CPU_ARM_ARCH=armv8-a

RUN apt-get update && \
    apt-get install -y build-essential git cmake libcurl4-openssl-dev

WORKDIR /app

COPY . .

RUN if [ "$TARGETARCH" = "amd64" ]; then \
        cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -DLLAMA_CURL=ON -DGGML_NATIVE=OFF -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON; \
    elif [ "$TARGETARCH" = "arm64" ]; then \
        cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -DLLAMA_CURL=ON -DGGML_NATIVE=OFF -DGGML_CPU_ARM_ARCH=${GGML_CPU_ARM_ARCH}; \
    else \
        echo "Unsupported architecture"; \
        exit 1; \
    fi && \
    cmake --build build -j $(nproc)

RUN mkdir -p /app/lib && \
    find build -name "*.so" -exec cp {} /app/lib \;

RUN mkdir -p /app/full \
    && cp build/bin/* /app/full \
    && cp *.py /app/full \
    && cp -r gguf-py /app/full \
    && cp -r requirements /app/full \
    && cp requirements.txt /app/full \
    && cp .devops/tools.sh /app/full/tools.sh

## Base image
FROM ubuntu:$UBUNTU_VERSION AS base

RUN apt-get update \
    && apt-get install -y libgomp1 curl\
    && apt autoremove -y \
    && apt clean -y \
    && rm -rf /tmp/* /var/tmp/* \
    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
    && find /var/cache -type f -delete

COPY --from=build /app/lib/ /app

### Full
FROM base AS full

COPY --from=build /app/full /app

WORKDIR /app

RUN apt-get update \
    && apt-get install -y \
    git \
    python3 \
    python3-pip \
    && pip install --upgrade pip setuptools wheel \
    && pip install -r requirements.txt \
    && apt autoremove -y \
    && apt clean -y \
    && rm -rf /tmp/* /var/tmp/* \
    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
    && find /var/cache -type f -delete

ENTRYPOINT ["/app/tools.sh"]

### Light, CLI only
FROM base AS light

COPY --from=build /app/full/llama-cli /app

WORKDIR /app

ENTRYPOINT [ "/app/llama-cli" ]

### Server, Server only
FROM base AS server

ENV LLAMA_ARG_HOST=0.0.0.0

COPY --from=build /app/full/llama-server /app

WORKDIR /app

HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]

ENTRYPOINT [ "/app/llama-server" ]
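The deleted Dockerfile above is a multi-stage build exposing `full`, `light` (CLI only), and `server` stages. As an illustration only (the image tags and Dockerfile path below are assumptions, not part of the diff), selecting a stage with `--target` would look roughly like:

    # hypothetical tags; pick the stage you want from the multi-stage build
    docker build --target light  -t llama-cpp:light  -f .devops/cpu.Dockerfile .
    docker build --target server -t llama-cpp:server -f .devops/cpu.Dockerfile .
    # the server stage serves on port 8080 (see the HEALTHCHECK above)
    docker run -p 8080:8080 -v /path/to/models:/models llama-cpp:server -m /models/model.gguf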
@@ -1,94 +0,0 @@
ARG UBUNTU_VERSION=22.04
# This needs to generally match the container host's environment.
ARG CUDA_VERSION=12.6.0
# Target the CUDA build image
ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}

ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}

FROM ${BASE_CUDA_DEV_CONTAINER} AS build

# CUDA architecture to build for (defaults to all supported archs)
ARG CUDA_DOCKER_ARCH=default

RUN apt-get update && \
    apt-get install -y build-essential cmake python3 python3-pip git libcurl4-openssl-dev libgomp1

WORKDIR /app

COPY . .

RUN if [ "${CUDA_DOCKER_ARCH}" != "default" ]; then \
        export CMAKE_ARGS="-DCMAKE_CUDA_ARCHITECTURES=${CUDA_DOCKER_ARCH}"; \
    fi && \
    cmake -B build -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DLLAMA_CURL=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
    cmake --build build --config Release -j$(nproc)

RUN mkdir -p /app/lib && \
    find build -name "*.so" -exec cp {} /app/lib \;

RUN mkdir -p /app/full \
    && cp build/bin/* /app/full \
    && cp *.py /app/full \
    && cp -r gguf-py /app/full \
    && cp -r requirements /app/full \
    && cp requirements.txt /app/full \
    && cp .devops/tools.sh /app/full/tools.sh

## Base image
FROM ${BASE_CUDA_RUN_CONTAINER} AS base

RUN apt-get update \
    && apt-get install -y libgomp1 curl\
    && apt autoremove -y \
    && apt clean -y \
    && rm -rf /tmp/* /var/tmp/* \
    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
    && find /var/cache -type f -delete

COPY --from=build /app/lib/ /app

### Full
FROM base AS full

COPY --from=build /app/full /app

WORKDIR /app

RUN apt-get update \
    && apt-get install -y \
    git \
    python3 \
    python3-pip \
    && pip install --upgrade pip setuptools wheel \
    && pip install -r requirements.txt \
    && apt autoremove -y \
    && apt clean -y \
    && rm -rf /tmp/* /var/tmp/* \
    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
    && find /var/cache -type f -delete

ENTRYPOINT ["/app/tools.sh"]

### Light, CLI only
FROM base AS light

COPY --from=build /app/full/llama-cli /app

WORKDIR /app

ENTRYPOINT [ "/app/llama-cli" ]

### Server, Server only
FROM base AS server

ENV LLAMA_ARG_HOST=0.0.0.0

COPY --from=build /app/full/llama-server /app

WORKDIR /app

HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]

ENTRYPOINT [ "/app/llama-server" ]
.devops/full-cuda.Dockerfile (new file)
@@ -0,0 +1,36 @@
ARG UBUNTU_VERSION=22.04

# This needs to generally match the container host's environment.
ARG CUDA_VERSION=11.7.1

# Target the CUDA build image
ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}

FROM ${BASE_CUDA_DEV_CONTAINER} as build

# Unless otherwise specified, we make a fat build.
ARG CUDA_DOCKER_ARCH=all

RUN apt-get update && \
    apt-get install -y build-essential python3 python3-pip git libcurl4-openssl-dev libgomp1

COPY requirements.txt requirements.txt
COPY requirements requirements

RUN pip install --upgrade pip setuptools wheel \
    && pip install -r requirements.txt

WORKDIR /app

COPY . .

# Set nvcc architecture
ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
# Enable CUDA
ENV GGML_CUDA=1
# Enable cURL
ENV LLAMA_CURL=1

RUN make -j$(nproc)

ENTRYPOINT ["/app/.devops/tools.sh"]
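The ARG values in the file above (UBUNTU_VERSION, CUDA_VERSION, CUDA_DOCKER_ARCH) can be overridden at build time; a rough sketch of such an invocation (the image tag is illustrative, not from the diff):

    docker build -f .devops/full-cuda.Dockerfile \
        --build-arg CUDA_VERSION=11.7.1 \
        --build-arg CUDA_DOCKER_ARCH=all \
        -t local/llama.cpp:full-cuda .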
.devops/full-rocm.Dockerfile (new file)
@@ -0,0 +1,50 @@
ARG UBUNTU_VERSION=22.04

# This needs to generally match the container host's environment.
ARG ROCM_VERSION=5.6

# Target the CUDA build image
ARG BASE_ROCM_DEV_CONTAINER=rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete

FROM ${BASE_ROCM_DEV_CONTAINER} as build

# Unless otherwise specified, we make a fat build.
# List from https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878
# This is mostly tied to rocBLAS supported archs.
ARG ROCM_DOCKER_ARCH=\
    gfx803 \
    gfx900 \
    gfx906 \
    gfx908 \
    gfx90a \
    gfx1010 \
    gfx1030 \
    gfx1100 \
    gfx1101 \
    gfx1102

COPY requirements.txt requirements.txt
COPY requirements requirements

RUN pip install --upgrade pip setuptools wheel \
    && pip install -r requirements.txt

WORKDIR /app

COPY . .

# Set nvcc architecture
ENV GPU_TARGETS=${ROCM_DOCKER_ARCH}
# Enable ROCm
ENV GGML_HIPBLAS=1
ENV CC=/opt/rocm/llvm/bin/clang
ENV CXX=/opt/rocm/llvm/bin/clang++

# Enable cURL
ENV LLAMA_CURL=1
RUN apt-get update && \
    apt-get install -y libcurl4-openssl-dev

RUN make -j$(nproc)

ENTRYPOINT ["/app/.devops/tools.sh"]
.devops/full.Dockerfile (new file)
@@ -0,0 +1,25 @@
ARG UBUNTU_VERSION=22.04

FROM ubuntu:$UBUNTU_VERSION as build

RUN apt-get update && \
    apt-get install -y build-essential python3 python3-pip git libcurl4-openssl-dev libgomp1

COPY requirements.txt requirements.txt
COPY requirements requirements

RUN pip install --upgrade pip setuptools wheel \
    && pip install -r requirements.txt

WORKDIR /app

COPY . .

ENV LLAMA_CURL=1

RUN make -j$(nproc)

ENV LC_ALL=C.utf8

ENTRYPOINT ["/app/.devops/tools.sh"]
@@ -1,91 +0,0 @@
ARG ONEAPI_VERSION=2025.0.0-0-devel-ubuntu22.04

## Build Image

FROM intel/oneapi-basekit:$ONEAPI_VERSION AS build

ARG GGML_SYCL_F16=OFF
RUN apt-get update && \
    apt-get install -y git libcurl4-openssl-dev

WORKDIR /app

COPY . .

RUN if [ "${GGML_SYCL_F16}" = "ON" ]; then \
        echo "GGML_SYCL_F16 is set" \
        && export OPT_SYCL_F16="-DGGML_SYCL_F16=ON"; \
    fi && \
    echo "Building with dynamic libs" && \
    cmake -B build -DGGML_NATIVE=OFF -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_CURL=ON ${OPT_SYCL_F16} && \
    cmake --build build --config Release -j$(nproc)

RUN mkdir -p /app/lib && \
    find build -name "*.so" -exec cp {} /app/lib \;

RUN mkdir -p /app/full \
    && cp build/bin/* /app/full \
    && cp *.py /app/full \
    && cp -r gguf-py /app/full \
    && cp -r requirements /app/full \
    && cp requirements.txt /app/full \
    && cp .devops/tools.sh /app/full/tools.sh

FROM intel/oneapi-basekit:$ONEAPI_VERSION AS base

RUN apt-get update \
    && apt-get install -y libgomp1 curl\
    && apt autoremove -y \
    && apt clean -y \
    && rm -rf /tmp/* /var/tmp/* \
    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
    && find /var/cache -type f -delete

### Full
FROM base AS full

COPY --from=build /app/lib/ /app
COPY --from=build /app/full /app

WORKDIR /app

RUN apt-get update \
    && apt-get install -y \
    git \
    python3 \
    python3-pip \
    && pip install --upgrade pip setuptools wheel \
    && pip install -r requirements.txt \
    && apt autoremove -y \
    && apt clean -y \
    && rm -rf /tmp/* /var/tmp/* \
    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
    && find /var/cache -type f -delete

ENTRYPOINT ["/app/tools.sh"]

### Light, CLI only
FROM base AS light

COPY --from=build /app/lib/ /app
COPY --from=build /app/full/llama-cli /app

WORKDIR /app

ENTRYPOINT [ "/app/llama-cli" ]

### Server, Server only
FROM base AS server

ENV LLAMA_ARG_HOST=0.0.0.0

COPY --from=build /app/lib/ /app
COPY --from=build /app/full/llama-server /app

WORKDIR /app

HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]

ENTRYPOINT [ "/app/llama-server" ]
@@ -1,44 +0,0 @@
ARG ASCEND_VERSION=8.0.rc2.alpha003-910b-openeuler22.03-py3.8

FROM ascendai/cann:$ASCEND_VERSION AS build

WORKDIR /app

COPY . .

RUN yum install -y gcc g++ cmake make
ENV ASCEND_TOOLKIT_HOME=/usr/local/Ascend/ascend-toolkit/latest
ENV LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:$LIBRARY_PATH
ENV LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:${ASCEND_TOOLKIT_HOME}/lib64/plugin/opskernel:${ASCEND_TOOLKIT_HOME}/lib64/plugin/nnengine:${ASCEND_TOOLKIT_HOME}/opp/built-in/op_impl/ai_core/tbe/op_tiling:${LD_LIBRARY_PATH}
ENV PYTHONPATH=${ASCEND_TOOLKIT_HOME}/python/site-packages:${ASCEND_TOOLKIT_HOME}/opp/built-in/op_impl/ai_core/tbe:${PYTHONPATH}
ENV PATH=${ASCEND_TOOLKIT_HOME}/bin:${ASCEND_TOOLKIT_HOME}/compiler/ccec_compiler/bin:${PATH}
ENV ASCEND_AICPU_PATH=${ASCEND_TOOLKIT_HOME}
ENV ASCEND_OPP_PATH=${ASCEND_TOOLKIT_HOME}/opp
ENV TOOLCHAIN_HOME=${ASCEND_TOOLKIT_HOME}/toolkit
ENV ASCEND_HOME_PATH=${ASCEND_TOOLKIT_HOME}

# find libascend_hal.so, because the drive hasn`t been mounted.
ENV LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/runtime/lib64/stub:$LD_LIBRARY_PATH

RUN echo "Building with static libs" && \
    source /usr/local/Ascend/ascend-toolkit/set_env.sh --force && \
    cmake -B build -DGGML_NATIVE=OFF -DGGML_CANN=ON -DBUILD_SHARED_LIBS=OFF && \
    cmake --build build --config Release --target llama-cli

# TODO: use image with NNRT
FROM ascendai/cann:$ASCEND_VERSION AS runtime
COPY --from=build /app/build/bin/llama-cli /llama-cli

ENV LC_ALL=C.utf8

ENV ASCEND_TOOLKIT_HOME=/usr/local/Ascend/ascend-toolkit/latest
ENV LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:$LIBRARY_PATH
ENV LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:${ASCEND_TOOLKIT_HOME}/lib64/plugin/opskernel:${ASCEND_TOOLKIT_HOME}/lib64/plugin/nnengine:${ASCEND_TOOLKIT_HOME}/opp/built-in/op_impl/ai_core/tbe/op_tiling:${LD_LIBRARY_PATH}
ENV PYTHONPATH=${ASCEND_TOOLKIT_HOME}/python/site-packages:${ASCEND_TOOLKIT_HOME}/opp/built-in/op_impl/ai_core/tbe:${PYTHONPATH}
ENV PATH=${ASCEND_TOOLKIT_HOME}/bin:${ASCEND_TOOLKIT_HOME}/compiler/ccec_compiler/bin:${PATH}
ENV ASCEND_AICPU_PATH=${ASCEND_TOOLKIT_HOME}
ENV ASCEND_OPP_PATH=${ASCEND_TOOLKIT_HOME}/opp
ENV TOOLCHAIN_HOME=${ASCEND_TOOLKIT_HOME}/toolkit
ENV ASCEND_HOME_PATH=${ASCEND_TOOLKIT_HOME}

ENTRYPOINT ["/llama-cli" ]
.devops/llama-cli-cuda.Dockerfile (new file)
@@ -0,0 +1,35 @@
ARG UBUNTU_VERSION=22.04
# This needs to generally match the container host's environment.
ARG CUDA_VERSION=11.7.1
# Target the CUDA build image
ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
# Target the CUDA runtime image
ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}

FROM ${BASE_CUDA_DEV_CONTAINER} as build

# Unless otherwise specified, we make a fat build.
ARG CUDA_DOCKER_ARCH=all

RUN apt-get update && \
    apt-get install -y build-essential git

WORKDIR /app

COPY . .

# Set nvcc architecture
ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
# Enable CUDA
ENV GGML_CUDA=1

RUN make -j$(nproc) llama-cli

FROM ${BASE_CUDA_RUN_CONTAINER} as runtime

RUN apt-get update && \
    apt-get install -y libgomp1

COPY --from=build /app/llama-cli /llama-cli

ENTRYPOINT [ "/llama-cli" ]
.devops/llama-cli-intel.Dockerfile (new file)
@@ -0,0 +1,26 @@
ARG ONEAPI_VERSION=2024.1.1-devel-ubuntu22.04

FROM intel/oneapi-basekit:$ONEAPI_VERSION as build

ARG GGML_SYCL_F16=OFF
RUN apt-get update && \
    apt-get install -y git

WORKDIR /app

COPY . .

RUN if [ "${GGML_SYCL_F16}" = "ON" ]; then \
        echo "GGML_SYCL_F16 is set" && \
        export OPT_SYCL_F16="-DGGML_SYCL_F16=ON"; \
    fi && \
    cmake -B build -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx ${OPT_SYCL_F16} && \
    cmake --build build --config Release --target llama-cli

FROM intel/oneapi-basekit:$ONEAPI_VERSION as runtime

COPY --from=build /app/build/bin/llama-cli /llama-cli

ENV LC_ALL=C.utf8

ENTRYPOINT [ "/llama-cli" ]
.devops/llama-cli-rocm.Dockerfile (new file)
@@ -0,0 +1,45 @@
ARG UBUNTU_VERSION=22.04

# This needs to generally match the container host's environment.
ARG ROCM_VERSION=5.6

# Target the CUDA build image
ARG BASE_ROCM_DEV_CONTAINER=rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete

FROM ${BASE_ROCM_DEV_CONTAINER} as build

# Unless otherwise specified, we make a fat build.
# List from https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878
# This is mostly tied to rocBLAS supported archs.
ARG ROCM_DOCKER_ARCH=\
    gfx803 \
    gfx900 \
    gfx906 \
    gfx908 \
    gfx90a \
    gfx1010 \
    gfx1030 \
    gfx1100 \
    gfx1101 \
    gfx1102

COPY requirements.txt requirements.txt
COPY requirements requirements

RUN pip install --upgrade pip setuptools wheel \
    && pip install -r requirements.txt

WORKDIR /app

COPY . .

# Set nvcc architecture
ENV GPU_TARGETS=${ROCM_DOCKER_ARCH}
# Enable ROCm
ENV GGML_HIPBLAS=1
ENV CC=/opt/rocm/llvm/bin/clang
ENV CXX=/opt/rocm/llvm/bin/clang++

RUN make -j$(nproc) llama-cli

ENTRYPOINT [ "/app/llama-cli" ]
.devops/llama-cli-vulkan.Dockerfile (new file)
@@ -0,0 +1,27 @@
ARG UBUNTU_VERSION=jammy

FROM ubuntu:$UBUNTU_VERSION as build

# Install build tools
RUN apt update && apt install -y git build-essential cmake wget libgomp1

# Install Vulkan SDK
RUN wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \
    wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list && \
    apt update -y && \
    apt-get install -y vulkan-sdk

# Build it
WORKDIR /app
COPY . .
RUN cmake -B build -DGGML_VULKAN=1 && \
    cmake --build build --config Release --target llama-cli

# Clean up
WORKDIR /
RUN cp /app/build/bin/llama-cli /llama-cli && \
    rm -rf /app

ENV LC_ALL=C.utf8

ENTRYPOINT [ "/llama-cli" ]
.devops/llama-cli.Dockerfile (new file)
@@ -0,0 +1,23 @@
ARG UBUNTU_VERSION=22.04

FROM ubuntu:$UBUNTU_VERSION as build

RUN apt-get update && \
    apt-get install -y build-essential git

WORKDIR /app

COPY . .

RUN make -j$(nproc) llama-cli

FROM ubuntu:$UBUNTU_VERSION as runtime

RUN apt-get update && \
    apt-get install -y libgomp1

COPY --from=build /app/llama-cli /llama-cli

ENV LC_ALL=C.utf8

ENTRYPOINT [ "/llama-cli" ]
.devops/llama-server-cuda.Dockerfile (new file)
@@ -0,0 +1,39 @@
ARG UBUNTU_VERSION=22.04
# This needs to generally match the container host's environment.
ARG CUDA_VERSION=11.7.1
# Target the CUDA build image
ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
# Target the CUDA runtime image
ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}

FROM ${BASE_CUDA_DEV_CONTAINER} as build

# Unless otherwise specified, we make a fat build.
ARG CUDA_DOCKER_ARCH=all

RUN apt-get update && \
    apt-get install -y build-essential git libcurl4-openssl-dev

WORKDIR /app

COPY . .

# Set nvcc architecture
ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
# Enable CUDA
ENV GGML_CUDA=1
# Enable cURL
ENV LLAMA_CURL=1

RUN make -j$(nproc) llama-server

FROM ${BASE_CUDA_RUN_CONTAINER} as runtime

RUN apt-get update && \
    apt-get install -y libcurl4-openssl-dev libgomp1 curl

COPY --from=build /app/llama-server /llama-server

HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]

ENTRYPOINT [ "/llama-server" ]
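Since the image's HEALTHCHECK probes http://localhost:8080/health, a typical way to run and verify this server container might look like the sketch below (the image tag is assumed; GPU passthrough requires the NVIDIA container toolkit on the host):

    docker run --gpus all -p 8080:8080 -v /path/to/models:/models \
        local/llama.cpp:server-cuda -m /models/model.gguf --host 0.0.0.0 --port 8080
    # same endpoint the HEALTHCHECK uses
    curl -f http://localhost:8080/health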
.devops/llama-server-intel.Dockerfile (new file)
@@ -0,0 +1,31 @@
ARG ONEAPI_VERSION=2024.1.1-devel-ubuntu22.04

FROM intel/oneapi-basekit:$ONEAPI_VERSION as build

ARG GGML_SYCL_F16=OFF
RUN apt-get update && \
    apt-get install -y git libcurl4-openssl-dev

WORKDIR /app

COPY . .

RUN if [ "${GGML_SYCL_F16}" = "ON" ]; then \
        echo "GGML_SYCL_F16 is set" && \
        export OPT_SYCL_F16="-DGGML_SYCL_F16=ON"; \
    fi && \
    cmake -B build -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_CURL=ON ${OPT_SYCL_F16} && \
    cmake --build build --config Release --target llama-server

FROM intel/oneapi-basekit:$ONEAPI_VERSION as runtime

RUN apt-get update && \
    apt-get install -y libcurl4-openssl-dev curl

COPY --from=build /app/build/bin/llama-server /llama-server

ENV LC_ALL=C.utf8

HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]

ENTRYPOINT [ "/llama-server" ]
.devops/llama-server-rocm.Dockerfile (new file)
@@ -0,0 +1,52 @@
ARG UBUNTU_VERSION=22.04

# This needs to generally match the container host's environment.
ARG ROCM_VERSION=5.6

# Target the CUDA build image
ARG BASE_ROCM_DEV_CONTAINER=rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete

FROM ${BASE_ROCM_DEV_CONTAINER} as build

# Unless otherwise specified, we make a fat build.
# List from https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878
# This is mostly tied to rocBLAS supported archs.
ARG ROCM_DOCKER_ARCH=\
    gfx803 \
    gfx900 \
    gfx906 \
    gfx908 \
    gfx90a \
    gfx1010 \
    gfx1030 \
    gfx1100 \
    gfx1101 \
    gfx1102

COPY requirements.txt requirements.txt
COPY requirements requirements

RUN pip install --upgrade pip setuptools wheel \
    && pip install -r requirements.txt

WORKDIR /app

COPY . .

# Set nvcc architecture
ENV GPU_TARGETS=${ROCM_DOCKER_ARCH}
# Enable ROCm
ENV GGML_HIPBLAS=1
ENV CC=/opt/rocm/llvm/bin/clang
ENV CXX=/opt/rocm/llvm/bin/clang++

# Enable cURL
ENV LLAMA_CURL=1
RUN apt-get update && \
    apt-get install -y libcurl4-openssl-dev curl

RUN make -j$(nproc) llama-server

HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]

ENTRYPOINT [ "/app/llama-server" ]
.devops/llama-server-vulkan.Dockerfile (new file)
@@ -0,0 +1,29 @@
ARG UBUNTU_VERSION=jammy

FROM ubuntu:$UBUNTU_VERSION as build

# Install build tools
RUN apt update && apt install -y git build-essential cmake wget

# Install Vulkan SDK and cURL
RUN wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \
    wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list && \
    apt update -y && \
    apt-get install -y vulkan-sdk libcurl4-openssl-dev curl

# Build it
WORKDIR /app
COPY . .
RUN cmake -B build -DGGML_VULKAN=1 -DLLAMA_CURL=1 && \
    cmake --build build --config Release --target llama-server

# Clean up
WORKDIR /
RUN cp /app/build/bin/llama-server /llama-server && \
    rm -rf /app

ENV LC_ALL=C.utf8

HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]

ENTRYPOINT [ "/llama-server" ]
.devops/llama-server.Dockerfile (new file)
@@ -0,0 +1,27 @@
ARG UBUNTU_VERSION=22.04

FROM ubuntu:$UBUNTU_VERSION as build

RUN apt-get update && \
    apt-get install -y build-essential git libcurl4-openssl-dev curl

WORKDIR /app

COPY . .

ENV LLAMA_CURL=1

RUN make -j$(nproc) llama-server

FROM ubuntu:$UBUNTU_VERSION as runtime

RUN apt-get update && \
    apt-get install -y libcurl4-openssl-dev libgomp1

COPY --from=build /app/llama-server /llama-server

ENV LC_ALL=C.utf8

HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]

ENTRYPOINT [ "/llama-server" ]
@@ -1,108 +0,0 @@
ARG UBUNTU_VERSION=22.04
# This needs to generally match the container host's environment.
ARG MUSA_VERSION=rc3.1.0
# Target the MUSA build image
ARG BASE_MUSA_DEV_CONTAINER=mthreads/musa:${MUSA_VERSION}-devel-ubuntu${UBUNTU_VERSION}

ARG BASE_MUSA_RUN_CONTAINER=mthreads/musa:${MUSA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}

FROM ${BASE_MUSA_DEV_CONTAINER} AS build

# MUSA architecture to build for (defaults to all supported archs)
ARG MUSA_DOCKER_ARCH=default

RUN apt-get update && \
    apt-get install -y \
        build-essential \
        cmake \
        python3 \
        python3-pip \
        git \
        libcurl4-openssl-dev \
        libgomp1

COPY requirements.txt requirements.txt
COPY requirements requirements

RUN pip install --upgrade pip setuptools wheel \
    && pip install -r requirements.txt

WORKDIR /app

COPY . .

# Use the default MUSA archs if not specified
RUN if [ "${MUSA_DOCKER_ARCH}" != "default" ]; then \
        export CMAKE_ARGS="-DMUSA_ARCHITECTURES=${MUSA_DOCKER_ARCH}"; \
    fi && \
    cmake -B build -DGGML_NATIVE=OFF -DGGML_MUSA=ON -DLLAMA_CURL=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
    cmake --build build --config Release -j$(nproc)

RUN mkdir -p /app/lib && \
    find build -name "*.so" -exec cp {} /app/lib \;

RUN mkdir -p /app/full \
    && cp build/bin/* /app/full \
    && cp *.py /app/full \
    && cp -r gguf-py /app/full \
    && cp -r requirements /app/full \
    && cp requirements.txt /app/full \
    && cp .devops/tools.sh /app/full/tools.sh

## Base image
FROM ${BASE_MUSA_RUN_CONTAINER} AS base

RUN apt-get update \
    && apt-get install -y libgomp1 curl\
    && apt autoremove -y \
    && apt clean -y \
    && rm -rf /tmp/* /var/tmp/* \
    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
    && find /var/cache -type f -delete

COPY --from=build /app/lib/ /app

### Full
FROM base AS full

COPY --from=build /app/full /app

WORKDIR /app

RUN apt-get update \
    && apt-get install -y \
    git \
    python3 \
    python3-pip \
    && pip install --upgrade pip setuptools wheel \
    && pip install -r requirements.txt \
    && apt autoremove -y \
    && apt clean -y \
    && rm -rf /tmp/* /var/tmp/* \
    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
    && find /var/cache -type f -delete

ENTRYPOINT ["/app/tools.sh"]

### Light, CLI only
FROM base AS light

COPY --from=build /app/full/llama-cli /app

WORKDIR /app

ENTRYPOINT [ "/app/llama-cli" ]

### Server, Server only
FROM base AS server

ENV LLAMA_ARG_HOST=0.0.0.0

COPY --from=build /app/full/llama-server /app

WORKDIR /app

HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]

ENTRYPOINT [ "/app/llama-server" ]
@@ -10,6 +10,7 @@
   "llama-embedding"
   "llama-server"
   "llama-quantize"
+  "llama-train-text-from-scratch"
 ];
 mkApp = name: {
   type = "app";
@@ -1,52 +1,13 @@
-{ inputs, ... }:
-
 {
   perSystem =
-    {
-      config,
-      lib,
-      system,
-      ...
-    }:
+    { config, lib, ... }:
     {
       devShells =
-        let
-          pkgs = import inputs.nixpkgs { inherit system; };
-          stdenv = pkgs.stdenv;
-          scripts = config.packages.python-scripts;
-        in
-        lib.pipe (config.packages) [
-          (lib.concatMapAttrs (
-            name: package: {
-              ${name} = pkgs.mkShell {
-                name = "${name}";
-                inputsFrom = [ package ];
-                shellHook = ''
-                  echo "Entering ${name} devShell"
-                '';
-              };
-              "${name}-extra" =
-                if (name == "python-scripts") then
-                  null
-                else
-                  pkgs.mkShell {
-                    name = "${name}-extra";
-                    inputsFrom = [
-                      package
-                      scripts
-                    ];
-                    # Extra packages that *may* be used by some scripts
-                    packages = [
-                      pkgs.python3Packages.tiktoken
-                    ];
-                    shellHook = ''
-                      echo "Entering ${name} devShell"
-                      addToSearchPath "LD_LIBRARY_PATH" "${lib.getLib stdenv.cc.cc}/lib"
-                    '';
-                  };
-            }
-          ))
-          (lib.filterAttrs (name: value: value != null))
-        ];
+        lib.concatMapAttrs
+          (name: package: {
+            ${name} = package.passthru.shell;
+            ${name + "-extra"} = package.passthru.shell-extra;
+          })
+          config.packages;
     };
 }
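Both sides of this hunk expose one devShell per package plus an "-extra" variant that pulls in the optional Python packages. As a hedged illustration (the shell name `default` is an assumption about the package set, not shown in this diff), entering them would look roughly like:

    # enter the per-package dev shell, or its "-extra" variant
    nix develop .#default
    nix develop .#default-extra   # adds optional Python packages such as tiktoken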
@@ -26,14 +26,16 @@
       config.cudaSupport = true;
       config.allowUnfreePredicate =
         p:
-        builtins.all (
-          license:
-          license.free
-          || builtins.elem license.shortName [
-            "CUDA EULA"
-            "cuDNN EULA"
-          ]
-        ) (p.meta.licenses or [ p.meta.license ]);
+        builtins.all
+          (
+            license:
+            license.free
+            || builtins.elem license.shortName [
+              "CUDA EULA"
+              "cuDNN EULA"
+            ]
+          )
+          (p.meta.licenses or [ p.meta.license ]);
     };
     # Ensure dependencies use ROCm consistently
     pkgsRocm = import inputs.nixpkgs {
@@ -1,36 +0,0 @@
{
  lib,
  llamaVersion,
  numpy,
  tqdm,
  sentencepiece,
  pyyaml,
  poetry-core,
  buildPythonPackage,
  pytestCheckHook,
}:

buildPythonPackage {
  pname = "gguf";
  version = llamaVersion;
  pyproject = true;
  nativeBuildInputs = [ poetry-core ];
  propagatedBuildInputs = [
    numpy
    tqdm
    sentencepiece
    pyyaml
  ];
  src = lib.cleanSource ../../gguf-py;
  pythonImportsCheck = [
    "numpy"
    "gguf"
  ];
  nativeCheckInputs = [ pytestCheckHook ];
  doCheck = true;
  meta = with lib; {
    description = "Python package for writing binary files in the GGUF format";
    license = licenses.mit;
    maintainers = [ maintainers.ditsuke ];
  };
}
@@ -3,35 +3,31 @@
   glibc,
   config,
   stdenv,
+  mkShell,
   runCommand,
   cmake,
   ninja,
   pkg-config,
   git,
+  python3,
   mpi,
   blas,
   cudaPackages,
-  autoAddDriverRunpath,
   darwin,
   rocmPackages,
   vulkan-headers,
   vulkan-loader,
   curl,
-  shaderc,
-  useBlas ?
-    builtins.all (x: !x) [
-      useCuda
-      useMetalKit
-      useRocm
-      useVulkan
-    ]
-    && blas.meta.available,
+  useBlas ? builtins.all (x: !x) [
+    useCuda
+    useMetalKit
+    useRocm
+    useVulkan
+  ] && blas.meta.available,
   useCuda ? config.cudaSupport,
   useMetalKit ? stdenv.isAarch64 && stdenv.isDarwin,
-  # Increases the runtime closure size by ~700M
-  useMpi ? false,
+  useMpi ? false, # Increases the runtime closure size by ~700M
   useRocm ? config.rocmSupport,
-  rocmGpuTargets ? builtins.concatStringsSep ";" rocmPackages.clr.gpuTargets,
   enableCurl ? true,
   useVulkan ? false,
   llamaVersion ? "0.0.0", # Arbitrary version, substituted by the flake
@@ -40,8 +36,8 @@
   # otherwise we get libstdc++ errors downstream.
   effectiveStdenv ? if useCuda then cudaPackages.backendStdenv else stdenv,
   enableStatic ? effectiveStdenv.hostPlatform.isStatic,
-  precompileMetalShaders ? false,
-}:
+  precompileMetalShaders ? false
+}@inputs:

 let
   inherit (lib)
@@ -49,6 +45,7 @@ let
     cmakeFeature
     optionals
     strings
+    versionOlder
     ;

   stdenv = throw "Use effectiveStdenv instead";
@@ -64,11 +61,38 @@ let
   pnameSuffix =
     strings.optionalString (suffices != [ ])
       "-${strings.concatMapStringsSep "-" strings.toLower suffices}";
-  descriptionSuffix = strings.optionalString (
-    suffices != [ ]
-  ) ", accelerated with ${strings.concatStringsSep ", " suffices}";
+  descriptionSuffix =
+    strings.optionalString (suffices != [ ])
+      ", accelerated with ${strings.concatStringsSep ", " suffices}";

-  xcrunHost = runCommand "xcrunHost" { } ''
+  executableSuffix = effectiveStdenv.hostPlatform.extensions.executable;
+
+  # TODO: package the Python in this repository in a Nix-like way.
+  # It'd be nice to migrate to buildPythonPackage, as well as ensure this repo
+  # is PEP 517-compatible, and ensure the correct .dist-info is generated.
+  # https://peps.python.org/pep-0517/
+  #
+  # TODO: Package up each Python script or service appropriately, by making
+  # them into "entrypoints"
+  llama-python = python3.withPackages (
+    ps: [
+      ps.numpy
+      ps.sentencepiece
+    ]
+  );
+
+  # TODO(Green-Sky): find a better way to opt-into the heavy ml python runtime
+  llama-python-extra = python3.withPackages (
+    ps: [
+      ps.numpy
+      ps.sentencepiece
+      ps.tiktoken
+      ps.torchWithoutCuda
+      ps.transformers
+    ]
+  );
+
+  xcrunHost = runCommand "xcrunHost" {} ''
     mkdir -p $out/bin
     ln -s /usr/bin/xcrun $out/bin
   '';
@@ -85,9 +109,16 @@ let
     ++ optionals useMetalKit [ MetalKit ];

   cudaBuildInputs = with cudaPackages; [
-    cuda_cudart
-    cuda_cccl # <nv/target>
-    libcublas
+    cuda_cccl.dev # <nv/target>
+
+    # A temporary hack for reducing the closure size, remove once cudaPackages
+    # have stopped using lndir: https://github.com/NixOS/nixpkgs/issues/271792
+    cuda_cudart.dev
+    cuda_cudart.lib
+    cuda_cudart.static
+    libcublas.dev
+    libcublas.lib
+    libcublas.static
   ];

   rocmBuildInputs = with rocmPackages; [
@@ -99,149 +130,184 @@ let
   vulkanBuildInputs = [
     vulkan-headers
     vulkan-loader
-    shaderc
   ];
 in

-effectiveStdenv.mkDerivation (finalAttrs: {
-  pname = "llama-cpp${pnameSuffix}";
-  version = llamaVersion;
+effectiveStdenv.mkDerivation (
+  finalAttrs: {
+    pname = "llama-cpp${pnameSuffix}";
+    version = llamaVersion;

   # Note: none of the files discarded here are visible in the sandbox or
   # affect the output hash. This also means they can be modified without
   # triggering a rebuild.
   src = lib.cleanSourceWith {
     filter =
       name: type:
       let
         noneOf = builtins.all (x: !x);
         baseName = baseNameOf name;
       in
       noneOf [
         (lib.hasSuffix ".nix" name) # Ignore *.nix files when computing outPaths
         (lib.hasSuffix ".md" name) # Ignore *.md changes whe computing outPaths
         (lib.hasPrefix "." baseName) # Skip hidden files and directories
         (baseName == "flake.lock")
+      ];
+      src = lib.cleanSource ../../.;
+    };
+
+    postPatch = ''
+      substituteInPlace ./ggml/src/ggml-metal.m \
+        --replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";"
+      substituteInPlace ./ggml/src/ggml-metal.m \
+        --replace '[bundle pathForResource:@"default" ofType:@"metallib"];' "@\"$out/bin/default.metallib\";"
+    '';
+
+    # With PR#6015 https://github.com/ggerganov/llama.cpp/pull/6015,
+    # `default.metallib` may be compiled with Metal compiler from XCode
+    # and we need to escape sandbox on MacOS to access Metal compiler.
+    # `xcrun` is used find the path of the Metal compiler, which is varible
+    # and not on $PATH
+    # see https://github.com/ggerganov/llama.cpp/pull/6118 for discussion
+    __noChroot = effectiveStdenv.isDarwin && useMetalKit && precompileMetalShaders;
+
+    nativeBuildInputs =
+      [
+        cmake
+        ninja
+        pkg-config
+        git
+      ]
+      ++ optionals useCuda [
+        cudaPackages.cuda_nvcc
+
+        # TODO: Replace with autoAddDriverRunpath
+        # once https://github.com/NixOS/nixpkgs/pull/275241 has been merged
+        cudaPackages.autoAddOpenGLRunpathHook
+      ]
+      ++ optionals (effectiveStdenv.hostPlatform.isGnu && enableStatic) [
+        glibc.static
+      ] ++ optionals (effectiveStdenv.isDarwin && useMetalKit && precompileMetalShaders) [
+        xcrunHost
       ];
-    src = lib.cleanSource ../../.;
-  };

-  postPatch = ''
-    substituteInPlace ./ggml/src/ggml-metal/ggml-metal.m \
-      --replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";"
-    substituteInPlace ./ggml/src/ggml-metal/ggml-metal.m \
-      --replace '[bundle pathForResource:@"default" ofType:@"metallib"];' "@\"$out/bin/default.metallib\";"
-  '';
-
-  # With PR#6015 https://github.com/ggerganov/llama.cpp/pull/6015,
-  # `default.metallib` may be compiled with Metal compiler from XCode
-  # and we need to escape sandbox on MacOS to access Metal compiler.
-  # `xcrun` is used find the path of the Metal compiler, which is varible
-  # and not on $PATH
-  # see https://github.com/ggerganov/llama.cpp/pull/6118 for discussion
-  __noChroot = effectiveStdenv.isDarwin && useMetalKit && precompileMetalShaders;
-
-  nativeBuildInputs =
-    [
-      cmake
-      ninja
-      pkg-config
-      git
-    ]
-    ++ optionals useCuda [
-      cudaPackages.cuda_nvcc
-
-      autoAddDriverRunpath
-    ]
-    ++ optionals (effectiveStdenv.hostPlatform.isGnu && enableStatic) [ glibc.static ]
-    ++ optionals (effectiveStdenv.isDarwin && useMetalKit && precompileMetalShaders) [ xcrunHost ];
-
-  buildInputs =
-    optionals effectiveStdenv.isDarwin darwinBuildInputs
-    ++ optionals useCuda cudaBuildInputs
-    ++ optionals useMpi [ mpi ]
-    ++ optionals useRocm rocmBuildInputs
-    ++ optionals useBlas [ blas ]
-    ++ optionals useVulkan vulkanBuildInputs
-    ++ optionals enableCurl [ curl ];
-
-  cmakeFlags =
-    [
-      (cmakeBool "LLAMA_BUILD_SERVER" true)
-      (cmakeBool "BUILD_SHARED_LIBS" (!enableStatic))
-      (cmakeBool "CMAKE_SKIP_BUILD_RPATH" true)
-      (cmakeBool "LLAMA_CURL" enableCurl)
-      (cmakeBool "GGML_NATIVE" false)
-      (cmakeBool "GGML_BLAS" useBlas)
-      (cmakeBool "GGML_CUDA" useCuda)
-      (cmakeBool "GGML_HIP" useRocm)
-      (cmakeBool "GGML_METAL" useMetalKit)
-      (cmakeBool "GGML_VULKAN" useVulkan)
-      (cmakeBool "GGML_STATIC" enableStatic)
-    ]
-    ++ optionals useCuda [
-      (
-        with cudaPackages.flags;
-        cmakeFeature "CMAKE_CUDA_ARCHITECTURES" (
-          builtins.concatStringsSep ";" (map dropDot cudaCapabilities)
-        )
-      )
-    ]
-    ++ optionals useRocm [
-      (cmakeFeature "CMAKE_HIP_COMPILER" "${rocmPackages.llvm.clang}/bin/clang")
-      (cmakeFeature "CMAKE_HIP_ARCHITECTURES" rocmGpuTargets)
-    ]
-    ++ optionals useMetalKit [
-      (lib.cmakeFeature "CMAKE_C_FLAGS" "-D__ARM_FEATURE_DOTPROD=1")
-      (cmakeBool "GGML_METAL_EMBED_LIBRARY" (!precompileMetalShaders))
-    ];
+    buildInputs =
+      optionals effectiveStdenv.isDarwin darwinBuildInputs
+      ++ optionals useCuda cudaBuildInputs
+      ++ optionals useMpi [ mpi ]
+      ++ optionals useRocm rocmBuildInputs
+      ++ optionals useBlas [ blas ]
+      ++ optionals useVulkan vulkanBuildInputs
+      ++ optionals enableCurl [ curl ];
+
+    cmakeFlags =
+      [
+        (cmakeBool "LLAMA_BUILD_SERVER" true)
+        (cmakeBool "BUILD_SHARED_LIBS" (!enableStatic))
+        (cmakeBool "CMAKE_SKIP_BUILD_RPATH" true)
+        (cmakeBool "LLAMA_CURL" enableCurl)
+        (cmakeBool "GGML_NATIVE" false)
+        (cmakeBool "GGML_BLAS" useBlas)
+        (cmakeBool "GGML_CUDA" useCuda)
+        (cmakeBool "GGML_HIPBLAS" useRocm)
+        (cmakeBool "GGML_METAL" useMetalKit)
+        (cmakeBool "GGML_VULKAN" useVulkan)
+        (cmakeBool "GGML_STATIC" enableStatic)
+      ]
+      ++ optionals useCuda [
+        (
+          with cudaPackages.flags;
+          cmakeFeature "CMAKE_CUDA_ARCHITECTURES" (
+            builtins.concatStringsSep ";" (map dropDot cudaCapabilities)
+          )
+        )
+      ]
+      ++ optionals useRocm [
+        (cmakeFeature "CMAKE_HIP_COMPILER" "${rocmPackages.llvm.clang}/bin/clang")
+        (cmakeFeature "CMAKE_HIP_ARCHITECTURES" (builtins.concatStringsSep ";" rocmPackages.clr.gpuTargets))
+      ]
+      ++ optionals useMetalKit [
+        (lib.cmakeFeature "CMAKE_C_FLAGS" "-D__ARM_FEATURE_DOTPROD=1")
+        (cmakeBool "GGML_METAL_EMBED_LIBRARY" (!precompileMetalShaders))
+      ];

   # Environment variables needed for ROCm
   env = optionals useRocm {
     ROCM_PATH = "${rocmPackages.clr}";
     HIP_DEVICE_LIB_PATH = "${rocmPackages.rocm-device-libs}/amdgcn/bitcode";
   };

   # TODO(SomeoneSerge): It's better to add proper install targets at the CMake level,
   # if they haven't been added yet.
   postInstall = ''
     mkdir -p $out/include
     cp $src/include/llama.h $out/include/
   '';

-  meta = {
-    # Configurations we don't want even the CI to evaluate. Results in the
-    # "unsupported platform" messages. This is mostly a no-op, because
-    # cudaPackages would've refused to evaluate anyway.
-    badPlatforms = optionals useCuda lib.platforms.darwin;
-
-    # Configurations that are known to result in build failures. Can be
-    # overridden by importing Nixpkgs with `allowBroken = true`.
-    broken = (useMetalKit && !effectiveStdenv.isDarwin);
-
-    description = "Inference of LLaMA model in pure C/C++${descriptionSuffix}";
-    homepage = "https://github.com/ggerganov/llama.cpp/";
-    license = lib.licenses.mit;
-
-    # Accommodates `nix run` and `lib.getExe`
-    mainProgram = "llama-cli";
-
-    # These people might respond, on the best effort basis, if you ping them
-    # in case of Nix-specific regressions or for reviewing Nix-specific PRs.
-    # Consider adding yourself to this list if you want to ensure this flake
-    # stays maintained and you're willing to invest your time. Do not add
-    # other people without their consent. Consider removing people after
-    # they've been unreachable for long periods of time.
-
-    # Note that lib.maintainers is defined in Nixpkgs, but you may just add
-    # an attrset following the same format as in
-    # https://github.com/NixOS/nixpkgs/blob/f36a80e54da29775c78d7eff0e628c2b4e34d1d7/maintainers/maintainer-list.nix
-    maintainers = with lib.maintainers; [
-      philiptaron
-      SomeoneSerge
-    ];
-
-    # Extend `badPlatforms` instead
-    platforms = lib.platforms.all;
+    # Define the shells here, but don't add in the inputsFrom to avoid recursion.
+    passthru = {
+      inherit
+        useBlas
+        useCuda
+        useMetalKit
+        useMpi
+        useRocm
+        useVulkan
+        ;
+
+      shell = mkShell {
+        name = "shell-${finalAttrs.finalPackage.name}";
+        description = "contains numpy and sentencepiece";
+        buildInputs = [ llama-python ];
+        inputsFrom = [ finalAttrs.finalPackage ];
+        shellHook = ''
+          addToSearchPath "LD_LIBRARY_PATH" "${lib.getLib effectiveStdenv.cc.cc}/lib"
+        '';
+      };
+
+      shell-extra = mkShell {
+        name = "shell-extra-${finalAttrs.finalPackage.name}";
+        description = "contains numpy, sentencepiece, torchWithoutCuda, and transformers";
+        buildInputs = [ llama-python-extra ];
+        inputsFrom = [ finalAttrs.finalPackage ];
+      };
+    };
+
+    meta = {
+      # Configurations we don't want even the CI to evaluate. Results in the
+      # "unsupported platform" messages. This is mostly a no-op, because
+      # cudaPackages would've refused to evaluate anyway.
+      badPlatforms = optionals useCuda lib.platforms.darwin;
+
+      # Configurations that are known to result in build failures. Can be
+      # overridden by importing Nixpkgs with `allowBroken = true`.
+      broken = (useMetalKit && !effectiveStdenv.isDarwin);
+
+      description = "Inference of LLaMA model in pure C/C++${descriptionSuffix}";
+      homepage = "https://github.com/ggerganov/llama.cpp/";
+      license = lib.licenses.mit;
+
+      # Accommodates `nix run` and `lib.getExe`
+      mainProgram = "llama-cli";
|
||||||
};
|
|
||||||
})
|
# These people might respond, on the best effort basis, if you ping them
|
||||||
|
# in case of Nix-specific regressions or for reviewing Nix-specific PRs.
|
||||||
|
# Consider adding yourself to this list if you want to ensure this flake
|
||||||
|
# stays maintained and you're willing to invest your time. Do not add
|
||||||
|
# other people without their consent. Consider removing people after
|
||||||
|
# they've been unreachable for long periods of time.
|
||||||
|
|
||||||
|
# Note that lib.maintainers is defined in Nixpkgs, but you may just add
|
||||||
|
# an attrset following the same format as in
|
||||||
|
# https://github.com/NixOS/nixpkgs/blob/f36a80e54da29775c78d7eff0e628c2b4e34d1d7/maintainers/maintainer-list.nix
|
||||||
|
maintainers = with lib.maintainers; [
|
||||||
|
philiptaron
|
||||||
|
SomeoneSerge
|
||||||
|
];
|
||||||
|
|
||||||
|
# Extend `badPlatforms` instead
|
||||||
|
platforms = lib.platforms.all;
|
||||||
|
};
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
|
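The passthru block above simply re-exports the feature flags and wraps the package in two mkShell environments. A minimal sketch of how these outputs might be consumed from the command line (the flake attribute paths are assumptions for illustration, not taken from this diff):

    # assumed attribute names; adjust to whatever the flake actually exposes
    nix build .#llama-cpp              # build the package; meta.mainProgram is llama-cli
    nix run .#llama-cpp -- --version   # run llama-cli via `nix run` / lib.getExe
    nix develop .#llama-cpp            # enter a shell with the package's build inputs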
@@ -1,66 +0,0 @@
-{
-  lib,
-  stdenv,
-  buildPythonPackage,
-  poetry-core,
-  mkShell,
-  python3Packages,
-  gguf-py,
-}@inputs:
-
-let
-  llama-python-deps = with python3Packages; [
-    numpy
-    sentencepiece
-    transformers
-    protobuf
-    torchWithoutCuda
-    gguf-py
-    tqdm
-
-    # for scripts/compare-llama-bench.py
-    gitpython
-    tabulate
-
-    # for examples/pydantic-models-to-grammar-examples.py
-    docstring-parser
-    pydantic
-  ];
-
-  llama-python-test-deps = with python3Packages; [
-    # Server bench
-    matplotlib
-
-    # server tests
-    openai
-    pytest
-    prometheus-client
-  ];
-in
-
-buildPythonPackage ({
-  pname = "llama-scripts";
-  version = "0.0.0";
-  pyproject = true;
-
-  # NOTE: The files filtered out here are not visible in the build sandbox, neither
-  # do they affect the output hash. They can be modified without triggering a rebuild.
-  src = lib.cleanSourceWith {
-    filter =
-      name: type:
-      let
-        any = builtins.any (x: x);
-        baseName = builtins.baseNameOf name;
-      in
-      any [
-        (lib.hasSuffix ".py" name)
-        (baseName == "README.md")
-        (baseName == "pyproject.toml")
-      ];
-    src = lib.cleanSource ../../.;
-  };
-  nativeBuildInputs = [ poetry-core ];
-  nativeCheckInputs = llama-python-test-deps;
-  dependencies = llama-python-deps;
-})
@@ -1,41 +1,19 @@
{
  lib,
  newScope,
-  python3,
  llamaVersion ? "0.0.0",
}:

-let
-  pythonPackages = python3.pkgs;
-  buildPythonPackage = pythonPackages.buildPythonPackage;
-  numpy = pythonPackages.numpy;
-  tqdm = pythonPackages.tqdm;
-  sentencepiece = pythonPackages.sentencepiece;
-  pyyaml = pythonPackages.pyyaml;
-  poetry-core = pythonPackages.poetry-core;
-  pytestCheckHook = pythonPackages.pytestCheckHook;
-in
-
# We're using `makeScope` instead of just writing out an attrset
# because it allows users to apply overlays later using `overrideScope'`.
# Cf. https://noogle.dev/f/lib/makeScope

-lib.makeScope newScope (self: {
-  inherit llamaVersion;
-  gguf-py = self.callPackage ./package-gguf-py.nix {
-    inherit
-      buildPythonPackage
-      numpy
-      tqdm
-      sentencepiece
-      poetry-core
-      pyyaml
-      pytestCheckHook
-      ;
-  };
-  python-scripts = self.callPackage ./python-scripts.nix { inherit buildPythonPackage poetry-core; };
-  llama-cpp = self.callPackage ./package.nix { };
-  docker = self.callPackage ./docker.nix { };
-  docker-min = self.callPackage ./docker.nix { interactive = false; };
-  sif = self.callPackage ./sif.nix { };
-})
+lib.makeScope newScope (
+  self: {
+    inherit llamaVersion;
+    llama-cpp = self.callPackage ./package.nix { };
+    docker = self.callPackage ./docker.nix { };
+    docker-min = self.callPackage ./docker.nix { interactive = false; };
+    sif = self.callPackage ./sif.nix { };
+  }
+)
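Because this package set is built with `makeScope`, a consumer can later swap out any member with `overrideScope'` and the remaining members pick up the change. A hedged sketch of driving the scope from the command line (the `llamaPackages` attribute path is an assumption for illustration):

    nix build .#llamaPackages.llama-cpp    # the C/C++ package from package.nix
    nix build .#llamaPackages.docker       # container image built by docker.nix
    nix build .#llamaPackages.docker-min   # same image with interactive = false
    nix build .#llamaPackages.sif          # Singularity/Apptainer image from sif.nix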
@@ -1,113 +0,0 @@
-ARG UBUNTU_VERSION=24.04
-
-# This needs to generally match the container host's environment.
-
-ARG ROCM_VERSION=6.3
-ARG AMDGPU_VERSION=6.3
-
-# Target the CUDA build image
-ARG BASE_ROCM_DEV_CONTAINER=rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete
-
-### Build image
-FROM ${BASE_ROCM_DEV_CONTAINER} AS build
-
-# Unless otherwise specified, we make a fat build.
-# List from https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878
-# This is mostly tied to rocBLAS supported archs.
-# gfx803, gfx900, gfx1032, gfx1101, gfx1102,not officialy supported
-# gfx906 is deprecated
-#check https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.2.4/reference/system-requirements.html
-
-#ARG ROCM_DOCKER_ARCH='gfx803,gfx900,gfx906,gfx908,gfx90a,gfx942,gfx1010,gfx1030,gfx1032,gfx1100,gfx1101,gfx1102'
-ARG ROCM_DOCKER_ARCH=gfx1100
-
-# Set nvcc architectured
-ENV AMDGPU_TARGETS=${ROCM_DOCKER_ARCH}
-# Enable ROCm
-# ENV CC=/opt/rocm/llvm/bin/clang
-# ENV CXX=/opt/rocm/llvm/bin/clang++
-
-RUN apt-get update \
-    && apt-get install -y \
-    build-essential \
-    cmake \
-    git \
-    libcurl4-openssl-dev \
-    curl \
-    libgomp1
-
-WORKDIR /app
-
-COPY . .
-
-RUN HIPCXX="$(hipconfig -l)/clang" HIP_PATH="$(hipconfig -R)" \
-    cmake -S . -B build -DGGML_HIP=ON -DAMDGPU_TARGETS=$ROCM_DOCKER_ARCH -DCMAKE_BUILD_TYPE=Release -DLLAMA_CURL=ON \
-    && cmake --build build --config Release -j$(nproc)
-
-RUN mkdir -p /app/lib \
-    && find build -name "*.so" -exec cp {} /app/lib \;
-
-RUN mkdir -p /app/full \
-    && cp build/bin/* /app/full \
-    && cp *.py /app/full \
-    && cp -r gguf-py /app/full \
-    && cp -r requirements /app/full \
-    && cp requirements.txt /app/full \
-    && cp .devops/tools.sh /app/full/tools.sh
-
-## Base image
-FROM ${BASE_ROCM_DEV_CONTAINER} AS base
-
-RUN apt-get update \
-    && apt-get install -y libgomp1 curl\
-    && apt autoremove -y \
-    && apt clean -y \
-    && rm -rf /tmp/* /var/tmp/* \
-    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
-    && find /var/cache -type f -delete
-
-COPY --from=build /app/lib/ /app
-
-### Full
-FROM base AS full
-
-COPY --from=build /app/full /app
-
-WORKDIR /app
-
-RUN apt-get update \
-    && apt-get install -y \
-    git \
-    python3-pip \
-    python3 \
-    python3-wheel\
-    && pip install --break-system-packages --upgrade setuptools \
-    && pip install --break-system-packages -r requirements.txt \
-    && apt autoremove -y \
-    && apt clean -y \
-    && rm -rf /tmp/* /var/tmp/* \
-    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
-    && find /var/cache -type f -delete
-
-ENTRYPOINT ["/app/tools.sh"]
-
-### Light, CLI only
-FROM base AS light
-
-COPY --from=build /app/full/llama-cli /app
-
-WORKDIR /app
-
-ENTRYPOINT [ "/app/llama-cli" ]
-
-### Server, Server only
-FROM base AS server
-
-ENV LLAMA_ARG_HOST=0.0.0.0
-
-COPY --from=build /app/full/llama-server /app
-
-WORKDIR /app
-
-HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]
-
-ENTRYPOINT [ "/app/llama-server" ]
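The ROCm Dockerfile above is a multi-stage build exposing full, light, and server targets, with the GPU architecture list controlled by the ROCM_DOCKER_ARCH build argument. A hedged build/run sketch (the Dockerfile path, image tag, and model path are illustrative assumptions):

    docker build -f .devops/rocm.Dockerfile --target server \
        --build-arg ROCM_DOCKER_ARCH=gfx1030 -t llama-rocm-server .
    docker run --device /dev/kfd --device /dev/dri -p 8080:8080 \
        -v ./models:/models llama-rocm-server -m /models/model.gguf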
@@ -8,40 +8,36 @@ arg1="$1"
shift

if [[ "$arg1" == '--convert' || "$arg1" == '-c' ]]; then
-    exec python3 ./convert_hf_to_gguf.py "$@"
+    python3 ./convert-hf-to-gguf.py "$@"
elif [[ "$arg1" == '--quantize' || "$arg1" == '-q' ]]; then
-    exec ./llama-quantize "$@"
+    ./llama-quantize "$@"
elif [[ "$arg1" == '--run' || "$arg1" == '-r' ]]; then
-    exec ./llama-cli "$@"
-elif [[ "$arg1" == '--bench' || "$arg1" == '-b' ]]; then
-    exec ./llama-bench "$@"
-elif [[ "$arg1" == '--perplexity' || "$arg1" == '-p' ]]; then
-    exec ./llama-perplexity "$@"
+    ./llama-cli "$@"
+elif [[ "$arg1" == '--finetune' || "$arg1" == '-f' ]]; then
+    ./llama-finetune "$@"
elif [[ "$arg1" == '--all-in-one' || "$arg1" == '-a' ]]; then
    echo "Converting PTH to GGML..."
-    for i in $(ls $1/$2/ggml-model-f16.bin*); do
+    for i in `ls $1/$2/ggml-model-f16.bin*`; do
        if [ -f "${i/f16/q4_0}" ]; then
            echo "Skip model quantization, it already exists: ${i/f16/q4_0}"
        else
            echo "Converting PTH to GGML: $i into ${i/f16/q4_0}..."
-            exec ./llama-quantize "$i" "${i/f16/q4_0}" q4_0
+            ./llama-quantize "$i" "${i/f16/q4_0}" q4_0
        fi
    done
elif [[ "$arg1" == '--server' || "$arg1" == '-s' ]]; then
-    exec ./llama-server "$@"
+    ./llama-server "$@"
else
    echo "Unknown command: $arg1"
    echo "Available commands: "
    echo "  --run (-r): Run a model previously converted into ggml"
    echo "      ex: -m /models/7B/ggml-model-q4_0.bin -p \"Building a website can be done in 10 simple steps:\" -n 512"
-    echo "  --bench (-b): Benchmark the performance of the inference for various parameters."
-    echo "      ex: -m model.gguf"
-    echo "  --perplexity (-p): Measure the perplexity of a model over a given text."
-    echo "      ex: -m model.gguf -f file.txt"
    echo "  --convert (-c): Convert a llama model into ggml"
    echo "      ex: --outtype f16 \"/models/7B/\" "
    echo "  --quantize (-q): Optimize with quantization process ggml"
    echo "      ex: \"/models/7B/ggml-model-f16.bin\" \"/models/7B/ggml-model-q4_0.bin\" 2"
+    echo "  --finetune (-f): Run finetune command to create a lora finetune of the model"
+    echo "      See documentation for finetune for command-line parameters"
    echo "  --all-in-one (-a): Execute --convert & --quantize"
    echo "      ex: \"/models/\" 7B"
    echo "  --server (-s): Run a model on the server"
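tools.sh is the entrypoint of the full images, so the first argument picks a tool and everything after it is passed straight through. A hedged usage sketch (the image tag and model paths are illustrative assumptions):

    # convert, quantize, then serve a model using an assumed locally built "full" image
    docker run -v ./models:/models llama-full --convert --outtype f16 /models/7B/
    docker run -v ./models:/models llama-full --quantize \
        /models/7B/ggml-model-f16.bin /models/7B/ggml-model-q4_0.bin 2
    docker run -v ./models:/models -p 8080:8080 llama-full --server -m /models/7B/ggml-model-q4_0.bin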
@@ -1,89 +0,0 @@
-ARG UBUNTU_VERSION=24.04
-
-FROM ubuntu:$UBUNTU_VERSION AS build
-
-# Install build tools
-RUN apt update && apt install -y git build-essential cmake wget
-
-# Install Vulkan SDK and cURL
-RUN wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \
-    wget -qO /etc/apt/sources.list.d/lunarg-vulkan-noble.list https://packages.lunarg.com/vulkan/lunarg-vulkan-noble.list && \
-    apt update -y && \
-    apt-get install -y vulkan-sdk libcurl4-openssl-dev curl
-
-# Build it
-WORKDIR /app
-
-COPY . .
-
-RUN cmake -B build -DGGML_NATIVE=OFF -DGGML_VULKAN=1 -DLLAMA_CURL=1 && \
-    cmake --build build --config Release -j$(nproc)
-
-RUN mkdir -p /app/lib && \
-    find build -name "*.so" -exec cp {} /app/lib \;
-
-RUN mkdir -p /app/full \
-    && cp build/bin/* /app/full \
-    && cp *.py /app/full \
-    && cp -r gguf-py /app/full \
-    && cp -r requirements /app/full \
-    && cp requirements.txt /app/full \
-    && cp .devops/tools.sh /app/full/tools.sh
-
-## Base image
-FROM ubuntu:$UBUNTU_VERSION AS base
-
-RUN apt-get update \
-    && apt-get install -y libgomp1 curl libvulkan-dev \
-    && apt autoremove -y \
-    && apt clean -y \
-    && rm -rf /tmp/* /var/tmp/* \
-    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
-    && find /var/cache -type f -delete
-
-COPY --from=build /app/lib/ /app
-
-### Full
-FROM base AS full
-
-COPY --from=build /app/full /app
-
-WORKDIR /app
-
-RUN apt-get update \
-    && apt-get install -y \
-    git \
-    python3 \
-    python3-pip \
-    python3-wheel \
-    && pip install --break-system-packages --upgrade setuptools \
-    && pip install --break-system-packages -r requirements.txt \
-    && apt autoremove -y \
-    && apt clean -y \
-    && rm -rf /tmp/* /var/tmp/* \
-    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
-    && find /var/cache -type f -delete
-
-ENTRYPOINT ["/app/tools.sh"]
-
-### Light, CLI only
-FROM base AS light
-
-COPY --from=build /app/full/llama-cli /app
-
-WORKDIR /app
-
-ENTRYPOINT [ "/app/llama-cli" ]
-
-### Server, Server only
-FROM base AS server
-
-ENV LLAMA_ARG_HOST=0.0.0.0
-
-COPY --from=build /app/full/llama-server /app
-
-WORKDIR /app
-
-HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]
-
-ENTRYPOINT [ "/app/llama-server" ]
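The Vulkan server stage listens on all interfaces (LLAMA_ARG_HOST=0.0.0.0) and declares a curl-based HEALTHCHECK, so the same probe works from outside the container. A hedged sketch (Dockerfile path and image tag are illustrative assumptions):

    docker build -f .devops/vulkan.Dockerfile --target server -t llama-vulkan-server .
    docker run --device /dev/dri -p 8080:8080 -v ./models:/models \
        llama-vulkan-server -m /models/model.gguf
    curl -f http://localhost:8080/health    # same endpoint the HEALTHCHECK polls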
@@ -1,7 +1,7 @@
*.o
*.a
.cache/
-# Do not ignore .git directory, otherwise the reported build number will always be 0
+.git/
.github/
.gitignore
.vs/
2
.ecrc
@@ -1,5 +1,5 @@
{
-    "Exclude": ["^\\.gitmodules$", "stb_image\\.h"],
+    "Exclude": ["^\\.gitmodules$"],
    "Disable": {
        "IndentSize": true
    }
@@ -24,27 +24,9 @@ insert_final_newline = unset
[examples/server/public/*]
indent_size = 2

-[examples/server/public/deps_*]
-trim_trailing_whitespace = unset
-indent_style = unset
-indent_size = unset
-
-[examples/server/deps_*]
-trim_trailing_whitespace = unset
-indent_style = unset
-indent_size = unset
-
[examples/llama.swiftui/llama.swiftui.xcodeproj/*]
indent_style = tab

[examples/cvector-generator/*.txt]
trim_trailing_whitespace = unset
insert_final_newline = unset

-[models/templates/*.jinja]
-indent_style = unset
-indent_size = unset
-end_of_line = unset
-charset = unset
-trim_trailing_whitespace = unset
-insert_final_newline = unset
50
.github/ISSUE_TEMPLATE/01-bug-low.yml
vendored
Normal file
@@ -0,0 +1,50 @@
+name: Low Severity Bugs
+description: Used to report low severity bugs in llama.cpp (e.g. cosmetic issues, non critical UI glitches)
+title: "Bug: "
+labels: ["bug-unconfirmed", "low severity"]
+body:
+  - type: markdown
+    attributes:
+      value: |
+        Thanks for taking the time to fill out this bug report!
+        Please include information about your system, the steps to reproduce the bug,
+        and the version of llama.cpp that you are using.
+        If possible, please provide a minimal code example that reproduces the bug.
+  - type: textarea
+    id: what-happened
+    attributes:
+      label: What happened?
+      description: Also tell us, what did you expect to happen?
+      placeholder: Tell us what you see!
+    validations:
+      required: true
+  - type: textarea
+    id: version
+    attributes:
+      label: Name and Version
+      description: Which executable and which version of our software are you running? (use `--version` to get a version string)
+      placeholder: |
+        $./llama-cli --version
+        version: 2999 (42b4109e)
+        built with cc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 for x86_64-linux-gnu
+    validations:
+      required: true
+  - type: dropdown
+    id: operating-system
+    attributes:
+      label: What operating system are you seeing the problem on?
+      multiple: true
+      options:
+        - Linux
+        - Mac
+        - Windows
+        - BSD
+        - Other? (Please let us know in description)
+    validations:
+      required: false
+  - type: textarea
+    id: logs
+    attributes:
+      label: Relevant log output
+      description: Please copy and paste any relevant log output. This will be automatically formatted into code, so no need for backticks.
+      render: shell
87
.github/ISSUE_TEMPLATE/010-bug-compilation.yml
vendored
@@ -1,87 +0,0 @@
-name: Bug (compilation)
-description: Something goes wrong when trying to compile llama.cpp.
-title: "Compile bug: "
-labels: ["bug-unconfirmed", "compilation"]
-body:
-  - type: markdown
-    attributes:
-      value: >
-        Thanks for taking the time to fill out this bug report!
-        This issue template is intended for bug reports where the compilation of llama.cpp fails.
-        Before opening an issue, please confirm that the compilation still fails with `-DGGML_CCACHE=OFF`.
-        If the compilation succeeds with ccache disabled you should be able to permanently fix the issue
-        by clearing `~/.cache/ccache` (on Linux).
-  - type: textarea
-    id: commit
-    attributes:
-      label: Git commit
-      description: Which commit are you trying to compile?
-      placeholder: |
-        $git rev-parse HEAD
-        84a07a17b1b08cf2b9747c633a2372782848a27f
-    validations:
-      required: true
-  - type: dropdown
-    id: operating-system
-    attributes:
-      label: Operating systems
-      description: Which operating systems do you know to be affected?
-      multiple: true
-      options:
-        - Linux
-        - Mac
-        - Windows
-        - BSD
-        - Other? (Please let us know in description)
-    validations:
-      required: true
-  - type: dropdown
-    id: backends
-    attributes:
-      label: GGML backends
-      description: Which GGML backends do you know to be affected?
-      options: [AMX, BLAS, CPU, CUDA, HIP, Kompute, Metal, Musa, RPC, SYCL, Vulkan]
-      multiple: true
-    validations:
-      required: true
-  - type: textarea
-    id: info
-    attributes:
-      label: Problem description & steps to reproduce
-      description: >
-        Please give us a summary of the problem and tell us how to reproduce it.
-        If you can narrow down the bug to specific compile flags, that information would be very much appreciated by us.
-      placeholder: >
-        I'm trying to compile llama.cpp with CUDA support on a fresh install of Ubuntu and get error XY.
-        Here are the exact commands that I used: ...
-    validations:
-      required: true
-  - type: textarea
-    id: first_bad_commit
-    attributes:
-      label: First Bad Commit
-      description: >
-        If the bug was not present on an earlier version: when did it start appearing?
-        If possible, please do a git bisect and identify the exact commit that introduced the bug.
-    validations:
-      required: false
-  - type: textarea
-    id: command
-    attributes:
-      label: Compile command
-      description: >
-        Please provide the exact command you used to compile llama.cpp. For example: `cmake -B ...`.
-        This will be automatically formatted into code, so no need for backticks.
-      render: shell
-    validations:
-      required: true
-  - type: textarea
-    id: logs
-    attributes:
-      label: Relevant log output
-      description: >
-        Please copy and paste any relevant log output, including any generated text.
-        This will be automatically formatted into code, so no need for backticks.
-      render: shell
-    validations:
-      required: true
101
.github/ISSUE_TEMPLATE/011-bug-results.yml
vendored
@@ -1,101 +0,0 @@
-name: Bug (model use)
-description: Something goes wrong when using a model (in general, not specific to a single llama.cpp module).
-title: "Eval bug: "
-labels: ["bug-unconfirmed", "model evaluation"]
-body:
-  - type: markdown
-    attributes:
-      value: >
-        Thanks for taking the time to fill out this bug report!
-        This issue template is intended for bug reports where the model evaluation results
-        (i.e. the generated text) are incorrect or llama.cpp crashes during model evaluation.
-        If you encountered the issue while using an external UI (e.g. ollama),
-        please reproduce your issue using one of the examples/binaries in this repository.
-        The `llama-cli` binary can be used for simple and reproducible model inference.
-  - type: textarea
-    id: version
-    attributes:
-      label: Name and Version
-      description: Which version of our software are you running? (use `--version` to get a version string)
-      placeholder: |
-        $./llama-cli --version
-        version: 2999 (42b4109e)
-        built with cc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 for x86_64-linux-gnu
-    validations:
-      required: true
-  - type: dropdown
-    id: operating-system
-    attributes:
-      label: Operating systems
-      description: Which operating systems do you know to be affected?
-      multiple: true
-      options:
-        - Linux
-        - Mac
-        - Windows
-        - BSD
-        - Other? (Please let us know in description)
-    validations:
-      required: true
-  - type: dropdown
-    id: backends
-    attributes:
-      label: GGML backends
-      description: Which GGML backends do you know to be affected?
-      options: [AMX, BLAS, CPU, CUDA, HIP, Kompute, Metal, Musa, RPC, SYCL, Vulkan]
-      multiple: true
-    validations:
-      required: true
-  - type: textarea
-    id: hardware
-    attributes:
-      label: Hardware
-      description: Which CPUs/GPUs are you using?
-      placeholder: >
-        e.g. Ryzen 5950X + 2x RTX 4090
-    validations:
-      required: true
-  - type: textarea
-    id: model
-    attributes:
-      label: Models
-      description: >
-        Which model(s) at which quantization were you using when encountering the bug?
-        If you downloaded a GGUF file off of Huggingface, please provide a link.
-      placeholder: >
-        e.g. Meta LLaMA 3.1 Instruct 8b q4_K_M
-    validations:
-      required: false
-  - type: textarea
-    id: info
-    attributes:
-      label: Problem description & steps to reproduce
-      description: >
-        Please give us a summary of the problem and tell us how to reproduce it.
-        If you can narrow down the bug to specific hardware, compile flags, or command line arguments,
-        that information would be very much appreciated by us.
-      placeholder: >
-        e.g. when I run llama-cli with -ngl 99 I get garbled outputs.
-        When I use -ngl 0 it works correctly.
-        Here are the exact commands that I used: ...
-    validations:
-      required: true
-  - type: textarea
-    id: first_bad_commit
-    attributes:
-      label: First Bad Commit
-      description: >
-        If the bug was not present on an earlier version: when did it start appearing?
-        If possible, please do a git bisect and identify the exact commit that introduced the bug.
-    validations:
-      required: false
-  - type: textarea
-    id: logs
-    attributes:
-      label: Relevant log output
-      description: >
-        Please copy and paste any relevant log output, including the command that you entered and any generated text.
-        This will be automatically formatted into code, so no need for backticks.
-      render: shell
-    validations:
-      required: true
91
.github/ISSUE_TEMPLATE/019-bug-misc.yml
vendored
@@ -1,91 +0,0 @@
-name: Bug (misc.)
-description: Something is not working the way it should (and it's not covered by any of the above cases).
-title: "Misc. bug: "
-labels: ["bug-unconfirmed"]
-body:
-  - type: markdown
-    attributes:
-      value: >
-        Thanks for taking the time to fill out this bug report!
-        This issue template is intended for miscellaneous bugs that don't fit into any other category.
-        If you encountered the issue while using an external UI (e.g. ollama),
-        please reproduce your issue using one of the examples/binaries in this repository.
-  - type: textarea
-    id: version
-    attributes:
-      label: Name and Version
-      description: Which version of our software is affected? (You can use `--version` to get a version string.)
-      placeholder: |
-        $./llama-cli --version
-        version: 2999 (42b4109e)
-        built with cc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 for x86_64-linux-gnu
-    validations:
-      required: true
-  - type: dropdown
-    id: operating-system
-    attributes:
-      label: Operating systems
-      description: Which operating systems do you know to be affected?
-      multiple: true
-      options:
-        - Linux
-        - Mac
-        - Windows
-        - BSD
-        - Other? (Please let us know in description)
-    validations:
-      required: false
-  - type: dropdown
-    id: module
-    attributes:
-      label: Which llama.cpp modules do you know to be affected?
-      multiple: true
-      options:
-        - Documentation/Github
-        - libllama (core library)
-        - llama-cli
-        - llama-server
-        - llama-bench
-        - llama-quantize
-        - Python/Bash scripts
-        - Test code
-        - Other (Please specify in the next section)
-    validations:
-      required: false
-  - type: textarea
-    id: command
-    attributes:
-      label: Command line
-      description: >
-        Please provide the exact commands you entered, if applicable. For example: `llama-server -m ... -c ...`, `llama-cli -m ...`, etc.
-        This will be automatically formatted into code, so no need for backticks.
-      render: shell
-    validations:
-      required: false
-  - type: textarea
-    id: info
-    attributes:
-      label: Problem description & steps to reproduce
-      description: >
-        Please give us a summary of the problem and tell us how to reproduce it (if applicable).
-    validations:
-      required: true
-  - type: textarea
-    id: first_bad_commit
-    attributes:
-      label: First Bad Commit
-      description: >
-        If the bug was not present on an earlier version and it's not trivial to track down: when did it start appearing?
-        If possible, please do a git bisect and identify the exact commit that introduced the bug.
-    validations:
-      required: false
-  - type: textarea
-    id: logs
-    attributes:
-      label: Relevant log output
-      description: >
-        If applicable, please copy and paste any relevant log output, including any generated text.
-        This will be automatically formatted into code, so no need for backticks.
-      render: shell
-    validations:
-      required: false
50
.github/ISSUE_TEMPLATE/02-bug-medium.yml
vendored
Normal file
@@ -0,0 +1,50 @@
+name: Medium Severity Bug
+description: Used to report medium severity bugs in llama.cpp (e.g. Malfunctioning Features but generally still useable)
+title: "Bug: "
+labels: ["bug-unconfirmed", "medium severity"]
+body:
+  - type: markdown
+    attributes:
+      value: |
+        Thanks for taking the time to fill out this bug report!
+        Please include information about your system, the steps to reproduce the bug,
+        and the version of llama.cpp that you are using.
+        If possible, please provide a minimal code example that reproduces the bug.
+  - type: textarea
+    id: what-happened
+    attributes:
+      label: What happened?
+      description: Also tell us, what did you expect to happen?
+      placeholder: Tell us what you see!
+    validations:
+      required: true
+  - type: textarea
+    id: version
+    attributes:
+      label: Name and Version
+      description: Which executable and which version of our software are you running? (use `--version` to get a version string)
+      placeholder: |
+        $./llama-cli --version
+        version: 2999 (42b4109e)
+        built with cc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 for x86_64-linux-gnu
+    validations:
+      required: true
+  - type: dropdown
+    id: operating-system
+    attributes:
+      label: What operating system are you seeing the problem on?
+      multiple: true
+      options:
+        - Linux
+        - Mac
+        - Windows
+        - BSD
+        - Other? (Please let us know in description)
+    validations:
+      required: false
+  - type: textarea
+    id: logs
+    attributes:
+      label: Relevant log output
+      description: Please copy and paste any relevant log output. This will be automatically formatted into code, so no need for backticks.
+      render: shell
50
.github/ISSUE_TEMPLATE/03-bug-high.yml
vendored
Normal file
@@ -0,0 +1,50 @@
+name: High Severity Bug
+description: Used to report high severity bugs in llama.cpp (e.g. Malfunctioning features hindering important common workflow)
+title: "Bug: "
+labels: ["bug-unconfirmed", "high severity"]
+body:
+  - type: markdown
+    attributes:
+      value: |
+        Thanks for taking the time to fill out this bug report!
+        Please include information about your system, the steps to reproduce the bug,
+        and the version of llama.cpp that you are using.
+        If possible, please provide a minimal code example that reproduces the bug.
+  - type: textarea
+    id: what-happened
+    attributes:
+      label: What happened?
+      description: Also tell us, what did you expect to happen?
+      placeholder: Tell us what you see!
+    validations:
+      required: true
+  - type: textarea
+    id: version
+    attributes:
+      label: Name and Version
+      description: Which executable and which version of our software are you running? (use `--version` to get a version string)
+      placeholder: |
+        $./llama-cli --version
+        version: 2999 (42b4109e)
+        built with cc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 for x86_64-linux-gnu
+    validations:
+      required: true
+  - type: dropdown
+    id: operating-system
+    attributes:
+      label: What operating system are you seeing the problem on?
+      multiple: true
+      options:
+        - Linux
+        - Mac
+        - Windows
+        - BSD
+        - Other? (Please let us know in description)
+    validations:
+      required: false
+  - type: textarea
+    id: logs
+    attributes:
+      label: Relevant log output
+      description: Please copy and paste any relevant log output. This will be automatically formatted into code, so no need for backticks.
+      render: shell
50
.github/ISSUE_TEMPLATE/04-bug-critical.yml
vendored
Normal file
@@ -0,0 +1,50 @@
+name: Critical Severity Bug
+description: Used to report critical severity bugs in llama.cpp (e.g. Crashing, Corrupted, Dataloss)
+title: "Bug: "
+labels: ["bug-unconfirmed", "critical severity"]
+body:
+  - type: markdown
+    attributes:
+      value: |
+        Thanks for taking the time to fill out this bug report!
+        Please include information about your system, the steps to reproduce the bug,
+        and the version of llama.cpp that you are using.
+        If possible, please provide a minimal code example that reproduces the bug.
+  - type: textarea
+    id: what-happened
+    attributes:
+      label: What happened?
+      description: Also tell us, what did you expect to happen?
+      placeholder: Tell us what you see!
+    validations:
+      required: true
+  - type: textarea
+    id: version
+    attributes:
+      label: Name and Version
+      description: Which executable and which version of our software are you running? (use `--version` to get a version string)
+      placeholder: |
+        $./llama-cli --version
+        version: 2999 (42b4109e)
+        built with cc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 for x86_64-linux-gnu
+    validations:
+      required: true
+  - type: dropdown
+    id: operating-system
+    attributes:
+      label: What operating system are you seeing the problem on?
+      multiple: true
+      options:
+        - Linux
+        - Mac
+        - Windows
+        - BSD
+        - Other? (Please let us know in description)
+    validations:
+      required: false
+  - type: textarea
+    id: logs
+    attributes:
+      label: Relevant log output
+      description: Please copy and paste any relevant log output. This will be automatically formatted into code, so no need for backticks.
+      render: shell
@@ -1,5 +1,5 @@
name: Enhancement
-description: Used to request enhancements for llama.cpp.
+description: Used to request enhancements for llama.cpp
title: "Feature Request: "
labels: ["enhancement"]
body:

@@ -1,5 +1,5 @@
name: Research
-description: Track new technical research area.
+description: Track new technical research area
title: "Research: "
labels: ["research 🔬"]
body:

@@ -1,5 +1,5 @@
name: Refactor (Maintainers)
-description: Used to track refactoring opportunities.
+description: Used to track refactoring opportunities
title: "Refactor: "
labels: ["refactor"]
body:
19
.github/labeler.yml
vendored
@@ -3,21 +3,20 @@ Kompute:
  - changed-files:
      - any-glob-to-any-file:
          - ggml/include/ggml-kompute.h
-          - ggml/src/ggml-kompute/**
+          - ggml/src/ggml-kompute.cpp
          - README-kompute.md
Apple Metal:
  - changed-files:
      - any-glob-to-any-file:
          - ggml/include/ggml-metal.h
-          - ggml/src/ggml-metal/**
+          - ggml/src/ggml-metal.cpp
          - README-metal.md
SYCL:
  - changed-files:
      - any-glob-to-any-file:
          - ggml/include/ggml-sycl.h
-          - ggml/src/ggml-sycl/**
-          - docs/backend/SYCL.md
-          - examples/sycl/**
+          - ggml/src/ggml-sycl.cpp
+          - README-sycl.md
Nvidia GPU:
  - changed-files:
      - any-glob-to-any-file:

@@ -26,8 +25,8 @@ Nvidia GPU:
Vulkan:
  - changed-files:
      - any-glob-to-any-file:
-          - ggml/include/ggml-vulkan.h
-          - ggml/src/ggml-vulkan/**
+          - ggml/ggml_vk_generate_shaders.py
+          - ggml/src/ggml-vulkan*
documentation:
  - changed-files:
      - any-glob-to-any-file:

@@ -74,7 +73,11 @@ server:
ggml:
  - changed-files:
      - any-glob-to-any-file:
-          - ggml/**
+          - ggml/include/ggml*.h
+          - ggml/src/ggml*.c
+          - ggml/src/ggml*.cpp
+          - ggml/src/ggml*.h
+          - ggml-cuda/**
nix:
  - changed-files:
      - any-glob-to-any-file:
8
.github/pull_request_template.md
vendored
@@ -1 +1,7 @@
-*Make sure to read the [contributing guidelines](https://github.com/ggerganov/llama.cpp/blob/master/CONTRIBUTING.md) before submitting a PR*
+
+- [x] I have read the [contributing guidelines](https://github.com/ggerganov/llama.cpp/blob/master/CONTRIBUTING.md)
+- Self-reported review complexity:
+  - [ ] Low
+  - [ ] Medium
+  - [ ] High
+
@@ -1,6 +1,3 @@
-# TODO: there have been some issues with the workflow, so disabling for now
-# https://github.com/ggerganov/llama.cpp/issues/7893
-#
# Benchmark
name: Benchmark

@@ -27,10 +24,10 @@ on:
  push:
    branches:
      - master
-    paths: ['llama.cpp', 'ggml.c', 'ggml-backend.cpp', 'ggml-quants.c', '**/*.cu', 'examples/server/*.h*', 'examples/server/*.cpp']
+    paths: ['llama.cpp', 'ggml.c', 'ggml-backend.c', 'ggml-quants.c', '**/*.cu', 'examples/server/*.h*', 'examples/server/*.cpp']
  pull_request_target:
    types: [opened, synchronize, reopened]
-    paths: ['llama.cpp', 'ggml.c', 'ggml-backend.cpp', 'ggml-quants.c', '**/*.cu', 'examples/server/*.h*', 'examples/server/*.cpp']
+    paths: ['llama.cpp', 'ggml.c', 'ggml-backend.c', 'ggml-quants.c', '**/*.cu', 'examples/server/*.h*', 'examples/server/*.cpp']
  schedule:
    - cron: '04 2 * * *'

@@ -132,8 +129,6 @@ jobs:

      - name: Server bench
        id: server_bench
-        env:
-          HEAD_REF: ${{ github.head_ref || github.ref_name }}
        run: |
          set -eux

@@ -142,7 +137,7 @@ jobs:
          python bench.py \
              --runner-label ${{ env.RUNNER_LABEL }} \
              --name ${{ github.job }} \
-              --branch $HEAD_REF \
+              --branch ${{ github.head_ref || github.ref_name }} \
              --commit ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha }} \
              --scenario script.js \
              --duration ${{ github.event.inputs.duration || env.DURATION }} \
858
.github/workflows/build.yml
vendored
File diff suppressed because it is too large
7
.github/workflows/close-issue.yml
vendored
@@ -3,11 +3,6 @@ on:
  schedule:
    - cron: "42 0 * * *"

-# Fine-grant permission
-# https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token
-permissions:
-  issues: write
-
jobs:
  close-issues:
    runs-on: ubuntu-latest

@@ -17,7 +12,7 @@ jobs:
    steps:
      - uses: actions/stale@v5
        with:
-          exempt-issue-labels: "refactor,help wanted,good first issue,research,bug,roadmap"
+          exempt-issue-labels: "refactor,help wanted,good first issue,research,bug"
          days-before-issue-stale: 30
          days-before-issue-close: 14
          stale-issue-label: "stale"
163
.github/workflows/docker.yml
vendored
163
.github/workflows/docker.yml
vendored
|
@ -10,50 +10,48 @@
|
||||||
name: Publish Docker image
|
name: Publish Docker image
|
||||||
|
|
||||||
on:
|
on:
|
||||||
workflow_dispatch: # allows manual triggering
|
#pull_request:
|
||||||
schedule:
|
push:
|
||||||
# Rebuild daily rather than on every push because it is expensive
|
branches:
|
||||||
- cron: '12 4 * * *'
|
- master
|
||||||
|
paths: ['.github/workflows/docker.yml', '.devops/*.Dockerfile', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.cuh', '**/*.swift', '**/*.m', '**/*.metal']
|
||||||
|
|
||||||
concurrency:
|
concurrency:
|
||||||
group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
|
group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
|
||||||
cancel-in-progress: true
|
cancel-in-progress: true
|
||||||
|
|
||||||
# Fine-grant permission
|
|
||||||
# https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token
|
|
||||||
permissions:
|
|
||||||
packages: write
|
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
push_to_registry:
|
push_to_registry:
|
||||||
name: Push Docker image to Docker Hub
|
name: Push Docker image to Docker Hub
|
||||||
|
#if: github.event.pull_request.draft == false
|
||||||
|
|
||||||
runs-on: ubuntu-22.04
|
runs-on: ubuntu-latest
|
||||||
env:
|
env:
|
||||||
COMMIT_SHA: ${{ github.sha }}
|
COMMIT_SHA: ${{ github.sha }}
|
||||||
strategy:
|
strategy:
|
||||||
fail-fast: false
|
|
||||||
matrix:
|
matrix:
|
||||||
config:
|
config:
|
||||||
# Multi-stage build
|
- { tag: "light", dockerfile: ".devops/llama-cli.Dockerfile", platforms: "linux/amd64,linux/arm64" }
|
||||||
- { tag: "cpu", dockerfile: ".devops/cpu.Dockerfile", platforms: "linux/amd64,linux/arm64", full: true, light: true, server: true, freediskspace: false}
|
- { tag: "server", dockerfile: ".devops/llama-server.Dockerfile", platforms: "linux/amd64,linux/arm64" }
|
||||||
- { tag: "cuda", dockerfile: ".devops/cuda.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, freediskspace: false}
|
- { tag: "full", dockerfile: ".devops/full.Dockerfile", platforms: "linux/amd64,linux/arm64" }
|
||||||
- { tag: "musa", dockerfile: ".devops/musa.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, freediskspace: false}
|
- { tag: "light-cuda", dockerfile: ".devops/llama-cli-cuda.Dockerfile", platforms: "linux/amd64" }
|
||||||
- { tag: "intel", dockerfile: ".devops/intel.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, freediskspace: false}
|
- { tag: "server-cuda", dockerfile: ".devops/llama-server-cuda.Dockerfile", platforms: "linux/amd64" }
|
||||||
- { tag: "vulkan", dockerfile: ".devops/vulkan.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, freediskspace: false}
|
- { tag: "full-cuda", dockerfile: ".devops/full-cuda.Dockerfile", platforms: "linux/amd64" }
|
||||||
# Note: the rocm images are failing due to a compiler error and are disabled until this is fixed to allow the workflow to complete
|
- { tag: "light-rocm", dockerfile: ".devops/llama-cli-rocm.Dockerfile", platforms: "linux/amd64,linux/arm64" }
|
||||||
#- {tag: "rocm", dockerfile: ".devops/rocm.Dockerfile", platforms: "linux/amd64,linux/arm64", full: true, light: true, server: true, freediskspace: true }
|
- { tag: "server-rocm", dockerfile: ".devops/llama-server-rocm.Dockerfile", platforms: "linux/amd64,linux/arm64" }
|
||||||
|
# Note: the full-rocm image is failing due to a "no space left on device" error. It is disabled for now to allow the workflow to complete.
|
||||||
|
#- { tag: "full-rocm", dockerfile: ".devops/full-rocm.Dockerfile", platforms: "linux/amd64,linux/arm64" }
|
||||||
|
- { tag: "light-intel", dockerfile: ".devops/llama-cli-intel.Dockerfile", platforms: "linux/amd64" }
|
||||||
|
- { tag: "server-intel", dockerfile: ".devops/llama-server-intel.Dockerfile", platforms: "linux/amd64" }
|
||||||
steps:
|
steps:
|
||||||
- name: Check out the repo
|
- name: Check out the repo
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@v4
|
||||||
with:
|
|
||||||
fetch-depth: 0 # preserve git history, so we can determine the build number
|
|
||||||
|
|
||||||
- name: Set up QEMU
|
- name: Set up QEMU
|
||||||
uses: docker/setup-qemu-action@v3
|
uses: docker/setup-qemu-action@v2
|
||||||
|
|
||||||
- name: Set up Docker Buildx
|
- name: Set up Docker Buildx
|
||||||
uses: docker/setup-buildx-action@v3
|
uses: docker/setup-buildx-action@v2
|
||||||
|
|
||||||
- name: Log in to Docker Hub
|
- name: Log in to Docker Hub
|
||||||
uses: docker/login-action@v2
|
uses: docker/login-action@v2
|
||||||
|
@ -62,45 +60,9 @@ jobs:
|
||||||
username: ${{ github.repository_owner }}
|
username: ${{ github.repository_owner }}
|
||||||
password: ${{ secrets.GITHUB_TOKEN }}
|
password: ${{ secrets.GITHUB_TOKEN }}
|
||||||
|
|
||||||
- name: Determine tag name
|
# https://github.com/jlumbroso/free-disk-space/tree/54081f138730dfa15788a46383842cd2f914a1be#example
|
||||||
id: tag
|
|
||||||
shell: bash
|
|
||||||
run: |
|
|
||||||
BUILD_NUMBER="$(git rev-list --count HEAD)"
|
|
||||||
SHORT_HASH="$(git rev-parse --short=7 HEAD)"
|
|
||||||
REPO_OWNER="${GITHUB_REPOSITORY_OWNER@L}" # to lower case
|
|
||||||
REPO_NAME="${{ github.event.repository.name }}"
|
|
||||||
|
|
||||||
# determine tag name postfix (build number, commit hash)
|
|
||||||
if [[ "${{ env.GITHUB_BRANCH_NAME }}" == "master" ]]; then
|
|
||||||
TAG_POSTFIX="-b${BUILD_NUMBER}"
|
|
||||||
else
|
|
||||||
SAFE_NAME=$(echo "${{ env.GITHUB_BRANCH_NAME }}" | tr '/' '-')
|
|
||||||
TAG_POSTFIX="-${SAFE_NAME}-${SHORT_HASH}"
|
|
||||||
fi
|
|
||||||
# list all tags possible
|
|
||||||
if [[ "${{ matrix.config.tag }}" == "cpu" ]]; then
|
|
||||||
TYPE=""
|
|
||||||
else
|
|
||||||
TYPE="-${{ matrix.config.tag }}"
|
|
||||||
fi
|
|
||||||
PREFIX="ghcr.io/${REPO_OWNER}/${REPO_NAME}:"
|
|
||||||
FULLTAGS="${PREFIX}full${TYPE},${PREFIX}full${TYPE}${TAG_POSTFIX}"
|
|
||||||
LIGHTTAGS="${PREFIX}light${TYPE},${PREFIX}light${TYPE}${TAG_POSTFIX}"
|
|
||||||
SERVERTAGS="${PREFIX}server${TYPE},${PREFIX}server${TYPE}${TAG_POSTFIX}"
|
|
||||||
echo "full_output_tags=$FULLTAGS" >> $GITHUB_OUTPUT
|
|
||||||
echo "light_output_tags=$LIGHTTAGS" >> $GITHUB_OUTPUT
|
|
||||||
echo "server_output_tags=$SERVERTAGS" >> $GITHUB_OUTPUT
|
|
||||||
echo "full_output_tags=$FULLTAGS" # print out for debugging
|
|
||||||
echo "light_output_tags=$LIGHTTAGS" # print out for debugging
|
|
||||||
echo "server_output_tags=$SERVERTAGS" # print out for debugging
|
|
||||||
env:
|
|
||||||
GITHUB_BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
|
|
||||||
GITHUB_REPOSITORY_OWNER: '${{ github.repository_owner }}'
|
|
||||||
|
|
||||||
+ # https://github.com/jlumbroso/free-disk-space/tree/54081f138730dfa15788a46383842cd2f914a1be#example
  - name: Free Disk Space (Ubuntu)
-   if: ${{ matrix.config.free_disk_space == true }}
-   uses: ggml-org/free-disk-space@v1.3.1
+   uses: jlumbroso/free-disk-space@main
    with:
      # this might remove tools that are actually needed,
      # if set to "true" but frees about 6 GB
@@ -115,59 +77,40 @@ jobs:
      docker-images: true
      swap-storage: true

- - name: Build and push Full Docker image (tagged + versioned)
-   if: ${{ (github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch') && matrix.config.full == true }}
-   uses: docker/build-push-action@v6
-   with:
-     context: .
-     push: true
-     platforms: ${{ matrix.config.platforms }}
-     # tag list is generated from step above
-     tags: ${{ steps.tag.outputs.full_output_tags }}
-     file: ${{ matrix.config.dockerfile }}
-     target: full
-     provenance: false
-     # using github experimental cache
-     cache-from: type=gha
-     cache-to: type=gha,mode=max
-     # return to this if the experimental github cache is having issues
-     #cache-to: type=local,dest=/tmp/.buildx-cache
-     #cache-from: type=local,src=/tmp/.buildx-cache
+ - name: Determine tag name
+   id: tag
+   shell: bash
+   run: |
+     BUILD_NUMBER="$(git rev-list --count HEAD)"
+     SHORT_HASH="$(git rev-parse --short=7 HEAD)"
+     if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
+       echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
+     else
+       SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
+       echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
+     fi

- - name: Build and push Light Docker image (tagged + versioned)
-   if: ${{ (github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch') && matrix.config.light == true }}
-   uses: docker/build-push-action@v6
-   with:
-     context: .
-     push: true
-     platforms: ${{ matrix.config.platforms }}
-     # tag list is generated from step above
-     tags: ${{ steps.tag.outputs.light_output_tags }}
-     file: ${{ matrix.config.dockerfile }}
-     target: light
-     provenance: false
-     # using github experimental cache
-     cache-from: type=gha
-     cache-to: type=gha,mode=max
-     # return to this if the experimental github cache is having issues
-     #cache-to: type=local,dest=/tmp/.buildx-cache
-     #cache-from: type=local,src=/tmp/.buildx-cache
+ - name: Downcase github.repository_owner
+   run: |
+     echo "repository_owner_lowercase=${GITHUB_REPOSITORY_OWNER@L}" >> $GITHUB_ENV
+   env:
+     GITHUB_REPOSITORY_OWNER: '${{ github.repository_owner }}'

- - name: Build and push Server Docker image (tagged + versioned)
+ - name: Build and push Docker image (versioned)
-   if: ${{ (github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch') && matrix.config.server == true }}
+   if: github.event_name == 'push'
-   uses: docker/build-push-action@v6
+   uses: docker/build-push-action@v4
    with:
      context: .
      push: true
      platforms: ${{ matrix.config.platforms }}
-     # tag list is generated from step above
-     tags: ${{ steps.tag.outputs.server_output_tags }}
+     tags: "ghcr.io/${{ env.repository_owner_lowercase }}/llama.cpp:${{ matrix.config.tag }}-${{ env.COMMIT_SHA }}"
+     file: ${{ matrix.config.dockerfile }}
+
+ - name: Build and push Docker image (tagged)
+   uses: docker/build-push-action@v4
+   with:
+     context: .
+     push: ${{ github.event_name == 'push' }}
+     platforms: ${{ matrix.config.platforms }}
+     tags: "ghcr.io/${{ env.repository_owner_lowercase }}/llama.cpp:${{ matrix.config.tag }},ghcr.io/${{ env.repository_owner_lowercase }}/llama.cpp:${{ matrix.config.tag }}-${{ steps.tag.outputs.name }}"
      file: ${{ matrix.config.dockerfile }}
-     target: server
-     provenance: false
-     # using github experimental cache
-     cache-from: type=gha
-     cache-to: type=gha,mode=max
-     # return to this if the experimental github cache is having issues
-     #cache-to: type=local,dest=/tmp/.buildx-cache
-     #cache-from: type=local,src=/tmp/.buildx-cache
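The build-push-action steps above map onto ordinary `docker buildx` invocations. A rough local sketch, with a placeholder Dockerfile path, owner and tag (the real values come from the workflow matrix and the tag step), might look like:

    # Hedged sketch; OWNER, the Dockerfile path and the tag are placeholders.
    docker buildx create --use
    docker buildx build . \
        --file path/to/llama.Dockerfile \
        --target full \
        --platform linux/amd64,linux/arm64 \
        --tag ghcr.io/OWNER/llama.cpp:full-b4023 \
        --push
    # Note: the type=gha cache-from/cache-to used in the workflow only works inside
    # GitHub Actions runners, so it is omitted here.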
4 .github/workflows/editorconfig.yml vendored
@@ -23,7 +23,5 @@ jobs:
    runs-on: ubuntu-latest
    steps:
    - uses: actions/checkout@v4
-   - uses: editorconfig-checker/action-editorconfig-checker@v2
+   - uses: editorconfig-checker/action-editorconfig-checker@main
-     with:
-       version: v3.0.3
    - run: editorconfig-checker
65 .github/workflows/nix-ci-aarch64.yml vendored Normal file
@@ -0,0 +1,65 @@
+ name: Nix aarch64 builds
+
+ on:
+   workflow_dispatch: # allows manual triggering
+   schedule:
+     # Rebuild daily rather than on every push because QEMU is expensive (e.g.
+     # 1.5h instead of minutes with the cold cache).
+     #
+     # randint(0, 59), randint(0, 23)
+     - cron: '26 12 * * *'
+   # But also rebuild if we touched any of the Nix expressions:
+   push:
+     branches:
+       - master
+     paths: ['**/*.nix', 'flake.lock']
+   pull_request:
+     types: [opened, synchronize, reopened]
+     paths: ['**/*.nix', 'flake.lock']
+
+ concurrency:
+   group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
+   cancel-in-progress: true
+
+ jobs:
+   nix-build-aarch64:
+     runs-on: ubuntu-latest
+     steps:
+       - name: Checkout repository
+         uses: actions/checkout@v4
+       - name: Install QEMU
+         # Copy-paste from https://github.com/orgs/community/discussions/8305#discussioncomment-5888654
+         run: |
+           sudo apt-get update
+           sudo apt-get install -y qemu-user-static qemu-system-aarch64
+           sudo usermod -a -G kvm $USER
+       - name: Install Nix
+         uses: DeterminateSystems/nix-installer-action@v9
+         with:
+           github-token: ${{ secrets.GITHUB_TOKEN }}
+           extra-conf: |
+             extra-platforms = aarch64-linux
+             extra-system-features = nixos-test kvm
+             extra-substituters = https://llama-cpp.cachix.org https://cuda-maintainers.cachix.org
+             extra-trusted-public-keys = llama-cpp.cachix.org-1:H75X+w83wUKTIPSO1KWy9ADUrzThyGs8P5tmAbkWhQc= cuda-maintainers.cachix.org-1:0dq3bujKpuEPMCX6U4WylrUDZ9JyUG0VpVZa7CNfq5E=
+       - uses: DeterminateSystems/magic-nix-cache-action@v2
+         with:
+           upstream-cache: https://${{ matrix.cachixName }}.cachix.org
+       - name: Set-up cachix to push the results to
+         uses: cachix/cachix-action@v13
+         with:
+           authToken: '${{ secrets.CACHIX_AUTH_TOKEN }}'
+           name: llama-cpp
+       - name: Show all output paths
+         run: >
+           nix run github:nix-community/nix-eval-jobs
+           -- --gc-roots-dir gcroot
+           --flake
+           ".#packages.aarch64-linux"
+       - name: Build
+         run: >
+           nix run github:Mic92/nix-fast-build
+           -- --skip-cached --no-nom
+           --systems aarch64-linux
+           --flake
+           ".#checks.aarch64-linux"
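The Build step of this new workflow can also be run locally, assuming Nix with flakes enabled and, on an x86_64 host, binfmt/QEMU emulation for aarch64 set up as in the Install QEMU step:

    # Same invocation as the workflow's "Build" step; run from the repository root.
    nix run github:Mic92/nix-fast-build -- \
        --skip-cached --no-nom \
        --systems aarch64-linux \
        --flake ".#checks.aarch64-linux"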
72 .github/workflows/nix-ci.yml vendored Normal file
@@ -0,0 +1,72 @@
+ name: Nix CI
+
+ on:
+   workflow_dispatch: # allows manual triggering
+   push:
+     branches:
+       - master
+   pull_request:
+     types: [opened, synchronize, reopened]
+
+ concurrency:
+   group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
+   cancel-in-progress: true
+
+ jobs:
+   nix-eval:
+     strategy:
+       fail-fast: false
+       matrix:
+         os: [ ubuntu-latest, macos-latest ]
+     runs-on: ${{ matrix.os }}
+     steps:
+       - name: Checkout repository
+         uses: actions/checkout@v4
+       - name: Install Nix
+         uses: DeterminateSystems/nix-installer-action@v9
+         with:
+           github-token: ${{ secrets.GITHUB_TOKEN }}
+           extra-conf: |
+             extra-substituters = https://llama-cpp.cachix.org https://cuda-maintainers.cachix.org
+             extra-trusted-public-keys = llama-cpp.cachix.org-1:H75X+w83wUKTIPSO1KWy9ADUrzThyGs8P5tmAbkWhQc= cuda-maintainers.cachix.org-1:0dq3bujKpuEPMCX6U4WylrUDZ9JyUG0VpVZa7CNfq5E=
+       - uses: DeterminateSystems/magic-nix-cache-action@v2
+         with:
+           upstream-cache: https://${{ matrix.cachixName }}.cachix.org
+       - name: List all flake outputs
+         run: nix flake show --all-systems
+       - name: Show all output paths
+         run: >
+           nix run github:nix-community/nix-eval-jobs
+           -- --gc-roots-dir gcroot
+           --flake
+           ".#packages.$(nix eval --raw --impure --expr builtins.currentSystem)"
+   nix-build:
+     strategy:
+       fail-fast: false
+       matrix:
+         os: [ ubuntu-latest, macos-latest ]
+     runs-on: ${{ matrix.os }}
+     steps:
+       - name: Checkout repository
+         uses: actions/checkout@v4
+       - name: Install Nix
+         uses: DeterminateSystems/nix-installer-action@v9
+         with:
+           github-token: ${{ secrets.GITHUB_TOKEN }}
+           extra-conf: |
+             extra-substituters = https://llama-cpp.cachix.org https://cuda-maintainers.cachix.org
+             extra-trusted-public-keys = llama-cpp.cachix.org-1:H75X+w83wUKTIPSO1KWy9ADUrzThyGs8P5tmAbkWhQc= cuda-maintainers.cachix.org-1:0dq3bujKpuEPMCX6U4WylrUDZ9JyUG0VpVZa7CNfq5E=
+       - uses: DeterminateSystems/magic-nix-cache-action@v2
+         with:
+           upstream-cache: https://${{ matrix.cachixName }}.cachix.org
+       - name: Set-up cachix to push the results to
+         uses: cachix/cachix-action@v13
+         with:
+           authToken: '${{ secrets.CACHIX_AUTH_TOKEN }}'
+           name: llama-cpp
+       - name: Build
+         run: >
+           nix run github:Mic92/nix-fast-build
+           -- --skip-cached --no-nom
+           --flake
+           ".#checks.$(nix eval --raw --impure --expr builtins.currentSystem)"
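The nix-eval job above boils down to two commands that can also be run locally to sanity-check the flake, mirroring the "List all flake outputs" and "Show all output paths" steps:

    nix flake show --all-systems
    nix run github:nix-community/nix-eval-jobs -- \
        --gc-roots-dir gcroot \
        --flake ".#packages.$(nix eval --raw --impure --expr builtins.currentSystem)"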
22 .github/workflows/nix-flake-update.yml vendored Normal file
@@ -0,0 +1,22 @@
+ name: update-flake-lock
+ on:
+   workflow_dispatch:
+   schedule:
+     - cron: '0 0 * * 0' # runs weekly on Sunday at 00:00
+
+ jobs:
+   lockfile:
+     runs-on: ubuntu-latest
+     steps:
+       - name: Checkout repository
+         uses: actions/checkout@v4
+       - name: Install Nix
+         uses: DeterminateSystems/nix-installer-action@main
+       - name: Update flake.lock
+         uses: DeterminateSystems/update-flake-lock@main
+         with:
+           pr-title: "nix: update flake.lock"
+           pr-labels: |
+             nix
+           pr-reviewers: philiptaron,SomeoneSerge
+           token: ${{ secrets.FLAKE_TOKEN }}
36 .github/workflows/nix-publish-flake.yml vendored Normal file
@@ -0,0 +1,36 @@
+ # Make the flake discoverable on https://flakestry.dev and https://flakehub.com/flakes
+ name: "Publish a flake to flakestry & flakehub"
+ on:
+   push:
+     tags:
+       - "*"
+   workflow_dispatch:
+     inputs:
+       tag:
+         description: "The existing tag to publish"
+         type: "string"
+         required: true
+ jobs:
+   flakestry-publish:
+     runs-on: ubuntu-latest
+     permissions:
+       id-token: "write"
+       contents: "read"
+     steps:
+       - uses: flakestry/flakestry-publish@main
+         with:
+           version: "${{ inputs.tag || github.ref_name }}"
+   flakehub-publish:
+     runs-on: "ubuntu-latest"
+     permissions:
+       id-token: "write"
+       contents: "read"
+     steps:
+       - uses: "actions/checkout@v4"
+         with:
+           ref: "${{ (inputs.tag != null) && format('refs/tags/{0}', inputs.tag) || '' }}"
+       - uses: "DeterminateSystems/nix-installer-action@main"
+       - uses: "DeterminateSystems/flakehub-push@main"
+         with:
+           visibility: "public"
+           tag: "${{ inputs.tag }}"
.github/workflows/python-check-requirements.yml
@@ -6,13 +6,15 @@ on:
      - '.github/workflows/python-check-requirements.yml'
      - 'scripts/check-requirements.sh'
      - 'convert*.py'
-     - '**/requirements*.txt'
+     - 'requirements.txt'
+     - 'requirements/*.txt'
  pull_request:
    paths:
      - '.github/workflows/python-check-requirements.yml'
      - 'scripts/check-requirements.sh'
      - 'convert*.py'
-     - '**/requirements*.txt'
+     - 'requirements.txt'
+     - 'requirements/*.txt'

  concurrency:
    group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
9 .github/workflows/python-lint.yml vendored
@@ -1,13 +1,6 @@
  name: flake8 Lint

- on:
-   push:
-     branches:
-       - master
-     paths: ['.github/workflows/python-lint.yml', '**/*.py']
-   pull_request:
-     types: [opened, synchronize, reopened]
-     paths: ['.github/workflows/python-lint.yml', '**/*.py']
+ on: [push, pull_request]

  concurrency:
    group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
40 .github/workflows/python-type-check.yml vendored
@@ -1,40 +0,0 @@
- name: Python Type-Check
-
- on:
-   push:
-     paths:
-       - '.github/workflows/python-type-check.yml'
-       - 'pyrightconfig.json'
-       - '**.py'
-       - '**/requirements*.txt'
-   pull_request:
-     paths:
-       - '.github/workflows/python-type-check.yml'
-       - 'pyrightconfig.json'
-       - '**.py'
-       - '**/requirements*.txt'
-
- concurrency:
-   group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
-   cancel-in-progress: true
-
- jobs:
-   python-type-check:
-     runs-on: ubuntu-latest
-     name: pyright type-check
-     steps:
-       - name: Check out source repository
-         uses: actions/checkout@v4
-       - name: Set up Python environment
-         uses: actions/setup-python@v5
-         with:
-           python-version: "3.11"
-       - name: Install Python dependencies
-         # TODO: use a venv
-         run: pip install -r requirements/requirements-all.txt
-       - name: Type-check with Pyright
-         uses: jakebailey/pyright-action@v2
-         with:
-           version: 1.1.382
-           level: warning
-           warnings: true
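The deleted workflow above amounts to a pyright run over the repository. A hedged local equivalent, assuming Python 3.11 and the requirements file referenced by the old workflow, is:

    # Sketch only; the old workflow additionally pinned pyright 1.1.382 and reported
    # at warning level through jakebailey/pyright-action.
    python3.11 -m venv .venv && . .venv/bin/activate
    pip install -r requirements/requirements-all.txt
    pip install pyright      # pip package wraps the Node-based pyright CLI
    pyright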
90 .github/workflows/server.yml vendored
@@ -20,12 +20,6 @@ on:
    types: [opened, synchronize, reopened]
    paths: ['.github/workflows/server.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'examples/server/**.*']

- env:
-   LLAMA_LOG_COLORS: 1
-   LLAMA_LOG_PREFIX: 1
-   LLAMA_LOG_TIMESTAMPS: 1
-   LLAMA_LOG_VERBOSITY: 10

  concurrency:
    group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || github.run_id }}
    cancel-in-progress: true
@@ -76,49 +70,20 @@ jobs:
    run: |
      pip install -r examples/server/tests/requirements.txt

-   # Setup nodejs (to be used for verifying bundled index.html)
-   - uses: actions/setup-node@v4
-     with:
-       node-version: '22.11.0'
-
-   - name: WebUI - Install dependencies
-     id: webui_lint
-     run: |
-       cd examples/server/webui
-       npm ci
-
-   - name: WebUI - Check code format
-     id: webui_format
+   - name: Verify server deps
+     id: verify_server_deps
      run: |
        git config --global --add safe.directory $(realpath .)
-       cd examples/server/webui
+       cd examples/server
+       git ls-files --others --modified
        git status
-
-       npm run format
+       ./deps.sh
        git status
-       modified_files="$(git status -s)"
-       echo "Modified files: ${modified_files}"
-       if [ -n "${modified_files}" ]; then
-         echo "Files do not follow coding style. To fix: npm run format"
-         echo "${modified_files}"
-         exit 1
-       fi
-
-   - name: Verify bundled index.html
-     id: verify_server_index_html
-     run: |
-       git config --global --add safe.directory $(realpath .)
-       cd examples/server/webui
-       git status
-
-       npm run build
-       git status
-       modified_files="$(git status -s)"
-       echo "Modified files: ${modified_files}"
-       if [ -n "${modified_files}" ]; then
-         echo "Repository is dirty or server/webui is not built as expected"
-         echo "Hint: You may need to follow Web UI build guide in server/README.md"
-         echo "${modified_files}"
+       not_ignored_files="$(git ls-files --others --modified)"
+       echo "Modified files: ${not_ignored_files}"
+       if [ -n "${not_ignored_files}" ]; then
+         echo "Repository is dirty or server deps are not built as expected"
+         echo "${not_ignored_files}"
        exit 1
      fi
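Both sides of this hunk verify that the checked-in server assets match what the build tools produce. A sketch of reproducing the npm-based checks from the master side locally, assuming Node.js is installed:

    # Reproduce the webui format/build verification from the master side of this hunk.
    cd examples/server/webui
    npm ci
    npm run format     # re-apply the expected code style
    npm run build      # regenerate the bundled index.html
    git status -s      # any output means the tree is dirty / not rebuilt as expected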
@@ -135,9 +100,9 @@ jobs:
        -DGGML_OPENMP=OFF ;
      cmake --build build --config ${{ matrix.build_type }} -j $(nproc) --target llama-server

- - name: Build (sanitizers)
+ - name: Build
-   id: cmake_build_sanitizers
+   id: cmake_build
-   if: ${{ matrix.sanitizer != '' && matrix.sanitizer != 'THREAD' }}
+   if: ${{ matrix.sanitizer != 'THREAD' }}
    run: |
      cmake -B build \
        -DGGML_NATIVE=OFF \
@@ -147,37 +112,18 @@ jobs:
        -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON ;
      cmake --build build --config ${{ matrix.build_type }} -j $(nproc) --target llama-server

- - name: Build (sanitizers)
-   id: cmake_build
-   if: ${{ matrix.sanitizer == '' }}
-   run: |
-     cmake -B build \
-       -DGGML_NATIVE=OFF \
-       -DLLAMA_BUILD_SERVER=ON \
-       -DLLAMA_CURL=ON \
-       -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} ;
-     cmake --build build --config ${{ matrix.build_type }} -j $(nproc) --target llama-server

  - name: Tests
    id: server_integration_tests
-   if: ${{ matrix.sanitizer == '' }}
    run: |
      cd examples/server/tests
-     ./tests.sh
+     PORT=8888 ./tests.sh

- - name: Tests (sanitizers)
-   id: server_integration_tests_sanitizers
-   if: ${{ matrix.sanitizer != '' }}
-   run: |
-     cd examples/server/tests
-     LLAMA_SANITIZE=1 ./tests.sh

  - name: Slow tests
    id: server_integration_tests_slow
    if: ${{ (github.event.schedule || github.event.inputs.slow_tests == 'true') && matrix.build_type == 'Release' }}
    run: |
      cd examples/server/tests
-     SLOW_TESTS=1 ./tests.sh
+     PORT=8888 ./tests.sh --stop --no-skipped --no-capture --tags slow

  server-windows:
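Both sides of this hunk drive the same test script with different knobs. A sketch of running the suites locally, after llama-server has been built:

    cd examples/server/tests
    ./tests.sh                                                          # regular suite (master side)
    SLOW_TESTS=1 ./tests.sh                                             # slow suite (master side)
    PORT=8888 ./tests.sh --stop --no-skipped --no-capture --tags slow   # slow suite (branch side)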
@@ -227,13 +173,11 @@ jobs:
    if: ${{ !matrix.disabled_on_pr || !github.event.pull_request }}
    run: |
      cd examples/server/tests
-     $env:PYTHONIOENCODING = ":replace"
-     pytest -v -x -m "not slow"
+     behave.exe --summary --stop --no-capture --exclude 'issues|wrong_usages|passkey' --tags llama.cpp

  - name: Slow tests
    id: server_integration_tests_slow
    if: ${{ (github.event.schedule || github.event.inputs.slow_tests == 'true') && matrix.build_type == 'Release' }}
    run: |
      cd examples/server/tests
-     $env:SLOW_TESTS = "1"
-     pytest -v -x
+     behave.exe --stop --no-skipped --no-capture --tags slow
22 .gitignore vendored
@@ -3,7 +3,6 @@
  *.a
  *.bat
  *.bin
- *.d
  *.dll
  *.dot
  *.etag
@@ -18,7 +17,6 @@
  *.metallib
  *.o
  *.so
- *.swp
  *.tmp

  # IDE / OS
@@ -49,10 +47,8 @@ build*
  !build-info.cpp.in
  !build-info.sh
  !build.zig
- !docs/build.md
  /libllama.so
  /llama-*
- /vulkan-shaders-gen
  android-ndk-*
  arm_neon.h
  cmake-build-*
@@ -63,12 +59,6 @@ llama-batched-swift
  /rpc-server
  out/
  tmp/
- autogen-*.md
-
- # Deprecated
-
- /main
- /server

  # CI
@@ -82,6 +72,7 @@ models-mnt
  !models/ggml-vocab-*.gguf*
+
  # Zig

  zig-out/
  zig-cache/
@@ -105,10 +96,6 @@ examples/server/*.mjs.hpp
  !examples/sycl/*.bat
  !examples/sycl/*.sh

- # Server Web UI temporary files
- node_modules
- examples/server/webui/dist
-
  # Python
  /.venv
@@ -136,10 +123,3 @@ poetry.toml
  # Scripts
  !/scripts/install-oneapi.bat
-
- # Test models for lora adapters
- /lora-tests
-
- # Local scripts
- /run-vim.sh
- /run-chat.sh
2 .gitmodules vendored
@@ -1,3 +1,3 @@
  [submodule "kompute"]
-   path = ggml/src/ggml-kompute/kompute
+   path = ggml/src/kompute
    url = https://github.com/nomic-ai/kompute.git
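Because the kompute submodule path differs between the two sides (ggml/src/ggml-kompute/kompute vs ggml/src/kompute), an existing checkout typically needs its submodule configuration re-synced after switching between these branches:

    # Re-read .gitmodules and update the checkout to the recorded commit and path.
    git submodule sync
    git submodule update --init --recursive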
267 AUTHORS
@@ -1,4 +1,4 @@
- # date: Tue Feb 4 13:04:05 EET 2025
+ # date: Wed Jun 26 19:36:34 EEST 2024
  # this file is auto-generated by scripts/gen-authors.sh

  0cc4m <picard12@live.de>
@@ -7,7 +7,6 @@
  2f38b454 <dxf@protonmail.com>
  3ooabkhxtn <31479382+3ooabkhxtn@users.noreply.github.com>
  44670 <44670@users.noreply.github.com>
- 65a <10104049+65a@users.noreply.github.com>
  AN Long <aisk@users.noreply.github.com>
  AT <manyoso@users.noreply.github.com>
  Aarni Koskela <akx@iki.fi>
@@ -20,30 +19,20 @@ Adithya Balaji <adithya.b94@gmail.com>
  AdithyanI <adithyan.i4internet@gmail.com>
  Adrian <smith.adriane@gmail.com>
  Adrian Hesketh <a-h@users.noreply.github.com>
- Adrien Gallouët <adrien@gallouet.fr>
- Adrien Gallouët <angt@huggingface.co>
- Ahmad Tameem <113388789+Tameem-10xE@users.noreply.github.com>
  Ahmet Zeer <ahmed.zeer@std.yildiz.edu.tr>
  AidanBeltonS <87009434+AidanBeltonS@users.noreply.github.com>
- AidanBeltonS <aidan.belton@codeplay.com>
  Aisuko <urakiny@gmail.com>
- Akarshan Biswas <akarshan.biswas@gmail.com>
  Akarshan Biswas <akarshanbiswas@fedoraproject.org>
- Al Mochkin <14274697+amochkin@users.noreply.github.com>
  Albert Jin <albert.jin@gmail.com>
  Alberto <57916483+albbus-stack@users.noreply.github.com>
- Alberto Cabrera Pérez <alberto.cabrera@codeplay.com>
- Alberto Cabrera Pérez <alberto.cabrera@intel.com>
  Alex <awhill19@icloud.com>
  Alex Azarov <alex@azarov.by>
  Alex Azarov <alexander.azarov@mapbox.com>
  Alex Klinkhamer <from.github.com.917@grencez.dev>
  Alex Klinkhamer <git@grencez.dev>
  Alex Nguyen <tiendung@users.noreply.github.com>
- Alex O'Connell <35843486+acon96@users.noreply.github.com>
  Alex Petenchea <alex.petenchea@gmail.com>
  Alex Renda <alexrenda@users.noreply.github.com>
- Alex Tuddenham <61622354+AlexsCode@users.noreply.github.com>
  Alex von Gluck IV <kallisti5@unixzen.com>
  Alexey Parfenov <zxed@alkatrazstudio.net>
  Ali Chraghi <63465728+alichraghi@users.noreply.github.com>
@@ -56,26 +45,18 @@ AmirAli Mirian <37371367+amiralimi@users.noreply.github.com>
  Ananta Bastola <anantarajbastola@gmail.com>
  Anas Ahouzi <112881240+aahouzi@users.noreply.github.com>
  András Salamon <ott2@users.noreply.github.com>
- Andreas (Andi) Kunar <andreask@msn.com>
- Andreas Kieslinger <47689530+aendk@users.noreply.github.com>
  Andrei <abetlen@gmail.com>
  Andrew Canis <andrew.canis@gmail.com>
  Andrew Downing <andrew2085@gmail.com>
  Andrew Duffy <a10y@users.noreply.github.com>
  Andrew Godfrey <AndrewGodfrey@users.noreply.github.com>
- Andrew Minh Nguyen <40281306+amqdn@users.noreply.github.com>
- Andy Salerno <andysalerno@gmail.com>
  Andy Tai <andy-tai@users.noreply.github.com>
- Anthony Van de Gejuchte <anthonyvdgent@gmail.com>
- Antonis Makropoulos <benuix@gmail.com>
  Arik Poznanski <arikpoz@users.noreply.github.com>
- Armen Kaleshian <kriation@users.noreply.github.com>
  Artem <guinmoon@gmail.com>
  Artem Zinnatullin <ceo@abstractny.gay>
  Artyom Lebedev <vagran.ast@gmail.com>
  Asbjørn Olling <asbjornolling@gmail.com>
  Ásgeir Bjarni Ingvarsson <asgeir@fundinn.org>
- Asghar Ghorbani <a-ghorbani@users.noreply.github.com>
  Ashish <1856117+ashishdatta@users.noreply.github.com>
  Ashok Gelal <401055+ashokgelal@users.noreply.github.com>
  Ashraful Islam <ashraful.meche@gmail.com>
@@ -94,21 +75,13 @@ Ben Siraphob <bensiraphob@gmail.com>
  Ben Williams <ben@719ben.com>
  Benjamin Findley <39356821+Kartoffelsaft@users.noreply.github.com>
  Benjamin Lecaillon <84293038+blecaillon@users.noreply.github.com>
- Benson Wong <mostlygeek@gmail.com>
  Bernat Vadell <hounter.caza@gmail.com>
- Bernhard M. Wiedemann <githubbmwprimary@lsmod.de>
- Bert Wagner <github@bertwagner.com>
- Billel Mokeddem <billel.mokeddem.ml@gmail.com>
  Bingan <70050083+binganao@users.noreply.github.com>
- Bjarke Viksøe <164612031+bviksoe@users.noreply.github.com>
  Bodo Graumann <mail@bodograumann.de>
  Bono Lv <lvscar@users.noreply.github.com>
  Borislav Stanimirov <b.stanimirov@abv.bg>
- Borislav Stanimirov <b@ibob.bg>
  Branden Butler <bwtbutler@hotmail.com>
- Brandon Squizzato <35474886+bsquizz@users.noreply.github.com>
  Brian <mofosyne@gmail.com>
- Brian Cunnie <brian.cunnie@gmail.com>
  Bruce MacDonald <brucewmacdonald@gmail.com>
  Bryan Honof <bryanhonof@gmail.com>
  CJ Pais <cj@cjpais.com>
@@ -117,51 +90,32 @@ Calvin Laurenson <calvin@laurenson.dev>
  Cameron <csteele@steelecameron.com>
  Cameron Kaiser <classilla@users.noreply.github.com>
  Carolinabanana <140120812+Carolinabanana@users.noreply.github.com>
- CarryFun <76023481+CarryFun@users.noreply.github.com>
- Carsten Kragelund Jørgensen <carsten@kragelund.me>
- CarterLi999 <664681047@qq.com>
  Casey Primozic <casey@cprimozic.net>
  Casey Primozic <me@ameo.link>
  CausalLM <148736309+CausalLM@users.noreply.github.com>
  Cebtenzzre <cebtenzzre@gmail.com>
- CentricStorm <CentricStorm@users.noreply.github.com>
  Chad Brewbaker <crb002@gmail.com>
- Changyeon Kim <cyzero.kim@samsung.com>
  Chao Jiang <jc19chaoj@zoho.com>
- Charles Xu <63788048+chaxu01@users.noreply.github.com>
- Charles Xu <charles.xu@arm.com>
- Chen Xi <xi2.chen@intel.com>
- Chen Xi <xixichen08@foxmail.com>
  Cheng Shao <terrorjack@type.dance>
- Chenguang Li <87689256+noemotiovon@users.noreply.github.com>
  Chris Elrod <elrodc@gmail.com>
  Chris Kuehl <ckuehl@ckuehl.me>
  Christian Demsar <christian@github.email.demsar.us>
  Christian Demsar <crasm@git.vczf.us>
  Christian Falch <875252+chrfalch@users.noreply.github.com>
- Christian Kastner <ckk@kvr.at>
  Christian Kögler <ck3d@gmx.de>
- Christian Köhnenkamp <cvk5@me.com>
  Christian Zhou-Zheng <59622928+christianazinn@users.noreply.github.com>
- Christopher Nielsen <62156882+mascguy@users.noreply.github.com>
  Clark Saben <76020733+csaben@users.noreply.github.com>
  Clint Herron <hanclinto@gmail.com>
- Conrad Kramer <conrad@conradkramer.com>
- Corentin REGAL <corentin.regal@gmail.com>
  CrispStrobe <154636388+CrispStrobe@users.noreply.github.com>
- Csaba Kecskemeti <csaba.kecskemeti@gmail.com>
  Cuong Trinh Manh <nguoithichkhampha@gmail.com>
  DAN™ <dranger003@gmail.com>
  Damian Stewart <d@damianstewart.com>
- Dan Johansson <164997844+eddnjjn@users.noreply.github.com>
- Dan Johansson <dan.johansson@arm.com>
  Dane Madsen <dane_madsen@hotmail.com>
  DaniAndTheWeb <57776841+DaniAndTheWeb@users.noreply.github.com>
  Daniel Bevenius <daniel.bevenius@gmail.com>
  Daniel Drake <drake@endlessos.org>
  Daniel Hiltgen <dhiltgen@users.noreply.github.com>
  Daniel Illescas Romero <illescas.daniel@protonmail.com>
- Daniel Kleine <53251018+d-kleine@users.noreply.github.com>
  Daniele <57776841+daniandtheweb@users.noreply.github.com>
  DannyDaemonic <DannyDaemonic@gmail.com>
  Dat Quoc Nguyen <2412555+datquocnguyen@users.noreply.github.com>
@@ -175,29 +129,19 @@ David Pflug <david@pflug.email>
  David Renshaw <dwrenshaw@gmail.com>
  David Sommers <12738+databyte@users.noreply.github.com>
  David Yang <davidyang6us@gmail.com>
- DavidKorczynski <david@adalogics.com>
  Dawid Potocki <github@dawidpotocki.com>
  Dawid Wysocki <62249621+TortillaZHawaii@users.noreply.github.com>
  Dean <Dean.Sinaean@gmail.com>
  Deins <deinsegle@gmail.com>
- Denis Spasyuk <34203011+dspasyuk@users.noreply.github.com>
- Derrick T. Woolworth <dwoolworth@gmail.com>
  Deven Mistry <31466137+deven367@users.noreply.github.com>
- Dibakar Gope <dibakar.gope@arm.com>
  Didzis Gosko <didzis@users.noreply.github.com>
- Diego Devesa <slarengh@gmail.com>
- Diogo Teles Sant'Anna <diogoteles@google.com>
- Djip007 <3705339+Djip007@users.noreply.github.com>
  Djip007 <djip.perois@free.fr>
  Don Mahurin <dmahurin@users.noreply.github.com>
  DooWoong Lee (David) <manics99@naver.com>
  Doomsdayrs <38189170+Doomsdayrs@users.noreply.github.com>
- Dou Xinpeng <15529241576@163.com>
- Dou Xinpeng <81913537+Dou-Git@users.noreply.github.com>
  Douglas Hanley <thesecretaryofwar@gmail.com>
  Dr. Tom Murphy VII Ph.D <499244+tom7@users.noreply.github.com>
  Ebey Abraham <ebey97@gmail.com>
- Echo Nolan <echo@echonolan.net>
  Ed Lee <edilee@mozilla.com>
  Ed Lepedus <ed.lepedus@googlemail.com>
  Eddie-Wang <wangjinheng1120@163.com>
@@ -205,16 +149,12 @@ Edward Taylor <edeetee@gmail.com>
  Elaine <elaine.zosa@gmail.com>
  Elbios <141279586+Elbios@users.noreply.github.com>
  Elton Kola <eltonkola@gmail.com>
- Emreerdog <34742675+Emreerdog@users.noreply.github.com>
  Engininja2 <139037756+Engininja2@users.noreply.github.com>
  Equim <sayaka@ekyu.moe>
- Eric Curtin <ecurtin@redhat.com>
- Eric Curtin <ericcurtin17@gmail.com>
  Eric Sommerlade <es0m@users.noreply.github.com>
  Eric Zhang <34133756+EZForever@users.noreply.github.com>
  Erik Garrison <erik.garrison@gmail.com>
  Erik Scholz <Green-Sky@users.noreply.github.com>
- Esko Toivonen <eskot98@gmail.com>
  Ettore Di Giacinto <mudler@users.noreply.github.com>
  Evan Jones <evan.q.jones@gmail.com>
  Evan Miller <emmiller@gmail.com>
@@ -226,27 +166,19 @@ FK <sozforex@gmail.com>
  Fabian <cmdrf@users.noreply.github.com>
  Fabio R. Sluzala <Fabio3rs@users.noreply.github.com>
  Faez Shakil <faez.shakil@gmail.com>
- Faisal Zaghloul <faisal.zaghloul@gmail.com>
- Faisal Zaghloul <quic_fzaghlou@quicinc.com>
- Fan Shupei <dymarkfan@outlook.com>
  FantasyGmm <16450052+FantasyGmm@users.noreply.github.com>
- Farbod Bijary <110523279+farbodbj@users.noreply.github.com>
  Fattire <528174+fat-tire@users.noreply.github.com>
  Felix <stenbackfelix@gmail.com>
  Finn Voorhees <finnvoorhees@gmail.com>
  Firat <firatkiral@gmail.com>
- FirstTimeEZ <179362031+FirstTimeEZ@users.noreply.github.com>
  Folko-Ven <71110216+Folko-Ven@users.noreply.github.com>
  Foul-Tarnished <107711110+Foul-Tarnished@users.noreply.github.com>
  Francisco Melo <43780565+francis2tm@users.noreply.github.com>
  Frank Mai <thxcode0824@gmail.com>
  FrankHB <frankhb1989@gmail.com>
- Frankie Robertson <frankier@users.noreply.github.com>
  Fred Douglas <43351173+fredlas@users.noreply.github.com>
  Frederik Vogel <Schaltfehler@users.noreply.github.com>
  Gabe Goodhart <gabe.l.hart@gmail.com>
- Gabe Goodhart <ghart@us.ibm.com>
- Gaetan Bisson <gaetan@fenua.org>
  GainLee <perfecter.gen@gmail.com>
  Galunid <karolek1231456@gmail.com>
  Gary Linscott <glinscott@gmail.com>
@@ -255,15 +187,12 @@ Gavin Zhao <gavinzhaojw@protonmail.com>
  Genkagaku.GPT <hlhr202@163.com>
  Georgi Gerganov <ggerganov@gmail.com>
  Gilad S <giladgd@users.noreply.github.com>
- Gilad S. <7817232+giladgd@users.noreply.github.com>
  Giuseppe Scrivano <giuseppe@scrivano.org>
  GiviMAD <GiviMAD@users.noreply.github.com>
  Govlzkoy <gotope@users.noreply.github.com>
  Guillaume "Vermeille" Sanchez <Guillaume.V.Sanchez@gmail.com>
  Guillaume Wenzek <gwenzek@users.noreply.github.com>
- Guoliang Hua <32868157+nbcsm@users.noreply.github.com>
  Guoteng <32697156+SolenoidWGT@users.noreply.github.com>
- Guspan Tanadi <36249910+guspan-tanadi@users.noreply.github.com>
  Gustavo Rocha Dias <91472747+gustrd@users.noreply.github.com>
  Haggai Nuchi <h.nuchi@gmail.com>
  Halalaluyafail3 <55773281+Halalaluyafail3@users.noreply.github.com>
@@ -274,47 +203,35 @@ Haoxiang Fei <tonyfettes@tonyfettes.com>
  Harald Fernengel <harald.fernengel@here.com>
  Hatsune Miku <129688334+at8u@users.noreply.github.com>
  HatsuneMikuUwU33 <173229399+HatsuneMikuUwU33@users.noreply.github.com>
- Haus1 <haus.xda@gmail.com>
  Henk Poley <HenkPoley@gmail.com>
  Henri Vasserman <henv@hot.ee>
  Henrik Forstén <henrik.forsten@gmail.com>
  Herman Semenov <GermanAizek@yandex.ru>
  Hesen Peng <hesen.peng@gmail.com>
- HimariO <dsfhe49854@gmail.com>
  Hoang Nguyen <hugo53@users.noreply.github.com>
  Hong Bo PENG <penghb@cn.ibm.com>
  Hongyu Ouyang <96765450+casavaca@users.noreply.github.com>
  Howard Su <howard0su@gmail.com>
  Hua Jiang <allenhjiang@outlook.com>
- Huang Qi <huangqi3@xiaomi.com>
  Huawei Lin <huaweilin.cs@gmail.com>
  Hugo Roussel <hugo.rous@gmail.com>
- Huifeng Ou <79071290+ho2103@users.noreply.github.com>
  Ian Bull <irbull@eclipsesource.com>
  Ian Bull <irbull@gmail.com>
  Ian Scrivener <github@zilogy.asia>
- Icecream95 <the.real.icecream95@gmail.com>
  Ido S <ido.pluto@gmail.com>
  IgnacioFDM <ignaciofdm@gmail.com>
  Igor Okulist <okigan@gmail.com>
- Ihar Hrachyshka <ihrachys@redhat.com>
  Ikko Eltociear Ashimine <eltociear@gmail.com>
  Ilya Kurdyukov <59548320+ilyakurdyukov@users.noreply.github.com>
  Ionoclast Laboratories <brigham@ionoclast.com>
  Isaac McFadyen <isaac@imcf.me>
  IsaacDynamo <61521674+IsaacDynamo@users.noreply.github.com>
- Ivan <nekotekina@gmail.com>
- Ivan Filipov <159561759+vanaka11@users.noreply.github.com>
  Ivan Komarov <Ivan.Komarov@dfyz.info>
  Ivan Stepanov <ivanstepanovftw@gmail.com>
- JFLFY2255 <JFLFY2255@163.com>
  JH23X <165871467+JH23X@users.noreply.github.com>
- Jack Mousseau <jack@software.inc>
  Jack Mousseau <jmousseau@users.noreply.github.com>
  JackJollimore <130917767+JackJollimore@users.noreply.github.com>
- Jaeden Amero <jaeden@patater.com>
  Jaemin Son <woalsdnd@gmail.com>
- Jafar Uruç <jafar.uruc@gmail.com>
  Jag Chadha <jagtesh@gmail.com>
  Jakub N <jakubniemczyk97@gmail.com>
  James A Capozzoli <157492257+jac-jim@users.noreply.github.com>
@@ -326,16 +243,11 @@ Jannis Schönleber <joennlae@gmail.com>
  Jared Van Bortel <cebtenzzre@gmail.com>
  Jared Van Bortel <jared@nomic.ai>
  Jason McCartney <jmac@theroot.org>
- Jason Stillerman <jason.t.stillerman@gmail.com>
  Jean-Christophe Hoelt <hoelt@fovea.cc>
  Jean-Michaël Celerier <jeanmichael.celerier+github@gmail.com>
  Jed Fox <git@jedfox.com>
- Jeff Bolz <jbolz@nvidia.com>
- Jeffrey Morgan <jmorganca@gmail.com>
  Jeffrey Quesnelle <emozilla@nousresearch.com>
- Jeroen Mostert <jeroen.mostert@cm.com>
  Jesse Jojo Johnson <williamsaintgeorge@gmail.com>
- Jett Janiak <jettjaniak@gmail.com>
  Jeximo <jeximo@gmail.com>
  Jhen-Jie Hong <iainst0409@gmail.com>
  Jiahao Li <liplus17@163.com>
@@ -346,9 +258,6 @@ Jiří Podivín <66251151+jpodivin@users.noreply.github.com>
  Jiří Sejkora <Sejseloid@gmail.com>
  Joan Fontanals <jfontanalsmartinez@gmail.com>
  Joan Fontanals <joan.fontanals.martinez@jina.ai>
- João Dinis Ferreira <hello@joaof.eu>
- Joe Eli McIlvain <joe.eli.mac@gmail.com>
- Joe Todd <joe.todd@codeplay.com>
  Johan <JohanAR@users.noreply.github.com>
  Johannes Gäßler <johannesg@5d6.de>
  Johannes Rudolph <johannes.rudolph@gmail.com>
@@ -364,11 +273,8 @@ Josh Ramer <josh.ramer@icloud.com>
  Joyce <joycebrum@google.com>
  Juan Calderon-Perez <835733+gaby@users.noreply.github.com>
  Judd <foldl@users.noreply.github.com>
- Juk Armstrong <69222624+jukofyork@users.noreply.github.com>
  Julius Arkenberg <arki05@users.noreply.github.com>
- Jun Hee Yoo <contact.jhyoo@gmail.com>
  Jun Jie <71215065+junnjiee16@users.noreply.github.com>
- Junil Kim <logyourself@gmail.com>
  Junyang Lin <justinlin930319@hotmail.com>
  Juraj Bednar <juraj@bednar.io>
  Justin Parker <jparkerweb@gmail.com>
@@ -379,7 +285,6 @@ Justine Tunney <jtunney@mozilla.com>
  Juuso Alasuutari <juuso.alasuutari@gmail.com>
  KASR <karim.asrih@gmail.com>
  Kamil Tomšík <info@tomsik.cz>
- Karol Kontny <82021046+kkontny@users.noreply.github.com>
  Karsten Weiss <knweiss@gmail.com>
  Karthick <j.karthic2004@gmail.com>
  Karthik Kumar Viswanathan <195178+guilt@users.noreply.github.com>
@@ -387,19 +292,16 @@ Karthik Sethuraman <k.seth1993@gmail.com>
  Kasumi <90275229+kasumi-1@users.noreply.github.com>
  Kawrakow <48489457+ikawrakow@users.noreply.github.com>
  Keiichi Tabata <keiichi.tabata@outlook.com>
- Keke Han <hankeke303@163.com>
  Kenvix ⭐ <kenvixzure@live.com>
  Kerfuffle <44031344+KerfuffleV2@users.noreply.github.com>
  Kevin Gibbons <bakkot@gmail.com>
  Kevin Ji <1146876+kevinji@users.noreply.github.com>
  Kevin Kwok <antimatter15@gmail.com>
  Kevin Lo <kevlo@kevlo.org>
- Kevin Wang <kevmo314@gmail.com>
  Kolen Cheung <ickc@users.noreply.github.com>
  Konstantin Herud <konstantin.herud@denkbares.com>
  Konstantin Zhuravlyov <konstantin.zhuravlyov@amd.com>
  Kunshang Ji <kunshang.ji@intel.com>
- Kyle Bruene <KyleBruene@users.noreply.github.com>
  Kyle Liang <liangmanlai@gmail.com>
  Kyle Mistele <kyle@mistele.com>
  Kylin <56434533+KyL0N@users.noreply.github.com>
@@ -413,30 +315,22 @@ LeonEricsson <70749762+LeonEricsson@users.noreply.github.com>
  Leonardo Neumann <leonardo@neumann.dev.br>
  Li Tan <tanliboy@gmail.com>
  Linwei Wang <wanix1988@gmail.com>
- Liu Jia <109258120+Septa2112@users.noreply.github.com>
- Liu Jia <jia3.liu@intel.com>
  LoganDark <github@logandark.mozmail.com>
- Loïc Carrère <loic.carrere@gmail.com>
  LostRuins <39025047+LostRuins@users.noreply.github.com>
- LostRuins Concedo <39025047+LostRuins@users.noreply.github.com>
  Luciano <lucianostrika44@gmail.com>
  Luo Tian <lt@basecity.com>
  Lyle Dean <dean@lyle.dev>
- M-A <maruel@gmail.com>
  M. Yusuf Sarıgöz <yusufsarigoz@gmail.com>
- Ma Mingfei <mingfei.ma@intel.com>
  Maarten ter Huurne <maarten@treewalker.org>
  Mack Straight <eiz@users.noreply.github.com>
  Maël Kerbiriou <m431.kerbiriou@gmail.com>
  MaggotHATE <clay1326@gmail.com>
- Mahesh Madhav <67384846+heshpdx@users.noreply.github.com>
  Manuel <44313466+makuche@users.noreply.github.com>
  Marc Köhlbrugge <subscriptions@marckohlbrugge.com>
  Marco Matthies <71844+marcom@users.noreply.github.com>
  Marcus Dunn <51931484+MarcusDunn@users.noreply.github.com>
  Marian Cepok <marian.cepok@gmail.com>
  Mark Fairbairn <thebaron88@gmail.com>
- Mark Zhuang <zhuangqiubin@gmail.com>
  Marko Tasic <mtasic85@gmail.com>
  Markus Tavenrath <mtavenrath@users.noreply.github.com>
  Martin Delille <martin@delille.org>
@@ -448,16 +342,11 @@ MasterYi1024 <39848311+MasterYi1024@users.noreply.github.com>
  Mateusz Charytoniuk <mateusz.charytoniuk@protonmail.com>
  Matheus C. França <matheus-catarino@hotmail.com>
  Matheus Gabriel Alves Silva <matheusgasource@gmail.com>
- Mathieu Baudier <mbaudier@argeo.org>
- Mathieu Geli <mathieu.geli@gmail.com>
  Mathieu Nayrolles <MathieuNls@users.noreply.github.com>
- Mathijs Henquet <mathijs.henquet@gmail.com>
  Mathijs de Bruin <mathijs@mathijsfietst.nl>
  Matt Clayton <156335168+mattjcly@users.noreply.github.com>
  Matt Pulver <matt.pulver@heavy.ai>
- Matt Stephenson <mstephenson6@users.noreply.github.com>
  Matteo Boschini <12133566+mbosc@users.noreply.github.com>
- Matteo Mortari <matteo.mortari@gmail.com>
  Mattheus Chediak <shammcity00@gmail.com>
  Matthew Tejo <matthew.tejo@gmail.com>
  Matvey Soloviev <blackhole89@gmail.com>
@@ -467,11 +356,8 @@ Maxime <672982+maximegmd@users.noreply.github.com>
  Maximilian Winter <maximilian.winter.91@gmail.com>
  Meng Zhang <meng@tabbyml.com>
  Meng, Hengyu <hengyu.meng@intel.com>
- Mengqing Cao <cmq0113@163.com>
  Merrick Christensen <merrick.christensen@gmail.com>
  Michael Coppola <m18coppola@gmail.com>
- Michael Engel <mengel@redhat.com>
- Michael Francis <edude03@gmail.com>
  Michael Hueschen <m@mhueschen.dev>
  Michael Kesper <mkesper@schokokeks.org>
  Michael Klimenko <mklimenko29@gmail.com>
@@ -479,81 +365,52 @@ Michael Podvitskiy <podvitskiymichael@gmail.com>
  Michael Potter <NanoTekGuy@Gmail.com>
  Michael de Gans <michael.john.degans@gmail.com>
  Michaël de Vries <vriesdemichael@gmail.com>
- Michał Moskal <michal@moskal.me>
- Michał Tuszyński <srgtuszy@gmail.com>
- Michelle Tan <41475767+MichelleTanPY@users.noreply.github.com>
  Mihai <mihai.chirculescu@yahoo.com>
  Mike <ytianhui2004@gmail.com>
  Mikko Juola <mikjuo@gmail.com>
  Minsoo Cheong <54794500+mscheong01@users.noreply.github.com>
- Minsoo Cheong <icycle0409@snu.ac.kr>
  Mirko185 <mirkosig@gmail.com>
  Mirror Azure <54669636+MirrorAzure@users.noreply.github.com>
- MistApproach <98988043+MistApproach@users.noreply.github.com>
  Miwa / Ensan <63481257+ensan-hcl@users.noreply.github.com>
  Mohammadreza Hendiani <hendiani.mohammadreza@gmail.com>
  Mohammadreza Hendiani <mohammad.r.hendiani@gmail.com>
|
||||||
Molly Sophia <mollysophia379@gmail.com>
|
|
||||||
MorganRO8 <47795945+MorganRO8@users.noreply.github.com>
|
|
||||||
Murilo Santana <mvrilo@gmail.com>
|
Murilo Santana <mvrilo@gmail.com>
|
||||||
Musab Gultekin <musabgultekin@users.noreply.github.com>
|
Musab Gultekin <musabgultekin@users.noreply.github.com>
|
||||||
Nam D. Tran <42194884+namtranase@users.noreply.github.com>
|
Nam D. Tran <42194884+namtranase@users.noreply.github.com>
|
||||||
Nathan Epstein <nate2@umbc.edu>
|
Nathan Epstein <nate2@umbc.edu>
|
||||||
Natsu <chino@hotococoa.moe>
|
|
||||||
NawafAlansari <72708095+NawafAlansari@users.noreply.github.com>
|
NawafAlansari <72708095+NawafAlansari@users.noreply.github.com>
|
||||||
Nebula <infinitewormhole@gmail.com>
|
Nebula <infinitewormhole@gmail.com>
|
||||||
Neo Zhang <14088817+arthw@users.noreply.github.com>
|
Neo Zhang <14088817+arthw@users.noreply.github.com>
|
||||||
Neo Zhang <zhang.jianyu@outlook.com>
|
Neo Zhang <zhang.jianyu@outlook.com>
|
||||||
Neo Zhang Jianyu <jianyu.zhang@intel.com>
|
Neo Zhang Jianyu <jianyu.zhang@intel.com>
|
||||||
Neuman Vong <neuman.vong@gmail.com>
|
Neuman Vong <neuman.vong@gmail.com>
|
||||||
NeverLucky <92274250+nvrxq@users.noreply.github.com>
|
|
||||||
Nexes the Old <124105151+Nexesenex@users.noreply.github.com>
|
|
||||||
Nexesenex <124105151+Nexesenex@users.noreply.github.com>
|
Nexesenex <124105151+Nexesenex@users.noreply.github.com>
|
||||||
Niall Coates <1349685+Niall-@users.noreply.github.com>
|
Niall Coates <1349685+Niall-@users.noreply.github.com>
|
||||||
Nicholai Tukanov <nicholaitukanov@gmail.com>
|
|
||||||
Nico Bosshard <nico@bosshome.ch>
|
|
||||||
Nicolai Weitkemper <kontakt@nicolaiweitkemper.de>
|
Nicolai Weitkemper <kontakt@nicolaiweitkemper.de>
|
||||||
Nicolás Pérez <nicolas_perez@brown.edu>
|
Nicolás Pérez <nicolas_perez@brown.edu>
|
||||||
Nicolò Scipione <nicolo.scipione@codeplay.com>
|
|
||||||
Nigel Bosch <pnigelb@gmail.com>
|
Nigel Bosch <pnigelb@gmail.com>
|
||||||
Nikita Sarychev <42014488+sARY77@users.noreply.github.com>
|
|
||||||
Niklas Korz <niklas@niklaskorz.de>
|
Niklas Korz <niklas@niklaskorz.de>
|
||||||
NikolaiLyssogor <59844691+NikolaiLyssogor@users.noreply.github.com>
|
|
||||||
Nikolaos Pothitos <pothitos@di.uoa.gr>
|
|
||||||
Nikolas <127742645+nneubacher@users.noreply.github.com>
|
Nikolas <127742645+nneubacher@users.noreply.github.com>
|
||||||
Nindaleth <Nindaleth@users.noreply.github.com>
|
Nindaleth <Nindaleth@users.noreply.github.com>
|
||||||
Nuno <rare-magma@posteo.eu>
|
|
||||||
OSecret <135510162+OLSecret@users.noreply.github.com>
|
|
||||||
Oleksandr Nikitin <oleksandr@tvori.info>
|
Oleksandr Nikitin <oleksandr@tvori.info>
|
||||||
Oleksii Maryshchenko <oleksii.maryshchenko@gmail.com>
|
Oleksii Maryshchenko <oleksii.maryshchenko@gmail.com>
|
||||||
Olivier Chafik <ochafik@users.noreply.github.com>
|
Olivier Chafik <ochafik@users.noreply.github.com>
|
||||||
Ondřej Čertík <ondrej@certik.us>
|
Ondřej Čertík <ondrej@certik.us>
|
||||||
Ouadie EL FAROUKI <ouadie.elfarouki@codeplay.com>
|
Ouadie EL FAROUKI <ouadie.elfarouki@codeplay.com>
|
||||||
PAB <pierreantoine.bannier@gmail.com>
|
|
||||||
Pablo Duboue <pablo.duboue@gmail.com>
|
|
||||||
Pascal Patry <ppatry@mtacitlabs.com>
|
|
||||||
Patrice Ferlet <metal3d@gmail.com>
|
Patrice Ferlet <metal3d@gmail.com>
|
||||||
Paul Tsochantaris <ptsochantaris@icloud.com>
|
Paul Tsochantaris <ptsochantaris@icloud.com>
|
||||||
Pavel Zloi <github.com@drteam.rocks>
|
|
||||||
Pavol Rusnak <pavol@rusnak.io>
|
Pavol Rusnak <pavol@rusnak.io>
|
||||||
Paweł Wodnicki <151604+32bitmicro@users.noreply.github.com>
|
|
||||||
Pedro Cuenca <pedro@huggingface.co>
|
Pedro Cuenca <pedro@huggingface.co>
|
||||||
Peter <peter277@users.noreply.github.com>
|
|
||||||
Peter Sugihara <peter@campsh.com>
|
Peter Sugihara <peter@campsh.com>
|
||||||
Phil H <5756783+phiharri@users.noreply.github.com>
|
Phil H <5756783+phiharri@users.noreply.github.com>
|
||||||
Philip Taron <philip.taron@gmail.com>
|
Philip Taron <philip.taron@gmail.com>
|
||||||
Phillip Kravtsov <phillip@kravtsov.net>
|
Phillip Kravtsov <phillip@kravtsov.net>
|
||||||
Pierre Alexandre SCHEMBRI <pa.schembri@gmail.com>
|
Pierre Alexandre SCHEMBRI <pa.schembri@gmail.com>
|
||||||
Pierrick Hymbert <pierrick.hymbert@gmail.com>
|
Pierrick Hymbert <pierrick.hymbert@gmail.com>
|
||||||
Pieter Ouwerkerk <pieter.ouwerkerk@gmail.com>
|
|
||||||
Plamen Minev <pacominev@gmail.com>
|
|
||||||
Prashant Vithule <119530321+Vithulep@users.noreply.github.com>
|
|
||||||
Przemysław Pawełczyk <przemoc@gmail.com>
|
Przemysław Pawełczyk <przemoc@gmail.com>
|
||||||
Qin Yue Chen <71813199+chenqiny@users.noreply.github.com>
|
Qin Yue Chen <71813199+chenqiny@users.noreply.github.com>
|
||||||
Qingyou Meng <meng.qingyou@gmail.com>
|
Qingyou Meng <meng.qingyou@gmail.com>
|
||||||
Qu Zongfu <43257352+yancaoweidaode@users.noreply.github.com>
|
Qu Zongfu <43257352+yancaoweidaode@users.noreply.github.com>
|
||||||
R0CKSTAR <xiaodong.ye@mthreads.com>
|
|
||||||
R0CKSTAR <yeahdongcn@gmail.com>
|
|
||||||
RJ Adriaansen <adriaansen@eshcc.eur.nl>
|
RJ Adriaansen <adriaansen@eshcc.eur.nl>
|
||||||
Radoslav Gerganov <rgerganov@gmail.com>
|
Radoslav Gerganov <rgerganov@gmail.com>
|
||||||
Radosław Gryta <radek.gryta@gmail.com>
|
Radosław Gryta <radek.gryta@gmail.com>
|
||||||
|
@ -562,16 +419,11 @@ Raj Hammeer Singh Hada <hammeerraj@gmail.com>
|
||||||
Ralph Soika <ralph.soika@imixs.com>
|
Ralph Soika <ralph.soika@imixs.com>
|
||||||
Rand Xie <randxiexyy29@gmail.com>
|
Rand Xie <randxiexyy29@gmail.com>
|
||||||
Randall Fitzgerald <randall@dasaku.net>
|
Randall Fitzgerald <randall@dasaku.net>
|
||||||
Random Fly <renfei8@live.cn>
|
|
||||||
Reinforce-II <fate@eastal.com>
|
Reinforce-II <fate@eastal.com>
|
||||||
Rémy Oudompheng <oudomphe@phare.normalesup.org>
|
|
||||||
Ren Xuancheng <jklj077@users.noreply.github.com>
|
Ren Xuancheng <jklj077@users.noreply.github.com>
|
||||||
Rene Leonhardt <65483435+reneleonhardt@users.noreply.github.com>
|
Rene Leonhardt <65483435+reneleonhardt@users.noreply.github.com>
|
||||||
Reza Kakhki <rezakakhki.de@gmail.com>
|
|
||||||
RhinoDevel <RhinoDevel@users.noreply.github.com>
|
RhinoDevel <RhinoDevel@users.noreply.github.com>
|
||||||
Riccardo Orlando <Riccorl@users.noreply.github.com>
|
|
||||||
Riceball LEE <snowyu.lee@gmail.com>
|
Riceball LEE <snowyu.lee@gmail.com>
|
||||||
Rich Dougherty <rich@rd.nz>
|
|
||||||
Richard Kiss <him@richardkiss.com>
|
Richard Kiss <him@richardkiss.com>
|
||||||
Richard Roberson <richardr1126@gmail.com>
|
Richard Roberson <richardr1126@gmail.com>
|
||||||
Rick G <26732651+TheFlipbook@users.noreply.github.com>
|
Rick G <26732651+TheFlipbook@users.noreply.github.com>
|
||||||
|
@ -582,39 +434,26 @@ Riley Stewart <ristew@users.noreply.github.com>
|
||||||
Rinne <AsakusaRinne@gmail.com>
|
Rinne <AsakusaRinne@gmail.com>
|
||||||
Rinne <liu_yaohui1998@126.com>
|
Rinne <liu_yaohui1998@126.com>
|
||||||
Robert Brisita <986796+rbrisita@users.noreply.github.com>
|
Robert Brisita <986796+rbrisita@users.noreply.github.com>
|
||||||
Robert Collins <roberto.tomas.cuentas@gmail.com>
|
|
||||||
Robert Ormandi <52251610+ormandi@users.noreply.github.com>
|
|
||||||
Robert Sung-wook Shin <edp1096@users.noreply.github.com>
|
Robert Sung-wook Shin <edp1096@users.noreply.github.com>
|
||||||
Robey Holderith <robey@flaminglunchbox.net>
|
Robey Holderith <robey@flaminglunchbox.net>
|
||||||
Robyn <robyngraf@users.noreply.github.com>
|
Robyn <robyngraf@users.noreply.github.com>
|
||||||
Roger Meier <r.meier@siemens.com>
|
Roger Meier <r.meier@siemens.com>
|
||||||
Roland <14355895+rbur0425@users.noreply.github.com>
|
Roland <14355895+rbur0425@users.noreply.github.com>
|
||||||
Romain Biessy <romain.biessy@codeplay.com>
|
|
||||||
Romain D <90720+Artefact2@users.noreply.github.com>
|
Romain D <90720+Artefact2@users.noreply.github.com>
|
||||||
Romain Neutron <romain@neutron.io>
|
Romain Neutron <romain@neutron.io>
|
||||||
Roman Parykin <donderom@gmail.com>
|
Roman Parykin <donderom@gmail.com>
|
||||||
Ron Evans <ron@hybridgroup.com>
|
Ron Evans <ron@hybridgroup.com>
|
||||||
Ron Jailall <rojailal@gmail.com>
|
Ron Jailall <rojailal@gmail.com>
|
||||||
Roni <sulpher@gmx.net>
|
|
||||||
Ronny Brendel <ronnybrendel@gmail.com>
|
Ronny Brendel <ronnybrendel@gmail.com>
|
||||||
Ronsor <ronsor@ronsor.pw>
|
Ronsor <ronsor@ronsor.pw>
|
||||||
Rowan Hart <rowanbhart@gmail.com>
|
Rowan Hart <rowanbhart@gmail.com>
|
||||||
Ruan <47767371+ruanych@users.noreply.github.com>
|
|
||||||
Ruchira Hasaranga <ruchira66@gmail.com>
|
|
||||||
Rudi Servo <rudiservo@gmail.com>
|
|
||||||
Ruixin Huang <18860020911@163.com>
|
|
||||||
Rune <43761327+Rune-AI@users.noreply.github.com>
|
Rune <43761327+Rune-AI@users.noreply.github.com>
|
||||||
RunningLeon <maningsheng@sensetime.com>
|
|
||||||
RunningLeon <mnsheng@yeah.net>
|
|
||||||
Ryan Landay <rlanday@gmail.com>
|
Ryan Landay <rlanday@gmail.com>
|
||||||
Ryder Wishart <ryderwishart@gmail.com>
|
Ryder Wishart <ryderwishart@gmail.com>
|
||||||
Ryuei <louixs@users.noreply.github.com>
|
Ryuei <louixs@users.noreply.github.com>
|
||||||
Rőczey Barnabás <31726601+An0nie@users.noreply.github.com>
|
Rőczey Barnabás <31726601+An0nie@users.noreply.github.com>
|
||||||
SRHMorris <69468379+SRHMorris@users.noreply.github.com>
|
|
||||||
SXX <sxx1136965276@gmail.com>
|
|
||||||
SakuraUmi <yukinon244@gmail.com>
|
SakuraUmi <yukinon244@gmail.com>
|
||||||
Salvador E. Tropea <stropea@inti.gob.ar>
|
Salvador E. Tropea <stropea@inti.gob.ar>
|
||||||
Salvatore Mesoraca <s.mesoraca16@gmail.com>
|
|
||||||
Sam Spilsbury <smspillaz@gmail.com>
|
Sam Spilsbury <smspillaz@gmail.com>
|
||||||
Sami Farin <3876865+Safari77@users.noreply.github.com>
|
Sami Farin <3876865+Safari77@users.noreply.github.com>
|
||||||
Samuel Maynard <samwmaynard@gmail.com>
|
Samuel Maynard <samwmaynard@gmail.com>
|
||||||
|
@ -624,29 +463,23 @@ Sebastián A <sebastian.aedo29@gmail.com>
|
||||||
SebastianApel <13675545+SebastianApel@users.noreply.github.com>
|
SebastianApel <13675545+SebastianApel@users.noreply.github.com>
|
||||||
Senemu <10880819+Senemu@users.noreply.github.com>
|
Senemu <10880819+Senemu@users.noreply.github.com>
|
||||||
Sergey Alirzaev <zl29ah@gmail.com>
|
Sergey Alirzaev <zl29ah@gmail.com>
|
||||||
Sergio López <slp@redhat.com>
|
|
||||||
Sergio López <slp@sinrega.org>
|
Sergio López <slp@sinrega.org>
|
||||||
Sertaç Özercan <852750+sozercan@users.noreply.github.com>
|
Sertaç Özercan <852750+sozercan@users.noreply.github.com>
|
||||||
SeungWon Jeong <65549245+redlion0929@users.noreply.github.com>
|
SeungWon Jeong <65549245+redlion0929@users.noreply.github.com>
|
||||||
ShadovvBeast <ShadovvBeast@gmail.com>
|
ShadovvBeast <ShadovvBeast@gmail.com>
|
||||||
Shakhar Dasgupta <shakhardasgupta@gmail.com>
|
Shakhar Dasgupta <shakhardasgupta@gmail.com>
|
||||||
Shane A <shanea@allenai.org>
|
|
||||||
Shangning Xu <32517059+xushangning@users.noreply.github.com>
|
Shangning Xu <32517059+xushangning@users.noreply.github.com>
|
||||||
Shankar <gshankar.87@gmail.com>
|
|
||||||
Shanshan Shen <467638484@qq.com>
|
|
||||||
Shijie <821898965@qq.com>
|
Shijie <821898965@qq.com>
|
||||||
Shintarou Okada <kokuzen@gmail.com>
|
Shintarou Okada <kokuzen@gmail.com>
|
||||||
Shouzheng Liu <61452103+lshzh-ww@users.noreply.github.com>
|
Shouzheng Liu <61452103+lshzh-ww@users.noreply.github.com>
|
||||||
Shouzheng Liu <lshzh.hi@gmail.com>
|
Shouzheng Liu <lshzh.hi@gmail.com>
|
||||||
Shuichi Tsutsumi <shuichi0526@gmail.com>
|
Shuichi Tsutsumi <shuichi0526@gmail.com>
|
||||||
Shupei Fan <dymarkfan@outlook.com>
|
|
||||||
Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
|
Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
|
||||||
Simon Willison <swillison@gmail.com>
|
Simon Willison <swillison@gmail.com>
|
||||||
Siwen Yu <yusiwen@gmail.com>
|
Siwen Yu <yusiwen@gmail.com>
|
||||||
Sky Yan <skyan83@gmail.com>
|
Sky Yan <skyan83@gmail.com>
|
||||||
Slaren <2141330+slaren@users.noreply.github.com>
|
Slaren <2141330+slaren@users.noreply.github.com>
|
||||||
Slava Primenko <primenko.s@gmail.com>
|
Slava Primenko <primenko.s@gmail.com>
|
||||||
Small Grass Forest <zixuanxcl@gmail.com>
|
|
||||||
SoftwareRenderer <138734813+SoftwareRenderer@users.noreply.github.com>
|
SoftwareRenderer <138734813+SoftwareRenderer@users.noreply.github.com>
|
||||||
Someone <sergei.kozlukov@aalto.fi>
|
Someone <sergei.kozlukov@aalto.fi>
|
||||||
Someone Serge <sergei.kozlukov@aalto.fi>
|
Someone Serge <sergei.kozlukov@aalto.fi>
|
||||||
|
@ -658,33 +491,25 @@ Stefan Sydow <stefan@sydow.email>
|
||||||
Steffen Röcker <sroecker@gmail.com>
|
Steffen Röcker <sroecker@gmail.com>
|
||||||
Stephan Walter <stephan@walter.name>
|
Stephan Walter <stephan@walter.name>
|
||||||
Stephen Nichols <snichols@users.noreply.github.com>
|
Stephen Nichols <snichols@users.noreply.github.com>
|
||||||
Steve Bonds <sbonds@gmail.com>
|
|
||||||
Steve Grubb <ausearch.1@gmail.com>
|
Steve Grubb <ausearch.1@gmail.com>
|
||||||
Steven Prichard <spprichard20@gmail.com>
|
Steven Prichard <spprichard20@gmail.com>
|
||||||
Steven Roussey <sroussey@gmail.com>
|
Steven Roussey <sroussey@gmail.com>
|
||||||
Steward Garcia <57494570+FSSRepo@users.noreply.github.com>
|
Steward Garcia <57494570+FSSRepo@users.noreply.github.com>
|
||||||
StrangeBytesDev <141275258+StrangeBytesDev@users.noreply.github.com>
|
|
||||||
Suaj Carrot <72162667+SuajCarrot@users.noreply.github.com>
|
Suaj Carrot <72162667+SuajCarrot@users.noreply.github.com>
|
||||||
Sukriti Sharma <Ssukriti@users.noreply.github.com>
|
|
||||||
SuperUserNameMan <yoann@terminajones.com>
|
SuperUserNameMan <yoann@terminajones.com>
|
||||||
Sutou Kouhei <kou@cozmixng.org>
|
|
||||||
Tai Duc Nguyen <taiducnguyen.drexel@gmail.com>
|
Tai Duc Nguyen <taiducnguyen.drexel@gmail.com>
|
||||||
Taikono-Himazin <kazu@po.harenet.ne.jp>
|
Taikono-Himazin <kazu@po.harenet.ne.jp>
|
||||||
Tameem <113388789+AhmadTameem@users.noreply.github.com>
|
Tameem <113388789+AhmadTameem@users.noreply.github.com>
|
||||||
Tamotsu Takahashi <ttakah+github@gmail.com>
|
Tamotsu Takahashi <ttakah+github@gmail.com>
|
||||||
Tei Home <taiteitonghome@proton.me>
|
|
||||||
Thái Hoàng Tâm <75922889+RoyalHeart@users.noreply.github.com>
|
Thái Hoàng Tâm <75922889+RoyalHeart@users.noreply.github.com>
|
||||||
Thatcher Chamberlin <j.thatcher.c@gmail.com>
|
Thatcher Chamberlin <j.thatcher.c@gmail.com>
|
||||||
Theia Vogel <theia@vgel.me>
|
Theia Vogel <theia@vgel.me>
|
||||||
Thérence <13496987+Royalphax@users.noreply.github.com>
|
Thérence <13496987+Royalphax@users.noreply.github.com>
|
||||||
Thibault Terrasson <thibault.terrasson@gmail.com>
|
Thibault Terrasson <thibault.terrasson@gmail.com>
|
||||||
Thomas Klausner <wiz@gatalith.at>
|
Thomas Klausner <wiz@gatalith.at>
|
||||||
Thorsten Sommer <SommerEngineering@users.noreply.github.com>
|
|
||||||
Tim Miller <drasticactions@users.noreply.github.com>
|
Tim Miller <drasticactions@users.noreply.github.com>
|
||||||
Tim Wang <overocean@gmail.com>
|
|
||||||
Timmy Knight <r2d2fish@gmail.com>
|
Timmy Knight <r2d2fish@gmail.com>
|
||||||
Timothy Cronin <40186632+4imothy@users.noreply.github.com>
|
Timothy Cronin <40186632+4imothy@users.noreply.github.com>
|
||||||
Ting Lou <louting@189.cn>
|
|
||||||
Ting Lou <ting.lou@gmail.com>
|
Ting Lou <ting.lou@gmail.com>
|
||||||
Ting Sun <suntcrick@gmail.com>
|
Ting Sun <suntcrick@gmail.com>
|
||||||
Tobias Lütke <tobi@shopify.com>
|
Tobias Lütke <tobi@shopify.com>
|
||||||
|
@ -692,44 +517,32 @@ Tom C <tom.corelis@gmail.com>
|
||||||
Tom Jobbins <784313+TheBloke@users.noreply.github.com>
|
Tom Jobbins <784313+TheBloke@users.noreply.github.com>
|
||||||
Tomas <tom.tomas.36478119@gmail.com>
|
Tomas <tom.tomas.36478119@gmail.com>
|
||||||
Tomáš Pazdiora <tomas.pazdiora@gmail.com>
|
Tomáš Pazdiora <tomas.pazdiora@gmail.com>
|
||||||
Tony Wasserka <4840017+neobrain@users.noreply.github.com>
|
|
||||||
Tristan Druyen <tristan@vault81.mozmail.com>
|
Tristan Druyen <tristan@vault81.mozmail.com>
|
||||||
Tristan Ross <rosscomputerguy@protonmail.com>
|
Tristan Ross <rosscomputerguy@protonmail.com>
|
||||||
Trivikram Kamat <16024985+trivikr@users.noreply.github.com>
|
|
||||||
Tungsten842 <886724vf@anonaddy.me>
|
Tungsten842 <886724vf@anonaddy.me>
|
||||||
Tungsten842 <quantmint@protonmail.com>
|
Tungsten842 <quantmint@protonmail.com>
|
||||||
Tushar <ditsuke@protonmail.com>
|
Tushar <ditsuke@protonmail.com>
|
||||||
UEXTM.com <84163508+uextm@users.noreply.github.com>
|
UEXTM.com <84163508+uextm@users.noreply.github.com>
|
||||||
Ujjawal Panchal <31011628+Ujjawal-K-Panchal@users.noreply.github.com>
|
|
||||||
Ulrich Drepper <drepper@gmail.com>
|
Ulrich Drepper <drepper@gmail.com>
|
||||||
Uzo Nweke <uzoechi@gmail.com>
|
Uzo Nweke <uzoechi@gmail.com>
|
||||||
Vaibhav Srivastav <vaibhavs10@gmail.com>
|
Vaibhav Srivastav <vaibhavs10@gmail.com>
|
||||||
Val Kharitonov <mail@kharvd.com>
|
Val Kharitonov <mail@kharvd.com>
|
||||||
Valentin Konovalov <valle.ketsujin@gmail.com>
|
Valentin Konovalov <valle.ketsujin@gmail.com>
|
||||||
Valentin Mamedov <45292985+Inf1delis@users.noreply.github.com>
|
|
||||||
Valentyn Bezshapkin <61702053+valentynbez@users.noreply.github.com>
|
Valentyn Bezshapkin <61702053+valentynbez@users.noreply.github.com>
|
||||||
Vali Malinoiu <0x4139@gmail.com>
|
|
||||||
Victor Nogueira <felladrin@gmail.com>
|
Victor Nogueira <felladrin@gmail.com>
|
||||||
Victor Z. Peng <ziliangdotme@gmail.com>
|
Victor Z. Peng <ziliangdotme@gmail.com>
|
||||||
Viet-Anh NGUYEN (Andrew) <vietanh.dev@gmail.com>
|
|
||||||
Vinesh Janarthanan <36610342+VJHack@users.noreply.github.com>
|
|
||||||
Vlad <spitfireage@gmail.com>
|
Vlad <spitfireage@gmail.com>
|
||||||
Vladimir <bogdad@gmail.com>
|
Vladimir <bogdad@gmail.com>
|
||||||
Vladimir Malyutin <first-leon@yandex.ru>
|
Vladimir Malyutin <first-leon@yandex.ru>
|
||||||
Vladimir Zorin <vladimir@deviant.guru>
|
Vladimir Zorin <vladimir@deviant.guru>
|
||||||
VoidIsVoid <343750470@qq.com>
|
|
||||||
Volodymyr Vitvitskyi <72226+signalpillar@users.noreply.github.com>
|
Volodymyr Vitvitskyi <72226+signalpillar@users.noreply.github.com>
|
||||||
Wang Qin <37098874+wangqin0@users.noreply.github.com>
|
|
||||||
Wang Ran (汪然) <wangr@smail.nju.edu.cn>
|
|
||||||
WangHaoranRobin <56047610+WangHaoranRobin@users.noreply.github.com>
|
WangHaoranRobin <56047610+WangHaoranRobin@users.noreply.github.com>
|
||||||
Weird Constructor <weirdconstructor@gmail.com>
|
Weird Constructor <weirdconstructor@gmail.com>
|
||||||
Welby Seely <welbyseely@gmail.com>
|
Welby Seely <welbyseely@gmail.com>
|
||||||
Wentai Zhang <rchardx@gmail.com>
|
Wentai Zhang <rchardx@gmail.com>
|
||||||
WillCorticesAI <150854901+WillCorticesAI@users.noreply.github.com>
|
WillCorticesAI <150854901+WillCorticesAI@users.noreply.github.com>
|
||||||
William Tambellini <william.tambellini@gmail.com>
|
William Tambellini <william.tambellini@gmail.com>
|
||||||
William Tambellini <wtambellini@sdl.com>
|
|
||||||
Willy Tarreau <w@1wt.eu>
|
Willy Tarreau <w@1wt.eu>
|
||||||
Woof Dog <197125663+woof-dog@users.noreply.github.com>
|
|
||||||
Wouter <9594229+DifferentialityDevelopment@users.noreply.github.com>
|
Wouter <9594229+DifferentialityDevelopment@users.noreply.github.com>
|
||||||
Wu Jian Ping <wujjpp@hotmail.com>
|
Wu Jian Ping <wujjpp@hotmail.com>
|
||||||
Wu Jian Ping <wujp@greatld.com>
|
Wu Jian Ping <wujp@greatld.com>
|
||||||
|
@ -738,25 +551,15 @@ Xiang (Kevin) Li <kevinli020508@gmail.com>
|
||||||
Xiao-Yong Jin <jinxiaoyong@gmail.com>
|
Xiao-Yong Jin <jinxiaoyong@gmail.com>
|
||||||
XiaotaoChen <chenxiaotao1234@gmail.com>
|
XiaotaoChen <chenxiaotao1234@gmail.com>
|
||||||
Xiaoyi Chen <cxychina@gmail.com>
|
Xiaoyi Chen <cxychina@gmail.com>
|
||||||
Xie Yanbo <xieyanbo@gmail.com>
|
|
||||||
Xingchen Song(宋星辰) <xingchensong1996@163.com>
|
Xingchen Song(宋星辰) <xingchensong1996@163.com>
|
||||||
Xinpeng Dou <81913537+Dou-Git@users.noreply.github.com>
|
|
||||||
Xuan Son Nguyen <thichthat@gmail.com>
|
Xuan Son Nguyen <thichthat@gmail.com>
|
||||||
Xuan-Son Nguyen <thichthat@gmail.com>
|
|
||||||
Yaiko <elyaiko@hotmail.com>
|
|
||||||
Yann Follet <131855179+YannFollet@users.noreply.github.com>
|
Yann Follet <131855179+YannFollet@users.noreply.github.com>
|
||||||
Yaroslav <yaroslav.yashin@me.com>
|
Yaroslav <yaroslav.yashin@me.com>
|
||||||
Yazan Agha-Schrader <mountaiin@icloud.com>
|
Yazan Agha-Schrader <mountaiin@icloud.com>
|
||||||
Yiming Cui <conandiy@vip.qq.com>
|
Yiming Cui <conandiy@vip.qq.com>
|
||||||
Yishuo Wang <MeouSker77@outlook.com>
|
Yishuo Wang <MeouSker77@outlook.com>
|
||||||
Yoshi Suhara <y.suhara@gmail.com>
|
|
||||||
Yoshi Suhara <ysuhara@nvidia.com>
|
|
||||||
Younes Belkada <49240599+younesbelkada@users.noreply.github.com>
|
|
||||||
Yueh-Po Peng <94939112+y10ab1@users.noreply.github.com>
|
Yueh-Po Peng <94939112+y10ab1@users.noreply.github.com>
|
||||||
Yüg <eugeniosegalaweb@gmail.com>
|
|
||||||
Yui <dev@sleepyyui.com>
|
Yui <dev@sleepyyui.com>
|
||||||
Yun Dou <dixyes@gmail.com>
|
|
||||||
Yuri Khrustalev <ykhrustalev@users.noreply.github.com>
|
|
||||||
Yusuf Kağan Hanoğlu <hanoglu@yahoo.com>
|
Yusuf Kağan Hanoğlu <hanoglu@yahoo.com>
|
||||||
Yuval Peled <31162840+Yuval-Peled@users.noreply.github.com>
|
Yuval Peled <31162840+Yuval-Peled@users.noreply.github.com>
|
||||||
ZHAOKAI WANG <sanxianwei@163.com>
|
ZHAOKAI WANG <sanxianwei@163.com>
|
||||||
|
@ -765,27 +568,19 @@ Zay <95888118+isaiahbjork@users.noreply.github.com>
|
||||||
Zenix <zenixls2@gmail.com>
|
Zenix <zenixls2@gmail.com>
|
||||||
Zhang Peiyuan <a1286225768@gmail.com>
|
Zhang Peiyuan <a1286225768@gmail.com>
|
||||||
Zheng.Deng <32841220+dengzheng-cloud@users.noreply.github.com>
|
Zheng.Deng <32841220+dengzheng-cloud@users.noreply.github.com>
|
||||||
Zhenwei Jin <109658203+kylo5aby@users.noreply.github.com>
|
|
||||||
Zhiyuan Li <lizhiyuan@uniartisan.com>
|
|
||||||
Zhiyuan Li <uniartisan2017@gmail.com>
|
|
||||||
ZhouYuChen <zhouyuchen@naver.com>
|
ZhouYuChen <zhouyuchen@naver.com>
|
||||||
Ziad Ben Hadj-Alouane <zied.benhadjalouane@gmail.com>
|
Ziad Ben Hadj-Alouane <zied.benhadjalouane@gmail.com>
|
||||||
Ziang Wu <97337387+ZiangWu-77@users.noreply.github.com>
|
Ziang Wu <97337387+ZiangWu-77@users.noreply.github.com>
|
||||||
Zsapi <martin1.zsapka@gmail.com>
|
Zsapi <martin1.zsapka@gmail.com>
|
||||||
a-n-n-a-l-e-e <150648636+a-n-n-a-l-e-e@users.noreply.github.com>
|
a-n-n-a-l-e-e <150648636+a-n-n-a-l-e-e@users.noreply.github.com>
|
||||||
a3sh <38979186+A3shTnT@users.noreply.github.com>
|
|
||||||
adel boussaken <netdur@gmail.com>
|
adel boussaken <netdur@gmail.com>
|
||||||
afrideva <95653597+afrideva@users.noreply.github.com>
|
afrideva <95653597+afrideva@users.noreply.github.com>
|
||||||
ag2s20150909 <19373730+ag2s20150909@users.noreply.github.com>
|
|
||||||
agray3 <agray3@users.noreply.github.com>
|
agray3 <agray3@users.noreply.github.com>
|
||||||
akawrykow <142945436+akawrykow@users.noreply.github.com>
|
akawrykow <142945436+akawrykow@users.noreply.github.com>
|
||||||
alek3y <44779186+alek3y@users.noreply.github.com>
|
|
||||||
alexpinel <93524949+alexpinel@users.noreply.github.com>
|
alexpinel <93524949+alexpinel@users.noreply.github.com>
|
||||||
alonfaraj <alonfaraj@gmail.com>
|
alonfaraj <alonfaraj@gmail.com>
|
||||||
alwqx <kenan3015@gmail.com>
|
alwqx <kenan3015@gmail.com>
|
||||||
amd-dwang <dong.wang@amd.com>
|
|
||||||
amd-lalithnc <lalithnc@amd.com>
|
amd-lalithnc <lalithnc@amd.com>
|
||||||
amritahs-ibm <amritahs@linux.vnet.ibm.com>
|
|
||||||
andrijdavid <david@geek.mg>
|
andrijdavid <david@geek.mg>
|
||||||
anon998 <131767832+anon998@users.noreply.github.com>
|
anon998 <131767832+anon998@users.noreply.github.com>
|
||||||
anzz1 <anzz1@live.com>
|
anzz1 <anzz1@live.com>
|
||||||
|
@ -793,31 +588,24 @@ apaz <aarpazdera@gmail.com>
|
||||||
apcameron <37645737+apcameron@users.noreply.github.com>
|
apcameron <37645737+apcameron@users.noreply.github.com>
|
||||||
arch-btw <57669023+arch-btw@users.noreply.github.com>
|
arch-btw <57669023+arch-btw@users.noreply.github.com>
|
||||||
arcrank <arcrank@gmail.com>
|
arcrank <arcrank@gmail.com>
|
||||||
ardfork <134447697+ardfork@users.noreply.github.com>
|
|
||||||
arlo-phoenix <140345165+arlo-phoenix@users.noreply.github.com>
|
arlo-phoenix <140345165+arlo-phoenix@users.noreply.github.com>
|
||||||
aryantandon01 <80969509+aryantandon01@users.noreply.github.com>
|
|
||||||
at8u <129688334+at8u@users.noreply.github.com>
|
at8u <129688334+at8u@users.noreply.github.com>
|
||||||
automaticcat <daogiatuank54@gmail.com>
|
automaticcat <daogiatuank54@gmail.com>
|
||||||
awatuna <23447591+awatuna@users.noreply.github.com>
|
|
||||||
b4b4o <zwbao@foxmail.com>
|
|
||||||
bandoti <141645996+bandoti@users.noreply.github.com>
|
bandoti <141645996+bandoti@users.noreply.github.com>
|
||||||
beiller <beiller@gmail.com>
|
beiller <beiller@gmail.com>
|
||||||
bhubbb <79117352+bhubbb@users.noreply.github.com>
|
bhubbb <79117352+bhubbb@users.noreply.github.com>
|
||||||
bmwl <brian.marshall@tolko.com>
|
bmwl <brian.marshall@tolko.com>
|
||||||
bobqianic <129547291+bobqianic@users.noreply.github.com>
|
bobqianic <129547291+bobqianic@users.noreply.github.com>
|
||||||
brucepro <git@brucepro.net>
|
|
||||||
bryanSwk <93190252+bryanSwk@users.noreply.github.com>
|
bryanSwk <93190252+bryanSwk@users.noreply.github.com>
|
||||||
bsilvereagle <bsilvereagle@users.noreply.github.com>
|
bsilvereagle <bsilvereagle@users.noreply.github.com>
|
||||||
bssrdf <merlintiger@hotmail.com>
|
bssrdf <merlintiger@hotmail.com>
|
||||||
byte-6174 <88070277+byte-6174@users.noreply.github.com>
|
byte-6174 <88070277+byte-6174@users.noreply.github.com>
|
||||||
cduk <19917266+cduk@users.noreply.github.com>
|
|
||||||
cebtenzzre <cebtenzzre@gmail.com>
|
cebtenzzre <cebtenzzre@gmail.com>
|
||||||
chaihahaha <chai836275709@gmail.com>
|
chaihahaha <chai836275709@gmail.com>
|
||||||
chiranko <96988916+chiranko@users.noreply.github.com>
|
chiranko <96988916+chiranko@users.noreply.github.com>
|
||||||
clibdev <52199778+clibdev@users.noreply.github.com>
|
clibdev <52199778+clibdev@users.noreply.github.com>
|
||||||
clyang <clyang@clyang.net>
|
clyang <clyang@clyang.net>
|
||||||
cocktailpeanut <121128867+cocktailpeanut@users.noreply.github.com>
|
cocktailpeanut <121128867+cocktailpeanut@users.noreply.github.com>
|
||||||
codezjx <code.zjx@gmail.com>
|
|
||||||
coezbek <c.oezbek@gmail.com>
|
coezbek <c.oezbek@gmail.com>
|
||||||
comex <comexk@gmail.com>
|
comex <comexk@gmail.com>
|
||||||
compilade <113953597+compilade@users.noreply.github.com>
|
compilade <113953597+compilade@users.noreply.github.com>
|
||||||
|
@ -826,14 +614,10 @@ cpumaxx <163466046+cpumaxx@users.noreply.github.com>
|
||||||
crasm <crasm@git.vczf.net>
|
crasm <crasm@git.vczf.net>
|
||||||
crasm <crasm@git.vczf.us>
|
crasm <crasm@git.vczf.us>
|
||||||
daboe01 <daboe01@googlemail.com>
|
daboe01 <daboe01@googlemail.com>
|
||||||
daghanerdonmez <44506702+daghanerdonmez@users.noreply.github.com>
|
|
||||||
daminho <37615795+daminho@users.noreply.github.com>
|
|
||||||
david raistrick <keen99@users.noreply.github.com>
|
david raistrick <keen99@users.noreply.github.com>
|
||||||
ddh0 <dylanhalladay02@icloud.com>
|
ddh0 <dylanhalladay02@icloud.com>
|
||||||
ddpasa <112642920+ddpasa@users.noreply.github.com>
|
ddpasa <112642920+ddpasa@users.noreply.github.com>
|
||||||
deepdiffuser <112834445+deepdiffuser@users.noreply.github.com>
|
deepdiffuser <112834445+deepdiffuser@users.noreply.github.com>
|
||||||
devojony <61173062+devojony@users.noreply.github.com>
|
|
||||||
ditsuke <ditsuke@protonmail.com>
|
|
||||||
divinity76 <divinity76@gmail.com>
|
divinity76 <divinity76@gmail.com>
|
||||||
dm4 <sunrisedm4@gmail.com>
|
dm4 <sunrisedm4@gmail.com>
|
||||||
dotpy314 <33351922+dotpy314@users.noreply.github.com>
|
dotpy314 <33351922+dotpy314@users.noreply.github.com>
|
||||||
|
@ -841,25 +625,18 @@ drbh <david.richard.holtz@gmail.com>
|
||||||
ds5t5 <145942675+ds5t5@users.noreply.github.com>
|
ds5t5 <145942675+ds5t5@users.noreply.github.com>
|
||||||
dylan <canardleteer@users.noreply.github.com>
|
dylan <canardleteer@users.noreply.github.com>
|
||||||
eastriver <lee@eastriver.dev>
|
eastriver <lee@eastriver.dev>
|
||||||
ebraminio <ebrahim@gnu.org>
|
|
||||||
ebraminio <ebraminio@gmail.com>
|
ebraminio <ebraminio@gmail.com>
|
||||||
eiery <19350831+eiery@users.noreply.github.com>
|
eiery <19350831+eiery@users.noreply.github.com>
|
||||||
eric8607242 <e0928021388@gmail.com>
|
eric8607242 <e0928021388@gmail.com>
|
||||||
fairydreaming <166155368+fairydreaming@users.noreply.github.com>
|
fairydreaming <166155368+fairydreaming@users.noreply.github.com>
|
||||||
fengerhu1 <2748250768@qq.com>
|
|
||||||
fj-y-saito <85871716+fj-y-saito@users.noreply.github.com>
|
|
||||||
fraxy-v <65565042+fraxy-v@users.noreply.github.com>
|
fraxy-v <65565042+fraxy-v@users.noreply.github.com>
|
||||||
github-actions[bot] <github-actions[bot]@users.noreply.github.com>
|
github-actions[bot] <github-actions[bot]@users.noreply.github.com>
|
||||||
gliptic <gliptic@users.noreply.github.com>
|
gliptic <gliptic@users.noreply.github.com>
|
||||||
gn64 <yukikaze.jp@gmail.com>
|
|
||||||
goerch <jhr.walter@t-online.de>
|
goerch <jhr.walter@t-online.de>
|
||||||
grahameth <96447521+grahameth@users.noreply.github.com>
|
grahameth <96447521+grahameth@users.noreply.github.com>
|
||||||
gtygo <gtydoit@gmail.com>
|
|
||||||
gwjr <502526+gwjr@users.noreply.github.com>
|
gwjr <502526+gwjr@users.noreply.github.com>
|
||||||
h-h-h-h <13482553+h-h-h-h@users.noreply.github.com>
|
h-h-h-h <13482553+h-h-h-h@users.noreply.github.com>
|
||||||
hankcs <cnhankmc@gmail.com>
|
hankcs <cnhankmc@gmail.com>
|
||||||
haopeng <657407891@qq.com>
|
|
||||||
hipudding <huafengchun@gmail.com>
|
|
||||||
hoangmit <hoangmit@users.noreply.github.com>
|
hoangmit <hoangmit@users.noreply.github.com>
|
||||||
hongbo.mo <352280764@qq.com>
|
hongbo.mo <352280764@qq.com>
|
||||||
hopkins385 <98618192+hopkins385@users.noreply.github.com>
|
hopkins385 <98618192+hopkins385@users.noreply.github.com>
|
||||||
|
@ -872,16 +649,12 @@ hxer7963 <hxer7963@gmail.com>
|
||||||
hydai <z54981220@gmail.com>
|
hydai <z54981220@gmail.com>
|
||||||
iSma <ismail.senhaji@gmail.com>
|
iSma <ismail.senhaji@gmail.com>
|
||||||
iacore <74560659+iacore@users.noreply.github.com>
|
iacore <74560659+iacore@users.noreply.github.com>
|
||||||
icppWorld <124377669+icppWorld@users.noreply.github.com>
|
|
||||||
igarnier <igarnier@protonmail.com>
|
igarnier <igarnier@protonmail.com>
|
||||||
intelmatt <61025942+intelmatt@users.noreply.github.com>
|
intelmatt <61025942+intelmatt@users.noreply.github.com>
|
||||||
iohub <rickyang.pro@gmail.com>
|
iohub <rickyang.pro@gmail.com>
|
||||||
issixx <46835150+issixx@users.noreply.github.com>
|
|
||||||
jacobi petrucciani <8117202+jpetrucciani@users.noreply.github.com>
|
jacobi petrucciani <8117202+jpetrucciani@users.noreply.github.com>
|
||||||
jaime-m-p <167997752+jaime-m-p@users.noreply.github.com>
|
jaime-m-p <167997752+jaime-m-p@users.noreply.github.com>
|
||||||
jameswu2014 <545426914@qq.com>
|
jameswu2014 <545426914@qq.com>
|
||||||
jdomke <28772296+jdomke@users.noreply.github.com>
|
|
||||||
jiahao su <damow890@gmail.com>
|
|
||||||
jiez <373447296@qq.com>
|
jiez <373447296@qq.com>
|
||||||
jneem <joeneeman@gmail.com>
|
jneem <joeneeman@gmail.com>
|
||||||
joecryptotoo <80373433+joecryptotoo@users.noreply.github.com>
|
joecryptotoo <80373433+joecryptotoo@users.noreply.github.com>
|
||||||
|
@ -894,7 +667,6 @@ junchao-loongson <68935141+junchao-loongson@users.noreply.github.com>
|
||||||
jwj7140 <32943891+jwj7140@users.noreply.github.com>
|
jwj7140 <32943891+jwj7140@users.noreply.github.com>
|
||||||
k.h.lai <adrian.k.h.lai@outlook.com>
|
k.h.lai <adrian.k.h.lai@outlook.com>
|
||||||
kaizau <kaizau@users.noreply.github.com>
|
kaizau <kaizau@users.noreply.github.com>
|
||||||
kallewoof <kalle.alm@gmail.com>
|
|
||||||
kalomaze <66376113+kalomaze@users.noreply.github.com>
|
kalomaze <66376113+kalomaze@users.noreply.github.com>
|
||||||
kang <tpdns9032100@gmail.com>
|
kang <tpdns9032100@gmail.com>
|
||||||
katsu560 <118887472+katsu560@users.noreply.github.com>
|
katsu560 <118887472+katsu560@users.noreply.github.com>
|
||||||
|
@ -902,46 +674,32 @@ kchro3 <62481661+kchro3@users.noreply.github.com>
|
||||||
khimaros <me@khimaros.com>
|
khimaros <me@khimaros.com>
|
||||||
kiltyj <kiltyj@gmail.com>
|
kiltyj <kiltyj@gmail.com>
|
||||||
klosax <131523366+klosax@users.noreply.github.com>
|
klosax <131523366+klosax@users.noreply.github.com>
|
||||||
krystiancha <krystian@krystianch.com>
|
|
||||||
kunal-vaishnavi <115581922+kunal-vaishnavi@users.noreply.github.com>
|
kunal-vaishnavi <115581922+kunal-vaishnavi@users.noreply.github.com>
|
||||||
kunnis <kunnis@users.noreply.github.com>
|
kunnis <kunnis@users.noreply.github.com>
|
||||||
kuronekosaiko <EvanChanJ@163.com>
|
kuronekosaiko <EvanChanJ@163.com>
|
||||||
kustaaya <58045274+kustaaya@users.noreply.github.com>
|
|
||||||
kuvaus <22169537+kuvaus@users.noreply.github.com>
|
kuvaus <22169537+kuvaus@users.noreply.github.com>
|
||||||
kwin1412 <42286931+kwin1412@users.noreply.github.com>
|
kwin1412 <42286931+kwin1412@users.noreply.github.com>
|
||||||
l3utterfly <gc.pthzfoldr@gmail.com>
|
l3utterfly <gc.pthzfoldr@gmail.com>
|
||||||
laik <laik.lj@me.com>
|
|
||||||
ldwang <ftgreat@163.com>
|
ldwang <ftgreat@163.com>
|
||||||
le.chang <cljs118@126.com>
|
le.chang <cljs118@126.com>
|
||||||
leejet <leejet714@gmail.com>
|
leejet <leejet714@gmail.com>
|
||||||
leo-pony <nengjunma@outlook.com>
|
|
||||||
lexasub <lexakopp2212@gmail.com>
|
|
||||||
lhez <quic_lih@quicinc.com>
|
|
||||||
limitedAtonement <limitedAtonement@users.noreply.github.com>
|
limitedAtonement <limitedAtonement@users.noreply.github.com>
|
||||||
liuwei-git <14815172+liuwei-git@users.noreply.github.com>
|
liuwei-git <14815172+liuwei-git@users.noreply.github.com>
|
||||||
lon <114724657+longregen@users.noreply.github.com>
|
lon <114724657+longregen@users.noreply.github.com>
|
||||||
loonerin <132926317+loonerin@users.noreply.github.com>
|
loonerin <132926317+loonerin@users.noreply.github.com>
|
||||||
ltoniazzi <61414566+ltoniazzi@users.noreply.github.com>
|
|
||||||
luoyu-intel <yu.luo@intel.com>
|
luoyu-intel <yu.luo@intel.com>
|
||||||
m3ndax <adrian.goessl@outlook.com>
|
m3ndax <adrian.goessl@outlook.com>
|
||||||
maddes8cht <55592906+maddes8cht@users.noreply.github.com>
|
maddes8cht <55592906+maddes8cht@users.noreply.github.com>
|
||||||
mahorozte <41834471+mahorozte@users.noreply.github.com>
|
|
||||||
makomk <makosoft@googlemail.com>
|
makomk <makosoft@googlemail.com>
|
||||||
manikbhandari <mbbhandarimanik2@gmail.com>
|
manikbhandari <mbbhandarimanik2@gmail.com>
|
||||||
maor-ps <154728172+maor-ps@users.noreply.github.com>
|
maor-ps <154728172+maor-ps@users.noreply.github.com>
|
||||||
mashdragon <122402293+mashdragon@users.noreply.github.com>
|
|
||||||
matiaslin <45382001+matiaslin@users.noreply.github.com>
|
|
||||||
matt23654 <matthew.webber@protonmail.com>
|
|
||||||
matteo <matteogeniaccio@yahoo.it>
|
|
||||||
mdrokz <mohammadmunshi@gmail.com>
|
mdrokz <mohammadmunshi@gmail.com>
|
||||||
mgroeber9110 <45620825+mgroeber9110@users.noreply.github.com>
|
mgroeber9110 <45620825+mgroeber9110@users.noreply.github.com>
|
||||||
minarchist <minarchist@users.noreply.github.com>
|
minarchist <minarchist@users.noreply.github.com>
|
||||||
mj-shifu <77107165+mj-shifu@users.noreply.github.com>
|
mj-shifu <77107165+mj-shifu@users.noreply.github.com>
|
||||||
mmyjona <jonathan.gonse@gmail.com>
|
mmyjona <jonathan.gonse@gmail.com>
|
||||||
momonga <115213907+mmnga@users.noreply.github.com>
|
momonga <115213907+mmnga@users.noreply.github.com>
|
||||||
momonga <146910567+mmngays@users.noreply.github.com>
|
|
||||||
moritzbrantner <31051084+moritzbrantner@users.noreply.github.com>
|
moritzbrantner <31051084+moritzbrantner@users.noreply.github.com>
|
||||||
musoles <135031143+musoles@users.noreply.github.com>
|
|
||||||
mzcu <milos.cubrilo@gmail.com>
|
mzcu <milos.cubrilo@gmail.com>
|
||||||
nanahi <130121847+na-na-hi@users.noreply.github.com>
|
nanahi <130121847+na-na-hi@users.noreply.github.com>
|
||||||
ngc92 <7938269+ngc92@users.noreply.github.com>
|
ngc92 <7938269+ngc92@users.noreply.github.com>
|
||||||
|
@ -958,21 +716,16 @@ omahs <73983677+omahs@users.noreply.github.com>
|
||||||
oobabooga <112222186+oobabooga@users.noreply.github.com>
|
oobabooga <112222186+oobabooga@users.noreply.github.com>
|
||||||
opparco <parco.opaai@gmail.com>
|
opparco <parco.opaai@gmail.com>
|
||||||
ostix360 <55257054+ostix360@users.noreply.github.com>
|
ostix360 <55257054+ostix360@users.noreply.github.com>
|
||||||
pculliton <phillipculliton@gmail.com>
|
|
||||||
peidaqi <peidaqi@gmail.com>
|
|
||||||
pengxin99 <pengxin.yuan@intel.com>
|
pengxin99 <pengxin.yuan@intel.com>
|
||||||
perserk <perserk@gmail.com>
|
perserk <perserk@gmail.com>
|
||||||
piDack <104877312+piDack@users.noreply.github.com>
|
|
||||||
pmysl <piotr.myslinski@outlook.com>
|
pmysl <piotr.myslinski@outlook.com>
|
||||||
postmasters <namnguyen@google.com>
|
postmasters <namnguyen@google.com>
|
||||||
pudepiedj <pudepiedj@gmail.com>
|
pudepiedj <pudepiedj@gmail.com>
|
||||||
qingfengfenga <41416092+qingfengfenga@users.noreply.github.com>
|
qingfengfenga <41416092+qingfengfenga@users.noreply.github.com>
|
||||||
qingy1337 <qxli2@students.everettcc.edu>
|
|
||||||
qouoq <qouoq@fastmail.com>
|
qouoq <qouoq@fastmail.com>
|
||||||
qunash <anzoria@gmail.com>
|
qunash <anzoria@gmail.com>
|
||||||
rabidcopy <rabidcopy@yahoo.com>
|
rabidcopy <rabidcopy@yahoo.com>
|
||||||
rankaiyx <rankaiyx@rankaiyx.com>
|
rankaiyx <rankaiyx@rankaiyx.com>
|
||||||
redbeard <bharrington@alticon.net>
|
|
||||||
rhjdvsgsgks <26178113+rhjdvsgsgks@users.noreply.github.com>
|
rhjdvsgsgks <26178113+rhjdvsgsgks@users.noreply.github.com>
|
||||||
rhuddleston <ryan.huddleston@percona.com>
|
rhuddleston <ryan.huddleston@percona.com>
|
||||||
rimoliga <53384203+rimoliga@users.noreply.github.com>
|
rimoliga <53384203+rimoliga@users.noreply.github.com>
|
||||||
|
@ -980,7 +733,6 @@ runfuture <runfuture@users.noreply.github.com>
|
||||||
sandyiscool <sandyiscool@gmail.com>
|
sandyiscool <sandyiscool@gmail.com>
|
||||||
sasha0552 <admin@sasha0552.org>
|
sasha0552 <admin@sasha0552.org>
|
||||||
semidark <me@semidark.net>
|
semidark <me@semidark.net>
|
||||||
serhii-nakon <57632032+serhii-nakon@users.noreply.github.com>
|
|
||||||
sharpHL <132747147+sharpHL@users.noreply.github.com>
|
sharpHL <132747147+sharpHL@users.noreply.github.com>
|
||||||
shibe2 <shibe@tuta.io>
|
shibe2 <shibe@tuta.io>
|
||||||
singularity <12184989+singularity-s0@users.noreply.github.com>
|
singularity <12184989+singularity-s0@users.noreply.github.com>
|
||||||
|
@ -989,59 +741,42 @@ sjxx <63994076+ylsdamxssjxxdd@users.noreply.github.com>
|
||||||
slaren <2141330+slaren@users.noreply.github.com>
|
slaren <2141330+slaren@users.noreply.github.com>
|
||||||
slaren <slarengh@gmail.com>
|
slaren <slarengh@gmail.com>
|
||||||
snadampal <87143774+snadampal@users.noreply.github.com>
|
snadampal <87143774+snadampal@users.noreply.github.com>
|
||||||
someone13574 <81528246+someone13574@users.noreply.github.com>
|
|
||||||
standby24x7 <standby24x7@gmail.com>
|
|
||||||
staviq <staviq@gmail.com>
|
staviq <staviq@gmail.com>
|
||||||
stduhpf <stephduh@live.fr>
|
stduhpf <stephduh@live.fr>
|
||||||
strawberrymelonpanda <152940198+strawberrymelonpanda@users.noreply.github.com>
|
strawberrymelonpanda <152940198+strawberrymelonpanda@users.noreply.github.com>
|
||||||
swittk <switt1995@gmail.com>
|
swittk <switt1995@gmail.com>
|
||||||
takov751 <40316768+takov751@users.noreply.github.com>
|
takov751 <40316768+takov751@users.noreply.github.com>
|
||||||
tarcey <cey.tarik@gmail.com>
|
tarcey <cey.tarik@gmail.com>
|
||||||
tc-mb <157115220+tc-mb@users.noreply.github.com>
|
|
||||||
texmex76 <40733439+texmex76@users.noreply.github.com>
|
texmex76 <40733439+texmex76@users.noreply.github.com>
|
||||||
thement <40525767+thement@users.noreply.github.com>
|
thement <40525767+thement@users.noreply.github.com>
|
||||||
thewh1teagle <61390950+thewh1teagle@users.noreply.github.com>
|
|
||||||
tjohnman <tjohnman@users.noreply.github.com>
|
tjohnman <tjohnman@users.noreply.github.com>
|
||||||
toyer <2042519524@qq.com>
|
|
||||||
tslmy <tslmy@users.noreply.github.com>
|
tslmy <tslmy@users.noreply.github.com>
|
||||||
ubik2 <ubik2@users.noreply.github.com>
|
ubik2 <ubik2@users.noreply.github.com>
|
||||||
uint256_t <konndennsa@gmail.com>
|
uint256_t <konndennsa@gmail.com>
|
||||||
uint256_t <maekawatoshiki1017@gmail.com>
|
uint256_t <maekawatoshiki1017@gmail.com>
|
||||||
unbounded <haakon@likedan.net>
|
unbounded <haakon@likedan.net>
|
||||||
uvos <devnull@uvos.xyz>
|
|
||||||
uvos <philipp@uvos.xyz>
|
|
||||||
valiray <133289098+valiray@users.noreply.github.com>
|
valiray <133289098+valiray@users.noreply.github.com>
|
||||||
vb <vaibhavs10@gmail.com>
|
|
||||||
vik <vikhyatk@gmail.com>
|
vik <vikhyatk@gmail.com>
|
||||||
viric <viric@viric.name>
|
viric <viric@viric.name>
|
||||||
vodkaslime <646329483@qq.com>
|
vodkaslime <646329483@qq.com>
|
||||||
vvhg1 <94630311+vvhg1@users.noreply.github.com>
|
vvhg1 <94630311+vvhg1@users.noreply.github.com>
|
||||||
vxiiduu <73044267+vxiiduu@users.noreply.github.com>
|
vxiiduu <73044267+vxiiduu@users.noreply.github.com>
|
||||||
wangshuai09 <391746016@qq.com>
|
|
||||||
wbpxre150 <100937007+wbpxre150@users.noreply.github.com>
|
wbpxre150 <100937007+wbpxre150@users.noreply.github.com>
|
||||||
whoreson <139810751+whoreson@users.noreply.github.com>
|
whoreson <139810751+whoreson@users.noreply.github.com>
|
||||||
woachk <24752637+woachk@users.noreply.github.com>
|
woachk <24752637+woachk@users.noreply.github.com>
|
||||||
wonjun Jang <strutive07@gmail.com>
|
wonjun Jang <strutive07@gmail.com>
|
||||||
woodx <124784234+woodx9@users.noreply.github.com>
|
woodx <124784234+woodx9@users.noreply.github.com>
|
||||||
wwoodsTM <104587230+wwoodsTM@users.noreply.github.com>
|
|
||||||
wzy <32936898+Freed-Wu@users.noreply.github.com>
|
wzy <32936898+Freed-Wu@users.noreply.github.com>
|
||||||
xaedes <xaedes@gmail.com>
|
xaedes <xaedes@gmail.com>
|
||||||
xaedes <xaedes@googlemail.com>
|
xaedes <xaedes@googlemail.com>
|
||||||
xctan <axunlei@gmail.com>
|
|
||||||
xloem <0xloem@gmail.com>
|
xloem <0xloem@gmail.com>
|
||||||
yangli2 <yangli2@gmail.com>
|
yangli2 <yangli2@gmail.com>
|
||||||
ymcki <84055651+ymcki@users.noreply.github.com>
|
|
||||||
yuiseki <yuiseki@gmail.com>
|
yuiseki <yuiseki@gmail.com>
|
||||||
yuri@FreeBSD <yurivict@users.noreply.github.com>
|
|
||||||
zakkor <edward.partenie@gmail.com>
|
zakkor <edward.partenie@gmail.com>
|
||||||
zhangkaihuo <zhangkaihuo@gmail.com>
|
zhangkaihuo <zhangkaihuo@gmail.com>
|
||||||
zhentaoyu <zhentao.yu@intel.com>
|
|
||||||
zhouwg <6889919+zhouwg@users.noreply.github.com>
|
zhouwg <6889919+zhouwg@users.noreply.github.com>
|
||||||
zhouwg <zhouwg2000@gmail.com>
|
zhouwg <zhouwg2000@gmail.com>
|
||||||
zrm <trustiosity.zrm@gmail.com>
|
zrm <trustiosity.zrm@gmail.com>
|
||||||
Ștefan-Gabriel Muscalu <legraphista@users.noreply.github.com>
|
Ștefan-Gabriel Muscalu <legraphista@users.noreply.github.com>
|
||||||
杨朱 · Kiki <baofa.fan@daocloud.io>
|
|
||||||
源文雨 <41315874+fumiama@users.noreply.github.com>
|
源文雨 <41315874+fumiama@users.noreply.github.com>
|
||||||
蕭澧邦 <45505768+shou692199@users.noreply.github.com>
|
|
||||||
谢乃闻 <sienaiwun@users.noreply.github.com>
|
|
||||||
Нияз Гарифзянов <112617865+garrnizon@users.noreply.github.com>
|
Нияз Гарифзянов <112617865+garrnizon@users.noreply.github.com>
|
||||||
|
|
CMakeLists.txt (109 changed lines)
@@ -16,7 +16,6 @@ endif()
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/")

set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
-set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)

if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
    set(LLAMA_STANDALONE ON)
@@ -47,17 +46,13 @@ if (WIN32)
    add_compile_definitions(_CRT_SECURE_NO_WARNINGS)
endif()

-if (MSVC)
-    add_compile_options("$<$<COMPILE_LANGUAGE:C>:/utf-8>")
-    add_compile_options("$<$<COMPILE_LANGUAGE:CXX>:/utf-8>")
-    add_compile_options("$<$<COMPILE_LANGUAGE:C>:/bigobj>")
-    add_compile_options("$<$<COMPILE_LANGUAGE:CXX>:/bigobj>")
-endif()
-
#
# option list
#

+# general
+option(LLAMA_CCACHE "llama: use ccache if available" ON)
+
# debug
option(LLAMA_ALL_WARNINGS "llama: enable all compiler warnings" ON)
option(LLAMA_ALL_WARNINGS_3RD_PARTY "llama: enable all compiler warnings in 3rd party libs" OFF)
@@ -70,9 +65,6 @@ option(LLAMA_SANITIZE_THREAD "llama: enable thread sanitizer" OFF)
option(LLAMA_SANITIZE_ADDRESS "llama: enable address sanitizer" OFF)
option(LLAMA_SANITIZE_UNDEFINED "llama: enable undefined sanitizer" OFF)

-# utils
-option(LLAMA_BUILD_COMMON "llama: build common utils library" ${LLAMA_STANDALONE})
-
# extra artifacts
option(LLAMA_BUILD_TESTS "llama: build tests" ${LLAMA_STANDALONE})
option(LLAMA_BUILD_EXAMPLES "llama: build examples" ${LLAMA_STANDALONE})
@@ -80,23 +72,25 @@ option(LLAMA_BUILD_SERVER "llama: build server example" ${LLAMA_STANDALONE})

# 3rd party libs
option(LLAMA_CURL "llama: use libcurl to download model from an URL" OFF)
-option(LLAMA_LLGUIDANCE "llama-common: include LLGuidance library for structured output in common utils" OFF)

# Required for relocatable CMake package
include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/build-info.cmake)
-include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/common.cmake)

# override ggml options
-set(GGML_ALL_WARNINGS ${LLAMA_ALL_WARNINGS})
-set(GGML_FATAL_WARNINGS ${LLAMA_FATAL_WARNINGS})
+set(GGML_CCACHE ${LLAMA_CCACHE})
+set(GGML_SANITIZE_THREAD ${LLAMA_SANITIZE_THREAD})
+set(GGML_SANITIZE_ADDRESS ${LLAMA_SANITIZE_ADDRESS})
+set(GGML_SANITIZE_UNDEFINED ${LLAMA_SANITIZE_UNDEFINED})
+set(GGML_ALL_WARNINGS ${LLAMA_ALL_WARNINGS})
+set(GGML_FATAL_WARNINGS ${LLAMA_FATAL_WARNINGS})

# change the default for these ggml options
if (NOT DEFINED GGML_LLAMAFILE)
-    set(GGML_LLAMAFILE_DEFAULT ON)
+    set(GGML_LLAMAFILE ON)
endif()

-if (NOT DEFINED GGML_CUDA_GRAPHS)
-    set(GGML_CUDA_GRAPHS_DEFAULT ON)
+if (NOT DEFINED GGML_CUDA_USE_GRAPHS)
+    set(GGML_CUDA_USE_GRAPHS ON)
endif()

# transition helpers
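Both sides of this hunk forward the user-facing LLAMA_* cache settings onto the vendored ggml subproject before it is built. As a minimal sketch, not part of this diff, a parent project embedding llama.cpp would set those cache entries before add_subdirectory() so that the forwarding above picks them up; the my_app project name and the llama.cpp checkout path below are assumptions.

# Hypothetical superproject fragment (illustrative names and paths).
cmake_minimum_required(VERSION 3.14)
project(my_app C CXX)

# Cache overrides must be set before add_subdirectory() so the
# LLAMA_* -> GGML_* forwarding shown in the hunk above sees them.
set(LLAMA_ALL_WARNINGS     OFF CACHE BOOL "" FORCE)
set(LLAMA_SANITIZE_ADDRESS ON  CACHE BOOL "" FORCE)

add_subdirectory(llama.cpp)   # assumed vendored checkout path

add_executable(my_app main.c)
target_link_libraries(my_app PRIVATE llama)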
@@ -116,64 +110,14 @@ llama_option_depr(WARNING LLAMA_NATIVE GGML_NATIVE)
llama_option_depr(WARNING LLAMA_RPC GGML_RPC)
llama_option_depr(WARNING LLAMA_SYCL GGML_SYCL)
llama_option_depr(WARNING LLAMA_SYCL_F16 GGML_SYCL_F16)
-llama_option_depr(WARNING LLAMA_CANN GGML_CANN)

-if (NOT MSVC)
-    if (LLAMA_SANITIZE_THREAD)
-        message(STATUS "Using -fsanitize=thread")
-
-        add_compile_options(-fsanitize=thread)
-        link_libraries     (-fsanitize=thread)
-    endif()
-
-    if (LLAMA_SANITIZE_ADDRESS)
-        message(STATUS "Using -fsanitize=address")
-
-        add_compile_options(-fsanitize=address -fno-omit-frame-pointer)
-        link_libraries     (-fsanitize=address)
-    endif()
-
-    if (LLAMA_SANITIZE_UNDEFINED)
-        message(STATUS "Using -fsanitize=undefined")
-
-        add_compile_options(-fsanitize=undefined)
-        link_libraries     (-fsanitize=undefined)
-    endif()
-endif()
-
-#
-# 3rd-party
-#
-
-if (NOT TARGET ggml)
-    add_subdirectory(ggml)
-    # ... otherwise assume ggml is added by a parent CMakeLists.txt
-endif()
-
#
# build the library
#

+add_subdirectory(ggml)
add_subdirectory(src)

-#
-# utils, programs, examples and tests
-#
-
-if (LLAMA_BUILD_COMMON)
-    add_subdirectory(common)
-endif()
-
-if (LLAMA_BUILD_COMMON AND LLAMA_BUILD_TESTS AND NOT CMAKE_JS_VERSION)
-    include(CTest)
-    add_subdirectory(tests)
-endif()
-
-if (LLAMA_BUILD_COMMON AND LLAMA_BUILD_EXAMPLES)
-    add_subdirectory(examples)
-    add_subdirectory(pocs)
-endif()
-
#
# install
#
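The llama_option_depr() calls kept at the top of this hunk map deprecated LLAMA_* cache options onto their GGML_* replacements; the helper itself sits in the unchanged lines elided between this hunk and the previous one, so its body is not shown in the diff. A rough sketch of the usual shape of such a shim follows; the exact message text and scoping are assumptions, not the verbatim upstream definition.

# Assumed shape of a deprecation shim like llama_option_depr: if the old
# cache variable is set, report it at the given severity (WARNING or
# FATAL_ERROR) and carry the value over to the new variable.
function(llama_option_depr TYPE OLD NEW)
    if (${OLD})
        message(${TYPE} "${OLD} is deprecated and will be removed in the future. Use ${NEW} instead.")
        set(${NEW} ON PARENT_SCOPE)
    endif()
endfunction()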
@@ -189,14 +133,9 @@ set(LLAMA_INCLUDE_INSTALL_DIR ${CMAKE_INSTALL_INCLUDEDIR} CACHE PATH "Location o
set(LLAMA_LIB_INSTALL_DIR ${CMAKE_INSTALL_LIBDIR} CACHE PATH "Location of library files")
set(LLAMA_BIN_INSTALL_DIR ${CMAKE_INSTALL_BINDIR} CACHE PATH "Location of binary files")

-set(LLAMA_PUBLIC_HEADERS
-    ${CMAKE_CURRENT_SOURCE_DIR}/include/llama.h
-    ${CMAKE_CURRENT_SOURCE_DIR}/include/llama-cpp.h)
+get_directory_property(LLAMA_TRANSIENT_DEFINES COMPILE_DEFINITIONS)

-set_target_properties(llama
-    PROPERTIES
-    PUBLIC_HEADER "${LLAMA_PUBLIC_HEADERS}")
-
+set_target_properties(llama PROPERTIES PUBLIC_HEADER ${CMAKE_CURRENT_SOURCE_DIR}/include/llama.h)
install(TARGETS llama LIBRARY PUBLIC_HEADER)

configure_package_config_file(
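Both versions of this block rely on the same CMake mechanism: headers listed in the target's PUBLIC_HEADER property are copied by the install(TARGETS ... PUBLIC_HEADER) rule, and only the way the property is populated changes. A generic sketch of that pattern, using an illustrative mylib target rather than anything from this repository:

# Illustrative pattern: attach public headers to a library target and
# install them alongside it.
add_library(mylib src/mylib.c)
set_target_properties(mylib PROPERTIES
    PUBLIC_HEADER "include/mylib.h")

include(GNUInstallDirs)
install(TARGETS mylib
    LIBRARY       DESTINATION ${CMAKE_INSTALL_LIBDIR}
    PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})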
@@ -233,4 +172,20 @@ configure_file(cmake/llama.pc.in
        @ONLY)

install(FILES "${CMAKE_CURRENT_BINARY_DIR}/llama.pc"
-        DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig)
+        DESTINATION lib/pkgconfig)
+
+#
+# programs, examples and tests
+#
+
+add_subdirectory(common)
+
+if (LLAMA_BUILD_TESTS AND NOT CMAKE_JS_VERSION)
+    include(CTest)
+    add_subdirectory(tests)
+endif ()
+
+if (LLAMA_BUILD_EXAMPLES)
+    add_subdirectory(examples)
+    add_subdirectory(pocs)
+endif()
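The generated llama.pc file lands in a pkgconfig directory (derived from CMAKE_INSTALL_LIBDIR on one side, hard-coded lib/ on the other), which lets downstream builds discover the installed library through pkg-config. A sketch of consuming it from another CMake project; the pc_demo target is illustrative, and the llama module name simply matches the installed file name.

# Locate the installed llama.pc via pkg-config and link against it.
find_package(PkgConfig REQUIRED)
pkg_check_modules(LLAMA REQUIRED IMPORTED_TARGET llama)

add_executable(pc_demo main.c)
target_link_libraries(pc_demo PRIVATE PkgConfig::llama)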
CMakePresets.json
@@ -24,24 +24,15 @@
        "CMAKE_INSTALL_RPATH": "$ORIGIN;$ORIGIN/.."
      }
    },
    { "name": "debug", "hidden": true, "cacheVariables": { "CMAKE_BUILD_TYPE": "Debug" } },
    { "name": "release", "hidden": true, "cacheVariables": { "CMAKE_BUILD_TYPE": "Release" } },
    { "name": "reldbg", "hidden": true, "cacheVariables": { "CMAKE_BUILD_TYPE": "RelWithDebInfo" } },
    { "name": "static", "hidden": true, "cacheVariables": { "GGML_STATIC": "ON" } },
-    { "name": "sycl_f16", "hidden": true, "cacheVariables": { "GGML_SYCL_F16": "ON" } },
-    { "name": "vulkan", "hidden": true, "cacheVariables": { "GGML_VULKAN": "ON" } },

-    {
-        "name": "x64-windows-llvm", "hidden": true,
-        "cacheVariables": {
-            "CMAKE_TOOLCHAIN_FILE": "${sourceDir}/cmake/x64-windows-llvm.cmake"
-        }
-    },
-
    {
        "name": "arm64-windows-msvc", "hidden": true,
        "architecture": { "value": "arm64", "strategy": "external" },
-        "toolset": { "value": "host=x64", "strategy": "external" },
+        "toolset": { "value": "host=x86_64", "strategy": "external" },
        "cacheVariables": {
            "CMAKE_TOOLCHAIN_FILE": "${sourceDir}/cmake/arm64-windows-msvc.cmake"
        }
@ -49,49 +40,26 @@
|
||||||
|
|
||||||
{
|
{
|
||||||
"name": "arm64-windows-llvm", "hidden": true,
|
"name": "arm64-windows-llvm", "hidden": true,
|
||||||
"architecture": { "value": "arm64", "strategy": "external" },
|
"architecture": { "value": "arm64", "strategy": "external" },
|
||||||
"toolset": { "value": "host=x64", "strategy": "external" },
|
"toolset": { "value": "host=x86_64", "strategy": "external" },
|
||||||
"cacheVariables": {
|
"cacheVariables": {
|
||||||
"CMAKE_TOOLCHAIN_FILE": "${sourceDir}/cmake/arm64-windows-llvm.cmake"
|
"CMAKE_TOOLCHAIN_FILE": "${sourceDir}/cmake/arm64-windows-llvm.cmake"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
|
||||||
{
|
{ "name": "arm64-windows-llvm-debug" , "inherits": [ "base", "arm64-windows-llvm", "debug" ] },
|
||||||
"name": "arm64-apple-clang", "hidden": true,
|
{ "name": "arm64-windows-llvm-release", "inherits": [ "base", "arm64-windows-llvm", "reldbg" ] },
|
||||||
"architecture": { "value": "arm64", "strategy": "external" },
|
{ "name": "arm64-windows-llvm+static-release", "inherits": [ "base", "arm64-windows-llvm", "reldbg", "static" ] },
|
||||||
"toolset": { "value": "host=x64", "strategy": "external" },
|
|
||||||
"cacheVariables": {
|
|
||||||
"CMAKE_TOOLCHAIN_FILE": "${sourceDir}/cmake/arm64-apple-clang.cmake"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
|
|
||||||
{ "name": "arm64-windows-llvm-debug", "inherits": [ "base", "arm64-windows-llvm", "debug" ] },
|
{ "name": "arm64-windows-msvc-debug" , "inherits": [ "base", "arm64-windows-msvc", "debug" ] },
|
||||||
{ "name": "arm64-windows-llvm-release", "inherits": [ "base", "arm64-windows-llvm", "reldbg" ] },
|
|
||||||
{ "name": "arm64-windows-llvm+static-release", "inherits": [ "base", "arm64-windows-llvm", "reldbg", "static" ] },
|
|
||||||
|
|
||||||
{ "name": "arm64-apple-clang-debug", "inherits": [ "base", "arm64-apple-clang", "debug" ] },
|
|
||||||
{ "name": "arm64-apple-clang-release", "inherits": [ "base", "arm64-apple-clang", "reldbg" ] },
|
|
||||||
{ "name": "arm64-apple-clang+static-release", "inherits": [ "base", "arm64-apple-clang", "reldbg", "static" ] },
|
|
||||||
|
|
||||||
{ "name": "arm64-windows-msvc-debug", "inherits": [ "base", "arm64-windows-msvc", "debug" ] },
|
|
||||||
{ "name": "arm64-windows-msvc-release", "inherits": [ "base", "arm64-windows-msvc", "reldbg" ] },
|
{ "name": "arm64-windows-msvc-release", "inherits": [ "base", "arm64-windows-msvc", "reldbg" ] },
|
||||||
{ "name": "arm64-windows-msvc+static-release", "inherits": [ "base", "arm64-windows-msvc", "reldbg", "static" ] },
|
{ "name": "arm64-windows-msvc+static-release", "inherits": [ "base", "arm64-windows-msvc", "reldbg", "static" ] },
|
||||||
|
|
||||||
{ "name": "x64-windows-llvm-debug", "inherits": [ "base", "x64-windows-llvm", "debug" ] },
|
{ "name": "x64-windows-msvc-debug" , "inherits": [ "base", "debug" ] },
|
||||||
{ "name": "x64-windows-llvm-release", "inherits": [ "base", "x64-windows-llvm", "release" ] },
|
|
||||||
{ "name": "x64-windows-llvm-reldbg", "inherits": [ "base", "x64-windows-llvm", "reldbg" ] },
|
|
||||||
{ "name": "x64-windows-llvm+static-release", "inherits": [ "base", "x64-windows-llvm", "reldbg", "static" ] },
|
|
||||||
|
|
||||||
{ "name": "x64-windows-msvc-debug", "inherits": [ "base", "debug" ] },
|
|
||||||
{ "name": "x64-windows-msvc-release", "inherits": [ "base", "reldbg" ] },
|
{ "name": "x64-windows-msvc-release", "inherits": [ "base", "reldbg" ] },
|
||||||
{ "name": "x64-windows-msvc+static-release", "inherits": [ "base", "reldbg", "static" ] },
|
{ "name": "x64-windows-msvc+static-release", "inherits": [ "base", "reldbg", "static" ] },
|
||||||
|
|
||||||
{ "name": "x64-windows-sycl-debug", "inherits": [ "sycl-base", "debug" ] },
|
{ "name": "x64-windows-sycl-debug" , "inherits": [ "sycl-base", "debug" ] },
|
||||||
{ "name": "x64-windows-sycl-debug-f16", "inherits": [ "sycl-base", "debug", "sycl_f16" ] },
|
{ "name": "x64-windows-sycl-release", "inherits": [ "sycl-base", "release" ] }
|
||||||
{ "name": "x64-windows-sycl-release", "inherits": [ "sycl-base", "release" ] },
|
|
||||||
{ "name": "x64-windows-sycl-release-f16", "inherits": [ "sycl-base", "release", "sycl_f16" ] },
|
|
||||||
|
|
||||||
{ "name": "x64-windows-vulkan-debug", "inherits": [ "base", "vulkan", "debug" ] },
|
|
||||||
{ "name": "x64-windows-vulkan-release", "inherits": [ "base", "vulkan", "release" ] }
|
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|
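For reference, the presets above are consumed with `cmake --preset`; a minimal sketch, assuming the `base` preset's default `build-<preset>` binary directory is unchanged:

```sh
# list the configure presets defined in CMakePresets.json
cmake --list-presets

# configure and build one of them, e.g. the arm64 Windows LLVM release preset
cmake --preset arm64-windows-llvm-release
cmake --build build-arm64-windows-llvm-release
```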
11 CODEOWNERS

@@ -1,11 +0,0 @@
-# collaborators can optionally add themselves here to indicate their availability for reviewing related PRs
-
-/ci/ @ggerganov
-/.devops/*.Dockerfile @ngxson
-/examples/server/ @ngxson
-/ggml/src/ggml-cuda/fattn* @JohannesGaessler
-/ggml/src/ggml-cuda/mmq.* @JohannesGaessler
-/ggml/src/ggml-cuda/mmv.* @JohannesGaessler
-/ggml/src/ggml-cuda/mmvq.* @JohannesGaessler
-/ggml/src/ggml-opt.cpp @JohannesGaessler
-/ggml/src/gguf.cpp @JohannesGaessler
131 CONTRIBUTING.md

@@ -1,125 +1,14 @@
-# Pull requests (for contributors)
+# Contributing Guidelines
 
-- Test your changes:
-  - Execute [the full CI locally on your machine](ci/README.md) before publishing
-  - Verify that the perplexity and the performance are not affected negatively by your changes (use `llama-perplexity` and `llama-bench`)
-  - If you modified the `ggml` source, run the `test-backend-ops` tool to check whether different backend implementations of the `ggml` operators produce consistent results (this requires access to at least two different `ggml` backends)
-  - If you modified a `ggml` operator or added a new one, add the corresponding test cases to `test-backend-ops`
-- Consider allowing write access to your branch for faster reviews, as reviewers can push commits directly
-- If your PR becomes stale, don't hesitate to ping the maintainers in the comments
+## Checklist
 
-# Pull requests (for collaborators)
+* Make sure your PR follows the [coding guidelines](https://github.com/ggerganov/llama.cpp/blob/master/README.md#coding-guidelines)
+* Test your changes using the commands in the [`tests`](tests) folder. For instance, running the `./tests/test-backend-ops` command tests different backend implementations of the GGML library
+* Execute [the full CI locally on your machine](ci/README.md) before publishing
 
-- Squash-merge PRs
-  - Use the following format for the squashed commit title: `<module> : <commit title> (#<issue_number>)`. For example: `utils : fix typo in utils.py (#1234)`
-  - Optionally pick a `<module>` from here: https://github.com/ggerganov/llama.cpp/wiki/Modules
-- Consider adding yourself to [CODEOWNERS](CODEOWNERS)
+## PR formatting
 
-# Coding guidelines
+* Please rate the complexity of your PR (i.e. `Review Complexity : Low`, `Review Complexity : Medium`, `Review Complexity : High`). This makes it easier for maintainers to triage the PRs.
+  - The PR template has a series of review complexity checkboxes `[ ]` that you can mark as `[X]` for your conveience. Refer to [About task lists](https://docs.github.com/en/get-started/writing-on-github/working-with-advanced-formatting/about-task-lists) for more information.
+* If the pull request only contains documentation changes (e.g., updating READMEs, adding new wiki pages), please add `[no ci]` to the commit title. This will skip unnecessary CI checks and help reduce build times.
+* When squashing multiple commits on merge, use the following format for your commit title: `<module> : <commit title> (#<issue_number>)`. For example: `utils : Fix typo in utils.py (#1234)`
 
-- Avoid adding third-party dependencies, extra files, extra headers, etc.
-- Always consider cross-compatibility with other operating systems and architectures
-- Avoid fancy-looking modern STL constructs, use basic `for` loops, avoid templates, keep it simple
-- Vertical alignment makes things more readable and easier to batch edit
-- Clean-up any trailing whitespaces, use 4 spaces for indentation, brackets on the same line, `void * ptr`, `int & a`
-- Use sized integer types such as `int32_t` in the public API, e.g. `size_t` may also be appropriate for allocation sizes or byte offsets
-- Declare structs with `struct foo {}` instead of `typedef struct foo {} foo`
-  - In C++ code omit optional `struct` and `enum` keyword whenever they are not necessary
-    ```cpp
-    // OK
-    llama_context * ctx;
-    const llama_rope_type rope_type;
-
-    // not OK
-    struct llama_context * ctx;
-    const enum llama_rope_type rope_type;
-    ```
-    _(NOTE: this guideline is yet to be applied to the `llama.cpp` codebase. New code should follow this guideline.)_
-- Try to follow the existing patterns in the code (indentation, spaces, etc.). In case of doubt use `clang-format` to format the added code
-- For anything not covered in the current guidelines, refer to the [C++ Core Guidelines](https://isocpp.github.io/CppCoreGuidelines/CppCoreGuidelines)
-- Tensors store data in row-major order. We refer to dimension 0 as columns, 1 as rows, 2 as matrices
-- Matrix multiplication is unconventional: [`C = ggml_mul_mat(ctx, A, B)`](https://github.com/ggerganov/llama.cpp/blob/880e352277fc017df4d5794f0c21c44e1eae2b84/ggml.h#L1058-L1064) means $C^T = A B^T \Leftrightarrow C = B A^T.$
-
-
-
-# Naming guidelines
-
-- Use `snake_case` for function, variable and type names
-- Naming usually optimizes for longest common prefix (see https://github.com/ggerganov/ggml/pull/302#discussion_r1243240963)
-
-    ```cpp
-    // not OK
-    int small_number;
-    int big_number;
-
-    // OK
-    int number_small;
-    int number_big;
-    ```
-
-- Enum values are always in upper case and prefixed with the enum name
-
-    ```cpp
-    enum llama_vocab_type {
-        LLAMA_VOCAB_TYPE_NONE = 0,
-        LLAMA_VOCAB_TYPE_SPM  = 1,
-        LLAMA_VOCAB_TYPE_BPE  = 2,
-        LLAMA_VOCAB_TYPE_WPM  = 3,
-        LLAMA_VOCAB_TYPE_UGM  = 4,
-        LLAMA_VOCAB_TYPE_RWKV = 5,
-    };
-    ```
-
-- The general naming pattern is `<class>_<method>`, with `<method>` being `<action>_<noun>`
-
-    ```cpp
-    llama_model_init();           // class: "llama_model",         method: "init"
-    llama_sampler_chain_remove(); // class: "llama_sampler_chain", method: "remove"
-    llama_sampler_get_seed();     // class: "llama_sampler",       method: "get_seed"
-    llama_set_embeddings();       // class: "llama_context",       method: "set_embeddings"
-    llama_n_threads();            // class: "llama_context",       method: "n_threads"
-    llama_adapter_lora_free();    // class: "llama_adapter_lora",  method: "free"
-    ```
-
-    - The `get` `<action>` can be omitted
-    - The `<noun>` can be omitted if not necessary
-    - The `_context` suffix of the `<class>` is optional. Use it to disambiguate symbols when needed
-    - Use `init`/`free` for constructor/destructor `<action>`
-
-- Use the `_t` suffix when a type is supposed to be opaque to the user - it's not relevant to them if it is a struct or anything else
-
-    ```cpp
-    typedef struct llama_context * llama_context_t;
-
-    enum llama_pooling_type llama_pooling_type(const llama_context_t ctx);
-    ```
-
-    _(NOTE: this guideline is yet to be applied to the `llama.cpp` codebase. New code should follow this guideline)_
-
-- C/C++ filenames are all lowercase with dashes. Headers use the `.h` extension. Source files use the `.c` or `.cpp` extension
-- Python filenames are all lowercase with underscores
-
-- _(TODO: abbreviations usage)_
-
-# Preprocessor directives
-
-- _(TODO: add guidelines with examples and apply them to the codebase)_
-
-    ```cpp
-    #ifdef FOO
-    #endif // FOO
-    ```
-
-# Documentation
-
-- Documentation is a community effort
-- When you need to look into the source code to figure out how to use an API consider adding a short summary to the header file for future reference
-- When you notice incorrect or outdated documentation, please update it
-
-# Resources
-
-The Github issues, PRs and discussions contain a lot of information that can be useful to get familiar with the codebase. For convenience, some of the more important information is referenced from Github projects:
-
-https://github.com/ggerganov/llama.cpp/projects
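Both versions of the guidelines ask contributors to run the full CI locally before publishing; a sketch of that invocation, following the sample usage documented in `ci/run.sh` (the `tmp` paths are illustrative):

```sh
mkdir -p tmp

# CPU-only run
bash ./ci/run.sh ./tmp/results ./tmp/mnt

# with CUDA support
GG_BUILD_CUDA=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
```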
Package.swift

@@ -2,6 +2,44 @@
 
 import PackageDescription
 
+var sources = [
+    "src/llama.cpp",
+    "src/unicode.cpp",
+    "src/unicode-data.cpp",
+    "ggml/src/ggml.c",
+    "ggml/src/ggml-alloc.c",
+    "ggml/src/ggml-backend.c",
+    "ggml/src/ggml-quants.c",
+]
+
+var resources: [Resource] = []
+var linkerSettings: [LinkerSetting] = []
+var cSettings: [CSetting] = [
+    .unsafeFlags(["-Wno-shorten-64-to-32", "-O3", "-DNDEBUG"]),
+    .unsafeFlags(["-fno-objc-arc"]),
+    // NOTE: NEW_LAPACK will required iOS version 16.4+
+    // We should consider add this in the future when we drop support for iOS 14
+    // (ref: ref: https://developer.apple.com/documentation/accelerate/1513264-cblas_sgemm?language=objc)
+    // .define("ACCELERATE_NEW_LAPACK"),
+    // .define("ACCELERATE_LAPACK_ILP64")
+]
+
+#if canImport(Darwin)
+sources.append("ggml/src/ggml-metal.m")
+resources.append(.process("ggml/src/ggml-metal.metal"))
+linkerSettings.append(.linkedFramework("Accelerate"))
+cSettings.append(
+    contentsOf: [
+        .define("GGML_USE_ACCELERATE"),
+        .define("GGML_USE_METAL")
+    ]
+)
+#endif
+
+#if os(Linux)
+    cSettings.append(.define("_GNU_SOURCE"))
+#endif
+
 let package = Package(
     name: "llama",
     platforms: [
@@ -14,6 +52,24 @@ let package = Package(
         .library(name: "llama", targets: ["llama"]),
     ],
     targets: [
-        .systemLibrary(name: "llama", pkgConfig: "llama"),
-    ]
+        .target(
+            name: "llama",
+            path: ".",
+            exclude: [
+               "cmake",
+               "examples",
+               "scripts",
+               "models",
+               "tests",
+               "CMakeLists.txt",
+               "Makefile"
+            ],
+            sources: sources,
+            resources: resources,
+            publicHeadersPath: "spm-headers",
+            cSettings: cSettings,
+            linkerSettings: linkerSettings
+        )
+    ],
+    cxxLanguageStandard: .cxx11
 )
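On the master side the package becomes a thin `systemLibrary` wrapper that resolves `llama` through pkg-config, so the native library has to be built and installed before a dependent Swift package will build. A rough sketch, assuming a standard install prefix and an already existing consuming package:

```sh
# build and install the native library so that `pkg-config --exists llama` succeeds
cmake -B build
cmake --build build --config Release
sudo cmake --install build --prefix /usr/local

# then, inside a Swift package that depends on this repository
swift build
```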
@@ -20,13 +20,17 @@
 **oneAPI** is an open ecosystem and a standard-based specification, supporting multiple architectures including but not limited to intel CPUs, GPUs and FPGAs. The key components of the oneAPI ecosystem include:
 
 - **DPCPP** *(Data Parallel C++)*: The primary oneAPI SYCL implementation, which includes the icpx/icx Compilers.
-- **oneAPI Libraries**: A set of highly optimized libraries targeting multiple domains *(e.g. oneMKL and oneDNN)*.
+- **oneAPI Libraries**: A set of highly optimized libraries targeting multiple domains *(e.g. oneMKL - Math Kernel Library)*.
 - **oneAPI LevelZero**: A high performance low level interface for fine-grained control over intel iGPUs and dGPUs.
 - **Nvidia & AMD Plugins**: These are plugins extending oneAPI's DPCPP support to SYCL on Nvidia and AMD GPU targets.
 
 ### Llama.cpp + SYCL
 
-The llama.cpp SYCL backend is designed to support **Intel GPU** firstly. Based on the cross-platform feature of SYCL, it also supports other vendor GPUs: Nvidia and AMD.
+The llama.cpp SYCL backend is designed to support **Intel GPU** firstly. Based on the cross-platform feature of SYCL, it could support other vendor GPUs: Nvidia GPU (*AMD GPU coming*).
+
+When targeting **Intel CPU**, it is recommended to use llama.cpp for [Intel oneMKL](README.md#intel-onemkl) backend.
+
+It has the similar design of other llama.cpp BLAS-based paths such as *OpenBLAS, cuBLAS, etc..*. In beginning work, the oneAPI's [SYCLomatic](https://github.com/oneapi-src/SYCLomatic) open-source migration tool (Commercial release [Intel® DPC++ Compatibility Tool](https://www.intel.com/content/www/us/en/developer/tools/oneapi/dpc-compatibility-tool.html)) was used for this purpose.
 
 ## Recommended Release
 
@@ -34,20 +38,13 @@ The SYCL backend would be broken by some PRs due to no online CI.
 
 The following release is verified with good quality:
 
-|Commit ID|Tag|Release|Verified Platform| Update date|
-|-|-|-|-|-|
-|3bcd40b3c593d14261fb2abfabad3c0fb5b9e318|b4040 |[llama-b4040-bin-win-sycl-x64.zip](https://github.com/ggerganov/llama.cpp/releases/download/b4040/llama-b4040-bin-win-sycl-x64.zip) |Arc770/Linux/oneAPI 2024.1<br>MTL Arc GPU/Windows 11/oneAPI 2024.1| 2024-11-19|
-|fb76ec31a9914b7761c1727303ab30380fd4f05c|b3038 |[llama-b3038-bin-win-sycl-x64.zip](https://github.com/ggerganov/llama.cpp/releases/download/b3038/llama-b3038-bin-win-sycl-x64.zip) |Arc770/Linux/oneAPI 2024.1<br>MTL Arc GPU/Windows 11/oneAPI 2024.1||
+|Commit ID|Tag|Release|Verified Platform|
+|-|-|-|-|
+|fb76ec31a9914b7761c1727303ab30380fd4f05c|b3038 |[llama-b3038-bin-win-sycl-x64.zip](https://github.com/ggerganov/llama.cpp/releases/download/b3038/llama-b3038-bin-win-sycl-x64.zip) |Arc770/Linux/oneAPI 2024.1<br>MTL Arc GPU/Windows 11/oneAPI 2024.1|
 
 ## News
 
-- 2024.11
-  - Use syclcompat to improve the performance on some platforms. This requires to use oneAPI 2025.0 or newer.
-
-- 2024.8
-  - Use oneDNN as the default GEMM library, improve the compatibility for new Intel GPUs.
-
 - 2024.5
   - Performance is increased: 34 -> 37 tokens/s of llama-2-7b.Q4_0 on Arc770.
   - Arch Linux is verified successfully.
@@ -83,14 +80,7 @@ The following release is verified with good quality:
 
 ### Intel GPU
 
-SYCL backend supports Intel GPU Family:
-
-- Intel Data Center Max Series
-- Intel Flex Series, Arc Series
-- Intel Built-in Arc GPU
-- Intel iGPU in Core CPU (11th Generation Core CPU and newer, refer to [oneAPI supported GPU](https://www.intel.com/content/www/us/en/developer/articles/system-requirements/intel-oneapi-base-toolkit-system-requirements.html#inpage-nav-1-1)).
-
-#### Verified devices
+**Verified devices**
 
 | Intel GPU                     | Status  | Verified Model                        |
 |-------------------------------|---------|---------------------------------------|
@@ -98,7 +88,7 @@ SYCL backend supports Intel GPU Family:
 | Intel Data Center Flex Series | Support | Flex 170                              |
 | Intel Arc Series              | Support | Arc 770, 730M, Arc A750               |
 | Intel built-in Arc GPU        | Support | built-in Arc GPU in Meteor Lake       |
-| Intel iGPU                    | Support | iGPU in 13700k, i5-1250P, i7-1260P, i7-1165G7 |
+| Intel iGPU                    | Support | iGPU in i5-1250P, i7-1260P, i7-1165G7 |
 
 *Notes:*
 
@@ -114,18 +104,10 @@ SYCL backend supports Intel GPU Family:
 
 **Verified devices**
 
 | Nvidia GPU               | Status    | Verified Model |
-|--------------------------|-----------|----------------|
-| Ampere Series            | Supported | A100, A4000    |
-| Ampere Series *(Mobile)* | Supported | RTX 40 Series  |
-
-| AMD GPU                  | Status       | Verified Model |
-|--------------------------|--------------|----------------|
-| Radeon Pro               | Experimental | W6800          |
-| Radeon RX                | Experimental | 6700 XT        |
-
-Note: AMD GPU support is highly experimental and is incompatible with F16.
-Additionally, it only supports GPUs with a sub_group_size (warp size) of 32.
+|--------------------------|---------|----------------|
+| Ampere Series            | Support | A100, A4000    |
+| Ampere Series *(Mobile)* | Support | RTX 40 Series  |
 
 ## Docker
 The docker build option is currently limited to *intel GPU* targets.
@@ -133,7 +115,7 @@ The docker build option is currently limited to *intel GPU* targets.
 ### Build image
 ```sh
 # Using FP16
-docker build -t llama-cpp-sycl --build-arg="GGML_SYCL_F16=ON" --target light -f .devops/intel.Dockerfile .
+docker build -t llama-cpp-sycl --build-arg="GGML_SYCL_F16=ON" -f .devops/llama-cli-intel.Dockerfile .
 ```
 
 *Notes*:
@@ -197,10 +179,6 @@ Platform #0: Intel(R) OpenCL HD Graphics
 
 In order to target Nvidia GPUs through SYCL, please make sure the CUDA/CUBLAS native requirements *-found [here](README.md#cuda)-* are installed.
 
-- **AMD GPU**
-
-To target AMD GPUs with SYCL, the ROCm stack must be installed first.
-
 2. **Install Intel® oneAPI Base toolkit**
 
 - **For Intel GPU**
@@ -211,7 +189,7 @@ Please follow the instructions for downloading and installing the Toolkit for Li
 
 Following guidelines/code snippets assume the default installation values. Otherwise, please make sure the necessary changes are reflected where applicable.
 
-Upon a successful installation, SYCL is enabled for the available intel devices, along with relevant libraries such as oneAPI oneDNN for Intel GPUs.
+Upon a successful installation, SYCL is enabled for the available intel devices, along with relevant libraries such as oneAPI MKL for intel GPUs.
 
 - **Adding support to Nvidia GPUs**
 
@@ -227,19 +205,6 @@ cmake -B buildWithCublas -DCMAKE_CXX_COMPILER=icpx -DCMAKE_C_COMPILER=icx -DENAB
 cmake --build buildWithCublas --config Release
 ```
 
-- **Adding support to AMD GPUs**
-
-**oneAPI Plugin**: In order to enable SYCL support on AMD GPUs, please install the [Codeplay oneAPI Plugin for AMD GPUs](https://developer.codeplay.com/products/oneapi/amd/download). As with Nvidia GPUs, the user should also make sure the plugin version matches the installed base toolkit.
-
-**oneMKL for rocBlas**: The current oneMKL releases *(shipped with the oneAPI base-toolkit)* doesn't contain the rocBLAS backend. A build from source of the upstream [oneMKL](https://github.com/oneapi-src/oneMKL) with the *rocBLAS* backend enabled is thus required to run it on AMD GPUs.
-
-```sh
-git clone https://github.com/oneapi-src/oneMKL
-cd oneMKL
-# Find your HIPTARGET with rocminfo, under the key 'Name:'
-cmake -B buildWithrocBLAS -DCMAKE_CXX_COMPILER=icpx -DCMAKE_C_COMPILER=icx -DENABLE_MKLGPU_BACKEND=OFF -DENABLE_MKLCPU_BACKEND=OFF -DENABLE_ROCBLAS_BACKEND=ON -DHIPTARGETS=${HIPTARGET} -DTARGET_DOMAINS=blas
-cmake --build buildWithrocBLAS --config Release
-```
-
 3. **Verify installation and environment**
 
@@ -251,48 +216,33 @@ sycl-ls
 
 - **Intel GPU**
 
-When targeting an intel GPU, the user should expect one or more level-zero devices among the available SYCL devices. Please make sure that at least one GPU is present, for instance [`level_zero:gpu`] in the sample output below:
+When targeting an intel GPU, the user should expect one or more level-zero devices among the available SYCL devices. Please make sure that at least one GPU is present, for instance [`ext_oneapi_level_zero:gpu:0`] in the sample output below:
 
 ```
-[opencl:acc][opencl:0] Intel(R) FPGA Emulation Platform for OpenCL(TM), Intel(R) FPGA Emulation Device OpenCL 1.2 [2023.16.10.0.17_160000]
-[opencl:cpu][opencl:1] Intel(R) OpenCL, 13th Gen Intel(R) Core(TM) i7-13700K OpenCL 3.0 (Build 0) [2023.16.10.0.17_160000]
-[opencl:gpu][opencl:2] Intel(R) OpenCL Graphics, Intel(R) Arc(TM) A770 Graphics OpenCL 3.0 NEO [23.30.26918.50]
-[level_zero:gpu][level_zero:0] Intel(R) Level-Zero, Intel(R) Arc(TM) A770 Graphics 1.3 [1.3.26918]
+[opencl:acc:0] Intel(R) FPGA Emulation Platform for OpenCL(TM), Intel(R) FPGA Emulation Device OpenCL 1.2 [2023.16.10.0.17_160000]
+[opencl:cpu:1] Intel(R) OpenCL, 13th Gen Intel(R) Core(TM) i7-13700K OpenCL 3.0 (Build 0) [2023.16.10.0.17_160000]
+[opencl:gpu:2] Intel(R) OpenCL Graphics, Intel(R) Arc(TM) A770 Graphics OpenCL 3.0 NEO [23.30.26918.50]
+[ext_oneapi_level_zero:gpu:0] Intel(R) Level-Zero, Intel(R) Arc(TM) A770 Graphics 1.3 [1.3.26918]
 ```
 
 - **Nvidia GPU**
 
-Similarly, user targeting Nvidia GPUs should expect at least one SYCL-CUDA device [`cuda:gpu`] as below:
+Similarly, user targeting Nvidia GPUs should expect at least one SYCL-CUDA device [`ext_oneapi_cuda:gpu`] as bellow:
 
 ```
-[opencl:acc][opencl:0] Intel(R) FPGA Emulation Platform for OpenCL(TM), Intel(R) FPGA Emulation Device OpenCL 1.2 [2023.16.12.0.12_195853.xmain-hotfix]
-[opencl:cpu][opencl:1] Intel(R) OpenCL, Intel(R) Xeon(R) Gold 6326 CPU @ 2.90GHz OpenCL 3.0 (Build 0) [2023.16.12.0.12_195853.xmain-hotfix]
-[cuda:gpu][cuda:0] NVIDIA CUDA BACKEND, NVIDIA A100-PCIE-40GB 8.0 [CUDA 12.5]
-```
-
-- **AMD GPU**
-
-For AMD GPUs we should expect at least one SYCL-HIP device [`hip:gpu`]:
-
-```
-[opencl:cpu][opencl:0] Intel(R) OpenCL, 12th Gen Intel(R) Core(TM) i9-12900K OpenCL 3.0 (Build 0) [2024.18.6.0.02_160000]
-[hip:gpu][hip:0] AMD HIP BACKEND, AMD Radeon PRO W6800 gfx1030 [HIP 60140.9]
+[opencl:acc:0] Intel(R) FPGA Emulation Platform for OpenCL(TM), Intel(R) FPGA Emulation Device OpenCL 1.2 [2023.16.12.0.12_195853.xmain-hotfix]
+[opencl:cpu:1] Intel(R) OpenCL, Intel(R) Xeon(R) Gold 6326 CPU @ 2.90GHz OpenCL 3.0 (Build 0) [2023.16.12.0.12_195853.xmain-hotfix]
+[ext_oneapi_cuda:gpu:0] NVIDIA CUDA BACKEND, NVIDIA A100-PCIE-40GB 8.0 [CUDA 12.2]
 ```
 
 ### II. Build llama.cpp
 
 #### Intel GPU
 
-```
-./examples/sycl/build.sh
-```
-
-or
-
 ```sh
 # Export relevant ENV variables
 source /opt/intel/oneapi/setvars.sh
 
+# Build LLAMA with MKL BLAS acceleration for intel GPU
+
 # Option 1: Use FP32 (recommended for better performance in most cases)
 cmake -B build -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
 
@@ -304,7 +254,6 @@ cmake --build build --config Release -j -v
 ```
 
 #### Nvidia GPU
 
 ```sh
 # Export relevant ENV variables
 export LD_LIBRARY_PATH=/path/to/oneMKL/buildWithCublas/lib:$LD_LIBRARY_PATH
@@ -313,106 +262,62 @@ export CPLUS_INCLUDE_DIR=/path/to/oneMKL/buildWithCublas/include:$CPLUS_INCLUDE_
 export CPLUS_INCLUDE_DIR=/path/to/oneMKL/include:$CPLUS_INCLUDE_DIR
 
 # Build LLAMA with Nvidia BLAS acceleration through SYCL
-# Setting GGML_SYCL_DEVICE_ARCH is optional but can improve performance
-GGML_SYCL_DEVICE_ARCH=sm_80 # Example architecture
-
 # Option 1: Use FP32 (recommended for better performance in most cases)
-cmake -B build -DGGML_SYCL=ON -DGGML_SYCL_TARGET=NVIDIA -DGGML_SYCL_DEVICE_ARCH=${GGML_SYCL_DEVICE_ARCH} -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
+cmake -B build -DGGML_SYCL=ON -DGGML_SYCL_TARGET=NVIDIA -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
 
 # Option 2: Use FP16
-cmake -B build -DGGML_SYCL=ON -DGGML_SYCL_TARGET=NVIDIA -DGGML_SYCL_DEVICE_ARCH=${GGML_SYCL_DEVICE_ARCH} -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON
+cmake -B build -DGGML_SYCL=ON -DGGML_SYCL_TARGET=NVIDIA -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON
 
 # build all binary
 cmake --build build --config Release -j -v
 ```
-
-#### AMD GPU
-
-```sh
-# Export relevant ENV variables
-export LD_LIBRARY_PATH=/path/to/oneMKL/buildWithrocBLAS/lib:$LD_LIBRARY_PATH
-export LIBRARY_PATH=/path/to/oneMKL/buildWithrocBLAS/lib:$LIBRARY_PATH
-export CPLUS_INCLUDE_DIR=/path/to/oneMKL/buildWithrocBLAS/include:$CPLUS_INCLUDE_DIR
-
-# Build LLAMA with rocBLAS acceleration through SYCL
-
-## AMD
-# Use FP32, FP16 is not supported
-# Find your GGML_SYCL_DEVICE_ARCH with rocminfo, under the key 'Name:'
-GGML_SYCL_DEVICE_ARCH=gfx90a # Example architecture
-cmake -B build -DGGML_SYCL=ON -DGGML_SYCL_TARGET=AMD -DGGML_SYCL_DEVICE_ARCH=${GGML_SYCL_DEVICE_ARCH} -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
-
-# build all binary
-cmake --build build --config Release -j -v
-```
 
 ### III. Run the inference
 
-#### Retrieve and prepare model
+1. Retrieve and prepare model
 
 You can refer to the general [*Prepare and Quantize*](README.md#prepare-and-quantize) guide for model prepration, or simply download [llama-2-7b.Q4_0.gguf](https://huggingface.co/TheBloke/Llama-2-7B-GGUF/blob/main/llama-2-7b.Q4_0.gguf) model as example.
 
-##### Check device
-
-1. Enable oneAPI running environment
+2. Enable oneAPI running environment
 
 ```sh
 source /opt/intel/oneapi/setvars.sh
 ```
 
-2. List devices information
+3. List devices information
 
 Similar to the native `sycl-ls`, available SYCL devices can be queried as follow:
 
 ```sh
 ./build/bin/llama-ls-sycl-device
 ```
-This command will only display the selected backend that is supported by SYCL. The default backend is level_zero. For example, in a system with 2 *intel GPU* it would look like the following:
+A example of such log in a system with 1 *intel CPU* and 1 *intel GPU* can look like the following:
 ```
-found 2 SYCL devices:
+found 6 SYCL devices:
 
 | | | |Compute |Max compute|Max work|Max sub| |
 |ID| Device Type| Name|capability|units |group |group |Global mem size|
 |--|------------------|---------------------------------------------|----------|-----------|--------|-------|---------------|
 | 0|[level_zero:gpu:0]| Intel(R) Arc(TM) A770 Graphics| 1.3| 512| 1024| 32| 16225243136|
 | 1|[level_zero:gpu:1]| Intel(R) UHD Graphics 770| 1.3| 32| 512| 32| 53651849216|
+| 2| [opencl:gpu:0]| Intel(R) Arc(TM) A770 Graphics| 3.0| 512| 1024| 32| 16225243136|
+| 3| [opencl:gpu:1]| Intel(R) UHD Graphics 770| 3.0| 32| 512| 32| 53651849216|
+| 4| [opencl:cpu:0]| 13th Gen Intel(R) Core(TM) i7-13700K| 3.0| 24| 8192| 64| 67064815616|
+| 5| [opencl:acc:0]| Intel(R) FPGA Emulation Device| 1.2| 24|67108864| 64| 67064815616|
 ```
 
-#### Choose level-zero devices
-
-|Chosen Device ID|Setting|
-|-|-|
-|0|`export ONEAPI_DEVICE_SELECTOR="level_zero:0"` or no action|
-|1|`export ONEAPI_DEVICE_SELECTOR="level_zero:1"`|
-|0 & 1|`export ONEAPI_DEVICE_SELECTOR="level_zero:0;level_zero:1"`|
+| Attribute | Note |
+|------------------------|-------------------------------------------------------------|
+| compute capability 1.3 | Level-zero driver/runtime, recommended |
+| compute capability 3.0 | OpenCL driver/runtime, slower than level-zero in most cases |
+
+4. Launch inference
 
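Tying the two tables above together, a sketch of pinning a specific level-zero device before launching (device IDs follow the `llama-ls-sycl-device` listing; model, prompt and flags are the ones used elsewhere in this guide):

```sh
# restrict SYCL to the second level-zero device from the listing above
export ONEAPI_DEVICE_SELECTOR="level_zero:1"

ZES_ENABLE_SYSMAN=1 ./build/bin/llama-cli -m models/llama-2-7b.Q4_0.gguf \
    -p "Building a website can be done in 10 simple steps:" -n 400 -e -ngl 33 -sm none -mg 0
```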
-#### Execute
-
-Choose one of following methods to run.
-
-1. Script
-
-- Use device 0:
-
-```sh
-./examples/sycl/run-llama2.sh 0
-```
-- Use multiple devices:
-
-```sh
-./examples/sycl/run-llama2.sh
-```
-
-2. Command line
-Launch inference
 
 There are two device selection modes:
 
-- Single device: Use one device assigned by user. Default device id is 0.
-- Multiple devices: Automatically choose the devices with the same backend.
+- Single device: Use one device target specified by the user.
+- Multiple devices: Automatically select the devices with the same largest Max compute-units.
 
-In two device selection modes, the default SYCL backend is level_zero, you can choose other backend supported by SYCL by setting environment variable ONEAPI_DEVICE_SELECTOR.
 
 | Device selection | Parameter |
 |------------------|----------------------------------------|
@@ -426,6 +331,11 @@ Examples:
 ```sh
 ZES_ENABLE_SYSMAN=1 ./build/bin/llama-cli -m models/llama-2-7b.Q4_0.gguf -p "Building a website can be done in 10 simple steps:" -n 400 -e -ngl 33 -sm none -mg 0
 ```
+or run by script:
+
+```sh
+./examples/sycl/run_llama2.sh 0
+```
 
 - Use multiple devices:
 
@@ -433,6 +343,12 @@ ZES_ENABLE_SYSMAN=1 ./build/bin/llama-cli -m models/llama-2-7b.Q4_0.gguf -p "Bui
 ZES_ENABLE_SYSMAN=1 ./build/bin/llama-cli -m models/llama-2-7b.Q4_0.gguf -p "Building a website can be done in 10 simple steps:" -n 400 -e -ngl 33 -sm layer
 ```
+
+Otherwise, you can run the script:
+
+```sh
+./examples/sycl/run_llama2.sh
+```
 
 *Notes:*
 
 - Upon execution, verify the selected device(s) ID(s) in the output log, which can for instance be displayed as follow:
@@ -479,7 +395,7 @@ c. Verify installation
 In the oneAPI command line, run the following to print the available SYCL devices:
 
 ```
-sycl-ls.exe
+sycl-ls
 ```
 
 There should be one or more *level-zero* GPU devices displayed as **[ext_oneapi_level_zero:gpu]**. Below is example of such output detecting an *intel Iris Xe* GPU as a Level-zero SYCL device:
@@ -500,18 +416,6 @@ b. The new Visual Studio will install Ninja as default. (If not, please install
 
 ### II. Build llama.cpp
 
-You could download the release package for Windows directly, which including binary files and depended oneAPI dll files.
-
-Choose one of following methods to build from source code.
-
-1. Script
-
-```sh
-.\examples\sycl\win-build-sycl.bat
-```
-
-2. CMake
-
 On the oneAPI command line window, step into the llama.cpp main directory and run the following:
 
 ```
@@ -526,8 +430,12 @@ cmake -B build -G "Ninja" -DGGML_SYCL=ON -DCMAKE_C_COMPILER=cl -DCMAKE_CXX_COMPI
 cmake --build build --config Release -j
 ```
 
-Or, use CMake presets to build:
+Otherwise, run the `win-build-sycl.bat` wrapper which encapsulates the former instructions:
+```sh
+.\examples\sycl\win-build-sycl.bat
+```
+
+Or, use CMake presets to build:
 ```sh
 cmake --preset x64-windows-sycl-release
 cmake --build build-x64-windows-sycl-release -j --target llama-cli
@@ -539,9 +447,7 @@ cmake --preset x64-windows-sycl-debug
 cmake --build build-x64-windows-sycl-debug -j --target llama-cli
 ```
 
-3. Visual Studio
-
-You can use Visual Studio to open llama.cpp folder as a CMake project. Choose the sycl CMake presets (`x64-windows-sycl-release` or `x64-windows-sycl-debug`) before you compile the project.
+Or, you can use Visual Studio to open llama.cpp folder as a CMake project. Choose the sycl CMake presets (`x64-windows-sycl-release` or `x64-windows-sycl-debug`) before you compile the project.
 
 *Notes:*
 
@@ -549,65 +455,52 @@ You can use Visual Studio to open llama.cpp folder as a CMake project. Choose th
 
 ### III. Run the inference
 
-#### Retrieve and prepare model
+1. Retrieve and prepare model
 
-You can refer to the general [*Prepare and Quantize*](README.md#prepare-and-quantize) guide for model prepration, or simply download [llama-2-7b.Q4_0.gguf](https://huggingface.co/TheBloke/Llama-2-7B-GGUF/blob/main/llama-2-7b.Q4_0.gguf) model as example.
+You can refer to the general [*Prepare and Quantize*](README#prepare-and-quantize) guide for model prepration, or simply download [llama-2-7b.Q4_0.gguf](https://huggingface.co/TheBloke/Llama-2-7B-GGUF/blob/main/llama-2-7b.Q4_0.gguf) model as example.
 
-##### Check device
-
-1. Enable oneAPI running environment
+2. Enable oneAPI running environment
 
 On the oneAPI command line window, run the following and step into the llama.cpp directory:
 ```
 "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" intel64
 ```
 
-2. List devices information
+3. List devices information
 
 Similar to the native `sycl-ls`, available SYCL devices can be queried as follow:
 
 ```
-build\bin\llama-ls-sycl-device.exe
+build\bin\ls-sycl-device.exe
 ```
 
-This command will only display the selected backend that is supported by SYCL. The default backend is level_zero. For example, in a system with 2 *intel GPU* it would look like the following:
+The output of this command in a system with 1 *intel CPU* and 1 *intel GPU* would look like the following:
 ```
-found 2 SYCL devices:
+found 6 SYCL devices:
 | | | |Compute |Max compute|Max work|Max sub| |
 |ID| Device Type| Name|capability|units |group |group |Global mem size|
 |--|------------------|---------------------------------------------|----------|-----------|--------|-------|---------------|
 | 0|[level_zero:gpu:0]| Intel(R) Arc(TM) A770 Graphics| 1.3| 512| 1024| 32| 16225243136|
 | 1|[level_zero:gpu:1]| Intel(R) UHD Graphics 770| 1.3| 32| 512| 32| 53651849216|
+| 2| [opencl:gpu:0]| Intel(R) Arc(TM) A770 Graphics| 3.0| 512| 1024| 32| 16225243136|
+| 3| [opencl:gpu:1]| Intel(R) UHD Graphics 770| 3.0| 32| 512| 32| 53651849216|
+| 4| [opencl:cpu:0]| 13th Gen Intel(R) Core(TM) i7-13700K| 3.0| 24| 8192| 64| 67064815616|
+| 5| [opencl:acc:0]| Intel(R) FPGA Emulation Device| 1.2| 24|67108864| 64| 67064815616|
 ```
 
-#### Choose level-zero devices
-
-|Chosen Device ID|Setting|
-|-|-|
-|0|`set ONEAPI_DEVICE_SELECTOR="level_zero:1"` or no action|
-|1|`set ONEAPI_DEVICE_SELECTOR="level_zero:1"`|
-|0 & 1|`set ONEAPI_DEVICE_SELECTOR="level_zero:0;level_zero:1"`|
+| Attribute | Note |
+|------------------------|-----------------------------------------------------------|
+| compute capability 1.3 | Level-zero running time, recommended |
+| compute capability 3.0 | OpenCL running time, slower than level-zero in most cases |
 
-#### Execute
-
-Choose one of following methods to run.
-
-1. Script
-
-```
-examples\sycl\win-run-llama2.bat
-```
-
-2. Command line
-
-Launch inference
+4. Launch inference
 
 There are two device selection modes:
 
-- Single device: Use one device assigned by user. Default device id is 0.
-- Multiple devices: Automatically choose the devices with the same backend.
+- Single device: Use one device assigned by user.
+- Multiple devices: Automatically choose the devices with the same biggest Max compute units.
 
-In two device selection modes, the default SYCL backend is level_zero, you can choose other backend supported by SYCL by setting environment variable ONEAPI_DEVICE_SELECTOR.
 
 | Device selection | Parameter |
 |------------------|----------------------------------------|
@@ -627,7 +520,11 @@ build\bin\llama-cli.exe -m models\llama-2-7b.Q4_0.gguf -p "Building a website ca
 ```
 build\bin\llama-cli.exe -m models\llama-2-7b.Q4_0.gguf -p "Building a website can be done in 10 simple steps:\nStep 1:" -n 400 -e -ngl 33 -s 0 -sm layer
 ```
+
+Otherwise, run the following wrapper script:
+
+```
+.\examples\sycl\win-run-llama2.bat
+```
 
 Note:
 
@@ -641,19 +538,17 @@ Or
 use 1 SYCL GPUs: [0] with Max compute units:512
 ```
 
 ## Environment Variable
 
 #### Build
 
 | Name | Value | Function |
-|--------------------|---------------------------------------|---------------------------------------------|
-| GGML_SYCL | ON (mandatory) | Enable build with SYCL code path.<br>FP32 path - recommended for better perforemance than FP16 on quantized model|
-| GGML_SYCL_TARGET | INTEL *(default)* \| NVIDIA \| AMD | Set the SYCL target device type. |
-| GGML_SYCL_DEVICE_ARCH | Optional (except for AMD) | Set the SYCL device architecture, optional except for AMD. Setting the device architecture can improve the performance. See the table [--offload-arch](https://github.com/intel/llvm/blob/sycl/sycl/doc/design/OffloadDesign.md#--offload-arch) for a list of valid architectures. |
-| GGML_SYCL_F16 | OFF *(default)* \|ON *(optional)* | Enable FP16 build with SYCL code path. |
-| CMAKE_C_COMPILER | `icx` *(Linux)*, `icx/cl` *(Windows)* | Set `icx` compiler for SYCL code path. |
-| CMAKE_CXX_COMPILER | `icpx` *(Linux)*, `icx` *(Windows)* | Set `icpx/icx` compiler for SYCL code path. |
+|--------------------|-----------------------------------|---------------------------------------------|
+| GGML_SYCL | ON (mandatory) | Enable build with SYCL code path. |
+| GGML_SYCL_TARGET | INTEL *(default)* \| NVIDIA | Set the SYCL target device type. |
+| GGML_SYCL_F16 | OFF *(default)* \|ON *(optional)* | Enable FP16 build with SYCL code path. |
+| CMAKE_C_COMPILER | icx | Set *icx* compiler for SYCL code path. |
+| CMAKE_CXX_COMPILER | icpx *(Linux)*, icx *(Windows)* | Set `icpx/icx` compiler for SYCL code path. |
 
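As a worked example of the master-side build variables, an AMD-targeted configure could look like the following sketch (the `gfx90a` architecture value is the illustrative one used earlier in this document; substitute the value reported by `rocminfo`):

```sh
source /opt/intel/oneapi/setvars.sh

cmake -B build -DGGML_SYCL=ON -DGGML_SYCL_TARGET=AMD -DGGML_SYCL_DEVICE_ARCH=gfx90a \
      -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
cmake --build build --config Release -j -v
```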
 #### Runtime
 
@@ -689,26 +584,9 @@ use 1 SYCL GPUs: [0] with Max compute units:512
 ```
 Otherwise, please double-check the GPU driver installation steps.
 
-- Can I report Ollama issue on Intel GPU to llama.cpp SYCL backend?
-
-  No. We can't support Ollama issue directly, because we aren't familiar with Ollama.
-
-  Sugguest reproducing on llama.cpp and report similar issue to llama.cpp. We will surpport it.
-
-  It's same for other projects including llama.cpp SYCL backend.
-
-- Meet issue: `Native API failed. Native API returns: -6 (PI_ERROR_OUT_OF_HOST_MEMORY) -6 (PI_ERROR_OUT_OF_HOST_MEMORY) -999 (UNKNOWN PI error)` or `failed to allocate SYCL0 buffer`
-
-  Device Memory is not enough.
-
-  |Reason|Solution|
-  |-|-|
-  |Default Context is too big. It leads to more memory usage.|Set `-c 8192` or smaller value.|
-  |Model is big and require more memory than device's.|Choose smaller quantized model, like Q5 -> Q4;<br>Use more than one devices to load model.|
-
 ### **GitHub contribution**:
 Please add the **[SYCL]** prefix/tag in issues/PRs titles to help the SYCL-team check/address them without delay.
 
 ## TODO
 
-- NA
+- Support row layer split for multiple card runs.
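A sketch of the mitigation suggested in the master-side FAQ entry for `PI_ERROR_OUT_OF_HOST_MEMORY` / `failed to allocate SYCL0 buffer` failures, namely shrinking the context (the exact value is workload-dependent):

```sh
# retry with a smaller context window than the model default
./build/bin/llama-cli -m models/llama-2-7b.Q4_0.gguf -c 8192 \
    -p "Building a website can be done in 10 simple steps:" -n 400 -e -ngl 33
```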
@@ -1,4 +0,0 @@
-#pragma once
-
-#include <llama.h>
-

@@ -1,5 +0,0 @@
-module llama [system] {
-    header "llama.h"
-    link "llama"
-    export *
-}
293
ci/run.sh
293
ci/run.sh
|
@ -1,4 +1,4 @@
|
||||||
#!/bin/bash
|
#/bin/bash
|
||||||
#
|
#
|
||||||
# sample usage:
|
# sample usage:
|
||||||
#
|
#
|
||||||
|
@ -13,9 +13,6 @@
|
||||||
# # with SYCL support
|
# # with SYCL support
|
||||||
# GG_BUILD_SYCL=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
|
# GG_BUILD_SYCL=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
|
||||||
#
|
#
|
||||||
# # with VULKAN support
|
|
||||||
# GG_BUILD_VULKAN=1 bash ./ci/run.sh ./tmp/results ./tmp/mnt
|
|
||||||
#
|
|
||||||
|
|
||||||
if [ -z "$2" ]; then
|
if [ -z "$2" ]; then
|
||||||
echo "usage: $0 <output-dir> <mnt-dir>"
|
echo "usage: $0 <output-dir> <mnt-dir>"
|
||||||
|
@ -39,11 +36,11 @@ SRC=`pwd`
|
||||||
CMAKE_EXTRA="-DLLAMA_FATAL_WARNINGS=ON"
|
CMAKE_EXTRA="-DLLAMA_FATAL_WARNINGS=ON"
|
||||||
|
|
||||||
if [ ! -z ${GG_BUILD_METAL} ]; then
|
if [ ! -z ${GG_BUILD_METAL} ]; then
|
||||||
CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_METAL=ON -DGGML_METAL_USE_BF16=ON"
|
CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_METAL=ON"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [ ! -z ${GG_BUILD_CUDA} ]; then
|
if [ ! -z ${GG_BUILD_CUDA} ]; then
|
||||||
CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_CUDA=ON -DCMAKE_CUDA_ARCHITECTURES=native"
|
CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_CUDA=1"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [ ! -z ${GG_BUILD_SYCL} ]; then
|
if [ ! -z ${GG_BUILD_SYCL} ]; then
|
||||||
|
@ -53,11 +50,7 @@ if [ ! -z ${GG_BUILD_SYCL} ]; then
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_SYCL=1 -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON"
|
CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_SYCL=1 DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON"
|
||||||
fi
|
|
||||||
|
|
||||||
if [ ! -z ${GG_BUILD_VULKAN} ]; then
|
|
||||||
CMAKE_EXTRA="${CMAKE_EXTRA} -DGGML_VULKAN=1"
|
|
||||||
fi
|
fi
|
||||||
## helpers
|
## helpers
|
||||||
|
|
||||||
|
@ -110,11 +103,8 @@ function gg_run_ctest_debug {
|
||||||
|
|
||||||
set -e
|
set -e
|
||||||
|
|
||||||
# Check cmake, make and ctest are installed
|
|
||||||
gg_check_build_requirements
|
|
||||||
|
|
||||||
(time cmake -DCMAKE_BUILD_TYPE=Debug ${CMAKE_EXTRA} .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log
|
(time cmake -DCMAKE_BUILD_TYPE=Debug ${CMAKE_EXTRA} .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log
|
||||||
(time make -j$(nproc) ) 2>&1 | tee -a $OUT/${ci}-make.log
|
(time make -j ) 2>&1 | tee -a $OUT/${ci}-make.log
|
||||||
|
|
||||||
(time ctest --output-on-failure -L main -E test-opt ) 2>&1 | tee -a $OUT/${ci}-ctest.log
|
(time ctest --output-on-failure -L main -E test-opt ) 2>&1 | tee -a $OUT/${ci}-ctest.log
|
||||||
|
|
||||||
|
@ -141,11 +131,8 @@ function gg_run_ctest_release {
|
||||||
|
|
||||||
set -e
|
set -e
|
||||||
|
|
||||||
# Check cmake, make and ctest are installed
|
|
||||||
gg_check_build_requirements
|
|
||||||
|
|
||||||
(time cmake -DCMAKE_BUILD_TYPE=Release ${CMAKE_EXTRA} .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log
|
(time cmake -DCMAKE_BUILD_TYPE=Release ${CMAKE_EXTRA} .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log
|
||||||
(time make -j$(nproc) ) 2>&1 | tee -a $OUT/${ci}-make.log
|
(time make -j ) 2>&1 | tee -a $OUT/${ci}-make.log
|
||||||
|
|
||||||
if [ -z ${GG_BUILD_LOW_PERF} ]; then
|
if [ -z ${GG_BUILD_LOW_PERF} ]; then
|
||||||
(time ctest --output-on-failure -L main ) 2>&1 | tee -a $OUT/${ci}-ctest.log
|
(time ctest --output-on-failure -L main ) 2>&1 | tee -a $OUT/${ci}-ctest.log
|
||||||
|
@ -273,6 +260,7 @@ function gg_sum_ctest_with_model_release {
 }

 # open_llama_7b_v2
+# requires: GG_BUILD_CUDA

 function gg_run_open_llama_7b_v2 {
 cd ${SRC}
@ -296,8 +284,8 @@ function gg_run_open_llama_7b_v2 {

 set -e

-(time cmake -DCMAKE_BUILD_TYPE=Release ${CMAKE_EXTRA} .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log
+(time cmake -DCMAKE_BUILD_TYPE=Release ${CMAKE_EXTRA} -DGGML_CUDA=1 .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log
-(time make -j$(nproc) ) 2>&1 | tee -a $OUT/${ci}-make.log
+(time make -j ) 2>&1 | tee -a $OUT/${ci}-make.log

 python3 ../examples/convert_legacy_llama.py ${path_models} --outfile ${path_models}/ggml-model-f16.gguf

@ -326,36 +314,36 @@ function gg_run_open_llama_7b_v2 {
|
||||||
./bin/llama-quantize ${model_f16} ${model_q5_k} q5_k
|
./bin/llama-quantize ${model_f16} ${model_q5_k} q5_k
|
||||||
./bin/llama-quantize ${model_f16} ${model_q6_k} q6_k
|
./bin/llama-quantize ${model_f16} ${model_q6_k} q6_k
|
||||||
|
|
||||||
(time ./bin/llama-cli -no-cnv --model ${model_f16} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-f16.log
|
(time ./bin/llama-cli --model ${model_f16} -t 1 -ngl 999 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-f16.log
|
||||||
(time ./bin/llama-cli -no-cnv --model ${model_q8_0} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q8_0.log
|
(time ./bin/llama-cli --model ${model_q8_0} -t 1 -ngl 999 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q8_0.log
|
||||||
(time ./bin/llama-cli -no-cnv --model ${model_q4_0} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_0.log
|
(time ./bin/llama-cli --model ${model_q4_0} -t 1 -ngl 999 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_0.log
|
||||||
(time ./bin/llama-cli -no-cnv --model ${model_q4_1} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_1.log
|
(time ./bin/llama-cli --model ${model_q4_1} -t 1 -ngl 999 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_1.log
|
||||||
(time ./bin/llama-cli -no-cnv --model ${model_q5_0} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_0.log
|
(time ./bin/llama-cli --model ${model_q5_0} -t 1 -ngl 999 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_0.log
|
||||||
(time ./bin/llama-cli -no-cnv --model ${model_q5_1} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_1.log
|
(time ./bin/llama-cli --model ${model_q5_1} -t 1 -ngl 999 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_1.log
|
||||||
(time ./bin/llama-cli -no-cnv --model ${model_q2_k} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q2_k.log
|
(time ./bin/llama-cli --model ${model_q2_k} -t 1 -ngl 999 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q2_k.log
|
||||||
(time ./bin/llama-cli -no-cnv --model ${model_q3_k} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q3_k.log
|
(time ./bin/llama-cli --model ${model_q3_k} -t 1 -ngl 999 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q3_k.log
|
||||||
(time ./bin/llama-cli -no-cnv --model ${model_q4_k} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_k.log
|
(time ./bin/llama-cli --model ${model_q4_k} -t 1 -ngl 999 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_k.log
|
||||||
(time ./bin/llama-cli -no-cnv --model ${model_q5_k} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_k.log
|
(time ./bin/llama-cli --model ${model_q5_k} -t 1 -ngl 999 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_k.log
|
||||||
(time ./bin/llama-cli -no-cnv --model ${model_q6_k} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q6_k.log
|
(time ./bin/llama-cli --model ${model_q6_k} -t 1 -ngl 999 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q6_k.log
|
||||||
|
|
||||||
(time ./bin/llama-perplexity --model ${model_f16} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-f16.log
|
(time ./bin/llama-perplexity --model ${model_f16} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-f16.log
|
||||||
(time ./bin/llama-perplexity --model ${model_q8_0} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q8_0.log
|
(time ./bin/llama-perplexity --model ${model_q8_0} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q8_0.log
|
||||||
(time ./bin/llama-perplexity --model ${model_q4_0} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_0.log
|
(time ./bin/llama-perplexity --model ${model_q4_0} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_0.log
|
||||||
(time ./bin/llama-perplexity --model ${model_q4_1} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_1.log
|
(time ./bin/llama-perplexity --model ${model_q4_1} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_1.log
|
||||||
(time ./bin/llama-perplexity --model ${model_q5_0} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_0.log
|
(time ./bin/llama-perplexity --model ${model_q5_0} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_0.log
|
||||||
(time ./bin/llama-perplexity --model ${model_q5_1} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_1.log
|
(time ./bin/llama-perplexity --model ${model_q5_1} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_1.log
|
||||||
(time ./bin/llama-perplexity --model ${model_q2_k} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q2_k.log
|
(time ./bin/llama-perplexity --model ${model_q2_k} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q2_k.log
|
||||||
(time ./bin/llama-perplexity --model ${model_q3_k} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q3_k.log
|
(time ./bin/llama-perplexity --model ${model_q3_k} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q3_k.log
|
||||||
(time ./bin/llama-perplexity --model ${model_q4_k} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_k.log
|
(time ./bin/llama-perplexity --model ${model_q4_k} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_k.log
|
||||||
(time ./bin/llama-perplexity --model ${model_q5_k} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_k.log
|
(time ./bin/llama-perplexity --model ${model_q5_k} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_k.log
|
||||||
(time ./bin/llama-perplexity --model ${model_q6_k} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q6_k.log
|
(time ./bin/llama-perplexity --model ${model_q6_k} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q6_k.log
|
||||||
|
|
||||||
(time ./bin/llama-imatrix --model ${model_f16} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-imatrix.log
|
(time ./bin/llama-imatrix --model ${model_f16} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-imatrix.log
|
||||||
|
|
||||||
-(time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 10 -c 0 ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
+(time ./bin/llama-save-load-state -ngl 10 --model ${model_q4_0} ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
-(time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 10 -c 0 -fa ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
+(time ./bin/llama-save-load-state -fa -ngl 10 --model ${model_q4_0} ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
-(time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 99 -c 0 ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
+(time ./bin/llama-save-load-state -ngl 99 --model ${model_q4_0} ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
-(time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 99 -c 0 -fa ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
+(time ./bin/llama-save-load-state -fa -ngl 99 --model ${model_q4_0} ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
|
|
||||||
function check_ppl {
|
function check_ppl {
|
||||||
qnt="$1"
|
qnt="$1"
|
||||||
|
@ -431,7 +419,7 @@ function gg_run_pythia_1_4b {
 set -e

 (time cmake -DCMAKE_BUILD_TYPE=Release ${CMAKE_EXTRA} .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log
-(time make -j$(nproc) ) 2>&1 | tee -a $OUT/${ci}-make.log
+(time make -j ) 2>&1 | tee -a $OUT/${ci}-make.log

 python3 ../convert_hf_to_gguf.py ${path_models} --outfile ${path_models}/ggml-model-f16.gguf

|
@ -460,34 +448,34 @@ function gg_run_pythia_1_4b {
|
||||||
./bin/llama-quantize ${model_f16} ${model_q5_k} q5_k
|
./bin/llama-quantize ${model_f16} ${model_q5_k} q5_k
|
||||||
./bin/llama-quantize ${model_f16} ${model_q6_k} q6_k
|
./bin/llama-quantize ${model_f16} ${model_q6_k} q6_k
|
||||||
|
|
||||||
(time ./bin/llama-cli -no-cnv --model ${model_f16} -ngl 99 -c 0 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-f16.log
|
(time ./bin/llama-cli --model ${model_f16} -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-f16.log
|
||||||
(time ./bin/llama-cli -no-cnv --model ${model_q8_0} -ngl 99 -c 0 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q8_0.log
|
(time ./bin/llama-cli --model ${model_q8_0} -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q8_0.log
|
||||||
(time ./bin/llama-cli -no-cnv --model ${model_q4_0} -ngl 99 -c 0 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_0.log
|
(time ./bin/llama-cli --model ${model_q4_0} -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_0.log
|
||||||
(time ./bin/llama-cli -no-cnv --model ${model_q4_1} -ngl 99 -c 0 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_1.log
|
(time ./bin/llama-cli --model ${model_q4_1} -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_1.log
|
||||||
(time ./bin/llama-cli -no-cnv --model ${model_q5_0} -ngl 99 -c 0 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_0.log
|
(time ./bin/llama-cli --model ${model_q5_0} -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_0.log
|
||||||
(time ./bin/llama-cli -no-cnv --model ${model_q5_1} -ngl 99 -c 0 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_1.log
|
(time ./bin/llama-cli --model ${model_q5_1} -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_1.log
|
||||||
(time ./bin/llama-cli -no-cnv --model ${model_q2_k} -ngl 99 -c 0 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q2_k.log
|
(time ./bin/llama-cli --model ${model_q2_k} -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q2_k.log
|
||||||
(time ./bin/llama-cli -no-cnv --model ${model_q3_k} -ngl 99 -c 0 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q3_k.log
|
(time ./bin/llama-cli --model ${model_q3_k} -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q3_k.log
|
||||||
(time ./bin/llama-cli -no-cnv --model ${model_q4_k} -ngl 99 -c 0 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_k.log
|
(time ./bin/llama-cli --model ${model_q4_k} -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_k.log
|
||||||
(time ./bin/llama-cli -no-cnv --model ${model_q5_k} -ngl 99 -c 0 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_k.log
|
(time ./bin/llama-cli --model ${model_q5_k} -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_k.log
|
||||||
(time ./bin/llama-cli -no-cnv --model ${model_q6_k} -ngl 99 -c 0 -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q6_k.log
|
(time ./bin/llama-cli --model ${model_q6_k} -s 1234 -n 64 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q6_k.log
|
||||||
|
|
||||||
(time ./bin/llama-perplexity --model ${model_f16} -f ${wiki_test_60} -ngl 99 -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-f16.log
|
(time ./bin/llama-perplexity --model ${model_f16} -f ${wiki_test_60} -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-f16.log
|
||||||
(time ./bin/llama-perplexity --model ${model_q8_0} -f ${wiki_test_60} -ngl 99 -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q8_0.log
|
(time ./bin/llama-perplexity --model ${model_q8_0} -f ${wiki_test_60} -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q8_0.log
|
||||||
(time ./bin/llama-perplexity --model ${model_q4_0} -f ${wiki_test_60} -ngl 99 -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_0.log
|
(time ./bin/llama-perplexity --model ${model_q4_0} -f ${wiki_test_60} -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_0.log
|
||||||
(time ./bin/llama-perplexity --model ${model_q4_1} -f ${wiki_test_60} -ngl 99 -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_1.log
|
(time ./bin/llama-perplexity --model ${model_q4_1} -f ${wiki_test_60} -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_1.log
|
||||||
(time ./bin/llama-perplexity --model ${model_q5_0} -f ${wiki_test_60} -ngl 99 -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_0.log
|
(time ./bin/llama-perplexity --model ${model_q5_0} -f ${wiki_test_60} -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_0.log
|
||||||
(time ./bin/llama-perplexity --model ${model_q5_1} -f ${wiki_test_60} -ngl 99 -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_1.log
|
(time ./bin/llama-perplexity --model ${model_q5_1} -f ${wiki_test_60} -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_1.log
|
||||||
(time ./bin/llama-perplexity --model ${model_q2_k} -f ${wiki_test_60} -ngl 99 -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q2_k.log
|
(time ./bin/llama-perplexity --model ${model_q2_k} -f ${wiki_test_60} -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q2_k.log
|
||||||
(time ./bin/llama-perplexity --model ${model_q3_k} -f ${wiki_test_60} -ngl 99 -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q3_k.log
|
(time ./bin/llama-perplexity --model ${model_q3_k} -f ${wiki_test_60} -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q3_k.log
|
||||||
(time ./bin/llama-perplexity --model ${model_q4_k} -f ${wiki_test_60} -ngl 99 -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_k.log
|
(time ./bin/llama-perplexity --model ${model_q4_k} -f ${wiki_test_60} -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_k.log
|
||||||
(time ./bin/llama-perplexity --model ${model_q5_k} -f ${wiki_test_60} -ngl 99 -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_k.log
|
(time ./bin/llama-perplexity --model ${model_q5_k} -f ${wiki_test_60} -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_k.log
|
||||||
(time ./bin/llama-perplexity --model ${model_q6_k} -f ${wiki_test_60} -ngl 99 -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q6_k.log
|
(time ./bin/llama-perplexity --model ${model_q6_k} -f ${wiki_test_60} -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-tg-q6_k.log
|
||||||
|
|
||||||
(time ./bin/llama-imatrix --model ${model_f16} -f ${wiki_test_60} -ngl 99 -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-imatrix.log
|
(time ./bin/llama-imatrix --model ${model_f16} -f ${wiki_test_60} -c 128 -b 128 --chunks 1 ) 2>&1 | tee -a $OUT/${ci}-imatrix.log
|
||||||
|
|
||||||
(time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 99 -c 0 ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
|
(time ./bin/llama-save-load-state --model ${model_q4_0} ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
|
||||||
(time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 99 -c 0 -fa ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
|
(time ./bin/llama-save-load-state -fa --model ${model_q4_0} ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
|
||||||
|
|
||||||
function check_ppl {
|
function check_ppl {
|
||||||
qnt="$1"
|
qnt="$1"
|
||||||
|
@ -541,6 +529,7 @@ function gg_sum_pythia_1_4b {
 }

 # pythia_2_8b
+# requires: GG_BUILD_CUDA

 function gg_run_pythia_2_8b {
 cd ${SRC}
@ -561,8 +550,8 @@ function gg_run_pythia_2_8b {

 set -e

-(time cmake -DCMAKE_BUILD_TYPE=Release ${CMAKE_EXTRA} .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log
+(time cmake -DCMAKE_BUILD_TYPE=Release ${CMAKE_EXTRA} -DGGML_CUDA=1 .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log
-(time make -j$(nproc) ) 2>&1 | tee -a $OUT/${ci}-make.log
+(time make -j ) 2>&1 | tee -a $OUT/${ci}-make.log

 python3 ../convert_hf_to_gguf.py ${path_models} --outfile ${path_models}/ggml-model-f16.gguf

@ -591,36 +580,36 @@ function gg_run_pythia_2_8b {
|
||||||
./bin/llama-quantize ${model_f16} ${model_q5_k} q5_k
|
./bin/llama-quantize ${model_f16} ${model_q5_k} q5_k
|
||||||
./bin/llama-quantize ${model_f16} ${model_q6_k} q6_k
|
./bin/llama-quantize ${model_f16} ${model_q6_k} q6_k
|
||||||
|
|
||||||
(time ./bin/llama-cli -no-cnv --model ${model_f16} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-f16.log
|
(time ./bin/llama-cli --model ${model_f16} -t 1 -ngl 999 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-f16.log
|
||||||
(time ./bin/llama-cli -no-cnv --model ${model_q8_0} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q8_0.log
|
(time ./bin/llama-cli --model ${model_q8_0} -t 1 -ngl 999 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q8_0.log
|
||||||
(time ./bin/llama-cli -no-cnv --model ${model_q4_0} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_0.log
|
(time ./bin/llama-cli --model ${model_q4_0} -t 1 -ngl 999 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_0.log
|
||||||
(time ./bin/llama-cli -no-cnv --model ${model_q4_1} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_1.log
|
(time ./bin/llama-cli --model ${model_q4_1} -t 1 -ngl 999 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_1.log
|
||||||
(time ./bin/llama-cli -no-cnv --model ${model_q5_0} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_0.log
|
(time ./bin/llama-cli --model ${model_q5_0} -t 1 -ngl 999 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_0.log
|
||||||
(time ./bin/llama-cli -no-cnv --model ${model_q5_1} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_1.log
|
(time ./bin/llama-cli --model ${model_q5_1} -t 1 -ngl 999 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_1.log
|
||||||
(time ./bin/llama-cli -no-cnv --model ${model_q2_k} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q2_k.log
|
(time ./bin/llama-cli --model ${model_q2_k} -t 1 -ngl 999 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q2_k.log
|
||||||
(time ./bin/llama-cli -no-cnv --model ${model_q3_k} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q3_k.log
|
(time ./bin/llama-cli --model ${model_q3_k} -t 1 -ngl 999 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q3_k.log
|
||||||
(time ./bin/llama-cli -no-cnv --model ${model_q4_k} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_k.log
|
(time ./bin/llama-cli --model ${model_q4_k} -t 1 -ngl 999 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q4_k.log
|
||||||
(time ./bin/llama-cli -no-cnv --model ${model_q5_k} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_k.log
|
(time ./bin/llama-cli --model ${model_q5_k} -t 1 -ngl 999 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q5_k.log
|
||||||
(time ./bin/llama-cli -no-cnv --model ${model_q6_k} -t 1 -ngl 99 -c 0 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q6_k.log
|
(time ./bin/llama-cli --model ${model_q6_k} -t 1 -ngl 999 -s 1234 -n 256 --ignore-eos -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q6_k.log
|
||||||
|
|
||||||
(time ./bin/llama-perplexity --model ${model_f16} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-f16.log
|
(time ./bin/llama-perplexity --model ${model_f16} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-f16.log
|
||||||
(time ./bin/llama-perplexity --model ${model_q8_0} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q8_0.log
|
(time ./bin/llama-perplexity --model ${model_q8_0} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q8_0.log
|
||||||
(time ./bin/llama-perplexity --model ${model_q4_0} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_0.log
|
(time ./bin/llama-perplexity --model ${model_q4_0} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_0.log
|
||||||
(time ./bin/llama-perplexity --model ${model_q4_1} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_1.log
|
(time ./bin/llama-perplexity --model ${model_q4_1} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_1.log
|
||||||
(time ./bin/llama-perplexity --model ${model_q5_0} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_0.log
|
(time ./bin/llama-perplexity --model ${model_q5_0} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_0.log
|
||||||
(time ./bin/llama-perplexity --model ${model_q5_1} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_1.log
|
(time ./bin/llama-perplexity --model ${model_q5_1} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_1.log
|
||||||
(time ./bin/llama-perplexity --model ${model_q2_k} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q2_k.log
|
(time ./bin/llama-perplexity --model ${model_q2_k} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q2_k.log
|
||||||
(time ./bin/llama-perplexity --model ${model_q3_k} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q3_k.log
|
(time ./bin/llama-perplexity --model ${model_q3_k} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q3_k.log
|
||||||
(time ./bin/llama-perplexity --model ${model_q4_k} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_k.log
|
(time ./bin/llama-perplexity --model ${model_q4_k} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q4_k.log
|
||||||
(time ./bin/llama-perplexity --model ${model_q5_k} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_k.log
|
(time ./bin/llama-perplexity --model ${model_q5_k} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q5_k.log
|
||||||
(time ./bin/llama-perplexity --model ${model_q6_k} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q6_k.log
|
(time ./bin/llama-perplexity --model ${model_q6_k} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-tg-q6_k.log
|
||||||
|
|
||||||
(time ./bin/llama-imatrix --model ${model_f16} -f ${wiki_test} -t 1 -ngl 99 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-imatrix.log
|
(time ./bin/llama-imatrix --model ${model_f16} -f ${wiki_test} -t 1 -ngl 999 -c 2048 -b 512 --chunks 4 ) 2>&1 | tee -a $OUT/${ci}-imatrix.log
|
||||||
|
|
||||||
(time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 10 -c 0 ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
|
(time ./bin/llama-save-load-state -ngl 10 --model ${model_q4_0} ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
|
||||||
(time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 10 -c 0 -fa ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
|
(time ./bin/llama-save-load-state -fa -ngl 10 --model ${model_q4_0} ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
|
||||||
(time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 99 -c 0 ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
|
(time ./bin/llama-save-load-state -ngl 99 --model ${model_q4_0} ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
|
||||||
(time ./bin/llama-save-load-state --model ${model_q4_0} -ngl 99 -c 0 -fa ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
|
(time ./bin/llama-save-load-state -fa -ngl 99 --model ${model_q4_0} ) 2>&1 | tee -a $OUT/${ci}-save-load-state.log
|
||||||
|
|
||||||
function check_ppl {
|
function check_ppl {
|
||||||
qnt="$1"
|
qnt="$1"
|
||||||
|
@ -697,7 +686,7 @@ function gg_run_embd_bge_small {
 set -e

 (time cmake -DCMAKE_BUILD_TYPE=Release ${CMAKE_EXTRA} .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log
-(time make -j$(nproc) ) 2>&1 | tee -a $OUT/${ci}-make.log
+(time make -j ) 2>&1 | tee -a $OUT/${ci}-make.log

 python3 ../convert_hf_to_gguf.py ${path_models} --outfile ${path_models}/ggml-model-f16.gguf

@ -706,8 +695,8 @@ function gg_run_embd_bge_small {

 ./bin/llama-quantize ${model_f16} ${model_q8_0} q8_0

-(time ./bin/llama-embedding --model ${model_f16} -p "I believe the meaning of life is" -ngl 99 -c 0 ) 2>&1 | tee -a $OUT/${ci}-tg-f16.log
+(time ./bin/llama-embedding --model ${model_f16} -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-f16.log
-(time ./bin/llama-embedding --model ${model_q8_0} -p "I believe the meaning of life is" -ngl 99 -c 0 ) 2>&1 | tee -a $OUT/${ci}-tg-q8_0.log
+(time ./bin/llama-embedding --model ${model_q8_0} -p "I believe the meaning of life is" ) 2>&1 | tee -a $OUT/${ci}-tg-q8_0.log

 set +e
 }
@ -721,92 +710,8 @@ function gg_sum_embd_bge_small {
|
||||||
gg_printf '- q8_0:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q8_0.log)"
|
gg_printf '- q8_0:\n```\n%s\n```\n' "$(cat $OUT/${ci}-tg-q8_0.log)"
|
||||||
}
|
}
|
||||||
|
|
||||||
# rerank_tiny
|
|
||||||
|
|
||||||
function gg_run_rerank_tiny {
|
|
||||||
cd ${SRC}
|
|
||||||
|
|
||||||
gg_wget models-mnt/rerank-tiny/ https://huggingface.co/jinaai/jina-reranker-v1-tiny-en/raw/main/config.json
|
|
||||||
gg_wget models-mnt/rerank-tiny/ https://huggingface.co/jinaai/jina-reranker-v1-tiny-en/raw/main/tokenizer.json
|
|
||||||
gg_wget models-mnt/rerank-tiny/ https://huggingface.co/jinaai/jina-reranker-v1-tiny-en/raw/main/tokenizer_config.json
|
|
||||||
gg_wget models-mnt/rerank-tiny/ https://huggingface.co/jinaai/jina-reranker-v1-tiny-en/raw/main/special_tokens_map.json
|
|
||||||
gg_wget models-mnt/rerank-tiny/ https://huggingface.co/jinaai/jina-reranker-v1-tiny-en/resolve/main/pytorch_model.bin
|
|
||||||
gg_wget models-mnt/rerank-tiny/ https://huggingface.co/jinaai/jina-reranker-v1-tiny-en/raw/main/sentence_bert_config.json
|
|
||||||
gg_wget models-mnt/rerank-tiny/ https://huggingface.co/jinaai/jina-reranker-v1-tiny-en/raw/main/vocab.txt
|
|
||||||
gg_wget models-mnt/rerank-tiny/ https://huggingface.co/jinaai/jina-reranker-v1-tiny-en/raw/main/modules.json
|
|
||||||
gg_wget models-mnt/rerank-tiny/ https://huggingface.co/jinaai/jina-reranker-v1-tiny-en/raw/main/config.json
|
|
||||||
|
|
||||||
gg_wget models-mnt/rerank-tiny/1_Pooling https://huggingface.co/jinaai/jina-reranker-v1-tiny-en/raw/main/1_Pooling/config.json
|
|
||||||
|
|
||||||
path_models="../models-mnt/rerank-tiny"
|
|
||||||
|
|
||||||
rm -rf build-ci-release && mkdir build-ci-release && cd build-ci-release
|
|
||||||
|
|
||||||
set -e
|
|
||||||
|
|
||||||
(time cmake -DCMAKE_BUILD_TYPE=Release ${CMAKE_EXTRA} .. ) 2>&1 | tee -a $OUT/${ci}-cmake.log
|
|
||||||
(time make -j$(nproc) ) 2>&1 | tee -a $OUT/${ci}-make.log
|
|
||||||
|
|
||||||
python3 ../convert_hf_to_gguf.py ${path_models} --outfile ${path_models}/ggml-model-f16.gguf
|
|
||||||
|
|
||||||
model_f16="${path_models}/ggml-model-f16.gguf"
|
|
||||||
|
|
||||||
# for this model, the SEP token is "</s>"
|
|
||||||
(time ./bin/llama-embedding --model ${model_f16} -p "what is panda?</s></s>hi\nwhat is panda?</s></s>it's a bear\nwhat is panda?</s></s>The giant panda (Ailuropoda melanoleuca), sometimes called a panda bear or simply panda, is a bear species endemic to China." -ngl 99 -c 0 --pooling rank --embd-normalize -1 --verbose-prompt) 2>&1 | tee -a $OUT/${ci}-rk-f16.log
|
|
||||||
|
|
||||||
# sample output
|
|
||||||
# rerank score 0: 0.029
|
|
||||||
# rerank score 1: 0.029
|
|
||||||
# rerank score 2: 0.135
|
|
||||||
|
|
||||||
# check that the score is in the range [$3, $4]
|
|
||||||
function check_score {
|
|
||||||
qnt="$1"
|
|
||||||
score=$(echo "$2" | grep -oE "[0-9]+\.[0-9]+" | tail -n 1)
|
|
||||||
|
|
||||||
if [ $(echo "$score < $3" | bc) -eq 1 ] || [ $(echo "$score > $4" | bc) -eq 1 ]; then
|
|
||||||
printf ' - %s @ %s (FAIL: score not in range [%s, %s])\n' "$qnt" "$score" "$3" "$4"
|
|
||||||
return 20
|
|
||||||
fi
|
|
||||||
|
|
||||||
printf ' - %s @ %s OK\n' "$qnt" "$score"
|
|
||||||
return 0
|
|
||||||
}
|
|
||||||
|
|
||||||
check_score "rerank score 0" "$(cat $OUT/${ci}-rk-f16.log | grep "rerank score 0")" "0.00" "0.05" | tee -a $OUT/${ci}-rk-f16.log
|
|
||||||
check_score "rerank score 1" "$(cat $OUT/${ci}-rk-f16.log | grep "rerank score 1")" "0.00" "0.05" | tee -a $OUT/${ci}-rk-f16.log
|
|
||||||
check_score "rerank score 2" "$(cat $OUT/${ci}-rk-f16.log | grep "rerank score 2")" "0.10" "0.30" | tee -a $OUT/${ci}-rk-f16.log
|
|
||||||
|
|
||||||
set +e
|
|
||||||
}
|
|
||||||
|
|
||||||
function gg_sum_rerank_tiny {
|
|
||||||
gg_printf '### %s\n\n' "${ci}"
|
|
||||||
|
|
||||||
gg_printf 'Rerank Tiny (Jina):\n'
|
|
||||||
gg_printf '- status: %s\n' "$(cat $OUT/${ci}.exit)"
|
|
||||||
gg_printf '- f16: \n```\n%s\n```\n' "$(cat $OUT/${ci}-rk-f16.log)"
|
|
||||||
}
|
|
||||||
|
|
||||||
function gg_check_build_requirements {
|
|
||||||
if ! command -v cmake &> /dev/null; then
|
|
||||||
gg_printf 'cmake not found, please install'
|
|
||||||
fi
|
|
||||||
|
|
||||||
if ! command -v make &> /dev/null; then
|
|
||||||
gg_printf 'make not found, please install'
|
|
||||||
fi
|
|
||||||
|
|
||||||
if ! command -v ctest &> /dev/null; then
|
|
||||||
gg_printf 'ctest not found, please install'
|
|
||||||
fi
|
|
||||||
}
|
|
||||||
|
|
||||||
## main
|
## main
|
||||||
|
|
||||||
export LLAMA_LOG_PREFIX=1
|
|
||||||
export LLAMA_LOG_TIMESTAMPS=1
|
|
||||||
|
|
||||||
if [ -z ${GG_BUILD_LOW_PERF} ]; then
|
if [ -z ${GG_BUILD_LOW_PERF} ]; then
|
||||||
# Create symlink: ./llama.cpp/models-mnt -> $MNT/models/models-mnt
|
# Create symlink: ./llama.cpp/models-mnt -> $MNT/models/models-mnt
|
||||||
rm -rf ${SRC}/models-mnt
|
rm -rf ${SRC}/models-mnt
|
||||||
|
@ -815,10 +720,7 @@ if [ -z ${GG_BUILD_LOW_PERF} ]; then
 ln -sfn ${mnt_models} ${SRC}/models-mnt

 # Create a fresh python3 venv and enter it
-if ! python3 -m venv "$MNT/venv"; then
-echo "Error: Failed to create Python virtual environment at $MNT/venv."
-exit 1
-fi
+python3 -m venv "$MNT/venv"
 source "$MNT/venv/bin/activate"

 pip install -r ${SRC}/requirements.txt --disable-pip-version-check

@ -832,7 +734,6 @@ test $ret -eq 0 && gg_run ctest_release

 if [ -z ${GG_BUILD_LOW_PERF} ]; then
 test $ret -eq 0 && gg_run embd_bge_small
-test $ret -eq 0 && gg_run rerank_tiny

 if [ -z ${GG_BUILD_CLOUD} ] || [ ${GG_BUILD_EXTRA_TESTS_0} ]; then
 test $ret -eq 0 && gg_run test_scripts_debug
@ -840,7 +741,7 @@ if [ -z ${GG_BUILD_LOW_PERF} ]; then
 fi

 if [ -z ${GG_BUILD_VRAM_GB} ] || [ ${GG_BUILD_VRAM_GB} -ge 8 ]; then
-if [ -z ${GG_BUILD_CUDA} ] && [ -z ${GG_BUILD_VULKAN} ]; then
+if [ -z ${GG_BUILD_CUDA} ]; then
 test $ret -eq 0 && gg_run pythia_1_4b
 else
 test $ret -eq 0 && gg_run pythia_2_8b
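All of these gates are plain environment variables, so selecting the larger Pythia run on a GPU machine is just a matter of exporting them before the script starts. The values below are illustrative, not taken from the diff:

# sketch: advertise a CUDA build and at least 8 GB of VRAM so the pythia_2_8b path is taken
GG_BUILD_CUDA=1 GG_BUILD_VRAM_GB=16 bash ./ci/run.sh ./tmp/results ./tmp/mnt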
@ -1,16 +0,0 @@
-set( CMAKE_SYSTEM_NAME Darwin )
-set( CMAKE_SYSTEM_PROCESSOR arm64 )
-
-set( target arm64-apple-darwin-macho )
-
-set( CMAKE_C_COMPILER clang )
-set( CMAKE_CXX_COMPILER clang++ )
-
-set( CMAKE_C_COMPILER_TARGET ${target} )
-set( CMAKE_CXX_COMPILER_TARGET ${target} )
-
-set( arch_c_flags "-march=armv8.4-a -fvectorize -ffp-model=fast -fno-finite-math-only" )
-set( warn_c_flags "-Wno-format -Wno-unused-variable -Wno-unused-function" )
-
-set( CMAKE_C_FLAGS_INIT "${arch_c_flags} ${warn_c_flags}" )
-set( CMAKE_CXX_FLAGS_INIT "${arch_c_flags} ${warn_c_flags}" )
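The block above is a deleted CMake toolchain description for arm64 macOS. Toolchain files like this are normally handed to CMake at configure time rather than included from a CMakeLists; a minimal sketch follows, with the file path assumed for illustration since the diff view does not show it:

# cross-configure using a toolchain file (path is hypothetical)
cmake -B build-arm64 -DCMAKE_TOOLCHAIN_FILE=cmake/arm64-apple-clang.cmake -DCMAKE_BUILD_TYPE=Release
cmake --build build-arm64 -j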
@ -44,7 +44,7 @@ if(MSVC)
 set(BUILD_TARGET ${CMAKE_VS_PLATFORM_NAME})
 else()
 execute_process(
-COMMAND sh -c "\"$@\" --version | head -1" _ ${CMAKE_C_COMPILER}
+COMMAND sh -c "$@ --version | head -1" _ ${CMAKE_C_COMPILER}
 OUTPUT_VARIABLE OUT
 OUTPUT_STRIP_TRAILING_WHITESPACE
 )
|
@ -1,33 +0,0 @@
|
||||||
function(llama_add_compile_flags)
|
|
||||||
if (LLAMA_FATAL_WARNINGS)
|
|
||||||
if (CMAKE_CXX_COMPILER_ID MATCHES "GNU" OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
|
|
||||||
list(APPEND C_FLAGS -Werror)
|
|
||||||
list(APPEND CXX_FLAGS -Werror)
|
|
||||||
elseif (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC")
|
|
||||||
add_compile_options(/WX)
|
|
||||||
endif()
|
|
||||||
endif()
|
|
||||||
|
|
||||||
if (LLAMA_ALL_WARNINGS)
|
|
||||||
if (NOT MSVC)
|
|
||||||
list(APPEND C_FLAGS -Wshadow -Wstrict-prototypes -Wpointer-arith -Wmissing-prototypes
|
|
||||||
-Werror=implicit-int -Werror=implicit-function-declaration)
|
|
||||||
|
|
||||||
list(APPEND CXX_FLAGS -Wmissing-declarations -Wmissing-noreturn)
|
|
||||||
|
|
||||||
list(APPEND WARNING_FLAGS -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function)
|
|
||||||
|
|
||||||
list(APPEND C_FLAGS ${WARNING_FLAGS})
|
|
||||||
list(APPEND CXX_FLAGS ${WARNING_FLAGS})
|
|
||||||
|
|
||||||
ggml_get_flags(${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION})
|
|
||||||
|
|
||||||
add_compile_options("$<$<COMPILE_LANGUAGE:C>:${C_FLAGS};${GF_C_FLAGS}>"
|
|
||||||
"$<$<COMPILE_LANGUAGE:CXX>:${CXX_FLAGS};${GF_CXX_FLAGS}>")
|
|
||||||
else()
|
|
||||||
# todo : msvc
|
|
||||||
set(C_FLAGS "" PARENT_SCOPE)
|
|
||||||
set(CXX_FLAGS "" PARENT_SCOPE)
|
|
||||||
endif()
|
|
||||||
endif()
|
|
||||||
endfunction()
|
|
|
@ -3,28 +3,63 @@ set(LLAMA_BUILD_COMMIT @LLAMA_BUILD_COMMIT@)
 set(LLAMA_BUILD_NUMBER @LLAMA_BUILD_NUMBER@)
 set(LLAMA_SHARED_LIB @BUILD_SHARED_LIBS@)

+set(GGML_BLAS @GGML_BLAS@)
+set(GGML_CUDA @GGML_CUDA@)
+set(GGML_METAL @GGML_METAL@)
+set(GGML_HIPBLAS @GGML_HIPBLAS@)
+set(GGML_ACCELERATE @GGML_ACCELERATE@)

 @PACKAGE_INIT@

 set_and_check(LLAMA_INCLUDE_DIR "@PACKAGE_LLAMA_INCLUDE_INSTALL_DIR@")
 set_and_check(LLAMA_LIB_DIR "@PACKAGE_LLAMA_LIB_INSTALL_DIR@")
 set_and_check(LLAMA_BIN_DIR "@PACKAGE_LLAMA_BIN_INSTALL_DIR@")

-find_package(ggml REQUIRED HINTS ${LLAMA_LIB_DIR}/cmake)
+# Ensure transient dependencies satisfied
+
+find_package(Threads REQUIRED)
+
+if (APPLE AND GGML_ACCELERATE)
+find_library(ACCELERATE_FRAMEWORK Accelerate REQUIRED)
+endif()
+
+if (GGML_BLAS)
+find_package(BLAS REQUIRED)
+endif()
+
+if (GGML_CUDA)
+find_package(CUDAToolkit REQUIRED)
+endif()
+
+if (GGML_METAL)
+find_library(FOUNDATION_LIBRARY Foundation REQUIRED)
+find_library(METAL_FRAMEWORK Metal REQUIRED)
+find_library(METALKIT_FRAMEWORK MetalKit REQUIRED)
+endif()
+
+if (GGML_HIPBLAS)
+find_package(hip REQUIRED)
+find_package(hipblas REQUIRED)
+find_package(rocblas REQUIRED)
+endif()

 find_library(llama_LIBRARY llama
 REQUIRED
-HINTS ${LLAMA_LIB_DIR}
-NO_CMAKE_FIND_ROOT_PATH
-)
+HINTS ${LLAMA_LIB_DIR})
+
+set(_llama_link_deps "Threads::Threads" "@LLAMA_EXTRA_LIBS@")
+set(_llama_transient_defines "@LLAMA_TRANSIENT_DEFINES@")

 add_library(llama UNKNOWN IMPORTED)

 set_target_properties(llama
 PROPERTIES
 INTERFACE_INCLUDE_DIRECTORIES "${LLAMA_INCLUDE_DIR}"
-INTERFACE_LINK_LIBRARIES "ggml::ggml;ggml::ggml-base;"
+INTERFACE_LINK_LIBRARIES "${_llama_link_deps}"
+INTERFACE_COMPILE_DEFINITIONS "${_llama_transient_defines}"
 IMPORTED_LINK_INTERFACE_LANGUAGES "CXX"
 IMPORTED_LOCATION "${llama_LIBRARY}"
-INTERFACE_COMPILE_FEATURES c_std_90
+INTERFACE_COMPILE_FEATURES cxx_std_11
-POSITION_INDEPENDENT_CODE ON)
+POSITION_INDEPENDENT_CODE ON )

 check_required_components(Llama)
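Both versions of the config file end by defining an imported llama target and calling check_required_components(Llama), so a downstream build consumes them the same way: install the library under some prefix, point CMake at that prefix, and let find_package(Llama) pick up the config. The prefix below is purely illustrative:

# sketch: configure a client project against an installed copy (prefix is hypothetical)
cmake -B build -DCMAKE_PREFIX_PATH=/opt/llama
cmake --build build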
@ -1,10 +1,10 @@
 prefix=@CMAKE_INSTALL_PREFIX@
-exec_prefix=@CMAKE_INSTALL_PREFIX@
+exec_prefix=${prefix}
-libdir=@CMAKE_INSTALL_FULL_LIBDIR@
+libdir=${exec_prefix}/lib
-includedir=@CMAKE_INSTALL_FULL_INCLUDEDIR@
+includedir=${prefix}/include

 Name: llama
 Description: Port of Facebook's LLaMA model in C/C++
-Version: @LLAMA_INSTALL_VERSION@
+Version: @PROJECT_VERSION@
-Libs: -L${libdir} -lggml -lggml-base -lllama
+Libs: -L${libdir} -lllama
 Cflags: -I${includedir}
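Whichever side of this hunk gets installed, the .pc file lets a client query the correct compile and link flags through pkg-config instead of hard-coding the library list. A minimal sketch, with main.c standing in for any program that includes llama.h:

# compile and link against the installed library via pkg-config (main.c is a placeholder)
cc main.c $(pkg-config --cflags --libs llama) -o main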
@ -1,11 +0,0 @@
-set( CMAKE_SYSTEM_NAME Windows )
-set( CMAKE_SYSTEM_PROCESSOR x86_64 )
-
-set( CMAKE_C_COMPILER clang )
-set( CMAKE_CXX_COMPILER clang++ )
-
-set( arch_c_flags "-march=native" )
-
-set( CMAKE_C_FLAGS_INIT "${arch_c_flags}" )
-set( CMAKE_CXX_FLAGS_INIT "${arch_c_flags}" )
|
@ -2,8 +2,6 @@
|
||||||
|
|
||||||
find_package(Threads REQUIRED)
|
find_package(Threads REQUIRED)
|
||||||
|
|
||||||
llama_add_compile_flags()
|
|
||||||
|
|
||||||
# Build info header
|
# Build info header
|
||||||
#
|
#
|
||||||
|
|
||||||
|
@ -53,28 +51,21 @@ endif()
 set(TARGET common)

 add_library(${TARGET} STATIC
-arg.cpp
-arg.h
 base64.hpp
-chat.cpp
-chat.hpp
-chat-template.hpp
-common.cpp
 common.h
-console.cpp
+common.cpp
-console.h
-json-schema-to-grammar.cpp
-json.hpp
-llguidance.cpp
-log.cpp
-log.h
-minja.hpp
-ngram-cache.cpp
-ngram-cache.h
-sampling.cpp
 sampling.h
-speculative.cpp
+sampling.cpp
-speculative.h
+console.h
+console.cpp
+grammar-parser.h
+grammar-parser.cpp
+json.hpp
+json-schema-to-grammar.cpp
+train.h
+train.cpp
+ngram-cache.h
+ngram-cache.cpp
 )

 if (BUILD_SHARED_LIBS)
@ -86,39 +77,12 @@ set(LLAMA_COMMON_EXTRA_LIBS build_info)
|
||||||
# Use curl to download model url
|
# Use curl to download model url
|
||||||
if (LLAMA_CURL)
|
if (LLAMA_CURL)
|
||||||
find_package(CURL REQUIRED)
|
find_package(CURL REQUIRED)
|
||||||
target_compile_definitions(${TARGET} PUBLIC LLAMA_USE_CURL)
|
add_definitions(-DLLAMA_USE_CURL)
|
||||||
include_directories(${CURL_INCLUDE_DIRS})
|
include_directories(${CURL_INCLUDE_DIRS})
|
||||||
find_library(CURL_LIBRARY curl REQUIRED)
|
find_library(CURL_LIBRARY curl REQUIRED)
|
||||||
set(LLAMA_COMMON_EXTRA_LIBS ${LLAMA_COMMON_EXTRA_LIBS} ${CURL_LIBRARY})
|
set(LLAMA_COMMON_EXTRA_LIBS ${LLAMA_COMMON_EXTRA_LIBS} ${CURL_LIBRARY})
|
||||||
endif ()
|
endif ()
|
||||||
|
|
||||||
if (LLAMA_LLGUIDANCE)
|
|
||||||
include(ExternalProject)
|
|
||||||
set(LLGUIDANCE_SRC ${CMAKE_BINARY_DIR}/llguidance/source)
|
|
||||||
set(LLGUIDANCE_PATH ${LLGUIDANCE_SRC}/target/release)
|
|
||||||
ExternalProject_Add(llguidance_ext
|
|
||||||
GIT_REPOSITORY https://github.com/guidance-ai/llguidance
|
|
||||||
# v0.6.12:
|
|
||||||
GIT_TAG ced1c9023d47ec194fa977932d35ce65c2ebfc09
|
|
||||||
PREFIX ${CMAKE_BINARY_DIR}/llguidance
|
|
||||||
SOURCE_DIR ${LLGUIDANCE_SRC}
|
|
||||||
BUILD_IN_SOURCE TRUE
|
|
||||||
CONFIGURE_COMMAND ""
|
|
||||||
BUILD_COMMAND cargo build --release
|
|
||||||
INSTALL_COMMAND ""
|
|
||||||
BUILD_BYPRODUCTS ${LLGUIDANCE_PATH}/libllguidance.a ${LLGUIDANCE_PATH}/llguidance.h
|
|
||||||
UPDATE_COMMAND ""
|
|
||||||
)
|
|
||||||
target_compile_definitions(${TARGET} PUBLIC LLAMA_USE_LLGUIDANCE)
|
|
||||||
|
|
||||||
add_library(llguidance STATIC IMPORTED)
|
|
||||||
set_target_properties(llguidance PROPERTIES IMPORTED_LOCATION ${LLGUIDANCE_PATH}/libllguidance.a)
|
|
||||||
add_dependencies(llguidance llguidance_ext)
|
|
||||||
|
|
||||||
target_include_directories(${TARGET} PRIVATE ${LLGUIDANCE_PATH})
|
|
||||||
set(LLAMA_COMMON_EXTRA_LIBS ${LLAMA_COMMON_EXTRA_LIBS} llguidance)
|
|
||||||
endif ()
|
|
||||||
|
|
||||||
target_include_directories(${TARGET} PUBLIC .)
|
target_include_directories(${TARGET} PUBLIC .)
|
||||||
target_compile_features (${TARGET} PUBLIC cxx_std_17)
|
target_compile_features (${TARGET} PUBLIC cxx_std_11)
|
||||||
target_link_libraries (${TARGET} PRIVATE ${LLAMA_COMMON_EXTRA_LIBS} PUBLIC llama Threads::Threads)
|
target_link_libraries (${TARGET} PRIVATE ${LLAMA_COMMON_EXTRA_LIBS} PUBLIC llama Threads::Threads)
|
||||||
|
|
common/arg.cpp: 2370 changed lines (diff suppressed because it is too large)
common/arg.h: 80 changed lines
|
@ -1,80 +0,0 @@
|
||||||
#pragma once
|
|
||||||
|
|
||||||
#include "common.h"
|
|
||||||
|
|
||||||
#include <set>
|
|
||||||
#include <string>
|
|
||||||
#include <vector>
|
|
||||||
|
|
||||||
//
|
|
||||||
// CLI argument parsing
|
|
||||||
//
|
|
||||||
|
|
||||||
struct common_arg {
|
|
||||||
std::set<enum llama_example> examples = {LLAMA_EXAMPLE_COMMON};
|
|
||||||
std::set<enum llama_example> excludes = {};
|
|
||||||
std::vector<const char *> args;
|
|
||||||
const char * value_hint = nullptr; // help text or example for arg value
|
|
||||||
const char * value_hint_2 = nullptr; // for second arg value
|
|
||||||
const char * env = nullptr;
|
|
||||||
std::string help;
|
|
||||||
bool is_sparam = false; // is current arg a sampling param?
|
|
||||||
void (*handler_void) (common_params & params) = nullptr;
|
|
||||||
void (*handler_string) (common_params & params, const std::string &) = nullptr;
|
|
||||||
void (*handler_str_str)(common_params & params, const std::string &, const std::string &) = nullptr;
|
|
||||||
void (*handler_int) (common_params & params, int) = nullptr;
|
|
||||||
|
|
||||||
common_arg(
|
|
||||||
const std::initializer_list<const char *> & args,
|
|
||||||
const char * value_hint,
|
|
||||||
const std::string & help,
|
|
||||||
void (*handler)(common_params & params, const std::string &)
|
|
||||||
) : args(args), value_hint(value_hint), help(help), handler_string(handler) {}
|
|
||||||
|
|
||||||
common_arg(
|
|
||||||
const std::initializer_list<const char *> & args,
|
|
||||||
const char * value_hint,
|
|
||||||
const std::string & help,
|
|
||||||
void (*handler)(common_params & params, int)
|
|
||||||
) : args(args), value_hint(value_hint), help(help), handler_int(handler) {}
|
|
||||||
|
|
||||||
common_arg(
|
|
||||||
const std::initializer_list<const char *> & args,
|
|
||||||
const std::string & help,
|
|
||||||
void (*handler)(common_params & params)
|
|
||||||
) : args(args), help(help), handler_void(handler) {}
|
|
||||||
|
|
||||||
// support 2 values for arg
|
|
||||||
common_arg(
|
|
||||||
const std::initializer_list<const char *> & args,
|
|
||||||
const char * value_hint,
|
|
||||||
const char * value_hint_2,
|
|
||||||
const std::string & help,
|
|
||||||
void (*handler)(common_params & params, const std::string &, const std::string &)
|
|
||||||
) : args(args), value_hint(value_hint), value_hint_2(value_hint_2), help(help), handler_str_str(handler) {}
|
|
||||||
|
|
||||||
common_arg & set_examples(std::initializer_list<enum llama_example> examples);
|
|
||||||
common_arg & set_excludes(std::initializer_list<enum llama_example> excludes);
|
|
||||||
common_arg & set_env(const char * env);
|
|
||||||
common_arg & set_sparam();
|
|
||||||
bool in_example(enum llama_example ex);
|
|
||||||
bool is_exclude(enum llama_example ex);
|
|
||||||
bool get_value_from_env(std::string & output);
|
|
||||||
bool has_value_from_env();
|
|
||||||
std::string to_string();
|
|
||||||
};
|
|
||||||
|
|
||||||
struct common_params_context {
|
|
||||||
enum llama_example ex = LLAMA_EXAMPLE_COMMON;
|
|
||||||
common_params & params;
|
|
||||||
std::vector<common_arg> options;
|
|
||||||
void(*print_usage)(int, char **) = nullptr;
|
|
||||||
common_params_context(common_params & params) : params(params) {}
|
|
||||||
};
|
|
||||||
|
|
||||||
// parse input arguments from CLI
|
|
||||||
// if one argument has invalid value, it will automatically display usage of the specific argument (and not the full usage message)
|
|
||||||
bool common_params_parse(int argc, char ** argv, common_params & params, llama_example ex, void(*print_usage)(int, char **) = nullptr);
|
|
||||||
|
|
||||||
// function to be used by test-arg-parser
|
|
||||||
common_params_context common_params_parser_init(common_params & params, llama_example ex, void(*print_usage)(int, char **) = nullptr);
|
|
|
@ -1,529 +0,0 @@
|
||||||
/*
|
|
||||||
Copyright 2024 Google LLC
|
|
||||||
|
|
||||||
Use of this source code is governed by an MIT-style
|
|
||||||
license that can be found in the LICENSE file or at
|
|
||||||
https://opensource.org/licenses/MIT.
|
|
||||||
*/
|
|
||||||
// SPDX-License-Identifier: MIT
|
|
||||||
#pragma once
|
|
||||||
|
|
||||||
#include "minja.hpp"
|
|
||||||
#include <json.hpp>
|
|
||||||
#include <string>
|
|
||||||
#include <vector>
|
|
||||||
|
|
||||||
using json = nlohmann::ordered_json;
|
|
||||||
|
|
||||||
namespace minja {
|
|
||||||
|
|
||||||
struct chat_template_caps {
|
|
||||||
bool supports_tools = false;
|
|
||||||
bool supports_tool_calls = false;
|
|
||||||
bool supports_tool_responses = false;
|
|
||||||
bool supports_system_role = false;
|
|
||||||
bool supports_parallel_tool_calls = false;
|
|
||||||
bool supports_tool_call_id = false;
|
|
||||||
// meta-llama/Llama-3.1-8B-Instruct expects arguments to be an object.
|
|
||||||
// Most other templates (and OpenAI's API) expect the arguments object to be stringified.
|
|
||||||
bool requires_object_arguments = false;
|
|
||||||
// CohereForAI/c4ai-command-r-plus simple variant
|
|
||||||
bool requires_non_null_content = false;
|
|
||||||
// MiniMaxAI/MiniMax-Text-01 special
|
|
||||||
bool requires_typed_content = false;
|
|
||||||
};
|
|
||||||
|
|
||||||
struct chat_template_inputs {
|
|
||||||
nlohmann::ordered_json messages;
|
|
||||||
nlohmann::ordered_json tools;
|
|
||||||
bool add_generation_prompt = true;
|
|
||||||
nlohmann::ordered_json extra_context;
|
|
||||||
std::chrono::system_clock::time_point now = std::chrono::system_clock::now();
|
|
||||||
};
|
|
||||||
|
|
||||||
struct chat_template_options {
|
|
||||||
bool apply_polyfills = true;
|
|
||||||
bool use_bos_token = true;
|
|
||||||
bool use_eos_token = true;
|
|
||||||
bool define_strftime_now = true;
|
|
||||||
|
|
||||||
bool polyfill_tools = true;
|
|
||||||
bool polyfill_tool_call_examples = true;
|
|
||||||
bool polyfill_tool_calls = true;
|
|
||||||
bool polyfill_tool_responses = true;
|
|
||||||
bool polyfill_system_role = true;
|
|
||||||
bool polyfill_object_arguments = true;
|
|
||||||
bool polyfill_typed_content = true;
|
|
||||||
};
|
|
||||||
|
|
||||||
class chat_template {
|
|
||||||
|
|
||||||
private:
|
|
||||||
chat_template_caps caps_;
|
|
||||||
std::string source_;
|
|
||||||
std::string bos_token_;
|
|
||||||
std::string eos_token_;
|
|
||||||
std::shared_ptr<minja::TemplateNode> template_root_;
|
|
||||||
std::string tool_call_example_;
|
|
||||||
|
|
||||||
std::string try_raw_render(
|
|
||||||
const nlohmann::ordered_json & messages,
|
|
||||||
const nlohmann::ordered_json & tools,
|
|
||||||
bool add_generation_prompt,
|
|
||||||
const nlohmann::ordered_json & extra_context = nlohmann::ordered_json()) const
|
|
||||||
{
|
|
||||||
try {
|
|
||||||
chat_template_inputs inputs;
|
|
||||||
inputs.messages = messages;
|
|
||||||
inputs.tools = tools;
|
|
||||||
inputs.add_generation_prompt = add_generation_prompt;
|
|
||||||
inputs.extra_context = extra_context;
|
|
||||||
// Use fixed date for tests
|
|
||||||
inputs.now = std::chrono::system_clock::from_time_t(0);
|
|
||||||
|
|
||||||
chat_template_options opts;
|
|
||||||
opts.apply_polyfills = false;
|
|
||||||
|
|
||||||
auto prompt = apply(inputs, opts);
|
|
||||||
// fprintf(stderr, "try_raw_render: %s\n", prompt.c_str());
|
|
||||||
return prompt;
|
|
||||||
} catch (const std::exception & e) {
|
|
||||||
// fprintf(stderr, "try_raw_render error: %s\n", e.what());
|
|
||||||
return "";
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
public:
|
|
||||||
|
|
||||||
chat_template(const std::string & source, const std::string & bos_token, const std::string & eos_token)
|
|
||||||
: source_(source), bos_token_(bos_token), eos_token_(eos_token)
|
|
||||||
{
|
|
||||||
template_root_ = minja::Parser::parse(source_, {
|
|
||||||
/* .trim_blocks = */ true,
|
|
||||||
/* .lstrip_blocks = */ true,
|
|
||||||
/* .keep_trailing_newline = */ false,
|
|
||||||
});
|
|
||||||
|
|
||||||
auto contains = [](const std::string & haystack, const std::string & needle) {
|
|
||||||
return haystack.find(needle) != std::string::npos;
|
|
||||||
};
|
|
||||||
|
|
||||||
const std::string user_needle = "<User Needle>";
|
|
||||||
const std::string sys_needle = "<System Needle>";
|
|
||||||
const json dummy_str_user_msg = {{"role", "user"}, {"content", user_needle}};
|
|
||||||
const json dummy_typed_user_msg = {{"role", "user"}, {"content", json::array({{{"type", "text"}, {"text", user_needle}}})}};
|
|
||||||
|
|
||||||
caps_.requires_typed_content =
|
|
||||||
!contains(try_raw_render(json::array({dummy_str_user_msg}), {}, false), user_needle)
|
|
||||||
&& contains(try_raw_render(json::array({dummy_typed_user_msg}), {}, false), user_needle);
|
|
||||||
|
|
||||||
const auto dummy_user_msg = caps_.requires_typed_content
|
|
||||||
? dummy_typed_user_msg
|
|
||||||
: dummy_str_user_msg;
|
|
||||||
const json needle_system_msg = {
|
|
||||||
{"role", "system"},
|
|
||||||
{"content", caps_.requires_typed_content ? json::array({{{"type", "text"}, {"text", sys_needle}}}) : json(sys_needle)},
|
|
||||||
};
|
|
||||||
|
|
||||||
caps_.supports_system_role = contains(try_raw_render({needle_system_msg, dummy_user_msg,}, {}, false), sys_needle);
|
|
||||||
|
|
||||||
auto out = try_raw_render(json::array({
|
|
||||||
dummy_user_msg
|
|
||||||
}), json::array({
|
|
||||||
{
|
|
||||||
{"name", "some_tool"},
|
|
||||||
{"type", "function"},
|
|
||||||
{"function", {
|
|
||||||
{"name", "some_tool"},
|
|
||||||
{"description", "Some tool."},
|
|
||||||
{"parameters", {
|
|
||||||
{"type", "object"},
|
|
||||||
{"properties", {
|
|
||||||
{"arg", {
|
|
||||||
{"type", "string"},
|
|
||||||
{"description", "Some argument."},
|
|
||||||
}},
|
|
||||||
}},
|
|
||||||
{"required", json::array({ "arg" })},
|
|
||||||
}},
|
|
||||||
}},
|
|
||||||
},
|
|
||||||
}), false);
|
|
||||||
caps_.supports_tools = contains(out, "some_tool");
|
|
||||||
|
|
||||||
auto make_tool_calls_msg = [&](const json & tool_calls) {
|
|
||||||
return json {
|
|
||||||
{"role", "assistant"},
|
|
||||||
{"content", nullptr},
|
|
||||||
{"tool_calls", tool_calls},
|
|
||||||
};
|
|
||||||
};
|
|
||||||
auto make_tool_call = [](const std::string & tool_name, const json & arguments) {
|
|
||||||
return json {
|
|
||||||
{"id", "call_1___"},
|
|
||||||
{"type", "function"},
|
|
||||||
{"function", {
|
|
||||||
{"arguments", arguments},
|
|
||||||
{"name", tool_name},
|
|
||||||
}},
|
|
||||||
};
|
|
||||||
};
|
|
||||||
const json dummy_args_obj {{"argument_needle", "print('Hello, World!')"}};
|
|
||||||
|
|
||||||
// Note: the arguments are rendered in both cases, but may be double-escaped, which we don't want.
|
|
||||||
out = try_raw_render(json::array({
|
|
||||||
dummy_user_msg,
|
|
||||||
make_tool_calls_msg(json::array({make_tool_call("ipython", dummy_args_obj.dump())})),
|
|
||||||
}), {}, false);
|
|
||||||
auto tool_call_renders_str_arguments = contains(out, "\"argument_needle\":") || contains(out, "'argument_needle':");
|
|
||||||
out = try_raw_render(json::array({
|
|
||||||
dummy_user_msg,
|
|
||||||
make_tool_calls_msg(json::array({make_tool_call("ipython", dummy_args_obj)})),
|
|
||||||
}), {}, false);
|
|
||||||
auto tool_call_renders_obj_arguments = contains(out, "\"argument_needle\":") || contains(out, "'argument_needle':");
|
|
||||||
|
|
||||||
caps_.supports_tool_calls = tool_call_renders_str_arguments || tool_call_renders_obj_arguments;
|
|
||||||
caps_.requires_object_arguments = !tool_call_renders_str_arguments && tool_call_renders_obj_arguments;
|
|
||||||
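// Detect templates that drop an assistant message entirely when its content is null
// (as opposed to an empty string).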
auto out_empty = try_raw_render(json::array({dummy_user_msg, {{"role", "assistant"}, {"content", ""}}}), {}, false);
|
|
||||||
auto out_null = try_raw_render(json::array({dummy_user_msg, {{"role", "assistant"}, {"content", nullptr}}}), {}, false);
|
|
||||||
caps_.requires_non_null_content = contains(out_empty, user_needle) && !contains(out_null, user_needle);
|
|
||||||
|
|
||||||
if (caps_.supports_tool_calls) {
|
|
||||||
auto dummy_args = caps_.requires_object_arguments ? dummy_args_obj : json(dummy_args_obj.dump());
|
|
||||||
auto tc1 = make_tool_call("test_tool1", dummy_args);
|
|
||||||
auto tc2 = make_tool_call("test_tool2", dummy_args);
|
|
||||||
auto out = try_raw_render(json::array({
|
|
||||||
dummy_user_msg,
|
|
||||||
make_tool_calls_msg(json::array({tc1, tc2})),
|
|
||||||
}), {}, false);
|
|
||||||
caps_.supports_parallel_tool_calls = contains(out, "test_tool1") && contains(out, "test_tool2");
|
|
||||||
|
|
||||||
out = try_raw_render(json::array({
|
|
||||||
dummy_user_msg,
|
|
||||||
make_tool_calls_msg(json::array({tc1})),
|
|
||||||
{
|
|
||||||
{"role", "tool"},
|
|
||||||
{"name", "test_tool1"},
|
|
||||||
{"content", "Some response!"},
|
|
||||||
{"tool_call_id", "call_911_"},
|
|
||||||
}
|
|
||||||
}), {}, false);
|
|
||||||
caps_.supports_tool_responses = contains(out, "Some response!");
|
|
||||||
caps_.supports_tool_call_id = contains(out, "call_911_");
|
|
||||||
}
|
|
||||||
|
|
||||||
try {
|
|
||||||
if (!caps_.supports_tools) {
|
|
||||||
const json user_msg {
|
|
||||||
{"role", "user"},
|
|
||||||
{"content", "Hey"},
|
|
||||||
};
|
|
||||||
const json args {
|
|
||||||
{"arg1", "some_value"},
|
|
||||||
};
|
|
||||||
const json tool_call_msg {
|
|
||||||
{"role", "assistant"},
|
|
||||||
{"content", nullptr},
|
|
||||||
{"tool_calls", json::array({
|
|
||||||
{
|
|
||||||
// TODO: detect if requires numerical id or fixed length == 6 like Nemo
|
|
||||||
{"id", "call_1___"},
|
|
||||||
{"type", "function"},
|
|
||||||
{"function", {
|
|
||||||
{"name", "tool_name"},
|
|
||||||
{"arguments", (caps_.requires_object_arguments ? args : json(minja::Value(args).dump(-1, /* to_json= */ true)))},
|
|
||||||
}},
|
|
||||||
},
|
|
||||||
})},
|
|
||||||
};
|
|
||||||
std::string prefix, full;
|
|
||||||
{
|
|
||||||
chat_template_inputs inputs;
|
|
||||||
inputs.messages = json::array({user_msg});
|
|
||||||
inputs.add_generation_prompt = true;
|
|
||||||
prefix = apply(inputs);
|
|
||||||
}
|
|
||||||
{
|
|
||||||
chat_template_inputs inputs;
|
|
||||||
inputs.messages = json::array({user_msg, tool_call_msg});
|
|
||||||
inputs.add_generation_prompt = false;
|
|
||||||
full = apply(inputs);
|
|
||||||
}
|
|
||||||
auto eos_pos_last = full.rfind(eos_token_);
|
|
||||||
if (eos_pos_last == prefix.size() - eos_token_.size() ||
|
|
||||||
(full[full.size() - 1] == '\n' && (eos_pos_last == full.size() - eos_token_.size() - 1))) {
|
|
||||||
full = full.substr(0, eos_pos_last);
|
|
||||||
}
|
|
||||||
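// Render the conversation once without and once with the assistant tool call, then strip the
// common prefix so that only the template's native tool call syntax remains as an example.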
size_t common_prefix_length = 0;
|
|
||||||
for (size_t i = 0; i < prefix.size() && i < full.size(); ++i) {
|
|
||||||
if (prefix[i] != full[i]) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
if (prefix[i] == '<') {
|
|
||||||
// DeepSeek R1's template (as of 20250209) adds a trailing <think> if add_generation_prompt,
|
|
||||||
// but it removes thinking tags for past messages.
|
|
||||||
// The prefix and full strings diverge at <think> vs. <|tool▁calls▁begin|>, so we avoid consuming the leading <.
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
common_prefix_length = i + 1;
|
|
||||||
}
|
|
||||||
auto example = full.substr(common_prefix_length);
|
|
||||||
if (example.find("tool_name") == std::string::npos && example.find("some_value") == std::string::npos) {
|
|
||||||
fprintf(stderr, "Failed to infer a tool call example (possible template bug)\n");
|
|
||||||
} else {
|
|
||||||
tool_call_example_ = example;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} catch (const std::exception & e) {
|
|
||||||
fprintf(stderr, "Failed to generate tool call example: %s\n", e.what());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
const std::string & source() const { return source_; }
|
|
||||||
const std::string & bos_token() const { return bos_token_; }
|
|
||||||
const std::string & eos_token() const { return eos_token_; }
|
|
||||||
const chat_template_caps & original_caps() const { return caps_; }
|
|
||||||
|
|
||||||
// Deprecated, please use the form with chat_template_inputs and chat_template_options
|
|
||||||
std::string apply(
|
|
||||||
const nlohmann::ordered_json & messages,
|
|
||||||
const nlohmann::ordered_json & tools,
|
|
||||||
bool add_generation_prompt,
|
|
||||||
const nlohmann::ordered_json & extra_context = nlohmann::ordered_json(),
|
|
||||||
bool apply_polyfills = true)
|
|
||||||
{
|
|
||||||
fprintf(stderr, "[%s] Deprecated!\n", __func__);
|
|
||||||
chat_template_inputs inputs;
|
|
||||||
inputs.messages = messages;
|
|
||||||
inputs.tools = tools;
|
|
||||||
inputs.add_generation_prompt = add_generation_prompt;
|
|
||||||
inputs.extra_context = extra_context;
|
|
||||||
inputs.now = std::chrono::system_clock::now();
|
|
||||||
|
|
||||||
chat_template_options opts;
|
|
||||||
opts.apply_polyfills = apply_polyfills;
|
|
||||||
|
|
||||||
return apply(inputs, opts);
|
|
||||||
}
|
|
||||||
|
|
||||||
std::string apply(
|
|
||||||
const chat_template_inputs & inputs,
|
|
||||||
const chat_template_options & opts = chat_template_options()) const
|
|
||||||
{
|
|
||||||
json actual_messages;
|
|
||||||
|
|
||||||
auto has_tools = inputs.tools.is_array() && !inputs.tools.empty();
|
|
||||||
auto has_tool_calls = false;
|
|
||||||
auto has_tool_responses = false;
|
|
||||||
auto has_string_content = false;
|
|
||||||
for (const auto & message : inputs.messages) {
|
|
||||||
if (message.contains("tool_calls") && !message["tool_calls"].is_null()) {
|
|
||||||
has_tool_calls = true;
|
|
||||||
}
|
|
||||||
if (message.contains("role") && message["role"] == "tool") {
|
|
||||||
has_tool_responses = true;
|
|
||||||
}
|
|
||||||
if (message.contains("content") && message["content"].is_string()) {
|
|
||||||
has_string_content = true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
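// Only polyfill a feature when the caller allows it, the input actually uses it,
// and the template does not support it natively.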
auto polyfill_system_role = opts.polyfill_system_role && !caps_.supports_system_role;
|
|
||||||
auto polyfill_tools = opts.polyfill_tools && has_tools && !caps_.supports_tools;
|
|
||||||
auto polyfill_tool_call_example = polyfill_tools && opts.polyfill_tool_call_examples;
|
|
||||||
auto polyfill_tool_calls = opts.polyfill_tool_calls && has_tool_calls && !caps_.supports_tool_calls;
|
|
||||||
auto polyfill_tool_responses = opts.polyfill_tool_responses && has_tool_responses && !caps_.supports_tool_responses;
|
|
||||||
auto polyfill_object_arguments = opts.polyfill_object_arguments && has_tool_calls && caps_.requires_object_arguments;
|
|
||||||
auto polyfill_typed_content = opts.polyfill_typed_content && has_string_content && caps_.requires_typed_content;
|
|
||||||
|
|
||||||
auto needs_polyfills = opts.apply_polyfills && (false
|
|
||||||
|| polyfill_system_role
|
|
||||||
|| polyfill_tools
|
|
||||||
|| polyfill_tool_calls
|
|
||||||
|| polyfill_tool_responses
|
|
||||||
|| polyfill_object_arguments
|
|
||||||
|| polyfill_typed_content
|
|
||||||
);
|
|
||||||
|
|
||||||
if (needs_polyfills) {
|
|
||||||
actual_messages = json::array();
|
|
||||||
|
|
||||||
auto add_message = [&](const json & msg) {
|
|
||||||
if (polyfill_typed_content && msg.contains("content") && !msg.at("content").is_null() && msg.at("content").is_string()) {
|
|
||||||
actual_messages.push_back({
|
|
||||||
{"role", msg.at("role")},
|
|
||||||
{"content", {{
|
|
||||||
{"type", "text"},
|
|
||||||
{"text", msg.at("content")},
|
|
||||||
}}},
|
|
||||||
});
|
|
||||||
} else {
|
|
||||||
actual_messages.push_back(msg);
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
std::string pending_system;
|
|
||||||
auto flush_sys = [&]() {
|
|
||||||
if (!pending_system.empty()) {
|
|
||||||
add_message({
|
|
||||||
{"role", "user"},
|
|
||||||
{"content", pending_system},
|
|
||||||
});
|
|
||||||
pending_system.clear();
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
json adjusted_messages;
|
|
||||||
if (polyfill_tools) {
|
|
||||||
adjusted_messages = add_system(inputs.messages,
|
|
||||||
"You can call any of the following tools to satisfy the user's requests: " + minja::Value(inputs.tools).dump(2, /* to_json= */ true) +
|
|
||||||
(!polyfill_tool_call_example || tool_call_example_.empty() ? "" : "\n\nExample tool call syntax:\n\n" + tool_call_example_ + "\n\n"));
|
|
||||||
} else {
|
|
||||||
adjusted_messages = inputs.messages;
|
|
||||||
}
|
|
||||||
|
|
||||||
for (const auto & message_ : adjusted_messages) {
|
|
||||||
auto message = message_;
|
|
||||||
if (!message.contains("role") || !message.contains("content")) {
|
|
||||||
throw std::runtime_error("message must have 'role' and 'content' fields: " + message.dump());
|
|
||||||
}
|
|
||||||
std::string role = message.at("role");
|
|
||||||
|
|
||||||
if (message.contains("tool_calls")) {
|
|
||||||
if (polyfill_object_arguments || polyfill_tool_calls) {
|
|
||||||
for (auto & tool_call : message.at("tool_calls")) {
|
|
||||||
if (tool_call["type"] == "function") {
|
|
||||||
auto & function = tool_call.at("function");
|
|
||||||
auto & arguments = function.at("arguments");
|
|
||||||
if (arguments.is_string()) {
|
|
||||||
try {
|
|
||||||
arguments = json::parse(arguments.get<std::string>());
|
|
||||||
} catch (const std::exception & ecvt) {
|
|
||||||
fprintf(stderr, "Failed to parse arguments: %s\n", ecvt.what());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (polyfill_tool_calls) {
|
|
||||||
auto content = message.at("content");
|
|
||||||
auto tool_calls = json::array();
|
|
||||||
for (const auto & tool_call : message.at("tool_calls")) {
|
|
||||||
if (tool_call.at("type") != "function") {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
const auto & function = tool_call.at("function");
|
|
||||||
auto tc = json {
|
|
||||||
{"name", function.at("name")},
|
|
||||||
{"arguments", function.at("arguments")},
|
|
||||||
};
|
|
||||||
if (tool_call.contains("id")) {
|
|
||||||
tc["id"] = tool_call["id"];
|
|
||||||
}
|
|
||||||
tool_calls.push_back(tc);
|
|
||||||
}
|
|
||||||
auto obj = json {
|
|
||||||
{"tool_calls", tool_calls},
|
|
||||||
};
|
|
||||||
if (!content.is_null() && content != "") {
|
|
||||||
obj["content"] = content;
|
|
||||||
}
|
|
||||||
message["content"] = obj.dump(2);
|
|
||||||
message.erase("tool_calls");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (polyfill_tool_responses && role == "tool") {
|
|
||||||
message["role"] = "user";
|
|
||||||
auto obj = json {
|
|
||||||
{"tool_response", {
|
|
||||||
{"content", message.at("content")},
|
|
||||||
}},
|
|
||||||
};
|
|
||||||
if (message.contains("name")) {
|
|
||||||
obj["tool_response"]["name"] = message.at("name");
|
|
||||||
}
|
|
||||||
if (message.contains("tool_call_id")) {
|
|
||||||
obj["tool_response"]["tool_call_id"] = message.at("tool_call_id");
|
|
||||||
}
|
|
||||||
message["content"] = obj.dump(2);
|
|
||||||
message.erase("name");
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!message["content"].is_null() && polyfill_system_role) {
|
|
||||||
std::string content = message.at("content");
|
|
||||||
if (role == "system") {
|
|
||||||
if (!pending_system.empty()) pending_system += "\n";
|
|
||||||
pending_system += content;
|
|
||||||
continue;
|
|
||||||
} else {
|
|
||||||
if (role == "user") {
|
|
||||||
if (!pending_system.empty()) {
|
|
||||||
message["content"] = pending_system + (content.empty() ? "" : "\n" + content);
|
|
||||||
pending_system.clear();
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
flush_sys();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
add_message(message);
|
|
||||||
}
|
|
||||||
flush_sys();
|
|
||||||
} else {
|
|
||||||
actual_messages = inputs.messages;
|
|
||||||
}
|
|
||||||
|
|
||||||
auto context = minja::Context::make(json({
|
|
||||||
{"messages", actual_messages},
|
|
||||||
{"add_generation_prompt", inputs.add_generation_prompt},
|
|
||||||
}));
|
|
||||||
context->set("bos_token", opts.use_bos_token ? bos_token_ : "");
|
|
||||||
context->set("eos_token", opts.use_eos_token ? eos_token_ : "");
|
|
||||||
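// Expose a strftime_now(format) helper to the template, bound to inputs.now
// (which tests may pin to a fixed timestamp).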
if (opts.define_strftime_now) {
|
|
||||||
auto now = inputs.now;
|
|
||||||
context->set("strftime_now", Value::callable([now](const std::shared_ptr<minja::Context> &, minja::ArgumentsValue & args) {
|
|
||||||
args.expectArgs("strftime_now", {1, 1}, {0, 0});
|
|
||||||
auto format = args.args[0].get<std::string>();
|
|
||||||
|
|
||||||
auto time = std::chrono::system_clock::to_time_t(now);
|
|
||||||
auto local_time = *std::localtime(&time);
|
|
||||||
std::ostringstream ss;
|
|
||||||
ss << std::put_time(&local_time, format.c_str());
|
|
||||||
return ss.str();
|
|
||||||
}));
|
|
||||||
}
|
|
||||||
if (!inputs.tools.is_null()) {
|
|
||||||
context->set("tools", minja::Value(inputs.tools));
|
|
||||||
}
|
|
||||||
if (!inputs.extra_context.is_null()) {
|
|
||||||
for (auto & kv : inputs.extra_context.items()) {
|
|
||||||
context->set(kv.key(), minja::Value(kv.value()));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
auto ret = template_root_->render(context);
|
|
||||||
// fprintf(stderr, "actual_messages: %s\n", actual_messages.dump(2).c_str());
|
|
||||||
// fprintf(stderr, "apply: %s\n\n", ret.c_str());
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
static nlohmann::ordered_json add_system(const nlohmann::ordered_json & messages, const std::string & system_prompt) {
|
|
||||||
json messages_with_system = messages;
|
|
||||||
|
|
||||||
if (messages_with_system.size() > 0 && messages_with_system[0].at("role") == "system") {
|
|
||||||
std::string existing_system = messages_with_system.at(0).at("content");
|
|
||||||
messages_with_system[0] = json {
|
|
||||||
{"role", "system"},
|
|
||||||
{"content", existing_system + "\n\n" + system_prompt},
|
|
||||||
};
|
|
||||||
} else {
|
|
||||||
messages_with_system.insert(messages_with_system.begin(), json {
|
|
||||||
{"role", "system"},
|
|
||||||
{"content", system_prompt},
|
|
||||||
});
|
|
||||||
}
|
|
||||||
return messages_with_system;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
} // namespace minja
|
|
966
common/chat.cpp
|
@@ -1,966 +0,0 @@
|
||||||
#include "chat.hpp"
|
|
||||||
#include "chat-template.hpp"
|
|
||||||
#include "json-schema-to-grammar.h"
|
|
||||||
#include "log.h"
|
|
||||||
#include "minja.hpp"
|
|
||||||
|
|
||||||
std::string common_chat_format_name(common_chat_format format) {
|
|
||||||
switch (format) {
|
|
||||||
case COMMON_CHAT_FORMAT_CONTENT_ONLY: return "Content-only";
|
|
||||||
case COMMON_CHAT_FORMAT_GENERIC: return "Generic";
|
|
||||||
case COMMON_CHAT_FORMAT_MISTRAL_NEMO: return "Mistral Nemo";
|
|
||||||
case COMMON_CHAT_FORMAT_LLAMA_3_X: return "Llama 3.x";
|
|
||||||
case COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS: return "Llama 3.x with builtin tools";
|
|
||||||
case COMMON_CHAT_FORMAT_DEEPSEEK_R1: return "DeepSeek R1";
|
|
||||||
case COMMON_CHAT_FORMAT_FIREFUNCTION_V2: return "FireFunction v2";
|
|
||||||
case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2: return "Functionary v3.2";
|
|
||||||
case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1: return "Functionary v3.1 Llama 3.1";
|
|
||||||
case COMMON_CHAT_FORMAT_HERMES_2_PRO: return "Hermes 2 Pro";
|
|
||||||
case COMMON_CHAT_FORMAT_COMMAND_R7B: return "Command R7B";
|
|
||||||
default:
|
|
||||||
throw std::runtime_error("Unknown chat format");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
const common_grammar_options grammar_options {
|
|
||||||
/* .dotall = */ false,
|
|
||||||
/* .compact_spaces = */ false,
|
|
||||||
// /* .compact_spaces = */ true,
|
|
||||||
};
|
|
||||||
|
|
||||||
static bool parse_json(std::string::const_iterator & it, const std::string::const_iterator & end, json & out) {
|
|
||||||
// https://json.nlohmann.me/features/parsing/sax_interface/
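// A SAX handler that accepts every event and only records where parsing fails, so that the
// valid JSON prefix of the input can be re-parsed below.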
|
|
||||||
struct json_error_locator : public nlohmann::json_sax<json> {
|
|
||||||
std::size_t position;
|
|
||||||
bool found_error;
|
|
||||||
|
|
||||||
json_error_locator() : position(0), found_error(false) {}
|
|
||||||
|
|
||||||
bool parse_error(std::size_t position, const std::string &, const json::exception &) override {
|
|
||||||
this->position = position - 1;
|
|
||||||
this->found_error = true;
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
bool null() override { return true; }
|
|
||||||
bool boolean(bool) override { return true; }
|
|
||||||
bool number_integer(number_integer_t) override { return true; }
|
|
||||||
bool number_unsigned(number_unsigned_t) override { return true; }
|
|
||||||
bool number_float(number_float_t, const string_t &) override { return true; }
|
|
||||||
bool string(string_t &) override { return true; }
|
|
||||||
bool binary(binary_t &) override { return true; }
|
|
||||||
bool start_object(std::size_t) override { return true; }
|
|
||||||
bool key(string_t &) override { return true; }
|
|
||||||
bool end_object() override { return true; }
|
|
||||||
bool start_array(std::size_t) override { return true; }
|
|
||||||
bool end_array() override { return true; }
|
|
||||||
};
|
|
||||||
json_error_locator err_loc;
|
|
||||||
json::sax_parse(it, end, &err_loc);
|
|
||||||
|
|
||||||
std::string::const_iterator temptative_end;
|
|
||||||
if (err_loc.found_error) {
|
|
||||||
temptative_end = it + err_loc.position;
|
|
||||||
} else {
|
|
||||||
temptative_end = end;
|
|
||||||
}
|
|
||||||
std::string json_sub {it, temptative_end};
|
|
||||||
try {
|
|
||||||
out = json::parse(json_sub);
|
|
||||||
it = temptative_end;
|
|
||||||
return true;
|
|
||||||
} catch (const std::exception &) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Takes a function-call regex with one capture group for the function name and a closing regex, and expects JSON arguments between each function match and its closing pattern.
|
|
||||||
* Text before and between tool calls is aggregated into the message content.
|
|
||||||
*/
|
|
||||||
static common_chat_msg parse_json_tool_calls(
|
|
||||||
const std::string& input,
|
|
||||||
const std::optional<std::regex> & trigger_opt,
|
|
||||||
const std::regex & function_regex,
|
|
||||||
const std::regex & close_regex) {
|
|
||||||
std::smatch match;
|
|
||||||
|
|
||||||
common_chat_msg result;
|
|
||||||
result.role = "assistant";
|
|
||||||
|
|
||||||
|
|
||||||
auto end = input.end();
|
|
||||||
auto it = input.begin();
|
|
||||||
|
|
||||||
if (trigger_opt) {
|
|
||||||
if (!std::regex_search(it, end, match, *trigger_opt)) {
|
|
||||||
result.content = input;
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
result.content = match.prefix().str();
|
|
||||||
it = match.suffix().first;
|
|
||||||
}
|
|
||||||
|
|
||||||
while (it != end) {
|
|
||||||
std::sregex_iterator rend;
|
|
||||||
std::sregex_iterator rit(it, end, function_regex);
|
|
||||||
if (rit == rend) {
|
|
||||||
fprintf(stderr, "No more tool calls found\n");
|
|
||||||
result.content += std::string(it, end);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
auto name = rit->str(1);
|
|
||||||
result.content += std::string(it, rit->prefix().second);
|
|
||||||
it = rit->suffix().first;
|
|
||||||
|
|
||||||
json arguments;
|
|
||||||
if (!parse_json(it, end, arguments)) {
|
|
||||||
throw std::runtime_error("Failed to parse json tool call arguments");
|
|
||||||
}
|
|
||||||
if (!std::regex_search(it, end, match, close_regex)) {
|
|
||||||
throw std::runtime_error("Malformed input, missing closing pattern");
|
|
||||||
}
|
|
||||||
it = match.suffix().first;
|
|
||||||
result.tool_calls.push_back({name, arguments.is_string() ? arguments.get<std::string>() : arguments.dump(), /* id= */ ""});
|
|
||||||
}
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
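// Splits the input at `prefix`: the text before it becomes the message content, and everything
// after it (keeping the last `rstrip_prefix` characters of the prefix, e.g. a retained '[') is
// parsed as a JSON array of tool calls.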
static common_chat_msg parse_prefixed_json_tool_call_array(const std::string& input, const std::string & prefix, size_t rstrip_prefix = 0) {
|
|
||||||
auto content_end = input.find(prefix);
|
|
||||||
size_t tc_start = std::string::npos;
|
|
||||||
|
|
||||||
common_chat_msg result;
|
|
||||||
result.role = "assistant";
|
|
||||||
const auto process_tool_calls = [&](const json & tool_calls) {
|
|
||||||
for (const auto & tool_call : tool_calls) {
|
|
||||||
const auto & arguments = tool_call["arguments"];
|
|
||||||
result.tool_calls.push_back({
|
|
||||||
tool_call["name"],
|
|
||||||
arguments.is_string() ? arguments.get<std::string>() : arguments.dump(),
|
|
||||||
tool_call.contains("id") ? tool_call["id"] : "",
|
|
||||||
});
|
|
||||||
}
|
|
||||||
};
|
|
||||||
if (content_end == std::string::npos) {
|
|
||||||
result.content = input;
|
|
||||||
} else {
|
|
||||||
tc_start = content_end + prefix.size() - rstrip_prefix;
|
|
||||||
result.content = input.substr(0, content_end);
|
|
||||||
auto tool_calls = json::parse(input.substr(tc_start));
|
|
||||||
process_tool_calls(tool_calls);
|
|
||||||
}
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
static void foreach_function(const json & tools, const std::function<void(const json &)> & fn) {
|
|
||||||
for (const auto & tool : tools) {
|
|
||||||
if (!tool.contains("type") || tool["type"] != "function" || !tool.contains("function")) {
|
|
||||||
LOG_INF("Skipping tool without function: %s", tool.dump(2).c_str());
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
fn(tool);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static std::string apply(
|
|
||||||
const common_chat_template & tmpl,
|
|
||||||
const nlohmann::ordered_json & messages,
|
|
||||||
const nlohmann::ordered_json & tools,
|
|
||||||
bool add_generation_prompt,
|
|
||||||
const nlohmann::ordered_json & extra_context = nlohmann::ordered_json())
|
|
||||||
{
|
|
||||||
minja::chat_template_inputs tmpl_inputs;
|
|
||||||
tmpl_inputs.messages = messages;
|
|
||||||
tmpl_inputs.tools = tools;
|
|
||||||
tmpl_inputs.add_generation_prompt = add_generation_prompt;
|
|
||||||
tmpl_inputs.extra_context = extra_context;
|
|
||||||
// TODO: add flag to control date/time, if only for testing purposes.
|
|
||||||
// tmpl_inputs.now = std::chrono::system_clock::now();
|
|
||||||
|
|
||||||
minja::chat_template_options tmpl_opts;
|
|
||||||
tmpl_opts.use_bos_token = false;
|
|
||||||
tmpl_opts.use_eos_token = false;
|
|
||||||
|
|
||||||
return tmpl.apply(tmpl_inputs, tmpl_opts);
|
|
||||||
}
|
|
||||||
|
|
||||||
static common_chat_params common_chat_params_init_generic(const common_chat_template & tmpl, const struct common_chat_inputs & inputs) {
|
|
||||||
common_chat_params data;
|
|
||||||
|
|
||||||
auto tool_call_schemas = json::array();
|
|
||||||
foreach_function(inputs.tools, [&](const json & tool) {
|
|
||||||
const auto & function = tool["function"];
|
|
||||||
auto tool_schema = json {
|
|
||||||
{"type", "object"},
|
|
||||||
{"properties", {
|
|
||||||
{"name", {
|
|
||||||
{"type", "string"},
|
|
||||||
{"const", function["name"]},
|
|
||||||
}},
|
|
||||||
{"arguments", function["parameters"]},
|
|
||||||
}},
|
|
||||||
{"required", json::array({"name", "arguments"})},
|
|
||||||
};
|
|
||||||
if (function.contains("description")) {
|
|
||||||
tool_schema["description"] = function["description"];
|
|
||||||
}
|
|
||||||
if (inputs.parallel_tool_calls) {
|
|
||||||
tool_schema["properties"]["id"] = {
|
|
||||||
{"type", "string"},
|
|
||||||
{"minLength", 4},
|
|
||||||
};
|
|
||||||
tool_schema["required"].push_back("id");
|
|
||||||
}
|
|
||||||
tool_call_schemas.emplace_back(tool_schema);
|
|
||||||
});
|
|
||||||
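// Wrap the per-tool schemas into either a `tool_calls` array (when parallel calls are allowed)
// or a single `tool_call` object, which is then turned into a grammar below.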
const auto tool_call =
|
|
||||||
inputs.parallel_tool_calls
|
|
||||||
? json {
|
|
||||||
{"type", "object"},
|
|
||||||
{"properties", {
|
|
||||||
{"tool_calls", {
|
|
||||||
{"type", "array"},
|
|
||||||
{"items", tool_call_schemas.size() == 1 ? tool_call_schemas[0] : json {
|
|
||||||
{"anyOf", tool_call_schemas},
|
|
||||||
}},
|
|
||||||
{"minItems", 1},
|
|
||||||
}},
|
|
||||||
}},
|
|
||||||
{"required", json::array({"tool_calls"})},
|
|
||||||
}
|
|
||||||
: json {
|
|
||||||
{"type", "object"},
|
|
||||||
{"properties", {
|
|
||||||
{"tool_call", tool_call_schemas.size() == 1 ? tool_call_schemas[0] : json {
|
|
||||||
{"anyOf", tool_call_schemas},
|
|
||||||
}},
|
|
||||||
}},
|
|
||||||
{"required", json::array({"tool_call"})},
|
|
||||||
};
|
|
||||||
const auto schema =
|
|
||||||
inputs.tool_choice != "required"
|
|
||||||
? json {
|
|
||||||
{"anyOf", json::array({
|
|
||||||
tool_call,
|
|
||||||
{
|
|
||||||
{"type", "object"},
|
|
||||||
{"properties", {
|
|
||||||
{"response", inputs.json_schema.is_null()
|
|
||||||
? json {{"type", "string"}}
|
|
||||||
: inputs.json_schema
|
|
||||||
},
|
|
||||||
}},
|
|
||||||
{"required", json::array({"response"})},
|
|
||||||
},
|
|
||||||
})}
|
|
||||||
}
|
|
||||||
: tool_call;
|
|
||||||
|
|
||||||
data.grammar_lazy = false;
|
|
||||||
data.grammar = build_grammar([&](const common_grammar_builder & builder) {
|
|
||||||
builder.add_schema("root", schema);
|
|
||||||
}, grammar_options);
|
|
||||||
|
|
||||||
auto tweaked_messages = common_chat_template::add_system(
|
|
||||||
inputs.messages,
|
|
||||||
"Respond in JSON format, either with `tool_call` (a request to call tools) or with `response` reply to the user's request");
|
|
||||||
|
|
||||||
data.prompt = apply(tmpl, tweaked_messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt);
|
|
||||||
data.format = COMMON_CHAT_FORMAT_GENERIC;
|
|
||||||
return data;
|
|
||||||
}
|
|
||||||
static common_chat_msg common_chat_parse_generic(const std::string & input) {
|
|
||||||
json data = json::parse(input);
|
|
||||||
common_chat_msg result;
|
|
||||||
result.role = "assistant";
|
|
||||||
if (data.contains("tool_calls")) {
|
|
||||||
for (const auto & tool_call : data["tool_calls"]) {
|
|
||||||
result.tool_calls.push_back({
|
|
||||||
tool_call["name"],
|
|
||||||
tool_call["arguments"].dump(),
|
|
||||||
tool_call.contains("id") ? tool_call["id"] : "",
|
|
||||||
});
|
|
||||||
}
|
|
||||||
} else if (data.contains("tool_call")) {
|
|
||||||
result.tool_calls.push_back({
|
|
||||||
data["tool_call"]["name"],
|
|
||||||
data["tool_call"]["arguments"].dump(),
|
|
||||||
/* id= */ "",
|
|
||||||
});
|
|
||||||
} else if (data.contains("response")) {
|
|
||||||
const auto & response = data["response"];
|
|
||||||
result.content = response.is_string() ? response.get<std::string>() : response.dump(2);
|
|
||||||
}
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
static common_chat_params common_chat_params_init_mistral_nemo(const common_chat_template & tmpl, const struct common_chat_inputs & inputs) {
|
|
||||||
common_chat_params data;
|
|
||||||
data.grammar_lazy = inputs.tool_choice != "required";
|
|
||||||
data.grammar = build_grammar([&](const common_grammar_builder & builder) {
|
|
||||||
auto schemas = json::array();
|
|
||||||
foreach_function(inputs.tools, [&](const json & tool) {
|
|
||||||
const auto & function = tool["function"];
|
|
||||||
schemas.push_back({
|
|
||||||
{"type", "object"},
|
|
||||||
{"properties", {
|
|
||||||
// Important note: the model is probably trained to take a JSON stringified arguments value.
|
|
||||||
// It's hard to constrain that for now (while reusing the JSON schema conversion), so we're just expecting a plain object.
|
|
||||||
{"name", {
|
|
||||||
{"type", "string"},
|
|
||||||
{"const", function["name"]},
|
|
||||||
}},
|
|
||||||
{"arguments", function["parameters"]},
|
|
||||||
{"id", {
|
|
||||||
{"type", "string"},
|
|
||||||
// Nemo's template expects a 9-character alphanumeric ID.
|
|
||||||
{"pattern", "^[a-zA-Z0-9]{9}$"},
|
|
||||||
}},
|
|
||||||
}},
|
|
||||||
{"required", json::array({"name", "arguments", "id"})},
|
|
||||||
});
|
|
||||||
});
|
|
||||||
auto schema = json {
|
|
||||||
{"type", "array"},
|
|
||||||
{"items", schemas.size() == 1 ? schemas[0] : json {{"anyOf", schemas}}},
|
|
||||||
{"minItems", 1},
|
|
||||||
};
|
|
||||||
if (!inputs.parallel_tool_calls) {
|
|
||||||
schema["maxItems"] = 1;
|
|
||||||
}
|
|
||||||
builder.add_rule("root", "\"[TOOL_CALLS]\" " + builder.add_schema("tool_calls", schema));
|
|
||||||
}, grammar_options);
|
|
||||||
data.grammar_triggers.push_back({"[TOOL_CALLS]", /* .at_start = */ true});
|
|
||||||
data.prompt = apply(tmpl, inputs.messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt);
|
|
||||||
data.format = COMMON_CHAT_FORMAT_MISTRAL_NEMO;
|
|
||||||
return data;
|
|
||||||
}
|
|
||||||
static common_chat_msg common_chat_parse_mistral_nemo(const std::string & input) {
|
|
||||||
return parse_prefixed_json_tool_call_array(input, "[TOOL_CALLS]");
|
|
||||||
}
|
|
||||||
|
|
||||||
static common_chat_params common_chat_params_init_command_r7b(const common_chat_template & tmpl, const struct common_chat_inputs & inputs) {
|
|
||||||
common_chat_params data;
|
|
||||||
data.grammar_lazy = inputs.tool_choice != "required";
|
|
||||||
data.grammar = build_grammar([&](const common_grammar_builder & builder) {
|
|
||||||
auto schemas = json::array();
|
|
||||||
foreach_function(inputs.tools, [&](const json & tool) {
|
|
||||||
const auto & function = tool["function"];
|
|
||||||
schemas.push_back({
|
|
||||||
{"type", "object"},
|
|
||||||
{"properties", {
|
|
||||||
{"tool_call_id", {
|
|
||||||
{"type", "string"},
|
|
||||||
// Command-R's template expects an integer string.
|
|
||||||
{"pattern", "^[0-9]{1,10}$"},
|
|
||||||
}},
|
|
||||||
{"tool_name", {
|
|
||||||
{"type", "string"},
|
|
||||||
{"const", function["name"]},
|
|
||||||
}},
|
|
||||||
{"parameters", function["parameters"]},
|
|
||||||
}},
|
|
||||||
{"required", json::array({"tool_call_id", "tool_name", "parameters"})},
|
|
||||||
});
|
|
||||||
});
|
|
||||||
auto schema = json {
|
|
||||||
{"type", "array"},
|
|
||||||
{"items", schemas.size() == 1 ? schemas[0] : json {{"anyOf", schemas}}},
|
|
||||||
{"minItems", 1},
|
|
||||||
};
|
|
||||||
if (!inputs.parallel_tool_calls) {
|
|
||||||
schema["maxItems"] = 1;
|
|
||||||
}
|
|
||||||
builder.add_rule("root", "\"<|START_ACTION|>\" " + builder.add_schema("tool_calls", schema) + " \"<|END_ACTION|>\"");
|
|
||||||
}, grammar_options);
|
|
||||||
data.grammar_triggers.push_back({"<|START_ACTION|>", /* .at_start = */ false});
|
|
||||||
data.preserved_tokens = {
|
|
||||||
"<|START_RESPONSE|>",
|
|
||||||
"<|END_RESPONSE|>",
|
|
||||||
"<|START_THINKING|>",
|
|
||||||
"<|END_THINKING|>",
|
|
||||||
"<|END_ACTION|>",
|
|
||||||
};
|
|
||||||
data.prompt = apply(tmpl, inputs.messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt);
|
|
||||||
data.format = COMMON_CHAT_FORMAT_COMMAND_R7B;
|
|
||||||
return data;
|
|
||||||
}
|
|
||||||
static common_chat_msg common_chat_parse_command_r7b(const std::string & input) {
|
|
||||||
static std::regex response_regex("<\\|START_RESPONSE\\|>([\\s\\S\\n\\r]*?)<\\|END_RESPONSE\\|>");
|
|
||||||
static std::regex thought_action_regex("<\\|START_THINKING\\|>([\\s\\S\\n\\r]*?)<\\|END_THINKING\\|><\\|START_ACTION\\|>([\\s\\S\\n\\r]*?)<\\|END_ACTION\\|>");
|
|
||||||
std::smatch match;
|
|
||||||
|
|
||||||
common_chat_msg result;
|
|
||||||
result.role = "assistant";
|
|
||||||
if (std::regex_match(input, match, response_regex)) {
|
|
||||||
result.content = match[1].str();
|
|
||||||
} else if (std::regex_match(input, match, thought_action_regex)) {
|
|
||||||
result.tool_plan = match[1].str();
|
|
||||||
auto actions_str = match[2].str();
|
|
||||||
auto actions = json::parse(actions_str);
|
|
||||||
for (const auto & action : actions) {
|
|
||||||
result.tool_calls.push_back({
|
|
||||||
/* .name = */ action["tool_name"],
|
|
||||||
/* .arguments = */ action["parameters"].dump(),
|
|
||||||
/* .id = */ action["tool_call_id"],
|
|
||||||
});
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
LOG_ERR("Failed to parse command_r output");
|
|
||||||
result.content = input;
|
|
||||||
}
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
static void expect_tool_parameters(const std::string & name, const json & parameters, const std::vector<std::string> & expected_properties) {
|
|
||||||
if (!parameters.is_object() || !parameters.contains("type") || parameters["type"] != "object" || !parameters.contains("properties") || !parameters.contains("required")) {
|
|
||||||
throw std::runtime_error("Parameters of tool " + name + " must be an object w/ required properties");
|
|
||||||
}
|
|
||||||
const auto & parameters_properties = parameters.at("properties");
|
|
||||||
const auto & parameters_required = parameters.at("required");
|
|
||||||
for (const auto & prop : expected_properties) {
|
|
||||||
if (!parameters_properties.contains(prop)) {
|
|
||||||
throw std::runtime_error("Parameters of tool " + name + " is missing property: " + prop);
|
|
||||||
}
|
|
||||||
if (std::find(parameters_required.begin(), parameters_required.end(), json(prop)) == parameters_required.end()) {
|
|
||||||
throw std::runtime_error("Parameters of tool " + name + " must have property marked as required: " + prop);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (parameters_properties.size() != expected_properties.size()) {
|
|
||||||
throw std::runtime_error("Parameters of tool " + name + " must only have these properties:" + string_join(expected_properties, ", "));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static common_chat_params common_chat_params_init_llama_3_1_tool_calls(const common_chat_template & tmpl, const struct common_chat_inputs & inputs, bool allow_python_tag_builtin_tools) {
|
|
||||||
auto builtin_tools = json::array();
|
|
||||||
common_chat_params data;
|
|
||||||
data.grammar_lazy = inputs.tool_choice != "required";
|
|
||||||
data.grammar = build_grammar([&](const common_grammar_builder & builder) {
|
|
||||||
std::vector<std::string> tool_rules;
|
|
||||||
|
|
||||||
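// For Llama's builtin tools (wolfram_alpha, brave_search, python, ...), emit a grammar rule for
// the pythonic "<|python_tag|>name.call(arg=value)" call syntax instead of plain JSON.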
auto handle_builtin_tool = [&](const std::string & name, const json & parameters) {
|
|
||||||
if (name == "wolfram_alpha") {
|
|
||||||
// https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/remote/tool_runtime/wolfram_alpha/wolfram_alpha.py
|
|
||||||
expect_tool_parameters(name, parameters, {"query"});
|
|
||||||
} else if (name == "web_search" || name == "brave_search") {
|
|
||||||
// https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/remote/tool_runtime/brave_search/brave_search.py
|
|
||||||
expect_tool_parameters(name, parameters, {"query"});
|
|
||||||
} else if (name == "python" || name == "code_interpreter") {
|
|
||||||
// https://github.com/meta-llama/llama-stack/blob/main/llama_stack/providers/inline/tool_runtime/code_interpreter/code_interpreter.py
|
|
||||||
expect_tool_parameters(name, parameters, {"code"});
|
|
||||||
} else {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<std::string> kvs;
|
|
||||||
for (const auto & [key, value] : parameters.at("properties").items()) {
|
|
||||||
kvs.push_back("\"" + key + "=\" " + builder.add_schema(name + "-args-" + key, value));
|
|
||||||
}
|
|
||||||
|
|
||||||
tool_rules.push_back(
|
|
||||||
builder.add_rule(
|
|
||||||
name + "-call",
|
|
||||||
"\"<|python_tag|>" + name + ".call(\" " + string_join(kvs, " \", \" ") + " \")\""));
|
|
||||||
builtin_tools.push_back(name);
|
|
||||||
|
|
||||||
return true;
|
|
||||||
};
|
|
||||||
|
|
||||||
foreach_function(inputs.tools, [&](const json & tool) {
|
|
||||||
const auto & function = tool["function"];
|
|
||||||
std::string name = function["name"];
|
|
||||||
auto parameters = function["parameters"];
|
|
||||||
builder.resolve_refs(parameters);
|
|
||||||
|
|
||||||
// https://github.com/meta-llama/llama-stack/tree/main/llama_stack/providers/remote/tool_runtime
|
|
||||||
if (allow_python_tag_builtin_tools) {
|
|
||||||
handle_builtin_tool(name, parameters);
|
|
||||||
}
|
|
||||||
tool_rules.push_back(
|
|
||||||
builder.add_rule(
|
|
||||||
name + "-call",
|
|
||||||
"\"{\" space "
|
|
||||||
"( \"\\\"type\\\":\" space \"\\\"function\\\",\" space )? "
|
|
||||||
"\"\\\"name\\\": \\\"" + name + "\\\", \\\"parameters\\\": \" " +
|
|
||||||
builder.add_schema(name + "-args", parameters) +
|
|
||||||
" \"}\""));
|
|
||||||
data.grammar_triggers.push_back({"{\"name\": \"" + name + "\"", /* .at_start = */ true});
|
|
||||||
});
|
|
||||||
data.grammar_triggers.push_back({"{\"name\":", /* .at_start = */ true});
|
|
||||||
data.grammar_triggers.push_back({"{\n \"name\":", /* .at_start = */ true});
|
|
||||||
data.grammar_triggers.push_back({"{\n \"name\":", /* .at_start = */ true});
|
|
||||||
data.grammar_triggers.push_back({"{\"type\": \"function\"", /* .at_start = */ true});
|
|
||||||
data.grammar_triggers.push_back({"{\n \"type\": \"function\"", /* .at_start = */ true});
|
|
||||||
data.grammar_triggers.push_back({"{\n \"type\": \"function\"", /* .at_start = */ true});
|
|
||||||
if (!builtin_tools.empty()) {
|
|
||||||
data.grammar_triggers.push_back({"<|python_tag|>", /* .at_start = */ false});
|
|
||||||
}
|
|
||||||
builder.add_rule("root", string_join(tool_rules, " | "));
|
|
||||||
}, grammar_options);
|
|
||||||
data.additional_stops.push_back("<|eom_id|>");
|
|
||||||
data.prompt = apply(tmpl, inputs.messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt, {
|
|
||||||
{"tools_in_user_message", false},
|
|
||||||
{"builtin_tools", builtin_tools.empty() ? json() : builtin_tools},
|
|
||||||
});
|
|
||||||
data.format = allow_python_tag_builtin_tools && !builtin_tools.empty()
|
|
||||||
? COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS
|
|
||||||
: COMMON_CHAT_FORMAT_LLAMA_3_X;
|
|
||||||
return data;
|
|
||||||
}
|
|
||||||
static common_chat_msg common_chat_parse_llama_3_1(const std::string & input, bool with_builtin_tools = false) {
|
|
||||||
// TODO: tighten & simplify the parser, don't accept leading text context.
|
|
||||||
static std::regex function_regex("\\{[\\s\\n\\r]*(?:\"type\"[\\s\\n\\r]*:[\\s\\n\\r]*\"function\"[\\s\\n\\r]*,[\\s\\n\\r]*|[\\s\\n\\r]*)\"name\"[\\s\\n\\r]*:[\\s\\n\\r]*\"([^\"]+)\"[\\s\\n\\r]*,[\\s\\n\\r]*\"parameters\": ");
|
|
||||||
static std::regex close_regex("\\}");
|
|
||||||
static std::regex builtin_call_regex("<\\|python_tag\\|>([^.(]+)\\.call\\((.*)\\)");
|
|
||||||
|
|
||||||
if (with_builtin_tools) {
|
|
||||||
std::smatch match;
|
|
||||||
if (std::regex_match(input, match, builtin_call_regex)) {
|
|
||||||
auto name = match[1].str();
|
|
||||||
auto raw_args = match[2].str();
|
|
||||||
|
|
||||||
// TODO: if/when builtin tools start accepting more than 1 argument, use parse_json for real parsing.
|
|
||||||
auto it_eq = raw_args.find('=');
|
|
||||||
auto arg_name = raw_args.substr(0, it_eq);
|
|
||||||
auto arg_value_str = raw_args.substr(it_eq + 1);
|
|
||||||
auto arg_value = json::parse(arg_value_str);
|
|
||||||
|
|
||||||
return {
|
|
||||||
/* .role = */ "assistant",
|
|
||||||
/* .content = */ match.prefix().str(),
|
|
||||||
/* .tool_calls = */ {
|
|
||||||
{
|
|
||||||
/* .name = */ match[1],
|
|
||||||
/* .arguments = */ (json {
|
|
||||||
{arg_name, arg_value},
|
|
||||||
}).dump(),
|
|
||||||
/* .id = */ "",
|
|
||||||
},
|
|
||||||
},
|
|
||||||
};
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return parse_json_tool_calls(input, std::nullopt, function_regex, close_regex);
|
|
||||||
}
|
|
||||||
|
|
||||||
static common_chat_params common_chat_params_init_deepseek_r1(const common_chat_template & tmpl, const struct common_chat_inputs & inputs) {
|
|
||||||
common_chat_params data;
|
|
||||||
data.grammar_lazy = inputs.tool_choice != "required";
|
|
||||||
data.grammar = build_grammar([&](const common_grammar_builder & builder) {
|
|
||||||
std::vector<std::string> tool_rules;
|
|
||||||
foreach_function(inputs.tools, [&](const json & tool) {
|
|
||||||
const auto & function = tool["function"];
|
|
||||||
std::string name = function["name"];
|
|
||||||
auto parameters = function["parameters"];
|
|
||||||
auto args_rule = builder.add_schema(name + "-args", parameters);
|
|
||||||
tool_rules.push_back(builder.add_rule(name + "-call",
|
|
||||||
"\"<|tool▁call▁begin|>function<|tool▁sep|>" + name + "\\n```json\\n\" " + args_rule + " \"```<|tool▁call▁end|>\""));
|
|
||||||
});
|
|
||||||
data.grammar_triggers.push_back({"<|tool▁calls▁begin|>", /* .at_start = */ false});
|
|
||||||
data.preserved_tokens = {
|
|
||||||
"<|tool▁sep|>",
|
|
||||||
"<|tool▁call▁end|>",
|
|
||||||
};
|
|
||||||
builder.add_rule("root", "\"<|tool▁calls▁begin|>\" (" + string_join(tool_rules, " | ") + ")" + (inputs.parallel_tool_calls ? "*" : "") + " space");
|
|
||||||
}, grammar_options);
|
|
||||||
auto prompt = apply(tmpl, inputs.messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt);
|
|
||||||
data.prompt = prompt;
|
|
||||||
data.format = COMMON_CHAT_FORMAT_DEEPSEEK_R1;
|
|
||||||
return data;
|
|
||||||
}
|
|
||||||
static common_chat_msg common_chat_parse_deepseek_r1(const std::string & input) {
|
|
||||||
static std::regex trigger_regex("<|tool▁calls▁begin|>");
|
|
||||||
static std::regex function_regex("<|tool▁call▁begin|>function<|tool▁sep|>([^\n]+)\n```json\n");
|
|
||||||
static std::regex close_regex("```<|tool▁call▁end|>");
|
|
||||||
return parse_json_tool_calls(input, trigger_regex, function_regex, close_regex);
|
|
||||||
}
|
|
||||||
|
|
||||||
static common_chat_params common_chat_params_init_firefunction_v2(const common_chat_template & tmpl, const struct common_chat_inputs & inputs) {
|
|
||||||
fprintf(stderr, "%s\n", __func__);
|
|
||||||
common_chat_params data;
|
|
||||||
data.prompt = apply(tmpl, inputs.messages, /* tools= */ nullptr, inputs.add_generation_prompt, {
|
|
||||||
{"datetime", "Jan 29 2025 13:00:00 GMT"},
|
|
||||||
{"functions", json(inputs.tools.empty() ? "" : inputs.tools.dump(2))},
|
|
||||||
});
|
|
||||||
if (!inputs.tools.is_null() && !inputs.tools.empty()) {
|
|
||||||
data.grammar_lazy = inputs.tool_choice != "required";
|
|
||||||
data.grammar = build_grammar([&](const common_grammar_builder & builder) {
|
|
||||||
auto schemas = json::array();
|
|
||||||
foreach_function(inputs.tools, [&](const json & tool) {
|
|
||||||
const auto & function = tool["function"];
|
|
||||||
schemas.push_back({
|
|
||||||
{"type", "object"},
|
|
||||||
{"properties", {
|
|
||||||
{"name", {
|
|
||||||
{"type", "string"},
|
|
||||||
{"const", function["name"]},
|
|
||||||
}},
|
|
||||||
{"arguments", function["parameters"]},
|
|
||||||
}},
|
|
||||||
{"required", json::array({"name", "arguments", "id"})},
|
|
||||||
});
|
|
||||||
});
|
|
||||||
auto schema = json {
|
|
||||||
{"type", "array"},
|
|
||||||
{"items", schemas.size() == 1 ? schemas[0] : json {{"anyOf", schemas}}},
|
|
||||||
{"minItems", 1},
|
|
||||||
};
|
|
||||||
if (!inputs.parallel_tool_calls) {
|
|
||||||
schema["maxItems"] = 1;
|
|
||||||
}
|
|
||||||
builder.add_rule("root", "\" functools\"? " + builder.add_schema("tool_calls", schema));
|
|
||||||
}, grammar_options);
|
|
||||||
data.grammar_triggers.push_back({" functools[", /* .at_start = */ false});
|
|
||||||
data.format = COMMON_CHAT_FORMAT_FIREFUNCTION_V2;
|
|
||||||
} else {
|
|
||||||
data.format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
|
|
||||||
}
|
|
||||||
return data;
|
|
||||||
}
|
|
||||||
static common_chat_msg common_chat_parse_firefunction_v2(const std::string & input) {
|
|
||||||
return parse_prefixed_json_tool_call_array(input, " functools[", /* rstrip_prefix= */ 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
static common_chat_params common_chat_params_init_functionary_v3_2(const common_chat_template & tmpl, const struct common_chat_inputs & inputs) {
|
|
||||||
// >>>all\nlet's call functions>>>fn1\n{"arg1": 1...}\n>>>fn2\n{"arg1": 1...}...
|
|
||||||
// Using ">>>f1\n", ">>>f2\n"... as trigger words for the grammar
|
|
||||||
common_chat_params data;
|
|
||||||
data.prompt = apply(tmpl, inputs.messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt);
|
|
||||||
data.format = COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2;
|
|
||||||
if (!inputs.tools.is_null() && !inputs.tools.empty()) {
|
|
||||||
data.grammar_lazy = inputs.tool_choice != "required";
|
|
||||||
data.grammar = build_grammar([&](const common_grammar_builder & builder) {
|
|
||||||
std::vector<std::string> first_tool_rules;
|
|
||||||
std::vector<std::string> subsequent_tool_rules;
|
|
||||||
foreach_function(inputs.tools, [&](const json & tool) {
|
|
||||||
const auto & function = tool["function"];
|
|
||||||
std::string name = function["name"];
|
|
||||||
auto parameters = function["parameters"];
|
|
||||||
auto args_rule = builder.add_schema(name + "-args", parameters);
|
|
||||||
first_tool_rules.push_back(builder.add_rule(name + "-call", "\"" + name + "\\n\" " + args_rule));
|
|
||||||
subsequent_tool_rules.push_back(builder.add_rule(name + "-call2", "\">>>" + name + "\\n\" " + args_rule));
|
|
||||||
data.grammar_triggers.push_back({name, /* .at_start = */ true});
|
|
||||||
data.grammar_triggers.push_back({">>>" + name, /* .at_start = */ false});
|
|
||||||
});
|
|
||||||
auto first_rule = first_tool_rules.empty() ? "" : builder.add_rule("first_tool_call", string_join(first_tool_rules, " | ")) + " space";
|
|
||||||
if (inputs.parallel_tool_calls) {
|
|
||||||
auto subsequent_rule = builder.add_rule("subsequent_tool_call", string_join(subsequent_tool_rules, " | ")) + " space";
|
|
||||||
builder.add_rule("root", first_rule + " (" + subsequent_rule + ")*");
|
|
||||||
} else {
|
|
||||||
builder.add_rule("root", first_rule);
|
|
||||||
}
|
|
||||||
|
|
||||||
}, grammar_options);
|
|
||||||
}
|
|
||||||
return data;
|
|
||||||
}
|
|
||||||
|
|
||||||
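// Advances `it` past `expected` if the input starts with it at the current position;
// otherwise leaves `it` untouched and returns false.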
static bool consume(std::string::const_iterator & it, const std::string::const_iterator & end, const std::string & expected) {
|
|
||||||
auto expected_it = expected.begin();
|
|
||||||
auto tmp_it = it;
|
|
||||||
while (tmp_it != end && expected_it != expected.end() && *tmp_it == *expected_it) {
|
|
||||||
++tmp_it;
|
|
||||||
++expected_it;
|
|
||||||
}
|
|
||||||
if (expected_it == expected.end()) {
|
|
||||||
it = tmp_it;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
static common_chat_msg common_chat_parse_functionary_v3_2(const std::string & input) {
|
|
||||||
static std::regex function_regex(R"((?:>>>)?(\w+)\n)");
|
|
||||||
static std::regex close_regex(R"($|(?=>>>))");
|
|
||||||
|
|
||||||
std::string content;
|
|
||||||
auto it = input.begin();
|
|
||||||
const auto end = input.end();
|
|
||||||
|
|
||||||
if (consume(it, end, "all\n")) {
|
|
||||||
std::smatch match;
|
|
||||||
if (std::regex_search(it, end, match, function_regex)) {
|
|
||||||
auto fun_it = match.prefix().second;
|
|
||||||
content = std::string(it, fun_it);
|
|
||||||
it = fun_it;
|
|
||||||
} else {
|
|
||||||
common_chat_msg res;
|
|
||||||
res.role = "assistant";
|
|
||||||
res.content = std::string(it, end);
|
|
||||||
return res;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// TODO: tighten & simplify.
|
|
||||||
try {
|
|
||||||
auto res = parse_json_tool_calls(std::string(it, end), std::nullopt, function_regex, close_regex);
|
|
||||||
res.content = content + res.content;
|
|
||||||
return res;
|
|
||||||
} catch (const std::exception & e) {
|
|
||||||
LOG_ERR("Failed to parse functionary v3.2 input: %s\n", e.what());
|
|
||||||
common_chat_msg res;
|
|
||||||
res.role = "assistant";
|
|
||||||
res.content = input;
|
|
||||||
return res;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static common_chat_params common_chat_params_init_functionary_v3_1_llama_3_1(const common_chat_template & tmpl, const struct common_chat_inputs & inputs) {
|
|
||||||
// https://github.com/MeetKai/functionary/blob/main/tests/prompt_test_v3-llama3.1.txt
|
|
||||||
common_chat_params data;
|
|
||||||
json tools = inputs.tools.is_null() ? inputs.tools : json::array();
|
|
||||||
std::string python_code_argument_name;
|
|
||||||
auto has_raw_python = false;
|
|
||||||
|
|
||||||
data.grammar_lazy = inputs.tool_choice != "required";
|
|
||||||
data.grammar = build_grammar([&](const common_grammar_builder & builder) {
|
|
||||||
std::vector<std::string> tool_rules;
|
|
||||||
foreach_function(inputs.tools, [&](const json & tool) {
|
|
||||||
const auto & function = tool["function"];
|
|
||||||
const auto & parameters = function["parameters"];
|
|
||||||
std::string name = function["name"];
|
|
||||||
if (name == "python" || name == "ipython") {
|
|
||||||
if (!parameters.contains("type")) {
|
|
||||||
throw std::runtime_error("Missing type in python tool");
|
|
||||||
}
|
|
||||||
has_raw_python = true;
|
|
||||||
auto type = parameters.at("type");
|
|
||||||
if (type == "object") {
|
|
||||||
auto properties = parameters.at("properties");
|
|
||||||
for (auto it = properties.begin(); it != properties.end(); ++it) {
|
|
||||||
if (it.value().at("type") == "string") {
|
|
||||||
if (!python_code_argument_name.empty()) {
|
|
||||||
throw std::runtime_error("Multiple string arguments found in python tool");
|
|
||||||
}
|
|
||||||
python_code_argument_name = it.key();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (python_code_argument_name.empty()) {
|
|
||||||
throw std::runtime_error("No string argument found in python tool");
|
|
||||||
}
|
|
||||||
} else if (type != "string") {
|
|
||||||
throw std::runtime_error("Invalid type in python tool: " + type.dump());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
tool_rules.push_back(builder.add_rule(name + "-call", "\"<function=" + name + ">\" " + builder.add_schema(name + "-args", parameters) + " \"</function>\" space"));
|
|
||||||
});
|
|
||||||
if (has_raw_python) {
|
|
||||||
tool_rules.push_back(builder.add_rule("python-call", "\"<|python_tag|>\" .*"));
|
|
||||||
data.grammar_triggers.push_back({"<|python_tag|>", /* .at_start = */ false});
|
|
||||||
}
|
|
||||||
auto tool_call = builder.add_rule("tool_call", string_join(tool_rules, " | ")) + " space";
|
|
||||||
builder.add_rule("root", inputs.parallel_tool_calls ? "(" + tool_call + ")+" : tool_call);
|
|
||||||
data.grammar_triggers.push_back({"<function=", /* .at_start = */ false});
|
|
||||||
}, grammar_options);
|
|
||||||
|
|
||||||
data.prompt = apply(tmpl, inputs.messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt);
|
|
||||||
// TODO: if (has_raw_python)
|
|
||||||
data.format = COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1;
|
|
||||||
return data;
|
|
||||||
}
|
|
||||||
static common_chat_msg common_chat_parse_functionary_v3_1_llama_3_1(const std::string & input) {
|
|
||||||
// This version of Functionary still supports the llama 3.1 tool call format for the python tool.
|
|
||||||
static std::regex python_tag_regex(R"(<\|python_tag\|>([\s\S\n]*)$)");
|
|
||||||
std::smatch match;
|
|
||||||
if (std::regex_search(input, match, python_tag_regex)) {
|
|
||||||
auto code = match[1].str();
|
|
||||||
return {
|
|
||||||
/* .role = */ "assistant",
|
|
||||||
/* .content = */ match.prefix().str(),
|
|
||||||
/* .tool_calls = */ {
|
|
||||||
{
|
|
||||||
/* .name = */ "python",
|
|
||||||
/* .arguments = */ (json {{"code", code}}).dump(),
|
|
||||||
/* .id = */ "",
|
|
||||||
},
|
|
||||||
}
|
|
||||||
};
|
|
||||||
}
|
|
||||||
static std::regex function_regex(R"(<function=(\w+)>)");
|
|
||||||
static std::regex close_regex(R"(</function>)");
|
|
||||||
// TODO: tighten & simplify.
|
|
||||||
return parse_json_tool_calls(input, std::nullopt, function_regex, close_regex);
|
|
||||||
}
|
|
||||||
|
|
||||||
static common_chat_params common_chat_params_init_hermes_2_pro(const common_chat_template & tmpl, const struct common_chat_inputs & inputs) {
|
|
||||||
common_chat_params data;
|
|
||||||
// (content)?(<tool_call>{"name": "foo", "arguments": {"a": 1}}</tool_call>)*
|
|
||||||
data.grammar_lazy = inputs.tool_choice != "required";
|
|
||||||
data.grammar = build_grammar([&](const common_grammar_builder & builder) {
|
|
||||||
std::vector<std::string> tool_rules;
|
|
||||||
foreach_function(inputs.tools, [&](const json & tool) {
|
|
||||||
const auto & function = tool["function"];
|
|
||||||
std::string name = function["name"];
|
|
||||||
auto parameters = function["parameters"];
|
|
||||||
builder.resolve_refs(parameters);
|
|
||||||
tool_rules.push_back(builder.add_schema(name + "-call", {
|
|
||||||
{"type", "object"},
|
|
||||||
{"properties", json {
|
|
||||||
{"name", json {{"const", name}}},
|
|
||||||
{"arguments", parameters},
|
|
||||||
}},
|
|
||||||
{"required", json::array({"name", "arguments"})},
|
|
||||||
}));
|
|
||||||
});
|
|
||||||
auto tool_call = "\"<tool_call>\" space " + builder.add_rule("tool_call", string_join(tool_rules, " | ")) + " \"</tool_call>\" space";
|
|
||||||
builder.add_rule("root", inputs.parallel_tool_calls ? "(" + tool_call + ")+" : tool_call);
|
|
||||||
data.grammar_triggers.push_back({"<tool_call>", /* .at_start = */ false});
|
|
||||||
data.preserved_tokens = { "</tool_call>" };
|
|
||||||
}, grammar_options);
|
|
||||||
|
|
||||||
data.prompt = apply(tmpl, inputs.messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt);
|
|
||||||
data.format = COMMON_CHAT_FORMAT_HERMES_2_PRO;
|
|
||||||
return data;
|
|
||||||
}
|
|
||||||
static common_chat_msg common_chat_parse_hermes_2_pro(const std::string & input) {
|
|
||||||
try {
|
|
||||||
std::regex start_pattern(R"([\n\s]*<tool_call>)");
|
|
||||||
std::regex middle_pattern(R"([\n\s]*</tool_call>[\n\s]*<tool_call>)");
|
|
||||||
std::regex end_pattern(R"([\n\s]*</tool_call>[\n\s]*$)");
|
|
||||||
|
|
||||||
auto end = input.end();
|
|
||||||
std::sregex_iterator rend;
|
|
||||||
std::sregex_iterator rit(input.begin(), end, start_pattern);
|
|
||||||
if (rit == rend) {
|
|
||||||
return {
|
|
||||||
/* .role = */ "assistant",
|
|
||||||
/* .content = */ input,
|
|
||||||
/* .tool_calls = */ {},
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
common_chat_msg result;
|
|
||||||
result.role = "assistant";
|
|
||||||
result.content = rit->prefix();
|
|
||||||
|
|
||||||
auto it = rit->suffix().first;
|
|
||||||
while (it != end) {
|
|
||||||
json call;
|
|
||||||
if (!parse_json(it, end, call)) {
|
|
||||||
throw std::runtime_error("Failed to parse json tool call");
|
|
||||||
}
|
|
||||||
const auto & arguments = call["arguments"];
|
|
||||||
result.tool_calls.push_back({
|
|
||||||
call["name"],
|
|
||||||
arguments.dump(),
|
|
||||||
// arguments.is_string() ? arguments.get<std::string>() : arguments.dump(),
|
|
||||||
/* id= */ "",
|
|
||||||
});
|
|
||||||
rit = {it, end, middle_pattern};
|
|
||||||
if (rit != rend) {
|
|
||||||
it = rit->suffix().first;
|
|
||||||
} else {
|
|
||||||
rit = {it, end, end_pattern};
|
|
||||||
if (rit == rend) {
|
|
||||||
throw std::runtime_error("Malformed input, missing </tool_call>");
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return result;
|
|
||||||
} catch (const std::exception & e) {
|
|
||||||
return {
|
|
||||||
/* .role = */ "assistant",
|
|
||||||
/* .content = */ input,
|
|
||||||
/* .tool_calls = */ {},
|
|
||||||
};
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static common_chat_params common_chat_params_init_without_tools(const common_chat_template & tmpl, const struct common_chat_inputs & inputs) {
    common_chat_params data;
    data.prompt = apply(tmpl, inputs.messages, inputs.tools.empty() ? json() : inputs.tools, inputs.add_generation_prompt);
    data.format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
    data.grammar_lazy = false;
    if (!inputs.json_schema.is_null()) {
        if (!inputs.grammar.empty()) {
            throw std::runtime_error("Either \"json_schema\" or \"grammar\" can be specified, but not both");
        }
        data.grammar = json_schema_to_grammar(inputs.json_schema);
    } else {
        data.grammar = inputs.grammar;
    }
    return data;
}
common_chat_params common_chat_params_init(const common_chat_template & tmpl, const struct common_chat_inputs & inputs) {
    auto has_tools = !inputs.tools.is_null() && inputs.tool_choice != "none";
    LOG_DBG("[%s] has_tools=%s\n", __func__, has_tools ? "true" : "false");

    if (has_tools && !inputs.grammar.empty()) {
        throw std::runtime_error("Cannot specify grammar with tools");
    }

    const auto & src = tmpl.source();
    if (src.find(">>>all") != std::string::npos) {
        // Functionary prepends "all\n" to plain content outputs, so we use the parser no matter when
        return common_chat_params_init_functionary_v3_2(tmpl, inputs);
    }
    if (src.find(" functools[") != std::string::npos) {
        // Firefunction v2 requires datetime and functions in the context, even w/o tools.
        return common_chat_params_init_firefunction_v2(tmpl, inputs);
    }

    if (!has_tools) {
        return common_chat_params_init_without_tools(tmpl, inputs);
    }

    if (src.find("<tool_call>") != std::string::npos) {
        return common_chat_params_init_hermes_2_pro(tmpl, inputs);
    }
    if (src.find("<|start_header_id|>") != std::string::npos
        && src.find("<function=") != std::string::npos) {
        return common_chat_params_init_functionary_v3_1_llama_3_1(tmpl, inputs);
    }
    if (src.find("<|start_header_id|>ipython<|end_header_id|>") != std::string::npos) {
        auto allow_python_tag_builtin_tools = src.find("<|python_tag|>") != std::string::npos;
        return common_chat_params_init_llama_3_1_tool_calls(tmpl, inputs, allow_python_tag_builtin_tools);
    }
    if (src.find("<|tool▁calls▁begin|>") != std::string::npos) {
        return common_chat_params_init_deepseek_r1(tmpl, inputs);
    }
    if (src.find("[TOOL_CALLS]") != std::string::npos) {
        return common_chat_params_init_mistral_nemo(tmpl, inputs);
    }
    if (src.find("<|END_THINKING|><|START_ACTION|>") != std::string::npos) {
        return common_chat_params_init_command_r7b(tmpl, inputs);
    }
    return common_chat_params_init_generic(tmpl, inputs);
}
static common_chat_msg common_chat_parse_content_only(const std::string & input) {
    return {
        /* .role = */ "assistant",
        /* .content = */ input,
        /* .tool_calls = */ {},
    };
}

common_chat_msg common_chat_parse(const std::string & input, common_chat_format format) {
    switch (format) {
        case COMMON_CHAT_FORMAT_CONTENT_ONLY:
            return common_chat_parse_content_only(input);
        case COMMON_CHAT_FORMAT_GENERIC:
            return common_chat_parse_generic(input);
        case COMMON_CHAT_FORMAT_MISTRAL_NEMO:
            return common_chat_parse_mistral_nemo(input);
        case COMMON_CHAT_FORMAT_LLAMA_3_X:
            return common_chat_parse_llama_3_1(input);
        case COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS:
            return common_chat_parse_llama_3_1(input, /* with_builtin_tools= */ true);
        case COMMON_CHAT_FORMAT_DEEPSEEK_R1:
            return common_chat_parse_deepseek_r1(input);
        case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2:
            return common_chat_parse_functionary_v3_2(input);
        case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1:
            return common_chat_parse_functionary_v3_1_llama_3_1(input);
        case COMMON_CHAT_FORMAT_HERMES_2_PRO:
            return common_chat_parse_hermes_2_pro(input);
        case COMMON_CHAT_FORMAT_FIREFUNCTION_V2:
            return common_chat_parse_firefunction_v2(input);
        case COMMON_CHAT_FORMAT_COMMAND_R7B:
            return common_chat_parse_command_r7b(input);
        default:
            throw std::runtime_error("Unsupported format: " + common_chat_format_name(format));
    }
}
@ -1,52 +0,0 @@
// Chat support (incl. tool call grammar constraining & output parsing) w/ generic & custom template handlers.

#pragma once

#include "common.h"
#include <json.hpp>
#include <optional>
#include <string>
#include <vector>

using json = nlohmann::ordered_json;

struct common_chat_inputs {
    json messages;
    json tools;
    json tool_choice;
    json json_schema;
    bool parallel_tool_calls;
    bool stream;
    std::string grammar;
    bool add_generation_prompt = true;
};

enum common_chat_format {
    COMMON_CHAT_FORMAT_CONTENT_ONLY,
    COMMON_CHAT_FORMAT_GENERIC,
    COMMON_CHAT_FORMAT_MISTRAL_NEMO,
    COMMON_CHAT_FORMAT_LLAMA_3_X,
    COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS,
    COMMON_CHAT_FORMAT_DEEPSEEK_R1,
    COMMON_CHAT_FORMAT_FIREFUNCTION_V2,
    COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2,
    COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1,
    COMMON_CHAT_FORMAT_HERMES_2_PRO,
    COMMON_CHAT_FORMAT_COMMAND_R7B,

    COMMON_CHAT_FORMAT_COUNT, // Not a format, just the # formats
};

struct common_chat_params {
    common_chat_format format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
    json prompt;
    std::string grammar;
    bool grammar_lazy = false;
    std::vector<common_grammar_trigger> grammar_triggers;
    std::vector<std::string> preserved_tokens;
    std::vector<std::string> additional_stops;
};

struct common_chat_params common_chat_params_init(const common_chat_template & tmpl, const struct common_chat_inputs & params);
std::string common_chat_format_name(common_chat_format format);
common_chat_msg common_chat_parse(const std::string & input, common_chat_format format);
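How these params are consumed is not shown in this header; below is a hedged sketch (illustrative wiring only, not the upstream server code) of copying them into the sampling parameters declared in common/common.h further down:

// Illustrative glue code; the field names come from this diff, the wiring itself is assumed.
static void apply_chat_grammar(const common_chat_params & chat, common_params_sampling & sparams) {
    sparams.grammar      = chat.grammar;
    sparams.grammar_lazy = chat.grammar_lazy;
    for (const auto & trigger : chat.grammar_triggers) {
        // each trigger is a {word, at_start} pair; a lazy grammar only kicks in once a trigger is seen
        sparams.grammar_trigger_words.push_back(trigger);
    }
    // chat.preserved_tokens holds strings (e.g. "</tool_call>"); they would still need to be
    // tokenized before being stored in common_params_sampling::preserved_tokens (a set of token ids).
}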
2922 common/common.cpp
File diff suppressed because it is too large
526 common/common.h
|
@ -2,12 +2,20 @@
|
||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include "llama-cpp.h"
|
#include "llama.h"
|
||||||
|
|
||||||
#include <set>
|
#include "sampling.h"
|
||||||
|
|
||||||
|
#define LOG_NO_FILE_LINE_FUNCTION
|
||||||
|
#include "log.h"
|
||||||
|
|
||||||
|
#include <cmath>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <sstream>
|
#include <random>
|
||||||
|
#include <thread>
|
||||||
|
#include <unordered_map>
|
||||||
|
#include <tuple>
|
||||||
|
|
||||||
#ifdef _WIN32
|
#ifdef _WIN32
|
||||||
#define DIRECTORY_SEPARATOR '\\'
|
#define DIRECTORY_SEPARATOR '\\'
|
||||||
|
@ -25,192 +33,52 @@
|
||||||
|
|
||||||
#define DEFAULT_MODEL_PATH "models/7B/ggml-model-f16.gguf"
|
#define DEFAULT_MODEL_PATH "models/7B/ggml-model-f16.gguf"
|
||||||
|
|
||||||
struct common_adapter_lora_info {
|
|
||||||
std::string path;
|
|
||||||
float scale;
|
|
||||||
|
|
||||||
struct llama_adapter_lora * ptr;
|
|
||||||
};
|
|
||||||
|
|
||||||
using llama_tokens = std::vector<llama_token>;
|
|
||||||
|
|
||||||
// build info
|
// build info
|
||||||
extern int LLAMA_BUILD_NUMBER;
|
extern int LLAMA_BUILD_NUMBER;
|
||||||
extern const char * LLAMA_COMMIT;
|
extern char const * LLAMA_COMMIT;
|
||||||
extern const char * LLAMA_COMPILER;
|
extern char const * LLAMA_COMPILER;
|
||||||
extern const char * LLAMA_BUILD_TARGET;
|
extern char const * LLAMA_BUILD_TARGET;
|
||||||
|
|
||||||
struct common_control_vector_load_info;
|
struct llama_control_vector_load_info;
|
||||||
|
|
||||||
//
|
//
|
||||||
// CPU utils
|
// CPU utils
|
||||||
//
|
//
|
||||||
|
|
||||||
struct cpu_params {
|
|
||||||
int n_threads = -1;
|
|
||||||
bool cpumask[GGML_MAX_N_THREADS] = {false}; // CPU affinity mask.
|
|
||||||
bool mask_valid = false; // Default: any CPU
|
|
||||||
enum ggml_sched_priority priority = GGML_SCHED_PRIO_NORMAL; // Scheduling prio : (0 - normal, 1 - medium, 2 - high, 3 - realtime)
|
|
||||||
bool strict_cpu = false; // Use strict CPU placement
|
|
||||||
uint32_t poll = 50; // Polling (busywait) level (0 - no polling, 100 - mostly polling)
|
|
||||||
};
|
|
||||||
|
|
||||||
int32_t cpu_get_num_physical_cores();
|
int32_t cpu_get_num_physical_cores();
|
||||||
int32_t cpu_get_num_math();
|
int32_t cpu_get_num_math();
|
||||||
|
|
||||||
//
|
//
|
||||||
// Common params
|
// CLI argument parsing
|
||||||
//
|
//
|
||||||
|
|
||||||
enum llama_example {
|
|
||||||
LLAMA_EXAMPLE_COMMON,
|
|
||||||
LLAMA_EXAMPLE_SPECULATIVE,
|
|
||||||
LLAMA_EXAMPLE_MAIN,
|
|
||||||
LLAMA_EXAMPLE_INFILL,
|
|
||||||
LLAMA_EXAMPLE_EMBEDDING,
|
|
||||||
LLAMA_EXAMPLE_PERPLEXITY,
|
|
||||||
LLAMA_EXAMPLE_RETRIEVAL,
|
|
||||||
LLAMA_EXAMPLE_PASSKEY,
|
|
||||||
LLAMA_EXAMPLE_IMATRIX,
|
|
||||||
LLAMA_EXAMPLE_BENCH,
|
|
||||||
LLAMA_EXAMPLE_SERVER,
|
|
||||||
LLAMA_EXAMPLE_CVECTOR_GENERATOR,
|
|
||||||
LLAMA_EXAMPLE_EXPORT_LORA,
|
|
||||||
LLAMA_EXAMPLE_LLAVA,
|
|
||||||
LLAMA_EXAMPLE_LOOKUP,
|
|
||||||
LLAMA_EXAMPLE_PARALLEL,
|
|
||||||
LLAMA_EXAMPLE_TTS,
|
|
||||||
|
|
||||||
LLAMA_EXAMPLE_COUNT,
|
|
||||||
};
|
|
||||||
|
|
||||||
enum common_sampler_type {
|
|
||||||
COMMON_SAMPLER_TYPE_NONE = 0,
|
|
||||||
COMMON_SAMPLER_TYPE_DRY = 1,
|
|
||||||
COMMON_SAMPLER_TYPE_TOP_K = 2,
|
|
||||||
COMMON_SAMPLER_TYPE_TOP_P = 3,
|
|
||||||
COMMON_SAMPLER_TYPE_MIN_P = 4,
|
|
||||||
//COMMON_SAMPLER_TYPE_TFS_Z = 5,
|
|
||||||
COMMON_SAMPLER_TYPE_TYPICAL_P = 6,
|
|
||||||
COMMON_SAMPLER_TYPE_TEMPERATURE = 7,
|
|
||||||
COMMON_SAMPLER_TYPE_XTC = 8,
|
|
||||||
COMMON_SAMPLER_TYPE_INFILL = 9,
|
|
||||||
COMMON_SAMPLER_TYPE_PENALTIES = 10,
|
|
||||||
};
|
|
||||||
|
|
||||||
// dimensionality reduction methods, used by cvector-generator
|
// dimensionality reduction methods, used by cvector-generator
|
||||||
enum dimre_method {
|
enum dimre_method {
|
||||||
DIMRE_METHOD_PCA,
|
DIMRE_METHOD_PCA,
|
||||||
DIMRE_METHOD_MEAN,
|
DIMRE_METHOD_MEAN,
|
||||||
};
|
};
|
||||||
|
|
||||||
enum common_conversation_mode {
|
struct gpt_params {
|
||||||
COMMON_CONVERSATION_MODE_DISABLED = 0,
|
uint32_t seed = LLAMA_DEFAULT_SEED; // RNG seed
|
||||||
COMMON_CONVERSATION_MODE_ENABLED = 1,
|
|
||||||
COMMON_CONVERSATION_MODE_AUTO = 2,
|
|
||||||
};
|
|
||||||
|
|
||||||
struct common_grammar_trigger {
|
int32_t n_threads = cpu_get_num_math();
|
||||||
std::string word;
|
int32_t n_threads_draft = -1;
|
||||||
bool at_start;
|
int32_t n_threads_batch = -1; // number of threads to use for batch processing (-1 = use n_threads)
|
||||||
};
|
int32_t n_threads_batch_draft = -1;
|
||||||
|
|
||||||
// sampling parameters
|
|
||||||
struct common_params_sampling {
|
|
||||||
uint32_t seed = LLAMA_DEFAULT_SEED; // the seed used to initialize llama_sampler
|
|
||||||
|
|
||||||
int32_t n_prev = 64; // number of previous tokens to remember
|
|
||||||
int32_t n_probs = 0; // if greater than 0, output the probabilities of top n_probs tokens.
|
|
||||||
int32_t min_keep = 0; // 0 = disabled, otherwise samplers should return at least min_keep tokens
|
|
||||||
int32_t top_k = 40; // <= 0 to use vocab size
|
|
||||||
float top_p = 0.95f; // 1.0 = disabled
|
|
||||||
float min_p = 0.05f; // 0.0 = disabled
|
|
||||||
float xtc_probability = 0.00f; // 0.0 = disabled
|
|
||||||
float xtc_threshold = 0.10f; // > 0.5 disables XTC
|
|
||||||
float typ_p = 1.00f; // typical_p, 1.0 = disabled
|
|
||||||
float temp = 0.80f; // <= 0.0 to sample greedily, 0.0 to not output probabilities
|
|
||||||
float dynatemp_range = 0.00f; // 0.0 = disabled
|
|
||||||
float dynatemp_exponent = 1.00f; // controls how entropy maps to temperature in dynamic temperature sampler
|
|
||||||
int32_t penalty_last_n = 64; // last n tokens to penalize (0 = disable penalty, -1 = context size)
|
|
||||||
float penalty_repeat = 1.00f; // 1.0 = disabled
|
|
||||||
float penalty_freq = 0.00f; // 0.0 = disabled
|
|
||||||
float penalty_present = 0.00f; // 0.0 = disabled
|
|
||||||
float dry_multiplier = 0.0f; // 0.0 = disabled; DRY repetition penalty for tokens extending repetition:
|
|
||||||
float dry_base = 1.75f; // 0.0 = disabled; multiplier * base ^ (length of sequence before token - allowed length)
|
|
||||||
int32_t dry_allowed_length = 2; // tokens extending repetitions beyond this receive penalty
|
|
||||||
int32_t dry_penalty_last_n = -1; // how many tokens to scan for repetitions (0 = disable penalty, -1 = context size)
|
|
||||||
int32_t mirostat = 0; // 0 = disabled, 1 = mirostat, 2 = mirostat 2.0
|
|
||||||
float mirostat_tau = 5.00f; // target entropy
|
|
||||||
float mirostat_eta = 0.10f; // learning rate
|
|
||||||
bool ignore_eos = false;
|
|
||||||
bool no_perf = false; // disable performance metrics
|
|
||||||
bool timing_per_token = false;
|
|
||||||
|
|
||||||
std::vector<std::string> dry_sequence_breakers = {"\n", ":", "\"", "*"}; // default sequence breakers for DRY
|
|
||||||
|
|
||||||
|
|
||||||
std::vector<enum common_sampler_type> samplers = {
|
|
||||||
COMMON_SAMPLER_TYPE_PENALTIES,
|
|
||||||
COMMON_SAMPLER_TYPE_DRY,
|
|
||||||
COMMON_SAMPLER_TYPE_TOP_K,
|
|
||||||
COMMON_SAMPLER_TYPE_TYPICAL_P,
|
|
||||||
COMMON_SAMPLER_TYPE_TOP_P,
|
|
||||||
COMMON_SAMPLER_TYPE_MIN_P,
|
|
||||||
COMMON_SAMPLER_TYPE_XTC,
|
|
||||||
COMMON_SAMPLER_TYPE_TEMPERATURE,
|
|
||||||
};
|
|
||||||
|
|
||||||
std::string grammar; // optional BNF-like grammar to constrain sampling
|
|
||||||
bool grammar_lazy = false;
|
|
||||||
std::vector<common_grammar_trigger> grammar_trigger_words; // optional trigger words to trigger lazy grammar
|
|
||||||
std::vector<llama_token> grammar_trigger_tokens; // optional trigger tokens to trigger lazy grammar and print trigger special tokens.
|
|
||||||
std::set<llama_token> preserved_tokens;
|
|
||||||
|
|
||||||
std::vector<llama_logit_bias> logit_bias; // logit biases to apply
|
|
||||||
|
|
||||||
// print the parameters into a string
|
|
||||||
std::string print() const;
|
|
||||||
};
|
|
||||||
|
|
||||||
struct common_params_speculative {
|
|
||||||
std::vector<ggml_backend_dev_t> devices; // devices to use for offloading
|
|
||||||
|
|
||||||
int32_t n_ctx = 0; // draft context size
|
|
||||||
int32_t n_max = 16; // maximum number of tokens to draft during speculative decoding
|
|
||||||
int32_t n_min = 5; // minimum number of draft tokens to use for speculative decoding
|
|
||||||
int32_t n_gpu_layers = -1; // number of layers to store in VRAM for the draft model (-1 - use default)
|
|
||||||
float p_split = 0.1f; // speculative decoding split probability
|
|
||||||
float p_min = 0.9f; // minimum speculative decoding probability (greedy)
|
|
||||||
|
|
||||||
struct cpu_params cpuparams;
|
|
||||||
struct cpu_params cpuparams_batch;
|
|
||||||
|
|
||||||
std::string hf_repo = ""; // HF repo // NOLINT
|
|
||||||
std::string hf_file = ""; // HF file // NOLINT
|
|
||||||
|
|
||||||
std::string model = ""; // draft model for speculative decoding // NOLINT
|
|
||||||
std::string model_url = ""; // model url to download // NOLINT
|
|
||||||
};
|
|
||||||
|
|
||||||
struct common_params_vocoder {
|
|
||||||
std::string hf_repo = ""; // HF repo // NOLINT
|
|
||||||
std::string hf_file = ""; // HF file // NOLINT
|
|
||||||
|
|
||||||
std::string model = ""; // model path // NOLINT
|
|
||||||
std::string model_url = ""; // model url to download // NOLINT
|
|
||||||
|
|
||||||
bool use_guide_tokens = false; // enable guide tokens to improve TTS accuracy // NOLINT
|
|
||||||
};
|
|
||||||
|
|
||||||
struct common_params {
|
|
||||||
int32_t n_predict = -1; // new tokens to predict
|
int32_t n_predict = -1; // new tokens to predict
|
||||||
int32_t n_ctx = 4096; // context size
|
int32_t n_ctx = 0; // context size
|
||||||
int32_t n_batch = 2048; // logical batch size for prompt processing (must be >=32 to use BLAS)
|
int32_t n_batch = 2048; // logical batch size for prompt processing (must be >=32 to use BLAS)
|
||||||
int32_t n_ubatch = 512; // physical batch size for prompt processing (must be >=32 to use BLAS)
|
int32_t n_ubatch = 512; // physical batch size for prompt processing (must be >=32 to use BLAS)
|
||||||
int32_t n_keep = 0; // number of tokens to keep from initial prompt
|
int32_t n_keep = 0; // number of tokens to keep from initial prompt
|
||||||
|
int32_t n_draft = 5; // number of tokens to draft during speculative decoding
|
||||||
int32_t n_chunks = -1; // max number of chunks to process (-1 = unlimited)
|
int32_t n_chunks = -1; // max number of chunks to process (-1 = unlimited)
|
||||||
int32_t n_parallel = 1; // number of parallel sequences to decode
|
int32_t n_parallel = 1; // number of parallel sequences to decode
|
||||||
int32_t n_sequences = 1; // number of sequences to decode
|
int32_t n_sequences = 1; // number of sequences to decode
|
||||||
|
float p_split = 0.1f; // speculative decoding split probability
|
||||||
|
int32_t n_gpu_layers = -1; // number of layers to store in VRAM (-1 - use default)
|
||||||
|
int32_t n_gpu_layers_draft = -1; // number of layers to store in VRAM for the draft model (-1 - use default)
|
||||||
|
int32_t main_gpu = 0; // the GPU that is used for scratch and small tensors
|
||||||
|
float tensor_split[128] = {0}; // how split tensors should be distributed across GPUs
|
||||||
int32_t grp_attn_n = 1; // group-attention factor
|
int32_t grp_attn_n = 1; // group-attention factor
|
||||||
int32_t grp_attn_w = 512; // group-attention width
|
int32_t grp_attn_w = 512; // group-attention width
|
||||||
int32_t n_print = -1; // print token count every n tokens (-1 = disabled)
|
int32_t n_print = -1; // print token count every n tokens (-1 = disabled)
|
||||||
|
@ -221,56 +89,46 @@ struct common_params {
|
||||||
float yarn_beta_fast = 32.0f; // YaRN low correction dim
|
float yarn_beta_fast = 32.0f; // YaRN low correction dim
|
||||||
float yarn_beta_slow = 1.0f; // YaRN high correction dim
|
float yarn_beta_slow = 1.0f; // YaRN high correction dim
|
||||||
int32_t yarn_orig_ctx = 0; // YaRN original context length
|
int32_t yarn_orig_ctx = 0; // YaRN original context length
|
||||||
float defrag_thold = 0.1f; // KV cache defragmentation threshold
|
float defrag_thold = -1.0f; // KV cache defragmentation threshold
|
||||||
|
|
||||||
// offload params
|
|
||||||
std::vector<ggml_backend_dev_t> devices; // devices to use for offloading
|
|
||||||
|
|
||||||
int32_t n_gpu_layers = -1; // number of layers to store in VRAM (-1 - use default)
|
|
||||||
int32_t main_gpu = 0; // the GPU that is used for scratch and small tensors
|
|
||||||
float tensor_split[128] = {0}; // how split tensors should be distributed across GPUs
|
|
||||||
|
|
||||||
enum llama_split_mode split_mode = LLAMA_SPLIT_MODE_LAYER; // how to split the model across GPUs
|
|
||||||
|
|
||||||
struct cpu_params cpuparams;
|
|
||||||
struct cpu_params cpuparams_batch;
|
|
||||||
|
|
||||||
ggml_backend_sched_eval_callback cb_eval = nullptr;
|
ggml_backend_sched_eval_callback cb_eval = nullptr;
|
||||||
void * cb_eval_user_data = nullptr;
|
void * cb_eval_user_data = nullptr;
|
||||||
|
|
||||||
ggml_numa_strategy numa = GGML_NUMA_STRATEGY_DISABLED;
|
ggml_numa_strategy numa = GGML_NUMA_STRATEGY_DISABLED;
|
||||||
|
|
||||||
|
enum llama_split_mode split_mode = LLAMA_SPLIT_MODE_LAYER; // how to split the model across GPUs
|
||||||
enum llama_rope_scaling_type rope_scaling_type = LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED;
|
enum llama_rope_scaling_type rope_scaling_type = LLAMA_ROPE_SCALING_TYPE_UNSPECIFIED;
|
||||||
enum llama_pooling_type pooling_type = LLAMA_POOLING_TYPE_UNSPECIFIED; // pooling type for embeddings
|
enum llama_pooling_type pooling_type = LLAMA_POOLING_TYPE_UNSPECIFIED; // pooling type for embeddings
|
||||||
enum llama_attention_type attention_type = LLAMA_ATTENTION_TYPE_UNSPECIFIED; // attention type for embeddings
|
|
||||||
|
|
||||||
struct common_params_sampling sampling;
|
// // sampling parameters
|
||||||
struct common_params_speculative speculative;
|
struct llama_sampling_params sparams;
|
||||||
struct common_params_vocoder vocoder;
|
|
||||||
|
|
||||||
std::string model = ""; // model path // NOLINT
|
std::string model = ""; // model path
|
||||||
std::string model_alias = ""; // model alias // NOLINT
|
std::string model_draft = ""; // draft model for speculative decoding
|
||||||
std::string model_url = ""; // model url to download // NOLINT
|
std::string model_alias = "unknown"; // model alias
|
||||||
std::string hf_token = ""; // HF token // NOLINT
|
std::string model_url = ""; // model url to download
|
||||||
std::string hf_repo = ""; // HF repo // NOLINT
|
std::string hf_repo = ""; // HF repo
|
||||||
std::string hf_file = ""; // HF file // NOLINT
|
std::string hf_file = ""; // HF file
|
||||||
std::string prompt = ""; // NOLINT
|
std::string prompt = "";
|
||||||
std::string prompt_file = ""; // store the external prompt file name // NOLINT
|
std::string prompt_file = ""; // store the external prompt file name
|
||||||
std::string path_prompt_cache = ""; // path to file for saving/loading prompt eval state // NOLINT
|
std::string path_prompt_cache = ""; // path to file for saving/loading prompt eval state
|
||||||
std::string input_prefix = ""; // string to prefix user inputs with // NOLINT
|
std::string input_prefix = ""; // string to prefix user inputs with
|
||||||
std::string input_suffix = ""; // string to suffix user inputs with // NOLINT
|
std::string input_suffix = ""; // string to suffix user inputs with
|
||||||
std::string lookup_cache_static = ""; // path of static ngram cache file for lookup decoding // NOLINT
|
std::string logdir = ""; // directory in which to save YAML log files
|
||||||
std::string lookup_cache_dynamic = ""; // path of dynamic ngram cache file for lookup decoding // NOLINT
|
std::string lookup_cache_static = ""; // path of static ngram cache file for lookup decoding
|
||||||
std::string logits_file = ""; // file for saving *all* logits // NOLINT
|
std::string lookup_cache_dynamic = ""; // path of dynamic ngram cache file for lookup decoding
|
||||||
|
std::string logits_file = ""; // file for saving *all* logits
|
||||||
|
std::string rpc_servers = ""; // comma separated list of RPC servers
|
||||||
|
|
||||||
std::vector<std::string> in_files; // all input files
|
std::vector<std::string> in_files; // all input files
|
||||||
std::vector<std::string> antiprompt; // strings upon which more user input is prompted (a.k.a. reverse prompts)
|
std::vector<std::string> antiprompt; // strings upon which more user input is prompted (a.k.a. reverse prompts)
|
||||||
std::vector<llama_model_kv_override> kv_overrides;
|
std::vector<llama_model_kv_override> kv_overrides;
|
||||||
|
|
||||||
bool lora_init_without_apply = false; // only load lora to memory, but do not apply it to ctx (user can manually apply lora later using llama_adapter_lora_apply)
|
// TODO: avoid tuple, use struct
|
||||||
std::vector<common_adapter_lora_info> lora_adapters; // lora adapter path with user defined scale
|
std::vector<std::tuple<std::string, float>> lora_adapter; // lora adapter path with user defined scale
|
||||||
|
std::string lora_base = ""; // base model path for the lora adapter
|
||||||
|
|
||||||
std::vector<common_control_vector_load_info> control_vectors; // control vector with user defined scale
|
std::vector<llama_control_vector_load_info> control_vectors; // control vector with user defined scale
|
||||||
|
|
||||||
int32_t verbosity = 0;
|
int32_t verbosity = 0;
|
||||||
int32_t control_vector_layer_start = -1; // layer range for control vector
|
int32_t control_vector_layer_start = -1; // layer range for control vector
|
||||||
|
@ -296,6 +154,7 @@ struct common_params {
|
||||||
bool special = false; // enable special token output
|
bool special = false; // enable special token output
|
||||||
bool interactive = false; // interactive mode
|
bool interactive = false; // interactive mode
|
||||||
bool interactive_first = false; // wait for user input immediately
|
bool interactive_first = false; // wait for user input immediately
|
||||||
|
bool conversation = false; // conversation mode (does not print special tokens and suffix/prefix)
|
||||||
bool prompt_cache_all = false; // save user input and generations to prompt cache
|
bool prompt_cache_all = false; // save user input and generations to prompt cache
|
||||||
bool prompt_cache_ro = false; // open the prompt cache read-only and do not update it
|
bool prompt_cache_ro = false; // open the prompt cache read-only and do not update it
|
||||||
|
|
||||||
|
@ -304,58 +163,51 @@ struct common_params {
|
||||||
bool simple_io = false; // improves compatibility with subprocesses and limited consoles
|
bool simple_io = false; // improves compatibility with subprocesses and limited consoles
|
||||||
bool cont_batching = true; // insert new sequences for decoding on-the-fly
|
bool cont_batching = true; // insert new sequences for decoding on-the-fly
|
||||||
bool flash_attn = false; // flash attention
|
bool flash_attn = false; // flash attention
|
||||||
bool no_perf = false; // disable performance metrics
|
|
||||||
bool ctx_shift = true; // context shift on infinite text generation
|
|
||||||
|
|
||||||
bool input_prefix_bos = false; // prefix BOS to user inputs, preceding input_prefix
|
bool input_prefix_bos = false; // prefix BOS to user inputs, preceding input_prefix
|
||||||
|
bool ignore_eos = false; // ignore generated EOS tokens
|
||||||
bool logits_all = false; // return logits for all tokens in the batch
|
bool logits_all = false; // return logits for all tokens in the batch
|
||||||
bool use_mmap = true; // use mmap for faster loads
|
bool use_mmap = true; // use mmap for faster loads
|
||||||
bool use_mlock = false; // use mlock to keep model in memory
|
bool use_mlock = false; // use mlock to keep model in memory
|
||||||
bool verbose_prompt = false; // print prompt tokens before generation
|
bool verbose_prompt = false; // print prompt tokens before generation
|
||||||
bool display_prompt = true; // print prompt before generation
|
bool display_prompt = true; // print prompt before generation
|
||||||
|
bool infill = false; // use infill mode
|
||||||
bool dump_kv_cache = false; // dump the KV cache contents for debugging purposes
|
bool dump_kv_cache = false; // dump the KV cache contents for debugging purposes
|
||||||
bool no_kv_offload = false; // disable KV offloading
|
bool no_kv_offload = false; // disable KV offloading
|
||||||
bool warmup = true; // warmup run
|
bool warmup = true; // warmup run
|
||||||
bool check_tensors = false; // validate tensor data
|
bool check_tensors = false; // validate tensor data
|
||||||
|
|
||||||
ggml_type cache_type_k = GGML_TYPE_F16; // KV cache data type for the K
|
std::string cache_type_k = "f16"; // KV cache data type for the K
|
||||||
ggml_type cache_type_v = GGML_TYPE_F16; // KV cache data type for the V
|
std::string cache_type_v = "f16"; // KV cache data type for the V
|
||||||
|
|
||||||
common_conversation_mode conversation_mode = COMMON_CONVERSATION_MODE_AUTO;
|
|
||||||
|
|
||||||
// multimodal models (see examples/llava)
|
// multimodal models (see examples/llava)
|
||||||
std::string mmproj = ""; // path to multimodal projector // NOLINT
|
std::string mmproj = ""; // path to multimodal projector
|
||||||
std::vector<std::string> image; // path to image file(s)
|
std::vector<std::string> image; // path to image file(s)
|
||||||
|
|
||||||
// embedding
|
// embedding
|
||||||
bool embedding = false; // get only sentence embedding
|
bool embedding = false; // get only sentence embedding
|
||||||
int32_t embd_normalize = 2; // normalisation for embeddings (-1=none, 0=max absolute int16, 1=taxicab, 2=euclidean, >2=p-norm)
|
int32_t embd_normalize = 2; // normalisation for embendings (-1=none, 0=max absolute int16, 1=taxicab, 2=euclidean, >2=p-norm)
|
||||||
std::string embd_out = ""; // empty = default, "array" = [[],[]...], "json" = openai style, "json+" = same "json" + cosine similarity matrix
|
std::string embd_out = ""; // empty = default, "array" = [[],[]...], "json" = openai style, "json+" = same "json" + cosine similarity matrix
|
||||||
std::string embd_sep = "\n"; // separator of embeddings
|
std::string embd_sep = "\n"; // separator of embendings
|
||||||
bool reranking = false; // enable reranking support on server
|
|
||||||
|
|
||||||
// server params
|
// server params
|
||||||
int32_t port = 8080; // server listens on this network port
|
int32_t port = 8080; // server listens on this network port
|
||||||
int32_t timeout_read = 600; // http read timeout in seconds
|
int32_t timeout_read = 600; // http read timeout in seconds
|
||||||
int32_t timeout_write = timeout_read; // http write timeout in seconds
|
int32_t timeout_write = timeout_read; // http write timeout in seconds
|
||||||
int32_t n_threads_http = -1; // number of threads to process HTTP requests (TODO: support threadpool)
|
int32_t n_threads_http = -1; // number of threads to process HTTP requests
|
||||||
int32_t n_cache_reuse = 0; // min chunk size to reuse from the cache via KV shifting
|
|
||||||
|
|
||||||
std::string hostname = "127.0.0.1";
|
std::string hostname = "127.0.0.1";
|
||||||
std::string public_path = ""; // NOLINT
|
std::string public_path = "";
|
||||||
std::string chat_template = ""; // NOLINT
|
std::string chat_template = "";
|
||||||
bool use_jinja = false; // NOLINT
|
std::string system_prompt = "";
|
||||||
bool enable_chat_template = true;
|
bool enable_chat_template = true;
|
||||||
|
|
||||||
std::vector<std::string> api_keys;
|
std::vector<std::string> api_keys;
|
||||||
|
|
||||||
std::string ssl_file_key = ""; // NOLINT
|
std::string ssl_file_key = "";
|
||||||
std::string ssl_file_cert = ""; // NOLINT
|
std::string ssl_file_cert = "";
|
||||||
|
|
||||||
// "advanced" endpoints are disabled by default for better security
|
bool endpoint_slots = true;
|
||||||
bool webui = true;
|
|
||||||
bool endpoint_slots = false;
|
|
||||||
bool endpoint_props = false; // only control POST requests, not GET
|
|
||||||
bool endpoint_metrics = false;
|
bool endpoint_metrics = false;
|
||||||
|
|
||||||
bool log_json = false;
|
bool log_json = false;
|
||||||
|
@ -401,53 +253,28 @@ struct common_params {
|
||||||
std::string cvector_negative_file = "examples/cvector-generator/negative.txt";
|
std::string cvector_negative_file = "examples/cvector-generator/negative.txt";
|
||||||
|
|
||||||
bool spm_infill = false; // suffix/prefix/middle pattern for infill
|
bool spm_infill = false; // suffix/prefix/middle pattern for infill
|
||||||
|
|
||||||
std::string lora_outfile = "ggml-lora-merged-f16.gguf";
|
|
||||||
|
|
||||||
// batched-bench params
|
|
||||||
bool batched_bench_output_jsonl = false;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
// call once at the start of a program if it uses libcommon
|
void gpt_params_handle_model_default(gpt_params & params);
|
||||||
// initializes the logging system and prints info about the build
|
|
||||||
void common_init();
|
|
||||||
|
|
||||||
std::string common_params_get_system_info(const common_params & params);
|
bool gpt_params_parse_ex (int argc, char ** argv, gpt_params & params);
|
||||||
|
bool gpt_params_parse (int argc, char ** argv, gpt_params & params);
|
||||||
|
bool gpt_params_find_arg (int argc, char ** argv, const std::string & arg, gpt_params & params, int & i, bool & invalid_param);
|
||||||
|
void gpt_params_print_usage(int argc, char ** argv, const gpt_params & params);
|
||||||
|
|
||||||
bool parse_cpu_range(const std::string & range, bool(&boolmask)[GGML_MAX_N_THREADS]);
|
std::string gpt_params_get_system_info(const gpt_params & params);
|
||||||
bool parse_cpu_mask(const std::string & mask, bool(&boolmask)[GGML_MAX_N_THREADS]);
|
|
||||||
void postprocess_cpu_params(cpu_params & cpuparams, const cpu_params * role_model = nullptr);
|
|
||||||
bool set_process_priority(enum ggml_sched_priority prio);
|
|
||||||
|
|
||||||
//
|
//
|
||||||
// String utils
|
// String utils
|
||||||
//
|
//
|
||||||
|
|
||||||
#ifdef __GNUC__
|
std::vector<std::string> string_split(std::string input, char separator);
|
||||||
#ifdef __MINGW32__
|
|
||||||
#define LLAMA_COMMON_ATTRIBUTE_FORMAT(...) __attribute__((format(gnu_printf, __VA_ARGS__)))
|
|
||||||
#else
|
|
||||||
#define LLAMA_COMMON_ATTRIBUTE_FORMAT(...) __attribute__((format(printf, __VA_ARGS__)))
|
|
||||||
#endif
|
|
||||||
#else
|
|
||||||
#define LLAMA_COMMON_ATTRIBUTE_FORMAT(...)
|
|
||||||
#endif
|
|
||||||
|
|
||||||
LLAMA_COMMON_ATTRIBUTE_FORMAT(1, 2)
|
|
||||||
std::string string_format(const char * fmt, ...);
|
|
||||||
|
|
||||||
std::string string_strip(const std::string & str);
|
std::string string_strip(const std::string & str);
|
||||||
std::string string_get_sortable_timestamp();
|
std::string string_get_sortable_timestamp();
|
||||||
|
|
||||||
std::string string_join(const std::vector<std::string> & values, const std::string & separator);
|
|
||||||
std::vector<std::string> string_split(const std::string & str, const std::string & delimiter);
|
|
||||||
std::string string_repeat(const std::string & str, size_t n);
|
|
||||||
|
|
||||||
void string_replace_all(std::string & s, const std::string & search, const std::string & replace);
|
|
||||||
|
|
||||||
template<class T>
|
template<class T>
|
||||||
static std::vector<T> string_split(const std::string & str, char delim) {
|
static std::vector<T> string_split(const std::string & str, char delim) {
|
||||||
static_assert(!std::is_same<T, std::string>::value, "Please use the specialized version for std::string");
|
|
||||||
std::vector<T> values;
|
std::vector<T> values;
|
||||||
std::istringstream str_stream(str);
|
std::istringstream str_stream(str);
|
||||||
std::string token;
|
std::string token;
|
||||||
|
@ -460,40 +287,9 @@ static std::vector<T> string_split(const std::string & str, char delim) {
|
||||||
return values;
|
return values;
|
||||||
}
|
}
|
||||||
|
|
||||||
template<>
|
|
||||||
std::vector<std::string> string_split<std::string>(const std::string & input, char separator)
|
|
||||||
{
|
|
||||||
std::vector<std::string> parts;
|
|
||||||
size_t begin_pos = 0;
|
|
||||||
size_t separator_pos = input.find(separator);
|
|
||||||
while (separator_pos != std::string::npos) {
|
|
||||||
std::string part = input.substr(begin_pos, separator_pos - begin_pos);
|
|
||||||
parts.emplace_back(part);
|
|
||||||
begin_pos = separator_pos + 1;
|
|
||||||
separator_pos = input.find(separator, begin_pos);
|
|
||||||
}
|
|
||||||
parts.emplace_back(input.substr(begin_pos, separator_pos - begin_pos));
|
|
||||||
return parts;
|
|
||||||
}
|
|
||||||
|
|
||||||
static bool string_starts_with(const std::string & str,
|
|
||||||
const std::string & prefix) { // While we wait for C++20's std::string::starts_with...
|
|
||||||
return str.rfind(prefix, 0) == 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
static bool string_ends_with(const std::string & str,
|
|
||||||
const std::string & suffix) { // While we wait for C++20's std::string::ends_with...
|
|
||||||
return str.size() >= suffix.size() && str.compare(str.size()-suffix.size(), suffix.size(), suffix) == 0;
|
|
||||||
}
|
|
||||||
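A small usage sketch (hypothetical inputs, not part of this diff) for the string helpers declared above:

// Illustrative only.
static void string_utils_example() {
    std::vector<std::string> parts = string_split<std::string>("a,b,,c", ',');
    // parts == {"a", "b", "", "c"} — empty fields between separators are kept

    bool p = string_starts_with("llama.cpp", "llama");   // true
    bool s = string_ends_with("model.gguf", ".gguf");     // true
    (void) p; (void) s;
}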
|
|
||||||
bool string_parse_kv_override(const char * data, std::vector<llama_model_kv_override> & overrides);
|
bool string_parse_kv_override(const char * data, std::vector<llama_model_kv_override> & overrides);
|
||||||
void string_process_escapes(std::string & input);
|
void string_process_escapes(std::string & input);
|
||||||
|
|
||||||
std::string string_from(bool value);
|
|
||||||
std::string string_from(const std::vector<int> & values);
|
|
||||||
std::string string_from(const struct llama_context * ctx, const std::vector<llama_token> & tokens);
|
|
||||||
std::string string_from(const struct llama_context * ctx, const struct llama_batch & batch);
|
|
||||||
|
|
||||||
//
|
//
|
||||||
// Filesystem utils
|
// Filesystem utils
|
||||||
//
|
//
|
||||||
|
@ -508,193 +304,133 @@ std::string fs_get_cache_file(const std::string & filename);
|
||||||
// Model utils
|
// Model utils
|
||||||
//
|
//
|
||||||
|
|
||||||
// note: defines object's lifetime
|
// TODO: avoid tuplue, use struct
|
||||||
struct common_init_result {
|
std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_params(gpt_params & params);
|
||||||
llama_model_ptr model;
|
|
||||||
llama_context_ptr context;
|
|
||||||
|
|
||||||
std::vector<llama_adapter_lora_ptr> lora;
|
struct llama_model_params llama_model_params_from_gpt_params (const gpt_params & params);
|
||||||
};
|
struct llama_context_params llama_context_params_from_gpt_params(const gpt_params & params);
|
||||||
|
|
||||||
struct common_init_result common_init_from_params(common_params & params);
|
struct llama_model * llama_load_model_from_url(const char * model_url, const char * path_model, const struct llama_model_params & params);
|
||||||
|
struct llama_model * llama_load_model_from_hf(const char * repo, const char * file, const char * path_model, const struct llama_model_params & params);
|
||||||
|
|
||||||
struct llama_model_params common_model_params_to_llama ( common_params & params);
|
|
||||||
struct llama_context_params common_context_params_to_llama(const common_params & params);
|
|
||||||
struct ggml_threadpool_params ggml_threadpool_params_from_cpu_params(const cpu_params & params);
|
|
||||||
|
|
||||||
struct llama_model * common_load_model_from_url(
|
|
||||||
const std::string & model_url,
|
|
||||||
const std::string & local_path,
|
|
||||||
const std::string & hf_token,
|
|
||||||
const struct llama_model_params & params);
|
|
||||||
|
|
||||||
struct llama_model * common_load_model_from_hf(
|
|
||||||
const std::string & repo,
|
|
||||||
const std::string & remote_path,
|
|
||||||
const std::string & local_path,
|
|
||||||
const std::string & hf_token,
|
|
||||||
const struct llama_model_params & params);
|
|
||||||
|
|
||||||
std::pair<std::string, std::string> common_get_hf_file(
|
|
||||||
const std::string & hf_repo_with_tag,
|
|
||||||
const std::string & hf_token);
|
|
||||||
|
|
||||||
// clear LoRA adapters from context, then apply new list of adapters
|
|
||||||
void common_set_adapter_lora(struct llama_context * ctx, std::vector<common_adapter_lora_info> & lora);
|
|
||||||
|
|
||||||
//
|
|
||||||
// Batch utils
|
// Batch utils
|
||||||
//
|
|
||||||
|
|
||||||
void common_batch_clear(struct llama_batch & batch);
|
void llama_batch_clear(struct llama_batch & batch);
|
||||||
|
|
||||||
void common_batch_add(
|
void llama_batch_add(
|
||||||
struct llama_batch & batch,
|
struct llama_batch & batch,
|
||||||
llama_token id,
|
llama_token id,
|
||||||
llama_pos pos,
|
llama_pos pos,
|
||||||
const std::vector<llama_seq_id> & seq_ids,
|
const std::vector<llama_seq_id> & seq_ids,
|
||||||
bool logits);
|
bool logits);
|
||||||
|
|
||||||
//
|
|
||||||
// Token utils
|
|
||||||
//
|
|
||||||
|
|
||||||
// longest common prefix
|
|
||||||
size_t common_lcp(const llama_tokens & a, const llama_tokens & b);
|
|
||||||
|
|
||||||
// longest common subsequence
|
|
||||||
size_t common_lcs(const llama_tokens & a, const llama_tokens & b);
|
|
||||||
|
|
||||||
//
|
//
|
||||||
// Vocab utils
|
// Vocab utils
|
||||||
//
|
//
|
||||||
|
|
||||||
// tokenizes a string into a vector of tokens
|
// tokenizes a string into a vector of tokens
|
||||||
// should work similar to Python's `tokenizer.encode`
|
// should work similar to Python's `tokenizer.encode`
|
||||||
std::vector<llama_token> common_tokenize(
|
std::vector<llama_token> llama_tokenize(
|
||||||
const struct llama_context * ctx,
|
const struct llama_context * ctx,
|
||||||
const std::string & text,
|
const std::string & text,
|
||||||
bool add_special,
|
bool add_special,
|
||||||
bool parse_special = false);
|
bool parse_special = false);
|
||||||
|
|
||||||
std::vector<llama_token> common_tokenize(
|
std::vector<llama_token> llama_tokenize(
|
||||||
const struct llama_vocab * vocab,
|
const struct llama_model * model,
|
||||||
const std::string & text,
|
const std::string & text,
|
||||||
bool add_special,
|
bool add_special,
|
||||||
bool parse_special = false);
|
bool parse_special = false);
|
||||||
|
|
||||||
// tokenizes a token into a piece, optionally renders special/control tokens
|
// tokenizes a token into a piece, optionally renders special/control tokens
|
||||||
// should work similar to Python's `tokenizer.id_to_piece`
|
// should work similar to Python's `tokenizer.id_to_piece`
|
||||||
std::string common_token_to_piece(
|
std::string llama_token_to_piece(
|
||||||
const struct llama_context * ctx,
|
const struct llama_context * ctx,
|
||||||
llama_token token,
|
llama_token token,
|
||||||
bool special = true);
|
bool special = true);
|
||||||
|
|
||||||
std::string common_token_to_piece(
|
// TODO: these should be moved in llama.h C-style API under single `llama_detokenize` function
|
||||||
const struct llama_vocab * vocab,
|
// that takes into account the tokenizer type and decides how to handle the leading space
|
||||||
llama_token token,
|
//
|
||||||
bool special = true);
|
// detokenizes a vector of tokens into a string
|
||||||
|
// should work similar to Python's `tokenizer.decode`
|
||||||
|
// removes the leading space from the first non-BOS token
|
||||||
|
std::string llama_detokenize_spm(
|
||||||
|
llama_context * ctx,
|
||||||
|
const std::vector<llama_token> & tokens);
|
||||||
|
|
||||||
// detokenizes a vector of tokens into a string
|
// detokenizes a vector of tokens into a string
|
||||||
// should work similar to Python's `tokenizer.decode`
|
// should work similar to Python's `tokenizer.decode`
|
||||||
// optionally renders special/control tokens
|
std::string llama_detokenize_bpe(
|
||||||
std::string common_detokenize(
|
llama_context * ctx,
|
||||||
const struct llama_context * ctx,
|
const std::vector<llama_token> & tokens);
|
||||||
const std::vector<llama_token> & tokens,
|
|
||||||
bool special = true);
|
|
||||||
|
|
||||||
std::string common_detokenize(
|
// Uses the value from the model metadata if possible, otherwise
|
||||||
const struct llama_vocab * vocab,
|
// defaults to true when model type is SPM, otherwise false.
|
||||||
const std::vector<llama_token> & tokens,
|
bool llama_should_add_bos_token(const llama_model * model);
|
||||||
bool special = true);
|
|
||||||
|
|
||||||
//
|
//
|
||||||
// Chat template utils
|
// Chat template utils
|
||||||
//
|
//
|
||||||
|
|
||||||
struct common_tool_call {
|
|
||||||
std::string name;
|
|
||||||
std::string arguments;
|
|
||||||
std::string id;
|
|
||||||
};
|
|
||||||
|
|
||||||
// same with llama_chat_message, but uses std::string
|
// same with llama_chat_message, but uses std::string
|
||||||
struct common_chat_msg {
|
struct llama_chat_msg {
|
||||||
std::string role;
|
std::string role;
|
||||||
std::string content;
|
std::string content;
|
||||||
std::vector<common_tool_call> tool_calls;
|
|
||||||
std::string tool_plan = "";
|
|
||||||
};
|
};
|
||||||
|
|
||||||
// Check if the template supplied via "--chat-template" is supported or not. Returns true if it's valid
|
// Check if the template supplied via "--chat-template" is supported or not. Returns true if it's valid
|
||||||
bool common_chat_verify_template(const std::string & tmpl, bool use_jinja);
|
bool llama_chat_verify_template(const std::string & tmpl);
|
||||||
|
|
||||||
namespace minja {
|
|
||||||
class chat_template;
|
|
||||||
}
|
|
||||||
|
|
||||||
typedef minja::chat_template common_chat_template;
|
|
||||||
|
|
||||||
struct common_chat_templates {
|
|
||||||
bool has_explicit_template; // Model had builtin template or a template override was specified.
|
|
||||||
std::unique_ptr<common_chat_template> template_default; // always set (defaults to chatml)
|
|
||||||
std::unique_ptr<common_chat_template> template_tool_use;
|
|
||||||
};
|
|
||||||
|
|
||||||
// CPP wrapper for llama_chat_apply_template
|
// CPP wrapper for llama_chat_apply_template
|
||||||
// If the built-in template is not supported, we default to chatml
|
// If the built-in template is not supported, we default to chatml
|
||||||
// If the custom "tmpl" is not supported, we throw an error
|
// If the custom "tmpl" is not supported, we throw an error
|
||||||
std::string common_chat_apply_template(
|
std::string llama_chat_apply_template(const struct llama_model * model,
|
||||||
const common_chat_template & tmpl,
|
const std::string & tmpl,
|
||||||
const std::vector<common_chat_msg> & chat,
|
const std::vector<llama_chat_msg> & chat,
|
||||||
bool add_ass,
|
bool add_ass);
|
||||||
bool use_jinja);
|
|
||||||
|
|
||||||
// Format single message, while taking into account the position of that message in chat history
|
// Format single message, while taking into account the position of that message in chat history
|
||||||
std::string common_chat_format_single(
|
std::string llama_chat_format_single(const struct llama_model * model,
|
||||||
const common_chat_template & tmpl,
|
const std::string & tmpl,
|
||||||
const std::vector<common_chat_msg> & past_msg,
|
const std::vector<llama_chat_msg> & past_msg,
|
||||||
const common_chat_msg & new_msg,
|
const llama_chat_msg & new_msg,
|
||||||
bool add_ass,
|
bool add_ass);
|
||||||
bool use_jinja);
|
|
||||||
|
|
||||||
// Returns an example of formatted chat
|
// Returns an example of formatted chat
|
||||||
std::string common_chat_format_example(
|
std::string llama_chat_format_example(const struct llama_model * model,
|
||||||
const common_chat_template & tmpl, bool use_jinja);
|
const std::string & tmpl);
|
||||||
|
|
||||||
common_chat_templates common_chat_templates_from_model(const struct llama_model * model, const std::string & chat_template_override);
|
|
||||||
|
|
||||||
//
|
//
|
||||||
// KV cache utils
|
// KV cache utils
|
||||||
//
|
//
|
||||||
|
|
||||||
// Dump the KV cache view with the number of sequences per cell.
|
// Dump the KV cache view with the number of sequences per cell.
|
||||||
void common_kv_cache_dump_view(const llama_kv_cache_view & view, int row_size = 80);
|
void llama_kv_cache_dump_view(const llama_kv_cache_view & view, int row_size = 80);
|
||||||
|
|
||||||
// Dump the KV cache view showing individual sequences in each cell (long output).
|
// Dump the KV cache view showing individual sequences in each cell (long output).
|
||||||
void common_kv_cache_dump_view_seqs(const llama_kv_cache_view & view, int row_size = 40);
|
void llama_kv_cache_dump_view_seqs(const llama_kv_cache_view & view, int row_size = 40);
|
||||||
|
|
||||||
//
|
//
|
||||||
// Embedding utils
|
// Embedding utils
|
||||||
//
|
//
|
||||||
|
|
||||||
// TODO: replace embd_norm with an enum
|
void llama_embd_normalize(const float * inp, float * out, int n, int embd_norm = 2);
|
||||||
void common_embd_normalize(const float * inp, float * out, int n, int embd_norm);
|
|
||||||
|
|
||||||
float common_embd_similarity_cos(const float * embd1, const float * embd2, int n);
|
float llama_embd_similarity_cos(const float * embd1, const float * embd2, int n);
|
||||||
|
|
||||||
//
|
//
|
||||||
// Control vector utils
|
// Control vector utils
|
||||||
//
|
//
|
||||||
|
|
||||||
struct common_control_vector_data {
|
struct llama_control_vector_data {
|
||||||
int n_embd;
|
int n_embd;
|
||||||
|
|
||||||
// stores data for layers [1, n_layer] where n_layer = data.size() / n_embd
|
// stores data for layers [1, n_layer] where n_layer = data.size() / n_embd
|
||||||
std::vector<float> data;
|
std::vector<float> data;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct common_control_vector_load_info {
|
struct llama_control_vector_load_info {
|
||||||
float strength;
|
float strength;
|
||||||
|
|
||||||
std::string fname;
|
std::string fname;
|
||||||
|
@ -702,16 +438,24 @@ struct common_control_vector_load_info {
|
||||||
|
|
||||||
// Load control vectors, scale each by strength, and add them together.
|
// Load control vectors, scale each by strength, and add them together.
|
||||||
// On error, returns {-1, empty}
|
// On error, returns {-1, empty}
|
||||||
common_control_vector_data common_control_vector_load(const std::vector<common_control_vector_load_info> & load_infos);
|
llama_control_vector_data llama_control_vector_load(const std::vector<llama_control_vector_load_info> & load_infos);
|
||||||
|
|
||||||
//
|
//
|
||||||
// Split utils
|
// Split utils
|
||||||
//
|
//
|
||||||
|
|
||||||
namespace {
|
static const char * const LLM_KV_SPLIT_NO = "split.no";
|
||||||
|
static const char * const LLM_KV_SPLIT_COUNT = "split.count";
|
||||||
|
static const char * const LLM_KV_SPLIT_TENSORS_COUNT = "split.tensors.count";
|
||||||
|
|
||||||
const char * const LLM_KV_SPLIT_NO = "split.no";
|
//
|
||||||
const char * const LLM_KV_SPLIT_COUNT = "split.count";
|
// YAML utils
|
||||||
const char * const LLM_KV_SPLIT_TENSORS_COUNT = "split.tensors.count";
|
//
|
||||||
|
|
||||||
}
|
void yaml_dump_vector_float (FILE * stream, const char * prop_name, const std::vector<float> & data);
|
||||||
|
void yaml_dump_vector_int (FILE * stream, const char * prop_name, const std::vector<int> & data);
|
||||||
|
void yaml_dump_string_multiline(FILE * stream, const char * prop_name, const char * data);
|
||||||
|
|
||||||
|
void yaml_dump_non_result_info(
|
||||||
|
FILE * stream, const gpt_params & params, const llama_context * lctx,
|
||||||
|
const std::string & timestamp, const std::vector<int> & prompt_tokens, const char * model_desc);
|
||||||
|
|
|
@ -94,9 +94,6 @@ namespace console {
|
||||||
simple_io = true;
|
simple_io = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (simple_io) {
|
|
||||||
_setmode(_fileno(stdin), _O_U8TEXT);
|
|
||||||
}
|
|
||||||
#else
|
#else
|
||||||
// POSIX-specific console initialization
|
// POSIX-specific console initialization
|
||||||
if (!simple_io) {
|
if (!simple_io) {
|
||||||
|
|
536 common/grammar-parser.cpp Normal file
|
@ -0,0 +1,536 @@
|
||||||
|
#include "grammar-parser.h"
|
||||||
|
#include <cstdint>
|
||||||
|
#include <cwchar>
|
||||||
|
#include <string>
|
||||||
|
#include <utility>
|
||||||
|
#include <stdexcept>
|
||||||
|
#include <exception>
|
||||||
|
|
||||||
|
namespace grammar_parser {
|
||||||
|
// NOTE: assumes valid utf8 (but checks for overrun)
|
||||||
|
// copied from llama.cpp
|
||||||
|
static std::pair<uint32_t, const char *> decode_utf8(const char * src) {
|
||||||
|
static const int lookup[] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 3, 4 };
|
||||||
|
uint8_t first_byte = static_cast<uint8_t>(*src);
|
||||||
|
uint8_t highbits = first_byte >> 4;
|
||||||
|
int len = lookup[highbits];
|
||||||
|
uint8_t mask = (1 << (8 - len)) - 1;
|
||||||
|
uint32_t value = first_byte & mask;
|
||||||
|
const char * end = src + len; // may overrun!
|
||||||
|
const char * pos = src + 1;
|
||||||
|
for ( ; pos < end && *pos; pos++) {
|
||||||
|
value = (value << 6) + (static_cast<uint8_t>(*pos) & 0x3F);
|
||||||
|
}
|
||||||
|
return std::make_pair(value, pos);
|
||||||
|
}
|
||||||
|
|
||||||
|
static uint32_t get_symbol_id(parse_state & state, const char * src, size_t len) {
|
||||||
|
uint32_t next_id = static_cast<uint32_t>(state.symbol_ids.size());
|
||||||
|
auto result = state.symbol_ids.emplace(std::string(src, len), next_id);
|
||||||
|
return result.first->second;
|
||||||
|
}
|
||||||
|
|
||||||
|
static uint32_t generate_symbol_id(parse_state & state, const std::string & base_name) {
|
||||||
|
uint32_t next_id = static_cast<uint32_t>(state.symbol_ids.size());
|
||||||
|
state.symbol_ids[base_name + '_' + std::to_string(next_id)] = next_id;
|
||||||
|
return next_id;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void add_rule(
|
||||||
|
parse_state & state,
|
||||||
|
uint32_t rule_id,
|
||||||
|
const std::vector<llama_grammar_element> & rule) {
|
||||||
|
if (state.rules.size() <= rule_id) {
|
||||||
|
state.rules.resize(rule_id + 1);
|
||||||
|
}
|
||||||
|
state.rules[rule_id] = rule;
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool is_digit_char(char c) {
|
||||||
|
return '0' <= c && c <= '9';
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool is_word_char(char c) {
|
||||||
|
return ('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || c == '-' || is_digit_char(c);
|
||||||
|
}
|
||||||
|
|
||||||
|
static std::pair<uint32_t, const char *> parse_hex(const char * src, int size) {
|
||||||
|
const char * pos = src;
|
||||||
|
const char * end = src + size;
|
||||||
|
uint32_t value = 0;
|
||||||
|
for ( ; pos < end && *pos; pos++) {
|
||||||
|
value <<= 4;
|
||||||
|
char c = *pos;
|
||||||
|
if ('a' <= c && c <= 'f') {
|
||||||
|
value += c - 'a' + 10;
|
||||||
|
} else if ('A' <= c && c <= 'F') {
|
||||||
|
value += c - 'A' + 10;
|
||||||
|
} else if ('0' <= c && c <= '9') {
|
||||||
|
value += c - '0';
|
||||||
|
} else {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (pos != end) {
|
||||||
|
throw std::runtime_error("expecting " + std::to_string(size) + " hex chars at " + src);
|
||||||
|
}
|
||||||
|
return std::make_pair(value, pos);
|
||||||
|
}
|
||||||
|
|
||||||
|
static const char * parse_space(const char * src, bool newline_ok) {
|
||||||
|
const char * pos = src;
|
||||||
|
while (*pos == ' ' || *pos == '\t' || *pos == '#' ||
|
||||||
|
(newline_ok && (*pos == '\r' || *pos == '\n'))) {
|
||||||
|
if (*pos == '#') {
|
||||||
|
while (*pos && *pos != '\r' && *pos != '\n') {
|
||||||
|
pos++;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
pos++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return pos;
|
||||||
|
}
|
||||||
|
|
||||||
|
static const char * parse_name(const char * src) {
|
||||||
|
const char * pos = src;
|
||||||
|
while (is_word_char(*pos)) {
|
||||||
|
pos++;
|
||||||
|
}
|
||||||
|
if (pos == src) {
|
||||||
|
throw std::runtime_error(std::string("expecting name at ") + src);
|
||||||
|
}
|
||||||
|
return pos;
|
||||||
|
}
|
||||||
|
|
||||||
|
static const char * parse_int(const char * src) {
|
||||||
|
const char * pos = src;
|
||||||
|
while (is_digit_char(*pos)) {
|
||||||
|
pos++;
|
||||||
|
}
|
||||||
|
if (pos == src) {
|
||||||
|
throw std::runtime_error(std::string("expecting integer at ") + src);
|
||||||
|
}
|
||||||
|
return pos;
|
||||||
|
}
|
||||||
|
|
||||||
|
static std::pair<uint32_t, const char *> parse_char(const char * src) {
|
||||||
|
if (*src == '\\') {
|
||||||
|
switch (src[1]) {
|
||||||
|
case 'x': return parse_hex(src + 2, 2);
|
||||||
|
case 'u': return parse_hex(src + 2, 4);
|
||||||
|
case 'U': return parse_hex(src + 2, 8);
|
||||||
|
case 't': return std::make_pair('\t', src + 2);
|
||||||
|
case 'r': return std::make_pair('\r', src + 2);
|
||||||
|
case 'n': return std::make_pair('\n', src + 2);
|
||||||
|
case '\\':
|
||||||
|
case '"':
|
||||||
|
case '[':
|
||||||
|
case ']':
|
||||||
|
return std::make_pair(src[1], src + 2);
|
||||||
|
default:
|
||||||
|
throw std::runtime_error(std::string("unknown escape at ") + src);
|
||||||
|
}
|
||||||
|
} else if (*src) {
|
||||||
|
return decode_utf8(src);
|
||||||
|
}
|
||||||
|
throw std::runtime_error("unexpected end of input");
|
||||||
|
}

const char * parse_alternates(
        parse_state       & state,
        const char        * src,
        const std::string & rule_name,
        uint32_t            rule_id,
        bool                is_nested);

static const char * parse_sequence(
        parse_state                        & state,
        const char                         * src,
        const std::string                  & rule_name,
        std::vector<llama_grammar_element> & out_elements,
        bool                                 is_nested) {
    size_t last_sym_start = out_elements.size();
    const char * pos = src;

    auto handle_repetitions = [&](int min_times, int max_times) {

        if (last_sym_start == out_elements.size()) {
            throw std::runtime_error(std::string("expecting preceding item to */+/?/{ at ") + pos);
        }

        // apply transformation to previous symbol (last_sym_start to end) according to
        // the following rewrite rules:
        // S{m,n} --> S S S (m times) S'(n-m)
        //            S'(x)   ::= S S'(x-1) |
        //            (... n-m definitions of these S' rules ...)
        //            S'(1)   ::= S |
        // S{m,}  --> S S S (m times) S'
        //            S'      ::= S S' |
        // S*     --> S{0,}
        //        --> S'      ::= S S' |
        // S+     --> S{1,}
        //        --> S S'
        //            S'      ::= S S' |
        // S?     --> S{0,1}
        //        --> S'
        //            S'      ::= S |

        std::vector<llama_grammar_element> previous_elements(out_elements.begin() + last_sym_start, out_elements.end());
        if (min_times == 0) {
            out_elements.resize(last_sym_start);
        } else {
            // Repeat the previous elements (min_times - 1) times
            for (int i = 1; i < min_times; i++) {
                out_elements.insert(out_elements.end(), previous_elements.begin(), previous_elements.end());
            }
        }

        uint32_t last_rec_rule_id = 0;
        auto n_opt = max_times < 0 ? 1 : max_times - min_times;

        std::vector<llama_grammar_element> rec_rule(previous_elements);
        for (int i = 0; i < n_opt; i++) {
            rec_rule.resize(previous_elements.size());
            uint32_t rec_rule_id = generate_symbol_id(state, rule_name);
            if (i > 0 || max_times < 0) {
                rec_rule.push_back({LLAMA_GRETYPE_RULE_REF, max_times < 0 ? rec_rule_id : last_rec_rule_id});
            }
            rec_rule.push_back({LLAMA_GRETYPE_ALT, 0});
            rec_rule.push_back({LLAMA_GRETYPE_END, 0});
            add_rule(state, rec_rule_id, rec_rule);
            last_rec_rule_id = rec_rule_id;
        }
        if (n_opt > 0) {
            out_elements.push_back({LLAMA_GRETYPE_RULE_REF, last_rec_rule_id});
        }
    };

    while (*pos) {
        if (*pos == '"') { // literal string
            pos++;
            last_sym_start = out_elements.size();
            while (*pos != '"') {
                if (!*pos) {
                    throw std::runtime_error("unexpected end of input");
                }
                auto char_pair = parse_char(pos);
                pos = char_pair.second;
                out_elements.push_back({LLAMA_GRETYPE_CHAR, char_pair.first});
            }
            pos = parse_space(pos + 1, is_nested);
        } else if (*pos == '[') { // char range(s)
            pos++;
            enum llama_gretype start_type = LLAMA_GRETYPE_CHAR;
            if (*pos == '^') {
                pos++;
                start_type = LLAMA_GRETYPE_CHAR_NOT;
            }
            last_sym_start = out_elements.size();
            while (*pos != ']') {
                if (!*pos) {
                    throw std::runtime_error("unexpected end of input");
                }
                auto char_pair = parse_char(pos);
                pos = char_pair.second;
                enum llama_gretype type = last_sym_start < out_elements.size()
                    ? LLAMA_GRETYPE_CHAR_ALT
                    : start_type;

                out_elements.push_back({type, char_pair.first});
                if (pos[0] == '-' && pos[1] != ']') {
                    if (!pos[1]) {
                        throw std::runtime_error("unexpected end of input");
                    }
                    auto endchar_pair = parse_char(pos + 1);
                    pos = endchar_pair.second;
                    out_elements.push_back({LLAMA_GRETYPE_CHAR_RNG_UPPER, endchar_pair.first});
                }
            }
            pos = parse_space(pos + 1, is_nested);
        } else if (is_word_char(*pos)) { // rule reference
            const char * name_end    = parse_name(pos);
            uint32_t     ref_rule_id = get_symbol_id(state, pos, name_end - pos);
            pos = parse_space(name_end, is_nested);
            last_sym_start = out_elements.size();
            out_elements.push_back({LLAMA_GRETYPE_RULE_REF, ref_rule_id});
        } else if (*pos == '(') { // grouping
            // parse nested alternates into synthesized rule
            pos = parse_space(pos + 1, true);
            uint32_t sub_rule_id = generate_symbol_id(state, rule_name);
            pos = parse_alternates(state, pos, rule_name, sub_rule_id, true);
            last_sym_start = out_elements.size();
            // output reference to synthesized rule
            out_elements.push_back({LLAMA_GRETYPE_RULE_REF, sub_rule_id});
            if (*pos != ')') {
                throw std::runtime_error(std::string("expecting ')' at ") + pos);
            }
            pos = parse_space(pos + 1, is_nested);
        } else if (*pos == '.') { // any char
            last_sym_start = out_elements.size();
            out_elements.push_back({LLAMA_GRETYPE_CHAR_ANY, 0});
            pos = parse_space(pos + 1, is_nested);
        } else if (*pos == '*') {
            pos = parse_space(pos + 1, is_nested);
            handle_repetitions(0, -1);
        } else if (*pos == '+') {
            pos = parse_space(pos + 1, is_nested);
            handle_repetitions(1, -1);
        } else if (*pos == '?') {
            pos = parse_space(pos + 1, is_nested);
            handle_repetitions(0, 1);
        } else if (*pos == '{') {
            pos = parse_space(pos + 1, is_nested);

            if (!is_digit_char(*pos)) {
                throw std::runtime_error(std::string("expecting an int at ") + pos);
            }
            const char * int_end = parse_int(pos);
            int min_times = std::stoul(std::string(pos, int_end - pos));
            pos = parse_space(int_end, is_nested);

            int max_times = -1;

            if (*pos == '}') {
                max_times = min_times;
                pos = parse_space(pos + 1, is_nested);
            } else if (*pos == ',') {
                pos = parse_space(pos + 1, is_nested);

                if (is_digit_char(*pos)) {
                    const char * int_end = parse_int(pos);
                    max_times = std::stoul(std::string(pos, int_end - pos));
                    pos = parse_space(int_end, is_nested);
                }

                if (*pos != '}') {
                    throw std::runtime_error(std::string("expecting '}' at ") + pos);
                }
                pos = parse_space(pos + 1, is_nested);
            } else {
                throw std::runtime_error(std::string("expecting ',' at ") + pos);
            }
            handle_repetitions(min_times, max_times);
        } else {
            break;
        }
    }
    return pos;
}
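
// Hedged illustration of the repetition rewrite implemented by handle_repetitions
// above (illustrative only, not part of the upstream file): parse a {2,4}
// repetition and dump the synthesized rules. The generated rule names come from
// generate_symbol_id, so the exact "root_N" suffixes may differ.
static void dump_repetition_example() {
    parse_state st = parse("root ::= \"a\"{2,4}\n");
    // Prints rules of roughly this shape:
    //   root   ::= [a] [a] root_2
    //   root_1 ::= [a] |
    //   root_2 ::= [a] root_1 |
    print_grammar(stdout, st);
}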

const char * parse_alternates(
        parse_state       & state,
        const char        * src,
        const std::string & rule_name,
        uint32_t            rule_id,
        bool                is_nested) {
    std::vector<llama_grammar_element> rule;
    const char * pos = parse_sequence(state, src, rule_name, rule, is_nested);
    while (*pos == '|') {
        rule.push_back({LLAMA_GRETYPE_ALT, 0});
        pos = parse_space(pos + 1, true);
        pos = parse_sequence(state, pos, rule_name, rule, is_nested);
    }
    rule.push_back({LLAMA_GRETYPE_END, 0});
    add_rule(state, rule_id, rule);
    return pos;
}

static const char * parse_rule(parse_state & state, const char * src) {
    const char * name_end = parse_name(src);
    const char * pos      = parse_space(name_end, false);
    size_t       name_len = name_end - src;
    uint32_t     rule_id  = get_symbol_id(state, src, name_len);
    const std::string name(src, name_len);

    if (!(pos[0] == ':' && pos[1] == ':' && pos[2] == '=')) {
        throw std::runtime_error(std::string("expecting ::= at ") + pos);
    }
    pos = parse_space(pos + 3, true);

    pos = parse_alternates(state, pos, name, rule_id, false);

    if (*pos == '\r') {
        pos += pos[1] == '\n' ? 2 : 1;
    } else if (*pos == '\n') {
        pos++;
    } else if (*pos) {
        throw std::runtime_error(std::string("expecting newline or end at ") + pos);
    }
    return parse_space(pos, true);
}

parse_state parse(const char * src) {
    try {
        parse_state state;
        const char * pos = parse_space(src, true);
        while (*pos) {
            pos = parse_rule(state, pos);
        }
        // Validate the state to ensure that all rules are defined
        for (const auto & rule : state.rules) {
            for (const auto & elem : rule) {
                if (elem.type == LLAMA_GRETYPE_RULE_REF) {
                    // Ensure that the rule at that location exists
                    if (elem.value >= state.rules.size() || state.rules[elem.value].empty()) {
                        // Get the name of the rule that is missing
                        for (const auto & kv : state.symbol_ids) {
                            if (kv.second == elem.value) {
                                throw std::runtime_error("Undefined rule identifier '" + kv.first + "'");
                            }
                        }
                    }
                }
            }
        }
        return state;
    } catch (const std::exception & err) {
        fprintf(stderr, "%s: error parsing grammar: %s\n", __func__, err.what());
        return parse_state();
    }
}
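
// Sketch of the validation step in parse() above (illustrative only, not part of
// the upstream file): a grammar that references a rule which is never defined
// parses structurally, but the post-parse check rejects it and parse() returns an
// empty parse_state.
static void undefined_rule_example() {
    parse_state st = parse("root ::= foo\n");
    // stderr: "parse: error parsing grammar: Undefined rule identifier 'foo'"
    fprintf(stderr, "rules after failed parse: %zu\n", st.rules.size()); // 0
}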

static void print_grammar_char(FILE * file, uint32_t c) {
    if (0x20 <= c && c <= 0x7f) {
        fprintf(file, "%c", static_cast<char>(c));
    } else {
        // cop out of encoding UTF-8
        fprintf(file, "<U+%04X>", c);
    }
}

static bool is_char_element(llama_grammar_element elem) {
    switch (elem.type) {
        case LLAMA_GRETYPE_CHAR:           return true;
        case LLAMA_GRETYPE_CHAR_NOT:       return true;
        case LLAMA_GRETYPE_CHAR_ALT:       return true;
        case LLAMA_GRETYPE_CHAR_RNG_UPPER: return true;
        case LLAMA_GRETYPE_CHAR_ANY:       return true;
        default:                           return false;
    }
}

static void print_rule_binary(FILE * file, const std::vector<llama_grammar_element> & rule) {
    for (auto elem : rule) {
        switch (elem.type) {
            case LLAMA_GRETYPE_END:            fprintf(file, "END");            break;
            case LLAMA_GRETYPE_ALT:            fprintf(file, "ALT");            break;
            case LLAMA_GRETYPE_RULE_REF:       fprintf(file, "RULE_REF");       break;
            case LLAMA_GRETYPE_CHAR:           fprintf(file, "CHAR");           break;
            case LLAMA_GRETYPE_CHAR_NOT:       fprintf(file, "CHAR_NOT");       break;
            case LLAMA_GRETYPE_CHAR_RNG_UPPER: fprintf(file, "CHAR_RNG_UPPER"); break;
            case LLAMA_GRETYPE_CHAR_ALT:       fprintf(file, "CHAR_ALT");       break;
            case LLAMA_GRETYPE_CHAR_ANY:       fprintf(file, "CHAR_ANY");       break;
        }
        switch (elem.type) {
            case LLAMA_GRETYPE_END:
            case LLAMA_GRETYPE_ALT:
            case LLAMA_GRETYPE_RULE_REF:
                fprintf(file, "(%u) ", elem.value);
                break;
            case LLAMA_GRETYPE_CHAR:
            case LLAMA_GRETYPE_CHAR_NOT:
            case LLAMA_GRETYPE_CHAR_RNG_UPPER:
            case LLAMA_GRETYPE_CHAR_ALT:
            case LLAMA_GRETYPE_CHAR_ANY:
                fprintf(file, "(\"");
                print_grammar_char(file, elem.value);
                fprintf(file, "\") ");
                break;
        }
    }
    fprintf(file, "\n");
}

static void print_rule(
    FILE     * file,
    uint32_t   rule_id,
    const std::vector<llama_grammar_element> & rule,
    const std::map<uint32_t, std::string>    & symbol_id_names) {
    if (rule.empty() || rule.back().type != LLAMA_GRETYPE_END) {
        throw std::runtime_error(
            "malformed rule, does not end with LLAMA_GRETYPE_END: " + std::to_string(rule_id));
    }
    fprintf(file, "%s ::= ", symbol_id_names.at(rule_id).c_str());
    for (size_t i = 0, end = rule.size() - 1; i < end; i++) {
        llama_grammar_element elem = rule[i];
        switch (elem.type) {
            case LLAMA_GRETYPE_END:
                throw std::runtime_error(
                    "unexpected end of rule: " + std::to_string(rule_id) + "," +
                    std::to_string(i));
            case LLAMA_GRETYPE_ALT:
                fprintf(file, "| ");
                break;
            case LLAMA_GRETYPE_RULE_REF:
                fprintf(file, "%s ", symbol_id_names.at(elem.value).c_str());
                break;
            case LLAMA_GRETYPE_CHAR:
                fprintf(file, "[");
                print_grammar_char(file, elem.value);
                break;
            case LLAMA_GRETYPE_CHAR_NOT:
                fprintf(file, "[^");
                print_grammar_char(file, elem.value);
                break;
            case LLAMA_GRETYPE_CHAR_RNG_UPPER:
                if (i == 0 || !is_char_element(rule[i - 1])) {
                    throw std::runtime_error(
                        "LLAMA_GRETYPE_CHAR_RNG_UPPER without preceding char: " +
                        std::to_string(rule_id) + "," + std::to_string(i));
                }
                fprintf(file, "-");
                print_grammar_char(file, elem.value);
                break;
            case LLAMA_GRETYPE_CHAR_ALT:
                if (i == 0 || !is_char_element(rule[i - 1])) {
                    throw std::runtime_error(
                        "LLAMA_GRETYPE_CHAR_ALT without preceding char: " +
                        std::to_string(rule_id) + "," + std::to_string(i));
                }
                print_grammar_char(file, elem.value);
                break;
            case LLAMA_GRETYPE_CHAR_ANY:
                fprintf(file, ".");
                break;
        }
        if (is_char_element(elem)) {
            switch (rule[i + 1].type) {
                case LLAMA_GRETYPE_CHAR_ALT:
                case LLAMA_GRETYPE_CHAR_RNG_UPPER:
                case LLAMA_GRETYPE_CHAR_ANY:
                    break;
                default:
                    fprintf(file, "] ");
            }
        }
    }
    fprintf(file, "\n");
}

void print_grammar(FILE * file, const parse_state & state) {
    try {
        std::map<uint32_t, std::string> symbol_id_names;
        for (const auto & kv : state.symbol_ids) {
            symbol_id_names[kv.second] = kv.first;
        }
        for (size_t i = 0, end = state.rules.size(); i < end; i++) {
            // fprintf(file, "%zu: ", i);
            // print_rule_binary(file, state.rules[i]);
            print_rule(file, uint32_t(i), state.rules[i], symbol_id_names);
            // fprintf(file, "\n");
        }
    } catch (const std::exception & err) {
        fprintf(stderr, "\n%s: error printing grammar: %s\n", __func__, err.what());
    }
}

std::vector<const llama_grammar_element *> parse_state::c_rules() {
    std::vector<const llama_grammar_element *> ret;
    ret.reserve(rules.size());
    for (const auto & rule : rules) {
        ret.push_back(rule.data());
    }
    return ret;
}
}
29 common/grammar-parser.h Normal file
@@ -0,0 +1,29 @@
// Implements a parser for an extended Backus-Naur form (BNF), producing the
// binary context-free grammar format specified by llama.h. Supports character
// ranges, grouping, and repetition operators. As an example, a grammar for
// arithmetic might look like:
//
// root  ::= expr
// expr  ::= term ([-+*/] term)*
// term  ::= num | "(" space expr ")" space
// num   ::= [0-9]+ space
// space ::= [ \t\n]*

#pragma once
#include "llama.h"
#include <vector>
#include <map>
#include <cstdint>
#include <string>

namespace grammar_parser {
    struct parse_state {
        std::map<std::string, uint32_t>                 symbol_ids;
        std::vector<std::vector<llama_grammar_element>> rules;

        std::vector<const llama_grammar_element *> c_rules();
    };

    parse_state parse(const char * src);
    void print_grammar(FILE * file, const parse_state & state);
}
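
// Hedged usage sketch for the header above: how a parsed grammar is typically
// handed to llama.cpp's sampler. llama_grammar_init and its argument order are
// assumptions based on the llama.h API from the same era as this header and may
// not match the exact version this branch targets.
#include "grammar-parser.h"

static struct llama_grammar * make_grammar(const char * gbnf_text) {
    grammar_parser::parse_state parsed = grammar_parser::parse(gbnf_text);
    if (parsed.rules.empty()) {
        return nullptr; // parse error already reported on stderr
    }
    std::vector<const llama_grammar_element *> rules = parsed.c_rules();
    return llama_grammar_init(rules.data(), rules.size(), parsed.symbol_ids.at("root"));
}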
@@ -1,6 +1,4 @@
#include "json-schema-to-grammar.h"
#include "common.h"

#include <algorithm>
#include <fstream>
#include <map>
@@ -13,6 +11,11 @@
using json = nlohmann::ordered_json;

template <typename Iterator>
static std::string join(Iterator begin, Iterator end, const std::string & separator);

static std::string repeat(const std::string & str, size_t n);

static std::string build_repetition(const std::string & item_rule, int min_items, int max_items, const std::string & separator_rule = "") {
    auto has_max = max_items != std::numeric_limits<int>::max();

@@ -125,8 +128,8 @@ static void _build_min_max_int(int min_value, int max_value, std::stringstream &
        if (sub_len > 0) {
            auto from_sub = from.substr(i + 1);
            auto to_sub = to.substr(i + 1);
            auto sub_zeros = string_repeat("0", sub_len);
            auto sub_zeros = repeat("0", sub_len);
            auto sub_nines = string_repeat("9", sub_len);
            auto sub_nines = repeat("9", sub_len);

            auto to_reached = false;
            out << "(";
@@ -185,8 +188,8 @@ static void _build_min_max_int(int min_value, int max_value, std::stringstream &
        auto max_digits = max_s.length();

        for (auto digits = min_digits; digits < max_digits; digits++) {
            uniform_range(min_s, string_repeat("9", digits));
            uniform_range(min_s, repeat("9", digits));
            min_s = "1" + string_repeat("0", digits);
            min_s = "1" + repeat("0", digits);
            out << " | ";
        }
        uniform_range(min_s, max_s);
@@ -315,6 +318,49 @@ std::unordered_map<char, std::string> GRAMMAR_LITERAL_ESCAPES = {
std::unordered_set<char> NON_LITERAL_SET = {'|', '.', '(', ')', '[', ']', '{', '}', '*', '+', '?'};
std::unordered_set<char> ESCAPED_IN_REGEXPS_BUT_NOT_IN_LITERALS = {'^', '$', '.', '[', ']', '(', ')', '|', '{', '}', '*', '+', '?'};

template <typename Iterator>
std::string join(Iterator begin, Iterator end, const std::string & separator) {
    std::ostringstream result;
    if (begin != end) {
        result << *begin;
        for (Iterator it = begin + 1; it != end; ++it) {
            result << separator << *it;
        }
    }
    return result.str();
}

static std::vector<std::string> split(const std::string & str, const std::string & delimiter) {
    std::vector<std::string> tokens;
    size_t start = 0;
    size_t end = str.find(delimiter);

    while (end != std::string::npos) {
        tokens.push_back(str.substr(start, end - start));
        start = end + delimiter.length();
        end = str.find(delimiter, start);
    }

    tokens.push_back(str.substr(start));

    return tokens;
}

static std::string repeat(const std::string & str, size_t n) {
    if (n == 0) {
        return "";
    }

    std::string result;
    result.reserve(str.length() * n);

    for (size_t i = 0; i < n; ++i) {
        result += str;
    }

    return result;
}

static std::string replacePattern(const std::string & input, const std::regex & regex, const std::function<std::string(const std::smatch &)> & replacement) {
    std::smatch match;
    std::string result;
@@ -343,7 +389,6 @@ static std::string format_literal(const std::string & literal) {

class SchemaConverter {
private:
    friend std::string build_grammar(const std::function<void(const common_grammar_builder &)> & cb, const common_grammar_options & options);
    std::function<json(const std::string &)> _fetch_json;
    bool _dotall;
    std::map<std::string, std::string> _rules;
@@ -373,7 +418,7 @@ private:
        for (size_t i = 0; i < alt_schemas.size(); i++) {
            rules.push_back(visit(alt_schemas[i], name + (name.empty() ? "alternative-" : "-") + std::to_string(i)));
        }
        return string_join(rules, " | ");
        return join(rules.begin(), rules.end(), " | ");
    }

    std::string _visit_pattern(const std::string & pattern, const std::string & name) {
@@ -436,7 +481,7 @@ private:
            for (const auto & item : ret) {
                results.push_back(to_rule(item));
            }
            return std::make_pair(string_join(results, " "), false);
            return std::make_pair(join(results.begin(), results.end(), " "), false);
        };

        while (i < length) {
@@ -494,7 +539,7 @@ private:
                }
                curly_brackets += '}';
                i++;
                auto nums = string_split(curly_brackets.substr(1, curly_brackets.length() - 2), ",");
                auto nums = split(curly_brackets.substr(1, curly_brackets.length() - 2), ",");
                int min_times = 0;
                int max_times = std::numeric_limits<int>::max();
                try {
@@ -566,7 +611,7 @@ private:
            }
            return join_seq();
        };
        return _add_rule(name, "\"\\\"\" (" + to_rule(transform()) + ") \"\\\"\" space");
        return _add_rule(name, "\"\\\"\" " + to_rule(transform()) + " \"\\\"\" space");
    }

    /*
@@ -764,11 +809,10 @@ private:
public:
    SchemaConverter(
        const std::function<json(const std::string &)> & fetch_json,
        bool dotall,
        bool dotall)
        bool compact_spaces)
        : _fetch_json(fetch_json), _dotall(dotall)
    {
        _rules["space"] = compact_spaces ? "\" \"?" : SPACE_RULE;
        _rules["space"] = SPACE_RULE;
    }

    void resolve_refs(json & schema, const std::string & url) {
@@ -810,7 +854,7 @@ public:
            return;
        }
        std::string pointer = ref.substr(ref.find('#') + 1);
        std::vector<std::string> tokens = string_split(pointer, "/");
        std::vector<std::string> tokens = split(pointer, "/");
        for (size_t i = 1; i < tokens.size(); ++i) {
            std::string sel = tokens[i];
            if (target.is_null() || !target.contains(sel)) {
@@ -861,7 +905,7 @@ public:
            for (const auto & v : schema["enum"]) {
                enum_values.push_back(_generate_constant_rule(v));
            }
            return _add_rule(rule_name, "(" + string_join(enum_values, " | ") + ") space");
            return _add_rule(rule_name, "(" + join(enum_values.begin(), enum_values.end(), " | ") + ") space");
        } else if ((schema_type.is_null() || schema_type == "object")
                && (schema.contains("properties") ||
                    (schema.contains("additionalProperties") && schema["additionalProperties"] != true))) {
@@ -975,10 +1019,10 @@ public:

    void check_errors() {
        if (!_errors.empty()) {
            throw std::runtime_error("JSON schema conversion failed:\n" + string_join(_errors, "\n"));
            throw std::runtime_error("JSON schema conversion failed:\n" + join(_errors.begin(), _errors.end(), "\n"));
        }
        if (!_warnings.empty()) {
            fprintf(stderr, "WARNING: JSON schema conversion was incomplete: %s\n", string_join(_warnings, "; ").c_str());
            fprintf(stderr, "WARNING: JSON schema conversion was incomplete: %s\n", join(_warnings.begin(), _warnings.end(), "; ").c_str());
        }
    }

@@ -991,35 +1035,11 @@ public:
    }
};

std::string json_schema_to_grammar(const json & schema, bool force_gbnf) {
std::string json_schema_to_grammar(const json & schema) {
#ifdef LLAMA_USE_LLGUIDANCE
    SchemaConverter converter([](const std::string &) { return json::object(); }, /* dotall= */ false);
    if (!force_gbnf) {
    auto copy = schema;
        return "%llguidance {}\nstart: %json " + schema.dump();
    converter.resolve_refs(copy, "input");
    }
    converter.visit(copy, "");
#else
    (void)force_gbnf;
#endif // LLAMA_USE_LLGUIDANCE
    return build_grammar([&](const common_grammar_builder & callbacks) {
        auto copy = schema;
        callbacks.resolve_refs(copy);
        callbacks.add_schema("", copy);
    });
}

std::string build_grammar(const std::function<void(const common_grammar_builder &)> & cb, const common_grammar_options & options) {
    SchemaConverter converter([&](const std::string &) { return json(); }, options.dotall, options.compact_spaces);
    common_grammar_builder builder {
        /* .add_rule = */ [&](const std::string & name, const std::string & rule) {
            return converter._add_rule(name, rule);
        },
        /* .add_schema = */ [&](const std::string & name, const nlohmann::ordered_json & schema) {
            return converter.visit(schema, name == "root" ? "" : name);
        },
        /* .resolve_refs = */ [&](nlohmann::ordered_json & schema) {
            converter.resolve_refs(schema, "");
        }
    };
    cb(builder);
    converter.check_errors();
    return converter.format_grammar();
}
@@ -5,18 +5,4 @@
#define JSON_ASSERT GGML_ASSERT
#include "json.hpp"

std::string json_schema_to_grammar(const nlohmann::ordered_json & schema,
std::string json_schema_to_grammar(const nlohmann::ordered_json& schema);
                                   bool force_gbnf = false);

struct common_grammar_builder {
    std::function<std::string(const std::string &, const std::string &)> add_rule;
    std::function<std::string(const std::string &, const nlohmann::ordered_json &)> add_schema;
    std::function<void(nlohmann::ordered_json &)> resolve_refs;
};

struct common_grammar_options {
    bool dotall = false;
    bool compact_spaces = false;
};

std::string build_grammar(const std::function<void(const common_grammar_builder &)> & cb, const common_grammar_options & options = {});
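
// Hedged usage sketch for json_schema_to_grammar above (both sides of this diff
// accept a single nlohmann::ordered_json argument; everything else here is
// illustrative only).
#include "json-schema-to-grammar.h"

static std::string example_schema_to_gbnf() {
    auto schema = nlohmann::ordered_json::parse(R"({
        "type": "object",
        "properties": { "name": { "type": "string" } },
        "required": ["name"]
    })");
    // Returns a GBNF grammar string constraining generation to objects that
    // match the schema.
    return json_schema_to_grammar(schema);
}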
@@ -1,270 +0,0 @@
|
||||||
#include "sampling.h"
|
|
||||||
#include "log.h"
|
|
||||||
|
|
||||||
#ifdef LLAMA_USE_LLGUIDANCE
|
|
||||||
|
|
||||||
# include "llguidance.h"
|
|
||||||
# include <cmath>
|
|
||||||
|
|
||||||
struct llama_sampler_llg {
|
|
||||||
const llama_vocab * vocab;
|
|
||||||
std::string grammar_kind;
|
|
||||||
std::string grammar_data;
|
|
||||||
LlgTokenizer * tokenizer;
|
|
||||||
LlgConstraint * grammar;
|
|
||||||
LlgMaskResult llg_res;
|
|
||||||
bool has_llg_res;
|
|
||||||
};
|
|
||||||
|
|
||||||
static LlgConstraint * llama_sampler_llg_new(LlgTokenizer * tokenizer, const char * grammar_kind,
|
|
||||||
const char * grammar_data) {
|
|
||||||
LlgConstraintInit cinit;
|
|
||||||
llg_constraint_init_set_defaults(&cinit, tokenizer);
|
|
||||||
const char * log_level = getenv("LLGUIDANCE_LOG_LEVEL");
|
|
||||||
if (log_level && *log_level) {
|
|
||||||
cinit.log_stderr_level = atoi(log_level);
|
|
||||||
}
|
|
||||||
auto c = llg_new_constraint_any(&cinit, grammar_kind, grammar_data);
|
|
||||||
if (llg_get_error(c)) {
|
|
||||||
LOG_ERR("llg error: %s\n", llg_get_error(c));
|
|
||||||
llg_free_constraint(c);
|
|
||||||
return nullptr;
|
|
||||||
}
|
|
||||||
return c;
|
|
||||||
}
|
|
||||||
|
|
||||||
static const char * llama_sampler_llg_name(const llama_sampler * /*smpl*/) {
|
|
||||||
return "llguidance";
|
|
||||||
}
|
|
||||||
|
|
||||||
static void llama_sampler_llg_accept_impl(llama_sampler * smpl, llama_token token) {
|
|
||||||
auto * ctx = (llama_sampler_llg *) smpl->ctx;
|
|
||||||
if (ctx->grammar) {
|
|
||||||
LlgCommitResult res;
|
|
||||||
llg_commit_token(ctx->grammar, token, &res);
|
|
||||||
ctx->has_llg_res = false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static void llama_sampler_llg_apply(llama_sampler * smpl, llama_token_data_array * cur_p) {
|
|
||||||
auto * ctx = (llama_sampler_llg *) smpl->ctx;
|
|
||||||
if (ctx->grammar) {
|
|
||||||
if (!ctx->has_llg_res) {
|
|
||||||
if (llg_compute_mask(ctx->grammar, &ctx->llg_res) == 0) {
|
|
||||||
ctx->has_llg_res = true;
|
|
||||||
} else {
|
|
||||||
LOG_ERR("llg error: %s\n", llg_get_error(ctx->grammar));
|
|
||||||
llg_free_constraint(ctx->grammar);
|
|
||||||
ctx->grammar = nullptr;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (ctx->has_llg_res) {
|
|
||||||
if (ctx->llg_res.is_stop) {
|
|
||||||
for (size_t i = 0; i < cur_p->size; ++i) {
|
|
||||||
if (!llama_vocab_is_eog(ctx->vocab, cur_p->data[i].id)) {
|
|
||||||
cur_p->data[i].logit = -INFINITY;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
const uint32_t * mask = ctx->llg_res.sample_mask;
|
|
||||||
for (size_t i = 0; i < cur_p->size; ++i) {
|
|
||||||
auto token = cur_p->data[i].id;
|
|
||||||
if ((mask[token / 32] & (1 << (token % 32))) == 0) {
|
|
||||||
cur_p->data[i].logit = -INFINITY;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static void llama_sampler_llg_reset(llama_sampler * smpl) {
|
|
||||||
auto * ctx = (llama_sampler_llg *) smpl->ctx;
|
|
||||||
if (!ctx->grammar) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
auto * grammar_new = llama_sampler_llg_new(ctx->tokenizer, ctx->grammar_kind.c_str(), ctx->grammar_data.c_str());
|
|
||||||
llg_free_constraint(ctx->grammar);
|
|
||||||
ctx->grammar = grammar_new;
|
|
||||||
ctx->has_llg_res = false;
|
|
||||||
}
|
|
||||||
|
|
||||||
static llama_sampler * llama_sampler_llg_clone(const llama_sampler * smpl) {
|
|
||||||
const auto * ctx = (const llama_sampler_llg *) smpl->ctx;
|
|
||||||
|
|
||||||
auto * result = llama_sampler_init_llg(ctx->vocab, nullptr, nullptr);
|
|
||||||
|
|
||||||
// copy the state
|
|
||||||
{
|
|
||||||
auto * result_ctx = (llama_sampler_llg *) result->ctx;
|
|
||||||
|
|
||||||
if (ctx->grammar) {
|
|
||||||
result_ctx->grammar_kind = ctx->grammar_kind;
|
|
||||||
result_ctx->grammar_data = ctx->grammar_data;
|
|
||||||
result_ctx->grammar = llg_clone_constraint(ctx->grammar);
|
|
||||||
result_ctx->tokenizer = llg_clone_tokenizer(ctx->tokenizer);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
static void llama_sampler_llg_free(llama_sampler * smpl) {
|
|
||||||
const auto * ctx = (llama_sampler_llg *) smpl->ctx;
|
|
||||||
|
|
||||||
if (ctx->grammar) {
|
|
||||||
llg_free_constraint(ctx->grammar);
|
|
||||||
llg_free_tokenizer(ctx->tokenizer);
|
|
||||||
}
|
|
||||||
|
|
||||||
delete ctx;
|
|
||||||
}
|
|
||||||
|
|
||||||
static llama_sampler_i llama_sampler_llg_i = {
|
|
||||||
/* .name = */ llama_sampler_llg_name,
|
|
||||||
/* .accept = */ llama_sampler_llg_accept_impl,
|
|
||||||
/* .apply = */ llama_sampler_llg_apply,
|
|
||||||
/* .reset = */ llama_sampler_llg_reset,
|
|
||||||
/* .clone = */ llama_sampler_llg_clone,
|
|
||||||
/* .free = */ llama_sampler_llg_free,
|
|
||||||
};
|
|
||||||
|
|
||||||
static size_t llama_sampler_llg_tokenize_fn(const void * user_data, const uint8_t * bytes, size_t bytes_len,
|
|
||||||
uint32_t * output_tokens, size_t output_tokens_len) {
|
|
||||||
const llama_vocab * vocab = (const llama_vocab *) user_data;
|
|
||||||
int r = 0;
|
|
||||||
try {
|
|
||||||
r = llama_tokenize(vocab, (const char *) bytes, bytes_len, (int32_t *) output_tokens, output_tokens_len, false,
|
|
||||||
true);
|
|
||||||
} catch (const std::exception & e) {
|
|
||||||
GGML_ABORT("llama_tokenize failed: %s\n", e.what());
|
|
||||||
}
|
|
||||||
if (r < 0) {
|
|
||||||
return -r;
|
|
||||||
}
|
|
||||||
return r;
|
|
||||||
}
|
|
||||||
|
|
||||||
static LlgTokenizer * llama_sampler_llg_new_tokenizer(const llama_vocab * vocab) {
|
|
||||||
// TODO store the tokenizer in the vocab somehow
|
|
||||||
static const llama_vocab * vocab_cache;
|
|
||||||
static LlgTokenizer * tokenizer_cache;
|
|
||||||
|
|
||||||
if (vocab_cache == vocab) {
|
|
||||||
return llg_clone_tokenizer(tokenizer_cache);
|
|
||||||
}
|
|
||||||
|
|
||||||
auto tok_eos = llama_vocab_eot(vocab);
|
|
||||||
if (tok_eos == LLAMA_TOKEN_NULL) {
|
|
||||||
tok_eos = llama_vocab_eos(vocab);
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t vocab_size = llama_vocab_n_tokens(vocab);
|
|
||||||
|
|
||||||
auto token_lens = new uint32_t[vocab_size];
|
|
||||||
// we typically have ~7 bytes per token; let's go on the safe side here
|
|
||||||
auto token_bytes_size = vocab_size * 16 + 1024 * 1024;
|
|
||||||
auto token_bytes = new uint8_t[token_bytes_size];
|
|
||||||
|
|
||||||
size_t offset = 0;
|
|
||||||
for (size_t i = 0; i < vocab_size; i++) {
|
|
||||||
size_t max_token = 1024;
|
|
||||||
if (token_bytes_size - offset < max_token) {
|
|
||||||
GGML_ABORT("token_bytes buffer too small\n");
|
|
||||||
}
|
|
||||||
|
|
||||||
llama_token token = i;
|
|
||||||
auto dp = (char *) token_bytes + offset;
|
|
||||||
auto size = llama_detokenize(vocab, &token, 1, dp, max_token, false, false);
|
|
||||||
if (size < 0) {
|
|
||||||
GGML_ABORT("llama_detokenize failed\n");
|
|
||||||
}
|
|
||||||
if (size == 0) {
|
|
||||||
size = llama_detokenize(vocab, &token, 1, dp + 1, max_token - 1, false, true);
|
|
||||||
if (size < 0) {
|
|
||||||
GGML_ABORT("llama_detokenize failed\n");
|
|
||||||
}
|
|
||||||
if (size != 0) {
|
|
||||||
*dp = '\xff'; // special token prefix marker
|
|
||||||
size += 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
token_lens[i] = size;
|
|
||||||
offset += size;
|
|
||||||
}
|
|
||||||
|
|
||||||
LlgTokenizerInit tinit = {
|
|
||||||
/* .vocab_size = */ (uint32_t) vocab_size,
|
|
||||||
/* .tok_eos = */ (uint32_t) tok_eos,
|
|
||||||
/* .token_lens = */ token_lens,
|
|
||||||
/* .token_bytes = */ token_bytes,
|
|
||||||
/* .tokenizer_json = */ nullptr,
|
|
||||||
/* .tokenize_assumes_string = */ true,
|
|
||||||
/* .tokenize_fn = */ llama_sampler_llg_tokenize_fn,
|
|
||||||
/* .use_approximate_greedy_tokenize_fn = */ false,
|
|
||||||
/* .tokenize_user_data = */ vocab,
|
|
||||||
};
|
|
||||||
|
|
||||||
char error_buffer[1024];
|
|
||||||
LlgTokenizer * tokenizer = llg_new_tokenizer(&tinit, error_buffer, sizeof(error_buffer));
|
|
||||||
|
|
||||||
delete[] token_bytes;
|
|
||||||
delete[] token_lens;
|
|
||||||
|
|
||||||
if (tokenizer == nullptr) {
|
|
||||||
LOG_ERR("llg tokenizer error: %s\n", error_buffer);
|
|
||||||
return tokenizer;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (tokenizer_cache) {
|
|
||||||
llg_free_tokenizer(tokenizer_cache);
|
|
||||||
}
|
|
||||||
vocab_cache = vocab;
|
|
||||||
tokenizer_cache = tokenizer;
|
|
||||||
|
|
||||||
return llg_clone_tokenizer(tokenizer_cache);
|
|
||||||
}
|
|
||||||
|
|
||||||
llama_sampler * llama_sampler_init_llg(const llama_vocab * vocab, const char * grammar_kind,
|
|
||||||
const char * grammar_data) {
|
|
||||||
auto * ctx = new llama_sampler_llg;
|
|
||||||
|
|
||||||
if (grammar_kind != nullptr && grammar_kind[0] != '\0') {
|
|
||||||
auto tokenizer = llama_sampler_llg_new_tokenizer(vocab);
|
|
||||||
*ctx = {
|
|
||||||
/* .vocab = */ vocab,
|
|
||||||
/* .grammar_kind = */ grammar_kind,
|
|
||||||
/* .grammar_data = */ grammar_data,
|
|
||||||
/* .tokenizer = */ tokenizer,
|
|
||||||
/* .grammar = */ llama_sampler_llg_new(tokenizer, grammar_kind, grammar_data),
|
|
||||||
/* .llg_res = */ {},
|
|
||||||
/* .has_llg_res = */ false,
|
|
||||||
};
|
|
||||||
} else {
|
|
||||||
*ctx = {
|
|
||||||
/* .vocab = */ vocab,
|
|
||||||
/* .grammar_kind = */ {},
|
|
||||||
/* .grammar_data = */ {},
|
|
||||||
/* .tokenizer = */ nullptr,
|
|
||||||
/* .grammar = */ nullptr,
|
|
||||||
/* .llg_res = */ {},
|
|
||||||
/* .has_llg_res = */ false,
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
return llama_sampler_init(
|
|
||||||
/* .iface = */ &llama_sampler_llg_i,
|
|
||||||
/* .ctx = */ ctx
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
#else
|
|
||||||
|
|
||||||
llama_sampler * llama_sampler_init_llg(const llama_vocab *, const char *, const char *) {
|
|
||||||
LOG_WRN("llguidance (cmake -DLLAMA_LLGUIDANCE=ON) is not enabled");
|
|
||||||
return nullptr;
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif // LLAMA_USE_LLGUIDANCE
|
|
392 common/log.cpp
@@ -1,392 +0,0 @@
|
||||||
#include "log.h"
|
|
||||||
|
|
||||||
#include <condition_variable>
|
|
||||||
#include <cstdarg>
|
|
||||||
#include <cstdio>
|
|
||||||
#include <mutex>
|
|
||||||
#include <sstream>
|
|
||||||
#include <thread>
|
|
||||||
#include <vector>
|
|
||||||
|
|
||||||
int common_log_verbosity_thold = LOG_DEFAULT_LLAMA;
|
|
||||||
|
|
||||||
void common_log_set_verbosity_thold(int verbosity) {
|
|
||||||
common_log_verbosity_thold = verbosity;
|
|
||||||
}
|
|
||||||
|
|
||||||
static int64_t t_us() {
|
|
||||||
return std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::system_clock::now().time_since_epoch()).count();
|
|
||||||
}
|
|
||||||
|
|
||||||
// colors
|
|
||||||
enum common_log_col : int {
|
|
||||||
COMMON_LOG_COL_DEFAULT = 0,
|
|
||||||
COMMON_LOG_COL_BOLD,
|
|
||||||
COMMON_LOG_COL_RED,
|
|
||||||
COMMON_LOG_COL_GREEN,
|
|
||||||
COMMON_LOG_COL_YELLOW,
|
|
||||||
COMMON_LOG_COL_BLUE,
|
|
||||||
COMMON_LOG_COL_MAGENTA,
|
|
||||||
COMMON_LOG_COL_CYAN,
|
|
||||||
COMMON_LOG_COL_WHITE,
|
|
||||||
};
|
|
||||||
|
|
||||||
// disable colors by default
|
|
||||||
static std::vector<const char *> g_col = {
|
|
||||||
"",
|
|
||||||
"",
|
|
||||||
"",
|
|
||||||
"",
|
|
||||||
"",
|
|
||||||
"",
|
|
||||||
"",
|
|
||||||
"",
|
|
||||||
"",
|
|
||||||
};
|
|
||||||
|
|
||||||
struct common_log_entry {
|
|
||||||
enum ggml_log_level level;
|
|
||||||
|
|
||||||
bool prefix;
|
|
||||||
|
|
||||||
int64_t timestamp;
|
|
||||||
|
|
||||||
std::vector<char> msg;
|
|
||||||
|
|
||||||
// signals the worker thread to stop
|
|
||||||
bool is_end;
|
|
||||||
|
|
||||||
void print(FILE * file = nullptr) const {
|
|
||||||
FILE * fcur = file;
|
|
||||||
if (!fcur) {
|
|
||||||
// stderr displays DBG messages only when their verbosity level is not higher than the threshold
|
|
||||||
// these messages will still be logged to a file
|
|
||||||
if (level == GGML_LOG_LEVEL_DEBUG && common_log_verbosity_thold < LOG_DEFAULT_DEBUG) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
fcur = stdout;
|
|
||||||
|
|
||||||
if (level != GGML_LOG_LEVEL_NONE) {
|
|
||||||
fcur = stderr;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (level != GGML_LOG_LEVEL_NONE && level != GGML_LOG_LEVEL_CONT && prefix) {
|
|
||||||
if (timestamp) {
|
|
||||||
// [M.s.ms.us]
|
|
||||||
fprintf(fcur, "%s%d.%02d.%03d.%03d%s ",
|
|
||||||
g_col[COMMON_LOG_COL_BLUE],
|
|
||||||
(int) (timestamp / 1000000 / 60),
|
|
||||||
(int) (timestamp / 1000000 % 60),
|
|
||||||
(int) (timestamp / 1000 % 1000),
|
|
||||||
(int) (timestamp % 1000),
|
|
||||||
g_col[COMMON_LOG_COL_DEFAULT]);
|
|
||||||
}
|
|
||||||
|
|
||||||
switch (level) {
|
|
||||||
case GGML_LOG_LEVEL_INFO: fprintf(fcur, "%sI %s", g_col[COMMON_LOG_COL_GREEN], g_col[COMMON_LOG_COL_DEFAULT]); break;
|
|
||||||
case GGML_LOG_LEVEL_WARN: fprintf(fcur, "%sW %s", g_col[COMMON_LOG_COL_MAGENTA], "" ); break;
|
|
||||||
case GGML_LOG_LEVEL_ERROR: fprintf(fcur, "%sE %s", g_col[COMMON_LOG_COL_RED], "" ); break;
|
|
||||||
case GGML_LOG_LEVEL_DEBUG: fprintf(fcur, "%sD %s", g_col[COMMON_LOG_COL_YELLOW], "" ); break;
|
|
||||||
default:
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fprintf(fcur, "%s", msg.data());
|
|
||||||
|
|
||||||
if (level == GGML_LOG_LEVEL_WARN || level == GGML_LOG_LEVEL_ERROR || level == GGML_LOG_LEVEL_DEBUG) {
|
|
||||||
fprintf(fcur, "%s", g_col[COMMON_LOG_COL_DEFAULT]);
|
|
||||||
}
|
|
||||||
|
|
||||||
fflush(fcur);
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
struct common_log {
|
|
||||||
// default capacity - will be expanded if needed
|
|
||||||
common_log() : common_log(256) {}
|
|
||||||
|
|
||||||
common_log(size_t capacity) {
|
|
||||||
file = nullptr;
|
|
||||||
prefix = false;
|
|
||||||
timestamps = false;
|
|
||||||
running = false;
|
|
||||||
t_start = t_us();
|
|
||||||
|
|
||||||
// initial message size - will be expanded if longer messages arrive
|
|
||||||
entries.resize(capacity);
|
|
||||||
for (auto & entry : entries) {
|
|
||||||
entry.msg.resize(256);
|
|
||||||
}
|
|
||||||
|
|
||||||
head = 0;
|
|
||||||
tail = 0;
|
|
||||||
|
|
||||||
resume();
|
|
||||||
}
|
|
||||||
|
|
||||||
~common_log() {
|
|
||||||
pause();
|
|
||||||
if (file) {
|
|
||||||
fclose(file);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
|
||||||
std::mutex mtx;
|
|
||||||
std::thread thrd;
|
|
||||||
std::condition_variable cv;
|
|
||||||
|
|
||||||
FILE * file;
|
|
||||||
|
|
||||||
bool prefix;
|
|
||||||
bool timestamps;
|
|
||||||
bool running;
|
|
||||||
|
|
||||||
int64_t t_start;
|
|
||||||
|
|
||||||
// ring buffer of entries
|
|
||||||
std::vector<common_log_entry> entries;
|
|
||||||
size_t head;
|
|
||||||
size_t tail;
|
|
||||||
|
|
||||||
// worker thread copies into this
|
|
||||||
common_log_entry cur;
|
|
||||||
|
|
||||||
public:
|
|
||||||
void add(enum ggml_log_level level, const char * fmt, va_list args) {
|
|
||||||
std::lock_guard<std::mutex> lock(mtx);
|
|
||||||
|
|
||||||
if (!running) {
|
|
||||||
// discard messages while the worker thread is paused
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
auto & entry = entries[tail];
|
|
||||||
|
|
||||||
{
|
|
||||||
// cannot use args twice, so make a copy in case we need to expand the buffer
|
|
||||||
va_list args_copy;
|
|
||||||
va_copy(args_copy, args);
|
|
||||||
|
|
||||||
#if 1
|
|
||||||
const size_t n = vsnprintf(entry.msg.data(), entry.msg.size(), fmt, args);
|
|
||||||
if (n >= entry.msg.size()) {
|
|
||||||
entry.msg.resize(n + 1);
|
|
||||||
vsnprintf(entry.msg.data(), entry.msg.size(), fmt, args_copy);
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
// hack for bolding arguments
|
|
||||||
|
|
||||||
std::stringstream ss;
|
|
||||||
for (int i = 0; fmt[i] != 0; i++) {
|
|
||||||
if (fmt[i] == '%') {
|
|
||||||
ss << LOG_COL_BOLD;
|
|
||||||
while (fmt[i] != ' ' && fmt[i] != ')' && fmt[i] != ']' && fmt[i] != 0) ss << fmt[i++];
|
|
||||||
ss << LOG_COL_DEFAULT;
|
|
||||||
if (fmt[i] == 0) break;
|
|
||||||
}
|
|
||||||
ss << fmt[i];
|
|
||||||
}
|
|
||||||
const size_t n = vsnprintf(entry.msg.data(), entry.msg.size(), ss.str().c_str(), args);
|
|
||||||
if (n >= entry.msg.size()) {
|
|
||||||
entry.msg.resize(n + 1);
|
|
||||||
vsnprintf(entry.msg.data(), entry.msg.size(), ss.str().c_str(), args_copy);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
va_end(args_copy);
|
|
||||||
}
|
|
||||||
|
|
||||||
entry.level = level;
|
|
||||||
entry.prefix = prefix;
|
|
||||||
entry.timestamp = 0;
|
|
||||||
if (timestamps) {
|
|
||||||
entry.timestamp = t_us() - t_start;
|
|
||||||
}
|
|
||||||
entry.is_end = false;
|
|
||||||
|
|
||||||
tail = (tail + 1) % entries.size();
|
|
||||||
if (tail == head) {
|
|
||||||
// expand the buffer
|
|
||||||
std::vector<common_log_entry> new_entries(2*entries.size());
|
|
||||||
|
|
||||||
size_t new_tail = 0;
|
|
||||||
|
|
||||||
do {
|
|
||||||
new_entries[new_tail] = std::move(entries[head]);
|
|
||||||
|
|
||||||
head = (head + 1) % entries.size();
|
|
||||||
new_tail = (new_tail + 1);
|
|
||||||
} while (head != tail);
|
|
||||||
|
|
||||||
head = 0;
|
|
||||||
tail = new_tail;
|
|
||||||
|
|
||||||
for (size_t i = tail; i < new_entries.size(); i++) {
|
|
||||||
new_entries[i].msg.resize(256);
|
|
||||||
}
|
|
||||||
|
|
||||||
entries = std::move(new_entries);
|
|
||||||
}
|
|
||||||
|
|
||||||
cv.notify_one();
|
|
||||||
}
|
|
||||||
|
|
||||||
void resume() {
|
|
||||||
std::lock_guard<std::mutex> lock(mtx);
|
|
||||||
|
|
||||||
if (running) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
running = true;
|
|
||||||
|
|
||||||
thrd = std::thread([this]() {
|
|
||||||
while (true) {
|
|
||||||
{
|
|
||||||
std::unique_lock<std::mutex> lock(mtx);
|
|
||||||
cv.wait(lock, [this]() { return head != tail; });
|
|
||||||
|
|
||||||
cur = entries[head];
|
|
||||||
|
|
||||||
head = (head + 1) % entries.size();
|
|
||||||
}
|
|
||||||
|
|
||||||
if (cur.is_end) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
cur.print(); // stdout and stderr
|
|
||||||
|
|
||||||
if (file) {
|
|
||||||
cur.print(file);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
void pause() {
|
|
||||||
{
|
|
||||||
std::lock_guard<std::mutex> lock(mtx);
|
|
||||||
|
|
||||||
if (!running) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
running = false;
|
|
||||||
|
|
||||||
// push an entry to signal the worker thread to stop
|
|
||||||
{
|
|
||||||
auto & entry = entries[tail];
|
|
||||||
entry.is_end = true;
|
|
||||||
|
|
||||||
tail = (tail + 1) % entries.size();
|
|
||||||
}
|
|
||||||
|
|
||||||
cv.notify_one();
|
|
||||||
}
|
|
||||||
|
|
||||||
thrd.join();
|
|
||||||
}
|
|
||||||
|
|
||||||
void set_file(const char * path) {
|
|
||||||
pause();
|
|
||||||
|
|
||||||
if (file) {
|
|
||||||
fclose(file);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (path) {
|
|
||||||
file = fopen(path, "w");
|
|
||||||
} else {
|
|
||||||
file = nullptr;
|
|
||||||
}
|
|
||||||
|
|
||||||
resume();
|
|
||||||
}
|
|
||||||
|
|
||||||
void set_colors(bool colors) {
|
|
||||||
pause();
|
|
||||||
|
|
||||||
if (colors) {
|
|
||||||
g_col[COMMON_LOG_COL_DEFAULT] = LOG_COL_DEFAULT;
|
|
||||||
g_col[COMMON_LOG_COL_BOLD] = LOG_COL_BOLD;
|
|
||||||
g_col[COMMON_LOG_COL_RED] = LOG_COL_RED;
|
|
||||||
g_col[COMMON_LOG_COL_GREEN] = LOG_COL_GREEN;
|
|
||||||
g_col[COMMON_LOG_COL_YELLOW] = LOG_COL_YELLOW;
|
|
||||||
g_col[COMMON_LOG_COL_BLUE] = LOG_COL_BLUE;
|
|
||||||
g_col[COMMON_LOG_COL_MAGENTA] = LOG_COL_MAGENTA;
|
|
||||||
g_col[COMMON_LOG_COL_CYAN] = LOG_COL_CYAN;
|
|
||||||
g_col[COMMON_LOG_COL_WHITE] = LOG_COL_WHITE;
|
|
||||||
} else {
|
|
||||||
for (size_t i = 0; i < g_col.size(); i++) {
|
|
||||||
g_col[i] = "";
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
resume();
|
|
||||||
}
|
|
||||||
|
|
||||||
void set_prefix(bool prefix) {
|
|
||||||
std::lock_guard<std::mutex> lock(mtx);
|
|
||||||
|
|
||||||
this->prefix = prefix;
|
|
||||||
}
|
|
||||||
|
|
||||||
void set_timestamps(bool timestamps) {
|
|
||||||
std::lock_guard<std::mutex> lock(mtx);
|
|
||||||
|
|
||||||
this->timestamps = timestamps;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
//
|
|
||||||
// public API
|
|
||||||
//
|
|
||||||
|
|
||||||
struct common_log * common_log_init() {
|
|
||||||
return new common_log;
|
|
||||||
}
|
|
||||||
|
|
||||||
struct common_log * common_log_main() {
|
|
||||||
static struct common_log log;
|
|
||||||
|
|
||||||
return &log;
|
|
||||||
}
|
|
||||||
|
|
||||||
void common_log_pause(struct common_log * log) {
|
|
||||||
log->pause();
|
|
||||||
}
|
|
||||||
|
|
||||||
void common_log_resume(struct common_log * log) {
|
|
||||||
log->resume();
|
|
||||||
}
|
|
||||||
|
|
||||||
void common_log_free(struct common_log * log) {
|
|
||||||
delete log;
|
|
||||||
}
|
|
||||||
|
|
||||||
void common_log_add(struct common_log * log, enum ggml_log_level level, const char * fmt, ...) {
|
|
||||||
va_list args;
|
|
||||||
va_start(args, fmt);
|
|
||||||
log->add(level, fmt, args);
|
|
||||||
va_end(args);
|
|
||||||
}
|
|
||||||
|
|
||||||
void common_log_set_file(struct common_log * log, const char * file) {
|
|
||||||
log->set_file(file);
|
|
||||||
}
|
|
||||||
|
|
||||||
void common_log_set_colors(struct common_log * log, bool colors) {
|
|
||||||
log->set_colors(colors);
|
|
||||||
}
|
|
||||||
|
|
||||||
void common_log_set_prefix(struct common_log * log, bool prefix) {
|
|
||||||
log->set_prefix(prefix);
|
|
||||||
}
|
|
||||||
|
|
||||||
void common_log_set_timestamps(struct common_log * log, bool timestamps) {
|
|
||||||
log->set_timestamps(timestamps);
|
|
||||||
}
|
|
789 common/log.h
@@ -1,103 +1,724 @@
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include "ggml.h" // for ggml_log_level
|
#include <chrono>
|
||||||
|
#include <cstring>
|
||||||
|
#include <sstream>
|
||||||
|
#include <iostream>
|
||||||
|
#include <thread>
|
||||||
|
#include <vector>
|
||||||
|
#include <algorithm>
|
||||||
|
#include <cinttypes>
|
||||||
|
|
||||||
#define LOG_CLR_TO_EOL "\033[K\r"
|
// --------------------------------
|
||||||
#define LOG_COL_DEFAULT "\033[0m"
|
//
|
||||||
#define LOG_COL_BOLD "\033[1m"
|
// Basic usage:
|
||||||
#define LOG_COL_RED "\033[31m"
|
//
|
||||||
#define LOG_COL_GREEN "\033[32m"
|
// --------
|
||||||
#define LOG_COL_YELLOW "\033[33m"
|
//
|
||||||
#define LOG_COL_BLUE "\033[34m"
|
// The LOG() and LOG_TEE() macros are ready to go by default
|
||||||
#define LOG_COL_MAGENTA "\033[35m"
|
// they do not require any initialization.
|
||||||
#define LOG_COL_CYAN "\033[36m"
|
//
|
||||||
#define LOG_COL_WHITE "\033[37m"
|
// LOGLN() and LOG_TEELN() are variants which automatically
|
||||||
|
// include \n character at the end of the log string.
|
||||||
|
//
|
||||||
|
// LOG() behaves exactly like printf, by default writing to a logfile.
|
||||||
|
// LOG_TEE() additionally, prints to the screen too ( mimics Unix tee command ).
|
||||||
|
//
|
||||||
|
// Default logfile is named
|
||||||
|
// "llama.<threadID>.log"
|
||||||
|
// Default LOG_TEE() secondary output target is
|
||||||
|
// stderr
|
||||||
|
//
|
||||||
|
// Logs can be dynamically disabled or enabled using functions:
|
||||||
|
// log_disable()
|
||||||
|
// and
|
||||||
|
// log_enable()
|
||||||
|
//
|
||||||
|
// A log target can be changed with:
|
||||||
|
// log_set_target( string )
|
||||||
|
// creating and opening, or re-opening a file by string filename
|
||||||
|
// or
|
||||||
|
// log_set_target( FILE* )
|
||||||
|
// allowing to point at stderr, stdout, or any valid FILE* file handler.
|
||||||
|
//
|
||||||
|
// --------
|
||||||
|
//
|
||||||
|
// End of Basic usage.
|
||||||
|
//
|
||||||
|
// --------------------------------
|
||||||
|
|
||||||
#ifndef __GNUC__
|
// Specifies a log target.
|
||||||
# define LOG_ATTRIBUTE_FORMAT(...)
|
// default uses log_handler() with "llama.log" log file
|
#elif defined(__MINGW32__)
#    define LOG_ATTRIBUTE_FORMAT(...) __attribute__((format(gnu_printf, __VA_ARGS__)))
#else
#    define LOG_ATTRIBUTE_FORMAT(...) __attribute__((format(printf, __VA_ARGS__)))
#endif

#define LOG_DEFAULT_DEBUG 1
#define LOG_DEFAULT_LLAMA 0

// needed by the LOG_TMPL macro to avoid computing log arguments if the verbosity lower
// set via common_log_set_verbosity()
extern int common_log_verbosity_thold;

void common_log_set_verbosity_thold(int verbosity); // not thread-safe

// the common_log uses an internal worker thread to print/write log messages
// when the worker thread is paused, incoming log messages are discarded
struct common_log;

struct common_log * common_log_init();
struct common_log * common_log_main(); // singleton, automatically destroys itself on exit

void common_log_pause (struct common_log * log); // pause the worker thread, not thread-safe
void common_log_resume(struct common_log * log); // resume the worker thread, not thread-safe
void common_log_free  (struct common_log * log);

LOG_ATTRIBUTE_FORMAT(3, 4)
void common_log_add(struct common_log * log, enum ggml_log_level level, const char * fmt, ...);

// defaults: file = NULL, colors = false, prefix = false, timestamps = false
//
// regular log output:
//
// ggml_backend_metal_log_allocated_size: allocated buffer, size = 6695.84 MiB, ( 6695.91 / 21845.34)
// llm_load_tensors: ggml ctx size = 0.27 MiB
// llm_load_tensors: offloading 32 repeating layers to GPU
// llm_load_tensors: offloading non-repeating layers to GPU
//
// with prefix = true, timestamps = true, the log output will look like this:
//
// 0.00.035.060 D ggml_backend_metal_log_allocated_size: allocated buffer, size = 6695.84 MiB, ( 6695.91 / 21845.34)
// 0.00.035.064 I llm_load_tensors: ggml ctx size = 0.27 MiB
// 0.00.090.578 I llm_load_tensors: offloading 32 repeating layers to GPU
// 0.00.090.579 I llm_load_tensors: offloading non-repeating layers to GPU
//
// I - info    (stdout, V = 0)
// W - warning (stderr, V = 0)
// E - error   (stderr, V = 0)
// D - debug   (stderr, V = LOG_DEFAULT_DEBUG)
//

void common_log_set_file      (struct common_log * log, const char * file); // not thread-safe
void common_log_set_colors    (struct common_log * log, bool colors);       // not thread-safe
void common_log_set_prefix    (struct common_log * log, bool prefix);       // whether to output prefix to each log
void common_log_set_timestamps(struct common_log * log, bool timestamps);   // whether to output timestamps in the prefix

// helper macros for logging
// use these to avoid computing log arguments if the verbosity of the log is higher than the threshold
//
// for example:
//
// LOG_DBG("this is a debug message: %d\n", expensive_function());
//
// this will avoid calling expensive_function() if LOG_DEFAULT_DEBUG > common_log_verbosity_thold
//

#define LOG_TMPL(level, verbosity, ...) \
    do { \
        if ((verbosity) <= common_log_verbosity_thold) { \
            common_log_add(common_log_main(), (level), __VA_ARGS__); \
        } \
    } while (0)

#define LOG(...)             LOG_TMPL(GGML_LOG_LEVEL_NONE, 0,         __VA_ARGS__)
#define LOGV(verbosity, ...) LOG_TMPL(GGML_LOG_LEVEL_NONE, verbosity, __VA_ARGS__)

#define LOG_INF(...) LOG_TMPL(GGML_LOG_LEVEL_INFO,  0,                 __VA_ARGS__)
#define LOG_WRN(...) LOG_TMPL(GGML_LOG_LEVEL_WARN,  0,                 __VA_ARGS__)
#define LOG_ERR(...) LOG_TMPL(GGML_LOG_LEVEL_ERROR, 0,                 __VA_ARGS__)
#define LOG_DBG(...) LOG_TMPL(GGML_LOG_LEVEL_DEBUG, LOG_DEFAULT_DEBUG, __VA_ARGS__)
#define LOG_CNT(...) LOG_TMPL(GGML_LOG_LEVEL_CONT,  0,                 __VA_ARGS__)

#define LOG_INFV(verbosity, ...) LOG_TMPL(GGML_LOG_LEVEL_INFO,  verbosity, __VA_ARGS__)
#define LOG_WRNV(verbosity, ...) LOG_TMPL(GGML_LOG_LEVEL_WARN,  verbosity, __VA_ARGS__)
#define LOG_ERRV(verbosity, ...) LOG_TMPL(GGML_LOG_LEVEL_ERROR, verbosity, __VA_ARGS__)
#define LOG_DBGV(verbosity, ...) LOG_TMPL(GGML_LOG_LEVEL_DEBUG, verbosity, __VA_ARGS__)
#define LOG_CNTV(verbosity, ...) LOG_TMPL(GGML_LOG_LEVEL_CONT,  verbosity, __VA_ARGS__)

// this can be changed, by defining LOG_TARGET
// like so:
//
// #define LOG_TARGET (a valid FILE*)
// #include "log.h"
//
// or it can be simply redirected to stdout or stderr
// like so:
//
// #define LOG_TARGET stderr
// #include "log.h"
//
// The log target can also be redirected to a different function
// like so:
//
// #define LOG_TARGET log_handler_different()
// #include "log.h"
//
// FILE* log_handler_different()
// {
//     return stderr;
// }
//
// or:
//
// #define LOG_TARGET log_handler_another_one("somelog.log")
// #include "log.h"
//
// FILE* log_handler_another_one(char*filename)
// {
//     static FILE* logfile = nullptr;
//     (...)
//     if( !logfile )
//     {
//         fopen(...)
//     }
//     (...)
//     return logfile
// }
//
#ifndef LOG_TARGET
    #define LOG_TARGET log_handler()
#endif

#ifndef LOG_TEE_TARGET
    #define LOG_TEE_TARGET stderr
#endif

// Utility for synchronizing log configuration state
// since std::optional was introduced only in c++17
enum LogTriState
{
    LogTriStateSame,
    LogTriStateFalse,
    LogTriStateTrue
};

// Utility to obtain "pid" like unique process id and use it when creating log files.
inline std::string log_get_pid()
{
    static std::string pid;
    if (pid.empty())
    {
        // std::this_thread::get_id() is the most portable way of obtaining a "process id"
        // it's not the same as "pid" but is unique enough to solve multiple instances
        // trying to write to the same log.
        std::stringstream ss;
        ss << std::this_thread::get_id();
        pid = ss.str();
    }

    return pid;
}

// Utility function for generating log file names with unique id based on thread id.
// invocation with log_filename_generator( "llama", "log" ) creates a string "llama.<number>.log"
// where the number is a runtime id of the current thread.

#define log_filename_generator(log_file_basename, log_file_extension) log_filename_generator_impl(LogTriStateSame, log_file_basename, log_file_extension)

// INTERNAL, DO NOT USE
inline std::string log_filename_generator_impl(LogTriState multilog, const std::string & log_file_basename, const std::string & log_file_extension)
{
    static bool _multilog = false;

    if (multilog != LogTriStateSame)
    {
        _multilog = multilog == LogTriStateTrue;
    }

    std::stringstream buf;

    buf << log_file_basename;
    if (_multilog)
    {
        buf << ".";
        buf << log_get_pid();
    }
    buf << ".";
    buf << log_file_extension;

    return buf.str();
}

#ifndef LOG_DEFAULT_FILE_NAME
    #define LOG_DEFAULT_FILE_NAME log_filename_generator("llama", "log")
#endif

// Utility for turning #define values into string literals
// so we can have a define for stderr and
// we can print "stderr" instead of literal stderr, etc.
#define LOG_STRINGIZE1(s) #s
#define LOG_STRINGIZE(s) LOG_STRINGIZE1(s)

#define LOG_TEE_TARGET_STRING LOG_STRINGIZE(LOG_TEE_TARGET)

// Allows disabling timestamps.
// in order to disable, define LOG_NO_TIMESTAMPS
// like so:
//
// #define LOG_NO_TIMESTAMPS
// #include "log.h"
//
#ifndef LOG_NO_TIMESTAMPS
    #ifndef _MSC_VER
        #define LOG_TIMESTAMP_FMT "[%" PRIu64 "] "
        #define LOG_TIMESTAMP_VAL , (std::chrono::duration_cast<std::chrono::duration<std::uint64_t>>(std::chrono::system_clock::now().time_since_epoch())).count()
    #else
        #define LOG_TIMESTAMP_FMT "[%" PRIu64 "] "
        #define LOG_TIMESTAMP_VAL , (std::chrono::duration_cast<std::chrono::duration<std::uint64_t>>(std::chrono::system_clock::now().time_since_epoch())).count()
    #endif
#else
    #define LOG_TIMESTAMP_FMT "%s"
    #define LOG_TIMESTAMP_VAL ,""
#endif

#ifdef LOG_TEE_TIMESTAMPS
    #ifndef _MSC_VER
        #define LOG_TEE_TIMESTAMP_FMT "[%" PRIu64 "] "
        #define LOG_TEE_TIMESTAMP_VAL , (std::chrono::duration_cast<std::chrono::duration<std::uint64_t>>(std::chrono::system_clock::now().time_since_epoch())).count()
    #else
        #define LOG_TEE_TIMESTAMP_FMT "[%" PRIu64 "] "
        #define LOG_TEE_TIMESTAMP_VAL , (std::chrono::duration_cast<std::chrono::duration<std::uint64_t>>(std::chrono::system_clock::now().time_since_epoch())).count()
    #endif
#else
    #define LOG_TEE_TIMESTAMP_FMT "%s"
    #define LOG_TEE_TIMESTAMP_VAL ,""
#endif

// Allows disabling file/line/function prefix
// in order to disable, define LOG_NO_FILE_LINE_FUNCTION
// like so:
//
// #define LOG_NO_FILE_LINE_FUNCTION
// #include "log.h"
//
#ifndef LOG_NO_FILE_LINE_FUNCTION
    #ifndef _MSC_VER
        #define LOG_FLF_FMT "[%24s:%5d][%24s] "
        #define LOG_FLF_VAL , __FILE__, __LINE__, __FUNCTION__
    #else
        #define LOG_FLF_FMT "[%24s:%5ld][%24s] "
        #define LOG_FLF_VAL , __FILE__, (long)__LINE__, __FUNCTION__
    #endif
#else
    #define LOG_FLF_FMT "%s"
    #define LOG_FLF_VAL ,""
#endif

#ifdef LOG_TEE_FILE_LINE_FUNCTION
    #ifndef _MSC_VER
        #define LOG_TEE_FLF_FMT "[%24s:%5d][%24s] "
        #define LOG_TEE_FLF_VAL , __FILE__, __LINE__, __FUNCTION__
    #else
        #define LOG_TEE_FLF_FMT "[%24s:%5ld][%24s] "
        #define LOG_TEE_FLF_VAL , __FILE__, (long)__LINE__, __FUNCTION__
    #endif
#else
    #define LOG_TEE_FLF_FMT "%s"
    #define LOG_TEE_FLF_VAL ,""
#endif

// INTERNAL, DO NOT USE
// USE LOG() INSTEAD
//
#if !defined(_MSC_VER) || defined(__INTEL_LLVM_COMPILER) || defined(__clang__)
    #define LOG_IMPL(str, ...) \
    do { \
        if (LOG_TARGET != nullptr) \
        { \
            fprintf(LOG_TARGET, LOG_TIMESTAMP_FMT LOG_FLF_FMT str "%s" LOG_TIMESTAMP_VAL LOG_FLF_VAL, __VA_ARGS__); \
            fflush(LOG_TARGET); \
        } \
    } while (0)
#else
    #define LOG_IMPL(str, ...) \
    do { \
        if (LOG_TARGET != nullptr) \
        { \
            fprintf(LOG_TARGET, LOG_TIMESTAMP_FMT LOG_FLF_FMT str "%s" LOG_TIMESTAMP_VAL LOG_FLF_VAL "", ##__VA_ARGS__); \
            fflush(LOG_TARGET); \
        } \
    } while (0)
#endif

// INTERNAL, DO NOT USE
// USE LOG_TEE() INSTEAD
//
#if !defined(_MSC_VER) || defined(__INTEL_LLVM_COMPILER) || defined(__clang__)
    #define LOG_TEE_IMPL(str, ...) \
    do { \
        if (LOG_TARGET != nullptr) \
        { \
            fprintf(LOG_TARGET, LOG_TIMESTAMP_FMT LOG_FLF_FMT str "%s" LOG_TIMESTAMP_VAL LOG_FLF_VAL, __VA_ARGS__); \
            fflush(LOG_TARGET); \
        } \
        if (LOG_TARGET != nullptr && LOG_TARGET != stdout && LOG_TARGET != stderr && LOG_TEE_TARGET != nullptr) \
        { \
            fprintf(LOG_TEE_TARGET, LOG_TEE_TIMESTAMP_FMT LOG_TEE_FLF_FMT str "%s" LOG_TEE_TIMESTAMP_VAL LOG_TEE_FLF_VAL, __VA_ARGS__); \
            fflush(LOG_TEE_TARGET); \
        } \
    } while (0)
#else
    #define LOG_TEE_IMPL(str, ...) \
    do { \
        if (LOG_TARGET != nullptr) \
        { \
            fprintf(LOG_TARGET, LOG_TIMESTAMP_FMT LOG_FLF_FMT str "%s" LOG_TIMESTAMP_VAL LOG_FLF_VAL "", ##__VA_ARGS__); \
            fflush(LOG_TARGET); \
        } \
        if (LOG_TARGET != nullptr && LOG_TARGET != stdout && LOG_TARGET != stderr && LOG_TEE_TARGET != nullptr) \
        { \
            fprintf(LOG_TEE_TARGET, LOG_TEE_TIMESTAMP_FMT LOG_TEE_FLF_FMT str "%s" LOG_TEE_TIMESTAMP_VAL LOG_TEE_FLF_VAL "", ##__VA_ARGS__); \
            fflush(LOG_TEE_TARGET); \
        } \
    } while (0)
#endif

// The '\0' as a last argument, is a trick to bypass the silly
// "warning: ISO C++11 requires at least one argument for the "..." in a variadic macro"
// so we can have a single macro which can be called just like printf.

// Main LOG macro.
// behaves like printf, and supports arguments the exact same way.
//
#if !defined(_MSC_VER) || defined(__clang__)
    #define LOG(...) LOG_IMPL(__VA_ARGS__, "")
#else
    #define LOG(str, ...) LOG_IMPL("%s" str, "", ##__VA_ARGS__, "")
#endif
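Referring back to the common_log helper macros shown above: a minimal usage sketch (not part of this diff; it assumes the new common/log.h is on the include path, and expensive_function() is a hypothetical stand-in for a costly argument computation):

#include "log.h"

static int expensive_function() {
    return 42; // placeholder for an expensive computation
}

int main() {
    common_log_set_verbosity_thold(0);                    // only verbosity <= 0 messages are emitted
    LOG_INF("starting up\n");                             // printed
    LOG_DBG("debug detail: %d\n", expensive_function());  // skipped: LOG_DEFAULT_DEBUG (1) > 0,
                                                          // so expensive_function() is never called
    LOG_DBGV(2, "even more detail\n");                    // skipped as well
    return 0;
}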
|
||||||
|
|
||||||
|
// Main TEE macro.
|
||||||
|
// does the same as LOG
|
||||||
|
// and
|
||||||
|
// simultaneously writes stderr.
|
||||||
|
//
|
||||||
|
// Secondary target can be changed just like LOG_TARGET
|
||||||
|
// by defining LOG_TEE_TARGET
|
||||||
|
//
|
||||||
|
#if !defined(_MSC_VER) || defined(__clang__)
|
||||||
|
#define LOG_TEE(...) LOG_TEE_IMPL(__VA_ARGS__, "")
|
||||||
|
#else
|
||||||
|
#define LOG_TEE(str, ...) LOG_TEE_IMPL("%s" str, "", ##__VA_ARGS__, "")
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// LOG macro variants with auto endline.
|
||||||
|
#if !defined(_MSC_VER) || defined(__clang__)
|
||||||
|
#define LOGLN(...) LOG_IMPL(__VA_ARGS__, "\n")
|
||||||
|
#define LOG_TEELN(...) LOG_TEE_IMPL(__VA_ARGS__, "\n")
|
||||||
|
#else
|
||||||
|
#define LOGLN(str, ...) LOG_IMPL("%s" str, "", ##__VA_ARGS__, "\n")
|
||||||
|
#define LOG_TEELN(str, ...) LOG_TEE_IMPL("%s" str, "", ##__VA_ARGS__, "\n")
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// INTERNAL, DO NOT USE
|
||||||
|
inline FILE *log_handler1_impl(bool change = false, LogTriState append = LogTriStateSame, LogTriState disable = LogTriStateSame, const std::string & filename = LOG_DEFAULT_FILE_NAME, FILE *target = nullptr)
|
||||||
|
{
|
||||||
|
static bool _initialized = false;
|
||||||
|
static bool _append = false;
|
||||||
|
static bool _disabled = filename.empty() && target == nullptr;
|
||||||
|
static std::string log_current_filename{filename};
|
||||||
|
static FILE *log_current_target{target};
|
||||||
|
static FILE *logfile = nullptr;
|
||||||
|
|
||||||
|
if (change)
|
||||||
|
{
|
||||||
|
if (append != LogTriStateSame)
|
||||||
|
{
|
||||||
|
_append = append == LogTriStateTrue;
|
||||||
|
return logfile;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (disable == LogTriStateTrue)
|
||||||
|
{
|
||||||
|
// Disable primary target
|
||||||
|
_disabled = true;
|
||||||
|
}
|
||||||
|
// If previously disabled, only enable, and keep previous target
|
||||||
|
else if (disable == LogTriStateFalse)
|
||||||
|
{
|
||||||
|
_disabled = false;
|
||||||
|
}
|
||||||
|
// Otherwise, process the arguments
|
||||||
|
else if (log_current_filename != filename || log_current_target != target)
|
||||||
|
{
|
||||||
|
_initialized = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (_disabled)
|
||||||
|
{
|
||||||
|
// Log is disabled
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (_initialized)
|
||||||
|
{
|
||||||
|
// with fallback in case something went wrong
|
||||||
|
return logfile ? logfile : stderr;
|
||||||
|
}
|
||||||
|
|
||||||
|
// do the (re)initialization
|
||||||
|
if (target != nullptr)
|
||||||
|
{
|
||||||
|
if (logfile != nullptr && logfile != stdout && logfile != stderr)
|
||||||
|
{
|
||||||
|
fclose(logfile);
|
||||||
|
}
|
||||||
|
|
||||||
|
log_current_filename = LOG_DEFAULT_FILE_NAME;
|
||||||
|
log_current_target = target;
|
||||||
|
|
||||||
|
logfile = target;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if (log_current_filename != filename)
|
||||||
|
{
|
||||||
|
if (logfile != nullptr && logfile != stdout && logfile != stderr)
|
||||||
|
{
|
||||||
|
fclose(logfile);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
logfile = fopen(filename.c_str(), _append ? "a" : "w");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!logfile)
|
||||||
|
{
|
||||||
|
// Verify whether the file was opened, otherwise fallback to stderr
|
||||||
|
logfile = stderr;
|
||||||
|
|
||||||
|
fprintf(stderr, "Failed to open logfile '%s' with error '%s'\n", filename.c_str(), std::strerror(errno));
|
||||||
|
fflush(stderr);
|
||||||
|
|
||||||
|
// At this point we let the init flag be set to true below, and let the target fall back to stderr
|
||||||
|
// otherwise we would repeatedly fopen() which was already unsuccessful
|
||||||
|
}
|
||||||
|
|
||||||
|
_initialized = true;
|
||||||
|
|
||||||
|
return logfile ? logfile : stderr;
|
||||||
|
}
|
||||||
|
|
||||||
|
// INTERNAL, DO NOT USE
|
||||||
|
inline FILE *log_handler2_impl(bool change = false, LogTriState append = LogTriStateSame, LogTriState disable = LogTriStateSame, FILE *target = nullptr, const std::string & filename = LOG_DEFAULT_FILE_NAME)
|
||||||
|
{
|
||||||
|
return log_handler1_impl(change, append, disable, filename, target);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Disables logs entirely at runtime.
|
||||||
|
// Makes LOG() and LOG_TEE() produce no output,
|
||||||
|
// until enabled back.
|
||||||
|
#define log_disable() log_disable_impl()
|
||||||
|
|
||||||
|
// INTERNAL, DO NOT USE
|
||||||
|
inline FILE *log_disable_impl()
|
||||||
|
{
|
||||||
|
return log_handler1_impl(true, LogTriStateSame, LogTriStateTrue);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Enables logs at runtime.
|
||||||
|
#define log_enable() log_enable_impl()
|
||||||
|
|
||||||
|
// INTERNAL, DO NOT USE
|
||||||
|
inline FILE *log_enable_impl()
|
||||||
|
{
|
||||||
|
return log_handler1_impl(true, LogTriStateSame, LogTriStateFalse);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Sets target for logs, either by a file name or FILE* pointer (stdout, stderr, or any valid FILE*)
|
||||||
|
#define log_set_target(target) log_set_target_impl(target)
|
||||||
|
|
||||||
|
// INTERNAL, DO NOT USE
|
||||||
|
inline FILE *log_set_target_impl(const std::string & filename) { return log_handler1_impl(true, LogTriStateSame, LogTriStateSame, filename); }
|
||||||
|
inline FILE *log_set_target_impl(FILE *target) { return log_handler2_impl(true, LogTriStateSame, LogTriStateSame, target); }
|
||||||
|
|
||||||
|
// INTERNAL, DO NOT USE
|
||||||
|
inline FILE *log_handler() { return log_handler1_impl(); }
|
||||||
|
|
||||||
|
// Enable or disable creating separate log files for each run.
|
||||||
|
// can ONLY be invoked BEFORE first log use.
|
||||||
|
#define log_multilog(enable) log_filename_generator_impl((enable) ? LogTriStateTrue : LogTriStateFalse, "", "")
|
||||||
|
// Enable or disable append mode for log file.
|
||||||
|
// can ONLY be invoked BEFORE first log use.
|
||||||
|
#define log_append(enable) log_append_impl(enable)
|
||||||
|
// INTERNAL, DO NOT USE
|
||||||
|
inline FILE *log_append_impl(bool enable)
|
||||||
|
{
|
||||||
|
return log_handler1_impl(true, enable ? LogTriStateTrue : LogTriStateFalse, LogTriStateSame);
|
||||||
|
}
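A hypothetical snippet (not part of this diff) showing the ordering these two switches require, since both can only be invoked before the first log use:

int main(int, char **) {
    log_multilog(true);   // separate log file per run: "llama.<thread-id>.log"
    log_append(true);     // keep appending instead of truncating the log file
    LOG("first message\n");
    return 0;
}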
|
||||||
|
|
||||||
|
inline void log_test()
|
||||||
|
{
|
||||||
|
log_disable();
|
||||||
|
LOG("01 Hello World to nobody, because logs are disabled!\n");
|
||||||
|
log_enable();
|
||||||
|
LOG("02 Hello World to default output, which is \"%s\" ( Yaaay, arguments! )!\n", LOG_STRINGIZE(LOG_TARGET));
|
||||||
|
LOG_TEE("03 Hello World to **both** default output and " LOG_TEE_TARGET_STRING "!\n");
|
||||||
|
log_set_target(stderr);
|
||||||
|
LOG("04 Hello World to stderr!\n");
|
||||||
|
LOG_TEE("05 Hello World TEE with double printing to stderr prevented!\n");
|
||||||
|
log_set_target(LOG_DEFAULT_FILE_NAME);
|
||||||
|
LOG("06 Hello World to default log file!\n");
|
||||||
|
log_set_target(stdout);
|
||||||
|
LOG("07 Hello World to stdout!\n");
|
||||||
|
log_set_target(LOG_DEFAULT_FILE_NAME);
|
||||||
|
LOG("08 Hello World to default log file again!\n");
|
||||||
|
log_disable();
|
||||||
|
LOG("09 Hello World _1_ into the void!\n");
|
||||||
|
log_enable();
|
||||||
|
LOG("10 Hello World back from the void ( you should not see _1_ in the log or the output )!\n");
|
||||||
|
log_disable();
|
||||||
|
log_set_target("llama.anotherlog.log");
|
||||||
|
LOG("11 Hello World _2_ to nobody, new target was selected but logs are still disabled!\n");
|
||||||
|
log_enable();
|
||||||
|
LOG("12 Hello World this time in a new file ( you should not see _2_ in the log or the output )?\n");
|
||||||
|
log_set_target("llama.yetanotherlog.log");
|
||||||
|
LOG("13 Hello World this time in yet new file?\n");
|
||||||
|
log_set_target(log_filename_generator("llama_autonamed", "log"));
|
||||||
|
LOG("14 Hello World in log with generated filename!\n");
|
||||||
|
#ifdef _MSC_VER
|
||||||
|
LOG_TEE("15 Hello msvc TEE without arguments\n");
|
||||||
|
LOG_TEE("16 Hello msvc TEE with (%d)(%s) arguments\n", 1, "test");
|
||||||
|
LOG_TEELN("17 Hello msvc TEELN without arguments\n");
|
||||||
|
LOG_TEELN("18 Hello msvc TEELN with (%d)(%s) arguments\n", 1, "test");
|
||||||
|
LOG("19 Hello msvc LOG without arguments\n");
|
||||||
|
LOG("20 Hello msvc LOG with (%d)(%s) arguments\n", 1, "test");
|
||||||
|
LOGLN("21 Hello msvc LOGLN without arguments\n");
|
||||||
|
LOGLN("22 Hello msvc LOGLN with (%d)(%s) arguments\n", 1, "test");
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
inline bool log_param_single_parse(const std::string & param)
|
||||||
|
{
|
||||||
|
if ( param == "--log-test")
|
||||||
|
{
|
||||||
|
log_test();
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( param == "--log-disable")
|
||||||
|
{
|
||||||
|
log_disable();
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ( param == "--log-enable")
|
||||||
|
{
|
||||||
|
log_enable();
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (param == "--log-new")
|
||||||
|
{
|
||||||
|
log_multilog(true);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (param == "--log-append")
|
||||||
|
{
|
||||||
|
log_append(true);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline bool log_param_pair_parse(bool check_but_dont_parse, const std::string & param, const std::string & next = std::string())
|
||||||
|
{
|
||||||
|
if ( param == "--log-file")
|
||||||
|
{
|
||||||
|
if (!check_but_dont_parse)
|
||||||
|
{
|
||||||
|
log_set_target(log_filename_generator(next.empty() ? "unnamed" : next, "log"));
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
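A hypothetical argument loop (not part of this diff) showing how the two parsers above might be wired up; the argc/argv handling is illustrative:

for (int i = 1; i < argc; ++i) {
    const std::string arg = argv[i];
    if (log_param_single_parse(arg)) {
        continue; // handled: --log-test, --log-disable, --log-enable, --log-new, --log-append
    }
    if (i + 1 < argc && log_param_pair_parse(false, arg, argv[i + 1])) {
        ++i;      // handled: --log-file <name>, consume its value
        continue;
    }
    // application-specific arguments ...
}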
|
||||||
|
|
||||||
|
inline void log_print_usage()
|
||||||
|
{
|
||||||
|
printf("log options:\n");
|
||||||
|
/* format
|
||||||
|
printf(" -h, --help show this help message and exit\n");*/
|
||||||
|
/* spacing
|
||||||
|
printf("__-param----------------Description\n");*/
|
||||||
|
printf(" --log-test Run simple logging test\n");
|
||||||
|
printf(" --log-disable Disable trace logs\n");
|
||||||
|
printf(" --log-enable Enable trace logs\n");
|
||||||
|
printf(" --log-file Specify a log filename (without extension)\n");
|
||||||
|
printf(" --log-new Create a separate new log file on start. "
|
||||||
|
"Each log file will have unique name: \"<name>.<ID>.log\"\n");
|
||||||
|
printf(" --log-append Don't truncate the old log file.\n");
|
||||||
|
printf("\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
#define log_dump_cmdline(argc, argv) log_dump_cmdline_impl(argc, argv)
|
||||||
|
|
||||||
|
// INTERNAL, DO NOT USE
|
||||||
|
inline void log_dump_cmdline_impl(int argc, char **argv)
|
||||||
|
{
|
||||||
|
std::stringstream buf;
|
||||||
|
for (int i = 0; i < argc; ++i)
|
||||||
|
{
|
||||||
|
if (std::string(argv[i]).find(' ') != std::string::npos)
|
||||||
|
{
|
||||||
|
buf << " \"" << argv[i] <<"\"";
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
buf << " " << argv[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
LOGLN("Cmd:%s", buf.str().c_str());
|
||||||
|
}
|
||||||
|
|
||||||
|
#define log_tostr(var) log_var_to_string_impl(var).c_str()
|
||||||
|
|
||||||
|
inline std::string log_var_to_string_impl(bool var)
|
||||||
|
{
|
||||||
|
return var ? "true" : "false";
|
||||||
|
}
|
||||||
|
|
||||||
|
inline std::string log_var_to_string_impl(std::string var)
|
||||||
|
{
|
||||||
|
return var;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline std::string log_var_to_string_impl(const std::vector<int> & var)
|
||||||
|
{
|
||||||
|
std::stringstream buf;
|
||||||
|
buf << "[ ";
|
||||||
|
bool first = true;
|
||||||
|
for (auto e : var)
|
||||||
|
{
|
||||||
|
if (first)
|
||||||
|
{
|
||||||
|
first = false;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
buf << ", ";
|
||||||
|
}
|
||||||
|
buf << std::to_string(e);
|
||||||
|
}
|
||||||
|
buf << " ]";
|
||||||
|
|
||||||
|
return buf.str();
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename C, typename T>
|
||||||
|
inline std::string LOG_TOKENS_TOSTR_PRETTY(const C & ctx, const T & tokens)
|
||||||
|
{
|
||||||
|
std::stringstream buf;
|
||||||
|
buf << "[ ";
|
||||||
|
|
||||||
|
bool first = true;
|
||||||
|
for (const auto &token : tokens)
|
||||||
|
{
|
||||||
|
if (!first) {
|
||||||
|
buf << ", ";
|
||||||
|
} else {
|
||||||
|
first = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
auto detokenized = llama_token_to_piece(ctx, token);
|
||||||
|
|
||||||
|
detokenized.erase(
|
||||||
|
std::remove_if(
|
||||||
|
detokenized.begin(),
|
||||||
|
detokenized.end(),
|
||||||
|
[](const unsigned char c) { return !std::isprint(c); }),
|
||||||
|
detokenized.end());
|
||||||
|
|
||||||
|
buf
|
||||||
|
<< "'" << detokenized << "'"
|
||||||
|
<< ":" << std::to_string(token);
|
||||||
|
}
|
||||||
|
buf << " ]";
|
||||||
|
|
||||||
|
return buf.str();
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename C, typename B>
|
||||||
|
inline std::string LOG_BATCH_TOSTR_PRETTY(const C & ctx, const B & batch)
|
||||||
|
{
|
||||||
|
std::stringstream buf;
|
||||||
|
buf << "[ ";
|
||||||
|
|
||||||
|
bool first = true;
|
||||||
|
for (int i = 0; i < batch.n_tokens; ++i)
|
||||||
|
{
|
||||||
|
if (!first) {
|
||||||
|
buf << ", ";
|
||||||
|
} else {
|
||||||
|
first = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
auto detokenized = llama_token_to_piece(ctx, batch.token[i]);
|
||||||
|
|
||||||
|
detokenized.erase(
|
||||||
|
std::remove_if(
|
||||||
|
detokenized.begin(),
|
||||||
|
detokenized.end(),
|
||||||
|
[](const unsigned char c) { return !std::isprint(c); }),
|
||||||
|
detokenized.end());
|
||||||
|
|
||||||
|
buf
|
||||||
|
<< "\n" << std::to_string(i)
|
||||||
|
<< ":token '" << detokenized << "'"
|
||||||
|
<< ":pos " << std::to_string(batch.pos[i])
|
||||||
|
<< ":n_seq_id " << std::to_string(batch.n_seq_id[i])
|
||||||
|
<< ":seq_id " << std::to_string(batch.seq_id[i][0])
|
||||||
|
<< ":logits " << std::to_string(batch.logits[i]);
|
||||||
|
}
|
||||||
|
buf << " ]";
|
||||||
|
|
||||||
|
return buf.str();
|
||||||
|
}
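Both helpers return a std::string, so a typical call site passes the result through c_str(); a hypothetical example (not part of this diff, with ctx, embd_inp and batch assumed to exist in the caller):

LOG("prompt tokens: %s\n", LOG_TOKENS_TOSTR_PRETTY(ctx, embd_inp).c_str());
LOG("batch: %s\n",         LOG_BATCH_TOSTR_PRETTY(ctx, batch).c_str());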
|
||||||
|
|
||||||
|
#ifdef LOG_DISABLE_LOGS
|
||||||
|
|
||||||
|
#undef LOG
|
||||||
|
#define LOG(...) // dummy stub
|
||||||
|
#undef LOGLN
|
||||||
|
#define LOGLN(...) // dummy stub
|
||||||
|
|
||||||
|
#undef LOG_TEE
|
||||||
|
#define LOG_TEE(...) fprintf(stderr, __VA_ARGS__) // convert to normal fprintf
|
||||||
|
|
||||||
|
#undef LOG_TEELN
|
||||||
|
#define LOG_TEELN(...) fprintf(stderr, __VA_ARGS__) // convert to normal fprintf
|
||||||
|
|
||||||
|
#undef LOG_DISABLE
|
||||||
|
#define LOG_DISABLE() // dummy stub
|
||||||
|
|
||||||
|
#undef LOG_ENABLE
|
||||||
|
#define LOG_ENABLE() // dummy stub
|
||||||
|
|
||||||
|
#undef LOG_ENABLE
|
||||||
|
#define LOG_ENABLE() // dummy stub
|
||||||
|
|
||||||
|
#undef LOG_SET_TARGET
|
||||||
|
#define LOG_SET_TARGET(...) // dummy stub
|
||||||
|
|
||||||
|
#undef LOG_DUMP_CMDLINE
|
||||||
|
#define LOG_DUMP_CMDLINE(...) // dummy stub
|
||||||
|
|
||||||
|
#endif // LOG_DISABLE_LOGS
|
||||||
|
|
common/minja.hpp: 2883 changed lines (file diff suppressed because it is too large)
@@ -2,13 +2,10 @@
 #include "common.h"
 #include "log.h"

-#include <cinttypes>
 #include <cstdint>
-#include <cstdio>
 #include <fstream>
-#include <thread>

-void common_ngram_cache_update(common_ngram_cache & ngram_cache, int ngram_min, int ngram_max,
+void llama_ngram_cache_update(llama_ngram_cache & ngram_cache, int ngram_min, int ngram_max,
         std::vector<llama_token> & inp, int nnew, bool print_progress) {
     const int64_t t_start_ms = ggml_time_ms();
     const int64_t inp_size = inp.size();
@@ -20,16 +17,16 @@ void common_ngram_cache_update(common_ngram_cache & ngram_cache, int ngram_min,
     const int64_t i_start = std::max(inp_size - nnew, ngram_size);
     for (int64_t i = i_start; i < inp_size; ++i) {
         const int64_t ngram_start = i - ngram_size;
-        common_ngram ngram(&inp[ngram_start], ngram_size);
+        llama_ngram ngram(&inp[ngram_start], ngram_size);
         const llama_token token = inp[i];

-        common_ngram_cache::iterator part_it = ngram_cache.find(ngram);
+        llama_ngram_cache::iterator part_it = ngram_cache.find(ngram);
         if (part_it == ngram_cache.end()) {
-            common_ngram_cache_part part;
+            llama_ngram_cache_part part;
             part.emplace(token, 1);
             ngram_cache.emplace(ngram, part);
         } else {
-            common_ngram_cache_part::iterator token_count_it = part_it->second.find(token);
+            llama_ngram_cache_part::iterator token_count_it = part_it->second.find(token);
             if (token_count_it == part_it->second.end()) {
                 part_it->second.emplace(token, 1);
             } else {
@@ -62,16 +59,16 @@ constexpr int draft_min_sample_size_strict[LLAMA_NGRAM_MAX] = { 4, 3, 2, 2};
 constexpr int draft_min_percent_strict[LLAMA_NGRAM_MAX] = {75, 66, 66, 66};

 // Helper function that tries to draft a token from only the static ngram cache:
-static llama_token try_draft(common_ngram_cache & nc_static, const common_ngram ngram_static) {
-    common_ngram_cache::iterator part_static_it = nc_static.find(ngram_static);
+static llama_token try_draft(llama_ngram_cache & nc_static, const llama_ngram ngram_static) {
+    llama_ngram_cache::iterator part_static_it = nc_static.find(ngram_static);
     if (part_static_it == nc_static.end()) {
-        return LLAMA_TOKEN_NULL;
+        return -1;
     }
-    const common_ngram_cache_part part_static = part_static_it->second;
+    const llama_ngram_cache_part part_static = part_static_it->second;

     int max_count_static = 0;
     int sum_count_static = 0;
-    llama_token max_token = LLAMA_TOKEN_NULL;
+    llama_token max_token = -1;

     for (std::pair<llama_token, int> token_count_static : part_static) {
         const llama_token token = token_count_static.first;
@@ -85,39 +82,39 @@ static llama_token try_draft(common_ngram_cache & nc_static, const common_ngram
     }

     if (sum_count_static < draft_min_sample_size_lax[LLAMA_NGRAM_STATIC-1]) {
-        return LLAMA_TOKEN_NULL;
+        return -1;
     }
     if (100*max_count_static < draft_min_percent_lax[LLAMA_NGRAM_STATIC-1]*sum_count_static) {
-        return LLAMA_TOKEN_NULL;
+        return -1;
     }
     return max_token;
 }

 // Try to draft a token from primary cache (context/dynamic), validate with static cache:
 static llama_token try_draft(
-    common_ngram_cache & nc_primary, const std::vector<common_ngram> & ngrams_primary, common_ngram_cache_part & part_static,
+    llama_ngram_cache & nc_primary, const std::vector<llama_ngram> & ngrams_primary, llama_ngram_cache_part & part_static,
     const int * min_sample_size, const int * min_percent) {

-    llama_token drafted_token = LLAMA_TOKEN_NULL;
+    llama_token drafted_token = -1;

-    for (int i = ngrams_primary.size()-1; i >= 0 && drafted_token == LLAMA_TOKEN_NULL; --i) {
-        const common_ngram ngram_primary = ngrams_primary[i];
+    for (int i = ngrams_primary.size()-1; i >= 0 && drafted_token == -1; --i) {
+        const llama_ngram ngram_primary = ngrams_primary[i];

-        common_ngram_cache::iterator part_primary_it = nc_primary.find(ngram_primary);
+        llama_ngram_cache::iterator part_primary_it = nc_primary.find(ngram_primary);
         if (part_primary_it == nc_primary.end()) {
             continue;
         }
-        const common_ngram_cache_part part_primary = part_primary_it->second;
+        const llama_ngram_cache_part part_primary = part_primary_it->second;

         int max_count_primary = 0;
         int max_count_static = 0;
         int sum_count_primary = 0;
-        llama_token max_token = LLAMA_TOKEN_NULL;
+        llama_token max_token = -1;

         for (std::pair<llama_token, int> token_count_primary : part_primary) {
             const llama_token token = token_count_primary.first;

-            common_ngram_cache_part::iterator token_count_static_it = part_static.find(token);
+            llama_ngram_cache_part::iterator token_count_static_it = part_static.find(token);

             const int32_t count_primary = token_count_primary.second;
             const int32_t count_static = token_count_static_it != part_static.end() ? 100*token_count_static_it->second : 1;
@@ -142,9 +139,9 @@ static llama_token try_draft(
     return drafted_token;
 }

-void common_ngram_cache_draft(
+void llama_ngram_cache_draft(
     std::vector<llama_token> & inp, std::vector<llama_token> & draft, int n_draft, int ngram_min, int ngram_max,
-    common_ngram_cache & nc_context, common_ngram_cache & nc_dynamic, common_ngram_cache & nc_static
+    llama_ngram_cache & nc_context, llama_ngram_cache & nc_dynamic, llama_ngram_cache & nc_static
 ) {
     GGML_ASSERT(draft.size() == 1);
     const int inp_size = inp.size();
@@ -154,40 +151,40 @@ void common_ngram_cache_draft(
     }

     while ((int) draft.size()-1 < n_draft) {
-        llama_token drafted_token = LLAMA_TOKEN_NULL;
+        llama_token drafted_token = -1;

         const int ngram_start_static = inp_size-LLAMA_NGRAM_STATIC + draft.size()-1;
-        common_ngram ngram_static;
+        llama_ngram ngram_static;
         for (int j = ngram_start_static; j < ngram_start_static + LLAMA_NGRAM_STATIC; ++j) {
             ngram_static.tokens[j-ngram_start_static] = get_token(inp, draft, j);
         }
-        common_ngram_cache::iterator part_static_it = nc_static.find(ngram_static);
-        common_ngram_cache_part part_static;
+        llama_ngram_cache::iterator part_static_it = nc_static.find(ngram_static);
+        llama_ngram_cache_part part_static;
         if (part_static_it != nc_static.end()) {
             part_static = part_static_it->second;
         }

         // cd = context + dynamic
-        std::vector<common_ngram> ngrams_cd;
+        std::vector<llama_ngram> ngrams_cd;
         for (int ngram_size_cd = ngram_min; ngram_size_cd <= ngram_max; ++ngram_size_cd) {
             const int ngram_start_cd = inp_size-ngram_size_cd + draft.size()-1;
-            common_ngram ngram_cd;
+            llama_ngram ngram_cd;
             for (int j = ngram_start_cd; j < ngram_start_cd + ngram_size_cd; ++j) {
                 ngram_cd.tokens[j-ngram_start_cd] = get_token(inp, draft, j);
             }
             ngrams_cd.push_back(ngram_cd);
         }
-        if (drafted_token == LLAMA_TOKEN_NULL) {
+        if (drafted_token == -1) {
             drafted_token = try_draft(nc_context, ngrams_cd, part_static, draft_min_sample_size_lax, draft_min_percent_lax);
         }
-        if (drafted_token == LLAMA_TOKEN_NULL) {
+        if (drafted_token == -1) {
             drafted_token = try_draft(nc_dynamic, ngrams_cd, part_static, draft_min_sample_size_strict, draft_min_percent_strict);
         }
-        if (drafted_token == LLAMA_TOKEN_NULL) {
+        if (drafted_token == -1) {
             drafted_token = try_draft(nc_static, ngram_static);
         }

-        if (drafted_token == LLAMA_TOKEN_NULL) {
+        if (drafted_token == -1) {
             break;
         }

@@ -196,16 +193,16 @@ void common_ngram_cache_draft(
     }
 }

-void common_ngram_cache_save(common_ngram_cache & ngram_cache, std::string & filename) {
+void llama_ngram_cache_save(llama_ngram_cache & ngram_cache, std::string & filename) {
     std::ofstream file_out(filename, std::ios::binary);
-    for (std::pair<common_ngram, common_ngram_cache_part> item : ngram_cache) {
-        const common_ngram ngram = item.first;
-        common_ngram_cache_part token_counts = item.second;
+    for (std::pair<llama_ngram, llama_ngram_cache_part> item : ngram_cache) {
+        const llama_ngram ngram = item.first;
+        llama_ngram_cache_part token_counts = item.second;
         GGML_ASSERT(!token_counts.empty());
         const int32_t ntokens = token_counts.size();
         GGML_ASSERT(ntokens > 0);

-        file_out.write(reinterpret_cast<const char *>(&ngram), sizeof(common_ngram));
+        file_out.write(reinterpret_cast<const char *>(&ngram), sizeof(llama_ngram));
         file_out.write(reinterpret_cast<const char *>(&ntokens), sizeof(int32_t));
         for (std::pair<llama_token, int32_t> item2 : token_counts) {
             const llama_token token = item2.first;
@@ -219,14 +216,14 @@ void common_ngram_cache_save(common_ngram_cache & ngram_cache, std::string & fil

 }

-common_ngram_cache common_ngram_cache_load(std::string & filename) {
+llama_ngram_cache llama_ngram_cache_load(std::string & filename) {
     std::ifstream hashmap_file(filename, std::ios::binary);
     if (!hashmap_file) {
         throw std::ifstream::failure("Unable to open file " + filename);
     }
-    common_ngram_cache ngram_cache;
+    llama_ngram_cache ngram_cache;

-    common_ngram ngram;
+    llama_ngram ngram;
     int32_t ntokens;
     llama_token token;
     int32_t count;
@@ -235,11 +232,11 @@ common_ngram_cache common_ngram_cache_load(std::string & filename) {
     char * ntokensc = reinterpret_cast<char*>(&ntokens);
     char * tokenc = reinterpret_cast<char*>(&token);
     char * countc = reinterpret_cast<char*>(&count);
-    while(hashmap_file.read(ngramc, sizeof(common_ngram))) {
+    while(hashmap_file.read(ngramc, sizeof(llama_ngram))) {
         GGML_ASSERT(!hashmap_file.eof());
         GGML_ASSERT(hashmap_file.read(ntokensc, sizeof(int32_t)));
         GGML_ASSERT(ntokens > 0);
-        common_ngram_cache_part token_counts;
+        llama_ngram_cache_part token_counts;

         for (int i = 0; i < ntokens; ++i) {
             GGML_ASSERT(!hashmap_file.eof());
@@ -257,12 +254,12 @@ common_ngram_cache common_ngram_cache_load(std::string & filename) {
     return ngram_cache;
 }

-void common_ngram_cache_merge(common_ngram_cache & ngram_cache_target, common_ngram_cache & ngram_cache_add) {
-    for (std::pair<common_ngram, common_ngram_cache_part> ngram_part : ngram_cache_add) {
-        const common_ngram ngram = ngram_part.first;
-        common_ngram_cache_part part = ngram_part.second;
+void llama_ngram_cache_merge(llama_ngram_cache & ngram_cache_target, llama_ngram_cache & ngram_cache_add) {
+    for (std::pair<llama_ngram, llama_ngram_cache_part> ngram_part : ngram_cache_add) {
+        const llama_ngram ngram = ngram_part.first;
+        llama_ngram_cache_part part = ngram_part.second;

-        common_ngram_cache::iterator part_merged_it = ngram_cache_target.find(ngram);
+        llama_ngram_cache::iterator part_merged_it = ngram_cache_target.find(ngram);
         if (part_merged_it == ngram_cache_target.end()) {
             ngram_cache_target.emplace(ngram, part);
             continue;
@@ -273,7 +270,7 @@ void common_ngram_cache_merge(common_ngram_cache & ngram_cache_target, common_ng
         const int32_t count = token_count.second;
         GGML_ASSERT(count > 0);

-        common_ngram_cache_part::iterator token_count_merged_it = part_merged_it->second.find(token);
+        llama_ngram_cache_part::iterator token_count_merged_it = part_merged_it->second.find(token);
         if (token_count_merged_it == part_merged_it->second.end()) {
             part_merged_it->second.emplace(token, count);
             continue;
@@ -12,22 +12,22 @@

 // Data structures to map n-grams to empirical token probabilities:

-struct common_ngram {
+struct llama_ngram {
     llama_token tokens[LLAMA_NGRAM_MAX];

-    common_ngram() {
+    llama_ngram() {
         for (int i = 0; i < LLAMA_NGRAM_MAX; ++i) {
-            tokens[i] = LLAMA_TOKEN_NULL;
+            tokens[i] = -1;
         }
     }

-    common_ngram(const llama_token * input, const int ngram_size) {
+    llama_ngram(const llama_token * input, const int ngram_size) {
         for (int i = 0; i < LLAMA_NGRAM_MAX; ++i) {
-            tokens[i] = i < ngram_size ? input[i] : LLAMA_TOKEN_NULL;
+            tokens[i] = i < ngram_size ? input[i] : -1;
         }
     }

-    bool operator==(const common_ngram & other) const {
+    bool operator==(const llama_ngram & other) const {
         for (int i = 0; i < LLAMA_NGRAM_MAX; ++i) {
             if (tokens[i] != other.tokens[i]) {
                 return false;
@@ -37,28 +37,21 @@ struct common_ngram {
     }
 };

-struct common_token_hash_function {
-    size_t operator()(const llama_token token) const {
-        // see https://probablydance.com/2018/06/16/fibonacci-hashing-the-optimization-that-the-world-forgot-or-a-better-alternative-to-integer-modulo/
-        return token * 11400714819323198485llu;
-    }
-};
-
-struct common_ngram_hash_function {
-    size_t operator()(const common_ngram & ngram) const {
-        size_t hash = common_token_hash_function{}(ngram.tokens[0]);
-        for (int i = 1; i < LLAMA_NGRAM_MAX; ++i) {
-            hash ^= common_token_hash_function{}(ngram.tokens[i]);
+struct llama_ngram_hash_function {
+    size_t operator()(const llama_ngram & ngram) const {
+        size_t hash = 0;
+        for (int i = 0; i < LLAMA_NGRAM_MAX; ++i) {
+            hash ^= std::hash<llama_token>{}(ngram.tokens[i]);
         }
         return hash;
     }
 };

 // token -> number of times token has been seen
-typedef std::unordered_map<llama_token, int32_t> common_ngram_cache_part;
+typedef std::unordered_map<llama_token, int32_t> llama_ngram_cache_part;

 // n-gram -> empirical distribution of following tokens
-typedef std::unordered_map<common_ngram, common_ngram_cache_part, common_ngram_hash_function> common_ngram_cache;
+typedef std::unordered_map<llama_ngram, llama_ngram_cache_part, llama_ngram_hash_function> llama_ngram_cache;


 // Update an ngram cache with tokens.
@@ -70,8 +63,8 @@ typedef std::unordered_map<common_ngram, common_ngram_cache_part, common_ngram_h
 //
 // In order to get correct results inp_data can ONLY BE APPENDED TO.
 // Changes in the middle need a complete rebuild.
-void common_ngram_cache_update(
-    common_ngram_cache & ngram_cache, int ngram_min, int ngram_max, std::vector<llama_token> & inp_data, int nnew, bool print_progress);
+void llama_ngram_cache_update(
+    llama_ngram_cache & ngram_cache, int ngram_min, int ngram_max, std::vector<llama_token> & inp_data, int nnew, bool print_progress);

 // Try to draft tokens from ngram caches.
 // inp: the tokens generated so far.
@@ -81,21 +74,21 @@ void common_ngram_cache_update(
 // nc_context: ngram cache based on current context.
 // nc_dynamic: ngram cache based on previous user generations.
 // nc_static: ngram cache generated from a large text corpus, used for validation.
-void common_ngram_cache_draft(
+void llama_ngram_cache_draft(
     std::vector<llama_token> & inp, std::vector<llama_token> & draft, int n_draft, int ngram_min, int ngram_max,
-    common_ngram_cache & nc_context, common_ngram_cache & nc_dynamic, common_ngram_cache & nc_static);
+    llama_ngram_cache & nc_context, llama_ngram_cache & nc_dynamic, llama_ngram_cache & nc_static);

 // Save an ngram cache to a file.
 // ngram_cache: the ngram cache to save.
 // filename: the path under which to save the ngram cache.
-void common_ngram_cache_save(common_ngram_cache & ngram_cache, std::string & filename);
+void llama_ngram_cache_save(llama_ngram_cache & ngram_cache, std::string & filename);

-// Load an ngram cache saved with common_ngram_cache_save.
+// Load an ngram cache saved with llama_ngram_cache_save.
 // filename: the path from which to load the ngram cache.
 // returns: an ngram cache containing the information saved to filename.
-common_ngram_cache common_ngram_cache_load(std::string & filename);
+llama_ngram_cache llama_ngram_cache_load(std::string & filename);

 // Merge two ngram caches.
 // ngram_cache_target: the ngram cache to which to add the information from ngram_cache_add.
 // ngram_cache_add: the ngram cache to add to ngram_cache_target.
-void common_ngram_cache_merge(common_ngram_cache & ngram_cache_target, common_ngram_cache & ngram_cache_add);
+void llama_ngram_cache_merge(llama_ngram_cache & ngram_cache_target, llama_ngram_cache & ngram_cache_add);
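A hypothetical usage sketch (not part of this diff), following the master-side naming shown above: build a cache from the tokens generated so far, then draft a speculative continuation from it. The inp, last_token and cache variables are assumptions supplied by the caller.

common_ngram_cache nc_context;
common_ngram_cache nc_dynamic; // e.g. filled via common_ngram_cache_load()
common_ngram_cache nc_static;  // e.g. built offline from a large corpus

std::vector<llama_token> inp = tokens_so_far; // assumption: provided by the caller
common_ngram_cache_update(nc_context, /*ngram_min=*/1, LLAMA_NGRAM_MAX, inp, inp.size(), false);

std::vector<llama_token> draft = { last_token }; // must contain exactly one token on entry
common_ngram_cache_draft(inp, draft, /*n_draft=*/8, /*ngram_min=*/1, LLAMA_NGRAM_MAX,
                         nc_context, nc_dynamic, nc_static);
// draft[1..] now holds the drafted tokens (possibly fewer than n_draft).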
@ -1,526 +1,459 @@
|
||||||
|
#define LLAMA_API_INTERNAL
|
||||||
#include "sampling.h"
|
#include "sampling.h"
|
||||||
|
#include <random>
|
||||||
|
|
||||||
#include "common.h"
|
struct llama_sampling_context * llama_sampling_init(const struct llama_sampling_params & params) {
|
||||||
|
struct llama_sampling_context * result = new llama_sampling_context();
|
||||||
|
|
||||||
#include <cmath>
|
result->params = params;
|
||||||
#include <unordered_map>
|
result->grammar = nullptr;
|
||||||
|
|
||||||
// the ring buffer works similarly to std::deque, but with a fixed capacity
|
// if there is a grammar, parse it
|
||||||
// TODO: deduplicate with llama-impl.h
|
if (!params.grammar.empty()) {
|
||||||
template<typename T>
|
result->parsed_grammar = grammar_parser::parse(params.grammar.c_str());
|
||||||
struct ring_buffer {
|
|
||||||
ring_buffer(size_t cap) : capacity(cap), data(cap) {}
|
|
||||||
|
|
||||||
T & front() {
|
// will be empty (default) if there are parse errors
|
||||||
if (sz == 0) {
|
if (result->parsed_grammar.rules.empty()) {
|
||||||
throw std::runtime_error("ring buffer is empty");
|
fprintf(stderr, "%s: failed to parse grammar\n", __func__);
|
||||||
}
|
delete result;
|
||||||
return data[first];
|
return nullptr;
|
||||||
}
|
|
||||||
|
|
||||||
const T & front() const {
|
|
||||||
if (sz == 0) {
|
|
||||||
throw std::runtime_error("ring buffer is empty");
|
|
||||||
}
|
|
||||||
return data[first];
|
|
||||||
}
|
|
||||||
|
|
||||||
T & back() {
|
|
||||||
if (sz == 0) {
|
|
||||||
throw std::runtime_error("ring buffer is empty");
|
|
||||||
}
|
|
||||||
return data[pos];
|
|
||||||
}
|
|
||||||
|
|
||||||
const T & back() const {
|
|
||||||
if (sz == 0) {
|
|
||||||
throw std::runtime_error("ring buffer is empty");
|
|
||||||
}
|
|
||||||
return data[pos];
|
|
||||||
}
|
|
||||||
|
|
||||||
void push_back(const T & value) {
|
|
||||||
if (sz == capacity) {
|
|
||||||
// advance the start when buffer is full
|
|
||||||
first = (first + 1) % capacity;
|
|
||||||
} else {
|
|
||||||
sz++;
|
|
||||||
}
|
|
||||||
data[pos] = value;
|
|
||||||
pos = (pos + 1) % capacity;
|
|
||||||
}
|
|
||||||
|
|
||||||
T pop_front() {
|
|
||||||
if (sz == 0) {
|
|
||||||
throw std::runtime_error("ring buffer is empty");
|
|
||||||
}
|
|
||||||
T value = data[first];
|
|
||||||
first = (first + 1) % capacity;
|
|
||||||
sz--;
|
|
||||||
return value;
|
|
||||||
}
|
|
||||||
|
|
||||||
const T & rat(size_t i) const {
|
|
||||||
if (i >= sz) {
|
|
||||||
throw std::runtime_error("ring buffer: index out of bounds");
|
|
||||||
}
|
|
||||||
return data[(first + sz - i - 1) % capacity];
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<T> to_vector() const {
|
|
||||||
std::vector<T> result;
|
|
||||||
result.reserve(sz);
|
|
||||||
for (size_t i = 0; i < sz; i++) {
|
|
||||||
result.push_back(data[(first + i) % capacity]);
|
|
||||||
}
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
void clear() {
|
|
||||||
// here only reset the status of the buffer
|
|
||||||
sz = 0;
|
|
||||||
first = 0;
|
|
||||||
pos = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool empty() const {
|
|
||||||
return sz == 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t size() const {
|
|
||||||
return sz;
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t capacity = 0;
|
|
||||||
size_t sz = 0;
|
|
||||||
size_t first = 0;
|
|
||||||
size_t pos = 0;
|
|
||||||
std::vector<T> data;
|
|
||||||
};
|
|
||||||
|
|
||||||
struct common_sampler {
|
|
||||||
common_params_sampling params;
|
|
||||||
|
|
||||||
struct llama_sampler * grmr;
|
|
||||||
struct llama_sampler * chain;
|
|
||||||
|
|
||||||
ring_buffer<llama_token> prev;
|
|
||||||
|
|
||||||
std::vector<llama_token_data> cur;
|
|
||||||
|
|
||||||
llama_token_data_array cur_p;
|
|
||||||
|
|
||||||
void set_logits(struct llama_context * ctx, int idx) {
|
|
||||||
const auto * logits = llama_get_logits_ith(ctx, idx);
|
|
||||||
|
|
||||||
const llama_model * model = llama_get_model(ctx);
|
|
||||||
const llama_vocab * vocab = llama_model_get_vocab(model);
|
|
||||||
|
|
||||||
        const int n_vocab = llama_vocab_n_tokens(vocab);

        cur.resize(n_vocab);

        for (llama_token token_id = 0; token_id < n_vocab; token_id++) {
            cur[token_id] = llama_token_data{token_id, logits[token_id], 0.0f};
        }

        cur_p = { cur.data(), cur.size(), -1, false };
    }
};

    // Ensure that there is a "root" node.
    if (result->parsed_grammar.symbol_ids.find("root") == result->parsed_grammar.symbol_ids.end()) {
        fprintf(stderr, "%s: grammar does not contain a 'root' symbol\n", __func__);

        delete result;
        return nullptr;
    }

    std::vector<const llama_grammar_element *> grammar_rules(result->parsed_grammar.c_rules());

    struct llama_grammar * grammar = llama_grammar_init(
            grammar_rules.data(),
            grammar_rules.size(), result->parsed_grammar.symbol_ids.at("root"));
    if (grammar == nullptr) {
        throw std::runtime_error("Failed to initialize llama_grammar");
    }
    result->grammar = grammar;
    }

    result->prev.resize(params.n_prev);

    result->n_valid = 0;

    llama_sampling_set_rng_seed(result, params.seed);

    return result;
}

void llama_sampling_free(struct llama_sampling_context * ctx) {
    if (ctx->grammar != NULL) {
        llama_grammar_free(ctx->grammar);
    }

    delete ctx;
}

void llama_sampling_reset(llama_sampling_context * ctx) {
    if (ctx->grammar != NULL) {
        llama_grammar_free(ctx->grammar);
        ctx->grammar = NULL;
    }

    if (!ctx->parsed_grammar.rules.empty()) {
        std::vector<const llama_grammar_element *> grammar_rules(ctx->parsed_grammar.c_rules());

        struct llama_grammar * grammar = llama_grammar_init(
                grammar_rules.data(),
                grammar_rules.size(), ctx->parsed_grammar.symbol_ids.at("root"));
        if (grammar == nullptr) {
            throw std::runtime_error("Failed to initialize llama_grammar");
        }
        ctx->grammar = grammar;
    }

    std::fill(ctx->prev.begin(), ctx->prev.end(), 0);
    ctx->cur.clear();
    ctx->n_valid = 0;
}

void llama_sampling_set_rng_seed(struct llama_sampling_context * ctx, uint32_t seed) {
    if (seed == LLAMA_DEFAULT_SEED) {
        seed = std::random_device{}();
    }
    ctx->rng.seed(seed);
}

void llama_sampling_cp(llama_sampling_context * src, llama_sampling_context * dst) {
    if (dst->grammar) {
        llama_grammar_free(dst->grammar);
        dst->grammar = nullptr;
    }

    if (src->grammar) {
        dst->grammar = llama_grammar_copy(src->grammar);
    }

    dst->prev = src->prev;
}

llama_token llama_sampling_last(llama_sampling_context * ctx) {
    return ctx->prev.back();
}

std::string llama_sampling_prev_str(llama_sampling_context * ctx_sampling, llama_context * ctx_main, int n) {
    const int size = ctx_sampling->prev.size();

    n = std::min(n, size);

    std::string result;

    for (int i = size - n; i < size; i++) {
        result += llama_token_to_piece(ctx_main, ctx_sampling->prev[i]);
    }

    return result;
}

std::string llama_sampling_print(const llama_sampling_params & params) {
    char result[1024];

    snprintf(result, sizeof(result),
            "\trepeat_last_n = %d, repeat_penalty = %.3f, frequency_penalty = %.3f, presence_penalty = %.3f\n"
            "\ttop_k = %d, tfs_z = %.3f, top_p = %.3f, min_p = %.3f, typical_p = %.3f, temp = %.3f\n"
            "\tmirostat = %d, mirostat_lr = %.3f, mirostat_ent = %.3f",
            params.penalty_last_n, params.penalty_repeat, params.penalty_freq, params.penalty_present,
            params.top_k, params.tfs_z, params.top_p, params.min_p, params.typical_p, params.temp,
            params.mirostat, params.mirostat_eta, params.mirostat_tau);

    return std::string(result);
}

std::string common_params_sampling::print() const {
    char result[1024];

    snprintf(result, sizeof(result),
            "\trepeat_last_n = %d, repeat_penalty = %.3f, frequency_penalty = %.3f, presence_penalty = %.3f\n"
            "\tdry_multiplier = %.3f, dry_base = %.3f, dry_allowed_length = %d, dry_penalty_last_n = %d\n"
            "\ttop_k = %d, top_p = %.3f, min_p = %.3f, xtc_probability = %.3f, xtc_threshold = %.3f, typical_p = %.3f, temp = %.3f\n"
            "\tmirostat = %d, mirostat_lr = %.3f, mirostat_ent = %.3f",
            penalty_last_n, penalty_repeat, penalty_freq, penalty_present,
            dry_multiplier, dry_base, dry_allowed_length, dry_penalty_last_n,
            top_k, top_p, min_p, xtc_probability, xtc_threshold, typ_p, temp,
            mirostat, mirostat_eta, mirostat_tau);

    return std::string(result);
}
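For reference, a minimal usage sketch of the older llama_sampling_* API above. It assumes a llama_context * ctx whose logits are already populated and a hypothetical loop bound n_predict; batch evaluation and error handling are omitted.

// Sketch only: the typical sample/accept loop around the llama_sampling_* helpers.
llama_sampling_params sparams;
sparams.temp  = 0.7f;
sparams.top_k = 40;

struct llama_sampling_context * ctx_sampling = llama_sampling_init(sparams);

for (int i = 0; i < n_predict; ++i) {
    // sample from the last computed logits, without classifier-free guidance
    const llama_token id = llama_sampling_sample(ctx_sampling, ctx, /* ctx_cfg = */ nullptr);

    // record the token in the sampling history (and advance the grammar, if any)
    llama_sampling_accept(ctx_sampling, ctx, id, /* apply_grammar = */ true);

    // ... append `id` to a batch and call llama_decode() before the next iteration ...
}

llama_sampling_free(ctx_sampling);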
struct common_sampler * common_sampler_init(const struct llama_model * model, const struct common_params_sampling & params) {
    const llama_vocab * vocab = llama_model_get_vocab(model);

    llama_sampler_chain_params lparams = llama_sampler_chain_default_params();

    lparams.no_perf = params.no_perf;

    std::vector<const char *> trigger_words;
    trigger_words.reserve(params.grammar_trigger_words.size());
    for (const auto & str : params.grammar_trigger_words) {
        trigger_words.push_back(str.word.c_str());
    }

    struct llama_sampler * grmr;
    if (params.grammar.compare(0, 11, "%llguidance") == 0) {
#ifdef LLAMA_USE_LLGUIDANCE
        grmr = llama_sampler_init_llg(vocab, "lark", params.grammar.c_str());
#else
        GGML_ABORT("llguidance (cmake -DLLAMA_LLGUIDANCE=ON) is not enabled");
#endif // LLAMA_USE_LLGUIDANCE
    } else {
        grmr = params.grammar_lazy
             ? llama_sampler_init_grammar_lazy(vocab, params.grammar.c_str(), "root",
                                               trigger_words.data(), trigger_words.size(),
                                               params.grammar_trigger_tokens.data(), params.grammar_trigger_tokens.size())
             :      llama_sampler_init_grammar(vocab, params.grammar.c_str(), "root");
    }

    auto * result = new common_sampler {
        /* .params = */ params,
        /* .grmr   = */ grmr,
        /* .chain  = */ llama_sampler_chain_init(lparams),
        /* .prev   = */ ring_buffer<llama_token>(std::max(32, params.n_prev)),
        /* .cur    = */ {},
        /* .cur_p  = */ {},
    };

    llama_sampler_chain_add(result->chain,
            llama_sampler_init_logit_bias(
                llama_vocab_n_tokens(vocab),
                params.logit_bias.size(),
                params.logit_bias.data()));

    if (params.mirostat == 0) {
        for (const auto & cnstr : params.samplers) {
            switch (cnstr) {
                case COMMON_SAMPLER_TYPE_DRY:
                    {
                        std::vector<const char *> c_breakers;
                        c_breakers.reserve(params.dry_sequence_breakers.size());
                        for (const auto & str : params.dry_sequence_breakers) {
                            c_breakers.push_back(str.c_str());
                        }

                        llama_sampler_chain_add(result->chain, llama_sampler_init_dry      (vocab, llama_model_n_ctx_train(model), params.dry_multiplier, params.dry_base, params.dry_allowed_length, params.dry_penalty_last_n, c_breakers.data(), c_breakers.size()));
                    }
                    break;
                case COMMON_SAMPLER_TYPE_TOP_K:
                    llama_sampler_chain_add(result->chain, llama_sampler_init_top_k    (params.top_k));
                    break;
                case COMMON_SAMPLER_TYPE_TOP_P:
                    llama_sampler_chain_add(result->chain, llama_sampler_init_top_p    (params.top_p, params.min_keep));
                    break;
                case COMMON_SAMPLER_TYPE_MIN_P:
                    llama_sampler_chain_add(result->chain, llama_sampler_init_min_p    (params.min_p, params.min_keep));
                    break;
                case COMMON_SAMPLER_TYPE_XTC:
                    llama_sampler_chain_add(result->chain, llama_sampler_init_xtc      (params.xtc_probability, params.xtc_threshold, params.min_keep, params.seed));
                    break;
                case COMMON_SAMPLER_TYPE_TYPICAL_P:
                    llama_sampler_chain_add(result->chain, llama_sampler_init_typical  (params.typ_p, params.min_keep));
                    break;
                case COMMON_SAMPLER_TYPE_TEMPERATURE:
                    llama_sampler_chain_add(result->chain, llama_sampler_init_temp_ext (params.temp, params.dynatemp_range, params.dynatemp_exponent));
                    break;
                case COMMON_SAMPLER_TYPE_INFILL:
                    llama_sampler_chain_add(result->chain, llama_sampler_init_infill   (vocab));
                    break;
                case COMMON_SAMPLER_TYPE_PENALTIES:
                    llama_sampler_chain_add(result->chain, llama_sampler_init_penalties(params.penalty_last_n, params.penalty_repeat, params.penalty_freq, params.penalty_present));
                    break;
                default:
                    GGML_ASSERT(false && "unknown sampler type");
            }
        }
        llama_sampler_chain_add(result->chain, llama_sampler_init_dist(params.seed));
    } else if (params.mirostat == 1) {
        llama_sampler_chain_add(result->chain, llama_sampler_init_temp(params.temp));
        llama_sampler_chain_add(result->chain, llama_sampler_init_mirostat(llama_vocab_n_tokens(vocab), params.seed, params.mirostat_tau, params.mirostat_eta, 100));
    } else if (params.mirostat == 2) {
        llama_sampler_chain_add(result->chain, llama_sampler_init_temp(params.temp));
        llama_sampler_chain_add(result->chain, llama_sampler_init_mirostat_v2(params.seed, params.mirostat_tau, params.mirostat_eta));
    } else {
        GGML_ASSERT(false && "unknown mirostat version");
    }

    return result;
}

std::string llama_sampling_order_print(const llama_sampling_params & params) {
    std::string result = "CFG -> Penalties ";
    if (params.mirostat == 0) {
        for (auto sampler_type : params.samplers_sequence) {
            const auto sampler_type_name = llama_sampling_type_to_str(sampler_type);
            if (!sampler_type_name.empty()) {
                result += "-> " + sampler_type_name + " ";
            }
        }
    } else {
        result += "-> mirostat ";
    }

    return result;
}

std::string llama_sampling_type_to_str(llama_sampler_type sampler_type) {
    switch (sampler_type) {
        case llama_sampler_type::TOP_K:       return "top_k";
        case llama_sampler_type::TFS_Z:       return "tfs_z";
        case llama_sampler_type::TYPICAL_P:   return "typical_p";
        case llama_sampler_type::TOP_P:       return "top_p";
        case llama_sampler_type::MIN_P:       return "min_p";
        case llama_sampler_type::TEMPERATURE: return "temperature";
        default : return "";
    }
}

void common_sampler_free(struct common_sampler * gsmpl) {
    if (gsmpl) {
        llama_sampler_free(gsmpl->grmr);

        llama_sampler_free(gsmpl->chain);

        delete gsmpl;
    }
}

void common_sampler_accept(struct common_sampler * gsmpl, llama_token token, bool accept_grammar) {
    if (accept_grammar) {
        llama_sampler_accept(gsmpl->grmr, token);
    }

    llama_sampler_accept(gsmpl->chain, token);

    gsmpl->prev.push_back(token);
}

void common_sampler_reset(struct common_sampler * gsmpl) {
    llama_sampler_reset(gsmpl->grmr);

    llama_sampler_reset(gsmpl->chain);
}

struct common_sampler * common_sampler_clone(common_sampler * gsmpl) {
    return new common_sampler {
        /* .params = */ gsmpl->params,
        /* .grmr   = */ llama_sampler_clone(gsmpl->grmr),
        /* .chain  = */ llama_sampler_clone(gsmpl->chain),
        /* .prev   = */ gsmpl->prev,
        /* .cur    = */ gsmpl->cur,
        /* .cur_p  = */ gsmpl->cur_p,
    };
}

void common_perf_print(const struct llama_context * ctx, const struct common_sampler * gsmpl) {
    // TODO: measure grammar performance

    if (gsmpl) {
        llama_perf_sampler_print(gsmpl->chain);
    }
    if (ctx) {
        llama_perf_context_print(ctx);
    }
}

llama_token common_sampler_sample(struct common_sampler * gsmpl, struct llama_context * ctx, int idx, bool grammar_first) {
    gsmpl->set_logits(ctx, idx);

    auto & grmr  = gsmpl->grmr;
    auto & chain = gsmpl->chain;
    auto & cur_p = gsmpl->cur_p; // initialized by set_logits

    if (grammar_first) {
        llama_sampler_apply(grmr, &cur_p);
    }

    llama_sampler_apply(chain, &cur_p);

    GGML_ASSERT(cur_p.selected != -1 && "no selected token during sampling - check your sampling configuration");

    const llama_token id = cur_p.data[cur_p.selected].id;

    if (grammar_first) {
        return id;
    }

    // check if the sampled token fits the grammar
    {
        llama_token_data       single_token_data       = { id, 1.0f, 0.0f };
        llama_token_data_array single_token_data_array = { &single_token_data, 1, -1, false };

        llama_sampler_apply(grmr, &single_token_data_array);

        const bool is_valid = single_token_data_array.data[0].logit != -INFINITY;
        if (is_valid) {
            return id;
        }
    }

    // resampling:
    // if the token is not valid, sample again, but first apply the grammar sampler and then the sampling chain
    gsmpl->set_logits(ctx, idx);

    llama_sampler_apply(grmr,  &cur_p);
    llama_sampler_apply(chain, &cur_p);

    GGML_ASSERT(cur_p.selected != -1 && "no selected token during re-sampling - check your sampling configuration");

    return cur_p.data[cur_p.selected].id;
}
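A minimal sketch of how common_sampler_sample is typically driven from a decode loop. It assumes `model` and `ctx` are already initialized, the prompt has been decoded, and that idx = -1 refers to the logits of the last evaluated token (as with llama_get_logits_ith); the loop bound n_predict is hypothetical and error handling is omitted.

// Sketch only: sample/accept loop around common_sampler_sample.
common_params_sampling sparams;
sparams.temp = 0.8f;

struct common_sampler * smpl = common_sampler_init(model, sparams);

for (int i = 0; i < n_predict; ++i) {
    // fast path: sampler chain first, grammar only checked on the sampled token
    const llama_token id = common_sampler_sample(smpl, ctx, /* idx = */ -1, /* grammar_first = */ false);

    // accept into both the sampler chain and the grammar
    common_sampler_accept(smpl, id, /* accept_grammar = */ true);

    // ... append `id` to the batch and call llama_decode(ctx, batch) ...
}

common_sampler_free(smpl);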
std::vector<llama_token> common_sampler_sample_and_accept_n(struct common_sampler * gsmpl, struct llama_context * ctx, const std::vector<int> & idxs, const llama_tokens & draft, bool grammar_first) {
    GGML_ASSERT(idxs.size() == draft.size() + 1 && "idxs.size() must be draft.size() + 1");

    std::vector<llama_token> result;
    result.reserve(idxs.size());

    size_t i = 0;
    for (; i < draft.size(); i++) {
        const llama_token id = common_sampler_sample(gsmpl, ctx, idxs[i], grammar_first);

        common_sampler_accept(gsmpl, id, true);

        result.push_back(id);

        if (draft[i] != id) {
            break;
        }
    }

    if (i == draft.size()) {
        const llama_token id = common_sampler_sample(gsmpl, ctx, idxs[i], grammar_first);

        common_sampler_accept(gsmpl, id, true);

        result.push_back(id);
    }

    return result;
}

std::vector<llama_token> common_sampler_sample_and_accept_n(struct common_sampler * gsmpl, struct llama_context * ctx, const llama_tokens & draft, bool grammar_first) {
    std::vector<int> idxs(draft.size() + 1);
    for (size_t i = 0; i < idxs.size(); ++i) {
        idxs[i] = i;
    }

    return common_sampler_sample_and_accept_n(gsmpl, ctx, idxs, draft, grammar_first);
}
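A sketch of calling the second overload above to verify a batch of draft tokens. It assumes `gsmpl` is an initialized common_sampler and that the draft tokens have already been decoded into `ctx` so that logits exist at indices 0..draft.size(); the draft contents are placeholders.

// Sketch only: cross-checking a draft against the target sampler.
llama_tokens draft = { /* tokens proposed by a draft model */ };

// this overload assumes idxs == [0, 1, ..., draft.size()]
const std::vector<llama_token> accepted = common_sampler_sample_and_accept_n(gsmpl, ctx, draft);

// `accepted` contains at least one token; fewer than draft.size() + 1 entries means the
// sampler disagreed with the draft at that position and sampling stopped there.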
uint32_t common_sampler_get_seed(const struct common_sampler * gsmpl) {
    return llama_sampler_get_seed(gsmpl->chain);
}

// helpers

llama_token_data_array * common_sampler_get_candidates(struct common_sampler * gsmpl) {
    return &gsmpl->cur_p;
}

llama_token common_sampler_last(const struct common_sampler * gsmpl) {
    return gsmpl->prev.rat(0);
}

std::string common_sampler_print(const struct common_sampler * gsmpl) {
    std::string result = "logits ";

    for (int i = 0; i < llama_sampler_chain_n(gsmpl->chain); i++) {
        const auto * smpl = llama_sampler_chain_get(gsmpl->chain, i);
        result += std::string("-> ") + llama_sampler_name(smpl) + " ";
    }

    return result;
}

std::string common_sampler_prev_str(common_sampler * gsmpl, llama_context * ctx_main, int n) {
    n = std::min(n, (int) gsmpl->prev.size());

    if (n <= 0) {
        return "";
    }

    std::string result;
    result.reserve(8*n); // 8 is the average length of a token [citation needed], TODO: compute this from the vocab

    for (int i = n - 1; i >= 0; i--) {
        const llama_token id = gsmpl->prev.rat(i);

        GGML_ASSERT(id != LLAMA_TOKEN_NULL && "null token in the sampling history - should not happen");

        result += common_token_to_piece(ctx_main, id);
    }

    return result;
}

char common_sampler_type_to_chr(enum common_sampler_type cnstr) {
    switch (cnstr) {
        case COMMON_SAMPLER_TYPE_DRY:         return 'd';
        case COMMON_SAMPLER_TYPE_TOP_K:       return 'k';
        case COMMON_SAMPLER_TYPE_TYPICAL_P:   return 'y';
        case COMMON_SAMPLER_TYPE_TOP_P:       return 'p';
        case COMMON_SAMPLER_TYPE_MIN_P:       return 'm';
        case COMMON_SAMPLER_TYPE_TEMPERATURE: return 't';
        case COMMON_SAMPLER_TYPE_XTC:         return 'x';
        case COMMON_SAMPLER_TYPE_INFILL:      return 'i';
        case COMMON_SAMPLER_TYPE_PENALTIES:   return 'e';
        default : return '?';
    }
}

std::string common_sampler_type_to_str(enum common_sampler_type cnstr) {
    switch (cnstr) {
        case COMMON_SAMPLER_TYPE_DRY:         return "dry";
        case COMMON_SAMPLER_TYPE_TOP_K:       return "top_k";
        case COMMON_SAMPLER_TYPE_TYPICAL_P:   return "typ_p";
        case COMMON_SAMPLER_TYPE_TOP_P:       return "top_p";
        case COMMON_SAMPLER_TYPE_MIN_P:       return "min_p";
        case COMMON_SAMPLER_TYPE_TEMPERATURE: return "temperature";
        case COMMON_SAMPLER_TYPE_XTC:         return "xtc";
        case COMMON_SAMPLER_TYPE_INFILL:      return "infill";
        case COMMON_SAMPLER_TYPE_PENALTIES:   return "penalties";
        default : return "";
    }
}
std::vector<common_sampler_type> common_sampler_types_from_names(const std::vector<std::string> & names, bool allow_alt_names) {
    std::unordered_map<std::string, common_sampler_type> sampler_canonical_name_map {
        { "dry",         COMMON_SAMPLER_TYPE_DRY },
        { "top_k",       COMMON_SAMPLER_TYPE_TOP_K },
        { "top_p",       COMMON_SAMPLER_TYPE_TOP_P },
        { "typ_p",       COMMON_SAMPLER_TYPE_TYPICAL_P },
        { "min_p",       COMMON_SAMPLER_TYPE_MIN_P },
        { "temperature", COMMON_SAMPLER_TYPE_TEMPERATURE },
        { "xtc",         COMMON_SAMPLER_TYPE_XTC },
        { "infill",      COMMON_SAMPLER_TYPE_INFILL },
        { "penalties",   COMMON_SAMPLER_TYPE_PENALTIES },
    };

    // since samplers names are written multiple ways
    // make it ready for both system names and input names
    std::unordered_map<std::string, common_sampler_type> sampler_alt_name_map {
        { "top-k",     COMMON_SAMPLER_TYPE_TOP_K },
        { "top-p",     COMMON_SAMPLER_TYPE_TOP_P },
        { "nucleus",   COMMON_SAMPLER_TYPE_TOP_P },
        { "typical-p", COMMON_SAMPLER_TYPE_TYPICAL_P },
        { "typical",   COMMON_SAMPLER_TYPE_TYPICAL_P },
        { "typ-p",     COMMON_SAMPLER_TYPE_TYPICAL_P },
        { "typ",       COMMON_SAMPLER_TYPE_TYPICAL_P },
        { "min-p",     COMMON_SAMPLER_TYPE_MIN_P },
        { "temp",      COMMON_SAMPLER_TYPE_TEMPERATURE },
    };

    std::vector<common_sampler_type> samplers;
    samplers.reserve(names.size());

    for (const auto & name : names) {
        auto sampler = sampler_canonical_name_map.find(name);
        if (sampler != sampler_canonical_name_map.end()) {
            samplers.push_back(sampler->second);
        } else {
            if (allow_alt_names) {
                sampler = sampler_alt_name_map.find(name);
                if (sampler != sampler_alt_name_map.end()) {
                    samplers.push_back(sampler->second);
                }
            }
        }
    }

    return samplers;
}

std::vector<llama_sampler_type> llama_sampling_types_from_names(const std::vector<std::string> & names, bool allow_alt_names) {
    std::unordered_map<std::string, llama_sampler_type> sampler_canonical_name_map {
        {"top_k",       llama_sampler_type::TOP_K},
        {"top_p",       llama_sampler_type::TOP_P},
        {"typical_p",   llama_sampler_type::TYPICAL_P},
        {"min_p",       llama_sampler_type::MIN_P},
        {"tfs_z",       llama_sampler_type::TFS_Z},
        {"temperature", llama_sampler_type::TEMPERATURE}
    };

    // since samplers names are written multiple ways
    // make it ready for both system names and input names
    std::unordered_map<std::string, llama_sampler_type> sampler_alt_name_map {
        {"top-k",     llama_sampler_type::TOP_K},
        {"top-p",     llama_sampler_type::TOP_P},
        {"nucleus",   llama_sampler_type::TOP_P},
        {"typical-p", llama_sampler_type::TYPICAL_P},
        {"typical",   llama_sampler_type::TYPICAL_P},
        {"min-p",     llama_sampler_type::MIN_P},
        {"tfs-z",     llama_sampler_type::TFS_Z},
        {"tfs",       llama_sampler_type::TFS_Z},
        {"temp",      llama_sampler_type::TEMPERATURE}
    };

    std::vector<llama_sampler_type> sampler_types;
    sampler_types.reserve(names.size());
    for (const auto & name : names)
    {
        auto sampler_item = sampler_canonical_name_map.find(name);
        if (sampler_item != sampler_canonical_name_map.end())
        {
            sampler_types.push_back(sampler_item->second);
        }
        else
        {
            if (allow_alt_names)
            {
                sampler_item = sampler_alt_name_map.find(name);
                if (sampler_item != sampler_alt_name_map.end())
                {
                    sampler_types.push_back(sampler_item->second);
                }
            }
        }
    }
    return sampler_types;
}

std::vector<llama_sampler_type> llama_sampling_types_from_chars(const std::string & names_string) {
    std::unordered_map<char, llama_sampler_type> sampler_name_map {
        {'k', llama_sampler_type::TOP_K},
        {'p', llama_sampler_type::TOP_P},
        {'y', llama_sampler_type::TYPICAL_P},
        {'m', llama_sampler_type::MIN_P},
        {'f', llama_sampler_type::TFS_Z},
        {'t', llama_sampler_type::TEMPERATURE}
    };

    std::vector<llama_sampler_type> sampler_types;
    sampler_types.reserve(names_string.size());
    for (const auto & c : names_string) {
        const auto sampler_item = sampler_name_map.find(c);
        if (sampler_item != sampler_name_map.end()) {
            sampler_types.push_back(sampler_item->second);
        }
    }
    return sampler_types;
}

std::vector<common_sampler_type> common_sampler_types_from_chars(const std::string & chars) {
    std::unordered_map<char, common_sampler_type> sampler_name_map = {
        { common_sampler_type_to_chr(COMMON_SAMPLER_TYPE_DRY),         COMMON_SAMPLER_TYPE_DRY },
        { common_sampler_type_to_chr(COMMON_SAMPLER_TYPE_TOP_K),       COMMON_SAMPLER_TYPE_TOP_K },
        { common_sampler_type_to_chr(COMMON_SAMPLER_TYPE_TYPICAL_P),   COMMON_SAMPLER_TYPE_TYPICAL_P },
        { common_sampler_type_to_chr(COMMON_SAMPLER_TYPE_TOP_P),       COMMON_SAMPLER_TYPE_TOP_P },
        { common_sampler_type_to_chr(COMMON_SAMPLER_TYPE_MIN_P),       COMMON_SAMPLER_TYPE_MIN_P },
        { common_sampler_type_to_chr(COMMON_SAMPLER_TYPE_TEMPERATURE), COMMON_SAMPLER_TYPE_TEMPERATURE },
        { common_sampler_type_to_chr(COMMON_SAMPLER_TYPE_XTC),         COMMON_SAMPLER_TYPE_XTC },
        { common_sampler_type_to_chr(COMMON_SAMPLER_TYPE_INFILL),      COMMON_SAMPLER_TYPE_INFILL },
        { common_sampler_type_to_chr(COMMON_SAMPLER_TYPE_PENALTIES),   COMMON_SAMPLER_TYPE_PENALTIES },
    };

    std::vector<common_sampler_type> samplers;
    samplers.reserve(chars.size());

    for (const auto & c : chars) {
        const auto sampler = sampler_name_map.find(c);
        if (sampler != sampler_name_map.end()) {
            samplers.push_back(sampler->second);
        }
    }

    return samplers;
}

// no reasons to expose this function in header
static void sampler_queue(
                   struct llama_context * ctx_main,
            const llama_sampling_params & params,
                 llama_token_data_array & cur_p,
                                 size_t   min_keep) {
    const float         temp              = params.temp;
    const float         dynatemp_range    = params.dynatemp_range;
    const float         dynatemp_exponent = params.dynatemp_exponent;
    const int32_t       top_k             = params.top_k;
    const float         top_p             = params.top_p;
    const float         min_p             = params.min_p;
    const float         tfs_z             = params.tfs_z;
    const float         typical_p         = params.typical_p;
    const std::vector<llama_sampler_type> & samplers_sequence = params.samplers_sequence;

    for (auto sampler_type : samplers_sequence) {
        switch (sampler_type) {
            case llama_sampler_type::TOP_K    : llama_sample_top_k    (ctx_main, &cur_p, top_k,     min_keep); break;
            case llama_sampler_type::TFS_Z    : llama_sample_tail_free(ctx_main, &cur_p, tfs_z,     min_keep); break;
            case llama_sampler_type::TYPICAL_P: llama_sample_typical  (ctx_main, &cur_p, typical_p, min_keep); break;
            case llama_sampler_type::TOP_P    : llama_sample_top_p    (ctx_main, &cur_p, top_p,     min_keep); break;
            case llama_sampler_type::MIN_P    : llama_sample_min_p    (ctx_main, &cur_p, min_p,     min_keep); break;
            case llama_sampler_type::TEMPERATURE:
                if (dynatemp_range > 0) {
                    float dynatemp_min = std::max(0.0f, temp - dynatemp_range);
                    float dynatemp_max = std::max(0.0f, temp + dynatemp_range);
                    llama_sample_entropy(ctx_main, &cur_p, dynatemp_min, dynatemp_max, dynatemp_exponent);
                } else {
                    llama_sample_temp(ctx_main, &cur_p, temp);
                }
                break;
            default : break;
        }
    }
}

static llama_token llama_sampling_sample_impl(
                  struct llama_sampling_context * ctx_sampling,
                  struct llama_context * ctx_main,
                  struct llama_context * ctx_cfg,
                  const int idx,
                  bool is_resampling) {
    const llama_sampling_params & params = ctx_sampling->params;

    const float   temp         = params.temp;
    const int     mirostat     = params.mirostat;
    const float   mirostat_tau = params.mirostat_tau;
    const float   mirostat_eta = params.mirostat_eta;

    std::vector<float> original_logits;
    auto cur_p = llama_sampling_prepare(ctx_sampling, ctx_main, ctx_cfg, idx, /* apply_grammar= */ is_resampling, &original_logits);
    if (ctx_sampling->grammar != NULL && !is_resampling) {
        GGML_ASSERT(!original_logits.empty());
    }
    llama_token id = 0;
    // Get a pointer to the logits
    float * logits = llama_get_logits_ith(ctx_main, idx);

    if (temp < 0.0) {
        // greedy sampling, with probs
        llama_sample_softmax(ctx_main, &cur_p);
        id = cur_p.data[0].id;
    } else if (temp == 0.0) {
        // greedy sampling, no probs
        id = llama_sample_token_greedy(ctx_main, &cur_p);
    } else {
        if (mirostat == 1) {
            const int mirostat_m = 100;
            llama_sample_temp(ctx_main, &cur_p, temp);
            id = llama_sample_token_mirostat(ctx_main, &cur_p, mirostat_tau, mirostat_eta, mirostat_m, &ctx_sampling->mirostat_mu);
        } else if (mirostat == 2) {
            llama_sample_temp(ctx_main, &cur_p, temp);
            id = llama_sample_token_mirostat_v2(ctx_main, &cur_p, mirostat_tau, mirostat_eta, &ctx_sampling->mirostat_mu);
        } else {
            // temperature sampling
            size_t min_keep = std::max(1, params.min_keep);

            sampler_queue(ctx_main, params, cur_p, min_keep);

            id = llama_sample_token_with_rng(ctx_main, &cur_p, ctx_sampling->rng);

            //{
            //    const int n_top = 10;
            //    LOG("top %d candidates:\n", n_top);

            //    for (int i = 0; i < n_top; i++) {
            //        const llama_token id = cur_p.data[i].id;
            //        (void)id; // To avoid a warning that id is unused when logging is disabled.
            //        LOG(" - %5d: '%12s' (%.3f)\n", id, llama_token_to_piece(ctx_main, id).c_str(), cur_p.data[i].p);
            //    }
            //}

            //LOG("sampled token: %5d: '%s'\n", id, llama_token_to_piece(ctx_main, id).c_str());
        }
    }

    if (ctx_sampling->grammar != NULL && !is_resampling) {
        // Create an array with a single token data element for the sampled id
        llama_token_data single_token_data = {id, logits[id], 0.0f};
        llama_token_data_array single_token_data_array = { &single_token_data, 1, false };

        // Apply grammar constraints to the single token
        llama_sample_grammar(ctx_main, &single_token_data_array, ctx_sampling->grammar);

        // Check if the token is valid according to the grammar by seeing if its logit has been set to -INFINITY
        bool is_valid = single_token_data_array.data[0].logit != -INFINITY;

        // If the token is not valid according to the grammar, perform resampling
        if (!is_valid) {
            LOG("Resampling because token %d: '%s' does not meet grammar rules\n", id, llama_token_to_piece(ctx_main, id).c_str());

            // Restore logits from the copy
            std::copy(original_logits.begin(), original_logits.end(), logits);

            return llama_sampling_sample_impl(ctx_sampling, ctx_main, ctx_cfg, idx, /* is_resampling= */ true);
        }
    }

    ctx_sampling->n_valid = temp == 0.0f ? 0 : cur_p.size;

    return id;
}

static llama_token_data_array llama_sampling_prepare_impl(
                  struct llama_sampling_context * ctx_sampling,
                  struct llama_context * ctx_main,
                  struct llama_context * ctx_cfg,
                  const int idx,
                  bool apply_grammar,
                  std::vector<float> * original_logits) {
    const llama_sampling_params & params = ctx_sampling->params;

    const int n_vocab = llama_n_vocab(llama_get_model(ctx_main));

    const int32_t penalty_last_n  = params.penalty_last_n < 0 ? params.n_prev : params.penalty_last_n;
    const float   penalty_repeat  = params.penalty_repeat;
    const float   penalty_freq    = params.penalty_freq;
    const float   penalty_present = params.penalty_present;

    const bool    penalize_nl     = params.penalize_nl;

    auto & prev = ctx_sampling->prev;
    auto & cur  = ctx_sampling->cur;

    // Get a pointer to the logits
    float * logits = llama_get_logits_ith(ctx_main, idx);

    if (ctx_sampling->grammar != NULL && !apply_grammar) {
        GGML_ASSERT(original_logits != NULL);
        // Only make a copy of the original logits if we are not applying grammar checks, not sure if I actually have to do this.
        *original_logits = {logits, logits + llama_n_vocab(llama_get_model(ctx_main))};
    }

    // apply params.logit_bias map
    for (auto it = params.logit_bias.begin(); it != params.logit_bias.end(); it++) {
        logits[it->first] += it->second;
    }

    if (ctx_cfg) {
        float * logits_guidance = llama_get_logits_ith(ctx_cfg, idx);
        llama_sample_apply_guidance(ctx_main, logits, logits_guidance, params.cfg_scale);
    }

    cur.clear();

    for (llama_token token_id = 0; token_id < n_vocab; token_id++) {
        cur.emplace_back(llama_token_data{token_id, logits[token_id], 0.0f});
    }

    llama_token_data_array cur_p = { cur.data(), cur.size(), false };

    // apply penalties
    const auto& penalty_tokens = params.use_penalty_prompt_tokens ? params.penalty_prompt_tokens : prev;
    const int penalty_tokens_used_size = std::min((int)penalty_tokens.size(), penalty_last_n);
    if (penalty_tokens_used_size) {
        const float nl_logit = logits[llama_token_nl(llama_get_model(ctx_main))];

        llama_sample_repetition_penalties(ctx_main, &cur_p,
                penalty_tokens.data() + penalty_tokens.size() - penalty_tokens_used_size,
                penalty_tokens_used_size, penalty_repeat, penalty_freq, penalty_present);

        if (!penalize_nl) {
            for (size_t idx = 0; idx < cur_p.size; idx++) {
                if (cur_p.data[idx].id == llama_token_nl(llama_get_model(ctx_main))) {
                    cur_p.data[idx].logit = nl_logit;
                    break;
                }
            }
        }
    }

    // apply grammar checks before sampling logic
    if (apply_grammar && ctx_sampling->grammar != NULL) {
        llama_sample_grammar(ctx_main, &cur_p, ctx_sampling->grammar);
    }

    return cur_p;
}

llama_token llama_sampling_sample(
                  struct llama_sampling_context * ctx_sampling,
                  struct llama_context * ctx_main,
                  struct llama_context * ctx_cfg,
                  const int idx) {
    // Call the implementation function with is_resampling set to false by default
    return llama_sampling_sample_impl(ctx_sampling, ctx_main, ctx_cfg, idx, /* is_resampling= */ false);
}

llama_token_data_array llama_sampling_prepare(
                  struct llama_sampling_context * ctx_sampling,
                  struct llama_context * ctx_main,
                  struct llama_context * ctx_cfg,
                  const int idx,
                  bool apply_grammar,
                  std::vector<float> * original_logits) {
    return llama_sampling_prepare_impl(ctx_sampling, ctx_main, ctx_cfg, idx, apply_grammar, original_logits);
}

void llama_sampling_accept(
                  struct llama_sampling_context * ctx_sampling,
                  struct llama_context * ctx_main,
                  llama_token id,
                  bool apply_grammar) {
    ctx_sampling->prev.erase(ctx_sampling->prev.begin());
    ctx_sampling->prev.push_back(id);

    if (ctx_sampling->grammar != NULL && apply_grammar) {
        llama_grammar_accept_token(ctx_main, ctx_sampling->grammar, id);
    }
}
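A short sketch of the two name-mapping helpers above: they accept either the full sampler names or the single-character aliases produced by common_sampler_type_to_chr, so both calls below yield the same sequence.

// Sketch only: equivalent ways to specify a sampler order.
const auto from_names = common_sampler_types_from_names({ "top_k", "min_p", "temperature" }, /* allow_alt_names = */ true);
const auto from_chars = common_sampler_types_from_chars("kmt"); // TOP_K, MIN_P, TEMPERATURE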
@ -2,106 +2,159 @@

#include "llama.h"

#include "grammar-parser.h"

#include <random>
#include <string>
#include <unordered_map>
#include <vector>

// sampler types
enum class llama_sampler_type : char {
    TOP_K       = 'k',
    TOP_P       = 'p',
    MIN_P       = 'm',
    TFS_Z       = 'f',
    TYPICAL_P   = 'y',
    TEMPERATURE = 't'
};

// sampling parameters
typedef struct llama_sampling_params {
    int32_t     n_prev                = 64;                 // number of previous tokens to remember
    int32_t     n_probs               = 0;                  // if greater than 0, output the probabilities of top n_probs tokens.
    int32_t     min_keep              = 0;                  // 0 = disabled, otherwise samplers should return at least min_keep tokens
    int32_t     top_k                 = 40;                 // <= 0 to use vocab size
    float       top_p                 = 0.95f;              // 1.0 = disabled
    float       min_p                 = 0.05f;              // 0.0 = disabled
    float       tfs_z                 = 1.00f;              // 1.0 = disabled
    float       typical_p             = 1.00f;              // 1.0 = disabled
    float       temp                  = 0.80f;              // <= 0.0 to sample greedily, 0.0 to not output probabilities
    float       dynatemp_range        = 0.00f;              // 0.0 = disabled
    float       dynatemp_exponent     = 1.00f;              // controls how entropy maps to temperature in dynamic temperature sampler
    int32_t     penalty_last_n        = 64;                 // last n tokens to penalize (0 = disable penalty, -1 = context size)
    float       penalty_repeat        = 1.00f;              // 1.0 = disabled
    float       penalty_freq          = 0.00f;              // 0.0 = disabled
    float       penalty_present       = 0.00f;              // 0.0 = disabled
    int32_t     mirostat              = 0;                  // 0 = disabled, 1 = mirostat, 2 = mirostat 2.0
    float       mirostat_tau          = 5.00f;              // target entropy
    float       mirostat_eta          = 0.10f;              // learning rate
    bool        penalize_nl           = false;              // consider newlines as a repeatable token
    uint32_t    seed                  = LLAMA_DEFAULT_SEED; // the seed used to initialize llama_sampling_context

    std::vector<llama_sampler_type> samplers_sequence = {
        llama_sampler_type::TOP_K,
        llama_sampler_type::TFS_Z,
        llama_sampler_type::TYPICAL_P,
        llama_sampler_type::TOP_P,
        llama_sampler_type::MIN_P,
        llama_sampler_type::TEMPERATURE
    };

    std::string grammar;  // optional BNF-like grammar to constrain sampling

    // Classifier-Free Guidance
    // https://arxiv.org/abs/2306.17806
    std::string cfg_negative_prompt; // string to help guidance
    float       cfg_scale     = 1.f; // how strong is guidance

    std::unordered_map<llama_token, float> logit_bias; // logit bias for specific tokens

    std::vector<llama_token> penalty_prompt_tokens;
    bool                     use_penalty_prompt_tokens = false;
} llama_sampling_params;

// general sampler context
// TODO: move to llama.h
struct llama_sampling_context {
    // parameters that will be used for sampling
    llama_sampling_params params;

    // mirostat sampler state
    float mirostat_mu;

    llama_grammar * grammar;

    // internal
    grammar_parser::parse_state parsed_grammar;

    // TODO: replace with ring-buffer
    std::vector<llama_token>      prev;
    std::vector<llama_token_data> cur;
    size_t n_valid; // Number of correct top tokens with correct probabilities.

    std::mt19937 rng;
};

#include "common.h"

// Create a new sampling context instance.
struct llama_sampling_context * llama_sampling_init(const struct llama_sampling_params & params);

void llama_sampling_free(struct llama_sampling_context * ctx);

// Reset the sampler context
// - clear prev tokens
// - reset grammar
void llama_sampling_reset(llama_sampling_context * ctx);

// Set the sampler seed
void llama_sampling_set_rng_seed(struct llama_sampling_context * ctx, uint32_t seed);

// Copy the sampler context
void llama_sampling_cp(llama_sampling_context * src, llama_sampling_context * dst);

// Get the last sampled token
llama_token llama_sampling_last(llama_sampling_context * ctx);

// Get a string representation of the last sampled tokens
std::string llama_sampling_prev_str(llama_sampling_context * ctx_sampling, llama_context * ctx_main, int n);

// Print sampling parameters into a string
std::string llama_sampling_print(const llama_sampling_params & params);

// Print sampling order into a string
std::string llama_sampling_order_print(const llama_sampling_params & params);

std::string llama_sampling_type_to_str(llama_sampler_type sampler_type);

std::vector<llama_sampler_type> llama_sampling_types_from_names(const std::vector<std::string> & names, bool allow_alt_names);
std::vector<llama_sampler_type> llama_sampling_types_from_chars(const std::string & names_string);

// this is a common sampling function used across the examples for convenience
// it can serve as a starting point for implementing your own sampling function
// Note: When using multiple sequences, it is the caller's responsibility to call
//       llama_sampling_reset when a sequence ends
//
// required:
//  - ctx_main:     context to use for sampling
//  - ctx_sampling: sampling-specific context
//
// optional:
//  - ctx_cfg:      context to use for classifier-free guidance
//  - idx:          sample from llama_get_logits_ith(ctx, idx)
//
// returns:
//  - token:      sampled token
//  - candidates: vector of candidate tokens
//
llama_token llama_sampling_sample(
        struct llama_sampling_context * ctx_sampling,
        struct llama_context * ctx_main,
        struct llama_context * ctx_cfg,
        int idx = -1);

// Prepares and adjusts the set of token candidates for sampling based on penalties, biases, and sampling parameters.
llama_token_data_array llama_sampling_prepare(
        struct llama_sampling_context * ctx_sampling,
        struct llama_context * ctx_main,
        struct llama_context * ctx_cfg,
        int idx = 0,
        bool apply_grammar = true,
        std::vector<float> * original_logits = nullptr);

void llama_sampling_accept(
        struct llama_sampling_context * ctx_sampling,
        struct llama_context * ctx_main,
        llama_token id,
        bool apply_grammar);

#include "llama.h"

#include "common.h"

#include <string>
#include <vector>

// common_sampler extends llama_sampler with additional functionality:
//
//  - grammar support
//  - custom sampler logic based on the parameters
//  - history of the last accepted tokens
//  - performance metrics
//
// The goal is to have a common implementation of the sampling logic shared across the examples.
// For example, depending on the temperature, the sampling chain can be very simple (greedy) or more
// complex (top-k, top-p, etc).
//
// Another example is related to the grammar. In general, the grammar constraints applied on the full
// vocabulary can be very taxing. To improve performance, the grammar can be applied only to the sampled
// token in order to verify if it fits the grammar. And only if the token doesn't fit the grammar, the
// grammar constraints are applied to the full vocabulary and the token is resampled.
//
// The common_sampler also maintains a container with the last accepted tokens. In the future, this can
// be moved into the core llama library.
//
// For convenience, the common_sampler also maintains a container with the current candidate tokens.
// This can be used to access the probabilities of the rest of the non-sampled tokens.
//
// TODO: measure grammar performance
//

struct common_sampler;

// llama_sampler API overloads

struct common_sampler * common_sampler_init(const struct llama_model * model, const struct common_params_sampling & params);

void common_sampler_free(struct common_sampler * gsmpl);

// if accept_grammar is true, the token is accepted both by the sampling chain and the grammar
void                    common_sampler_accept(struct common_sampler * gsmpl, llama_token token, bool accept_grammar);
void                    common_sampler_reset (struct common_sampler * gsmpl);
struct common_sampler * common_sampler_clone (struct common_sampler * gsmpl);

// arguments can be nullptr to skip printing
void common_perf_print(const struct llama_context * ctx, const struct common_sampler * gsmpl);

// extended sampling implementation:
//
// - set logits
// - apply the configured sampler chain
// - check if the token fits the grammar (if any)
// - if not: resample by first applying the grammar constraints and then sampling again (slower path)
//
// if grammar_first is true, the grammar is applied before the samplers (slower)
// useful in cases where all the resulting candidates (not just the sampled one) must fit the grammar
//
llama_token common_sampler_sample(struct common_sampler * gsmpl, struct llama_context * ctx, int idx, bool grammar_first = false);

// generalized version of common_sampler_sample
//
// will cross-reference the sampled tokens with a batch of draft tokens and accept those that match
// if the sampler disagrees at some point, we stop and return the accepted tokens up to now
//
//      common_sampler_sample_n(gsmpl, ctx, { idx }, {});
//
// is equivalent to
//
//      common_sampler_sample(gsmpl, ctx, idx);
//      common_sampler_accept(gsmpl, token, true);
//
// requires: idxs.size() == draft.size() + 1
//
// returns at least 1 token, up to idxs.size()
//
std::vector<llama_token> common_sampler_sample_and_accept_n(struct common_sampler * gsmpl, struct llama_context * ctx, const std::vector<int> & idxs, const llama_tokens & draft, bool grammar_first = false);

// assume idxs == [ 0, 1, 2, ..., draft.size() ]
std::vector<llama_token> common_sampler_sample_and_accept_n(struct common_sampler * gsmpl, struct llama_context * ctx, const llama_tokens & draft, bool grammar_first = false);

uint32_t common_sampler_get_seed(const struct common_sampler * gsmpl);

// helpers

// access the internal list of current candidate tokens
llama_token_data_array * common_sampler_get_candidates(struct common_sampler * gsmpl);

// get the last accepted token
llama_token common_sampler_last(const struct common_sampler * gsmpl);

// print the sampler chain into a string
std::string common_sampler_print(const struct common_sampler * gsmpl);

// get a string representation of the last accepted tokens
std::string common_sampler_prev_str(common_sampler * gsmpl, llama_context * ctx, int n);

char        common_sampler_type_to_chr(enum common_sampler_type cnstr);
std::string common_sampler_type_to_str(enum common_sampler_type cnstr);

std::vector<enum common_sampler_type> common_sampler_types_from_names(const std::vector<std::string> & names, bool allow_alt_names);
std::vector<enum common_sampler_type> common_sampler_types_from_chars(const std::string & chars);

llama_sampler * llama_sampler_init_llg(const llama_vocab * vocab,
    const char * grammar_kind, const char * grammar_data);
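The header comments above describe the grammar optimization: the grammar is normally checked only against the sampled token and the full-vocabulary pass happens only on a mismatch. A minimal sketch of grammar-constrained sampling is shown below; it assumes `model` and `ctx` are initialized and `my_grammar` is a hypothetical GBNF grammar string.

// Sketch only: constrained sampling via the grammar field of common_params_sampling.
common_params_sampling sparams;
sparams.grammar = my_grammar; // by default the grammar only vets the sampled token (fast path)

struct common_sampler * smpl = common_sampler_init(model, sparams);

// grammar_first = true takes the slower path where all candidates are filtered up front,
// useful when the whole candidate list (not just the sampled token) must fit the grammar
const llama_token id = common_sampler_sample(smpl, ctx, /* idx = */ -1, /* grammar_first = */ true);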
@ -1,277 +0,0 @@
#include "speculative.h"

#include "log.h"
#include "common.h"
#include "sampling.h"

#include <cstring>

#define SPEC_VOCAB_MAX_SIZE_DIFFERENCE  128
#define SPEC_VOCAB_CHECK_START_TOKEN_ID 5

struct common_speculative {
    struct llama_context * ctx;
    struct common_sampler * smpl;

    llama_batch batch;
    llama_tokens prompt;
};

struct common_speculative * common_speculative_init(
        struct llama_context * ctx_dft) {
    auto * result = new common_speculative {
        /* .ctx    = */ ctx_dft,
        /* .smpl   = */ nullptr,
        /* .batch  = */ llama_batch_init(llama_n_batch(ctx_dft), 0, 1),
        /* .prompt = */ {},
    };

    // TODO: optimize or pass from outside?
#if 0
    {
        common_params_sampling params;
        params.no_perf = false;

        params.top_k = 40;
        params.top_p = 0.9;

        params.samplers = {
            COMMON_SAMPLER_TYPE_TOP_K,
            COMMON_SAMPLER_TYPE_TOP_P,
            COMMON_SAMPLER_TYPE_INFILL,
        };

        result->smpl = common_sampler_init(llama_get_model(ctx_dft), params);
    }
#else
    {
        common_params_sampling params;
        params.no_perf = false;

        params.top_k = 10;

        params.samplers = {
            COMMON_SAMPLER_TYPE_TOP_K,
        };

        result->smpl = common_sampler_init(llama_get_model(ctx_dft), params);
    }
#endif

    return result;
}

void common_speculative_free(struct common_speculative * spec) {
    if (spec == nullptr) {
        return;
    }

    common_sampler_free(spec->smpl);

    llama_batch_free(spec->batch);

    delete spec;
}

bool common_speculative_are_compatible(
        const struct llama_context * ctx_tgt,
        const struct llama_context * ctx_dft) {
    const struct llama_model * model_tgt = llama_get_model(ctx_tgt);
    const struct llama_model * model_dft = llama_get_model(ctx_dft);

    const struct llama_vocab * vocab_tgt = llama_model_get_vocab(model_tgt);
    const struct llama_vocab * vocab_dft = llama_model_get_vocab(model_dft);

    const bool vocab_type_tgt = llama_vocab_type(vocab_tgt);
    LOG_DBG("%s: vocab_type tgt: %d\n", __func__, vocab_type_tgt);

    const bool vocab_type_dft = llama_vocab_type(vocab_dft);
    LOG_DBG("%s: vocab_type dft: %d\n", __func__, vocab_type_dft);

    if (vocab_type_tgt != vocab_type_dft) {
        LOG_ERR("%s: draft model vocab type must match target model to use speculation but "
                "vocab_type_dft = %d while vocab_type_tgt = %d\n", __func__, vocab_type_dft, vocab_type_tgt);
        return false;
    }

    if (llama_vocab_get_add_bos(vocab_tgt) != llama_vocab_get_add_bos(vocab_dft) ||
        llama_vocab_get_add_eos(vocab_tgt) != llama_vocab_get_add_eos(vocab_dft) ||
        llama_vocab_bos(vocab_tgt) != llama_vocab_bos(vocab_dft) ||
        llama_vocab_eos(vocab_tgt) != llama_vocab_eos(vocab_dft)) {
        LOG_ERR("%s: draft vocab special tokens must match target vocab to use speculation\n", __func__);
        LOG_ERR("%s: tgt: bos = %d (%d), eos = %d (%d)\n", __func__, llama_vocab_bos(vocab_tgt), llama_vocab_get_add_bos(vocab_tgt), llama_vocab_eos(vocab_tgt), llama_vocab_get_add_eos(vocab_tgt));
        LOG_ERR("%s: dft: bos = %d (%d), eos = %d (%d)\n", __func__, llama_vocab_bos(vocab_dft), llama_vocab_get_add_bos(vocab_dft), llama_vocab_eos(vocab_dft), llama_vocab_get_add_eos(vocab_dft));
        return false;
    }

    {
        const int n_vocab_tgt = llama_vocab_n_tokens(vocab_tgt);
        const int n_vocab_dft = llama_vocab_n_tokens(vocab_dft);

        const int vocab_diff = std::abs(n_vocab_tgt - n_vocab_dft);

        if (vocab_diff > SPEC_VOCAB_MAX_SIZE_DIFFERENCE) {
            LOG_ERR("%s: draft model vocab must closely match target model to use speculation but "
                    "target vocab size %d does not match draft vocab size %d - difference %d, max allowed %d\n",
                    __func__, n_vocab_tgt, llama_vocab_n_tokens(vocab_dft), vocab_diff, SPEC_VOCAB_MAX_SIZE_DIFFERENCE);
            return false;
        }

        for (int i = SPEC_VOCAB_CHECK_START_TOKEN_ID; i < std::min(n_vocab_tgt, n_vocab_dft); ++i) {
            const char * token_text_tgt = llama_vocab_get_text(vocab_tgt, i);
            const char * token_text_dft = llama_vocab_get_text(vocab_dft, i);
            if (std::strcmp(token_text_tgt, token_text_dft) != 0) {
                LOG_ERR("%s: draft vocab vocab must match target vocab to use speculation but "
                        "token %d content differs - target '%s', draft '%s'\n", __func__, i,
                        common_token_to_piece(ctx_tgt, i).c_str(),
                        common_token_to_piece(ctx_dft, i).c_str());
                return false;
            }
        }
    }

    return true;
}

llama_tokens common_speculative_gen_draft(
        struct common_speculative * spec,
        struct common_speculative_params params,
        const llama_tokens & prompt_tgt,
        llama_token id_last) {
    auto & batch  = spec->batch;
    auto & ctx    = spec->ctx;
    auto & smpl   = spec->smpl;
    auto & prompt = spec->prompt;

    int reuse_i = 0;
    int reuse_n = 0;

    const int n_ctx = llama_n_ctx(ctx) - params.n_draft;

    const int i_start = std::max<int>(0, (int) prompt_tgt.size() - n_ctx);

    // reuse as much as possible from the old draft context
    // ideally, the draft context should be as big as the target context and we will always reuse the entire prompt
    for (int i = 0; i < (int) prompt.size(); ++i) {
        int cur = 0;
        while (i_start + cur < (int) prompt_tgt.size() &&
               i       + cur < (int) prompt.size() &&
               prompt_tgt[i_start + cur] == prompt[i + cur]) {
            cur++;
        }

        if ((cur >= params.n_reuse || n_ctx >= (int) prompt_tgt.size()) && cur > reuse_n) {
            reuse_i = i;
            reuse_n = cur;
        }
    }

    LOG_DBG("%s: reuse_i = %d, reuse_n = %d, prompt = %d\n", __func__, reuse_i, reuse_n, (int) prompt.size());

    llama_tokens result;
    result.reserve(params.n_draft);

    if (reuse_n == 0) {
        llama_kv_cache_clear(ctx);

        prompt.clear();
    } else {
        // this happens when a previous draft has been discarded (for example, due to being too small), but the
        // target model agreed with it. in this case, we simply pass back the previous results to save compute
        if (reuse_i + reuse_n < (int) prompt.size() && prompt[reuse_i + reuse_n] == id_last) {
            for (int i = reuse_i + reuse_n + 1; i < (int) prompt.size(); ++i) {
                result.push_back(prompt[i]);

                if (params.n_draft <= (int) result.size()) {
                    break;
                }
            }

            return result;
        }

        if (reuse_i > 0) {
            llama_kv_cache_seq_rm (ctx, 0, 0, reuse_i);
            llama_kv_cache_seq_add(ctx, 0, reuse_i, -1, -reuse_i);

            prompt.erase(prompt.begin(), prompt.begin() + reuse_i);
        }

        if (reuse_n < (int) prompt.size()) {
            llama_kv_cache_seq_rm (ctx, 0, reuse_n, -1);

            prompt.erase(prompt.begin() + reuse_n, prompt.end());
        }
    }

    // prepare a batch to evaluate any new tokens in the prompt
    common_batch_clear(batch);

    for (size_t i = i_start + reuse_n; i < prompt_tgt.size(); ++i) {
        //LOG_DBG("i = %d, i_start = %d, reuse_n = %d, i - i_start = %d, id = %6d\n", i, i_start, reuse_n, i - i_start, prompt_tgt[i]);
        common_batch_add(batch, prompt_tgt[i], i - i_start, { 0 }, false);

        prompt.push_back(prompt_tgt[i]);
    }

    // we should rarely end-up here during normal decoding
    if (batch.n_tokens > 0) {
        //LOG_DBG("%s: draft prompt batch: %s\n", __func__, string_from(ctx, batch).c_str());

        llama_decode(ctx, batch);
    }

    const llama_pos n_past = prompt.size();

    LOG_DBG("%s: n_past = %d\n", __func__, n_past);

    common_batch_clear(batch);
    common_batch_add (batch, id_last, n_past, { 0 }, true);

    prompt.push_back(id_last);

    //LOG_DBG("%s: draft prompt: %s\n", __func__, string_from(ctx, prompt).c_str());

    llama_decode(ctx, batch);

    common_sampler_reset(smpl);

    // sample n_draft tokens from the draft model
    for (int i = 0; i < params.n_draft; ++i) {
        common_batch_clear(batch);

        common_sampler_sample(smpl, ctx, 0, true);

        const auto * cur_p = common_sampler_get_candidates(smpl);

        for (int k = 0; k < std::min(3, (int) cur_p->size); ++k) {
            LOG_DBG(" - draft candidate %3d, pos %3d: %6d (%8.3f) '%s'\n",
                k, i, cur_p->data[k].id, cur_p->data[k].p, common_token_to_piece(ctx, cur_p->data[k].id).c_str());
        }

        // add drafted token for each sequence
        const llama_token id = cur_p->data[0].id;

        // only collect very high-confidence draft tokens
        if (cur_p->data[0].p < params.p_min) {
            break;
        }

        common_sampler_accept(smpl, id, true);

        result.push_back(id);

        if (params.n_draft <= (int) result.size()) {
            break;
        }

        common_batch_add(batch, id, n_past + i + 1, { 0 }, true);

        // evaluate the drafted tokens on the draft model
        llama_decode(ctx, batch);

        prompt.push_back(id);
    }

    return result;
}
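A minimal sketch of driving the speculative helpers above. It assumes `ctx_tgt` and `ctx_dft` are the target and draft contexts, `prompt_tgt` holds the target-side token history, and `id_last` is the last token accepted by the target model; the common_speculative_params field values are illustrative, and verification on the target model is left to the caller.

// Sketch only: generating a draft for speculative decoding.
if (common_speculative_are_compatible(ctx_tgt, ctx_dft)) {
    struct common_speculative * spec = common_speculative_init(ctx_dft);

    common_speculative_params params;
    params.n_draft = 16;   // maximum number of tokens to draft
    params.p_min   = 0.9f; // only keep high-confidence draft tokens

    const llama_tokens draft = common_speculative_gen_draft(spec, params, prompt_tgt, id_last);

    // the draft can now be verified on the target model, e.g. with common_sampler_sample_and_accept_n()

    common_speculative_free(spec);
}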