ggml : build backends as libraries (#10256)

* ggml : build backends as libraries

---------

Signed-off-by: Xiaodong Ye <xiaodong.ye@mthreads.com>
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
Co-authored-by: R0CKSTAR <xiaodong.ye@mthreads.com>
This commit is contained in:
Diego Devesa 2024-11-14 18:04:35 +01:00 committed by GitHub
parent 4a8ccb37ad
commit ae8de6d50a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
191 changed files with 17541 additions and 16879 deletions

View file

@ -23,15 +23,16 @@ RUN if [ "${CUDA_DOCKER_ARCH}" != "default" ]; then \
export CMAKE_ARGS="-DCMAKE_CUDA_ARCHITECTURES=${CUDA_DOCKER_ARCH}"; \
fi && \
cmake -B build -DGGML_CUDA=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
cmake --build build --config Release --target llama-cli -j$(nproc)
cmake --build build --config Release --target llama-cli -j$(nproc) && \
mkdir -p /app/lib && \
find build -name "*.so" -exec cp {} /app/lib \;
FROM ${BASE_CUDA_RUN_CONTAINER} AS runtime
RUN apt-get update && \
apt-get install -y libgomp1
COPY --from=build /app/build/ggml/src/libggml.so /libggml.so
COPY --from=build /app/build/src/libllama.so /libllama.so
COPY --from=build /app/build/bin/llama-cli /llama-cli
COPY --from=build /app/lib/ /
COPY --from=build /app/build/bin/llama-cli /
ENTRYPOINT [ "/llama-cli" ]

View file

@ -16,15 +16,16 @@ WORKDIR /app
COPY . .
RUN cmake -B build -DGGML_MUSA=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
cmake --build build --config Release --target llama-cli -j$(nproc)
cmake --build build --config Release --target llama-cli -j$(nproc) && \
mkdir -p /app/lib && \
find build -name "*.so" -exec cp {} /app/lib \;
FROM ${BASE_MUSA_RUN_CONTAINER} AS runtime
RUN apt-get update && \
apt-get install -y libgomp1
COPY --from=build /app/build/ggml/src/libggml.so /libggml.so
COPY --from=build /app/build/src/libllama.so /libllama.so
COPY --from=build /app/lib/ /
COPY --from=build /app/build/bin/llama-cli /llama-cli
ENTRYPOINT [ "/llama-cli" ]

View file

@ -23,15 +23,16 @@ RUN if [ "${CUDA_DOCKER_ARCH}" != "default" ]; then \
export CMAKE_ARGS="-DCMAKE_CUDA_ARCHITECTURES=${CUDA_DOCKER_ARCH}"; \
fi && \
cmake -B build -DGGML_CUDA=ON -DLLAMA_CURL=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
cmake --build build --config Release --target llama-server -j$(nproc)
cmake --build build --config Release --target llama-server -j$(nproc) && \
mkdir -p /app/lib && \
find build -name "*.so" -exec cp {} /app/lib \;
FROM ${BASE_CUDA_RUN_CONTAINER} AS runtime
RUN apt-get update && \
apt-get install -y libcurl4-openssl-dev libgomp1 curl
COPY --from=build /app/build/ggml/src/libggml.so /libggml.so
COPY --from=build /app/build/src/libllama.so /libllama.so
COPY --from=build /app/lib/ /
COPY --from=build /app/build/bin/llama-server /llama-server
# Must be set to 0.0.0.0 so it can listen to requests from host machine

View file

@ -16,15 +16,16 @@ WORKDIR /app
COPY . .
RUN cmake -B build -DGGML_MUSA=ON -DLLAMA_CURL=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
cmake --build build --config Release --target llama-server -j$(nproc)
cmake --build build --config Release --target llama-server -j$(nproc) && \
mkdir -p /app/lib && \
find build -name "*.so" -exec cp {} /app/lib \;
FROM ${BASE_MUSA_RUN_CONTAINER} AS runtime
RUN apt-get update && \
apt-get install -y libcurl4-openssl-dev libgomp1 curl
COPY --from=build /app/build/ggml/src/libggml.so /libggml.so
COPY --from=build /app/build/src/libllama.so /libllama.so
COPY --from=build /app/lib/ /
COPY --from=build /app/build/bin/llama-server /llama-server
# Must be set to 0.0.0.0 so it can listen to requests from host machine

View file

@ -126,9 +126,9 @@ effectiveStdenv.mkDerivation (finalAttrs: {
};
postPatch = ''
substituteInPlace ./ggml/src/ggml-metal.m \
substituteInPlace ./ggml/src/ggml-metal/ggml-metal.m \
--replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";"
substituteInPlace ./ggml/src/ggml-metal.m \
substituteInPlace ./ggml/src/ggml-metal/ggml-metal.m \
--replace '[bundle pathForResource:@"default" ofType:@"metallib"];' "@\"$out/bin/default.metallib\";"
'';
@ -173,7 +173,7 @@ effectiveStdenv.mkDerivation (finalAttrs: {
(cmakeBool "GGML_NATIVE" false)
(cmakeBool "GGML_BLAS" useBlas)
(cmakeBool "GGML_CUDA" useCuda)
(cmakeBool "GGML_HIPBLAS" useRocm)
(cmakeBool "GGML_HIP" useRocm)
(cmakeBool "GGML_METAL" useMetalKit)
(cmakeBool "GGML_VULKAN" useVulkan)
(cmakeBool "GGML_STATIC" enableStatic)