ggml : add predefined list of CPU backend variants to build (#10626)

* ggml : add predefined list of CPU backend variants to build
* update CPU dockerfiles
2024-12-04 14:45:40 +01:00 · 2024-12-04 14:45:40 +01:00 · 59f4db1088
commit 59f4db1088
parent 2803540814
11 changed files with 502 additions and 391 deletions
--- a/.devops/full.Dockerfile
+++ b/.devops/full.Dockerfile
@ -3,23 +3,36 @@ ARG UBUNTU_VERSION=22.04
 FROM ubuntu:$UBUNTU_VERSION AS build

 RUN apt-get update && \
-    apt-get install -y build-essential python3 python3-pip git libcurl4-openssl-dev libgomp1
-
-COPY requirements.txt   requirements.txt
-COPY requirements       requirements
-
-RUN pip install --upgrade pip setuptools wheel \
-    && pip install -r requirements.txt
+    apt-get install -y build-essential git cmake libcurl4-openssl-dev

 WORKDIR /app

 COPY . .

-ENV LLAMA_CURL=1
+RUN cmake -S . -B build -DGGML_BACKEND_DL=ON -DGGML_NATIVE=OFF -DGGML_CPU_ALL_VARIANTS=ON -DLLAMA_CURL=ON -DCMAKE_BUILD_TYPE=Release && \
+    cmake --build build -j $(nproc) && \
+    mkdir -p /app/lib && \
+    find build -name "*.so" -exec cp {} /app/lib/ \;

+FROM ubuntu:$UBUNTU_VERSION AS runtime

-RUN make -j$(nproc)
+WORKDIR /app
+
+RUN apt-get update && apt-get install -y build-essential python3 python3-pip git libcurl4-openssl-dev libgomp1 && \
+    rm -rf /var/lib/apt/lists/*
+
+COPY requirements.txt   /app/requirements.txt
+COPY requirements       /app/requirements
+COPY .devops/tools.sh   /app/tools.sh
+
+RUN pip install --no-cache-dir --upgrade pip setuptools wheel && \
+    pip install --no-cache-dir -r /app/requirements.txt
+
+COPY --from=build /app/build/bin/ /app/
+COPY --from=build /app/lib/ /app/
+COPY --from=build /app/convert_hf_to_gguf.py /app/
+COPY --from=build /app/gguf-py /app/gguf-py

 ENV LC_ALL=C.utf8

-ENTRYPOINT ["/app/.devops/tools.sh"]
+ENTRYPOINT ["/app/tools.sh"]
--- a/.devops/llama-cli.Dockerfile
+++ b/.devops/llama-cli.Dockerfile
@ -3,21 +3,27 @@ ARG UBUNTU_VERSION=22.04
 FROM ubuntu:$UBUNTU_VERSION AS build

 RUN apt-get update && \
-    apt-get install -y build-essential git
+    apt-get install -y build-essential git cmake libcurl4-openssl-dev

 WORKDIR /app

 COPY . .

-RUN make -j$(nproc) llama-cli
+RUN cmake -S . -B build -DGGML_BACKEND_DL=ON -DGGML_NATIVE=OFF -DGGML_CPU_ALL_VARIANTS=ON -DLLAMA_CURL=ON -DCMAKE_BUILD_TYPE=Release && \
+    cmake --build build -j $(nproc) && \
+    mkdir -p /app/lib && \
+    find build -name "*.so" -exec cp {} /app/lib/ \;

 FROM ubuntu:$UBUNTU_VERSION AS runtime

-RUN apt-get update && \
-    apt-get install -y libgomp1
+WORKDIR /app

-COPY --from=build /app/llama-cli /llama-cli
+RUN apt-get update && apt-get install -y libcurl4-openssl-dev libgomp1 curl && \
+    rm -rf /var/lib/apt/lists/*
+
+COPY --from=build /app/build/bin/llama-cli /app/
+COPY --from=build /app/lib/ /app/

 ENV LC_ALL=C.utf8

-ENTRYPOINT [ "/llama-cli" ]
+ENTRYPOINT [ "/app/llama-cli" ]
--- a/.devops/llama-server.Dockerfile
+++ b/.devops/llama-server.Dockerfile
@ -9,28 +9,20 @@ WORKDIR /app

 COPY . .

-
-RUN \
-    # Build multiple versions of the CPU backend
-    scripts/build-cpu.sh avx         -DGGML_AVX=ON -DGGML_AVX2=OFF && \
-    scripts/build-cpu.sh avx2        -DGGML_AVX=ON -DGGML_AVX2=ON && \
-    scripts/build-cpu.sh avx512      -DGGML_AVX=ON -DGGML_AVX2=ON -DGGML_AVX512=ON && \
-    scripts/build-cpu.sh amx         -DGGML_AVX=ON -DGGML_AVX2=ON -DGGML_AVX512=ON -DGGML_AVX_VNNI=ON -DGGML_AVX512_VNNI=ON -DGGML_AMX_TILE=ON -DGGML_AMX_INT8=ON && \
-    # Build llama-server
-    cmake -S . -B build -DGGML_BACKEND_DL=ON -DGGML_NATIVE=OFF -DLLAMA_CURL=ON -DCMAKE_BUILD_TYPE=Release && \
-    cmake --build build --target llama-server -j $(nproc) && \
-    # Copy the built libraries to /app/lib
+RUN cmake -S . -B build -DGGML_BACKEND_DL=ON -DGGML_NATIVE=OFF -DGGML_CPU_ALL_VARIANTS=ON -DLLAMA_CURL=ON -DCMAKE_BUILD_TYPE=Release && \
+    cmake --build build -j $(nproc) && \
    mkdir -p /app/lib && \
-    mv libggml-cpu* /app/lib/ && \
    find build -name "*.so" -exec cp {} /app/lib/ \;

 FROM ubuntu:$UBUNTU_VERSION AS runtime

+WORKDIR /app
+
-RUN apt-get update && \
-    apt-get install -y libcurl4-openssl-dev libgomp1 curl
+RUN apt-get update && apt-get install -y libcurl4-openssl-dev libgomp1 curl && \
+    rm -rf /var/lib/apt/lists/*

-COPY --from=build /app/build/bin/llama-server /llama-server
-COPY --from=build /app/lib/ /
+COPY --from=build /app/build/bin/llama-server /app/
+COPY --from=build /app/lib/ /app/

 ENV LC_ALL=C.utf8
 # Must be set to 0.0.0.0 so it can listen to requests from host machine
@ -38,4 +30,4 @@ ENV LLAMA_ARG_HOST=0.0.0.0

 HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]

-ENTRYPOINT [ "/llama-server" ]
+ENTRYPOINT [ "/app/llama-server" ]