common: llama_load_model_from_url use a temporary file for downloading
parent 31272c635a
commit f902ab6de2
4 changed files with 15 additions and 5 deletions
.github/workflows/server.yml (vendored): 2 changes

@@ -117,7 +117,7 @@ jobs:
         run: |
           mkdir build
           cd build
-          cmake .. -DLLAMA_CURL=ON -DCURL_LIBRARY="$env:RUNNER_TEMP/libcurl/lib/libcurl.dll.a" -DCURL_INCLUDE_DIR="$env:RUNNER_TEMP/libcurl/include" -DLLAMA_BUILD_SERVER=ON -DLLAMA_NATIVE=OFF -DBUILD_SHARED_LIBS=ON
+          cmake .. -DLLAMA_CURL=ON -DCURL_LIBRARY="$env:RUNNER_TEMP/libcurl/lib/libcurl.dll.a" -DCURL_INCLUDE_DIR="$env:RUNNER_TEMP/libcurl/include"
           cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS} --target server

       - name: Python setup
@@ -1748,8 +1748,11 @@ struct llama_model * llama_load_model_from_url(const char * model_url, const cha
 
     // If the ETag or the Last-Modified headers are different: trigger a new download
     if (!file_exists || strcmp(etag, headers.etag) != 0 || strcmp(last_modified, headers.last_modified) != 0) {
+        char path_model_temporary[LLAMA_CURL_MAX_PATH_LENGTH] = {0};
+        snprintf(path_model_temporary, sizeof(path_model_temporary), "%s.downloadInProgress", path_model);
+
         // Set the output file
-        auto * outfile = fopen(path_model, "wb");
+        auto * outfile = fopen(path_model_temporary, "wb");
         if (!outfile) {
             curl_easy_cleanup(curl);
             fprintf(stderr, "%s: error opening local file for writing: %s\n", __func__, path_model);

@@ -1810,6 +1813,12 @@ struct llama_model * llama_load_model_from_url(const char * model_url, const cha
                     headers.last_modified);
             }
         }
+
+        if (rename(path_model_temporary, path_model) != 0) {
+            curl_easy_cleanup(curl);
+            fprintf(stderr, "%s: unable to rename file: %s to %s\n", __func__, path_model_temporary, path_model);
+            return NULL;
+        }
     }
 
     curl_easy_cleanup(curl);
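The change above boils down to one pattern: stream the download into a side file named "<model path>.downloadInProgress" and only rename() it onto the final path after the transfer finished cleanly, so a crash or dropped connection can never leave a truncated file where a valid model is expected. The sketch below illustrates that pattern in isolation, not the actual implementation: fetch_payload() is a made-up stand-in for the real libcurl transfer, and the buffer size and error messages are illustrative only.

    #include <cstdio>

    // Made-up stand-in for the real libcurl transfer: it just writes a dummy
    // payload so the sketch stays self-contained and runnable.
    static bool fetch_payload(FILE * outfile) {
        const char payload[] = "not a real model";
        return fwrite(payload, 1, sizeof(payload), outfile) == sizeof(payload);
    }

    // Download into "<path>.downloadInProgress" and only rename() onto the
    // final path once the transfer completed, so readers never see a partial file.
    static bool download_to(const char * path_model) {
        char path_model_temporary[1024] = {0};   // illustrative size, not the real constant
        snprintf(path_model_temporary, sizeof(path_model_temporary), "%s.downloadInProgress", path_model);

        FILE * outfile = fopen(path_model_temporary, "wb");
        if (!outfile) {
            fprintf(stderr, "error opening %s for writing\n", path_model_temporary);
            return false;
        }

        const bool ok = fetch_payload(outfile);
        fclose(outfile);

        if (!ok) {
            remove(path_model_temporary);        // tidy up; the final path was never touched
            return false;
        }

        if (rename(path_model_temporary, path_model) != 0) {
            fprintf(stderr, "unable to rename %s to %s\n", path_model_temporary, path_model);
            return false;
        }
        return true;
    }

    int main() {
        return download_to("stories260K.gguf") ? 0 : 1;
    }

The remove() on failure is just tidy-up for the sketch; the property that matters, and the one the diff adds, is that the final model path only ever receives a fully written file via rename().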
@@ -109,10 +109,10 @@ def print_server_logs(context):
     out, err = context.server_process.communicate()
     if out:
         print("Server stdout:\n")
-        print(out.decode("utf-8"))
+        print(out.decode('utf-8'))
         print("\n")
     if err:
         print("Server stderr:\n")
-        print(err.decode("utf-8"))
+        print(err.decode('utf-8'))
         print("\n")
 
@@ -4,7 +4,8 @@ Feature: llama.cpp server
 
   Background: Server startup
     Given a server listening on localhost:8080
-    And a model file tinyllamas/stories260K.gguf from HF repo ggml-org/models
+    And a model url https://huggingface.co/ggml-org/models/resolve/main/tinyllamas/stories260K.gguf
+    And a model file stories260K.gguf
     And a model alias tinyllama-2
     And 42 as server seed
     # KV Cache corresponds to the total amount of tokens