diff --git a/.github/workflows/server.yml b/.github/workflows/server.yml
index 79807f897..4ea09115a 100644
--- a/.github/workflows/server.yml
+++ b/.github/workflows/server.yml
@@ -117,7 +117,7 @@ jobs:
         run: |
           mkdir build
           cd build
-          cmake .. -DLLAMA_CURL=ON -DCURL_LIBRARY="$env:RUNNER_TEMP/libcurl/lib/libcurl.dll.a" -DCURL_INCLUDE_DIR="$env:RUNNER_TEMP/libcurl/include" -DLLAMA_BUILD_SERVER=ON -DLLAMA_NATIVE=OFF -DBUILD_SHARED_LIBS=ON
+          cmake .. -DLLAMA_CURL=ON -DCURL_LIBRARY="$env:RUNNER_TEMP/libcurl/lib/libcurl.dll.a" -DCURL_INCLUDE_DIR="$env:RUNNER_TEMP/libcurl/include"
           cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS} --target server
 
       - name: Python setup
diff --git a/common/common.cpp b/common/common.cpp
index de0501855..3ecd4e5cd 100644
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -1748,8 +1748,11 @@ struct llama_model * llama_load_model_from_url(const char * model_url, const cha
 
     // If the ETag or the Last-Modified headers are different: trigger a new download
     if (!file_exists || strcmp(etag, headers.etag) != 0 || strcmp(last_modified, headers.last_modified) != 0) {
+        char path_model_temporary[LLAMA_CURL_MAX_PATH_LENGTH] = {0};
+        snprintf(path_model_temporary, sizeof(path_model_temporary), "%s.downloadInProgress", path_model);
+
         // Set the output file
-        auto * outfile = fopen(path_model, "wb");
+        auto * outfile = fopen(path_model_temporary, "wb");
         if (!outfile) {
             curl_easy_cleanup(curl);
             fprintf(stderr, "%s: error opening local file for writing: %s\n", __func__, path_model);
@@ -1810,6 +1813,12 @@ struct llama_model * llama_load_model_from_url(const char * model_url, const cha
                     headers.last_modified);
             }
         }
+
+        if (rename(path_model_temporary, path_model) != 0) {
+            curl_easy_cleanup(curl);
+            fprintf(stderr, "%s: unable to rename file: %s to %s\n", __func__, path_model_temporary, path_model);
+            return NULL;
+        }
     }
 
     curl_easy_cleanup(curl);
diff --git a/examples/server/tests/features/environment.py b/examples/server/tests/features/environment.py
index 9ae3954e9..96751d713 100644
--- a/examples/server/tests/features/environment.py
+++ b/examples/server/tests/features/environment.py
@@ -109,10 +109,10 @@ def print_server_logs(context):
     out, err = context.server_process.communicate()
     if out:
         print("Server stdout:\n")
-        print(out.decode("utf-8"))
+        print(out.decode('utf-8'))
         print("\n")
 
     if err:
         print("Server stderr:\n")
-        print(err.decode("utf-8"))
+        print(err.decode('utf-8'))
         print("\n")
diff --git a/examples/server/tests/features/server.feature b/examples/server/tests/features/server.feature
index 5014f326d..7448986e7 100644
--- a/examples/server/tests/features/server.feature
+++ b/examples/server/tests/features/server.feature
@@ -4,7 +4,8 @@ Feature: llama.cpp server
 
   Background: Server startup
     Given a server listening on localhost:8080
-    And a model file tinyllamas/stories260K.gguf from HF repo ggml-org/models
+    And a model url https://huggingface.co/ggml-org/models/resolve/main/tinyllamas/stories260K.gguf
+    And a model file stories260K.gguf
     And a model alias tinyllama-2
     And 42 as server seed
     # KV Cache corresponds to the total amount of tokens
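
The common/common.cpp hunks above change the download path so the model is first written to a "<path>.downloadInProgress" temporary file and only renamed to its final name once the transfer and ETag bookkeeping succeed, so an interrupted download never leaves a truncated file at the model path. Below is a minimal, self-contained C++ sketch of that write-to-temporary-then-rename pattern: an in-memory payload stands in for the real libcurl transfer, and the helper name write_file_atomically and the toy main() are illustrative only, not part of the patch.

    #include <cstddef>
    #include <cstdio>
    #include <string>

    // Sketch of the pattern used in the patch: write to "<path>.downloadInProgress",
    // then move the finished file into place under its final name.
    static bool write_file_atomically(const std::string & path, const char * data, std::size_t size) {
        // Same suffix as the patch, so partial data never lands at the final path.
        const std::string tmp = path + ".downloadInProgress";

        std::FILE * outfile = std::fopen(tmp.c_str(), "wb");
        if (!outfile) {
            std::fprintf(stderr, "error opening local file for writing: %s\n", tmp.c_str());
            return false;
        }

        const std::size_t written = std::fwrite(data, 1, size, outfile);
        std::fclose(outfile);
        if (written != size) {
            std::remove(tmp.c_str()); // drop the incomplete file
            return false;
        }

        // Publish the finished file under its final name. On POSIX, rename()
        // atomically replaces an existing destination; on Windows it fails if
        // the destination already exists, so a caller may need to remove it first.
        if (std::rename(tmp.c_str(), path.c_str()) != 0) {
            std::fprintf(stderr, "unable to rename file: %s to %s\n", tmp.c_str(), path.c_str());
            return false;
        }
        return true;
    }

    int main() {
        const char payload[] = "example model bytes";
        return write_file_atomically("stories260K.gguf", payload, sizeof(payload) - 1) ? 0 : 1;
    }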