diff --git a/.github/workflows/server.yml b/.github/workflows/server.yml
index 79807f897..4ea09115a 100644
--- a/.github/workflows/server.yml
+++ b/.github/workflows/server.yml
@@ -117,7 +117,7 @@ jobs:
         run: |
           mkdir build
           cd build
-          cmake .. -DLLAMA_CURL=ON -DCURL_LIBRARY="$env:RUNNER_TEMP/libcurl/lib/libcurl.dll.a" -DCURL_INCLUDE_DIR="$env:RUNNER_TEMP/libcurl/include" -DLLAMA_BUILD_SERVER=ON -DLLAMA_NATIVE=OFF -DBUILD_SHARED_LIBS=ON
+          cmake .. -DLLAMA_CURL=ON -DCURL_LIBRARY="$env:RUNNER_TEMP/libcurl/lib/libcurl.dll.a" -DCURL_INCLUDE_DIR="$env:RUNNER_TEMP/libcurl/include"
           cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS} --target server
 
       - name: Python setup
diff --git a/common/common.cpp b/common/common.cpp
index de0501855..3ecd4e5cd 100644
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -1748,8 +1748,11 @@ struct llama_model * llama_load_model_from_url(const char * model_url, const cha
 
     // If the ETag or the Last-Modified headers are different: trigger a new download
     if (!file_exists || strcmp(etag, headers.etag) != 0 || strcmp(last_modified, headers.last_modified) != 0) {
+        char path_model_temporary[LLAMA_CURL_MAX_PATH_LENGTH] = {0};
+        snprintf(path_model_temporary, sizeof(path_model_temporary), "%s.downloadInProgress", path_model);
+
         // Set the output file
-        auto * outfile = fopen(path_model, "wb");
+        auto * outfile = fopen(path_model_temporary, "wb");
         if (!outfile) {
             curl_easy_cleanup(curl);
             fprintf(stderr, "%s: error opening local file for writing: %s\n", __func__, path_model);
@@ -1810,6 +1813,12 @@ struct llama_model * llama_load_model_from_url(const char * model_url, const cha
                     headers.last_modified);
             }
         }
+
+        if (rename(path_model_temporary, path_model) != 0) {
+            curl_easy_cleanup(curl);
+            fprintf(stderr, "%s: unable to rename file: %s to %s\n", __func__, path_model_temporary, path_model);
+            return NULL;
+        }
     }
 
     curl_easy_cleanup(curl);
diff --git a/examples/server/tests/features/environment.py b/examples/server/tests/features/environment.py
index 9ae3954e9..96751d713 100644
--- a/examples/server/tests/features/environment.py
+++ b/examples/server/tests/features/environment.py
@@ -109,10 +109,10 @@ def print_server_logs(context):
     out, err = context.server_process.communicate()
     if out:
         print("Server stdout:\n")
-        print(out.decode("utf-8"))
+        print(out.decode('utf-8'))
         print("\n")
 
     if err:
         print("Server stderr:\n")
-        print(err.decode("utf-8"))
+        print(err.decode('utf-8'))
         print("\n")
diff --git a/examples/server/tests/features/server.feature b/examples/server/tests/features/server.feature
index 5014f326d..7448986e7 100644
--- a/examples/server/tests/features/server.feature
+++ b/examples/server/tests/features/server.feature
@@ -4,7 +4,8 @@ Feature: llama.cpp server
 
   Background: Server startup
     Given a server listening on localhost:8080
-    And a model file tinyllamas/stories260K.gguf from HF repo ggml-org/models
+    And a model url https://huggingface.co/ggml-org/models/resolve/main/tinyllamas/stories260K.gguf
+    And a model file stories260K.gguf
     And a model alias tinyllama-2
     And 42 as server seed
     # KV Cache corresponds to the total amount of tokens
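
The common/common.cpp hunks above change the download path so the model is first written to a "<path>.downloadInProgress" temporary file and only renamed to its final name once the transfer and ETag bookkeeping succeed, so an interrupted download never leaves a truncated file at the model path. Below is a minimal, self-contained C++ sketch of that write-to-temporary-then-rename pattern: an in-memory payload stands in for the real libcurl transfer, and the helper name write_file_atomically and the toy main() are illustrative only, not part of the patch.

    #include <cstddef>
    #include <cstdio>
    #include <string>

    // Sketch of the pattern used in the patch: write to "<path>.downloadInProgress",
    // then move the finished file into place under its final name.
    static bool write_file_atomically(const std::string & path, const char * data, std::size_t size) {
        // Same suffix as the patch, so partial data never lands at the final path.
        const std::string tmp = path + ".downloadInProgress";

        std::FILE * outfile = std::fopen(tmp.c_str(), "wb");
        if (!outfile) {
            std::fprintf(stderr, "error opening local file for writing: %s\n", tmp.c_str());
            return false;
        }

        const std::size_t written = std::fwrite(data, 1, size, outfile);
        std::fclose(outfile);
        if (written != size) {
            std::remove(tmp.c_str()); // drop the incomplete file
            return false;
        }

        // Publish the finished file under its final name. On POSIX, rename()
        // atomically replaces an existing destination; on Windows it fails if
        // the destination already exists, so a caller may need to remove it first.
        if (std::rename(tmp.c_str(), path.c_str()) != 0) {
            std::fprintf(stderr, "unable to rename file: %s to %s\n", tmp.c_str(), path.c_str());
            return false;
        }
        return true;
    }

    int main() {
        const char payload[] = "example model bytes";
        return write_file_atomically("stories260K.gguf", payload, sizeof(payload) - 1) ? 0 : 1;
    }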