common: llama_load_model_from_url use a temporary file for downloading

Pierrick HYMBERT 2024-03-17 16:37:02 +01:00
parent 31272c635a
commit f902ab6de2
4 changed files with 15 additions and 5 deletions
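The change replaces the direct write to the final model path with a write to a "<path>.downloadInProgress" temporary file, followed by a rename() into place once the download has completed, so the final path never holds a partial download. A minimal, self-contained sketch of that pattern is below; the helper name and buffer size are illustrative, not the exact code from common.cpp:

    // sketch of the download-to-temp-then-rename pattern applied by this commit;
    // helper name and buffer size are illustrative, not the exact common.cpp code
    #include <curl/curl.h>
    #include <stdio.h>

    static int download_to_path(const char * url, const char * path) {
        char path_tmp[1024]; // common.cpp uses LLAMA_CURL_MAX_PATH_LENGTH here
        snprintf(path_tmp, sizeof(path_tmp), "%s.downloadInProgress", path);

        CURL * curl = curl_easy_init();
        if (!curl) {
            return -1;
        }

        FILE * outfile = fopen(path_tmp, "wb");
        if (!outfile) {
            curl_easy_cleanup(curl);
            return -1;
        }

        curl_easy_setopt(curl, CURLOPT_URL, url);
        curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
        curl_easy_setopt(curl, CURLOPT_WRITEDATA, outfile); // default write callback fwrites to outfile

        CURLcode res = curl_easy_perform(curl);
        fclose(outfile);
        curl_easy_cleanup(curl);

        if (res != CURLE_OK) {
            remove(path_tmp); // discard the partial download
            return -1;
        }

        // the final path only ever receives a fully downloaded file
        return rename(path_tmp, path) == 0 ? 0 : -1;
    }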

.github/workflows/server.yml

@@ -117,7 +117,7 @@ jobs:
         run: |
           mkdir build
           cd build
-          cmake .. -DLLAMA_CURL=ON -DCURL_LIBRARY="$env:RUNNER_TEMP/libcurl/lib/libcurl.dll.a" -DCURL_INCLUDE_DIR="$env:RUNNER_TEMP/libcurl/include" -DLLAMA_BUILD_SERVER=ON -DLLAMA_NATIVE=OFF -DBUILD_SHARED_LIBS=ON
+          cmake .. -DLLAMA_CURL=ON -DCURL_LIBRARY="$env:RUNNER_TEMP/libcurl/lib/libcurl.dll.a" -DCURL_INCLUDE_DIR="$env:RUNNER_TEMP/libcurl/include"
           cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS} --target server
- name: Python setup

common/common.cpp

@@ -1748,8 +1748,11 @@ struct llama_model * llama_load_model_from_url(const char * model_url, const cha
     // If the ETag or the Last-Modified headers are different: trigger a new download
     if (!file_exists || strcmp(etag, headers.etag) != 0 || strcmp(last_modified, headers.last_modified) != 0) {
+        char path_model_temporary[LLAMA_CURL_MAX_PATH_LENGTH] = {0};
+        snprintf(path_model_temporary, sizeof(path_model_temporary), "%s.downloadInProgress", path_model);
+
         // Set the output file
-        auto * outfile = fopen(path_model, "wb");
+        auto * outfile = fopen(path_model_temporary, "wb");
         if (!outfile) {
             curl_easy_cleanup(curl);
             fprintf(stderr, "%s: error opening local file for writing: %s\n", __func__, path_model);
             return NULL;
@@ -1810,6 +1813,12 @@ struct llama_model * llama_load_model_from_url(const char * model_url, const cha
                     headers.last_modified);
             }
         }
+
+        if (rename(path_model_temporary, path_model) != 0) {
+            curl_easy_cleanup(curl);
+            fprintf(stderr, "%s: unable to rename file: %s to %s\n", __func__, path_model_temporary, path_model);
+            return NULL;
+        }
     }

     curl_easy_cleanup(curl);
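A note on the rename() added above: on POSIX systems rename() atomically replaces an existing destination, so a concurrent reader of path_model sees either the previous complete file or the new one, never a half-written download. The Windows C runtime's rename(), by contrast, fails when the destination already exists, which is one case the error path above reports instead of silently ignoring.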

examples/server/tests/features/steps/steps.py

@@ -109,10 +109,10 @@ def print_server_logs(context):
    out, err = context.server_process.communicate()
    if out:
        print("Server stdout:\n")
-        print(out.decode("utf-8"))
+        print(out.decode('utf-8'))
        print("\n")
    if err:
        print("Server stderr:\n")
-        print(err.decode("utf-8"))
+        print(err.decode('utf-8'))
        print("\n")

examples/server/tests/features/server.feature

@@ -4,7 +4,8 @@ Feature: llama.cpp server
   Background: Server startup
     Given a server listening on localhost:8080
-    And   a model file tinyllamas/stories260K.gguf from HF repo ggml-org/models
+    And   a model url https://huggingface.co/ggml-org/models/resolve/main/tinyllamas/stories260K.gguf
+    And   a model file stories260K.gguf
     And   a model alias tinyllama-2
     And   42 as server seed
     # KV Cache corresponds to the total amount of tokens
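The new background steps exercise the download path end to end: the server test now fetches the model through llama_load_model_from_url into the given local file. A hypothetical caller-side sketch follows; the function's parameter list is truncated in the hunk headers above, so the exact arguments (in particular the params struct) are an assumption:

    #include "common.h"

    int main(void) {
        // downloads to stories260K.gguf via the .downloadInProgress temp file,
        // re-downloading only when the ETag or Last-Modified headers change
        struct llama_model * model = llama_load_model_from_url(
            "https://huggingface.co/ggml-org/models/resolve/main/tinyllamas/stories260K.gguf",
            "stories260K.gguf",
            llama_model_default_params()); // assumed third parameter
        return model == NULL ? 1 : 0;
    }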