common: llama_load_model_from_url use a temporary file for downloading
parent 31272c635a
commit f902ab6de2
4 changed files with 15 additions and 5 deletions
.github/workflows/server.yml (vendored): 2 changes

@@ -117,7 +117,7 @@ jobs:
         run: |
           mkdir build
           cd build
-          cmake .. -DLLAMA_CURL=ON -DCURL_LIBRARY="$env:RUNNER_TEMP/libcurl/lib/libcurl.dll.a" -DCURL_INCLUDE_DIR="$env:RUNNER_TEMP/libcurl/include" -DLLAMA_BUILD_SERVER=ON -DLLAMA_NATIVE=OFF -DBUILD_SHARED_LIBS=ON
+          cmake .. -DLLAMA_CURL=ON -DCURL_LIBRARY="$env:RUNNER_TEMP/libcurl/lib/libcurl.dll.a" -DCURL_INCLUDE_DIR="$env:RUNNER_TEMP/libcurl/include"
           cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS} --target server

       - name: Python setup
@@ -1748,8 +1748,11 @@ struct llama_model * llama_load_model_from_url(const char * model_url, const cha
 
     // If the ETag or the Last-Modified headers are different: trigger a new download
     if (!file_exists || strcmp(etag, headers.etag) != 0 || strcmp(last_modified, headers.last_modified) != 0) {
+        char path_model_temporary[LLAMA_CURL_MAX_PATH_LENGTH] = {0};
+        snprintf(path_model_temporary, sizeof(path_model_temporary), "%s.downloadInProgress", path_model);
+
         // Set the output file
-        auto * outfile = fopen(path_model, "wb");
+        auto * outfile = fopen(path_model_temporary, "wb");
         if (!outfile) {
             curl_easy_cleanup(curl);
             fprintf(stderr, "%s: error opening local file for writing: %s\n", __func__, path_model);

@@ -1810,6 +1813,12 @@ struct llama_model * llama_load_model_from_url(const char * model_url, const cha
                     headers.last_modified);
             }
         }
+
+        if (rename(path_model_temporary, path_model) != 0) {
+            curl_easy_cleanup(curl);
+            fprintf(stderr, "%s: unable to rename file: %s to %s\n", __func__, path_model_temporary, path_model);
+            return NULL;
+        }
     }
 
     curl_easy_cleanup(curl);
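The change above boils down to one pattern: stream the download into a side file named "<model path>.downloadInProgress" and only rename() it onto the final path after the transfer finished cleanly, so a crash or dropped connection can never leave a truncated file where a valid model is expected. The sketch below illustrates that pattern in isolation, not the actual implementation: fetch_payload() is a made-up stand-in for the real libcurl transfer, and the buffer size and error messages are illustrative only.

    #include <cstdio>

    // Made-up stand-in for the real libcurl transfer: it just writes a dummy
    // payload so the sketch stays self-contained and runnable.
    static bool fetch_payload(FILE * outfile) {
        const char payload[] = "not a real model";
        return fwrite(payload, 1, sizeof(payload), outfile) == sizeof(payload);
    }

    // Download into "<path>.downloadInProgress" and only rename() onto the
    // final path once the transfer completed, so readers never see a partial file.
    static bool download_to(const char * path_model) {
        char path_model_temporary[1024] = {0};   // illustrative size, not the real constant
        snprintf(path_model_temporary, sizeof(path_model_temporary), "%s.downloadInProgress", path_model);

        FILE * outfile = fopen(path_model_temporary, "wb");
        if (!outfile) {
            fprintf(stderr, "error opening %s for writing\n", path_model_temporary);
            return false;
        }

        const bool ok = fetch_payload(outfile);
        fclose(outfile);

        if (!ok) {
            remove(path_model_temporary);        // tidy up; the final path was never touched
            return false;
        }

        if (rename(path_model_temporary, path_model) != 0) {
            fprintf(stderr, "unable to rename %s to %s\n", path_model_temporary, path_model);
            return false;
        }
        return true;
    }

    int main() {
        return download_to("stories260K.gguf") ? 0 : 1;
    }

The remove() on failure is just tidy-up for the sketch; the property that matters, and the one the diff adds, is that the final model path only ever receives a fully written file via rename().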
@@ -109,10 +109,10 @@ def print_server_logs(context):
     out, err = context.server_process.communicate()
     if out:
         print("Server stdout:\n")
-        print(out.decode("utf-8"))
+        print(out.decode('utf-8'))
         print("\n")
     if err:
         print("Server stderr:\n")
-        print(err.decode("utf-8"))
+        print(err.decode('utf-8'))
         print("\n")
 
@@ -4,7 +4,8 @@ Feature: llama.cpp server
 
   Background: Server startup
     Given a server listening on localhost:8080
-    And a model file tinyllamas/stories260K.gguf from HF repo ggml-org/models
+    And a model url https://huggingface.co/ggml-org/models/resolve/main/tinyllamas/stories260K.gguf
+    And a model file stories260K.gguf
     And a model alias tinyllama-2
     And 42 as server seed
     # KV Cache corresponds to the total amount of tokens