common: llama_load_model_from_url use a temporary file for downloading
parent 31272c635a
commit f902ab6de2

4 changed files with 15 additions and 5 deletions
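The gist of the change: instead of streaming the download straight into the final model path, the loader writes to a sibling "<path>.downloadInProgress" file and only renames it into place once the transfer has fully succeeded, so an interrupted or failed download can no longer leave a truncated file where a valid GGUF model is expected. A minimal self-contained sketch of that pattern (not the actual common.cpp code; fetch_url and download_to_path below are illustrative stand-ins):

    // Sketch only: download to a temporary file, then publish it via rename().
    #include <cstdio>
    #include <string>

    // Stand-in for the libcurl transfer that common.cpp performs; it just
    // writes placeholder bytes so the sketch stays self-contained.
    static bool fetch_url(const char * /*url*/, FILE * out) {
        return fputs("model bytes\n", out) >= 0;
    }

    static bool download_to_path(const char * url, const char * path_model) {
        const std::string path_tmp = std::string(path_model) + ".downloadInProgress";

        FILE * outfile = std::fopen(path_tmp.c_str(), "wb");
        if (!outfile) {
            std::fprintf(stderr, "error opening %s for writing\n", path_tmp.c_str());
            return false;
        }

        const bool ok = fetch_url(url, outfile);
        std::fclose(outfile);
        if (!ok) {
            std::remove(path_tmp.c_str()); // drop the partial download
            return false;
        }

        // Only a fully written file ever reaches path_model.
        if (std::rename(path_tmp.c_str(), path_model) != 0) {
            std::fprintf(stderr, "unable to rename %s to %s\n", path_tmp.c_str(), path_model);
            return false;
        }
        return true;
    }

    int main() {
        return download_to_path("https://example.invalid/model.gguf", "model.gguf") ? 0 : 1;
    }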
.github/workflows/server.yml (vendored, 2 changes)
@@ -117,7 +117,7 @@ jobs:
         run: |
           mkdir build
           cd build
-          cmake .. -DLLAMA_CURL=ON -DCURL_LIBRARY="$env:RUNNER_TEMP/libcurl/lib/libcurl.dll.a" -DCURL_INCLUDE_DIR="$env:RUNNER_TEMP/libcurl/include" -DLLAMA_BUILD_SERVER=ON -DLLAMA_NATIVE=OFF -DBUILD_SHARED_LIBS=ON
+          cmake .. -DLLAMA_CURL=ON -DCURL_LIBRARY="$env:RUNNER_TEMP/libcurl/lib/libcurl.dll.a" -DCURL_INCLUDE_DIR="$env:RUNNER_TEMP/libcurl/include"
           cmake --build . --config Release -j ${env:NUMBER_OF_PROCESSORS} --target server
 
       - name: Python setup
@@ -1748,8 +1748,11 @@ struct llama_model * llama_load_model_from_url(const char * model_url, const cha
 
     // If the ETag or the Last-Modified headers are different: trigger a new download
     if (!file_exists || strcmp(etag, headers.etag) != 0 || strcmp(last_modified, headers.last_modified) != 0) {
+        char path_model_temporary[LLAMA_CURL_MAX_PATH_LENGTH] = {0};
+        snprintf(path_model_temporary, sizeof(path_model_temporary), "%s.downloadInProgress", path_model);
+
         // Set the output file
-        auto * outfile = fopen(path_model, "wb");
+        auto * outfile = fopen(path_model_temporary, "wb");
         if (!outfile) {
             curl_easy_cleanup(curl);
             fprintf(stderr, "%s: error opening local file for writing: %s\n", __func__, path_model);
@@ -1810,6 +1813,12 @@ struct llama_model * llama_load_model_from_url(const char * model_url, const cha
                     headers.last_modified);
             }
         }
+
+        if (rename(path_model_temporary, path_model) != 0) {
+            curl_easy_cleanup(curl);
+            fprintf(stderr, "%s: unable to rename file: %s to %s\n", __func__, path_model_temporary, path_model);
+            return NULL;
+        }
     }
 
     curl_easy_cleanup(curl);
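One design note on the hunk above: the temporary file is created alongside path_model (same directory, so normally the same filesystem), which keeps the final rename() a cheap in-place operation, and an atomic one on POSIX systems, rather than a cross-device copy. A possible caveat, not addressed by this diff: with the MSVC C runtime, rename() fails when the destination already exists, so the re-download-on-stale-ETag path may behave differently on Windows than on POSIX, where rename() silently replaces the old file.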
@@ -109,10 +109,10 @@ def print_server_logs(context):
     out, err = context.server_process.communicate()
     if out:
         print("Server stdout:\n")
-        print(out.decode("utf-8"))
+        print(out.decode('utf-8'))
         print("\n")
     if err:
         print("Server stderr:\n")
-        print(err.decode("utf-8"))
+        print(err.decode('utf-8'))
         print("\n")
 
@@ -4,7 +4,8 @@ Feature: llama.cpp server
 
   Background: Server startup
     Given a server listening on localhost:8080
-    And a model file tinyllamas/stories260K.gguf from HF repo ggml-org/models
+    And a model url https://huggingface.co/ggml-org/models/resolve/main/tinyllamas/stories260K.gguf
+    And a model file stories260K.gguf
     And a model alias tinyllama-2
     And 42 as server seed
     # KV Cache corresponds to the total amount of tokens
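The net effect of the Background change is that the server under test now performs the download itself: instead of the test harness fetching stories260K.gguf from the HF repo up front, the scenario supplies a direct URL plus a local destination file name, exercising llama_load_model_from_url, and with it the new temporary-file rename path, end to end.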