curl: check url of previous download (.json metadata w/ url, etag & lastModified)
This commit is contained in:
parent
e55dfde3b0
commit
0664e9b321
1 changed files with 72 additions and 78 deletions
|
@ -1888,11 +1888,16 @@ void llama_batch_add(
|
||||||
|
|
||||||
#ifdef LLAMA_USE_CURL
|
#ifdef LLAMA_USE_CURL
|
||||||
|
|
||||||
static bool llama_download_file(CURL * curl, const char * url, const char * path) {
|
static bool starts_with(const std::string & str, const std::string & prefix) {
|
||||||
|
// While we wait for C++20's std::string::starts_with...
|
||||||
|
return str.rfind(prefix, 0) == 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static bool llama_download_file(CURL * curl, const std::string & url, const std::string & path) {
|
||||||
bool force_download = false;
|
bool force_download = false;
|
||||||
|
|
||||||
// Set the URL, allow to follow http redirection
|
// Set the URL, allow to follow http redirection
|
||||||
curl_easy_setopt(curl, CURLOPT_URL, url);
|
curl_easy_setopt(curl, CURLOPT_URL, url.c_str());
|
||||||
curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
|
curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
|
||||||
|
|
||||||
#if defined(_WIN32)
|
#if defined(_WIN32)
|
||||||
|
@ -1903,44 +1908,48 @@ static bool llama_download_file(CURL * curl, const char * url, const char * path
|
||||||
|
|
||||||
// Check if the file already exists locally
|
// Check if the file already exists locally
|
||||||
struct stat model_file_info;
|
struct stat model_file_info;
|
||||||
auto file_exists = (stat(path, &model_file_info) == 0);
|
auto file_exists = (stat(path.c_str(), &model_file_info) == 0);
|
||||||
|
|
||||||
// If the file exists, check for ${path_model}.etag or ${path_model}.lastModified files
|
// If the file exists, check for ${path_model}.json file
|
||||||
char etag[LLAMA_CURL_MAX_HEADER_LENGTH] = {0};
|
// Alternatively check for legacy ${path_model}.etag & ${path_model}.lastModified files
|
||||||
char etag_path[PATH_MAX] = {0};
|
std::string metadata_path = path + ".json";
|
||||||
snprintf(etag_path, sizeof(etag_path), "%s.etag", path);
|
|
||||||
|
|
||||||
char last_modified[LLAMA_CURL_MAX_HEADER_LENGTH] = {0};
|
std::string etag;
|
||||||
char last_modified_path[PATH_MAX] = {0};
|
std::string last_modified;
|
||||||
snprintf(last_modified_path, sizeof(last_modified_path), "%s.lastModified", path);
|
nlohmann::json metadata;
|
||||||
|
|
||||||
if (file_exists) {
|
if (file_exists) {
|
||||||
auto * f_etag = fopen(etag_path, "r");
|
// Try and read the JSON metadata file (note: stream autoclosed upon exiting this block).
|
||||||
if (f_etag) {
|
std::ifstream metadata_in(metadata_path);
|
||||||
if (!fgets(etag, sizeof(etag), f_etag)) {
|
if (metadata_in.good()) {
|
||||||
fprintf(stderr, "%s: unable to read file %s\n", __func__, etag_path);
|
try {
|
||||||
} else {
|
metadata_in >> metadata;
|
||||||
fprintf(stderr, "%s: previous file found %s: %s\n", __func__, etag_path, etag);
|
fprintf(stderr, "%s: previous metadata file found %s: %s\n", __func__, metadata_path.c_str(), metadata.dump().c_str());
|
||||||
|
if (metadata.contains("url") && metadata["url"].is_string()) {
|
||||||
|
auto previous_url = metadata["url"].get<std::string>();
|
||||||
|
if (previous_url != url) {
|
||||||
|
fprintf(stderr, "%s: Model URL mismatch: %s != %s\n", __func__, url.c_str(), previous_url.c_str());
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
fclose(f_etag);
|
}
|
||||||
|
if (metadata.contains("etag") && metadata["etag"].is_string()) {
|
||||||
|
etag = metadata["etag"];
|
||||||
|
}
|
||||||
|
if (metadata.contains("lastModified") && metadata["lastModified"].is_string()) {
|
||||||
|
last_modified = metadata["lastModified"];
|
||||||
|
}
|
||||||
|
} catch (const nlohmann::json::exception & e) {
|
||||||
|
fprintf(stderr, "%s: error reading metadata file %s: %s\n", __func__, metadata_path.c_str(), e.what());
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto * f_last_modified = fopen(last_modified_path, "r");
|
|
||||||
if (f_last_modified) {
|
|
||||||
if (!fgets(last_modified, sizeof(last_modified), f_last_modified)) {
|
|
||||||
fprintf(stderr, "%s: unable to read file %s\n", __func__, last_modified_path);
|
|
||||||
} else {
|
|
||||||
fprintf(stderr, "%s: previous file found %s: %s\n", __func__, last_modified_path,
|
|
||||||
last_modified);
|
|
||||||
}
|
|
||||||
fclose(f_last_modified);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Send a HEAD request to retrieve the etag and last-modified headers
|
// Send a HEAD request to retrieve the etag and last-modified headers
|
||||||
struct llama_load_model_from_url_headers {
|
struct llama_load_model_from_url_headers {
|
||||||
char etag[LLAMA_CURL_MAX_HEADER_LENGTH] = {0};
|
std::string etag;
|
||||||
char last_modified[LLAMA_CURL_MAX_HEADER_LENGTH] = {0};
|
std::string last_modified;
|
||||||
};
|
};
|
||||||
llama_load_model_from_url_headers headers;
|
llama_load_model_from_url_headers headers;
|
||||||
{
|
{
|
||||||
|
@ -1948,20 +1957,16 @@ static bool llama_download_file(CURL * curl, const char * url, const char * path
|
||||||
auto header_callback = [](char * buffer, size_t /*size*/, size_t n_items, void * userdata) -> size_t {
|
auto header_callback = [](char * buffer, size_t /*size*/, size_t n_items, void * userdata) -> size_t {
|
||||||
llama_load_model_from_url_headers *headers = (llama_load_model_from_url_headers *) userdata;
|
llama_load_model_from_url_headers *headers = (llama_load_model_from_url_headers *) userdata;
|
||||||
|
|
||||||
// Convert header field name to lowercase
|
std::string header(buffer, n_items);
|
||||||
for (size_t i = 0; i < n_items && buffer[i] != ':'; ++i) {
|
std::smatch match;
|
||||||
buffer[i] = tolower(buffer[i]);
|
if (std::regex_match(header, match, std::regex("([^:]+): (.*)\r\n", std::regex_constants::multiline))) {
|
||||||
|
const std::string & key = match[1];
|
||||||
|
const std::string & value = match[2];
|
||||||
|
if (std::regex_match(key, match, std::regex("ETag", std::regex_constants::icase))) {
|
||||||
|
headers->etag = value;
|
||||||
|
} else if (std::regex_match(key, match, std::regex("Last-Modified", std::regex_constants::icase))) {
|
||||||
|
headers->last_modified = value;
|
||||||
}
|
}
|
||||||
|
|
||||||
const char * etag_prefix = "etag: ";
|
|
||||||
if (strncmp(buffer, etag_prefix, strlen(etag_prefix)) == 0) {
|
|
||||||
strncpy(headers->etag, buffer + strlen(etag_prefix), n_items - strlen(etag_prefix) - 2); // Remove CRLF
|
|
||||||
}
|
|
||||||
|
|
||||||
const char * last_modified_prefix = "last-modified: ";
|
|
||||||
if (strncmp(buffer, last_modified_prefix, strlen(last_modified_prefix)) == 0) {
|
|
||||||
strncpy(headers->last_modified, buffer + strlen(last_modified_prefix),
|
|
||||||
n_items - strlen(last_modified_prefix) - 2); // Remove CRLF
|
|
||||||
}
|
}
|
||||||
return n_items;
|
return n_items;
|
||||||
};
|
};
|
||||||
|
@ -1988,28 +1993,29 @@ static bool llama_download_file(CURL * curl, const char * url, const char * path
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// If the ETag or the Last-Modified headers are different: trigger a new download
|
bool should_download = !file_exists || force_download;
|
||||||
bool should_download = !file_exists
|
if (!should_download && !etag.empty() && !last_modified.empty()) {
|
||||||
|| force_download
|
if (etag != headers.etag || last_modified != headers.last_modified) {
|
||||||
|| (strlen(headers.etag) > 0 && strcmp(etag, headers.etag) != 0)
|
fprintf(stderr, "%s: ETag or Last-Modified headers are different: triggering a new download\n", __func__);
|
||||||
|| (strlen(headers.last_modified) > 0 && strcmp(last_modified, headers.last_modified) != 0);
|
should_download = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
if (should_download) {
|
if (should_download) {
|
||||||
char path_temporary[PATH_MAX] = {0};
|
std::string path_temporary = path + ".downloadInProgress";
|
||||||
snprintf(path_temporary, sizeof(path_temporary), "%s.downloadInProgress", path);
|
|
||||||
if (file_exists) {
|
if (file_exists) {
|
||||||
fprintf(stderr, "%s: deleting previous downloaded file: %s\n", __func__, path);
|
fprintf(stderr, "%s: deleting previous downloaded file: %s\n", __func__, path.c_str());
|
||||||
if (remove(path) != 0) {
|
if (remove(path.c_str()) != 0) {
|
||||||
curl_easy_cleanup(curl);
|
curl_easy_cleanup(curl);
|
||||||
fprintf(stderr, "%s: unable to delete file: %s\n", __func__, path);
|
fprintf(stderr, "%s: unable to delete file: %s\n", __func__, path.c_str());
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Set the output file
|
// Set the output file
|
||||||
auto * outfile = fopen(path_temporary, "wb");
|
auto * outfile = fopen(path_temporary.c_str(), "wb");
|
||||||
if (!outfile) {
|
if (!outfile) {
|
||||||
curl_easy_cleanup(curl);
|
curl_easy_cleanup(curl);
|
||||||
fprintf(stderr, "%s: error opening local file for writing: %s\n", __func__, path);
|
fprintf(stderr, "%s: error opening local file for writing: %s\n", __func__, path.c_str());
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2041,7 +2047,7 @@ static bool llama_download_file(CURL * curl, const char * url, const char * path
|
||||||
|
|
||||||
// start the download
|
// start the download
|
||||||
fprintf(stderr, "%s: downloading from %s to %s (server_etag:%s, server_last_modified:%s)...\n", __func__,
|
fprintf(stderr, "%s: downloading from %s to %s (server_etag:%s, server_last_modified:%s)...\n", __func__,
|
||||||
llama_download_hide_password_in_url(url).c_str(), path, headers.etag, headers.last_modified);
|
llama_download_hide_password_in_url(url).c_str(), path.c_str(), headers.etag.c_str(), headers.last_modified.c_str());
|
||||||
auto res = curl_easy_perform(curl);
|
auto res = curl_easy_perform(curl);
|
||||||
if (res != CURLE_OK) {
|
if (res != CURLE_OK) {
|
||||||
fclose(outfile);
|
fclose(outfile);
|
||||||
|
@ -2062,30 +2068,18 @@ static bool llama_download_file(CURL * curl, const char * url, const char * path
|
||||||
// Clean up
|
// Clean up
|
||||||
fclose(outfile);
|
fclose(outfile);
|
||||||
|
|
||||||
// Write the new ETag to the .etag file
|
// Write the updated JSON metadata file.
|
||||||
if (strlen(headers.etag) > 0) {
|
metadata.update({
|
||||||
auto * etag_file = fopen(etag_path, "w");
|
{"url", url},
|
||||||
if (etag_file) {
|
{"etag", headers.etag},
|
||||||
fputs(headers.etag, etag_file);
|
{"lastModified", headers.last_modified}
|
||||||
fclose(etag_file);
|
});
|
||||||
fprintf(stderr, "%s: file etag saved %s: %s\n", __func__, etag_path, headers.etag);
|
std::ofstream(metadata_path) << metadata.dump(4);
|
||||||
}
|
fprintf(stderr, "%s: file metadata saved: %s\n", __func__, metadata_path.c_str());
|
||||||
}
|
|
||||||
|
|
||||||
// Write the new lastModified to the .etag file
|
if (rename(path_temporary.c_str(), path.c_str()) != 0) {
|
||||||
if (strlen(headers.last_modified) > 0) {
|
|
||||||
auto * last_modified_file = fopen(last_modified_path, "w");
|
|
||||||
if (last_modified_file) {
|
|
||||||
fputs(headers.last_modified, last_modified_file);
|
|
||||||
fclose(last_modified_file);
|
|
||||||
fprintf(stderr, "%s: file last modified saved %s: %s\n", __func__, last_modified_path,
|
|
||||||
headers.last_modified);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (rename(path_temporary, path) != 0) {
|
|
||||||
curl_easy_cleanup(curl);
|
curl_easy_cleanup(curl);
|
||||||
fprintf(stderr, "%s: unable to rename file: %s to %s\n", __func__, path_temporary, path);
|
fprintf(stderr, "%s: unable to rename file: %s to %s\n", __func__, path_temporary.c_str(), path.c_str());
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue