From f2a4777d4afd1346d3d5a7a686b20ccdd9ff69a5 Mon Sep 17 00:00:00 2001 From: Jan Boon Date: Sat, 6 Apr 2024 00:44:37 +0800 Subject: [PATCH] strict filename validation --- common/common.cpp | 57 ++++++++++++++++++++++++++++++++++++++ common/common.h | 2 ++ examples/server/server.cpp | 4 +-- 3 files changed, 61 insertions(+), 2 deletions(-) diff --git a/common/common.cpp b/common/common.cpp index 3e2df6e34..5c4b6f1e9 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -1500,6 +1500,63 @@ std::string gpt_random_prompt(std::mt19937 & rng) { GGML_UNREACHABLE(); } +bool validate_file_name(const std::string & filename) { + if (filename.length() > 255) { + // Limit at common largest possible filename on Linux filesystems + // to avoid unnecessary further validation + // (On systems with smaller limits it will be caught by the OS) + return false; + } + + std::u32string filename_utf32; + try { + std::wstring_convert, char32_t> converter; + filename_utf32 = converter.from_bytes(filename); + + // If the reverse conversion mismatches, it means overlong UTF-8 sequences were used, + // or invalid encodings were encountered. Reject such attempts + std::string filename_reencoded = converter.to_bytes(filename_utf32); + if (filename_reencoded != filename) { + return false; + } + } catch (const std::exception &) { + return false; + } + + // Check for forbidden codepoints: + // - Control characters + // - Unicode equivalents of illegal characters + // - UTF-16 surrogate pairs + // - UTF-8 replacement character + // - Illegal characters: / \ : * ? " < > | + for (char32_t c : filename_utf32) { + if (c <= 0x1F // Control characters (C0) + || c == 0x7F // Control characters (DEL) + || (c >= 0x80 && c <= 0x9F) // Control characters (C1) + || c == 0xFF0E // Fullwidth Full Stop (period equivalent) + || c == 0x2215 // Division Slash (forward slash equivalent) + || c == 0x2216 // Set Minus (backslash equivalent) + || (c >= 0xD800 && c <= 0xDFFF) // UTF-16 surrogate pairs + || c == 0xFFFD // Replacement Character (UTF-8) + || c == '/' || c == '\\' || c == ':' || c == '*' // Illegal characters + || c == '?' || c == '"' || c == '<' || c == '>' || c == '|') { + return false; + } + } + + // Reject any ".." (this is stricter than checking for ../ combinations) + if (filename.find("..") != std::string::npos) { + return false; + } + + // Reject "." + if (filename == ".") { + return false; + } + + return true; +} + // // String utils // diff --git a/common/common.h b/common/common.h index 99ee90bc3..4635e05d6 100644 --- a/common/common.h +++ b/common/common.h @@ -179,6 +179,8 @@ std::string gpt_random_prompt(std::mt19937 & rng); void process_escapes(std::string& input); +bool validate_file_name(const std::string & filename); + // // String utils // diff --git a/examples/server/server.cpp b/examples/server/server.cpp index 1a9b7f49d..6c64fe3e1 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -3278,7 +3278,7 @@ int main(int argc, char ** argv) { const auto handle_slots_save = [&ctx_server, &res_error, &sparams](const httplib::Request & req, httplib::Response & res, int id_slot) { json request_data = json::parse(req.body); std::string filename = request_data["filename"]; - if (filename.find('/') != std::string::npos || filename.find('\\') != std::string::npos || filename.find("..") != std::string::npos) { + if (!validate_file_name(filename)) { res_error(res, format_error_response("Invalid filename", ERROR_TYPE_INVALID_REQUEST)); return; } @@ -3308,7 +3308,7 @@ int main(int argc, char ** argv) { const auto handle_slots_restore = [&ctx_server, &res_error, &sparams](const httplib::Request & req, httplib::Response & res, int id_slot) { json request_data = json::parse(req.body); std::string filename = request_data["filename"]; - if (filename.find('/') != std::string::npos || filename.find('\\') != std::string::npos || filename.find("..") != std::string::npos) { + if (!validate_file_name(filename)) { res_error(res, format_error_response("Invalid filename", ERROR_TYPE_INVALID_REQUEST)); return; }