strict filename validation

This commit is contained in:
Jan Boon 2024-04-06 00:44:37 +08:00
parent d9fd0d7eb8
commit f2a4777d4a
3 changed files with 61 additions and 2 deletions

View file

@ -1500,6 +1500,63 @@ std::string gpt_random_prompt(std::mt19937 & rng) {
GGML_UNREACHABLE();
}
bool validate_file_name(const std::string & filename) {
if (filename.length() > 255) {
// Limit at common largest possible filename on Linux filesystems
// to avoid unnecessary further validation
// (On systems with smaller limits it will be caught by the OS)
return false;
}
std::u32string filename_utf32;
try {
std::wstring_convert<std::codecvt_utf8<char32_t>, char32_t> converter;
filename_utf32 = converter.from_bytes(filename);
// If the reverse conversion mismatches, it means overlong UTF-8 sequences were used,
// or invalid encodings were encountered. Reject such attempts
std::string filename_reencoded = converter.to_bytes(filename_utf32);
if (filename_reencoded != filename) {
return false;
}
} catch (const std::exception &) {
return false;
}
// Check for forbidden codepoints:
// - Control characters
// - Unicode equivalents of illegal characters
// - UTF-16 surrogate pairs
// - UTF-8 replacement character
// - Illegal characters: / \ : * ? " < > |
for (char32_t c : filename_utf32) {
if (c <= 0x1F // Control characters (C0)
|| c == 0x7F // Control characters (DEL)
|| (c >= 0x80 && c <= 0x9F) // Control characters (C1)
|| c == 0xFF0E // Fullwidth Full Stop (period equivalent)
|| c == 0x2215 // Division Slash (forward slash equivalent)
|| c == 0x2216 // Set Minus (backslash equivalent)
|| (c >= 0xD800 && c <= 0xDFFF) // UTF-16 surrogate pairs
|| c == 0xFFFD // Replacement Character (UTF-8)
|| c == '/' || c == '\\' || c == ':' || c == '*' // Illegal characters
|| c == '?' || c == '"' || c == '<' || c == '>' || c == '|') {
return false;
}
}
// Reject any ".." (this is stricter than checking for ../ combinations)
if (filename.find("..") != std::string::npos) {
return false;
}
// Reject "."
if (filename == ".") {
return false;
}
return true;
}
//
// String utils
//

View file

@ -179,6 +179,8 @@ std::string gpt_random_prompt(std::mt19937 & rng);
void process_escapes(std::string& input);
bool validate_file_name(const std::string & filename);
//
// String utils
//

View file

@ -3278,7 +3278,7 @@ int main(int argc, char ** argv) {
const auto handle_slots_save = [&ctx_server, &res_error, &sparams](const httplib::Request & req, httplib::Response & res, int id_slot) {
json request_data = json::parse(req.body);
std::string filename = request_data["filename"];
if (filename.find('/') != std::string::npos || filename.find('\\') != std::string::npos || filename.find("..") != std::string::npos) {
if (!validate_file_name(filename)) {
res_error(res, format_error_response("Invalid filename", ERROR_TYPE_INVALID_REQUEST));
return;
}
@ -3308,7 +3308,7 @@ int main(int argc, char ** argv) {
const auto handle_slots_restore = [&ctx_server, &res_error, &sparams](const httplib::Request & req, httplib::Response & res, int id_slot) {
json request_data = json::parse(req.body);
std::string filename = request_data["filename"];
if (filename.find('/') != std::string::npos || filename.find('\\') != std::string::npos || filename.find("..") != std::string::npos) {
if (!validate_file_name(filename)) {
res_error(res, format_error_response("Invalid filename", ERROR_TYPE_INVALID_REQUEST));
return;
}