JSON schema conversion: ⚡️ faster repetitions, min/maxLength for strings, cap number length (#6555)
* json: rename python schema converter to make import easier
* server: skip null json_schema / grammar fields
* json: deps management for primitive rules (+ allow null values)
* json: optimize repetitions for minItems/maxItems and regexps: `a{,3}` goes from `"a"? "a"? "a"?` (explosive combos) to `(a (a (a)?)?)?`
* grammars: add troubleshooting section to readme
* json: cap length of numbers to 15 digits before/after decimal point
(avoids infinite gen, e.g. "one third" -> `0.333333333333...`)
* json: unify all repetition code (w/ or w/o sep)
* json: support string minLength/maxLength
* server+json: update server/README w/ result_format
* nits
* json: fix type error w/ python 3.8
* json: fix server/README (json_schema in /completion vs. result_format in /v1/chat/completions)
* json: simplify DOT `{"type": "string", "pattern": "^.$"}`
* json: remove recursion in opt_repetitions (avoids Python stack overflow)
* json: rm dead code
* json: rm useless assert & ggml.h import
			
			
This commit is contained in:
		
							parent
							
								
									fbbc030ba9
								
							
						
					
					
						commit
						ab9a3240a9
					
				
					 10 changed files with 2348 additions and 1929 deletions
				
			
		|  | @ -11,35 +11,101 @@ | |||
| 
 | ||||
| using json = nlohmann::ordered_json; | ||||
| 
 | ||||
| template <typename Iterator> | ||||
| static std::string join(Iterator begin, Iterator end, const std::string & separator); | ||||
| 
 | ||||
| static std::string repeat(const std::string & str, size_t n); | ||||
| 
 | ||||
| static std::string build_repetition(const std::string & item_rule, int min_items, int max_items, const std::string & separator_rule = "", bool item_rule_is_literal = false) { | ||||
|     if (separator_rule.empty()) { | ||||
|         if (min_items == 0 && max_items == 1) { | ||||
|             return item_rule + "?"; | ||||
|         } else if (min_items == 1 && max_items == std::numeric_limits<int>::max()) { | ||||
|             return item_rule + "+"; | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     std::string result; | ||||
|     if (min_items > 0) { | ||||
|         if (item_rule_is_literal && separator_rule.empty()) { | ||||
|             result = "\"" + repeat(std::string(item_rule.begin() + 1, item_rule.end() - 1), min_items) + "\""; | ||||
|         } else { | ||||
|             std::vector<std::string> items(min_items, item_rule); | ||||
|             result = join(items.begin(), items.end(), separator_rule.empty() ? " " : " " + separator_rule + " "); | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     std::function<std::string(int, bool)> opt_repetitions = [&](int up_to_n, bool prefix_with_sep) -> std::string { | ||||
|         auto content = prefix_with_sep && !separator_rule.empty() ? separator_rule + " " + item_rule : item_rule; | ||||
| 
 | ||||
|         if (up_to_n == 0) { | ||||
|             return ""; | ||||
|         } else if (up_to_n == 1) { | ||||
|             return "(" + content + ")?"; | ||||
|         } else if (!separator_rule.empty() && !prefix_with_sep) { | ||||
|             return "(" + content + " " + opt_repetitions(up_to_n - 1, true) + ")?"; | ||||
|         } else { | ||||
|             std::string res = repeat("(" + content + " ", up_to_n); | ||||
|             // strip trailing space
 | ||||
|             res = res.substr(0, res.length() - 1); | ||||
|             res += repeat(")?", up_to_n); | ||||
|             return res; | ||||
|         } | ||||
|     }; | ||||
| 
 | ||||
|     if (min_items > 0 && max_items != min_items) { | ||||
|         result += " "; | ||||
|     } | ||||
| 
 | ||||
|     if (max_items != std::numeric_limits<int>::max()) { | ||||
|         result += opt_repetitions(max_items - min_items, min_items > 0); | ||||
|     } else { | ||||
|         std::string item_operator = "(" + (separator_rule.empty() ? "" : separator_rule + " ") + item_rule + ")"; | ||||
|         if (min_items == 0 && !separator_rule.empty()) { | ||||
|             result = "(" + item_rule + " " + item_operator + "*)?"; | ||||
|         } else { | ||||
|             result += item_operator + "*"; | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     return result; | ||||
| } | ||||
| 
 | ||||
| const std::string SPACE_RULE = "\" \"?"; | ||||
| 
 | ||||
| std::unordered_map<std::string, std::string> PRIMITIVE_RULES = { | ||||
|     {"boolean", "(\"true\" | \"false\") space"}, | ||||
|     {"number", "(\"-\"? ([0-9] | [1-9] [0-9]*)) (\".\" [0-9]+)? ([eE] [-+]? [0-9]+)? space"}, | ||||
|     {"integer", "(\"-\"? ([0-9] | [1-9] [0-9]*)) space"}, | ||||
|     {"value", "object | array | string | number | boolean"}, | ||||
|     {"object", "\"{\" space ( string \":\" space value (\",\" space string \":\" space value)* )? \"}\" space"}, | ||||
|     {"array", "\"[\" space ( value (\",\" space value)* )? \"]\" space"}, | ||||
|     {"uuid", "\"\\\"\" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] " | ||||
|                 "\"-\" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] " | ||||
|                 "\"-\" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] " | ||||
|                 "\"-\" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] " | ||||
|                 "\"-\" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] \"\\\"\" space"}, | ||||
|     {"string", " \"\\\"\" (\n" | ||||
|                "        [^\"\\\\] |\n" | ||||
|                "        \"\\\\\" ([\"\\\\/bfnrt] | \"u\" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])\n" | ||||
|                "      )* \"\\\"\" space"}, | ||||
|     {"null", "\"null\" space"} | ||||
| struct BuiltinRule { | ||||
|     std::string content; | ||||
|     std::vector<std::string> deps; | ||||
| }; | ||||
| std::vector<std::string> OBJECT_RULE_NAMES = {"object", "array", "string", "number", "boolean", "null", "value"}; | ||||
| 
 | ||||
| std::unordered_map<std::string, std::string> DATE_RULES = { | ||||
|     {"date", "[0-9] [0-9] [0-9] [0-9] \"-\" ( \"0\" [1-9] | \"1\" [0-2] ) \"-\" ( \"0\" [1-9] | [1-2] [0-9] | \"3\" [0-1] )"}, | ||||
|     {"time", "([01] [0-9] | \"2\" [0-3]) \":\" [0-5] [0-9] \":\" [0-5] [0-9] ( \".\" [0-9] [0-9] [0-9] )? ( \"Z\" | ( \"+\" | \"-\" ) ( [01] [0-9] | \"2\" [0-3] ) \":\" [0-5] [0-9] )"}, | ||||
|     {"date-time", "date \"T\" time"}, | ||||
|     {"date-string", "\"\\\"\" date \"\\\"\" space"}, | ||||
|     {"time-string", "\"\\\"\" time \"\\\"\" space"}, | ||||
|     {"date-time-string", "\"\\\"\" date-time \"\\\"\" space"} | ||||
| const std::string _up_to_15_digits = build_repetition("[0-9]", 0, 15); | ||||
| 
 | ||||
| std::unordered_map<std::string, BuiltinRule> PRIMITIVE_RULES = { | ||||
|     {"boolean", {"(\"true\" | \"false\") space", {}}}, | ||||
|     {"decimal-part", {"[0-9] " + _up_to_15_digits, {}}}, | ||||
|     {"integral-part", {"[0-9] | [1-9] " + _up_to_15_digits, {}}}, | ||||
|     {"number", {"(\"-\"? integral-part) (\".\" decimal-part)? ([eE] [-+]? integral-part)? space", {"integral-part", "decimal-part"}}}, | ||||
|     {"integer", {"(\"-\"? integral-part) space", {"integral-part"}}}, | ||||
|     {"value", {"object | array | string | number | boolean | null", {"object", "array", "string", "number", "boolean", "null"}}}, | ||||
|     {"object", {"\"{\" space ( string \":\" space value (\",\" space string \":\" space value)* )? \"}\" space", {"string", "value"}}}, | ||||
|     {"array", {"\"[\" space ( value (\",\" space value)* )? \"]\" space", {"value"}}}, | ||||
|     {"uuid", {"\"\\\"\" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] " | ||||
|                 "\"-\" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] " | ||||
|                 "\"-\" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] " | ||||
|                 "\"-\" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] " | ||||
|                 "\"-\" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] \"\\\"\" space", {}}}, | ||||
|     {"char",   {"[^\"\\\\] | \"\\\\\" ([\"\\\\/bfnrt] | \"u\" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])", {}}}, | ||||
|     {"string", {"\"\\\"\" char* \"\\\"\" space", {"char"}}}, | ||||
|     {"null", {"\"null\" space", {}}}, | ||||
| }; | ||||
| 
 | ||||
| std::unordered_map<std::string, BuiltinRule> STRING_FORMAT_RULES = { | ||||
|     {"date", {"[0-9] [0-9] [0-9] [0-9] \"-\" ( \"0\" [1-9] | \"1\" [0-2] ) \"-\" ( \"0\" [1-9] | [1-2] [0-9] | \"3\" [0-1] )", {}}}, | ||||
|     {"time", {"([01] [0-9] | \"2\" [0-3]) \":\" [0-5] [0-9] \":\" [0-5] [0-9] ( \".\" [0-9] [0-9] [0-9] )? ( \"Z\" | ( \"+\" | \"-\" ) ( [01] [0-9] | \"2\" [0-3] ) \":\" [0-5] [0-9] )", {}}}, | ||||
|     {"date-time", {"date \"T\" time", {"date", "time"}}}, | ||||
|     {"date-string", {"\"\\\"\" date \"\\\"\" space", {"date"}}}, | ||||
|     {"time-string", {"\"\\\"\" time \"\\\"\" space", {"time"}}}, | ||||
|     {"date-time-string", {"\"\\\"\" date-time \"\\\"\" space", {"date-time"}}} | ||||
| }; | ||||
| 
 | ||||
| static bool is_reserved_name(const std::string & name) { | ||||
|  | @ -47,7 +113,7 @@ static bool is_reserved_name(const std::string & name) { | |||
|     if (RESERVED_NAMES.empty()) { | ||||
|         RESERVED_NAMES.insert("root"); | ||||
|         for (const auto &p : PRIMITIVE_RULES) RESERVED_NAMES.insert(p.first); | ||||
|         for (const auto &p : DATE_RULES) RESERVED_NAMES.insert(p.first); | ||||
|         for (const auto &p : STRING_FORMAT_RULES) RESERVED_NAMES.insert(p.first); | ||||
|     } | ||||
|     return RESERVED_NAMES.find(name) != RESERVED_NAMES.end(); | ||||
| } | ||||
|  | @ -192,7 +258,7 @@ private: | |||
|                 if (_dotall) { | ||||
|                     rule = "[\\U00000000-\\U0010FFFF]"; | ||||
|                 } else { | ||||
|                     rule = "[\\U00000000-\\x09\\x0B\\x0C\\x0E-\\U0010FFFF]"; | ||||
|                     rule = "[^\\x0A\\x0D]"; | ||||
|                 } | ||||
|                 return _add_rule("dot", rule); | ||||
|             }; | ||||
|  | @ -308,47 +374,21 @@ private: | |||
|                     auto &sub = last.first; | ||||
|                     auto sub_is_literal = last.second; | ||||
| 
 | ||||
|                     if (min_times == 0 && max_times == std::numeric_limits<int>::max()) { | ||||
|                         sub += "*"; | ||||
|                     } else if (min_times == 0 && max_times == 1) { | ||||
|                         sub += "?"; | ||||
|                     } else if (min_times == 1 && max_times == std::numeric_limits<int>::max()) { | ||||
|                         sub += "+"; | ||||
|                     } else { | ||||
|                         if (!sub_is_literal) { | ||||
|                             std::string & sub_id = sub_rule_ids[sub]; | ||||
|                             if (sub_id.empty()) { | ||||
|                                 sub_id = _add_rule(name + "-" + std::to_string(sub_rule_ids.size()), sub); | ||||
|                             } | ||||
|                             sub = sub_id; | ||||
|                     if (!sub_is_literal) { | ||||
|                         std::string & sub_id = sub_rule_ids[sub]; | ||||
|                         if (sub_id.empty()) { | ||||
|                             sub_id = _add_rule(name + "-" + std::to_string(sub_rule_ids.size()), sub); | ||||
|                         } | ||||
|                         std::string result; | ||||
|                         if (sub_is_literal && min_times > 0) { | ||||
|                             result = "\"" + repeat(sub.substr(1, sub.length() - 2), min_times) + "\""; | ||||
|                         } else { | ||||
|                             for (int j = 0; j < min_times; j++) { | ||||
|                                 if (j > 0) { | ||||
|                                     result += " "; | ||||
|                                 } | ||||
|                                 result += sub; | ||||
|                             } | ||||
|                         } | ||||
|                         if (min_times > 0 && min_times < max_times) { | ||||
|                             result += " "; | ||||
|                         } | ||||
|                         if (max_times == std::numeric_limits<int>::max()) { | ||||
|                             result += sub + "*"; | ||||
|                         } else { | ||||
|                             for (int j = min_times; j < max_times; j++) { | ||||
|                                 if (j > min_times) { | ||||
|                                     result += " "; | ||||
|                                 } | ||||
|                                 result += sub + "?"; | ||||
|                             } | ||||
|                         } | ||||
|                         seq.back().first = result; | ||||
|                         seq.back().second = false; | ||||
|                         sub = sub_id; | ||||
|                     } | ||||
|                     seq.back().first = build_repetition( | ||||
|                         sub_is_literal ? "\"" + sub + "\"" : sub, | ||||
|                         min_times, | ||||
|                         max_times, | ||||
|                         "", | ||||
|                         sub_is_literal | ||||
|                     ); | ||||
|                     seq.back().second = false; | ||||
|                 } else { | ||||
|                     std::string literal; | ||||
|                     auto is_non_literal = [&](char c) { | ||||
|  | @ -424,7 +464,7 @@ private: | |||
|         if (additional_properties.is_object() || (additional_properties.is_boolean() && additional_properties.get<bool>())) { | ||||
|             std::string sub_name = name + (name.empty() ? "" : "-") + "additional"; | ||||
|             std::string value_rule = visit(additional_properties.is_object() ? additional_properties : json::object(), sub_name + "-value"); | ||||
|             std::string kv_rule = _add_rule(sub_name + "-kv", _add_rule("string", PRIMITIVE_RULES.at("string")) + " \":\" space " + value_rule); | ||||
|             std::string kv_rule = _add_rule(sub_name + "-kv", _add_primitive("string", PRIMITIVE_RULES.at("string")) + " \":\" space " + value_rule); | ||||
|             prop_kv_rule_names["*"] = kv_rule; | ||||
|             optional_props.push_back("*"); | ||||
|         } | ||||
|  | @ -486,6 +526,25 @@ private: | |||
|         return rule; | ||||
|     } | ||||
| 
 | ||||
|     std::string _add_primitive(const std::string & name, const BuiltinRule & rule) { | ||||
|         auto n = _add_rule(name, rule.content); | ||||
|         for (const auto & dep : rule.deps) { | ||||
|             BuiltinRule dep_rule; | ||||
|             auto it = PRIMITIVE_RULES.find(dep); | ||||
|             if (it == PRIMITIVE_RULES.end()) { | ||||
|                 it = STRING_FORMAT_RULES.find(dep); | ||||
|                 if (it == STRING_FORMAT_RULES.end()) { | ||||
|                     _errors.push_back("Rule " + dep + " not known"); | ||||
|                     continue; | ||||
|                 } | ||||
|             } | ||||
|             if (_rules.find(dep) == _rules.end()) { | ||||
|                 _add_primitive(dep, it->second); | ||||
|             } | ||||
|         } | ||||
|         return n; | ||||
|     } | ||||
| 
 | ||||
| public: | ||||
|     SchemaConverter( | ||||
|         const std::function<json(const std::string &)> & fetch_json, | ||||
|  | @ -647,49 +706,33 @@ public: | |||
|                 return _add_rule(rule_name, rule); | ||||
|             } else { | ||||
|                 std::string item_rule_name = visit(items, name + (name.empty() ? "" : "-") + "item"); | ||||
|                 std::string list_item_operator = "( \",\" space " + item_rule_name + " )"; | ||||
|                 std::string successive_items; | ||||
|                 int min_items = schema.contains("minItems") ? schema["minItems"].get<int>() : 0; | ||||
|                 json max_items_json = schema.contains("maxItems") ? schema["maxItems"] : json(); | ||||
|                 int max_items = max_items_json.is_number_integer() ? max_items_json.get<int>() : -1; | ||||
|                 if (min_items > 0) { | ||||
|                     successive_items += repeat(list_item_operator, min_items - 1); | ||||
|                     min_items--; | ||||
|                 } | ||||
|                 if (max_items >= 0 && max_items > min_items) { | ||||
|                     successive_items += repeat(list_item_operator + "?", max_items - min_items - 1); | ||||
|                 } else { | ||||
|                     successive_items += list_item_operator + "*"; | ||||
|                 } | ||||
|                 std::string rule; | ||||
|                 if (min_items == 0) { | ||||
|                     rule =  "\"[\" space ( " + item_rule_name + " " + successive_items + " )? \"]\" space"; | ||||
|                 } else { | ||||
|                     rule =  "\"[\" space " + item_rule_name + " " + successive_items + " \"]\" space"; | ||||
|                 } | ||||
|                 return _add_rule(rule_name, rule); | ||||
|                 int max_items = max_items_json.is_number_integer() ? max_items_json.get<int>() : std::numeric_limits<int>::max(); | ||||
| 
 | ||||
|                 return _add_rule(rule_name, "\"[\" space " + build_repetition(item_rule_name, min_items, max_items, "\",\" space") + " \"]\" space"); | ||||
|             } | ||||
|         } else if ((schema_type.is_null() || schema_type == "string") && schema.contains("pattern")) { | ||||
|             return _visit_pattern(schema["pattern"], rule_name); | ||||
|         } else if ((schema_type.is_null() || schema_type == "string") && std::regex_match(schema_format, std::regex("^uuid[1-5]?$"))) { | ||||
|             return _add_rule(rule_name == "root" ? "root" : schema_format, PRIMITIVE_RULES.at("uuid")); | ||||
|         } else if ((schema_type.is_null() || schema_type == "string") && DATE_RULES.find(schema_format) != DATE_RULES.end()) { | ||||
|             for (const auto & kv : DATE_RULES) { | ||||
|                 _add_rule(kv.first, kv.second); | ||||
|             } | ||||
|             return schema_format + "-string"; | ||||
|             return _add_primitive(rule_name == "root" ? "root" : schema_format, PRIMITIVE_RULES.at("uuid")); | ||||
|         } else if ((schema_type.is_null() || schema_type == "string") && STRING_FORMAT_RULES.find(schema_format + "-string") != STRING_FORMAT_RULES.end()) { | ||||
|             auto prim_name = schema_format + "-string"; | ||||
|             return _add_rule(rule_name, _add_primitive(prim_name, STRING_FORMAT_RULES.at(prim_name))); | ||||
|         } else if (schema_type == "string" && (schema.contains("minLength") || schema.contains("maxLength"))) { | ||||
|             std::string char_rule = _add_primitive("char", PRIMITIVE_RULES.at("char")); | ||||
|             int min_len = schema.contains("minLength") ? schema["minLength"].get<int>() : 0; | ||||
|             int max_len = schema.contains("maxLength") ? schema["maxLength"].get<int>() : std::numeric_limits<int>::max(); | ||||
|             return _add_rule(rule_name, "\"\\\"\" " + build_repetition(char_rule, min_len, max_len) + " \"\\\"\" space"); | ||||
|         } else if (schema.empty() || schema_type == "object") { | ||||
|             for (const auto & n : OBJECT_RULE_NAMES) { | ||||
|                 _add_rule(n, PRIMITIVE_RULES.at(n)); | ||||
|             } | ||||
|             return _add_rule(rule_name, "object"); | ||||
|             return _add_rule(rule_name, _add_primitive("object", PRIMITIVE_RULES.at("object"))); | ||||
|         } else { | ||||
|             if (!schema_type.is_string() || PRIMITIVE_RULES.find(schema_type.get<std::string>()) == PRIMITIVE_RULES.end()) { | ||||
|                 _errors.push_back("Unrecognized schema: " + schema.dump()); | ||||
|                 return ""; | ||||
|             } | ||||
|             // TODO: support minimum, maximum, exclusiveMinimum, exclusiveMaximum at least for zero
 | ||||
|             return _add_rule(rule_name == "root" ? "root" : schema_type.get<std::string>(), PRIMITIVE_RULES.at(schema_type.get<std::string>())); | ||||
|             return _add_primitive(rule_name == "root" ? "root" : schema_type.get<std::string>(), PRIMITIVE_RULES.at(schema_type.get<std::string>())); | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|  |  | |||
|  | @ -6,37 +6,94 @@ import re | |||
| import sys | ||||
| from typing import Any, Dict, List, Set, Tuple, Union | ||||
| 
 | ||||
| def _build_repetition(item_rule, min_items, max_items, separator_rule=None, item_rule_is_literal=False): | ||||
|     if not separator_rule: | ||||
|         if min_items == 0 and max_items == 1: | ||||
|             return f'{item_rule}?' | ||||
|         elif min_items == 1 and max_items is None: | ||||
|             return f'{item_rule}+' | ||||
| 
 | ||||
|     result = '' | ||||
| 
 | ||||
|     if min_items > 0: | ||||
|         if item_rule_is_literal and separator_rule is None: | ||||
|             result = '"' + (item_rule[1:-1] * min_items) + '"' | ||||
|         else: | ||||
|             result = (f' {separator_rule} ' if separator_rule else ' ').join([item_rule] * min_items) | ||||
| 
 | ||||
|     def opt_repetitions(up_to_n, prefix_with_sep=False): | ||||
|         ''' | ||||
|             - n=4, no sep:             '(a (a (a (a)?)?)?)?' | ||||
|             - n=4, sep=',', prefix:    '("," a ("," a ("," a ("," a)?)?)?)?' | ||||
|             - n=4, sep=',', no prefix: '(a ("," a ("," a ("," a)?)?)?)?' | ||||
|         ''' | ||||
| 
 | ||||
|         content = f'{separator_rule} {item_rule}' if prefix_with_sep and separator_rule else item_rule | ||||
|         if up_to_n == 0: | ||||
|             return '' | ||||
|         elif up_to_n == 1: | ||||
|             return f'({content})?' | ||||
|         elif separator_rule and not prefix_with_sep: | ||||
|             return f'({content} {opt_repetitions(up_to_n - 1, prefix_with_sep=True)})?' | ||||
|         else: | ||||
|             return (f'({content} ' * up_to_n).rstrip() + (')?' * up_to_n) | ||||
| 
 | ||||
|     if min_items > 0 and max_items != min_items: | ||||
|         result += ' ' | ||||
| 
 | ||||
|     if max_items is not None: | ||||
|         result += opt_repetitions(max_items - min_items, prefix_with_sep=min_items > 0) | ||||
|     else: | ||||
|         item_operator = f'({separator_rule + " " if separator_rule else ""}{item_rule})' | ||||
| 
 | ||||
|         if min_items == 0 and separator_rule: | ||||
|             result = f'({item_rule} {item_operator}*)?' | ||||
|         else: | ||||
|             result += f'{item_operator}*' | ||||
| 
 | ||||
|     return result | ||||
| 
 | ||||
| 
 | ||||
class BuiltinRule:
    '''
        A predefined grammar rule: its body (`content`) plus the names of the
        other rules that body refers to (`deps`).
    '''
    def __init__(self, content: str, deps: Union[List[str], None] = None):
        self.content = content
        # Copy so the rule never shares (and cannot be changed through) the caller's list.
        self.deps = list(deps) if deps else []
| 
 | ||||
| _up_to_15_digits = _build_repetition('[0-9]', 0, 15) | ||||
| 
 | ||||
| # Whitespace is constrained to at most a single space character, to keep the model from | ||||
| # "running away" emitting whitespace. It may also improve generation quality (unverified). | ||||
| SPACE_RULE = '" "?' | ||||
| 
 | ||||
| PRIMITIVE_RULES = { | ||||
|     'boolean': '("true" | "false") space', | ||||
|     'number': '("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space', | ||||
|     'integer': '("-"? ([0-9] | [1-9] [0-9]*)) space', | ||||
|     'value'  : 'object | array | string | number | boolean', | ||||
|     'object' : '"{" space ( string ":" space value ("," space string ":" space value)* )? "}" space', | ||||
|     'array'  : '"[" space ( value ("," space value)* )? "]" space', | ||||
|     'uuid'   : '"\\"" ' + ' "-" '.join('[0-9a-fA-F]' * n for n in [8, 4, 4, 4, 12]) + ' "\\"" space', | ||||
|     'string': r''' "\"" ( | ||||
|         [^"\\] | | ||||
|         "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) | ||||
|       )* "\"" space''', | ||||
|     'null': '"null" space', | ||||
|     'boolean'      : BuiltinRule('("true" | "false") space', []), | ||||
|     'decimal-part' : BuiltinRule('[0-9] ' + _up_to_15_digits, []), | ||||
|     'integral-part': BuiltinRule('[0-9] | [1-9] ' + _up_to_15_digits, []), | ||||
|     'number'       : BuiltinRule('("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space', ['integral-part', 'decimal-part']), | ||||
|     'integer'      : BuiltinRule('("-"? integral-part) space', ['integral-part']), | ||||
|     'value'        : BuiltinRule('object | array | string | number | boolean | null', ['object', 'array', 'string', 'number', 'boolean', 'null']), | ||||
|     'object'       : BuiltinRule('"{" space ( string ":" space value ("," space string ":" space value)* )? "}" space', ['string', 'value']), | ||||
|     'array'        : BuiltinRule('"[" space ( value ("," space value)* )? "]" space', ['value']), | ||||
|     'uuid'         : BuiltinRule(r'"\"" ' + ' "-" '.join('[0-9a-fA-F]' * n for n in [8, 4, 4, 4, 12]) + r' "\"" space', []), | ||||
|     'char'         : BuiltinRule(r'[^"\\] | "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])', []), | ||||
|     'string'       : BuiltinRule(r'"\"" char* "\"" space', ['char']), | ||||
|     'null'         : BuiltinRule('"null" space', []), | ||||
| } | ||||
| OBJECT_RULE_NAMES = ['object', 'array', 'string', 'number', 'boolean', 'null', 'value'] | ||||
| 
 | ||||
| # TODO: support "uri", "email" string formats | ||||
| DATE_RULES = { | ||||
|     'date'   : '[0-9] [0-9] [0-9] [0-9] "-" ( "0" [1-9] | "1" [0-2] ) "-" ( \"0\" [1-9] | [1-2] [0-9] | "3" [0-1] )', | ||||
|     'time'   : '([01] [0-9] | "2" [0-3]) ":" [0-5] [0-9] ":" [0-5] [0-9] ( "." [0-9] [0-9] [0-9] )? ( "Z" | ( "+" | "-" ) ( [01] [0-9] | "2" [0-3] ) ":" [0-5] [0-9] )', | ||||
|     'date-time': 'date "T" time', | ||||
|     'date-string': '"\\"" date "\\"" space', | ||||
|     'time-string': '"\\"" time "\\"" space', | ||||
|     'date-time-string': '"\\"" date-time "\\"" space', | ||||
| STRING_FORMAT_RULES = { | ||||
|     'date'            : BuiltinRule('[0-9] [0-9] [0-9] [0-9] "-" ( "0" [1-9] | "1" [0-2] ) "-" ( \"0\" [1-9] | [1-2] [0-9] | "3" [0-1] )', []), | ||||
|     'time'            : BuiltinRule('([01] [0-9] | "2" [0-3]) ":" [0-5] [0-9] ":" [0-5] [0-9] ( "." [0-9] [0-9] [0-9] )? ( "Z" | ( "+" | "-" ) ( [01] [0-9] | "2" [0-3] ) ":" [0-5] [0-9] )', []), | ||||
|     'date-time'       : BuiltinRule('date "T" time', ['date', 'time']), | ||||
|     'date-string'     : BuiltinRule('"\\"" date "\\"" space', ['date']), | ||||
|     'time-string'     : BuiltinRule('"\\"" time "\\"" space', ['time']), | ||||
|     'date-time-string': BuiltinRule('"\\"" date-time "\\"" space', ['date-time']), | ||||
| } | ||||
| 
 | ||||
| RESERVED_NAMES = set(["root", *PRIMITIVE_RULES.keys(), *DATE_RULES.keys()]) | ||||
| DOTALL = '[\\U00000000-\\U0010FFFF]' | ||||
| DOT = '[^\\x0A\\x0D]' | ||||
| 
 | ||||
| RESERVED_NAMES = set(["root", "dot", *PRIMITIVE_RULES.keys(), *STRING_FORMAT_RULES.keys()]) | ||||
| 
 | ||||
| INVALID_RULE_CHARS_RE = re.compile(r'[^a-zA-Z0-9-]+') | ||||
| GRAMMAR_LITERAL_ESCAPE_RE = re.compile(r'[\r\n"]') | ||||
|  | @ -46,8 +103,6 @@ GRAMMAR_LITERAL_ESCAPES = {'\r': '\\r', '\n': '\\n', '"': '\\"', '-': '\\-', ']' | |||
| NON_LITERAL_SET = set('|.()[]{}*+?') | ||||
| ESCAPED_IN_REGEXPS_BUT_NOT_IN_LITERALS = set('[]()|{}*+?') | ||||
| 
 | ||||
| DATE_PATTERN = '[0-9]{4}-(0[1-9]|1[0-2])-([0-2][0-9]|3[0-1])' | ||||
| TIME_PATTERN = '([01][0-9]|2[0-3])(:[0-5][0-9]){2}(\\.[0-9]{1,3})?(Z|[+-](([01][0-9]|2[0-3]):[0-5][0-9]))' # Cap millisecond precision w/ 3 digits | ||||
| 
 | ||||
| class SchemaConverter: | ||||
|     def __init__(self, *, prop_order, allow_fetch, dotall, raw_pattern): | ||||
|  | @ -55,7 +110,9 @@ class SchemaConverter: | |||
|         self._allow_fetch = allow_fetch | ||||
|         self._dotall = dotall | ||||
|         self._raw_pattern = raw_pattern | ||||
|         self._rules = {'space': SPACE_RULE} | ||||
|         self._rules = { | ||||
|             'space': SPACE_RULE, | ||||
|         } | ||||
|         self._refs = {} | ||||
|         self._refs_being_resolved = set() | ||||
| 
 | ||||
|  | @ -65,6 +122,29 @@ class SchemaConverter: | |||
|         ) | ||||
|         return f'"{escaped}"' | ||||
| 
 | ||||
|     def not_literal(self, literal: str, dotall: bool = True, maybe_escaped_underscores = False) -> str: | ||||
|         ''' | ||||
|             not_literal('a') -> '[^a]' | ||||
|             not_literal('abc') -> '([^a] | "a" ([^b] | "b" ([^c])?)?)?' | ||||
|         ''' | ||||
|         assert len(literal) > 0, 'Empty literal not supported' | ||||
|         def recurse(i: int): | ||||
|             c = literal[i] | ||||
|             if maybe_escaped_underscores and c == '_': | ||||
|                 yield f'[^{c}\\\\]' | ||||
|                 yield ' | ' | ||||
|                 yield f'"\\\\"? "{c}"' | ||||
|             else: | ||||
|                 yield f'[^{c}]' | ||||
|             if i < len(literal) - 1: | ||||
|                 yield ' | ' | ||||
|                 yield self._format_literal(c) | ||||
|                 yield ' (' | ||||
|                 yield from recurse(i + 1) | ||||
|                 yield ')?' | ||||
| 
 | ||||
|         return ''.join(('(', *recurse(0), ')')) | ||||
| 
 | ||||
|     def _add_rule(self, name, rule): | ||||
|         esc_name = INVALID_RULE_CHARS_RE.sub('-', name) | ||||
|         if esc_name not in self._rules or self._rules[esc_name] == rule: | ||||
|  | @ -169,10 +249,10 @@ class SchemaConverter: | |||
| 
 | ||||
|             def get_dot(): | ||||
|                 if self._dotall: | ||||
|                     rule = '[\\U00000000-\\U0010FFFF]' | ||||
|                     rule = DOTALL | ||||
|                 else: | ||||
|                     # Accept any character... except \n and \r line break chars (\x0A and \x0D) | ||||
|                     rule = '[\\U00000000-\\x09\\x0B\\x0C\\x0E-\\U0010FFFF]' | ||||
|                     rule = DOT | ||||
|                 return self._add_rule(f'dot', rule) | ||||
| 
 | ||||
|             def join_seq(): | ||||
|  | @ -246,26 +326,14 @@ class SchemaConverter: | |||
| 
 | ||||
|                     (sub, sub_is_literal) = seq[-1] | ||||
| 
 | ||||
|                     if min_times == 0 and max_times is None: | ||||
|                         seq[-1] = (f'{sub}*', False) | ||||
|                     elif min_times == 0 and max_times == 1: | ||||
|                         seq[-1] = (f'{sub}?', False) | ||||
|                     elif min_times == 1 and max_times is None: | ||||
|                         seq[-1] = (f'{sub}+', False) | ||||
|                     else: | ||||
|                         if not sub_is_literal: | ||||
|                             id = sub_rule_ids.get(sub) | ||||
|                             if id is None: | ||||
|                                 id = self._add_rule(f'{name}-{len(sub_rule_ids) + 1}', sub) | ||||
|                                 sub_rule_ids[sub] = id | ||||
|                             sub = id | ||||
|                     if not sub_is_literal: | ||||
|                         id = sub_rule_ids.get(sub) | ||||
|                         if id is None: | ||||
|                             id = self._add_rule(f'{name}-{len(sub_rule_ids) + 1}', sub) | ||||
|                             sub_rule_ids[sub] = id | ||||
|                         sub = id | ||||
| 
 | ||||
|                         seq[-1] = ( | ||||
|                             ' '.join( | ||||
|                                 ([f'"{sub[1:-1] * min_times}"'] if sub_is_literal else [sub] * min_times) + | ||||
|                                 ([f'{sub}?'] * (max_times - min_times) if max_times is not None else [f'{sub}*'])), | ||||
|                             False | ||||
|                         ) | ||||
|                     seq[-1] = (_build_repetition(f'"{sub}"' if sub_is_literal else sub, min_times, max_times, item_rule_is_literal=sub_is_literal), False) | ||||
|                 else: | ||||
|                     literal = '' | ||||
|                     while i < length: | ||||
|  | @ -373,49 +441,47 @@ class SchemaConverter: | |||
|                     ' "]" space') | ||||
|             else: | ||||
|                 item_rule_name = self.visit(items, f'{name}{"-" if name else ""}item') | ||||
|                 list_item_operator = f'( "," space {item_rule_name} )' | ||||
|                 successive_items = "" | ||||
|                 min_items = schema.get("minItems", 0) | ||||
|                 max_items = schema.get("maxItems") | ||||
|                 if min_items > 0: | ||||
|                     successive_items = list_item_operator * (min_items - 1) | ||||
|                     min_items -= 1 | ||||
|                 if max_items is not None and max_items > min_items: | ||||
|                     successive_items += (list_item_operator + "?") * (max_items - min_items - 1) | ||||
|                 else: | ||||
|                     successive_items += list_item_operator + "*" | ||||
|                 if min_items == 0: | ||||
|                     rule = f'"[" space ( {item_rule_name} {successive_items} )? "]" space' | ||||
|                 else: | ||||
|                     rule = f'"[" space {item_rule_name} {successive_items} "]" space' | ||||
|                 return self._add_rule(rule_name, rule) | ||||
|                 return self._add_rule(rule_name, '"[" space ' + _build_repetition(item_rule_name, min_items, max_items, separator_rule='"," space') + ' "]" space') | ||||
| 
 | ||||
|         elif schema_type in (None, 'string') and 'pattern' in schema: | ||||
|             return self._visit_pattern(schema['pattern'], rule_name) | ||||
| 
 | ||||
|         elif schema_type in (None, 'string') and re.match(r'^uuid[1-5]?$', schema_format or ''): | ||||
|             return self._add_rule( | ||||
|             return self._add_primitive( | ||||
|                 'root' if rule_name == 'root' else schema_format, | ||||
|                 PRIMITIVE_RULES['uuid'] | ||||
|             ) | ||||
| 
 | ||||
|         elif schema_type in (None, 'string') and schema_format in DATE_RULES: | ||||
|             for t, r in DATE_RULES.items(): | ||||
|                 self._add_rule(t, r) | ||||
|             return schema_format + '-string' | ||||
|         elif schema_type in (None, 'string') and f'{schema_format}-string' in STRING_FORMAT_RULES: | ||||
|             prim_name = f'{schema_format}-string' | ||||
|             return self._add_rule(rule_name, self._add_primitive(prim_name, STRING_FORMAT_RULES[prim_name])) | ||||
| 
 | ||||
|         elif schema_type == 'string' and ('minLength' in schema or 'maxLength' in schema): | ||||
|             char_rule = self._add_primitive('char', PRIMITIVE_RULES['char']) | ||||
|             min_len = schema.get('minLength', 0) | ||||
|             max_len = schema.get('maxLength') | ||||
| 
 | ||||
|             return self._add_rule(rule_name, r'"\"" ' + _build_repetition(char_rule, min_len, max_len) + r' "\"" space') | ||||
| 
 | ||||
|         elif (schema_type == 'object') or (len(schema) == 0): | ||||
|             for n in OBJECT_RULE_NAMES: | ||||
|                 self._add_rule(n, PRIMITIVE_RULES[n]) | ||||
|             return self._add_rule(rule_name, 'object') | ||||
|             return self._add_rule(rule_name, self._add_primitive('object', PRIMITIVE_RULES['object'])) | ||||
| 
 | ||||
|         else: | ||||
|             assert schema_type in PRIMITIVE_RULES, f'Unrecognized schema: {schema}' | ||||
|             # TODO: support minimum, maximum, exclusiveMinimum, exclusiveMaximum at least for zero | ||||
|             return self._add_rule( | ||||
|                 'root' if rule_name == 'root' else schema_type, | ||||
|                 PRIMITIVE_RULES[schema_type] | ||||
|             ) | ||||
|             return self._add_primitive('root' if rule_name == 'root' else schema_type, PRIMITIVE_RULES[schema_type]) | ||||
| 
 | ||||
|     def _add_primitive(self, name: str, rule: BuiltinRule): | ||||
|         n = self._add_rule(name, rule.content) | ||||
| 
 | ||||
|         for dep in rule.deps: | ||||
|             dep_rule = PRIMITIVE_RULES.get(dep) or STRING_FORMAT_RULES.get(dep) | ||||
|             assert dep_rule, f'Rule {dep} not known' | ||||
|             if dep not in self._rules: | ||||
|                 self._add_primitive(dep, dep_rule) | ||||
|         return n | ||||
| 
 | ||||
|     def _build_object_rule(self, properties: List[Tuple[str, Any]], required: Set[str], name: str, additional_properties: Union[bool, Any]): | ||||
|         prop_order = self._prop_order | ||||
|  | @ -437,7 +503,7 @@ class SchemaConverter: | |||
|             value_rule = self.visit({} if additional_properties == True else additional_properties, f'{sub_name}-value') | ||||
|             prop_kv_rule_names["*"] = self._add_rule( | ||||
|                 f'{sub_name}-kv', | ||||
|                 self._add_rule('string', PRIMITIVE_RULES['string']) + f' ":" space {value_rule}' | ||||
|                 self._add_primitive('string', PRIMITIVE_RULES['string']) + f' ":" space {value_rule}' | ||||
|             ) | ||||
|             optional_props.append("*") | ||||
| 
 | ||||
|  | @ -8,7 +8,7 @@ print(subprocess.check_output( | |||
|         "python", | ||||
|         os.path.join( | ||||
|         os.path.dirname(os.path.realpath(__file__)), | ||||
|         "json-schema-to-grammar.py"), | ||||
|         "json_schema_to_grammar.py"), | ||||
|         *rest, | ||||
|         "-", | ||||
|         "--raw-pattern", | ||||
|  |  | |||
|  | @ -11,6 +11,7 @@ Set of LLM REST APIs and a simple web front end to interact with llama.cpp. | |||
|  * Continuous batching | ||||
|  * Multimodal (wip) | ||||
|  * Monitoring endpoints | ||||
|  * Schema-constrained JSON response format | ||||
| 
 | ||||
| The project is under active development, and we are [looking for feedback and contributors](https://github.com/ggerganov/llama.cpp/issues/4216). | ||||
| 
 | ||||
|  | @ -250,6 +251,8 @@ node index.js | |||
| 
 | ||||
|     `grammar`: Set grammar for grammar-based sampling.  Default: no grammar | ||||
| 
 | ||||
|     `json_schema`: Set a JSON schema for grammar-based sampling (e.g. `{"items": {"type": "string"}, "minItems": 10, "maxItems": 100}` for a list of strings, or `{}` for any JSON). See [tests](../../tests/test-json-schema-to-grammar.cpp) for supported features.  Default: no JSON schema. | ||||
| 
 | ||||
|     `seed`: Set the random number generator (RNG) seed.  Default: `-1`, which is a random seed. | ||||
| 
 | ||||
|     `ignore_eos`: Ignore end of stream token and continue generating.  Default: `false` | ||||
|  | @ -365,6 +368,8 @@ Notice that each `probs` is an array of length `n_probs`. | |||
| 
 | ||||
|     See [OpenAI Chat Completions API documentation](https://platform.openai.com/docs/api-reference/chat). While some OpenAI-specific features such as function calling aren't supported, llama.cpp `/completion`-specific features such as `mirostat` are supported. | ||||
| 
 | ||||
|     The `response_format` parameter supports both plain JSON output (e.g. `{"type": "json_object"}`) and schema-constrained JSON (e.g. `{"type": "json_object", "schema": {"type": "string", "minLength": 10, "maxLength": 100}}`), similar to other OpenAI-inspired API providers. | ||||
| 
 | ||||
|     *Examples:* | ||||
| 
 | ||||
|     You can use either Python `openai` library with appropriate checkpoints: | ||||
|  |  | |||
										
											
												File diff suppressed because it is too large
												Load diff
											
										
									
								
							|  | @ -1,33 +1,95 @@ | |||
| // WARNING: This file was ported from json-schema-to-grammar.py, please fix bugs / add features there first.
 | ||||
| // WARNING: This file was ported from json_schema_to_grammar.py, please fix bugs / add features there first.
 | ||||
| const SPACE_RULE = '" "?'; | ||||
| 
 | ||||
| function _buildRepetition(itemRule, minItems, maxItems, opts={}) { | ||||
|   const separatorRule = opts.separatorRule ?? ''; | ||||
|   const itemRuleIsLiteral = opts.itemRuleIsLiteral ?? false | ||||
| 
 | ||||
|   if (separatorRule === '') { | ||||
|     if (minItems === 0 && maxItems === 1) { | ||||
|       return `${itemRule}?`; | ||||
|     } else if (minItems === 1 && maxItems === undefined) { | ||||
|       return `${itemRule}+`; | ||||
|     } | ||||
|   } | ||||
| 
 | ||||
|   let result = ''; | ||||
|   if (minItems > 0) { | ||||
|     if (itemRuleIsLiteral && separatorRule === '') { | ||||
|       result = `"${itemRule.slice(1, -1).repeat(minItems)}"`; | ||||
|     } else { | ||||
|       result = Array.from({ length: minItems }, () => itemRule) | ||||
|         .join(separatorRule !== '' ? ` ${separatorRule} ` : ' '); | ||||
|     } | ||||
|   } | ||||
| 
 | ||||
|   const optRepetitions = (upToN, prefixWithSep=false) => { | ||||
|     const content = separatorRule !== '' && prefixWithSep ? `${separatorRule} ${itemRule}` : itemRule; | ||||
|     if (upToN === 0) { | ||||
|       return ''; | ||||
|     } else if (upToN === 1) { | ||||
|       return `(${content})?`; | ||||
|     } else if (separatorRule !== '' && !prefixWithSep) { | ||||
|       return `(${content} ${optRepetitions(upToN - 1, true)})?`; | ||||
|     } else { | ||||
|       return Array.from({ length: upToN }, () => `(${content}`).join(' ').trim() + Array.from({ length: upToN }, () => ')?').join(''); | ||||
|     } | ||||
|   }; | ||||
| 
 | ||||
|   if (minItems > 0 && maxItems !== minItems) { | ||||
|     result += ' '; | ||||
|   } | ||||
| 
 | ||||
|   if (maxItems !== undefined) { | ||||
|     result += optRepetitions(maxItems - minItems, minItems > 0); | ||||
|   } else { | ||||
|     const itemOperator = `(${separatorRule !== '' ? separatorRule + ' ' : ''}${itemRule})`; | ||||
| 
 | ||||
|     if (minItems === 0 && separatorRule !== '') { | ||||
|       result = `(${itemRule} ${itemOperator}*)?`; | ||||
|     } else { | ||||
|       result += `${itemOperator}*`; | ||||
|     } | ||||
|   } | ||||
| 
 | ||||
|   return result; | ||||
| } | ||||
| 
 | ||||
| class BuiltinRule { | ||||
|   constructor(content, deps) { | ||||
|     this.content = content; | ||||
|     this.deps = deps || []; | ||||
|   } | ||||
| } | ||||
| 
 | ||||
| const UP_TO_15_DIGITS = _buildRepetition('[0-9]', 0, 15); | ||||
| 
 | ||||
| const PRIMITIVE_RULES = { | ||||
|   boolean: '("true" | "false") space', | ||||
|   number: '("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space', | ||||
|   integer: '("-"? ([0-9] | [1-9] [0-9]*)) space', | ||||
|   value: 'object | array | string | number | boolean', | ||||
|   object: '"{" space ( string ":" space value ("," space string ":" space value)* )? "}" space', | ||||
|   array: '"[" space ( value ("," space value)* )? "]" space', | ||||
|   uuid: '"\\"" ' + [8, 4, 4, 4, 12].map(n => [...new Array(n)].map(_ => '[0-9a-fA-F]').join('')).join(' "-" ') + ' "\\"" space', | ||||
|   string: ` "\\"" (
 | ||||
|         [^"\\\\] | | ||||
|         "\\\\" (["\\\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) | ||||
|       )* "\\"" space`,
 | ||||
|   null: '"null" space', | ||||
|   boolean        : new BuiltinRule('("true" | "false") space', []), | ||||
|   'decimal-part' : new BuiltinRule('[0-9] ' + UP_TO_15_DIGITS, []), | ||||
|   'integral-part': new BuiltinRule('[0-9] | [1-9] ' + UP_TO_15_DIGITS, []), | ||||
|   number         : new BuiltinRule('("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space', ['integral-part', 'decimal-part']), | ||||
|   integer        : new BuiltinRule('("-"? integral-part) space', ['integral-part']), | ||||
|   value          : new BuiltinRule('object | array | string | number | boolean | null', ['object', 'array', 'string', 'number', 'boolean', 'null']), | ||||
|   object         : new BuiltinRule('"{" space ( string ":" space value ("," space string ":" space value)* )? "}" space', ['string', 'value']), | ||||
|   array          : new BuiltinRule('"[" space ( value ("," space value)* )? "]" space', ['value']), | ||||
|   uuid           : new BuiltinRule('"\\"" ' + [8, 4, 4, 4, 12].map(n => [...new Array(n)].map(_ => '[0-9a-fA-F]').join('')).join(' "-" ') + ' "\\"" space', []), | ||||
|   char           : new BuiltinRule(`[^"\\\\] | "\\\\" (["\\\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])`, []), | ||||
|   string         : new BuiltinRule(`"\\"" char* "\\"" space`, ['char']), | ||||
|   null           : new BuiltinRule('"null" space', []), | ||||
| }; | ||||
| const OBJECT_RULE_NAMES = ['object', 'array', 'string', 'number', 'boolean', 'null', 'value']; | ||||
| 
 | ||||
| // TODO: support "uri", "email" string formats
 | ||||
| const DATE_RULES = { | ||||
|     'date'   : '[0-9] [0-9] [0-9] [0-9] "-" ( "0" [1-9] | "1" [0-2] ) "-" ( \"0\" [1-9] | [1-2] [0-9] | "3" [0-1] )', | ||||
|     'time'   : '([01] [0-9] | "2" [0-3]) ":" [0-5] [0-9] ":" [0-5] [0-9] ( "." [0-9] [0-9] [0-9] )? ( "Z" | ( "+" | "-" ) ( [01] [0-9] | "2" [0-3] ) ":" [0-5] [0-9] )', | ||||
|     'date-time': 'date "T" time', | ||||
|     'date-string': '"\\"" date "\\"" space', | ||||
|     'time-string': '"\\"" time "\\"" space', | ||||
|     'date-time-string': '"\\"" date-time "\\"" space', | ||||
| }; | ||||
| const STRING_FORMAT_RULES = { | ||||
|   'date'            : new BuiltinRule('[0-9] [0-9] [0-9] [0-9] "-" ( "0" [1-9] | "1" [0-2] ) "-" ( \"0\" [1-9] | [1-2] [0-9] | "3" [0-1] )', []), | ||||
|   'time'            : new BuiltinRule('([01] [0-9] | "2" [0-3]) ":" [0-5] [0-9] ":" [0-5] [0-9] ( "." [0-9] [0-9] [0-9] )? ( "Z" | ( "+" | "-" ) ( [01] [0-9] | "2" [0-3] ) ":" [0-5] [0-9] )', []), | ||||
|   'date-time'       : new BuiltinRule('date "T" time', ['date', 'time']), | ||||
|   'date-string'     : new BuiltinRule('"\\"" date "\\"" space', ['date']), | ||||
|   'time-string'     : new BuiltinRule('"\\"" time "\\"" space', ['time']), | ||||
|   'date-time-string': new BuiltinRule('"\\"" date-time "\\"" space', ['date-time']), | ||||
| } | ||||
| 
 | ||||
| const RESERVED_NAMES = {'root': true, ...PRIMITIVE_RULES, ...DATE_RULES}; | ||||
| const RESERVED_NAMES = {'root': true, ...PRIMITIVE_RULES, ...STRING_FORMAT_RULES}; | ||||
| 
 | ||||
| const INVALID_RULE_CHARS_RE = /[^\dA-Za-z-]+/g; | ||||
| const GRAMMAR_LITERAL_ESCAPE_RE = /[\n\r"]/g; | ||||
|  | @ -158,7 +220,7 @@ export class SchemaConverter { | |||
|         rule = '[\\U00000000-\\U0010FFFF]'; | ||||
|       } else { | ||||
|         // Accept any character... except \n and \r line break chars (\x0A and \x0D)
 | ||||
|         rule = '[\\U00000000-\\x09\\x0B\\x0C\\x0E-\\U0010FFFF]'; | ||||
|         rule = '[^\\x0A\\x0D]'; | ||||
|       } | ||||
|       return this._addRule('dot', rule); | ||||
|     }; | ||||
|  | @ -259,26 +321,19 @@ export class SchemaConverter { | |||
| 
 | ||||
|           let [sub, subIsLiteral] = seq[seq.length - 1]; | ||||
| 
 | ||||
|           if (minTimes === 0 && maxTimes === Infinity) { | ||||
|             seq[seq.length - 1] = [`${sub}*`, false]; | ||||
|           } else if (minTimes === 0 && maxTimes === 1) { | ||||
|             seq[seq.length - 1] = [`${sub}?`, false]; | ||||
|           } else if (minTimes === 1 && maxTimes === Infinity) { | ||||
|             seq[seq.length - 1] = [`${sub}+`, false]; | ||||
|           } else { | ||||
|             if (!subIsLiteral) { | ||||
|               let id = subRuleIds[sub]; | ||||
|               if (id === undefined) { | ||||
|                 id = this._addRule(`${name}-${Object.keys(subRuleIds).length + 1}`, sub); | ||||
|                 subRuleIds[sub] = id; | ||||
|               } | ||||
|               sub = id; | ||||
|           if (!subIsLiteral) { | ||||
|             let id = subRuleIds[sub]; | ||||
|             if (id === undefined) { | ||||
|               id = this._addRule(`${name}-${Object.keys(subRuleIds).length + 1}`, sub); | ||||
|               subRuleIds[sub] = id; | ||||
|             } | ||||
| 
 | ||||
|             const repeatedSub = Array.from({ length: minTimes }, () => subIsLiteral ? `"${sub.slice(1, -1).repeat(minTimes)}"` : sub); | ||||
|             const optionalSub = maxTimes !== undefined ? Array.from({ length: maxTimes - minTimes }, () => `${sub}?`) : [`${sub}*`]; | ||||
|             seq[seq.length - 1] = [repeatedSub.concat(optionalSub).join(' '), false]; | ||||
|             sub = id; | ||||
|           } | ||||
| 
 | ||||
|           seq[seq.length - 1] = [ | ||||
|             _buildRepetition(subIsLiteral ? `"${sub}"` : sub, minTimes, maxTimes, {itemRuleIsLiteral: subIsLiteral}), | ||||
|             false | ||||
|           ]; | ||||
|         } else { | ||||
|           let literal = ''; | ||||
|           while (i < length) { | ||||
|  | @ -394,49 +449,50 @@ export class SchemaConverter { | |||
|         ); | ||||
|       } else { | ||||
|         const itemRuleName = this.visit(items, `${name ?? ''}${name ? '-' : ''}item`); | ||||
|         const listItemOperator = `( "," space ${itemRuleName} )`; | ||||
|         let successiveItems = ''; | ||||
|         let minItems = schema.minItems || 0; | ||||
|         const minItems = schema.minItems || 0; | ||||
|         const maxItems = schema.maxItems; | ||||
|         if (minItems > 0) { | ||||
|           successiveItems = listItemOperator.repeat(minItems - 1); | ||||
|           minItems--; | ||||
|         } | ||||
|         if (maxItems !== undefined && maxItems > minItems) { | ||||
|           successiveItems += `${listItemOperator}?`.repeat(maxItems - minItems - 1); | ||||
|         } else { | ||||
|           successiveItems += `${listItemOperator}*`; | ||||
|         } | ||||
|         const rule = minItems === 0 | ||||
|           ? `"[" space ( ${itemRuleName} ${successiveItems} )? "]" space` | ||||
|           : `"[" space ${itemRuleName} ${successiveItems} "]" space`; | ||||
|         return this._addRule(ruleName, rule); | ||||
|         return this._addRule(ruleName, '"[" space ' + _buildRepetition(itemRuleName, minItems, maxItems, {separatorRule: '"," space'}) + ' "]" space'); | ||||
|       } | ||||
|     } else if ((schemaType === undefined || schemaType === 'string') && 'pattern' in schema) { | ||||
|       return this._visitPattern(schema.pattern, ruleName); | ||||
|     } else if ((schemaType === undefined || schemaType === 'string') && /^uuid[1-5]?$/.test(schema.format || '')) { | ||||
|       return this._addRule( | ||||
|           ruleName === 'root' ? 'root' : schemaFormat, | ||||
|           PRIMITIVE_RULES['uuid']) | ||||
|     } else if ((schemaType === undefined || schemaType === 'string') && schema.format in DATE_RULES) { | ||||
|       for (const [t, r] of Object.entries(DATE_RULES)) { | ||||
|         this._addRule(t, r); | ||||
|       } | ||||
|       return schemaFormat + '-string'; | ||||
|       return this._addPrimitive( | ||||
|         ruleName === 'root' ? 'root' : schemaFormat, | ||||
|         PRIMITIVE_RULES['uuid'] | ||||
|       ); | ||||
|     } else if ((schemaType === undefined || schemaType === 'string') && `${schema.format}-string` in STRING_FORMAT_RULES) { | ||||
|       const primName = `${schema.format}-string` | ||||
|       return this._addRule(ruleName, this._addPrimitive(primName, STRING_FORMAT_RULES[primName])); | ||||
|     } else if (schemaType === 'string' && ('minLength' in schema || 'maxLength' in schema)) { | ||||
|       const charRuleName = this._addPrimitive('char', PRIMITIVE_RULES['char']); | ||||
|       const minLen = schema.minLength || 0; | ||||
|       const maxLen = schema.maxLength; | ||||
|       return this._addRule(ruleName, '"\\\"" ' + _buildRepetition(charRuleName, minLen, maxLen) + ' "\\\"" space'); | ||||
|     } else if ((schemaType === 'object') || (Object.keys(schema).length === 0)) { | ||||
|       for (const n of OBJECT_RULE_NAMES) { | ||||
|         this._addRule(n, PRIMITIVE_RULES[n]); | ||||
|       } | ||||
|       return this._addRule(ruleName, 'object'); | ||||
|       return this._addRule(ruleName, this._addPrimitive('object', PRIMITIVE_RULES['object'])); | ||||
|     } else { | ||||
|       if (!(schemaType in PRIMITIVE_RULES)) { | ||||
|         throw new Error(`Unrecognized schema: ${JSON.stringify(schema)}`); | ||||
|       } | ||||
|       // TODO: support minimum, maximum, exclusiveMinimum, exclusiveMaximum at least for zero
 | ||||
|       return this._addRule(ruleName === 'root' ? 'root' : schemaType, PRIMITIVE_RULES[schemaType]); | ||||
|       return this._addPrimitive(ruleName === 'root' ? 'root' : schemaType, PRIMITIVE_RULES[schemaType]); | ||||
|     } | ||||
|   } | ||||
| 
 | ||||
|   _addPrimitive(name, rule) { | ||||
|     let n = this._addRule(name, rule.content); | ||||
|     for (const dep of rule.deps) { | ||||
|       const depRule = PRIMITIVE_RULES[dep] || STRING_FORMAT_RULES[dep]; | ||||
|       if (!depRule) { | ||||
|         throw new Error(`Rule ${dep} not known`); | ||||
|       } | ||||
|       if (!(dep in this._rules)) { | ||||
|         this._addPrimitive(dep, depRule); | ||||
|       } | ||||
|     } | ||||
|     return n; | ||||
|   } | ||||
| 
 | ||||
|   _buildObjectRule(properties, required, name, additionalProperties) { | ||||
|     const propOrder = this._propOrder; | ||||
|     // sort by position in prop_order (if specified) then by original order
 | ||||
|  | @ -462,7 +518,7 @@ export class SchemaConverter { | |||
|       const valueRule = this.visit(additionalProperties === true ? {} : additionalProperties, `${subName}-value`); | ||||
|       propKvRuleNames['*'] = this._addRule( | ||||
|         `${subName}-kv`, | ||||
|         `${this._addRule('string', PRIMITIVE_RULES['string'])} ":" space ${valueRule}`); | ||||
|         `${this._addPrimitive('string', PRIMITIVE_RULES['string'])} ":" space ${valueRule}`); | ||||
|       optionalProps.push('*'); | ||||
|     } | ||||
| 
 | ||||
|  |  | |||
|  | @ -859,7 +859,7 @@ struct server_context { | |||
|         slot.sparams.min_keep          = json_value(data, "min_keep",          default_sparams.min_keep); | ||||
| 
 | ||||
|         // process "json_schema" and "grammar"
 | ||||
|         if (data.contains("json_schema") && data.contains("grammar")) { | ||||
|         if (data.contains("json_schema") && !data["json_schema"].is_null() && data.contains("grammar") && !data["grammar"].is_null()) { | ||||
|             send_error(task, "Either \"json_schema\" or \"grammar\" can be specified, but not both", ERROR_TYPE_INVALID_REQUEST); | ||||
|             return false; | ||||
|         } else if (data.contains("json_schema") && !data.contains("grammar")) { | ||||
|  |  | |||
|  | @ -1,7 +1,7 @@ | |||
| #!/bin/bash | ||||
| # | ||||
| # ./examples/ts-type-to-grammar.sh "{a:string,b:string,c?:string}" | ||||
| # python examples/json-schema-to-grammar.py https://json.schemastore.org/tsconfig.json | ||||
| # python examples/json_schema_to_grammar.py https://json.schemastore.org/tsconfig.json | ||||
| # | ||||
| set -euo pipefail | ||||
| 
 | ||||
|  | @ -25,4 +25,4 @@ npx ts-json-schema-generator --unstable --no-top-ref --path "$DTS_FILE" --type M | |||
| # https://github.com/YousefED/typescript-json-schema | ||||
| # npx typescript-json-schema --defaultProps --required "$DTS_FILE" MyType | tee "$SCHEMA_FILE" >&2 | ||||
| 
 | ||||
| ./examples/json-schema-to-grammar.py "$SCHEMA_FILE" | ||||
| ./examples/json_schema_to_grammar.py "$SCHEMA_FILE" | ||||
|  |  | |||
|  | @ -89,3 +89,13 @@ This guide provides a brief overview. Check out the GBNF files in this directory | |||
| ``` | ||||
| ./main -m <model> --grammar-file grammars/some-grammar.gbnf -p 'Some prompt' | ||||
| ``` | ||||
| 
 | ||||
| ## Troubleshooting | ||||
| 
 | ||||
| Grammars currently have performance gotchas (see https://github.com/ggerganov/llama.cpp/issues/4218). | ||||
| 
 | ||||
| ### Efficient optional repetitions | ||||
| 
 | ||||
| A common pattern is to allow repetitions of a pattern `x` up to N times. | ||||
| 
 | ||||
| While semantically correct, the syntax `x? x? x? ... x?` (with N repetitions) will result in extremely slow inference. Instead, you can write `(x (x (x ... (x)?...)?)?)?` (with N-deep nesting). | ||||
|  |  | |||
|  | @ -104,16 +104,16 @@ static void test_all(const std::string & lang, std::function<void(const TestCase | |||
|         R"""( | ||||
|             array ::= "[" space ( value ("," space value)* )? "]" space | ||||
|             boolean ::= ("true" | "false") space | ||||
|             char ::= [^"\\] | "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) | ||||
|             decimal-part ::= [0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9])?)?)?)?)?)?)?)?)?)?)?)?)?)?)? | ||||
|             integral-part ::= [0-9] | [1-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9])?)?)?)?)?)?)?)?)?)?)?)?)?)?)? | ||||
|             null ::= "null" space | ||||
|             number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space | ||||
|             number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space | ||||
|             object ::= "{" space ( string ":" space value ("," space string ":" space value)* )? "}" space | ||||
|             root ::= object | ||||
|             space ::= " "? | ||||
|             string ::=  "\"" ( | ||||
|                     [^"\\] | | ||||
|                     "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) | ||||
|                     )* "\"" space | ||||
|             value ::= object | array | string | number | boolean | ||||
|             string ::= "\"" char* "\"" space | ||||
|             value ::= object | array | string | number | boolean | null | ||||
|         )""" | ||||
|     }); | ||||
| 
 | ||||
|  | @ -133,10 +133,13 @@ static void test_all(const std::string & lang, std::function<void(const TestCase | |||
|             date-string ::= "\"" date "\"" space | ||||
|             date-time ::= date "T" time | ||||
|             date-time-string ::= "\"" date-time "\"" space | ||||
|             root ::= "[" space date-string "," space uuid "," space time-string "," space date-time-string "]" space | ||||
|             root ::= "[" space tuple-0 "," space uuid "," space tuple-2 "," space tuple-3 "]" space | ||||
|             space ::= " "? | ||||
|             time ::= ([01] [0-9] | "2" [0-3]) ":" [0-5] [0-9] ":" [0-5] [0-9] ( "." [0-9] [0-9] [0-9] )? ( "Z" | ( "+" | "-" ) ( [01] [0-9] | "2" [0-3] ) ":" [0-5] [0-9] ) | ||||
|             time-string ::= "\"" time "\"" space | ||||
|             tuple-0 ::= date-string | ||||
|             tuple-2 ::= time-string | ||||
|             tuple-3 ::= date-time-string | ||||
|             uuid ::= "\"" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] "-" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] "-" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] "-" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] "-" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] "\"" space | ||||
|         )""" | ||||
|     }); | ||||
|  | @ -148,10 +151,65 @@ static void test_all(const std::string & lang, std::function<void(const TestCase | |||
|             "type": "string" | ||||
|         })""", | ||||
|         R"""( | ||||
|             root ::=  "\"" ( | ||||
|                     [^"\\] | | ||||
|                     "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) | ||||
|                     )* "\"" space | ||||
|             char ::= [^"\\] | "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) | ||||
|             root ::= "\"" char* "\"" space | ||||
|             space ::= " "? | ||||
|         )""" | ||||
|     }); | ||||
| 
 | ||||
|     test({ | ||||
|         SUCCESS, | ||||
|         "string w/ min length 1", | ||||
|         R"""({ | ||||
|             "type": "string", | ||||
|             "minLength": 1 | ||||
|         })""", | ||||
|         R"""( | ||||
|             char ::= [^"\\] | "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) | ||||
|             root ::= "\"" char+ "\"" space | ||||
|             space ::= " "? | ||||
|         )""" | ||||
|     }); | ||||
| 
 | ||||
|     test({ | ||||
|         SUCCESS, | ||||
|         "string w/ min length 3", | ||||
|         R"""({ | ||||
|             "type": "string", | ||||
|             "minLength": 3 | ||||
|         })""", | ||||
|         R"""( | ||||
|             char ::= [^"\\] | "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) | ||||
|             root ::= "\"" char char char (char)* "\"" space | ||||
|             space ::= " "? | ||||
|         )""" | ||||
|     }); | ||||
| 
 | ||||
|     test({ | ||||
|         SUCCESS, | ||||
|         "string w/ max length", | ||||
|         R"""({ | ||||
|             "type": "string", | ||||
|             "maxLength": 3 | ||||
|         })""", | ||||
|         R"""( | ||||
|             char ::= [^"\\] | "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) | ||||
|             root ::= "\"" (char (char (char)?)?)? "\"" space | ||||
|             space ::= " "? | ||||
|         )""" | ||||
|     }); | ||||
| 
 | ||||
|     test({ | ||||
|         SUCCESS, | ||||
|         "string w/ min & max length", | ||||
|         R"""({ | ||||
|             "type": "string", | ||||
|             "minLength": 1, | ||||
|             "maxLength": 4 | ||||
|         })""", | ||||
|         R"""( | ||||
|             char ::= [^"\\] | "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) | ||||
|             root ::= "\"" char (char (char (char)?)?)? "\"" space | ||||
|             space ::= " "? | ||||
|         )""" | ||||
|     }); | ||||
|  | @ -175,7 +233,8 @@ static void test_all(const std::string & lang, std::function<void(const TestCase | |||
|             "type": "integer" | ||||
|         })""", | ||||
|         R"""( | ||||
|             root ::= ("-"? ([0-9] | [1-9] [0-9]*)) space | ||||
|             integral-part ::= [0-9] | [1-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9])?)?)?)?)?)?)?)?)?)?)?)?)?)?)? | ||||
|             root ::= ("-"? integral-part) space | ||||
|             space ::= " "? | ||||
|         )""" | ||||
|     }); | ||||
|  | @ -223,12 +282,10 @@ static void test_all(const std::string & lang, std::function<void(const TestCase | |||
|             "prefixItems": [{ "type": "string" }] | ||||
|         })""", | ||||
|         R"""( | ||||
|             char ::= [^"\\] | "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) | ||||
|             root ::= "[" space string "]" space | ||||
|             space ::= " "? | ||||
|             string ::=  "\"" ( | ||||
|                     [^"\\] | | ||||
|                     "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) | ||||
|                     )* "\"" space | ||||
|             string ::= "\"" char* "\"" space | ||||
|         )""" | ||||
|     }); | ||||
| 
 | ||||
|  | @ -239,13 +296,13 @@ static void test_all(const std::string & lang, std::function<void(const TestCase | |||
|             "prefixItems": [{ "type": "string" }, { "type": "number" }] | ||||
|         })""", | ||||
|         R"""( | ||||
|             number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space | ||||
|             char ::= [^"\\] | "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) | ||||
|             decimal-part ::= [0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9])?)?)?)?)?)?)?)?)?)?)?)?)?)?)? | ||||
|             integral-part ::= [0-9] | [1-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9])?)?)?)?)?)?)?)?)?)?)?)?)?)?)? | ||||
|             number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space | ||||
|             root ::= "[" space string "," space number "]" space | ||||
|             space ::= " "? | ||||
|             string ::=  "\"" ( | ||||
|                     [^"\\] | | ||||
|                     "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) | ||||
|                     )* "\"" space | ||||
|             string ::= "\"" char* "\"" space | ||||
|         )""" | ||||
|     }); | ||||
| 
 | ||||
|  | @ -256,7 +313,9 @@ static void test_all(const std::string & lang, std::function<void(const TestCase | |||
|             "type": "number" | ||||
|         })""", | ||||
|         R"""( | ||||
|             root ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space | ||||
|             decimal-part ::= [0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9])?)?)?)?)?)?)?)?)?)?)?)?)?)?)? | ||||
|             integral-part ::= [0-9] | [1-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9])?)?)?)?)?)?)?)?)?)?)?)?)?)?)? | ||||
|             root ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space | ||||
|             space ::= " "? | ||||
|         )""" | ||||
|     }); | ||||
|  | @ -272,7 +331,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase | |||
|         })""", | ||||
|         R"""( | ||||
|             boolean ::= ("true" | "false") space | ||||
|             root ::= "[" space boolean ( "," space boolean )( "," space boolean )* "]" space | ||||
|             root ::= "[" space boolean "," space boolean ("," space boolean)* "]" space | ||||
|             space ::= " "? | ||||
|         )""" | ||||
|     }); | ||||
|  | @ -288,7 +347,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase | |||
|         })""", | ||||
|         R"""( | ||||
|             boolean ::= ("true" | "false") space | ||||
|             root ::= "[" space ( boolean  )? "]" space | ||||
|             root ::= "[" space (boolean)? "]" space | ||||
|             space ::= " "? | ||||
|         )""" | ||||
|     }); | ||||
|  | @ -304,7 +363,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase | |||
|         })""", | ||||
|         R"""( | ||||
|             boolean ::= ("true" | "false") space | ||||
|             root ::= "[" space ( boolean ( "," space boolean )? )? "]" space | ||||
|             root ::= "[" space (boolean ("," space boolean)?)? "]" space | ||||
|             space ::= " "? | ||||
|         )""" | ||||
|     }); | ||||
|  | @ -320,10 +379,12 @@ static void test_all(const std::string & lang, std::function<void(const TestCase | |||
|             "maxItems": 5 | ||||
|         })""", | ||||
|         R"""( | ||||
|             integer ::= ("-"? ([0-9] | [1-9] [0-9]*)) space | ||||
|             decimal-part ::= [0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9])?)?)?)?)?)?)?)?)?)?)?)?)?)?)? | ||||
|             integer ::= ("-"? integral-part) space | ||||
|             integral-part ::= [0-9] | [1-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9])?)?)?)?)?)?)?)?)?)?)?)?)?)?)? | ||||
|             item ::= number | integer | ||||
|             number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space | ||||
|             root ::= "[" space item ( "," space item )( "," space item )( "," space item )?( "," space item )? "]" space | ||||
|             number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space | ||||
|             root ::= "[" space item "," space item "," space item ("," space item ("," space item)?)? "]" space | ||||
|             space ::= " "? | ||||
|         )""" | ||||
|     }); | ||||
|  | @ -372,11 +433,11 @@ static void test_all(const std::string & lang, std::function<void(const TestCase | |||
|         "regexp", | ||||
|         R"""({ | ||||
|             "type": "string", | ||||
|             "pattern": "^(\\([0-9]{1,3}\\))?[0-9]{3}-[0-9]{4} and...$" | ||||
|             "pattern": "^(\\([0-9]{1,3}\\))?[0-9]{3}-[0-9]{4} a{3,5}nd...$" | ||||
|         })""", | ||||
|         R"""( | ||||
|             dot ::= [\U00000000-\x09\x0B\x0C\x0E-\U0010FFFF] | ||||
|             root ::= "\"" ("(" root-1 root-1? root-1? ")")? root-1 root-1 root-1 "-" root-1 root-1 root-1 root-1 " and" dot dot dot "\"" space | ||||
|             dot ::= [^\x0A\x0D] | ||||
|             root ::= "\"" ("(" root-1 (root-1 (root-1)?)? ")")? root-1 root-1 root-1 "-" root-1 root-1 root-1 root-1 " " "aaa" ("a" ("a")?)? "nd" dot dot dot "\"" space | ||||
|             root-1 ::= [0-9] | ||||
|             space ::= " "? | ||||
|         )""" | ||||
|  | @ -404,12 +465,10 @@ static void test_all(const std::string & lang, std::function<void(const TestCase | |||
|             a-kv ::= "\"a\"" space ":" space string | ||||
|             b-kv ::= "\"b\"" space ":" space string | ||||
|             c-kv ::= "\"c\"" space ":" space string | ||||
|             char ::= [^"\\] | "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) | ||||
|             root ::= "{" space b-kv "," space c-kv "," space a-kv "}" space | ||||
|             space ::= " "? | ||||
|             string ::=  "\"" ( | ||||
|                     [^"\\] | | ||||
|                     "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) | ||||
|                     )* "\"" space | ||||
|             string ::= "\"" char* "\"" space | ||||
|         )""" | ||||
|     }); | ||||
| 
 | ||||
|  | @ -426,12 +485,10 @@ static void test_all(const std::string & lang, std::function<void(const TestCase | |||
|         })""", | ||||
|         R"""( | ||||
|             a-kv ::= "\"a\"" space ":" space string | ||||
|             char ::= [^"\\] | "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) | ||||
|             root ::= "{" space  (a-kv )? "}" space | ||||
|             space ::= " "? | ||||
|             string ::=  "\"" ( | ||||
|                     [^"\\] | | ||||
|                     "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) | ||||
|                     )* "\"" space | ||||
|             string ::= "\"" char* "\"" space | ||||
|         )""" | ||||
|     }); | ||||
| 
 | ||||
|  | @ -452,12 +509,10 @@ static void test_all(const std::string & lang, std::function<void(const TestCase | |||
|             b-kv ::= "\"b\"" space ":" space string | ||||
|             b-rest ::= ( "," space c-kv )? | ||||
|             c-kv ::= "\"c\"" space ":" space string | ||||
|             char ::= [^"\\] | "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) | ||||
|             root ::= "{" space  (a-kv a-rest | b-kv b-rest | c-kv )? "}" space | ||||
|             space ::= " "? | ||||
|             string ::=  "\"" ( | ||||
|                     [^"\\] | | ||||
|                     "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) | ||||
|                     )* "\"" space | ||||
|             string ::= "\"" char* "\"" space | ||||
|         )""" | ||||
|     }); | ||||
| 
 | ||||
|  | @ -478,14 +533,12 @@ static void test_all(const std::string & lang, std::function<void(const TestCase | |||
|             a-kv ::= "\"a\"" space ":" space string | ||||
|             b-kv ::= "\"b\"" space ":" space string | ||||
|             c-kv ::= "\"c\"" space ":" space string | ||||
|             char ::= [^"\\] | "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) | ||||
|             d-kv ::= "\"d\"" space ":" space string | ||||
|             d-rest ::= ( "," space c-kv )? | ||||
|             root ::= "{" space b-kv "," space a-kv ( "," space ( d-kv d-rest | c-kv ) )? "}" space | ||||
|             space ::= " "? | ||||
|             string ::=  "\"" ( | ||||
|                     [^"\\] | | ||||
|                     "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) | ||||
|                 )* "\"" space | ||||
|             string ::= "\"" char* "\"" space | ||||
|         )""" | ||||
|     }); | ||||
| 
 | ||||
|  | @ -499,14 +552,14 @@ static void test_all(const std::string & lang, std::function<void(const TestCase | |||
|         R"""( | ||||
|             additional-kv ::= string ":" space additional-value | ||||
|             additional-kvs ::= additional-kv ( "," space additional-kv )* | ||||
|             additional-value ::= "[" space ( number ( "," space number )* )? "]" space | ||||
|             number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space | ||||
|             additional-value ::= "[" space (number ("," space number)*)? "]" space | ||||
|             char ::= [^"\\] | "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) | ||||
|             decimal-part ::= [0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9])?)?)?)?)?)?)?)?)?)?)?)?)?)?)? | ||||
|             integral-part ::= [0-9] | [1-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9])?)?)?)?)?)?)?)?)?)?)?)?)?)?)? | ||||
|             number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space | ||||
|             root ::= "{" space  (additional-kvs )? "}" space | ||||
|             space ::= " "? | ||||
|             string ::=  "\"" ( | ||||
|                     [^"\\] | | ||||
|                     "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) | ||||
|                     )* "\"" space | ||||
|             string ::= "\"" char* "\"" space | ||||
|         )""" | ||||
|     }); | ||||
| 
 | ||||
|  | @ -520,16 +573,16 @@ static void test_all(const std::string & lang, std::function<void(const TestCase | |||
|         R"""( | ||||
|             array ::= "[" space ( value ("," space value)* )? "]" space | ||||
|             boolean ::= ("true" | "false") space | ||||
|             char ::= [^"\\] | "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) | ||||
|             decimal-part ::= [0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9])?)?)?)?)?)?)?)?)?)?)?)?)?)?)? | ||||
|             integral-part ::= [0-9] | [1-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9])?)?)?)?)?)?)?)?)?)?)?)?)?)?)? | ||||
|             null ::= "null" space | ||||
|             number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space | ||||
|             number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space | ||||
|             object ::= "{" space ( string ":" space value ("," space string ":" space value)* )? "}" space | ||||
|             root ::= object | ||||
|             space ::= " "? | ||||
|             string ::=  "\"" ( | ||||
|                     [^"\\] | | ||||
|                     "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) | ||||
|                     )* "\"" space | ||||
|             value ::= object | array | string | number | boolean | ||||
|             string ::= "\"" char* "\"" space | ||||
|             value ::= object | array | string | number | boolean | null | ||||
|         )""" | ||||
|     }); | ||||
| 
 | ||||
|  | @ -542,16 +595,16 @@ static void test_all(const std::string & lang, std::function<void(const TestCase | |||
|         R"""( | ||||
|             array ::= "[" space ( value ("," space value)* )? "]" space | ||||
|             boolean ::= ("true" | "false") space | ||||
|             char ::= [^"\\] | "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) | ||||
|             decimal-part ::= [0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9])?)?)?)?)?)?)?)?)?)?)?)?)?)?)? | ||||
|             integral-part ::= [0-9] | [1-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9])?)?)?)?)?)?)?)?)?)?)?)?)?)?)? | ||||
|             null ::= "null" space | ||||
|             number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space | ||||
|             number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space | ||||
|             object ::= "{" space ( string ":" space value ("," space string ":" space value)* )? "}" space | ||||
|             root ::= object | ||||
|             space ::= " "? | ||||
|             string ::=  "\"" ( | ||||
|                     [^"\\] | | ||||
|                     "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) | ||||
|                     )* "\"" space | ||||
|             value ::= object | array | string | number | boolean | ||||
|             string ::= "\"" char* "\"" space | ||||
|             value ::= object | array | string | number | boolean | null | ||||
|         )""" | ||||
|     }); | ||||
| 
 | ||||
|  | @ -583,13 +636,13 @@ static void test_all(const std::string & lang, std::function<void(const TestCase | |||
|             a-kv ::= "\"a\"" space ":" space number | ||||
|             additional-kv ::= string ":" space string | ||||
|             additional-kvs ::= additional-kv ( "," space additional-kv )* | ||||
|             number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space | ||||
|             char ::= [^"\\] | "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) | ||||
|             decimal-part ::= [0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9])?)?)?)?)?)?)?)?)?)?)?)?)?)?)? | ||||
|             integral-part ::= [0-9] | [1-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9])?)?)?)?)?)?)?)?)?)?)?)?)?)?)? | ||||
|             number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space | ||||
|             root ::= "{" space a-kv ( "," space ( additional-kvs ) )? "}" space | ||||
|             space ::= " "? | ||||
|             string ::=  "\"" ( | ||||
|                     [^"\\] | | ||||
|                     "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) | ||||
|                     )* "\"" space | ||||
|             string ::= "\"" char* "\"" space | ||||
|         )""" | ||||
|     }); | ||||
| 
 | ||||
|  | @ -608,13 +661,13 @@ static void test_all(const std::string & lang, std::function<void(const TestCase | |||
|             a-rest ::= additional-kvs | ||||
|             additional-kv ::= string ":" space number | ||||
|             additional-kvs ::= additional-kv ( "," space additional-kv )* | ||||
|             number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space | ||||
|             char ::= [^"\\] | "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) | ||||
|             decimal-part ::= [0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9])?)?)?)?)?)?)?)?)?)?)?)?)?)?)? | ||||
|             integral-part ::= [0-9] | [1-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9])?)?)?)?)?)?)?)?)?)?)?)?)?)?)? | ||||
|             number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space | ||||
|             root ::= "{" space  (a-kv a-rest | additional-kvs )? "}" space | ||||
|             space ::= " "? | ||||
|             string ::=  "\"" ( | ||||
|                     [^"\\] | | ||||
|                     "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) | ||||
|                     )* "\"" space | ||||
|             string ::= "\"" char* "\"" space | ||||
|         )""" | ||||
|     }); | ||||
| 
 | ||||
|  | @ -636,13 +689,13 @@ static void test_all(const std::string & lang, std::function<void(const TestCase | |||
|             additional-kvs ::= additional-kv ( "," space additional-kv )* | ||||
|             b-kv ::= "\"b\"" space ":" space number | ||||
|             b-rest ::= additional-kvs | ||||
|             number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space | ||||
|             char ::= [^"\\] | "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) | ||||
|             decimal-part ::= [0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9])?)?)?)?)?)?)?)?)?)?)?)?)?)?)? | ||||
|             integral-part ::= [0-9] | [1-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9])?)?)?)?)?)?)?)?)?)?)?)?)?)?)? | ||||
|             number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space | ||||
|             root ::= "{" space a-kv ( "," space ( b-kv b-rest | additional-kvs ) )? "}" space | ||||
|             space ::= " "? | ||||
|             string ::=  "\"" ( | ||||
|                     [^"\\] | | ||||
|                     "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) | ||||
|                     )* "\"" space | ||||
|             string ::= "\"" char* "\"" space | ||||
|         )""" | ||||
|     }); | ||||
| 
 | ||||
|  | @ -650,9 +703,9 @@ static void test_all(const std::string & lang, std::function<void(const TestCase | |||
|         SUCCESS, | ||||
|         "top-level $ref", | ||||
|         R"""({ | ||||
|             "$ref": "#/definitions/MyType", | ||||
|             "$ref": "#/definitions/foo", | ||||
|             "definitions": { | ||||
|                 "MyType": { | ||||
|                 "foo": { | ||||
|                     "type": "object", | ||||
|                     "properties": { | ||||
|                         "a": { | ||||
|  | @ -667,14 +720,12 @@ static void test_all(const std::string & lang, std::function<void(const TestCase | |||
|             } | ||||
|         })""", | ||||
|         R"""( | ||||
|             MyType ::= "{" space MyType-a-kv "}" space | ||||
|             MyType-a-kv ::= "\"a\"" space ":" space string | ||||
|             root ::= MyType | ||||
|             char ::= [^"\\] | "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) | ||||
|             foo ::= "{" space foo-a-kv "}" space | ||||
|             foo-a-kv ::= "\"a\"" space ":" space string | ||||
|             root ::= foo | ||||
|             space ::= " "? | ||||
|             string ::=  "\"" ( | ||||
|                     [^"\\] | | ||||
|                     "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) | ||||
|                     )* "\"" space | ||||
|             string ::= "\"" char* "\"" space | ||||
|         )""" | ||||
|     }); | ||||
| 
 | ||||
|  | @ -701,9 +752,11 @@ static void test_all(const std::string & lang, std::function<void(const TestCase | |||
|             alternative-1 ::= bar | ||||
|             bar ::= "{" space  (bar-b-kv )? "}" space | ||||
|             bar-b-kv ::= "\"b\"" space ":" space number | ||||
|             decimal-part ::= [0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9])?)?)?)?)?)?)?)?)?)?)?)?)?)?)? | ||||
|             foo ::= "{" space  (foo-a-kv )? "}" space | ||||
|             foo-a-kv ::= "\"a\"" space ":" space number | ||||
|             number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space | ||||
|             integral-part ::= [0-9] | [1-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9])?)?)?)?)?)?)?)?)?)?)?)?)?)?)? | ||||
|             number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space | ||||
|             root ::= alternative-0 | alternative-1 | ||||
|             space ::= " "? | ||||
|         )""" | ||||
|  | @ -745,7 +798,9 @@ static void test_all(const std::string & lang, std::function<void(const TestCase | |||
|             c-kv ::= "\"c\"" space ":" space number | ||||
|             d-kv ::= "\"d\"" space ":" space number | ||||
|             d-rest ::= ( "," space c-kv )? | ||||
|             number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space | ||||
|             decimal-part ::= [0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9])?)?)?)?)?)?)?)?)?)?)?)?)?)?)? | ||||
|             integral-part ::= [0-9] | [1-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9])?)?)?)?)?)?)?)?)?)?)?)?)?)?)? | ||||
|             number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space | ||||
|             root ::= "{" space a-kv "," space b-kv ( "," space ( d-kv d-rest | c-kv ) )? "}" space | ||||
|             space ::= " "? | ||||
|         )""" | ||||
|  | @ -786,7 +841,9 @@ static void test_all(const std::string & lang, std::function<void(const TestCase | |||
|             "definitions": {} | ||||
|         })""", | ||||
|         R"""( | ||||
|             number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space | ||||
|             decimal-part ::= [0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9])?)?)?)?)?)?)?)?)?)?)?)?)?)?)? | ||||
|             integral-part ::= [0-9] | [1-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9] ([0-9])?)?)?)?)?)?)?)?)?)?)?)?)?)?)? | ||||
|             number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space | ||||
|             number- ::= "{" space number-number-kv "}" space | ||||
|             number-kv ::= "\"number\"" space ":" space number- | ||||
|             number-number ::= "{" space number-number-root-kv "}" space | ||||
|  | @ -816,7 +873,7 @@ int main() { | |||
|         test_all("Python", [](const TestCase & tc) { | ||||
|             write("test-json-schema-input.tmp", tc.schema); | ||||
|             tc.verify_status(std::system( | ||||
|                 "python ./examples/json-schema-to-grammar.py test-json-schema-input.tmp > test-grammar-output.tmp") == 0 ? SUCCESS : FAILURE); | ||||
|                 "python ./examples/json_schema_to_grammar.py test-json-schema-input.tmp > test-grammar-output.tmp") == 0 ? SUCCESS : FAILURE); | ||||
|             tc.verify(read("test-grammar-output.tmp")); | ||||
|         }); | ||||
|     } else { | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue