grammars: x{min,max} repetition operator (#6640)
* grammars: x{min,max} repetition operator + tweak +/*/? to avoid duplication of original over alternates * grammars: handle `x{n}` and fix `x{n,n}` * grammars: document new repetition operators * grammars: uniform use of int for min & max * grammars: refactor parser test * grammar: parsing tests w/ natural pretty print of updated expectations * grammars: much prettier print of expectations (+ TEST_GRAMMAR_PARSER_PRINT_ALL=1 to force all) * grammars: improve test pretty print again * grammars: pretty print rules and chars * grammars: fix copy rule skipping * grammars: disallow `a{,}` (not allowed in regexps) * Update common/grammar-parser.cpp Co-authored-by: Clint Herron <hanclinto@gmail.com> * grammars: fix copy rule skipping (again) & display of expectations * grammars: more test cases * grammars: update reps parsing to bring ? / * / + closer to before * json: use new GBNF repetitions{m,n} syntax * grammars: update performance gotchas w/ repetition advice * Update examples/json_schema_to_grammar.py Co-authored-by: Clint Herron <hanclinto@gmail.com> * Update examples/server/public/json-schema-to-grammar.mjs Co-authored-by: Clint Herron <hanclinto@gmail.com> * grammars: comment on rule repetitions * grammars: ensure unambiguous number alternatives * grammar: nit typo switched error msgs * grammar: nit numbering in comment * json: update numeric rule to be unambiguous * Apply suggestions from code review Co-authored-by: Clint Herron <hanclinto@gmail.com> * Update examples/server/public/json-schema-to-grammar.mjs Co-authored-by: Clint Herron <hanclinto@gmail.com> * json: fix integral-part * grammar: add repetition tests --------- Co-authored-by: Clint Herron <hanclinto@gmail.com>
This commit is contained in:
parent
f5d7b268ec
commit
55b2d0849d
9 changed files with 736 additions and 418 deletions
|
@ -16,58 +16,27 @@ static std::string join(Iterator begin, Iterator end, const std::string & separa
|
|||
|
||||
static std::string repeat(const std::string & str, size_t n);
|
||||
|
||||
static std::string build_repetition(const std::string & item_rule, int min_items, int max_items, const std::string & separator_rule = "", bool item_rule_is_literal = false) {
|
||||
static std::string build_repetition(const std::string & item_rule, int min_items, int max_items, const std::string & separator_rule = "") {
|
||||
auto has_max = max_items != std::numeric_limits<int>::max();
|
||||
|
||||
if (min_items == 0 && max_items == 1) {
|
||||
return item_rule + "?";
|
||||
}
|
||||
|
||||
if (separator_rule.empty()) {
|
||||
if (min_items == 0 && max_items == 1) {
|
||||
return item_rule + "?";
|
||||
} else if (min_items == 1 && max_items == std::numeric_limits<int>::max()) {
|
||||
if (min_items == 1 && !has_max) {
|
||||
return item_rule + "+";
|
||||
}
|
||||
}
|
||||
|
||||
std::string result;
|
||||
if (min_items > 0) {
|
||||
if (item_rule_is_literal && separator_rule.empty()) {
|
||||
result = "\"" + repeat(std::string(item_rule.begin() + 1, item_rule.end() - 1), min_items) + "\"";
|
||||
} else if (min_items == 0 && !has_max) {
|
||||
return item_rule + "*";
|
||||
} else {
|
||||
std::vector<std::string> items(min_items, item_rule);
|
||||
result = join(items.begin(), items.end(), separator_rule.empty() ? " " : " " + separator_rule + " ");
|
||||
return item_rule + "{" + std::to_string(min_items) + "," + (has_max ? std::to_string(max_items) : "") + "}";
|
||||
}
|
||||
}
|
||||
|
||||
std::function<std::string(int, bool)> opt_repetitions = [&](int up_to_n, bool prefix_with_sep) -> std::string {
|
||||
auto content = prefix_with_sep && !separator_rule.empty() ? separator_rule + " " + item_rule : item_rule;
|
||||
|
||||
if (up_to_n == 0) {
|
||||
return "";
|
||||
} else if (up_to_n == 1) {
|
||||
return "(" + content + ")?";
|
||||
} else if (!separator_rule.empty() && !prefix_with_sep) {
|
||||
return "(" + content + " " + opt_repetitions(up_to_n - 1, true) + ")?";
|
||||
} else {
|
||||
std::string res = repeat("(" + content + " ", up_to_n);
|
||||
// strip trailing space
|
||||
res = res.substr(0, res.length() - 1);
|
||||
res += repeat(")?", up_to_n);
|
||||
return res;
|
||||
}
|
||||
};
|
||||
|
||||
if (min_items > 0 && max_items != min_items) {
|
||||
result += " ";
|
||||
auto result = item_rule + " " + build_repetition("(" + separator_rule + " " + item_rule + ")", min_items == 0 ? 0 : min_items - 1, has_max ? max_items - 1 : max_items);
|
||||
if (min_items == 0) {
|
||||
result = "(" + result + ")?";
|
||||
}
|
||||
|
||||
if (max_items != std::numeric_limits<int>::max()) {
|
||||
result += opt_repetitions(max_items - min_items, min_items > 0);
|
||||
} else {
|
||||
std::string item_operator = "(" + (separator_rule.empty() ? "" : separator_rule + " ") + item_rule + ")";
|
||||
if (min_items == 0 && !separator_rule.empty()) {
|
||||
result = "(" + item_rule + " " + item_operator + "*)?";
|
||||
} else {
|
||||
result += item_operator + "*";
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
@ -78,30 +47,24 @@ struct BuiltinRule {
|
|||
std::vector<std::string> deps;
|
||||
};
|
||||
|
||||
const std::string _up_to_15_digits = build_repetition("[0-9]", 0, 15);
|
||||
|
||||
std::unordered_map<std::string, BuiltinRule> PRIMITIVE_RULES = {
|
||||
{"boolean", {"(\"true\" | \"false\") space", {}}},
|
||||
{"decimal-part", {"[0-9] " + _up_to_15_digits, {}}},
|
||||
{"integral-part", {"[0-9] | [1-9] " + _up_to_15_digits, {}}},
|
||||
{"decimal-part", {"[0-9]{1,16}", {}}},
|
||||
{"integral-part", {"[0] | [1-9] [0-9]{0,15}", {}}},
|
||||
{"number", {"(\"-\"? integral-part) (\".\" decimal-part)? ([eE] [-+]? integral-part)? space", {"integral-part", "decimal-part"}}},
|
||||
{"integer", {"(\"-\"? integral-part) space", {"integral-part"}}},
|
||||
{"value", {"object | array | string | number | boolean | null", {"object", "array", "string", "number", "boolean", "null"}}},
|
||||
{"object", {"\"{\" space ( string \":\" space value (\",\" space string \":\" space value)* )? \"}\" space", {"string", "value"}}},
|
||||
{"array", {"\"[\" space ( value (\",\" space value)* )? \"]\" space", {"value"}}},
|
||||
{"uuid", {"\"\\\"\" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] "
|
||||
"\"-\" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] "
|
||||
"\"-\" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] "
|
||||
"\"-\" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] "
|
||||
"\"-\" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] \"\\\"\" space", {}}},
|
||||
{"char", {"[^\"\\\\] | \"\\\\\" ([\"\\\\/bfnrt] | \"u\" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])", {}}},
|
||||
{"uuid", {"\"\\\"\" [0-9a-fA-F]{8} \"-\" [0-9a-fA-F]{4} \"-\" [0-9a-fA-F]{4} \"-\" [0-9a-fA-F]{4} \"-\" [0-9a-fA-F]{12} \"\\\"\" space", {}}},
|
||||
{"char", {"[^\"\\\\] | \"\\\\\" ([\"\\\\/bfnrt] | \"u\" [0-9a-fA-F]{4})", {}}},
|
||||
{"string", {"\"\\\"\" char* \"\\\"\" space", {"char"}}},
|
||||
{"null", {"\"null\" space", {}}},
|
||||
};
|
||||
|
||||
std::unordered_map<std::string, BuiltinRule> STRING_FORMAT_RULES = {
|
||||
{"date", {"[0-9] [0-9] [0-9] [0-9] \"-\" ( \"0\" [1-9] | \"1\" [0-2] ) \"-\" ( \"0\" [1-9] | [1-2] [0-9] | \"3\" [0-1] )", {}}},
|
||||
{"time", {"([01] [0-9] | \"2\" [0-3]) \":\" [0-5] [0-9] \":\" [0-5] [0-9] ( \".\" [0-9] [0-9] [0-9] )? ( \"Z\" | ( \"+\" | \"-\" ) ( [01] [0-9] | \"2\" [0-3] ) \":\" [0-5] [0-9] )", {}}},
|
||||
{"date", {"[0-9]{4} \"-\" ( \"0\" [1-9] | \"1\" [0-2] ) \"-\" ( \"0\" [1-9] | [1-2] [0-9] | \"3\" [0-1] )", {}}},
|
||||
{"time", {"([01] [0-9] | \"2\" [0-3]) \":\" [0-5] [0-9] \":\" [0-5] [0-9] ( \".\" [0-9]{3} )? ( \"Z\" | ( \"+\" | \"-\" ) ( [01] [0-9] | \"2\" [0-3] ) \":\" [0-5] [0-9] )", {}}},
|
||||
{"date-time", {"date \"T\" time", {"date", "time"}}},
|
||||
{"date-string", {"\"\\\"\" date \"\\\"\" space", {"date"}}},
|
||||
{"time-string", {"\"\\\"\" time \"\\\"\" space", {"time"}}},
|
||||
|
@ -385,8 +348,7 @@ private:
|
|||
sub_is_literal ? "\"" + sub + "\"" : sub,
|
||||
min_times,
|
||||
max_times,
|
||||
"",
|
||||
sub_is_literal
|
||||
""
|
||||
);
|
||||
seq.back().second = false;
|
||||
} else {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue