grammars: x{min,max} repetition operator (#6640)
* grammars: x{min,max} repetition operator + tweak +/*/? to avoid duplication of original over alternates
* grammars: handle `x{n}` and fix `x{n,n}`
* grammars: document new repetition operators
* grammars: uniform use of int for min & max
* grammars: refactor parser test
* grammar: parsing tests w/ natural pretty print of updated expectations
* grammars: much prettier print of expectations (+ TEST_GRAMMAR_PARSER_PRINT_ALL=1 to force all)
* grammars: improve test pretty print again
* grammars: pretty print rules and chars
* grammars: fix copy rule skipping
* grammars: disallow `a{,}` (not allowed in regexps)
* Update common/grammar-parser.cpp
  Co-authored-by: Clint Herron <hanclinto@gmail.com>
* grammars: fix copy rule skipping (again) & display of expectations
* grammars: more test cases
* grammars: update reps parsing to bring ? / * / + closer to before
* json: use new GBNF repetitions{m,n} syntax
* grammars: update performance gotchas w/ repetition advice
* Update examples/json_schema_to_grammar.py
  Co-authored-by: Clint Herron <hanclinto@gmail.com>
* Update examples/server/public/json-schema-to-grammar.mjs
  Co-authored-by: Clint Herron <hanclinto@gmail.com>
* grammars: comment on rule repetitions
* grammars: ensure unambiguous number alternatives
* grammar: nit typo switched error msgs
* grammar: nit numbering in comment
* json: update numeric rule to be unambiguous
* Apply suggestions from code review
  Co-authored-by: Clint Herron <hanclinto@gmail.com>
* Update examples/server/public/json-schema-to-grammar.mjs
  Co-authored-by: Clint Herron <hanclinto@gmail.com>
* json: fix integral-part
* grammar: add repetition tests
---------
Co-authored-by: Clint Herron <hanclinto@gmail.com>
This commit is contained in:
parent
f5d7b268ec
commit
55b2d0849d
9 changed files with 736 additions and 418 deletions
|
@ -2,57 +2,26 @@
|
|||
const SPACE_RULE = '" "?';
|
||||
|
||||
/**
 * Builds a GBNF expression that repeats `itemRule` between `minItems` and
 * `maxItems` times, using the native `?`, `+`, `*` and `{m,n}` operators.
 *
 * @param {string} itemRule - GBNF expression (or rule name) to repeat.
 * @param {number} minItems - Minimum number of occurrences (>= 0).
 * @param {number|undefined} maxItems - Maximum number of occurrences;
 *   `undefined` means unbounded.
 * @param {object} [opts]
 * @param {string} [opts.separatorRule=''] - GBNF expression emitted between
 *   consecutive items (not before the first, not after the last).
 * @param {boolean} [opts.itemRuleIsLiteral] - Accepted for call-site
 *   compatibility; not needed now that repetition operators are used.
 * @returns {string} The GBNF expression; `''` when a separator is given and
 *   `maxItems` is 0 (nothing may be emitted).
 */
function _buildRepetition(itemRule, minItems, maxItems, opts={}) {
  if (minItems === 0 && maxItems === 1) {
    return `${itemRule}?`;
  }

  const separatorRule = opts.separatorRule ?? '';

  if (separatorRule === '') {
    if (minItems === 1 && maxItems === undefined) {
      return `${itemRule}+`;
    }
    if (minItems === 0 && maxItems === undefined) {
      return `${itemRule}*`;
    }
    return `${itemRule}{${minItems},${maxItems !== undefined ? maxItems : ''}}`;
  }

  // With a separator the expansion below always emits a leading item, so a
  // zero upper bound must be handled here; otherwise the tail would be built
  // with maxItems - 1 === -1 and produce an invalid `{0,-1}` bound.
  if (maxItems === 0) {
    return '';
  }

  // item (sep item){min-1,max-1}: the first item carries no separator, every
  // following item is prefixed by one.
  const tail = _buildRepetition(
    `(${separatorRule} ${itemRule})`,
    minItems > 0 ? minItems - 1 : 0,
    maxItems !== undefined ? maxItems - 1 : undefined
  );
  const result = `${itemRule} ${tail}`;

  // When zero items are allowed the whole sequence becomes optional.
  return minItems === 0 ? `(${result})?` : result;
}
|
||||
|
||||
class BuiltinRule {
|
||||
|
@ -62,27 +31,25 @@ class BuiltinRule {
|
|||
}
|
||||
}
|
||||
|
||||
const UP_TO_15_DIGITS = _buildRepetition('[0-9]', 0, 15);
|
||||
|
||||
// Built-in GBNF rules for JSON primitives, keyed by rule name. Each entry is
// constructed from the rule's GBNF body and the list of rule names that body
// references. Fixed-length and bounded runs use the `{n}` / `{m,n}`
// repetition operators.
const PRIMITIVE_RULES = {
  boolean        : new BuiltinRule('("true" | "false") space', []),
  // One to sixteen digits after the decimal point.
  'decimal-part' : new BuiltinRule('[0-9]{1,16}', []),
  // Either a lone zero or a non-zero digit followed by up to 15 digits, so
  // the two alternatives never overlap.
  'integral-part': new BuiltinRule('[0] | [1-9] [0-9]{0,15}', []),
  number         : new BuiltinRule('("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space', ['integral-part', 'decimal-part']),
  integer        : new BuiltinRule('("-"? integral-part) space', ['integral-part']),
  value          : new BuiltinRule('object | array | string | number | boolean | null', ['object', 'array', 'string', 'number', 'boolean', 'null']),
  object         : new BuiltinRule('"{" space ( string ":" space value ("," space string ":" space value)* )? "}" space', ['string', 'value']),
  array          : new BuiltinRule('"[" space ( value ("," space value)* )? "]" space', ['value']),
  // Quoted 8-4-4-4-12 hexadecimal groups.
  uuid           : new BuiltinRule('"\\"" [0-9a-fA-F]{8} "-" [0-9a-fA-F]{4} "-" [0-9a-fA-F]{4} "-" [0-9a-fA-F]{4} "-" [0-9a-fA-F]{12} "\\"" space', []),
  // Any character except quote/backslash, or a backslash escape (including
  // "u" followed by four hex digits).
  char           : new BuiltinRule(`[^"\\\\] | "\\\\" (["\\\\/bfnrt] | "u" [0-9a-fA-F]{4})`, []),
  string         : new BuiltinRule(`"\\"" char* "\\"" space`, ['char']),
  null           : new BuiltinRule('"null" space', []),
};
|
||||
|
||||
// TODO: support "uri", "email" string formats
|
||||
const STRING_FORMAT_RULES = {
|
||||
'date' : new BuiltinRule('[0-9] [0-9] [0-9] [0-9] "-" ( "0" [1-9] | "1" [0-2] ) "-" ( \"0\" [1-9] | [1-2] [0-9] | "3" [0-1] )', []),
|
||||
'time' : new BuiltinRule('([01] [0-9] | "2" [0-3]) ":" [0-5] [0-9] ":" [0-5] [0-9] ( "." [0-9] [0-9] [0-9] )? ( "Z" | ( "+" | "-" ) ( [01] [0-9] | "2" [0-3] ) ":" [0-5] [0-9] )', []),
|
||||
'date' : new BuiltinRule('[0-9]{4} "-" ( "0" [1-9] | "1" [0-2] ) "-" ( \"0\" [1-9] | [1-2] [0-9] | "3" [0-1] )', []),
|
||||
'time' : new BuiltinRule('([01] [0-9] | "2" [0-3]) ":" [0-5] [0-9] ":" [0-5] [0-9] ( "." [0-9]{3} )? ( "Z" | ( "+" | "-" ) ( [01] [0-9] | "2" [0-3] ) ":" [0-5] [0-9] )', []),
|
||||
'date-time' : new BuiltinRule('date "T" time', ['date', 'time']),
|
||||
'date-string' : new BuiltinRule('"\\"" date "\\"" space', ['date']),
|
||||
'time-string' : new BuiltinRule('"\\"" time "\\"" space', ['time']),
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue