json: optimize repetitions for minItems/maxItems and regexps: a{,3} goes from "a"? "a"? "a"? (explosive combinations) to ("a" ("a" ("a")?)?)?
This commit is contained in:
parent
159b883bd4
commit
a59e9431fc
5 changed files with 1730 additions and 1659 deletions
|
@ -11,6 +11,23 @@
|
|||
|
||||
using json = nlohmann::ordered_json;
|
||||
|
||||
// Builds a grammar fragment that matches `content` between zero and `upToN`
// times, expressed as nested optional groups. For upToN == 3 the result is
//
//     (content (content (content)?)?)?
//
// The groups are nested (rather than emitted side by side as
// "content? content? content?") so that each further repetition can only be
// attempted after the previous one has matched.
//
// content: the grammar expression to repeat, inserted verbatim.
// upToN:   maximum number of repetitions. Zero or a negative value yields an
//          empty string, i.e. no optional repetition at all.
static std::string build_repetition(const std::string & content, int upToN) {
    if (upToN <= 0) {
        // Callers pass a difference such as (max - min); a non-positive result
        // means there is nothing optional to emit.
        return "";
    }

    const size_t count = static_cast<size_t>(upToN);

    std::string out;
    // Each level contributes "(" + content + ")?" plus one separating space.
    out.reserve(count * (content.size() + 4));

    // Opening halves: "(content (content (content"
    for (size_t i = 0; i < count; i++) {
        if (i > 0) {
            out += ' ';
        }
        out += '(';
        out += content;
    }

    // Closing halves: ")?)?)?" -- one per opened group.
    for (size_t i = 0; i < count; i++) {
        out += ")?";
    }

    return out;
}
|
||||
|
||||
const std::string SPACE_RULE = "\" \"?";
|
||||
|
||||
struct BuiltinRule {
|
||||
|
@ -343,12 +360,7 @@ private:
|
|||
if (max_times == std::numeric_limits<int>::max()) {
|
||||
result += sub + "*";
|
||||
} else {
|
||||
for (int j = min_times; j < max_times; j++) {
|
||||
if (j > min_times) {
|
||||
result += " ";
|
||||
}
|
||||
result += sub + "?";
|
||||
}
|
||||
result += build_repetition(sub, max_times - min_times);
|
||||
}
|
||||
seq.back().first = result;
|
||||
seq.back().second = false;
|
||||
|
@ -680,7 +692,7 @@ public:
|
|||
min_items--;
|
||||
}
|
||||
if (max_items >= 0 && max_items > min_items) {
|
||||
successive_items += repeat(list_item_operator + "?", max_items - min_items - 1);
|
||||
successive_items += build_repetition(list_item_operator, max_items - min_items - 1);
|
||||
} else {
|
||||
successive_items += list_item_operator + "*";
|
||||
}
|
||||
|
|
|
@ -6,6 +6,12 @@ import re
|
|||
import sys
|
||||
from typing import Any, Dict, List, Set, Tuple, Union
|
||||
|
||||
def _build_repetition(content, up_to_n):
|
||||
# return ' '.join([content] * n)
|
||||
if up_to_n == 0:
|
||||
return ''
|
||||
return f'({content}{" " + _build_repetition(content, up_to_n-1) if up_to_n > 1 else ""})?'
|
||||
|
||||
class BuiltinRule:
|
||||
def __init__(self, content: str, deps: list[str] = None):
|
||||
self.content = content
|
||||
|
@ -277,10 +283,13 @@ class SchemaConverter:
|
|||
(sub, sub_is_literal) = seq[-1]
|
||||
|
||||
if min_times == 0 and max_times is None:
|
||||
sub = f'"{sub}"' if sub_is_literal else sub
|
||||
seq[-1] = (f'{sub}*', False)
|
||||
elif min_times == 0 and max_times == 1:
|
||||
sub = f'"{sub}"' if sub_is_literal else sub
|
||||
seq[-1] = (f'{sub}?', False)
|
||||
elif min_times == 1 and max_times is None:
|
||||
sub = f'"{sub}"' if sub_is_literal else sub
|
||||
seq[-1] = (f'{sub}+', False)
|
||||
else:
|
||||
if not sub_is_literal:
|
||||
|
@ -290,12 +299,17 @@ class SchemaConverter:
|
|||
sub_rule_ids[sub] = id
|
||||
sub = id
|
||||
|
||||
seq[-1] = (
|
||||
' '.join(
|
||||
([f'"{sub[1:-1] * min_times}"'] if sub_is_literal else [sub] * min_times) +
|
||||
([f'{sub}?'] * (max_times - min_times) if max_times is not None else [f'{sub}*'])),
|
||||
False
|
||||
)
|
||||
if sub_is_literal and min_times > 0:
|
||||
result = '"' + (sub[1:-1] * min_times) + '"'
|
||||
else:
|
||||
result = ' '.join([sub] * min_times)
|
||||
|
||||
if min_times < max_times:
|
||||
if min_times > 0:
|
||||
result += ' '
|
||||
result += _build_repetition(sub, max_times - min_times)
|
||||
|
||||
seq[-1] = (result, False)
|
||||
else:
|
||||
literal = ''
|
||||
while i < length:
|
||||
|
@ -411,7 +425,7 @@ class SchemaConverter:
|
|||
successive_items = list_item_operator * (min_items - 1)
|
||||
min_items -= 1
|
||||
if max_items is not None and max_items > min_items:
|
||||
successive_items += (list_item_operator + "?") * (max_items - min_items - 1)
|
||||
successive_items += _build_repetition(list_item_operator, max_items - min_items - 1)
|
||||
else:
|
||||
successive_items += list_item_operator + "*"
|
||||
if min_items == 0:
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -1,6 +1,13 @@
|
|||
// WARNING: This file was ported from json_schema_to_grammar.py, please fix bugs / add features there first.
|
||||
const SPACE_RULE = '" "?';
|
||||
|
||||
/**
 * Builds a grammar fragment matching `content` from zero to `upToN` times,
 * written as nested optional groups. For upToN === 3 the result is
 * `(content (content (content)?)?)?`.
 *
 * @param {string} content - Grammar expression to repeat, inserted verbatim.
 * @param {number} upToN - Maximum number of repetitions.
 * @returns {string} The nested-optional expression, or '' when upToN is not a
 *   positive number.
 */
function _buildRepetition(content, upToN) {
  // Written as a negated comparison so that NaN/undefined also yield ''.
  if (!(upToN > 0)) {
    return '';
  }
  // Built without recursion so large counts cannot exhaust the call stack:
  // all opening halves first, then one ")?" per opened group.
  const opened = Array.from({ length: upToN }, () => `(${content}`).join(' ');
  return opened + ')?'.repeat(upToN);
}
|
||||
|
||||
class BuiltinRule {
|
||||
constructor(content, deps) {
|
||||
this.content = content;
|
||||
|
@ -281,9 +288,20 @@ export class SchemaConverter {
|
|||
sub = id;
|
||||
}
|
||||
|
||||
const repeatedSub = Array.from({ length: minTimes }, () => subIsLiteral ? `"${sub.slice(1, -1).repeat(minTimes)}"` : sub);
|
||||
const optionalSub = maxTimes !== undefined ? Array.from({ length: maxTimes - minTimes }, () => `${sub}?`) : [`${sub}*`];
|
||||
seq[seq.length - 1] = [repeatedSub.concat(optionalSub).join(' '), false];
|
||||
let result;
|
||||
if (subIsLiteral && minTimes > 0) {
|
||||
result = `"${sub.slice(1, -1).repeat(minTimes)}"`;
|
||||
} else {
|
||||
result = Array.from({ length: minTimes }, () => sub).join(' ');
|
||||
}
|
||||
|
||||
if (minTimes < maxTimes) {
|
||||
if (minTimes > 0) {
|
||||
result += ' ';
|
||||
}
|
||||
result += _buildRepetition(sub, maxTimes - minTimes);
|
||||
}
|
||||
seq[seq.length - 1] = [result, false];
|
||||
}
|
||||
} else {
|
||||
let literal = '';
|
||||
|
@ -409,7 +427,7 @@ export class SchemaConverter {
|
|||
minItems--;
|
||||
}
|
||||
if (maxItems !== undefined && maxItems > minItems) {
|
||||
successiveItems += `${listItemOperator}?`.repeat(maxItems - minItems - 1);
|
||||
successiveItems += _buildRepetition(listItemOperator, maxItems - minItems - 1);
|
||||
} else {
|
||||
successiveItems += `${listItemOperator}*`;
|
||||
}
|
||||
|
|
|
@ -307,7 +307,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
|
|||
})""",
|
||||
R"""(
|
||||
boolean ::= ("true" | "false") space
|
||||
root ::= "[" space ( boolean ( "," space boolean )? )? "]" space
|
||||
root ::= "[" space ( boolean (( "," space boolean ))? )? "]" space
|
||||
space ::= " "?
|
||||
)"""
|
||||
});
|
||||
|
@ -326,7 +326,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
|
|||
integer ::= ("-"? ([0-9] | [1-9] [0-9]*)) space
|
||||
item ::= number | integer
|
||||
number ::= ("-"? ([0-9] | [1-9] [0-9]*)) ("." [0-9]+)? ([eE] [-+]? [0-9]+)? space
|
||||
root ::= "[" space item ( "," space item )( "," space item )( "," space item )?( "," space item )? "]" space
|
||||
root ::= "[" space item ( "," space item )( "," space item )(( "," space item ) (( "," space item ))?)? "]" space
|
||||
space ::= " "?
|
||||
)"""
|
||||
});
|
||||
|
@ -379,7 +379,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
|
|||
})""",
|
||||
R"""(
|
||||
dot ::= [\U00000000-\x09\x0B\x0C\x0E-\U0010FFFF]
|
||||
root ::= "\"" ("(" root-1 root-1? root-1? ")")? root-1 root-1 root-1 "-" root-1 root-1 root-1 root-1 " and" dot dot dot "\"" space
|
||||
root ::= "\"" ("(" root-1 (root-1 (root-1)?)? ")")? root-1 root-1 root-1 "-" root-1 root-1 root-1 root-1 " and" dot dot dot "\"" space
|
||||
root-1 ::= [0-9]
|
||||
space ::= " "?
|
||||
)"""
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue