json: fix string patterns (was missing quotes)
This commit is contained in:
parent
dd922a4da3
commit
24f0b941cf
7 changed files with 1579 additions and 1500 deletions
|
@ -44,16 +44,17 @@ GRAMMAR_RANGE_LITERAL_ESCAPE_RE = re.compile(r'[\r\n"\]\-\\]')
|
||||||
GRAMMAR_LITERAL_ESCAPES = {'\r': '\\r', '\n': '\\n', '"': '\\"', '-': '\\-', ']': '\\]'}
|
GRAMMAR_LITERAL_ESCAPES = {'\r': '\\r', '\n': '\\n', '"': '\\"', '-': '\\-', ']': '\\]'}
|
||||||
|
|
||||||
NON_LITERAL_SET = set('|.()[]{}*+?')
|
NON_LITERAL_SET = set('|.()[]{}*+?')
|
||||||
ESCAPED_IN_REGEXPS_BUT_NOT_IN_LITERALS = set('{*+?')
|
ESCAPED_IN_REGEXPS_BUT_NOT_IN_LITERALS = set('[]()|{}*+?')
|
||||||
|
|
||||||
DATE_PATTERN = '[0-9]{4}-(0[1-9]|1[0-2])-([0-2][0-9]|3[0-1])'
|
DATE_PATTERN = '[0-9]{4}-(0[1-9]|1[0-2])-([0-2][0-9]|3[0-1])'
|
||||||
TIME_PATTERN = '([01][0-9]|2[0-3])(:[0-5][0-9]){2}(\\.[0-9]{1,3})?(Z|[+-](([01][0-9]|2[0-3]):[0-5][0-9]))' # Cap millisecond precision w/ 3 digits
|
TIME_PATTERN = '([01][0-9]|2[0-3])(:[0-5][0-9]){2}(\\.[0-9]{1,3})?(Z|[+-](([01][0-9]|2[0-3]):[0-5][0-9]))' # Cap millisecond precision w/ 3 digits
|
||||||
|
|
||||||
class SchemaConverter:
|
class SchemaConverter:
|
||||||
def __init__(self, *, prop_order, allow_fetch, dotall):
|
def __init__(self, *, prop_order, allow_fetch, dotall, raw_pattern):
|
||||||
self._prop_order = prop_order
|
self._prop_order = prop_order
|
||||||
self._allow_fetch = allow_fetch
|
self._allow_fetch = allow_fetch
|
||||||
self._dotall = dotall
|
self._dotall = dotall
|
||||||
|
self._raw_pattern = raw_pattern
|
||||||
self._rules = {'space': SPACE_RULE}
|
self._rules = {'space': SPACE_RULE}
|
||||||
self._refs = {}
|
self._refs = {}
|
||||||
self._refs_being_resolved = set()
|
self._refs_being_resolved = set()
|
||||||
|
@ -152,6 +153,10 @@ class SchemaConverter:
|
||||||
i = 0
|
i = 0
|
||||||
length = len(pattern)
|
length = len(pattern)
|
||||||
|
|
||||||
|
def to_rule(s: Tuple[str, bool]) -> str:
|
||||||
|
(txt, is_literal) = s
|
||||||
|
return "\"" + txt + "\"" if is_literal else txt
|
||||||
|
|
||||||
def transform() -> Tuple[str, bool]:
|
def transform() -> Tuple[str, bool]:
|
||||||
'''
|
'''
|
||||||
Parse a unit at index i (advancing it), and return its string representation + whether it's a literal.
|
Parse a unit at index i (advancing it), and return its string representation + whether it's a literal.
|
||||||
|
@ -180,13 +185,12 @@ class SchemaConverter:
|
||||||
ret = []
|
ret = []
|
||||||
for is_literal, g in itertools.groupby(seq, lambda x: x[1]):
|
for is_literal, g in itertools.groupby(seq, lambda x: x[1]):
|
||||||
if is_literal:
|
if is_literal:
|
||||||
lit = ''.join(x[0][1:-1] for x in g)
|
ret.append((''.join(x[0] for x in g), True))
|
||||||
ret.append((f'"{lit}"', True))
|
|
||||||
else:
|
else:
|
||||||
ret.extend(g)
|
ret.extend(g)
|
||||||
if len(ret) == 1:
|
if len(ret) == 1:
|
||||||
return ret[0]
|
return ret[0]
|
||||||
return (' '.join(x[0] for x in seq), False)
|
return (' '.join(to_rule(x) for x in seq), False)
|
||||||
|
|
||||||
while i < length:
|
while i < length:
|
||||||
c = pattern[i]
|
c = pattern[i]
|
||||||
|
@ -197,7 +201,7 @@ class SchemaConverter:
|
||||||
i += 1
|
i += 1
|
||||||
if i < length:
|
if i < length:
|
||||||
assert pattern[i] != '?', f'Unsupported pattern syntax "{pattern[i]}" at index {i} of /{pattern}/'
|
assert pattern[i] != '?', f'Unsupported pattern syntax "{pattern[i]}" at index {i} of /{pattern}/'
|
||||||
seq.append((f'({transform()[0]})', False))
|
seq.append((f'({to_rule(transform())})', False))
|
||||||
elif c == ')':
|
elif c == ')':
|
||||||
i += 1
|
i += 1
|
||||||
assert start > 0 and pattern[start-1] == '(', f'Unbalanced parentheses; start = {start}, i = {i}, pattern = {pattern}'
|
assert start > 0 and pattern[start-1] == '(', f'Unbalanced parentheses; start = {start}, i = {i}, pattern = {pattern}'
|
||||||
|
@ -220,7 +224,7 @@ class SchemaConverter:
|
||||||
seq.append(('|', False))
|
seq.append(('|', False))
|
||||||
i += 1
|
i += 1
|
||||||
elif c in ('*', '+', '?'):
|
elif c in ('*', '+', '?'):
|
||||||
seq[-1] = (f'{seq[-1][0]}{c}', False)
|
seq[-1] = (to_rule(seq[-1]) + c, False)
|
||||||
i += 1
|
i += 1
|
||||||
elif c == '{':
|
elif c == '{':
|
||||||
curly_brackets = c
|
curly_brackets = c
|
||||||
|
@ -232,13 +236,18 @@ class SchemaConverter:
|
||||||
curly_brackets += '}'
|
curly_brackets += '}'
|
||||||
i += 1
|
i += 1
|
||||||
nums = [s.strip() for s in curly_brackets[1:-1].split(',')]
|
nums = [s.strip() for s in curly_brackets[1:-1].split(',')]
|
||||||
if len(nums) == 1:
|
min_times = 0
|
||||||
min_times = int(nums[0])
|
max_times = None
|
||||||
max_times = min_times
|
try:
|
||||||
else:
|
if len(nums) == 1:
|
||||||
assert len(nums) == 2
|
min_times = int(nums[0])
|
||||||
min_times = int(nums[0]) if nums[0] else 0
|
max_times = min_times
|
||||||
max_times = int(nums[1]) if nums[1] else None
|
else:
|
||||||
|
assert len(nums) == 2
|
||||||
|
min_times = int(nums[0]) if nums[0] else 0
|
||||||
|
max_times = int(nums[1]) if nums[1] else None
|
||||||
|
except ValueError:
|
||||||
|
raise ValueError(f'Invalid quantifier {curly_brackets} in /{pattern}/')
|
||||||
|
|
||||||
(sub, sub_is_literal) = seq[-1]
|
(sub, sub_is_literal) = seq[-1]
|
||||||
|
|
||||||
|
@ -263,32 +272,35 @@ class SchemaConverter:
|
||||||
False
|
False
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
lit = ''
|
literal = ''
|
||||||
while i < length and pattern[i] not in NON_LITERAL_SET \
|
while i < length:
|
||||||
and not (i < length - 1 and pattern[i+1] in ESCAPED_IN_REGEXPS_BUT_NOT_IN_LITERALS):
|
|
||||||
if pattern[i] == '\\' and i < length - 1:
|
if pattern[i] == '\\' and i < length - 1:
|
||||||
i += 1
|
next = pattern[i + 1]
|
||||||
if pattern[i] in NON_LITERAL_SET:
|
if next in ESCAPED_IN_REGEXPS_BUT_NOT_IN_LITERALS:
|
||||||
# Escapes in regular expressions that aren't escaped in GBNF literals
|
i += 1
|
||||||
lit += pattern[i]
|
literal += pattern[i]
|
||||||
|
i += 1
|
||||||
else:
|
else:
|
||||||
lit += f'\\{pattern[i]}'
|
literal += pattern[i:i+2]
|
||||||
|
i += 2
|
||||||
|
elif pattern[i] == '"' and not self._raw_pattern:
|
||||||
|
literal += '\\"'
|
||||||
|
i += 1
|
||||||
|
elif pattern[i] not in NON_LITERAL_SET and \
|
||||||
|
(i == length - 1 or literal == '' or pattern[i+1] == '.' or pattern[i+1] not in NON_LITERAL_SET):
|
||||||
|
literal += pattern[i]
|
||||||
i += 1
|
i += 1
|
||||||
else:
|
else:
|
||||||
if pattern[i] == '"':
|
break
|
||||||
lit += '\\'
|
if literal:
|
||||||
lit += pattern[i]
|
seq.append((literal, True))
|
||||||
i += 1
|
|
||||||
if lit:
|
|
||||||
seq.append((f'"{lit}"', True))
|
|
||||||
|
|
||||||
if i < length and pattern[i] not in ('.', '(', ')', '|', '[', '{', '*', '+', '?'):
|
|
||||||
seq.append((f'"{pattern[i]}"', True))
|
|
||||||
i += 1
|
|
||||||
|
|
||||||
return join_seq()
|
return join_seq()
|
||||||
|
|
||||||
return self._add_rule(name, transform()[0])
|
return self._add_rule(
|
||||||
|
name,
|
||||||
|
to_rule(transform()) if self._raw_pattern \
|
||||||
|
else "\"\\\"\" " + to_rule(transform()) + " \"\\\"\" space")
|
||||||
|
|
||||||
|
|
||||||
def _resolve_ref(self, ref):
|
def _resolve_ref(self, ref):
|
||||||
|
@ -510,6 +522,11 @@ def main(args_in = None):
|
||||||
action='store_true',
|
action='store_true',
|
||||||
default=False,
|
default=False,
|
||||||
help='Whether to treat dot (".") as matching all chars including line breaks in regular expression patterns')
|
help='Whether to treat dot (".") as matching all chars including line breaks in regular expression patterns')
|
||||||
|
parser.add_argument(
|
||||||
|
'--raw-pattern',
|
||||||
|
action='store_true',
|
||||||
|
default=False,
|
||||||
|
help='Treats string patterns as raw patterns w/o quotes (or quote escapes)')
|
||||||
|
|
||||||
parser.add_argument('schema', help='file containing JSON schema ("-" for stdin)')
|
parser.add_argument('schema', help='file containing JSON schema ("-" for stdin)')
|
||||||
args = parser.parse_args(args_in)
|
args = parser.parse_args(args_in)
|
||||||
|
@ -528,7 +545,8 @@ def main(args_in = None):
|
||||||
converter = SchemaConverter(
|
converter = SchemaConverter(
|
||||||
prop_order={name: idx for idx, name in enumerate(args.prop_order)},
|
prop_order={name: idx for idx, name in enumerate(args.prop_order)},
|
||||||
allow_fetch=args.allow_fetch,
|
allow_fetch=args.allow_fetch,
|
||||||
dotall=args.dotall)
|
dotall=args.dotall,
|
||||||
|
raw_pattern=args.raw_pattern)
|
||||||
schema = converter.resolve_refs(schema, url)
|
schema = converter.resolve_refs(schema, url)
|
||||||
converter.visit(schema, '')
|
converter.visit(schema, '')
|
||||||
print(converter.format_grammar())
|
print(converter.format_grammar())
|
||||||
|
|
|
@ -11,6 +11,7 @@ print(subprocess.check_output(
|
||||||
"json-schema-to-grammar.py"),
|
"json-schema-to-grammar.py"),
|
||||||
*rest,
|
*rest,
|
||||||
"-",
|
"-",
|
||||||
|
"--raw-pattern",
|
||||||
],
|
],
|
||||||
text=True,
|
text=True,
|
||||||
input=json.dumps({
|
input=json.dumps({
|
||||||
|
|
|
@ -62,7 +62,7 @@ unordered_map<char, string> GRAMMAR_LITERAL_ESCAPES = {
|
||||||
};
|
};
|
||||||
|
|
||||||
unordered_set<char> NON_LITERAL_SET = {'|', '.', '(', ')', '[', ']', '{', '}', '*', '+', '?'};
|
unordered_set<char> NON_LITERAL_SET = {'|', '.', '(', ')', '[', ']', '{', '}', '*', '+', '?'};
|
||||||
unordered_set<char> ESCAPED_IN_REGEXPS_BUT_NOT_IN_LITERALS = {'{', '*', '+', '?'};
|
unordered_set<char> ESCAPED_IN_REGEXPS_BUT_NOT_IN_LITERALS = {'[', ']', '(', ')', '|', '{', '}', '*', '+', '?'};
|
||||||
|
|
||||||
template <typename Iterator>
|
template <typename Iterator>
|
||||||
string join(Iterator begin, Iterator end, const string& separator) {
|
string join(Iterator begin, Iterator end, const string& separator) {
|
||||||
|
@ -186,9 +186,15 @@ private:
|
||||||
size_t i = 0;
|
size_t i = 0;
|
||||||
size_t length = sub_pattern.length();
|
size_t length = sub_pattern.length();
|
||||||
|
|
||||||
std::function<pair<string, bool>()> transform = [&]() -> pair<string, bool> {
|
using literal_or_rule = pair<string, bool>;
|
||||||
|
auto to_rule = [&](const literal_or_rule& ls) {
|
||||||
|
auto is_literal = ls.second;
|
||||||
|
auto s = ls.first;
|
||||||
|
return is_literal ? "\"" + s + "\"" : s;
|
||||||
|
};
|
||||||
|
std::function<literal_or_rule()> transform = [&]() -> literal_or_rule {
|
||||||
size_t start = i;
|
size_t start = i;
|
||||||
vector<pair<string, bool>> seq;
|
vector<literal_or_rule> seq;
|
||||||
|
|
||||||
auto get_dot = [&]() {
|
auto get_dot = [&]() {
|
||||||
string rule;
|
string rule;
|
||||||
|
@ -202,28 +208,32 @@ private:
|
||||||
|
|
||||||
// Joins the sequence, merging consecutive literals together.
|
// Joins the sequence, merging consecutive literals together.
|
||||||
auto join_seq = [&]() {
|
auto join_seq = [&]() {
|
||||||
vector<string> results;
|
vector<literal_or_rule> ret;
|
||||||
|
|
||||||
string literal;
|
string literal;
|
||||||
auto flush_literal = [&]() {
|
auto flush_literal = [&]() {
|
||||||
if (literal.empty()) {
|
if (literal.empty()) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
results.push_back("\"" + literal + "\"");
|
ret.push_back(make_pair(literal, true));
|
||||||
literal.clear();
|
literal.clear();
|
||||||
return true;
|
return true;
|
||||||
};
|
};
|
||||||
|
|
||||||
for (const auto& item : seq) {
|
for (const auto& item : seq) {
|
||||||
if (item.second) {
|
auto is_literal = item.second;
|
||||||
literal += item.first.substr(1, item.first.length() - 2);
|
if (is_literal) {
|
||||||
|
literal += item.first;
|
||||||
} else {
|
} else {
|
||||||
flush_literal();
|
flush_literal();
|
||||||
results.push_back(item.first);
|
ret.push_back(item);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (flush_literal() && results.size() == 1) {
|
flush_literal();
|
||||||
return make_pair(results[0], true);
|
|
||||||
|
vector<string> results;
|
||||||
|
for (const auto& item : ret) {
|
||||||
|
results.push_back(to_rule(item));
|
||||||
}
|
}
|
||||||
return make_pair(join(results.begin(), results.end(), " "), false);
|
return make_pair(join(results.begin(), results.end(), " "), false);
|
||||||
};
|
};
|
||||||
|
@ -240,8 +250,7 @@ private:
|
||||||
_warnings.push_back("Unsupported pattern syntax");
|
_warnings.push_back("Unsupported pattern syntax");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
auto sub_result = transform();
|
seq.push_back(make_pair("(" + to_rule(transform()) + ")", false));
|
||||||
seq.push_back(make_pair("(" + sub_result.first + ")", false));
|
|
||||||
} else if (c == ')') {
|
} else if (c == ')') {
|
||||||
i++;
|
i++;
|
||||||
if (start > 0 && sub_pattern[start - 1] != '(') {
|
if (start > 0 && sub_pattern[start - 1] != '(') {
|
||||||
|
@ -270,7 +279,7 @@ private:
|
||||||
seq.push_back(make_pair("|", false));
|
seq.push_back(make_pair("|", false));
|
||||||
i++;
|
i++;
|
||||||
} else if (c == '*' || c == '+' || c == '?') {
|
} else if (c == '*' || c == '+' || c == '?') {
|
||||||
seq.back().first += c;
|
seq.back() = make_pair(to_rule(seq.back()) + c, false);
|
||||||
i++;
|
i++;
|
||||||
} else if (c == '{') {
|
} else if (c == '{') {
|
||||||
string curly_brackets = string(1, c);
|
string curly_brackets = string(1, c);
|
||||||
|
@ -287,17 +296,22 @@ private:
|
||||||
auto nums = split(curly_brackets.substr(1, curly_brackets.length() - 2), ",");
|
auto nums = split(curly_brackets.substr(1, curly_brackets.length() - 2), ",");
|
||||||
int min_times = 0;
|
int min_times = 0;
|
||||||
int max_times = numeric_limits<int>::max();
|
int max_times = numeric_limits<int>::max();
|
||||||
if (nums.size() == 1) {
|
try {
|
||||||
min_times = max_times = stoi(nums[0]);
|
if (nums.size() == 1) {
|
||||||
} else if (nums.size() != 2) {
|
min_times = max_times = std::stoi(nums[0]);
|
||||||
_errors.push_back("Wrong number of values in curly brackets");
|
} else if (nums.size() != 2) {
|
||||||
} else {
|
_errors.push_back("Wrong number of values in curly brackets");
|
||||||
if (!nums[0].empty()) {
|
} else {
|
||||||
min_times = stoi(nums[0]);
|
if (!nums[0].empty()) {
|
||||||
}
|
min_times = std::stoi(nums[0]);
|
||||||
if (!nums[1].empty()) {
|
}
|
||||||
max_times = stoi(nums[1]);
|
if (!nums[1].empty()) {
|
||||||
|
max_times = std::stoi(nums[1]);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
} catch (const std::invalid_argument& e) {
|
||||||
|
_errors.push_back("Invalid number in curly brackets");
|
||||||
|
return make_pair("", false);
|
||||||
}
|
}
|
||||||
auto &last = seq.back();
|
auto &last = seq.back();
|
||||||
auto &sub = last.first;
|
auto &sub = last.first;
|
||||||
|
@ -346,36 +360,39 @@ private:
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
string literal;
|
string literal;
|
||||||
while (i < length && NON_LITERAL_SET.find(sub_pattern[i]) == NON_LITERAL_SET.end() &&
|
auto is_non_literal = [&](char c) {
|
||||||
(i == length - 1 || ESCAPED_IN_REGEXPS_BUT_NOT_IN_LITERALS.find(sub_pattern[i + 1]) == ESCAPED_IN_REGEXPS_BUT_NOT_IN_LITERALS.end())) {
|
return NON_LITERAL_SET.find(c) != NON_LITERAL_SET.end();
|
||||||
|
};
|
||||||
|
while (i < length) {
|
||||||
if (sub_pattern[i] == '\\' && i < length - 1) {
|
if (sub_pattern[i] == '\\' && i < length - 1) {
|
||||||
i++;
|
char next = sub_pattern[i + 1];
|
||||||
if (NON_LITERAL_SET.find(sub_pattern[i]) != NON_LITERAL_SET.end()) {
|
if (ESCAPED_IN_REGEXPS_BUT_NOT_IN_LITERALS.find(next) != ESCAPED_IN_REGEXPS_BUT_NOT_IN_LITERALS.end()) {
|
||||||
|
i++;
|
||||||
literal += sub_pattern[i];
|
literal += sub_pattern[i];
|
||||||
|
i++;
|
||||||
} else {
|
} else {
|
||||||
literal += "\\" + string(1, sub_pattern[i]);
|
literal += sub_pattern.substr(i, 2);
|
||||||
|
i += 2;
|
||||||
}
|
}
|
||||||
|
} else if (sub_pattern[i] == '"') {
|
||||||
|
literal += "\\\"";
|
||||||
i++;
|
i++;
|
||||||
} else {
|
} else if (!is_non_literal(sub_pattern[i]) &&
|
||||||
if (sub_pattern[i] == '"') {
|
(i == length - 1 || literal.empty() || sub_pattern[i + 1] == '.' || !is_non_literal(sub_pattern[i + 1]))) {
|
||||||
literal += "\\";
|
|
||||||
}
|
|
||||||
literal += sub_pattern[i];
|
literal += sub_pattern[i];
|
||||||
i++;
|
i++;
|
||||||
|
} else {
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (!literal.empty()) {
|
if (!literal.empty()) {
|
||||||
seq.push_back(make_pair("\"" + literal + "\"", true));
|
seq.push_back(make_pair(literal, true));
|
||||||
}
|
|
||||||
if (i < length && NON_LITERAL_SET.find(sub_pattern[i]) == NON_LITERAL_SET.end()) {
|
|
||||||
seq.push_back(make_pair("\"" + string(1, sub_pattern[i]) + "\"", true));
|
|
||||||
i++;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return join_seq();
|
return join_seq();
|
||||||
};
|
};
|
||||||
return _add_rule(name, transform().first);
|
return _add_rule(name, "\"\\\"\" " + to_rule(transform()) + " \"\\\"\" space");
|
||||||
}
|
}
|
||||||
|
|
||||||
string _resolve_ref(const string& ref) {
|
string _resolve_ref(const string& ref) {
|
||||||
|
|
File diff suppressed because it is too large
Load diff
File diff suppressed because one or more lines are too long
|
@ -35,7 +35,7 @@ const GRAMMAR_RANGE_LITERAL_ESCAPE_RE = /[\n\r"\]\-\\]/g;
|
||||||
const GRAMMAR_LITERAL_ESCAPES = { '\r': '\\r', '\n': '\\n', '"': '\\"', '-': '\\-', ']': '\\]' };
|
const GRAMMAR_LITERAL_ESCAPES = { '\r': '\\r', '\n': '\\n', '"': '\\"', '-': '\\-', ']': '\\]' };
|
||||||
|
|
||||||
const NON_LITERAL_SET = new Set('|.()[]{}*+?');
|
const NON_LITERAL_SET = new Set('|.()[]{}*+?');
|
||||||
const ESCAPED_IN_REGEXPS_BUT_NOT_IN_LITERALS = new Set('{*+?');
|
const ESCAPED_IN_REGEXPS_BUT_NOT_IN_LITERALS = new Set('[]()|{}*+?');
|
||||||
|
|
||||||
export class SchemaConverter {
|
export class SchemaConverter {
|
||||||
constructor(options) {
|
constructor(options) {
|
||||||
|
@ -163,6 +163,9 @@ export class SchemaConverter {
|
||||||
return this._addRule('dot', rule);
|
return this._addRule('dot', rule);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
const toRule = ([s, isLiteral]) => isLiteral ? "\"" + s + "\"" : s;
|
||||||
|
|
||||||
const transform = () => {
|
const transform = () => {
|
||||||
const start = i;
|
const start = i;
|
||||||
// For each component of this sequence, store its string representation and whether it's a literal.
|
// For each component of this sequence, store its string representation and whether it's a literal.
|
||||||
|
@ -175,8 +178,7 @@ export class SchemaConverter {
|
||||||
const ret = [];
|
const ret = [];
|
||||||
for (const [isLiteral, g] of groupBy(seq, x => x[1])) {
|
for (const [isLiteral, g] of groupBy(seq, x => x[1])) {
|
||||||
if (isLiteral) {
|
if (isLiteral) {
|
||||||
const lit = [...g].map(x => x[0].slice(1, -1)).join('');
|
ret.push([[...g].map(x => x[0]).join(''), true]);
|
||||||
ret.push([`"${lit}"`, true]);
|
|
||||||
} else {
|
} else {
|
||||||
ret.push(...g);
|
ret.push(...g);
|
||||||
}
|
}
|
||||||
|
@ -184,7 +186,7 @@ export class SchemaConverter {
|
||||||
if (ret.length === 1) {
|
if (ret.length === 1) {
|
||||||
return ret[0];
|
return ret[0];
|
||||||
}
|
}
|
||||||
return [ret.map(x => x[0]).join(' '), false];
|
return [ret.map(x => toRule(x)).join(' '), false];
|
||||||
};
|
};
|
||||||
|
|
||||||
while (i < length) {
|
while (i < length) {
|
||||||
|
@ -199,7 +201,7 @@ export class SchemaConverter {
|
||||||
throw new Error(`Unsupported pattern syntax "${pattern[i]}" at index ${i} of /${pattern}/`);
|
throw new Error(`Unsupported pattern syntax "${pattern[i]}" at index ${i} of /${pattern}/`);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
seq.push([`(${transform()[0]})`, false]);
|
seq.push([`(${toRule(transform())})`, false]);
|
||||||
} else if (c === ')') {
|
} else if (c === ')') {
|
||||||
i += 1;
|
i += 1;
|
||||||
if (start <= 0 || pattern[start - 1] !== '(') {
|
if (start <= 0 || pattern[start - 1] !== '(') {
|
||||||
|
@ -228,7 +230,7 @@ export class SchemaConverter {
|
||||||
seq.push(['|', false]);
|
seq.push(['|', false]);
|
||||||
i += 1;
|
i += 1;
|
||||||
} else if (c === '*' || c === '+' || c === '?') {
|
} else if (c === '*' || c === '+' || c === '?') {
|
||||||
seq[seq.length - 1] = [`${seq[seq.length - 1][0]}${c}`, false];
|
seq[seq.length - 1] = [toRule(seq[seq.length - 1]) + c, false];
|
||||||
i += 1;
|
i += 1;
|
||||||
} else if (c === '{') {
|
} else if (c === '{') {
|
||||||
let curlyBrackets = c;
|
let curlyBrackets = c;
|
||||||
|
@ -278,33 +280,31 @@ export class SchemaConverter {
|
||||||
seq[seq.length - 1] = [repeatedSub.concat(optionalSub).join(' '), false];
|
seq[seq.length - 1] = [repeatedSub.concat(optionalSub).join(' '), false];
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
let lit = '';
|
let literal = '';
|
||||||
while (i < length && !NON_LITERAL_SET.has(pattern[i]) &&
|
while (i < length) {
|
||||||
!(i < length - 1 && ESCAPED_IN_REGEXPS_BUT_NOT_IN_LITERALS.has(pattern[i + 1]))) {
|
|
||||||
if (pattern[i] === '\\' && i < length - 1) {
|
if (pattern[i] === '\\' && i < length - 1) {
|
||||||
i += 1;
|
const next = pattern[i + 1];
|
||||||
if (NON_LITERAL_SET.has(pattern[i])) {
|
if (ESCAPED_IN_REGEXPS_BUT_NOT_IN_LITERALS.has(next)) {
|
||||||
// Escapes in regular expressions that aren't escaped in GBNF literals
|
i += 1;
|
||||||
lit += pattern[i];
|
literal += pattern[i];
|
||||||
|
i += 1;
|
||||||
} else {
|
} else {
|
||||||
lit += `\\${pattern[i]}`;
|
literal += pattern.slice(i, i + 2);
|
||||||
|
i += 2;
|
||||||
}
|
}
|
||||||
|
} else if (pattern[i] === '"') {
|
||||||
|
literal += '\\"';
|
||||||
|
i += 1;
|
||||||
|
} else if (!NON_LITERAL_SET.has(pattern[i]) &&
|
||||||
|
(i === length - 1 || literal === '' || pattern[i + 1] === '.' || !NON_LITERAL_SET.has(pattern[i+1]))) {
|
||||||
|
literal += pattern[i];
|
||||||
i += 1;
|
i += 1;
|
||||||
} else {
|
} else {
|
||||||
if (pattern[i] === '"') {
|
break;
|
||||||
lit += '\\';
|
|
||||||
}
|
|
||||||
lit += pattern[i];
|
|
||||||
i += 1;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (lit) {
|
if (literal !== '') {
|
||||||
seq.push([`"${lit}"`, true]);
|
seq.push([literal, true]);
|
||||||
}
|
|
||||||
|
|
||||||
if (i < length && !NON_LITERAL_SET.has(pattern[i])) {
|
|
||||||
seq.push([`"${pattern[i]}"`, true]);
|
|
||||||
i += 1;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -312,7 +312,7 @@ export class SchemaConverter {
|
||||||
return joinSeq();
|
return joinSeq();
|
||||||
};
|
};
|
||||||
|
|
||||||
return this._addRule(name, transform()[0]);
|
return this._addRule(name, "\"\\\"\" " + toRule(transform()) + " \"\\\"\" space")
|
||||||
}
|
}
|
||||||
|
|
||||||
_resolveRef(ref) {
|
_resolveRef(ref) {
|
||||||
|
|
|
@ -331,6 +331,45 @@ static void test_all(const string& lang, std::function<void(const TestCase&)> ru
|
||||||
)"""
|
)"""
|
||||||
});
|
});
|
||||||
|
|
||||||
|
test({
|
||||||
|
SUCCESS,
|
||||||
|
"simple regexp",
|
||||||
|
R"""({
|
||||||
|
"type": "string",
|
||||||
|
"pattern": "^abc?d*efg+(hij)?kl$"
|
||||||
|
})""",
|
||||||
|
R"""(
|
||||||
|
root ::= "\"" "ab" "c"? "d"* "ef" "g"+ ("hij")? "kl" "\"" space
|
||||||
|
space ::= " "?
|
||||||
|
)"""
|
||||||
|
});
|
||||||
|
|
||||||
|
test({
|
||||||
|
SUCCESS,
|
||||||
|
"regexp escapes",
|
||||||
|
R"""({
|
||||||
|
"type": "string",
|
||||||
|
"pattern": "^\\[\\]\\{\\}\\(\\)\\|\\+\\*\\?$"
|
||||||
|
})""",
|
||||||
|
R"""(
|
||||||
|
root ::= "\"" "[]{}()|+*?" "\"" space
|
||||||
|
space ::= " "?
|
||||||
|
)"""
|
||||||
|
});
|
||||||
|
|
||||||
|
test({
|
||||||
|
SUCCESS,
|
||||||
|
"regexp quote",
|
||||||
|
R"""({
|
||||||
|
"type": "string",
|
||||||
|
"pattern": "^\"$"
|
||||||
|
})""",
|
||||||
|
R"""(
|
||||||
|
root ::= "\"" "\"" "\"" space
|
||||||
|
space ::= " "?
|
||||||
|
)"""
|
||||||
|
});
|
||||||
|
|
||||||
test({
|
test({
|
||||||
SUCCESS,
|
SUCCESS,
|
||||||
"regexp",
|
"regexp",
|
||||||
|
@ -340,7 +379,7 @@ static void test_all(const string& lang, std::function<void(const TestCase&)> ru
|
||||||
})""",
|
})""",
|
||||||
R"""(
|
R"""(
|
||||||
dot ::= [\U00000000-\x09\x0B\x0C\x0E-\U0010FFFF]
|
dot ::= [\U00000000-\x09\x0B\x0C\x0E-\U0010FFFF]
|
||||||
root ::= ("(" root-1 root-1? root-1? ")")? root-1 root-1 root-1 "-" root-1 root-1 root-1 root-1 " and" dot dot dot
|
root ::= "\"" ("(" root-1 root-1? root-1? ")")? root-1 root-1 root-1 "-" root-1 root-1 root-1 root-1 " and" dot dot dot "\"" space
|
||||||
root-1 ::= [0-9]
|
root-1 ::= [0-9]
|
||||||
space ::= " "?
|
space ::= " "?
|
||||||
)"""
|
)"""
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue