fix not_strings & port to js+py

This commit is contained in:
ochafik 2024-06-11 04:21:06 +01:00
parent 8b47473df3
commit 4e6375606d
4 changed files with 182 additions and 109 deletions

View file

@ -160,64 +160,6 @@ static std::string format_literal(const std::string & literal) {
return "\"" + escaped + "\""; return "\"" + escaped + "\"";
} }
/*
    Builds the body of a GBNF rule for a quoted JSON string that must differ from
    every one of the provided strings.

    The strings are stored in a trie and the rule walks it: at each level the string
    either follows an edge that does not complete one of the provided strings, or
    leaves the trie with a character that none of them has at that position. Once the
    trie has been left, any characters (char*) may follow.

    This is stricter than "none of the provided strings": a string that merely starts
    with one of them, or that is a proper prefix of one (including the empty string),
    is not matched either.

    The emitted text refers to a rule named `char`, which this function does not define.

    not_strings({"and", "also"})
        -> ["] ( [a] ([l] ([s] ([^"o]) | [^"s]) | [n] ([^"d]) | [^"ln]) | [^"a] ) char* ["] space

    NOTE(review): characters are written into the [...] classes verbatim; strings that
    contain characters such as ']' or '\' presumably need escaping - confirm.
*/
std::string not_strings(const std::vector<std::string> & strings) {
    struct TrieNode {
        std::map<char, TrieNode> children;
        // Initialized in-class: the root below is default-initialized on the stack and
        // would otherwise carry an indeterminate flag.
        bool is_end_of_string = false;

        void insert(const std::string & string) {
            auto node = this;
            for (char c : string) {
                node = &node->children[c];
            }
            node->is_end_of_string = true;
        }
    };

    TrieNode trie;
    for (const auto & s : strings) {
        trie.insert(s);
    }

    std::ostringstream out;
    out << "[\"] ( ";
    std::function<void(const TrieNode &)> visit = [&](const TrieNode & node) {
        // Every child character is excluded from the fallback class of this level.
        std::ostringstream rejects;
        auto first = true;
        for (const auto & kv : node.children) {
            rejects << kv.first;
            if (kv.second.is_end_of_string) {
                // Following this edge would complete one of the strings: no alternative for it.
                continue;
            }
            if (first) {
                first = false;
            } else {
                out << " | ";
            }
            out << "[" << kv.first << "] (";
            visit(kv.second);
            out << ")";
        }
        if (!node.children.empty()) {
            if (!first) {
                out << " | ";
            }
            out << "[^\"" << rejects.str() << "]";
        }
    };
    visit(trie);

    out << " ) char* [\"] space";
    return out.str();
}
class SchemaConverter { class SchemaConverter {
private: private:
std::function<json(const std::string &)> _fetch_json; std::function<json(const std::string &)> _fetch_json;
@ -445,6 +387,67 @@ private:
return _add_rule(name, "\"\\\"\" " + to_rule(transform()) + " \"\\\"\" space"); return _add_rule(name, "\"\\\"\" " + to_rule(transform()) + " \"\\\"\" space");
} }
/*
    Returns the body of a rule that matches a quoted JSON string that is none of the
    provided strings. The `char` primitive rule is registered and referenced by name.

    _not_strings({"and", "also"})
        -> ["] ( [a] ([l] ([s] ([o] char+ | [^"o] char*) | [^"s] char*) | [n] ([d] char+ | [^"d] char*) | [^"ln] char*) | [^"a] char* )? ["] space

    The strings are stored in a trie. At each level the generated alternatives either
    follow a trie edge or leave the trie with a character no provided string has at that
    position, after which anything may follow.

    Known limitation: a non-empty proper prefix of a provided string (e.g. "an" for
    "and") is not matched, because every inner group requires at least one more
    character.
*/
std::string _not_strings(const std::vector<std::string> & strings) {
    struct TrieNode {
        std::map<char, TrieNode> children;
        // Initialized in-class: the root below is default-initialized on the stack and its
        // flag is read when closing the rule.
        bool is_end_of_string = false;

        void insert(const std::string & string) {
            auto node = this;
            for (char c : string) {
                node = &node->children[c];
            }
            node->is_end_of_string = true;
        }
    };

    TrieNode trie;
    for (const auto & s : strings) {
        trie.insert(s);
    }

    std::string char_rule = _add_primitive("char", PRIMITIVE_RULES.at("char"));
    std::ostringstream out;
    out << "[\"] ( ";
    std::function<void(const TrieNode &)> visit = [&](const TrieNode & node) {
        std::ostringstream rejects;
        auto first = true;
        for (const auto & kv : node.children) {
            rejects << kv.first;
            if (first) {
                first = false;
            } else {
                out << " | ";
            }
            out << "[" << kv.first << "]";
            if (!kv.second.children.empty()) {
                // Longer provided strings continue through this node, so keep descending
                // even when the node itself ends one of them; emitting `char+` here would
                // let those longer strings through.
                out << " (";
                visit(kv.second);
                out << ")";
            } else {
                // Leaf: the path so far spells a provided string, so at least one more
                // character is required.
                out << " " << char_rule << "+";
            }
        }
        if (!node.children.empty()) {
            // `first` is always false here: every child emitted an alternative above.
            out << " | [^\"" << rejects.str() << "] " << char_rule << "*";
        }
    };
    visit(trie);

    // The empty string is matched by making the group optional, unless the empty string is
    // itself one of the provided strings.
    out << " )" << (trie.is_end_of_string ? "" : "?") << " [\"] space";
    return out.str();
}
std::string _resolve_ref(const std::string & ref) { std::string _resolve_ref(const std::string & ref) {
std::string ref_name = ref.substr(ref.find_last_of('/') + 1); std::string ref_name = ref.substr(ref.find_last_of('/') + 1);
if (_rules.find(ref_name) == _rules.end() && _refs_being_resolved.find(ref) == _refs_being_resolved.end()) { if (_rules.find(ref_name) == _rules.end() && _refs_being_resolved.find(ref) == _refs_being_resolved.end()) {
@ -484,11 +487,13 @@ private:
} }
if (additional_properties.is_object() || (additional_properties.is_boolean() && additional_properties.get<bool>())) { if (additional_properties.is_object() || (additional_properties.is_boolean() && additional_properties.get<bool>())) {
std::string sub_name = name + (name.empty() ? "" : "-") + "additional"; std::string sub_name = name + (name.empty() ? "" : "-") + "additional";
std::string value_rule = visit(additional_properties.is_object() ? additional_properties : json::object(), sub_name + "-value"); std::string value_rule =
additional_properties.is_object() ? visit(additional_properties, sub_name + "-value")
: _add_primitive("value", PRIMITIVE_RULES.at("value"));
auto key_rule = auto key_rule =
prop_names.empty() ? _add_primitive("string", PRIMITIVE_RULES.at("string")) prop_names.empty() ? _add_primitive("string", PRIMITIVE_RULES.at("string"))
: _add_rule(sub_name + "-k", not_strings(prop_names)); : _add_rule(sub_name + "-k", _not_strings(prop_names));
std::string kv_rule = _add_rule(sub_name + "-kv", key_rule + " \":\" space " + value_rule); std::string kv_rule = _add_rule(sub_name + "-kv", key_rule + " \":\" space " + value_rule);
prop_kv_rule_names["*"] = kv_rule; prop_kv_rule_names["*"] = kv_rule;
optional_props.push_back("*"); optional_props.push_back("*");

View file

@ -71,47 +71,6 @@ NON_LITERAL_SET = set('|.()[]{}*+?')
ESCAPED_IN_REGEXPS_BUT_NOT_IN_LITERALS = set('[]()|{}*+?') ESCAPED_IN_REGEXPS_BUT_NOT_IN_LITERALS = set('[]()|{}*+?')
def not_strings(strings):
    """Build the body of a GBNF rule for a quoted JSON string that differs from all of `strings`.

    The strings are stored in a trie and the rule walks it: at each level the string
    either follows an edge that does not complete one of `strings`, or leaves the trie
    with a character none of them has at that position; after that `char*` may follow.
    This is stricter than "not one of `strings`": strings that start with one of them,
    or that are a proper prefix of one (including the empty string), are not matched.

    The emitted text refers to a rule named `char`, which this function does not define.

    Children are visited in sorted order so the output does not depend on the order
    in which `strings` is given.

    Returns:
        The rule body as a string, e.g. for ["and", "also"]:
        ["] ( [a] ([l] ([s] ([^"o]) | [^"s]) | [n] ([^"d]) | [^"ln]) | [^"a] ) char* ["] space
    """
    class TrieNode:
        def __init__(self):
            self.children = {}
            self.is_end_of_string = False

        def insert(self, string):
            node = self
            for c in string:
                if c not in node.children:
                    node.children[c] = TrieNode()
                node = node.children[c]
            node.is_end_of_string = True

    trie = TrieNode()
    for s in strings:
        trie.insert(s)

    out = ['["] ( ']

    def visit(node):
        # Every child character is excluded from this level's fallback class.
        rejects = []
        first = True
        for c in sorted(node.children):
            child = node.children[c]
            rejects.append(c)
            if child.is_end_of_string:
                # Following this edge would complete one of the strings.
                continue
            if first:
                first = False
            else:
                out.append(' | ')
            out.append(f'[{c}] (')
            visit(child)
            out.append(')')
        if node.children:
            if not first:
                out.append(' | ')
            out.append(f'[^"{"".join(rejects)}]')

    visit(trie)

    out.append(' ) char* ["] space')
    return ''.join(out)
class SchemaConverter: class SchemaConverter:
def __init__(self, *, prop_order, allow_fetch, dotall, raw_pattern): def __init__(self, *, prop_order, allow_fetch, dotall, raw_pattern):
self._prop_order = prop_order self._prop_order = prop_order
@ -153,6 +112,51 @@ class SchemaConverter:
return ''.join(('(', *recurse(0), ')')) return ''.join(('(', *recurse(0), ')'))
def _not_strings(self, strings):
    """Return the body of a rule matching a quoted JSON string that is none of `strings`.

    Registers the `char` primitive rule and refers to it by the returned name.
    For ["a"] with the rule named `char`, the result is:
        ["] ( [a] char+ | [^"a] char* )? ["] space

    Known limitation: a non-empty proper prefix of one of `strings` is not matched,
    because every inner group requires at least one more character.
    """
    class TrieNode:
        def __init__(self):
            self.children = {}
            self.is_end_of_string = False

        def insert(self, string):
            node = self
            for c in string:
                node = node.children.setdefault(c, TrieNode())
            node.is_end_of_string = True

    trie = TrieNode()
    for s in strings:
        trie.insert(s)

    char_rule = self._add_primitive('char', PRIMITIVE_RULES['char'])
    out = ['["] ( ']

    def visit(node):
        rejects = []
        for c in sorted(node.children.keys()):
            child = node.children[c]
            if rejects:
                out.append(' | ')
            rejects.append(c)
            out.append(f'[{c}]')
            if child.children:
                # Longer excluded strings continue through this node, so keep descending
                # even when the node itself ends one of them; `char+` here would let
                # those longer strings through.
                out.append(' (')
                visit(child)
                out.append(')')
            else:
                # Leaf: the path spells an excluded string, so require more characters.
                out.append(f' {char_rule}+')
        if rejects:
            out.append(f' | [^"{"".join(rejects)}] {char_rule}*')

    visit(trie)

    # The group is optional so the empty string matches, unless it is itself excluded.
    out.append(f' ){"" if trie.is_end_of_string else "?"} ["] space')
    return ''.join(out)
def _add_rule(self, name, rule): def _add_rule(self, name, rule):
esc_name = INVALID_RULE_CHARS_RE.sub('-', name) esc_name = INVALID_RULE_CHARS_RE.sub('-', name)
if esc_name not in self._rules or self._rules[esc_name] == rule: if esc_name not in self._rules or self._rules[esc_name] == rule:
@ -513,7 +517,7 @@ class SchemaConverter:
sub_name = f'{name}{"-" if name else ""}additional' sub_name = f'{name}{"-" if name else ""}additional'
value_rule = self.visit({} if additional_properties == True else additional_properties, f'{sub_name}-value') value_rule = self.visit({} if additional_properties == True else additional_properties, f'{sub_name}-value')
key_rule = self._add_primitive('string', PRIMITIVE_RULES['string']) if not sorted_props \ key_rule = self._add_primitive('string', PRIMITIVE_RULES['string']) if not sorted_props \
else self._add_rule(f'{sub_name}-k', not_strings(sorted_props)) else self._add_rule(f'{sub_name}-k', self._not_strings(sorted_props))
prop_kv_rule_names["*"] = self._add_rule( prop_kv_rule_names["*"] = self._add_rule(
f'{sub_name}-kv', f'{sub_name}-kv',

View file

@ -337,6 +337,63 @@ export class SchemaConverter {
return this._addRule(name, "\"\\\"\" " + toRule(transform()) + " \"\\\"\" space") return this._addRule(name, "\"\\\"\" " + toRule(transform()) + " \"\\\"\" space")
} }
_notStrings(strings) {
class TrieNode {
constructor() {
this.children = {};
this.isEndOfString = false;
}
insert(str) {
let node = this;
for (const c of str) {
node = node.children[c] = node.children[c] || new TrieNode();
}
node.isEndOfString = true;
}
}
const trie = new TrieNode();
for (const s of strings) {
trie.insert(s);
}
const charRuleName = this._addPrimitive('char', PRIMITIVE_RULES['char']);
const out = ['["] ( '];
const visit = (node) => {
const rejects = [];
let first = true;
for (const c of Object.keys(node.children).sort()) {
const child = node.children[c];
rejects.push(c);
if (!first) {
out.push(' | ');
}
out.push(`[${c}]`);
if (child.isEndOfString) {
out.push(` ${charRuleName}+`);
} else {
out.push(' (');
visit(child);
out.push(')');
}
first = false;
}
if (Object.keys(node.children).length > 0) {
if (!first) {
out.push(' | ');
}
out.push(`[^"${rejects.join('')}] ${charRuleName}*`);
}
};
visit(trie);
out.push(` )${trie.isEndOfString ? '' : '?'} ["] space`);
return out.join('');
}
_resolveRef(ref) { _resolveRef(ref) {
let refName = ref.split('/').pop(); let refName = ref.split('/').pop();
if (!(refName in this._rules) && !this._refsBeingResolved.has(ref)) { if (!(refName in this._rules) && !this._refsBeingResolved.has(ref)) {
@ -487,9 +544,14 @@ export class SchemaConverter {
if (typeof additionalProperties === 'object' || additionalProperties === true) { if (typeof additionalProperties === 'object' || additionalProperties === true) {
const subName = `${name ?? ''}${name ? '-' : ''}additional`; const subName = `${name ?? ''}${name ? '-' : ''}additional`;
const valueRule = this.visit(additionalProperties === true ? {} : additionalProperties, `${subName}-value`); const valueRule = this.visit(additionalProperties === true ? {} : additionalProperties, `${subName}-value`);
const key_rule =
sortedProps.length === 0 ? this._addPrimitive('string', PRIMITIVE_RULES['string'])
: this._addRule(`${subName}-k`, this._notStrings(sortedProps));
propKvRuleNames['*'] = this._addRule( propKvRuleNames['*'] = this._addRule(
`${subName}-kv`, `${subName}-kv`,
`${this._addPrimitive('string', PRIMITIVE_RULES['string'])} ":" space ${valueRule}`); `${key_rule} ":" space ${valueRule}`);
optionalProps.push('*'); optionalProps.push('*');
} }

View file

@ -634,7 +634,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
})""", })""",
R"""( R"""(
a-kv ::= "\"a\"" space ":" space number a-kv ::= "\"a\"" space ":" space number
additional-k ::= ["] ( [^"a] ) char* ["] space additional-k ::= ["] ( [a] char+ | [^"a] char* )? ["] space
additional-kv ::= additional-k ":" space string additional-kv ::= additional-k ":" space string
char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4}) char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4})
decimal-part ::= [0-9]{1,16} decimal-part ::= [0-9]{1,16}
@ -659,8 +659,9 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
R"""( R"""(
a-kv ::= "\"a\"" space ":" space number a-kv ::= "\"a\"" space ":" space number
a-rest ::= ( "," space additional-kv )* a-rest ::= ( "," space additional-kv )*
additional-k ::= ["] ( [^"a] ) char* ["] space additional-k ::= ["] ( [a] char+ | [^"a] char* )? ["] space
additional-kv ::= additional-k ":" space number additional-kv ::= additional-k ":" space number
char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4})
decimal-part ::= [0-9]{1,16} decimal-part ::= [0-9]{1,16}
integral-part ::= [0] | [1-9] [0-9]{0,15} integral-part ::= [0] | [1-9] [0-9]{0,15}
number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space
@ -682,11 +683,12 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
"additionalProperties": {"type": "number"} "additionalProperties": {"type": "number"}
})""", })""",
R"""( R"""(
additional-k ::= ["] ( [a] ([l] ([s] ([^"o]) | [^"s]) | [n] ([^"d]) | [^"ln]) | [^"a] ) char* ["] space additional-k ::= ["] ( [a] ([l] ([s] ([o] char+ | [^"o] char*) | [^"s] char*) | [n] ([d] char+ | [^"d] char*) | [^"ln] char*) | [^"a] char* )? ["] space
additional-kv ::= additional-k ":" space number additional-kv ::= additional-k ":" space number
also-kv ::= "\"also\"" space ":" space number also-kv ::= "\"also\"" space ":" space number
also-rest ::= ( "," space additional-kv )* also-rest ::= ( "," space additional-kv )*
and-kv ::= "\"and\"" space ":" space number and-kv ::= "\"and\"" space ":" space number
char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4})
decimal-part ::= [0-9]{1,16} decimal-part ::= [0-9]{1,16}
integral-part ::= [0] | [1-9] [0-9]{0,15} integral-part ::= [0] | [1-9] [0-9]{0,15}
number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space
@ -748,7 +750,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
alternative-1 ::= bar alternative-1 ::= bar
array ::= "[" space ( value ("," space value)* )? "]" space array ::= "[" space ( value ("," space value)* )? "]" space
bar ::= "{" space (bar-b-kv bar-b-rest | bar-additional-kv ( "," space bar-additional-kv )* )? "}" space bar ::= "{" space (bar-b-kv bar-b-rest | bar-additional-kv ( "," space bar-additional-kv )* )? "}" space
bar-additional-k ::= ["] ( [^"b] ) char* ["] space bar-additional-k ::= ["] ( [b] char+ | [^"b] char* )? ["] space
bar-additional-kv ::= bar-additional-k ":" space bar-additional-value bar-additional-kv ::= bar-additional-k ":" space bar-additional-value
bar-additional-value ::= object bar-additional-value ::= object
bar-b-kv ::= "\"b\"" space ":" space number bar-b-kv ::= "\"b\"" space ":" space number
@ -759,7 +761,7 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
foo ::= "{" space (foo-a-kv foo-a-rest | foo-additional-kv ( "," space foo-additional-kv )* )? "}" space foo ::= "{" space (foo-a-kv foo-a-rest | foo-additional-kv ( "," space foo-additional-kv )* )? "}" space
foo-a-kv ::= "\"a\"" space ":" space number foo-a-kv ::= "\"a\"" space ":" space number
foo-a-rest ::= ( "," space foo-additional-kv )* foo-a-rest ::= ( "," space foo-additional-kv )*
foo-additional-k ::= ["] ( [^"a] ) char* ["] space foo-additional-k ::= ["] ( [a] char+ | [^"a] char* )? ["] space
foo-additional-kv ::= foo-additional-k ":" space foo-additional-value foo-additional-kv ::= foo-additional-k ":" space foo-additional-value
foo-additional-value ::= object foo-additional-value ::= object
integral-part ::= [0] | [1-9] [0-9]{0,15} integral-part ::= [0] | [1-9] [0-9]{0,15}