Update json-schema-to-grammar.cpp

This commit is contained in:
ochafik 2024-03-11 23:09:51 +00:00
parent cb364ef542
commit d934adccea

View file

@ -88,30 +88,42 @@ static string repeat(const string& str, size_t n) {
return result; return result;
} }
static string _format_literal(const string& literal) { static std::string replacePattern(const std::string& input, const regex& regex, const function<string(const smatch &)>& replacement) {
string escaped = json(literal).dump(); std::smatch match;
smatch match; std::string result;
while (regex_search(escaped, match, GRAMMAR_LITERAL_ESCAPE_RE)) {
char c = match.str()[0]; std::string::const_iterator searchStart(input.cbegin());
escaped.replace(match.position(), 1, GRAMMAR_LITERAL_ESCAPES.at(c)); std::string::const_iterator searchEnd(input.cend());
while (std::regex_search(searchStart, searchEnd, match, regex)) {
result.append(searchStart, searchStart + match.position());
result.append(replacement(match));
searchStart = match.suffix().first;
} }
result.append(searchStart, searchEnd);
return result;
}
static string _format_literal(const string& literal) {
string escaped = replacePattern(json(literal).dump(), GRAMMAR_LITERAL_ESCAPE_RE, [&](const auto& match) {
char c = match.str()[0];
return GRAMMAR_LITERAL_ESCAPES.at(c);
});
return "\"" + escaped + "\""; return "\"" + escaped + "\"";
} }
static string _format_range_char(const string& literal) { static string _format_range_char(const string& ch) {
string escaped = literal.substr(1, literal.length() - 2); return replacePattern(ch, GRAMMAR_RANGE_LITERAL_ESCAPE_RE, [&](const auto& match) {
smatch match;
while (regex_search(escaped, match, GRAMMAR_RANGE_LITERAL_ESCAPE_RE)) {
char c = match.str()[0]; char c = match.str()[0];
escaped.replace(match.position(), 1, GRAMMAR_LITERAL_ESCAPES.at(c)); return GRAMMAR_LITERAL_ESCAPES.at(c);
} });
return escaped;
} }
class SchemaConverter { class SchemaConverter {
private: private:
unordered_map<string, int> _prop_order;
std::optional<std::function<json(const string&)>> _fetch_json; std::optional<std::function<json(const string&)>> _fetch_json;
bool _dotall; bool _dotall;
unordered_map<string, string> _rules; unordered_map<string, string> _rules;
@ -348,21 +360,8 @@ private:
} }
string _build_object_rule(const vector<pair<string, json>>& properties, const unordered_set<string>& required, const string& name) { string _build_object_rule(const vector<pair<string, json>>& properties, const unordered_set<string>& required, const string& name) {
unordered_map<string, int> prop_order = _prop_order; vector<string> required_props;
vector<string> sorted_props; vector<string> optional_props;
for (size_t i = 0; i < properties.size(); i++) {
const auto& prop = properties[i];
int order = (prop_order.find(prop.first) != prop_order.end()) ? prop_order[prop.first] : (int)prop_order.size();
sorted_props.push_back(prop.first);
for (size_t j = sorted_props.size() - 1; j > 0; j--) {
if (order < ((prop_order.find(sorted_props[j - 1]) != prop_order.end()) ? prop_order[sorted_props[j - 1]] : (int)prop_order.size())) {
swap(sorted_props[j], sorted_props[j - 1]);
} else {
break;
}
}
}
unordered_map<string, string> prop_kv_rule_names; unordered_map<string, string> prop_kv_rule_names;
for (const auto& [prop_name, prop_schema] : properties) { for (const auto& [prop_name, prop_schema] : properties) {
string prop_rule_name = visit(prop_schema, name + (name.empty() ? "" : "-") + prop_name); string prop_rule_name = visit(prop_schema, name + (name.empty() ? "" : "-") + prop_name);
@ -370,15 +369,10 @@ private:
name + (name.empty() ? "" : "-") + prop_name + "-kv", name + (name.empty() ? "" : "-") + prop_name + "-kv",
_format_literal(prop_name) + " space \":\" space " + prop_rule_name _format_literal(prop_name) + " space \":\" space " + prop_rule_name
); );
} if (required.find(prop_name) != required.end()) {
required_props.push_back(prop_name);
vector<string> required_props;
vector<string> optional_props;
for (const auto& prop : sorted_props) {
if (required.find(prop) != required.end()) {
required_props.push_back(prop);
} else { } else {
optional_props.push_back(prop); optional_props.push_back(prop_name);
} }
} }
@ -438,10 +432,9 @@ private:
public: public:
SchemaConverter( SchemaConverter(
const unordered_map<std::string, int>& prop_order,
const std::optional<std::function<json(const string&)>>& fetch_json, const std::optional<std::function<json(const string&)>>& fetch_json,
bool dotall) bool dotall)
: _prop_order(prop_order), _fetch_json(fetch_json), _dotall(dotall) : _fetch_json(fetch_json), _dotall(dotall)
{ {
_rules["space"] = SPACE_RULE; _rules["space"] = SPACE_RULE;
} }
@ -668,11 +661,10 @@ public:
string json_schema_to_grammar(const json& schema) { string json_schema_to_grammar(const json& schema) {
unordered_map<string, int> prop_order;
auto dotall = false; auto dotall = false;
string url("input"); string url("input");
SchemaConverter converter(prop_order, /* fetch_json= */ std::nullopt, dotall); SchemaConverter converter(/* fetch_json= */ std::nullopt, dotall);
auto copy = schema; auto copy = schema;
copy = converter.resolve_refs(copy, url); copy = converter.resolve_refs(copy, url);
converter.visit(copy, ""); converter.visit(copy, "");