Update json-schema-to-grammar.cpp
This commit is contained in:
parent
8caaf1641d
commit
8fee84b45c
1 changed files with 72 additions and 43 deletions
|
@ -1,13 +1,14 @@
|
|||
#include "json-schema-to-grammar.h"
|
||||
#include <algorithm>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
#include <map>
|
||||
#include <regex>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <unordered_map>
|
||||
#include <unordered_set>
|
||||
#include <iostream>
|
||||
#include <regex>
|
||||
#include <algorithm>
|
||||
#include <sstream>
|
||||
#include <fstream>
|
||||
#include <vector>
|
||||
|
||||
using json = nlohmann::json;
|
||||
using namespace std;
|
||||
|
@ -21,8 +22,15 @@ unordered_map<string, string> PRIMITIVE_RULES = {
|
|||
{"value", "object | array | string | number | boolean"},
|
||||
{"object", "\"{\" space ( string \":\" space value (\",\" space string \":\" space value)* )? \"}\" space"},
|
||||
{"array", "\"[\" space ( value (\",\" space value)* )? \"]\" space"},
|
||||
{"uuid", "\"\\\"\" \"-\" \"-\" \"-\" \"-\" \"\\\"\" space"},
|
||||
{"string", "\"\\\"\" ([^\"\\\\] | \"\\\\\" ([\"\\\\/bfnrt] | \"u\" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]))* \"\\\"\" space"},
|
||||
{"uuid", "\"\\\"\" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] "
|
||||
"\"-\" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] "
|
||||
"\"-\" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] "
|
||||
"\"-\" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] "
|
||||
"\"-\" [0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F] \"\\\"\" space"},
|
||||
{"string", " \"\\\"\" (\n"
|
||||
" [^\"\\\\] |\n"
|
||||
" \"\\\\\" ([\"\\\\/bfnrt] | \"u\" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])\n"
|
||||
" )* \"\\\"\" space"},
|
||||
{"null", "\"null\" space"}
|
||||
};
|
||||
|
||||
|
@ -126,7 +134,7 @@ class SchemaConverter {
|
|||
private:
|
||||
std::optional<std::function<json(const string&)>> _fetch_json;
|
||||
bool _dotall;
|
||||
unordered_map<string, string> _rules;
|
||||
map<string, string> _rules;
|
||||
unordered_map<string, nlohmann::json> _refs;
|
||||
unordered_set<string> _refs_being_resolved;
|
||||
vector<string> _errors;
|
||||
|
@ -289,26 +297,34 @@ private:
|
|||
seq.back().first = sub + "+";
|
||||
} else {
|
||||
if (!sub_is_literal) {
|
||||
string sub_id = sub_rule_ids[sub];
|
||||
string& sub_id = sub_rule_ids[sub];
|
||||
if (sub_id.empty()) {
|
||||
sub_id = _add_rule(name + "-" + to_string(sub_rule_ids.size() + 1), sub);
|
||||
sub_rule_ids[sub] = sub_id;
|
||||
sub_id = _add_rule(name + "-" + to_string(sub_rule_ids.size()), sub);
|
||||
}
|
||||
sub = sub_id;
|
||||
}
|
||||
string result;
|
||||
for (int j = 0; j < min_times; j++) {
|
||||
if (sub_is_literal) {
|
||||
result += "\"" + repeat(sub.substr(1, sub.length() - 2), min_times) + "\"";
|
||||
if (sub_is_literal && min_times > 0) {
|
||||
result = "\"" + repeat(sub.substr(1, sub.length() - 2), min_times) + "\"";
|
||||
} else {
|
||||
result += sub + " ";
|
||||
for (int j = 0; j < min_times; j++) {
|
||||
if (j > 0) {
|
||||
result += " ";
|
||||
}
|
||||
result += sub;
|
||||
}
|
||||
}
|
||||
if (min_times > 0 && min_times < max_times) {
|
||||
result += " ";
|
||||
}
|
||||
if (max_times == numeric_limits<int>::max()) {
|
||||
result += sub + "*";
|
||||
} else {
|
||||
for (int j = min_times; j < max_times; j++) {
|
||||
result += sub + "? ";
|
||||
if (j > min_times) {
|
||||
result += " ";
|
||||
}
|
||||
result += sub + "?";
|
||||
}
|
||||
}
|
||||
seq.back().first = result;
|
||||
|
@ -417,12 +433,10 @@ private:
|
|||
}
|
||||
rule += get_recursive_refs(vector<string>(optional_props.begin() + i, optional_props.end()), false);
|
||||
}
|
||||
rule += " ";
|
||||
|
||||
if (!required_props.empty()) {
|
||||
rule += " ) ";
|
||||
rule += " )";
|
||||
}
|
||||
rule += " )? ";
|
||||
rule += " )?";
|
||||
}
|
||||
|
||||
rule += " \"}\" space ";
|
||||
|
@ -595,16 +609,11 @@ public:
|
|||
json max_items_json = schema.contains("maxItems") ? schema["maxItems"] : json();
|
||||
int max_items = max_items_json.is_number_integer() ? max_items_json.get<int>() : -1;
|
||||
if (min_items > 0) {
|
||||
successive_items = list_item_operator;
|
||||
for (int i = 1; i < min_items; i++) {
|
||||
successive_items += list_item_operator;
|
||||
}
|
||||
successive_items += repeat(list_item_operator, min_items - 1);
|
||||
min_items--;
|
||||
}
|
||||
if (max_items >= 0 && max_items > min_items) {
|
||||
for (int i = min_items; i < max_items - 1; i++) {
|
||||
successive_items += (list_item_operator + "?");
|
||||
}
|
||||
successive_items += repeat(list_item_operator + "?", max_items - min_items - 1);
|
||||
} else {
|
||||
successive_items += list_item_operator + "*";
|
||||
}
|
||||
|
@ -618,7 +627,7 @@ public:
|
|||
}
|
||||
} else if ((schema_type.is_null() || schema_type == "string") && schema.contains("pattern")) {
|
||||
return _visit_pattern(schema["pattern"], rule_name);
|
||||
} else if ((schema_type == "object" || schema_type.is_null()) && (schema.size() == 1 || schema.empty())) {
|
||||
} else if (schema.empty() || (schema.size() == 1 && schema_type == "object")) {
|
||||
for (const auto& [t, r] : PRIMITIVE_RULES) {
|
||||
_add_rule(t, r);
|
||||
}
|
||||
|
@ -667,3 +676,23 @@ string json_schema_to_grammar(const json& schema) {
|
|||
converter.check_errors();
|
||||
return converter.format_grammar();
|
||||
}
|
||||
|
||||
#ifdef LLAMA_BUILD_JSON_SCHEMA_CONVERTER
|
||||
|
||||
int main(int argc, const char** argv) {
|
||||
if (argc != 2) {
|
||||
cerr << "Expected only one argument" << endl;
|
||||
return -1;
|
||||
}
|
||||
string file(argv[1]);
|
||||
string schema;
|
||||
if (file == "-") {
|
||||
schema.append(istreambuf_iterator<char>(cin), istreambuf_iterator<char>());
|
||||
} else {
|
||||
ifstream in(argv[1]);
|
||||
schema.append(istreambuf_iterator<char>(in), istreambuf_iterator<char>());
|
||||
}
|
||||
cout << json_schema_to_grammar(json::parse(schema)).c_str() << endl;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue