json: indent 4 spaces
This commit is contained in:
parent
7628bd8c76
commit
10ee30f1b8
5 changed files with 868 additions and 869 deletions
|
@ -177,8 +177,8 @@ private:
|
||||||
|
|
||||||
string _visit_pattern(const string& pattern, const string& name) {
|
string _visit_pattern(const string& pattern, const string& name) {
|
||||||
if (!(pattern.front() == '^' && pattern.back() == '$')) {
|
if (!(pattern.front() == '^' && pattern.back() == '$')) {
|
||||||
_errors.push_back("Pattern must start with '^' and end with '$'");
|
_errors.push_back("Pattern must start with '^' and end with '$'");
|
||||||
return "";
|
return "";
|
||||||
}
|
}
|
||||||
string sub_pattern = pattern.substr(1, pattern.length() - 2);
|
string sub_pattern = pattern.substr(1, pattern.length() - 2);
|
||||||
unordered_map<string, string> sub_rule_ids;
|
unordered_map<string, string> sub_rule_ids;
|
||||||
|
@ -212,21 +212,21 @@ private:
|
||||||
|
|
||||||
string literal;
|
string literal;
|
||||||
auto flush_literal = [&]() {
|
auto flush_literal = [&]() {
|
||||||
if (literal.empty()) {
|
if (literal.empty()) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
ret.push_back(make_pair(literal, true));
|
ret.push_back(make_pair(literal, true));
|
||||||
literal.clear();
|
literal.clear();
|
||||||
return true;
|
return true;
|
||||||
};
|
};
|
||||||
|
|
||||||
for (const auto& item : seq) {
|
for (const auto& item : seq) {
|
||||||
auto is_literal = item.second;
|
auto is_literal = item.second;
|
||||||
if (is_literal) {
|
if (is_literal) {
|
||||||
literal += item.first;
|
literal += item.first;
|
||||||
} else {
|
} else {
|
||||||
flush_literal();
|
flush_literal();
|
||||||
ret.push_back(item);
|
ret.push_back(item);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
flush_literal();
|
flush_literal();
|
||||||
|
@ -254,7 +254,7 @@ private:
|
||||||
} else if (c == ')') {
|
} else if (c == ')') {
|
||||||
i++;
|
i++;
|
||||||
if (start > 0 && sub_pattern[start - 1] != '(') {
|
if (start > 0 && sub_pattern[start - 1] != '(') {
|
||||||
_errors.push_back("Unbalanced parentheses");
|
_errors.push_back("Unbalanced parentheses");
|
||||||
}
|
}
|
||||||
return join_seq();
|
return join_seq();
|
||||||
} else if (c == '[') {
|
} else if (c == '[') {
|
||||||
|
@ -270,7 +270,7 @@ private:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (i >= length) {
|
if (i >= length) {
|
||||||
_errors.push_back("Unbalanced square brackets");
|
_errors.push_back("Unbalanced square brackets");
|
||||||
}
|
}
|
||||||
square_brackets += ']';
|
square_brackets += ']';
|
||||||
i++;
|
i++;
|
||||||
|
@ -289,7 +289,7 @@ private:
|
||||||
i++;
|
i++;
|
||||||
}
|
}
|
||||||
if (i >= length) {
|
if (i >= length) {
|
||||||
_errors.push_back("Unbalanced curly brackets");
|
_errors.push_back("Unbalanced curly brackets");
|
||||||
}
|
}
|
||||||
curly_brackets += '}';
|
curly_brackets += '}';
|
||||||
i++;
|
i++;
|
||||||
|
@ -547,8 +547,8 @@ public:
|
||||||
for (size_t i = 1; i < tokens.size(); ++i) {
|
for (size_t i = 1; i < tokens.size(); ++i) {
|
||||||
string sel = tokens[i];
|
string sel = tokens[i];
|
||||||
if (target.is_null() || !target.contains(sel)) {
|
if (target.is_null() || !target.contains(sel)) {
|
||||||
_errors.push_back("Error resolving ref " + ref + ": " + sel + " not in " + target.dump());
|
_errors.push_back("Error resolving ref " + ref + ": " + sel + " not in " + target.dump());
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
target = target[sel];
|
target = target[sel];
|
||||||
}
|
}
|
||||||
|
@ -698,8 +698,8 @@ public:
|
||||||
return _add_rule(rule_name, "object");
|
return _add_rule(rule_name, "object");
|
||||||
} else {
|
} else {
|
||||||
if (!schema_type.is_string() || PRIMITIVE_RULES.find(schema_type.get<string>()) == PRIMITIVE_RULES.end()) {
|
if (!schema_type.is_string() || PRIMITIVE_RULES.find(schema_type.get<string>()) == PRIMITIVE_RULES.end()) {
|
||||||
_errors.push_back("Unrecognized schema: " + schema.dump());
|
_errors.push_back("Unrecognized schema: " + schema.dump());
|
||||||
return "";
|
return "";
|
||||||
}
|
}
|
||||||
// TODO: support minimum, maximum, exclusiveMinimum, exclusiveMaximum at least for zero
|
// TODO: support minimum, maximum, exclusiveMinimum, exclusiveMaximum at least for zero
|
||||||
return _add_rule(rule_name == "root" ? "root" : schema_type.get<string>(), PRIMITIVE_RULES.at(schema_type.get<string>()));
|
return _add_rule(rule_name == "root" ? "root" : schema_type.get<string>(), PRIMITIVE_RULES.at(schema_type.get<string>()));
|
||||||
|
@ -707,12 +707,12 @@ public:
|
||||||
}
|
}
|
||||||
|
|
||||||
void check_errors() {
|
void check_errors() {
|
||||||
if (!_errors.empty()) {
|
if (!_errors.empty()) {
|
||||||
throw std::runtime_error("JSON schema conversion failed:\n" + join(_errors.begin(), _errors.end(), "\n"));
|
throw std::runtime_error("JSON schema conversion failed:\n" + join(_errors.begin(), _errors.end(), "\n"));
|
||||||
}
|
}
|
||||||
if (!_warnings.empty()) {
|
if (!_warnings.empty()) {
|
||||||
std::cerr << "WARNING: JSON schema conversion was incomplete: " + join(_warnings.begin(), _warnings.end(), "; ") << std::endl;
|
std::cerr << "WARNING: JSON schema conversion was incomplete: " + join(_warnings.begin(), _warnings.end(), "; ") << std::endl;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
string format_grammar() {
|
string format_grammar() {
|
||||||
|
@ -725,10 +725,10 @@ public:
|
||||||
};
|
};
|
||||||
|
|
||||||
string json_schema_to_grammar(const json& schema) {
|
string json_schema_to_grammar(const json& schema) {
|
||||||
SchemaConverter converter([](const string&) { return json::object(); }, /* dotall= */ false);
|
SchemaConverter converter([](const string&) { return json::object(); }, /* dotall= */ false);
|
||||||
auto copy = schema;
|
auto copy = schema;
|
||||||
converter.resolve_refs(copy, "input");
|
converter.resolve_refs(copy, "input");
|
||||||
converter.visit(copy, "");
|
converter.visit(copy, "");
|
||||||
converter.check_errors();
|
converter.check_errors();
|
||||||
return converter.format_grammar();
|
return converter.format_grammar();
|
||||||
}
|
}
|
||||||
|
|
|
@ -4,71 +4,71 @@
|
||||||
#! python json-schema-pydantic-example.py
|
#! python json-schema-pydantic-example.py
|
||||||
|
|
||||||
from pydantic import BaseModel, TypeAdapter
|
from pydantic import BaseModel, TypeAdapter
|
||||||
from annotated_types import MaxLen, MinLen
|
from annotated_types import MinLen
|
||||||
from typing import Annotated, Iterable, List, Optional
|
from typing import Annotated, List, Optional
|
||||||
import enum, json, requests
|
import json, requests
|
||||||
|
|
||||||
if True:
|
if True:
|
||||||
|
|
||||||
def create_completion(*, response_model=None, endpoint="http://localhost:8080/v1/chat/completions", messages, **kwargs):
|
def create_completion(*, response_model=None, endpoint="http://localhost:8080/v1/chat/completions", messages, **kwargs):
|
||||||
'''
|
'''
|
||||||
Creates a chat completion using an OpenAI-compatible endpoint w/ JSON schema support
|
Creates a chat completion using an OpenAI-compatible endpoint w/ JSON schema support
|
||||||
(llama.cpp server, llama-cpp-python, Anyscale / Together...)
|
(llama.cpp server, llama-cpp-python, Anyscale / Together...)
|
||||||
|
|
||||||
The response_model param takes a type (+ supports Pydantic) and behaves just as w/ Instructor (see below)
|
The response_model param takes a type (+ supports Pydantic) and behaves just as w/ Instructor (see below)
|
||||||
'''
|
'''
|
||||||
if response_model:
|
if response_model:
|
||||||
type_adapter = TypeAdapter(response_model)
|
type_adapter = TypeAdapter(response_model)
|
||||||
schema = type_adapter.json_schema()
|
schema = type_adapter.json_schema()
|
||||||
messages = [{
|
messages = [{
|
||||||
"role": "system",
|
"role": "system",
|
||||||
"content": f"You respond in JSON format with the following schema: {json.dumps(schema, indent=2)}"
|
"content": f"You respond in JSON format with the following schema: {json.dumps(schema, indent=2)}"
|
||||||
}] + messages
|
}] + messages
|
||||||
response_format={"type": "json_object", "schema": schema}
|
response_format={"type": "json_object", "schema": schema}
|
||||||
|
|
||||||
data = requests.post(endpoint, headers={"Content-Type": "application/json"},
|
data = requests.post(endpoint, headers={"Content-Type": "application/json"},
|
||||||
json=dict(messages=messages, response_format=response_format, **kwargs)).json()
|
json=dict(messages=messages, response_format=response_format, **kwargs)).json()
|
||||||
if 'error' in data:
|
if 'error' in data:
|
||||||
raise Exception(data['error']['message'])
|
raise Exception(data['error']['message'])
|
||||||
|
|
||||||
content = data["choices"][0]["message"]["content"]
|
content = data["choices"][0]["message"]["content"]
|
||||||
return type_adapter.validate_json(content) if type_adapter else content
|
return type_adapter.validate_json(content) if type_adapter else content
|
||||||
|
|
||||||
else:
|
else:
|
||||||
|
|
||||||
# This alternative branch uses Instructor + OpenAI client lib.
|
# This alternative branch uses Instructor + OpenAI client lib.
|
||||||
# Instructor support streamed iterable responses, retry & more.
|
# Instructor support streamed iterable responses, retry & more.
|
||||||
# (see https://python.useinstructor.com/)
|
# (see https://python.useinstructor.com/)
|
||||||
#! pip install instructor openai
|
#! pip install instructor openai
|
||||||
import instructor, openai
|
import instructor, openai
|
||||||
client = instructor.patch(
|
client = instructor.patch(
|
||||||
openai.OpenAI(api_key="123", base_url="http://localhost:8080"),
|
openai.OpenAI(api_key="123", base_url="http://localhost:8080"),
|
||||||
mode=instructor.Mode.JSON_SCHEMA)
|
mode=instructor.Mode.JSON_SCHEMA)
|
||||||
create_completion = client.chat.completions.create
|
create_completion = client.chat.completions.create
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
|
||||||
class QAPair(BaseModel):
|
class QAPair(BaseModel):
|
||||||
question: str
|
question: str
|
||||||
concise_answer: str
|
concise_answer: str
|
||||||
justification: str
|
justification: str
|
||||||
|
|
||||||
class PyramidalSummary(BaseModel):
|
class PyramidalSummary(BaseModel):
|
||||||
title: str
|
title: str
|
||||||
summary: str
|
summary: str
|
||||||
question_answers: Annotated[List[QAPair], MinLen(2)]
|
question_answers: Annotated[List[QAPair], MinLen(2)]
|
||||||
sub_sections: Optional[Annotated[List['PyramidalSummary'], MinLen(2)]]
|
sub_sections: Optional[Annotated[List['PyramidalSummary'], MinLen(2)]]
|
||||||
|
|
||||||
print("# Summary\n", create_completion(
|
print("# Summary\n", create_completion(
|
||||||
model="...",
|
model="...",
|
||||||
response_model=PyramidalSummary,
|
response_model=PyramidalSummary,
|
||||||
messages=[{
|
messages=[{
|
||||||
"role": "user",
|
"role": "user",
|
||||||
"content": f"""
|
"content": f"""
|
||||||
You are a highly efficient corporate document summarizer.
|
You are a highly efficient corporate document summarizer.
|
||||||
Create a pyramidal summary of an imaginary internal document about our company processes
|
Create a pyramidal summary of an imaginary internal document about our company processes
|
||||||
(starting high-level, going down to each sub sections).
|
(starting high-level, going down to each sub sections).
|
||||||
Keep questions short, and answers even shorter (trivia / quizz style).
|
Keep questions short, and answers even shorter (trivia / quizz style).
|
||||||
"""
|
"""
|
||||||
}]))
|
}]))
|
||||||
|
|
|
@ -4,17 +4,17 @@ assert len(sys.argv) >= 2
|
||||||
[_, pattern, *rest] = sys.argv
|
[_, pattern, *rest] = sys.argv
|
||||||
|
|
||||||
print(subprocess.check_output(
|
print(subprocess.check_output(
|
||||||
[
|
[
|
||||||
"python",
|
"python",
|
||||||
os.path.join(
|
os.path.join(
|
||||||
os.path.dirname(os.path.realpath(__file__)),
|
os.path.dirname(os.path.realpath(__file__)),
|
||||||
"json-schema-to-grammar.py"),
|
"json-schema-to-grammar.py"),
|
||||||
*rest,
|
*rest,
|
||||||
"-",
|
"-",
|
||||||
"--raw-pattern",
|
"--raw-pattern",
|
||||||
],
|
],
|
||||||
text=True,
|
text=True,
|
||||||
input=json.dumps({
|
input=json.dumps({
|
||||||
"type": "string",
|
"type": "string",
|
||||||
"pattern": pattern,
|
"pattern": pattern,
|
||||||
}, indent=2)))
|
}, indent=2)))
|
||||||
|
|
|
@ -377,14 +377,14 @@ static json oaicompat_completion_params_parse(
|
||||||
}
|
}
|
||||||
|
|
||||||
if (body.contains("response_format")) {
|
if (body.contains("response_format")) {
|
||||||
auto response_format = json_value(body, "response_format", json::object());
|
auto response_format = json_value(body, "response_format", json::object());
|
||||||
if (response_format.contains("type")) {
|
if (response_format.contains("type")) {
|
||||||
if (response_format["type"] == "json_object") {
|
if (response_format["type"] == "json_object") {
|
||||||
llama_params["json_schema"] = json_value(response_format, "schema", json::object());
|
llama_params["json_schema"] = json_value(response_format, "schema", json::object());
|
||||||
} else {
|
} else {
|
||||||
throw std::runtime_error("response_format type not supported: " + response_format["type"].dump());
|
throw std::runtime_error("response_format type not supported: " + response_format["type"].dump());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Handle 'stop' field
|
// Handle 'stop' field
|
||||||
|
|
File diff suppressed because it is too large
Load diff
Loading…
Add table
Add a link
Reference in a new issue