json: indent 4 spaces

This commit is contained in:
Olivier Chafik 2024-03-20 14:47:21 +00:00
parent 7628bd8c76
commit 10ee30f1b8
5 changed files with 868 additions and 869 deletions

View file

@ -177,8 +177,8 @@ private:
string _visit_pattern(const string& pattern, const string& name) {
if (!(pattern.front() == '^' && pattern.back() == '$')) {
_errors.push_back("Pattern must start with '^' and end with '$'");
return "";
_errors.push_back("Pattern must start with '^' and end with '$'");
return "";
}
string sub_pattern = pattern.substr(1, pattern.length() - 2);
unordered_map<string, string> sub_rule_ids;
@ -212,21 +212,21 @@ private:
string literal;
auto flush_literal = [&]() {
if (literal.empty()) {
return false;
}
ret.push_back(make_pair(literal, true));
literal.clear();
return true;
if (literal.empty()) {
return false;
}
ret.push_back(make_pair(literal, true));
literal.clear();
return true;
};
for (const auto& item : seq) {
auto is_literal = item.second;
if (is_literal) {
literal += item.first;
literal += item.first;
} else {
flush_literal();
ret.push_back(item);
flush_literal();
ret.push_back(item);
}
}
flush_literal();
@ -254,7 +254,7 @@ private:
} else if (c == ')') {
i++;
if (start > 0 && sub_pattern[start - 1] != '(') {
_errors.push_back("Unbalanced parentheses");
_errors.push_back("Unbalanced parentheses");
}
return join_seq();
} else if (c == '[') {
@ -270,7 +270,7 @@ private:
}
}
if (i >= length) {
_errors.push_back("Unbalanced square brackets");
_errors.push_back("Unbalanced square brackets");
}
square_brackets += ']';
i++;
@ -289,7 +289,7 @@ private:
i++;
}
if (i >= length) {
_errors.push_back("Unbalanced curly brackets");
_errors.push_back("Unbalanced curly brackets");
}
curly_brackets += '}';
i++;
@ -547,8 +547,8 @@ public:
for (size_t i = 1; i < tokens.size(); ++i) {
string sel = tokens[i];
if (target.is_null() || !target.contains(sel)) {
_errors.push_back("Error resolving ref " + ref + ": " + sel + " not in " + target.dump());
return;
_errors.push_back("Error resolving ref " + ref + ": " + sel + " not in " + target.dump());
return;
}
target = target[sel];
}
@ -698,8 +698,8 @@ public:
return _add_rule(rule_name, "object");
} else {
if (!schema_type.is_string() || PRIMITIVE_RULES.find(schema_type.get<string>()) == PRIMITIVE_RULES.end()) {
_errors.push_back("Unrecognized schema: " + schema.dump());
return "";
_errors.push_back("Unrecognized schema: " + schema.dump());
return "";
}
// TODO: support minimum, maximum, exclusiveMinimum, exclusiveMaximum at least for zero
return _add_rule(rule_name == "root" ? "root" : schema_type.get<string>(), PRIMITIVE_RULES.at(schema_type.get<string>()));
@ -707,12 +707,12 @@ public:
}
void check_errors() {
if (!_errors.empty()) {
throw std::runtime_error("JSON schema conversion failed:\n" + join(_errors.begin(), _errors.end(), "\n"));
}
if (!_warnings.empty()) {
std::cerr << "WARNING: JSON schema conversion was incomplete: " + join(_warnings.begin(), _warnings.end(), "; ") << std::endl;
}
if (!_errors.empty()) {
throw std::runtime_error("JSON schema conversion failed:\n" + join(_errors.begin(), _errors.end(), "\n"));
}
if (!_warnings.empty()) {
std::cerr << "WARNING: JSON schema conversion was incomplete: " + join(_warnings.begin(), _warnings.end(), "; ") << std::endl;
}
}
string format_grammar() {
@ -725,10 +725,10 @@ public:
};
string json_schema_to_grammar(const json& schema) {
SchemaConverter converter([](const string&) { return json::object(); }, /* dotall= */ false);
auto copy = schema;
converter.resolve_refs(copy, "input");
converter.visit(copy, "");
converter.check_errors();
return converter.format_grammar();
SchemaConverter converter([](const string&) { return json::object(); }, /* dotall= */ false);
auto copy = schema;
converter.resolve_refs(copy, "input");
converter.visit(copy, "");
converter.check_errors();
return converter.format_grammar();
}

View file

@ -4,71 +4,71 @@
#! python json-schema-pydantic-example.py
from pydantic import BaseModel, TypeAdapter
from annotated_types import MaxLen, MinLen
from typing import Annotated, Iterable, List, Optional
import enum, json, requests
from annotated_types import MinLen
from typing import Annotated, List, Optional
import json, requests
if True:
def create_completion(*, response_model=None, endpoint="http://localhost:8080/v1/chat/completions", messages, **kwargs):
'''
Creates a chat completion using an OpenAI-compatible endpoint w/ JSON schema support
(llama.cpp server, llama-cpp-python, Anyscale / Together...)
def create_completion(*, response_model=None, endpoint="http://localhost:8080/v1/chat/completions", messages, **kwargs):
'''
Creates a chat completion using an OpenAI-compatible endpoint w/ JSON schema support
(llama.cpp server, llama-cpp-python, Anyscale / Together...)
The response_model param takes a type (+ supports Pydantic) and behaves just as w/ Instructor (see below)
'''
if response_model:
type_adapter = TypeAdapter(response_model)
schema = type_adapter.json_schema()
messages = [{
"role": "system",
"content": f"You respond in JSON format with the following schema: {json.dumps(schema, indent=2)}"
}] + messages
response_format={"type": "json_object", "schema": schema}
The response_model param takes a type (+ supports Pydantic) and behaves just as w/ Instructor (see below)
'''
if response_model:
type_adapter = TypeAdapter(response_model)
schema = type_adapter.json_schema()
messages = [{
"role": "system",
"content": f"You respond in JSON format with the following schema: {json.dumps(schema, indent=2)}"
}] + messages
response_format={"type": "json_object", "schema": schema}
data = requests.post(endpoint, headers={"Content-Type": "application/json"},
json=dict(messages=messages, response_format=response_format, **kwargs)).json()
if 'error' in data:
raise Exception(data['error']['message'])
data = requests.post(endpoint, headers={"Content-Type": "application/json"},
json=dict(messages=messages, response_format=response_format, **kwargs)).json()
if 'error' in data:
raise Exception(data['error']['message'])
content = data["choices"][0]["message"]["content"]
return type_adapter.validate_json(content) if type_adapter else content
content = data["choices"][0]["message"]["content"]
return type_adapter.validate_json(content) if type_adapter else content
else:
# This alternative branch uses Instructor + OpenAI client lib.
# Instructor support streamed iterable responses, retry & more.
# (see https://python.useinstructor.com/)
#! pip install instructor openai
import instructor, openai
client = instructor.patch(
openai.OpenAI(api_key="123", base_url="http://localhost:8080"),
mode=instructor.Mode.JSON_SCHEMA)
create_completion = client.chat.completions.create
# This alternative branch uses Instructor + OpenAI client lib.
# Instructor support streamed iterable responses, retry & more.
# (see https://python.useinstructor.com/)
#! pip install instructor openai
import instructor, openai
client = instructor.patch(
openai.OpenAI(api_key="123", base_url="http://localhost:8080"),
mode=instructor.Mode.JSON_SCHEMA)
create_completion = client.chat.completions.create
if __name__ == '__main__':
class QAPair(BaseModel):
question: str
concise_answer: str
justification: str
class QAPair(BaseModel):
question: str
concise_answer: str
justification: str
class PyramidalSummary(BaseModel):
title: str
summary: str
question_answers: Annotated[List[QAPair], MinLen(2)]
sub_sections: Optional[Annotated[List['PyramidalSummary'], MinLen(2)]]
class PyramidalSummary(BaseModel):
title: str
summary: str
question_answers: Annotated[List[QAPair], MinLen(2)]
sub_sections: Optional[Annotated[List['PyramidalSummary'], MinLen(2)]]
print("# Summary\n", create_completion(
model="...",
response_model=PyramidalSummary,
messages=[{
"role": "user",
"content": f"""
You are a highly efficient corporate document summarizer.
Create a pyramidal summary of an imaginary internal document about our company processes
(starting high-level, going down to each sub sections).
Keep questions short, and answers even shorter (trivia / quizz style).
"""
}]))
print("# Summary\n", create_completion(
model="...",
response_model=PyramidalSummary,
messages=[{
"role": "user",
"content": f"""
You are a highly efficient corporate document summarizer.
Create a pyramidal summary of an imaginary internal document about our company processes
(starting high-level, going down to each sub sections).
Keep questions short, and answers even shorter (trivia / quizz style).
"""
}]))

View file

@ -4,17 +4,17 @@ assert len(sys.argv) >= 2
[_, pattern, *rest] = sys.argv
print(subprocess.check_output(
[
"python",
os.path.join(
os.path.dirname(os.path.realpath(__file__)),
"json-schema-to-grammar.py"),
*rest,
"-",
"--raw-pattern",
],
text=True,
input=json.dumps({
"type": "string",
"pattern": pattern,
}, indent=2)))
[
"python",
os.path.join(
os.path.dirname(os.path.realpath(__file__)),
"json-schema-to-grammar.py"),
*rest,
"-",
"--raw-pattern",
],
text=True,
input=json.dumps({
"type": "string",
"pattern": pattern,
}, indent=2)))

View file

@ -377,14 +377,14 @@ static json oaicompat_completion_params_parse(
}
if (body.contains("response_format")) {
auto response_format = json_value(body, "response_format", json::object());
if (response_format.contains("type")) {
if (response_format["type"] == "json_object") {
llama_params["json_schema"] = json_value(response_format, "schema", json::object());
} else {
throw std::runtime_error("response_format type not supported: " + response_format["type"].dump());
auto response_format = json_value(body, "response_format", json::object());
if (response_format.contains("type")) {
if (response_format["type"] == "json_object") {
llama_params["json_schema"] = json_value(response_format, "schema", json::object());
} else {
throw std::runtime_error("response_format type not supported: " + response_format["type"].dump());
}
}
}
}
// Handle 'stop' field

File diff suppressed because it is too large Load diff