json
: fix additionalProperties, allow space after enum/const (#7840)
* json: default additionalProperty to true * json: don't force additional props after normal properties! * json: allow space after enum/const * json: update pydantic example to set additionalProperties: false * json: prevent additional props to redefine a typed prop * port not_strings to python, add trailing space * fix not_strings & port to js+py * Update json-schema-to-grammar.cpp * fix _not_strings for substring overlaps * json: fix additionalProperties default, uncomment tests * json: add integ. test case for additionalProperties * json: nit: simplify condition * reformat grammar integ tests w/ R"""()""" strings where there's escapes * update # tokens in server test: consts can now have trailing space
This commit is contained in:
parent
163d50adaf
commit
6777c544bd
7 changed files with 497 additions and 245 deletions
|
@ -3,7 +3,7 @@
|
|||
#! pip install pydantic
|
||||
#! python json-schema-pydantic-example.py
|
||||
|
||||
from pydantic import BaseModel, TypeAdapter
|
||||
from pydantic import BaseModel, Extra, TypeAdapter
|
||||
from annotated_types import MinLen
|
||||
from typing import Annotated, List, Optional
|
||||
import json, requests
|
||||
|
@ -50,12 +50,16 @@ else:
|
|||
if __name__ == '__main__':
|
||||
|
||||
class QAPair(BaseModel):
|
||||
class Config:
|
||||
extra = 'forbid' # triggers additionalProperties: false in the JSON schema
|
||||
question: str
|
||||
concise_answer: str
|
||||
justification: str
|
||||
stars: Annotated[int, Field(ge=1, le=5)]
|
||||
|
||||
class PyramidalSummary(BaseModel):
|
||||
class Config:
|
||||
extra = 'forbid' # triggers additionalProperties: false in the JSON schema
|
||||
title: str
|
||||
summary: str
|
||||
question_answers: Annotated[List[QAPair], MinLen(2)]
|
||||
|
|
|
@ -4,8 +4,7 @@ import itertools
|
|||
import json
|
||||
import re
|
||||
import sys
|
||||
from typing import Any, Callable, Dict, List, Optional, Set, Tuple, Union
|
||||
|
||||
from typing import Any, List, Optional, Set, Tuple, Union
|
||||
|
||||
def _build_repetition(item_rule, min_items, max_items, separator_rule=None):
|
||||
|
||||
|
@ -276,6 +275,51 @@ class SchemaConverter:
|
|||
|
||||
return ''.join(('(', *recurse(0), ')'))
|
||||
|
||||
def _not_strings(self, strings):
|
||||
class TrieNode:
|
||||
def __init__(self):
|
||||
self.children = {}
|
||||
self.is_end_of_string = False
|
||||
|
||||
def insert(self, string):
|
||||
node = self
|
||||
for c in string:
|
||||
node = node.children.setdefault(c, TrieNode())
|
||||
node.is_end_of_string = True
|
||||
|
||||
trie = TrieNode()
|
||||
for s in strings:
|
||||
trie.insert(s)
|
||||
|
||||
char_rule = self._add_primitive('char', PRIMITIVE_RULES['char'])
|
||||
out = ['["] ( ']
|
||||
|
||||
def visit(node):
|
||||
rejects = []
|
||||
first = True
|
||||
for c in sorted(node.children.keys()):
|
||||
child = node.children[c]
|
||||
rejects.append(c)
|
||||
if first:
|
||||
first = False
|
||||
else:
|
||||
out.append(' | ')
|
||||
out.append(f'[{c}]')
|
||||
if child.children:
|
||||
out.append(f' (')
|
||||
visit(child)
|
||||
out.append(')')
|
||||
elif child.is_end_of_string:
|
||||
out.append(f' {char_rule}+')
|
||||
if node.children:
|
||||
if not first:
|
||||
out.append(' | ')
|
||||
out.append(f'[^"{"".join(rejects)}] {char_rule}*')
|
||||
visit(trie)
|
||||
|
||||
out.append(f' ){"" if trie.is_end_of_string else "?"} ["] space')
|
||||
return ''.join(out)
|
||||
|
||||
def _add_rule(self, name, rule):
|
||||
esc_name = INVALID_RULE_CHARS_RE.sub('-', name)
|
||||
if esc_name not in self._rules or self._rules[esc_name] == rule:
|
||||
|
@ -524,10 +568,10 @@ class SchemaConverter:
|
|||
return self._add_rule(rule_name, self._generate_union_rule(name, [{'type': t} for t in schema_type]))
|
||||
|
||||
elif 'const' in schema:
|
||||
return self._add_rule(rule_name, self._generate_constant_rule(schema['const']))
|
||||
return self._add_rule(rule_name, self._generate_constant_rule(schema['const']) + ' space')
|
||||
|
||||
elif 'enum' in schema:
|
||||
rule = ' | '.join((self._generate_constant_rule(v) for v in schema['enum']))
|
||||
rule = '(' + ' | '.join((self._generate_constant_rule(v) for v in schema['enum'])) + ') space'
|
||||
return self._add_rule(rule_name, rule)
|
||||
|
||||
elif schema_type in (None, 'object') and \
|
||||
|
@ -632,7 +676,7 @@ class SchemaConverter:
|
|||
self._add_primitive(dep, dep_rule)
|
||||
return n
|
||||
|
||||
def _build_object_rule(self, properties: List[Tuple[str, Any]], required: Set[str], name: str, additional_properties: Union[bool, Any]):
|
||||
def _build_object_rule(self, properties: List[Tuple[str, Any]], required: Set[str], name: str, additional_properties: Optional[Union[bool, Any]]):
|
||||
prop_order = self._prop_order
|
||||
# sort by position in prop_order (if specified) then by original order
|
||||
sorted_props = [kv[0] for _, kv in sorted(enumerate(properties), key=lambda ikv: (prop_order.get(ikv[1][0], len(prop_order)), ikv[0]))]
|
||||
|
@ -647,12 +691,16 @@ class SchemaConverter:
|
|||
required_props = [k for k in sorted_props if k in required]
|
||||
optional_props = [k for k in sorted_props if k not in required]
|
||||
|
||||
if additional_properties == True or isinstance(additional_properties, dict):
|
||||
if additional_properties != False:
|
||||
sub_name = f'{name}{"-" if name else ""}additional'
|
||||
value_rule = self.visit({} if additional_properties == True else additional_properties, f'{sub_name}-value')
|
||||
value_rule = self.visit(additional_properties, f'{sub_name}-value') if isinstance(additional_properties, dict) else \
|
||||
self._add_primitive('value', PRIMITIVE_RULES['value'])
|
||||
key_rule = self._add_primitive('string', PRIMITIVE_RULES['string']) if not sorted_props \
|
||||
else self._add_rule(f'{sub_name}-k', self._not_strings(sorted_props))
|
||||
|
||||
prop_kv_rule_names["*"] = self._add_rule(
|
||||
f'{sub_name}-kv',
|
||||
self._add_primitive('string', PRIMITIVE_RULES['string']) + f' ":" space {value_rule}'
|
||||
f'{key_rule} ":" space {value_rule}'
|
||||
)
|
||||
optional_props.append("*")
|
||||
|
||||
|
@ -667,15 +715,11 @@ class SchemaConverter:
|
|||
def get_recursive_refs(ks, first_is_optional):
|
||||
[k, *rest] = ks
|
||||
kv_rule_name = prop_kv_rule_names[k]
|
||||
if k == '*':
|
||||
res = self._add_rule(
|
||||
f'{name}{"-" if name else ""}additional-kvs',
|
||||
f'{kv_rule_name} ( "," space ' + kv_rule_name + ' )*'
|
||||
)
|
||||
elif first_is_optional:
|
||||
res = f'( "," space {kv_rule_name} )?'
|
||||
comma_ref = f'( "," space {kv_rule_name} )'
|
||||
if first_is_optional:
|
||||
res = comma_ref + ('*' if k == '*' else '?')
|
||||
else:
|
||||
res = kv_rule_name
|
||||
res = kv_rule_name + (' ' + comma_ref + "*" if k == '*' else '')
|
||||
if len(rest) > 0:
|
||||
res += ' ' + self._add_rule(
|
||||
f'{name}{"-" if name else ""}{k}-rest',
|
||||
|
|
|
@ -532,6 +532,64 @@ export class SchemaConverter {
|
|||
return this._addRule(name, "\"\\\"\" " + toRule(transform()) + " \"\\\"\" space")
|
||||
}
|
||||
|
||||
_notStrings(strings) {
|
||||
class TrieNode {
|
||||
constructor() {
|
||||
this.children = {};
|
||||
this.isEndOfString = false;
|
||||
}
|
||||
|
||||
insert(str) {
|
||||
let node = this;
|
||||
for (const c of str) {
|
||||
node = node.children[c] = node.children[c] || new TrieNode();
|
||||
}
|
||||
node.isEndOfString = true;
|
||||
}
|
||||
}
|
||||
|
||||
const trie = new TrieNode();
|
||||
for (const s of strings) {
|
||||
trie.insert(s);
|
||||
}
|
||||
|
||||
const charRuleName = this._addPrimitive('char', PRIMITIVE_RULES['char']);
|
||||
const out = ['["] ( '];
|
||||
|
||||
const visit = (node) => {
|
||||
const rejects = [];
|
||||
let first = true;
|
||||
for (const c of Object.keys(node.children).sort()) {
|
||||
const child = node.children[c];
|
||||
rejects.push(c);
|
||||
if (first) {
|
||||
first = false;
|
||||
} else {
|
||||
out.push(' | ');
|
||||
}
|
||||
out.push(`[${c}]`);
|
||||
if (Object.keys(child.children).length > 0) {
|
||||
out.push(' (');
|
||||
visit(child);
|
||||
out.push(')');
|
||||
} else if (child.isEndOfString) {
|
||||
out.push(` ${charRuleName}+`);
|
||||
}
|
||||
}
|
||||
if (Object.keys(node.children).length > 0) {
|
||||
if (!first) {
|
||||
out.push(' | ');
|
||||
}
|
||||
out.push(`[^"${rejects.join('')}] ${charRuleName}*`);
|
||||
}
|
||||
};
|
||||
|
||||
visit(trie);
|
||||
|
||||
out.push(` )${trie.isEndOfString ? '' : '?'} ["] space`);
|
||||
return out.join('');
|
||||
}
|
||||
|
||||
_resolveRef(ref) {
|
||||
let refName = ref.split('/').pop();
|
||||
if (!(refName in this._rules) && !this._refsBeingResolved.has(ref)) {
|
||||
|
@ -560,9 +618,9 @@ export class SchemaConverter {
|
|||
} else if (Array.isArray(schemaType)) {
|
||||
return this._addRule(ruleName, this._generateUnionRule(name, schemaType.map(t => ({ type: t }))));
|
||||
} else if ('const' in schema) {
|
||||
return this._addRule(ruleName, this._generateConstantRule(schema.const));
|
||||
return this._addRule(ruleName, this._generateConstantRule(schema.const) + ' space');
|
||||
} else if ('enum' in schema) {
|
||||
const rule = schema.enum.map(v => this._generateConstantRule(v)).join(' | ');
|
||||
const rule = '(' + schema.enum.map(v => this._generateConstantRule(v)).join(' | ') + ') space';
|
||||
return this._addRule(ruleName, rule);
|
||||
} else if ((schemaType === undefined || schemaType === 'object') &&
|
||||
('properties' in schema ||
|
||||
|
@ -599,7 +657,7 @@ export class SchemaConverter {
|
|||
}
|
||||
}
|
||||
|
||||
return this._addRule(ruleName, this._buildObjectRule(properties, required, name, /* additionalProperties= */ false));
|
||||
return this._addRule(ruleName, this._buildObjectRule(properties, required, name, null));
|
||||
} else if ((schemaType === undefined || schemaType === 'array') && ('items' in schema || 'prefixItems' in schema)) {
|
||||
const items = schema.items ?? schema.prefixItems;
|
||||
if (Array.isArray(items)) {
|
||||
|
@ -693,12 +751,19 @@ export class SchemaConverter {
|
|||
const requiredProps = sortedProps.filter(k => required.has(k));
|
||||
const optionalProps = sortedProps.filter(k => !required.has(k));
|
||||
|
||||
if (typeof additionalProperties === 'object' || additionalProperties === true) {
|
||||
if (additionalProperties !== false) {
|
||||
const subName = `${name ?? ''}${name ? '-' : ''}additional`;
|
||||
const valueRule = this.visit(additionalProperties === true ? {} : additionalProperties, `${subName}-value`);
|
||||
const valueRule =
|
||||
additionalProperties != null && typeof additionalProperties === 'object' ? this.visit(additionalProperties, `${subName}-value`)
|
||||
: this._addPrimitive('value', PRIMITIVE_RULES['value']);
|
||||
|
||||
const key_rule =
|
||||
sortedProps.length === 0 ? this._addPrimitive('string', PRIMITIVE_RULES['string'])
|
||||
: this._addRule(`${subName}-k`, this._notStrings(sortedProps));
|
||||
|
||||
propKvRuleNames['*'] = this._addRule(
|
||||
`${subName}-kv`,
|
||||
`${this._addPrimitive('string', PRIMITIVE_RULES['string'])} ":" space ${valueRule}`);
|
||||
`${key_rule} ":" space ${valueRule}`);
|
||||
optionalProps.push('*');
|
||||
}
|
||||
|
||||
|
@ -715,15 +780,11 @@ export class SchemaConverter {
|
|||
const [k, ...rest] = ks;
|
||||
const kvRuleName = propKvRuleNames[k];
|
||||
let res;
|
||||
if (k === '*') {
|
||||
res = this._addRule(
|
||||
`${name ?? ''}${name ? '-' : ''}additional-kvs`,
|
||||
`${kvRuleName} ( "," space ` + kvRuleName + ` )*`
|
||||
)
|
||||
} else if (firstIsOptional) {
|
||||
res = `( "," space ${kvRuleName} )?`;
|
||||
const commaRef = `( "," space ${kvRuleName} )`;
|
||||
if (firstIsOptional) {
|
||||
res = commaRef + (k === '*' ? '*' : '?');
|
||||
} else {
|
||||
res = kvRuleName;
|
||||
res = kvRuleName + (k === '*' ? ' ' + commaRef + '*' : '');
|
||||
}
|
||||
if (rest.length > 0) {
|
||||
res += ' ' + this._addRule(
|
||||
|
|
|
@ -82,7 +82,7 @@ Feature: llama.cpp server
|
|||
|
||||
Examples: Prompts
|
||||
| response_format | n_predicted | re_content |
|
||||
| {"type": "json_object", "schema": {"const": "42"}} | 5 | "42" |
|
||||
| {"type": "json_object", "schema": {"const": "42"}} | 6 | "42" |
|
||||
| {"type": "json_object", "schema": {"items": [{"type": "integer"}]}} | 10 | \[ -300 \] |
|
||||
| {"type": "json_object"} | 10 | \{ " Jacky. |
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue