From 2f1a087c6bb5ec2e56d3785f3f483f3c2647b254 Mon Sep 17 00:00:00 2001 From: ochafik Date: Sat, 22 Jun 2024 20:52:02 +0100 Subject: [PATCH] json: fix additionalProperties default, uncomment tests --- common/json-schema-to-grammar.cpp | 6 ++++-- examples/json_schema_to_grammar.py | 14 ++++++------- .../server/public/json-schema-to-grammar.mjs | 14 ++++++------- tests/test-grammar-integration.cpp | 17 +--------------- tests/test-json-schema-to-grammar.cpp | 20 +++++++++++++------ 5 files changed, 31 insertions(+), 40 deletions(-) diff --git a/common/json-schema-to-grammar.cpp b/common/json-schema-to-grammar.cpp index 275bdb97c..0bc4d2229 100644 --- a/common/json-schema-to-grammar.cpp +++ b/common/json-schema-to-grammar.cpp @@ -401,6 +401,8 @@ private: std::map children; bool is_end_of_string; + TrieNode() : is_end_of_string(false) {} + void insert(const std::string & string) { auto node = this; for (char c : string) { @@ -491,7 +493,7 @@ private: } prop_names.push_back(prop_name); } - if (additional_properties.is_object() || (additional_properties.is_boolean() && additional_properties.get())) { + if (additional_properties.is_null() || additional_properties.is_object() || (additional_properties.is_boolean() && additional_properties.get())) { std::string sub_name = name + (name.empty() ? "" : "-") + "additional"; std::string value_rule = additional_properties.is_object() ? visit(additional_properties, sub_name + "-value") @@ -695,7 +697,7 @@ public: return _add_rule(rule_name, _build_object_rule( properties, required, name, - schema.contains("additionalProperties") ? schema["additionalProperties"] : json::object())); + schema.contains("additionalProperties") ? schema["additionalProperties"] : json())); } else if ((schema_type.is_null() || schema_type == "object") && schema.contains("allOf")) { std::unordered_set required; std::vector> properties; diff --git a/examples/json_schema_to_grammar.py b/examples/json_schema_to_grammar.py index 2f8f5ae9c..9f0bccfcf 100755 --- a/examples/json_schema_to_grammar.py +++ b/examples/json_schema_to_grammar.py @@ -4,7 +4,7 @@ import itertools import json import re import sys -from typing import Any, Dict, List, Set, Tuple, Union +from typing import Any, Dict, List, Optional, Set, Tuple, Union def _build_repetition(item_rule, min_items, max_items, separator_rule=None): @@ -416,10 +416,7 @@ class SchemaConverter: ('additionalProperties' in schema and schema['additionalProperties'] is not True)): required = set(schema.get('required', [])) properties = list(schema.get('properties', {}).items()) - additional_properties = schema.get('additionalProperties', True) - if additional_properties is None: - additional_properties = True - return self._add_rule(rule_name, self._build_object_rule(properties, required, name, additional_properties)) + return self._add_rule(rule_name, self._build_object_rule(properties, required, name, schema.get('additionalProperties'))) elif schema_type in (None, 'object') and 'allOf' in schema: required = set() @@ -498,7 +495,7 @@ class SchemaConverter: self._add_primitive(dep, dep_rule) return n - def _build_object_rule(self, properties: List[Tuple[str, Any]], required: Set[str], name: str, additional_properties: Union[bool, Any]): + def _build_object_rule(self, properties: List[Tuple[str, Any]], required: Set[str], name: str, additional_properties: Optional[Union[bool, Any]]): prop_order = self._prop_order # sort by position in prop_order (if specified) then by original order sorted_props = [kv[0] for _, kv in sorted(enumerate(properties), key=lambda ikv: (prop_order.get(ikv[1][0], len(prop_order)), ikv[0]))] @@ -513,9 +510,10 @@ class SchemaConverter: required_props = [k for k in sorted_props if k in required] optional_props = [k for k in sorted_props if k not in required] - if additional_properties == True or isinstance(additional_properties, dict): + if additional_properties != False: sub_name = f'{name}{"-" if name else ""}additional' - value_rule = self.visit({} if additional_properties == True else additional_properties, f'{sub_name}-value') + value_rule = self.visit(additional_properties, f'{sub_name}-value') if isinstance(additional_properties, dict) else \ + self._add_primitive('value', PRIMITIVE_RULES['value']) key_rule = self._add_primitive('string', PRIMITIVE_RULES['string']) if not sorted_props \ else self._add_rule(f'{sub_name}-k', self._not_strings(sorted_props)) diff --git a/examples/server/public/json-schema-to-grammar.mjs b/examples/server/public/json-schema-to-grammar.mjs index 3a7b6c86a..9bf32631b 100644 --- a/examples/server/public/json-schema-to-grammar.mjs +++ b/examples/server/public/json-schema-to-grammar.mjs @@ -432,11 +432,7 @@ export class SchemaConverter { ('additionalProperties' in schema && schema.additionalProperties !== true))) { const required = new Set(schema.required || []); const properties = Object.entries(schema.properties ?? {}); - let additionalProperties = schema.additionalProperties; - if (additionalProperties === undefined) { - additionalProperties = true; - } - return this._addRule(ruleName, this._buildObjectRule(properties, required, name, additionalProperties)); + return this._addRule(ruleName, this._buildObjectRule(properties, required, name, schema.additionalProperties)); } else if ((schemaType === undefined || schemaType === 'object') && 'allOf' in schema) { const required = new Set(); const properties = []; @@ -466,7 +462,7 @@ export class SchemaConverter { } } - return this._addRule(ruleName, this._buildObjectRule(properties, required, name, /* additionalProperties= */ false)); + return this._addRule(ruleName, this._buildObjectRule(properties, required, name, null)); } else if ((schemaType === undefined || schemaType === 'array') && ('items' in schema || 'prefixItems' in schema)) { const items = schema.items ?? schema.prefixItems; if (Array.isArray(items)) { @@ -542,9 +538,11 @@ export class SchemaConverter { const requiredProps = sortedProps.filter(k => required.has(k)); const optionalProps = sortedProps.filter(k => !required.has(k)); - if (typeof additionalProperties === 'object' || additionalProperties === true) { + if (additionalProperties !== false) { const subName = `${name ?? ''}${name ? '-' : ''}additional`; - const valueRule = this.visit(additionalProperties === true ? {} : additionalProperties, `${subName}-value`); + const valueRule = + additionalProperties != null && typeof additionalProperties === 'object' ? this.visit(additionalProperties, `${subName}-value`) + : this._addPrimitive('value', PRIMITIVE_RULES['value']); const key_rule = sortedProps.length === 0 ? this._addPrimitive('string', PRIMITIVE_RULES['string']) diff --git a/tests/test-grammar-integration.cpp b/tests/test-grammar-integration.cpp index 96f90c01e..6b2181a6a 100644 --- a/tests/test-grammar-integration.cpp +++ b/tests/test-grammar-integration.cpp @@ -15,8 +15,6 @@ using json = nlohmann::ordered_json; -//#define INCLUDE_FAILING_TESTS 1 - static llama_grammar* build_grammar(const std::string & grammar_str) { auto parsed_grammar = grammar_parser::parse(grammar_str.c_str()); @@ -823,12 +821,8 @@ static void test_json_schema() { // "By extension, even an empty object is valid" R"""({})""", // "By default, providing additional properties is valid" -#ifdef INCLUDE_FAILING_TESTS - // TODO: The following should pass, but currently FAILS. Additional properties should be permitted by default. R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue", "direction":"NW"})""", - // TODO: Spaces should be permitted around enum values, but currently they fail to pass. R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue" })""", -#endif }, // Failing strings { @@ -861,20 +855,13 @@ static void test_json_schema() { { // "By extension, even an empty object is valid" R"""({})""", -#ifdef INCLUDE_FAILING_TESTS - // TODO: Following line should pass and doesn't R"""({"number":1600,"street_name":"Pennsylvania","street_type":"Avenue"})""", // "By default, leaving out properties is valid" - // TODO: Following line should pass and doesn't R"""({ "street_name": "Pennsylvania" })""", - // TODO: Following line should pass and doesn't R"""({ "number": 1600, "street_name": "Pennsylvania" })""", // "By default, providing additional properties is valid" - // TODO: The following should pass, but currently FAILS. Additional properties should be permitted by default. R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue", "direction":"NW"})""", - // TODO: Spaces should be permitted around enum values, but currently they fail to pass. R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue" })""", -#endif }, // Failing strings { @@ -906,10 +893,8 @@ static void test_json_schema() { R"""({ "number": 1600, "street_type":"Avenue"})""", R"""({ "number": 1600, "street_name": "Pennsylvania" })""", R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue"})""", -#ifdef INCLUDE_FAILING_TESTS - // TODO: Spaces should be permitted around enum values, but currently they fail to pass. + // Spaces are permitted around enum values R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue" })""", -#endif }, // Failing strings { diff --git a/tests/test-json-schema-to-grammar.cpp b/tests/test-json-schema-to-grammar.cpp index b5ce8d7c7..6399a5109 100755 --- a/tests/test-json-schema-to-grammar.cpp +++ b/tests/test-json-schema-to-grammar.cpp @@ -827,8 +827,7 @@ static void test_all(const std::string & lang, std::function