json: add date, time, date-time formats

This commit is contained in:
ochafik 2024-03-11 04:03:05 +00:00
parent 9a61802a28
commit e1ed7a04d6
3 changed files with 1542 additions and 1454 deletions

View file

@ -17,6 +17,7 @@ PRIMITIVE_RULES = {
'value' : 'object | array | string | number | boolean', 'value' : 'object | array | string | number | boolean',
'object' : '"{" space ( string ":" space value ("," space string ":" space value)* )? "}" space', 'object' : '"{" space ( string ":" space value ("," space string ":" space value)* )? "}" space',
'array' : '"[" space ( value ("," space value)* )? "]" space', 'array' : '"[" space ( value ("," space value)* )? "]" space',
'uuid' : '"\\""' + ' "-" '.join('[0-9a-fA-F]' * n for n in [8, 4, 4, 4, 12]) + '"\\"" space',
'string': r''' "\"" ( 'string': r''' "\"" (
[^"\\] | [^"\\] |
"\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) "\\" (["\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
@ -24,6 +25,16 @@ PRIMITIVE_RULES = {
'null': '"null" space', 'null': '"null" space',
} }
# TODO: support "uri", "email" string formats
DATE_RULES = {
'date' : '[0-9] [0-9] [0-9] [0-9] "-" ( "0" [1-9] | "1" [0-2] ) "-" ( [0-2] [0-9] | "3" [0-1] )',
'time' : '([01] [0-9] | "2" [0-3]) ":" [0-5] [0-9] ":" [0-5] [0-9] ( "." [0-9] [0-9] [0-9] )? ( "Z" | ( "+" | "-" ) ( [01] [0-9] | "2" [0-3] ) ":" [0-5] [0-9] )',
'date-time': 'date "T" time',
'date-string': '"\\"" date "\\"" space',
'time-string': '"\\"" time "\\"" space',
'date-time-string': '"\\"" date-time "\\"" space',
}
INVALID_RULE_CHARS_RE = re.compile(r'[^a-zA-Z0-9-]+') INVALID_RULE_CHARS_RE = re.compile(r'[^a-zA-Z0-9-]+')
GRAMMAR_LITERAL_ESCAPE_RE = re.compile(r'[\r\n"]') GRAMMAR_LITERAL_ESCAPE_RE = re.compile(r'[\r\n"]')
GRAMMAR_RANGE_LITERAL_ESCAPE_RE = re.compile(r'[\r\n"\]\-\\]') GRAMMAR_RANGE_LITERAL_ESCAPE_RE = re.compile(r'[\r\n"\]\-\\]')
@ -32,6 +43,9 @@ GRAMMAR_LITERAL_ESCAPES = {'\r': '\\r', '\n': '\\n', '"': '\\"', '-': '\\-', ']'
NON_LITERAL_SET = set('|.()[]{}*+?') NON_LITERAL_SET = set('|.()[]{}*+?')
ESCAPED_IN_REGEXPS_BUT_NOT_IN_LITERALS = set('{*+?') ESCAPED_IN_REGEXPS_BUT_NOT_IN_LITERALS = set('{*+?')
DATE_PATTERN = '[0-9]{4}-(0[1-9]|1[0-2])-([0-2][0-9]|3[0-1])'
TIME_PATTERN = '([01][0-9]|2[0-3])(:[0-5][0-9]){2}(\\.[0-9]{1,3})?(Z|[+-](([01][0-9]|2[0-3]):[0-5][0-9]))' # Cap millisecond precision w/ 3 digits
class SchemaConverter: class SchemaConverter:
def __init__(self, *, prop_order, allow_fetch, dotall): def __init__(self, *, prop_order, allow_fetch, dotall):
self._prop_order = prop_order self._prop_order = prop_order
@ -285,6 +299,7 @@ class SchemaConverter:
def visit(self, schema, name): def visit(self, schema, name):
schema_type = schema.get('type') schema_type = schema.get('type')
schema_format = schema.get('format')
rule_name = name or 'root' rule_name = name or 'root'
if (ref := schema.get('$ref')) is not None: if (ref := schema.get('$ref')) is not None:
@ -375,20 +390,23 @@ class SchemaConverter:
elif schema_type in (None, 'string') and 'pattern' in schema: elif schema_type in (None, 'string') and 'pattern' in schema:
return self._visit_pattern(schema['pattern'], rule_name) return self._visit_pattern(schema['pattern'], rule_name)
# TODO: support string formats "uri", "date", "date-time"
elif schema_type in (None, 'string') and re.match(r'^uuid[1-5]?$', schema.get('format', '')):
return self._visit_pattern('^"[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}"$', 'uuid')
elif schema_type in (None, 'string') and schema.get('format') == 'date':
# Adapted from full-date at https://www.rfc-editor.org/rfc/rfc3339.txt
return self._visit_pattern('^"[0-9]{4}-(0[1-9]|1[0-2])-([0-2][0-9]|3[0-1])"$', 'date')
elif schema_type == 'object' and len(schema) == 1 or schema_type is None and len(schema) == 0: elif schema_type == 'object' and len(schema) == 1 or schema_type is None and len(schema) == 0:
# This depends on all primitive types # This depends on all primitive types
for t, r in PRIMITIVE_RULES.items(): for t, r in PRIMITIVE_RULES.items():
self._add_rule(t, r) self._add_rule(t, r)
return 'object' return 'object'
elif schema_type in (None, 'string') and re.match(r'^uuid[1-5]?$', schema_format or ''):
return self._add_rule(
'root' if rule_name == 'root' else schema_format,
PRIMITIVE_RULES['uuid']
)
elif schema_type in (None, 'string') and schema_format in DATE_RULES:
for t, r in DATE_RULES.items():
self._add_rule(t, r)
return schema_format + '-string'
else: else:
assert schema_type in PRIMITIVE_RULES, f'Unrecognized schema: {schema}' assert schema_type in PRIMITIVE_RULES, f'Unrecognized schema: {schema}'
# TODO: support minimum, maximum, exclusiveMinimum, exclusiveMaximum at least for zero # TODO: support minimum, maximum, exclusiveMinimum, exclusiveMaximum at least for zero

File diff suppressed because it is too large Load diff

View file

@ -8,6 +8,7 @@ const PRIMITIVE_RULES = {
value: 'object | array | string | number | boolean', value: 'object | array | string | number | boolean',
object: '"{" space ( string ":" space value ("," space string ":" space value)* )? "}" space', object: '"{" space ( string ":" space value ("," space string ":" space value)* )? "}" space',
array: '"[" space ( value ("," space value)* )? "]" space', array: '"[" space ( value ("," space value)* )? "]" space',
uuid: '"\\""' + [8, 4, 4, 4, 12].map(n => [...new Array(n)].map(_ => '[0-9a-fA-F]').join('')).join(' "-" ') + '"\\"" space',
string: ` "\\"" ( string: ` "\\"" (
[^"\\\\] | [^"\\\\] |
"\\\\" (["\\\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F]) "\\\\" (["\\\\/bfnrt] | "u" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F])
@ -15,6 +16,16 @@ const PRIMITIVE_RULES = {
null: '"null" space', null: '"null" space',
}; };
// TODO: support "uri", "email" string formats
const DATE_RULES = {
'date' : '[0-9] [0-9] [0-9] [0-9] "-" ( "0" [1-9] | "1" [0-2] ) "-" ( [0-2] [0-9] | "3" [0-1] )',
'time' : '([01] [0-9] | "2" [0-3]) ":" [0-5] [0-9] ":" [0-5] [0-9] ( "." [0-9] [0-9] [0-9] )? ( "Z" | ( "+" | "-" ) ( [01] [0-9] | "2" [0-3] ) ":" [0-5] [0-9] )',
'date-time': 'date "T" time',
'date-string': '"\\"" date "\\"" space',
'time-string': '"\\"" time "\\"" space',
'date-time-string': '"\\"" date-time "\\"" space',
};
const INVALID_RULE_CHARS_RE = /[^\dA-Za-z-]+/g; const INVALID_RULE_CHARS_RE = /[^\dA-Za-z-]+/g;
const GRAMMAR_LITERAL_ESCAPE_RE = /[\n\r"]/g; const GRAMMAR_LITERAL_ESCAPE_RE = /[\n\r"]/g;
const GRAMMAR_RANGE_LITERAL_ESCAPE_RE = /[\n\r"\]\-\\]/g; const GRAMMAR_RANGE_LITERAL_ESCAPE_RE = /[\n\r"\]\-\\]/g;
@ -314,6 +325,7 @@ export class SchemaConverter {
visit(schema, name) { visit(schema, name) {
const schemaType = schema.type; const schemaType = schema.type;
const schemaFormat = schema.format;
const ruleName = name || 'root'; const ruleName = name || 'root';
const ref = schema.$ref; const ref = schema.$ref;
@ -400,10 +412,14 @@ export class SchemaConverter {
} else if ((schemaType === undefined || schemaType === 'string') && 'pattern' in schema) { } else if ((schemaType === undefined || schemaType === 'string') && 'pattern' in schema) {
return this._visitPattern(schema.pattern, ruleName); return this._visitPattern(schema.pattern, ruleName);
} else if ((schemaType === undefined || schemaType === 'string') && /^uuid[1-5]?$/.test(schema.format || '')) { } else if ((schemaType === undefined || schemaType === 'string') && /^uuid[1-5]?$/.test(schema.format || '')) {
return this._visitPattern('^"[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}"$', 'uuid'); return this._addRule(
} else if ((schemaType === undefined || schemaType === 'string') && schema.format === 'date') { ruleName === 'root' ? 'root' : schemaFormat,
// Adapted from full-date at https://www.rfc-editor.org/rfc/rfc3339.txt PRIMITIVE_RULES['uuid'])
return this._visitPattern('^"[0-9]{4}-(0[1-9]|1[0-2])-([0-2][0-9]|3[0-1])"$', 'date') } else if ((schemaType === undefined || schemaType === 'string') && schema.format in DATE_RULES) {
for (const [t, r] of Object.entries(DATE_RULES)) {
this._addRule(t, r);
}
return schemaFormat + '-string';
} else if (schemaType === 'object' && Object.keys(schema).length === 1 || schemaType === undefined && Object.keys(schema).length === 0) { } else if (schemaType === 'object' && Object.keys(schema).length === 1 || schemaType === undefined && Object.keys(schema).length === 0) {
// This depends on all primitive types // This depends on all primitive types
for (const [t, r] of Object.entries(PRIMITIVE_RULES)) { for (const [t, r] of Object.entries(PRIMITIVE_RULES)) {