cosmopolitan/third_party/python/Lib/sre_constants.py
2021-09-06 19:24:10 -07:00

311 lines
10 KiB
Python

#
# Secret Labs' Regular Expression Engine
#
# various symbols used by the regular expression engine.
# run this script to update the _sre include files!
#
# Copyright (c) 1998-2001 by Secret Labs AB. All rights reserved.
#
# See the sre.py file for information on usage and redistribution.
#
"""Internal support module for sre"""
# update when constants are added or removed
# (written out as SRE_MAGIC when this file is run as a script to
# regenerate sre_constants.h)
MAGIC = 20140917
from _sre import MAXREPEAT, MAXGROUPS
# SRE standard exception (access as sre.error)
# should this really be here?
class error(Exception):
    """Exception raised for invalid regular expressions.

    Attributes:

        msg: The unformatted error message
        pattern: The regular expression pattern
        pos: The index in the pattern where compilation failed (may be None)
        lineno: The line corresponding to pos (may be None)
        colno: The column corresponding to pos (may be None)
    """

    def __init__(self, msg, pattern=None, pos=None):
        """Record the failure details and build the exception message.

        msg is stored unmodified on the instance.  When both pattern and
        pos are given, the message passed to Exception is extended with
        the position, and with line/column information if the pattern
        contains a newline.  Otherwise lineno and colno are set to None.
        """
        self.msg = msg
        self.pattern = pattern
        self.pos = pos
        if pattern is not None and pos is not None:
            msg = '%s at position %d' % (msg, pos)
            # The pattern may be str or bytes; search it with a newline
            # of the matching type.
            newline = '\n' if isinstance(pattern, str) else b'\n'
            self.lineno = pattern.count(newline, 0, pos) + 1
            # rfind() yields -1 when no newline precedes pos, so colno
            # starts at 1 on the first line.
            self.colno = pos - pattern.rfind(newline, 0, pos)
            if newline in pattern:
                msg = '%s (line %d, column %d)' % (msg, self.lineno, self.colno)
        else:
            self.lineno = self.colno = None
        super().__init__(msg)
class _NamedIntConstant(int):
    """An int that also carries a symbolic name.

    The instance behaves as its integer value in arithmetic, comparison
    and '%d' formatting, while str() and repr() return the name.
    """

    def __new__(cls, value, name):
        """Create the constant with integer `value` and attach `name`."""
        self = super().__new__(cls, value)
        self.name = name
        return self

    def __str__(self):
        """Return the symbolic name rather than the digits."""
        return self.name

    __repr__ = __str__
# Re-wrap the MAXREPEAT value imported from _sre so that it carries its name.
MAXREPEAT = _NamedIntConstant(MAXREPEAT, 'MAXREPEAT')

# Operation codes, numbered consecutively from 0.
FAILURE = _NamedIntConstant(0, 'FAILURE')
SUCCESS = _NamedIntConstant(1, 'SUCCESS')

ANY = _NamedIntConstant(2, 'ANY')
ANY_ALL = _NamedIntConstant(3, 'ANY_ALL')
ASSERT = _NamedIntConstant(4, 'ASSERT')
ASSERT_NOT = _NamedIntConstant(5, 'ASSERT_NOT')
AT = _NamedIntConstant(6, 'AT')
BRANCH = _NamedIntConstant(7, 'BRANCH')
CALL = _NamedIntConstant(8, 'CALL')
CATEGORY = _NamedIntConstant(9, 'CATEGORY')
CHARSET = _NamedIntConstant(10, 'CHARSET')
BIGCHARSET = _NamedIntConstant(11, 'BIGCHARSET')
GROUPREF = _NamedIntConstant(12, 'GROUPREF')
GROUPREF_EXISTS = _NamedIntConstant(13, 'GROUPREF_EXISTS')
GROUPREF_IGNORE = _NamedIntConstant(14, 'GROUPREF_IGNORE')
IN = _NamedIntConstant(15, 'IN')
IN_IGNORE = _NamedIntConstant(16, 'IN_IGNORE')
INFO = _NamedIntConstant(17, 'INFO')
JUMP = _NamedIntConstant(18, 'JUMP')
LITERAL = _NamedIntConstant(19, 'LITERAL')
LITERAL_IGNORE = _NamedIntConstant(20, 'LITERAL_IGNORE')
MARK = _NamedIntConstant(21, 'MARK')
MAX_UNTIL = _NamedIntConstant(22, 'MAX_UNTIL')
MIN_UNTIL = _NamedIntConstant(23, 'MIN_UNTIL')
NOT_LITERAL = _NamedIntConstant(24, 'NOT_LITERAL')
NOT_LITERAL_IGNORE = _NamedIntConstant(25, 'NOT_LITERAL_IGNORE')
NEGATE = _NamedIntConstant(26, 'NEGATE')
RANGE = _NamedIntConstant(27, 'RANGE')
REPEAT = _NamedIntConstant(28, 'REPEAT')
REPEAT_ONE = _NamedIntConstant(29, 'REPEAT_ONE')
SUBPATTERN = _NamedIntConstant(30, 'SUBPATTERN')
MIN_REPEAT_ONE = _NamedIntConstant(31, 'MIN_REPEAT_ONE')
RANGE_IGNORE = _NamedIntConstant(32, 'RANGE_IGNORE')

# These two continue the numbering but are not listed in OPCODES below.
MIN_REPEAT = _NamedIntConstant(33, 'MIN_REPEAT')
MAX_REPEAT = _NamedIntConstant(34, 'MAX_REPEAT')

# Position in this list equals the constant's integer value.
OPCODES = [
    FAILURE, SUCCESS,
    ANY, ANY_ALL,
    ASSERT, ASSERT_NOT,
    AT,
    BRANCH,
    CALL,
    CATEGORY,
    CHARSET, BIGCHARSET,
    GROUPREF, GROUPREF_EXISTS, GROUPREF_IGNORE,
    IN, IN_IGNORE,
    INFO,
    JUMP,
    LITERAL, LITERAL_IGNORE,
    MARK,
    MAX_UNTIL,
    MIN_UNTIL,
    NOT_LITERAL, NOT_LITERAL_IGNORE,
    NEGATE,
    RANGE,
    REPEAT,
    REPEAT_ONE,
    SUBPATTERN,
    MIN_REPEAT_ONE,
    RANGE_IGNORE,
]
# Position ("at") codes, numbered consecutively from 0.
AT_BEGINNING = _NamedIntConstant(0, 'AT_BEGINNING')
AT_BEGINNING_LINE = _NamedIntConstant(1, 'AT_BEGINNING_LINE')
AT_BEGINNING_STRING = _NamedIntConstant(2, 'AT_BEGINNING_STRING')

AT_BOUNDARY = _NamedIntConstant(3, 'AT_BOUNDARY')
AT_NON_BOUNDARY = _NamedIntConstant(4, 'AT_NON_BOUNDARY')

AT_END = _NamedIntConstant(5, 'AT_END')
AT_END_LINE = _NamedIntConstant(6, 'AT_END_LINE')
AT_END_STRING = _NamedIntConstant(7, 'AT_END_STRING')

AT_LOC_BOUNDARY = _NamedIntConstant(8, 'AT_LOC_BOUNDARY')
AT_LOC_NON_BOUNDARY = _NamedIntConstant(9, 'AT_LOC_NON_BOUNDARY')

AT_UNI_BOUNDARY = _NamedIntConstant(10, 'AT_UNI_BOUNDARY')
AT_UNI_NON_BOUNDARY = _NamedIntConstant(11, 'AT_UNI_NON_BOUNDARY')

# Position in this list equals the constant's integer value.
ATCODES = [
    AT_BEGINNING, AT_BEGINNING_LINE, AT_BEGINNING_STRING,
    AT_BOUNDARY, AT_NON_BOUNDARY,
    AT_END, AT_END_LINE, AT_END_STRING,
    AT_LOC_BOUNDARY, AT_LOC_NON_BOUNDARY,
    AT_UNI_BOUNDARY, AT_UNI_NON_BOUNDARY,
]
# Character category codes.
#
# These are numbered consecutively from 0, the same way OPCODES and ATCODES
# are, so each constant's value equals its index in CHCODES.  The values are
# what the generator at the bottom of this file writes out as SRE_CATEGORY_*
# and they must agree with the SRE_CATEGORY_* definitions the C engine was
# built with (upstream CPython's sre_constants.h starts them at 0).  A
# 1-based numbering shifts every category onto its neighbour, e.g. a "digit"
# test would be executed as "not digit".
CATEGORY_DIGIT = _NamedIntConstant(0, 'CATEGORY_DIGIT')
CATEGORY_NOT_DIGIT = _NamedIntConstant(1, 'CATEGORY_NOT_DIGIT')
CATEGORY_SPACE = _NamedIntConstant(2, 'CATEGORY_SPACE')
CATEGORY_NOT_SPACE = _NamedIntConstant(3, 'CATEGORY_NOT_SPACE')
CATEGORY_WORD = _NamedIntConstant(4, 'CATEGORY_WORD')
CATEGORY_NOT_WORD = _NamedIntConstant(5, 'CATEGORY_NOT_WORD')
CATEGORY_LINEBREAK = _NamedIntConstant(6, 'CATEGORY_LINEBREAK')
CATEGORY_NOT_LINEBREAK = _NamedIntConstant(7, 'CATEGORY_NOT_LINEBREAK')

CATEGORY_LOC_WORD = _NamedIntConstant(8, 'CATEGORY_LOC_WORD')
CATEGORY_LOC_NOT_WORD = _NamedIntConstant(9, 'CATEGORY_LOC_NOT_WORD')

CATEGORY_UNI_DIGIT = _NamedIntConstant(10, 'CATEGORY_UNI_DIGIT')
CATEGORY_UNI_NOT_DIGIT = _NamedIntConstant(11, 'CATEGORY_UNI_NOT_DIGIT')
CATEGORY_UNI_SPACE = _NamedIntConstant(12, 'CATEGORY_UNI_SPACE')
CATEGORY_UNI_NOT_SPACE = _NamedIntConstant(13, 'CATEGORY_UNI_NOT_SPACE')
CATEGORY_UNI_WORD = _NamedIntConstant(14, 'CATEGORY_UNI_WORD')
CATEGORY_UNI_NOT_WORD = _NamedIntConstant(15, 'CATEGORY_UNI_NOT_WORD')
CATEGORY_UNI_LINEBREAK = _NamedIntConstant(16, 'CATEGORY_UNI_LINEBREAK')
CATEGORY_UNI_NOT_LINEBREAK = _NamedIntConstant(17, 'CATEGORY_UNI_NOT_LINEBREAK')

# Position in this list equals the constant's integer value.
CHCODES = [
    CATEGORY_DIGIT,
    CATEGORY_NOT_DIGIT,
    CATEGORY_SPACE,
    CATEGORY_NOT_SPACE,
    CATEGORY_WORD,
    CATEGORY_NOT_WORD,
    CATEGORY_LINEBREAK,
    CATEGORY_NOT_LINEBREAK,
    CATEGORY_LOC_WORD,
    CATEGORY_LOC_NOT_WORD,
    CATEGORY_UNI_DIGIT,
    CATEGORY_UNI_NOT_DIGIT,
    CATEGORY_UNI_SPACE,
    CATEGORY_UNI_NOT_SPACE,
    CATEGORY_UNI_WORD,
    CATEGORY_UNI_NOT_WORD,
    CATEGORY_UNI_LINEBREAK,
    CATEGORY_UNI_NOT_LINEBREAK,
]
# replacement operations for "ignore case" mode
OP_IGNORE = {
    GROUPREF: GROUPREF_IGNORE,
    IN: IN_IGNORE,
    LITERAL: LITERAL_IGNORE,
    NOT_LITERAL: NOT_LITERAL_IGNORE,
    RANGE: RANGE_IGNORE,
}

# Position-code substitution tables: each maps a generic AT_* code to the
# variant named after the table (line-based, locale, unicode).
AT_MULTILINE = {
    AT_BEGINNING: AT_BEGINNING_LINE,
    AT_END: AT_END_LINE,
}

AT_LOCALE = {
    AT_BOUNDARY: AT_LOC_BOUNDARY,
    AT_NON_BOUNDARY: AT_LOC_NON_BOUNDARY,
}

AT_UNICODE = {
    AT_BOUNDARY: AT_UNI_BOUNDARY,
    AT_NON_BOUNDARY: AT_UNI_NON_BOUNDARY,
}

# Category substitution tables.  In the locale table only the WORD and
# NOT_WORD categories have LOC variants; every other category maps to itself.
CH_LOCALE = {
    CATEGORY_DIGIT: CATEGORY_DIGIT,
    CATEGORY_NOT_DIGIT: CATEGORY_NOT_DIGIT,
    CATEGORY_SPACE: CATEGORY_SPACE,
    CATEGORY_NOT_SPACE: CATEGORY_NOT_SPACE,
    CATEGORY_WORD: CATEGORY_LOC_WORD,
    CATEGORY_NOT_WORD: CATEGORY_LOC_NOT_WORD,
    CATEGORY_LINEBREAK: CATEGORY_LINEBREAK,
    CATEGORY_NOT_LINEBREAK: CATEGORY_NOT_LINEBREAK,
}

# In the unicode table every category maps to its UNI counterpart.
CH_UNICODE = {
    CATEGORY_DIGIT: CATEGORY_UNI_DIGIT,
    CATEGORY_NOT_DIGIT: CATEGORY_UNI_NOT_DIGIT,
    CATEGORY_SPACE: CATEGORY_UNI_SPACE,
    CATEGORY_NOT_SPACE: CATEGORY_UNI_NOT_SPACE,
    CATEGORY_WORD: CATEGORY_UNI_WORD,
    CATEGORY_NOT_WORD: CATEGORY_UNI_NOT_WORD,
    CATEGORY_LINEBREAK: CATEGORY_UNI_LINEBREAK,
    CATEGORY_NOT_LINEBREAK: CATEGORY_UNI_NOT_LINEBREAK,
}
# flags (one bit each)
SRE_FLAG_TEMPLATE = 1 << 0  # template mode (disable backtracking)
SRE_FLAG_IGNORECASE = 1 << 1  # case insensitive
SRE_FLAG_LOCALE = 1 << 2  # honour system locale
SRE_FLAG_MULTILINE = 1 << 3  # treat target as multiline string
SRE_FLAG_DOTALL = 1 << 4  # treat target as a single string
SRE_FLAG_UNICODE = 1 << 5  # use unicode "locale"
SRE_FLAG_VERBOSE = 1 << 6  # ignore whitespace and comments
SRE_FLAG_DEBUG = 1 << 7  # debugging
SRE_FLAG_ASCII = 1 << 8  # use ascii "locale"

# flags for INFO primitive (one bit each)
SRE_INFO_PREFIX = 1 << 0  # has prefix
SRE_INFO_LITERAL = 1 << 1  # entire pattern is literal (given by prefix)
SRE_INFO_CHARSET = 1 << 2  # pattern starts with character from given set
# When run as a script, regenerate the C header "sre_constants.h" in the
# current directory from the constants defined above.
if __name__ == "__main__":

    def dump(f, d, prefix):
        """Write one '#define <prefix>_<name> <value>' line per code in d.

        f is a writable text file, d an iterable of named int constants,
        and prefix the text placed before each constant's name.  Lines are
        written in ascending order of the constants' values.
        """
        items = sorted(d)
        for item in items:
            # %s formats the constant as its name, %d as its integer value.
            f.write("#define %s_%s %d\n" % (prefix, item, item))

    with open("sre_constants.h", "w") as f:
        f.write("""\
/*
* Secret Labs' Regular Expression Engine
*
* regular expression matching engine
*
* NOTE: This file is generated by sre_constants.py. If you need
* to change anything in here, edit sre_constants.py and run it.
*
* Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
*
* See the _sre.c file for information on usage and redistribution.
*/
""")

        f.write("#define SRE_MAGIC %d\n" % MAGIC)

        # Opcode names get the SRE_OP_ prefix; AT_* and CATEGORY_* names
        # already carry their own prefix and only get SRE_.
        dump(f, OPCODES, "SRE_OP")
        dump(f, ATCODES, "SRE")
        dump(f, CHCODES, "SRE")

        f.write("#define SRE_FLAG_TEMPLATE %d\n" % SRE_FLAG_TEMPLATE)
        f.write("#define SRE_FLAG_IGNORECASE %d\n" % SRE_FLAG_IGNORECASE)
        f.write("#define SRE_FLAG_LOCALE %d\n" % SRE_FLAG_LOCALE)
        f.write("#define SRE_FLAG_MULTILINE %d\n" % SRE_FLAG_MULTILINE)
        f.write("#define SRE_FLAG_DOTALL %d\n" % SRE_FLAG_DOTALL)
        f.write("#define SRE_FLAG_UNICODE %d\n" % SRE_FLAG_UNICODE)
        f.write("#define SRE_FLAG_VERBOSE %d\n" % SRE_FLAG_VERBOSE)
        f.write("#define SRE_FLAG_DEBUG %d\n" % SRE_FLAG_DEBUG)
        f.write("#define SRE_FLAG_ASCII %d\n" % SRE_FLAG_ASCII)

        f.write("#define SRE_INFO_PREFIX %d\n" % SRE_INFO_PREFIX)
        f.write("#define SRE_INFO_LITERAL %d\n" % SRE_INFO_LITERAL)
        f.write("#define SRE_INFO_CHARSET %d\n" % SRE_INFO_CHARSET)

    print("done")