#
# Secret Labs' Regular Expression Engine
#
# various symbols used by the regular expression engine.
# run this script to update the _sre include files!
#
# Copyright (c) 1998-2001 by Secret Labs AB. All rights reserved.
#
# See the sre.py file for information on usage and redistribution.
#

"""Internal support module for sre"""

# update when constants are added or removed

MAGIC = 20140917

# MAXGROUPS is not referenced again in this module; it is presumably imported
# here so that modules importing from this one can pick it up -- keep it.
from _sre import MAXREPEAT, MAXGROUPS


# SRE standard exception (access as sre.error)
# should this really be here?

class error(Exception):
    """Exception raised for invalid regular expressions.

    Attributes:

        msg: The unformatted error message
        pattern: The regular expression pattern
        pos: The index in the pattern where compilation failed (may be None)
        lineno: The line corresponding to pos (may be None)
        colno: The column corresponding to pos (may be None)
    """

    def __init__(self, msg, pattern=None, pos=None):
        """Build the exception and its formatted message.

        msg is stored unmodified on the instance.  When both pattern and
        pos are given, the message passed to Exception gets an
        "at position N" suffix, plus "(line L, column C)" if the pattern
        contains a newline.  pattern may be str or bytes.
        """
        self.msg = msg
        self.pattern = pattern
        self.pos = pos
        if pattern is not None and pos is not None:
            msg = '%s at position %d' % (msg, pos)
            # Use a newline of the same type as the pattern (str or bytes).
            if isinstance(pattern, str):
                newline = '\n'
            else:
                newline = b'\n'
            # Lines are numbered from 1.
            self.lineno = pattern.count(newline, 0, pos) + 1
            # rfind() returns -1 when no newline precedes pos, which makes
            # the column 1-based on the first line as well.
            self.colno = pos - pattern.rfind(newline, 0, pos)
            # Line/column is only worth reporting for multi-line patterns.
            if newline in pattern:
                msg = '%s (line %d, column %d)' % (msg, self.lineno, self.colno)
        else:
            self.lineno = self.colno = None
        super().__init__(msg)


class _NamedIntConstant(int):
    """An int that carries a symbolic name and shows it in str() and repr().

    The value still behaves as an ordinary int for comparison, hashing,
    arithmetic and '%d' formatting.
    """

    def __new__(cls, value, name):
        self = super().__new__(cls, value)
        self.name = name
        return self

    def __str__(self):
        return self.name

    __repr__ = __str__

    # Treat the constants as atomic for the copy module, like plain ints.
    # Without these hooks copy.copy()/copy.deepcopy() raise TypeError,
    # because the inherited int.__getnewargs__ supplies only the value while
    # __new__ also requires `name`.  Returning self also keeps identity
    # comparisons against the module-level constants valid.
    def __copy__(self):
        return self

    def __deepcopy__(self, memo):
        return self


MAXREPEAT = _NamedIntConstant(MAXREPEAT, 'MAXREPEAT')

# operators

FAILURE = _NamedIntConstant(0, 'FAILURE')
SUCCESS = _NamedIntConstant(1, 'SUCCESS')
ANY = _NamedIntConstant(2, 'ANY')
ANY_ALL = _NamedIntConstant(3, 'ANY_ALL')
ASSERT = _NamedIntConstant(4, 'ASSERT')
ASSERT_NOT = _NamedIntConstant(5, 'ASSERT_NOT')
AT = _NamedIntConstant(6, 'AT')
BRANCH = _NamedIntConstant(7, 'BRANCH')
CALL = _NamedIntConstant(8, 'CALL')
CATEGORY = _NamedIntConstant(9, 'CATEGORY')
CHARSET = _NamedIntConstant(10, 'CHARSET')
BIGCHARSET = _NamedIntConstant(11, 'BIGCHARSET')
GROUPREF = _NamedIntConstant(12, 'GROUPREF')
GROUPREF_EXISTS = _NamedIntConstant(13, 'GROUPREF_EXISTS')
GROUPREF_IGNORE = _NamedIntConstant(14, 'GROUPREF_IGNORE')
IN = _NamedIntConstant(15, 'IN')
IN_IGNORE = _NamedIntConstant(16, 'IN_IGNORE')
INFO = _NamedIntConstant(17, 'INFO')
JUMP = _NamedIntConstant(18, 'JUMP')
LITERAL = _NamedIntConstant(19, 'LITERAL')
LITERAL_IGNORE = _NamedIntConstant(20, 'LITERAL_IGNORE')
MARK = _NamedIntConstant(21, 'MARK')
MAX_UNTIL = _NamedIntConstant(22, 'MAX_UNTIL')
MIN_UNTIL = _NamedIntConstant(23, 'MIN_UNTIL')
NOT_LITERAL = _NamedIntConstant(24, 'NOT_LITERAL')
NOT_LITERAL_IGNORE = _NamedIntConstant(25, 'NOT_LITERAL_IGNORE')
NEGATE = _NamedIntConstant(26, 'NEGATE')
RANGE = _NamedIntConstant(27, 'RANGE')
REPEAT = _NamedIntConstant(28, 'REPEAT')
REPEAT_ONE = _NamedIntConstant(29, 'REPEAT_ONE')
SUBPATTERN = _NamedIntConstant(30, 'SUBPATTERN')
MIN_REPEAT_ONE = _NamedIntConstant(31, 'MIN_REPEAT_ONE')
RANGE_IGNORE = _NamedIntConstant(32, 'RANGE_IGNORE')
MIN_REPEAT = _NamedIntConstant(33, 'MIN_REPEAT')
MAX_REPEAT = _NamedIntConstant(34, 'MAX_REPEAT')

# MIN_REPEAT and MAX_REPEAT are defined above but are not listed here, so
# the header generator below emits no SRE_OP_* define for them.
OPCODES = [
    FAILURE, SUCCESS,
    ANY, ANY_ALL,
    ASSERT, ASSERT_NOT,
    AT,
    BRANCH,
    CALL,
    CATEGORY,
    CHARSET, BIGCHARSET,
    GROUPREF, GROUPREF_EXISTS, GROUPREF_IGNORE,
    IN, IN_IGNORE,
    INFO,
    JUMP,
    LITERAL, LITERAL_IGNORE,
    MARK,
    MAX_UNTIL,
    MIN_UNTIL,
    NOT_LITERAL, NOT_LITERAL_IGNORE,
    NEGATE,
    RANGE,
    REPEAT,
    REPEAT_ONE,
    SUBPATTERN,
    MIN_REPEAT_ONE,
    RANGE_IGNORE,
]

# positions

AT_BEGINNING = _NamedIntConstant(0, 'AT_BEGINNING')
AT_BEGINNING_LINE = _NamedIntConstant(1, 'AT_BEGINNING_LINE')
AT_BEGINNING_STRING = _NamedIntConstant(2, 'AT_BEGINNING_STRING')
AT_BOUNDARY = _NamedIntConstant(3, 'AT_BOUNDARY')
AT_NON_BOUNDARY = _NamedIntConstant(4, 'AT_NON_BOUNDARY')
AT_END = _NamedIntConstant(5, 'AT_END')
AT_END_LINE = _NamedIntConstant(6, 'AT_END_LINE')
AT_END_STRING = _NamedIntConstant(7, 'AT_END_STRING')
AT_LOC_BOUNDARY = _NamedIntConstant(8, 'AT_LOC_BOUNDARY')
AT_LOC_NON_BOUNDARY = _NamedIntConstant(9, 'AT_LOC_NON_BOUNDARY')
AT_UNI_BOUNDARY = _NamedIntConstant(10, 'AT_UNI_BOUNDARY')
AT_UNI_NON_BOUNDARY = _NamedIntConstant(11, 'AT_UNI_NON_BOUNDARY')

ATCODES = [
    AT_BEGINNING, AT_BEGINNING_LINE, AT_BEGINNING_STRING,
    AT_BOUNDARY, AT_NON_BOUNDARY,
    AT_END, AT_END_LINE, AT_END_STRING,
    AT_LOC_BOUNDARY, AT_LOC_NON_BOUNDARY,
    AT_UNI_BOUNDARY, AT_UNI_NON_BOUNDARY,
]

# categories

# NOTE(review): the category codes start at 1, whereas the opcodes and the
# AT codes above start at 0.  These values must agree with the header the
# C engine was compiled against -- confirm the numbering is intentional.
CATEGORY_DIGIT = _NamedIntConstant(1, 'CATEGORY_DIGIT')
CATEGORY_NOT_DIGIT = _NamedIntConstant(2, 'CATEGORY_NOT_DIGIT')
CATEGORY_SPACE = _NamedIntConstant(3, 'CATEGORY_SPACE')
CATEGORY_NOT_SPACE = _NamedIntConstant(4, 'CATEGORY_NOT_SPACE')
CATEGORY_WORD = _NamedIntConstant(5, 'CATEGORY_WORD')
CATEGORY_NOT_WORD = _NamedIntConstant(6, 'CATEGORY_NOT_WORD')
CATEGORY_LINEBREAK = _NamedIntConstant(7, 'CATEGORY_LINEBREAK')
CATEGORY_NOT_LINEBREAK = _NamedIntConstant(8, 'CATEGORY_NOT_LINEBREAK')
CATEGORY_LOC_WORD = _NamedIntConstant(9, 'CATEGORY_LOC_WORD')
CATEGORY_LOC_NOT_WORD = _NamedIntConstant(10, 'CATEGORY_LOC_NOT_WORD')
CATEGORY_UNI_DIGIT = _NamedIntConstant(11, 'CATEGORY_UNI_DIGIT')
CATEGORY_UNI_NOT_DIGIT = _NamedIntConstant(12, 'CATEGORY_UNI_NOT_DIGIT')
CATEGORY_UNI_SPACE = _NamedIntConstant(13, 'CATEGORY_UNI_SPACE')
CATEGORY_UNI_NOT_SPACE = _NamedIntConstant(14, 'CATEGORY_UNI_NOT_SPACE')
CATEGORY_UNI_WORD = _NamedIntConstant(15, 'CATEGORY_UNI_WORD')
CATEGORY_UNI_NOT_WORD = _NamedIntConstant(16, 'CATEGORY_UNI_NOT_WORD')
CATEGORY_UNI_LINEBREAK = _NamedIntConstant(17, 'CATEGORY_UNI_LINEBREAK')
CATEGORY_UNI_NOT_LINEBREAK = _NamedIntConstant(18, 'CATEGORY_UNI_NOT_LINEBREAK')

CHCODES = [
    CATEGORY_DIGIT, CATEGORY_NOT_DIGIT,
    CATEGORY_SPACE, CATEGORY_NOT_SPACE,
    CATEGORY_WORD, CATEGORY_NOT_WORD,
    CATEGORY_LINEBREAK, CATEGORY_NOT_LINEBREAK,
    CATEGORY_LOC_WORD, CATEGORY_LOC_NOT_WORD,
    CATEGORY_UNI_DIGIT, CATEGORY_UNI_NOT_DIGIT,
    CATEGORY_UNI_SPACE, CATEGORY_UNI_NOT_SPACE,
    CATEGORY_UNI_WORD, CATEGORY_UNI_NOT_WORD,
    CATEGORY_UNI_LINEBREAK, CATEGORY_UNI_NOT_LINEBREAK,
]

# replacement operations for "ignore case" mode
OP_IGNORE = {
    GROUPREF: GROUPREF_IGNORE,
    IN: IN_IGNORE,
    LITERAL: LITERAL_IGNORE,
    NOT_LITERAL: NOT_LITERAL_IGNORE,
    RANGE: RANGE_IGNORE,
}

# position codes and their multiline / locale / unicode replacements
AT_MULTILINE = {
    AT_BEGINNING: AT_BEGINNING_LINE,
    AT_END: AT_END_LINE
}

AT_LOCALE = {
    AT_BOUNDARY: AT_LOC_BOUNDARY,
    AT_NON_BOUNDARY: AT_LOC_NON_BOUNDARY
}

AT_UNICODE = {
    AT_BOUNDARY: AT_UNI_BOUNDARY,
    AT_NON_BOUNDARY: AT_UNI_NON_BOUNDARY
}

# category codes and their locale replacements; only the WORD categories
# have a LOC variant, every other category maps to itself
CH_LOCALE = {
    CATEGORY_DIGIT: CATEGORY_DIGIT,
    CATEGORY_NOT_DIGIT: CATEGORY_NOT_DIGIT,
    CATEGORY_SPACE: CATEGORY_SPACE,
    CATEGORY_NOT_SPACE: CATEGORY_NOT_SPACE,
    CATEGORY_WORD: CATEGORY_LOC_WORD,
    CATEGORY_NOT_WORD: CATEGORY_LOC_NOT_WORD,
    CATEGORY_LINEBREAK: CATEGORY_LINEBREAK,
    CATEGORY_NOT_LINEBREAK: CATEGORY_NOT_LINEBREAK
}

# category codes and their unicode replacements
CH_UNICODE = {
    CATEGORY_DIGIT: CATEGORY_UNI_DIGIT,
    CATEGORY_NOT_DIGIT: CATEGORY_UNI_NOT_DIGIT,
    CATEGORY_SPACE: CATEGORY_UNI_SPACE,
    CATEGORY_NOT_SPACE: CATEGORY_UNI_NOT_SPACE,
    CATEGORY_WORD: CATEGORY_UNI_WORD,
    CATEGORY_NOT_WORD: CATEGORY_UNI_NOT_WORD,
    CATEGORY_LINEBREAK: CATEGORY_UNI_LINEBREAK,
    CATEGORY_NOT_LINEBREAK: CATEGORY_UNI_NOT_LINEBREAK
}

# flags
SRE_FLAG_TEMPLATE = 1 # template mode (disable backtracking)
SRE_FLAG_IGNORECASE = 2 # case insensitive
SRE_FLAG_LOCALE = 4 # honour system locale
SRE_FLAG_MULTILINE = 8 # treat target as multiline string
SRE_FLAG_DOTALL = 16 # treat target as a single string
SRE_FLAG_UNICODE = 32 # use unicode "locale"
SRE_FLAG_VERBOSE = 64 # ignore whitespace and comments
SRE_FLAG_DEBUG = 128 # debugging
SRE_FLAG_ASCII = 256 # use ascii "locale"

# flags for INFO primitive
SRE_INFO_PREFIX = 1 # has prefix
SRE_INFO_LITERAL = 2 # entire pattern is literal (given by prefix)
SRE_INFO_CHARSET = 4 # pattern starts with character from given set


# The generator helpers are underscore-private so that a star-import of this
# module does not pick them up.

def _dump(f, d, prefix):
    """Write one "#define <prefix>_<NAME> <value>" line per code in d.

    f is a writable text file, d an iterable of named int constants; the
    lines are emitted in ascending value order.
    """
    for item in sorted(d):
        # %s renders the constant's name (its __str__), %d its int value.
        f.write("#define %s_%s %d\n" % (prefix, item, item))


def _write_header(path="sre_constants.h"):
    """Generate the C header holding MAGIC, the code tables and the flags.

    path is the file to (over)write; it defaults to "sre_constants.h" in
    the current working directory.
    """
    flag_defines = (
        ("SRE_FLAG_TEMPLATE", SRE_FLAG_TEMPLATE),
        ("SRE_FLAG_IGNORECASE", SRE_FLAG_IGNORECASE),
        ("SRE_FLAG_LOCALE", SRE_FLAG_LOCALE),
        ("SRE_FLAG_MULTILINE", SRE_FLAG_MULTILINE),
        ("SRE_FLAG_DOTALL", SRE_FLAG_DOTALL),
        ("SRE_FLAG_UNICODE", SRE_FLAG_UNICODE),
        ("SRE_FLAG_VERBOSE", SRE_FLAG_VERBOSE),
        ("SRE_FLAG_DEBUG", SRE_FLAG_DEBUG),
        ("SRE_FLAG_ASCII", SRE_FLAG_ASCII),
        ("SRE_INFO_PREFIX", SRE_INFO_PREFIX),
        ("SRE_INFO_LITERAL", SRE_INFO_LITERAL),
        ("SRE_INFO_CHARSET", SRE_INFO_CHARSET),
    )
    # Explicit encoding: the generated header must not depend on the locale
    # of whoever runs the script.
    with open(path, "w", encoding="utf-8") as f:
        f.write("""\
/*
 * Secret Labs' Regular Expression Engine
 *
 * regular expression matching engine
 *
 * NOTE: This file is generated by sre_constants.py. If you need
 * to change anything in here, edit sre_constants.py and run it.
 *
 * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved.
 *
 * See the _sre.c file for information on usage and redistribution.
 */

""")

        f.write("#define SRE_MAGIC %d\n" % MAGIC)

        _dump(f, OPCODES, "SRE_OP")
        _dump(f, ATCODES, "SRE")
        _dump(f, CHCODES, "SRE")

        for define in flag_defines:
            f.write("#define %s %d\n" % define)


if __name__ == "__main__":
    _write_header()
    print("done")