File Explorer

/proc/thread-self/root/proc/self/root/proc/12/task/20/root/lib64/python3.9
This explorer reads the filesystem of the server it runs on, so /workspace/user isn't present here. Browsing and the terminal still work against this server's own disk from /.
30 dirs
174 files
sre_parse.py · 39.8 KB · 1077 lines
1#2# Secret Labs' Regular Expression Engine3#4# convert re-style regular expression to sre pattern5#6# Copyright (c) 1998-2001 by Secret Labs AB.  All rights reserved.7#8# See the sre.py file for information on usage and redistribution.9#10 11"""Internal support module for sre"""12 13# XXX: show string offset and offending character for all errors14 15from sre_constants import *16 17SPECIAL_CHARS = ".\\[{()*+?^$|"18REPEAT_CHARS = "*+?{"19 20DIGITS = frozenset("0123456789")21 22OCTDIGITS = frozenset("01234567")23HEXDIGITS = frozenset("0123456789abcdefABCDEF")24ASCIILETTERS = frozenset("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ")25 26WHITESPACE = frozenset(" \t\n\r\v\f")27 28_REPEATCODES = frozenset({MIN_REPEAT, MAX_REPEAT})29_UNITCODES = frozenset({ANY, RANGE, IN, LITERAL, NOT_LITERAL, CATEGORY})30 31ESCAPES = {32    r"\a": (LITERAL, ord("\a")),33    r"\b": (LITERAL, ord("\b")),34    r"\f": (LITERAL, ord("\f")),35    r"\n": (LITERAL, ord("\n")),36    r"\r": (LITERAL, ord("\r")),37    r"\t": (LITERAL, ord("\t")),38    r"\v": (LITERAL, ord("\v")),39    r"\\": (LITERAL, ord("\\"))40}41 42CATEGORIES = {43    r"\A": (AT, AT_BEGINNING_STRING), # start of string44    r"\b": (AT, AT_BOUNDARY),45    r"\B": (AT, AT_NON_BOUNDARY),46    r"\d": (IN, [(CATEGORY, CATEGORY_DIGIT)]),47    r"\D": (IN, [(CATEGORY, CATEGORY_NOT_DIGIT)]),48    r"\s": (IN, [(CATEGORY, CATEGORY_SPACE)]),49    r"\S": (IN, [(CATEGORY, CATEGORY_NOT_SPACE)]),50    r"\w": (IN, [(CATEGORY, CATEGORY_WORD)]),51    r"\W": (IN, [(CATEGORY, CATEGORY_NOT_WORD)]),52    r"\Z": (AT, AT_END_STRING), # end of string53}54 55FLAGS = {56    # standard flags57    "i": SRE_FLAG_IGNORECASE,58    "L": SRE_FLAG_LOCALE,59    "m": SRE_FLAG_MULTILINE,60    "s": SRE_FLAG_DOTALL,61    "x": SRE_FLAG_VERBOSE,62    # extensions63    "a": SRE_FLAG_ASCII,64    "t": SRE_FLAG_TEMPLATE,65    "u": SRE_FLAG_UNICODE,66}67 68TYPE_FLAGS = SRE_FLAG_ASCII | SRE_FLAG_LOCALE | SRE_FLAG_UNICODE69GLOBAL_FLAGS = SRE_FLAG_DEBUG | 
SRE_FLAG_TEMPLATE70 71class Verbose(Exception):72    pass73 74class State:75    # keeps track of state for parsing76    def __init__(self):77        self.flags = 078        self.groupdict = {}79        self.groupwidths = [None]  # group 080        self.lookbehindgroups = None81        self.grouprefpos = {}82    @property83    def groups(self):84        return len(self.groupwidths)85    def opengroup(self, name=None):86        gid = self.groups87        self.groupwidths.append(None)88        if self.groups > MAXGROUPS:89            raise error("too many groups")90        if name is not None:91            ogid = self.groupdict.get(name, None)92            if ogid is not None:93                raise error("redefinition of group name %r as group %d; "94                            "was group %d" % (name, gid,  ogid))95            self.groupdict[name] = gid96        return gid97    def closegroup(self, gid, p):98        self.groupwidths[gid] = p.getwidth()99    def checkgroup(self, gid):100        return gid < self.groups and self.groupwidths[gid] is not None101 102    def checklookbehindgroup(self, gid, source):103        if self.lookbehindgroups is not None:104            if not self.checkgroup(gid):105                raise source.error('cannot refer to an open group')106            if gid >= self.lookbehindgroups:107                raise source.error('cannot refer to group defined in the same '108                                   'lookbehind subpattern')109 110class SubPattern:111    # a subpattern, in intermediate form112    def __init__(self, state, data=None):113        self.state = state114        if data is None:115            data = []116        self.data = data117        self.width = None118 119    def dump(self, level=0):120        nl = True121        seqtypes = (tuple, list)122        for op, av in self.data:123            print(level*"  " + str(op), end='')124            if op is IN:125                # member sublanguage126                print()127        
        for op, a in av:128                    print((level+1)*"  " + str(op), a)129            elif op is BRANCH:130                print()131                for i, a in enumerate(av[1]):132                    if i:133                        print(level*"  " + "OR")134                    a.dump(level+1)135            elif op is GROUPREF_EXISTS:136                condgroup, item_yes, item_no = av137                print('', condgroup)138                item_yes.dump(level+1)139                if item_no:140                    print(level*"  " + "ELSE")141                    item_no.dump(level+1)142            elif isinstance(av, seqtypes):143                nl = False144                for a in av:145                    if isinstance(a, SubPattern):146                        if not nl:147                            print()148                        a.dump(level+1)149                        nl = True150                    else:151                        if not nl:152                            print(' ', end='')153                        print(a, end='')154                        nl = False155                if not nl:156                    print()157            else:158                print('', av)159    def __repr__(self):160        return repr(self.data)161    def __len__(self):162        return len(self.data)163    def __delitem__(self, index):164        del self.data[index]165    def __getitem__(self, index):166        if isinstance(index, slice):167            return SubPattern(self.state, self.data[index])168        return self.data[index]169    def __setitem__(self, index, code):170        self.data[index] = code171    def insert(self, index, code):172        self.data.insert(index, code)173    def append(self, code):174        self.data.append(code)175    def getwidth(self):176        # determine the width (min, max) for this subpattern177        if self.width is not None:178            return self.width179        lo = hi = 0180        for op, av in 
self.data:181            if op is BRANCH:182                i = MAXREPEAT - 1183                j = 0184                for av in av[1]:185                    l, h = av.getwidth()186                    i = min(i, l)187                    j = max(j, h)188                lo = lo + i189                hi = hi + j190            elif op is CALL:191                i, j = av.getwidth()192                lo = lo + i193                hi = hi + j194            elif op is SUBPATTERN:195                i, j = av[-1].getwidth()196                lo = lo + i197                hi = hi + j198            elif op in _REPEATCODES:199                i, j = av[2].getwidth()200                lo = lo + i * av[0]201                hi = hi + j * av[1]202            elif op in _UNITCODES:203                lo = lo + 1204                hi = hi + 1205            elif op is GROUPREF:206                i, j = self.state.groupwidths[av]207                lo = lo + i208                hi = hi + j209            elif op is GROUPREF_EXISTS:210                i, j = av[1].getwidth()211                if av[2] is not None:212                    l, h = av[2].getwidth()213                    i = min(i, l)214                    j = max(j, h)215                else:216                    i = 0217                lo = lo + i218                hi = hi + j219            elif op is SUCCESS:220                break221        self.width = min(lo, MAXREPEAT - 1), min(hi, MAXREPEAT)222        return self.width223 224class Tokenizer:225    def __init__(self, string):226        self.istext = isinstance(string, str)227        self.string = string228        if not self.istext:229            string = str(string, 'latin1')230        self.decoded_string = string231        self.index = 0232        self.next = None233        self.__next()234    def __next(self):235        index = self.index236        try:237            char = self.decoded_string[index]238        except IndexError:239            self.next = None240       
     return241        if char == "\\":242            index += 1243            try:244                char += self.decoded_string[index]245            except IndexError:246                raise error("bad escape (end of pattern)",247                            self.string, len(self.string) - 1) from None248        self.index = index + 1249        self.next = char250    def match(self, char):251        if char == self.next:252            self.__next()253            return True254        return False255    def get(self):256        this = self.next257        self.__next()258        return this259    def getwhile(self, n, charset):260        result = ''261        for _ in range(n):262            c = self.next263            if c not in charset:264                break265            result += c266            self.__next()267        return result268    def getuntil(self, terminator, name):269        result = ''270        while True:271            c = self.next272            self.__next()273            if c is None:274                if not result:275                    raise self.error("missing " + name)276                raise self.error("missing %s, unterminated name" % terminator,277                                 len(result))278            if c == terminator:279                if not result:280                    raise self.error("missing " + name, 1)281                break282            result += c283        return result284    @property285    def pos(self):286        return self.index - len(self.next or '')287    def tell(self):288        return self.index - len(self.next or '')289    def seek(self, index):290        self.index = index291        self.__next()292 293    def error(self, msg, offset=0):294        return error(msg, self.string, self.tell() - offset)295 296def _class_escape(source, escape):297    # handle escape code inside character class298    code = ESCAPES.get(escape)299    if code:300        return code301    code = CATEGORIES.get(escape)302    if 
code and code[0] is IN:303        return code304    try:305        c = escape[1:2]306        if c == "x":307            # hexadecimal escape (exactly two digits)308            escape += source.getwhile(2, HEXDIGITS)309            if len(escape) != 4:310                raise source.error("incomplete escape %s" % escape, len(escape))311            return LITERAL, int(escape[2:], 16)312        elif c == "u" and source.istext:313            # unicode escape (exactly four digits)314            escape += source.getwhile(4, HEXDIGITS)315            if len(escape) != 6:316                raise source.error("incomplete escape %s" % escape, len(escape))317            return LITERAL, int(escape[2:], 16)318        elif c == "U" and source.istext:319            # unicode escape (exactly eight digits)320            escape += source.getwhile(8, HEXDIGITS)321            if len(escape) != 10:322                raise source.error("incomplete escape %s" % escape, len(escape))323            c = int(escape[2:], 16)324            chr(c) # raise ValueError for invalid code325            return LITERAL, c326        elif c == "N" and source.istext:327            import unicodedata328            # named unicode escape e.g. 
\N{EM DASH}329            if not source.match('{'):330                raise source.error("missing {")331            charname = source.getuntil('}', 'character name')332            try:333                c = ord(unicodedata.lookup(charname))334            except (KeyError, TypeError):335                raise source.error("undefined character name %r" % charname,336                                   len(charname) + len(r'\N{}'))337            return LITERAL, c338        elif c in OCTDIGITS:339            # octal escape (up to three digits)340            escape += source.getwhile(2, OCTDIGITS)341            c = int(escape[1:], 8)342            if c > 0o377:343                raise source.error('octal escape value %s outside of '344                                   'range 0-0o377' % escape, len(escape))345            return LITERAL, c346        elif c in DIGITS:347            raise ValueError348        if len(escape) == 2:349            if c in ASCIILETTERS:350                raise source.error('bad escape %s' % escape, len(escape))351            return LITERAL, ord(escape[1])352    except ValueError:353        pass354    raise source.error("bad escape %s" % escape, len(escape))355 356def _escape(source, escape, state):357    # handle escape code in expression358    code = CATEGORIES.get(escape)359    if code:360        return code361    code = ESCAPES.get(escape)362    if code:363        return code364    try:365        c = escape[1:2]366        if c == "x":367            # hexadecimal escape368            escape += source.getwhile(2, HEXDIGITS)369            if len(escape) != 4:370                raise source.error("incomplete escape %s" % escape, len(escape))371            return LITERAL, int(escape[2:], 16)372        elif c == "u" and source.istext:373            # unicode escape (exactly four digits)374            escape += source.getwhile(4, HEXDIGITS)375            if len(escape) != 6:376                raise source.error("incomplete escape %s" % escape, 
len(escape))377            return LITERAL, int(escape[2:], 16)378        elif c == "U" and source.istext:379            # unicode escape (exactly eight digits)380            escape += source.getwhile(8, HEXDIGITS)381            if len(escape) != 10:382                raise source.error("incomplete escape %s" % escape, len(escape))383            c = int(escape[2:], 16)384            chr(c) # raise ValueError for invalid code385            return LITERAL, c386        elif c == "N" and source.istext:387            import unicodedata388            # named unicode escape e.g. \N{EM DASH}389            if not source.match('{'):390                raise source.error("missing {")391            charname = source.getuntil('}', 'character name')392            try:393                c = ord(unicodedata.lookup(charname))394            except (KeyError, TypeError):395                raise source.error("undefined character name %r" % charname,396                                   len(charname) + len(r'\N{}'))397            return LITERAL, c398        elif c == "0":399            # octal escape400            escape += source.getwhile(2, OCTDIGITS)401            return LITERAL, int(escape[1:], 8)402        elif c in DIGITS:403            # octal escape *or* decimal group reference (sigh)404            if source.next in DIGITS:405                escape += source.get()406                if (escape[1] in OCTDIGITS and escape[2] in OCTDIGITS and407                    source.next in OCTDIGITS):408                    # got three octal digits; this is an octal escape409                    escape += source.get()410                    c = int(escape[1:], 8)411                    if c > 0o377:412                        raise source.error('octal escape value %s outside of '413                                           'range 0-0o377' % escape,414                                           len(escape))415                    return LITERAL, c416            # not an octal escape, so this is a 
group reference417            group = int(escape[1:])418            if group < state.groups:419                if not state.checkgroup(group):420                    raise source.error("cannot refer to an open group",421                                       len(escape))422                state.checklookbehindgroup(group, source)423                return GROUPREF, group424            raise source.error("invalid group reference %d" % group, len(escape) - 1)425        if len(escape) == 2:426            if c in ASCIILETTERS:427                raise source.error("bad escape %s" % escape, len(escape))428            return LITERAL, ord(escape[1])429    except ValueError:430        pass431    raise source.error("bad escape %s" % escape, len(escape))432 433def _uniq(items):434    return list(dict.fromkeys(items))435 436def _parse_sub(source, state, verbose, nested):437    # parse an alternation: a|b|c438 439    items = []440    itemsappend = items.append441    sourcematch = source.match442    start = source.tell()443    while True:444        itemsappend(_parse(source, state, verbose, nested + 1,445                           not nested and not items))446        if not sourcematch("|"):447            break448 449    if len(items) == 1:450        return items[0]451 452    subpattern = SubPattern(state)453 454    # check if all items share a common prefix455    while True:456        prefix = None457        for item in items:458            if not item:459                break460            if prefix is None:461                prefix = item[0]462            elif item[0] != prefix:463                break464        else:465            # all subitems start with a common "prefix".466            # move it out of the branch467            for item in items:468                del item[0]469            subpattern.append(prefix)470            continue # check next one471        break472 473    # check if the branch can be replaced by a character set474    set = []475    for item in 
items:476        if len(item) != 1:477            break478        op, av = item[0]479        if op is LITERAL:480            set.append((op, av))481        elif op is IN and av[0][0] is not NEGATE:482            set.extend(av)483        else:484            break485    else:486        # we can store this as a character set instead of a487        # branch (the compiler may optimize this even more)488        subpattern.append((IN, _uniq(set)))489        return subpattern490 491    subpattern.append((BRANCH, (None, items)))492    return subpattern493 494def _parse(source, state, verbose, nested, first=False):495    # parse a simple pattern496    subpattern = SubPattern(state)497 498    # precompute constants into local variables499    subpatternappend = subpattern.append500    sourceget = source.get501    sourcematch = source.match502    _len = len503    _ord = ord504 505    while True:506 507        this = source.next508        if this is None:509            break # end of pattern510        if this in "|)":511            break # end of subpattern512        sourceget()513 514        if verbose:515            # skip whitespace and comments516            if this in WHITESPACE:517                continue518            if this == "#":519                while True:520                    this = sourceget()521                    if this is None or this == "\n":522                        break523                continue524 525        if this[0] == "\\":526            code = _escape(source, this, state)527            subpatternappend(code)528 529        elif this not in SPECIAL_CHARS:530            subpatternappend((LITERAL, _ord(this)))531 532        elif this == "[":533            here = source.tell() - 1534            # character set535            set = []536            setappend = set.append537##          if sourcematch(":"):538##              pass # handle character classes539            if source.next == '[':540                import warnings541                
warnings.warn(542                    'Possible nested set at position %d' % source.tell(),543                    FutureWarning, stacklevel=nested + 6544                )545            negate = sourcematch("^")546            # check remaining characters547            while True:548                this = sourceget()549                if this is None:550                    raise source.error("unterminated character set",551                                       source.tell() - here)552                if this == "]" and set:553                    break554                elif this[0] == "\\":555                    code1 = _class_escape(source, this)556                else:557                    if set and this in '-&~|' and source.next == this:558                        import warnings559                        warnings.warn(560                            'Possible set %s at position %d' % (561                                'difference' if this == '-' else562                                'intersection' if this == '&' else563                                'symmetric difference' if this == '~' else564                                'union',565                                source.tell() - 1),566                            FutureWarning, stacklevel=nested + 6567                        )568                    code1 = LITERAL, _ord(this)569                if sourcematch("-"):570                    # potential range571                    that = sourceget()572                    if that is None:573                        raise source.error("unterminated character set",574                                           source.tell() - here)575                    if that == "]":576                        if code1[0] is IN:577                            code1 = code1[1][0]578                        setappend(code1)579                        setappend((LITERAL, _ord("-")))580                        break581                    if that[0] == "\\":582                        code2 = 
_class_escape(source, that)583                    else:584                        if that == '-':585                            import warnings586                            warnings.warn(587                                'Possible set difference at position %d' % (588                                    source.tell() - 2),589                                FutureWarning, stacklevel=nested + 6590                            )591                        code2 = LITERAL, _ord(that)592                    if code1[0] != LITERAL or code2[0] != LITERAL:593                        msg = "bad character range %s-%s" % (this, that)594                        raise source.error(msg, len(this) + 1 + len(that))595                    lo = code1[1]596                    hi = code2[1]597                    if hi < lo:598                        msg = "bad character range %s-%s" % (this, that)599                        raise source.error(msg, len(this) + 1 + len(that))600                    setappend((RANGE, (lo, hi)))601                else:602                    if code1[0] is IN:603                        code1 = code1[1][0]604                    setappend(code1)605 606            set = _uniq(set)607            # XXX: <fl> should move set optimization to compiler!608            if _len(set) == 1 and set[0][0] is LITERAL:609                # optimization610                if negate:611                    subpatternappend((NOT_LITERAL, set[0][1]))612                else:613                    subpatternappend(set[0])614            else:615                if negate:616                    set.insert(0, (NEGATE, None))617                # charmap optimization can't be added here because618                # global flags still are not known619                subpatternappend((IN, set))620 621        elif this in REPEAT_CHARS:622            # repeat previous item623            here = source.tell()624            if this == "?":625                min, max = 0, 1626            elif this == 
"*":627                min, max = 0, MAXREPEAT628 629            elif this == "+":630                min, max = 1, MAXREPEAT631            elif this == "{":632                if source.next == "}":633                    subpatternappend((LITERAL, _ord(this)))634                    continue635 636                min, max = 0, MAXREPEAT637                lo = hi = ""638                while source.next in DIGITS:639                    lo += sourceget()640                if sourcematch(","):641                    while source.next in DIGITS:642                        hi += sourceget()643                else:644                    hi = lo645                if not sourcematch("}"):646                    subpatternappend((LITERAL, _ord(this)))647                    source.seek(here)648                    continue649 650                if lo:651                    min = int(lo)652                    if min >= MAXREPEAT:653                        raise OverflowError("the repetition number is too large")654                if hi:655                    max = int(hi)656                    if max >= MAXREPEAT:657                        raise OverflowError("the repetition number is too large")658                    if max < min:659                        raise source.error("min repeat greater than max repeat",660                                           source.tell() - here)661            else:662                raise AssertionError("unsupported quantifier %r" % (char,))663            # figure out which item to repeat664            if subpattern:665                item = subpattern[-1:]666            else:667                item = None668            if not item or item[0][0] is AT:669                raise source.error("nothing to repeat",670                                   source.tell() - here + len(this))671            if item[0][0] in _REPEATCODES:672                raise source.error("multiple repeat",673                                   source.tell() - here + 
len(this))674            if item[0][0] is SUBPATTERN:675                group, add_flags, del_flags, p = item[0][1]676                if group is None and not add_flags and not del_flags:677                    item = p678            if sourcematch("?"):679                subpattern[-1] = (MIN_REPEAT, (min, max, item))680            else:681                subpattern[-1] = (MAX_REPEAT, (min, max, item))682 683        elif this == ".":684            subpatternappend((ANY, None))685 686        elif this == "(":687            start = source.tell() - 1688            group = True689            name = None690            add_flags = 0691            del_flags = 0692            if sourcematch("?"):693                # options694                char = sourceget()695                if char is None:696                    raise source.error("unexpected end of pattern")697                if char == "P":698                    # python extensions699                    if sourcematch("<"):700                        # named group: skip forward to end of name701                        name = source.getuntil(">", "group name")702                        if not name.isidentifier():703                            msg = "bad character in group name %r" % name704                            raise source.error(msg, len(name) + 1)705                    elif sourcematch("="):706                        # named backreference707                        name = source.getuntil(")", "group name")708                        if not name.isidentifier():709                            msg = "bad character in group name %r" % name710                            raise source.error(msg, len(name) + 1)711                        gid = state.groupdict.get(name)712                        if gid is None:713                            msg = "unknown group name %r" % name714                            raise source.error(msg, len(name) + 1)715                        if not state.checkgroup(gid):716                       
     raise source.error("cannot refer to an open group",717                                               len(name) + 1)718                        state.checklookbehindgroup(gid, source)719                        subpatternappend((GROUPREF, gid))720                        continue721 722                    else:723                        char = sourceget()724                        if char is None:725                            raise source.error("unexpected end of pattern")726                        raise source.error("unknown extension ?P" + char,727                                           len(char) + 2)728                elif char == ":":729                    # non-capturing group730                    group = None731                elif char == "#":732                    # comment733                    while True:734                        if source.next is None:735                            raise source.error("missing ), unterminated comment",736                                               source.tell() - start)737                        if sourceget() == ")":738                            break739                    continue740 741                elif char in "=!<":742                    # lookahead assertions743                    dir = 1744                    if char == "<":745                        char = sourceget()746                        if char is None:747                            raise source.error("unexpected end of pattern")748                        if char not in "=!":749                            raise source.error("unknown extension ?<" + char,750                                               len(char) + 2)751                        dir = -1 # lookbehind752                        lookbehindgroups = state.lookbehindgroups753                        if lookbehindgroups is None:754                            state.lookbehindgroups = state.groups755                    p = _parse_sub(source, state, verbose, nested + 1)756                   
 if dir < 0:757                        if lookbehindgroups is None:758                            state.lookbehindgroups = None759                    if not sourcematch(")"):760                        raise source.error("missing ), unterminated subpattern",761                                           source.tell() - start)762                    if char == "=":763                        subpatternappend((ASSERT, (dir, p)))764                    else:765                        subpatternappend((ASSERT_NOT, (dir, p)))766                    continue767 768                elif char == "(":769                    # conditional backreference group770                    condname = source.getuntil(")", "group name")771                    if condname.isidentifier():772                        condgroup = state.groupdict.get(condname)773                        if condgroup is None:774                            msg = "unknown group name %r" % condname775                            raise source.error(msg, len(condname) + 1)776                    else:777                        try:778                            condgroup = int(condname)779                            if condgroup < 0:780                                raise ValueError781                        except ValueError:782                            msg = "bad character in group name %r" % condname783                            raise source.error(msg, len(condname) + 1) from None784                        if not condgroup:785                            raise source.error("bad group number",786                                               len(condname) + 1)787                        if condgroup >= MAXGROUPS:788                            msg = "invalid group reference %d" % condgroup789                            raise source.error(msg, len(condname) + 1)790                        if condgroup not in state.grouprefpos:791                            state.grouprefpos[condgroup] = (792                                
source.tell() - len(condname) - 1793                            )794                    state.checklookbehindgroup(condgroup, source)795                    item_yes = _parse(source, state, verbose, nested + 1)796                    if source.match("|"):797                        item_no = _parse(source, state, verbose, nested + 1)798                        if source.next == "|":799                            raise source.error("conditional backref with more than two branches")800                    else:801                        item_no = None802                    if not source.match(")"):803                        raise source.error("missing ), unterminated subpattern",804                                           source.tell() - start)805                    subpatternappend((GROUPREF_EXISTS, (condgroup, item_yes, item_no)))806                    continue807 808                elif char in FLAGS or char == "-":809                    # flags810                    flags = _parse_flags(source, state, char)811                    if flags is None:  # global flags812                        if not first or subpattern:813                            import warnings814                            warnings.warn(815                                'Flags not at the start of the expression %r%s'816                                ' but at position %d' % (817                                    source.string[:20],  # truncate long regexes818                                    ' (truncated)' if len(source.string) > 20 else '',819                                    start,820                                ),821                                DeprecationWarning, stacklevel=nested + 6822                            )823                        if (state.flags & SRE_FLAG_VERBOSE) and not verbose:824                            raise Verbose825                        continue826 827                    add_flags, del_flags = flags828                    group = None829                
else:830                    raise source.error("unknown extension ?" + char,831                                       len(char) + 1)832 833            # parse group contents834            if group is not None:835                try:836                    group = state.opengroup(name)837                except error as err:838                    raise source.error(err.msg, len(name) + 1) from None839            sub_verbose = ((verbose or (add_flags & SRE_FLAG_VERBOSE)) and840                           not (del_flags & SRE_FLAG_VERBOSE))841            p = _parse_sub(source, state, sub_verbose, nested + 1)842            if not source.match(")"):843                raise source.error("missing ), unterminated subpattern",844                                   source.tell() - start)845            if group is not None:846                state.closegroup(group, p)847            subpatternappend((SUBPATTERN, (group, add_flags, del_flags, p)))848 849        elif this == "^":850            subpatternappend((AT, AT_BEGINNING))851 852        elif this == "$":853            subpatternappend((AT, AT_END))854 855        else:856            raise AssertionError("unsupported special character %r" % (char,))857 858    # unpack non-capturing groups859    for i in range(len(subpattern))[::-1]:860        op, av = subpattern[i]861        if op is SUBPATTERN:862            group, add_flags, del_flags, p = av863            if group is None and not add_flags and not del_flags:864                subpattern[i: i+1] = p865 866    return subpattern867 868def _parse_flags(source, state, char):869    sourceget = source.get870    add_flags = 0871    del_flags = 0872    if char != "-":873        while True:874            flag = FLAGS[char]875            if source.istext:876                if char == 'L':877                    msg = "bad inline flags: cannot use 'L' flag with a str pattern"878                    raise source.error(msg)879            else:880                if char == 'u':881            
        msg = "bad inline flags: cannot use 'u' flag with a bytes pattern"882                    raise source.error(msg)883            add_flags |= flag884            if (flag & TYPE_FLAGS) and (add_flags & TYPE_FLAGS) != flag:885                msg = "bad inline flags: flags 'a', 'u' and 'L' are incompatible"886                raise source.error(msg)887            char = sourceget()888            if char is None:889                raise source.error("missing -, : or )")890            if char in ")-:":891                break892            if char not in FLAGS:893                msg = "unknown flag" if char.isalpha() else "missing -, : or )"894                raise source.error(msg, len(char))895    if char == ")":896        state.flags |= add_flags897        return None898    if add_flags & GLOBAL_FLAGS:899        raise source.error("bad inline flags: cannot turn on global flag", 1)900    if char == "-":901        char = sourceget()902        if char is None:903            raise source.error("missing flag")904        if char not in FLAGS:905            msg = "unknown flag" if char.isalpha() else "missing flag"906            raise source.error(msg, len(char))907        while True:908            flag = FLAGS[char]909            if flag & TYPE_FLAGS:910                msg = "bad inline flags: cannot turn off flags 'a', 'u' and 'L'"911                raise source.error(msg)912            del_flags |= flag913            char = sourceget()914            if char is None:915                raise source.error("missing :")916            if char == ":":917                break918            if char not in FLAGS:919                msg = "unknown flag" if char.isalpha() else "missing :"920                raise source.error(msg, len(char))921    assert char == ":"922    if del_flags & GLOBAL_FLAGS:923        raise source.error("bad inline flags: cannot turn off global flag", 1)924    if add_flags & del_flags:925        raise source.error("bad inline flags: flag turned on and 
off", 1)926    return add_flags, del_flags927 928def fix_flags(src, flags):929    # Check and fix flags according to the type of pattern (str or bytes)930    if isinstance(src, str):931        if flags & SRE_FLAG_LOCALE:932            raise ValueError("cannot use LOCALE flag with a str pattern")933        if not flags & SRE_FLAG_ASCII:934            flags |= SRE_FLAG_UNICODE935        elif flags & SRE_FLAG_UNICODE:936            raise ValueError("ASCII and UNICODE flags are incompatible")937    else:938        if flags & SRE_FLAG_UNICODE:939            raise ValueError("cannot use UNICODE flag with a bytes pattern")940        if flags & SRE_FLAG_LOCALE and flags & SRE_FLAG_ASCII:941            raise ValueError("ASCII and LOCALE flags are incompatible")942    return flags943 944def parse(str, flags=0, state=None):945    # parse 're' pattern into list of (opcode, argument) tuples946 947    source = Tokenizer(str)948 949    if state is None:950        state = State()951    state.flags = flags952    state.str = str953 954    try:955        p = _parse_sub(source, state, flags & SRE_FLAG_VERBOSE, 0)956    except Verbose:957        # the VERBOSE flag was switched on inside the pattern.  
to be958        # on the safe side, we'll parse the whole thing again...959        state = State()960        state.flags = flags | SRE_FLAG_VERBOSE961        state.str = str962        source.seek(0)963        p = _parse_sub(source, state, True, 0)964 965    p.state.flags = fix_flags(str, p.state.flags)966 967    if source.next is not None:968        assert source.next == ")"969        raise source.error("unbalanced parenthesis")970 971    for g in p.state.grouprefpos:972        if g >= p.state.groups:973            msg = "invalid group reference %d" % g974            raise error(msg, str, p.state.grouprefpos[g])975 976    if flags & SRE_FLAG_DEBUG:977        p.dump()978 979    return p980 981def parse_template(source, state):982    # parse 're' replacement string into list of literals and983    # group references984    s = Tokenizer(source)985    sget = s.get986    groups = []987    literals = []988    literal = []989    lappend = literal.append990    def addgroup(index, pos):991        if index > state.groups:992            raise s.error("invalid group reference %d" % index, pos)993        if literal:994            literals.append(''.join(literal))995            del literal[:]996        groups.append((len(literals), index))997        literals.append(None)998    groupindex = state.groupindex999    while True:1000        this = sget()1001        if this is None:1002            break # end of replacement string1003        if this[0] == "\\":1004            # group1005            c = this[1]1006            if c == "g":1007                name = ""1008                if not s.match("<"):1009                    raise s.error("missing <")1010                name = s.getuntil(">", "group name")1011                if name.isidentifier():1012                    try:1013                        index = groupindex[name]1014                    except KeyError:1015                        raise IndexError("unknown group name %r" % name)1016                else:1017                
    try:1018                        index = int(name)1019                        if index < 0:1020                            raise ValueError1021                    except ValueError:1022                        raise s.error("bad character in group name %r" % name,1023                                      len(name) + 1) from None1024                    if index >= MAXGROUPS:1025                        raise s.error("invalid group reference %d" % index,1026                                      len(name) + 1)1027                addgroup(index, len(name) + 1)1028            elif c == "0":1029                if s.next in OCTDIGITS:1030                    this += sget()1031                    if s.next in OCTDIGITS:1032                        this += sget()1033                lappend(chr(int(this[1:], 8) & 0xff))1034            elif c in DIGITS:1035                isoctal = False1036                if s.next in DIGITS:1037                    this += sget()1038                    if (c in OCTDIGITS and this[2] in OCTDIGITS and1039                        s.next in OCTDIGITS):1040                        this += sget()1041                        isoctal = True1042                        c = int(this[1:], 8)1043                        if c > 0o377:1044                            raise s.error('octal escape value %s outside of '1045                                          'range 0-0o377' % this, len(this))1046                        lappend(chr(c))1047                if not isoctal:1048                    addgroup(int(this[1:]), len(this) - 1)1049            else:1050                try:1051                    this = chr(ESCAPES[this][1])1052                except KeyError:1053                    if c in ASCIILETTERS:1054                        raise s.error('bad escape %s' % this, len(this))1055                lappend(this)1056        else:1057            lappend(this)1058    if literal:1059        literals.append(''.join(literal))1060    if not isinstance(source, 
str):1061        # The tokenizer implicitly decodes bytes objects as latin-1, we must1062        # therefore re-encode the final representation.1063        literals = [None if s is None else s.encode('latin-1') for s in literals]1064    return groups, literals1065 1066def expand_template(template, match):1067    g = match.group1068    empty = match.string[:0]1069    groups, literals = template1070    literals = literals[:]1071    try:1072        for index, group in groups:1073            literals[index] = g(group) or empty1074    except IndexError:1075        raise error("invalid group reference %d" % index)1076    return empty.join(literals)1077