File Explorer

/proc/thread-self/root/proc/self/root/proc/self/task/16/root/lib64/python3.9
This explorer reads the filesystem of the server it runs on, so /workspace/user isn't present here. Browsing and the terminal still work against this server's own disk from /.
30 dirs
174 files
sre_compile.py27.3 KB · 809 lines
#
# Secret Labs' Regular Expression Engine
#
# convert template to internal format
#
# Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
#
# See the sre.py file for information on usage and redistribution.
#

"""Internal support module for sre"""

import _sre
import sre_parse
# NOTE: the star import is deliberate here: every opcode, flag and lookup
# table used below (LITERAL, IN, SRE_FLAG_*, OP_IGNORE, error, ...) comes
# from sre_constants.
from sre_constants import *

assert _sre.MAGIC == MAGIC, "SRE module mismatch"

_LITERAL_CODES = {LITERAL, NOT_LITERAL}
_REPEATING_CODES = {REPEAT, MIN_REPEAT, MAX_REPEAT}
_SUCCESS_CODES = {SUCCESS, FAILURE}
_ASSERT_CODES = {ASSERT, ASSERT_NOT}
_UNIT_CODES = _LITERAL_CODES | {ANY, IN}

# Sets of lowercase characters which have the same uppercase.
_equivalences = (
    # LATIN SMALL LETTER I, LATIN SMALL LETTER DOTLESS I
    (0x69, 0x131), # iı
    # LATIN SMALL LETTER S, LATIN SMALL LETTER LONG S
    (0x73, 0x17f), # sſ
    # MICRO SIGN, GREEK SMALL LETTER MU
    (0xb5, 0x3bc), # µμ
    # COMBINING GREEK YPOGEGRAMMENI, GREEK SMALL LETTER IOTA, GREEK PROSGEGRAMMENI
    (0x345, 0x3b9, 0x1fbe), # \u0345ιι
    # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS, GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA
    (0x390, 0x1fd3), # ΐΐ
    # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS, GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA
    (0x3b0, 0x1fe3), # ΰΰ
    # GREEK SMALL LETTER BETA, GREEK BETA SYMBOL
    (0x3b2, 0x3d0), # βϐ
    # GREEK SMALL LETTER EPSILON, GREEK LUNATE EPSILON SYMBOL
    (0x3b5, 0x3f5), # εϵ
    # GREEK SMALL LETTER THETA, GREEK THETA SYMBOL
    (0x3b8, 0x3d1), # θϑ
    # GREEK SMALL LETTER KAPPA, GREEK KAPPA SYMBOL
    (0x3ba, 0x3f0), # κϰ
    # GREEK SMALL LETTER PI, GREEK PI SYMBOL
    (0x3c0, 0x3d6), # πϖ
    # GREEK SMALL LETTER RHO, GREEK RHO SYMBOL
    (0x3c1, 0x3f1), # ρϱ
    # GREEK SMALL LETTER FINAL SIGMA, GREEK SMALL LETTER SIGMA
    (0x3c2, 0x3c3), # ςσ
    # GREEK SMALL LETTER PHI, GREEK PHI SYMBOL
    (0x3c6, 0x3d5), # φϕ
    # CYRILLIC SMALL LETTER VE, CYRILLIC SMALL LETTER ROUNDED VE
    (0x432, 0x1c80), # вᲀ
    # CYRILLIC SMALL LETTER DE, CYRILLIC SMALL LETTER LONG-LEGGED DE
    (0x434, 0x1c81), # дᲁ
    # CYRILLIC SMALL LETTER O, CYRILLIC SMALL LETTER NARROW O
    (0x43e, 0x1c82), # оᲂ
    # CYRILLIC SMALL LETTER ES, CYRILLIC SMALL LETTER WIDE ES
    (0x441, 0x1c83), # сᲃ
    # CYRILLIC SMALL LETTER TE, CYRILLIC SMALL LETTER TALL TE, CYRILLIC SMALL LETTER THREE-LEGGED TE
    (0x442, 0x1c84, 0x1c85), # тᲄᲅ
    # CYRILLIC SMALL LETTER HARD SIGN, CYRILLIC SMALL LETTER TALL HARD SIGN
    (0x44a, 0x1c86), # ъᲆ
    # CYRILLIC SMALL LETTER YAT, CYRILLIC SMALL LETTER TALL YAT
    (0x463, 0x1c87), # ѣᲇ
    # CYRILLIC SMALL LETTER UNBLENDED UK, CYRILLIC SMALL LETTER MONOGRAPH UK
    (0x1c88, 0xa64b), # ᲈꙋ
    # LATIN SMALL LETTER S WITH DOT ABOVE, LATIN SMALL LETTER LONG S WITH DOT ABOVE
    (0x1e61, 0x1e9b), # ṡẛ
    # LATIN SMALL LIGATURE LONG S T, LATIN SMALL LIGATURE ST
    (0xfb05, 0xfb06), # ﬅﬆ
)

# Maps the lowercase code to lowercase codes which have the same uppercase.
_ignorecase_fixes = {i: tuple(j for j in t if i != j)
                     for t in _equivalences for i in t}


def _combine_flags(flags, add_flags, del_flags,
                   TYPE_FLAGS=sre_parse.TYPE_FLAGS):
    """Return *flags* with *add_flags* switched on and *del_flags* off.

    If *add_flags* contains any bit of TYPE_FLAGS, every TYPE_FLAGS bit
    already present in *flags* is cleared first, so the added type flag
    replaces the inherited one instead of being combined with it.
    """
    if add_flags & TYPE_FLAGS:
        flags &= ~TYPE_FLAGS
    return (flags | add_flags) & ~del_flags


def _compile(code, pattern, flags):
    """Compile a (sub)pattern, appending its opcodes to the list *code*.

    *pattern* is iterated as ``(op, av)`` pairs.  *flags* is a bit mask of
    SRE_FLAG_* values that selects the case-insensitive, locale, unicode,
    dotall and multiline opcode variants.  Nothing is returned; *code* is
    extended in place.

    Raises ``error`` for a repeat operator under SRE_FLAG_TEMPLATE, for a
    look-behind assertion whose minimum and maximum widths differ, and for
    an operator this function does not know.
    """
    # internal: compile a (sub)pattern
    emit = code.append
    _len = len
    LITERAL_CODES = _LITERAL_CODES
    REPEATING_CODES = _REPEATING_CODES
    SUCCESS_CODES = _SUCCESS_CODES
    ASSERT_CODES = _ASSERT_CODES
    # The case helpers stay None unless IGNORECASE is set without LOCALE;
    # `fixes` is only set for the unicode variant.
    iscased = None
    tolower = None
    fixes = None
    if flags & SRE_FLAG_IGNORECASE and not flags & SRE_FLAG_LOCALE:
        if flags & SRE_FLAG_UNICODE:
            iscased = _sre.unicode_iscased
            tolower = _sre.unicode_tolower
            fixes = _ignorecase_fixes
        else:
            iscased = _sre.ascii_iscased
            tolower = _sre.ascii_tolower
    for op, av in pattern:
        if op in LITERAL_CODES:
            if not flags & SRE_FLAG_IGNORECASE:
                emit(op)
                emit(av)
            elif flags & SRE_FLAG_LOCALE:
                emit(OP_LOCALE_IGNORE[op])
                emit(av)
            elif not iscased(av):
                # Uncased character: the plain opcode is sufficient.
                emit(op)
                emit(av)
            else:
                lo = tolower(av)
                if not fixes:  # ascii
                    emit(OP_IGNORE[op])
                    emit(lo)
                elif lo not in fixes:
                    emit(OP_UNICODE_IGNORE[op])
                    emit(lo)
                else:
                    # The character has extra case-equivalents: emit a
                    # small set holding all of them.
                    emit(IN_UNI_IGNORE)
                    skip = _len(code)
                    emit(0)  # placeholder, patched below
                    if op is NOT_LITERAL:
                        emit(NEGATE)
                    for k in (lo,) + fixes[lo]:
                        emit(LITERAL)
                        emit(k)
                    emit(FAILURE)
                    code[skip] = _len(code) - skip
        elif op is IN:
            charset, hascased = _optimize_charset(av, iscased, tolower, fixes)
            if flags & SRE_FLAG_IGNORECASE and flags & SRE_FLAG_LOCALE:
                emit(IN_LOC_IGNORE)
            elif not hascased:
                emit(IN)
            elif not fixes:  # ascii
                emit(IN_IGNORE)
            else:
                emit(IN_UNI_IGNORE)
            skip = _len(code)
            emit(0)  # placeholder, patched below
            _compile_charset(charset, flags, code)
            code[skip] = _len(code) - skip
        elif op is ANY:
            if flags & SRE_FLAG_DOTALL:
                emit(ANY_ALL)
            else:
                emit(ANY)
        elif op in REPEATING_CODES:
            if flags & SRE_FLAG_TEMPLATE:
                raise error("internal: unsupported template operator %r" % (op,))
            if _simple(av[2]):
                if op is MAX_REPEAT:
                    emit(REPEAT_ONE)
                else:
                    emit(MIN_REPEAT_ONE)
                skip = _len(code)
                emit(0)  # placeholder, patched below
                emit(av[0])
                emit(av[1])
                _compile(code, av[2], flags)
                emit(SUCCESS)
                code[skip] = _len(code) - skip
            else:
                emit(REPEAT)
                skip = _len(code)
                emit(0)  # placeholder, patched below
                emit(av[0])
                emit(av[1])
                _compile(code, av[2], flags)
                code[skip] = _len(code) - skip
                if op is MAX_REPEAT:
                    emit(MAX_UNTIL)
                else:
                    emit(MIN_UNTIL)
        elif op is SUBPATTERN:
            group, add_flags, del_flags, p = av
            if group:
                emit(MARK)
                emit((group-1)*2)
            _compile(code, p, _combine_flags(flags, add_flags, del_flags))
            if group:
                emit(MARK)
                emit((group-1)*2+1)
        elif op in SUCCESS_CODES:
            emit(op)
        elif op in ASSERT_CODES:
            emit(op)
            skip = _len(code)
            emit(0)  # placeholder, patched below
            if av[0] >= 0:
                emit(0)  # look ahead
            else:
                lo, hi = av[1].getwidth()
                if lo != hi:
                    raise error("look-behind requires fixed-width pattern")
                emit(lo)  # look behind
            _compile(code, av[1], flags)
            emit(SUCCESS)
            code[skip] = _len(code) - skip
        elif op is CALL:
            emit(op)
            skip = _len(code)
            emit(0)  # placeholder, patched below
            _compile(code, av, flags)
            emit(SUCCESS)
            code[skip] = _len(code) - skip
        elif op is AT:
            emit(op)
            if flags & SRE_FLAG_MULTILINE:
                av = AT_MULTILINE.get(av, av)
            if flags & SRE_FLAG_LOCALE:
                av = AT_LOCALE.get(av, av)
            elif flags & SRE_FLAG_UNICODE:
                av = AT_UNICODE.get(av, av)
            emit(av)
        elif op is BRANCH:
            emit(op)
            # Positions of the JUMP operands that must be pointed at the
            # end of the whole branch once it is known.
            jump_slots = []
            for branch in av[1]:
                skip = _len(code)
                emit(0)  # placeholder, patched below
                _compile(code, branch, flags)
                emit(JUMP)
                jump_slots.append(_len(code))
                emit(0)  # placeholder, patched after the loop
                code[skip] = _len(code) - skip
            emit(FAILURE)  # end of branch
            for slot in jump_slots:
                code[slot] = _len(code) - slot
        elif op is CATEGORY:
            emit(op)
            if flags & SRE_FLAG_LOCALE:
                av = CH_LOCALE[av]
            elif flags & SRE_FLAG_UNICODE:
                av = CH_UNICODE[av]
            emit(av)
        elif op is GROUPREF:
            if not flags & SRE_FLAG_IGNORECASE:
                emit(op)
            elif flags & SRE_FLAG_LOCALE:
                emit(GROUPREF_LOC_IGNORE)
            elif not fixes:  # ascii
                emit(GROUPREF_IGNORE)
            else:
                emit(GROUPREF_UNI_IGNORE)
            emit(av-1)
        elif op is GROUPREF_EXISTS:
            emit(op)
            emit(av[0]-1)
            skipyes = _len(code)
            emit(0)  # placeholder, patched below
            _compile(code, av[1], flags)
            if av[2]:
                emit(JUMP)
                skipno = _len(code)
                emit(0)  # placeholder, patched below
                code[skipyes] = _len(code) - skipyes + 1
                _compile(code, av[2], flags)
                code[skipno] = _len(code) - skipno
            else:
                code[skipyes] = _len(code) - skipyes + 1
        else:
            raise error("internal: unsupported operand type %r" % (op,))


def _compile_charset(charset, flags, code):
    """Append the set subprogram for *charset* to *code*.

    *charset* is iterated as ``(op, av)`` pairs.  Each op is emitted,
    followed by its operands, and the subprogram is terminated with
    FAILURE.  Raises ``error`` for an unknown set operator.
    """
    # compile charset subprogram
    emit = code.append
    for op, av in charset:
        emit(op)
        if op is NEGATE:
            pass
        elif op is LITERAL:
            emit(av)
        elif op is RANGE or op is RANGE_UNI_IGNORE:
            emit(av[0])
            emit(av[1])
        elif op is CHARSET:
            code.extend(av)
        elif op is BIGCHARSET:
            code.extend(av)
        elif op is CATEGORY:
            if flags & SRE_FLAG_LOCALE:
                emit(CH_LOCALE[av])
            elif flags & SRE_FLAG_UNICODE:
                emit(CH_UNICODE[av])
            else:
                emit(av)
        else:
            raise error("internal: unsupported set operator %r" % (op,))
    emit(FAILURE)


def _optimize_charset(charset, iscased=None, fixup=None, fixes=None):
    """Return ``(items, hascased)`` for an optimized form of *charset*.

    *charset* is a list of ``(op, av)`` pairs.  LITERAL and RANGE items
    are folded into a character map, which is then re-expressed as at
    most two LITERAL/RANGE runs, a CHARSET bitmap (map of 256 entries) or
    a BIGCHARSET (map of 65536 entries).  NEGATE items are kept at the
    front and every other item is appended unchanged at the end.

    When *fixup* is given it is applied to every character before it is
    recorded, *iscased* is used to compute ``hascased``, and *fixes* maps
    a character to further characters that must be recorded with it.
    The original *charset* object is returned when the run form is not
    shorter and no cased character was seen.
    """
    # internal: optimize character set
    out = []
    tail = []
    charmap = bytearray(256)
    hascased = False
    for op, av in charset:
        # Retry loop: the first IndexError grows the map from 256 to
        # 65536 entries and retries the same item.
        while True:
            try:
                if op is LITERAL:
                    if fixup:
                        lo = fixup(av)
                        charmap[lo] = 1
                        if fixes and lo in fixes:
                            for k in fixes[lo]:
                                charmap[k] = 1
                        if not hascased and iscased(av):
                            hascased = True
                    else:
                        charmap[av] = 1
                elif op is RANGE:
                    r = range(av[0], av[1]+1)
                    if fixup:
                        if fixes:
                            for i in map(fixup, r):
                                charmap[i] = 1
                                if i in fixes:
                                    for k in fixes[i]:
                                        charmap[k] = 1
                        else:
                            for i in map(fixup, r):
                                charmap[i] = 1
                        if not hascased:
                            hascased = any(map(iscased, r))
                    else:
                        for i in r:
                            charmap[i] = 1
                elif op is NEGATE:
                    out.append((op, av))
                else:
                    tail.append((op, av))
            except IndexError:
                if len(charmap) == 256:
                    # character set contains non-UCS1 character codes
                    charmap += b'\0' * 0xff00
                    continue
                # Character set contains non-BMP character codes.
                # For range, all BMP characters in the range are already
                # processed.
                if fixup:
                    hascased = True
                    # For now, IN_UNI_IGNORE+LITERAL and
                    # IN_UNI_IGNORE+RANGE_UNI_IGNORE work for all non-BMP
                    # characters, because two characters (at least one of
                    # which is not in the BMP) match case-insensitively
                    # if and only if:
                    # 1) c1.lower() == c2.lower()
                    # 2) c1.lower() == c2 or c1.lower().upper() == c2
                    # Also, both c.lower() and c.lower().upper() are single
                    # characters for every non-BMP character.
                    if op is RANGE:
                        op = RANGE_UNI_IGNORE
                tail.append((op, av))
            break

    # compress character map
    runs = []
    q = 0
    while True:
        p = charmap.find(1, q)
        if p < 0:
            break
        if len(runs) >= 2:
            # More than two runs: give up on the literal/range form.
            runs = None
            break
        q = charmap.find(0, p)
        if q < 0:
            runs.append((p, len(charmap)))
            break
        runs.append((p, q))
    if runs is not None:
        # use literal/range
        for p, q in runs:
            if q - p == 1:
                out.append((LITERAL, p))
            else:
                out.append((RANGE, (p, q - 1)))
        out += tail
        # if the case was changed or new representation is more compact
        if hascased or len(out) < len(charset):
            return out, hascased
        # else original character set is good enough
        return charset, hascased

    # use bitmap
    if len(charmap) == 256:
        data = _mk_bitmap(charmap)
        out.append((CHARSET, data))
        out += tail
        return out, hascased

    # To represent a big charset, first a bitmap of all characters in the
    # set is constructed. Then, this bitmap is sliced into chunks of 256
    # characters, duplicate chunks are eliminated, and each chunk is
    # given a number. In the compiled expression, the charset is
    # represented by a 32-bit word sequence, consisting of one word for
    # the number of different chunks, a sequence of 256 bytes (64 words)
    # of chunk numbers indexed by their original chunk position, and a
    # sequence of 256-bit chunks (8 words each).

    # Compression is normally good: in a typical charset, large ranges of
    # Unicode will be either completely excluded (e.g. if only cyrillic
    # letters are to be matched), or completely included (e.g. if large
    # subranges of Kanji match). These ranges will be represented by
    # chunks of all one-bits or all zero-bits.

    # Matching can be also done efficiently: the more significant byte of
    # the Unicode character is an index into the chunk number, and the
    # less significant byte is a bit index in the chunk (just like the
    # CHARSET matching).

    charmap = bytes(charmap)  # should be hashable
    comps = {}
    mapping = bytearray(256)
    block = 0
    data = bytearray()
    for i in range(0, 65536, 256):
        chunk = charmap[i: i + 256]
        if chunk in comps:
            mapping[i // 256] = comps[chunk]
        else:
            mapping[i // 256] = comps[chunk] = block
            block += 1
            data += chunk
    data = _mk_bitmap(data)
    data[0:0] = [block] + _bytes_to_codes(mapping)
    out.append((BIGCHARSET, data))
    out += tail
    return out, hascased


_CODEBITS = _sre.CODESIZE * 8
MAXCODE = (1 << _CODEBITS) - 1
# Translation table mapping byte 0 to b'0' and every other byte to b'1'.
_BITS_TRANS = b'0' + b'1' * 255


def _mk_bitmap(bits, _CODEBITS=_CODEBITS, _int=int):
    """Pack the byte map *bits* into a list of _CODEBITS-wide integers.

    Each byte of *bits* contributes one bit (0 for a zero byte, 1 for any
    other value).  Entry 0 of *bits* becomes the least significant bit of
    the first returned integer.
    """
    s = bits.translate(_BITS_TRANS)[::-1]
    return [_int(s[i - _CODEBITS: i], 2)
            for i in range(len(s), 0, -_CODEBITS)]


def _bytes_to_codes(b):
    """Reinterpret the bytes-like *b* as a list of native unsigned ints."""
    # Convert block indices to word array
    a = memoryview(b).cast('I')
    assert a.itemsize == _sre.CODESIZE
    assert len(a) * a.itemsize == len(b)
    return a.tolist()


def _simple(p):
    """Return True if subpattern *p* is a single "simple" operator.

    That is: exactly one item whose op is in _UNIT_CODES, optionally
    wrapped in SUBPATTERN items whose first field is None.
    """
    # check if this subpattern is a "simple" operator
    if len(p) != 1:
        return False
    op, av = p[0]
    if op is SUBPATTERN:
        return av[0] is None and _simple(av[-1])
    return op in _UNIT_CODES


def _generate_overlap_table(prefix):
    """
    Generate an overlap table for the following prefix.
    An overlap table is a table of the same size as the prefix which
    informs about the potential self-overlap for each index in the prefix:
    - if overlap[i] == 0, prefix[i:] can't overlap prefix[0:...]
    - if overlap[i] == k with 0 < k <= i, prefix[i-k+1:i+1] overlaps with
      prefix[0:k]
    """
    table = [0] * len(prefix)
    for i in range(1, len(prefix)):
        idx = table[i - 1]
        while prefix[i] != prefix[idx]:
            if idx == 0:
                table[i] = 0
                break
            idx = table[idx - 1]
        else:
            table[i] = idx + 1
    return table


def _get_iscased(flags):
    """Return the iscased predicate selected by *flags*.

    Returns None without SRE_FLAG_IGNORECASE, the unicode predicate with
    SRE_FLAG_UNICODE, and the ascii predicate otherwise.
    """
    if not flags & SRE_FLAG_IGNORECASE:
        return None
    elif flags & SRE_FLAG_UNICODE:
        return _sre.unicode_iscased
    else:
        return _sre.ascii_iscased


def _get_literal_prefix(pattern, flags):
    """Return ``(prefix, prefix_skip, got_all)`` for *pattern*.

    ``prefix`` is the list of leading LITERAL character codes, collected
    through nested SUBPATTERN items and stopping at the first cased
    character when matching is case-insensitive.  ``prefix_skip`` is None
    or an offset into ``prefix`` recorded at the first capturing group
    encountered.  ``got_all`` is True when every item of the pattern was
    consumed into the prefix.
    """
    # look for literal prefix
    prefix = []
    prefixappend = prefix.append
    prefix_skip = None
    iscased = _get_iscased(flags)
    for op, av in pattern.data:
        if op is LITERAL:
            if iscased and iscased(av):
                break
            prefixappend(av)
        elif op is SUBPATTERN:
            group, add_flags, del_flags, p = av
            flags1 = _combine_flags(flags, add_flags, del_flags)
            if flags1 & SRE_FLAG_IGNORECASE and flags1 & SRE_FLAG_LOCALE:
                break
            prefix1, prefix_skip1, got_all = _get_literal_prefix(p, flags1)
            if prefix_skip is None:
                if group is not None:
                    prefix_skip = len(prefix)
                elif prefix_skip1 is not None:
                    prefix_skip = len(prefix) + prefix_skip1
            prefix.extend(prefix1)
            if not got_all:
                break
        else:
            break
    else:
        return prefix, prefix_skip, True
    return prefix, prefix_skip, False


def _get_charset_prefix(pattern, flags):
    """Return a charset matching the first character of *pattern*, or None.

    Leading SUBPATTERN wrappers are unwrapped first (combining their
    flags).  A charset is produced from a leading LITERAL, from a BRANCH
    whose alternatives all start with a LITERAL, or from an IN set.  None
    is returned when the pattern is empty, when IGNORECASE and LOCALE are
    both in effect, or when a cased character would be involved under
    case-insensitive matching.
    """
    while True:
        if not pattern.data:
            return None
        op, av = pattern.data[0]
        if op is not SUBPATTERN:
            break
        group, add_flags, del_flags, pattern = av
        flags = _combine_flags(flags, add_flags, del_flags)
        if flags & SRE_FLAG_IGNORECASE and flags & SRE_FLAG_LOCALE:
            return None

    iscased = _get_iscased(flags)
    if op is LITERAL:
        if iscased and iscased(av):
            return None
        return [(op, av)]
    elif op is BRANCH:
        charset = []
        charsetappend = charset.append
        for p in av[1]:
            if not p:
                return None
            first_op, first_av = p[0]
            if first_op is LITERAL and not (iscased and iscased(first_av)):
                charsetappend((first_op, first_av))
            else:
                return None
        return charset
    elif op is IN:
        charset = av
        if iscased:
            for set_op, set_av in charset:
                if set_op is LITERAL:
                    if iscased(set_av):
                        return None
                elif set_op is RANGE:
                    if set_av[1] > 0xffff:
                        return None
                    if any(map(iscased, range(set_av[0], set_av[1]+1))):
                        return None
        return charset
    return None


def _compile_info(code, pattern, flags):
    """Append an INFO block for *pattern* to *code*.

    The block holds a flag mask, the minimum and maximum pattern width
    (both capped at MAXCODE) and, when available, either a literal prefix
    with its overlap table or a charset for the first character.  For a
    pattern whose minimum width is 0 only the fixed header is emitted.
    """
    # internal: compile an info block.  in the current version,
    # this contains min/max pattern width, and an optional literal
    # prefix or a character map
    lo, hi = pattern.getwidth()
    if hi > MAXCODE:
        hi = MAXCODE
    if lo == 0:
        code.extend([INFO, 4, 0, lo, hi])
        return
    # look for a literal prefix
    prefix = []
    prefix_skip = 0
    got_all = False  # only meaningful when a prefix was found
    charset = []  # filled in only when no literal prefix is found
    if not (flags & SRE_FLAG_IGNORECASE and flags & SRE_FLAG_LOCALE):
        # look for literal prefix
        prefix, prefix_skip, got_all = _get_literal_prefix(pattern, flags)
        # if no prefix, look for charset prefix
        if not prefix:
            charset = _get_charset_prefix(pattern, flags)
    # add an info block
    emit = code.append
    emit(INFO)
    skip = len(code)
    emit(0)  # placeholder, patched at the end
    # literal flag
    mask = 0
    if prefix:
        mask = SRE_INFO_PREFIX
        if prefix_skip is None and got_all:
            mask = mask | SRE_INFO_LITERAL
    elif charset:
        mask = mask | SRE_INFO_CHARSET
    emit(mask)
    # pattern length
    if lo < MAXCODE:
        emit(lo)
    else:
        emit(MAXCODE)
        prefix = prefix[:MAXCODE]
    emit(min(hi, MAXCODE))
    # add literal prefix
    if prefix:
        emit(len(prefix))  # length
        if prefix_skip is None:
            prefix_skip = len(prefix)
        emit(prefix_skip)  # skip
        code.extend(prefix)
        # generate overlap table
        code.extend(_generate_overlap_table(prefix))
    elif charset:
        charset, hascased = _optimize_charset(charset)
        assert not hascased
        _compile_charset(charset, flags, code)
    code[skip] = len(code) - skip


def isstring(obj):
    """Return True if *obj* is a str or bytes instance."""
    return isinstance(obj, (str, bytes))


def _code(p, flags):
    """Return the code list for parsed pattern *p*.

    The list is the INFO block, then the compiled pattern, then SUCCESS.
    *flags* is OR-ed with ``p.state.flags`` before compiling.
    """
    flags = p.state.flags | flags
    code = []

    # compile info block
    _compile_info(code, p, flags)

    # compile the pattern
    _compile(code, p.data, flags)

    code.append(SUCCESS)

    return code


def _hex_code(code):
    """Format *code* as a bracketed list of zero-padded hex words."""
    return '[%s]' % ', '.join('%#0*x' % (_sre.CODESIZE*2+2, x) for x in code)


def dis(code):
    """Print a disassembly of the compiled *code* list to stdout.

    Each line starts with the offset of the instruction, followed by ':'
    if an earlier printed instruction targets that offset and '.'
    otherwise.  Nested subprograms are indented.  Raises ValueError for
    an opcode this function does not know how to print.
    """
    import sys

    labels = set()
    level = 0
    offset_width = len(str(len(code) - 1))

    def dis_(start, end):
        nonlocal level

        def print_(*args, to=None):
            # Print one instruction line, prefixed with the offset held
            # in the enclosing `start` at the time of the call.
            if to is not None:
                labels.add(to)
                args += ('(to %d)' % (to,),)
            print('%*d%s ' % (offset_width, start, ':' if start in labels else '.'),
                  end='  '*(level-1))
            print(*args)

        def print_2(*args):
            # Print a continuation line without an offset prefix.
            print(end=' '*(offset_width + 2*level))
            print(*args)

        level += 1
        i = start
        while i < end:
            start = i
            op = code[i]
            i += 1
            op = OPCODES[op]
            if op in (SUCCESS, FAILURE, ANY, ANY_ALL,
                      MAX_UNTIL, MIN_UNTIL, NEGATE):
                print_(op)
            elif op in (LITERAL, NOT_LITERAL,
                        LITERAL_IGNORE, NOT_LITERAL_IGNORE,
                        LITERAL_UNI_IGNORE, NOT_LITERAL_UNI_IGNORE,
                        LITERAL_LOC_IGNORE, NOT_LITERAL_LOC_IGNORE):
                arg = code[i]
                i += 1
                print_(op, '%#02x (%r)' % (arg, chr(arg)))
            elif op is AT:
                arg = code[i]
                i += 1
                arg = str(ATCODES[arg])
                assert arg[:3] == 'AT_'
                print_(op, arg[3:])
            elif op is CATEGORY:
                arg = code[i]
                i += 1
                arg = str(CHCODES[arg])
                assert arg[:9] == 'CATEGORY_'
                print_(op, arg[9:])
            elif op in (IN, IN_IGNORE, IN_UNI_IGNORE, IN_LOC_IGNORE):
                skip = code[i]
                print_(op, skip, to=i+skip)
                dis_(i+1, i+skip)
                i += skip
            elif op in (RANGE, RANGE_UNI_IGNORE):
                lo, hi = code[i: i+2]
                i += 2
                print_(op, '%#02x %#02x (%r-%r)' % (lo, hi, chr(lo), chr(hi)))
            elif op is CHARSET:
                print_(op, _hex_code(code[i: i + 256//_CODEBITS]))
                i += 256//_CODEBITS
            elif op is BIGCHARSET:
                arg = code[i]
                i += 1
                mapping = list(b''.join(x.to_bytes(_sre.CODESIZE, sys.byteorder)
                                        for x in code[i: i + 256//_sre.CODESIZE]))
                print_(op, arg, mapping)
                i += 256//_sre.CODESIZE
                level += 1
                for j in range(arg):
                    print_2(_hex_code(code[i: i + 256//_CODEBITS]))
                    i += 256//_CODEBITS
                level -= 1
            elif op in (MARK, GROUPREF, GROUPREF_IGNORE, GROUPREF_UNI_IGNORE,
                        GROUPREF_LOC_IGNORE):
                arg = code[i]
                i += 1
                print_(op, arg)
            elif op is JUMP:
                skip = code[i]
                print_(op, skip, to=i+skip)
                i += 1
            elif op is BRANCH:
                skip = code[i]
                print_(op, skip, to=i+skip)
                while skip:
                    dis_(i+1, i+skip)
                    i += skip
                    # Rebind so print_ reports the offset of this arm.
                    start = i
                    skip = code[i]
                    if skip:
                        print_('branch', skip, to=i+skip)
                    else:
                        print_(FAILURE)
                i += 1
            elif op in (REPEAT, REPEAT_ONE, MIN_REPEAT_ONE):
                skip, min_count, max_count = code[i: i+3]
                if max_count == MAXREPEAT:
                    max_count = 'MAXREPEAT'
                print_(op, skip, min_count, max_count, to=i+skip)
                dis_(i+3, i+skip)
                i += skip
            elif op is GROUPREF_EXISTS:
                arg, skip = code[i: i+2]
                print_(op, arg, skip, to=i+skip)
                i += 2
            elif op in (ASSERT, ASSERT_NOT):
                skip, arg = code[i: i+2]
                print_(op, skip, arg, to=i+skip)
                dis_(i+2, i+skip)
                i += skip
            elif op is INFO:
                skip, flags, min_width, max_width = code[i: i+4]
                if max_width == MAXREPEAT:
                    max_width = 'MAXREPEAT'
                print_(op, skip, bin(flags), min_width, max_width, to=i+skip)
                start = i+4
                if flags & SRE_INFO_PREFIX:
                    prefix_len, prefix_skip = code[i+4: i+6]
                    print_2('  prefix_skip', prefix_skip)
                    start = i + 6
                    prefix = code[start: start+prefix_len]
                    print_2('  prefix',
                            '[%s]' % ', '.join('%#02x' % x for x in prefix),
                            '(%r)' % ''.join(map(chr, prefix)))
                    start += prefix_len
                    print_2('  overlap', code[start: start+prefix_len])
                    start += prefix_len
                if flags & SRE_INFO_CHARSET:
                    level += 1
                    print_2('in')
                    dis_(start, i+skip)
                    level -= 1
                i += skip
            else:
                raise ValueError(op)

        level -= 1

    dis_(0, len(code))


def compile(p, flags=0):
    """Convert *p* to the internal format and return ``_sre.compile(...)``.

    *p* may be a str/bytes pattern, which is parsed with
    ``sre_parse.parse``, or an already parsed pattern object.  When
    SRE_FLAG_DEBUG is set in *flags* the generated code is disassembled
    to stdout first.
    """
    # internal: convert pattern list to internal format

    if isstring(p):
        pattern = p
        p = sre_parse.parse(p, flags)
    else:
        pattern = None

    code = _code(p, flags)

    if flags & SRE_FLAG_DEBUG:
        print()
        dis(code)

    # map in either direction
    groupindex = p.state.groupdict
    indexgroup = [None] * p.state.groups
    for k, i in groupindex.items():
        indexgroup[i] = k

    return _sre.compile(
        pattern, flags | p.state.flags, code,
        p.state.groups-1,
        groupindex, tuple(indexgroup)
        )