File Explorer

/proc/thread-self/root/proc/self/root/proc/thread-self/root/lib64/python3.9/encodings
This explorer reads the filesystem of the server it runs on, so /workspace/user isn't present here. Browsing and the terminal still work against this server's own disk from /.
1 dir
122 files
idna.py · 8.9 KB · 308 lines
# This module implements the RFCs 3490 (IDNA) and 3491 (Nameprep)

import stringprep, re, codecs
from unicodedata import ucd_3_2_0 as unicodedata

# IDNA section 3.1
dots = re.compile("[\u002E\u3002\uFF0E\uFF61]")

# IDNA section 5
ace_prefix = b"xn--"
sace_prefix = "xn--"

# This assumes query strings, so AllowUnassigned is true
def nameprep(label):
    """Apply the Nameprep profile (RFC 3491) to *label* and return the result.

    The steps are: map characters through stringprep tables B.1 (dropped)
    and B.2 (replaced), normalize with NFKC using the Unicode 3.2.0
    database, reject prohibited characters, and enforce the bidirectional
    text requirements.

    Raises UnicodeError if the label contains a prohibited character or
    violates one of the bidi requirements.
    """
    # Map
    newlabel = []
    for c in label:
        if stringprep.in_table_b1(c):
            # Map to nothing
            continue
        newlabel.append(stringprep.map_table_b2(c))
    label = "".join(newlabel)

    # Normalize
    label = unicodedata.normalize("NFKC", label)

    # Prohibit
    for c in label:
        if stringprep.in_table_c12(c) or \
           stringprep.in_table_c22(c) or \
           stringprep.in_table_c3(c) or \
           stringprep.in_table_c4(c) or \
           stringprep.in_table_c5(c) or \
           stringprep.in_table_c6(c) or \
           stringprep.in_table_c7(c) or \
           stringprep.in_table_c8(c) or \
           stringprep.in_table_c9(c):
            raise UnicodeError("Invalid character %r" % c)

    # Check bidi
    RandAL = [stringprep.in_table_d1(x) for x in label]
    if any(RandAL):
        # There is a RandAL char in the string. Must perform further
        # tests:
        # 1) The characters in section 5.8 MUST be prohibited.
        # This is table C.8, which was already checked
        # 2) If a string contains any RandALCat character, the string
        # MUST NOT contain any LCat character.
        if any(stringprep.in_table_d2(x) for x in label):
            raise UnicodeError("Violation of BIDI requirement 2")
        # 3) If a string contains any RandALCat character, a
        # RandALCat character MUST be the first character of the
        # string, and a RandALCat character MUST be the last
        # character of the string.
        if not RandAL[0] or not RandAL[-1]:
            raise UnicodeError("Violation of BIDI requirement 3")

    return label

def ToASCII(label):
    """Convert a single str *label* to its ASCII (ACE) form, as bytes.

    A label that is already pure ASCII is returned encoded as-is; otherwise
    it is nameprepped and, if still not ASCII, punycode-encoded and given
    the "xn--" prefix.

    Raises UnicodeError if the resulting label is empty or 64 bytes or
    longer, if the nameprepped non-ASCII label already starts with the ACE
    prefix, or if nameprep() rejects it.
    """
    try:
        # Step 1: try ASCII
        label = label.encode("ascii")
    except UnicodeError:
        pass
    else:
        # Skip to step 3: UseSTD3ASCIIRules is false, so
        # Skip to step 8.
        if 0 < len(label) < 64:
            return label
        raise UnicodeError("label empty or too long")

    # Step 2: nameprep
    label = nameprep(label)

    # Step 3: UseSTD3ASCIIRules is false
    # Step 4: try ASCII
    try:
        label = label.encode("ascii")
    except UnicodeError:
        pass
    else:
        # Skip to step 8.
        if 0 < len(label) < 64:
            return label
        raise UnicodeError("label empty or too long")

    # Step 5: Check ACE prefix
    if label.startswith(sace_prefix):
        raise UnicodeError("Label starts with ACE prefix")

    # Step 6: Encode with PUNYCODE
    label = label.encode("punycode")

    # Step 7: Prepend ACE prefix
    label = ace_prefix + label

    # Step 8: Check size
    if 0 < len(label) < 64:
        return label
    raise UnicodeError("label empty or too long")

def ToUnicode(label):
    """Convert a single *label* (bytes or str) to its Unicode form, as str.

    A label without the ACE prefix is returned as ASCII text. A label with
    the prefix is punycode-decoded, and the result is only accepted if
    ToASCII() maps it back to the (lower-cased) input.

    Raises UnicodeError if the label is longer than 1024 characters, if it
    is not ASCII after nameprep, or if it does not round-trip.
    """
    if len(label) > 1024:
        # Protection against quadratic-time decoding of absurdly long
        # labels (CVE-2022-45061, CPython issue gh-98433).  RFC 5894
        # section 6 does not specify a label size limit prior to nameprep,
        # but having one makes practical sense.  This leaves ample room for
        # nameprep() to remove "map to nothing" characters while still
        # preventing time being wasted on decoding something that would
        # fail the < 64 length limit in step 6 anyway.
        raise UnicodeError("label way too long")
    # Step 1: Check for ASCII
    if isinstance(label, bytes):
        # bytes input is taken to be ASCII without verification here;
        # str(label, "ascii") below raises if it is not.
        pure_ascii = True
    else:
        try:
            label = label.encode("ascii")
            pure_ascii = True
        except UnicodeError:
            pure_ascii = False
    if not pure_ascii:
        # Step 2: Perform nameprep
        label = nameprep(label)
        # It doesn't say this, but apparently, it should be ASCII now
        try:
            label = label.encode("ascii")
        except UnicodeError:
            raise UnicodeError("Invalid character in IDN label")
    # Step 3: Check for ACE prefix
    if not label.startswith(ace_prefix):
        return str(label, "ascii")

    # Step 4: Remove ACE prefix
    label1 = label[len(ace_prefix):]

    # Step 5: Decode using PUNYCODE
    result = label1.decode("punycode")

    # Step 6: Apply ToASCII
    label2 = ToASCII(result)

    # Step 7: Compare the result of step 6 with the one of step 3
    # label2 will already be in lower case.
    if str(label, "ascii").lower() != str(label2, "ascii"):
        raise UnicodeError("IDNA does not round-trip", label, label2)

    # Step 8: return the result of step 5
    return result

### Codec APIs

class Codec(codecs.Codec):
    """Stateless IDNA codec operating on whole dot-separated names."""

    def encode(self, input, errors='strict'):
        """Encode the str *input* and return ``(bytes, len(input))``.

        Only ``errors='strict'`` is supported.  Pure-ASCII names take a
        fast path that only validates label lengths; other names are split
        on the IDNA dot characters and each label goes through ToASCII().
        A single trailing dot is preserved.

        Raises UnicodeError for any other *errors* value or for a label
        that is empty or too long.
        """

        if errors != 'strict':
            # IDNA is quite clear that implementations must be strict
            raise UnicodeError("unsupported error handling "+errors)

        if not input:
            return b'', 0

        try:
            result = input.encode('ascii')
        except UnicodeEncodeError:
            pass
        else:
            # ASCII name: fast path
            labels = result.split(b'.')
            for label in labels[:-1]:
                if not (0 < len(label) < 64):
                    raise UnicodeError("label empty or too long")
            # The last label may be empty (trailing dot) but not too long.
            if len(labels[-1]) >= 64:
                raise UnicodeError("label too long")
            return result, len(input)

        result = bytearray()
        labels = dots.split(input)
        if labels and not labels[-1]:
            trailing_dot = b'.'
            del labels[-1]
        else:
            trailing_dot = b''
        for label in labels:
            if result:
                # Join with U+002E
                result.extend(b'.')
            result.extend(ToASCII(label))
        return bytes(result+trailing_dot), len(input)

    def decode(self, input, errors='strict'):
        """Decode *input* and return ``(str, len(input))``.

        Only ``errors='strict'`` is supported.  Input without any ACE
        prefix that decodes as ASCII is returned directly; otherwise the
        input is split on ``b"."`` and each label goes through ToUnicode().
        A single trailing dot is preserved.

        Raises UnicodeError for any other *errors* value or when a label
        cannot be converted.
        """

        if errors != 'strict':
            raise UnicodeError("Unsupported error handling "+errors)

        if not input:
            return "", 0

        # IDNA allows decoding to operate on Unicode strings, too.
        if not isinstance(input, bytes):
            # XXX obviously wrong, see #3232
            # NOTE: bytes() accepts bytes-like objects but raises TypeError
            # for str, so str input is not actually supported here.
            input = bytes(input)

        if ace_prefix not in input:
            # Fast path
            try:
                return input.decode('ascii'), len(input)
            except UnicodeDecodeError:
                pass

        labels = input.split(b".")

        if labels and len(labels[-1]) == 0:
            trailing_dot = '.'
            del labels[-1]
        else:
            trailing_dot = ''

        result = []
        for label in labels:
            result.append(ToUnicode(label))

        return ".".join(result)+trailing_dot, len(input)

class IncrementalEncoder(codecs.BufferedIncrementalEncoder):
    """Incremental IDNA encoder that emits only complete labels."""

    def _buffer_encode(self, input, errors, final):
        """Encode the complete labels of *input*.

        Returns ``(bytes, consumed)`` where *consumed* is the number of
        input characters that were encoded.  Unless *final* is true, the
        last label is left unconsumed because it may still be incomplete.

        Raises UnicodeError if *errors* is not ``'strict'`` or a label is
        rejected by ToASCII().
        """
        if errors != 'strict':
            # IDNA is quite clear that implementations must be strict
            raise UnicodeError("unsupported error handling "+errors)

        if not input:
            return (b'', 0)

        labels = dots.split(input)
        trailing_dot = b''
        if labels:
            if not labels[-1]:
                trailing_dot = b'.'
                del labels[-1]
            elif not final:
                # Keep potentially unfinished label until the next call
                del labels[-1]
                if labels:
                    trailing_dot = b'.'

        result = bytearray()
        size = 0
        for label in labels:
            if size:
                # Join with U+002E
                result.extend(b'.')
                size += 1
            result.extend(ToASCII(label))
            size += len(label)

        result += trailing_dot
        size += len(trailing_dot)
        return (bytes(result), size)

class IncrementalDecoder(codecs.BufferedIncrementalDecoder):
    """Incremental IDNA decoder that emits only complete labels."""

    def _buffer_decode(self, input, errors, final):
        """Decode the complete labels of *input*.

        Returns ``(str, consumed)`` where *consumed* is the number of input
        items that were decoded.  Unless *final* is true, the last label is
        left unconsumed because it may still be incomplete.

        Raises UnicodeError if *errors* is not ``'strict'`` or a label is
        rejected by ToUnicode().
        """
        if errors != 'strict':
            raise UnicodeError("Unsupported error handling "+errors)

        if not input:
            return ("", 0)

        # IDNA allows decoding to operate on Unicode strings, too.
        if isinstance(input, str):
            labels = dots.split(input)
        else:
            # Must be ASCII string
            input = str(input, "ascii")
            labels = input.split(".")

        trailing_dot = ''
        if labels:
            if not labels[-1]:
                trailing_dot = '.'
                del labels[-1]
            elif not final:
                # Keep potentially unfinished label until the next call
                del labels[-1]
                if labels:
                    trailing_dot = '.'

        result = []
        size = 0
        for label in labels:
            result.append(ToUnicode(label))
            if size:
                size += 1
            size += len(label)

        result = ".".join(result) + trailing_dot
        size += len(trailing_dot)
        return (result, size)

class StreamWriter(Codec,codecs.StreamWriter):
    """Stream writer that encodes with Codec.encode."""
    pass

class StreamReader(Codec,codecs.StreamReader):
    """Stream reader that decodes with Codec.decode."""
    pass

### encodings module API

def getregentry():
    """Return the codecs.CodecInfo entry that registers this codec as 'idna'."""
    return codecs.CodecInfo(
        name='idna',
        encode=Codec().encode,
        decode=Codec().decode,
        incrementalencoder=IncrementalEncoder,
        incrementaldecoder=IncrementalDecoder,
        streamwriter=StreamWriter,
        streamreader=StreamReader,
    )