File Explorer

/proc/self/root/proc/self/root/proc/thread-self/root/lib64/python3.9/html
This explorer reads the filesystem of the server it runs on, so /workspace/user isn't present here. Browsing and the terminal still work against this server's own disk from /.
1 dir
3 files
__init__.py · 4.6 KB · 133 lines
"""
General functions for HTML manipulation.
"""

import re as _re
from html.entities import html5 as _html5


__all__ = ['escape', 'unescape']


def escape(s, quote=True):
    """
    Replace special characters "&", "<" and ">" to HTML-safe sequences.
    If the optional flag quote is true (the default), the quotation mark
    characters, both double quote (") and single quote (') characters are also
    translated.

    Returns the escaped copy of the string s.
    """
    s = s.replace("&", "&amp;") # Must be done first!
    s = s.replace("<", "&lt;")
    s = s.replace(">", "&gt;")
    if quote:
        s = s.replace('"', "&quot;")
        s = s.replace('\'', "&#x27;")
    return s


# see http://www.w3.org/TR/html5/syntax.html#tokenizing-character-references

# Numeric character references that are replaced by a fixed character
# instead of the code point they literally name.
_invalid_charrefs = {
    0x00: '\ufffd',  # REPLACEMENT CHARACTER
    0x0d: '\r',      # CARRIAGE RETURN
    0x80: '\u20ac',  # EURO SIGN
    0x81: '\x81',    # <control>
    0x82: '\u201a',  # SINGLE LOW-9 QUOTATION MARK
    0x83: '\u0192',  # LATIN SMALL LETTER F WITH HOOK
    0x84: '\u201e',  # DOUBLE LOW-9 QUOTATION MARK
    0x85: '\u2026',  # HORIZONTAL ELLIPSIS
    0x86: '\u2020',  # DAGGER
    0x87: '\u2021',  # DOUBLE DAGGER
    0x88: '\u02c6',  # MODIFIER LETTER CIRCUMFLEX ACCENT
    0x89: '\u2030',  # PER MILLE SIGN
    0x8a: '\u0160',  # LATIN CAPITAL LETTER S WITH CARON
    0x8b: '\u2039',  # SINGLE LEFT-POINTING ANGLE QUOTATION MARK
    0x8c: '\u0152',  # LATIN CAPITAL LIGATURE OE
    0x8d: '\x8d',    # <control>
    0x8e: '\u017d',  # LATIN CAPITAL LETTER Z WITH CARON
    0x8f: '\x8f',    # <control>
    0x90: '\x90',    # <control>
    0x91: '\u2018',  # LEFT SINGLE QUOTATION MARK
    0x92: '\u2019',  # RIGHT SINGLE QUOTATION MARK
    0x93: '\u201c',  # LEFT DOUBLE QUOTATION MARK
    0x94: '\u201d',  # RIGHT DOUBLE QUOTATION MARK
    0x95: '\u2022',  # BULLET
    0x96: '\u2013',  # EN DASH
    0x97: '\u2014',  # EM DASH
    0x98: '\u02dc',  # SMALL TILDE
    0x99: '\u2122',  # TRADE MARK SIGN
    0x9a: '\u0161',  # LATIN SMALL LETTER S WITH CARON
    0x9b: '\u203a',  # SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
    0x9c: '\u0153',  # LATIN SMALL LIGATURE OE
    0x9d: '\x9d',    # <control>
    0x9e: '\u017e',  # LATIN SMALL LETTER Z WITH CARON
    0x9f: '\u0178',  # LATIN CAPITAL LETTER Y WITH DIAERESIS
}

# Code points for which a numeric character reference is dropped entirely
# (replaced by the empty string) unless _invalid_charrefs maps it first.
_invalid_codepoints = {
    # 0x0001 to 0x0008
    0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8,
    # 0x000E to 0x001F
    0xe, 0xf, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19,
    0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
    # 0x007F to 0x009F
    0x7f, 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a,
    0x8b, 0x8c, 0x8d, 0x8e, 0x8f, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96,
    0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
    # 0xFDD0 to 0xFDEF
    0xfdd0, 0xfdd1, 0xfdd2, 0xfdd3, 0xfdd4, 0xfdd5, 0xfdd6, 0xfdd7, 0xfdd8,
    0xfdd9, 0xfdda, 0xfddb, 0xfddc, 0xfddd, 0xfdde, 0xfddf, 0xfde0, 0xfde1,
    0xfde2, 0xfde3, 0xfde4, 0xfde5, 0xfde6, 0xfde7, 0xfde8, 0xfde9, 0xfdea,
    0xfdeb, 0xfdec, 0xfded, 0xfdee, 0xfdef,
    # others
    0xb, 0xfffe, 0xffff, 0x1fffe, 0x1ffff, 0x2fffe, 0x2ffff, 0x3fffe, 0x3ffff,
    0x4fffe, 0x4ffff, 0x5fffe, 0x5ffff, 0x6fffe, 0x6ffff, 0x7fffe, 0x7ffff,
    0x8fffe, 0x8ffff, 0x9fffe, 0x9ffff, 0xafffe, 0xaffff, 0xbfffe, 0xbffff,
    0xcfffe, 0xcffff, 0xdfffe, 0xdffff, 0xefffe, 0xeffff, 0xffffe, 0xfffff,
    0x10fffe, 0x10ffff
}


def _replace_charref(s):
    """
    Return the replacement text for one match of the _charref pattern.

    s is the match object; group 1 holds the reference without its leading
    "&" (for example "#62;", "#x3e" or "gt;").
    """
    s = s.group(1)
    if s[0] == '#':
        # numeric charref
        if s[1] in 'xX':
            digits, base, max_digits = s[2:], 16, 6
        else:
            digits, base, max_digits = s[1:], 10, 7
        digits = digits.rstrip(';').lstrip('0')
        # 0x10FFFF has 6 hex / 7 decimal digits, so anything with more
        # significant digits is out of range.  Deciding this from the length
        # avoids calling int() on an arbitrarily long digit string, which
        # raises ValueError on interpreters that limit int/str conversion.
        if len(digits) > max_digits:
            return '\uFFFD'
        num = int(digits or '0', base)
        if num in _invalid_charrefs:
            return _invalid_charrefs[num]
        if 0xD800 <= num <= 0xDFFF or num > 0x10FFFF:
            return '\uFFFD'
        if num in _invalid_codepoints:
            return ''
        return chr(num)
    else:
        # named charref
        if s in _html5:
            return _html5[s]
        # find the longest matching name (as defined by the standard)
        for x in range(len(s)-1, 1, -1):
            if s[:x] in _html5:
                return _html5[s[:x]] + s[x:]
        # no known name is a prefix: leave the text untouched
        return '&' + s


# Matches "&" followed by a decimal reference, a hexadecimal reference, or a
# candidate entity name of up to 32 characters, each with an optional ";".
_charref = _re.compile(r'&(#[0-9]+;?'
                       r'|#[xX][0-9a-fA-F]+;?'
                       r'|[^\t\n\f <&#;]{1,32};?)')

def unescape(s):
    """
    Convert all named and numeric character references (e.g. &gt;, &#62;,
    &#x3e;) in the string s to the corresponding unicode characters.
    This function uses the rules defined by the HTML 5 standard
    for both valid and invalid character references, and the list of
    HTML 5 named character references defined in html.entities.html5.
    """
    if '&' not in s:
        return s
    return _charref.sub(_replace_charref, s)