File Explorer

/proc/self/root/proc/thread-self/root/proc/self/task/21/root/usr/lib64/python3.9
This explorer reads the filesystem of the server it runs on, so /workspace/user isn't present here. Browsing and the terminal still work against this server's own disk from /.
30 dirs
174 files
mimetypes.py · 21.1 KB · 613 lines
"""Guess the MIME type of a file.

This module defines two useful functions:

guess_type(url, strict=True) -- guess the MIME type and encoding of a URL.

guess_extension(type, strict=True) -- guess the extension for a given MIME type.

It also contains the following, for tuning the behavior:

Data:

knownfiles -- list of files to parse
inited -- flag set when init() has been called
suffix_map -- dictionary mapping suffixes to suffixes
encodings_map -- dictionary mapping suffixes to encodings
types_map -- dictionary mapping suffixes to types

Functions:

init([files]) -- parse a list of files, default knownfiles (on Windows, the
  default values are taken from the registry)
read_mime_types(file) -- parse one file, return a dictionary or None
"""

import os
import sys
import posixpath
import urllib.parse
try:
    import winreg as _winreg
except ImportError:
    _winreg = None

__all__ = [
    "knownfiles", "inited", "MimeTypes",
    "guess_type", "guess_all_extensions", "guess_extension",
    "add_type", "init", "read_mime_types",
    "suffix_map", "encodings_map", "types_map", "common_types"
]

knownfiles = [
    "/etc/mime.types",
    "/etc/httpd/mime.types",                    # Mac OS X
    "/etc/httpd/conf/mime.types",               # Apache
    "/etc/apache/mime.types",                   # Apache 1
    "/etc/apache2/mime.types",                  # Apache 2
    "/usr/local/etc/httpd/conf/mime.types",
    "/usr/local/lib/netscape/mime.types",
    "/usr/local/etc/httpd/conf/mime.types",     # Apache 1.2
    "/usr/local/etc/mime.types",                # Apache 1.3
    ]

inited = False
_db = None


class MimeTypes:
    """MIME-types datastore.

    This datastore can handle information from mime.types-style files
    and supports basic determination of MIME type from a filename or
    URL, and can guess a reasonable extension given a MIME type.
    """

    def __init__(self, filenames=(), strict=True):
        """Build a datastore from the built-in defaults plus `filenames'.

        Each name in `filenames' is read with read(); `strict' is passed
        through to decide which table the parsed entries are added to.
        """
        if not inited:
            init()
        self.encodings_map = _encodings_map_default.copy()
        self.suffix_map = _suffix_map_default.copy()
        self.types_map = ({}, {}) # dict for (non-strict, strict)
        self.types_map_inv = ({}, {})
        for (ext, type) in _types_map_default.items():
            self.add_type(type, ext, True)
        for (ext, type) in _common_types_default.items():
            self.add_type(type, ext, False)
        for name in filenames:
            self.read(name, strict)

    def add_type(self, type, ext, strict=True):
        """Add a mapping between a type and an extension.

        When the extension is already known, the new
        type will replace the old one. When the type
        is already known the extension will be added
        to the list of known extensions.

        If strict is true, information will be added to
        list of standard types, else to the list of non-standard
        types.
        """
        self.types_map[strict][ext] = type
        exts = self.types_map_inv[strict].setdefault(type, [])
        if ext not in exts:
            exts.append(ext)

    def guess_type(self, url, strict=True):
        """Guess the type of a file which is either a URL or a path-like object.

        Return value is a tuple (type, encoding) where type is None if
        the type can't be guessed (no or unknown suffix) or a string
        of the form type/subtype, usable for a MIME Content-type
        header; and encoding is None for no encoding or the name of
        the program used to encode (e.g. compress or gzip).  The
        mappings are table driven.  Encoding suffixes are case
        sensitive; type suffixes are lower-cased before being looked up.

        The suffixes .tgz, .taz and .tz are all mapped to '.tar.gz'.
        (This is table-driven too, using the dictionary suffix_map;
        the suffix is lower-cased before it is looked up there.)

        Optional `strict' argument when False adds a bunch of commonly found,
        but non-standard types.
        """
        url = os.fspath(url)
        scheme, url = urllib.parse._splittype(url)
        if scheme == 'data':
            # syntax of data URLs:
            # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
            # mediatype := [ type "/" subtype ] *( ";" parameter )
            # data      := *urlchar
            # parameter := attribute "=" value
            # type/subtype defaults to "text/plain"
            comma = url.find(',')
            if comma < 0:
                # bad data URL
                return None, None
            semi = url.find(';', 0, comma)
            mediatype = url[:semi] if semi >= 0 else url[:comma]
            if '=' in mediatype or '/' not in mediatype:
                mediatype = 'text/plain'
            return mediatype, None      # never compressed, so encoding is None
        base, ext = posixpath.splitext(url)
        # Expand aliases such as ".tgz" -> ".tar.gz" until none applies.
        while (ext_lower := ext.lower()) in self.suffix_map:
            base, ext = posixpath.splitext(base + self.suffix_map[ext_lower])
        # encodings_map is case sensitive
        if ext in self.encodings_map:
            encoding = self.encodings_map[ext]
            base, ext = posixpath.splitext(base)
        else:
            encoding = None
        ext = ext.lower()
        # The standard table always wins; the non-standard table is only
        # consulted when the caller asked for lenient matching.
        types_map = self.types_map[True]
        if ext in types_map:
            return types_map[ext], encoding
        if strict:
            return None, encoding
        return self.types_map[False].get(ext), encoding

    def guess_all_extensions(self, type, strict=True):
        """Guess the extensions for a file based on its MIME type.

        Return value is a list of strings giving the possible filename
        extensions, including the leading dot ('.').  The extension is not
        guaranteed to have been associated with any particular data stream,
        but would be mapped to the MIME type `type' by guess_type().
        If no extension is known for `type', the list is empty.

        Optional `strict' argument when false adds a bunch of commonly found,
        but non-standard types.
        """
        type = type.lower()
        extensions = list(self.types_map_inv[True].get(type, []))
        if not strict:
            for ext in self.types_map_inv[False].get(type, []):
                if ext not in extensions:
                    extensions.append(ext)
        return extensions

    def guess_extension(self, type, strict=True):
        """Guess the extension for a file based on its MIME type.

        Return value is a string giving a filename extension,
        including the leading dot ('.').  The extension is not
        guaranteed to have been associated with any particular data
        stream, but would be mapped to the MIME type `type' by
        guess_type().  If no extension can be guessed for `type', None
        is returned.

        Optional `strict' argument when false adds a bunch of commonly found,
        but non-standard types.
        """
        extensions = self.guess_all_extensions(type, strict)
        if not extensions:
            return None
        return extensions[0]

    def read(self, filename, strict=True):
        """
        Read a single mime.types-format file, specified by pathname.

        If strict is true, information will be added to
        list of standard types, else to the list of non-standard
        types.
        """
        with open(filename, encoding='utf-8') as fp:
            self.readfp(fp, strict)

    def readfp(self, fp, strict=True):
        """
        Read a single mime.types-format file.

        `fp' only needs a readline() method.  On each line the first
        word is the MIME type and the remaining words are suffixes
        (without the leading dot); any word starting with '#' begins a
        comment that runs to the end of the line.

        If strict is true, information will be added to
        list of standard types, else to the list of non-standard
        types.
        """
        while line := fp.readline():
            words = line.split()
            # Drop the comment part of the line, if there is one.
            for i, word in enumerate(words):
                if word[0] == '#':
                    del words[i:]
                    break
            if not words:
                continue
            type, suffixes = words[0], words[1:]
            for suff in suffixes:
                self.add_type(type, '.' + suff, strict)

    def read_windows_registry(self, strict=True):
        """
        Load the MIME types database from Windows registry.

        Does nothing when the winreg module is unavailable.

        If strict is true, information will be added to
        list of standard types, else to the list of non-standard
        types.
        """

        # Windows only
        if not _winreg:
            return

        def enum_types(mimedb):
            # Yield subkey names until EnumKey runs past the last one,
            # skipping names that contain a NUL character.
            i = 0
            while True:
                try:
                    ctype = _winreg.EnumKey(mimedb, i)
                except OSError:
                    break
                else:
                    if '\0' not in ctype:
                        yield ctype
                i += 1

        with _winreg.OpenKey(_winreg.HKEY_CLASSES_ROOT, '') as hkcr:
            for subkeyname in enum_types(hkcr):
                # Only check file extensions; filtering here avoids
                # opening keys that would be discarded anyway.
                if not subkeyname.startswith("."):
                    continue
                try:
                    with _winreg.OpenKey(hkcr, subkeyname) as subkey:
                        # raises OSError if no 'Content Type' value
                        mimetype, datatype = _winreg.QueryValueEx(
                            subkey, 'Content Type')
                except OSError:
                    continue
                if datatype != _winreg.REG_SZ:
                    continue
                self.add_type(mimetype, subkeyname, strict)


def guess_type(url, strict=True):
    """Guess the type of a file based on its URL.

    Return value is a tuple (type, encoding) where type is None if the
    type can't be guessed (no or unknown suffix) or a string of the
    form type/subtype, usable for a MIME Content-type header; and
    encoding is None for no encoding or the name of the program used
    to encode (e.g. compress or gzip).  The mappings are table
    driven.  Encoding suffixes are case sensitive; type suffixes are
    lower-cased before being looked up.

    The suffixes .tgz, .taz and .tz are all mapped to ".tar.gz".
    (This is table-driven too, using the dictionary suffix_map; the
    suffix is lower-cased before it is looked up there.)

    Optional `strict' argument when false adds a bunch of commonly found, but
    non-standard types.
    """
    if _db is None:
        init()
    return _db.guess_type(url, strict)


def guess_all_extensions(type, strict=True):
    """Guess the extensions for a file based on its MIME type.

    Return value is a list of strings giving the possible filename
    extensions, including the leading dot ('.').  The extension is not
    guaranteed to have been associated with any particular data
    stream, but would be mapped to the MIME type `type' by
    guess_type().  If no extension is known for `type', an empty list
    is returned.

    Optional `strict' argument when false adds a bunch of commonly found,
    but non-standard types.
    """
    if _db is None:
        init()
    return _db.guess_all_extensions(type, strict)


def guess_extension(type, strict=True):
    """Guess the extension for a file based on its MIME type.

    Return value is a string giving a filename extension, including the
    leading dot ('.').  The extension is not guaranteed to have been
    associated with any particular data stream, but would be mapped to the
    MIME type `type' by guess_type().  If no extension can be guessed for
    `type', None is returned.

    Optional `strict' argument when false adds a bunch of commonly found,
    but non-standard types.
    """
    if _db is None:
        init()
    return _db.guess_extension(type, strict)


def add_type(type, ext, strict=True):
    """Add a mapping between a type and an extension.

    When the extension is already known, the new
    type will replace the old one. When the type
    is already known the extension will be added
    to the list of known extensions.

    If strict is true, information will be added to
    list of standard types, else to the list of non-standard
    types.
    """
    if _db is None:
        init()
    return _db.add_type(type, ext, strict)


def init(files=None):
    """(Re)build the module-level database and the public map globals.

    With files=None, or when no database exists yet, a fresh MimeTypes
    is created, the Windows registry is read when winreg is available,
    and every existing file in knownfiles (followed by `files', if
    given) is parsed.  Otherwise the existing database is reused and
    only `files' are parsed into it.

    Afterwards suffix_map, encodings_map, types_map and common_types
    are rebound to the database's tables.
    """
    global suffix_map, types_map, encodings_map, common_types
    global inited, _db
    inited = True    # so that MimeTypes.__init__() doesn't call us again

    if files is None or _db is None:
        db = MimeTypes()
        if _winreg:
            db.read_windows_registry()

        if files is None:
            files = knownfiles
        else:
            files = knownfiles + list(files)
    else:
        db = _db

    for file in files:
        if os.path.isfile(file):
            db.read(file)
    encodings_map = db.encodings_map
    suffix_map = db.suffix_map
    types_map = db.types_map[True]
    common_types = db.types_map[False]
    # Make the DB a global variable now that it is fully initialized
    _db = db


def read_mime_types(file):
    """Parse one mime.types-format file and return its strict type map.

    Returns a dictionary mapping suffixes to types (the built-in strict
    defaults plus the entries parsed from `file'), or None when the
    file cannot be opened.
    """
    try:
        f = open(file, encoding='utf-8')
    except OSError:
        return None
    with f:
        db = MimeTypes()
        db.readfp(f, True)
        return db.types_map[True]


def _default_mime_types():
    """Install the built-in tables into the module globals.

    Each public map (suffix_map, encodings_map, types_map, common_types)
    initially shares its dict with the matching private *_default name
    that MimeTypes.__init__() reads.
    """
    global suffix_map, _suffix_map_default
    global encodings_map, _encodings_map_default
    global types_map, _types_map_default
    global common_types, _common_types_default

    suffix_map = _suffix_map_default = {
        '.svgz': '.svg.gz',
        '.tgz': '.tar.gz',
        '.taz': '.tar.gz',
        '.tz': '.tar.gz',
        '.tbz2': '.tar.bz2',
        '.txz': '.tar.xz',
        }

    encodings_map = _encodings_map_default = {
        '.gz': 'gzip',
        '.Z': 'compress',
        '.bz2': 'bzip2',
        '.xz': 'xz',
        '.br': 'br',
        }

    # Before adding new types, make sure they are either registered with IANA,
    # at http://www.iana.org/assignments/media-types
    # or extensions, i.e. using the x- prefix

    # If you add to these, please keep them sorted by mime type.
    # Make sure the entry with the preferred file extension for a particular mime type
    # appears before any others of the same mimetype.
    # Every extension must appear only once: a repeated key in a dict
    # literal silently discards the earlier value.
    types_map = _types_map_default = {
        '.js'     : 'application/javascript',
        '.mjs'    : 'application/javascript',
        '.json'   : 'application/json',
        '.webmanifest': 'application/manifest+json',
        '.doc'    : 'application/msword',
        '.dot'    : 'application/msword',
        '.wiz'    : 'application/msword',
        '.bin'    : 'application/octet-stream',
        '.a'      : 'application/octet-stream',
        '.dll'    : 'application/octet-stream',
        '.exe'    : 'application/octet-stream',
        '.o'      : 'application/octet-stream',
        '.obj'    : 'application/octet-stream',
        '.so'     : 'application/octet-stream',
        '.oda'    : 'application/oda',
        '.pdf'    : 'application/pdf',
        '.p7c'    : 'application/pkcs7-mime',
        '.ps'     : 'application/postscript',
        '.ai'     : 'application/postscript',
        '.eps'    : 'application/postscript',
        '.m3u'    : 'application/vnd.apple.mpegurl',
        '.m3u8'   : 'application/vnd.apple.mpegurl',
        '.xls'    : 'application/vnd.ms-excel',
        '.xlb'    : 'application/vnd.ms-excel',
        '.ppt'    : 'application/vnd.ms-powerpoint',
        '.pot'    : 'application/vnd.ms-powerpoint',
        '.ppa'    : 'application/vnd.ms-powerpoint',
        '.pps'    : 'application/vnd.ms-powerpoint',
        '.pwz'    : 'application/vnd.ms-powerpoint',
        '.wasm'   : 'application/wasm',
        '.bcpio'  : 'application/x-bcpio',
        '.cpio'   : 'application/x-cpio',
        '.csh'    : 'application/x-csh',
        '.dvi'    : 'application/x-dvi',
        '.gtar'   : 'application/x-gtar',
        '.hdf'    : 'application/x-hdf',
        '.latex'  : 'application/x-latex',
        '.mif'    : 'application/x-mif',
        '.cdf'    : 'application/x-netcdf',
        '.nc'     : 'application/x-netcdf',
        '.p12'    : 'application/x-pkcs12',
        '.pfx'    : 'application/x-pkcs12',
        '.ram'    : 'application/x-pn-realaudio',
        '.pyc'    : 'application/x-python-code',
        '.pyo'    : 'application/x-python-code',
        '.sh'     : 'application/x-sh',
        '.shar'   : 'application/x-shar',
        '.swf'    : 'application/x-shockwave-flash',
        '.sv4cpio': 'application/x-sv4cpio',
        '.sv4crc' : 'application/x-sv4crc',
        '.tar'    : 'application/x-tar',
        '.tcl'    : 'application/x-tcl',
        '.tex'    : 'application/x-tex',
        '.texi'   : 'application/x-texinfo',
        '.texinfo': 'application/x-texinfo',
        '.roff'   : 'application/x-troff',
        '.t'      : 'application/x-troff',
        '.tr'     : 'application/x-troff',
        '.man'    : 'application/x-troff-man',
        '.me'     : 'application/x-troff-me',
        '.ms'     : 'application/x-troff-ms',
        '.ustar'  : 'application/x-ustar',
        '.src'    : 'application/x-wais-source',
        '.xsl'    : 'application/xml',
        '.rdf'    : 'application/xml',
        '.wsdl'   : 'application/xml',
        '.xpdl'   : 'application/xml',
        '.zip'    : 'application/zip',
        '.au'     : 'audio/basic',
        '.snd'    : 'audio/basic',
        '.mp3'    : 'audio/mpeg',
        '.mp2'    : 'audio/mpeg',
        '.aif'    : 'audio/x-aiff',
        '.aifc'   : 'audio/x-aiff',
        '.aiff'   : 'audio/x-aiff',
        '.ra'     : 'audio/x-pn-realaudio',
        '.wav'    : 'audio/x-wav',
        '.bmp'    : 'image/bmp',
        '.gif'    : 'image/gif',
        '.ief'    : 'image/ief',
        '.jpg'    : 'image/jpeg',
        '.jpe'    : 'image/jpeg',
        '.jpeg'   : 'image/jpeg',
        '.png'    : 'image/png',
        '.svg'    : 'image/svg+xml',
        '.tiff'   : 'image/tiff',
        '.tif'    : 'image/tiff',
        '.ico'    : 'image/vnd.microsoft.icon',
        '.ras'    : 'image/x-cmu-raster',
        '.pnm'    : 'image/x-portable-anymap',
        '.pbm'    : 'image/x-portable-bitmap',
        '.pgm'    : 'image/x-portable-graymap',
        '.ppm'    : 'image/x-portable-pixmap',
        '.rgb'    : 'image/x-rgb',
        '.xbm'    : 'image/x-xbitmap',
        '.xpm'    : 'image/x-xpixmap',
        '.xwd'    : 'image/x-xwindowdump',
        '.eml'    : 'message/rfc822',
        '.mht'    : 'message/rfc822',
        '.mhtml'  : 'message/rfc822',
        '.nws'    : 'message/rfc822',
        '.css'    : 'text/css',
        '.csv'    : 'text/csv',
        '.html'   : 'text/html',
        '.htm'    : 'text/html',
        '.txt'    : 'text/plain',
        '.bat'    : 'text/plain',
        '.c'      : 'text/plain',
        '.h'      : 'text/plain',
        '.ksh'    : 'text/plain',
        '.pl'     : 'text/plain',
        '.rtx'    : 'text/richtext',
        '.tsv'    : 'text/tab-separated-values',
        '.py'     : 'text/x-python',
        '.etx'    : 'text/x-setext',
        '.sgm'    : 'text/x-sgml',
        '.sgml'   : 'text/x-sgml',
        '.vcf'    : 'text/x-vcard',
        '.xml'    : 'text/xml',
        '.mp4'    : 'video/mp4',
        '.mpeg'   : 'video/mpeg',
        '.m1v'    : 'video/mpeg',
        '.mpa'    : 'video/mpeg',
        '.mpe'    : 'video/mpeg',
        '.mpg'    : 'video/mpeg',
        '.mov'    : 'video/quicktime',
        '.qt'     : 'video/quicktime',
        '.webm'   : 'video/webm',
        '.avi'    : 'video/x-msvideo',
        '.movie'  : 'video/x-sgi-movie',
        }

    # These are non-standard types, commonly found in the wild.  They will
    # only match if strict=0 flag is given to the API methods.

    # Please sort these too
    common_types = _common_types_default = {
        '.rtf' : 'application/rtf',
        '.midi': 'audio/midi',
        '.mid' : 'audio/midi',
        '.jpg' : 'image/jpg',
        '.pict': 'image/pict',
        '.pct' : 'image/pict',
        '.pic' : 'image/pict',
        # '.bmp' is 'image/bmp' in the standard table; the legacy name is
        # kept here so lenient reverse lookups still resolve it.
        '.bmp' : 'image/x-ms-bmp',
        '.xul' : 'text/xul',
        }


_default_mime_types()


def _main():
    """Command-line entry point: print a guess for each argument."""
    import getopt

    USAGE = """\
Usage: mimetypes.py [options] type

Options:
    --help / -h       -- print this message and exit
    --lenient / -l    -- additionally search of some common, but non-standard
                         types.
    --extension / -e  -- guess extension instead of type

More than one type argument may be given.
"""

    def usage(code, msg=''):
        print(USAGE)
        if msg:
            print(msg)
        sys.exit(code)

    try:
        opts, args = getopt.getopt(sys.argv[1:], 'hle',
                                   ['help', 'lenient', 'extension'])
    except getopt.error as msg:
        usage(1, msg)

    strict = True
    extension = False
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage(0)
        elif opt in ('-l', '--lenient'):
            strict = False
        elif opt in ('-e', '--extension'):
            extension = True
    for gtype in args:
        if extension:
            guess = guess_extension(gtype, strict)
            if not guess:
                print("I don't know anything about type", gtype)
            else:
                print(guess)
        else:
            guess, encoding = guess_type(gtype, strict)
            if not guess:
                print("I don't know anything about type", gtype)
            else:
                print('type:', guess, 'encoding:', encoding)


if __name__ == '__main__':
    _main()