File Explorer

/proc/self/root/proc/thread-self/root/proc/self/root/proc/self/task/21/root/lib64/python3.9
This explorer reads the filesystem of the server it runs on, so /workspace/user isn't present here. Browsing and the terminal still work against this server's own disk from /.
30 dirs
174 files
gzip.py21.3 KB · 609 lines
"""Functions that read and write gzipped files.

The user of the file doesn't have to worry about the compression,
but random access is not allowed."""

# based on Andrew Kuchling's minigzip.py distributed with the zlib module

import struct
import sys
import time
import os
import zlib
import builtins
import io
import _compression

__all__ = ["BadGzipFile", "GzipFile", "open", "compress", "decompress"]

# Bit flags of the FLG byte in a gzip member header.
FTEXT, FHCRC, FEXTRA, FNAME, FCOMMENT = 1, 2, 4, 8, 16

# Values stored in GzipFile.mode.
READ, WRITE = 1, 2

_COMPRESS_LEVEL_FAST = 1
_COMPRESS_LEVEL_TRADEOFF = 6
_COMPRESS_LEVEL_BEST = 9


def open(filename, mode="rb", compresslevel=_COMPRESS_LEVEL_BEST,
         encoding=None, errors=None, newline=None):
    """Open a gzip-compressed file in binary or text mode.

    The filename argument can be an actual filename (a str or bytes object), or
    an existing file object to read from or write to.

    The mode argument can be "r", "rb", "w", "wb", "x", "xb", "a" or "ab" for
    binary mode, or "rt", "wt", "xt" or "at" for text mode. The default mode is
    "rb", and the default compresslevel is 9.

    For binary mode, this function is equivalent to the GzipFile constructor:
    GzipFile(filename, mode, compresslevel). In this case, the encoding, errors
    and newline arguments must not be provided.

    For text mode, a GzipFile object is created, and wrapped in an
    io.TextIOWrapper instance with the specified encoding, error handling
    behavior, and line ending(s).

    Raises ValueError for a mode containing both "t" and "b", or when a
    text-only argument is given in binary mode, and TypeError when filename
    is neither a path nor an object with a read() or write() attribute.

    """
    if "t" in mode:
        if "b" in mode:
            raise ValueError("Invalid mode: %r" % (mode,))
    else:
        if encoding is not None:
            raise ValueError("Argument 'encoding' not supported in binary mode")
        if errors is not None:
            raise ValueError("Argument 'errors' not supported in binary mode")
        if newline is not None:
            raise ValueError("Argument 'newline' not supported in binary mode")

    # GzipFile itself only understands binary modes.
    gz_mode = mode.replace("t", "")
    if isinstance(filename, (str, bytes, os.PathLike)):
        binary_file = GzipFile(filename, gz_mode, compresslevel)
    elif hasattr(filename, "read") or hasattr(filename, "write"):
        binary_file = GzipFile(None, gz_mode, compresslevel, filename)
    else:
        raise TypeError("filename must be a str or bytes object, or a file")

    if "t" in mode:
        return io.TextIOWrapper(binary_file, encoding, errors, newline)
    else:
        return binary_file


def write32u(output, value):
    """Write *value* to *output* as a 4-byte little-endian integer."""
    # The L format writes the bit pattern correctly whether signed
    # or unsigned.
    output.write(struct.pack("<L", value))


class _PaddedFile:
    """Minimal read-only file object that prepends a string to the contents
    of an actual file. Shouldn't be used outside of gzip.py, as it lacks
    essential functionality."""

    def __init__(self, f, prepend=b''):
        self._buffer = prepend
        self._length = len(prepend)
        self.file = f
        # Offset of the next unread byte in _buffer; None once the buffer
        # has been fully consumed (reads then go straight to the file).
        self._read = 0

    def read(self, size):
        """Return *size* bytes, serving the prepended buffer first."""
        if self._read is None:
            return self.file.read(size)
        if self._read + size <= self._length:
            # The request is satisfied entirely by the buffer.
            read = self._read
            self._read += size
            return self._buffer[read:self._read]
        else:
            # Drain the buffer and take the remainder from the file.
            read = self._read
            self._read = None
            return self._buffer[read:] + \
                   self.file.read(size-self._length+read)

    def prepend(self, prepend=b''):
        """Push *prepend* back so that it is returned by the next read()."""
        if self._read is None:
            self._buffer = prepend
        else:  # Assume data was read since the last prepend() call
            self._read -= len(prepend)
            return
        self._length = len(self._buffer)
        self._read = 0

    def seek(self, off):
        """Discard any prepended data and seek the underlying file."""
        self._read = None
        self._buffer = None
        return self.file.seek(off)

    def seekable(self):
        return True  # Allows fast-forwarding even in unseekable streams


class BadGzipFile(OSError):
    """Exception raised in some cases for invalid gzip files."""


class GzipFile(_compression.BaseStream):
    """The GzipFile class simulates most of the methods of a file object with
    the exception of the truncate() method.

    This class only supports opening files in binary mode. If you need to open a
    compressed file in text mode, use the gzip.open() function.

    """

    # Overridden with internal file object to be closed, if only a filename
    # is passed in
    myfileobj = None

    def __init__(self, filename=None, mode=None,
                 compresslevel=_COMPRESS_LEVEL_BEST, fileobj=None, mtime=None):
        """Constructor for the GzipFile class.

        At least one of fileobj and filename must be given a
        non-trivial value.

        The new class instance is based on fileobj, which can be a regular
        file, an io.BytesIO object, or any other object which simulates a file.
        It defaults to None, in which case filename is opened to provide
        a file object.

        When fileobj is not None, the filename argument is only used to be
        included in the gzip file header, which may include the original
        filename of the uncompressed file.  It defaults to the filename of
        fileobj, if discernible; otherwise, it defaults to the empty string,
        and in this case the original filename is not included in the header.

        The mode argument can be any of 'r', 'rb', 'a', 'ab', 'w', 'wb', 'x', or
        'xb' depending on whether the file will be read or written.  The default
        is the mode of fileobj if discernible; otherwise, the default is 'rb'.
        A mode of 'r' is equivalent to one of 'rb', and similarly for 'w' and
        'wb', 'a' and 'ab', and 'x' and 'xb'.

        The compresslevel argument is an integer from 0 to 9 controlling the
        level of compression; 1 is fastest and produces the least compression,
        and 9 is slowest and produces the most compression. 0 is no compression
        at all. The default is 9.

        The mtime argument is an optional numeric timestamp to be written
        to the last modification time field in the stream when compressing.
        If omitted or None, the current time is used.

        If the constructor opened the file itself (no fileobj was given) and
        a later initialisation step fails, that file is closed again before
        the exception propagates.

        """

        if mode and ('t' in mode or 'U' in mode):
            raise ValueError("Invalid mode: {!r}".format(mode))
        if mode and 'b' not in mode:
            mode += 'b'
        if fileobj is None:
            fileobj = self.myfileobj = builtins.open(filename, mode or 'rb')
        try:
            if filename is None:
                filename = getattr(fileobj, 'name', '')
                if not isinstance(filename, (str, bytes)):
                    filename = ''
            else:
                filename = os.fspath(filename)
            origmode = mode
            if mode is None:
                mode = getattr(fileobj, 'mode', 'rb')

            if mode.startswith('r'):
                self.mode = READ
                raw = _GzipReader(fileobj)
                self._buffer = io.BufferedReader(raw)
                self.name = filename

            elif mode.startswith(('w', 'a', 'x')):
                if origmode is None:
                    import warnings
                    warnings.warn(
                        "GzipFile was opened for writing, but this will "
                        "change in future Python releases.  "
                        "Specify the mode argument for opening it for writing.",
                        FutureWarning, 2)
                self.mode = WRITE
                self._init_write(filename)
                self.compress = zlib.compressobj(compresslevel,
                                                 zlib.DEFLATED,
                                                 -zlib.MAX_WBITS,
                                                 zlib.DEF_MEM_LEVEL,
                                                 0)
                self._write_mtime = mtime
            else:
                raise ValueError("Invalid mode: {!r}".format(mode))

            self.fileobj = fileobj

            if self.mode == WRITE:
                self._write_gzip_header(compresslevel)
        except BaseException:
            # Do not leak a file that this constructor opened itself.
            # Caller-supplied file objects remain the caller's to close.
            owned = self.myfileobj
            if owned is not None:
                self.myfileobj = None
                # Mark the stream closed so a later close() is a no-op
                # instead of writing a trailer to the closed file.
                self.fileobj = None
                owned.close()
            raise

    @property
    def filename(self):
        """Deprecated alias of ``name`` (with ".gz" appended in write mode)."""
        import warnings
        warnings.warn("use the name attribute", DeprecationWarning, 2)
        if self.mode == WRITE and self.name[-3:] != ".gz":
            return self.name + ".gz"
        return self.name

    @property
    def mtime(self):
        """Last modification time read from stream, or None"""
        return self._buffer.raw._last_mtime

    def __repr__(self):
        s = repr(self.fileobj)
        return '<gzip ' + s[1:-1] + ' ' + hex(id(self)) + '>'

    def _init_write(self, filename):
        """Reset the running CRC, size and offset counters for writing."""
        self.name = filename
        self.crc = zlib.crc32(b"")
        self.size = 0
        self.writebuf = []
        self.bufsize = 0
        self.offset = 0  # Current file offset for seek(), tell(), etc

    def _write_gzip_header(self, compresslevel):
        """Write the gzip member header to the underlying file object."""
        self.fileobj.write(b'\037\213')             # magic header
        self.fileobj.write(b'\010')                 # compression method
        try:
            # RFC 1952 requires the FNAME field to be Latin-1. Do not
            # include filenames that cannot be represented that way.
            fname = os.path.basename(self.name)
            if not isinstance(fname, bytes):
                fname = fname.encode('latin-1')
            if fname.endswith(b'.gz'):
                fname = fname[:-3]
        except UnicodeEncodeError:
            fname = b''
        flags = 0
        if fname:
            flags = FNAME
        self.fileobj.write(chr(flags).encode('latin-1'))
        mtime = self._write_mtime
        if mtime is None:
            mtime = time.time()
        write32u(self.fileobj, int(mtime))
        # XFL byte: derived from the requested compression level.
        if compresslevel == _COMPRESS_LEVEL_BEST:
            xfl = b'\002'
        elif compresslevel == _COMPRESS_LEVEL_FAST:
            xfl = b'\004'
        else:
            xfl = b'\000'
        self.fileobj.write(xfl)
        self.fileobj.write(b'\377')
        if fname:
            self.fileobj.write(fname + b'\000')

    def write(self,data):
        """Compress *data* and write it out.

        Accepts bytes or any object supporting the buffer protocol and
        returns the number of uncompressed bytes consumed.  Raises OSError
        (EBADF) when the file was opened for reading.
        """
        self._check_not_closed()
        if self.mode != WRITE:
            import errno
            raise OSError(errno.EBADF, "write() on read-only GzipFile object")

        if self.fileobj is None:
            raise ValueError("write() on closed GzipFile object")

        if isinstance(data, bytes):
            length = len(data)
        else:
            # accept any data that supports the buffer protocol
            data = memoryview(data)
            length = data.nbytes

        if length > 0:
            self.fileobj.write(self.compress.compress(data))
            self.size += length
            self.crc = zlib.crc32(data, self.crc)
            self.offset += length

        return length

    def read(self, size=-1):
        """Read up to *size* uncompressed bytes from the buffered reader.

        Raises OSError (EBADF) when the file was opened for writing.
        """
        self._check_not_closed()
        if self.mode != READ:
            import errno
            raise OSError(errno.EBADF, "read() on write-only GzipFile object")
        return self._buffer.read(size)

    def read1(self, size=-1):
        """Implements BufferedIOBase.read1()

        Reads up to a buffer's worth of data if size is negative."""
        self._check_not_closed()
        if self.mode != READ:
            import errno
            raise OSError(errno.EBADF, "read1() on write-only GzipFile object")

        if size < 0:
            size = io.DEFAULT_BUFFER_SIZE
        return self._buffer.read1(size)

    def peek(self, n):
        """Delegate to the buffered reader's peek(n).

        Raises OSError (EBADF) when the file was opened for writing.
        """
        self._check_not_closed()
        if self.mode != READ:
            import errno
            raise OSError(errno.EBADF, "peek() on write-only GzipFile object")
        return self._buffer.peek(n)

    @property
    def closed(self):
        """True once close() has detached the underlying file object."""
        return self.fileobj is None

    def close(self):
        """Finish the stream and release resources; safe to call twice.

        In write mode the remaining compressed data and the CRC32/size
        trailer are written.  A file opened by the constructor is closed;
        a caller-supplied file object is left open.
        """
        fileobj = self.fileobj
        if fileobj is None:
            return
        self.fileobj = None
        try:
            if self.mode == WRITE:
                fileobj.write(self.compress.flush())
                write32u(fileobj, self.crc)
                # self.size may exceed 2 GiB, or even 4 GiB
                write32u(fileobj, self.size & 0xffffffff)
            elif self.mode == READ:
                self._buffer.close()
        finally:
            myfileobj = self.myfileobj
            if myfileobj:
                self.myfileobj = None
                myfileobj.close()

    def flush(self,zlib_mode=zlib.Z_SYNC_FLUSH):
        """In write mode, flush the compressor and the underlying file."""
        self._check_not_closed()
        if self.mode == WRITE:
            # Ensure the compressor's buffer is flushed
            self.fileobj.write(self.compress.flush(zlib_mode))
            self.fileobj.flush()

    def fileno(self):
        """Invoke the underlying file object's fileno() method.

        This will raise AttributeError if the underlying file object
        doesn't support fileno().
        """
        return self.fileobj.fileno()

    def rewind(self):
        '''Return the uncompressed stream file position indicator to the
        beginning of the file'''
        if self.mode != READ:
            raise OSError("Can't rewind in write mode")
        self._buffer.seek(0)

    def readable(self):
        return self.mode == READ

    def writable(self):
        return self.mode == WRITE

    def seekable(self):
        return True

    def seek(self, offset, whence=io.SEEK_SET):
        """Change the position in the uncompressed stream.

        In write mode only forward seeks are possible; the gap is filled
        by writing zero bytes.  In read mode the call is delegated to the
        buffered reader.
        """
        if self.mode == WRITE:
            if whence != io.SEEK_SET:
                if whence == io.SEEK_CUR:
                    offset = self.offset + offset
                else:
                    raise ValueError('Seek from end not supported')
            if offset < self.offset:
                raise OSError('Negative seek in write mode')
            count = offset - self.offset
            chunk = b'\0' * 1024
            for i in range(count // 1024):
                self.write(chunk)
            self.write(b'\0' * (count % 1024))
        elif self.mode == READ:
            self._check_not_closed()
            return self._buffer.seek(offset, whence)

        return self.offset

    def readline(self, size=-1):
        """Read one line from the buffered reader."""
        self._check_not_closed()
        return self._buffer.readline(size)


class _GzipReader(_compression.DecompressReader):
    """Raw reader that decodes gzip members from a file object."""

    def __init__(self, fp):
        super().__init__(_PaddedFile(fp), zlib.decompressobj,
                         wbits=-zlib.MAX_WBITS)
        # Set flag indicating start of a new member
        self._new_member = True
        self._last_mtime = None

    def _init_read(self):
        """Reset the running CRC and size for a new member."""
        self._crc = zlib.crc32(b"")
        self._stream_size = 0  # Decompressed size of unconcatenated stream

    def _read_exact(self, n):
        '''Read exactly *n* bytes from `self._fp`

        This method is required because self._fp may be unbuffered,
        i.e. return short reads.
        '''

        data = self._fp.read(n)
        while len(data) < n:
            b = self._fp.read(n - len(data))
            if not b:
                raise EOFError("Compressed file ended before the "
                               "end-of-stream marker was reached")
            data += b
        return data

    def _read_gzip_header(self):
        """Parse one member header.

        Returns False when the input is exhausted and True once a header
        has been consumed.  Raises BadGzipFile on a wrong magic number or
        an unknown compression method.
        """
        magic = self._fp.read(2)
        if magic == b'':
            return False

        if magic != b'\037\213':
            raise BadGzipFile('Not a gzipped file (%r)' % magic)

        (method, flag,
         self._last_mtime) = struct.unpack("<BBIxx", self._read_exact(8))
        if method != 8:
            raise BadGzipFile('Unknown compression method')

        if flag & FEXTRA:
            # Read & discard the extra field, if present
            extra_len, = struct.unpack("<H", self._read_exact(2))
            self._read_exact(extra_len)
        if flag & FNAME:
            # Read and discard a null-terminated string containing the filename
            while True:
                s = self._fp.read(1)
                if not s or s==b'\000':
                    break
        if flag & FCOMMENT:
            # Read and discard a null-terminated string containing a comment
            while True:
                s = self._fp.read(1)
                if not s or s==b'\000':
                    break
        if flag & FHCRC:
            self._read_exact(2)     # Read & discard the 16-bit header CRC
        return True

    def read(self, size=-1):
        """Return up to *size* decompressed bytes, or b"" at end of input.

        A negative size reads everything.  Raises EOFError when the
        compressed data ends before the end-of-stream marker.
        """
        if size < 0:
            return self.readall()
        # size=0 is special because decompress(max_length=0) is not supported
        if not size:
            return b""

        # For certain input data, a single
        # call to decompress() may not return
        # any data. In this case, retry until we get some data or reach EOF.
        while True:
            if self._decompressor.eof:
                # Ending case: we've come to the end of a member in the file,
                # so finish up this member, and read a new gzip header.
                # Check the CRC and file size, and set the flag so we read
                # a new member
                self._read_eof()
                self._new_member = True
                self._decompressor = self._decomp_factory(
                    **self._decomp_args)

            if self._new_member:
                # If the _new_member flag is set, we have to
                # jump to the next member, if there is one.
                self._init_read()
                if not self._read_gzip_header():
                    self._size = self._pos
                    return b""
                self._new_member = False

            # Read a chunk of data from the file
            buf = self._fp.read(io.DEFAULT_BUFFER_SIZE)

            uncompress = self._decompressor.decompress(buf, size)
            if self._decompressor.unconsumed_tail != b"":
                self._fp.prepend(self._decompressor.unconsumed_tail)
            elif self._decompressor.unused_data != b"":
                # Prepend the already read bytes to the fileobj so they can
                # be seen by _read_eof() and _read_gzip_header()
                self._fp.prepend(self._decompressor.unused_data)

            if uncompress != b"":
                break
            if buf == b"":
                raise EOFError("Compressed file ended before the "
                               "end-of-stream marker was reached")

        self._add_read_data( uncompress )
        self._pos += len(uncompress)
        return uncompress

    def _add_read_data(self, data):
        """Fold *data* into the running CRC and decompressed size."""
        self._crc = zlib.crc32(data, self._crc)
        self._stream_size = self._stream_size + len(data)

    def _read_eof(self):
        """Verify the member trailer and skip any zero padding after it."""
        # We've read to the end of the file
        # We check that the computed CRC and size of the
        # uncompressed data matches the stored values.  Note that the size
        # stored is the true file size mod 2**32.
        crc32, isize = struct.unpack("<II", self._read_exact(8))
        if crc32 != self._crc:
            raise BadGzipFile("CRC check failed %s != %s" % (hex(crc32),
                                                             hex(self._crc)))
        elif isize != (self._stream_size & 0xffffffff):
            raise BadGzipFile("Incorrect length of data produced")

        # Gzip files can be padded with zeroes and still have archives.
        # Consume all zero bytes and set the file position to the first
        # non-zero byte. See http://www.gzip.org/#faq8
        c = b"\x00"
        while c == b"\x00":
            c = self._fp.read(1)
        if c:
            self._fp.prepend(c)

    def _rewind(self):
        super()._rewind()
        self._new_member = True


def compress(data, compresslevel=_COMPRESS_LEVEL_BEST, *, mtime=None):
    """Compress data in one shot and return the compressed string.
    Optional argument is the compression level, in range of 0-9.
    The keyword-only mtime is stored in the header's modification time
    field; when omitted or None, the current time is used.
    """
    buf = io.BytesIO()
    with GzipFile(fileobj=buf, mode='wb', compresslevel=compresslevel, mtime=mtime) as f:
        f.write(data)
    return buf.getvalue()


def decompress(data):
    """Decompress a gzip compressed string in one shot.
    Return the decompressed string.
    """
    with GzipFile(fileobj=io.BytesIO(data)) as f:
        return f.read()


def main():
    """Command line interface: act like gzip/gunzip, keeping the input file.

    Each file argument is compressed to "<file>.gz", or with -d a
    "<file>.gz" argument is decompressed to "<file>".  The argument "-"
    (the default) means stdin to stdout.  --fast and --best select the
    compression level for both file and stdin input.
    """
    from argparse import ArgumentParser
    parser = ArgumentParser(description=
        "A simple command line interface for the gzip module: act like gzip, "
        "but do not delete the input file.")
    group = parser.add_mutually_exclusive_group()
    group.add_argument('--fast', action='store_true', help='compress faster')
    group.add_argument('--best', action='store_true', help='compress better')
    group.add_argument("-d", "--decompress", action="store_true",
                        help="act like gunzip instead of gzip")

    parser.add_argument("args", nargs="*", default=["-"], metavar='file')
    args = parser.parse_args()

    compresslevel = _COMPRESS_LEVEL_TRADEOFF
    if args.fast:
        compresslevel = _COMPRESS_LEVEL_FAST
    elif args.best:
        compresslevel = _COMPRESS_LEVEL_BEST

    for arg in args.args:
        if args.decompress:
            if arg == "-":
                f = GzipFile(filename="", mode="rb", fileobj=sys.stdin.buffer)
                g = sys.stdout.buffer
            else:
                if arg[-3:] != ".gz":
                    sys.exit(f"filename doesn't end in .gz: {arg!r}")
                f = open(arg, "rb")
                g = builtins.open(arg[:-3], "wb")
        else:
            if arg == "-":
                f = sys.stdin.buffer
                g = GzipFile(filename="", mode="wb", fileobj=sys.stdout.buffer,
                             compresslevel=compresslevel)
            else:
                f = builtins.open(arg, "rb")
                # Honour --fast/--best for file arguments as well; without
                # the explicit level, open() would fall back to its default.
                g = open(arg + ".gz", "wb", compresslevel=compresslevel)
        try:
            while True:
                chunk = f.read(1024)
                if not chunk:
                    break
                g.write(chunk)
        finally:
            # Release both ends even if the copy failed part-way; the
            # process-wide standard streams are left open.
            try:
                if g is not sys.stdout.buffer:
                    g.close()
            finally:
                if f is not sys.stdin.buffer:
                    f.close()

if __name__ == '__main__':
    main()