File Explorer

/proc/self/root/proc/self/root/proc/thread-self/root/proc/12/root/lib64/python3.9
This explorer reads the filesystem of the server it runs on, so /workspace/user isn't present here. Browsing and the terminal still work against this server's own disk from /.
30 dirs
174 files
tarfile.py108 KB · 3006 lines
1#! /usr/bin/python3.92#-------------------------------------------------------------------3# tarfile.py4#-------------------------------------------------------------------5# Copyright (C) 2002 Lars Gustaebel <lars@gustaebel.de>6# All rights reserved.7#8# Permission  is  hereby granted,  free  of charge,  to  any person9# obtaining a  copy of  this software  and associated documentation10# files  (the  "Software"),  to   deal  in  the  Software   without11# restriction,  including  without limitation  the  rights to  use,12# copy, modify, merge, publish, distribute, sublicense, and/or sell13# copies  of  the  Software,  and to  permit  persons  to  whom the14# Software  is  furnished  to  do  so,  subject  to  the  following15# conditions:16#17# The above copyright  notice and this  permission notice shall  be18# included in all copies or substantial portions of the Software.19#20# THE SOFTWARE IS PROVIDED "AS  IS", WITHOUT WARRANTY OF ANY  KIND,21# EXPRESS OR IMPLIED, INCLUDING  BUT NOT LIMITED TO  THE WARRANTIES22# OF  MERCHANTABILITY,  FITNESS   FOR  A  PARTICULAR   PURPOSE  AND23# NONINFRINGEMENT.  IN  NO  EVENT SHALL  THE  AUTHORS  OR COPYRIGHT24# HOLDERS  BE LIABLE  FOR ANY  CLAIM, DAMAGES  OR OTHER  LIABILITY,25# WHETHER  IN AN  ACTION OF  CONTRACT, TORT  OR OTHERWISE,  ARISING26# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR27# OTHER DEALINGS IN THE SOFTWARE.28#29"""Read from and write to tar format archives.30"""31 32version     = "0.9.0"33__author__  = "Lars Gust\u00e4bel (lars@gustaebel.de)"34__credits__ = "Gustavo Niemeyer, Niels Gust\u00e4bel, Richard Townsend."35 36#---------37# Imports38#---------39from builtins import open as bltn_open40import sys41import os42import io43import shutil44import stat45import time46import struct47import copy48import re49import warnings50 51try:52    import pwd53except ImportError:54    pwd = None55try:56    import grp57except ImportError:58    grp = None59 60# os.symlink on Windows prior to 6.0 raises NotImplementedError61symlink_exception = (AttributeError, NotImplementedError)62try:63    # OSError (winerror=1314) will be raised if the caller does not hold the64    # SeCreateSymbolicLinkPrivilege privilege65    symlink_exception += (OSError,)66except NameError:67    pass68 69# from tarfile import *70__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError", "ReadError",71           "CompressionError", "StreamError", "ExtractError", "HeaderError",72           "ENCODING", "USTAR_FORMAT", "GNU_FORMAT", "PAX_FORMAT",73           "DEFAULT_FORMAT", "open"]74 75 76#---------------------------------------------------------77# tar constants78#---------------------------------------------------------79NUL = b"\0"                     # the null character80BLOCKSIZE = 512                 # length of processing blocks81RECORDSIZE = BLOCKSIZE * 20     # length of records82GNU_MAGIC = b"ustar  \0"        # magic gnu tar string83POSIX_MAGIC = b"ustar\x0000"    # magic posix tar string84 85LENGTH_NAME = 100               # maximum length of a filename86LENGTH_LINK = 100               # maximum length of a linkname87LENGTH_PREFIX = 155             # maximum length of the prefix field88 89REGTYPE = b"0"                  # regular file90AREGTYPE = b"\0"                # regular file91LNKTYPE = b"1"                  # link (inside tarfile)92SYMTYPE = b"2"                  # symbolic link93CHRTYPE = b"3"                  # character special device94BLKTYPE = b"4"                  # block special device95DIRTYPE = b"5"                  # directory96FIFOTYPE = b"6"                 # fifo special device97CONTTYPE = b"7"                 # contiguous file98 99GNUTYPE_LONGNAME = b"L"         # GNU tar longname100GNUTYPE_LONGLINK = b"K"         # GNU tar longlink101GNUTYPE_SPARSE = b"S"           # GNU tar sparse file102 103XHDTYPE = b"x"                  # POSIX.1-2001 extended header104XGLTYPE = b"g"                  # POSIX.1-2001 global header105SOLARIS_XHDTYPE = b"X"          # Solaris extended header106 107USTAR_FORMAT = 0                # POSIX.1-1988 (ustar) format108GNU_FORMAT = 1                  # GNU tar format109PAX_FORMAT = 2                  # POSIX.1-2001 (pax) format110DEFAULT_FORMAT = PAX_FORMAT111 112#---------------------------------------------------------113# tarfile constants114#---------------------------------------------------------115# File types that tarfile supports:116SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE,117                   SYMTYPE, DIRTYPE, FIFOTYPE,118                   CONTTYPE, CHRTYPE, BLKTYPE,119                   GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,120                   GNUTYPE_SPARSE)121 122# File types that will be treated as a regular file.123REGULAR_TYPES = (REGTYPE, AREGTYPE,124                 CONTTYPE, GNUTYPE_SPARSE)125 126# File types that are part of the GNU tar format.127GNU_TYPES = (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,128             GNUTYPE_SPARSE)129 130# Fields from a pax header that override a TarInfo attribute.131PAX_FIELDS = ("path", "linkpath", "size", "mtime",132              "uid", "gid", "uname", "gname")133 134# Fields from a pax header that are affected by hdrcharset.135PAX_NAME_FIELDS = {"path", "linkpath", "uname", "gname"}136 137# Fields in a pax header that are numbers, all other fields138# are treated as strings.139PAX_NUMBER_FIELDS = {140    "atime": float,141    "ctime": float,142    "mtime": float,143    "uid": int,144    "gid": int,145    "size": int146}147 148#---------------------------------------------------------149# initialization150#---------------------------------------------------------151if os.name == "nt":152    ENCODING = "utf-8"153else:154    ENCODING = sys.getfilesystemencoding()155 156#---------------------------------------------------------157# Some useful functions158#---------------------------------------------------------159 160def stn(s, length, encoding, errors):161    """Convert a string to a null-terminated bytes object.162    """163    if s is None:164        raise ValueError("metadata cannot contain None")165    s = s.encode(encoding, errors)166    return s[:length] + (length - len(s)) * NUL167 168def nts(s, encoding, errors):169    """Convert a null-terminated bytes object to a string.170    """171    p = s.find(b"\0")172    if p != -1:173        s = s[:p]174    return s.decode(encoding, errors)175 176def nti(s):177    """Convert a number field to a python number.178    """179    # There are two possible encodings for a number field, see180    # itn() below.181    if s[0] in (0o200, 0o377):182        n = 0183        for i in range(len(s) - 1):184            n <<= 8185            n += s[i + 1]186        if s[0] == 0o377:187            n = -(256 ** (len(s) - 1) - n)188    else:189        try:190            s = nts(s, "ascii", "strict")191            n = int(s.strip() or "0", 8)192        except ValueError:193            raise InvalidHeaderError("invalid header")194    return n195 196def itn(n, digits=8, format=DEFAULT_FORMAT):197    """Convert a python number to a number field.198    """199    # POSIX 1003.1-1988 requires numbers to be encoded as a string of200    # octal digits followed by a null-byte, this allows values up to201    # (8**(digits-1))-1. GNU tar allows storing numbers greater than202    # that if necessary. A leading 0o200 or 0o377 byte indicate this203    # particular encoding, the following digits-1 bytes are a big-endian204    # base-256 representation. This allows values up to (256**(digits-1))-1.205    # A 0o200 byte indicates a positive number, a 0o377 byte a negative206    # number.207    n = int(n)208    if 0 <= n < 8 ** (digits - 1):209        s = bytes("%0*o" % (digits - 1, n), "ascii") + NUL210    elif format == GNU_FORMAT and -256 ** (digits - 1) <= n < 256 ** (digits - 1):211        if n >= 0:212            s = bytearray([0o200])213        else:214            s = bytearray([0o377])215            n = 256 ** digits + n216 217        for i in range(digits - 1):218            s.insert(1, n & 0o377)219            n >>= 8220    else:221        raise ValueError("overflow in number field")222 223    return s224 225def calc_chksums(buf):226    """Calculate the checksum for a member's header by summing up all227       characters except for the chksum field which is treated as if228       it was filled with spaces. According to the GNU tar sources,229       some tars (Sun and NeXT) calculate chksum with signed char,230       which will be different if there are chars in the buffer with231       the high bit set. So we calculate two checksums, unsigned and232       signed.233    """234    unsigned_chksum = 256 + sum(struct.unpack_from("148B8x356B", buf))235    signed_chksum = 256 + sum(struct.unpack_from("148b8x356b", buf))236    return unsigned_chksum, signed_chksum237 238def copyfileobj(src, dst, length=None, exception=OSError, bufsize=None):239    """Copy length bytes from fileobj src to fileobj dst.240       If length is None, copy the entire content.241    """242    bufsize = bufsize or 16 * 1024243    if length == 0:244        return245    if length is None:246        shutil.copyfileobj(src, dst, bufsize)247        return248 249    blocks, remainder = divmod(length, bufsize)250    for b in range(blocks):251        buf = src.read(bufsize)252        if len(buf) < bufsize:253            raise exception("unexpected end of data")254        dst.write(buf)255 256    if remainder != 0:257        buf = src.read(remainder)258        if len(buf) < remainder:259            raise exception("unexpected end of data")260        dst.write(buf)261    return262 263def _safe_print(s):264    encoding = getattr(sys.stdout, 'encoding', None)265    if encoding is not None:266        s = s.encode(encoding, 'backslashreplace').decode(encoding)267    print(s, end=' ')268 269 270class TarError(Exception):271    """Base exception."""272    pass273class ExtractError(TarError):274    """General exception for extract errors."""275    pass276class ReadError(TarError):277    """Exception for unreadable tar archives."""278    pass279class CompressionError(TarError):280    """Exception for unavailable compression methods."""281    pass282class StreamError(TarError):283    """Exception for unsupported operations on stream-like TarFiles."""284    pass285class HeaderError(TarError):286    """Base exception for header errors."""287    pass288class EmptyHeaderError(HeaderError):289    """Exception for empty headers."""290    pass291class TruncatedHeaderError(HeaderError):292    """Exception for truncated headers."""293    pass294class EOFHeaderError(HeaderError):295    """Exception for end of file headers."""296    pass297class InvalidHeaderError(HeaderError):298    """Exception for invalid headers."""299    pass300class SubsequentHeaderError(HeaderError):301    """Exception for missing and invalid extended headers."""302    pass303 304#---------------------------305# internal stream interface306#---------------------------307class _LowLevelFile:308    """Low-level file object. Supports reading and writing.309       It is used instead of a regular file object for streaming310       access.311    """312 313    def __init__(self, name, mode):314        mode = {315            "r": os.O_RDONLY,316            "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,317        }[mode]318        if hasattr(os, "O_BINARY"):319            mode |= os.O_BINARY320        self.fd = os.open(name, mode, 0o666)321 322    def close(self):323        os.close(self.fd)324 325    def read(self, size):326        return os.read(self.fd, size)327 328    def write(self, s):329        os.write(self.fd, s)330 331class _Stream:332    """Class that serves as an adapter between TarFile and333       a stream-like object.  The stream-like object only334       needs to have a read() or write() method and is accessed335       blockwise.  Use of gzip or bzip2 compression is possible.336       A stream-like object could be for example: sys.stdin,337       sys.stdout, a socket, a tape device etc.338 339       _Stream is intended to be used only internally.340    """341 342    def __init__(self, name, mode, comptype, fileobj, bufsize):343        """Construct a _Stream object.344        """345        self._extfileobj = True346        if fileobj is None:347            fileobj = _LowLevelFile(name, mode)348            self._extfileobj = False349 350        if comptype == '*':351            # Enable transparent compression detection for the352            # stream interface353            fileobj = _StreamProxy(fileobj)354            comptype = fileobj.getcomptype()355 356        self.name     = name or ""357        self.mode     = mode358        self.comptype = comptype359        self.fileobj  = fileobj360        self.bufsize  = bufsize361        self.buf      = b""362        self.pos      = 0363        self.closed   = False364 365        try:366            if comptype == "gz":367                try:368                    import zlib369                except ImportError:370                    raise CompressionError("zlib module is not available")371                self.zlib = zlib372                self.crc = zlib.crc32(b"")373                if mode == "r":374                    self._init_read_gz()375                    self.exception = zlib.error376                else:377                    self._init_write_gz()378 379            elif comptype == "bz2":380                try:381                    import bz2382                except ImportError:383                    raise CompressionError("bz2 module is not available")384                if mode == "r":385                    self.dbuf = b""386                    self.cmp = bz2.BZ2Decompressor()387                    self.exception = OSError388                else:389                    self.cmp = bz2.BZ2Compressor()390 391            elif comptype == "xz":392                try:393                    import lzma394                except ImportError:395                    raise CompressionError("lzma module is not available")396                if mode == "r":397                    self.dbuf = b""398                    self.cmp = lzma.LZMADecompressor()399                    self.exception = lzma.LZMAError400                else:401                    self.cmp = lzma.LZMACompressor()402 403            elif comptype != "tar":404                raise CompressionError("unknown compression type %r" % comptype)405 406        except:407            if not self._extfileobj:408                self.fileobj.close()409            self.closed = True410            raise411 412    def __del__(self):413        if hasattr(self, "closed") and not self.closed:414            self.close()415 416    def _init_write_gz(self):417        """Initialize for writing with gzip compression.418        """419        self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,420                                            -self.zlib.MAX_WBITS,421                                            self.zlib.DEF_MEM_LEVEL,422                                            0)423        timestamp = struct.pack("<L", int(time.time()))424        self.__write(b"\037\213\010\010" + timestamp + b"\002\377")425        if self.name.endswith(".gz"):426            self.name = self.name[:-3]427        # Honor "directory components removed" from RFC1952428        self.name = os.path.basename(self.name)429        # RFC1952 says we must use ISO-8859-1 for the FNAME field.430        self.__write(self.name.encode("iso-8859-1", "replace") + NUL)431 432    def write(self, s):433        """Write string s to the stream.434        """435        if self.comptype == "gz":436            self.crc = self.zlib.crc32(s, self.crc)437        self.pos += len(s)438        if self.comptype != "tar":439            s = self.cmp.compress(s)440        self.__write(s)441 442    def __write(self, s):443        """Write string s to the stream if a whole new block444           is ready to be written.445        """446        self.buf += s447        while len(self.buf) > self.bufsize:448            self.fileobj.write(self.buf[:self.bufsize])449            self.buf = self.buf[self.bufsize:]450 451    def close(self):452        """Close the _Stream object. No operation should be453           done on it afterwards.454        """455        if self.closed:456            return457 458        self.closed = True459        try:460            if self.mode == "w" and self.comptype != "tar":461                self.buf += self.cmp.flush()462 463            if self.mode == "w" and self.buf:464                self.fileobj.write(self.buf)465                self.buf = b""466                if self.comptype == "gz":467                    self.fileobj.write(struct.pack("<L", self.crc))468                    self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFF))469        finally:470            if not self._extfileobj:471                self.fileobj.close()472 473    def _init_read_gz(self):474        """Initialize for reading a gzip compressed fileobj.475        """476        self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)477        self.dbuf = b""478 479        # taken from gzip.GzipFile with some alterations480        if self.__read(2) != b"\037\213":481            raise ReadError("not a gzip file")482        if self.__read(1) != b"\010":483            raise CompressionError("unsupported compression method")484 485        flag = ord(self.__read(1))486        self.__read(6)487 488        if flag & 4:489            xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))490            self.read(xlen)491        if flag & 8:492            while True:493                s = self.__read(1)494                if not s or s == NUL:495                    break496        if flag & 16:497            while True:498                s = self.__read(1)499                if not s or s == NUL:500                    break501        if flag & 2:502            self.__read(2)503 504    def tell(self):505        """Return the stream's file pointer position.506        """507        return self.pos508 509    def seek(self, pos=0):510        """Set the stream's file pointer to pos. Negative seeking511           is forbidden.512        """513        if pos - self.pos >= 0:514            blocks, remainder = divmod(pos - self.pos, self.bufsize)515            for i in range(blocks):516                self.read(self.bufsize)517            self.read(remainder)518        else:519            raise StreamError("seeking backwards is not allowed")520        return self.pos521 522    def read(self, size):523        """Return the next size number of bytes from the stream."""524        assert size is not None525        buf = self._read(size)526        self.pos += len(buf)527        return buf528 529    def _read(self, size):530        """Return size bytes from the stream.531        """532        if self.comptype == "tar":533            return self.__read(size)534 535        c = len(self.dbuf)536        t = [self.dbuf]537        while c < size:538            # Skip underlying buffer to avoid unaligned double buffering.539            if self.buf:540                buf = self.buf541                self.buf = b""542            else:543                buf = self.fileobj.read(self.bufsize)544                if not buf:545                    break546            try:547                buf = self.cmp.decompress(buf)548            except self.exception:549                raise ReadError("invalid compressed data")550            t.append(buf)551            c += len(buf)552        t = b"".join(t)553        self.dbuf = t[size:]554        return t[:size]555 556    def __read(self, size):557        """Return size bytes from stream. If internal buffer is empty,558           read another block from the stream.559        """560        c = len(self.buf)561        t = [self.buf]562        while c < size:563            buf = self.fileobj.read(self.bufsize)564            if not buf:565                break566            t.append(buf)567            c += len(buf)568        t = b"".join(t)569        self.buf = t[size:]570        return t[:size]571# class _Stream572 573class _StreamProxy(object):574    """Small proxy class that enables transparent compression575       detection for the Stream interface (mode 'r|*').576    """577 578    def __init__(self, fileobj):579        self.fileobj = fileobj580        self.buf = self.fileobj.read(BLOCKSIZE)581 582    def read(self, size):583        self.read = self.fileobj.read584        return self.buf585 586    def getcomptype(self):587        if self.buf.startswith(b"\x1f\x8b\x08"):588            return "gz"589        elif self.buf[0:3] == b"BZh" and self.buf[4:10] == b"1AY&SY":590            return "bz2"591        elif self.buf.startswith((b"\x5d\x00\x00\x80", b"\xfd7zXZ")):592            return "xz"593        else:594            return "tar"595 596    def close(self):597        self.fileobj.close()598# class StreamProxy599 600#------------------------601# Extraction file object602#------------------------603class _FileInFile(object):604    """A thin wrapper around an existing file object that605       provides a part of its data as an individual file606       object.607    """608 609    def __init__(self, fileobj, offset, size, blockinfo=None):610        self.fileobj = fileobj611        self.offset = offset612        self.size = size613        self.position = 0614        self.name = getattr(fileobj, "name", None)615        self.closed = False616 617        if blockinfo is None:618            blockinfo = [(0, size)]619 620        # Construct a map with data and zero blocks.621        self.map_index = 0622        self.map = []623        lastpos = 0624        realpos = self.offset625        for offset, size in blockinfo:626            if offset > lastpos:627                self.map.append((False, lastpos, offset, None))628            self.map.append((True, offset, offset + size, realpos))629            realpos += size630            lastpos = offset + size631        if lastpos < self.size:632            self.map.append((False, lastpos, self.size, None))633 634    def flush(self):635        pass636 637    def readable(self):638        return True639 640    def writable(self):641        return False642 643    def seekable(self):644        return self.fileobj.seekable()645 646    def tell(self):647        """Return the current file position.648        """649        return self.position650 651    def seek(self, position, whence=io.SEEK_SET):652        """Seek to a position in the file.653        """654        if whence == io.SEEK_SET:655            self.position = min(max(position, 0), self.size)656        elif whence == io.SEEK_CUR:657            if position < 0:658                self.position = max(self.position + position, 0)659            else:660                self.position = min(self.position + position, self.size)661        elif whence == io.SEEK_END:662            self.position = max(min(self.size + position, self.size), 0)663        else:664            raise ValueError("Invalid argument")665        return self.position666 667    def read(self, size=None):668        """Read data from the file.669        """670        if size is None:671            size = self.size - self.position672        else:673            size = min(size, self.size - self.position)674 675        buf = b""676        while size > 0:677            while True:678                data, start, stop, offset = self.map[self.map_index]679                if start <= self.position < stop:680                    break681                else:682                    self.map_index += 1683                    if self.map_index == len(self.map):684                        self.map_index = 0685            length = min(size, stop - self.position)686            if data:687                self.fileobj.seek(offset + (self.position - start))688                b = self.fileobj.read(length)689                if len(b) != length:690                    raise ReadError("unexpected end of data")691                buf += b692            else:693                buf += NUL * length694            size -= length695            self.position += length696        return buf697 698    def readinto(self, b):699        buf = self.read(len(b))700        b[:len(buf)] = buf701        return len(buf)702 703    def close(self):704        self.closed = True705#class _FileInFile706 707class ExFileObject(io.BufferedReader):708 709    def __init__(self, tarfile, tarinfo):710        fileobj = _FileInFile(tarfile.fileobj, tarinfo.offset_data,711                tarinfo.size, tarinfo.sparse)712        super().__init__(fileobj)713#class ExFileObject714 715 716#-----------------------------717# extraction filters (PEP 706)718#-----------------------------719 720class FilterError(TarError):721    pass722 723class AbsolutePathError(FilterError):724    def __init__(self, tarinfo):725        self.tarinfo = tarinfo726        super().__init__(f'member {tarinfo.name!r} has an absolute path')727 728class OutsideDestinationError(FilterError):729    def __init__(self, tarinfo, path):730        self.tarinfo = tarinfo731        self._path = path732        super().__init__(f'{tarinfo.name!r} would be extracted to {path!r}, '733                         + 'which is outside the destination')734 735class SpecialFileError(FilterError):736    def __init__(self, tarinfo):737        self.tarinfo = tarinfo738        super().__init__(f'{tarinfo.name!r} is a special file')739 740class AbsoluteLinkError(FilterError):741    def __init__(self, tarinfo):742        self.tarinfo = tarinfo743        super().__init__(f'{tarinfo.name!r} is a link to an absolute path')744 745class LinkOutsideDestinationError(FilterError):746    def __init__(self, tarinfo, path):747        self.tarinfo = tarinfo748        self._path = path749        super().__init__(f'{tarinfo.name!r} would link to {path!r}, '750                         + 'which is outside the destination')751 752class LinkFallbackError(FilterError):753    def __init__(self, tarinfo, path):754        self.tarinfo = tarinfo755        self._path = path756        super().__init__(f'link {tarinfo.name!r} would be extracted as a '757                         + f'copy of {path!r}, which was rejected')758 759# Errors caused by filters -- both "fatal" and "non-fatal" -- that760# we consider to be issues with the argument, rather than a bug in the761# filter function762_FILTER_ERRORS = (FilterError, OSError, ExtractError)763 764def _get_filtered_attrs(member, dest_path, for_data=True):765    new_attrs = {}766    name = member.name767    dest_path = os.path.realpath(dest_path, strict=os.path.ALLOW_MISSING)768    # Strip leading / (tar's directory separator) from filenames.769    # Include os.sep (target OS directory separator) as well.770    if name.startswith(('/', os.sep)):771        name = new_attrs['name'] = member.path.lstrip('/' + os.sep)772    if os.path.isabs(name):773        # Path is absolute even after stripping.774        # For example, 'C:/foo' on Windows.775        raise AbsolutePathError(member)776    # Ensure we stay in the destination777    target_path = os.path.realpath(os.path.join(dest_path, name),778                                   strict=os.path.ALLOW_MISSING)779    if os.path.commonpath([target_path, dest_path]) != dest_path:780        raise OutsideDestinationError(member, target_path)781    # Limit permissions (no high bits, and go-w)782    mode = member.mode783    if mode is not None:784        # Strip high bits & group/other write bits785        mode = mode & 0o755786        if for_data:787            # For data, handle permissions & file types788            if member.isreg() or member.islnk():789                if not mode & 0o100:790                    # Clear executable bits if not executable by user791                    mode &= ~0o111792                # Ensure owner can read & write793                mode |= 0o600794            elif member.isdir() or member.issym():795                # Ignore mode for directories & symlinks796                mode = None797            else:798                # Reject special files799                raise SpecialFileError(member)800        if mode != member.mode:801            new_attrs['mode'] = mode802    if for_data:803        # Ignore ownership for 'data'804        if member.uid is not None:805            new_attrs['uid'] = None806        if member.gid is not None:807            new_attrs['gid'] = None808        if member.uname is not None:809            new_attrs['uname'] = None810        if member.gname is not None:811            new_attrs['gname'] = None812        # Check link destination for 'data'813        if member.islnk() or member.issym():814            if os.path.isabs(member.linkname):815                raise AbsoluteLinkError(member)816            normalized = os.path.normpath(member.linkname)817            if normalized != member.linkname:818                new_attrs['linkname'] = normalized819            if member.issym():820                target_path = os.path.join(dest_path,821                                           os.path.dirname(name),822                                           member.linkname)823            else:824                target_path = os.path.join(dest_path,825                                           member.linkname)826            target_path = os.path.realpath(target_path,827                                           strict=os.path.ALLOW_MISSING)828            if os.path.commonpath([target_path, dest_path]) != dest_path:829                raise LinkOutsideDestinationError(member, target_path)830    return new_attrs831 832def fully_trusted_filter(member, dest_path):833    return member834 835def tar_filter(member, dest_path):836    new_attrs = _get_filtered_attrs(member, dest_path, False)837    if new_attrs:838        return member.replace(**new_attrs, deep=False)839    return member840 841def data_filter(member, dest_path):842    new_attrs = _get_filtered_attrs(member, dest_path, True)843    if new_attrs:844        return member.replace(**new_attrs, deep=False)845    return member846 847_NAMED_FILTERS = {848    "fully_trusted": fully_trusted_filter,849    "tar": tar_filter,850    "data": data_filter,851}852 853#------------------854# Exported Classes855#------------------856 857# Sentinel for replace() defaults, meaning "don't change the attribute"858_KEEP = object()859 860# Header length is digits followed by a space.861_header_length_prefix_re = re.compile(br"([0-9]{1,20}) ")862 863class TarInfo(object):864    """Informational class which holds the details about an865       archive member given by a tar header block.866       TarInfo objects are returned by TarFile.getmember(),867       TarFile.getmembers() and TarFile.gettarinfo() and are868       usually created internally.869    """870 871    __slots__ = dict(872        name = 'Name of the archive member.',873        mode = 'Permission bits.',874        uid = 'User ID of the user who originally stored this member.',875        gid = 'Group ID of the user who originally stored this member.',876        size = 'Size in bytes.',877        mtime = 'Time of last modification.',878        chksum = 'Header checksum.',879        type = ('File type. type is usually one of these constants: '880                'REGTYPE, AREGTYPE, LNKTYPE, SYMTYPE, DIRTYPE, FIFOTYPE, '881                'CONTTYPE, CHRTYPE, BLKTYPE, GNUTYPE_SPARSE.'),882        linkname = ('Name of the target file name, which is only present '883                    'in TarInfo objects of type LNKTYPE and SYMTYPE.'),884        uname = 'User name.',885        gname = 'Group name.',886        devmajor = 'Device major number.',887        devminor = 'Device minor number.',888        offset = 'The tar header starts here.',889        offset_data = "The file's data starts here.",890        pax_headers = ('A dictionary containing key-value pairs of an '891                       'associated pax extended header.'),892        sparse = 'Sparse member information.',893        tarfile = None,894        _sparse_structs = None,895        _link_target = None,896        )897 898    def __init__(self, name=""):899        """Construct a TarInfo object. name is the optional name900           of the member.901        """902        self.name = name        # member name903        self.mode = 0o644       # file permissions904        self.uid = 0            # user id905        self.gid = 0            # group id906        self.size = 0           # file size907        self.mtime = 0          # modification time908        self.chksum = 0         # header checksum909        self.type = REGTYPE     # member type910        self.linkname = ""      # link name911        self.uname = ""         # user name912        self.gname = ""         # group name913        self.devmajor = 0       # device major number914        self.devminor = 0       # device minor number915 916        self.offset = 0         # the tar header starts here917        self.offset_data = 0    # the file's data starts here918 919        self.sparse = None      # sparse member information920        self.pax_headers = {}   # pax header information921 922    @property923    def path(self):924        'In pax headers, "name" is called "path".'925        return self.name926 927    @path.setter928    def path(self, name):929        self.name = name930 931    @property932    def linkpath(self):933        'In pax headers, "linkname" is called "linkpath".'934        return self.linkname935 936    @linkpath.setter937    def linkpath(self, linkname):938        self.linkname = linkname939 940    def __repr__(self):941        return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))942 943    def replace(self, *,944                name=_KEEP, mtime=_KEEP, mode=_KEEP, linkname=_KEEP,945                uid=_KEEP, gid=_KEEP, uname=_KEEP, gname=_KEEP,946                deep=True, _KEEP=_KEEP):947        """Return a deep copy of self with the given attributes replaced.948        """949        if deep:950            result = copy.deepcopy(self)951        else:952            result = copy.copy(self)953        if name is not _KEEP:954            result.name = name955        if mtime is not _KEEP:956            result.mtime = mtime957        if mode is not _KEEP:958            result.mode = mode959        if linkname is not _KEEP:960            result.linkname = linkname961        if uid is not _KEEP:962            result.uid = uid963        if gid is not _KEEP:964            result.gid = gid965        if uname is not _KEEP:966            result.uname = uname967        if gname is not _KEEP:968            result.gname = gname969        return result970 971    def get_info(self):972        """Return the TarInfo's attributes as a dictionary.973        """974        if self.mode is None:975            mode = None976        else:977            mode = self.mode & 0o7777978        info = {979            "name":     self.name,980            "mode":     mode,981            "uid":      self.uid,982            "gid":      self.gid,983            "size":     self.size,984            "mtime":    self.mtime,985            "chksum":   self.chksum,986            "type":     self.type,987            "linkname": self.linkname,988            "uname":    self.uname,989            "gname":    self.gname,990            "devmajor": self.devmajor,991            "devminor": self.devminor992        }993 994        if info["type"] == DIRTYPE and not info["name"].endswith("/"):995            info["name"] += "/"996 997        return info998 999    def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING, errors="surrogateescape"):1000        """Return a tar header as a string of 512 byte blocks.1001        """1002        info = self.get_info()1003        for name, value in info.items():1004            if value is None:1005                raise ValueError("%s may not be None" % name)1006 1007        if format == USTAR_FORMAT:1008            return self.create_ustar_header(info, encoding, errors)1009        elif format == GNU_FORMAT:1010            return self.create_gnu_header(info, encoding, errors)1011        elif format == PAX_FORMAT:1012            return self.create_pax_header(info, encoding)1013        else:1014            raise ValueError("invalid format")1015 1016    def create_ustar_header(self, info, encoding, errors):1017        """Return the object as a ustar header block.1018        """1019        info["magic"] = POSIX_MAGIC1020 1021        if len(info["linkname"].encode(encoding, errors)) > LENGTH_LINK:1022            raise ValueError("linkname is too long")1023 1024        if len(info["name"].encode(encoding, errors)) > LENGTH_NAME:1025            info["prefix"], info["name"] = self._posix_split_name(info["name"], encoding, errors)1026 1027        return self._create_header(info, USTAR_FORMAT, encoding, errors)1028 1029    def create_gnu_header(self, info, encoding, errors):1030        """Return the object as a GNU header block sequence.1031        """1032        info["magic"] = GNU_MAGIC1033 1034        buf = b""1035        if len(info["linkname"].encode(encoding, errors)) > LENGTH_LINK:1036            buf += self._create_gnu_long_header(info["linkname"], GNUTYPE_LONGLINK, encoding, errors)1037 1038        if len(info["name"].encode(encoding, errors)) > LENGTH_NAME:1039            buf += self._create_gnu_long_header(info["name"], GNUTYPE_LONGNAME, encoding, errors)1040 1041        return buf + self._create_header(info, GNU_FORMAT, encoding, errors)1042 1043    def create_pax_header(self, info, encoding):1044        """Return the object as a ustar header block. If it cannot be1045           represented this way, prepend a pax extended header sequence1046           with supplement information.1047        """1048        info["magic"] = POSIX_MAGIC1049        pax_headers = self.pax_headers.copy()1050 1051        # Test string fields for values that exceed the field length or cannot1052        # be represented in ASCII encoding.1053        for name, hname, length in (1054                ("name", "path", LENGTH_NAME), ("linkname", "linkpath", LENGTH_LINK),1055                ("uname", "uname", 32), ("gname", "gname", 32)):1056 1057            if hname in pax_headers:1058                # The pax header has priority.1059                continue1060 1061            # Try to encode the string as ASCII.1062            try:1063                info[name].encode("ascii", "strict")1064            except UnicodeEncodeError:1065                pax_headers[hname] = info[name]1066                continue1067 1068            if len(info[name]) > length:1069                pax_headers[hname] = info[name]1070 1071        # Test number fields for values that exceed the field limit or values1072        # that like to be stored as float.1073        for name, digits in (("uid", 8), ("gid", 8), ("size", 12), ("mtime", 12)):1074            needs_pax = False1075 1076            val = info[name]1077            val_is_float = isinstance(val, float)1078            val_int = round(val) if val_is_float else val1079            if not 0 <= val_int < 8 ** (digits - 1):1080                # Avoid overflow.1081                info[name] = 01082                needs_pax = True1083            elif val_is_float:1084                # Put rounded value in ustar header, and full1085                # precision value in pax header.1086                info[name] = val_int1087                needs_pax = True1088 1089            # The existing pax header has priority.1090            if needs_pax and name not in pax_headers:1091                pax_headers[name] = str(val)1092 1093        # Create a pax extended header if necessary.1094        if pax_headers:1095            buf = self._create_pax_generic_header(pax_headers, XHDTYPE, encoding)1096        else:1097            buf = b""1098 1099        return buf + self._create_header(info, USTAR_FORMAT, "ascii", "replace")1100 1101    @classmethod1102    def create_pax_global_header(cls, pax_headers):1103        """Return the object as a pax global header block sequence.1104        """1105        return cls._create_pax_generic_header(pax_headers, XGLTYPE, "utf-8")1106 1107    def _posix_split_name(self, name, encoding, errors):1108        """Split a name longer than 100 chars into a prefix1109           and a name part.1110        """1111        components = name.split("/")1112        for i in range(1, len(components)):1113            prefix = "/".join(components[:i])1114            name = "/".join(components[i:])1115            if len(prefix.encode(encoding, errors)) <= LENGTH_PREFIX and \1116                    len(name.encode(encoding, errors)) <= LENGTH_NAME:1117                break1118        else:1119            raise ValueError("name is too long")1120 1121        return prefix, name1122 1123    @staticmethod1124    def _create_header(info, format, encoding, errors):1125        """Return a header block. info is a dictionary with file1126           information, format must be one of the *_FORMAT constants.1127        """1128        has_device_fields = info.get("type") in (CHRTYPE, BLKTYPE)1129        if has_device_fields:1130            devmajor = itn(info.get("devmajor", 0), 8, format)1131            devminor = itn(info.get("devminor", 0), 8, format)1132        else:1133            devmajor = stn("", 8, encoding, errors)1134            devminor = stn("", 8, encoding, errors)1135 1136        # None values in metadata should cause ValueError.1137        # itn()/stn() do this for all fields except type.1138        filetype = info.get("type", REGTYPE)1139        if filetype is None:1140            raise ValueError("TarInfo.type must not be None")1141 1142        parts = [1143            stn(info.get("name", ""), 100, encoding, errors),1144            itn(info.get("mode", 0) & 0o7777, 8, format),1145            itn(info.get("uid", 0), 8, format),1146            itn(info.get("gid", 0), 8, format),1147            itn(info.get("size", 0), 12, format),1148            itn(info.get("mtime", 0), 12, format),1149            b"        ", # checksum field1150            filetype,1151            stn(info.get("linkname", ""), 100, encoding, errors),1152            info.get("magic", POSIX_MAGIC),1153            stn(info.get("uname", ""), 32, encoding, errors),1154            stn(info.get("gname", ""), 32, encoding, errors),1155            devmajor,1156            devminor,1157            stn(info.get("prefix", ""), 155, encoding, errors)1158        ]1159 1160        buf = struct.pack("%ds" % BLOCKSIZE, b"".join(parts))1161        chksum = calc_chksums(buf[-BLOCKSIZE:])[0]1162        buf = buf[:-364] + bytes("%06o\0" % chksum, "ascii") + buf[-357:]1163        return buf1164 1165    @staticmethod1166    def _create_payload(payload):1167        """Return the string payload filled with zero bytes1168           up to the next 512 byte border.1169        """1170        blocks, remainder = divmod(len(payload), BLOCKSIZE)1171        if remainder > 0:1172            payload += (BLOCKSIZE - remainder) * NUL1173        return payload1174 1175    @classmethod1176    def _create_gnu_long_header(cls, name, type, encoding, errors):1177        """Return a GNUTYPE_LONGNAME or GNUTYPE_LONGLINK sequence1178           for name.1179        """1180        name = name.encode(encoding, errors) + NUL1181 1182        info = {}1183        info["name"] = "././@LongLink"1184        info["type"] = type1185        info["size"] = len(name)1186        info["magic"] = GNU_MAGIC1187 1188        # create extended header + name blocks.1189        return cls._create_header(info, USTAR_FORMAT, encoding, errors) + \1190                cls._create_payload(name)1191 1192    @classmethod1193    def _create_pax_generic_header(cls, pax_headers, type, encoding):1194        """Return a POSIX.1-2008 extended or global header sequence1195           that contains a list of keyword, value pairs. The values1196           must be strings.1197        """1198        # Check if one of the fields contains surrogate characters and thereby1199        # forces hdrcharset=BINARY, see _proc_pax() for more information.1200        binary = False1201        for keyword, value in pax_headers.items():1202            try:1203                value.encode("utf-8", "strict")1204            except UnicodeEncodeError:1205                binary = True1206                break1207 1208        records = b""1209        if binary:1210            # Put the hdrcharset field at the beginning of the header.1211            records += b"21 hdrcharset=BINARY\n"1212 1213        for keyword, value in pax_headers.items():1214            keyword = keyword.encode("utf-8")1215            if binary:1216                # Try to restore the original byte representation of `value'.1217                # Needless to say, that the encoding must match the string.1218                value = value.encode(encoding, "surrogateescape")1219            else:1220                value = value.encode("utf-8")1221 1222            l = len(keyword) + len(value) + 3   # ' ' + '=' + '\n'1223            n = p = 01224            while True:1225                n = l + len(str(p))1226                if n == p:1227                    break1228                p = n1229            records += bytes(str(p), "ascii") + b" " + keyword + b"=" + value + b"\n"1230 1231        # We use a hardcoded "././@PaxHeader" name like star does1232        # instead of the one that POSIX recommends.1233        info = {}1234        info["name"] = "././@PaxHeader"1235        info["type"] = type1236        info["size"] = len(records)1237        info["magic"] = POSIX_MAGIC1238 1239        # Create pax header + record blocks.1240        return cls._create_header(info, USTAR_FORMAT, "ascii", "replace") + \1241                cls._create_payload(records)1242 1243    @classmethod1244    def frombuf(cls, buf, encoding, errors):1245        """Construct a TarInfo object from a 512 byte bytes object.1246        """1247        if len(buf) == 0:1248            raise EmptyHeaderError("empty header")1249        if len(buf) != BLOCKSIZE:1250            raise TruncatedHeaderError("truncated header")1251        if buf.count(NUL) == BLOCKSIZE:1252            raise EOFHeaderError("end of file header")1253 1254        chksum = nti(buf[148:156])1255        if chksum not in calc_chksums(buf):1256            raise InvalidHeaderError("bad checksum")1257 1258        obj = cls()1259        obj.name = nts(buf[0:100], encoding, errors)1260        obj.mode = nti(buf[100:108])1261        obj.uid = nti(buf[108:116])1262        obj.gid = nti(buf[116:124])1263        obj.size = nti(buf[124:136])1264        obj.mtime = nti(buf[136:148])1265        obj.chksum = chksum1266        obj.type = buf[156:157]1267        obj.linkname = nts(buf[157:257], encoding, errors)1268        obj.uname = nts(buf[265:297], encoding, errors)1269        obj.gname = nts(buf[297:329], encoding, errors)1270        obj.devmajor = nti(buf[329:337])1271        obj.devminor = nti(buf[337:345])1272        prefix = nts(buf[345:500], encoding, errors)1273 1274        # Old V7 tar format represents a directory as a regular1275        # file with a trailing slash.1276        if obj.type == AREGTYPE and obj.name.endswith("/"):1277            obj.type = DIRTYPE1278 1279        # The old GNU sparse format occupies some of the unused1280        # space in the buffer for up to 4 sparse structures.1281        # Save them for later processing in _proc_sparse().1282        if obj.type == GNUTYPE_SPARSE:1283            pos = 3861284            structs = []1285            for i in range(4):1286                try:1287                    offset = nti(buf[pos:pos + 12])1288                    numbytes = nti(buf[pos + 12:pos + 24])1289                except ValueError:1290                    break1291                structs.append((offset, numbytes))1292                pos += 241293            isextended = bool(buf[482])1294            origsize = nti(buf[483:495])1295            obj._sparse_structs = (structs, isextended, origsize)1296 1297        # Remove redundant slashes from directories.1298        if obj.isdir():1299            obj.name = obj.name.rstrip("/")1300 1301        # Reconstruct a ustar longname.1302        if prefix and obj.type not in GNU_TYPES:1303            obj.name = prefix + "/" + obj.name1304        return obj1305 1306    @classmethod1307    def fromtarfile(cls, tarfile):1308        """Return the next TarInfo object from TarFile object1309           tarfile.1310        """1311        buf = tarfile.fileobj.read(BLOCKSIZE)1312        obj = cls.frombuf(buf, tarfile.encoding, tarfile.errors)1313        obj.offset = tarfile.fileobj.tell() - BLOCKSIZE1314        return obj._proc_member(tarfile)1315 1316    #--------------------------------------------------------------------------1317    # The following are methods that are called depending on the type of a1318    # member. The entry point is _proc_member() which can be overridden in a1319    # subclass to add custom _proc_*() methods. A _proc_*() method MUST1320    # implement the following1321    # operations:1322    # 1. Set self.offset_data to the position where the data blocks begin,1323    #    if there is data that follows.1324    # 2. Set tarfile.offset to the position where the next member's header will1325    #    begin.1326    # 3. Return self or another valid TarInfo object.1327    def _proc_member(self, tarfile):1328        """Choose the right processing method depending on1329           the type and call it.1330        """1331        if self.type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):1332            return self._proc_gnulong(tarfile)1333        elif self.type == GNUTYPE_SPARSE:1334            return self._proc_sparse(tarfile)1335        elif self.type in (XHDTYPE, XGLTYPE, SOLARIS_XHDTYPE):1336            return self._proc_pax(tarfile)1337        else:1338            return self._proc_builtin(tarfile)1339 1340    def _proc_builtin(self, tarfile):1341        """Process a builtin type or an unknown type which1342           will be treated as a regular file.1343        """1344        self.offset_data = tarfile.fileobj.tell()1345        offset = self.offset_data1346        if self.isreg() or self.type not in SUPPORTED_TYPES:1347            # Skip the following data blocks.1348            offset += self._block(self.size)1349        tarfile.offset = offset1350 1351        # Patch the TarInfo object with saved global1352        # header information.1353        self._apply_pax_info(tarfile.pax_headers, tarfile.encoding, tarfile.errors)1354 1355        return self1356 1357    def _proc_gnulong(self, tarfile):1358        """Process the blocks that hold a GNU longname1359           or longlink member.1360        """1361        buf = tarfile.fileobj.read(self._block(self.size))1362 1363        # Fetch the next header and process it.1364        try:1365            next = self.fromtarfile(tarfile)1366        except HeaderError:1367            raise SubsequentHeaderError("missing or bad subsequent header")1368 1369        # Patch the TarInfo object from the next header with1370        # the longname information.1371        next.offset = self.offset1372        if self.type == GNUTYPE_LONGNAME:1373            next.name = nts(buf, tarfile.encoding, tarfile.errors)1374        elif self.type == GNUTYPE_LONGLINK:1375            next.linkname = nts(buf, tarfile.encoding, tarfile.errors)1376 1377        return next1378 1379    def _proc_sparse(self, tarfile):1380        """Process a GNU sparse header plus extra headers.1381        """1382        # We already collected some sparse structures in frombuf().1383        structs, isextended, origsize = self._sparse_structs1384        del self._sparse_structs1385 1386        # Collect sparse structures from extended header blocks.1387        while isextended:1388            buf = tarfile.fileobj.read(BLOCKSIZE)1389            pos = 01390            for i in range(21):1391                try:1392                    offset = nti(buf[pos:pos + 12])1393                    numbytes = nti(buf[pos + 12:pos + 24])1394                except ValueError:1395                    break1396                if offset and numbytes:1397                    structs.append((offset, numbytes))1398                pos += 241399            isextended = bool(buf[504])1400        self.sparse = structs1401 1402        self.offset_data = tarfile.fileobj.tell()1403        tarfile.offset = self.offset_data + self._block(self.size)1404        self.size = origsize1405        return self1406 1407    def _proc_pax(self, tarfile):1408        """Process an extended or global header as described in1409           POSIX.1-2008.1410        """1411        # Read the header information.1412        buf = tarfile.fileobj.read(self._block(self.size))1413 1414        # A pax header stores supplemental information for either1415        # the following file (extended) or all following files1416        # (global).1417        if self.type == XGLTYPE:1418            pax_headers = tarfile.pax_headers1419        else:1420            pax_headers = tarfile.pax_headers.copy()1421 1422        # Parse pax header information. A record looks like that:1423        # "%d %s=%s\n" % (length, keyword, value). length is the size1424        # of the complete record including the length field itself and1425        # the newline.1426        pos = 01427        encoding = None1428        raw_headers = []1429        while len(buf) > pos and buf[pos] != 0x00:1430            if not (match := _header_length_prefix_re.match(buf, pos)):1431                raise InvalidHeaderError("invalid header")1432            try:1433                length = int(match.group(1))1434            except ValueError:1435                raise InvalidHeaderError("invalid header")1436            # Headers must be at least 5 bytes, shortest being '5 x=\n'.1437            # Value is allowed to be empty.1438            if length < 5:1439                raise InvalidHeaderError("invalid header")1440            if pos + length > len(buf):1441                raise InvalidHeaderError("invalid header")1442 1443            header_value_end_offset = match.start(1) + length - 1  # Last byte of the header1444            keyword_and_value = buf[match.end(1) + 1:header_value_end_offset]1445            raw_keyword, equals, raw_value = keyword_and_value.partition(b"=")1446 1447            # Check the framing of the header. The last character must be '\n' (0x0A)1448            if not raw_keyword or equals != b"=" or buf[header_value_end_offset] != 0x0A:1449                raise InvalidHeaderError("invalid header")1450            raw_headers.append((length, raw_keyword, raw_value))1451 1452            # Check if the pax header contains a hdrcharset field. This tells us1453            # the encoding of the path, linkpath, uname and gname fields. Normally,1454            # these fields are UTF-8 encoded but since POSIX.1-2008 tar1455            # implementations are allowed to store them as raw binary strings if1456            # the translation to UTF-8 fails. For the time being, we don't care about1457            # anything other than "BINARY". The only other value that is currently1458            # allowed by the standard is "ISO-IR 10646 2000 UTF-8" in other words UTF-8.1459            # Note that we only follow the initial 'hdrcharset' setting to preserve1460            # the initial behavior of the 'tarfile' module.1461            if raw_keyword == b"hdrcharset" and encoding is None:1462                if raw_value == b"BINARY":1463                    encoding = tarfile.encoding1464                else:  # This branch ensures only the first 'hdrcharset' header is used.1465                    encoding = "utf-8"1466 1467            pos += length1468 1469        # If no explicit hdrcharset is set, we use UTF-8 as a default.1470        if encoding is None:1471            encoding = "utf-8"1472 1473        # After parsing the raw headers we can decode them to text.1474        for length, raw_keyword, raw_value in raw_headers:1475            # Normally, we could just use "utf-8" as the encoding and "strict"1476            # as the error handler, but we better not take the risk. For1477            # example, GNU tar <= 1.23 is known to store filenames it cannot1478            # translate to UTF-8 as raw strings (unfortunately without a1479            # hdrcharset=BINARY header).1480            # We first try the strict standard encoding, and if that fails we1481            # fall back on the user's encoding and error handler.1482            keyword = self._decode_pax_field(raw_keyword, "utf-8", "utf-8",1483                    tarfile.errors)1484            if keyword in PAX_NAME_FIELDS:1485                value = self._decode_pax_field(raw_value, encoding, tarfile.encoding,1486                        tarfile.errors)1487            else:1488                value = self._decode_pax_field(raw_value, "utf-8", "utf-8",1489                        tarfile.errors)1490 1491            pax_headers[keyword] = value1492 1493        # Fetch the next header.1494        try:1495            next = self.fromtarfile(tarfile)1496        except HeaderError:1497            raise SubsequentHeaderError("missing or bad subsequent header")1498 1499        # Process GNU sparse information.1500        if "GNU.sparse.map" in pax_headers:1501            # GNU extended sparse format version 0.1.1502            self._proc_gnusparse_01(next, pax_headers)1503 1504        elif "GNU.sparse.size" in pax_headers:1505            # GNU extended sparse format version 0.0.1506            self._proc_gnusparse_00(next, raw_headers)1507 1508        elif pax_headers.get("GNU.sparse.major") == "1" and pax_headers.get("GNU.sparse.minor") == "0":1509            # GNU extended sparse format version 1.0.1510            self._proc_gnusparse_10(next, pax_headers, tarfile)1511 1512        if self.type in (XHDTYPE, SOLARIS_XHDTYPE):1513            # Patch the TarInfo object with the extended header info.1514            next._apply_pax_info(pax_headers, tarfile.encoding, tarfile.errors)1515            next.offset = self.offset1516 1517            if "size" in pax_headers:1518                # If the extended header replaces the size field,1519                # we need to recalculate the offset where the next1520                # header starts.1521                offset = next.offset_data1522                if next.isreg() or next.type not in SUPPORTED_TYPES:1523                    offset += next._block(next.size)1524                tarfile.offset = offset1525 1526        return next1527 1528    def _proc_gnusparse_00(self, next, raw_headers):1529        """Process a GNU tar extended sparse header, version 0.0.1530        """1531        offsets = []1532        numbytes = []1533        for _, keyword, value in raw_headers:1534            if keyword == b"GNU.sparse.offset":1535                try:1536                    offsets.append(int(value.decode()))1537                except ValueError:1538                    raise InvalidHeaderError("invalid header")1539 1540            elif keyword == b"GNU.sparse.numbytes":1541                try:1542                    numbytes.append(int(value.decode()))1543                except ValueError:1544                    raise InvalidHeaderError("invalid header")1545 1546        next.sparse = list(zip(offsets, numbytes))1547 1548    def _proc_gnusparse_01(self, next, pax_headers):1549        """Process a GNU tar extended sparse header, version 0.1.1550        """1551        sparse = [int(x) for x in pax_headers["GNU.sparse.map"].split(",")]1552        next.sparse = list(zip(sparse[::2], sparse[1::2]))1553 1554    def _proc_gnusparse_10(self, next, pax_headers, tarfile):1555        """Process a GNU tar extended sparse header, version 1.0.1556        """1557        fields = None1558        sparse = []1559        buf = tarfile.fileobj.read(BLOCKSIZE)1560        fields, buf = buf.split(b"\n", 1)1561        fields = int(fields)1562        while len(sparse) < fields * 2:1563            if b"\n" not in buf:1564                buf += tarfile.fileobj.read(BLOCKSIZE)1565            number, buf = buf.split(b"\n", 1)1566            sparse.append(int(number))1567        next.offset_data = tarfile.fileobj.tell()1568        next.sparse = list(zip(sparse[::2], sparse[1::2]))1569 1570    def _apply_pax_info(self, pax_headers, encoding, errors):1571        """Replace fields with supplemental information from a previous1572           pax extended or global header.1573        """1574        for keyword, value in pax_headers.items():1575            if keyword == "GNU.sparse.name":1576                setattr(self, "path", value)1577            elif keyword == "GNU.sparse.size":1578                setattr(self, "size", int(value))1579            elif keyword == "GNU.sparse.realsize":1580                setattr(self, "size", int(value))1581            elif keyword in PAX_FIELDS:1582                if keyword in PAX_NUMBER_FIELDS:1583                    try:1584                        value = PAX_NUMBER_FIELDS[keyword](value)1585                    except ValueError:1586                        value = 01587                if keyword == "path":1588                    value = value.rstrip("/")1589                setattr(self, keyword, value)1590 1591        self.pax_headers = pax_headers.copy()1592 1593    def _decode_pax_field(self, value, encoding, fallback_encoding, fallback_errors):1594        """Decode a single field from a pax record.1595        """1596        try:1597            return value.decode(encoding, "strict")1598        except UnicodeDecodeError:1599            return value.decode(fallback_encoding, fallback_errors)1600 1601    def _block(self, count):1602        """Round up a byte count by BLOCKSIZE and return it,1603           e.g. _block(834) => 1024.1604        """1605        # Only non-negative offsets are allowed1606        if count < 0:1607            raise InvalidHeaderError("invalid offset")1608        blocks, remainder = divmod(count, BLOCKSIZE)1609        if remainder:1610            blocks += 11611        return blocks * BLOCKSIZE1612 1613    def isreg(self):1614        'Return True if the Tarinfo object is a regular file.'1615        return self.type in REGULAR_TYPES1616 1617    def isfile(self):1618        'Return True if the Tarinfo object is a regular file.'1619        return self.isreg()1620 1621    def isdir(self):1622        'Return True if it is a directory.'1623        return self.type == DIRTYPE1624 1625    def issym(self):1626        'Return True if it is a symbolic link.'1627        return self.type == SYMTYPE1628 1629    def islnk(self):1630        'Return True if it is a hard link.'1631        return self.type == LNKTYPE1632 1633    def ischr(self):1634        'Return True if it is a character device.'1635        return self.type == CHRTYPE1636 1637    def isblk(self):1638        'Return True if it is a block device.'1639        return self.type == BLKTYPE1640 1641    def isfifo(self):1642        'Return True if it is a FIFO.'1643        return self.type == FIFOTYPE1644 1645    def issparse(self):1646        return self.sparse is not None1647 1648    def isdev(self):1649        'Return True if it is one of character device, block device or FIFO.'1650        return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)1651# class TarInfo1652 1653class TarFile(object):1654    """The TarFile Class provides an interface to tar archives.1655    """1656 1657    debug = 0                   # May be set from 0 (no msgs) to 3 (all msgs)1658 1659    dereference = False         # If true, add content of linked file to the1660                                # tar file, else the link.1661 1662    ignore_zeros = False        # If true, skips empty or invalid blocks and1663                                # continues processing.1664 1665    errorlevel = 1              # If 0, fatal errors only appear in debug1666                                # messages (if debug >= 0). If > 0, errors1667                                # are passed to the caller as exceptions.1668 1669    format = DEFAULT_FORMAT     # The format to use when creating an archive.1670 1671    encoding = ENCODING         # Encoding for 8-bit character strings.1672 1673    errors = None               # Error handler for unicode conversion.1674 1675    tarinfo = TarInfo           # The default TarInfo class to use.1676 1677    fileobject = ExFileObject   # The file-object for extractfile().1678 1679    extraction_filter = None    # The default filter for extraction.1680 1681    def __init__(self, name=None, mode="r", fileobj=None, format=None,1682            tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,1683            errors="surrogateescape", pax_headers=None, debug=None,1684            errorlevel=None, copybufsize=None):1685        """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to1686           read from an existing archive, 'a' to append data to an existing1687           file or 'w' to create a new file overwriting an existing one. `mode'1688           defaults to 'r'.1689           If `fileobj' is given, it is used for reading or writing data. If it1690           can be determined, `mode' is overridden by `fileobj's mode.1691           `fileobj' is not closed, when TarFile is closed.1692        """1693        modes = {"r": "rb", "a": "r+b", "w": "wb", "x": "xb"}1694        if mode not in modes:1695            raise ValueError("mode must be 'r', 'a', 'w' or 'x'")1696        self.mode = mode1697        self._mode = modes[mode]1698 1699        if not fileobj:1700            if self.mode == "a" and not os.path.exists(name):1701                # Create nonexistent files in append mode.1702                self.mode = "w"1703                self._mode = "wb"1704            fileobj = bltn_open(name, self._mode)1705            self._extfileobj = False1706        else:1707            if (name is None and hasattr(fileobj, "name") and1708                isinstance(fileobj.name, (str, bytes))):1709                name = fileobj.name1710            if hasattr(fileobj, "mode"):1711                self._mode = fileobj.mode1712            self._extfileobj = True1713        self.name = os.path.abspath(name) if name else None1714        self.fileobj = fileobj1715 1716        # Init attributes.1717        if format is not None:1718            self.format = format1719        if tarinfo is not None:1720            self.tarinfo = tarinfo1721        if dereference is not None:1722            self.dereference = dereference1723        if ignore_zeros is not None:1724            self.ignore_zeros = ignore_zeros1725        if encoding is not None:1726            self.encoding = encoding1727        self.errors = errors1728 1729        if pax_headers is not None and self.format == PAX_FORMAT:1730            self.pax_headers = pax_headers1731        else:1732            self.pax_headers = {}1733 1734        if debug is not None:1735            self.debug = debug1736        if errorlevel is not None:1737            self.errorlevel = errorlevel1738 1739        # Init datastructures.1740        self.copybufsize = copybufsize1741        self.closed = False1742        self.members = []       # list of members as TarInfo objects1743        self._loaded = False    # flag if all members have been read1744        self.offset = self.fileobj.tell()1745                                # current position in the archive file1746        self.inodes = {}        # dictionary caching the inodes of1747                                # archive members already added1748 1749        try:1750            if self.mode == "r":1751                self.firstmember = None1752                self.firstmember = self.next()1753 1754            if self.mode == "a":1755                # Move to the end of the archive,1756                # before the first empty block.1757                while True:1758                    self.fileobj.seek(self.offset)1759                    try:1760                        tarinfo = self.tarinfo.fromtarfile(self)1761                        self.members.append(tarinfo)1762                    except EOFHeaderError:1763                        self.fileobj.seek(self.offset)1764                        break1765                    except HeaderError as e:1766                        raise ReadError(str(e))1767 1768            if self.mode in ("a", "w", "x"):1769                self._loaded = True1770 1771                if self.pax_headers:1772                    buf = self.tarinfo.create_pax_global_header(self.pax_headers.copy())1773                    self.fileobj.write(buf)1774                    self.offset += len(buf)1775        except:1776            if not self._extfileobj:1777                self.fileobj.close()1778            self.closed = True1779            raise1780 1781    #--------------------------------------------------------------------------1782    # Below are the classmethods which act as alternate constructors to the1783    # TarFile class. The open() method is the only one that is needed for1784    # public use; it is the "super"-constructor and is able to select an1785    # adequate "sub"-constructor for a particular compression using the mapping1786    # from OPEN_METH.1787    #1788    # This concept allows one to subclass TarFile without losing the comfort of1789    # the super-constructor. A sub-constructor is registered and made available1790    # by adding it to the mapping in OPEN_METH.1791 1792    @classmethod1793    def open(cls, name=None, mode="r", fileobj=None, bufsize=RECORDSIZE, **kwargs):1794        """Open a tar archive for reading, writing or appending. Return1795           an appropriate TarFile class.1796 1797           mode:1798           'r' or 'r:*' open for reading with transparent compression1799           'r:'         open for reading exclusively uncompressed1800           'r:gz'       open for reading with gzip compression1801           'r:bz2'      open for reading with bzip2 compression1802           'r:xz'       open for reading with lzma compression1803           'a' or 'a:'  open for appending, creating the file if necessary1804           'w' or 'w:'  open for writing without compression1805           'w:gz'       open for writing with gzip compression1806           'w:bz2'      open for writing with bzip2 compression1807           'w:xz'       open for writing with lzma compression1808 1809           'x' or 'x:'  create a tarfile exclusively without compression, raise1810                        an exception if the file is already created1811           'x:gz'       create a gzip compressed tarfile, raise an exception1812                        if the file is already created1813           'x:bz2'      create a bzip2 compressed tarfile, raise an exception1814                        if the file is already created1815           'x:xz'       create an lzma compressed tarfile, raise an exception1816                        if the file is already created1817 1818           'r|*'        open a stream of tar blocks with transparent compression1819           'r|'         open an uncompressed stream of tar blocks for reading1820           'r|gz'       open a gzip compressed stream of tar blocks1821           'r|bz2'      open a bzip2 compressed stream of tar blocks1822           'r|xz'       open an lzma compressed stream of tar blocks1823           'w|'         open an uncompressed stream for writing1824           'w|gz'       open a gzip compressed stream for writing1825           'w|bz2'      open a bzip2 compressed stream for writing1826           'w|xz'       open an lzma compressed stream for writing1827        """1828 1829        if not name and not fileobj:1830            raise ValueError("nothing to open")1831 1832        if mode in ("r", "r:*"):1833            # Find out which *open() is appropriate for opening the file.1834            def not_compressed(comptype):1835                return cls.OPEN_METH[comptype] == 'taropen'1836            for comptype in sorted(cls.OPEN_METH, key=not_compressed):1837                func = getattr(cls, cls.OPEN_METH[comptype])1838                if fileobj is not None:1839                    saved_pos = fileobj.tell()1840                try:1841                    return func(name, "r", fileobj, **kwargs)1842                except (ReadError, CompressionError):1843                    if fileobj is not None:1844                        fileobj.seek(saved_pos)1845                    continue1846            raise ReadError("file could not be opened successfully")1847 1848        elif ":" in mode:1849            filemode, comptype = mode.split(":", 1)1850            filemode = filemode or "r"1851            comptype = comptype or "tar"1852 1853            # Select the *open() function according to1854            # given compression.1855            if comptype in cls.OPEN_METH:1856                func = getattr(cls, cls.OPEN_METH[comptype])1857            else:1858                raise CompressionError("unknown compression type %r" % comptype)1859            return func(name, filemode, fileobj, **kwargs)1860 1861        elif "|" in mode:1862            filemode, comptype = mode.split("|", 1)1863            filemode = filemode or "r"1864            comptype = comptype or "tar"1865 1866            if filemode not in ("r", "w"):1867                raise ValueError("mode must be 'r' or 'w'")1868 1869            stream = _Stream(name, filemode, comptype, fileobj, bufsize)1870            try:1871                t = cls(name, filemode, stream, **kwargs)1872            except:1873                stream.close()1874                raise1875            t._extfileobj = False1876            return t1877 1878        elif mode in ("a", "w", "x"):1879            return cls.taropen(name, mode, fileobj, **kwargs)1880 1881        raise ValueError("undiscernible mode")1882 1883    @classmethod1884    def taropen(cls, name, mode="r", fileobj=None, **kwargs):1885        """Open uncompressed tar archive name for reading or writing.1886        """1887        if mode not in ("r", "a", "w", "x"):1888            raise ValueError("mode must be 'r', 'a', 'w' or 'x'")1889        return cls(name, mode, fileobj, **kwargs)1890 1891    @classmethod1892    def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):1893        """Open gzip compressed tar archive name for reading or writing.1894           Appending is not allowed.1895        """1896        if mode not in ("r", "w", "x"):1897            raise ValueError("mode must be 'r', 'w' or 'x'")1898 1899        try:1900            from gzip import GzipFile1901        except ImportError:1902            raise CompressionError("gzip module is not available")1903 1904        try:1905            fileobj = GzipFile(name, mode + "b", compresslevel, fileobj)1906        except OSError:1907            if fileobj is not None and mode == 'r':1908                raise ReadError("not a gzip file")1909            raise1910 1911        try:1912            t = cls.taropen(name, mode, fileobj, **kwargs)1913        except OSError:1914            fileobj.close()1915            if mode == 'r':1916                raise ReadError("not a gzip file")1917            raise1918        except:1919            fileobj.close()1920            raise1921        t._extfileobj = False1922        return t1923 1924    @classmethod1925    def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):1926        """Open bzip2 compressed tar archive name for reading or writing.1927           Appending is not allowed.1928        """1929        if mode not in ("r", "w", "x"):1930            raise ValueError("mode must be 'r', 'w' or 'x'")1931 1932        try:1933            from bz2 import BZ2File1934        except ImportError:1935            raise CompressionError("bz2 module is not available")1936 1937        fileobj = BZ2File(fileobj or name, mode, compresslevel=compresslevel)1938 1939        try:1940            t = cls.taropen(name, mode, fileobj, **kwargs)1941        except (OSError, EOFError):1942            fileobj.close()1943            if mode == 'r':1944                raise ReadError("not a bzip2 file")1945            raise1946        except:1947            fileobj.close()1948            raise1949        t._extfileobj = False1950        return t1951 1952    @classmethod1953    def xzopen(cls, name, mode="r", fileobj=None, preset=None, **kwargs):1954        """Open lzma compressed tar archive name for reading or writing.1955           Appending is not allowed.1956        """1957        if mode not in ("r", "w", "x"):1958            raise ValueError("mode must be 'r', 'w' or 'x'")1959 1960        try:1961            from lzma import LZMAFile, LZMAError1962        except ImportError:1963            raise CompressionError("lzma module is not available")1964 1965        fileobj = LZMAFile(fileobj or name, mode, preset=preset)1966 1967        try:1968            t = cls.taropen(name, mode, fileobj, **kwargs)1969        except (LZMAError, EOFError):1970            fileobj.close()1971            if mode == 'r':1972                raise ReadError("not an lzma file")1973            raise1974        except:1975            fileobj.close()1976            raise1977        t._extfileobj = False1978        return t1979 1980    # All *open() methods are registered here.1981    OPEN_METH = {1982        "tar": "taropen",   # uncompressed tar1983        "gz":  "gzopen",    # gzip compressed tar1984        "bz2": "bz2open",   # bzip2 compressed tar1985        "xz":  "xzopen"     # lzma compressed tar1986    }1987 1988    #--------------------------------------------------------------------------1989    # The public methods which TarFile provides:1990 1991    def close(self):1992        """Close the TarFile. In write-mode, two finishing zero blocks are1993           appended to the archive.1994        """1995        if self.closed:1996            return1997 1998        self.closed = True1999        try:2000            if self.mode in ("a", "w", "x"):2001                self.fileobj.write(NUL * (BLOCKSIZE * 2))2002                self.offset += (BLOCKSIZE * 2)2003                # fill up the end with zero-blocks2004                # (like option -b20 for tar does)2005                blocks, remainder = divmod(self.offset, RECORDSIZE)2006                if remainder > 0:2007                    self.fileobj.write(NUL * (RECORDSIZE - remainder))2008        finally:2009            if not self._extfileobj:2010                self.fileobj.close()2011 2012    def getmember(self, name):2013        """Return a TarInfo object for member `name'. If `name' can not be2014           found in the archive, KeyError is raised. If a member occurs more2015           than once in the archive, its last occurrence is assumed to be the2016           most up-to-date version.2017        """2018        tarinfo = self._getmember(name.rstrip('/'))2019        if tarinfo is None:2020            raise KeyError("filename %r not found" % name)2021        return tarinfo2022 2023    def getmembers(self):2024        """Return the members of the archive as a list of TarInfo objects. The2025           list has the same order as the members in the archive.2026        """2027        self._check()2028        if not self._loaded:    # if we want to obtain a list of2029            self._load()        # all members, we first have to2030                                # scan the whole archive.2031        return self.members2032 2033    def getnames(self):2034        """Return the members of the archive as a list of their names. It has2035           the same order as the list returned by getmembers().2036        """2037        return [tarinfo.name for tarinfo in self.getmembers()]2038 2039    def gettarinfo(self, name=None, arcname=None, fileobj=None):2040        """Create a TarInfo object from the result of os.stat or equivalent2041           on an existing file. The file is either named by `name', or2042           specified as a file object `fileobj' with a file descriptor. If2043           given, `arcname' specifies an alternative name for the file in the2044           archive, otherwise, the name is taken from the 'name' attribute of2045           'fileobj', or the 'name' argument. The name should be a text2046           string.2047        """2048        self._check("awx")2049 2050        # When fileobj is given, replace name by2051        # fileobj's real name.2052        if fileobj is not None:2053            name = fileobj.name2054 2055        # Building the name of the member in the archive.2056        # Backward slashes are converted to forward slashes,2057        # Absolute paths are turned to relative paths.2058        if arcname is None:2059            arcname = name2060        drv, arcname = os.path.splitdrive(arcname)2061        arcname = arcname.replace(os.sep, "/")2062        arcname = arcname.lstrip("/")2063 2064        # Now, fill the TarInfo object with2065        # information specific for the file.2066        tarinfo = self.tarinfo()2067        tarinfo.tarfile = self  # Not needed2068 2069        # Use os.stat or os.lstat, depending on if symlinks shall be resolved.2070        if fileobj is None:2071            if not self.dereference:2072                statres = os.lstat(name)2073            else:2074                statres = os.stat(name)2075        else:2076            statres = os.fstat(fileobj.fileno())2077        linkname = ""2078 2079        stmd = statres.st_mode2080        if stat.S_ISREG(stmd):2081            inode = (statres.st_ino, statres.st_dev)2082            if not self.dereference and statres.st_nlink > 1 and \2083                    inode in self.inodes and arcname != self.inodes[inode]:2084                # Is it a hardlink to an already2085                # archived file?2086                type = LNKTYPE2087                linkname = self.inodes[inode]2088            else:2089                # The inode is added only if its valid.2090                # For win32 it is always 0.2091                type = REGTYPE2092                if inode[0]:2093                    self.inodes[inode] = arcname2094        elif stat.S_ISDIR(stmd):2095            type = DIRTYPE2096        elif stat.S_ISFIFO(stmd):2097            type = FIFOTYPE2098        elif stat.S_ISLNK(stmd):2099            type = SYMTYPE2100            linkname = os.readlink(name)2101        elif stat.S_ISCHR(stmd):2102            type = CHRTYPE2103        elif stat.S_ISBLK(stmd):2104            type = BLKTYPE2105        else:2106            return None2107 2108        # Fill the TarInfo object with all2109        # information we can get.2110        tarinfo.name = arcname2111        tarinfo.mode = stmd2112        tarinfo.uid = statres.st_uid2113        tarinfo.gid = statres.st_gid2114        if type == REGTYPE:2115            tarinfo.size = statres.st_size2116        else:2117            tarinfo.size = 02118        tarinfo.mtime = statres.st_mtime2119        tarinfo.type = type2120        tarinfo.linkname = linkname2121        if pwd:2122            try:2123                tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]2124            except KeyError:2125                pass2126        if grp:2127            try:2128                tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]2129            except KeyError:2130                pass2131 2132        if type in (CHRTYPE, BLKTYPE):2133            if hasattr(os, "major") and hasattr(os, "minor"):2134                tarinfo.devmajor = os.major(statres.st_rdev)2135                tarinfo.devminor = os.minor(statres.st_rdev)2136        return tarinfo2137 2138    def list(self, verbose=True, *, members=None):2139        """Print a table of contents to sys.stdout. If `verbose' is False, only2140           the names of the members are printed. If it is True, an `ls -l'-like2141           output is produced. `members' is optional and must be a subset of the2142           list returned by getmembers().2143        """2144        self._check()2145 2146        if members is None:2147            members = self2148        for tarinfo in members:2149            if verbose:2150                if tarinfo.mode is None:2151                    _safe_print("??????????")2152                else:2153                    _safe_print(stat.filemode(tarinfo.mode))2154                _safe_print("%s/%s" % (tarinfo.uname or tarinfo.uid,2155                                       tarinfo.gname or tarinfo.gid))2156                if tarinfo.ischr() or tarinfo.isblk():2157                    _safe_print("%10s" %2158                            ("%d,%d" % (tarinfo.devmajor, tarinfo.devminor)))2159                else:2160                    _safe_print("%10d" % tarinfo.size)2161                if tarinfo.mtime is None:2162                    _safe_print("????-??-?? ??:??:??")2163                else:2164                    _safe_print("%d-%02d-%02d %02d:%02d:%02d" \2165                                % time.localtime(tarinfo.mtime)[:6])2166 2167            _safe_print(tarinfo.name + ("/" if tarinfo.isdir() else ""))2168 2169            if verbose:2170                if tarinfo.issym():2171                    _safe_print("-> " + tarinfo.linkname)2172                if tarinfo.islnk():2173                    _safe_print("link to " + tarinfo.linkname)2174            print()2175 2176    def add(self, name, arcname=None, recursive=True, *, filter=None):2177        """Add the file `name' to the archive. `name' may be any type of file2178           (directory, fifo, symbolic link, etc.). If given, `arcname'2179           specifies an alternative name for the file in the archive.2180           Directories are added recursively by default. This can be avoided by2181           setting `recursive' to False. `filter' is a function2182           that expects a TarInfo object argument and returns the changed2183           TarInfo object, if it returns None the TarInfo object will be2184           excluded from the archive.2185        """2186        self._check("awx")2187 2188        if arcname is None:2189            arcname = name2190 2191        # Skip if somebody tries to archive the archive...2192        if self.name is not None and os.path.abspath(name) == self.name:2193            self._dbg(2, "tarfile: Skipped %r" % name)2194            return2195 2196        self._dbg(1, name)2197 2198        # Create a TarInfo object from the file.2199        tarinfo = self.gettarinfo(name, arcname)2200 2201        if tarinfo is None:2202            self._dbg(1, "tarfile: Unsupported type %r" % name)2203            return2204 2205        # Change or exclude the TarInfo object.2206        if filter is not None:2207            tarinfo = filter(tarinfo)2208            if tarinfo is None:2209                self._dbg(2, "tarfile: Excluded %r" % name)2210                return2211 2212        # Append the tar header and data to the archive.2213        if tarinfo.isreg():2214            with bltn_open(name, "rb") as f:2215                self.addfile(tarinfo, f)2216 2217        elif tarinfo.isdir():2218            self.addfile(tarinfo)2219            if recursive:2220                for f in sorted(os.listdir(name)):2221                    self.add(os.path.join(name, f), os.path.join(arcname, f),2222                            recursive, filter=filter)2223 2224        else:2225            self.addfile(tarinfo)2226 2227    def addfile(self, tarinfo, fileobj=None):2228        """Add the TarInfo object `tarinfo' to the archive. If `fileobj' is2229           given, it should be a binary file, and tarinfo.size bytes are read2230           from it and added to the archive. You can create TarInfo objects2231           directly, or by using gettarinfo().2232        """2233        self._check("awx")2234 2235        tarinfo = copy.copy(tarinfo)2236 2237        buf = tarinfo.tobuf(self.format, self.encoding, self.errors)2238        self.fileobj.write(buf)2239        self.offset += len(buf)2240        bufsize=self.copybufsize2241        # If there's data to follow, append it.2242        if fileobj is not None:2243            copyfileobj(fileobj, self.fileobj, tarinfo.size, bufsize=bufsize)2244            blocks, remainder = divmod(tarinfo.size, BLOCKSIZE)2245            if remainder > 0:2246                self.fileobj.write(NUL * (BLOCKSIZE - remainder))2247                blocks += 12248            self.offset += blocks * BLOCKSIZE2249 2250        self.members.append(tarinfo)2251 2252    def _get_filter_function(self, filter):2253        if filter is None:2254            filter = self.extraction_filter2255            if filter is None:2256                return fully_trusted_filter2257            if isinstance(filter, str):2258                raise TypeError(2259                    'String names are not supported for '2260                    + 'TarFile.extraction_filter. Use a function such as '2261                    + 'tarfile.data_filter directly.')2262            return filter2263        if callable(filter):2264            return filter2265        try:2266            return _NAMED_FILTERS[filter]2267        except KeyError:2268            raise ValueError(f"filter {filter!r} not found") from None2269 2270    def extractall(self, path=".", members=None, *, numeric_owner=False,2271                   filter=None):2272        """Extract all members from the archive to the current working2273           directory and set owner, modification time and permissions on2274           directories afterwards. `path' specifies a different directory2275           to extract to. `members' is optional and must be a subset of the2276           list returned by getmembers(). If `numeric_owner` is True, only2277           the numbers for user/group names are used and not the names.2278 2279           The `filter` function will be called on each member just2280           before extraction.2281           It can return a changed TarInfo or None to skip the member.2282           String names of common filters are accepted.2283        """2284        directories = []2285 2286        filter_function = self._get_filter_function(filter)2287        if members is None:2288            members = self2289 2290        for member in members:2291            tarinfo, unfiltered = self._get_extract_tarinfo(2292                member, filter_function, path)2293            if tarinfo is None:2294                continue2295            if tarinfo.isdir():2296                # For directories, delay setting attributes until later,2297                # since permissions can interfere with extraction and2298                # extracting contents can reset mtime.2299                directories.append(unfiltered)2300            self._extract_one(tarinfo, path, set_attrs=not tarinfo.isdir(),2301                              numeric_owner=numeric_owner,2302                              filter_function=filter_function)2303 2304        # Reverse sort directories.2305        directories.sort(key=lambda a: a.name, reverse=True)2306 2307 2308        # Set correct owner, mtime and filemode on directories.2309        for unfiltered in directories:2310            try:2311                # Need to re-apply any filter, to take the *current* filesystem2312                # state into account.2313                try:2314                    tarinfo = filter_function(unfiltered, path)2315                except _FILTER_ERRORS as exc:2316                    self._log_no_directory_fixup(unfiltered, repr(exc))2317                    continue2318                if tarinfo is None:2319                    self._log_no_directory_fixup(unfiltered,2320                                                 'excluded by filter')2321                    continue2322                dirpath = os.path.join(path, tarinfo.name)2323                try:2324                    lstat = os.lstat(dirpath)2325                except FileNotFoundError:2326                    self._log_no_directory_fixup(tarinfo, 'missing')2327                    continue2328                if not stat.S_ISDIR(lstat.st_mode):2329                    # This is no longer a directory; presumably a later2330                    # member overwrote the entry.2331                    self._log_no_directory_fixup(tarinfo, 'not a directory')2332                    continue2333                self.chown(tarinfo, dirpath, numeric_owner=numeric_owner)2334                self.utime(tarinfo, dirpath)2335                self.chmod(tarinfo, dirpath)2336            except ExtractError as e:2337                self._handle_nonfatal_error(e)2338 2339    def _log_no_directory_fixup(self, member, reason):2340        self._dbg(2, "tarfile: Not fixing up directory %r (%s)" %2341                  (member.name, reason))2342 2343    def extract(self, member, path="", set_attrs=True, *, numeric_owner=False,2344                filter=None):2345        """Extract a member from the archive to the current working directory,2346           using its full name. Its file information is extracted as accurately2347           as possible. `member' may be a filename or a TarInfo object. You can2348           specify a different directory using `path'. File attributes (owner,2349           mtime, mode) are set unless `set_attrs' is False. If `numeric_owner`2350           is True, only the numbers for user/group names are used and not2351           the names.2352 2353           The `filter` function will be called before extraction.2354           It can return a changed TarInfo or None to skip the member.2355           String names of common filters are accepted.2356        """2357        filter_function = self._get_filter_function(filter)2358        tarinfo, unfiltered = self._get_extract_tarinfo(2359            member, filter_function, path)2360        if tarinfo is not None:2361            self._extract_one(tarinfo, path, set_attrs, numeric_owner)2362 2363    def _get_extract_tarinfo(self, member, filter_function, path):2364        """Get (filtered, unfiltered) TarInfos from *member*2365 2366        *member* might be a string.2367 2368        Return (None, None) if not found.2369        """2370 2371        if isinstance(member, str):2372            unfiltered = self.getmember(member)2373        else:2374            unfiltered = member2375 2376        filtered = None2377        try:2378            filtered = filter_function(unfiltered, path)2379        except (OSError, FilterError) as e:2380            self._handle_fatal_error(e)2381        except ExtractError as e:2382            self._handle_nonfatal_error(e)2383        if filtered is None:2384            self._dbg(2, "tarfile: Excluded %r" % unfiltered.name)2385            return None, None2386 2387        # Prepare the link target for makelink().2388        if filtered.islnk():2389            filtered = copy.copy(filtered)2390            filtered._link_target = os.path.join(path, filtered.linkname)2391        return filtered, unfiltered2392 2393    def _extract_one(self, tarinfo, path, set_attrs, numeric_owner,2394                     filter_function=None):2395        """Extract from filtered tarinfo to disk.2396 2397           filter_function is only used when extracting a *different*2398           member (e.g. as fallback to creating a symlink)2399        """2400        self._check("r")2401 2402        try:2403            self._extract_member(tarinfo, os.path.join(path, tarinfo.name),2404                                 set_attrs=set_attrs,2405                                 numeric_owner=numeric_owner,2406                                 filter_function=filter_function,2407                                 extraction_root=path)2408        except OSError as e:2409            self._handle_fatal_error(e)2410        except ExtractError as e:2411            self._handle_nonfatal_error(e)2412 2413    def _handle_nonfatal_error(self, e):2414        """Handle non-fatal error (ExtractError) according to errorlevel"""2415        if self.errorlevel > 1:2416            raise2417        else:2418            self._dbg(1, "tarfile: %s" % e)2419 2420    def _handle_fatal_error(self, e):2421        """Handle "fatal" error according to self.errorlevel"""2422        if self.errorlevel > 0:2423            raise2424        elif isinstance(e, OSError):2425            if e.filename is None:2426                self._dbg(1, "tarfile: %s" % e.strerror)2427            else:2428                self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename))2429        else:2430            self._dbg(1, "tarfile: %s %s" % (type(e).__name__, e))2431 2432    def extractfile(self, member):2433        """Extract a member from the archive as a file object. `member' may be2434           a filename or a TarInfo object. If `member' is a regular file or2435           a link, an io.BufferedReader object is returned. For all other2436           existing members, None is returned. If `member' does not appear2437           in the archive, KeyError is raised.2438        """2439        self._check("r")2440 2441        if isinstance(member, str):2442            tarinfo = self.getmember(member)2443        else:2444            tarinfo = member2445 2446        if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:2447            # Members with unknown types are treated as regular files.2448            return self.fileobject(self, tarinfo)2449 2450        elif tarinfo.islnk() or tarinfo.issym():2451            if isinstance(self.fileobj, _Stream):2452                # A small but ugly workaround for the case that someone tries2453                # to extract a (sym)link as a file-object from a non-seekable2454                # stream of tar blocks.2455                raise StreamError("cannot extract (sym)link as file object")2456            else:2457                # A (sym)link's file object is its target's file object.2458                return self.extractfile(self._find_link_target(tarinfo))2459        else:2460            # If there's no data associated with the member (directory, chrdev,2461            # blkdev, etc.), return None instead of a file object.2462            return None2463 2464    def _extract_member(self, tarinfo, targetpath, set_attrs=True,2465                        numeric_owner=False, *, filter_function=None,2466                        extraction_root=None):2467        """Extract the filtered TarInfo object tarinfo to a physical2468           file called targetpath.2469 2470           filter_function is only used when extracting a *different*2471           member (e.g. as fallback to creating a symlink)2472        """2473        # Fetch the TarInfo object for the given name2474        # and build the destination pathname, replacing2475        # forward slashes to platform specific separators.2476        targetpath = targetpath.rstrip("/")2477        targetpath = targetpath.replace("/", os.sep)2478 2479        # Create all upper directories.2480        upperdirs = os.path.dirname(targetpath)2481        if upperdirs and not os.path.exists(upperdirs):2482            # Create directories that are not part of the archive with2483            # default permissions.2484            os.makedirs(upperdirs)2485 2486        if tarinfo.islnk() or tarinfo.issym():2487            self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))2488        else:2489            self._dbg(1, tarinfo.name)2490 2491        if tarinfo.isreg():2492            self.makefile(tarinfo, targetpath)2493        elif tarinfo.isdir():2494            self.makedir(tarinfo, targetpath)2495        elif tarinfo.isfifo():2496            self.makefifo(tarinfo, targetpath)2497        elif tarinfo.ischr() or tarinfo.isblk():2498            self.makedev(tarinfo, targetpath)2499        elif tarinfo.islnk() or tarinfo.issym():2500            self.makelink_with_filter(2501                tarinfo, targetpath,2502                filter_function=filter_function,2503                extraction_root=extraction_root)2504        elif tarinfo.type not in SUPPORTED_TYPES:2505            self.makeunknown(tarinfo, targetpath)2506        else:2507            self.makefile(tarinfo, targetpath)2508 2509        if set_attrs:2510            self.chown(tarinfo, targetpath, numeric_owner)2511            if not tarinfo.issym():2512                self.chmod(tarinfo, targetpath)2513                self.utime(tarinfo, targetpath)2514 2515    #--------------------------------------------------------------------------2516    # Below are the different file methods. They are called via2517    # _extract_member() when extract() is called. They can be replaced in a2518    # subclass to implement other functionality.2519 2520    def makedir(self, tarinfo, targetpath):2521        """Make a directory called targetpath.2522        """2523        try:2524            if tarinfo.mode is None:2525                # Use the system's default mode2526                os.mkdir(targetpath)2527            else:2528                # Use a safe mode for the directory, the real mode is set2529                # later in _extract_member().2530                os.mkdir(targetpath, 0o700)2531        except FileExistsError:2532            pass2533 2534    def makefile(self, tarinfo, targetpath):2535        """Make a file called targetpath.2536        """2537        source = self.fileobj2538        source.seek(tarinfo.offset_data)2539        bufsize = self.copybufsize2540        with bltn_open(targetpath, "wb") as target:2541            if tarinfo.sparse is not None:2542                for offset, size in tarinfo.sparse:2543                    target.seek(offset)2544                    copyfileobj(source, target, size, ReadError, bufsize)2545                target.seek(tarinfo.size)2546                target.truncate()2547            else:2548                copyfileobj(source, target, tarinfo.size, ReadError, bufsize)2549 2550    def makeunknown(self, tarinfo, targetpath):2551        """Make a file from a TarInfo object with an unknown type2552           at targetpath.2553        """2554        self.makefile(tarinfo, targetpath)2555        self._dbg(1, "tarfile: Unknown file type %r, " \2556                     "extracted as regular file." % tarinfo.type)2557 2558    def makefifo(self, tarinfo, targetpath):2559        """Make a fifo called targetpath.2560        """2561        if hasattr(os, "mkfifo"):2562            os.mkfifo(targetpath)2563        else:2564            raise ExtractError("fifo not supported by system")2565 2566    def makedev(self, tarinfo, targetpath):2567        """Make a character or block device called targetpath.2568        """2569        if not hasattr(os, "mknod") or not hasattr(os, "makedev"):2570            raise ExtractError("special devices not supported by system")2571 2572        mode = tarinfo.mode2573        if mode is None:2574            # Use mknod's default2575            mode = 0o6002576        if tarinfo.isblk():2577            mode |= stat.S_IFBLK2578        else:2579            mode |= stat.S_IFCHR2580 2581        os.mknod(targetpath, mode,2582                 os.makedev(tarinfo.devmajor, tarinfo.devminor))2583 2584    def makelink(self, tarinfo, targetpath):2585        return self.makelink_with_filter(tarinfo, targetpath, None, None)2586 2587    def makelink_with_filter(self, tarinfo, targetpath,2588                             filter_function, extraction_root):2589        """Make a (symbolic) link called targetpath. If it cannot be created2590          (platform limitation), we try to make a copy of the referenced file2591          instead of a link.2592 2593          filter_function is only used when extracting a *different*2594          member (e.g. as fallback to creating a link).2595        """2596        keyerror_to_extracterror = False2597        try:2598            # For systems that support symbolic and hard links.2599            if tarinfo.issym():2600                if os.path.lexists(targetpath):2601                    # Avoid FileExistsError on following os.symlink.2602                    os.unlink(targetpath)2603                os.symlink(tarinfo.linkname, targetpath)2604                return2605            else:2606                if os.path.exists(tarinfo._link_target):2607                    os.link(tarinfo._link_target, targetpath)2608                    return2609        except symlink_exception:2610            keyerror_to_extracterror = True2611 2612        try:2613            unfiltered = self._find_link_target(tarinfo)2614        except KeyError:2615            if keyerror_to_extracterror:2616                raise ExtractError(2617                    "unable to resolve link inside archive")2618            else:2619                raise2620 2621        if filter_function is None:2622            filtered = unfiltered2623        else:2624            if extraction_root is None:2625                raise ExtractError(2626                    "makelink_with_filter: if filter_function is not None, "2627                    + "extraction_root must also not be None")2628            try:2629                filtered = filter_function(unfiltered, extraction_root)2630            except _FILTER_ERRORS as cause:2631                raise LinkFallbackError(tarinfo, unfiltered.name) from cause2632        if filtered is not None:2633            self._extract_member(filtered, targetpath,2634                                 filter_function=filter_function,2635                                 extraction_root=extraction_root)2636 2637    def chown(self, tarinfo, targetpath, numeric_owner):2638        """Set owner of targetpath according to tarinfo. If numeric_owner2639           is True, use .gid/.uid instead of .gname/.uname. If numeric_owner2640           is False, fall back to .gid/.uid when the search based on name2641           fails.2642        """2643        if hasattr(os, "geteuid") and os.geteuid() == 0:2644            # We have to be root to do so.2645            g = tarinfo.gid2646            u = tarinfo.uid2647            if not numeric_owner:2648                try:2649                    if grp and tarinfo.gname:2650                        g = grp.getgrnam(tarinfo.gname)[2]2651                except KeyError:2652                    pass2653                try:2654                    if pwd and tarinfo.uname:2655                        u = pwd.getpwnam(tarinfo.uname)[2]2656                except KeyError:2657                    pass2658            if g is None:2659                g = -12660            if u is None:2661                u = -12662            try:2663                if tarinfo.issym() and hasattr(os, "lchown"):2664                    os.lchown(targetpath, u, g)2665                else:2666                    os.chown(targetpath, u, g)2667            except OSError:2668                raise ExtractError("could not change owner")2669 2670    def chmod(self, tarinfo, targetpath):2671        """Set file permissions of targetpath according to tarinfo.2672        """2673        if tarinfo.mode is None:2674            return2675        try:2676            os.chmod(targetpath, tarinfo.mode)2677        except OSError:2678            raise ExtractError("could not change mode")2679 2680    def utime(self, tarinfo, targetpath):2681        """Set modification time of targetpath according to tarinfo.2682        """2683        mtime = tarinfo.mtime2684        if mtime is None:2685            return2686        if not hasattr(os, 'utime'):2687            return2688        try:2689            os.utime(targetpath, (mtime, mtime))2690        except OSError:2691            raise ExtractError("could not change modification time")2692 2693    #--------------------------------------------------------------------------2694    def next(self):2695        """Return the next member of the archive as a TarInfo object, when2696           TarFile is opened for reading. Return None if there is no more2697           available.2698        """2699        self._check("ra")2700        if self.firstmember is not None:2701            m = self.firstmember2702            self.firstmember = None2703            return m2704 2705        # Advance the file pointer.2706        if self.offset != self.fileobj.tell():2707            self.fileobj.seek(self.offset - 1)2708            if not self.fileobj.read(1):2709                raise ReadError("unexpected end of data")2710 2711        # Read the next block.2712        tarinfo = None2713        while True:2714            try:2715                tarinfo = self.tarinfo.fromtarfile(self)2716            except EOFHeaderError as e:2717                if self.ignore_zeros:2718                    self._dbg(2, "0x%X: %s" % (self.offset, e))2719                    self.offset += BLOCKSIZE2720                    continue2721            except InvalidHeaderError as e:2722                if self.ignore_zeros:2723                    self._dbg(2, "0x%X: %s" % (self.offset, e))2724                    self.offset += BLOCKSIZE2725                    continue2726                elif self.offset == 0:2727                    raise ReadError(str(e))2728            except EmptyHeaderError:2729                if self.offset == 0:2730                    raise ReadError("empty file")2731            except TruncatedHeaderError as e:2732                if self.offset == 0:2733                    raise ReadError(str(e))2734            except SubsequentHeaderError as e:2735                raise ReadError(str(e))2736            except Exception as e:2737                try:2738                    import zlib2739                    if isinstance(e, zlib.error):2740                        raise ReadError(f'zlib error: {e}')2741                    else:2742                        raise e2743                except ImportError:2744                    raise e2745            break2746 2747        if tarinfo is not None:2748            self.members.append(tarinfo)2749        else:2750            self._loaded = True2751 2752        return tarinfo2753 2754    #--------------------------------------------------------------------------2755    # Little helper methods:2756 2757    def _getmember(self, name, tarinfo=None, normalize=False):2758        """Find an archive member by name from bottom to top.2759           If tarinfo is given, it is used as the starting point.2760        """2761        # Ensure that all members have been loaded.2762        members = self.getmembers()2763 2764        # Limit the member search list up to tarinfo.2765        skipping = False2766        if tarinfo is not None:2767            try:2768                index = members.index(tarinfo)2769            except ValueError:2770                # The given starting point might be a (modified) copy.2771                # We'll later skip members until we find an equivalent.2772                skipping = True2773            else:2774                # Happy fast path2775                members = members[:index]2776 2777        if normalize:2778            name = os.path.normpath(name)2779 2780        for member in reversed(members):2781            if skipping:2782                if tarinfo.offset == member.offset:2783                    skipping = False2784                continue2785            if normalize:2786                member_name = os.path.normpath(member.name)2787            else:2788                member_name = member.name2789 2790            if name == member_name:2791                return member2792 2793        if skipping:2794            # Starting point was not found2795            raise ValueError(tarinfo)2796 2797    def _load(self):2798        """Read through the entire archive file and look for readable2799           members.2800        """2801        while True:2802            tarinfo = self.next()2803            if tarinfo is None:2804                break2805        self._loaded = True2806 2807    def _check(self, mode=None):2808        """Check if TarFile is still open, and if the operation's mode2809           corresponds to TarFile's mode.2810        """2811        if self.closed:2812            raise OSError("%s is closed" % self.__class__.__name__)2813        if mode is not None and self.mode not in mode:2814            raise OSError("bad operation for mode %r" % self.mode)2815 2816    def _find_link_target(self, tarinfo):2817        """Find the target member of a symlink or hardlink member in the2818           archive.2819        """2820        if tarinfo.issym():2821            # Always search the entire archive.2822            linkname = "/".join(filter(None, (os.path.dirname(tarinfo.name), tarinfo.linkname)))2823            limit = None2824        else:2825            # Search the archive before the link, because a hard link is2826            # just a reference to an already archived file.2827            linkname = tarinfo.linkname2828            limit = tarinfo2829 2830        member = self._getmember(linkname, tarinfo=limit, normalize=True)2831        if member is None:2832            raise KeyError("linkname %r not found" % linkname)2833        return member2834 2835    def __iter__(self):2836        """Provide an iterator object.2837        """2838        if self._loaded:2839            yield from self.members2840            return2841 2842        # Yield items using TarFile's next() method.2843        # When all members have been read, set TarFile as _loaded.2844        index = 02845        # Fix for SF #1100429: Under rare circumstances it can2846        # happen that getmembers() is called during iteration,2847        # which will have already exhausted the next() method.2848        if self.firstmember is not None:2849            tarinfo = self.next()2850            index += 12851            yield tarinfo2852 2853        while True:2854            if index < len(self.members):2855                tarinfo = self.members[index]2856            elif not self._loaded:2857                tarinfo = self.next()2858                if not tarinfo:2859                    self._loaded = True2860                    return2861            else:2862                return2863            index += 12864            yield tarinfo2865 2866    def _dbg(self, level, msg):2867        """Write debugging output to sys.stderr.2868        """2869        if level <= self.debug:2870            print(msg, file=sys.stderr)2871 2872    def __enter__(self):2873        self._check()2874        return self2875 2876    def __exit__(self, type, value, traceback):2877        if type is None:2878            self.close()2879        else:2880            # An exception occurred. We must not call close() because2881            # it would try to write end-of-archive blocks and padding.2882            if not self._extfileobj:2883                self.fileobj.close()2884            self.closed = True2885 2886#--------------------2887# exported functions2888#--------------------2889 2890def is_tarfile(name):2891    """Return True if name points to a tar archive that we2892       are able to handle, else return False.2893 2894       'name' should be a string, file, or file-like object.2895    """2896    try:2897        if hasattr(name, "read"):2898            t = open(fileobj=name)2899        else:2900            t = open(name)2901        t.close()2902        return True2903    except TarError:2904        return False2905 2906open = TarFile.open2907 2908 2909def main():2910    import argparse2911 2912    description = 'A simple command-line interface for tarfile module.'2913    parser = argparse.ArgumentParser(description=description)2914    parser.add_argument('-v', '--verbose', action='store_true', default=False,2915                        help='Verbose output')2916    parser.add_argument('--filter', metavar='<filtername>',2917                        choices=_NAMED_FILTERS,2918                        help='Filter for extraction')2919 2920    group = parser.add_mutually_exclusive_group(required=True)2921    group.add_argument('-l', '--list', metavar='<tarfile>',2922                       help='Show listing of a tarfile')2923    group.add_argument('-e', '--extract', nargs='+',2924                       metavar=('<tarfile>', '<output_dir>'),2925                       help='Extract tarfile into target dir')2926    group.add_argument('-c', '--create', nargs='+',2927                       metavar=('<name>', '<file>'),2928                       help='Create tarfile from sources')2929    group.add_argument('-t', '--test', metavar='<tarfile>',2930                       help='Test if a tarfile is valid')2931 2932    args = parser.parse_args()2933 2934    if args.filter and args.extract is None:2935        parser.exit(1, '--filter is only valid for extraction\n')2936 2937    if args.test is not None:2938        src = args.test2939        if is_tarfile(src):2940            with open(src, 'r') as tar:2941                tar.getmembers()2942                print(tar.getmembers(), file=sys.stderr)2943            if args.verbose:2944                print('{!r} is a tar archive.'.format(src))2945        else:2946            parser.exit(1, '{!r} is not a tar archive.\n'.format(src))2947 2948    elif args.list is not None:2949        src = args.list2950        if is_tarfile(src):2951            with TarFile.open(src, 'r:*') as tf:2952                tf.list(verbose=args.verbose)2953        else:2954            parser.exit(1, '{!r} is not a tar archive.\n'.format(src))2955 2956    elif args.extract is not None:2957        if len(args.extract) == 1:2958            src = args.extract[0]2959            curdir = os.curdir2960        elif len(args.extract) == 2:2961            src, curdir = args.extract2962        else:2963            parser.exit(1, parser.format_help())2964 2965        if is_tarfile(src):2966            with TarFile.open(src, 'r:*') as tf:2967                tf.extractall(path=curdir, filter=args.filter)2968            if args.verbose:2969                if curdir == '.':2970                    msg = '{!r} file is extracted.'.format(src)2971                else:2972                    msg = ('{!r} file is extracted '2973                           'into {!r} directory.').format(src, curdir)2974                print(msg)2975        else:2976            parser.exit(1, '{!r} is not a tar archive.\n'.format(src))2977 2978    elif args.create is not None:2979        tar_name = args.create.pop(0)2980        _, ext = os.path.splitext(tar_name)2981        compressions = {2982            # gz2983            '.gz': 'gz',2984            '.tgz': 'gz',2985            # xz2986            '.xz': 'xz',2987            '.txz': 'xz',2988            # bz22989            '.bz2': 'bz2',2990            '.tbz': 'bz2',2991            '.tbz2': 'bz2',2992            '.tb2': 'bz2',2993        }2994        tar_mode = 'w:' + compressions[ext] if ext in compressions else 'w'2995        tar_files = args.create2996 2997        with TarFile.open(tar_name, tar_mode) as tf:2998            for file_name in tar_files:2999                tf.add(file_name)3000 3001        if args.verbose:3002            print('{!r} file created.'.format(tar_name))3003 3004if __name__ == '__main__':3005    main()3006