File Explorer

/proc/self/root/proc/thread-self/root/proc/self/task/13/root/lib64/python3.9/http
This explorer reads the filesystem of the server it runs on, so /workspace/user isn't present here. Browsing and the terminal still work against this server's own disk from /.
1 dir
5 files
server.py47.1 KB · 1314 lines
1"""HTTP server classes.2 3Note: BaseHTTPRequestHandler doesn't implement any HTTP request; see4SimpleHTTPRequestHandler for simple implementations of GET, HEAD and POST,5and CGIHTTPRequestHandler for CGI scripts.6 7It does, however, optionally implement HTTP/1.1 persistent connections,8as of version 0.3.9 10Notes on CGIHTTPRequestHandler11------------------------------12 13This class implements GET and POST requests to cgi-bin scripts.14 15If the os.fork() function is not present (e.g. on Windows),16subprocess.Popen() is used as a fallback, with slightly altered semantics.17 18In all cases, the implementation is intentionally naive -- all19requests are executed synchronously.20 21SECURITY WARNING: DON'T USE THIS CODE UNLESS YOU ARE INSIDE A FIREWALL22-- it may execute arbitrary Python code or external programs.23 24Note that status code 200 is sent prior to execution of a CGI script, so25scripts cannot send other status codes such as 302 (redirect).26 27XXX To do:28 29- log requests even later (to capture byte count)30- log user-agent header and other interesting goodies31- send error log to separate file32"""33 34 35# See also:36#37# HTTP Working Group                                        T. Berners-Lee38# INTERNET-DRAFT                                            R. T. Fielding39# <draft-ietf-http-v10-spec-00.txt>                     H. Frystyk Nielsen40# Expires September 8, 1995                                  March 8, 199541#42# URL: http://www.ics.uci.edu/pub/ietf/http/draft-ietf-http-v10-spec-00.txt43#44# and45#46# Network Working Group                                      R. Fielding47# Request for Comments: 2616                                       et al48# Obsoletes: 2068                                              June 199949# Category: Standards Track50#51# URL: http://www.faqs.org/rfcs/rfc2616.html52 53# Log files54# ---------55#56# Here's a quote from the NCSA httpd docs about log file format.57#58# | The logfile format is as follows. Each line consists of:59# |60# | host rfc931 authuser [DD/Mon/YYYY:hh:mm:ss] "request" ddd bbbb61# |62# |        host: Either the DNS name or the IP number of the remote client63# |        rfc931: Any information returned by identd for this person,64# |                - otherwise.65# |        authuser: If user sent a userid for authentication, the user name,66# |                  - otherwise.67# |        DD: Day68# |        Mon: Month (calendar name)69# |        YYYY: Year70# |        hh: hour (24-hour format, the machine's timezone)71# |        mm: minutes72# |        ss: seconds73# |        request: The first line of the HTTP request as sent by the client.74# |        ddd: the status code returned by the server, - if not available.75# |        bbbb: the total number of bytes sent,76# |              *not including the HTTP/1.0 header*, - if not available77# |78# | You can determine the name of the file accessed through request.79#80# (Actually, the latter is only true if you know the server configuration81# at the time the request was made!)82 83__version__ = "0.6"84 85__all__ = [86    "HTTPServer", "ThreadingHTTPServer", "BaseHTTPRequestHandler",87    "SimpleHTTPRequestHandler", "CGIHTTPRequestHandler",88]89 90import copy91import datetime92import email.utils93import html94import http.client95import io96import itertools97import mimetypes98import os99import posixpath100import select101import shutil102import socket # For gethostbyaddr()103import socketserver104import sys105import time106import urllib.parse107 108from http import HTTPStatus109 110 111# Default error message template112DEFAULT_ERROR_MESSAGE = """\113<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"114        "http://www.w3.org/TR/html4/strict.dtd">115<html>116    <head>117        <meta http-equiv="Content-Type" content="text/html;charset=utf-8">118        <title>Error response</title>119    </head>120    <body>121        <h1>Error response</h1>122        <p>Error code: %(code)d</p>123        <p>Message: %(message)s.</p>124        <p>Error code explanation: %(code)s - %(explain)s.</p>125    </body>126</html>127"""128 129DEFAULT_ERROR_CONTENT_TYPE = "text/html;charset=utf-8"130 131class HTTPServer(socketserver.TCPServer):132 133    allow_reuse_address = 1    # Seems to make sense in testing environment134 135    def server_bind(self):136        """Override server_bind to store the server name."""137        socketserver.TCPServer.server_bind(self)138        host, port = self.server_address[:2]139        self.server_name = socket.getfqdn(host)140        self.server_port = port141 142 143class ThreadingHTTPServer(socketserver.ThreadingMixIn, HTTPServer):144    daemon_threads = True145 146 147class BaseHTTPRequestHandler(socketserver.StreamRequestHandler):148 149    """HTTP request handler base class.150 151    The following explanation of HTTP serves to guide you through the152    code as well as to expose any misunderstandings I may have about153    HTTP (so you don't need to read the code to figure out I'm wrong154    :-).155 156    HTTP (HyperText Transfer Protocol) is an extensible protocol on157    top of a reliable stream transport (e.g. TCP/IP).  The protocol158    recognizes three parts to a request:159 160    1. One line identifying the request type and path161    2. An optional set of RFC-822-style headers162    3. An optional data part163 164    The headers and data are separated by a blank line.165 166    The first line of the request has the form167 168    <command> <path> <version>169 170    where <command> is a (case-sensitive) keyword such as GET or POST,171    <path> is a string containing path information for the request,172    and <version> should be the string "HTTP/1.0" or "HTTP/1.1".173    <path> is encoded using the URL encoding scheme (using %xx to signify174    the ASCII character with hex code xx).175 176    The specification specifies that lines are separated by CRLF but177    for compatibility with the widest range of clients recommends178    servers also handle LF.  Similarly, whitespace in the request line179    is treated sensibly (allowing multiple spaces between components180    and allowing trailing whitespace).181 182    Similarly, for output, lines ought to be separated by CRLF pairs183    but most clients grok LF characters just fine.184 185    If the first line of the request has the form186 187    <command> <path>188 189    (i.e. <version> is left out) then this is assumed to be an HTTP190    0.9 request; this form has no optional headers and data part and191    the reply consists of just the data.192 193    The reply form of the HTTP 1.x protocol again has three parts:194 195    1. One line giving the response code196    2. An optional set of RFC-822-style headers197    3. The data198 199    Again, the headers and data are separated by a blank line.200 201    The response code line has the form202 203    <version> <responsecode> <responsestring>204 205    where <version> is the protocol version ("HTTP/1.0" or "HTTP/1.1"),206    <responsecode> is a 3-digit response code indicating success or207    failure of the request, and <responsestring> is an optional208    human-readable string explaining what the response code means.209 210    This server parses the request and the headers, and then calls a211    function specific to the request type (<command>).  Specifically,212    a request SPAM will be handled by a method do_SPAM().  If no213    such method exists the server sends an error response to the214    client.  If it exists, it is called with no arguments:215 216    do_SPAM()217 218    Note that the request name is case sensitive (i.e. SPAM and spam219    are different requests).220 221    The various request details are stored in instance variables:222 223    - client_address is the client IP address in the form (host,224    port);225 226    - command, path and version are the broken-down request line;227 228    - headers is an instance of email.message.Message (or a derived229    class) containing the header information;230 231    - rfile is a file object open for reading positioned at the232    start of the optional input data part;233 234    - wfile is a file object open for writing.235 236    IT IS IMPORTANT TO ADHERE TO THE PROTOCOL FOR WRITING!237 238    The first thing to be written must be the response line.  Then239    follow 0 or more header lines, then a blank line, and then the240    actual data (if any).  The meaning of the header lines depends on241    the command executed by the server; in most cases, when data is242    returned, there should be at least one header line of the form243 244    Content-type: <type>/<subtype>245 246    where <type> and <subtype> should be registered MIME types,247    e.g. "text/html" or "text/plain".248 249    """250 251    # The Python system version, truncated to its first component.252    sys_version = "Python/" + sys.version.split()[0]253 254    # The server software version.  You may want to override this.255    # The format is multiple whitespace-separated strings,256    # where each string is of the form name[/version].257    server_version = "BaseHTTP/" + __version__258 259    error_message_format = DEFAULT_ERROR_MESSAGE260    error_content_type = DEFAULT_ERROR_CONTENT_TYPE261 262    # The default request version.  This only affects responses up until263    # the point where the request line is parsed, so it mainly decides what264    # the client gets back when sending a malformed request line.265    # Most web servers default to HTTP 0.9, i.e. don't send a status line.266    default_request_version = "HTTP/0.9"267 268    def parse_request(self):269        """Parse a request (internal).270 271        The request should be stored in self.raw_requestline; the results272        are in self.command, self.path, self.request_version and273        self.headers.274 275        Return True for success, False for failure; on failure, any relevant276        error response has already been sent back.277 278        """279        self.command = None  # set in case of error on the first line280        self.request_version = version = self.default_request_version281        self.close_connection = True282        requestline = str(self.raw_requestline, 'iso-8859-1')283        requestline = requestline.rstrip('\r\n')284        self.requestline = requestline285        words = requestline.split()286        if len(words) == 0:287            return False288 289        if len(words) >= 3:  # Enough to determine protocol version290            version = words[-1]291            try:292                if not version.startswith('HTTP/'):293                    raise ValueError294                base_version_number = version.split('/', 1)[1]295                version_number = base_version_number.split(".")296                # RFC 2145 section 3.1 says there can be only one "." and297                #   - major and minor numbers MUST be treated as298                #      separate integers;299                #   - HTTP/2.4 is a lower version than HTTP/2.13, which in300                #      turn is lower than HTTP/12.3;301                #   - Leading zeros MUST be ignored by recipients.302                if len(version_number) != 2:303                    raise ValueError304                version_number = int(version_number[0]), int(version_number[1])305            except (ValueError, IndexError):306                self.send_error(307                    HTTPStatus.BAD_REQUEST,308                    "Bad request version (%r)" % version)309                return False310            if version_number >= (1, 1) and self.protocol_version >= "HTTP/1.1":311                self.close_connection = False312            if version_number >= (2, 0):313                self.send_error(314                    HTTPStatus.HTTP_VERSION_NOT_SUPPORTED,315                    "Invalid HTTP version (%s)" % base_version_number)316                return False317            self.request_version = version318 319        if not 2 <= len(words) <= 3:320            self.send_error(321                HTTPStatus.BAD_REQUEST,322                "Bad request syntax (%r)" % requestline)323            return False324        command, path = words[:2]325        if len(words) == 2:326            self.close_connection = True327            if command != 'GET':328                self.send_error(329                    HTTPStatus.BAD_REQUEST,330                    "Bad HTTP/0.9 request type (%r)" % command)331                return False332        self.command, self.path = command, path333 334        # gh-87389: The purpose of replacing '//' with '/' is to protect335        # against open redirect attacks possibly triggered if the path starts336        # with '//' because http clients treat //path as an absolute URI337        # without scheme (similar to http://path) rather than a path.338        if self.path.startswith('//'):339            self.path = '/' + self.path.lstrip('/')  # Reduce to a single /340 341        # Examine the headers and look for a Connection directive.342        try:343            self.headers = http.client.parse_headers(self.rfile,344                                                     _class=self.MessageClass)345        except http.client.LineTooLong as err:346            self.send_error(347                HTTPStatus.REQUEST_HEADER_FIELDS_TOO_LARGE,348                "Line too long",349                str(err))350            return False351        except http.client.HTTPException as err:352            self.send_error(353                HTTPStatus.REQUEST_HEADER_FIELDS_TOO_LARGE,354                "Too many headers",355                str(err)356            )357            return False358 359        conntype = self.headers.get('Connection', "")360        if conntype.lower() == 'close':361            self.close_connection = True362        elif (conntype.lower() == 'keep-alive' and363              self.protocol_version >= "HTTP/1.1"):364            self.close_connection = False365        # Examine the headers and look for an Expect directive366        expect = self.headers.get('Expect', "")367        if (expect.lower() == "100-continue" and368                self.protocol_version >= "HTTP/1.1" and369                self.request_version >= "HTTP/1.1"):370            if not self.handle_expect_100():371                return False372        return True373 374    def handle_expect_100(self):375        """Decide what to do with an "Expect: 100-continue" header.376 377        If the client is expecting a 100 Continue response, we must378        respond with either a 100 Continue or a final response before379        waiting for the request body. The default is to always respond380        with a 100 Continue. You can behave differently (for example,381        reject unauthorized requests) by overriding this method.382 383        This method should either return True (possibly after sending384        a 100 Continue response) or send an error response and return385        False.386 387        """388        self.send_response_only(HTTPStatus.CONTINUE)389        self.end_headers()390        return True391 392    def handle_one_request(self):393        """Handle a single HTTP request.394 395        You normally don't need to override this method; see the class396        __doc__ string for information on how to handle specific HTTP397        commands such as GET and POST.398 399        """400        try:401            self.raw_requestline = self.rfile.readline(65537)402            if len(self.raw_requestline) > 65536:403                self.requestline = ''404                self.request_version = ''405                self.command = ''406                self.send_error(HTTPStatus.REQUEST_URI_TOO_LONG)407                return408            if not self.raw_requestline:409                self.close_connection = True410                return411            if not self.parse_request():412                # An error code has been sent, just exit413                return414            mname = 'do_' + self.command415            if not hasattr(self, mname):416                self.send_error(417                    HTTPStatus.NOT_IMPLEMENTED,418                    "Unsupported method (%r)" % self.command)419                return420            method = getattr(self, mname)421            method()422            self.wfile.flush() #actually send the response if not already done.423        except socket.timeout as e:424            #a read or a write timed out.  Discard this connection425            self.log_error("Request timed out: %r", e)426            self.close_connection = True427            return428 429    def handle(self):430        """Handle multiple requests if necessary."""431        self.close_connection = True432 433        self.handle_one_request()434        while not self.close_connection:435            self.handle_one_request()436 437    def send_error(self, code, message=None, explain=None):438        """Send and log an error reply.439 440        Arguments are441        * code:    an HTTP error code442                   3 digits443        * message: a simple optional 1 line reason phrase.444                   *( HTAB / SP / VCHAR / %x80-FF )445                   defaults to short entry matching the response code446        * explain: a detailed message defaults to the long entry447                   matching the response code.448 449        This sends an error response (so it must be called before any450        output has been generated), logs the error, and finally sends451        a piece of HTML explaining the error to the user.452 453        """454 455        try:456            shortmsg, longmsg = self.responses[code]457        except KeyError:458            shortmsg, longmsg = '???', '???'459        if message is None:460            message = shortmsg461        if explain is None:462            explain = longmsg463        self.log_error("code %d, message %s", code, message)464        self.send_response(code, message)465        self.send_header('Connection', 'close')466 467        # Message body is omitted for cases described in:468        #  - RFC7230: 3.3. 1xx, 204(No Content), 304(Not Modified)469        #  - RFC7231: 6.3.6. 205(Reset Content)470        body = None471        if (code >= 200 and472            code not in (HTTPStatus.NO_CONTENT,473                         HTTPStatus.RESET_CONTENT,474                         HTTPStatus.NOT_MODIFIED)):475            # HTML encode to prevent Cross Site Scripting attacks476            # (see bug #1100201)477            content = (self.error_message_format % {478                'code': code,479                'message': html.escape(message, quote=False),480                'explain': html.escape(explain, quote=False)481            })482            body = content.encode('UTF-8', 'replace')483            self.send_header("Content-Type", self.error_content_type)484            self.send_header('Content-Length', str(len(body)))485        self.end_headers()486 487        if self.command != 'HEAD' and body:488            self.wfile.write(body)489 490    def send_response(self, code, message=None):491        """Add the response header to the headers buffer and log the492        response code.493 494        Also send two standard headers with the server software495        version and the current date.496 497        """498        self.log_request(code)499        self.send_response_only(code, message)500        self.send_header('Server', self.version_string())501        self.send_header('Date', self.date_time_string())502 503    def send_response_only(self, code, message=None):504        """Send the response header only."""505        if self.request_version != 'HTTP/0.9':506            if message is None:507                if code in self.responses:508                    message = self.responses[code][0]509                else:510                    message = ''511            if not hasattr(self, '_headers_buffer'):512                self._headers_buffer = []513            self._headers_buffer.append(("%s %d %s\r\n" %514                    (self.protocol_version, code, message)).encode(515                        'latin-1', 'strict'))516 517    def send_header(self, keyword, value):518        """Send a MIME header to the headers buffer."""519        if self.request_version != 'HTTP/0.9':520            if not hasattr(self, '_headers_buffer'):521                self._headers_buffer = []522            self._headers_buffer.append(523                ("%s: %s\r\n" % (keyword, value)).encode('latin-1', 'strict'))524 525        if keyword.lower() == 'connection':526            if value.lower() == 'close':527                self.close_connection = True528            elif value.lower() == 'keep-alive':529                self.close_connection = False530 531    def end_headers(self):532        """Send the blank line ending the MIME headers."""533        if self.request_version != 'HTTP/0.9':534            self._headers_buffer.append(b"\r\n")535            self.flush_headers()536 537    def flush_headers(self):538        if hasattr(self, '_headers_buffer'):539            self.wfile.write(b"".join(self._headers_buffer))540            self._headers_buffer = []541 542    def log_request(self, code='-', size='-'):543        """Log an accepted request.544 545        This is called by send_response().546 547        """548        if isinstance(code, HTTPStatus):549            code = code.value550        self.log_message('"%s" %s %s',551                         self.requestline, str(code), str(size))552 553    def log_error(self, format, *args):554        """Log an error.555 556        This is called when a request cannot be fulfilled.  By557        default it passes the message on to log_message().558 559        Arguments are the same as for log_message().560 561        XXX This should go to the separate error log.562 563        """564 565        self.log_message(format, *args)566 567    # https://en.wikipedia.org/wiki/List_of_Unicode_characters#Control_codes568    _control_char_table = str.maketrans(569            {c: fr'\x{c:02x}' for c in itertools.chain(range(0x20), range(0x7f,0xa0))})570    _control_char_table[ord('\\')] = r'\\'571 572    def log_message(self, format, *args):573        """Log an arbitrary message.574 575        This is used by all other logging functions.  Override576        it if you have specific logging wishes.577 578        The first argument, FORMAT, is a format string for the579        message to be logged.  If the format string contains580        any % escapes requiring parameters, they should be581        specified as subsequent arguments (it's just like582        printf!).583 584        The client ip and current date/time are prefixed to585        every message.586 587        Unicode control characters are replaced with escaped hex588        before writing the output to stderr.589 590        """591 592        message = format % args593        sys.stderr.write("%s - - [%s] %s\n" %594                         (self.address_string(),595                          self.log_date_time_string(),596                          message.translate(self._control_char_table)))597 598    def version_string(self):599        """Return the server software version string."""600        return self.server_version + ' ' + self.sys_version601 602    def date_time_string(self, timestamp=None):603        """Return the current date and time formatted for a message header."""604        if timestamp is None:605            timestamp = time.time()606        return email.utils.formatdate(timestamp, usegmt=True)607 608    def log_date_time_string(self):609        """Return the current time formatted for logging."""610        now = time.time()611        year, month, day, hh, mm, ss, x, y, z = time.localtime(now)612        s = "%02d/%3s/%04d %02d:%02d:%02d" % (613                day, self.monthname[month], year, hh, mm, ss)614        return s615 616    weekdayname = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']617 618    monthname = [None,619                 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',620                 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']621 622    def address_string(self):623        """Return the client address."""624 625        return self.client_address[0]626 627    # Essentially static class variables628 629    # The version of the HTTP protocol we support.630    # Set this to HTTP/1.1 to enable automatic keepalive631    protocol_version = "HTTP/1.0"632 633    # MessageClass used to parse headers634    MessageClass = http.client.HTTPMessage635 636    # hack to maintain backwards compatibility637    responses = {638        v: (v.phrase, v.description)639        for v in HTTPStatus.__members__.values()640    }641 642 643class SimpleHTTPRequestHandler(BaseHTTPRequestHandler):644 645    """Simple HTTP request handler with GET and HEAD commands.646 647    This serves files from the current directory and any of its648    subdirectories.  The MIME type for files is determined by649    calling the .guess_type() method.650 651    The GET and HEAD requests are identical except that the HEAD652    request omits the actual contents of the file.653 654    """655 656    server_version = "SimpleHTTP/" + __version__657    extensions_map = _encodings_map_default = {658        '.gz': 'application/gzip',659        '.Z': 'application/octet-stream',660        '.bz2': 'application/x-bzip2',661        '.xz': 'application/x-xz',662    }663 664    def __init__(self, *args, directory=None, **kwargs):665        if directory is None:666            directory = os.getcwd()667        self.directory = os.fspath(directory)668        super().__init__(*args, **kwargs)669 670    def do_GET(self):671        """Serve a GET request."""672        f = self.send_head()673        if f:674            try:675                self.copyfile(f, self.wfile)676            finally:677                f.close()678 679    def do_HEAD(self):680        """Serve a HEAD request."""681        f = self.send_head()682        if f:683            f.close()684 685    def send_head(self):686        """Common code for GET and HEAD commands.687 688        This sends the response code and MIME headers.689 690        Return value is either a file object (which has to be copied691        to the outputfile by the caller unless the command was HEAD,692        and must be closed by the caller under all circumstances), or693        None, in which case the caller has nothing further to do.694 695        """696        path = self.translate_path(self.path)697        f = None698        if os.path.isdir(path):699            parts = urllib.parse.urlsplit(self.path)700            if not parts.path.endswith('/'):701                # redirect browser - doing basically what apache does702                self.send_response(HTTPStatus.MOVED_PERMANENTLY)703                new_parts = (parts[0], parts[1], parts[2] + '/',704                             parts[3], parts[4])705                new_url = urllib.parse.urlunsplit(new_parts)706                self.send_header("Location", new_url)707                self.send_header("Content-Length", "0")708                self.end_headers()709                return None710            for index in "index.html", "index.htm":711                index = os.path.join(path, index)712                if os.path.exists(index):713                    path = index714                    break715            else:716                return self.list_directory(path)717        ctype = self.guess_type(path)718        # check for trailing "/" which should return 404. See Issue17324719        # The test for this was added in test_httpserver.py720        # However, some OS platforms accept a trailingSlash as a filename721        # See discussion on python-dev and Issue34711 regarding722        # parseing and rejection of filenames with a trailing slash723        if path.endswith("/"):724            self.send_error(HTTPStatus.NOT_FOUND, "File not found")725            return None726        try:727            f = open(path, 'rb')728        except OSError:729            self.send_error(HTTPStatus.NOT_FOUND, "File not found")730            return None731 732        try:733            fs = os.fstat(f.fileno())734            # Use browser cache if possible735            if ("If-Modified-Since" in self.headers736                    and "If-None-Match" not in self.headers):737                # compare If-Modified-Since and time of last file modification738                try:739                    ims = email.utils.parsedate_to_datetime(740                        self.headers["If-Modified-Since"])741                except (TypeError, IndexError, OverflowError, ValueError):742                    # ignore ill-formed values743                    pass744                else:745                    if ims.tzinfo is None:746                        # obsolete format with no timezone, cf.747                        # https://tools.ietf.org/html/rfc7231#section-7.1.1.1748                        ims = ims.replace(tzinfo=datetime.timezone.utc)749                    if ims.tzinfo is datetime.timezone.utc:750                        # compare to UTC datetime of last modification751                        last_modif = datetime.datetime.fromtimestamp(752                            fs.st_mtime, datetime.timezone.utc)753                        # remove microseconds, like in If-Modified-Since754                        last_modif = last_modif.replace(microsecond=0)755 756                        if last_modif <= ims:757                            self.send_response(HTTPStatus.NOT_MODIFIED)758                            self.end_headers()759                            f.close()760                            return None761 762            self.send_response(HTTPStatus.OK)763            self.send_header("Content-type", ctype)764            self.send_header("Content-Length", str(fs[6]))765            self.send_header("Last-Modified",766                self.date_time_string(fs.st_mtime))767            self.end_headers()768            return f769        except:770            f.close()771            raise772 773    def list_directory(self, path):774        """Helper to produce a directory listing (absent index.html).775 776        Return value is either a file object, or None (indicating an777        error).  In either case, the headers are sent, making the778        interface the same as for send_head().779 780        """781        try:782            list = os.listdir(path)783        except OSError:784            self.send_error(785                HTTPStatus.NOT_FOUND,786                "No permission to list directory")787            return None788        list.sort(key=lambda a: a.lower())789        r = []790        try:791            displaypath = urllib.parse.unquote(self.path,792                                               errors='surrogatepass')793        except UnicodeDecodeError:794            displaypath = urllib.parse.unquote(self.path)795        displaypath = html.escape(displaypath, quote=False)796        enc = sys.getfilesystemencoding()797        title = 'Directory listing for %s' % displaypath798        r.append('<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" '799                 '"http://www.w3.org/TR/html4/strict.dtd">')800        r.append('<html>\n<head>')801        r.append('<meta http-equiv="Content-Type" '802                 'content="text/html; charset=%s">' % enc)803        r.append('<title>%s</title>\n</head>' % title)804        r.append('<body>\n<h1>%s</h1>' % title)805        r.append('<hr>\n<ul>')806        for name in list:807            fullname = os.path.join(path, name)808            displayname = linkname = name809            # Append / for directories or @ for symbolic links810            if os.path.isdir(fullname):811                displayname = name + "/"812                linkname = name + "/"813            if os.path.islink(fullname):814                displayname = name + "@"815                # Note: a link to a directory displays with @ and links with /816            r.append('<li><a href="%s">%s</a></li>'817                    % (urllib.parse.quote(linkname,818                                          errors='surrogatepass'),819                       html.escape(displayname, quote=False)))820        r.append('</ul>\n<hr>\n</body>\n</html>\n')821        encoded = '\n'.join(r).encode(enc, 'surrogateescape')822        f = io.BytesIO()823        f.write(encoded)824        f.seek(0)825        self.send_response(HTTPStatus.OK)826        self.send_header("Content-type", "text/html; charset=%s" % enc)827        self.send_header("Content-Length", str(len(encoded)))828        self.end_headers()829        return f830 831    def translate_path(self, path):832        """Translate a /-separated PATH to the local filename syntax.833 834        Components that mean special things to the local file system835        (e.g. drive or directory names) are ignored.  (XXX They should836        probably be diagnosed.)837 838        """839        # abandon query parameters840        path = path.split('?',1)[0]841        path = path.split('#',1)[0]842        # Don't forget explicit trailing slash when normalizing. Issue17324843        trailing_slash = path.rstrip().endswith('/')844        try:845            path = urllib.parse.unquote(path, errors='surrogatepass')846        except UnicodeDecodeError:847            path = urllib.parse.unquote(path)848        path = posixpath.normpath(path)849        words = path.split('/')850        words = filter(None, words)851        path = self.directory852        for word in words:853            if os.path.dirname(word) or word in (os.curdir, os.pardir):854                # Ignore components that are not a simple file/directory name855                continue856            path = os.path.join(path, word)857        if trailing_slash:858            path += '/'859        return path860 861    def copyfile(self, source, outputfile):862        """Copy all data between two file objects.863 864        The SOURCE argument is a file object open for reading865        (or anything with a read() method) and the DESTINATION866        argument is a file object open for writing (or867        anything with a write() method).868 869        The only reason for overriding this would be to change870        the block size or perhaps to replace newlines by CRLF871        -- note however that this the default server uses this872        to copy binary data as well.873 874        """875        shutil.copyfileobj(source, outputfile)876 877    def guess_type(self, path):878        """Guess the type of a file.879 880        Argument is a PATH (a filename).881 882        Return value is a string of the form type/subtype,883        usable for a MIME Content-type header.884 885        The default implementation looks the file's extension886        up in the table self.extensions_map, using application/octet-stream887        as a default; however it would be permissible (if888        slow) to look inside the data to make a better guess.889 890        """891        base, ext = posixpath.splitext(path)892        if ext in self.extensions_map:893            return self.extensions_map[ext]894        ext = ext.lower()895        if ext in self.extensions_map:896            return self.extensions_map[ext]897        guess, _ = mimetypes.guess_type(path)898        if guess:899            return guess900        return 'application/octet-stream'901 902 903# Utilities for CGIHTTPRequestHandler904 905def _url_collapse_path(path):906    """907    Given a URL path, remove extra '/'s and '.' path elements and collapse908    any '..' references and returns a collapsed path.909 910    Implements something akin to RFC-2396 5.2 step 6 to parse relative paths.911    The utility of this function is limited to is_cgi method and helps912    preventing some security attacks.913 914    Returns: The reconstituted URL, which will always start with a '/'.915 916    Raises: IndexError if too many '..' occur within the path.917 918    """919    # Query component should not be involved.920    path, _, query = path.partition('?')921    path = urllib.parse.unquote(path)922 923    # Similar to os.path.split(os.path.normpath(path)) but specific to URL924    # path semantics rather than local operating system semantics.925    path_parts = path.split('/')926    head_parts = []927    for part in path_parts[:-1]:928        if part == '..':929            head_parts.pop() # IndexError if more '..' than prior parts930        elif part and part != '.':931            head_parts.append( part )932    if path_parts:933        tail_part = path_parts.pop()934        if tail_part:935            if tail_part == '..':936                head_parts.pop()937                tail_part = ''938            elif tail_part == '.':939                tail_part = ''940    else:941        tail_part = ''942 943    if query:944        tail_part = '?'.join((tail_part, query))945 946    splitpath = ('/' + '/'.join(head_parts), tail_part)947    collapsed_path = "/".join(splitpath)948 949    return collapsed_path950 951 952 953nobody = None954 955def nobody_uid():956    """Internal routine to get nobody's uid"""957    global nobody958    if nobody:959        return nobody960    try:961        import pwd962    except ImportError:963        return -1964    try:965        nobody = pwd.getpwnam('nobody')[2]966    except KeyError:967        nobody = 1 + max(x[2] for x in pwd.getpwall())968    return nobody969 970 971def executable(path):972    """Test for executable file."""973    return os.access(path, os.X_OK)974 975 976class CGIHTTPRequestHandler(SimpleHTTPRequestHandler):977 978    """Complete HTTP server with GET, HEAD and POST commands.979 980    GET and HEAD also support running CGI scripts.981 982    The POST command is *only* implemented for CGI scripts.983 984    """985 986    # Determine platform specifics987    have_fork = hasattr(os, 'fork')988 989    # Make rfile unbuffered -- we need to read one line and then pass990    # the rest to a subprocess, so we can't use buffered input.991    rbufsize = 0992 993    def do_POST(self):994        """Serve a POST request.995 996        This is only implemented for CGI scripts.997 998        """999 1000        if self.is_cgi():1001            self.run_cgi()1002        else:1003            self.send_error(1004                HTTPStatus.NOT_IMPLEMENTED,1005                "Can only POST to CGI scripts")1006 1007    def send_head(self):1008        """Version of send_head that support CGI scripts"""1009        if self.is_cgi():1010            return self.run_cgi()1011        else:1012            return SimpleHTTPRequestHandler.send_head(self)1013 1014    def is_cgi(self):1015        """Test whether self.path corresponds to a CGI script.1016 1017        Returns True and updates the cgi_info attribute to the tuple1018        (dir, rest) if self.path requires running a CGI script.1019        Returns False otherwise.1020 1021        If any exception is raised, the caller should assume that1022        self.path was rejected as invalid and act accordingly.1023 1024        The default implementation tests whether the normalized url1025        path begins with one of the strings in self.cgi_directories1026        (and the next character is a '/' or the end of the string).1027 1028        """1029        collapsed_path = _url_collapse_path(self.path)1030        dir_sep = collapsed_path.find('/', 1)1031        while dir_sep > 0 and not collapsed_path[:dir_sep] in self.cgi_directories:1032            dir_sep = collapsed_path.find('/', dir_sep+1)1033        if dir_sep > 0:1034            head, tail = collapsed_path[:dir_sep], collapsed_path[dir_sep+1:]1035            self.cgi_info = head, tail1036            return True1037        return False1038 1039 1040    cgi_directories = ['/cgi-bin', '/htbin']1041 1042    def is_executable(self, path):1043        """Test whether argument path is an executable file."""1044        return executable(path)1045 1046    def is_python(self, path):1047        """Test whether argument path is a Python script."""1048        head, tail = os.path.splitext(path)1049        return tail.lower() in (".py", ".pyw")1050 1051    def run_cgi(self):1052        """Execute a CGI script."""1053        dir, rest = self.cgi_info1054        path = dir + '/' + rest1055        i = path.find('/', len(dir)+1)1056        while i >= 0:1057            nextdir = path[:i]1058            nextrest = path[i+1:]1059 1060            scriptdir = self.translate_path(nextdir)1061            if os.path.isdir(scriptdir):1062                dir, rest = nextdir, nextrest1063                i = path.find('/', len(dir)+1)1064            else:1065                break1066 1067        # find an explicit query string, if present.1068        rest, _, query = rest.partition('?')1069 1070        # dissect the part after the directory name into a script name &1071        # a possible additional path, to be stored in PATH_INFO.1072        i = rest.find('/')1073        if i >= 0:1074            script, rest = rest[:i], rest[i:]1075        else:1076            script, rest = rest, ''1077 1078        scriptname = dir + '/' + script1079        scriptfile = self.translate_path(scriptname)1080        if not os.path.exists(scriptfile):1081            self.send_error(1082                HTTPStatus.NOT_FOUND,1083                "No such CGI script (%r)" % scriptname)1084            return1085        if not os.path.isfile(scriptfile):1086            self.send_error(1087                HTTPStatus.FORBIDDEN,1088                "CGI script is not a plain file (%r)" % scriptname)1089            return1090        ispy = self.is_python(scriptname)1091        if self.have_fork or not ispy:1092            if not self.is_executable(scriptfile):1093                self.send_error(1094                    HTTPStatus.FORBIDDEN,1095                    "CGI script is not executable (%r)" % scriptname)1096                return1097 1098        # Reference: http://hoohoo.ncsa.uiuc.edu/cgi/env.html1099        # XXX Much of the following could be prepared ahead of time!1100        env = copy.deepcopy(os.environ)1101        env['SERVER_SOFTWARE'] = self.version_string()1102        env['SERVER_NAME'] = self.server.server_name1103        env['GATEWAY_INTERFACE'] = 'CGI/1.1'1104        env['SERVER_PROTOCOL'] = self.protocol_version1105        env['SERVER_PORT'] = str(self.server.server_port)1106        env['REQUEST_METHOD'] = self.command1107        uqrest = urllib.parse.unquote(rest)1108        env['PATH_INFO'] = uqrest1109        env['PATH_TRANSLATED'] = self.translate_path(uqrest)1110        env['SCRIPT_NAME'] = scriptname1111        if query:1112            env['QUERY_STRING'] = query1113        env['REMOTE_ADDR'] = self.client_address[0]1114        authorization = self.headers.get("authorization")1115        if authorization:1116            authorization = authorization.split()1117            if len(authorization) == 2:1118                import base64, binascii1119                env['AUTH_TYPE'] = authorization[0]1120                if authorization[0].lower() == "basic":1121                    try:1122                        authorization = authorization[1].encode('ascii')1123                        authorization = base64.decodebytes(authorization).\1124                                        decode('ascii')1125                    except (binascii.Error, UnicodeError):1126                        pass1127                    else:1128                        authorization = authorization.split(':')1129                        if len(authorization) == 2:1130                            env['REMOTE_USER'] = authorization[0]1131        # XXX REMOTE_IDENT1132        if self.headers.get('content-type') is None:1133            env['CONTENT_TYPE'] = self.headers.get_content_type()1134        else:1135            env['CONTENT_TYPE'] = self.headers['content-type']1136        length = self.headers.get('content-length')1137        if length:1138            env['CONTENT_LENGTH'] = length1139        referer = self.headers.get('referer')1140        if referer:1141            env['HTTP_REFERER'] = referer1142        accept = self.headers.get_all('accept', ())1143        env['HTTP_ACCEPT'] = ','.join(accept)1144        ua = self.headers.get('user-agent')1145        if ua:1146            env['HTTP_USER_AGENT'] = ua1147        co = filter(None, self.headers.get_all('cookie', []))1148        cookie_str = ', '.join(co)1149        if cookie_str:1150            env['HTTP_COOKIE'] = cookie_str1151        # XXX Other HTTP_* headers1152        # Since we're setting the env in the parent, provide empty1153        # values to override previously set values1154        for k in ('QUERY_STRING', 'REMOTE_HOST', 'CONTENT_LENGTH',1155                  'HTTP_USER_AGENT', 'HTTP_COOKIE', 'HTTP_REFERER'):1156            env.setdefault(k, "")1157 1158        self.send_response(HTTPStatus.OK, "Script output follows")1159        self.flush_headers()1160 1161        decoded_query = query.replace('+', ' ')1162 1163        if self.have_fork:1164            # Unix -- fork as we should1165            args = [script]1166            if '=' not in decoded_query:1167                args.append(decoded_query)1168            nobody = nobody_uid()1169            self.wfile.flush() # Always flush before forking1170            pid = os.fork()1171            if pid != 0:1172                # Parent1173                pid, sts = os.waitpid(pid, 0)1174                # throw away additional data [see bug #427345]1175                while select.select([self.rfile], [], [], 0)[0]:1176                    if not self.rfile.read(1):1177                        break1178                exitcode = os.waitstatus_to_exitcode(sts)1179                if exitcode:1180                    self.log_error(f"CGI script exit code {exitcode}")1181                return1182            # Child1183            try:1184                try:1185                    os.setuid(nobody)1186                except OSError:1187                    pass1188                os.dup2(self.rfile.fileno(), 0)1189                os.dup2(self.wfile.fileno(), 1)1190                os.execve(scriptfile, args, env)1191            except:1192                self.server.handle_error(self.request, self.client_address)1193                os._exit(127)1194 1195        else:1196            # Non-Unix -- use subprocess1197            import subprocess1198            cmdline = [scriptfile]1199            if self.is_python(scriptfile):1200                interp = sys.executable1201                if interp.lower().endswith("w.exe"):1202                    # On Windows, use python.exe, not pythonw.exe1203                    interp = interp[:-5] + interp[-4:]1204                cmdline = [interp, '-u'] + cmdline1205            if '=' not in query:1206                cmdline.append(query)1207            self.log_message("command: %s", subprocess.list2cmdline(cmdline))1208            try:1209                nbytes = int(length)1210            except (TypeError, ValueError):1211                nbytes = 01212            p = subprocess.Popen(cmdline,1213                                 stdin=subprocess.PIPE,1214                                 stdout=subprocess.PIPE,1215                                 stderr=subprocess.PIPE,1216                                 env = env1217                                 )1218            if self.command.lower() == "post" and nbytes > 0:1219                data = self.rfile.read(nbytes)1220            else:1221                data = None1222            # throw away additional data [see bug #427345]1223            while select.select([self.rfile._sock], [], [], 0)[0]:1224                if not self.rfile._sock.recv(1):1225                    break1226            stdout, stderr = p.communicate(data)1227            self.wfile.write(stdout)1228            if stderr:1229                self.log_error('%s', stderr)1230            p.stderr.close()1231            p.stdout.close()1232            status = p.returncode1233            if status:1234                self.log_error("CGI script exit status %#x", status)1235            else:1236                self.log_message("CGI script exited OK")1237 1238 1239def _get_best_family(*address):1240    infos = socket.getaddrinfo(1241        *address,1242        type=socket.SOCK_STREAM,1243        flags=socket.AI_PASSIVE,1244    )1245    family, type, proto, canonname, sockaddr = next(iter(infos))1246    return family, sockaddr1247 1248 1249def test(HandlerClass=BaseHTTPRequestHandler,1250         ServerClass=ThreadingHTTPServer,1251         protocol="HTTP/1.0", port=8000, bind=None):1252    """Test the HTTP request handler class.1253 1254    This runs an HTTP server on port 8000 (or the port argument).1255 1256    """1257    ServerClass.address_family, addr = _get_best_family(bind, port)1258    HandlerClass.protocol_version = protocol1259    with ServerClass(addr, HandlerClass) as httpd:1260        host, port = httpd.socket.getsockname()[:2]1261        url_host = f'[{host}]' if ':' in host else host1262        print(1263            f"Serving HTTP on {host} port {port} "1264            f"(http://{url_host}:{port}/) ..."1265        )1266        try:1267            httpd.serve_forever()1268        except KeyboardInterrupt:1269            print("\nKeyboard interrupt received, exiting.")1270            sys.exit(0)1271 1272if __name__ == '__main__':1273    import argparse1274    import contextlib1275 1276    parser = argparse.ArgumentParser()1277    parser.add_argument('--cgi', action='store_true',1278                        help='run as CGI server')1279    parser.add_argument('--bind', '-b', metavar='ADDRESS',1280                        help='specify alternate bind address '1281                             '(default: all interfaces)')1282    parser.add_argument('--directory', '-d', default=os.getcwd(),1283                        help='specify alternate directory '1284                             '(default: current directory)')1285    parser.add_argument('port', action='store', default=8000, type=int,1286                        nargs='?',1287                        help='specify alternate port (default: 8000)')1288    args = parser.parse_args()1289    if args.cgi:1290        handler_class = CGIHTTPRequestHandler1291    else:1292        handler_class = SimpleHTTPRequestHandler1293 1294    # ensure dual-stack is not disabled; ref #389071295    class DualStackServer(ThreadingHTTPServer):1296 1297        def server_bind(self):1298            # suppress exception when protocol is IPv41299            with contextlib.suppress(Exception):1300                self.socket.setsockopt(1301                    socket.IPPROTO_IPV6, socket.IPV6_V6ONLY, 0)1302            return super().server_bind()1303 1304        def finish_request(self, request, client_address):1305            self.RequestHandlerClass(request, client_address, self,1306                                     directory=args.directory)1307 1308    test(1309        HandlerClass=handler_class,1310        ServerClass=DualStackServer,1311        port=args.port,1312        bind=args.bind,1313    )1314