File Explorer

/proc/thread-self/root/proc/self/root/proc/self/task/15/root/lib64/python3.9/email
This explorer reads the filesystem of the server it runs on, so /workspace/user isn't present here. Browsing and the terminal still work against this server's own disk from /.
2 dirs
21 files
_parseaddr.py17.3 KB · 554 lines
# Copyright (C) 2002-2007 Python Software Foundation
# Contact: email-sig@python.org

"""Email address parsing code.

Lifted directly from rfc822.py.  This should eventually be rewritten.
"""

__all__ = [
    'mktime_tz',
    'parsedate',
    'parsedate_tz',
    'quote',
    ]

import calendar
import time

SPACE = ' '
EMPTYSTRING = ''
COMMASPACE = ', '

# Parse a date field
_monthnames = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul',
               'aug', 'sep', 'oct', 'nov', 'dec',
               'january', 'february', 'march', 'april', 'may', 'june', 'july',
               'august', 'september', 'october', 'november', 'december']

_daynames = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun']

# The timezone table does not include the military time zones defined
# in RFC822, other than Z.  According to RFC1123, the description in
# RFC822 gets the signs wrong, so we can't rely on any such time
# zones.  RFC1123 recommends that numeric timezone indicators be used
# instead of timezone names.

_timezones = {'UT':0, 'UTC':0, 'GMT':0, 'Z':0,
              'AST': -400, 'ADT': -300,  # Atlantic (used in Canada)
              'EST': -500, 'EDT': -400,  # Eastern
              'CST': -600, 'CDT': -500,  # Central
              'MST': -700, 'MDT': -600,  # Mountain
              'PST': -800, 'PDT': -700   # Pacific
              }


def parsedate_tz(data):
    """Convert a date string to a 10-element time tuple.

    The first nine elements have the layout accepted by time.mktime(); the
    tenth is the timezone offset in seconds.  Where _parsedate_tz() reports
    no offset (None), 0 is substituted here.

    Returns None when `data' is empty or cannot be parsed.
    """
    res = _parsedate_tz(data)
    if not res:
        return None
    if res[9] is None:
        res[9] = 0
    return tuple(res)


def _parsedate_tz(data):
    """Convert date to extended time tuple.

    Returns a 10-element list, or None when `data' is empty or malformed.

    The last (additional) element is the time zone offset in seconds, except if
    the timezone was specified as -0000.  In that case the last element is
    None.  This indicates a UTC timestamp that explicitly disclaims knowledge
    of the source timezone, as opposed to a +0000 timestamp that indicates the
    source timezone really was UTC.  The last element is also None when the
    timezone token is neither a known name nor an integer.
    """
    if not data:
        return None
    data = data.split()
    if not data:  # This happens for whitespace-only input.
        return None
    # The FWS after the comma after the day-of-week is optional, so search and
    # adjust for this.
    if data[0].endswith(',') or data[0].lower() in _daynames:
        # There's a dayname here. Skip it
        del data[0]
    else:
        i = data[0].rfind(',')
        if i >= 0:
            data[0] = data[0][i+1:]
    if len(data) == 3:  # RFC 850 date, deprecated
        stuff = data[0].split('-')
        if len(stuff) == 3:
            data = stuff + data[1:]
    if len(data) == 4:
        # The zone may be glued onto the time token ("12:00:00+0100").
        s = data[3]
        i = s.find('+')
        if i == -1:
            i = s.find('-')
        if i > 0:
            data[3:] = [s[:i], s[i:]]
        else:
            data.append('')  # Dummy tz
    if len(data) < 5:
        return None
    data = data[:5]
    [dd, mm, yy, tm, tz] = data
    # The RFC 850 split on '-' can yield empty tokens (e.g. "jan--2000").
    # Reject them here; the code below indexes into these strings.
    if not (dd and mm and yy):
        return None
    mm = mm.lower()
    if mm not in _monthnames:
        # Maybe day and month are the other way round.
        dd, mm = mm, dd.lower()
        if mm not in _monthnames:
            return None
    mm = _monthnames.index(mm) + 1
    if mm > 12:
        # Full month names occupy the second half of the table.
        mm -= 12
    if dd[-1] == ',':
        dd = dd[:-1]
    i = yy.find(':')
    if i > 0:
        # Year and time appear in swapped positions.
        yy, tm = tm, yy
    if yy[-1] == ',':
        yy = yy[:-1]
        if not yy:
            # The year token was a lone comma.
            return None
    if not yy[0].isdigit():
        # Year and zone appear in swapped positions.
        yy, tz = tz, yy
    if tm[-1] == ',':
        tm = tm[:-1]
    tm = tm.split(':')
    if len(tm) == 2:
        [thh, tmm] = tm
        tss = '0'
    elif len(tm) == 3:
        [thh, tmm, tss] = tm
    elif len(tm) == 1 and '.' in tm[0]:
        # Some non-compliant MUAs use '.' to separate time elements.
        tm = tm[0].split('.')
        if len(tm) == 2:
            [thh, tmm] = tm
            tss = '0'
        elif len(tm) == 3:
            [thh, tmm, tss] = tm
        else:
            return None
    else:
        return None
    try:
        yy = int(yy)
        dd = int(dd)
        thh = int(thh)
        tmm = int(tmm)
        tss = int(tss)
    except ValueError:
        return None
    # Check for a yy specified in two-digit format, then convert it to the
    # appropriate four-digit format, according to the POSIX standard. RFC 822
    # calls for a two-digit yy, but RFC 2822 (which obsoletes RFC 822)
    # mandates a 4-digit yy. For more information, see the documentation for
    # the time module.
    if yy < 100:
        # The year is between 1969 and 1999 (inclusive).
        if yy > 68:
            yy += 1900
        # The year is between 2000 and 2068 (inclusive).
        else:
            yy += 2000
    tzoffset = None
    tz = tz.upper()
    if tz in _timezones:
        tzoffset = _timezones[tz]
    else:
        try:
            tzoffset = int(tz)
        except ValueError:
            # Unknown zone token: leave the offset as None.
            pass
        if tzoffset == 0 and tz.startswith('-'):
            tzoffset = None
    # Convert a timezone offset into seconds ; -0500 -> -18000
    if tzoffset:
        if tzoffset < 0:
            tzsign = -1
            tzoffset = -tzoffset
        else:
            tzsign = 1
        tzoffset = tzsign * ((tzoffset//100)*3600 + (tzoffset % 100)*60)
    # Daylight Saving Time flag is set to -1, since DST is unknown.
    return [yy, mm, dd, thh, tmm, tss, 0, 1, -1, tzoffset]


def parsedate(data):
    """Convert a time string to a 9-element time tuple.

    This is the result of parsedate_tz() with the timezone offset dropped.
    Returns None when parsedate_tz() does.
    """
    t = parsedate_tz(data)
    if isinstance(t, tuple):
        return t[:9]
    else:
        return t


def mktime_tz(data):
    """Turn a 10-tuple as returned by parsedate_tz() into a POSIX timestamp."""
    if data[9] is None:
        # No zone info, so localtime is better assumption than GMT
        return time.mktime(data[:8] + (-1,))
    else:
        # Interpret the fields as UTC, then remove the zone offset.
        t = calendar.timegm(data)
        return t - data[9]


def quote(str):
    """Prepare string to be used in a quoted string.

    Turns backslash and double quote characters into quoted pairs.  These
    are the only characters that need to be quoted inside a quoted string.
    Does not add the surrounding double quotes.
    """
    # Backslashes go first so the ones added for quotes are not doubled.
    return str.replace('\\', '\\\\').replace('"', '\\"')


class AddrlistClass:
    """Address parser class by Ben Escoto.

    To understand what this class does, it helps to have a copy of RFC 2822 in
    front of you.

    The parser keeps a cursor (self.pos) into self.field; each get*() method
    consumes text starting at the cursor and advances it.

    Note: this class interface is deprecated and may be removed in the future.
    Use email.utils.AddressList instead.
    """

    def __init__(self, field):
        """Initialize a new instance.

        `field' is an unparsed address header field, containing
        one or more addresses.
        """
        self.specials = '()<>@,:;.\"[]'
        self.pos = 0
        self.LWS = ' \t'
        self.CR = '\r\n'
        self.FWS = self.LWS + self.CR
        self.atomends = self.specials + self.LWS + self.CR
        # Note that RFC 2822 now specifies `.' as obs-phrase, meaning that it
        # is obsolete syntax.  RFC 2822 requires that we recognize obsolete
        # syntax, so allow dots in phrases.
        self.phraseends = self.atomends.replace('.', '')
        self.field = field
        self.commentlist = []

    def gotonext(self):
        """Skip white space and extract comments.

        Comments are appended to self.commentlist.  Returns the skipped
        spaces and tabs as a string; CR and LF are skipped but not returned.
        """
        wslist = []
        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS + '\n\r':
                if self.field[self.pos] not in '\n\r':
                    wslist.append(self.field[self.pos])
                self.pos += 1
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            else:
                break
        return EMPTYSTRING.join(wslist)

    def getaddrlist(self):
        """Parse all addresses.

        Returns a list containing all of the addresses.  An address that
        yields nothing is recorded as the pair ('', '').
        """
        result = []
        while self.pos < len(self.field):
            ad = self.getaddress()
            if ad:
                result += ad
            else:
                result.append(('', ''))
        return result

    def getaddress(self):
        """Parse the next address.

        Returns a list of (name, address) pairs; a group can contribute
        several pairs and an unparsable address contributes none.
        """
        self.commentlist = []
        self.gotonext()

        oldpos = self.pos
        # NOTE: this is the same list object as self.commentlist, not a copy,
        # so comments collected below are still present after it is restored.
        oldcl = self.commentlist
        plist = self.getphraselist()

        self.gotonext()
        returnlist = []

        if self.pos >= len(self.field):
            # Bad email address technically, no domain.
            if plist:
                returnlist = [(SPACE.join(self.commentlist), plist[0])]

        elif self.field[self.pos] in '.@':
            # email address is just an addrspec
            # this isn't very efficient since we start over
            self.pos = oldpos
            self.commentlist = oldcl
            addrspec = self.getaddrspec()
            returnlist = [(SPACE.join(self.commentlist), addrspec)]

        elif self.field[self.pos] == ':':
            # address is a group
            returnlist = []

            fieldlen = len(self.field)
            self.pos += 1
            while self.pos < len(self.field):
                self.gotonext()
                if self.pos < fieldlen and self.field[self.pos] == ';':
                    self.pos += 1
                    break
                returnlist = returnlist + self.getaddress()

        elif self.field[self.pos] == '<':
            # Address is a phrase then a route addr
            routeaddr = self.getrouteaddr()

            if self.commentlist:
                returnlist = [(SPACE.join(plist) + ' (' +
                               ' '.join(self.commentlist) + ')', routeaddr)]
            else:
                returnlist = [(SPACE.join(plist), routeaddr)]

        else:
            if plist:
                returnlist = [(SPACE.join(self.commentlist), plist[0])]
            elif self.field[self.pos] in self.specials:
                # Step over a stray special so the caller's loop advances.
                self.pos += 1

        self.gotonext()
        if self.pos < len(self.field) and self.field[self.pos] == ',':
            self.pos += 1
        return returnlist

    def getrouteaddr(self):
        """Parse a route address (Return-path value).

        This method just skips all the route stuff and returns the addrspec.
        Returns None when the cursor is not on a `<'.
        """
        if self.field[self.pos] != '<':
            return

        expectroute = False
        self.pos += 1
        self.gotonext()
        adlist = ''
        while self.pos < len(self.field):
            if expectroute:
                # The domain after a route `@' is consumed and discarded.
                self.getdomain()
                expectroute = False
            elif self.field[self.pos] == '>':
                self.pos += 1
                break
            elif self.field[self.pos] == '@':
                self.pos += 1
                expectroute = True
            elif self.field[self.pos] == ':':
                self.pos += 1
            else:
                adlist = self.getaddrspec()
                # Step over the character following the addrspec.
                self.pos += 1
                break
            self.gotonext()

        return adlist

    def getaddrspec(self):
        """Parse an RFC 2822 addr-spec.

        Returns the local part alone when no `@' follows it, and the empty
        string when an `@' is followed by an invalid (empty) domain.
        """
        aslist = []

        self.gotonext()
        while self.pos < len(self.field):
            preserve_ws = True
            if self.field[self.pos] == '.':
                # Drop whitespace recorded just before the dot.
                if aslist and not aslist[-1].strip():
                    aslist.pop()
                aslist.append('.')
                self.pos += 1
                preserve_ws = False
            elif self.field[self.pos] == '"':
                aslist.append('"%s"' % quote(self.getquote()))
            elif self.field[self.pos] in self.atomends:
                # Drop trailing whitespace before ending the local part.
                if aslist and not aslist[-1].strip():
                    aslist.pop()
                break
            else:
                aslist.append(self.getatom())
            ws = self.gotonext()
            if preserve_ws and ws:
                aslist.append(ws)

        if self.pos >= len(self.field) or self.field[self.pos] != '@':
            return EMPTYSTRING.join(aslist)

        aslist.append('@')
        self.pos += 1
        self.gotonext()
        domain = self.getdomain()
        if not domain:
            # Invalid domain, return an empty address instead of returning a
            # local part to denote failed parsing.
            return EMPTYSTRING
        return EMPTYSTRING.join(aslist) + domain

    def getdomain(self):
        """Get the complete domain name from an address.

        Returns the empty string if a second `@' is encountered.
        """
        sdlist = []
        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS:
                self.pos += 1
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            elif self.field[self.pos] == '[':
                sdlist.append(self.getdomainliteral())
            elif self.field[self.pos] == '.':
                self.pos += 1
                sdlist.append('.')
            elif self.field[self.pos] == '@':
                # bpo-34155: Don't parse domains with two `@` like
                # `a@malicious.org@important.com`.
                return EMPTYSTRING
            elif self.field[self.pos] in self.atomends:
                break
            else:
                sdlist.append(self.getatom())
        return EMPTYSTRING.join(sdlist)

    def getdelimited(self, beginchar, endchars, allowcomments=True):
        """Parse a header fragment delimited by special characters.

        `beginchar' is the start character for the fragment.
        If self is not looking at an instance of `beginchar' then
        getdelimited returns the empty string.

        `endchars' is a sequence of allowable end-delimiting characters.
        Parsing stops when one of these is encountered.

        If `allowcomments' is non-zero, embedded RFC 2822 comments are allowed
        within the parsed fragment.

        A backslash is dropped and the character after it is taken literally.
        The delimiters themselves are not part of the returned string.
        """
        if self.field[self.pos] != beginchar:
            return ''

        slist = ['']
        # True when the previous character was an unescaped backslash.
        escaped = False
        self.pos += 1
        while self.pos < len(self.field):
            if escaped:
                slist.append(self.field[self.pos])
                escaped = False
            elif self.field[self.pos] in endchars:
                self.pos += 1
                break
            elif allowcomments and self.field[self.pos] == '(':
                slist.append(self.getcomment())
                continue        # have already advanced pos from getcomment
            elif self.field[self.pos] == '\\':
                escaped = True
            else:
                slist.append(self.field[self.pos])
            self.pos += 1

        return EMPTYSTRING.join(slist)

    def getquote(self):
        """Get a quote-delimited fragment from self's field."""
        return self.getdelimited('"', '"\r', False)

    def getcomment(self):
        """Get a parenthesis-delimited fragment from self's field."""
        return self.getdelimited('(', ')\r', True)

    def getdomainliteral(self):
        """Parse an RFC 2822 domain-literal."""
        return '[%s]' % self.getdelimited('[', ']\r', False)

    def getatom(self, atomends=None):
        """Parse an RFC 2822 atom.

        Optional atomends specifies a different set of end token delimiters
        (the default is to use self.atomends).  This is used e.g. in
        getphraselist() since phrase endings must not include the `.' (which
        is legal in phrases)."""
        atomlist = ['']
        if atomends is None:
            atomends = self.atomends

        while self.pos < len(self.field):
            if self.field[self.pos] in atomends:
                break
            else:
                atomlist.append(self.field[self.pos])
            self.pos += 1

        return EMPTYSTRING.join(atomlist)

    def getphraselist(self):
        """Parse a sequence of RFC 2822 phrases.

        A phrase is a sequence of words, which are in turn either RFC 2822
        atoms or quoted-strings.  Phrases are canonicalized by squeezing all
        runs of continuous whitespace into one space.

        Returns the list of words; comments met along the way are appended
        to self.commentlist.
        """
        plist = []

        while self.pos < len(self.field):
            if self.field[self.pos] in self.FWS:
                self.pos += 1
            elif self.field[self.pos] == '"':
                plist.append(self.getquote())
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            elif self.field[self.pos] in self.phraseends:
                break
            else:
                plist.append(self.getatom(self.phraseends))

        return plist


class AddressList(AddrlistClass):
    """An AddressList encapsulates a list of parsed RFC 2822 addresses."""

    def __init__(self, field):
        """Parse `field' into self.addresslist; a false `field' gives []."""
        AddrlistClass.__init__(self, field)
        if field:
            self.addresslist = self.getaddrlist()
        else:
            self.addresslist = []

    def __len__(self):
        return len(self.addresslist)

    def __add__(self, other):
        # Set union
        newaddr = AddressList(None)
        newaddr.addresslist = self.addresslist[:]
        for x in other.addresslist:
            if x not in self.addresslist:
                newaddr.addresslist.append(x)
        return newaddr

    def __iadd__(self, other):
        # Set union, in-place
        for x in other.addresslist:
            if x not in self.addresslist:
                self.addresslist.append(x)
        return self

    def __sub__(self, other):
        # Set difference
        newaddr = AddressList(None)
        for x in self.addresslist:
            if x not in other.addresslist:
                newaddr.addresslist.append(x)
        return newaddr

    def __isub__(self, other):
        # Set difference, in-place
        for x in other.addresslist:
            if x in self.addresslist:
                self.addresslist.remove(x)
        return self

    def __getitem__(self, index):
        # Make indexing, slices, and 'in' work
        return self.addresslist[index]