File Explorer

/proc/thread-self/root/proc/self/root/proc/self/root/usr/lib64/python3.9
This explorer reads the filesystem of the server it runs on, so /workspace/user isn't present here. Browsing and the terminal still work against this server's own disk from /.
30 dirs
174 files
textwrap.py · 19.0 KB · 487 lines
"""Text wrapping and filling.
"""

# Copyright (C) 1999-2001 Gregory P. Ward.
# Copyright (C) 2002, 2003 Python Software Foundation.
# Written by Greg Ward <gward@python.net>

import re

__all__ = ['TextWrapper', 'wrap', 'fill', 'dedent', 'indent', 'shorten']

# Hardcode the recognized whitespace characters to the US-ASCII
# whitespace characters.  The main reason for doing this is that
# some Unicode spaces (like \u00a0) are non-breaking whitespaces.
_whitespace = '\t\n\x0b\x0c\r '

class TextWrapper:
    """
    Object for wrapping/filling text.  The public interface consists of
    the wrap() and fill() methods; the other methods are just there for
    subclasses to override in order to tweak the default behaviour.
    If you want to completely replace the main wrapping algorithm,
    you'll probably have to override _wrap_chunks().

    Several instance attributes control various aspects of wrapping:
      width (default: 70)
        the maximum width of wrapped lines (unless break_long_words
        is false)
      initial_indent (default: "")
        string that will be prepended to the first line of wrapped
        output.  Counts towards the line's width.
      subsequent_indent (default: "")
        string that will be prepended to all lines save the first
        of wrapped output; also counts towards each line's width.
      expand_tabs (default: true)
        Expand tabs in input text to spaces before further processing.
        Each tab will become 0 .. 'tabsize' spaces, depending on its position
        in its line.  If false, each tab is treated as a single character.
      tabsize (default: 8)
        Expand tabs in input text to 0 .. 'tabsize' spaces, unless
        'expand_tabs' is false.
      replace_whitespace (default: true)
        Replace all whitespace characters in the input text by spaces
        after tab expansion.  Note that if expand_tabs is false and
        replace_whitespace is true, every tab will be converted to a
        single space!
      fix_sentence_endings (default: false)
        Ensure that sentence-ending punctuation is always followed
        by two spaces.  Off by default because the algorithm is
        (unavoidably) imperfect.
      break_long_words (default: true)
        Break words longer than 'width'.  If false, those words will not
        be broken, and some lines might be longer than 'width'.
      break_on_hyphens (default: true)
        Allow breaking hyphenated words. If true, wrapping will occur
        preferably on whitespaces and right after hyphens part of
        compound words.  Any truthy value enables this, like the other
        boolean options.
      drop_whitespace (default: true)
        Drop leading and trailing whitespace from lines.
      max_lines (default: None)
        Truncate wrapped lines.
      placeholder (default: ' [...]')
        Append to the last line of truncated text.
    """

    # Translation table mapping every recognized whitespace character to
    # a plain space; consumed by str.translate() in _munge_whitespace().
    unicode_whitespace_trans = {}
    uspace = ord(' ')
    for x in _whitespace:
        unicode_whitespace_trans[ord(x)] = uspace

    # This funky little regex is just the trick for splitting
    # text up into word-wrappable chunks.  E.g.
    #   "Hello there -- you goof-ball, use the -b option!"
    # splits into
    #   Hello/ /there/ /--/ /you/ /goof-/ball,/ /use/ /the/ /-b/ /option!
    # (after stripping out empty strings).
    word_punct = r'[\w!"\'&.,?]'
    letter = r'[^\d\W]'
    whitespace = r'[%s]' % re.escape(_whitespace)
    nowhitespace = '[^' + whitespace[1:]
    wordsep_re = re.compile(r'''
        ( # any whitespace
          %(ws)s+
        | # em-dash between words
          (?<=%(wp)s) -{2,} (?=\w)
        | # word, possibly hyphenated
          %(nws)s+? (?:
            # hyphenated word
              -(?: (?<=%(lt)s{2}-) | (?<=%(lt)s-%(lt)s-))
              (?= %(lt)s -? %(lt)s)
            | # end of word
              (?=%(ws)s|\Z)
            | # em-dash
              (?<=%(wp)s) (?=-{2,}\w)
            )
        )''' % {'wp': word_punct, 'lt': letter,
                'ws': whitespace, 'nws': nowhitespace},
        re.VERBOSE)
    del word_punct, letter, nowhitespace

    # This less funky little regex just splits on recognized spaces. E.g.
    #   "Hello there -- you goof-ball, use the -b option!"
    # splits into
    #   Hello/ /there/ /--/ /you/ /goof-ball,/ /use/ /the/ /-b/ /option!/
    wordsep_simple_re = re.compile(r'(%s+)' % whitespace)
    del whitespace

    # XXX this is not locale- or charset-aware -- string.lowercase
    # is US-ASCII only (and therefore English-only)
    sentence_end_re = re.compile(r'[a-z]'             # lowercase letter
                                 r'[\.\!\?]'          # sentence-ending punct.
                                 r'[\"\']?'           # optional end-of-quote
                                 r'\Z')               # end of chunk

    def __init__(self,
                 width=70,
                 initial_indent="",
                 subsequent_indent="",
                 expand_tabs=True,
                 replace_whitespace=True,
                 fix_sentence_endings=False,
                 break_long_words=True,
                 drop_whitespace=True,
                 break_on_hyphens=True,
                 tabsize=8,
                 *,
                 max_lines=None,
                 placeholder=' [...]'):
        """Store the wrapping options as same-named instance attributes.

        No validation happens here; 'width' and 'placeholder' are only
        checked when _wrap_chunks() runs.  See the class docstring for
        the meaning of each option.
        """
        self.width = width
        self.initial_indent = initial_indent
        self.subsequent_indent = subsequent_indent
        self.expand_tabs = expand_tabs
        self.replace_whitespace = replace_whitespace
        self.fix_sentence_endings = fix_sentence_endings
        self.break_long_words = break_long_words
        self.drop_whitespace = drop_whitespace
        self.break_on_hyphens = break_on_hyphens
        self.tabsize = tabsize
        self.max_lines = max_lines
        self.placeholder = placeholder


    # -- Private methods -----------------------------------------------
    # (possibly useful for subclasses to override)

    def _munge_whitespace(self, text):
        """_munge_whitespace(text : string) -> string

        Munge whitespace in text: expand tabs and convert all other
        whitespace characters to spaces.  Eg. " foo\\tbar\\n\\nbaz"
        becomes " foo    bar  baz".
        """
        if self.expand_tabs:
            text = text.expandtabs(self.tabsize)
        if self.replace_whitespace:
            text = text.translate(self.unicode_whitespace_trans)
        return text


    def _split(self, text):
        """_split(text : string) -> [string]

        Split the text to wrap into indivisible chunks.  Chunks are
        not quite the same as words; see _wrap_chunks() for full
        details.  As an example, the text
          Look, goof-ball -- use the -b option!
        breaks into the following chunks:
          'Look,', ' ', 'goof-', 'ball', ' ', '--', ' ',
          'use', ' ', 'the', ' ', '-b', ' ', 'option!'
        if break_on_hyphens is true, or in:
          'Look,', ' ', 'goof-ball', ' ', '--', ' ',
          'use', ' ', 'the', ' ', '-b', ' ', 'option!'
        otherwise.
        """
        # Test truthiness rather than identity with True, so that values
        # such as 1 behave like every other boolean option of this class.
        if self.break_on_hyphens:
            chunks = self.wordsep_re.split(text)
        else:
            chunks = self.wordsep_simple_re.split(text)
        # re.split() with a capturing group yields empty strings between
        # adjacent separators; they carry no text, so drop them.
        chunks = [c for c in chunks if c]
        return chunks

    def _fix_sentence_endings(self, chunks):
        """_fix_sentence_endings(chunks : [string])

        Correct for sentence endings buried in 'chunks'.  Eg. when the
        original text contains "... foo.\\nBar ...", munge_whitespace()
        and split() will convert that to [..., "foo.", " ", "Bar", ...]
        which has one too few spaces; this method simply changes the one
        space to two.  'chunks' is modified in place; nothing is returned.
        """
        i = 0
        patsearch = self.sentence_end_re.search
        while i < len(chunks)-1:
            if chunks[i+1] == " " and patsearch(chunks[i]):
                chunks[i+1] = "  "
                # Skip the space chunk that was just widened.
                i += 2
            else:
                i += 1

    def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
        """_handle_long_word(chunks : [string],
                             cur_line : [string],
                             cur_len : int, width : int)

        Handle a chunk of text (most likely a word, not whitespace) that
        is too long to fit in any line.  Both 'reversed_chunks' and
        'cur_line' are modified in place; nothing is returned.
        """
        # Figure out when indent is larger than the specified width, and make
        # sure at least one character is stripped off on every pass
        if width < 1:
            space_left = 1
        else:
            space_left = width - cur_len

        # If we're allowed to break long words, then do so: put as much
        # of the next chunk onto the current line as will fit.
        if self.break_long_words:
            cur_line.append(reversed_chunks[-1][:space_left])
            reversed_chunks[-1] = reversed_chunks[-1][space_left:]

        # Otherwise, we have to preserve the long word intact.  Only add
        # it to the current line if there's nothing already there --
        # that minimizes how much we violate the width constraint.
        elif not cur_line:
            cur_line.append(reversed_chunks.pop())

        # If we're not allowed to break long words, and there's already
        # text on the current line, do nothing.  Next time through the
        # main loop of _wrap_chunks(), we'll wind up here again, but
        # cur_len will be zero, so the next line will be entirely
        # devoted to the long word that we can't handle right now.

    def _wrap_chunks(self, chunks):
        """_wrap_chunks(chunks : [string]) -> [string]

        Wrap a sequence of text chunks and return a list of lines of
        length 'self.width' or less.  (If 'break_long_words' is false,
        some lines may be longer than this.)  Chunks correspond roughly
        to words and the whitespace between them: each chunk is
        indivisible (modulo 'break_long_words'), but a line break can
        come between any two chunks.  Chunks should not have internal
        whitespace; ie. a chunk is either all whitespace or a "word".
        Whitespace chunks will be removed from the beginning and end of
        lines, but apart from that whitespace is preserved.

        The 'chunks' list is reversed and consumed in place.  Raises
        ValueError if 'self.width' is not positive, or if 'max_lines' is
        set and the placeholder does not fit within 'self.width' next to
        the applicable indent.
        """
        lines = []
        if self.width <= 0:
            raise ValueError("invalid width %r (must be > 0)" % self.width)
        if self.max_lines is not None:
            # The placeholder lands on the last permitted line, which is
            # the first line only when a single line is allowed.
            if self.max_lines > 1:
                indent = self.subsequent_indent
            else:
                indent = self.initial_indent
            if len(indent) + len(self.placeholder.lstrip()) > self.width:
                raise ValueError("placeholder too large for max width")

        # Arrange in reverse order so items can be efficiently popped
        # from a stack of chunks.
        chunks.reverse()

        while chunks:

            # Start the list of chunks that will make up the current line.
            # cur_len is just the length of all the chunks in cur_line.
            cur_line = []
            cur_len = 0

            # Figure out which static string will prefix this line.
            if lines:
                indent = self.subsequent_indent
            else:
                indent = self.initial_indent

            # Maximum width for this line.
            width = self.width - len(indent)

            # First chunk on line is whitespace -- drop it, unless this
            # is the very beginning of the text (ie. no lines started yet).
            if self.drop_whitespace and chunks[-1].strip() == '' and lines:
                del chunks[-1]

            while chunks:
                chunk_len = len(chunks[-1])

                # Can at least squeeze this chunk onto the current line.
                if cur_len + chunk_len <= width:
                    cur_line.append(chunks.pop())
                    cur_len += chunk_len

                # Nope, this line is full.
                else:
                    break

            # The current line is full, and the next chunk is too big to
            # fit on *any* line (not just this one).
            if chunks and len(chunks[-1]) > width:
                self._handle_long_word(chunks, cur_line, cur_len, width)
                cur_len = sum(map(len, cur_line))

            # If the last chunk on this line is all whitespace, drop it.
            if self.drop_whitespace and cur_line and cur_line[-1].strip() == '':
                cur_len -= len(cur_line[-1])
                del cur_line[-1]

            if cur_line:
                # Emit the line unchanged when there is no line limit, when
                # this is not yet the last permitted line, or when nothing
                # but droppable trailing whitespace remains and the line
                # fits.  Otherwise this line must be truncated.
                if (self.max_lines is None or
                    len(lines) + 1 < self.max_lines or
                    (not chunks or
                     self.drop_whitespace and
                     len(chunks) == 1 and
                     not chunks[0].strip()) and cur_len <= width):
                    # Convert current line back to a string and store it in
                    # list of all lines (return value).
                    lines.append(indent + ''.join(cur_line))
                else:
                    # Strip chunks off the end of the line until the
                    # placeholder fits after a non-whitespace chunk.
                    while cur_line:
                        if (cur_line[-1].strip() and
                            cur_len + len(self.placeholder) <= width):
                            cur_line.append(self.placeholder)
                            lines.append(indent + ''.join(cur_line))
                            break
                        cur_len -= len(cur_line[-1])
                        del cur_line[-1]
                    else:
                        # Nothing on this line can sit next to the
                        # placeholder: try to attach it to the previous
                        # line, else give it a line of its own.
                        if lines:
                            prev_line = lines[-1].rstrip()
                            if (len(prev_line) + len(self.placeholder) <=
                                    self.width):
                                lines[-1] = prev_line + self.placeholder
                                break
                        lines.append(indent + self.placeholder.lstrip())
                    break

        return lines

    def _split_chunks(self, text):
        """_split_chunks(text : string) -> [string]

        Normalize whitespace in 'text' with _munge_whitespace() and then
        split the result into chunks with _split().
        """
        text = self._munge_whitespace(text)
        return self._split(text)

    # -- Public interface ----------------------------------------------

    def wrap(self, text):
        """wrap(text : string) -> [string]

        Reformat the single paragraph in 'text' so it fits in lines of
        no more than 'self.width' columns, and return a list of wrapped
        lines.  Tabs in 'text' are expanded with string.expandtabs(),
        and all other whitespace characters (including newline) are
        converted to space.
        """
        chunks = self._split_chunks(text)
        if self.fix_sentence_endings:
            self._fix_sentence_endings(chunks)
        return self._wrap_chunks(chunks)

    def fill(self, text):
        """fill(text : string) -> string

        Reformat the single paragraph in 'text' to fit in lines of no
        more than 'self.width' columns, and return a new string
        containing the entire wrapped paragraph.
        """
        return "\n".join(self.wrap(text))


# -- Convenience interface ---------------------------------------------

def wrap(text, width=70, **kwargs):
    """Wrap a single paragraph of text, returning a list of wrapped lines.

    Reformat the single paragraph in 'text' so it fits in lines of no
    more than 'width' columns, and return a list of wrapped lines.  By
    default, tabs in 'text' are expanded with string.expandtabs(), and
    all other whitespace characters (including newline) are converted to
    space.  See TextWrapper class for available keyword args to customize
    wrapping behaviour.
    """
    w = TextWrapper(width=width, **kwargs)
    return w.wrap(text)

def fill(text, width=70, **kwargs):
    """Fill a single paragraph of text, returning a new string.

    Reformat the single paragraph in 'text' to fit in lines of no more
    than 'width' columns, and return a new string containing the entire
    wrapped paragraph.  As with wrap(), tabs are expanded and other
    whitespace characters converted to space.  See TextWrapper class for
    available keyword args to customize wrapping behaviour.
    """
    w = TextWrapper(width=width, **kwargs)
    return w.fill(text)

def shorten(text, width, **kwargs):
    """Collapse and truncate the given text to fit in the given width.

    The text first has its whitespace collapsed.  If it then fits in
    the *width*, it is returned as is.  Otherwise, as many words
    as possible are joined and then the placeholder is appended::

        >>> textwrap.shorten("Hello  world!", width=12)
        'Hello world!'
        >>> textwrap.shorten("Hello  world!", width=11)
        'Hello [...]'
    """
    w = TextWrapper(width=width, max_lines=1, **kwargs)
    return w.fill(' '.join(text.strip().split()))


# -- Loosely related functionality -------------------------------------

# Lines consisting solely of spaces and tabs.
_whitespace_only_re = re.compile('^[ \t]+$', re.MULTILINE)
# Leading run of spaces/tabs on every line that has other content.
_leading_whitespace_re = re.compile('(^[ \t]*)(?:[^ \t\n])', re.MULTILINE)

def dedent(text):
    """Remove any common leading whitespace from every line in `text`.

    This can be used to make triple-quoted strings line up with the left
    edge of the display, while still presenting them in the source code
    in indented form.

    Note that tabs and spaces are both treated as whitespace, but they
    are not equal: the lines "  hello" and "\\thello" are
    considered to have no common leading whitespace.

    Entirely blank lines are normalized to a newline character.
    """
    # Look for the longest leading string of spaces and tabs common to
    # all lines.
    margin = None
    text = _whitespace_only_re.sub('', text)
    indents = _leading_whitespace_re.findall(text)
    for indent in indents:
        if margin is None:
            margin = indent

        # Current line more deeply indented than previous winner:
        # no change (previous winner is still on top).
        elif indent.startswith(margin):
            pass

        # Current line consistent with and no deeper than previous winner:
        # it's the new winner.
        elif margin.startswith(indent):
            margin = indent

        # Find the largest common whitespace between current line and previous
        # winner.
        else:
            for i, (x, y) in enumerate(zip(margin, indent)):
                if x != y:
                    margin = margin[:i]
                    break

    # 'margin' holds only spaces and tabs, so it can be spliced into the
    # pattern without escaping.
    if margin:
        text = re.sub(r'(?m)^' + margin, '', text)
    return text


def indent(text, prefix, predicate=None):
    """Adds 'prefix' to the beginning of selected lines in 'text'.

    If 'predicate' is provided, 'prefix' will only be added to the lines
    where 'predicate(line)' is True. If 'predicate' is not provided,
    it will default to adding 'prefix' to all non-empty lines that do not
    consist solely of whitespace characters.
    """
    if predicate is None:
        def predicate(line):
            return line.strip()

    def prefixed_lines():
        # splitlines(True) keeps the line endings, so joining the pieces
        # reproduces the original text apart from the added prefixes.
        for line in text.splitlines(True):
            yield (prefix + line if predicate(line) else line)
    return ''.join(prefixed_lines())


if __name__ == "__main__":
    print(dedent("Hello there.\n  This is indented."))