File Explorer

/proc/self/root/proc/thread-self/root/lib64/python3.9/lib2to3/pgen2

This explorer reads the filesystem of the server it runs on, so /workspace/user isn't present here. Browsing and the terminal still work against this server's own disk from /.

conv.py · 9.4 KB · 258 lines
# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.

"""Convert graminit.[ch] spit out by pgen to Python code.

Pgen is the Python parser generator.  It is useful to quickly create a
parser from a grammar file in Python's grammar notation.  But I don't
want my parsers to be written in C (yet), so I'm translating the
parsing tables to Python data structures and writing a Python parse
engine.

Note that the token numbers are constants determined by the standard
Python tokenizer.  The standard token module defines these numbers and
their names (the names are not used much).  The token numbers are
hardcoded into the Python tokenizer and into pgen.  A Python
implementation of the Python tokenizer is also available, in the
standard tokenize module.

On the other hand, symbol numbers (representing the grammar's
non-terminals) are assigned by pgen based on the actual grammar
input.

Note: this module is pretty much obsolete; the pgen module generates
equivalent grammar tables directly from the Grammar.txt input file
without having to invoke the Python pgen C program.

"""

# Python imports
import re

# Local imports
# NOTE(review): this form only resolves when ``pgen2`` is importable as a
# top-level package -- confirm against how this module is loaded.
from pgen2 import grammar, token


class Converter(grammar.Grammar):
    """Grammar subclass that reads classic pgen output files.

    The run() method reads the tables as produced by the pgen parser
    generator, typically contained in two C files, graminit.h and
    graminit.c.  The other methods are for internal use only.

    See the base class for more documentation.

    """

    def run(self, graminit_h, graminit_c):
        """Load the grammar tables from the text files written by pgen.

        graminit_h and graminit_c are the paths of the two files.  The
        header is parsed first because the .c parser cross-checks DFA
        names against the symbol tables built from the header.
        """
        self.parse_graminit_h(graminit_h)
        self.parse_graminit_c(graminit_c)
        self.finish_off()

    def parse_graminit_h(self, filename):
        """Parse the .h file written by pgen.  (Internal)

        This file is a sequence of #define statements defining the
        nonterminals of the grammar as numbers.  We build two tables
        mapping the numbers to names and back.

        Blank lines are skipped silently; any other line that is not a
        #define of the expected form is reported on stdout and skipped.

        Returns False (after printing a message) if the file cannot be
        opened, True otherwise.  Raises AssertionError if a symbol name
        or number is defined twice.
        """
        try:
            f = open(filename)
        except OSError as err:
            print("Can't open %s: %s" % (filename, err))
            return False
        self.symbol2number = {}
        self.number2symbol = {}
        # "with" guarantees the file is closed even if an assert fires.
        with f:
            for lineno, line in enumerate(f, 1):
                mo = re.match(r"^#define\s+(\w+)\s+(\d+)$", line)
                if not mo:
                    # Only complain about non-blank garbage; either way
                    # there is nothing to record for this line.
                    if line.strip():
                        print("%s(%s): can't parse %s" % (filename, lineno,
                                                          line.strip()))
                    continue
                symbol, number = mo.groups()
                number = int(number)
                assert symbol not in self.symbol2number
                assert number not in self.number2symbol
                self.symbol2number[symbol] = number
                self.number2symbol[number] = symbol
        return True

    def parse_graminit_c(self, filename):
        """Parse the .c file written by pgen.  (Internal)

        The file looks as follows.  The first two lines are always this:

        #include "pgenheaders.h"
        #include "grammar.h"

        After that come four blocks:

        1) one or more state definitions
        2) a table defining dfas
        3) a table defining labels
        4) a struct defining the grammar

        A state definition has the following form:
        - one or more arc arrays, each of the form:
          static arc arcs_<n>_<m>[<k>] = {
                  {<i>, <j>},
                  ...
          };
        - followed by a state array, of the form:
          static state states_<s>[<t>] = {
                  {<k>, arcs_<n>_<m>},
                  ...
          };

        On success this sets self.states, self.dfas, self.labels and
        self.start.  It relies on self.symbol2number and
        self.number2symbol to cross-check the DFA table.

        Returns False (after printing a message) if the file cannot be
        opened.  Raises AssertionError on any line that deviates from
        the expected layout, and StopIteration if the file ends early.
        """
        try:
            f = open(filename)
        except OSError as err:
            print("Can't open %s: %s" % (filename, err))
            return False
        # "with" guarantees the file is closed even if an assert fires.
        with f:
            # The code below essentially uses f's iterator-ness!
            lineno = 0

            # Expect the two #include lines
            lineno, line = lineno+1, next(f)
            assert line == '#include "pgenheaders.h"\n', (lineno, line)
            lineno, line = lineno+1, next(f)
            assert line == '#include "grammar.h"\n', (lineno, line)

            # Parse the state definitions
            lineno, line = lineno+1, next(f)
            allarcs = {}
            states = []
            while line.startswith("static arc "):
                # Collect every arc array that precedes the next state
                # array, keyed by the (n, m) pair in its name.
                while line.startswith("static arc "):
                    mo = re.match(r"static arc arcs_(\d+)_(\d+)\[(\d+)\] = {$",
                                  line)
                    assert mo, (lineno, line)
                    n, m, k = map(int, mo.groups())
                    arcs = []
                    for _ in range(k):
                        lineno, line = lineno+1, next(f)
                        mo = re.match(r"\s+{(\d+), (\d+)},$", line)
                        assert mo, (lineno, line)
                        i, j = map(int, mo.groups())
                        arcs.append((i, j))
                    lineno, line = lineno+1, next(f)
                    assert line == "};\n", (lineno, line)
                    allarcs[(n, m)] = arcs
                    lineno, line = lineno+1, next(f)
                mo = re.match(r"static state states_(\d+)\[(\d+)\] = {$", line)
                assert mo, (lineno, line)
                s, t = map(int, mo.groups())
                # State arrays must appear in order: states_0, states_1, ...
                assert s == len(states), (lineno, line)
                state = []
                for _ in range(t):
                    lineno, line = lineno+1, next(f)
                    mo = re.match(r"\s+{(\d+), arcs_(\d+)_(\d+)},$", line)
                    assert mo, (lineno, line)
                    k, n, m = map(int, mo.groups())
                    arcs = allarcs[n, m]
                    assert k == len(arcs), (lineno, line)
                    state.append(arcs)
                states.append(state)
                lineno, line = lineno+1, next(f)
                assert line == "};\n", (lineno, line)
                lineno, line = lineno+1, next(f)
            self.states = states

            # Parse the dfas
            dfas = {}
            mo = re.match(r"static dfa dfas\[(\d+)\] = {$", line)
            assert mo, (lineno, line)
            ndfas = int(mo.group(1))
            for _ in range(ndfas):
                lineno, line = lineno+1, next(f)
                mo = re.match(r'\s+{(\d+), "(\w+)", (\d+), (\d+), states_(\d+),$',
                              line)
                assert mo, (lineno, line)
                symbol = mo.group(2)
                number, x, y, z = map(int, mo.group(1, 3, 4, 5))
                assert self.symbol2number[symbol] == number, (lineno, line)
                assert self.number2symbol[number] == symbol, (lineno, line)
                assert x == 0, (lineno, line)
                state = states[z]
                assert y == len(state), (lineno, line)
                lineno, line = lineno+1, next(f)
                mo = re.match(r'\s+("(?:\\\d\d\d)*")},$', line)
                assert mo, (lineno, line)
                first = {}
                # NOTE: eval() on file content.  The regex above limits the
                # text to a double-quoted run of backslash-plus-three-digit
                # escapes, so only a plain string literal can reach eval.
                rawbitset = eval(mo.group(1))
                # Expand the bitset: bit j of byte number byteno means
                # label byteno*8 + j is in the first set.
                for byteno, c in enumerate(rawbitset):
                    byte = ord(c)
                    for j in range(8):
                        if byte & (1<<j):
                            first[byteno*8 + j] = 1
                dfas[number] = (state, first)
            lineno, line = lineno+1, next(f)
            assert line == "};\n", (lineno, line)
            self.dfas = dfas

            # Parse the labels
            labels = []
            lineno, line = lineno+1, next(f)
            mo = re.match(r"static label labels\[(\d+)\] = {$", line)
            assert mo, (lineno, line)
            nlabels = int(mo.group(1))
            for _ in range(nlabels):
                lineno, line = lineno+1, next(f)
                mo = re.match(r'\s+{(\d+), (0|"\w+")},$', line)
                assert mo, (lineno, line)
                x, y = mo.groups()
                x = int(x)
                if y == "0":
                    y = None
                else:
                    # NOTE: eval() on file content.  The regex above only
                    # lets a double-quoted \w+ string literal through.
                    y = eval(y)
                labels.append((x, y))
            lineno, line = lineno+1, next(f)
            assert line == "};\n", (lineno, line)
            self.labels = labels

            # Parse the grammar struct
            lineno, line = lineno+1, next(f)
            assert line == "grammar _PyParser_Grammar = {\n", (lineno, line)
            lineno, line = lineno+1, next(f)
            mo = re.match(r"\s+(\d+),$", line)
            assert mo, (lineno, line)
            ndfas = int(mo.group(1))
            assert ndfas == len(self.dfas)
            lineno, line = lineno+1, next(f)
            assert line == "\tdfas,\n", (lineno, line)
            lineno, line = lineno+1, next(f)
            mo = re.match(r"\s+{(\d+), labels},$", line)
            assert mo, (lineno, line)
            nlabels = int(mo.group(1))
            assert nlabels == len(self.labels), (lineno, line)
            lineno, line = lineno+1, next(f)
            mo = re.match(r"\s+(\d+)$", line)
            assert mo, (lineno, line)
            start = int(mo.group(1))
            assert start in self.number2symbol, (lineno, line)
            self.start = start
            lineno, line = lineno+1, next(f)
            assert line == "};\n", (lineno, line)
            # Nothing may follow the closing brace of the grammar struct.
            try:
                lineno, line = lineno+1, next(f)
            except StopIteration:
                pass
            else:
                assert 0, (lineno, line)

    def finish_off(self):
        """Create additional useful structures.  (Internal).

        Builds self.keywords and self.tokens from self.labels, so it
        must run after the labels table has been loaded.
        """
        self.keywords = {} # map from keyword strings to arc labels
        self.tokens = {}   # map from numeric token values to arc labels
        for ilabel, (toktype, value) in enumerate(self.labels):
            if toktype == token.NAME and value is not None:
                self.keywords[value] = ilabel
            elif value is None:
                self.tokens[toktype] = ilabel