File Explorer

/proc/thread-self/root/proc/thread-self/root/lib64/python3.9/lib2to3

This explorer reads the filesystem of the server it runs on, so /workspace/user isn't present here. Browsing and the terminal still work against this server's own disk from /.

btm_matcher.py6.5 KB · 164 lines
"""A bottom-up tree matching algorithm implementation meant to speedup 2to3's matching process. After the tree patterns are reduced totheir rarest linear path, a linear Aho-Corasick automaton iscreated. The linear automaton traverses the linear paths from theleaves to the root of the AST and returns a set of nodes for furthermatching. This reduces significantly the number of candidate nodes.""" __author__ = "George Boutsioukis <gboutsioukis@gmail.com>" import loggingimport itertoolsfrom collections import defaultdict from . import pytreefrom .btm_utils import reduce_tree class BMNode(object):    """Class for a node of the Aho-Corasick automaton used in matching"""    count = itertools.count()    def __init__(self):        self.transition_table = {}        self.fixers = []        self.id = next(BMNode.count)        self.content = '' class BottomMatcher(object):    """The main matcher class. After instantiating the patterns should    be added using the add_fixer method"""     def __init__(self):        self.match = set()        self.root = BMNode()        self.nodes = [self.root]        self.fixers = []        self.logger = logging.getLogger("RefactoringTool")     def add_fixer(self, fixer):        """Reduces a fixer's pattern tree to a linear path and adds it        to the matcher(a common Aho-Corasick automaton). The fixer is        appended on the matching states and called when they are        reached"""        self.fixers.append(fixer)        tree = reduce_tree(fixer.pattern_tree)        linear = tree.get_linear_subpattern()        match_nodes = self.add(linear, start=self.root)        for match_node in match_nodes:            match_node.fixers.append(fixer)     def add(self, pattern, start):        "Recursively adds a linear pattern to the AC automaton"        #print("adding pattern", pattern, "to", start)        if not pattern:            #print("empty pattern")            return [start]        if isinstance(pattern[0], tuple):            #alternatives            #print("alternatives")            match_nodes = []            for alternative in pattern[0]:                #add all alternatives, and add the rest of the pattern                #to each end node                end_nodes = self.add(alternative, start=start)                for end in end_nodes:                    match_nodes.extend(self.add(pattern[1:], end))            return match_nodes        else:            #single token            #not last            if pattern[0] not in start.transition_table:                #transition did not exist, create new                next_node = BMNode()                start.transition_table[pattern[0]] = next_node            else:                #transition exists already, follow                next_node = start.transition_table[pattern[0]]             if pattern[1:]:                end_nodes = self.add(pattern[1:], start=next_node)            else:                end_nodes = [next_node]            return end_nodes     def run(self, leaves):        """The main interface with the bottom matcher. The tree is        traversed from the bottom using the constructed        automaton. Nodes are only checked once as the tree is        retraversed. When the automaton fails, we give it one more        shot(in case the above tree matches as a whole with the        rejected leaf), then we break for the next leaf. There is the        special case of multiple arguments(see code comments) where we        recheck the nodes         Args:           The leaves of the AST tree to be matched         Returns:           A dictionary of node matches with fixers as the keys        """        current_ac_node = self.root        results = defaultdict(list)        for leaf in leaves:            current_ast_node = leaf            while current_ast_node:                current_ast_node.was_checked = True                for child in current_ast_node.children:                    # multiple statements, recheck                    if isinstance(child, pytree.Leaf) and child.value == ";":                        current_ast_node.was_checked = False                        break                if current_ast_node.type == 1:                    #name                    node_token = current_ast_node.value                else:                    node_token = current_ast_node.type                 if node_token in current_ac_node.transition_table:                    #token matches                    current_ac_node = current_ac_node.transition_table[node_token]                    for fixer in current_ac_node.fixers:                        results[fixer].append(current_ast_node)                else:                    #matching failed, reset automaton                    current_ac_node = self.root                    if (current_ast_node.parent is not None                        and current_ast_node.parent.was_checked):                        #the rest of the tree upwards has been checked, next leaf                        break                     #recheck the rejected node once from the root                    if node_token in current_ac_node.transition_table:                        #token matches                        current_ac_node = current_ac_node.transition_table[node_token]                        for fixer in current_ac_node.fixers:                            results[fixer].append(current_ast_node)                 current_ast_node = current_ast_node.parent        return results     def print_ac(self):        "Prints a graphviz diagram of the BM automaton(for debugging)"        print("digraph g{")        def print_node(node):            for subnode_key in node.transition_table.keys():                subnode = node.transition_table[subnode_key]                print("%d -> %d [label=%s] //%s" %                      (node.id, subnode.id, type_repr(subnode_key), str(subnode.fixers)))                if subnode_key == 1:                    print(subnode.content)                print_node(subnode)        print_node(self.root)        print("}") # taken from pytree.py for debugging; only used by print_ac_type_reprs = {}def type_repr(type_num):    global _type_reprs    if not _type_reprs:        from .pygram import python_symbols        # printing tokens is possible but not as useful        # from .pgen2 import token // token.__dict__.items():        for name, val in python_symbols.__dict__.items():            if type(val) == int: _type_reprs[val] = name    return _type_reprs.setdefault(type_num, type_num)