@@ -16,13 +16,13 @@ def __init__(self, root_node, tokens, lang = "python"):
1616 self ._ast_nodes = {} # Nodes indexed by an AST node
1717 self ._anonymous_nodes = [] # Unindexed nodes, can only be indexed by traversal
1818
19- self .root_node = self .add_node (root_node )
20-
2119 prev_token = self ._add_token (tokens [0 ])
2220 for token in tokens [1 :]:
2321 token_node = self ._add_token (token )
2422 prev_token .add_successor (token_node , "next_token" )
2523 prev_token = token_node
24+
25+ self .root_node = self .add_or_get_node (root_node )
2626
2727 # Helper methods --------------------------------
2828
@@ -61,6 +61,9 @@ def add_node(self, node):
6161 return self ._add_ast_node (node )
6262
6363 def add_or_get_node (self , node ):
64+ if isinstance (node , SyntaxNode ):
65+ return self .add_or_get_node (node .ast_node )
66+
6467 if isinstance (node , Node ): return node
6568 try :
6669 return self ._add_ast_node (node )
@@ -74,6 +77,9 @@ def add_relation(self, source_node, target_node, relation = "ast"):
7477
7578 # API GET methods-----------------------------------------
7679
def has_node(self, ast_node):
    """Return True if `ast_node` is already indexed in this graph.

    The lookup key is derived with `node_key` and checked against the
    AST-node index; the graph itself is not modified.
    """
    key = node_key(ast_node)
    return key in self._ast_nodes
82+
def nodes(self):
    """Return an iterator over all nodes of the graph.

    Nodes indexed by an AST node come first, followed by the anonymous
    (unindexed) nodes.
    """
    indexed_nodes = self._ast_nodes.values()
    return chain(indexed_nodes, self._anonymous_nodes)
7985
@@ -88,6 +94,17 @@ def todot(self, file_name = None, edge_colors = None):
8894 dotwriter .run (f )
8995 f .seek (0 )
9096 return f .read ()
97+
def tokens_only(self):
    """
    Compute a graph that contains only tokens.

    Edges attached to inner nodes are propagated down to the leaves:
    the first token related to an inner node acts as its representative.

    """
    token_graph = graph_to_tokens_only(self)
    return token_graph
107+
91108
92109 # Internal GET methods -----------------------------------
93110
@@ -290,4 +307,67 @@ def escape(token):
290307
291308 # Cleanup
292309 for src_node in self .graph :
293- del src_node ._dot_node_id
310+ del src_node ._dot_node_id
311+
312+
313+ # Propagate to leaves ----------------------------------------------------------------
314+
def _compute_representer(graph):
    """Map every inner node reachable from the root to a representing node.

    Starting at `graph.root_node`, the walk repeatedly descends into the
    first child that is registered in the graph until it reaches a node
    carrying a `token` attribute. Every inner node passed on the way is
    mapped to that final node. The remaining registered children are put
    on a stack and processed the same way later.

    Children for which `graph.has_node` is false are ignored. If an inner
    node has no registered children, the descent stops there and the
    nodes on the path (including that node itself) are mapped to it.

    Returns:
        dict mapping inner graph nodes to their representing graph node.
    """
    representer = {}
    pending = [graph.root_node]

    while pending:
        node = pending.pop()

        inner_path = []
        while not hasattr(node, "token"):
            inner_path.append(node)
            registered = [
                graph.add_or_get_node(child)
                for child in node.ast_node.children
                if graph.has_node(child)
            ]
            if not registered:
                break
            # Follow the first child now, revisit its siblings later.
            node, *siblings = registered
            pending.extend(siblings)

        for inner in inner_path:
            representer[inner] = node

    return representer
339+
340+
# Edge types that are skipped when edges are copied into the
# token-only graph.
SYNTAX_TYPES = {"child", "sibling"}


def graph_to_tokens_only(graph):
    """Build a new CodeGraph over the tokens of `graph`.

    The output graph is created from `graph.tokens`. Edges are then
    transferred in two passes, skipping every edge whose type is listed
    in SYNTAX_TYPES:

    1. Edges from a token to a successor that carries a `token`
       attribute are copied as they are.
    2. Edges between two inner nodes are re-attached to the representers
       computed by `_compute_representer`.

    NOTE(review): an edge that connects an inner node with a token (in
    either direction) is matched by neither pass and is therefore not
    part of the output.

    Args:
        graph: source graph providing `tokens`, `lang`, `root_node`,
            `add_or_get_node` and `has_node`.

    Returns:
        The newly created CodeGraph.
    """
    representers = _compute_representer(graph)
    tokens = graph.tokens

    # Root the output at the first token that is backed by an AST node.
    # Tokens without `ast_node` are tolerated by the loop below, so a
    # leading one must not break the construction either. Falling back
    # to tokens[0] keeps the previous failure modes (IndexError on empty
    # input, AttributeError if no token has an AST node).
    root_token = next(
        (token for token in tokens if hasattr(token, "ast_node")),
        tokens[0],
    )
    output = CodeGraph(root_token.ast_node, tokens, lang=graph.lang)

    # Pass 1: copy non-syntax edges that already connect two tokens.
    for token in tokens:
        if not hasattr(token, "ast_node"):
            continue
        token_node = graph.add_or_get_node(token.ast_node)
        output_node = output.add_or_get_node(token.ast_node)

        for _, edge_type, successor in token_node.successors():
            if edge_type in SYNTAX_TYPES:
                continue
            if not hasattr(successor, "token"):
                continue
            output_succ = output.add_or_get_node(successor.ast_node)
            output.add_relation(output_node, output_succ, edge_type)

    # Pass 2: move edges between inner nodes onto their representers.
    for node, representer in representers.items():
        output_representer = output.add_or_get_node(representer.ast_node)

        for _, edge_type, successor in node.successors():
            if edge_type in SYNTAX_TYPES:
                continue
            if successor not in representers:
                continue
            successor_representer = representers[successor]
            output_successor_representer = output.add_or_get_node(
                successor_representer.ast_node
            )
            output.add_relation(
                output_representer,
                output_successor_representer,
                edge_type,
            )

    return output
0 commit comments