]> git.rkrishnan.org Git - tahoe-lafs/tahoe-lafs.git/blob - misc/coding_tools/check-miscaptures.py
check-miscaptures.py: Python doesn't really have declarations; report the topmost...
[tahoe-lafs/tahoe-lafs.git] / misc / coding_tools / check-miscaptures.py
1 #! /usr/bin/python
2
3 import os, sys, compiler
4 from compiler.ast import Node, For, While, ListComp, AssName, Name, Lambda, Function
5
6
def check_source(source):
    """Analyse Python source text held in the string `source`.

    The text is parsed with compiler.parse; the return value is
    whatever check_thing returns for that parser and input.
    """
    parse_text = compiler.parse
    return check_thing(parse_text, source)
9
def check_file(path):
    """Analyse the Python source file at `path`.

    The file is parsed with compiler.parseFile; the return value is
    whatever check_thing returns for that parser and input.
    """
    parse_path = compiler.parseFile
    return check_thing(parse_path, path)
12
def check_thing(parser, thing):
    """Parse `thing` with the callable `parser` and check the result.

    Returns a one-element list containing the SyntaxError if parsing
    raises one; otherwise returns the list of results accumulated by
    check_ast for the parsed tree.
    """
    try:
        tree = parser(thing)
    except SyntaxError as err:
        # An unparseable input is handed back to the caller as a
        # result rather than propagated as an exception.
        return [err]

    findings = []
    check_ast(tree, findings)
    return findings
22
def check_ast(ast, results):
    """Check a node outside a loop.

    If `ast` is a For, While or ListComp node it is handed to
    check_loop. Otherwise each of its child nodes is checked
    recursively. Findings are appended to `results`; nothing is
    returned.
    """
    if isinstance(ast, (For, While, ListComp)):
        check_loop(ast, results)
        return

    for child in ast.getChildNodes():
        # Guard the node we are about to descend into. (Testing the
        # parent here, as was done previously, made the check vacuous.)
        if isinstance(child, Node):
            check_ast(child, results)
31
def check_loop(ast, results):
    """Check a particular outer loop.

    For every function nested in the loop `ast`, append one result
    tuple (built by make_result) to `results` for each variable that
    the function captures and that is assigned in the loop. Then check
    the body of each nested function in case it contains further
    loops. Nothing is returned.
    """

    # List comprehensions have a poorly designed AST of the form
    # ListComp(exprNode, [ListCompFor(...), ...]), in which the
    # result expression is outside the ListCompFor node even though
    # it is logically inside the loop(s).
    # There may be multiple ListCompFor nodes (in cases such as
    #   [lambda: (a,b) for a in ... for b in ...]
    # ), and in that case they are not nested in the AST. But these
    # warts (nonobviously) happen not to matter for our analysis.

    assigned = {}  # maps name to lineno of topmost assignment
    nested = set()
    collect_assigned_and_nested(ast, assigned, nested)

    # For each nested function...
    for funcnode in nested:
        # Check for captured variables in this function.
        captured = set()
        collect_captured(funcnode, assigned, captured)
        for name in captured:
            # We want to report the outermost capturing function
            # (since that is where the workaround will need to be
            # added), and the topmost assignment to the variable.
            # Just one report per capturing function per variable
            # will do.
            results.append(make_result(funcnode, name, assigned[name]))

        # Check each node in the function body in case it
        # contains another 'for' loop. The slice skips the default
        # argument expressions, which belong to the enclosing loop.
        # NOTE(review): this assumes the defaults are the leading
        # child nodes -- confirm for decorated functions.
        childnodes = funcnode.getChildNodes()[len(funcnode.defaults):]
        for child in childnodes:
            # Descend into the body child itself. Passing funcnode
            # here would re-walk the whole function (defaults
            # included) once per body child.
            check_ast(child, results)
66
def collect_assigned_and_nested(ast, assigned, nested):
    """
    Collect the names assigned in this loop, not including names
    assigned in nested functions. Also collect the nodes of functions
    that are nested one level deep.

    `assigned` is a dict updated in place, mapping each assigned name
    to the smallest line number at which an assignment to it was seen.
    `nested` is a set updated in place with the Lambda and Function
    nodes encountered. Nothing is returned.
    """
    if isinstance(ast, AssName):
        # Keep only the topmost (lowest-numbered) assignment line.
        if ast.name not in assigned or assigned[ast.name] > ast.lineno:
            assigned[ast.name] = ast.lineno
        return

    childnodes = ast.getChildNodes()
    if isinstance(ast, (Lambda, Function)):
        nested.add(ast)

        # The default argument expressions are "outside" the
        # function, even though they are children of the
        # Lambda or Function node. Only they are scanned further;
        # the function body is deliberately not descended into.
        childnodes = childnodes[:len(ast.defaults)]

    for child in childnodes:
        # Guard the child being descended into (the previous check
        # tested the parent and so was always true).
        if isinstance(child, Node):
            collect_assigned_and_nested(child, assigned, nested)
89
def collect_captured(ast, assigned, captured, in_function_yet=False):
    """Collect any captured variables that are also in assigned.

    ast             -- the node to scan.
    assigned        -- dict whose keys are the names of interest.
    captured        -- set, updated in place with every name from
                       `assigned` that is referenced within `ast`.
    in_function_yet -- False (the default) for the initial call on a
                       nested function node; True on recursive calls.
                       It decides whether a function's default
                       argument expressions are scanned (see below).

    Nothing is returned.
    """
    if isinstance(ast, Name):
        if ast.name in assigned:
            captured.add(ast.name)
        return

    childnodes = ast.getChildNodes()
    if isinstance(ast, (Lambda, Function)):
        # Formal parameters of the function are excluded from
        # captures we care about in subnodes of the function body.
        new_assigned = assigned.copy()
        remove_argnames(ast.argnames, new_assigned)

        # The body must be scanned with the reduced mapping, otherwise
        # references to the function's own parameters are misreported.
        # Nothing to look for if every name of interest is shadowed.
        if new_assigned:
            for child in childnodes[len(ast.defaults):]:
                collect_captured(child, new_assigned, captured, True)

        # The default argument expressions are "outside" *this*
        # function, even though they are children of the Lambda or
        # Function node. For the outermost function they are
        # evaluated when the function is defined, so they are not
        # captures at all (this is what makes the usual
        # 'lambda x=x: ...' workaround pass the check).
        if not in_function_yet:
            return

        # For a function nested inside another function, the defaults
        # are evaluated in the enclosing function's scope, so they
        # are scanned with that scope's name mapping.
        childnodes = childnodes[:len(ast.defaults)]

    for child in childnodes:
        # Guard the child being descended into (the previous check
        # tested the parent and so was always true).
        if isinstance(child, Node):
            collect_captured(child, assigned, captured, True)
114
115
def remove_argnames(names, fromset):
    """Delete every argument name found in `names` from `fromset`.

    names   -- iterable of argument names; an element may itself be a
               tuple or list of names (nested to any depth), which is
               processed recursively.
    fromset -- dict keyed by name, modified in place. Names that are
               not present are ignored.

    Nothing is returned.
    """
    for element in names:
        # Test for containers first: a list (or a tuple holding one)
        # is unhashable, so probing the dict with it would raise
        # TypeError before the recursive branch was ever reached.
        if isinstance(element, (tuple, list)):
            remove_argnames(element, fromset)
        elif element in fromset:
            del fromset[element]
122
123
def make_result(funcnode, var_name, var_lineno):
    """Build the result tuple describing one captured variable.

    Returns (funcnode.lineno, description, var_name, var_lineno),
    where description is "function 'NAME'" if `funcnode` has a `name`
    attribute and '<lambda>' otherwise.
    """
    if not hasattr(funcnode, 'name'):
        description = '<lambda>'
    else:
        description = 'function %r' % (funcnode.name,)
    return (funcnode.lineno, description, var_name, var_lineno)
130
def report(out, path, results):
    """Write one line to `out` for each entry in `results`.

    A SyntaxError entry produces a "NOT ANALYSED" line; any other
    entry is formatted as a 4-tuple of the kind make_result builds.
    Every line is prefixed with `path`.
    """
    for item in results:
        if isinstance(item, SyntaxError):
            detail = " NOT ANALYSED due to syntax error: %s" % item
        else:
            detail = ":%r %s captures %r assigned at line %d" % item
        out.write(path + detail + "\n")
137
def check(sources, out):
    """Check every path in `sources`, writing progress and findings to `out`.

    A source that is a regular file is checked directly. Any other
    source is walked with os.walk, and every file in it whose
    extension is '.py' is checked. A summary line is written at the
    end.

    Returns the total number of result entries reported across all
    files (this count includes any syntax-error entries).
    """
    # Mutable tally shared with the nested helper below.
    tally = {'results': 0, 'processed': 0, 'suspect': 0}

    def _process(path):
        findings = check_file(path)
        report(out, path, findings)
        tally['results'] += len(findings)
        tally['processed'] += 1
        if findings:
            tally['suspect'] += 1

    for source in sources:
        out.write("Checking %s..." % (source,) + "\n")
        if os.path.isfile(source):
            _process(source)
            continue
        for (dirpath, _dirnames, filenames) in os.walk(source):
            for fn in filenames:
                if os.path.splitext(fn)[1] == '.py':
                    _process(os.path.join(dirpath, fn))

    summary = ("%d suspiciously captured variables in %d out of %d files"
               % (tally['results'], tally['suspect'], tally['processed']))
    out.write(summary + "\n")
    return tally['results']
167
168
# Check the paths given on the command line, or the 'src' directory
# when none are given. Findings go to stderr, and the exit status is 1
# if anything was reported.
sources = sys.argv[1:] or ['src']
if check(sources, sys.stderr) > 0:
    sys.exit(1)
174
175
176 # TODO: self-tests