]> git.rkrishnan.org Git - tahoe-lafs/tahoe-lafs.git/blob - misc/coding_tools/check-miscaptures.py
Merge pull request #236 from daira/2725.timezone-test.0
[tahoe-lafs/tahoe-lafs.git] / misc / coding_tools / check-miscaptures.py
1 #! /usr/bin/python
2
3 import os, sys, compiler
4 from compiler.ast import Node, For, While, ListComp, AssName, Name, Lambda, Function
5
6
def check_source(source):
    """Analyse Python source code supplied as a string.

    The text is parsed with compiler.parse and the outcome of
    check_thing is returned unchanged.
    """
    parse_text = compiler.parse
    return check_thing(parse_text, source)
9
def check_file(path):
    """Analyse the Python source file at `path`.

    The file is parsed with compiler.parseFile and the outcome of
    check_thing is returned unchanged.
    """
    parse_path = compiler.parseFile
    return check_thing(parse_path, path)
12
def check_thing(parser, thing):
    """Parse `thing` with `parser` and check the resulting AST.

    parser: a callable that turns `thing` into an AST (check_source
            passes compiler.parse, check_file passes compiler.parseFile).
    thing:  the single argument handed to `parser`.

    Returns the SyntaxError instance if parsing fails (it is returned,
    not raised, so that callers can detect it with isinstance);
    otherwise returns the list of results filled in by check_ast.
    """
    try:
        ast = parser(thing)
    except SyntaxError as e:
        # The 'as' spelling is accepted by Python 2.6+ and Python 3,
        # unlike the legacy 'except SyntaxError, e' form.
        return e

    results = []
    check_ast(ast, results)
    return results
22
def check_ast(ast, results):
    """Check a node outside a loop.

    A loop node (For, While or ListComp) is handed to check_loop.
    For any other node the search continues recursively through its
    child nodes. `results` is passed along so that check_loop can
    append its findings to it.
    """
    if isinstance(ast, (For, While, ListComp)):
        check_loop(ast, results)
        return

    for child in ast.getChildNodes():
        # Test the child, since that is the node being recursed into.
        if isinstance(child, Node):
            check_ast(child, results)
31
def check_loop(ast, results):
    """Check a particular outer loop.

    Collects the names assigned in the loop and the functions nested
    one level deep inside it, then appends one result (see make_result)
    to `results` for every such function / captured-name pair. Finally
    the body of each nested function is checked for further loops.
    """

    # List comprehensions have a poorly designed AST of the form
    # ListComp(exprNode, [ListCompFor(...), ...]), in which the
    # result expression is outside the ListCompFor node even though
    # it is logically inside the loop(s).
    # There may be multiple ListCompFor nodes (in cases such as
    #   [lambda: (a,b) for a in ... for b in ...]
    # ), and in that case they are not nested in the AST. But these
    # warts (nonobviously) happen not to matter for our analysis.

    assigned = {}  # maps name to lineno of topmost assignment
    nested = set()
    collect_assigned_and_nested(ast, assigned, nested)

    # For each nested function...
    for funcnode in nested:
        # Check for captured variables in this function.
        captured = set()
        collect_captured(funcnode, assigned, captured, False)
        for name in captured:
            # We want to report the outermost capturing function
            # (since that is where the workaround will need to be
            # added), and the topmost assignment to the variable.
            # Just one report per capturing function per variable
            # will do.
            results.append(make_result(funcnode, name, assigned[name]))

        # Check each node in the function body in case it
        # contains another 'for' loop. The default argument
        # expressions are skipped: they belong to the enclosing loop
        # and have already been scanned as part of it.
        # NOTE(review): this slice assumes the defaults are the first
        # child nodes; a decorated Function may list its decorators
        # first, which would shift the slice by one -- confirm against
        # compiler.ast.
        childnodes = funcnode.getChildNodes()[len(funcnode.defaults):]
        for child in childnodes:
            # Recurse into the body node itself. Re-checking funcnode
            # here would scan the whole function (defaults included)
            # once per body child and could report findings twice.
            check_ast(child, results)
66
def collect_assigned_and_nested(ast, assigned, nested):
    """
    Collect the names assigned in this loop, not including names
    assigned in nested functions. Also collect the nodes of functions
    that are nested one level deep.

    assigned: dict updated in place; maps each assigned name to the
              smallest line number at which an AssName for it was seen.
    nested:   set updated in place with the Lambda/Function nodes found.
    """
    if isinstance(ast, AssName):
        if ast.name not in assigned or assigned[ast.name] > ast.lineno:
            assigned[ast.name] = ast.lineno
        return

    childnodes = ast.getChildNodes()
    if isinstance(ast, (Lambda, Function)):
        nested.add(ast)

        # The default argument expressions are "outside" the
        # function, even though they are children of the
        # Lambda or Function node. Only they are scanned further;
        # the function body is deliberately not descended into.
        # NOTE(review): assumes the defaults are the leading child
        # nodes; decorators on a Function may precede them -- confirm.
        childnodes = childnodes[:len(ast.defaults)]

    for child in childnodes:
        # Test the child, since that is the node being recursed into.
        if isinstance(child, Node):
            collect_assigned_and_nested(child, assigned, nested)
89
def collect_captured(ast, assigned, captured, in_function_yet):
    """Collect any captured variables that are also in assigned.

    ast:             the node to scan.
    assigned:        dict whose keys are the names assigned in the
                     enclosing loop; it is not modified.
    captured:        set updated in place with every Name found that is
                     a key of `assigned`.
    in_function_yet: False only for the initial call made on a nested
                     function node; every recursive call passes True.
    """
    if isinstance(ast, Name):
        if ast.name in assigned:
            captured.add(ast.name)
        return

    childnodes = ast.getChildNodes()
    if isinstance(ast, (Lambda, Function)):
        # Formal parameters of the function are excluded from
        # captures we care about in subnodes of the function body.
        new_assigned = assigned.copy()
        remove_argnames(ast.argnames, new_assigned)

        # Nothing left to look for if the parameters shadow every
        # assigned name.
        if new_assigned:
            for child in childnodes[len(ast.defaults):]:
                collect_captured(child, new_assigned, captured, True)

        # The default argument expressions are "outside" *this*
        # function, even though they are children of the Lambda or
        # Function node. For the outermost function they belong to
        # the loop itself, so they are not captures at all.
        if not in_function_yet:
            return
        # NOTE(review): assumes the defaults are the leading child
        # nodes; decorators on a Function may precede them -- confirm.
        childnodes = childnodes[:len(ast.defaults)]

    for child in childnodes:
        # Test the child, since that is the node being recursed into.
        if isinstance(child, Node):
            collect_captured(child, assigned, captured, True)
117
118
def remove_argnames(names, fromset):
    """Delete every formal parameter name in `names` from `fromset`.

    names:   a sequence of argument names. An element may itself be a
             tuple or list of names (nested tuple parameters), which is
             processed recursively.
    fromset: a dict keyed by name; it is modified in place. Names that
             are not present are ignored.
    """
    for element in names:
        # Handle nested sequences before the membership test: looking
        # up a list in a dict raises TypeError (lists are unhashable).
        if isinstance(element, (tuple, list)):
            remove_argnames(element, fromset)
        elif element in fromset:
            del fromset[element]
125
126
def make_result(funcnode, var_name, var_lineno):
    """Build the result tuple describing one suspicious capture.

    Returns (funcnode.lineno, description, var_name, var_lineno), where
    the description is "function '<name>'" when the node has a `name`
    attribute and '<lambda>' otherwise.
    """
    is_named = hasattr(funcnode, 'name')
    description = ('function %r' % (funcnode.name,)) if is_named else '<lambda>'
    return (funcnode.lineno, description, var_name, var_lineno)
133
def report(out, path, results):
    """Write one line per result to `out`, prefixed with `path`.

    out:     a file-like object with a write() method.
    path:    the name of the analysed file, used as the line prefix.
    results: an iterable of (func_lineno, func_name, var_name,
             var_lineno) tuples as produced by make_result.

    The results are written in sorted order (by function line number
    first) so that the output is deterministic; the order in which they
    were collected depends on set iteration order.
    """
    for r in sorted(results):
        # out.write() behaves the same under Python 2 and Python 3,
        # unlike the 'print >>out' statement.
        out.write(path + (":%r %s captures %r assigned at line %d" % r) + "\n")
137
def check(sources, out):
    """Analyse every path in `sources`, writing a report to `out`.

    A path that is a regular file is analysed directly; any other path
    is walked with os.walk and every file whose extension is '.py' is
    analysed. A summary is printed at the end.

    Returns the total number of suspicious captures found.
    """
    # A mutable container lets the nested helper update the totals
    # without needing to rebind names in the enclosing scope.
    tally = {
        'n': 0,
        'processed_files': 0,
        'suspect_files': 0,
        'error_files': 0,
    }

    def _analyse(path):
        results = check_file(path)
        if isinstance(results, SyntaxError):
            print >>out, path + (" NOT ANALYSED due to syntax error: %s" % results)
            tally['error_files'] += 1
            return

        report(out, path, results)
        tally['n'] += len(results)
        tally['processed_files'] += 1
        if len(results) > 0:
            tally['suspect_files'] += 1

    for source in sources:
        print >>out, "Checking %s..." % (source,)
        if os.path.isfile(source):
            _analyse(source)
            continue

        for (dirpath, dirnames, filenames) in os.walk(source):
            for fn in filenames:
                extension = os.path.splitext(fn)[1]
                if extension == '.py':
                    _analyse(os.path.join(dirpath, fn))

    print >>out, ("%d suspiciously captured variables in %d out of %d file(s)."
                  % (tally['n'], tally['suspect_files'], tally['processed_files']))
    if tally['error_files'] > 0:
        print >>out, ("%d file(s) not processed due to syntax errors."
                      % (tally['error_files'],))
    return tally['n']
175
176
# Paths given on the command line are checked; with none, 'src' is used.
sources = sys.argv[1:] or ['src']

# Exit with status 1 when at least one suspicious capture was counted.
if check(sources, sys.stderr) > 0:
    sys.exit(1)
182
183
184 # TODO: self-tests