misc/coding_tools/check-miscaptures.py

   1 #! /usr/bin/python
   2
   3 import os, sys, compiler
   4 from compiler.ast import Node, For, While, ListComp, AssName, Name, Lambda, Function
   5
   6
   7 def check_source(source):
   8     return check_thing(compiler.parse, source)
   9
  10 def check_file(path):
  11     return check_thing(compiler.parseFile, path)
  12
  13 def check_thing(parser, thing):
  14     try:
  15         ast = parser(thing)
  16     except SyntaxError, e:
  17         return [e]
  18     else:
  19         results = []
  20         check_ast(ast, results)
  21         return results
  22
  23 def check_ast(ast, results):
  24     """Check a node outside a loop."""
  25     if isinstance(ast, (For, While, ListComp)):
  26         check_loop(ast, results)
  27     else:
  28         for child in ast.getChildNodes():
  29             if isinstance(ast, Node):
  30                 check_ast(child, results)
  31
  32 def check_loop(ast, results):
  33     """Check a particular outer loop."""
  34
  35     # List comprehensions have a poorly designed AST of the form
  36     # ListComp(exprNode, [ListCompFor(...), ...]), in which the
  37     # result expression is outside the ListCompFor node even though
  38     # it is logically inside the loop(s).
  39     # There may be multiple ListCompFor nodes (in cases such as
  40     #   [lambda: (a,b) for a in ... for b in ...]
  41     # ), and that case they are not nested in the AST. But these
  42     # warts (nonobviously) happen not to matter for our analysis.
  43
  44     assigned = {}  # maps name to lineno of topmost assignment
  45     nested = set()
  46     collect_assigned_and_nested(ast, assigned, nested)
  47
  48     # For each nested function...
  49     for funcnode in nested:
  50         # Check for captured variables in this function.
  51         captured = set()
  52         collect_captured(funcnode, assigned, captured, False)
  53         for name in captured:
  54             # We want to report the outermost capturing function
  55             # (since that is where the workaround will need to be
  56             # added), and the topmost assignment to the variable.
  57             # Just one report per capturing function per variable
  58             # will do.
  59             results.append(make_result(funcnode, name, assigned[name]))
  60
  61         # Check each node in the function body in case it
  62         # contains another 'for' loop.
  63         childnodes = funcnode.getChildNodes()[len(funcnode.defaults):]
  64         for child in childnodes:
  65             check_ast(funcnode, results)
  66
  67 def collect_assigned_and_nested(ast, assigned, nested):
  68     """
  69     Collect the names assigned in this loop, not including names
  70     assigned in nested functions. Also collect the nodes of functions
  71     that are nested one level deep.
  72     """
  73     if isinstance(ast, AssName):
  74         if ast.name not in assigned or assigned[ast.name] > ast.lineno:
  75             assigned[ast.name] = ast.lineno
  76     else:
  77         childnodes = ast.getChildNodes()
  78         if isinstance(ast, (Lambda, Function)):
  79             nested.add(ast)
  80
  81             # The default argument expressions are "outside" the
  82             # function, even though they are children of the
  83             # Lambda or Function node.
  84             childnodes = childnodes[:len(ast.defaults)]
  85
  86         for child in childnodes:
  87             if isinstance(ast, Node):
  88                 collect_assigned_and_nested(child, assigned, nested)
  89
  90 def collect_captured(ast, assigned, captured, in_function_yet):
  91     """Collect any captured variables that are also in assigned."""
  92     if isinstance(ast, Name):
  93         if ast.name in assigned:
  94             captured.add(ast.name)
  95     else:
  96         childnodes = ast.getChildNodes()
  97         if isinstance(ast, (Lambda, Function)):
  98             # Formal parameters of the function are excluded from
  99             # captures we care about in subnodes of the function body.
 100             new_assigned = assigned.copy()
 101             remove_argnames(ast.argnames, new_assigned)
 102
 103             if len(new_assigned) > 0:
 104                 for child in childnodes[len(ast.defaults):]:
 105                     collect_captured(child, new_assigned, captured, True)
 106
 107             # The default argument expressions are "outside" *this*
 108             # function, even though they are children of the Lambda or
 109             # Function node.
 110             if not in_function_yet:
 111                 return
 112             childnodes = childnodes[:len(ast.defaults)]
 113
 114         for child in childnodes:
 115             if isinstance(ast, Node):
 116                 collect_captured(child, assigned, captured, True)
 117
 118
 119 def remove_argnames(names, fromset):
 120     for element in names:
 121         if element in fromset:
 122             del fromset[element]
 123         elif isinstance(element, (tuple, list)):
 124             remove_argnames(element, fromset)
 125
 126
 127 def make_result(funcnode, var_name, var_lineno):
 128     if hasattr(funcnode, 'name'):
 129         func_name = 'function %r' % (funcnode.name,)
 130     else:
 131         func_name = '<lambda>'
 132     return (funcnode.lineno, func_name, var_name, var_lineno)
 133
 134 def report(out, path, results):
 135     for r in results:
 136         if isinstance(r, SyntaxError):
 137             print >>out, path + (" NOT ANALYSED due to syntax error: %s" % r)
 138         else:
 139             print >>out, path + (":%r %s captures %r assigned at line %d" % r)
 140
 141 def check(sources, out):
 142     class Counts:
 143         n = 0
 144         processed_files = 0
 145         suspect_files = 0
 146     counts = Counts()
 147
 148     def _process(path):
 149         results = check_file(path)
 150         report(out, path, results)
 151         counts.n += len(results)
 152         counts.processed_files += 1
 153         if len(results) > 0:
 154             counts.suspect_files += 1
 155
 156     for source in sources:
 157         print >>out, "Checking %s..." % (source,)
 158         if os.path.isfile(source):
 159             _process(source)
 160         else:
 161             for (dirpath, dirnames, filenames) in os.walk(source):
 162                 for fn in filenames:
 163                     (basename, ext) = os.path.splitext(fn)
 164                     if ext == '.py':
 165                         _process(os.path.join(dirpath, fn))
 166
 167     print >>out, ("%d suspiciously captured variables in %d out of %d files"
 168                   % (counts.n, counts.suspect_files, counts.processed_files))
 169     return counts.n
 170
 171
 172 sources = ['src']
 173 if len(sys.argv) > 1:
 174     sources = sys.argv[1:]
 175 if check(sources, sys.stderr) > 0:
 176     sys.exit(1)
 177
 178
 179 # TODO: self-tests