From: david-sarah Date: Fri, 7 Oct 2011 07:41:21 +0000 (+0000) Subject: Add misc/coding_tools/check-miscaptures.py to detect incorrect captures of variables... X-Git-Url: https://git.rkrishnan.org/simplejson/components/%22file:/something?a=commitdiff_plain;h=be1c94893fa08ec87077ef2a944088a3a328ad31;p=tahoe-lafs%2Ftahoe-lafs.git Add misc/coding_tools/check-miscaptures.py to detect incorrect captures of variables declared in a for loop, and a 'make check-miscaptures' Makefile target to run it. (It is also run by 'make code-checks'.) This is a rewritten version that reports much fewer false positives, by determining captured variables more accurately. fixes #1555 --- diff --git a/Makefile b/Makefile index f1fd3a9b..0fffb5f4 100644 --- a/Makefile +++ b/Makefile @@ -121,7 +121,7 @@ upload-coverage: false endif -code-checks: build version-and-path check-interfaces -find-trailing-spaces -check-umids pyflakes +code-checks: build version-and-path check-interfaces check-miscaptures -find-trailing-spaces -check-umids pyflakes version-and-path: $(TAHOE) --version-and-path @@ -130,6 +130,10 @@ check-interfaces: $(TAHOE) @misc/coding_tools/check-interfaces.py 2>&1 |tee violations.txt @echo +check-miscaptures: + $(PYTHON) misc/coding_tools/check-miscaptures.py $(SOURCES) 2>&1 |tee miscaptures.txt + @echo + pyflakes: $(PYTHON) -OOu `which pyflakes` $(SOURCES) |sort |uniq @echo diff --git a/misc/coding_tools/check-miscaptures.py b/misc/coding_tools/check-miscaptures.py new file mode 100644 index 00000000..f7950a92 --- /dev/null +++ b/misc/coding_tools/check-miscaptures.py @@ -0,0 +1,160 @@ +#! /usr/bin/python + +import os, sys, compiler, traceback +from compiler.ast import Node, For, AssName, Name, Lambda, Function + + +def check_source(source): + return check_thing(compiler.parse, source) + +def check_file(path): + return check_thing(compiler.parseFile, path) + +def check_thing(parser, thing): + try: + ast = parser(thing) + except SyntaxError, e: + return [e] + else: + results = [] + check_ast(ast, results) + return results + +def check_ast(ast, results): + """Check a node outside a 'for' loop.""" + if isinstance(ast, For): + check_for(ast, results) + else: + for child in ast.getChildNodes(): + if isinstance(ast, Node): + check_ast(child, results) + +def check_for(ast, results): + """Check a particular outer 'for' loop.""" + + declared = {} # maps name to lineno of declaration + nested = set() + collect_declared_and_nested(ast, declared, nested) + + # For each nested function... + for funcnode in nested: + # Check for captured variables in this function. + captured = set() + collect_captured(funcnode, declared, captured) + for name in captured: + # We want to report the outermost capturing function + # (since that is where the workaround will need to be + # added), and the variable declaration. Just one report + # per capturing function per variable will do. + results.append(make_result(funcnode, name, declared[name])) + + # Check each node in the function body in case it + # contains another 'for' loop. + childnodes = funcnode.getChildNodes()[len(funcnode.defaults):] + for child in childnodes: + check_ast(funcnode, results) + +def collect_declared_and_nested(ast, declared, nested): + """ + Collect the names declared in this 'for' loop, not including + names declared in nested functions. Also collect the nodes of + functions that are nested one level deep. + """ + if isinstance(ast, AssName): + declared[ast.name] = ast.lineno + else: + childnodes = ast.getChildNodes() + if isinstance(ast, (Lambda, Function)): + nested.add(ast) + + # The default argument expressions are "outside" the + # function, even though they are children of the + # Lambda or Function node. + childnodes = childnodes[:len(ast.defaults)] + + for child in childnodes: + if isinstance(ast, Node): + collect_declared_and_nested(child, declared, nested) + +def collect_captured(ast, declared, captured): + """Collect any captured variables that are also in declared.""" + if isinstance(ast, Name): + if ast.name in declared: + captured.add(ast.name) + else: + childnodes = ast.getChildNodes() + + if isinstance(ast, (Lambda, Function)): + # Formal parameters of the function are excluded from + # captures we care about in subnodes of the function body. + declared = declared.copy() + for argname in ast.argnames: + if argname in declared: + del declared[argname] + + for child in childnodes[len(ast.defaults):]: + collect_captured(child, declared, captured) + + # The default argument expressions are "outside" the + # function, even though they are children of the + # Lambda or Function node. + childnodes = childnodes[:len(ast.defaults)] + + for child in childnodes: + if isinstance(ast, Node): + collect_captured(child, declared, captured) + + +def make_result(funcnode, var_name, var_lineno): + if hasattr(funcnode, 'name'): + func_name = 'function %r' % (funcnode.name,) + else: + func_name = '' + return (funcnode.lineno, func_name, var_name, var_lineno) + +def report(out, path, results): + for r in results: + if isinstance(r, SyntaxError): + print >>out, path + (" NOT ANALYSED due to syntax error: %s" % r) + else: + print >>out, path + (":%r %s captures %r declared at line %d" % r) + +def check(sources, out): + class Counts: + n = 0 + processed_files = 0 + suspect_files = 0 + counts = Counts() + + def _process(path): + results = check_file(path) + report(out, path, results) + counts.n += len(results) + counts.processed_files += 1 + if len(results) > 0: + counts.suspect_files += 1 + + for source in sources: + print >>out, "Checking %s..." % (source,) + if os.path.isfile(source): + _process(source) + else: + for (dirpath, dirnames, filenames) in os.walk(source): + for fn in filenames: + (basename, ext) = os.path.splitext(fn) + if ext == '.py': + _process(os.path.join(dirpath, fn)) + + print >>out, ("%d suspiciously captured variables in %d out of %d files" + % (counts.n, counts.suspect_files, counts.processed_files)) + return counts.n + + +sources = ['src'] +if len(sys.argv) > 1: + sources = sys.argv[1:] +if check(sources, sys.stderr) > 0: + sys.exit(1) + + +# TODO: self-tests