From: david-sarah Date: Wed, 29 Jun 2011 18:53:56 +0000 (-0700) Subject: encodingutil: argv and output encodings are always the same on all platforms. Lose... X-Git-Url: https://git.rkrishnan.org/pf/content/simplejson/about.html?a=commitdiff_plain;h=f9d218c6736cf3891f2d6db8fe1ea9558eb4e717;p=tahoe-lafs%2Ftahoe-lafs.git encodingutil: argv and output encodings are always the same on all platforms. Lose the unnecessary generality of them being different. fixes #1120 --- diff --git a/src/allmydata/scripts/runner.py b/src/allmydata/scripts/runner.py index 50f2f07a..3147fb5a 100644 --- a/src/allmydata/scripts/runner.py +++ b/src/allmydata/scripts/runner.py @@ -6,7 +6,7 @@ from twisted.python import usage from allmydata.scripts.common import BaseOptions from allmydata.scripts import debug, create_node, startstop_node, cli, keygen, stats_gatherer -from allmydata.util.encodingutil import quote_output, get_argv_encoding +from allmydata.util.encodingutil import quote_output, get_io_encoding def GROUP(s): # Usage.parseOptions compares argv[1] against command[0], so it will @@ -73,7 +73,7 @@ def runner(argv, c = c.subOptions print >>stdout, str(c) try: - msg = e.args[0].decode(get_argv_encoding()) + msg = e.args[0].decode(get_io_encoding()) except Exception: msg = repr(e) print >>stdout, "%s: %s\n" % (sys.argv[0], quote_output(msg, quotemarks=False)) diff --git a/src/allmydata/test/test_cli.py b/src/allmydata/test/test_cli.py index 254d702c..2dbbba27 100644 --- a/src/allmydata/test/test_cli.py +++ b/src/allmydata/test/test_cli.py @@ -32,7 +32,7 @@ from twisted.python import usage from allmydata.util.assertutil import precondition from allmydata.util.encodingutil import listdir_unicode, unicode_platform, \ - quote_output, get_output_encoding, get_argv_encoding, get_filesystem_encoding, \ + quote_output, get_io_encoding, get_filesystem_encoding, \ unicode_to_output, unicode_to_argv, to_str from allmydata.util.fileutil import abspath_expanduser_unicode @@ -651,8 +651,8 @@ class CreateAlias(GridTestMixin, CLITestMixin, unittest.TestCase): self.set_up_grid() try: - etudes_arg = u"\u00E9tudes".encode(get_argv_encoding()) - lumiere_arg = u"lumi\u00E8re.txt".encode(get_argv_encoding()) + etudes_arg = u"\u00E9tudes".encode(get_io_encoding()) + lumiere_arg = u"lumi\u00E8re.txt".encode(get_io_encoding()) except UnicodeEncodeError: raise unittest.SkipTest("A non-ASCII command argument could not be encoded on this platform.") @@ -980,7 +980,7 @@ class Put(GridTestMixin, CLITestMixin, unittest.TestCase): # tahoe put "\u00E0 trier.txt" "\u00E0 trier.txt" try: - a_trier_arg = u"\u00E0 trier.txt".encode(get_argv_encoding()) + a_trier_arg = u"\u00E0 trier.txt".encode(get_io_encoding()) except UnicodeEncodeError: raise unittest.SkipTest("A non-ASCII command argument could not be encoded on this platform.") @@ -997,7 +997,7 @@ class Put(GridTestMixin, CLITestMixin, unittest.TestCase): d = self.do_cli("create-alias", "tahoe") d.addCallback(lambda res: - self.do_cli("put", rel_fn.encode(get_argv_encoding()), a_trier_arg)) + self.do_cli("put", rel_fn.encode(get_io_encoding()), a_trier_arg)) def _uploaded((rc, out, err)): readcap = out.strip() self.failUnless(readcap.startswith("URI:LIT:"), readcap) @@ -1022,12 +1022,12 @@ class List(GridTestMixin, CLITestMixin, unittest.TestCase): # u"g\u00F6\u00F6d" might not be representable in the argv and/or output encodings. # It is initially included in the directory in any case. try: - good_arg = u"g\u00F6\u00F6d".encode(get_argv_encoding()) + good_arg = u"g\u00F6\u00F6d".encode(get_io_encoding()) except UnicodeEncodeError: good_arg = None try: - good_out = u"g\u00F6\u00F6d".encode(get_output_encoding()) + good_out = u"g\u00F6\u00F6d".encode(get_io_encoding()) except UnicodeEncodeError: good_out = None @@ -1393,8 +1393,8 @@ class Cp(GridTestMixin, CLITestMixin, unittest.TestCase): fn1 = os.path.join(unicode(self.basedir), u"\u00C4rtonwall") try: - fn1_arg = fn1.encode(get_argv_encoding()) - artonwall_arg = u"\u00C4rtonwall".encode(get_argv_encoding()) + fn1_arg = fn1.encode(get_io_encoding()) + artonwall_arg = u"\u00C4rtonwall".encode(get_io_encoding()) except UnicodeEncodeError: raise unittest.SkipTest("A non-ASCII command argument could not be encoded on this platform.") @@ -1432,7 +1432,7 @@ class Cp(GridTestMixin, CLITestMixin, unittest.TestCase): self.failUnlessIn("files whose names could not be converted", err) else: self.failUnlessReallyEqual(rc, 0) - self.failUnlessReallyEqual(out.decode(get_output_encoding()), u"Metallica\n\u00C4rtonwall\n") + self.failUnlessReallyEqual(out.decode(get_io_encoding()), u"Metallica\n\u00C4rtonwall\n") self.failUnlessReallyEqual(err, "") d.addCallback(_check) @@ -1550,9 +1550,9 @@ class Cp(GridTestMixin, CLITestMixin, unittest.TestCase): fn1 = os.path.join(unicode(self.basedir), u"\u00C4rtonwall") try: - fn1_arg = fn1.encode(get_argv_encoding()) + fn1_arg = fn1.encode(get_io_encoding()) del fn1_arg # hush pyflakes - artonwall_arg = u"\u00C4rtonwall".encode(get_argv_encoding()) + artonwall_arg = u"\u00C4rtonwall".encode(get_io_encoding()) except UnicodeEncodeError: raise unittest.SkipTest("A non-ASCII command argument could not be encoded on this platform.") @@ -1574,7 +1574,7 @@ class Cp(GridTestMixin, CLITestMixin, unittest.TestCase): self.failUnlessIn("files whose names could not be converted", err) else: self.failUnlessReallyEqual(rc, 0) - self.failUnlessReallyEqual(out.decode(get_output_encoding()), u"\u00C4rtonwall\n") + self.failUnlessReallyEqual(out.decode(get_io_encoding()), u"\u00C4rtonwall\n") self.failUnlessReallyEqual(err, "") d.addCallback(_check) @@ -1882,7 +1882,7 @@ class Backup(GridTestMixin, CLITestMixin, StallMixin, unittest.TestCase): def test_exclude_options_unicode(self): nice_doc = u"nice_d\u00F8c.lyx" try: - doc_pattern_arg = u"*d\u00F8c*".encode(get_argv_encoding()) + doc_pattern_arg = u"*d\u00F8c*".encode(get_io_encoding()) except UnicodeEncodeError: raise unittest.SkipTest("A non-ASCII command argument could not be encoded on this platform.") @@ -2517,7 +2517,7 @@ class Mkdir(GridTestMixin, CLITestMixin, unittest.TestCase): self.set_up_grid() try: - motorhead_arg = u"tahoe:Mot\u00F6rhead".encode(get_argv_encoding()) + motorhead_arg = u"tahoe:Mot\u00F6rhead".encode(get_io_encoding()) except UnicodeEncodeError: raise unittest.SkipTest("A non-ASCII command argument could not be encoded on this platform.") diff --git a/src/allmydata/test/test_encodingutil.py b/src/allmydata/test/test_encodingutil.py index 17d95498..b49ca874 100644 --- a/src/allmydata/test/test_encodingutil.py +++ b/src/allmydata/test/test_encodingutil.py @@ -36,8 +36,7 @@ if __name__ == "__main__": print " argv = %s" % repr(sys.argv[1]) print " platform = '%s'" % sys.platform print " filesystem_encoding = '%s'" % sys.getfilesystemencoding() - print " output_encoding = '%s'" % sys.stdout.encoding - print " argv_encoding = '%s'" % sys.stdout.encoding + print " io_encoding = '%s'" % sys.stdout.encoding try: tmpdir = tempfile.mkdtemp() for fname in TEST_FILENAMES: @@ -65,7 +64,7 @@ from allmydata.test.common_util import ReallyEqualMixin from allmydata.util import encodingutil from allmydata.util.encodingutil import argv_to_unicode, unicode_to_url, \ unicode_to_output, quote_output, unicode_platform, listdir_unicode, \ - FilenameEncodingError, get_output_encoding, get_filesystem_encoding, _reload + FilenameEncodingError, get_io_encoding, get_filesystem_encoding, _reload from allmydata.dirnode import normalize from twisted.python import usage @@ -73,29 +72,29 @@ from twisted.python import usage class EncodingUtilErrors(ReallyEqualMixin, unittest.TestCase): @patch('sys.stdout') - def test_get_output_encoding(self, mock_stdout): + def test_get_io_encoding(self, mock_stdout): mock_stdout.encoding = 'UTF-8' _reload() - self.failUnlessReallyEqual(get_output_encoding(), 'utf-8') + self.failUnlessReallyEqual(get_io_encoding(), 'utf-8') mock_stdout.encoding = 'cp65001' _reload() - self.failUnlessReallyEqual(get_output_encoding(), 'utf-8') + self.failUnlessReallyEqual(get_io_encoding(), 'utf-8') mock_stdout.encoding = 'koi8-r' expected = sys.platform == "win32" and 'utf-8' or 'koi8-r' _reload() - self.failUnlessReallyEqual(get_output_encoding(), expected) + self.failUnlessReallyEqual(get_io_encoding(), expected) mock_stdout.encoding = 'nonexistent_encoding' if sys.platform == "win32": _reload() - self.failUnlessReallyEqual(get_output_encoding(), 'utf-8') + self.failUnlessReallyEqual(get_io_encoding(), 'utf-8') else: self.failUnlessRaises(AssertionError, _reload) @patch('locale.getpreferredencoding') - def test_get_output_encoding_not_from_stdout(self, mock_locale_getpreferredencoding): + def test_get_io_encoding_not_from_stdout(self, mock_locale_getpreferredencoding): locale # hush pyflakes mock_locale_getpreferredencoding.return_value = 'koi8-r' @@ -106,26 +105,26 @@ class EncodingUtilErrors(ReallyEqualMixin, unittest.TestCase): try: expected = sys.platform == "win32" and 'utf-8' or 'koi8-r' _reload() - self.failUnlessReallyEqual(get_output_encoding(), expected) + self.failUnlessReallyEqual(get_io_encoding(), expected) sys.stdout.encoding = None _reload() - self.failUnlessReallyEqual(get_output_encoding(), expected) + self.failUnlessReallyEqual(get_io_encoding(), expected) mock_locale_getpreferredencoding.return_value = None _reload() - self.failUnlessReallyEqual(get_output_encoding(), 'utf-8') + self.failUnlessReallyEqual(get_io_encoding(), 'utf-8') finally: sys.stdout = old_stdout def test_argv_to_unicode(self): - encodingutil.argv_encoding = 'utf-8' + encodingutil.io_encoding = 'utf-8' self.failUnlessRaises(usage.UsageError, argv_to_unicode, lumiere_nfc.encode('latin1')) def test_unicode_to_output(self): - encodingutil.output_encoding = 'koi8-r' + encodingutil.io_encoding = 'koi8-r' self.failUnlessRaises(UnicodeEncodeError, unicode_to_output, lumiere_nfc) @patch('os.listdir') @@ -191,7 +190,7 @@ class EncodingUtil(ReallyEqualMixin): if 'argv' not in dir(self): return - mock.encoding = self.output_encoding + mock.encoding = self.io_encoding argu = lumiere_nfc argv = self.argv _reload() @@ -205,7 +204,7 @@ class EncodingUtil(ReallyEqualMixin): if 'argv' not in dir(self): return - mock.encoding = self.output_encoding + mock.encoding = self.io_encoding _reload() self.failUnlessReallyEqual(unicode_to_output(lumiere_nfc), self.argv) @@ -378,13 +377,13 @@ class QuoteOutput(ReallyEqualMixin, unittest.TestCase): check(u"\u2621\"", u"'\u2621\"'", True) def test_quote_output_default(self): - encodingutil.output_encoding = 'ascii' + encodingutil.io_encoding = 'ascii' self.test_quote_output_ascii(None) - encodingutil.output_encoding = 'latin1' + encodingutil.io_encoding = 'latin1' self.test_quote_output_latin1(None) - encodingutil.output_encoding = 'utf-8' + encodingutil.io_encoding = 'utf-8' self.test_quote_output_utf8(None) @@ -393,8 +392,7 @@ class UbuntuKarmicUTF8(EncodingUtil, unittest.TestCase): argv = 'lumi\xc3\xa8re' platform = 'linux2' filesystem_encoding = 'UTF-8' - output_encoding = 'UTF-8' - argv_encoding = 'UTF-8' + io_encoding = 'UTF-8' dirlist = ['test_file', '\xc3\x84rtonwall.mp3', 'Blah blah.txt'] class UbuntuKarmicLatin1(EncodingUtil, unittest.TestCase): @@ -402,8 +400,7 @@ class UbuntuKarmicLatin1(EncodingUtil, unittest.TestCase): argv = 'lumi\xe8re' platform = 'linux2' filesystem_encoding = 'ISO-8859-1' - output_encoding = 'ISO-8859-1' - argv_encoding = 'ISO-8859-1' + io_encoding = 'ISO-8859-1' dirlist = ['test_file', 'Blah blah.txt', '\xc4rtonwall.mp3'] class Windows(EncodingUtil, unittest.TestCase): @@ -411,8 +408,7 @@ class Windows(EncodingUtil, unittest.TestCase): argv = 'lumi\xc3\xa8re' platform = 'win32' filesystem_encoding = 'mbcs' - output_encoding = 'utf-8' - argv_encoding = 'utf-8' + io_encoding = 'utf-8' dirlist = [u'Blah blah.txt', u'test_file', u'\xc4rtonwall.mp3'] class MacOSXLeopard(EncodingUtil, unittest.TestCase): @@ -420,22 +416,19 @@ class MacOSXLeopard(EncodingUtil, unittest.TestCase): output = 'lumi\xc3\xa8re' platform = 'darwin' filesystem_encoding = 'utf-8' - output_encoding = 'UTF-8' - argv_encoding = 'UTF-8' + io_encoding = 'UTF-8' dirlist = [u'A\u0308rtonwall.mp3', u'Blah blah.txt', u'test_file'] class MacOSXLeopard7bit(EncodingUtil, unittest.TestCase): uname = 'Darwin g5.local 9.8.0 Darwin Kernel Version 9.8.0: Wed Jul 15 16:57:01 PDT 2009; root:xnu-1228.15.4~1/RELEASE_PPC Power Macintosh powerpc' platform = 'darwin' filesystem_encoding = 'utf-8' - output_encoding = 'US-ASCII' - argv_encoding = 'US-ASCII' + io_encoding = 'US-ASCII' dirlist = [u'A\u0308rtonwall.mp3', u'Blah blah.txt', u'test_file'] class OpenBSD(EncodingUtil, unittest.TestCase): uname = 'OpenBSD 4.1 GENERIC#187 i386 Intel(R) Celeron(R) CPU 2.80GHz ("GenuineIntel" 686-class)' platform = 'openbsd4' filesystem_encoding = '646' - output_encoding = '646' - argv_encoding = '646' + io_encoding = '646' # Oops, I cannot write filenames containing non-ascii characters diff --git a/src/allmydata/util/encodingutil.py b/src/allmydata/util/encodingutil.py index 45eaaf39..2f3bfeca 100644 --- a/src/allmydata/util/encodingutil.py +++ b/src/allmydata/util/encodingutil.py @@ -34,12 +34,11 @@ def check_encoding(encoding): raise AssertionError("The character encoding '%s' is not supported for conversion." % (encoding,)) filesystem_encoding = None -output_encoding = None -argv_encoding = None +io_encoding = None is_unicode_platform = False def _reload(): - global filesystem_encoding, output_encoding, argv_encoding, is_unicode_platform + global filesystem_encoding, io_encoding, is_unicode_platform filesystem_encoding = canonical_encoding(sys.getfilesystemencoding()) check_encoding(filesystem_encoding) @@ -47,20 +46,19 @@ def _reload(): if sys.platform == 'win32': # On Windows we install UTF-8 stream wrappers for sys.stdout and # sys.stderr, and reencode the arguments as UTF-8 (see scripts/runner.py). - output_encoding = 'utf-8' + io_encoding = 'utf-8' else: - outenc = None + ioenc = None if hasattr(sys.stdout, 'encoding'): - outenc = sys.stdout.encoding - if outenc is None: + ioenc = sys.stdout.encoding + if ioenc is None: try: - outenc = locale.getpreferredencoding() + ioenc = locale.getpreferredencoding() except Exception: pass # work around - output_encoding = canonical_encoding(outenc) + io_encoding = canonical_encoding(ioenc) - check_encoding(output_encoding) - argv_encoding = output_encoding + check_encoding(io_encoding) is_unicode_platform = sys.platform in ["win32", "darwin"] @@ -73,17 +71,11 @@ def get_filesystem_encoding(): """ return filesystem_encoding -def get_output_encoding(): +def get_io_encoding(): """ - Returns expected encoding for writing to stdout or stderr. + Returns expected encoding for writing to stdout or stderr, and for arguments in sys.argv. """ - return output_encoding - -def get_argv_encoding(): - """ - Returns expected encoding for command-line arguments. - """ - return argv_encoding + return io_encoding def argv_to_unicode(s): """ @@ -92,10 +84,10 @@ def argv_to_unicode(s): precondition(isinstance(s, str), s) try: - return unicode(s, argv_encoding) + return unicode(s, io_encoding) except UnicodeDecodeError: raise usage.UsageError("Argument %s cannot be decoded as %s." % - (quote_output(s), argv_encoding)) + (quote_output(s), io_encoding)) def argv_to_abspath(s): """ @@ -117,7 +109,7 @@ def unicode_to_argv(s, mangle=False): # This must be the same as 'mangle' in bin/tahoe-script.template. return str(re.sub(ur'[^\x20-\x7F]', lambda m: u'\x7F%x;' % (ord(m.group(0)),), s)) else: - return s.encode(argv_encoding) + return s.encode(io_encoding) def unicode_to_url(s): """ @@ -148,16 +140,16 @@ def unicode_to_output(s): precondition(isinstance(s, unicode), s) try: - out = s.encode(output_encoding) + out = s.encode(io_encoding) except (UnicodeEncodeError, UnicodeDecodeError): - raise UnicodeEncodeError(output_encoding, s, 0, 0, + raise UnicodeEncodeError(io_encoding, s, 0, 0, "A string could not be encoded as %s for output to the terminal:\n%r" % - (output_encoding, repr(s))) + (io_encoding, repr(s))) if PRINTABLE_8BIT.search(out) is None: - raise UnicodeEncodeError(output_encoding, s, 0, 0, + raise UnicodeEncodeError(io_encoding, s, 0, 0, "A string encoded as %s for output to the terminal contained unsafe bytes:\n%r" % - (output_encoding, repr(s))) + (io_encoding, repr(s))) return out @@ -212,7 +204,7 @@ def quote_output(s, quotemarks=True, encoding=None): if MUST_DOUBLE_QUOTE.search(s) is None: try: - out = s.encode(encoding or output_encoding) + out = s.encode(encoding or io_encoding) if quotemarks or out.startswith('"'): return "'%s'" % (out,) else: @@ -221,7 +213,7 @@ def quote_output(s, quotemarks=True, encoding=None): pass escaped = ESCAPABLE_UNICODE.sub(_unicode_escape, s) - return '"%s"' % (escaped.encode(encoding or output_encoding, 'backslashreplace'),) + return '"%s"' % (escaped.encode(encoding or io_encoding, 'backslashreplace'),) def quote_path(path, quotemarks=True): return quote_output("/".join(map(to_str, path)), quotemarks=quotemarks)