From e2c7ad1d881312b3cd1dce9b976629cc4160985b Mon Sep 17 00:00:00 2001 From: david-sarah <david-sarah@jacaranda.org> Date: Tue, 15 Jun 2010 21:20:12 -0700 Subject: [PATCH] stringutils.py: Add encoding argument to quote_output. Also work around a bug in locale.getpreferredencoding on older Pythons. --- src/allmydata/util/stringutils.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/src/allmydata/util/stringutils.py b/src/allmydata/util/stringutils.py index 99c0d8c5..890100e0 100644 --- a/src/allmydata/util/stringutils.py +++ b/src/allmydata/util/stringutils.py @@ -10,15 +10,17 @@ import unicodedata from allmydata.util.assertutil import precondition from twisted.python import usage import locale +from allmydata.util import log def _canonical_encoding(encoding): if encoding is None: + log.msg("Warning: falling back to UTF-8 encoding.", level=log.WEIRD) encoding = 'utf-8' encoding = encoding.lower() if encoding == "cp65001": encoding = 'utf-8' - elif encoding == "us-ascii" or encoding == "646": + elif encoding == "us-ascii" or encoding == "646" or encoding == "ansi_x3.4-1968": encoding = 'ascii' # sometimes Python returns an encoding name that it doesn't support for conversion @@ -39,7 +41,15 @@ def _reload(): global filesystem_encoding, output_encoding, argv_encoding, is_unicode_platform filesystem_encoding = _canonical_encoding(sys.getfilesystemencoding()) - output_encoding = _canonical_encoding(sys.stdout.encoding or locale.getpreferredencoding()) + + outenc = sys.stdout.encoding + if outenc is None: + try: + outenc = locale.getpreferredencoding() + except Exception: + pass # work around <http://bugs.python.org/issue1443504> + output_encoding = _canonical_encoding(outenc) + if sys.platform == 'win32': # Unicode arguments are not supported on Windows yet; see #565 and #1074. argv_encoding = 'ascii' @@ -126,7 +136,7 @@ def unicode_to_output(s): (output_encoding, repr(s))) return out -def quote_output(s, quotemarks=True): +def quote_output(s, quotemarks=True, encoding=None): """ Encode either a Unicode string or a UTF-8-encoded bytestring for representation on stdout or stderr, tolerating errors. If 'quotemarks' is True, the string is @@ -142,7 +152,7 @@ def quote_output(s, quotemarks=True): return 'b' + repr(s) try: - out = s.encode(output_encoding) + out = s.encode(encoding or output_encoding) except (UnicodeEncodeError, UnicodeDecodeError): return repr(s) -- 2.45.2