# Cleanup hook (presumably the unittest tearDown): calls _reload(), which is
# defined outside this excerpt -- TODO confirm what state it restores.
def tearDown(self):
_reload()
# Shared assertion helper for the quote_output tests.
#   inp             -- input string (str or unicode)
#   out             -- expected result of quote_output(inp) with quotemarks on
#   enc             -- value passed as quote_output's 'encoding' argument
#   optional_quotes -- if true, the expected quotemarks=False result is 'out'
#                      with its first and last character removed
#   quote_newlines  -- forwarded unchanged to every quote_output call
#                      (new parameter introduced by this change)
- def _check(self, inp, out, enc, optional_quotes):
+ def _check(self, inp, out, enc, optional_quotes, quote_newlines):
out2 = out
if optional_quotes:
out2 = out2[1:-1]
- self.failUnlessReallyEqual(quote_output(inp, encoding=enc), out)
- self.failUnlessReallyEqual(quote_output(inp, encoding=enc, quotemarks=False), out2)
+ self.failUnlessReallyEqual(quote_output(inp, encoding=enc, quote_newlines=quote_newlines), out)
+ self.failUnlessReallyEqual(quote_output(inp, encoding=enc, quotemarks=False, quote_newlines=quote_newlines), out2)
# When the expected output is a b"..." literal, the cross-type checks below
# are skipped. Otherwise the same expectations are re-checked with the input
# converted to the other string type (str -> unicode, unicode -> UTF-8 str).
if out[0:2] == 'b"':
pass
elif isinstance(inp, str):
- self.failUnlessReallyEqual(quote_output(unicode(inp), encoding=enc), out)
- self.failUnlessReallyEqual(quote_output(unicode(inp), encoding=enc, quotemarks=False), out2)
+ self.failUnlessReallyEqual(quote_output(unicode(inp), encoding=enc, quote_newlines=quote_newlines), out)
+ self.failUnlessReallyEqual(quote_output(unicode(inp), encoding=enc, quotemarks=False, quote_newlines=quote_newlines), out2)
else:
- self.failUnlessReallyEqual(quote_output(inp.encode('utf-8'), encoding=enc), out)
- self.failUnlessReallyEqual(quote_output(inp.encode('utf-8'), encoding=enc, quotemarks=False), out2)
+ self.failUnlessReallyEqual(quote_output(inp.encode('utf-8'), encoding=enc, quote_newlines=quote_newlines), out)
+ self.failUnlessReallyEqual(quote_output(inp.encode('utf-8'), encoding=enc, quotemarks=False, quote_newlines=quote_newlines), out2)
# Encoding-independent quote_output cases; the per-encoding tests in this
# excerpt call this with their own 'enc'.
# The local check() binds 'enc' and forwards to self._check. With this change
# it also accepts quote_newlines (default None) and passes it through.
def _test_quote_output_all(self, enc):
- def check(inp, out, optional_quotes=False):
- self._check(inp, out, enc, optional_quotes)
+ def check(inp, out, optional_quotes=False, quote_newlines=None):
+ self._check(inp, out, enc, optional_quotes, quote_newlines)
# optional single quotes
check("foo", "'foo'", True)
check("\\", "'\\'", True)
check("$\"`", "'$\"`'", True)
+ check("\n", "'\n'", True, quote_newlines=False)
# mandatory single quotes
check("\"", "'\"'")
# double quotes
check("'", "\"'\"")
- check("\n", "\"\\x0a\"")
+ check("\n", "\"\\x0a\"", quote_newlines=True)
check("\x00", "\"\\x00\"")
# invalid Unicode and astral planes
check("\x00\"$\\`\x80\xFF", "b\"\\x00\\\"\\$\\\\\\`\\x80\\xff\"")
# quote_output with encoding='ascii': runs the shared cases, then checks that
# non-ASCII characters come back as backslash escapes inside double quotes.
# The two added cases cover a unicode newline: kept literally in single quotes
# when quote_newlines=False, escaped as \x0a when quote_newlines=True.
def test_quote_output_ascii(self, enc='ascii'):
- def check(inp, out, optional_quotes=False):
- self._check(inp, out, enc, optional_quotes)
+ def check(inp, out, optional_quotes=False, quote_newlines=None):
+ self._check(inp, out, enc, optional_quotes, quote_newlines)
self._test_quote_output_all(enc)
check(u"\u00D7", "\"\\xd7\"")
check(u"\u2621", "\"\\u2621\"")
check(u"'\u2621", "\"'\\u2621\"")
check(u"\"\u2621", "\"\\\"\\u2621\"")
+ check(u"\n", "'\n'", True, quote_newlines=False)
+ check(u"\n", "\"\\x0a\"", quote_newlines=True)
# quote_output with encoding='latin1'. The local check() encodes the expected
# unicode output to latin1 before handing it to self._check.
# U+00D7 is expected unescaped in single quotes; U+2621 is expected as a
# \u escape in double quotes. The two added cases cover a newline with
# quote_newlines=False (literal) and quote_newlines=True (\x0a).
def test_quote_output_latin1(self, enc='latin1'):
- def check(inp, out, optional_quotes=False):
- self._check(inp, out.encode('latin1'), enc, optional_quotes)
+ def check(inp, out, optional_quotes=False, quote_newlines=None):
+ self._check(inp, out.encode('latin1'), enc, optional_quotes, quote_newlines)
self._test_quote_output_all(enc)
check(u"\u00D7", u"'\u00D7'", True)
check(u"\u2621", u"\"\\u2621\"")
check(u"'\u2621", u"\"'\\u2621\"")
check(u"\"\u2621", u"\"\\\"\\u2621\"")
+ check(u"\n", u"'\n'", True, quote_newlines=False)
+ check(u"\n", u"\"\\x0a\"", quote_newlines=True)
# quote_output with encoding='utf-8'. The local check() encodes the expected
# unicode output to UTF-8 before handing it to self._check.
# U+2621 is expected to pass through unescaped here. The two added cases cover
# a newline with quote_newlines=False (literal) and quote_newlines=True (\x0a).
def test_quote_output_utf8(self, enc='utf-8'):
- def check(inp, out, optional_quotes=False):
- self._check(inp, out.encode('utf-8'), enc, optional_quotes)
+ def check(inp, out, optional_quotes=False, quote_newlines=None):
+ self._check(inp, out.encode('utf-8'), enc, optional_quotes, quote_newlines)
self._test_quote_output_all(enc)
check(u"\u2621", u"'\u2621'", True)
check(u"'\u2621", u"\"'\u2621\"")
check(u"\"\u2621", u"'\"\u2621'")
check(u"\u2621\"", u"'\u2621\"'", True)
+ check(u"\n", u"'\n'", True, quote_newlines=False)
+ check(u"\n", u"\"\\x0a\"", quote_newlines=True)
def test_quote_output_default(self):
encodingutil.io_encoding = 'ascii'
return out
# Replacement callback for matches of ESCAPABLE_UNICODE (see its use in
# quote_output). 'm' is the regex match; the matched text is m.group(0).
#   - ", $, ` and \ are returned prefixed with a backslash.
#   - New in this change: a newline is returned unchanged when quote_newlines
#     is false.
#   - A two-character match is combined into a single code point using the
#     surrogate-pair arithmetic below.
# NOTE(review): part of the body is elided by the diff hunk (hence the two
# consecutive 'else:' lines); only the final \xNN branch is visible here.
-def _unicode_escape(m):
+def _unicode_escape(m, quote_newlines):
u = m.group(0)
- if u == '"' or u == '$' or u == '`' or u == '\\':
+ if u == u'"' or u == u'$' or u == u'`' or u == u'\\':
return u'\\' + u
+ elif u == u'\n' and not quote_newlines:
+ return u
if len(u) == 2:
codepoint = (ord(u[0])-0xD800)*0x400 + ord(u[1])-0xDC00 + 0x10000
else:
else:
return u'\\x%02x' % (codepoint,)
# Replacement callback for matches of ESCAPABLE_8BIT (used by quote_output for
# byte strings that are not valid UTF-8). 'm' is the regex match.
#   - ", $, ` and \ are returned prefixed with a backslash.
#   - New in this change: a newline is returned unchanged when quote_newlines
#     is false.
#   - Any other matched byte is returned as a two-digit \xNN hex escape.
-def _str_escape(m):
+def _str_escape(m, quote_newlines):
c = m.group(0)
if c == '"' or c == '$' or c == '`' or c == '\\':
return '\\' + c
+ elif c == '\n' and not quote_newlines:
+ return c
else:
return '\\x%02x' % (ord(c),)
# Both patterns match any single character OUTSIDE the listed "safe" set;
# quote_output falls back to double-quoted, escaped output when one matches.
#   MUST_DOUBLE_QUOTE_NL -- same character class as the pre-change
#                           MUST_DOUBLE_QUOTE: newline is not in the safe set,
#                           so a newline forces double quoting.
#   MUST_DOUBLE_QUOTE    -- redefined with \n added to the safe set, so a
#                           newline alone does not force double quoting.
-MUST_DOUBLE_QUOTE = re.compile(ur'[^\x20-\x26\x28-\x7E\u00A0-\uD7FF\uE000-\uFDCF\uFDF0-\uFFFC]', re.DOTALL)
+MUST_DOUBLE_QUOTE_NL = re.compile(ur'[^\x20-\x26\x28-\x7E\u00A0-\uD7FF\uE000-\uFDCF\uFDF0-\uFFFC]', re.DOTALL)
+MUST_DOUBLE_QUOTE = re.compile(ur'[^\n\x20-\x26\x28-\x7E\u00A0-\uD7FF\uE000-\uFDCF\uFDF0-\uFFFC]', re.DOTALL)
# If we must double-quote, then we have to escape ", $, ` and \ (see the
# escape callbacks), but need not escape '.
ESCAPABLE_UNICODE = re.compile(ur'([\uD800-\uDBFF][\uDC00-\uDFFF])|' # valid surrogate pairs
# Matches every byte except space, '!', '#', and the ranges \x25-\x5B,
# \x5D-\x5F and \x61-\x7E -- so ", $, \, ` and all control and non-ASCII
# bytes match.
ESCAPABLE_8BIT = re.compile( r'[^ !#\x25-\x5B\x5D-\x5F\x61-\x7E]', re.DOTALL)
# NOTE(review): the new quote_newlines parameter is inserted BEFORE 'encoding'
# in positional order. A caller that passed encoding as the third positional
# argument would now bind it to quote_newlines -- confirm every caller passes
# encoding by keyword (all calls visible in this excerpt do).
-def quote_output(s, quotemarks=True, encoding=None):
+def quote_output(s, quotemarks=True, quote_newlines=None, encoding=None):
"""
Encode either a Unicode string or a UTF-8-encoded bytestring for representation
on stdout or stderr, tolerating errors. If 'quotemarks' is True, the string is
always quoted; otherwise, it is quoted only if necessary to avoid ambiguity or
- control bytes in the output.
+ control bytes in the output. (Newlines are counted as control bytes iff
+ quote_newlines is True.)
+
Quoting may use either single or double quotes. Within single quotes, all
characters stand for themselves, and ' will not appear. Within double quotes,
Python-compatible backslash escaping is used.
+
+ If not explicitly given, quote_newlines is True when quotemarks is True.
"""
precondition(isinstance(s, (str, unicode)), s)
# None means "not specified": quote_newlines then takes the value of quotemarks.
+ if quote_newlines is None:
+ quote_newlines = quotemarks
if isinstance(s, str):
try:
s = s.decode('utf-8')
except UnicodeDecodeError:
# Not valid UTF-8: emit a b"..." literal. The escape callback takes an extra
# argument, so a lambda closes over quote_newlines for re.sub's one-argument
# callback.
- return 'b"%s"' % (ESCAPABLE_8BIT.sub(_str_escape, s),)
+ return 'b"%s"' % (ESCAPABLE_8BIT.sub(lambda m: _str_escape(m, quote_newlines), s),)
# 'x and A or B' selects A when x is true, else B. This is safe here because
# A is a compiled pattern object, which is always truthy.
- if MUST_DOUBLE_QUOTE.search(s) is None:
+ must_double_quote = quote_newlines and MUST_DOUBLE_QUOTE_NL or MUST_DOUBLE_QUOTE
+ if must_double_quote.search(s) is None:
try:
out = s.encode(encoding or io_encoding)
# NOTE(review): the body of this 'if' is elided by the diff hunk.
if quotemarks or out.startswith('"'):
except (UnicodeDecodeError, UnicodeEncodeError):
pass
# Reached when double quoting is required or the encode above failed: escape
# the string, then encode it with 'backslashreplace' for anything the target
# encoding cannot represent.
- escaped = ESCAPABLE_UNICODE.sub(_unicode_escape, s)
+ escaped = ESCAPABLE_UNICODE.sub(lambda m: _unicode_escape(m, quote_newlines), s)
return '"%s"' % (escaped.encode(encoding or io_encoding, 'backslashreplace'),)
# Joins the components of 'path' with "/" (each passed through to_str, defined
# outside this excerpt) and quotes the result via quote_output.
# With this change newlines are always escaped (quote_newlines=True), even
# when quotemarks is False.
def quote_path(path, quotemarks=True):
- return quote_output("/".join(map(to_str, path)), quotemarks=quotemarks)
+ return quote_output("/".join(map(to_str, path)), quotemarks=quotemarks, quote_newlines=True)
def unicode_platform():