if len(sys.argv) != 2:
print "Usage: %s lumi<e-grave>re" % sys.argv[0]
sys.exit(1)
-
+
+ if sys.platform == "win32":
+ try:
+ from allmydata.windows.fixups import initialize
+ except ImportError:
+ print "set PYTHONPATH to the src directory"
+ sys.exit(1)
+ initialize()
+
print
print "class MyWeirdOS(EncodingUtil, unittest.TestCase):"
print " uname = '%s'" % ' '.join(platform.uname())
- if sys.platform != "win32":
- print " argv = %s" % repr(sys.argv[1])
+ print " argv = %s" % repr(sys.argv[1])
print " platform = '%s'" % sys.platform
print " filesystem_encoding = '%s'" % sys.getfilesystemencoding()
- print " output_encoding = '%s'" % sys.stdout.encoding
- print " argv_encoding = '%s'" % (sys.platform == "win32" and 'ascii' or sys.stdout.encoding)
-
+ print " io_encoding = '%s'" % sys.stdout.encoding
try:
tmpdir = tempfile.mkdtemp()
for fname in TEST_FILENAMES:
- open(os.path.join(tmpdir, fname), 'w').close()
+ open(os.path.join(tmpdir, fname), 'w').close()
# Use Unicode API under Windows or MacOS X
if sys.platform in ('win32', 'darwin'):
import os, sys, locale
from allmydata.test.common_util import ReallyEqualMixin
+from allmydata.util import encodingutil, fileutil
from allmydata.util.encodingutil import argv_to_unicode, unicode_to_url, \
- unicode_to_output, unicode_platform, listdir_unicode, FilenameEncodingError, \
- get_output_encoding, get_filesystem_encoding, _reload
+ unicode_to_output, quote_output, quote_path, quote_local_unicode_path, \
+ unicode_platform, listdir_unicode, FilenameEncodingError, get_io_encoding, \
+ get_filesystem_encoding, to_str, from_utf8_or_none, _reload
from allmydata.dirnode import normalize
from twisted.python import usage
class EncodingUtilErrors(ReallyEqualMixin, unittest.TestCase):
- def tearDown(self):
- _reload()
@patch('sys.stdout')
- def test_get_output_encoding(self, mock_stdout):
+ def test_get_io_encoding(self, mock_stdout):
mock_stdout.encoding = 'UTF-8'
_reload()
- self.failUnlessReallyEqual(get_output_encoding(), 'utf-8')
+ self.failUnlessReallyEqual(get_io_encoding(), 'utf-8')
mock_stdout.encoding = 'cp65001'
_reload()
- self.failUnlessReallyEqual(get_output_encoding(), 'utf-8')
+ self.failUnlessReallyEqual(get_io_encoding(), 'utf-8')
mock_stdout.encoding = 'koi8-r'
+ expected = sys.platform == "win32" and 'utf-8' or 'koi8-r'
_reload()
- self.failUnlessReallyEqual(get_output_encoding(), 'koi8-r')
+ self.failUnlessReallyEqual(get_io_encoding(), expected)
mock_stdout.encoding = 'nonexistent_encoding'
- self.failUnlessRaises(AssertionError, _reload)
+ if sys.platform == "win32":
+ _reload()
+ self.failUnlessReallyEqual(get_io_encoding(), 'utf-8')
+ else:
+ self.failUnlessRaises(AssertionError, _reload)
@patch('locale.getpreferredencoding')
- def test_get_output_encoding_not_from_stdout(self, mock_locale_getpreferredencoding):
+ def test_get_io_encoding_not_from_stdout(self, mock_locale_getpreferredencoding):
locale # hush pyflakes
mock_locale_getpreferredencoding.return_value = 'koi8-r'
old_stdout = sys.stdout
sys.stdout = DummyStdout()
try:
+ expected = sys.platform == "win32" and 'utf-8' or 'koi8-r'
_reload()
- self.failUnlessReallyEqual(get_output_encoding(), 'koi8-r')
+ self.failUnlessReallyEqual(get_io_encoding(), expected)
sys.stdout.encoding = None
_reload()
- self.failUnlessReallyEqual(get_output_encoding(), 'koi8-r')
+ self.failUnlessReallyEqual(get_io_encoding(), expected)
mock_locale_getpreferredencoding.return_value = None
_reload()
- self.failUnlessReallyEqual(get_output_encoding(), 'utf-8')
+ self.failUnlessReallyEqual(get_io_encoding(), 'utf-8')
finally:
sys.stdout = old_stdout
- @patch('sys.stdout')
- def test_argv_to_unicode(self, mock):
- mock.encoding = 'utf-8'
- _reload()
-
+ def test_argv_to_unicode(self):
+ encodingutil.io_encoding = 'utf-8'
self.failUnlessRaises(usage.UsageError,
argv_to_unicode,
lumiere_nfc.encode('latin1'))
- @patch('sys.stdout')
- def test_unicode_to_output(self, mock):
- # Encoding koi8-r cannot represent e-grave
- mock.encoding = 'koi8-r'
- _reload()
+ def test_unicode_to_output(self):
+ encodingutil.io_encoding = 'koi8-r'
self.failUnlessRaises(UnicodeEncodeError, unicode_to_output, lumiere_nfc)
@patch('os.listdir')
self.failUnlessRaises(FilenameEncodingError,
listdir_unicode,
u'/dummy')
-
+
# We're trying to list a directory whose name cannot be represented in
# the filesystem encoding. This should fail.
mock_getfilesystemencoding.return_value = 'ascii'
listdir_unicode,
u'/' + lumiere_nfc)
+
class EncodingUtil(ReallyEqualMixin):
def setUp(self):
- # Mock sys.platform because unicode_platform() uses it
self.original_platform = sys.platform
sys.platform = self.platform
if 'argv' not in dir(self):
return
- mock.encoding = self.output_encoding
+ mock.encoding = self.io_encoding
argu = lumiere_nfc
argv = self.argv
_reload()
@patch('sys.stdout')
def test_unicode_to_output(self, mock):
- if 'output' not in dir(self):
+ if 'argv' not in dir(self):
return
- mock.encoding = self.output_encoding
+ mock.encoding = self.io_encoding
_reload()
- self.failUnlessReallyEqual(unicode_to_output(lumiere_nfc), self.output)
+ self.failUnlessReallyEqual(unicode_to_output(lumiere_nfc), self.argv)
def test_unicode_platform(self):
matrix = {
'linux2': False,
+ 'linux3': False,
'openbsd4': False,
'win32': True,
'darwin': True,
_reload()
self.failUnlessReallyEqual(unicode_platform(), matrix[self.platform])
-
+
@patch('sys.getfilesystemencoding')
@patch('os.listdir')
def test_listdir_unicode(self, mock_listdir, mock_getfilesystemencoding):
raise unittest.SkipTest("%r\nIt is possible that the filesystem on which this test is being run "
"does not support Unicode, even though the platform does." % (e,))
- fn = lumiere_nfc + '/' + lumiere_nfc + '.txt'
+ fn = lumiere_nfc + u'/' + lumiere_nfc + u'.txt'
open(fn, 'wb').close()
self.failUnless(os.path.exists(fn))
- self.failUnless(os.path.exists(os.path.abspath(fn)))
+ self.failUnless(os.path.exists(os.path.join(os.getcwdu(), fn)))
filenames = listdir_unicode(lumiere_nfc)
# We only require that the listing includes a filename that is canonically equivalent
# to lumiere_nfc (on Mac OS X, it will be the NFD equivalent).
self.failUnlessIn(lumiere_nfc + ".txt", set([normalize(fname) for fname in filenames]))
- expanded = os.path.expanduser("~/" + lumiere_nfc)
- self.failIfIn("~", expanded)
+ expanded = fileutil.expanduser(u"~/" + lumiere_nfc)
+ self.failIfIn(u"~", expanded)
self.failUnless(expanded.endswith(lumiere_nfc), expanded)
- def test_open_unrepresentable(self, mock):
+ def test_open_unrepresentable(self):
if unicode_platform():
raise unittest.SkipTest("This test is not applicable to platforms that represent filenames as Unicode.")
self.failUnlessRaises(UnicodeEncodeError, open, fn, 'wb')
+class QuoteOutput(ReallyEqualMixin, unittest.TestCase):
+ def tearDown(self):
+ _reload()
+
+ def _check(self, inp, out, enc, optional_quotes, quote_newlines):
+ out2 = out
+ if optional_quotes:
+ out2 = out2[1:-1]
+ self.failUnlessReallyEqual(quote_output(inp, encoding=enc, quote_newlines=quote_newlines), out)
+ self.failUnlessReallyEqual(quote_output(inp, encoding=enc, quotemarks=False, quote_newlines=quote_newlines), out2)
+ if out[0:2] == 'b"':
+ pass
+ elif isinstance(inp, str):
+ self.failUnlessReallyEqual(quote_output(unicode(inp), encoding=enc, quote_newlines=quote_newlines), out)
+ self.failUnlessReallyEqual(quote_output(unicode(inp), encoding=enc, quotemarks=False, quote_newlines=quote_newlines), out2)
+ else:
+ self.failUnlessReallyEqual(quote_output(inp.encode('utf-8'), encoding=enc, quote_newlines=quote_newlines), out)
+ self.failUnlessReallyEqual(quote_output(inp.encode('utf-8'), encoding=enc, quotemarks=False, quote_newlines=quote_newlines), out2)
+
+ def _test_quote_output_all(self, enc):
+ def check(inp, out, optional_quotes=False, quote_newlines=None):
+ self._check(inp, out, enc, optional_quotes, quote_newlines)
+
+ # optional single quotes
+ check("foo", "'foo'", True)
+ check("\\", "'\\'", True)
+ check("$\"`", "'$\"`'", True)
+ check("\n", "'\n'", True, quote_newlines=False)
+
+ # mandatory single quotes
+ check("\"", "'\"'")
+
+ # double quotes
+ check("'", "\"'\"")
+ check("\n", "\"\\x0a\"", quote_newlines=True)
+ check("\x00", "\"\\x00\"")
+
+ # invalid Unicode and astral planes
+ check(u"\uFDD0\uFDEF", "\"\\ufdd0\\ufdef\"")
+ check(u"\uDC00\uD800", "\"\\udc00\\ud800\"")
+ check(u"\uDC00\uD800\uDC00", "\"\\udc00\\U00010000\"")
+ check(u"\uD800\uDC00", "\"\\U00010000\"")
+ check(u"\uD800\uDC01", "\"\\U00010001\"")
+ check(u"\uD801\uDC00", "\"\\U00010400\"")
+ check(u"\uDBFF\uDFFF", "\"\\U0010ffff\"")
+ check(u"'\uDBFF\uDFFF", "\"'\\U0010ffff\"")
+ check(u"\"\uDBFF\uDFFF", "\"\\\"\\U0010ffff\"")
+
+ # invalid UTF-8
+ check("\xFF", "b\"\\xff\"")
+ check("\x00\"$\\`\x80\xFF", "b\"\\x00\\\"\\$\\\\\\`\\x80\\xff\"")
+
+ def test_quote_output_ascii(self, enc='ascii'):
+ def check(inp, out, optional_quotes=False, quote_newlines=None):
+ self._check(inp, out, enc, optional_quotes, quote_newlines)
+
+ self._test_quote_output_all(enc)
+ check(u"\u00D7", "\"\\xd7\"")
+ check(u"'\u00D7", "\"'\\xd7\"")
+ check(u"\"\u00D7", "\"\\\"\\xd7\"")
+ check(u"\u2621", "\"\\u2621\"")
+ check(u"'\u2621", "\"'\\u2621\"")
+ check(u"\"\u2621", "\"\\\"\\u2621\"")
+ check(u"\n", "'\n'", True, quote_newlines=False)
+ check(u"\n", "\"\\x0a\"", quote_newlines=True)
+
+ def test_quote_output_latin1(self, enc='latin1'):
+ def check(inp, out, optional_quotes=False, quote_newlines=None):
+ self._check(inp, out.encode('latin1'), enc, optional_quotes, quote_newlines)
+
+ self._test_quote_output_all(enc)
+ check(u"\u00D7", u"'\u00D7'", True)
+ check(u"'\u00D7", u"\"'\u00D7\"")
+ check(u"\"\u00D7", u"'\"\u00D7'")
+ check(u"\u00D7\"", u"'\u00D7\"'", True)
+ check(u"\u2621", u"\"\\u2621\"")
+ check(u"'\u2621", u"\"'\\u2621\"")
+ check(u"\"\u2621", u"\"\\\"\\u2621\"")
+ check(u"\n", u"'\n'", True, quote_newlines=False)
+ check(u"\n", u"\"\\x0a\"", quote_newlines=True)
+
+ def test_quote_output_utf8(self, enc='utf-8'):
+ def check(inp, out, optional_quotes=False, quote_newlines=None):
+ self._check(inp, out.encode('utf-8'), enc, optional_quotes, quote_newlines)
+
+ self._test_quote_output_all(enc)
+ check(u"\u2621", u"'\u2621'", True)
+ check(u"'\u2621", u"\"'\u2621\"")
+ check(u"\"\u2621", u"'\"\u2621'")
+ check(u"\u2621\"", u"'\u2621\"'", True)
+ check(u"\n", u"'\n'", True, quote_newlines=False)
+ check(u"\n", u"\"\\x0a\"", quote_newlines=True)
+
+ def test_quote_output_default(self):
+ encodingutil.io_encoding = 'ascii'
+ self.test_quote_output_ascii(None)
+
+ encodingutil.io_encoding = 'latin1'
+ self.test_quote_output_latin1(None)
+
+ encodingutil.io_encoding = 'utf-8'
+ self.test_quote_output_utf8(None)
+
+
+class QuotePaths(ReallyEqualMixin, unittest.TestCase):
+ def test_quote_path(self):
+ self.failUnlessReallyEqual(quote_path([u'foo', u'bar']), "'foo/bar'")
+ self.failUnlessReallyEqual(quote_path([u'foo', u'bar'], quotemarks=True), "'foo/bar'")
+ self.failUnlessReallyEqual(quote_path([u'foo', u'bar'], quotemarks=False), "foo/bar")
+ self.failUnlessReallyEqual(quote_path([u'foo', u'\nbar']), '"foo/\\x0abar"')
+ self.failUnlessReallyEqual(quote_path([u'foo', u'\nbar'], quotemarks=True), '"foo/\\x0abar"')
+ self.failUnlessReallyEqual(quote_path([u'foo', u'\nbar'], quotemarks=False), '"foo/\\x0abar"')
+
+ def win32_other(win32, other):
+ return win32 if sys.platform == "win32" else other
+
+ self.failUnlessReallyEqual(quote_local_unicode_path(u"\\\\?\\C:\\foo"),
+ win32_other("'C:\\foo'", "'\\\\?\\C:\\foo'"))
+ self.failUnlessReallyEqual(quote_local_unicode_path(u"\\\\?\\C:\\foo", quotemarks=True),
+ win32_other("'C:\\foo'", "'\\\\?\\C:\\foo'"))
+ self.failUnlessReallyEqual(quote_local_unicode_path(u"\\\\?\\C:\\foo", quotemarks=False),
+ win32_other("C:\\foo", "\\\\?\\C:\\foo"))
+ self.failUnlessReallyEqual(quote_local_unicode_path(u"\\\\?\\UNC\\foo\\bar"),
+ win32_other("'\\\\foo\\bar'", "'\\\\?\\UNC\\foo\\bar'"))
+ self.failUnlessReallyEqual(quote_local_unicode_path(u"\\\\?\\UNC\\foo\\bar", quotemarks=True),
+ win32_other("'\\\\foo\\bar'", "'\\\\?\\UNC\\foo\\bar'"))
+ self.failUnlessReallyEqual(quote_local_unicode_path(u"\\\\?\\UNC\\foo\\bar", quotemarks=False),
+ win32_other("\\\\foo\\bar", "\\\\?\\UNC\\foo\\bar"))
+
+
class UbuntuKarmicUTF8(EncodingUtil, unittest.TestCase):
uname = 'Linux korn 2.6.31-14-generic #48-Ubuntu SMP Fri Oct 16 14:05:01 UTC 2009 x86_64'
- output = 'lumi\xc3\xa8re'
argv = 'lumi\xc3\xa8re'
platform = 'linux2'
filesystem_encoding = 'UTF-8'
- output_encoding = 'UTF-8'
- argv_encoding = 'UTF-8'
+ io_encoding = 'UTF-8'
dirlist = ['test_file', '\xc3\x84rtonwall.mp3', 'Blah blah.txt']
class UbuntuKarmicLatin1(EncodingUtil, unittest.TestCase):
uname = 'Linux korn 2.6.31-14-generic #48-Ubuntu SMP Fri Oct 16 14:05:01 UTC 2009 x86_64'
- output = 'lumi\xe8re'
argv = 'lumi\xe8re'
platform = 'linux2'
filesystem_encoding = 'ISO-8859-1'
- output_encoding = 'ISO-8859-1'
- argv_encoding = 'ISO-8859-1'
+ io_encoding = 'ISO-8859-1'
dirlist = ['test_file', 'Blah blah.txt', '\xc4rtonwall.mp3']
-class WindowsXP(EncodingUtil, unittest.TestCase):
- uname = 'Windows XP 5.1.2600 x86 x86 Family 15 Model 75 Step ping 2, AuthenticAMD'
- output = 'lumi\x8are'
- platform = 'win32'
- filesystem_encoding = 'mbcs'
- output_encoding = 'cp850'
- argv_encoding = 'ascii'
- dirlist = [u'Blah blah.txt', u'test_file', u'\xc4rtonwall.mp3']
-
-class WindowsXP_UTF8(EncodingUtil, unittest.TestCase):
+class Windows(EncodingUtil, unittest.TestCase):
uname = 'Windows XP 5.1.2600 x86 x86 Family 15 Model 75 Step ping 2, AuthenticAMD'
- output = 'lumi\xc3\xa8re'
- platform = 'win32'
- filesystem_encoding = 'mbcs'
- output_encoding = 'cp65001'
- argv_encoding = 'ascii'
- dirlist = [u'Blah blah.txt', u'test_file', u'\xc4rtonwall.mp3']
-
-class WindowsVista(EncodingUtil, unittest.TestCase):
- uname = 'Windows Vista 6.0.6000 x86 x86 Family 6 Model 15 Stepping 11, GenuineIntel'
- output = 'lumi\x8are'
+ argv = 'lumi\xc3\xa8re'
platform = 'win32'
filesystem_encoding = 'mbcs'
- output_encoding = 'cp850'
- argv_encoding = 'ascii'
+ io_encoding = 'utf-8'
dirlist = [u'Blah blah.txt', u'test_file', u'\xc4rtonwall.mp3']
class MacOSXLeopard(EncodingUtil, unittest.TestCase):
uname = 'Darwin g5.local 9.8.0 Darwin Kernel Version 9.8.0: Wed Jul 15 16:57:01 PDT 2009; root:xnu-1228.15.4~1/RELEASE_PPC Power Macintosh powerpc'
output = 'lumi\xc3\xa8re'
- argv = 'lumi\xc3\xa8re'
platform = 'darwin'
filesystem_encoding = 'utf-8'
- output_encoding = 'UTF-8'
- argv_encoding = 'UTF-8'
+ io_encoding = 'UTF-8'
dirlist = [u'A\u0308rtonwall.mp3', u'Blah blah.txt', u'test_file']
class MacOSXLeopard7bit(EncodingUtil, unittest.TestCase):
uname = 'Darwin g5.local 9.8.0 Darwin Kernel Version 9.8.0: Wed Jul 15 16:57:01 PDT 2009; root:xnu-1228.15.4~1/RELEASE_PPC Power Macintosh powerpc'
platform = 'darwin'
filesystem_encoding = 'utf-8'
- output_encoding = 'US-ASCII'
- argv_encoding = 'US-ASCII'
+ io_encoding = 'US-ASCII'
dirlist = [u'A\u0308rtonwall.mp3', u'Blah blah.txt', u'test_file']
class OpenBSD(EncodingUtil, unittest.TestCase):
uname = 'OpenBSD 4.1 GENERIC#187 i386 Intel(R) Celeron(R) CPU 2.80GHz ("GenuineIntel" 686-class)'
platform = 'openbsd4'
filesystem_encoding = '646'
- output_encoding = '646'
- argv_encoding = '646'
+ io_encoding = '646'
# Oops, I cannot write filenames containing non-ascii characters
+
+
+class TestToFromStr(ReallyEqualMixin, unittest.TestCase):
+ def test_to_str(self):
+ self.failUnlessReallyEqual(to_str("foo"), "foo")
+ self.failUnlessReallyEqual(to_str("lumi\xc3\xa8re"), "lumi\xc3\xa8re")
+ self.failUnlessReallyEqual(to_str("\xFF"), "\xFF") # passes through invalid UTF-8 -- is this what we want?
+ self.failUnlessReallyEqual(to_str(u"lumi\u00E8re"), "lumi\xc3\xa8re")
+ self.failUnlessReallyEqual(to_str(None), None)
+
+ def test_from_utf8_or_none(self):
+ self.failUnlessRaises(AssertionError, from_utf8_or_none, u"foo")
+ self.failUnlessReallyEqual(from_utf8_or_none("lumi\xc3\xa8re"), u"lumi\u00E8re")
+ self.failUnlessReallyEqual(from_utf8_or_none(None), None)
+ self.failUnlessRaises(UnicodeDecodeError, from_utf8_or_none, "\xFF")