2 lumiere_nfc = u"lumi\u00E8re"
3 Artonwall_nfc = u"\u00C4rtonwall.mp3"
4 Artonwall_nfd = u"A\u0308rtonwall.mp3"
12 # The following main helps to generate a test class for other operating
15 if __name__ == "__main__":
21 if len(sys.argv) != 2:
22 print "Usage: %s lumi<e-grave>re" % sys.argv[0]
25 if sys.platform == "win32":
27 from allmydata.windows.fixups import initialize
29 print "set PYTHONPATH to the src directory"
34 print "class MyWeirdOS(EncodingUtil, unittest.TestCase):"
35 print " uname = '%s'" % ' '.join(platform.uname())
36 print " argv = %s" % repr(sys.argv[1])
37 print " platform = '%s'" % sys.platform
38 print " filesystem_encoding = '%s'" % sys.getfilesystemencoding()
39 print " io_encoding = '%s'" % sys.stdout.encoding
41 tmpdir = tempfile.mkdtemp()
42 for fname in TEST_FILENAMES:
43 open(os.path.join(tmpdir, fname), 'w').close()
45 # Use Unicode API under Windows or MacOS X
46 if sys.platform in ('win32', 'darwin'):
47 dirlist = os.listdir(unicode(tmpdir))
49 dirlist = os.listdir(tmpdir)
51 print " dirlist = %s" % repr(dirlist)
53 print " # Oops, I cannot write filenames containing non-ascii characters"
59 from twisted.trial import unittest
60 from mock import patch
61 import os, sys, locale
63 from allmydata.test.common_util import ReallyEqualMixin
64 from allmydata.util import encodingutil
65 from allmydata.util.encodingutil import argv_to_unicode, unicode_to_url, \
66 unicode_to_output, quote_output, unicode_platform, listdir_unicode, \
67 FilenameEncodingError, get_io_encoding, get_filesystem_encoding, _reload
68 from allmydata.dirnode import normalize
70 from twisted.python import usage
72 class EncodingUtilErrors(ReallyEqualMixin, unittest.TestCase):
75 def test_get_io_encoding(self, mock_stdout):
76 mock_stdout.encoding = 'UTF-8'
78 self.failUnlessReallyEqual(get_io_encoding(), 'utf-8')
80 mock_stdout.encoding = 'cp65001'
82 self.failUnlessReallyEqual(get_io_encoding(), 'utf-8')
84 mock_stdout.encoding = 'koi8-r'
85 expected = sys.platform == "win32" and 'utf-8' or 'koi8-r'
87 self.failUnlessReallyEqual(get_io_encoding(), expected)
89 mock_stdout.encoding = 'nonexistent_encoding'
90 if sys.platform == "win32":
92 self.failUnlessReallyEqual(get_io_encoding(), 'utf-8')
94 self.failUnlessRaises(AssertionError, _reload)
96 @patch('locale.getpreferredencoding')
97 def test_get_io_encoding_not_from_stdout(self, mock_locale_getpreferredencoding):
98 locale # hush pyflakes
99 mock_locale_getpreferredencoding.return_value = 'koi8-r'
103 old_stdout = sys.stdout
104 sys.stdout = DummyStdout()
106 expected = sys.platform == "win32" and 'utf-8' or 'koi8-r'
108 self.failUnlessReallyEqual(get_io_encoding(), expected)
110 sys.stdout.encoding = None
112 self.failUnlessReallyEqual(get_io_encoding(), expected)
114 mock_locale_getpreferredencoding.return_value = None
116 self.failUnlessReallyEqual(get_io_encoding(), 'utf-8')
118 sys.stdout = old_stdout
120 def test_argv_to_unicode(self):
121 encodingutil.io_encoding = 'utf-8'
122 self.failUnlessRaises(usage.UsageError,
124 lumiere_nfc.encode('latin1'))
def test_unicode_to_output(self):
    """unicode_to_output() is expected to raise UnicodeEncodeError for lumiere_nfc under koi8-r.

    The output encoding is forced by assigning the module-level
    ``encodingutil.io_encoding``.  The previous value is put back afterwards
    so that this test does not leak global state into whichever test runs
    next.
    """
    saved_io_encoding = encodingutil.io_encoding
    encodingutil.io_encoding = 'koi8-r'
    try:
        self.failUnlessRaises(UnicodeEncodeError, unicode_to_output, lumiere_nfc)
    finally:
        # Restore even when the assertion above fails.
        encodingutil.io_encoding = saved_io_encoding
131 def test_no_unicode_normalization(self, mock):
132 # Pretend to run on a Unicode platform.
133 # We normalized to NFC in 1.7beta, but we now don't.
134 orig_platform = sys.platform
136 sys.platform = 'darwin'
137 mock.return_value = [Artonwall_nfd]
139 self.failUnlessReallyEqual(listdir_unicode(u'/dummy'), [Artonwall_nfd])
141 sys.platform = orig_platform
143 # The following tests apply only to platforms that don't store filenames as
144 # Unicode entities on the filesystem.
145 class EncodingUtilNonUnicodePlatform(unittest.TestCase):
147 # Mock sys.platform because unicode_platform() uses it
148 self.original_platform = sys.platform
149 sys.platform = 'linux'
152 sys.platform = self.original_platform
155 @patch('sys.getfilesystemencoding')
157 def test_listdir_unicode(self, mock_listdir, mock_getfilesystemencoding):
158 # What happens if latin1-encoded filenames are encountered on an UTF-8
160 mock_listdir.return_value = [
161 lumiere_nfc.encode('utf-8'),
162 lumiere_nfc.encode('latin1')]
164 mock_getfilesystemencoding.return_value = 'utf-8'
166 self.failUnlessRaises(FilenameEncodingError,
170 # We're trying to list a directory whose name cannot be represented in
171 # the filesystem encoding. This should fail.
172 mock_getfilesystemencoding.return_value = 'ascii'
174 self.failUnlessRaises(FilenameEncodingError,
179 class EncodingUtil(ReallyEqualMixin):
181 self.original_platform = sys.platform
182 sys.platform = self.platform
185 sys.platform = self.original_platform
189 def test_argv_to_unicode(self, mock):
190 if 'argv' not in dir(self):
193 mock.encoding = self.io_encoding
197 self.failUnlessReallyEqual(argv_to_unicode(argv), argu)
def test_unicode_to_url(self):
    """unicode_to_url(lumiere_nfc) must return exactly the byte string 'lumi\\xc3\\xa8re'.

    The comparison uses failUnlessReallyEqual, like the other tests in this
    class.  failUnless(value, expected) would treat the second argument as the
    failure *message* and therefore pass for any truthy return value.
    """
    self.failUnlessReallyEqual(unicode_to_url(lumiere_nfc), "lumi\xc3\xa8re")
203 def test_unicode_to_output(self, mock):
204 if 'argv' not in dir(self):
207 mock.encoding = self.io_encoding
209 self.failUnlessReallyEqual(unicode_to_output(lumiere_nfc), self.argv)
211 def test_unicode_platform(self):
221 self.failUnlessReallyEqual(unicode_platform(), matrix[self.platform])
223 @patch('sys.getfilesystemencoding')
225 def test_listdir_unicode(self, mock_listdir, mock_getfilesystemencoding):
226 if 'dirlist' not in dir(self):
230 u"test".encode(self.filesystem_encoding)
231 except (LookupError, AttributeError):
232 raise unittest.SkipTest("This platform does not support the '%s' filesystem encoding "
233 "that we are testing for the benefit of a different platform."
234 % (self.filesystem_encoding,))
236 mock_listdir.return_value = self.dirlist
237 mock_getfilesystemencoding.return_value = self.filesystem_encoding
240 filenames = listdir_unicode(u'/dummy')
242 self.failUnlessEqual(set([normalize(fname) for fname in filenames]),
246 class StdlibUnicode(unittest.TestCase):
247 """This mainly tests that some of the stdlib functions support Unicode paths, but also that
248 listdir_unicode works for valid filenames."""
250 def skip_if_cannot_represent_filename(self, u):
251 enc = get_filesystem_encoding()
252 if not unicode_platform():
255 except UnicodeEncodeError:
256 raise unittest.SkipTest("A non-ASCII filename could not be encoded on this platform.")
258 def test_mkdir_open_exists_abspath_listdir_expanduser(self):
259 self.skip_if_cannot_represent_filename(lumiere_nfc)
262 os.mkdir(lumiere_nfc)
263 except EnvironmentError, e:
264 raise unittest.SkipTest("%r\nIt is possible that the filesystem on which this test is being run "
265 "does not support Unicode, even though the platform does." % (e,))
267 fn = lumiere_nfc + u'/' + lumiere_nfc + u'.txt'
268 open(fn, 'wb').close()
269 self.failUnless(os.path.exists(fn))
270 self.failUnless(os.path.exists(os.path.join(os.getcwdu(), fn)))
271 filenames = listdir_unicode(lumiere_nfc)
273 # We only require that the listing includes a filename that is canonically equivalent
274 # to lumiere_nfc (on Mac OS X, it will be the NFD equivalent).
275 self.failUnlessIn(lumiere_nfc + ".txt", set([normalize(fname) for fname in filenames]))
277 expanded = os.path.expanduser("~/" + lumiere_nfc)
278 self.failIfIn("~", expanded)
279 self.failUnless(expanded.endswith(lumiere_nfc), expanded)
281 def test_open_unrepresentable(self):
282 if unicode_platform():
283 raise unittest.SkipTest("This test is not applicable to platforms that represent filenames as Unicode.")
285 enc = get_filesystem_encoding()
289 raise unittest.SkipTest("This test cannot be run unless we know a filename that is not representable.")
290 except UnicodeEncodeError:
291 self.failUnlessRaises(UnicodeEncodeError, open, fn, 'wb')
294 class QuoteOutput(ReallyEqualMixin, unittest.TestCase):
298 def _check(self, inp, out, enc, optional_quotes):
302 self.failUnlessReallyEqual(quote_output(inp, encoding=enc), out)
303 self.failUnlessReallyEqual(quote_output(inp, encoding=enc, quotemarks=False), out2)
306 elif isinstance(inp, str):
307 self.failUnlessReallyEqual(quote_output(unicode(inp), encoding=enc), out)
308 self.failUnlessReallyEqual(quote_output(unicode(inp), encoding=enc, quotemarks=False), out2)
310 self.failUnlessReallyEqual(quote_output(inp.encode('utf-8'), encoding=enc), out)
311 self.failUnlessReallyEqual(quote_output(inp.encode('utf-8'), encoding=enc, quotemarks=False), out2)
313 def _test_quote_output_all(self, enc):
314 def check(inp, out, optional_quotes=False):
315 self._check(inp, out, enc, optional_quotes)
317 # optional single quotes
318 check("foo", "'foo'", True)
319 check("\\", "'\\'", True)
320 check("$\"`", "'$\"`'", True)
322 # mandatory single quotes
327 check("\n", "\"\\x0a\"")
328 check("\x00", "\"\\x00\"")
330 # invalid Unicode and astral planes
331 check(u"\uFDD0\uFDEF", "\"\\ufdd0\\ufdef\"")
332 check(u"\uDC00\uD800", "\"\\udc00\\ud800\"")
333 check(u"\uDC00\uD800\uDC00", "\"\\udc00\\U00010000\"")
334 check(u"\uD800\uDC00", "\"\\U00010000\"")
335 check(u"\uD800\uDC01", "\"\\U00010001\"")
336 check(u"\uD801\uDC00", "\"\\U00010400\"")
337 check(u"\uDBFF\uDFFF", "\"\\U0010ffff\"")
338 check(u"'\uDBFF\uDFFF", "\"'\\U0010ffff\"")
339 check(u"\"\uDBFF\uDFFF", "\"\\\"\\U0010ffff\"")
342 check("\xFF", "b\"\\xff\"")
343 check("\x00\"$\\`\x80\xFF", "b\"\\x00\\\"\\$\\\\\\`\\x80\\xff\"")
345 def test_quote_output_ascii(self, enc='ascii'):
346 def check(inp, out, optional_quotes=False):
347 self._check(inp, out, enc, optional_quotes)
349 self._test_quote_output_all(enc)
350 check(u"\u00D7", "\"\\xd7\"")
351 check(u"'\u00D7", "\"'\\xd7\"")
352 check(u"\"\u00D7", "\"\\\"\\xd7\"")
353 check(u"\u2621", "\"\\u2621\"")
354 check(u"'\u2621", "\"'\\u2621\"")
355 check(u"\"\u2621", "\"\\\"\\u2621\"")
357 def test_quote_output_latin1(self, enc='latin1'):
358 def check(inp, out, optional_quotes=False):
359 self._check(inp, out.encode('latin1'), enc, optional_quotes)
361 self._test_quote_output_all(enc)
362 check(u"\u00D7", u"'\u00D7'", True)
363 check(u"'\u00D7", u"\"'\u00D7\"")
364 check(u"\"\u00D7", u"'\"\u00D7'")
365 check(u"\u00D7\"", u"'\u00D7\"'", True)
366 check(u"\u2621", u"\"\\u2621\"")
367 check(u"'\u2621", u"\"'\\u2621\"")
368 check(u"\"\u2621", u"\"\\\"\\u2621\"")
370 def test_quote_output_utf8(self, enc='utf-8'):
371 def check(inp, out, optional_quotes=False):
372 self._check(inp, out.encode('utf-8'), enc, optional_quotes)
374 self._test_quote_output_all(enc)
375 check(u"\u2621", u"'\u2621'", True)
376 check(u"'\u2621", u"\"'\u2621\"")
377 check(u"\"\u2621", u"'\"\u2621'")
378 check(u"\u2621\"", u"'\u2621\"'", True)
def test_quote_output_default(self):
    """Re-run the per-encoding quote_output tests with encoding=None.

    Each suite is invoked with ``enc=None`` after setting the module-level
    ``encodingutil.io_encoding`` to the encoding that suite targets.  The
    original value of ``io_encoding`` is restored at the end, even if one of
    the suites fails, so the override cannot leak into other tests.
    """
    saved_io_encoding = encodingutil.io_encoding
    suites = (
        ('ascii', self.test_quote_output_ascii),
        ('latin1', self.test_quote_output_latin1),
        ('utf-8', self.test_quote_output_utf8),
    )
    try:
        for io_enc, run_suite in suites:
            encodingutil.io_encoding = io_enc
            run_suite(None)
    finally:
        encodingutil.io_encoding = saved_io_encoding
391 class UbuntuKarmicUTF8(EncodingUtil, unittest.TestCase):
392 uname = 'Linux korn 2.6.31-14-generic #48-Ubuntu SMP Fri Oct 16 14:05:01 UTC 2009 x86_64'
393 argv = 'lumi\xc3\xa8re'
395 filesystem_encoding = 'UTF-8'
396 io_encoding = 'UTF-8'
397 dirlist = ['test_file', '\xc3\x84rtonwall.mp3', 'Blah blah.txt']
399 class UbuntuKarmicLatin1(EncodingUtil, unittest.TestCase):
400 uname = 'Linux korn 2.6.31-14-generic #48-Ubuntu SMP Fri Oct 16 14:05:01 UTC 2009 x86_64'
403 filesystem_encoding = 'ISO-8859-1'
404 io_encoding = 'ISO-8859-1'
405 dirlist = ['test_file', 'Blah blah.txt', '\xc4rtonwall.mp3']
407 class Windows(EncodingUtil, unittest.TestCase):
408 uname = 'Windows XP 5.1.2600 x86 x86 Family 15 Model 75 Step ping 2, AuthenticAMD'
409 argv = 'lumi\xc3\xa8re'
411 filesystem_encoding = 'mbcs'
412 io_encoding = 'utf-8'
413 dirlist = [u'Blah blah.txt', u'test_file', u'\xc4rtonwall.mp3']
415 class MacOSXLeopard(EncodingUtil, unittest.TestCase):
416 uname = 'Darwin g5.local 9.8.0 Darwin Kernel Version 9.8.0: Wed Jul 15 16:57:01 PDT 2009; root:xnu-1228.15.4~1/RELEASE_PPC Power Macintosh powerpc'
417 output = 'lumi\xc3\xa8re'
419 filesystem_encoding = 'utf-8'
420 io_encoding = 'UTF-8'
421 dirlist = [u'A\u0308rtonwall.mp3', u'Blah blah.txt', u'test_file']
423 class MacOSXLeopard7bit(EncodingUtil, unittest.TestCase):
424 uname = 'Darwin g5.local 9.8.0 Darwin Kernel Version 9.8.0: Wed Jul 15 16:57:01 PDT 2009; root:xnu-1228.15.4~1/RELEASE_PPC Power Macintosh powerpc'
426 filesystem_encoding = 'utf-8'
427 io_encoding = 'US-ASCII'
428 dirlist = [u'A\u0308rtonwall.mp3', u'Blah blah.txt', u'test_file']
430 class OpenBSD(EncodingUtil, unittest.TestCase):
431 uname = 'OpenBSD 4.1 GENERIC#187 i386 Intel(R) Celeron(R) CPU 2.80GHz ("GenuineIntel" 686-class)'
432 platform = 'openbsd4'
433 filesystem_encoding = '646'
435 # Oops, I cannot write filenames containing non-ascii characters