# "lumiere" spelled with the single precomposed code point U+00E8
# (e with grave accent), as the _nfc suffix indicates.
lumiere_nfc = u"lumi\u00E8re"
# Filename starting with the single precomposed code point U+00C4
# (A with diaeresis).
Artonwall_nfc = u"\u00C4rtonwall.mp3"
# The same filename written as a plain "A" followed by the combining
# diaeresis U+0308, i.e. the decomposed spelling of Artonwall_nfc.
Artonwall_nfd = u"A\u0308rtonwall.mp3"
12 # The following main helps to generate a test class for other operating
15 if __name__ == "__main__":
21 if len(sys.argv) != 2:
22 print "Usage: %s lumi<e-grave>re" % sys.argv[0]
25 if sys.platform == "win32":
27 from allmydata.windows.fixups import initialize
29 print "set PYTHONPATH to the src directory"
34 print "class MyWeirdOS(EncodingUtil, unittest.TestCase):"
35 print " uname = '%s'" % ' '.join(platform.uname())
36 print " argv = %s" % repr(sys.argv[1])
37 print " platform = '%s'" % sys.platform
38 print " filesystem_encoding = '%s'" % sys.getfilesystemencoding()
39 print " output_encoding = '%s'" % sys.stdout.encoding
40 print " argv_encoding = '%s'" % sys.stdout.encoding
42 tmpdir = tempfile.mkdtemp()
43 for fname in TEST_FILENAMES:
44 open(os.path.join(tmpdir, fname), 'w').close()
46 # Use Unicode API under Windows or MacOS X
47 if sys.platform in ('win32', 'darwin'):
48 dirlist = os.listdir(unicode(tmpdir))
50 dirlist = os.listdir(tmpdir)
52 print " dirlist = %s" % repr(dirlist)
54 print " # Oops, I cannot write filenames containing non-ascii characters"
60 from twisted.trial import unittest
61 from mock import patch
62 import os, sys, locale
64 from allmydata.test.common_util import ReallyEqualMixin
65 from allmydata.util import encodingutil
66 from allmydata.util.encodingutil import argv_to_unicode, unicode_to_url, \
67 unicode_to_output, quote_output, unicode_platform, listdir_unicode, \
68 FilenameEncodingError, get_output_encoding, get_filesystem_encoding, _reload
69 from allmydata.dirnode import normalize
71 from twisted.python import usage
73 class EncodingUtilErrors(ReallyEqualMixin, unittest.TestCase):
76 def test_get_output_encoding(self, mock_stdout):
77 mock_stdout.encoding = 'UTF-8'
79 self.failUnlessReallyEqual(get_output_encoding(), 'utf-8')
81 mock_stdout.encoding = 'cp65001'
83 self.failUnlessReallyEqual(get_output_encoding(), 'utf-8')
85 mock_stdout.encoding = 'koi8-r'
86 expected = sys.platform == "win32" and 'utf-8' or 'koi8-r'
88 self.failUnlessReallyEqual(get_output_encoding(), expected)
90 mock_stdout.encoding = 'nonexistent_encoding'
91 if sys.platform == "win32":
93 self.failUnlessReallyEqual(get_output_encoding(), 'utf-8')
95 self.failUnlessRaises(AssertionError, _reload)
97 @patch('locale.getpreferredencoding')
98 def test_get_output_encoding_not_from_stdout(self, mock_locale_getpreferredencoding):
99 locale # hush pyflakes
100 mock_locale_getpreferredencoding.return_value = 'koi8-r'
104 old_stdout = sys.stdout
105 sys.stdout = DummyStdout()
107 expected = sys.platform == "win32" and 'utf-8' or 'koi8-r'
109 self.failUnlessReallyEqual(get_output_encoding(), expected)
111 sys.stdout.encoding = None
113 self.failUnlessReallyEqual(get_output_encoding(), expected)
115 mock_locale_getpreferredencoding.return_value = None
117 self.failUnlessReallyEqual(get_output_encoding(), 'utf-8')
119 sys.stdout = old_stdout
121 def test_argv_to_unicode(self):
122 encodingutil.output_encoding = 'utf-8'
123 self.failUnlessRaises(usage.UsageError,
125 lumiere_nfc.encode('latin1'))
def test_unicode_to_output(self):
    """unicode_to_output() must raise when the text cannot be encoded.

    The module-level output encoding is forced to koi8-r, and the test
    expects encoding lumiere_nfc under it to fail with UnicodeEncodeError.
    """
    # Set the encoding directly on the module rather than depending on
    # whatever the real stdout of the test runner reports.
    encodingutil.output_encoding = 'koi8-r'

    call_args = (lumiere_nfc,)
    self.failUnlessRaises(UnicodeEncodeError, unicode_to_output, *call_args)
132 def test_no_unicode_normalization(self, mock):
133 # Pretend to run on a Unicode platform.
134 # We normalized to NFC in 1.7beta, but we now don't.
135 orig_platform = sys.platform
137 sys.platform = 'darwin'
138 mock.return_value = [Artonwall_nfd]
140 self.failUnlessReallyEqual(listdir_unicode(u'/dummy'), [Artonwall_nfd])
142 sys.platform = orig_platform
144 # The following tests apply only to platforms that don't store filenames as
145 # Unicode entities on the filesystem.
146 class EncodingUtilNonUnicodePlatform(unittest.TestCase):
148 # Mock sys.platform because unicode_platform() uses it
149 self.original_platform = sys.platform
150 sys.platform = 'linux'
153 sys.platform = self.original_platform
156 @patch('sys.getfilesystemencoding')
158 def test_listdir_unicode(self, mock_listdir, mock_getfilesystemencoding):
159 # What happens if latin1-encoded filenames are encountered on an UTF-8
161 mock_listdir.return_value = [
162 lumiere_nfc.encode('utf-8'),
163 lumiere_nfc.encode('latin1')]
165 mock_getfilesystemencoding.return_value = 'utf-8'
167 self.failUnlessRaises(FilenameEncodingError,
171 # We're trying to list a directory whose name cannot be represented in
172 # the filesystem encoding. This should fail.
173 mock_getfilesystemencoding.return_value = 'ascii'
175 self.failUnlessRaises(FilenameEncodingError,
180 class EncodingUtil(ReallyEqualMixin):
182 self.original_platform = sys.platform
183 sys.platform = self.platform
186 sys.platform = self.original_platform
190 def test_argv_to_unicode(self, mock):
191 if 'argv' not in dir(self):
194 mock.encoding = self.output_encoding
198 self.failUnlessReallyEqual(argv_to_unicode(argv), argu)
def test_unicode_to_url(self):
    """Check that unicode_to_url() turns lumiere_nfc into its UTF-8 bytes.

    The previous form, ``failUnless(value, "lumi\\xc3\\xa8re")``, passed the
    expected string as the *failure message* argument, so it only asserted
    that the result was truthy and never compared it with anything.
    """
    # Use the same equality helper as the sibling tests in this class so
    # the returned value is actually compared against the expected bytes.
    self.failUnlessReallyEqual(unicode_to_url(lumiere_nfc), "lumi\xc3\xa8re")
204 def test_unicode_to_output(self, mock):
205 if 'argv' not in dir(self):
208 mock.encoding = self.output_encoding
210 self.failUnlessReallyEqual(unicode_to_output(lumiere_nfc), self.argv)
212 def test_unicode_platform(self):
221 self.failUnlessReallyEqual(unicode_platform(), matrix[self.platform])
223 @patch('sys.getfilesystemencoding')
225 def test_listdir_unicode(self, mock_listdir, mock_getfilesystemencoding):
226 if 'dirlist' not in dir(self):
230 u"test".encode(self.filesystem_encoding)
231 except (LookupError, AttributeError):
232 raise unittest.SkipTest("This platform does not support the '%s' filesystem encoding "
233 "that we are testing for the benefit of a different platform."
234 % (self.filesystem_encoding,))
236 mock_listdir.return_value = self.dirlist
237 mock_getfilesystemencoding.return_value = self.filesystem_encoding
240 filenames = listdir_unicode(u'/dummy')
242 self.failUnlessEqual(set([normalize(fname) for fname in filenames]),
246 class StdlibUnicode(unittest.TestCase):
247 """This mainly tests that some of the stdlib functions support Unicode paths, but also that
248 listdir_unicode works for valid filenames."""
250 def skip_if_cannot_represent_filename(self, u):
251 enc = get_filesystem_encoding()
252 if not unicode_platform():
255 except UnicodeEncodeError:
256 raise unittest.SkipTest("A non-ASCII filename could not be encoded on this platform.")
258 def test_mkdir_open_exists_abspath_listdir_expanduser(self):
259 self.skip_if_cannot_represent_filename(lumiere_nfc)
262 os.mkdir(lumiere_nfc)
263 except EnvironmentError, e:
264 raise unittest.SkipTest("%r\nIt is possible that the filesystem on which this test is being run "
265 "does not support Unicode, even though the platform does." % (e,))
267 fn = lumiere_nfc + u'/' + lumiere_nfc + u'.txt'
268 open(fn, 'wb').close()
269 self.failUnless(os.path.exists(fn))
270 self.failUnless(os.path.exists(os.path.join(os.getcwdu(), fn)))
271 filenames = listdir_unicode(lumiere_nfc)
273 # We only require that the listing includes a filename that is canonically equivalent
274 # to lumiere_nfc (on Mac OS X, it will be the NFD equivalent).
275 self.failUnlessIn(lumiere_nfc + ".txt", set([normalize(fname) for fname in filenames]))
277 expanded = os.path.expanduser("~/" + lumiere_nfc)
278 self.failIfIn("~", expanded)
279 self.failUnless(expanded.endswith(lumiere_nfc), expanded)
281 def test_open_unrepresentable(self):
282 if unicode_platform():
283 raise unittest.SkipTest("This test is not applicable to platforms that represent filenames as Unicode.")
285 enc = get_filesystem_encoding()
289 raise unittest.SkipTest("This test cannot be run unless we know a filename that is not representable.")
290 except UnicodeEncodeError:
291 self.failUnlessRaises(UnicodeEncodeError, open, fn, 'wb')
294 class QuoteOutput(ReallyEqualMixin, unittest.TestCase):
298 def _check(self, inp, out, enc, optional_quotes):
302 self.failUnlessReallyEqual(quote_output(inp, encoding=enc), out)
303 self.failUnlessReallyEqual(quote_output(inp, encoding=enc, quotemarks=False), out2)
306 elif isinstance(inp, str):
307 self.failUnlessReallyEqual(quote_output(unicode(inp), encoding=enc), out)
308 self.failUnlessReallyEqual(quote_output(unicode(inp), encoding=enc, quotemarks=False), out2)
310 self.failUnlessReallyEqual(quote_output(inp.encode('utf-8'), encoding=enc), out)
311 self.failUnlessReallyEqual(quote_output(inp.encode('utf-8'), encoding=enc, quotemarks=False), out2)
313 def _test_quote_output_all(self, enc):
314 def check(inp, out, optional_quotes=False):
315 self._check(inp, out, enc, optional_quotes)
317 # optional single quotes
318 check("foo", "'foo'", True)
319 check("\\", "'\\'", True)
320 check("$\"`", "'$\"`'", True)
322 # mandatory single quotes
327 check("\n", "\"\\x0a\"")
328 check("\x00", "\"\\x00\"")
330 # invalid Unicode and astral planes
331 check(u"\uFDD0\uFDEF", "\"\\ufdd0\\ufdef\"")
332 check(u"\uDC00\uD800", "\"\\udc00\\ud800\"")
333 check(u"\uDC00\uD800\uDC00", "\"\\udc00\\U00010000\"")
334 check(u"\uD800\uDC00", "\"\\U00010000\"")
335 check(u"\uD800\uDC01", "\"\\U00010001\"")
336 check(u"\uD801\uDC00", "\"\\U00010400\"")
337 check(u"\uDBFF\uDFFF", "\"\\U0010ffff\"")
338 check(u"'\uDBFF\uDFFF", "\"'\\U0010ffff\"")
339 check(u"\"\uDBFF\uDFFF", "\"\\\"\\U0010ffff\"")
342 check("\xFF", "b\"\\xff\"")
343 check("\x00\"$\\`\x80\xFF", "b\"\\x00\\\"\\$\\\\\\`\\x80\\xff\"")
345 def test_quote_output_ascii(self, enc='ascii'):
346 def check(inp, out, optional_quotes=False):
347 self._check(inp, out, enc, optional_quotes)
349 self._test_quote_output_all(enc)
350 check(u"\u00D7", "\"\\xd7\"")
351 check(u"'\u00D7", "\"'\\xd7\"")
352 check(u"\"\u00D7", "\"\\\"\\xd7\"")
353 check(u"\u2621", "\"\\u2621\"")
354 check(u"'\u2621", "\"'\\u2621\"")
355 check(u"\"\u2621", "\"\\\"\\u2621\"")
357 def test_quote_output_latin1(self, enc='latin1'):
358 def check(inp, out, optional_quotes=False):
359 self._check(inp, out.encode('latin1'), enc, optional_quotes)
361 self._test_quote_output_all(enc)
362 check(u"\u00D7", u"'\u00D7'", True)
363 check(u"'\u00D7", u"\"'\u00D7\"")
364 check(u"\"\u00D7", u"'\"\u00D7'")
365 check(u"\u00D7\"", u"'\u00D7\"'", True)
366 check(u"\u2621", u"\"\\u2621\"")
367 check(u"'\u2621", u"\"'\\u2621\"")
368 check(u"\"\u2621", u"\"\\\"\\u2621\"")
370 def test_quote_output_utf8(self, enc='utf-8'):
371 def check(inp, out, optional_quotes=False):
372 self._check(inp, out.encode('utf-8'), enc, optional_quotes)
374 self._test_quote_output_all(enc)
375 check(u"\u2621", u"'\u2621'", True)
376 check(u"'\u2621", u"\"'\u2621\"")
377 check(u"\"\u2621", u"'\"\u2621'")
378 check(u"\u2621\"", u"'\u2621\"'", True)
def test_quote_output_default(self):
    """Re-run the per-encoding quote_output() tests with ``enc=None``.

    For each encoding, the module-level ``encodingutil.output_encoding`` is
    set first and the matching test is then invoked with ``None`` in place
    of an explicit encoding.
    """
    # Order matters only in that each assignment must precede its own run.
    runs = (
        ('ascii', self.test_quote_output_ascii),
        ('latin1', self.test_quote_output_latin1),
        ('utf-8', self.test_quote_output_utf8),
    )
    for default_encoding, run_checks in runs:
        encodingutil.output_encoding = default_encoding
        run_checks(None)
391 class UbuntuKarmicUTF8(EncodingUtil, unittest.TestCase):
392 uname = 'Linux korn 2.6.31-14-generic #48-Ubuntu SMP Fri Oct 16 14:05:01 UTC 2009 x86_64'
393 argv = 'lumi\xc3\xa8re'
395 filesystem_encoding = 'UTF-8'
396 output_encoding = 'UTF-8'
397 argv_encoding = 'UTF-8'
398 dirlist = ['test_file', '\xc3\x84rtonwall.mp3', 'Blah blah.txt']
400 class UbuntuKarmicLatin1(EncodingUtil, unittest.TestCase):
401 uname = 'Linux korn 2.6.31-14-generic #48-Ubuntu SMP Fri Oct 16 14:05:01 UTC 2009 x86_64'
404 filesystem_encoding = 'ISO-8859-1'
405 output_encoding = 'ISO-8859-1'
406 argv_encoding = 'ISO-8859-1'
407 dirlist = ['test_file', 'Blah blah.txt', '\xc4rtonwall.mp3']
409 class Windows(EncodingUtil, unittest.TestCase):
410 uname = 'Windows XP 5.1.2600 x86 x86 Family 15 Model 75 Step ping 2, AuthenticAMD'
411 argv = 'lumi\xc3\xa8re'
413 filesystem_encoding = 'mbcs'
414 output_encoding = 'utf-8'
415 argv_encoding = 'utf-8'
416 dirlist = [u'Blah blah.txt', u'test_file', u'\xc4rtonwall.mp3']
418 class MacOSXLeopard(EncodingUtil, unittest.TestCase):
419 uname = 'Darwin g5.local 9.8.0 Darwin Kernel Version 9.8.0: Wed Jul 15 16:57:01 PDT 2009; root:xnu-1228.15.4~1/RELEASE_PPC Power Macintosh powerpc'
420 output = 'lumi\xc3\xa8re'
422 filesystem_encoding = 'utf-8'
423 output_encoding = 'UTF-8'
424 argv_encoding = 'UTF-8'
425 dirlist = [u'A\u0308rtonwall.mp3', u'Blah blah.txt', u'test_file']
427 class MacOSXLeopard7bit(EncodingUtil, unittest.TestCase):
428 uname = 'Darwin g5.local 9.8.0 Darwin Kernel Version 9.8.0: Wed Jul 15 16:57:01 PDT 2009; root:xnu-1228.15.4~1/RELEASE_PPC Power Macintosh powerpc'
430 filesystem_encoding = 'utf-8'
431 output_encoding = 'US-ASCII'
432 argv_encoding = 'US-ASCII'
433 dirlist = [u'A\u0308rtonwall.mp3', u'Blah blah.txt', u'test_file']
class OpenBSD(EncodingUtil, unittest.TestCase):
    # Recorded environment of an OpenBSD 4.1 host, used as fixture data for
    # the shared EncodingUtil tests. All three encodings are '646'.
    # NOTE(review): '646' should be Python's codec alias for ASCII -- confirm
    # against the standard encodings table.
    #
    # Unlike the other platform classes, no `argv` or `dirlist` attribute is
    # defined here; the EncodingUtil tests probe for them with
    # `'argv' not in dir(self)` / `'dirlist' not in dir(self)`.
    uname = 'OpenBSD 4.1 GENERIC#187 i386 Intel(R) Celeron(R) CPU 2.80GHz ("GenuineIntel" 686-class)'
    platform = 'openbsd4'
    filesystem_encoding = '646'
    output_encoding = '646'
    argv_encoding = '646'
    # The line below matches the message printed by the generator script at
    # the top of this file when it fails to create the test filenames.
    # Oops, I cannot write filenames containing non-ascii characters