# "lumiere" spelled with a single precomposed e-grave code point (U+00E8).
lumiere_nfc = u"lumi\u00E8re"
# The same filename in two spellings: the first uses the precomposed
# A-with-diaeresis code point (U+00C4), the second uses a plain "A" followed
# by the combining diaeresis (U+0308).
Artonwall_nfc = u"\u00C4rtonwall.mp3"
Artonwall_nfd = u"A\u0308rtonwall.mp3"
12 # The following main helps to generate a test class for other operating
15 if __name__ == "__main__":
21 if len(sys.argv) != 2:
22 print "Usage: %s lumi<e-grave>re" % sys.argv[0]
26 print "class MyWeirdOS(EncodingUtil, unittest.TestCase):"
27 print " uname = '%s'" % ' '.join(platform.uname())
28 if sys.platform != "win32":
29 print " argv = %s" % repr(sys.argv[1])
30 print " platform = '%s'" % sys.platform
31 print " filesystem_encoding = '%s'" % sys.getfilesystemencoding()
32 print " output_encoding = '%s'" % sys.stdout.encoding
33 print " argv_encoding = '%s'" % (sys.platform == "win32" and 'ascii' or sys.stdout.encoding)
36 tmpdir = tempfile.mkdtemp()
37 for fname in TEST_FILENAMES:
38 open(os.path.join(tmpdir, fname), 'w').close()
40 # Use Unicode API under Windows or MacOS X
41 if sys.platform in ('win32', 'darwin'):
42 dirlist = os.listdir(unicode(tmpdir))
44 dirlist = os.listdir(tmpdir)
46 print " dirlist = %s" % repr(dirlist)
48 print " # Oops, I cannot write filenames containing non-ascii characters"
54 from twisted.trial import unittest
55 from mock import patch
56 import os, sys, locale
58 from allmydata.test.common_util import ReallyEqualMixin
59 from allmydata.util.encodingutil import argv_to_unicode, unicode_to_url, \
60 unicode_to_output, quote_output, unicode_platform, listdir_unicode, \
61 FilenameEncodingError, get_output_encoding, get_filesystem_encoding, _reload
62 from allmydata.dirnode import normalize
64 from twisted.python import usage
66 class EncodingUtilErrors(ReallyEqualMixin, unittest.TestCase):
71 def test_get_output_encoding(self, mock_stdout):
72 mock_stdout.encoding = 'UTF-8'
74 self.failUnlessReallyEqual(get_output_encoding(), 'utf-8')
76 mock_stdout.encoding = 'cp65001'
78 self.failUnlessReallyEqual(get_output_encoding(), 'utf-8')
80 mock_stdout.encoding = 'koi8-r'
82 self.failUnlessReallyEqual(get_output_encoding(), 'koi8-r')
84 mock_stdout.encoding = 'nonexistent_encoding'
85 self.failUnlessRaises(AssertionError, _reload)
87 @patch('locale.getpreferredencoding')
88 def test_get_output_encoding_not_from_stdout(self, mock_locale_getpreferredencoding):
89 locale # hush pyflakes
90 mock_locale_getpreferredencoding.return_value = 'koi8-r'
94 old_stdout = sys.stdout
95 sys.stdout = DummyStdout()
98 self.failUnlessReallyEqual(get_output_encoding(), 'koi8-r')
100 sys.stdout.encoding = None
102 self.failUnlessReallyEqual(get_output_encoding(), 'koi8-r')
104 mock_locale_getpreferredencoding.return_value = None
106 self.failUnlessReallyEqual(get_output_encoding(), 'utf-8')
108 sys.stdout = old_stdout
111 def test_argv_to_unicode(self, mock):
112 mock.encoding = 'utf-8'
115 self.failUnlessRaises(usage.UsageError,
117 lumiere_nfc.encode('latin1'))
120 def test_unicode_to_output(self, mock):
121 # Encoding koi8-r cannot represent e-grave
122 mock.encoding = 'koi8-r'
124 self.failUnlessRaises(UnicodeEncodeError, unicode_to_output, lumiere_nfc)
127 def test_no_unicode_normalization(self, mock):
128 # Pretend to run on a Unicode platform.
129 # We normalized to NFC in 1.7beta, but we now don't.
130 orig_platform = sys.platform
132 sys.platform = 'darwin'
133 mock.return_value = [Artonwall_nfd]
135 self.failUnlessReallyEqual(listdir_unicode(u'/dummy'), [Artonwall_nfd])
137 sys.platform = orig_platform
139 # The following tests apply only to platforms that don't store filenames as
140 # Unicode entities on the filesystem.
141 class EncodingUtilNonUnicodePlatform(unittest.TestCase):
143 # Mock sys.platform because unicode_platform() uses it
144 self.original_platform = sys.platform
145 sys.platform = 'linux'
148 sys.platform = self.original_platform
151 @patch('sys.getfilesystemencoding')
153 def test_listdir_unicode(self, mock_listdir, mock_getfilesystemencoding):
154 # What happens if latin1-encoded filenames are encountered on an UTF-8
156 mock_listdir.return_value = [
157 lumiere_nfc.encode('utf-8'),
158 lumiere_nfc.encode('latin1')]
160 mock_getfilesystemencoding.return_value = 'utf-8'
162 self.failUnlessRaises(FilenameEncodingError,
166 # We're trying to list a directory whose name cannot be represented in
167 # the filesystem encoding. This should fail.
168 mock_getfilesystemencoding.return_value = 'ascii'
170 self.failUnlessRaises(FilenameEncodingError,
174 class EncodingUtil(ReallyEqualMixin):
176 # Mock sys.platform because unicode_platform() uses it
177 self.original_platform = sys.platform
178 sys.platform = self.platform
181 sys.platform = self.original_platform
185 def test_argv_to_unicode(self, mock):
186 if 'argv' not in dir(self):
189 mock.encoding = self.output_encoding
193 self.failUnlessReallyEqual(argv_to_unicode(argv), argu)
def test_unicode_to_url(self):
    # unicode_to_url() is expected to return the UTF-8 byte string for the
    # input ("lumi\xc3\xa8re" is the UTF-8 encoding of lumiere_nfc).
    #
    # This must be an equality assertion: failUnless(value, expected) would
    # treat the expected string as the failure *message* and only check that
    # the returned value is truthy, so the test could never fail on a wrong
    # result.
    self.failUnlessReallyEqual(unicode_to_url(lumiere_nfc), "lumi\xc3\xa8re")
199 def test_unicode_to_output(self, mock):
200 if 'output' not in dir(self):
203 mock.encoding = self.output_encoding
205 self.failUnlessReallyEqual(unicode_to_output(lumiere_nfc), self.output)
207 def test_unicode_platform(self):
216 self.failUnlessReallyEqual(unicode_platform(), matrix[self.platform])
218 @patch('sys.getfilesystemencoding')
220 def test_listdir_unicode(self, mock_listdir, mock_getfilesystemencoding):
221 if 'dirlist' not in dir(self):
225 u"test".encode(self.filesystem_encoding)
226 except (LookupError, AttributeError):
227 raise unittest.SkipTest("This platform does not support the '%s' filesystem encoding "
228 "that we are testing for the benefit of a different platform."
229 % (self.filesystem_encoding,))
231 mock_listdir.return_value = self.dirlist
232 mock_getfilesystemencoding.return_value = self.filesystem_encoding
235 filenames = listdir_unicode(u'/dummy')
237 self.failUnlessEqual(set([normalize(fname) for fname in filenames]),
241 class StdlibUnicode(unittest.TestCase):
242 """This mainly tests that some of the stdlib functions support Unicode paths, but also that
243 listdir_unicode works for valid filenames."""
245 def skip_if_cannot_represent_filename(self, u):
246 enc = get_filesystem_encoding()
247 if not unicode_platform():
250 except UnicodeEncodeError:
251 raise unittest.SkipTest("A non-ASCII filename could not be encoded on this platform.")
253 def test_mkdir_open_exists_abspath_listdir_expanduser(self):
254 self.skip_if_cannot_represent_filename(lumiere_nfc)
257 os.mkdir(lumiere_nfc)
258 except EnvironmentError, e:
259 raise unittest.SkipTest("%r\nIt is possible that the filesystem on which this test is being run "
260 "does not support Unicode, even though the platform does." % (e,))
262 fn = lumiere_nfc + u'/' + lumiere_nfc + u'.txt'
263 open(fn, 'wb').close()
264 self.failUnless(os.path.exists(fn))
265 self.failUnless(os.path.exists(os.path.join(os.getcwdu(), fn)))
266 filenames = listdir_unicode(lumiere_nfc)
268 # We only require that the listing includes a filename that is canonically equivalent
269 # to lumiere_nfc (on Mac OS X, it will be the NFD equivalent).
270 self.failUnlessIn(lumiere_nfc + ".txt", set([normalize(fname) for fname in filenames]))
272 expanded = os.path.expanduser("~/" + lumiere_nfc)
273 self.failIfIn("~", expanded)
274 self.failUnless(expanded.endswith(lumiere_nfc), expanded)
276 def test_open_unrepresentable(self):
277 if unicode_platform():
278 raise unittest.SkipTest("This test is not applicable to platforms that represent filenames as Unicode.")
280 enc = get_filesystem_encoding()
284 raise unittest.SkipTest("This test cannot be run unless we know a filename that is not representable.")
285 except UnicodeEncodeError:
286 self.failUnlessRaises(UnicodeEncodeError, open, fn, 'wb')
289 class QuoteOutput(ReallyEqualMixin, unittest.TestCase):
290 def _check(self, inp, out, enc, optional_quotes):
294 self.failUnlessReallyEqual(quote_output(inp, encoding=enc), out)
295 self.failUnlessReallyEqual(quote_output(inp, encoding=enc, quotemarks=False), out2)
297 if isinstance(inp, str):
298 self.failUnlessReallyEqual(quote_output(unicode(inp), encoding=enc), out)
299 self.failUnlessReallyEqual(quote_output(unicode(inp), encoding=enc, quotemarks=False), out2)
301 self.failUnlessReallyEqual(quote_output(inp.encode('utf-8'), encoding=enc), out)
302 self.failUnlessReallyEqual(quote_output(inp.encode('utf-8'), encoding=enc, quotemarks=False), out2)
304 def _test_quote_output_all(self, enc):
305 def check(inp, out, optional_quotes=False):
306 self._check(inp, out, enc, optional_quotes)
308 # optional single quotes
309 check("foo", "'foo'", True)
310 check("\\", "'\\'", True)
311 check("$\"`", "'$\"`'", True)
313 # mandatory single quotes
318 check("\n", "\"\\x0a\"")
319 check("\x00", "\"\\x00\"")
321 # invalid Unicode and astral planes
322 check(u"\uFDD0\uFDEF", "\"\\ufdd0\\ufdef\"")
323 check(u"\uDC00\uD800", "\"\\udc00\\ud800\"")
324 check(u"\uDC00\uD800\uDC00", "\"\\udc00\\U00010000\"")
325 check(u"\uD800\uDC00", "\"\\U00010000\"")
326 check(u"\uD800\uDC01", "\"\\U00010001\"")
327 check(u"\uD801\uDC00", "\"\\U00010400\"")
328 check(u"\uDBFF\uDFFF", "\"\\U0010ffff\"")
329 check(u"'\uDBFF\uDFFF", "\"'\\U0010ffff\"")
330 check(u"\"\uDBFF\uDFFF", "\"\\\"\\U0010ffff\"")
333 check("\xFF", "b\"\\xff\"")
334 check("\x00\"$\\`\x80\xFF", "b\"\\x00\\\"\\$\\\\\\`\\x80\\xff\"")
def test_quote_output_ascii(self, enc='ascii'):
    # Run the encoding-independent cases first, then the cases whose
    # expected output is specific to an ASCII output encoding: every
    # non-ASCII character has to come back as a backslash escape inside
    # double quotes.
    self._test_quote_output_all(enc)

    escaped_cases = [
        (u"\u00D7", "\"\\xd7\""),
        (u"'\u00D7", "\"'\\xd7\""),
        (u"\"\u00D7", "\"\\\"\\xd7\""),
        (u"\u2621", "\"\\u2621\""),
        (u"'\u2621", "\"'\\u2621\""),
        (u"\"\u2621", "\"\\\"\\u2621\""),
    ]
    for raw, quoted in escaped_cases:
        # Quotes are never optional for these inputs.
        self._check(raw, quoted, enc, False)
def test_quote_output_latin1(self, enc='latin1'):
    # Run the encoding-independent cases first, then the latin1-specific
    # ones. Expected values are written as unicode and encoded to latin1
    # just before the comparison.
    self._test_quote_output_all(enc)

    # (input, expected output as unicode, quotes optional?)
    latin1_cases = [
        (u"\u00D7", u"'\u00D7'", True),
        (u"'\u00D7", u"\"'\u00D7\"", False),
        (u"\"\u00D7", u"'\"\u00D7'", False),
        (u"\u00D7\"", u"'\u00D7\"'", True),
        (u"\u2621", u"\"\\u2621\"", False),
        (u"'\u2621", u"\"'\\u2621\"", False),
        (u"\"\u2621", u"\"\\\"\\u2621\"", False),
    ]
    for raw, expected, quotes_optional in latin1_cases:
        self._check(raw, expected.encode('latin1'), enc, quotes_optional)
def test_quote_output_utf8(self, enc='utf-8'):
    # Run the encoding-independent cases first, then the UTF-8-specific
    # ones. Expected values are written as unicode and encoded to UTF-8
    # just before the comparison.
    self._test_quote_output_all(enc)

    # (input, expected output as unicode, quotes optional?)
    utf8_cases = [
        (u"\u2621", u"'\u2621'", True),
        (u"'\u2621", u"\"'\u2621\"", False),
        (u"\"\u2621", u"'\"\u2621'", False),
        (u"\u2621\"", u"'\u2621\"'", True),
    ]
    for raw, expected, quotes_optional in utf8_cases:
        self._check(raw, expected.encode('utf-8'), enc, quotes_optional)
372 def test_quote_output_mock(self, mock_stdout):
373 mock_stdout.encoding = 'ascii'
375 self.test_quote_output_ascii(None)
377 mock_stdout.encoding = 'latin1'
379 self.test_quote_output_latin1(None)
381 mock_stdout.encoding = 'utf-8'
383 self.test_quote_output_utf8(None)
386 class UbuntuKarmicUTF8(EncodingUtil, unittest.TestCase):
387 uname = 'Linux korn 2.6.31-14-generic #48-Ubuntu SMP Fri Oct 16 14:05:01 UTC 2009 x86_64'
388 output = 'lumi\xc3\xa8re'
389 argv = 'lumi\xc3\xa8re'
391 filesystem_encoding = 'UTF-8'
392 output_encoding = 'UTF-8'
393 argv_encoding = 'UTF-8'
394 dirlist = ['test_file', '\xc3\x84rtonwall.mp3', 'Blah blah.txt']
396 class UbuntuKarmicLatin1(EncodingUtil, unittest.TestCase):
397 uname = 'Linux korn 2.6.31-14-generic #48-Ubuntu SMP Fri Oct 16 14:05:01 UTC 2009 x86_64'
398 output = 'lumi\xe8re'
401 filesystem_encoding = 'ISO-8859-1'
402 output_encoding = 'ISO-8859-1'
403 argv_encoding = 'ISO-8859-1'
404 dirlist = ['test_file', 'Blah blah.txt', '\xc4rtonwall.mp3']
406 class WindowsXP(EncodingUtil, unittest.TestCase):
407 uname = 'Windows XP 5.1.2600 x86 x86 Family 15 Model 75 Step ping 2, AuthenticAMD'
408 output = 'lumi\x8are'
410 filesystem_encoding = 'mbcs'
411 output_encoding = 'cp850'
412 argv_encoding = 'ascii'
413 dirlist = [u'Blah blah.txt', u'test_file', u'\xc4rtonwall.mp3']
415 class WindowsXP_UTF8(EncodingUtil, unittest.TestCase):
416 uname = 'Windows XP 5.1.2600 x86 x86 Family 15 Model 75 Step ping 2, AuthenticAMD'
417 output = 'lumi\xc3\xa8re'
419 filesystem_encoding = 'mbcs'
420 output_encoding = 'cp65001'
421 argv_encoding = 'ascii'
422 dirlist = [u'Blah blah.txt', u'test_file', u'\xc4rtonwall.mp3']
424 class WindowsVista(EncodingUtil, unittest.TestCase):
425 uname = 'Windows Vista 6.0.6000 x86 x86 Family 6 Model 15 Stepping 11, GenuineIntel'
426 output = 'lumi\x8are'
428 filesystem_encoding = 'mbcs'
429 output_encoding = 'cp850'
430 argv_encoding = 'ascii'
431 dirlist = [u'Blah blah.txt', u'test_file', u'\xc4rtonwall.mp3']
433 class MacOSXLeopard(EncodingUtil, unittest.TestCase):
434 uname = 'Darwin g5.local 9.8.0 Darwin Kernel Version 9.8.0: Wed Jul 15 16:57:01 PDT 2009; root:xnu-1228.15.4~1/RELEASE_PPC Power Macintosh powerpc'
435 output = 'lumi\xc3\xa8re'
436 argv = 'lumi\xc3\xa8re'
438 filesystem_encoding = 'utf-8'
439 output_encoding = 'UTF-8'
440 argv_encoding = 'UTF-8'
441 dirlist = [u'A\u0308rtonwall.mp3', u'Blah blah.txt', u'test_file']
443 class MacOSXLeopard7bit(EncodingUtil, unittest.TestCase):
444 uname = 'Darwin g5.local 9.8.0 Darwin Kernel Version 9.8.0: Wed Jul 15 16:57:01 PDT 2009; root:xnu-1228.15.4~1/RELEASE_PPC Power Macintosh powerpc'
446 filesystem_encoding = 'utf-8'
447 output_encoding = 'US-ASCII'
448 argv_encoding = 'US-ASCII'
449 dirlist = [u'A\u0308rtonwall.mp3', u'Blah blah.txt', u'test_file']
class OpenBSD(EncodingUtil, unittest.TestCase):
    # Platform fixture in the format printed by this module's __main__
    # generator, describing an OpenBSD 4.1 host.
    uname = 'OpenBSD 4.1 GENERIC#187 i386 Intel(R) Celeron(R) CPU 2.80GHz ("GenuineIntel" 686-class)'
    # Value substituted for sys.platform while the EncodingUtil tests run.
    platform = 'openbsd4'
    # '646' is a codec alias for ASCII in the Python standard library.
    filesystem_encoding = '646'
    output_encoding = '646'
    argv_encoding = '646'
    # No 'argv', 'output' or 'dirlist' attribute is defined for this platform;
    # the EncodingUtil tests look for those names with dir(self) before using
    # them. The generator emitted the note below instead of a dirlist.
    # Oops, I cannot write filenames containing non-ascii characters