2 lumiere_nfc = u"lumi\u00E8re"
3 Artonwall_nfc = u"\u00C4rtonwall.mp3"
4 Artonwall_nfd = u"A\u0308rtonwall.mp3"
# The following main helps to generate a test class for other operating systems.
15 if __name__ == "__main__":
21 if len(sys.argv) != 2:
22 print "Usage: %s lumi<e-grave>re" % sys.argv[0]
25 if sys.platform == "win32":
27 from allmydata.windows.fixups import initialize
29 print "set PYTHONPATH to the src directory"
34 print "class MyWeirdOS(EncodingUtil, unittest.TestCase):"
35 print " uname = '%s'" % ' '.join(platform.uname())
36 print " argv = %s" % repr(sys.argv[1])
37 print " platform = '%s'" % sys.platform
38 print " filesystem_encoding = '%s'" % sys.getfilesystemencoding()
39 print " io_encoding = '%s'" % sys.stdout.encoding
41 tmpdir = tempfile.mkdtemp()
42 for fname in TEST_FILENAMES:
43 open(os.path.join(tmpdir, fname), 'w').close()
45 # Use Unicode API under Windows or MacOS X
46 if sys.platform in ('win32', 'darwin'):
47 dirlist = os.listdir(unicode(tmpdir))
49 dirlist = os.listdir(tmpdir)
51 print " dirlist = %s" % repr(dirlist)
53 print " # Oops, I cannot write filenames containing non-ascii characters"
60 import os, sys, locale
62 from twisted.trial import unittest
64 from twisted.python.filepath import FilePath
66 from allmydata.test.common_util import ReallyEqualMixin
67 from allmydata.util import encodingutil, fileutil
68 from allmydata.util.encodingutil import argv_to_unicode, unicode_to_url, \
69 unicode_to_output, quote_output, quote_path, quote_local_unicode_path, \
70 quote_filepath, unicode_platform, listdir_unicode, FilenameEncodingError, \
71 get_io_encoding, get_filesystem_encoding, to_str, from_utf8_or_none, _reload, \
72 to_filepath, extend_filepath, unicode_from_filepath, unicode_segments_from
73 from allmydata.dirnode import normalize
75 from twisted.python import usage
78 class MockStdout(object):
81 class EncodingUtilErrors(ReallyEqualMixin, unittest.TestCase):
82 def test_get_io_encoding(self):
83 mock_stdout = MockStdout()
84 self.patch(sys, 'stdout', mock_stdout)
86 mock_stdout.encoding = 'UTF-8'
88 self.failUnlessReallyEqual(get_io_encoding(), 'utf-8')
90 mock_stdout.encoding = 'cp65001'
92 self.failUnlessReallyEqual(get_io_encoding(), 'utf-8')
94 mock_stdout.encoding = 'koi8-r'
95 expected = sys.platform == "win32" and 'utf-8' or 'koi8-r'
97 self.failUnlessReallyEqual(get_io_encoding(), expected)
99 mock_stdout.encoding = 'nonexistent_encoding'
100 if sys.platform == "win32":
102 self.failUnlessReallyEqual(get_io_encoding(), 'utf-8')
104 self.failUnlessRaises(AssertionError, _reload)
106 def test_get_io_encoding_not_from_stdout(self):
107 preferredencoding = 'koi8-r'
108 def call_locale_getpreferredencoding():
109 return preferredencoding
110 self.patch(locale, 'getpreferredencoding', call_locale_getpreferredencoding)
111 mock_stdout = MockStdout()
112 self.patch(sys, 'stdout', mock_stdout)
114 expected = sys.platform == "win32" and 'utf-8' or 'koi8-r'
116 self.failUnlessReallyEqual(get_io_encoding(), expected)
118 mock_stdout.encoding = None
120 self.failUnlessReallyEqual(get_io_encoding(), expected)
122 preferredencoding = None
124 self.failUnlessReallyEqual(get_io_encoding(), 'utf-8')
126 def test_argv_to_unicode(self):
127 encodingutil.io_encoding = 'utf-8'
128 self.failUnlessRaises(usage.UsageError,
130 lumiere_nfc.encode('latin1'))
132 def test_unicode_to_output(self):
133 encodingutil.io_encoding = 'koi8-r'
134 self.failUnlessRaises(UnicodeEncodeError, unicode_to_output, lumiere_nfc)
136 def test_no_unicode_normalization(self):
137 # Pretend to run on a Unicode platform.
138 # listdir_unicode normalized to NFC in 1.7beta, but now doesn't.
140 def call_os_listdir(path):
141 return [Artonwall_nfd]
142 self.patch(os, 'listdir', call_os_listdir)
143 self.patch(sys, 'platform', 'darwin')
146 self.failUnlessReallyEqual(listdir_unicode(u'/dummy'), [Artonwall_nfd])
149 # The following tests apply only to platforms that don't store filenames as
150 # Unicode entities on the filesystem.
151 class EncodingUtilNonUnicodePlatform(unittest.TestCase):
153 # Mock sys.platform because unicode_platform() uses it
154 self.original_platform = sys.platform
155 sys.platform = 'linux'
158 sys.platform = self.original_platform
161 def test_listdir_unicode(self):
162 # What happens if latin1-encoded filenames are encountered on an UTF-8
164 def call_os_listdir(path):
166 lumiere_nfc.encode('utf-8'),
167 lumiere_nfc.encode('latin1')
169 self.patch(os, 'listdir', call_os_listdir)
171 sys_filesystemencoding = 'utf-8'
172 def call_sys_getfilesystemencoding():
173 return sys_filesystemencoding
174 self.patch(sys, 'getfilesystemencoding', call_sys_getfilesystemencoding)
177 self.failUnlessRaises(FilenameEncodingError,
181 # We're trying to list a directory whose name cannot be represented in
182 # the filesystem encoding. This should fail.
183 sys_filesystemencoding = 'ascii'
185 self.failUnlessRaises(FilenameEncodingError,
190 class EncodingUtil(ReallyEqualMixin):
192 self.original_platform = sys.platform
193 sys.platform = self.platform
196 sys.platform = self.original_platform
199 def test_argv_to_unicode(self):
200 if 'argv' not in dir(self):
203 mock_stdout = MockStdout()
204 mock_stdout.encoding = self.io_encoding
205 self.patch(sys, 'stdout', mock_stdout)
210 self.failUnlessReallyEqual(argv_to_unicode(argv), argu)
def test_unicode_to_url(self):
    """Check that unicode_to_url(lumiere_nfc) yields the expected byte string.

    The expected value is "lumi" + the two bytes 0xC3 0xA8 + "re".
    """
    # failUnless(a, b) only checks that `a` is truthy and uses `b` as the
    # failure message, so the expected value was never actually compared.
    # Use the strict equality check that the other tests in this class use.
    self.failUnlessReallyEqual(unicode_to_url(lumiere_nfc), "lumi\xc3\xa8re")
215 def test_unicode_to_output(self):
216 if 'argv' not in dir(self):
219 mock_stdout = MockStdout()
220 mock_stdout.encoding = self.io_encoding
221 self.patch(sys, 'stdout', mock_stdout)
224 self.failUnlessReallyEqual(unicode_to_output(lumiere_nfc), self.argv)
226 def test_unicode_platform(self):
236 self.failUnlessReallyEqual(unicode_platform(), matrix[self.platform])
238 def test_listdir_unicode(self):
239 if 'dirlist' not in dir(self):
243 u"test".encode(self.filesystem_encoding)
244 except (LookupError, AttributeError):
245 raise unittest.SkipTest("This platform does not support the '%s' filesystem encoding "
246 "that we are testing for the benefit of a different platform."
247 % (self.filesystem_encoding,))
249 def call_os_listdir(path):
251 self.patch(os, 'listdir', call_os_listdir)
253 def call_sys_getfilesystemencoding():
254 return self.filesystem_encoding
255 self.patch(sys, 'getfilesystemencoding', call_sys_getfilesystemencoding)
258 filenames = listdir_unicode(u'/dummy')
260 self.failUnlessEqual(set([normalize(fname) for fname in filenames]),
264 class StdlibUnicode(unittest.TestCase):
265 """This mainly tests that some of the stdlib functions support Unicode paths, but also that
266 listdir_unicode works for valid filenames."""
268 def skip_if_cannot_represent_filename(self, u):
269 enc = get_filesystem_encoding()
270 if not unicode_platform():
273 except UnicodeEncodeError:
274 raise unittest.SkipTest("A non-ASCII filename could not be encoded on this platform.")
276 def test_mkdir_open_exists_abspath_listdir_expanduser(self):
277 self.skip_if_cannot_represent_filename(lumiere_nfc)
280 os.mkdir(lumiere_nfc)
281 except EnvironmentError, e:
282 raise unittest.SkipTest("%r\nIt is possible that the filesystem on which this test is being run "
283 "does not support Unicode, even though the platform does." % (e,))
285 fn = lumiere_nfc + u'/' + lumiere_nfc + u'.txt'
286 open(fn, 'wb').close()
287 self.failUnless(os.path.exists(fn))
288 self.failUnless(os.path.exists(os.path.join(os.getcwdu(), fn)))
289 filenames = listdir_unicode(lumiere_nfc)
291 # We only require that the listing includes a filename that is canonically equivalent
292 # to lumiere_nfc (on Mac OS X, it will be the NFD equivalent).
293 self.failUnlessIn(lumiere_nfc + ".txt", set([normalize(fname) for fname in filenames]))
295 expanded = fileutil.expanduser(u"~/" + lumiere_nfc)
296 self.failIfIn(u"~", expanded)
297 self.failUnless(expanded.endswith(lumiere_nfc), expanded)
299 def test_open_unrepresentable(self):
300 if unicode_platform():
301 raise unittest.SkipTest("This test is not applicable to platforms that represent filenames as Unicode.")
303 enc = get_filesystem_encoding()
307 raise unittest.SkipTest("This test cannot be run unless we know a filename that is not representable.")
308 except UnicodeEncodeError:
309 self.failUnlessRaises(UnicodeEncodeError, open, fn, 'wb')
312 class QuoteOutput(ReallyEqualMixin, unittest.TestCase):
316 def _check(self, inp, out, enc, optional_quotes, quote_newlines):
320 self.failUnlessReallyEqual(quote_output(inp, encoding=enc, quote_newlines=quote_newlines), out)
321 self.failUnlessReallyEqual(quote_output(inp, encoding=enc, quotemarks=False, quote_newlines=quote_newlines), out2)
324 elif isinstance(inp, str):
325 self.failUnlessReallyEqual(quote_output(unicode(inp), encoding=enc, quote_newlines=quote_newlines), out)
326 self.failUnlessReallyEqual(quote_output(unicode(inp), encoding=enc, quotemarks=False, quote_newlines=quote_newlines), out2)
328 self.failUnlessReallyEqual(quote_output(inp.encode('utf-8'), encoding=enc, quote_newlines=quote_newlines), out)
329 self.failUnlessReallyEqual(quote_output(inp.encode('utf-8'), encoding=enc, quotemarks=False, quote_newlines=quote_newlines), out2)
331 def _test_quote_output_all(self, enc):
332 def check(inp, out, optional_quotes=False, quote_newlines=None):
333 self._check(inp, out, enc, optional_quotes, quote_newlines)
335 # optional single quotes
336 check("foo", "'foo'", True)
337 check("\\", "'\\'", True)
338 check("$\"`", "'$\"`'", True)
339 check("\n", "'\n'", True, quote_newlines=False)
341 # mandatory single quotes
346 check("\n", "\"\\x0a\"", quote_newlines=True)
347 check("\x00", "\"\\x00\"")
349 # invalid Unicode and astral planes
350 check(u"\uFDD0\uFDEF", "\"\\ufdd0\\ufdef\"")
351 check(u"\uDC00\uD800", "\"\\udc00\\ud800\"")
352 check(u"\uDC00\uD800\uDC00", "\"\\udc00\\U00010000\"")
353 check(u"\uD800\uDC00", "\"\\U00010000\"")
354 check(u"\uD800\uDC01", "\"\\U00010001\"")
355 check(u"\uD801\uDC00", "\"\\U00010400\"")
356 check(u"\uDBFF\uDFFF", "\"\\U0010ffff\"")
357 check(u"'\uDBFF\uDFFF", "\"'\\U0010ffff\"")
358 check(u"\"\uDBFF\uDFFF", "\"\\\"\\U0010ffff\"")
361 check("\xFF", "b\"\\xff\"")
362 check("\x00\"$\\`\x80\xFF", "b\"\\x00\\\"\\$\\\\\\`\\x80\\xff\"")
def test_quote_output_ascii(self, enc='ascii'):
    """Run the shared quote_output cases, then the cases specific to an
    ASCII output encoding.

    `enc` is passed through to self._check; test_quote_output_default
    calls this method with enc=None after patching
    encodingutil.io_encoding.
    """
    self._test_quote_output_all(enc)

    # (input, expected output, optional_quotes, quote_newlines)
    cases = (
        (u"\u00D7", "\"\\xd7\"", False, None),
        (u"'\u00D7", "\"'\\xd7\"", False, None),
        (u"\"\u00D7", "\"\\\"\\xd7\"", False, None),
        (u"\u2621", "\"\\u2621\"", False, None),
        (u"'\u2621", "\"'\\u2621\"", False, None),
        (u"\"\u2621", "\"\\\"\\u2621\"", False, None),
        (u"\n", "'\n'", True, False),
        (u"\n", "\"\\x0a\"", False, True),
    )
    for inp, out, optional_quotes, quote_newlines in cases:
        self._check(inp, out, enc, optional_quotes, quote_newlines)
def test_quote_output_latin1(self, enc='latin1'):
    """Run the shared quote_output cases, then the cases specific to a
    latin1 output encoding.

    Expected outputs are written as unicode literals and encoded to
    latin1 before being handed to self._check.
    """
    self._test_quote_output_all(enc)

    # (input, expected output before encoding, optional_quotes, quote_newlines)
    cases = (
        (u"\u00D7", u"'\u00D7'", True, None),
        (u"'\u00D7", u"\"'\u00D7\"", False, None),
        (u"\"\u00D7", u"'\"\u00D7'", False, None),
        (u"\u00D7\"", u"'\u00D7\"'", True, None),
        (u"\u2621", u"\"\\u2621\"", False, None),
        (u"'\u2621", u"\"'\\u2621\"", False, None),
        (u"\"\u2621", u"\"\\\"\\u2621\"", False, None),
        (u"\n", u"'\n'", True, False),
        (u"\n", u"\"\\x0a\"", False, True),
    )
    for inp, out, optional_quotes, quote_newlines in cases:
        self._check(inp, out.encode('latin1'), enc, optional_quotes, quote_newlines)
def test_quote_output_utf8(self, enc='utf-8'):
    """Run the shared quote_output cases, then the cases specific to a
    UTF-8 output encoding.

    Expected outputs are written as unicode literals and encoded to
    UTF-8 before being handed to self._check.
    """
    self._test_quote_output_all(enc)

    # (input, expected output before encoding, optional_quotes, quote_newlines)
    cases = (
        (u"\u2621", u"'\u2621'", True, None),
        (u"'\u2621", u"\"'\u2621\"", False, None),
        (u"\"\u2621", u"'\"\u2621'", False, None),
        (u"\u2621\"", u"'\u2621\"'", True, None),
        (u"\n", u"'\n'", True, False),
        (u"\n", u"\"\\x0a\"", False, True),
    )
    for inp, out, optional_quotes, quote_newlines in cases:
        self._check(inp, out.encode('utf-8'), enc, optional_quotes, quote_newlines)
def test_quote_output_default(self):
    """Re-run each per-encoding suite with enc=None while
    encodingutil.io_encoding is patched to the matching encoding."""
    suites = (
        ('ascii', self.test_quote_output_ascii),
        ('latin1', self.test_quote_output_latin1),
        ('utf-8', self.test_quote_output_utf8),
    )
    for io_enc, run_suite in suites:
        self.patch(encodingutil, 'io_encoding', io_enc)
        run_suite(None)
def win32_other(win32, other):
    """Select a platform-dependent value.

    Returns `win32` when sys.platform is "win32", and `other` on every
    other platform.
    """
    if sys.platform == "win32":
        return win32
    return other
class QuotePaths(ReallyEqualMixin, unittest.TestCase):
    """Tests for quote_path, quote_local_unicode_path and quote_filepath.

    Each function is called with quotemarks left at its default, set to
    True, and set to False. Expected values that differ between Windows
    and other platforms are chosen with win32_other.
    """

    def test_quote_path(self):
        """Check quote_path on segment lists and quote_local_unicode_path
        on long-form local paths."""
        default, marks_on, marks_off = {}, {'quotemarks': True}, {'quotemarks': False}

        # Plain segments: single-quoted, bare when quotemarks=False.
        for kwargs, expected in ((default, "'foo/bar'"),
                                 (marks_on, "'foo/bar'"),
                                 (marks_off, "foo/bar")):
            self.failUnlessReallyEqual(quote_path([u'foo', u'bar'], **kwargs), expected)

        # A newline in a segment: the same escaped, double-quoted result is
        # expected for every quotemarks setting.
        for kwargs in (default, marks_on, marks_off):
            self.failUnlessReallyEqual(quote_path([u'foo', u'\nbar'], **kwargs),
                                       '"foo/\\x0abar"')

        # Long-form drive path: (kwargs, expected on win32, expected elsewhere).
        drive_path = u"\\\\?\\C:\\foo"
        for kwargs, on_win32, elsewhere in (
                (default, "'C:\\foo'", "'\\\\?\\C:\\foo'"),
                (marks_on, "'C:\\foo'", "'\\\\?\\C:\\foo'"),
                (marks_off, "C:\\foo", "\\\\?\\C:\\foo")):
            self.failUnlessReallyEqual(quote_local_unicode_path(drive_path, **kwargs),
                                       win32_other(on_win32, elsewhere))

        # Long-form UNC path: (kwargs, expected on win32, expected elsewhere).
        unc_path = u"\\\\?\\UNC\\foo\\bar"
        for kwargs, on_win32, elsewhere in (
                (default, "'\\\\foo\\bar'", "'\\\\?\\UNC\\foo\\bar'"),
                (marks_on, "'\\\\foo\\bar'", "'\\\\?\\UNC\\foo\\bar'"),
                (marks_off, "\\\\foo\\bar", "\\\\?\\UNC\\foo\\bar")):
            self.failUnlessReallyEqual(quote_local_unicode_path(unc_path, **kwargs),
                                       win32_other(on_win32, elsewhere))

    def test_quote_filepath(self):
        """Check quote_filepath on an ordinary FilePath and on one built
        from a long-form path."""
        default, marks_on, marks_off = {}, {'quotemarks': True}, {'quotemarks': False}

        # Ordinary absolute path, spelled per platform.
        foo_bar_fp = FilePath(win32_other(u'C:\\foo\\bar', u'/foo/bar'))
        for kwargs, on_win32, elsewhere in (
                (default, "'C:\\foo\\bar'", "'/foo/bar'"),
                (marks_on, "'C:\\foo\\bar'", "'/foo/bar'"),
                (marks_off, "C:\\foo\\bar", "/foo/bar")):
            self.failUnlessReallyEqual(quote_filepath(foo_bar_fp, **kwargs),
                                       win32_other(on_win32, elsewhere))

        # Long-form path wrapped in a FilePath.
        foo_longfp = FilePath(u'\\\\?\\C:\\foo')
        for kwargs, on_win32, elsewhere in (
                (default, "'C:\\foo'", "'\\\\?\\C:\\foo'"),
                (marks_on, "'C:\\foo'", "'\\\\?\\C:\\foo'"),
                (marks_off, "C:\\foo", "\\\\?\\C:\\foo")):
            self.failUnlessReallyEqual(quote_filepath(foo_longfp, **kwargs),
                                       win32_other(on_win32, elsewhere))
459 class FilePaths(ReallyEqualMixin, unittest.TestCase):
def test_to_filepath(self):
    """Check that to_filepath gives the same FilePath whether or not the
    input ends with a path separator."""
    foo_u = win32_other(u'C:\\foo', u'/foo')

    # Build both conversions first, then compare each against the bare path.
    converted = (to_filepath(foo_u), to_filepath(foo_u + os.path.sep))
    for fp in converted:
        self.failUnlessReallyEqual(fp, FilePath(foo_u))
        self.failUnlessReallyEqual(fp.path, foo_u)

    # The long-form path check only runs on Windows.
    if sys.platform != "win32":
        return

    long_u = u'\\\\?\\C:\\foo'
    longfp = to_filepath(long_u + u'\\')
    self.failUnlessReallyEqual(longfp, FilePath(long_u))
    self.failUnlessReallyEqual(longfp.path, long_u)
def test_extend_filepath(self):
    """Check that extend_filepath appends unicode segments to a FilePath,
    whether the base was built from a byte string or a unicode string."""
    bases = (
        FilePath(win32_other(b'C:\\foo', b'/foo')),
        FilePath(win32_other(u'C:\\foo', u'/foo')),
    )
    expected_u = win32_other(u'C:\\foo\\bar\\baz', u'/foo/bar/baz')

    for base_fp in bases:
        extended = extend_filepath(base_fp, [u'bar', u'baz'])
        self.failUnlessReallyEqual(extended, FilePath(expected_u))
        self.failUnlessReallyEqual(extended.path, expected_u)
def test_unicode_from_filepath(self):
    """Check that unicode_from_filepath returns the unicode path for a
    FilePath built from either a byte string or a unicode string."""
    bases = (
        FilePath(win32_other(b'C:\\foo', b'/foo')),
        FilePath(win32_other(u'C:\\foo', u'/foo')),
    )
    expected_u = win32_other(u'C:\\foo', u'/foo')

    for base_fp in bases:
        self.failUnlessReallyEqual(unicode_from_filepath(base_fp), expected_u)
494 def test_unicode_segments_from(self):
495 foo_bfp = FilePath(win32_other(b'C:\\foo', b'/foo'))
496 foo_ufp = FilePath(win32_other(u'C:\\foo', u'/foo'))
497 foo_bar_baz_bfp = FilePath(win32_other(b'C:\\foo\\bar\\baz', b'/foo/bar/baz'))
498 foo_bar_baz_ufp = FilePath(win32_other(u'C:\\foo\\bar\\baz', u'/foo/bar/baz'))
500 for foo_fp in (foo_bfp, foo_ufp):
501 for foo_bar_baz_fp in (foo_bar_baz_bfp, foo_bar_baz_ufp):
502 self.failUnlessReallyEqual(unicode_segments_from(foo_bar_baz_fp, foo_fp),
506 class UbuntuKarmicUTF8(EncodingUtil, unittest.TestCase):
507 uname = 'Linux korn 2.6.31-14-generic #48-Ubuntu SMP Fri Oct 16 14:05:01 UTC 2009 x86_64'
508 argv = 'lumi\xc3\xa8re'
510 filesystem_encoding = 'UTF-8'
511 io_encoding = 'UTF-8'
512 dirlist = ['test_file', '\xc3\x84rtonwall.mp3', 'Blah blah.txt']
514 class UbuntuKarmicLatin1(EncodingUtil, unittest.TestCase):
515 uname = 'Linux korn 2.6.31-14-generic #48-Ubuntu SMP Fri Oct 16 14:05:01 UTC 2009 x86_64'
518 filesystem_encoding = 'ISO-8859-1'
519 io_encoding = 'ISO-8859-1'
520 dirlist = ['test_file', 'Blah blah.txt', '\xc4rtonwall.mp3']
522 class Windows(EncodingUtil, unittest.TestCase):
523 uname = 'Windows XP 5.1.2600 x86 x86 Family 15 Model 75 Step ping 2, AuthenticAMD'
524 argv = 'lumi\xc3\xa8re'
526 filesystem_encoding = 'mbcs'
527 io_encoding = 'utf-8'
528 dirlist = [u'Blah blah.txt', u'test_file', u'\xc4rtonwall.mp3']
530 class MacOSXLeopard(EncodingUtil, unittest.TestCase):
531 uname = 'Darwin g5.local 9.8.0 Darwin Kernel Version 9.8.0: Wed Jul 15 16:57:01 PDT 2009; root:xnu-1228.15.4~1/RELEASE_PPC Power Macintosh powerpc'
532 output = 'lumi\xc3\xa8re'
534 filesystem_encoding = 'utf-8'
535 io_encoding = 'UTF-8'
536 dirlist = [u'A\u0308rtonwall.mp3', u'Blah blah.txt', u'test_file']
538 class MacOSXLeopard7bit(EncodingUtil, unittest.TestCase):
539 uname = 'Darwin g5.local 9.8.0 Darwin Kernel Version 9.8.0: Wed Jul 15 16:57:01 PDT 2009; root:xnu-1228.15.4~1/RELEASE_PPC Power Macintosh powerpc'
541 filesystem_encoding = 'utf-8'
542 io_encoding = 'US-ASCII'
543 dirlist = [u'A\u0308rtonwall.mp3', u'Blah blah.txt', u'test_file']
545 class OpenBSD(EncodingUtil, unittest.TestCase):
546 uname = 'OpenBSD 4.1 GENERIC#187 i386 Intel(R) Celeron(R) CPU 2.80GHz ("GenuineIntel" 686-class)'
547 platform = 'openbsd4'
548 filesystem_encoding = '646'
550 # Oops, I cannot write filenames containing non-ascii characters
class TestToFromStr(ReallyEqualMixin, unittest.TestCase):
    """Tests for the to_str and from_utf8_or_none conversion helpers."""

    def test_to_str(self):
        """Check to_str against byte strings, a unicode string and None."""
        # (argument, expected result)
        cases = (
            ("foo", "foo"),
            ("lumi\xc3\xa8re", "lumi\xc3\xa8re"),
            ("\xFF", "\xFF"),  # passes through invalid UTF-8 -- is this what we want?
            (u"lumi\u00E8re", "lumi\xc3\xa8re"),
            (None, None),
        )
        for arg, expected in cases:
            self.failUnlessReallyEqual(to_str(arg), expected)

    def test_from_utf8_or_none(self):
        """Check from_utf8_or_none on valid input, None, and the inputs
        that are expected to raise."""
        # A unicode argument is expected to trip an assertion.
        self.failUnlessRaises(AssertionError, from_utf8_or_none, u"foo")

        for arg, expected in (("lumi\xc3\xa8re", u"lumi\u00E8re"),
                              (None, None)):
            self.failUnlessReallyEqual(from_utf8_or_none(arg), expected)

        # A byte string that is not valid UTF-8 is expected to fail decoding.
        self.failUnlessRaises(UnicodeDecodeError, from_utf8_or_none, "\xFF")