git.rkrishnan.org Git - tahoe-lafs/tahoe-lafs.git/blob - src/allmydata/test/test_encodingutil.py
Fix a corner case for to_filepath on Windows to make it consistent with Unix.
[tahoe-lafs/tahoe-lafs.git] / src / allmydata / test / test_encodingutil.py
1
# Unicode fixtures shared by the tests below.  Artonwall_nfc spells the
# A-umlaut as one precomposed code point; Artonwall_nfd spells it as "A"
# followed by a combining diaeresis.
lumiere_nfc = u"lumi\u00E8re"
Artonwall_nfc = u"\u00C4rtonwall.mp3"
Artonwall_nfd = u"A\u0308rtonwall.mp3"

# Names the generator script below creates on disk, and the set that
# EncodingUtil.test_listdir_unicode expects back after normalization.
TEST_FILENAMES = (Artonwall_nfc, u'test_file', u'Blah blah.txt')
11
12 # The following main helps to generate a test class for other operating
13 # systems.
14
15 if __name__ == "__main__":
16     import sys, os
17     import tempfile
18     import shutil
19     import platform
20
21     if len(sys.argv) != 2:
22         print "Usage: %s lumi<e-grave>re" % sys.argv[0]
23         sys.exit(1)
24
25     if sys.platform == "win32":
26         try:
27             from allmydata.windows.fixups import initialize
28         except ImportError:
29             print "set PYTHONPATH to the src directory"
30             sys.exit(1)
31         initialize()
32
33     print
34     print "class MyWeirdOS(EncodingUtil, unittest.TestCase):"
35     print "    uname = '%s'" % ' '.join(platform.uname())
36     print "    argv = %s" % repr(sys.argv[1])
37     print "    platform = '%s'" % sys.platform
38     print "    filesystem_encoding = '%s'" % sys.getfilesystemencoding()
39     print "    io_encoding = '%s'" % sys.stdout.encoding
40     try:
41         tmpdir = tempfile.mkdtemp()
42         for fname in TEST_FILENAMES:
43             open(os.path.join(tmpdir, fname), 'w').close()
44
45         # Use Unicode API under Windows or MacOS X
46         if sys.platform in ('win32', 'darwin'):
47             dirlist = os.listdir(unicode(tmpdir))
48         else:
49             dirlist = os.listdir(tmpdir)
50
51         print "    dirlist = %s" % repr(dirlist)
52     except:
53         print "    # Oops, I cannot write filenames containing non-ascii characters"
54     print
55
56     shutil.rmtree(tmpdir)
57     sys.exit(0)
58
59
60 import os, sys, locale
61
62 from twisted.trial import unittest
63
64 from twisted.python.filepath import FilePath
65
66 from allmydata.test.common_util import ReallyEqualMixin
67 from allmydata.util import encodingutil, fileutil
68 from allmydata.util.encodingutil import argv_to_unicode, unicode_to_url, \
69     unicode_to_output, quote_output, quote_path, quote_local_unicode_path, \
70     quote_filepath, unicode_platform, listdir_unicode, FilenameEncodingError, \
71     get_io_encoding, get_filesystem_encoding, to_str, from_utf8_or_none, _reload, \
72     to_filepath, extend_filepath, unicode_from_filepath, unicode_segments_from
73 from allmydata.dirnode import normalize
74
75 from twisted.python import usage
76
77
class MockStdout(object):
    """Attribute-less stand-in for sys.stdout; tests set `encoding` on it as needed."""
80
class EncodingUtilErrors(ReallyEqualMixin, unittest.TestCase):
    """Edge cases and failure paths of encodingutil's detection and conversion helpers."""

    def setUp(self):
        # These tests re-run encodingutil's detection against mocked
        # sys.stdout / locale / os state.  Trial runs cleanups in reverse
        # order of registration, so registering this before any self.patch()
        # makes it run after the patches have been undone, recomputing the
        # module state from the real environment for later tests.
        self.addCleanup(_reload)

    def test_get_io_encoding(self):
        mock_stdout = MockStdout()
        self.patch(sys, 'stdout', mock_stdout)

        mock_stdout.encoding = 'UTF-8'
        _reload()
        self.failUnlessReallyEqual(get_io_encoding(), 'utf-8')

        mock_stdout.encoding = 'cp65001'
        _reload()
        self.failUnlessReallyEqual(get_io_encoding(), 'utf-8')

        mock_stdout.encoding = 'koi8-r'
        expected = 'utf-8' if sys.platform == "win32" else 'koi8-r'
        _reload()
        self.failUnlessReallyEqual(get_io_encoding(), expected)

        mock_stdout.encoding = 'nonexistent_encoding'
        if sys.platform == "win32":
            _reload()
            self.failUnlessReallyEqual(get_io_encoding(), 'utf-8')
        else:
            self.failUnlessRaises(AssertionError, _reload)

    def test_get_io_encoding_not_from_stdout(self):
        # The closure reads this variable at call time, so rebinding it below
        # changes what the patched locale function returns.
        preferredencoding = 'koi8-r'
        def call_locale_getpreferredencoding():
            return preferredencoding
        self.patch(locale, 'getpreferredencoding', call_locale_getpreferredencoding)

        # This mock has no `encoding` attribute at all at first.
        mock_stdout = MockStdout()
        self.patch(sys, 'stdout', mock_stdout)

        expected = 'utf-8' if sys.platform == "win32" else 'koi8-r'
        _reload()
        self.failUnlessReallyEqual(get_io_encoding(), expected)

        mock_stdout.encoding = None
        _reload()
        self.failUnlessReallyEqual(get_io_encoding(), expected)

        preferredencoding = None
        _reload()
        self.failUnlessReallyEqual(get_io_encoding(), 'utf-8')

    def test_argv_to_unicode(self):
        # Patch rather than assign, so the module global is restored afterwards.
        self.patch(encodingutil, 'io_encoding', 'utf-8')
        self.failUnlessRaises(usage.UsageError,
                              argv_to_unicode,
                              lumiere_nfc.encode('latin1'))

    def test_unicode_to_output(self):
        # Patch rather than assign, so the module global is restored afterwards.
        self.patch(encodingutil, 'io_encoding', 'koi8-r')
        self.failUnlessRaises(UnicodeEncodeError, unicode_to_output, lumiere_nfc)

    def test_no_unicode_normalization(self):
        # Pretend to run on a Unicode platform.
        # listdir_unicode normalized to NFC in 1.7beta, but now doesn't.

        def call_os_listdir(path):
            return [Artonwall_nfd]
        self.patch(os, 'listdir', call_os_listdir)
        self.patch(sys, 'platform', 'darwin')

        _reload()
        self.failUnlessReallyEqual(listdir_unicode(u'/dummy'), [Artonwall_nfd])
147
148
149 # The following tests apply only to platforms that don't store filenames as
150 # Unicode entities on the filesystem.
class EncodingUtilNonUnicodePlatform(unittest.TestCase):
    """listdir_unicode failure modes when sys.platform is forced to 'linux'."""

    def setUp(self):
        # Mock sys.platform because unicode_platform() uses it
        self.original_platform = sys.platform
        sys.platform = 'linux'

    def tearDown(self):
        sys.platform = self.original_platform
        _reload()

    def test_listdir_unicode(self):
        # What happens if latin1-encoded filenames are encountered on an UTF-8
        # filesystem?
        raw_names = [lumiere_nfc.encode('utf-8'), lumiere_nfc.encode('latin1')]
        self.patch(os, 'listdir', lambda path: list(raw_names))

        # One-element holder so the reported encoding can be switched below
        # while the patched function stays installed.
        reported_encoding = ['utf-8']
        self.patch(sys, 'getfilesystemencoding', lambda: reported_encoding[0])

        _reload()
        self.failUnlessRaises(FilenameEncodingError, listdir_unicode, u'/dummy')

        # We're trying to list a directory whose name cannot be represented in
        # the filesystem encoding.  This should fail.
        reported_encoding[0] = 'ascii'
        _reload()
        self.failUnlessRaises(FilenameEncodingError, listdir_unicode, u'/' + lumiere_nfc)
188
189
class EncodingUtil(ReallyEqualMixin):
    """Mixin of encodingutil tests driven by a recorded platform description.

    Concrete subclasses supply `platform`, `filesystem_encoding` and
    `io_encoding` as class attributes, and optionally `argv` (the encoded
    command-line argument) and `dirlist` (a recorded directory listing).
    Tests that need a missing optional attribute return without checking
    anything.
    """

    def setUp(self):
        self.original_platform = sys.platform
        sys.platform = self.platform

    def tearDown(self):
        sys.platform = self.original_platform
        _reload()

    def test_argv_to_unicode(self):
        if not hasattr(self, 'argv'):
            return

        mock_stdout = MockStdout()
        mock_stdout.encoding = self.io_encoding
        self.patch(sys, 'stdout', mock_stdout)

        argu = lumiere_nfc
        argv = self.argv
        _reload()
        self.failUnlessReallyEqual(argv_to_unicode(argv), argu)

    def test_unicode_to_url(self):
        # failUnless(value, msg) would only check that the value is truthy
        # and use the second argument as the failure message; compare the
        # actual result against the expected bytes instead.
        self.failUnlessReallyEqual(unicode_to_url(lumiere_nfc), "lumi\xc3\xa8re")

    def test_unicode_to_output(self):
        if not hasattr(self, 'argv'):
            return

        mock_stdout = MockStdout()
        mock_stdout.encoding = self.io_encoding
        self.patch(sys, 'stdout', mock_stdout)

        _reload()
        self.failUnlessReallyEqual(unicode_to_output(lumiere_nfc), self.argv)

    def test_unicode_platform(self):
        # Expected unicode_platform() result for each recorded sys.platform.
        matrix = {
          'linux2': False,
          'linux3': False,
          'openbsd4': False,
          'win32':  True,
          'darwin': True,
        }

        _reload()
        self.failUnlessReallyEqual(unicode_platform(), matrix[self.platform])

    def test_listdir_unicode(self):
        if not hasattr(self, 'dirlist'):
            return

        # The recorded encoding may not exist as a codec on the machine
        # running the tests; skip rather than fail in that case.
        try:
            u"test".encode(self.filesystem_encoding)
        except (LookupError, AttributeError):
            raise unittest.SkipTest("This platform does not support the '%s' filesystem encoding "
                                    "that we are testing for the benefit of a different platform."
                                    % (self.filesystem_encoding,))

        def call_os_listdir(path):
            return self.dirlist
        self.patch(os, 'listdir', call_os_listdir)

        def call_sys_getfilesystemencoding():
            return self.filesystem_encoding
        self.patch(sys, 'getfilesystemencoding', call_sys_getfilesystemencoding)

        _reload()
        filenames = listdir_unicode(u'/dummy')

        # Compare after normalization, since the recorded listing may spell
        # the names in a different normalization form than TEST_FILENAMES.
        self.failUnlessEqual(set(normalize(fname) for fname in filenames),
                             set(TEST_FILENAMES))
262
263
class StdlibUnicode(unittest.TestCase):
    """Checks that several stdlib functions accept Unicode paths, and that
    listdir_unicode copes with valid filenames."""

    def skip_if_cannot_represent_filename(self, u):
        """Raise SkipTest if `u` cannot be encoded in the filesystem encoding
        on a platform that is not a Unicode platform."""
        enc = get_filesystem_encoding()
        if unicode_platform():
            return
        try:
            u.encode(enc)
        except UnicodeEncodeError:
            raise unittest.SkipTest("A non-ASCII filename could not be encoded on this platform.")

    def test_mkdir_open_exists_abspath_listdir_expanduser(self):
        self.skip_if_cannot_represent_filename(lumiere_nfc)

        try:
            os.mkdir(lumiere_nfc)
        except EnvironmentError as e:
            raise unittest.SkipTest("%r\nIt is possible that the filesystem on which this test is being run "
                                    "does not support Unicode, even though the platform does." % (e,))

        fn = lumiere_nfc + u'/' + lumiere_nfc + u'.txt'
        open(fn, 'wb').close()
        self.failUnless(os.path.exists(fn))
        absolute_fn = os.path.join(os.getcwdu(), fn)
        self.failUnless(os.path.exists(absolute_fn))

        # We only require that the listing includes a filename that is canonically equivalent
        # to lumiere_nfc (on Mac OS X, it will be the NFD equivalent).
        normalized = set([normalize(fname) for fname in listdir_unicode(lumiere_nfc)])
        self.failUnlessIn(lumiere_nfc + ".txt", normalized)

        expanded = fileutil.expanduser(u"~/" + lumiere_nfc)
        self.failIfIn(u"~", expanded)
        self.failUnless(expanded.endswith(lumiere_nfc), expanded)

    def test_open_unrepresentable(self):
        if unicode_platform():
            raise unittest.SkipTest("This test is not applicable to platforms that represent filenames as Unicode.")

        enc = get_filesystem_encoding()
        fn = u'\u2621.txt'
        try:
            fn.encode(enc)
        except UnicodeEncodeError:
            # The name really is unrepresentable here, so open() must refuse it.
            self.failUnlessRaises(UnicodeEncodeError, open, fn, 'wb')
        else:
            raise unittest.SkipTest("This test cannot be run unless we know a filename that is not representable.")
310
311
class QuoteOutput(ReallyEqualMixin, unittest.TestCase):
    """Table-driven checks of quote_output under ascii, latin1 and utf-8 output encodings.

    Each table row is (inp, out, optional_quotes, quote_newlines).
    """

    def tearDown(self):
        _reload()

    def _check(self, inp, out, enc, optional_quotes, quote_newlines):
        # With optional quotes, quotemarks=False is expected to yield the
        # text between the surrounding quote characters.
        bare = out[1:-1] if optional_quotes else out

        def verify(value):
            self.failUnlessReallyEqual(quote_output(value, encoding=enc, quote_newlines=quote_newlines), out)
            self.failUnlessReallyEqual(quote_output(value, encoding=enc, quotemarks=False, quote_newlines=quote_newlines), bare)

        verify(inp)
        if out[0:2] == 'b"':
            # Byte-string style output: no str/unicode counterpart is checked.
            return
        if isinstance(inp, str):
            verify(unicode(inp))
        else:
            verify(inp.encode('utf-8'))

    def _check_rows(self, rows, enc, out_codec=None):
        """Run _check on each row, first encoding `out` with out_codec if one is given."""
        for (inp, out, optional_quotes, quote_newlines) in rows:
            if out_codec is not None:
                out = out.encode(out_codec)
            self._check(inp, out, enc, optional_quotes, quote_newlines)

    def _test_quote_output_all(self, enc):
        self._check_rows([
            # optional single quotes
            ("foo",  "'foo'",  True, None),
            ("\\",   "'\\'",   True, None),
            ("$\"`", "'$\"`'", True, None),
            ("\n",   "'\n'",   True, False),

            # mandatory single quotes
            ("\"",   "'\"'", False, None),

            # double quotes
            ("'",    "\"'\"", False, None),
            ("\n",   "\"\\x0a\"", False, True),
            ("\x00", "\"\\x00\"", False, None),

            # invalid Unicode and astral planes
            (u"\uFDD0\uFDEF",       "\"\\ufdd0\\ufdef\"", False, None),
            (u"\uDC00\uD800",       "\"\\udc00\\ud800\"", False, None),
            (u"\uDC00\uD800\uDC00", "\"\\udc00\\U00010000\"", False, None),
            (u"\uD800\uDC00",       "\"\\U00010000\"", False, None),
            (u"\uD800\uDC01",       "\"\\U00010001\"", False, None),
            (u"\uD801\uDC00",       "\"\\U00010400\"", False, None),
            (u"\uDBFF\uDFFF",       "\"\\U0010ffff\"", False, None),
            (u"'\uDBFF\uDFFF",      "\"'\\U0010ffff\"", False, None),
            (u"\"\uDBFF\uDFFF",     "\"\\\"\\U0010ffff\"", False, None),

            # invalid UTF-8
            ("\xFF",                "b\"\\xff\"", False, None),
            ("\x00\"$\\`\x80\xFF",  "b\"\\x00\\\"\\$\\\\\\`\\x80\\xff\"", False, None),
        ], enc)

    def test_quote_output_ascii(self, enc='ascii'):
        self._test_quote_output_all(enc)
        self._check_rows([
            (u"\u00D7",   "\"\\xd7\"", False, None),
            (u"'\u00D7",  "\"'\\xd7\"", False, None),
            (u"\"\u00D7", "\"\\\"\\xd7\"", False, None),
            (u"\u2621",   "\"\\u2621\"", False, None),
            (u"'\u2621",  "\"'\\u2621\"", False, None),
            (u"\"\u2621", "\"\\\"\\u2621\"", False, None),
            (u"\n",       "'\n'",      True, False),
            (u"\n",       "\"\\x0a\"", False, True),
        ], enc)

    def test_quote_output_latin1(self, enc='latin1'):
        self._test_quote_output_all(enc)
        self._check_rows([
            (u"\u00D7",   u"'\u00D7'", True, None),
            (u"'\u00D7",  u"\"'\u00D7\"", False, None),
            (u"\"\u00D7", u"'\"\u00D7'", False, None),
            (u"\u00D7\"", u"'\u00D7\"'", True, None),
            (u"\u2621",   u"\"\\u2621\"", False, None),
            (u"'\u2621",  u"\"'\\u2621\"", False, None),
            (u"\"\u2621", u"\"\\\"\\u2621\"", False, None),
            (u"\n",       u"'\n'", True, False),
            (u"\n",       u"\"\\x0a\"", False, True),
        ], enc, out_codec='latin1')

    def test_quote_output_utf8(self, enc='utf-8'):
        self._test_quote_output_all(enc)
        self._check_rows([
            (u"\u2621",   u"'\u2621'", True, None),
            (u"'\u2621",  u"\"'\u2621\"", False, None),
            (u"\"\u2621", u"'\"\u2621'", False, None),
            (u"\u2621\"", u"'\u2621\"'", True, None),
            (u"\n",       u"'\n'", True, False),
            (u"\n",       u"\"\\x0a\"", False, True),
        ], enc, out_codec='utf-8')

    def test_quote_output_default(self):
        # Passing enc=None exercises quote_output's default, with the module's
        # io_encoding patched to each encoding in turn.
        for (io_enc, run_cases) in [
            ('ascii',  self.test_quote_output_ascii),
            ('latin1', self.test_quote_output_latin1),
            ('utf-8',  self.test_quote_output_utf8),
        ]:
            self.patch(encodingutil, 'io_encoding', io_enc)
            run_cases(None)
414
415
def win32_other(win32, other):
    """Return `win32` when sys.platform is "win32", and `other` on any other platform."""
    if sys.platform == "win32":
        return win32
    return other
418
class QuotePaths(ReallyEqualMixin, unittest.TestCase):
    """Checks quote_path, quote_local_unicode_path and quote_filepath with
    default, quotemarks=True and quotemarks=False arguments."""

    def test_quote_path(self):
        # (path segments, extra keyword arguments, expected output)
        segment_cases = [
            ((u'foo', u'bar'),   {},                    "'foo/bar'"),
            ((u'foo', u'bar'),   {'quotemarks': True},  "'foo/bar'"),
            ((u'foo', u'bar'),   {'quotemarks': False}, "foo/bar"),
            ((u'foo', u'\nbar'), {},                    '"foo/\\x0abar"'),
            ((u'foo', u'\nbar'), {'quotemarks': True},  '"foo/\\x0abar"'),
            ((u'foo', u'\nbar'), {'quotemarks': False}, '"foo/\\x0abar"'),
        ]
        for (segments, kwargs, expected) in segment_cases:
            self.failUnlessReallyEqual(quote_path(list(segments), **kwargs), expected)

        long_drive = u"\\\\?\\C:\\foo"
        long_unc = u"\\\\?\\UNC\\foo\\bar"
        # (path, extra keyword arguments, expected on win32, expected elsewhere)
        local_cases = [
            (long_drive, {},                    "'C:\\foo'",      "'\\\\?\\C:\\foo'"),
            (long_drive, {'quotemarks': True},  "'C:\\foo'",      "'\\\\?\\C:\\foo'"),
            (long_drive, {'quotemarks': False}, "C:\\foo",        "\\\\?\\C:\\foo"),
            (long_unc,   {},                    "'\\\\foo\\bar'", "'\\\\?\\UNC\\foo\\bar'"),
            (long_unc,   {'quotemarks': True},  "'\\\\foo\\bar'", "'\\\\?\\UNC\\foo\\bar'"),
            (long_unc,   {'quotemarks': False}, "\\\\foo\\bar",   "\\\\?\\UNC\\foo\\bar"),
        ]
        for (path, kwargs, on_win32, elsewhere) in local_cases:
            self.failUnlessReallyEqual(quote_local_unicode_path(path, **kwargs),
                                       win32_other(on_win32, elsewhere))

    def test_quote_filepath(self):
        foo_bar_fp = FilePath(win32_other(u'C:\\foo\\bar', u'/foo/bar'))
        for (kwargs, on_win32, elsewhere) in [
            ({},                    "'C:\\foo\\bar'", "'/foo/bar'"),
            ({'quotemarks': True},  "'C:\\foo\\bar'", "'/foo/bar'"),
            ({'quotemarks': False}, "C:\\foo\\bar",   "/foo/bar"),
        ]:
            self.failUnlessReallyEqual(quote_filepath(foo_bar_fp, **kwargs),
                                       win32_other(on_win32, elsewhere))

        foo_longfp = FilePath(u'\\\\?\\C:\\foo')
        for (kwargs, on_win32, elsewhere) in [
            ({},                    "'C:\\foo'", "'\\\\?\\C:\\foo'"),
            ({'quotemarks': True},  "'C:\\foo'", "'\\\\?\\C:\\foo'"),
            ({'quotemarks': False}, "C:\\foo",   "\\\\?\\C:\\foo"),
        ]:
            self.failUnlessReallyEqual(quote_filepath(foo_longfp, **kwargs),
                                       win32_other(on_win32, elsewhere))
457
458
class FilePaths(ReallyEqualMixin, unittest.TestCase):
    """Checks the FilePath helpers (to_filepath, extend_filepath,
    unicode_from_filepath, unicode_segments_from) with bytes and unicode paths."""

    def test_to_filepath(self):
        base_u = win32_other(u'C:\\foo', u'/foo')

        # A trailing separator must not change the resulting FilePath.
        converted = [to_filepath(candidate)
                     for candidate in (base_u, base_u + os.path.sep)]
        for fp in converted:
            self.failUnlessReallyEqual(fp, FilePath(base_u))
            self.failUnlessReallyEqual(fp.path, base_u)

        if sys.platform != "win32":
            return

        # Same check for a path carrying the "\\?\" prefix (Windows only).
        long_u = u'\\\\?\\C:\\foo'
        longfp = to_filepath(long_u + u'\\')
        self.failUnlessReallyEqual(longfp, FilePath(long_u))
        self.failUnlessReallyEqual(longfp.path, long_u)

    def test_extend_filepath(self):
        bytes_root = FilePath(win32_other(b'C:\\foo', b'/foo'))
        unicode_root = FilePath(win32_other(u'C:\\foo', u'/foo'))
        expected_u = win32_other(u'C:\\foo\\bar\\baz', u'/foo/bar/baz')

        for root in (bytes_root, unicode_root):
            extended = extend_filepath(root, [u'bar', u'baz'])
            self.failUnlessReallyEqual(extended, FilePath(expected_u))
            self.failUnlessReallyEqual(extended.path, expected_u)

    def test_unicode_from_filepath(self):
        bytes_root = FilePath(win32_other(b'C:\\foo', b'/foo'))
        unicode_root = FilePath(win32_other(u'C:\\foo', u'/foo'))
        expected_u = win32_other(u'C:\\foo', u'/foo')

        for root in (bytes_root, unicode_root):
            self.failUnlessReallyEqual(unicode_from_filepath(root), expected_u)

    def test_unicode_segments_from(self):
        bytes_root = FilePath(win32_other(b'C:\\foo', b'/foo'))
        unicode_root = FilePath(win32_other(u'C:\\foo', u'/foo'))
        bytes_leaf = FilePath(win32_other(b'C:\\foo\\bar\\baz', b'/foo/bar/baz'))
        unicode_leaf = FilePath(win32_other(u'C:\\foo\\bar\\baz', u'/foo/bar/baz'))

        # Every bytes/unicode combination of ancestor and descendant.
        for root in (bytes_root, unicode_root):
            for leaf in (bytes_leaf, unicode_leaf):
                self.failUnlessReallyEqual(unicode_segments_from(leaf, root),
                                           [u'bar', u'baz'])
504
505
class UbuntuKarmicUTF8(EncodingUtil, unittest.TestCase):
    # Recorded platform description: Linux with UTF-8 for both encodings.
    platform = 'linux2'
    uname = 'Linux korn 2.6.31-14-generic #48-Ubuntu SMP Fri Oct 16 14:05:01 UTC 2009 x86_64'
    filesystem_encoding = 'UTF-8'
    io_encoding = 'UTF-8'
    argv = 'lumi\xc3\xa8re'
    dirlist = ['test_file', '\xc3\x84rtonwall.mp3', 'Blah blah.txt']
513
class UbuntuKarmicLatin1(EncodingUtil, unittest.TestCase):
    # Recorded platform description: Linux with ISO-8859-1 for both encodings.
    platform = 'linux2'
    uname = 'Linux korn 2.6.31-14-generic #48-Ubuntu SMP Fri Oct 16 14:05:01 UTC 2009 x86_64'
    filesystem_encoding = 'ISO-8859-1'
    io_encoding = 'ISO-8859-1'
    argv = 'lumi\xe8re'
    dirlist = ['test_file', 'Blah blah.txt', '\xc4rtonwall.mp3']
521
class Windows(EncodingUtil, unittest.TestCase):
    # Recorded platform description: win32 with a unicode directory listing.
    platform = 'win32'
    uname = 'Windows XP 5.1.2600 x86 x86 Family 15 Model 75 Step ping 2, AuthenticAMD'
    filesystem_encoding = 'mbcs'
    io_encoding = 'utf-8'
    argv = 'lumi\xc3\xa8re'
    dirlist = [u'Blah blah.txt', u'test_file', u'\xc4rtonwall.mp3']
529
class MacOSXLeopard(EncodingUtil, unittest.TestCase):
    # Recorded platform description: darwin; the listing spells the A-umlaut
    # as "A" plus a combining diaeresis.  This class defines `output` rather
    # than `argv`, so the mixin's argv-dependent tests return early for it.
    platform = 'darwin'
    uname = 'Darwin g5.local 9.8.0 Darwin Kernel Version 9.8.0: Wed Jul 15 16:57:01 PDT 2009; root:xnu-1228.15.4~1/RELEASE_PPC Power Macintosh powerpc'
    filesystem_encoding = 'utf-8'
    io_encoding = 'UTF-8'
    output = 'lumi\xc3\xa8re'
    dirlist = [u'A\u0308rtonwall.mp3', u'Blah blah.txt', u'test_file']
537
class MacOSXLeopard7bit(EncodingUtil, unittest.TestCase):
    # Recorded platform description: darwin with a US-ASCII I/O encoding.
    # No `argv` is recorded, so the mixin's argv-dependent tests return early.
    platform = 'darwin'
    uname = 'Darwin g5.local 9.8.0 Darwin Kernel Version 9.8.0: Wed Jul 15 16:57:01 PDT 2009; root:xnu-1228.15.4~1/RELEASE_PPC Power Macintosh powerpc'
    filesystem_encoding = 'utf-8'
    io_encoding = 'US-ASCII'
    dirlist = [u'A\u0308rtonwall.mp3', u'Blah blah.txt', u'test_file']
544
class OpenBSD(EncodingUtil, unittest.TestCase):
    # Recorded platform description: OpenBSD with the '646' codec for both
    # encodings.  Neither `argv` nor `dirlist` is recorded, so the mixin tests
    # that need them return early.
    platform = 'openbsd4'
    uname = 'OpenBSD 4.1 GENERIC#187 i386 Intel(R) Celeron(R) CPU 2.80GHz ("GenuineIntel" 686-class)'
    filesystem_encoding = '646'
    io_encoding = '646'
    # Oops, I cannot write filenames containing non-ascii characters
551
552
class TestToFromStr(ReallyEqualMixin, unittest.TestCase):
    """Checks to_str and from_utf8_or_none on byte strings, unicode and None."""

    def test_to_str(self):
        # (input, expected result)
        cases = [
            ("foo", "foo"),
            ("lumi\xc3\xa8re", "lumi\xc3\xa8re"),
            ("\xFF", "\xFF"),  # passes through invalid UTF-8 -- is this what we want?
            (u"lumi\u00E8re", "lumi\xc3\xa8re"),
            (None, None),
        ]
        for (given, expected) in cases:
            self.failUnlessReallyEqual(to_str(given), expected)

    def test_from_utf8_or_none(self):
        # A unicode argument is rejected with an AssertionError.
        self.failUnlessRaises(AssertionError, from_utf8_or_none, u"foo")

        for (given, expected) in [
            ("lumi\xc3\xa8re", u"lumi\u00E8re"),
            (None, None),
        ]:
            self.failUnlessReallyEqual(from_utf8_or_none(given), expected)

        # Invalid UTF-8 is an error here.
        self.failUnlessRaises(UnicodeDecodeError, from_utf8_or_none, "\xFF")