git.rkrishnan.org Git - tahoe-lafs/tahoe-lafs.git/blob - src/allmydata/test/test_encodingutil.py
test_encodingutil: fixes for Unix.
[tahoe-lafs/tahoe-lafs.git] / src / allmydata / test / test_encodingutil.py
1
# Sample strings shared by the tests below. The two Artonwall variants spell
# the same name with a precomposed A-umlaut (NFC) and with a plain "A"
# followed by a combining diaeresis (NFD).
lumiere_nfc = u"lumi\u00E8re"
Artonwall_nfc = u"\u00C4rtonwall.mp3"
Artonwall_nfd = u"A\u0308rtonwall.mp3"

# Names created by the generator script and expected back from listdir_unicode.
TEST_FILENAMES = (Artonwall_nfc, u'test_file', u'Blah blah.txt')
11
12 # The following main helps to generate a test class for other operating
13 # systems.
14
15 if __name__ == "__main__":
16     import sys, os
17     import tempfile
18     import shutil
19     import platform
20
21     if len(sys.argv) != 2:
22         print "Usage: %s lumi<e-grave>re" % sys.argv[0]
23         sys.exit(1)
24
25     if sys.platform == "win32":
26         try:
27             from allmydata.windows.fixups import initialize
28         except ImportError:
29             print "set PYTHONPATH to the src directory"
30             sys.exit(1)
31         initialize()
32
33     print
34     print "class MyWeirdOS(EncodingUtil, unittest.TestCase):"
35     print "    uname = '%s'" % ' '.join(platform.uname())
36     print "    argv = %s" % repr(sys.argv[1])
37     print "    platform = '%s'" % sys.platform
38     print "    filesystem_encoding = '%s'" % sys.getfilesystemencoding()
39     print "    io_encoding = '%s'" % sys.stdout.encoding
40     try:
41         tmpdir = tempfile.mkdtemp()
42         for fname in TEST_FILENAMES:
43             open(os.path.join(tmpdir, fname), 'w').close()
44
45         # Use Unicode API under Windows or MacOS X
46         if sys.platform in ('win32', 'darwin'):
47             dirlist = os.listdir(unicode(tmpdir))
48         else:
49             dirlist = os.listdir(tmpdir)
50
51         print "    dirlist = %s" % repr(dirlist)
52     except:
53         print "    # Oops, I cannot write filenames containing non-ascii characters"
54     print
55
56     shutil.rmtree(tmpdir)
57     sys.exit(0)
58
59
60 import os, sys, locale
61
62 from twisted.trial import unittest
63
64 from twisted.python.filepath import FilePath
65
66 from allmydata.test.common_util import ReallyEqualMixin
67 from allmydata.util import encodingutil, fileutil
68 from allmydata.util.encodingutil import argv_to_unicode, unicode_to_url, \
69     unicode_to_output, quote_output, quote_path, quote_local_unicode_path, \
70     quote_filepath, unicode_platform, listdir_unicode, FilenameEncodingError, \
71     get_io_encoding, get_filesystem_encoding, to_str, from_utf8_or_none, _reload, \
72     to_filepath, extend_filepath, unicode_from_filepath, unicode_segments_from
73 from allmydata.dirnode import normalize
74
75 from twisted.python import usage
76
77
class MockStdout(object):
    """Empty stand-in for sys.stdout; tests set `encoding` on instances as needed."""
80
class EncodingUtilErrors(ReallyEqualMixin, unittest.TestCase):
    """Checks of encodingutil's I/O-encoding detection and of its error paths."""

    def test_get_io_encoding(self):
        # get_io_encoding() is expected to reflect sys.stdout.encoding as of
        # the most recent _reload().
        mock_stdout = MockStdout()
        self.patch(sys, 'stdout', mock_stdout)

        mock_stdout.encoding = 'UTF-8'
        _reload()
        self.failUnlessReallyEqual(get_io_encoding(), 'utf-8')

        mock_stdout.encoding = 'cp65001'
        _reload()
        self.failUnlessReallyEqual(get_io_encoding(), 'utf-8')

        mock_stdout.encoding = 'koi8-r'
        expected = 'utf-8' if sys.platform == "win32" else 'koi8-r'
        _reload()
        self.failUnlessReallyEqual(get_io_encoding(), expected)

        mock_stdout.encoding = 'nonexistent_encoding'
        if sys.platform == "win32":
            _reload()
            self.failUnlessReallyEqual(get_io_encoding(), 'utf-8')
        else:
            self.failUnlessRaises(AssertionError, _reload)

    def test_get_io_encoding_not_from_stdout(self):
        # The mock stdout starts without an `encoding` attribute, so the
        # value has to come from locale.getpreferredencoding() instead.
        preferredencoding = 'koi8-r'
        def call_locale_getpreferredencoding():
            return preferredencoding
        self.patch(locale, 'getpreferredencoding', call_locale_getpreferredencoding)
        mock_stdout = MockStdout()
        self.patch(sys, 'stdout', mock_stdout)

        expected = 'utf-8' if sys.platform == "win32" else 'koi8-r'
        _reload()
        self.failUnlessReallyEqual(get_io_encoding(), expected)

        mock_stdout.encoding = None
        _reload()
        self.failUnlessReallyEqual(get_io_encoding(), expected)

        # Rebinding the closed-over variable changes what the patched
        # getpreferredencoding() returns on the next _reload().
        preferredencoding = None
        _reload()
        self.failUnlessReallyEqual(get_io_encoding(), 'utf-8')

    def test_argv_to_unicode(self):
        # Patch (rather than assign) the module global so that it is restored
        # after the test and cannot leak into later tests.
        self.patch(encodingutil, 'io_encoding', 'utf-8')
        self.failUnlessRaises(usage.UsageError,
                              argv_to_unicode,
                              lumiere_nfc.encode('latin1'))

    def test_unicode_to_output(self):
        # Patched for the same reason as in test_argv_to_unicode.
        self.patch(encodingutil, 'io_encoding', 'koi8-r')
        self.failUnlessRaises(UnicodeEncodeError, unicode_to_output, lumiere_nfc)

    def test_no_unicode_normalization(self):
        # Pretend to run on a Unicode platform.
        # listdir_unicode normalized to NFC in 1.7beta, but now doesn't.

        def call_os_listdir(path):
            return [Artonwall_nfd]
        self.patch(os, 'listdir', call_os_listdir)
        self.patch(sys, 'platform', 'darwin')

        _reload()
        self.failUnlessReallyEqual(listdir_unicode(u'/dummy'), [Artonwall_nfd])
147
148
149 # The following tests apply only to platforms that don't store filenames as
150 # Unicode entities on the filesystem.
class EncodingUtilNonUnicodePlatform(unittest.TestCase):
    """listdir_unicode failure cases, run with sys.platform forced to 'linux'."""

    def setUp(self):
        # Mock sys.platform because unicode_platform() uses it
        self.original_platform, sys.platform = sys.platform, 'linux'

    def tearDown(self):
        sys.platform = self.original_platform
        _reload()

    def test_listdir_unicode(self):
        # What happens if latin1-encoded filenames are encountered on an UTF-8
        # filesystem?
        self.patch(os, 'listdir',
                   lambda path: [lumiere_nfc.encode('utf-8'),
                                 lumiere_nfc.encode('latin1')])

        # Single-element list so the encoding reported by the patched
        # sys.getfilesystemencoding() can be switched part-way through.
        fs_encoding = ['utf-8']
        self.patch(sys, 'getfilesystemencoding', lambda: fs_encoding[0])

        _reload()
        self.failUnlessRaises(FilenameEncodingError, listdir_unicode, u'/dummy')

        # We're trying to list a directory whose name cannot be represented in
        # the filesystem encoding.  This should fail.
        fs_encoding[0] = 'ascii'
        _reload()
        self.failUnlessRaises(FilenameEncodingError, listdir_unicode, u'/' + lumiere_nfc)
188
189
class EncodingUtil(ReallyEqualMixin):
    """Platform-parameterised checks, mixed into one TestCase per recorded platform.

    Every subclass must define `platform`. `io_encoding` is read by the
    tests that need `argv`, and `filesystem_encoding` by the test that needs
    `dirlist`; those tests return early when `argv` / `dirlist` is absent.
    """

    def setUp(self):
        # Pretend to be running on the platform the subclass describes.
        self.original_platform = sys.platform
        sys.platform = self.platform

    def tearDown(self):
        sys.platform = self.original_platform
        _reload()

    def test_argv_to_unicode(self):
        if not hasattr(self, 'argv'):
            return

        mock_stdout = MockStdout()
        mock_stdout.encoding = self.io_encoding
        self.patch(sys, 'stdout', mock_stdout)

        argu = lumiere_nfc
        argv = self.argv
        _reload()
        self.failUnlessReallyEqual(argv_to_unicode(argv), argu)

    def test_unicode_to_url(self):
        # Compare for equality. Passing the expected value as the second
        # argument of failUnless() would only use it as the failure message
        # and check that the result is truthy.
        self.failUnlessReallyEqual(unicode_to_url(lumiere_nfc), "lumi\xc3\xa8re")

    def test_unicode_to_output(self):
        if not hasattr(self, 'argv'):
            return

        mock_stdout = MockStdout()
        mock_stdout.encoding = self.io_encoding
        self.patch(sys, 'stdout', mock_stdout)

        _reload()
        self.failUnlessReallyEqual(unicode_to_output(lumiere_nfc), self.argv)

    def test_unicode_platform(self):
        # Expected unicode_platform() result for each sys.platform value used
        # by the subclasses in this file.
        matrix = {
          'linux2': False,
          'linux3': False,
          'openbsd4': False,
          'win32':  True,
          'darwin': True,
        }

        _reload()
        self.failUnlessReallyEqual(unicode_platform(), matrix[self.platform])

    def test_listdir_unicode(self):
        if not hasattr(self, 'dirlist'):
            return

        try:
            u"test".encode(self.filesystem_encoding)
        except (LookupError, AttributeError):
            raise unittest.SkipTest("This platform does not support the '%s' filesystem encoding "
                                    "that we are testing for the benefit of a different platform."
                                    % (self.filesystem_encoding,))

        def call_os_listdir(path):
            return self.dirlist
        self.patch(os, 'listdir', call_os_listdir)

        def call_sys_getfilesystemencoding():
            return self.filesystem_encoding
        self.patch(sys, 'getfilesystemencoding', call_sys_getfilesystemencoding)

        _reload()
        filenames = listdir_unicode(u'/dummy')

        # Compare after normalize() so the check does not depend on which
        # normalization form the recorded listing uses.
        self.failUnlessEqual(set([normalize(fname) for fname in filenames]),
                             set(TEST_FILENAMES))
262
263
class StdlibUnicode(unittest.TestCase):
    """Mostly verifies that stdlib file functions accept Unicode paths; also checks that
    listdir_unicode copes with valid filenames."""

    def skip_if_cannot_represent_filename(self, u):
        # Skip the calling test when this is not a Unicode platform and `u`
        # cannot be encoded in the filesystem encoding.
        enc = get_filesystem_encoding()
        if unicode_platform():
            return
        try:
            u.encode(enc)
        except UnicodeEncodeError:
            raise unittest.SkipTest("A non-ASCII filename could not be encoded on this platform.")

    def test_mkdir_open_exists_abspath_listdir_expanduser(self):
        self.skip_if_cannot_represent_filename(lumiere_nfc)

        try:
            os.mkdir(lumiere_nfc)
        except EnvironmentError as e:
            raise unittest.SkipTest("%r\nIt is possible that the filesystem on which this test is being run "
                                    "does not support Unicode, even though the platform does." % (e,))

        fn = lumiere_nfc + u'/' + lumiere_nfc + u'.txt'
        open(fn, 'wb').close()
        self.failUnless(os.path.exists(fn))
        absolute_fn = os.path.join(os.getcwdu(), fn)
        self.failUnless(os.path.exists(absolute_fn))
        filenames = listdir_unicode(lumiere_nfc)

        # We only require that the listing includes a filename that is canonically equivalent
        # to lumiere_nfc (on Mac OS X, it will be the NFD equivalent).
        normalized_names = set([normalize(fname) for fname in filenames])
        self.failUnlessIn(lumiere_nfc + ".txt", normalized_names)

        expanded = fileutil.expanduser(u"~/" + lumiere_nfc)
        self.failIfIn(u"~", expanded)
        self.failUnless(expanded.endswith(lumiere_nfc), expanded)

    def test_open_unrepresentable(self):
        if unicode_platform():
            raise unittest.SkipTest("This test is not applicable to platforms that represent filenames as Unicode.")

        enc = get_filesystem_encoding()
        fn = u'\u2621.txt'
        try:
            fn.encode(enc)
        except UnicodeEncodeError:
            # The name really is unrepresentable, so open() must refuse it.
            self.failUnlessRaises(UnicodeEncodeError, open, fn, 'wb')
        else:
            raise unittest.SkipTest("This test cannot be run unless we know a filename that is not representable.")
310
311
class QuoteOutput(ReallyEqualMixin, unittest.TestCase):
    """Table-driven checks of quote_output() under several output encodings."""

    def tearDown(self):
        _reload()

    def _check(self, inp, out, enc, optional_quotes, quote_newlines):
        # `out` is the expected result with quotemarks; when the quotes are
        # optional, the quotemarks=False result is `out` minus its first and
        # last characters.
        unquoted = out[1:-1] if optional_quotes else out

        def verify(value):
            self.failUnlessReallyEqual(quote_output(value, encoding=enc, quote_newlines=quote_newlines), out)
            self.failUnlessReallyEqual(quote_output(value, encoding=enc, quotemarks=False, quote_newlines=quote_newlines), unquoted)

        verify(inp)
        if out[0:2] == 'b"':
            # Expected output is in b"..." form: no second representation of
            # the input is tried.
            return
        # Repeat the check with the input in its other string type.
        if isinstance(inp, str):
            verify(unicode(inp))
        else:
            verify(inp.encode('utf-8'))

    def _test_quote_output_all(self, enc):
        def check(inp, out, optional_quotes=False, quote_newlines=None):
            return self._check(inp, out, enc, optional_quotes, quote_newlines)

        # optional single quotes
        check("foo",  "'foo'",  True)
        check("\\",   "'\\'",   True)
        check("$\"`", "'$\"`'", True)
        check("\n",   "'\n'",   True, quote_newlines=False)

        # mandatory single quotes
        check("\"",   "'\"'")

        # double quotes
        check("'",    "\"'\"")
        check("\n",   "\"\\x0a\"", quote_newlines=True)
        check("\x00", "\"\\x00\"")

        # invalid Unicode and astral planes
        check(u"\uFDD0\uFDEF",       "\"\\ufdd0\\ufdef\"")
        check(u"\uDC00\uD800",       "\"\\udc00\\ud800\"")
        check(u"\uDC00\uD800\uDC00", "\"\\udc00\\U00010000\"")
        check(u"\uD800\uDC00",       "\"\\U00010000\"")
        check(u"\uD800\uDC01",       "\"\\U00010001\"")
        check(u"\uD801\uDC00",       "\"\\U00010400\"")
        check(u"\uDBFF\uDFFF",       "\"\\U0010ffff\"")
        check(u"'\uDBFF\uDFFF",      "\"'\\U0010ffff\"")
        check(u"\"\uDBFF\uDFFF",     "\"\\\"\\U0010ffff\"")

        # invalid UTF-8
        check("\xFF",                "b\"\\xff\"")
        check("\x00\"$\\`\x80\xFF",  "b\"\\x00\\\"\\$\\\\\\`\\x80\\xff\"")

    def test_quote_output_ascii(self, enc='ascii'):
        def check(inp, out, optional_quotes=False, quote_newlines=None):
            return self._check(inp, out, enc, optional_quotes, quote_newlines)

        self._test_quote_output_all(enc)
        check(u"\u00D7",   "\"\\xd7\"")
        check(u"'\u00D7",  "\"'\\xd7\"")
        check(u"\"\u00D7", "\"\\\"\\xd7\"")
        check(u"\u2621",   "\"\\u2621\"")
        check(u"'\u2621",  "\"'\\u2621\"")
        check(u"\"\u2621", "\"\\\"\\u2621\"")
        check(u"\n",       "'\n'",      True, quote_newlines=False)
        check(u"\n",       "\"\\x0a\"", quote_newlines=True)

    def test_quote_output_latin1(self, enc='latin1'):
        # Expected values are written as unicode and encoded to latin1 here.
        def check(inp, out, optional_quotes=False, quote_newlines=None):
            return self._check(inp, out.encode('latin1'), enc, optional_quotes, quote_newlines)

        self._test_quote_output_all(enc)
        check(u"\u00D7",   u"'\u00D7'", True)
        check(u"'\u00D7",  u"\"'\u00D7\"")
        check(u"\"\u00D7", u"'\"\u00D7'")
        check(u"\u00D7\"", u"'\u00D7\"'", True)
        check(u"\u2621",   u"\"\\u2621\"")
        check(u"'\u2621",  u"\"'\\u2621\"")
        check(u"\"\u2621", u"\"\\\"\\u2621\"")
        check(u"\n",       u"'\n'", True, quote_newlines=False)
        check(u"\n",       u"\"\\x0a\"", quote_newlines=True)

    def test_quote_output_utf8(self, enc='utf-8'):
        # Expected values are written as unicode and encoded to UTF-8 here.
        def check(inp, out, optional_quotes=False, quote_newlines=None):
            return self._check(inp, out.encode('utf-8'), enc, optional_quotes, quote_newlines)

        self._test_quote_output_all(enc)
        check(u"\u2621",   u"'\u2621'", True)
        check(u"'\u2621",  u"\"'\u2621\"")
        check(u"\"\u2621", u"'\"\u2621'")
        check(u"\u2621\"", u"'\u2621\"'", True)
        check(u"\n",       u"'\n'", True, quote_newlines=False)
        check(u"\n",       u"\"\\x0a\"", quote_newlines=True)

    def test_quote_output_default(self):
        # Re-run each per-encoding table with encoding=None while
        # encodingutil.io_encoding is patched to that encoding.
        for io_enc, run_table in (('ascii', self.test_quote_output_ascii),
                                  ('latin1', self.test_quote_output_latin1),
                                  ('utf-8', self.test_quote_output_utf8)):
            self.patch(encodingutil, 'io_encoding', io_enc)
            run_table(None)
414
415
def win32_other(win32, other):
    """Return `win32` when sys.platform is "win32", and `other` otherwise."""
    if sys.platform == "win32":
        return win32
    return other
418
class QuotePaths(ReallyEqualMixin, unittest.TestCase):
    """Checks of quote_path, quote_local_unicode_path and quote_filepath."""

    def test_quote_path(self):
        # Each row: (path segments, keyword arguments, expected result).
        for segments, kwargs, expected in [
            ([u'foo', u'bar'],   {},                    "'foo/bar'"),
            ([u'foo', u'bar'],   {'quotemarks': True},  "'foo/bar'"),
            ([u'foo', u'bar'],   {'quotemarks': False}, "foo/bar"),
            ([u'foo', u'\nbar'], {},                    '"foo/\\x0abar"'),
            ([u'foo', u'\nbar'], {'quotemarks': True},  '"foo/\\x0abar"'),
            ([u'foo', u'\nbar'], {'quotemarks': False}, '"foo/\\x0abar"'),
        ]:
            self.failUnlessReallyEqual(quote_path(segments, **kwargs), expected)

        # Long-form Windows paths: the expected value on win32 differs from
        # the one on every other platform.
        long_drive_u = u"\\\\?\\C:\\foo"
        long_unc_u = u"\\\\?\\UNC\\foo\\bar"
        for path_u, kwargs, expected in [
            (long_drive_u, {},                    win32_other("'C:\\foo'", "'\\\\?\\C:\\foo'")),
            (long_drive_u, {'quotemarks': True},  win32_other("'C:\\foo'", "'\\\\?\\C:\\foo'")),
            (long_drive_u, {'quotemarks': False}, win32_other("C:\\foo", "\\\\?\\C:\\foo")),
            (long_unc_u,   {},                    win32_other("'\\\\foo\\bar'", "'\\\\?\\UNC\\foo\\bar'")),
            (long_unc_u,   {'quotemarks': True},  win32_other("'\\\\foo\\bar'", "'\\\\?\\UNC\\foo\\bar'")),
            (long_unc_u,   {'quotemarks': False}, win32_other("\\\\foo\\bar", "\\\\?\\UNC\\foo\\bar")),
        ]:
            self.failUnlessReallyEqual(quote_local_unicode_path(path_u, **kwargs), expected)

    def test_quote_filepath(self):
        foo_bar_fp = FilePath(win32_other(u'C:\\foo\\bar', u'/foo/bar'))
        for kwargs, expected in [
            ({},                    win32_other("'C:\\foo\\bar'", "'/foo/bar'")),
            ({'quotemarks': True},  win32_other("'C:\\foo\\bar'", "'/foo/bar'")),
            ({'quotemarks': False}, win32_other("C:\\foo\\bar", "/foo/bar")),
        ]:
            self.failUnlessReallyEqual(quote_filepath(foo_bar_fp, **kwargs), expected)

        if sys.platform != "win32":
            return

        # The long-path form is only exercised on Windows.
        foo_longfp = FilePath(u'\\\\?\\C:\\foo')
        for kwargs, expected in [
            ({},                    "'C:\\foo'"),
            ({'quotemarks': True},  "'C:\\foo'"),
            ({'quotemarks': False}, "C:\\foo"),
        ]:
            self.failUnlessReallyEqual(quote_filepath(foo_longfp, **kwargs), expected)
458
459
class FilePaths(ReallyEqualMixin, unittest.TestCase):
    """Checks of the FilePath helpers: to_filepath, extend_filepath,
    unicode_from_filepath and unicode_segments_from."""

    def test_to_filepath(self):
        foo_u = win32_other(u'C:\\foo', u'/foo')

        # The same FilePath is expected with or without a trailing separator.
        converted = [to_filepath(foo_u), to_filepath(foo_u + os.path.sep)]
        for fp in converted:
            self.failUnlessReallyEqual(fp, FilePath(foo_u))
            if encodingutil.use_unicode_filepath:
                self.failUnlessReallyEqual(fp.path, foo_u)

        if sys.platform != "win32":
            return

        long_u = u'\\\\?\\C:\\foo'
        longfp = to_filepath(long_u + u'\\')
        self.failUnlessReallyEqual(longfp, FilePath(long_u))
        self.failUnlessReallyEqual(longfp.path, long_u)

    def test_extend_filepath(self):
        # Exercise both a bytes-based and a unicode-based starting FilePath.
        bases = (FilePath(win32_other(b'C:\\foo', b'/foo')),
                 FilePath(win32_other(u'C:\\foo', u'/foo')))
        foo_bar_baz_u = win32_other(u'C:\\foo\\bar\\baz', u'/foo/bar/baz')

        for base_fp in bases:
            extended = extend_filepath(base_fp, [u'bar', u'baz'])
            self.failUnlessReallyEqual(extended, FilePath(foo_bar_baz_u))
            if encodingutil.use_unicode_filepath:
                self.failUnlessReallyEqual(extended.path, foo_bar_baz_u)

    def test_unicode_from_filepath(self):
        bases = (FilePath(win32_other(b'C:\\foo', b'/foo')),
                 FilePath(win32_other(u'C:\\foo', u'/foo')))
        foo_u = win32_other(u'C:\\foo', u'/foo')

        for base_fp in bases:
            self.failUnlessReallyEqual(unicode_from_filepath(base_fp), foo_u)

    def test_unicode_segments_from(self):
        ancestors = (FilePath(win32_other(b'C:\\foo', b'/foo')),
                     FilePath(win32_other(u'C:\\foo', u'/foo')))
        descendants = (FilePath(win32_other(b'C:\\foo\\bar\\baz', b'/foo/bar/baz')),
                       FilePath(win32_other(u'C:\\foo\\bar\\baz', u'/foo/bar/baz')))

        # Every bytes/unicode combination of ancestor and descendant.
        for ancestor_fp in ancestors:
            for descendant_fp in descendants:
                self.failUnlessReallyEqual(unicode_segments_from(descendant_fp, ancestor_fp),
                                           [u'bar', u'baz'])
507
508
class UbuntuKarmicUTF8(EncodingUtil, unittest.TestCase):
    """Recorded values for a Linux host using UTF-8 for filenames and I/O."""
    platform = 'linux2'
    uname = 'Linux korn 2.6.31-14-generic #48-Ubuntu SMP Fri Oct 16 14:05:01 UTC 2009 x86_64'
    filesystem_encoding = 'UTF-8'
    io_encoding = 'UTF-8'
    # Byte strings: lumiere_nfc and the TEST_FILENAMES entries in UTF-8.
    argv = 'lumi\xc3\xa8re'
    dirlist = ['test_file', '\xc3\x84rtonwall.mp3', 'Blah blah.txt']
516
class UbuntuKarmicLatin1(EncodingUtil, unittest.TestCase):
    """Recorded values for a Linux host using ISO-8859-1 for filenames and I/O."""
    platform = 'linux2'
    uname = 'Linux korn 2.6.31-14-generic #48-Ubuntu SMP Fri Oct 16 14:05:01 UTC 2009 x86_64'
    filesystem_encoding = 'ISO-8859-1'
    io_encoding = 'ISO-8859-1'
    # Byte strings: lumiere_nfc and the TEST_FILENAMES entries in latin1.
    argv = 'lumi\xe8re'
    dirlist = ['test_file', 'Blah blah.txt', '\xc4rtonwall.mp3']
524
class Windows(EncodingUtil, unittest.TestCase):
    """Recorded values for a Windows XP host; the directory listing is unicode."""
    platform = 'win32'
    uname = 'Windows XP 5.1.2600 x86 x86 Family 15 Model 75 Step ping 2, AuthenticAMD'
    filesystem_encoding = 'mbcs'
    io_encoding = 'utf-8'
    argv = 'lumi\xc3\xa8re'
    dirlist = [u'Blah blah.txt', u'test_file', u'\xc4rtonwall.mp3']
532
class MacOSXLeopard(EncodingUtil, unittest.TestCase):
    """Recorded values for a Mac OS X host with a UTF-8 terminal; the directory
    listing is unicode with a decomposed (NFD) A-umlaut."""
    platform = 'darwin'
    uname = 'Darwin g5.local 9.8.0 Darwin Kernel Version 9.8.0: Wed Jul 15 16:57:01 PDT 2009; root:xnu-1228.15.4~1/RELEASE_PPC Power Macintosh powerpc'
    filesystem_encoding = 'utf-8'
    io_encoding = 'UTF-8'
    # NOTE(review): no test visible in this file reads `output`, and without
    # an `argv` attribute the argv-based tests return early for this class --
    # confirm whether that is intended.
    output = 'lumi\xc3\xa8re'
    dirlist = [u'A\u0308rtonwall.mp3', u'Blah blah.txt', u'test_file']
540
class MacOSXLeopard7bit(EncodingUtil, unittest.TestCase):
    """Recorded values for a Mac OS X host whose I/O encoding is US-ASCII.

    No `argv` is defined, so the argv-based tests return early."""
    platform = 'darwin'
    uname = 'Darwin g5.local 9.8.0 Darwin Kernel Version 9.8.0: Wed Jul 15 16:57:01 PDT 2009; root:xnu-1228.15.4~1/RELEASE_PPC Power Macintosh powerpc'
    filesystem_encoding = 'utf-8'
    io_encoding = 'US-ASCII'
    dirlist = [u'A\u0308rtonwall.mp3', u'Blah blah.txt', u'test_file']
547
class OpenBSD(EncodingUtil, unittest.TestCase):
    """Recorded values for an OpenBSD host reporting encoding '646'.

    Neither `argv` nor `dirlist` is defined, so the tests needing them
    return early."""
    platform = 'openbsd4'
    uname = 'OpenBSD 4.1 GENERIC#187 i386 Intel(R) Celeron(R) CPU 2.80GHz ("GenuineIntel" 686-class)'
    filesystem_encoding = '646'
    io_encoding = '646'
    # Oops, I cannot write filenames containing non-ascii characters
554
555
class TestToFromStr(ReallyEqualMixin, unittest.TestCase):
    """Checks of to_str and from_utf8_or_none."""

    def test_to_str(self):
        # Each pair: (argument, expected result).
        for given, wanted in [
            ("foo", "foo"),
            ("lumi\xc3\xa8re", "lumi\xc3\xa8re"),
            ("\xFF", "\xFF"),  # passes through invalid UTF-8 -- is this what we want?
            (u"lumi\u00E8re", "lumi\xc3\xa8re"),
            (None, None),
        ]:
            self.failUnlessReallyEqual(to_str(given), wanted)

    def test_from_utf8_or_none(self):
        # A unicode argument is rejected with an AssertionError.
        self.failUnlessRaises(AssertionError, from_utf8_or_none, u"foo")

        decoded = from_utf8_or_none("lumi\xc3\xa8re")
        self.failUnlessReallyEqual(decoded, u"lumi\u00E8re")

        passthrough = from_utf8_or_none(None)
        self.failUnlessReallyEqual(passthrough, None)

        # Invalid UTF-8 raises instead of being passed through.
        self.failUnlessRaises(UnicodeDecodeError, from_utf8_or_none, "\xFF")