# Shared Unicode fixtures used by the tests in this module.

# "lumiere" spelled with a precomposed e-grave (U+00E8), i.e. in NFC form.
lumiere_nfc = u"lumi\u00E8re"

# The same filename in two canonically equivalent spellings:
#   * NFC: precomposed A-with-diaeresis (U+00C4)
#   * NFD: plain "A" followed by the combining diaeresis (U+0308)
Artonwall_nfd = u"A\u0308rtonwall.mp3"
Artonwall_nfc = u"\u00C4rtonwall.mp3"
12 # The following main helps to generate a test class for other operating
15 if __name__ == "__main__":
21 if len(sys.argv) != 2:
22 print "Usage: %s lumi<e-grave>re" % sys.argv[0]
26 print "class MyWeirdOS(EncodingUtil, unittest.TestCase):"
27 print " uname = '%s'" % ' '.join(platform.uname())
28 if sys.platform != "win32":
29 print " argv = %s" % repr(sys.argv[1])
30 print " platform = '%s'" % sys.platform
31 print " filesystem_encoding = '%s'" % sys.getfilesystemencoding()
32 print " output_encoding = '%s'" % sys.stdout.encoding
33 print " argv_encoding = '%s'" % (sys.platform == "win32" and 'ascii' or sys.stdout.encoding)
36 tmpdir = tempfile.mkdtemp()
37 for fname in TEST_FILENAMES:
38 open(os.path.join(tmpdir, fname), 'w').close()
40 # Use Unicode API under Windows or MacOS X
41 if sys.platform in ('win32', 'darwin'):
42 dirlist = os.listdir(unicode(tmpdir))
44 dirlist = os.listdir(tmpdir)
46 print " dirlist = %s" % repr(dirlist)
48 print " # Oops, I cannot write filenames containing non-ascii characters"
54 from twisted.trial import unittest
55 from mock import patch
56 import os, sys, locale
58 from allmydata.test.common_util import ReallyEqualMixin
59 from allmydata.util.encodingutil import argv_to_unicode, unicode_to_url, \
60 unicode_to_output, unicode_platform, listdir_unicode, FilenameEncodingError, \
61 get_output_encoding, get_filesystem_encoding, _reload
62 from allmydata.dirnode import normalize
64 from twisted.python import usage
66 class EncodingUtilErrors(ReallyEqualMixin, unittest.TestCase):
71 def test_get_output_encoding(self, mock_stdout):
72 mock_stdout.encoding = 'UTF-8'
74 self.failUnlessReallyEqual(get_output_encoding(), 'utf-8')
76 mock_stdout.encoding = 'cp65001'
78 self.failUnlessReallyEqual(get_output_encoding(), 'utf-8')
80 mock_stdout.encoding = 'koi8-r'
82 self.failUnlessReallyEqual(get_output_encoding(), 'koi8-r')
84 mock_stdout.encoding = 'nonexistent_encoding'
85 self.failUnlessRaises(AssertionError, _reload)
87 @patch('locale.getpreferredencoding')
88 def test_get_output_encoding_not_from_stdout(self, mock_locale_getpreferredencoding):
89 locale # hush pyflakes
90 mock_locale_getpreferredencoding.return_value = 'koi8-r'
94 old_stdout = sys.stdout
95 sys.stdout = DummyStdout()
98 self.failUnlessReallyEqual(get_output_encoding(), 'koi8-r')
100 sys.stdout.encoding = None
102 self.failUnlessReallyEqual(get_output_encoding(), 'koi8-r')
104 mock_locale_getpreferredencoding.return_value = None
106 self.failUnlessReallyEqual(get_output_encoding(), 'utf-8')
108 sys.stdout = old_stdout
111 def test_argv_to_unicode(self, mock):
112 mock.encoding = 'utf-8'
115 self.failUnlessRaises(usage.UsageError,
117 lumiere_nfc.encode('latin1'))
120 def test_unicode_to_output(self, mock):
121 # Encoding koi8-r cannot represent e-grave
122 mock.encoding = 'koi8-r'
124 self.failUnlessRaises(UnicodeEncodeError, unicode_to_output, lumiere_nfc)
127 def test_no_unicode_normalization(self, mock):
128 # Pretend to run on a Unicode platform.
129 # We normalized to NFC in 1.7beta, but we now don't.
130 orig_platform = sys.platform
132 sys.platform = 'darwin'
133 mock.return_value = [Artonwall_nfd]
135 self.failUnlessReallyEqual(listdir_unicode(u'/dummy'), [Artonwall_nfd])
137 sys.platform = orig_platform
139 # The following tests apply only to platforms that don't store filenames as
140 # Unicode entities on the filesystem.
141 class EncodingUtilNonUnicodePlatform(unittest.TestCase):
143 # Mock sys.platform because unicode_platform() uses it
144 self.original_platform = sys.platform
145 sys.platform = 'linux'
148 sys.platform = self.original_platform
151 @patch('sys.getfilesystemencoding')
153 def test_listdir_unicode(self, mock_listdir, mock_getfilesystemencoding):
154 # What happens if latin1-encoded filenames are encountered on an UTF-8
156 mock_listdir.return_value = [
157 lumiere_nfc.encode('utf-8'),
158 lumiere_nfc.encode('latin1')]
160 mock_getfilesystemencoding.return_value = 'utf-8'
162 self.failUnlessRaises(FilenameEncodingError,
166 # We're trying to list a directory whose name cannot be represented in
167 # the filesystem encoding. This should fail.
168 mock_getfilesystemencoding.return_value = 'ascii'
170 self.failUnlessRaises(FilenameEncodingError,
174 class EncodingUtil(ReallyEqualMixin):
176 # Mock sys.platform because unicode_platform() uses it
177 self.original_platform = sys.platform
178 sys.platform = self.platform
181 sys.platform = self.original_platform
185 def test_argv_to_unicode(self, mock):
186 if 'argv' not in dir(self):
189 mock.encoding = self.output_encoding
193 self.failUnlessReallyEqual(argv_to_unicode(argv), argu)
def test_unicode_to_url(self):
    # unicode_to_url() is expected to produce the UTF-8 encoded form of its input.
    #
    # This must be an equality assertion: failUnless(value, msg) only checks that
    # 'value' is truthy and uses its second argument as the failure message, so the
    # expected string was never actually compared against the result.
    self.failUnlessReallyEqual(unicode_to_url(lumiere_nfc), "lumi\xc3\xa8re")
199 def test_unicode_to_output(self, mock):
200 if 'output' not in dir(self):
203 mock.encoding = self.output_encoding
205 self.failUnlessReallyEqual(unicode_to_output(lumiere_nfc), self.output)
207 def test_unicode_platform(self):
216 self.failUnlessReallyEqual(unicode_platform(), matrix[self.platform])
218 @patch('sys.getfilesystemencoding')
220 def test_listdir_unicode(self, mock_listdir, mock_getfilesystemencoding):
221 if 'dirlist' not in dir(self):
225 u"test".encode(self.filesystem_encoding)
226 except (LookupError, AttributeError):
227 raise unittest.SkipTest("This platform does not support the '%s' filesystem encoding "
228 "that we are testing for the benefit of a different platform."
229 % (self.filesystem_encoding,))
231 mock_listdir.return_value = self.dirlist
232 mock_getfilesystemencoding.return_value = self.filesystem_encoding
235 filenames = listdir_unicode(u'/dummy')
237 self.failUnlessEqual(set([normalize(fname) for fname in filenames]),
241 class StdlibUnicode(unittest.TestCase):
242 """This mainly tests that some of the stdlib functions support Unicode paths, but also that
243 listdir_unicode works for valid filenames."""
245 def skip_if_cannot_represent_filename(self, u):
246 enc = get_filesystem_encoding()
247 if not unicode_platform():
250 except UnicodeEncodeError:
251 raise unittest.SkipTest("A non-ASCII filename could not be encoded on this platform.")
253 def test_mkdir_open_exists_abspath_listdir_expanduser(self):
254 self.skip_if_cannot_represent_filename(lumiere_nfc)
257 os.mkdir(lumiere_nfc)
258 except EnvironmentError, e:
259 raise unittest.SkipTest("%r\nIt is possible that the filesystem on which this test is being run "
260 "does not support Unicode, even though the platform does." % (e,))
262 fn = lumiere_nfc + '/' + lumiere_nfc + '.txt'
263 open(fn, 'wb').close()
264 self.failUnless(os.path.exists(fn))
265 self.failUnless(os.path.exists(os.path.abspath(fn)))
266 filenames = listdir_unicode(lumiere_nfc)
268 # We only require that the listing includes a filename that is canonically equivalent
269 # to lumiere_nfc (on Mac OS X, it will be the NFD equivalent).
270 self.failUnlessIn(lumiere_nfc + ".txt", set([normalize(fname) for fname in filenames]))
272 expanded = os.path.expanduser("~/" + lumiere_nfc)
273 self.failIfIn("~", expanded)
274 self.failUnless(expanded.endswith(lumiere_nfc), expanded)
@patch('sys.getfilesystemencoding')
def test_open_unrepresentable(self, mock):
    # Check that open() refuses a unicode path that the (mocked) filesystem
    # encoding cannot represent. 'mock' stands in for sys.getfilesystemencoding.
    filenames_are_unicode = unicode_platform()
    if filenames_are_unicode:
        # No encoding step is exercised on such platforms, so there is nothing to test.
        raise unittest.SkipTest("This test is not applicable to platforms that represent filenames as Unicode.")

    # Report an ASCII filesystem encoding, which cannot encode the e-grave in lumiere_nfc.
    mock.return_value = 'ascii'
    self.failUnlessRaises(UnicodeEncodeError, open, lumiere_nfc, 'rb')
285 class UbuntuKarmicUTF8(EncodingUtil, unittest.TestCase):
286 uname = 'Linux korn 2.6.31-14-generic #48-Ubuntu SMP Fri Oct 16 14:05:01 UTC 2009 x86_64'
287 output = 'lumi\xc3\xa8re'
288 argv = 'lumi\xc3\xa8re'
290 filesystem_encoding = 'UTF-8'
291 output_encoding = 'UTF-8'
292 argv_encoding = 'UTF-8'
293 dirlist = ['test_file', '\xc3\x84rtonwall.mp3', 'Blah blah.txt']
295 class UbuntuKarmicLatin1(EncodingUtil, unittest.TestCase):
296 uname = 'Linux korn 2.6.31-14-generic #48-Ubuntu SMP Fri Oct 16 14:05:01 UTC 2009 x86_64'
297 output = 'lumi\xe8re'
300 filesystem_encoding = 'ISO-8859-1'
301 output_encoding = 'ISO-8859-1'
302 argv_encoding = 'ISO-8859-1'
303 dirlist = ['test_file', 'Blah blah.txt', '\xc4rtonwall.mp3']
305 class WindowsXP(EncodingUtil, unittest.TestCase):
306 uname = 'Windows XP 5.1.2600 x86 x86 Family 15 Model 75 Step ping 2, AuthenticAMD'
307 output = 'lumi\x8are'
309 filesystem_encoding = 'mbcs'
310 output_encoding = 'cp850'
311 argv_encoding = 'ascii'
312 dirlist = [u'Blah blah.txt', u'test_file', u'\xc4rtonwall.mp3']
314 class WindowsXP_UTF8(EncodingUtil, unittest.TestCase):
315 uname = 'Windows XP 5.1.2600 x86 x86 Family 15 Model 75 Step ping 2, AuthenticAMD'
316 output = 'lumi\xc3\xa8re'
318 filesystem_encoding = 'mbcs'
319 output_encoding = 'cp65001'
320 argv_encoding = 'ascii'
321 dirlist = [u'Blah blah.txt', u'test_file', u'\xc4rtonwall.mp3']
323 class WindowsVista(EncodingUtil, unittest.TestCase):
324 uname = 'Windows Vista 6.0.6000 x86 x86 Family 6 Model 15 Stepping 11, GenuineIntel'
325 output = 'lumi\x8are'
327 filesystem_encoding = 'mbcs'
328 output_encoding = 'cp850'
329 argv_encoding = 'ascii'
330 dirlist = [u'Blah blah.txt', u'test_file', u'\xc4rtonwall.mp3']
332 class MacOSXLeopard(EncodingUtil, unittest.TestCase):
333 uname = 'Darwin g5.local 9.8.0 Darwin Kernel Version 9.8.0: Wed Jul 15 16:57:01 PDT 2009; root:xnu-1228.15.4~1/RELEASE_PPC Power Macintosh powerpc'
334 output = 'lumi\xc3\xa8re'
335 argv = 'lumi\xc3\xa8re'
337 filesystem_encoding = 'utf-8'
338 output_encoding = 'UTF-8'
339 argv_encoding = 'UTF-8'
340 dirlist = [u'A\u0308rtonwall.mp3', u'Blah blah.txt', u'test_file']
342 class MacOSXLeopard7bit(EncodingUtil, unittest.TestCase):
343 uname = 'Darwin g5.local 9.8.0 Darwin Kernel Version 9.8.0: Wed Jul 15 16:57:01 PDT 2009; root:xnu-1228.15.4~1/RELEASE_PPC Power Macintosh powerpc'
345 filesystem_encoding = 'utf-8'
346 output_encoding = 'US-ASCII'
347 argv_encoding = 'US-ASCII'
348 dirlist = [u'A\u0308rtonwall.mp3', u'Blah blah.txt', u'test_file']
class OpenBSD(EncodingUtil, unittest.TestCase):
    # Recorded platform profile for an OpenBSD 4.1 machine.
    # This profile defines no 'argv', 'output' or 'dirlist' samples; the note at
    # the end of the class records why no directory listing could be captured.
    platform = 'openbsd4'
    uname = 'OpenBSD 4.1 GENERIC#187 i386 Intel(R) Celeron(R) CPU 2.80GHz ("GenuineIntel" 686-class)'

    # Every encoding was reported as '646' (a Python codec alias for ASCII).
    argv_encoding = '646'
    output_encoding = '646'
    filesystem_encoding = '646'
    # Oops, I cannot write filenames containing non-ascii characters