9 # The following main helps to generate a test class for other operating systems.
# Script mode: print the source of a `MyWeirdOS` test class whose attributes
# are filled in with values probed from the machine running this file
# (uname, platform, encodings and a directory listing).
12 if __name__ == "__main__":
# Exactly one command-line argument is expected; otherwise show the usage line.
18 if len(sys.argv) != 2:
19 print "Usage: %s lumière" % sys.argv[0]
# Class header, followed by one class attribute per probed value.
23 print "class MyWeirdOS(StringUtils, unittest.TestCase):"
24 print " uname = '%s'" % ' '.join(platform.uname())
# The raw argv value is only recorded when not running on win32.
25 if sys.platform != "win32":
26 print " argv = %s" % repr(sys.argv[1])
27 print " platform = '%s'" % sys.platform
28 print " filesystem_encoding = '%s'" % sys.getfilesystemencoding()
29 print " output_encoding = '%s'" % sys.stdout.encoding
# argv_encoding is reported as 'utf-8' on win32 and as the stdout encoding
# on every other platform.
30 print " argv_encoding = '%s'" % (sys.platform == "win32" and 'utf-8' or sys.stdout.encoding)
# Create an empty file for every TEST_FILENAMES entry inside a fresh temporary
# directory, then record how os.listdir() reports those names.
33 tmpdir = tempfile.mkdtemp()
34 for fname in TEST_FILENAMES:
35 open(os.path.join(tmpdir, fname), 'w').close()
37 # Use Unicode API under Windows or MacOS X
38 if sys.platform in ('win32', 'darwin'):
39 dirlist = os.listdir(unicode(tmpdir))
41 dirlist = os.listdir(tmpdir)
43 print " dirlist = %s" % repr(dirlist)
# NOTE(review): the condition under which this fallback comment is printed
# is not visible here -- confirm against the full file.
45 print " # Oops, I cannot write filenames containing non-ascii characters"
51 from twisted.trial import unittest
52 from mock import patch
55 from allmydata.test.common_util import ReallyEqualMixin
56 from allmydata.util.stringutils import argv_to_unicode, unicode_to_url, \
57 unicode_to_output, unicode_platform, listdir_unicode, open_unicode, \
58 FilenameEncodingError, get_output_encoding, _reload
60 from twisted.python import usage
# Tests for output-encoding detection, for the errors raised on input that
# cannot be converted, and for normalization of listed filenames.
62 class StringUtilsErrors(ReallyEqualMixin, unittest.TestCase):
# `mock_stdout` stands in for the object whose `.encoding` attribute
# get_output_encoding() reports on.
67 def test_get_output_encoding(self, mock_stdout):
# 'UTF-8' is expected to be reported in lower case.
68 mock_stdout.encoding = 'UTF-8'
70 self.failUnlessReallyEqual(get_output_encoding(), 'utf-8')
# 'cp65001' is expected to be reported as 'utf-8' as well.
72 mock_stdout.encoding = 'cp65001'
74 self.failUnlessReallyEqual(get_output_encoding(), 'utf-8')
# A non-UTF-8 encoding is expected to be reported unchanged.
76 mock_stdout.encoding = 'koi8-r'
78 self.failUnlessReallyEqual(get_output_encoding(), 'koi8-r')
# An unknown encoding name must make _reload() fail with AssertionError.
80 mock_stdout.encoding = 'nonexistent_encoding'
81 self.failUnlessRaises(AssertionError, _reload)
83 # TODO: mock_stdout.encoding = None
86 def test_argv_to_unicode(self, mock):
87 mock.encoding = 'utf-8'
# latin1-encoded bytes are supplied while the mocked encoding is 'utf-8';
# usage.UsageError is the expected outcome.
90 self.failUnlessRaises(usage.UsageError,
92 u'lumière'.encode('latin1'))
95 def test_unicode_to_output(self, mock):
96 # Encoding koi8-r cannot represent 'è'
97 mock.encoding = 'koi8-r'
99 self.failUnlessRaises(UnicodeEncodeError, unicode_to_output, u'lumière')
102 def test_unicode_normalization(self, mock):
103 # Pretend to run on a Unicode platform
104 orig_platform = sys.platform
106 sys.platform = 'darwin'
# The mocked listing returns the decomposed form ('A' followed by U+0308);
# listdir_unicode() is expected to return the precomposed U+00C4 instead.
107 mock.return_value = [u'A\u0308rtonwall.mp3']
109 self.failUnlessReallyEqual(listdir_unicode(u'/dummy'), [u'\xc4rtonwall.mp3'])
# Put the real platform value back.
111 sys.platform = orig_platform
113 # The following tests apply only to platforms which don't store filenames as
114 # Unicode entities on the filesystem.
# Error-path tests that run with sys.platform forced to 'linux': every
# assertion here expects FilenameEncodingError.
115 class StringUtilsNonUnicodePlatform(unittest.TestCase):
117 # Mock sys.platform because unicode_platform() uses it
118 self.original_platform = sys.platform
119 sys.platform = 'linux'
# Restore the platform value saved above.
122 sys.platform = self.original_platform
125 @patch('sys.getfilesystemencoding')
127 def test_listdir_unicode(self, mock_listdir, mock_getfilesystemencoding):
128 # What happens if latin1-encoded filenames are encountered on a UTF-8 filesystem?
# The mocked listing mixes a UTF-8 encoded name with a latin1 encoded one.
130 mock_listdir.return_value = [
131 u'lumière'.encode('utf-8'),
132 u'lumière'.encode('latin1')]
134 mock_getfilesystemencoding.return_value = 'utf-8'
136 self.failUnlessRaises(FilenameEncodingError,
140 # We're trying to list a directory whose name cannot be represented in
141 # the filesystem encoding. This should fail.
142 mock_getfilesystemencoding.return_value = 'ascii'
144 self.failUnlessRaises(FilenameEncodingError,
148 @patch('sys.getfilesystemencoding')
149 def test_open_unicode(self, mock):
# With an 'ascii' filesystem encoding the call under test is expected to
# raise FilenameEncodingError.
150 mock.return_value = 'ascii'
152 self.failUnlessRaises(FilenameEncodingError,
156 class StringUtils(ReallyEqualMixin):
158 # Mock sys.platform because unicode_platform() uses it
159 self.original_platform = sys.platform
160 sys.platform = self.platform
163 sys.platform = self.original_platform
167 def test_argv_to_unicode(self, mock):
168 if 'argv' not in dir(self):
171 mock.encoding = self.output_encoding
175 self.failUnlessReallyEqual(argv_to_unicode(argv), argu)
177 def test_unicode_to_url(self):
178 self.failUnless(unicode_to_url(u'lumière'), "lumi\xc3\xa8re")
181 def test_unicode_to_output(self, mock):
182 if 'output' not in dir(self):
185 mock.encoding = self.output_encoding
187 self.failUnlessReallyEqual(unicode_to_output(u'lumière'), self.output)
189 def test_unicode_platform(self):
198 self.failUnlessReallyEqual(unicode_platform(), matrix[self.platform])
200 @patch('sys.getfilesystemencoding')
202 def test_listdir_unicode(self, mock_listdir, mock_getfilesystemencoding):
203 if 'dirlist' not in dir(self):
207 u"test".encode(self.filesystem_encoding)
209 raise unittest.SkipTest("This platform does not support the '%s' filesystem encoding "
210 "that we are testing for the benefit of a different platform.")
212 mock_listdir.return_value = self.dirlist
213 mock_getfilesystemencoding.return_value = self.filesystem_encoding
216 filenames = listdir_unicode(u'/dummy')
218 for fname in TEST_FILENAMES:
219 self.failUnless(isinstance(fname, unicode))
220 self.failUnlessIn(fname, filenames)
222 @patch('sys.getfilesystemencoding')
223 @patch('__builtin__.open')
224 def test_open_unicode(self, mock_open, mock_getfilesystemencoding):
225 mock_getfilesystemencoding.return_value = self.filesystem_encoding
226 fn = u'/dummy_directory/lumière.txt'
229 u"test".encode(self.filesystem_encoding)
231 raise unittest.SkipTest("This platform does not support the '%s' filesystem encoding "
232 "that we are testing for the benefit of a different platform.")
236 open_unicode(fn, 'rb')
237 except FilenameEncodingError:
240 # Pass Unicode string to open() on Unicode platforms
241 if unicode_platform():
242 mock_open.assert_called_with(fn, 'rb')
244 # Pass correctly encoded bytestrings to open() on non-Unicode platforms
246 fn_bytestring = fn.encode(self.filesystem_encoding)
247 mock_open.assert_called_with(fn_bytestring, 'rb')
# Fixture for the inherited StringUtils tests: an Ubuntu Linux host (per
# `uname`) whose filesystem, output and argv encodings are all UTF-8.
250 class UbuntuKarmicUTF8(StringUtils, unittest.TestCase):
251 uname = 'Linux korn 2.6.31-14-generic #48-Ubuntu SMP Fri Oct 16 14:05:01 UTC 2009 x86_64'
# u'lumière' encoded as UTF-8; `output` is the value unicode_to_output() is
# expected to return.
252 output = 'lumi\xc3\xa8re'
253 argv = 'lumi\xc3\xa8re'
255 filesystem_encoding = 'UTF-8'
256 output_encoding = 'UTF-8'
257 argv_encoding = 'UTF-8'
# Byte-string names used as the mocked directory listing; '\xc3\x84' is the
# UTF-8 encoding of U+00C4.
258 dirlist = ['test_file', '\xc3\x84rtonwall.mp3', 'Blah blah.txt']
# Fixture for the inherited StringUtils tests: an Ubuntu Linux host (per
# `uname`) whose filesystem, output and argv encodings are all ISO-8859-1.
260 class UbuntuKarmicLatin1(StringUtils, unittest.TestCase):
261 uname = 'Linux korn 2.6.31-14-generic #48-Ubuntu SMP Fri Oct 16 14:05:01 UTC 2009 x86_64'
# u'lumière' encoded as ISO-8859-1 (the single byte 0xE8 for 'è').
262 output = 'lumi\xe8re'
265 filesystem_encoding = 'ISO-8859-1'
266 output_encoding = 'ISO-8859-1'
267 argv_encoding = 'ISO-8859-1'
# Byte-string names used as the mocked directory listing; '\xc4' is U+00C4
# in ISO-8859-1.
268 dirlist = ['test_file', 'Blah blah.txt', '\xc4rtonwall.mp3']
# Fixture for the inherited StringUtils tests: Windows XP (per `uname`) with
# the 'mbcs' filesystem encoding and cp850 as the output encoding.
270 class WindowsXP(StringUtils, unittest.TestCase):
271 uname = 'Windows XP 5.1.2600 x86 x86 Family 15 Model 75 Step ping 2, AuthenticAMD'
# u'lumière' encoded as cp850 (byte 0x8A for 'è').
272 output = 'lumi\x8are'
# u'lumière' encoded as UTF-8, matching `argv_encoding` below.
273 argv = 'lumi\xc3\xa8re'
275 filesystem_encoding = 'mbcs'
276 output_encoding = 'cp850'
277 argv_encoding = 'utf-8'
# The mocked directory listing holds unicode objects, not byte strings.
278 dirlist = [u'Blah blah.txt', u'test_file', u'\xc4rtonwall.mp3']
# Fixture for the inherited StringUtils tests: Windows XP (per `uname`) with
# the 'mbcs' filesystem encoding and 'cp65001' as the output encoding.
280 class WindowsXP_UTF8(StringUtils, unittest.TestCase):
281 uname = 'Windows XP 5.1.2600 x86 x86 Family 15 Model 75 Step ping 2, AuthenticAMD'
# Both values are u'lumière' encoded as UTF-8.
282 output = 'lumi\xc3\xa8re'
283 argv = 'lumi\xc3\xa8re'
285 filesystem_encoding = 'mbcs'
286 output_encoding = 'cp65001'
287 argv_encoding = 'utf-8'
# The mocked directory listing holds unicode objects, not byte strings.
288 dirlist = [u'Blah blah.txt', u'test_file', u'\xc4rtonwall.mp3']
# Fixture for the inherited StringUtils tests: Windows Vista (per `uname`)
# with the 'mbcs' filesystem encoding and cp850 as the output encoding.
290 class WindowsVista(StringUtils, unittest.TestCase):
291 uname = 'Windows Vista 6.0.6000 x86 x86 Family 6 Model 15 Stepping 11, GenuineIntel'
# u'lumière' encoded as cp850 (byte 0x8A for 'è').
292 output = 'lumi\x8are'
# u'lumière' encoded as UTF-8, matching `argv_encoding` below.
293 argv = 'lumi\xc3\xa8re'
295 filesystem_encoding = 'mbcs'
296 output_encoding = 'cp850'
297 argv_encoding = 'utf-8'
# The mocked directory listing holds unicode objects, not byte strings.
298 dirlist = [u'Blah blah.txt', u'test_file', u'\xc4rtonwall.mp3']
# Fixture for the inherited StringUtils tests: a Darwin 9.8.0 host (per
# `uname`) whose filesystem, output and argv encodings are all UTF-8.
300 class MacOSXLeopard(StringUtils, unittest.TestCase):
301 uname = 'Darwin g5.local 9.8.0 Darwin Kernel Version 9.8.0: Wed Jul 15 16:57:01 PDT 2009; root:xnu-1228.15.4~1/RELEASE_PPC Power Macintosh powerpc'
# Both values are u'lumière' encoded as UTF-8.
302 output = 'lumi\xc3\xa8re'
303 argv = 'lumi\xc3\xa8re'
305 filesystem_encoding = 'utf-8'
306 output_encoding = 'UTF-8'
307 argv_encoding = 'UTF-8'
# Unicode names used as the mocked directory listing; the first entry is in
# decomposed form ('A' followed by the combining character U+0308).
308 dirlist = [u'A\u0308rtonwall.mp3', u'Blah blah.txt', u'test_file']
# Fixture for the inherited StringUtils tests: the same Darwin host as
# reported by `uname`, but with US-ASCII output and argv encodings.
# No `output` or `argv` attribute is visible here, so the
# `'output' not in dir(self)` / `'argv' not in dir(self)` guards in
# StringUtils apply to this fixture.
310 class MacOSXLeopard7bit(StringUtils, unittest.TestCase):
311 uname = 'Darwin g5.local 9.8.0 Darwin Kernel Version 9.8.0: Wed Jul 15 16:57:01 PDT 2009; root:xnu-1228.15.4~1/RELEASE_PPC Power Macintosh powerpc'
313 filesystem_encoding = 'utf-8'
314 output_encoding = 'US-ASCII'
315 argv_encoding = 'US-ASCII'
# Unicode names used as the mocked directory listing; the first entry is in
# decomposed form ('A' followed by the combining character U+0308).
316 dirlist = [u'A\u0308rtonwall.mp3', u'Blah blah.txt', u'test_file']
# Fixture for the inherited StringUtils tests: an OpenBSD 4.1 host (per
# `uname`) where every encoding is '646', a Python codec alias for ASCII.
# No `output`, `argv` or `dirlist` attribute is defined, so the corresponding
# `not in dir(self)` guards in StringUtils apply to this fixture.
318 class OpenBSD(StringUtils, unittest.TestCase):
319 uname = 'OpenBSD 4.1 GENERIC#187 i386 Intel(R) Celeron(R) CPU 2.80GHz ("GenuineIntel" 686-class)'
320 platform = 'openbsd4'
321 filesystem_encoding = '646'
322 output_encoding = '646'
323 argv_encoding = '646'
# The line below is the text the __main__ generator prints in place of a
# `dirlist` attribute.
324 # Oops, I cannot write filenames containing non-ascii characters