DefaultAliasMarker, TahoeError
from allmydata.scripts.common_http import do_http, HTTPError
from allmydata import uri
-from allmydata.util.encodingutil import unicode_to_url, quote_output, to_str
from allmydata.util import fileutil
-from allmydata.util.fileutil import open_expanduser, abspath_expanduser
+from allmydata.util.encodingutil import unicode_to_url, listdir_unicode, quote_output, to_str
from allmydata.util.assertutil import precondition
return True
def open(self, caps_only):
- return open_expanduser(self.pathname, "rb")
+ # Open self.pathname for binary reading after expanding a leading "~".
+ # The call below resolves to the builtin open(), not this method, because
+ # method names are not in scope inside the function body. caps_only is
+ # not used here.
+ return open(os.path.expanduser(self.pathname), "rb")
class LocalFileTarget:
if self.children is not None:
return
self.children = {}
- children = os.listdir(self.pathname)
+ children = listdir_unicode(self.pathname)
for i,n in enumerate(children):
self.progressfunc("examining %d of %d" % (i, len(children)))
pn = os.path.join(self.pathname, n)
if self.children is not None:
return
self.children = {}
- children = os.listdir(self.pathname)
+ children = listdir_unicode(self.pathname)
for i,n in enumerate(children):
self.progressfunc("examining %d of %d" % (i, len(children)))
n = unicode(n)
rootcap, path = get_alias(self.aliases, destination_spec, None)
if rootcap == DefaultAliasMarker:
# no alias, so this is a local file
- pathname = abspath_expanduser(path.decode('utf-8'))
+ pathname = os.path.abspath(os.path.expanduser(path.decode('utf-8')))
if not os.path.exists(pathname):
t = LocalMissingTarget(pathname)
elif os.path.isdir(pathname):
rootcap, path = get_alias(self.aliases, source_spec, None)
if rootcap == DefaultAliasMarker:
# no alias, so this is a local file
- pathname = abspath_expanduser(path.decode('utf-8'))
+ pathname = os.path.abspath(os.path.expanduser(path.decode('utf-8')))
name = os.path.basename(pathname)
if not os.path.exists(pathname):
raise MissingSourceError(source_spec)
# systems.
if __name__ == "__main__":
- import sys
+ import sys, os
+ import tempfile
+ import shutil
import platform
if len(sys.argv) != 2:
sys.exit(1)
print
- print "class MyWeirdOS(StringUtils, unittest.TestCase):"
+ print "class MyWeirdOS(EncodingUtil, unittest.TestCase):"
print " uname = '%s'" % ' '.join(platform.uname())
if sys.platform != "win32":
print " argv = %s" % repr(sys.argv[1])
print " filesystem_encoding = '%s'" % sys.getfilesystemencoding()
print " output_encoding = '%s'" % sys.stdout.encoding
print " argv_encoding = '%s'" % (sys.platform == "win32" and 'ascii' or sys.stdout.encoding)
+
+    # Record how this platform lists TEST_FILENAMES, for pasting into a new
+    # test class. The scratch directory is created before the try block: if
+    # mkdtemp() itself fails there is nothing to clean up, and the real error
+    # is reported instead of a NameError from the cleanup step.
+    tmpdir = tempfile.mkdtemp()
+    try:
+        # try/except nested inside try/finally (rather than one combined
+        # statement) so this also runs on older Python 2 interpreters.
+        try:
+            for fname in TEST_FILENAMES:
+                open(os.path.join(tmpdir, fname), 'w').close()
+
+            # Use Unicode API under Windows or Mac OS X
+            if sys.platform in ('win32', 'darwin'):
+                dirlist = os.listdir(unicode(tmpdir))
+            else:
+                dirlist = os.listdir(tmpdir)
+
+            print " dirlist = %s" % repr(dirlist)
+        except Exception:
+            # Best effort: a platform that cannot create these names still
+            # gets a usable class skeleton, just without a dirlist entry.
+            print " # Oops, I cannot write filenames containing non-ascii characters"
+    finally:
+        # Remove the scratch directory on every exit path.
+        shutil.rmtree(tmpdir)
print
sys.exit(0)
from twisted.trial import unittest
from mock import patch
-import sys, locale
+import os, sys, locale
from allmydata.test.common_util import ReallyEqualMixin
from allmydata.util.encodingutil import argv_to_unicode, unicode_to_url, \
- unicode_to_output, unicode_platform, get_output_encoding, _reload
+ unicode_to_output, unicode_platform, listdir_unicode, FilenameEncodingError, \
+ get_output_encoding, get_filesystem_encoding, _reload
+from allmydata.dirnode import normalize
from twisted.python import usage
-class StringUtilsErrors(ReallyEqualMixin, unittest.TestCase):
+class EncodingUtilErrors(ReallyEqualMixin, unittest.TestCase):
def tearDown(self):
_reload()
_reload()
self.failUnlessRaises(UnicodeEncodeError, unicode_to_output, lumiere_nfc)
+ @patch('os.listdir')
+ def test_no_unicode_normalization(self, mock):
+ # Checks that listdir_unicode() returns the entry supplied by the mocked
+ # os.listdir() unchanged (Artonwall_nfd in, Artonwall_nfd out).
+ # Pretend to run on a Unicode platform.
+ # We normalized to NFC in 1.7beta, but we now don't.
+ orig_platform = sys.platform
+ try:
+ sys.platform = 'darwin'
+ mock.return_value = [Artonwall_nfd]
+ # NOTE(review): _reload() presumably makes encodingutil pick up the
+ # patched sys.platform -- confirm against encodingutil.
+ _reload()
+ self.failUnlessReallyEqual(listdir_unicode(u'/dummy'), [Artonwall_nfd])
+ finally:
+ # Always put the real platform back, even if the assertion fails.
+ sys.platform = orig_platform
-class StringUtils(ReallyEqualMixin):
+# The following tests apply only to platforms that don't store filenames as
+# Unicode entities on the filesystem.
+class EncodingUtilNonUnicodePlatform(unittest.TestCase):
+ # Exercises the FilenameEncodingError paths of listdir_unicode() with
+ # sys.platform forced to 'linux' and with os.listdir and
+ # sys.getfilesystemencoding replaced by mocks.
+ def setUp(self):
+ # Mock sys.platform because unicode_platform() uses it
+ self.original_platform = sys.platform
+ sys.platform = 'linux'
+
+ def tearDown(self):
+ # Restore the real platform first, then call _reload().
+ # NOTE(review): _reload() presumably refreshes encodingutil's cached
+ # platform/encoding state -- confirm against encodingutil.
+ sys.platform = self.original_platform
+ _reload()
+
+ @patch('sys.getfilesystemencoding')
+ @patch('os.listdir')
+ def test_listdir_unicode(self, mock_listdir, mock_getfilesystemencoding):
+ # Decorators are applied bottom-up, so the os.listdir mock is the first
+ # mock argument and the sys.getfilesystemencoding mock is the second.
+
+ # What happens if latin1-encoded filenames are encountered on a UTF-8
+ # filesystem?
+ mock_listdir.return_value = [
+ lumiere_nfc.encode('utf-8'),
+ lumiere_nfc.encode('latin1')]
+
+ mock_getfilesystemencoding.return_value = 'utf-8'
+ _reload()
+ self.failUnlessRaises(FilenameEncodingError,
+ listdir_unicode,
+ u'/dummy')
+
+ # We're trying to list a directory whose name cannot be represented in
+ # the filesystem encoding. This should fail.
+ mock_getfilesystemencoding.return_value = 'ascii'
+ _reload()
+ self.failUnlessRaises(FilenameEncodingError,
+ listdir_unicode,
+ u'/' + lumiere_nfc)
+
+class EncodingUtil(ReallyEqualMixin):
def setUp(self):
# Mock sys.platform because unicode_platform() uses it
self.original_platform = sys.platform
_reload()
self.failUnlessReallyEqual(unicode_platform(), matrix[self.platform])
+ @patch('sys.getfilesystemencoding')
+ @patch('os.listdir')
+ def test_listdir_unicode(self, mock_listdir, mock_getfilesystemencoding):
+ # Replays the subclass's recorded listing (self.dirlist) through
+ # listdir_unicode() under the subclass's filesystem_encoding.
+ # Subclasses without a dirlist attribute have nothing to check here.
+ if 'dirlist' not in dir(self):
+ return
+
+ # Skip when the encoding named by the subclass cannot be used on the
+ # interpreter running the tests.
+ try:
+ u"test".encode(self.filesystem_encoding)
+ except (LookupError, AttributeError):
+ raise unittest.SkipTest("This platform does not support the '%s' filesystem encoding "
+ "that we are testing for the benefit of a different platform."
+ % (self.filesystem_encoding,))
+
+ mock_listdir.return_value = self.dirlist
+ mock_getfilesystemencoding.return_value = self.filesystem_encoding
+
+ _reload()
+ filenames = listdir_unicode(u'/dummy')
+
+ # Compare as sets of normalize()d names: the recorded listings differ
+ # between platforms both in order and in composed/decomposed form.
+ self.failUnlessEqual(set([normalize(fname) for fname in filenames]),
+ set(TEST_FILENAMES))
+
+
+class StdlibUnicode(unittest.TestCase):
+ """This mainly tests that some of the stdlib functions support Unicode paths, but also that
+ listdir_unicode works for valid filenames."""
+
+ def skip_if_cannot_represent_filename(self, u):
+ # Raise SkipTest when this is not a Unicode platform and u cannot be
+ # encoded in the filesystem encoding; otherwise return normally.
+ enc = get_filesystem_encoding()
+ if not unicode_platform():
+ try:
+ u.encode(enc)
+ except UnicodeEncodeError:
+ raise unittest.SkipTest("A non-ASCII filename could not be encoded on this platform.")
+
+ def test_mkdir_open_exists_abspath_listdir_expanduser(self):
+ # Runs os.mkdir, open, os.path.exists, os.path.abspath, listdir_unicode
+ # and os.path.expanduser against paths built from lumiere_nfc.
+ self.skip_if_cannot_represent_filename(lumiere_nfc)
+
+ # A failing mkdir is treated as a limitation of the filesystem, not as
+ # a test failure.
+ try:
+ os.mkdir(lumiere_nfc)
+ except EnvironmentError, e:
+ raise unittest.SkipTest("%r\nIt is possible that the filesystem on which this test is being run "
+ "does not support Unicode, even though the platform does." % (e,))
+
+ fn = lumiere_nfc + '/' + lumiere_nfc + '.txt'
+ open(fn, 'wb').close()
+ self.failUnless(os.path.exists(fn))
+ self.failUnless(os.path.exists(os.path.abspath(fn)))
+ filenames = listdir_unicode(lumiere_nfc)
+
+ # We only require that the listing includes a filename that is canonically equivalent
+ # to lumiere_nfc (on Mac OS X, it will be the NFD equivalent).
+ self.failUnlessIn(lumiere_nfc + ".txt", set([normalize(fname) for fname in filenames]))
+
+ # expanduser must replace the "~" and leave the non-ASCII tail intact.
+ expanded = os.path.expanduser("~/" + lumiere_nfc)
+ self.failIfIn("~", expanded)
+ self.failUnless(expanded.endswith(lumiere_nfc), expanded)
+
+ @patch('sys.getfilesystemencoding')
+ def test_open_unrepresentable(self, mock):
+ # With the filesystem encoding reported as ASCII, open() on lumiere_nfc
+ # is expected to raise UnicodeEncodeError.
+ if unicode_platform():
+ raise unittest.SkipTest("This test is not applicable to platforms that represent filenames as Unicode.")
+
+ mock.return_value = 'ascii'
+ self.failUnlessRaises(UnicodeEncodeError, open, lumiere_nfc, 'rb')
+
-class UbuntuKarmicUTF8(StringUtils, unittest.TestCase):
+class UbuntuKarmicUTF8(EncodingUtil, unittest.TestCase):
uname = 'Linux korn 2.6.31-14-generic #48-Ubuntu SMP Fri Oct 16 14:05:01 UTC 2009 x86_64'
output = 'lumi\xc3\xa8re'
argv = 'lumi\xc3\xa8re'
filesystem_encoding = 'UTF-8'
output_encoding = 'UTF-8'
argv_encoding = 'UTF-8'
+ dirlist = ['test_file', '\xc3\x84rtonwall.mp3', 'Blah blah.txt']
-class UbuntuKarmicLatin1(StringUtils, unittest.TestCase):
+class UbuntuKarmicLatin1(EncodingUtil, unittest.TestCase):
uname = 'Linux korn 2.6.31-14-generic #48-Ubuntu SMP Fri Oct 16 14:05:01 UTC 2009 x86_64'
output = 'lumi\xe8re'
argv = 'lumi\xe8re'
filesystem_encoding = 'ISO-8859-1'
output_encoding = 'ISO-8859-1'
argv_encoding = 'ISO-8859-1'
+ dirlist = ['test_file', 'Blah blah.txt', '\xc4rtonwall.mp3']
-class WindowsXP(StringUtils, unittest.TestCase):
+class WindowsXP(EncodingUtil, unittest.TestCase):
uname = 'Windows XP 5.1.2600 x86 x86 Family 15 Model 75 Step ping 2, AuthenticAMD'
output = 'lumi\x8are'
platform = 'win32'
filesystem_encoding = 'mbcs'
output_encoding = 'cp850'
argv_encoding = 'ascii'
+ dirlist = [u'Blah blah.txt', u'test_file', u'\xc4rtonwall.mp3']
-class WindowsXP_UTF8(StringUtils, unittest.TestCase):
+class WindowsXP_UTF8(EncodingUtil, unittest.TestCase):
uname = 'Windows XP 5.1.2600 x86 x86 Family 15 Model 75 Step ping 2, AuthenticAMD'
output = 'lumi\xc3\xa8re'
platform = 'win32'
filesystem_encoding = 'mbcs'
output_encoding = 'cp65001'
argv_encoding = 'ascii'
+ dirlist = [u'Blah blah.txt', u'test_file', u'\xc4rtonwall.mp3']
-class WindowsVista(StringUtils, unittest.TestCase):
+class WindowsVista(EncodingUtil, unittest.TestCase):
uname = 'Windows Vista 6.0.6000 x86 x86 Family 6 Model 15 Stepping 11, GenuineIntel'
output = 'lumi\x8are'
platform = 'win32'
filesystem_encoding = 'mbcs'
output_encoding = 'cp850'
argv_encoding = 'ascii'
+ dirlist = [u'Blah blah.txt', u'test_file', u'\xc4rtonwall.mp3']
-class MacOSXLeopard(StringUtils, unittest.TestCase):
+class MacOSXLeopard(EncodingUtil, unittest.TestCase):
uname = 'Darwin g5.local 9.8.0 Darwin Kernel Version 9.8.0: Wed Jul 15 16:57:01 PDT 2009; root:xnu-1228.15.4~1/RELEASE_PPC Power Macintosh powerpc'
output = 'lumi\xc3\xa8re'
argv = 'lumi\xc3\xa8re'
filesystem_encoding = 'utf-8'
output_encoding = 'UTF-8'
argv_encoding = 'UTF-8'
+ dirlist = [u'A\u0308rtonwall.mp3', u'Blah blah.txt', u'test_file']
-class MacOSXLeopard7bit(StringUtils, unittest.TestCase):
+class MacOSXLeopard7bit(EncodingUtil, unittest.TestCase):
uname = 'Darwin g5.local 9.8.0 Darwin Kernel Version 9.8.0: Wed Jul 15 16:57:01 PDT 2009; root:xnu-1228.15.4~1/RELEASE_PPC Power Macintosh powerpc'
platform = 'darwin'
filesystem_encoding = 'utf-8'
output_encoding = 'US-ASCII'
argv_encoding = 'US-ASCII'
+ dirlist = [u'A\u0308rtonwall.mp3', u'Blah blah.txt', u'test_file']
-class OpenBSD(StringUtils, unittest.TestCase):
+class OpenBSD(EncodingUtil, unittest.TestCase):
uname = 'OpenBSD 4.1 GENERIC#187 i386 Intel(R) Celeron(R) CPU 2.80GHz ("GenuineIntel" 686-class)'
platform = 'openbsd4'
filesystem_encoding = '646'