From 11077ea74de4d59aa58acd2c48cdcd9ef44cdd5c Mon Sep 17 00:00:00 2001 From: david-sarah Date: Sun, 11 Jul 2010 17:30:15 -0700 Subject: [PATCH] Rename stringutils to encodingutil, and drop listdir_unicode and open_unicode (since the Python stdlib functions work fine with Unicode paths). Also move some utility functions to fileutil. --- src/allmydata/dirnode.py | 2 +- src/allmydata/scripts/cli.py | 2 +- src/allmydata/scripts/common.py | 2 +- src/allmydata/scripts/common_http.py | 2 +- src/allmydata/scripts/slow_operation.py | 2 +- src/allmydata/scripts/tahoe_add_alias.py | 2 +- src/allmydata/scripts/tahoe_backup.py | 11 +- src/allmydata/scripts/tahoe_check.py | 2 +- src/allmydata/scripts/tahoe_cp.py | 35 ++--- src/allmydata/scripts/tahoe_get.py | 4 +- src/allmydata/scripts/tahoe_ls.py | 2 +- src/allmydata/scripts/tahoe_manifest.py | 2 +- src/allmydata/scripts/tahoe_mkdir.py | 2 +- src/allmydata/scripts/tahoe_mv.py | 2 +- src/allmydata/scripts/tahoe_put.py | 5 +- src/allmydata/test/test_backupdb.py | 4 +- src/allmydata/test/test_cli.py | 20 +-- ...st_stringutils.py => test_encodingutil.py} | 138 +----------------- .../util/{stringutils.py => encodingutil.py} | 69 --------- src/allmydata/util/fileutil.py | 20 +++ src/allmydata/web/common.py | 2 +- 21 files changed, 67 insertions(+), 263 deletions(-) rename src/allmydata/test/{test_stringutils.py => test_encodingutil.py} (52%) rename src/allmydata/util/{stringutils.py => encodingutil.py} (70%) diff --git a/src/allmydata/dirnode.py b/src/allmydata/dirnode.py index 56fd56b2..9d41af19 100644 --- a/src/allmydata/dirnode.py +++ b/src/allmydata/dirnode.py @@ -16,7 +16,7 @@ from allmydata.check_results import DeepCheckResults, \ DeepCheckAndRepairResults from allmydata.monitor import Monitor from allmydata.util import hashutil, mathutil, base32, log -from allmydata.util.stringutils import quote_output +from allmydata.util.encodingutil import quote_output from allmydata.util.assertutil import precondition from allmydata.util.netstring import netstring, split_netstring from allmydata.util.consumer import download_to_data diff --git a/src/allmydata/scripts/cli.py b/src/allmydata/scripts/cli.py index 446c6b5a..833d5d08 100644 --- a/src/allmydata/scripts/cli.py +++ b/src/allmydata/scripts/cli.py @@ -1,7 +1,7 @@ import os.path, re, sys, fnmatch from twisted.python import usage from allmydata.scripts.common import BaseOptions, get_aliases -from allmydata.util.stringutils import argv_to_unicode +from allmydata.util.encodingutil import argv_to_unicode NODEURL_RE=re.compile("http(s?)://([^:]*)(:([1-9][0-9]*))?") diff --git a/src/allmydata/scripts/common.py b/src/allmydata/scripts/common.py index 1116e1db..88fdd286 100644 --- a/src/allmydata/scripts/common.py +++ b/src/allmydata/scripts/common.py @@ -2,7 +2,7 @@ import os, sys, urllib import codecs from twisted.python import usage -from allmydata.util.stringutils import unicode_to_url, quote_output +from allmydata.util.encodingutil import unicode_to_url, quote_output from allmydata.util.assertutil import precondition class BaseOptions: diff --git a/src/allmydata/scripts/common_http.py b/src/allmydata/scripts/common_http.py index 1cf76d02..9ceb7a79 100644 --- a/src/allmydata/scripts/common_http.py +++ b/src/allmydata/scripts/common_http.py @@ -3,7 +3,7 @@ from cStringIO import StringIO import urlparse, httplib import allmydata # for __full_version__ -from allmydata.util.stringutils import quote_output +from allmydata.util.encodingutil import quote_output from allmydata.scripts.common import TahoeError diff --git a/src/allmydata/scripts/slow_operation.py b/src/allmydata/scripts/slow_operation.py index f47c0412..3da511f9 100644 --- a/src/allmydata/scripts/slow_operation.py +++ b/src/allmydata/scripts/slow_operation.py @@ -4,7 +4,7 @@ from allmydata.scripts.common import get_alias, DEFAULT_ALIAS, escape_path, \ UnknownAliasError from allmydata.scripts.common_http import do_http, format_http_error from allmydata.util import base32 -from allmydata.util.stringutils import quote_output, is_printable_ascii +from allmydata.util.encodingutil import quote_output, is_printable_ascii import urllib import simplejson diff --git a/src/allmydata/scripts/tahoe_add_alias.py b/src/allmydata/scripts/tahoe_add_alias.py index 984719a4..a3ae1843 100644 --- a/src/allmydata/scripts/tahoe_add_alias.py +++ b/src/allmydata/scripts/tahoe_add_alias.py @@ -5,7 +5,7 @@ from allmydata import uri from allmydata.scripts.common_http import do_http, check_http_error from allmydata.scripts.common import get_aliases from allmydata.util.fileutil import move_into_place -from allmydata.util.stringutils import unicode_to_output, quote_output +from allmydata.util.encodingutil import unicode_to_output, quote_output def add_line_to_aliasfile(aliasfile, alias, cap): diff --git a/src/allmydata/scripts/tahoe_backup.py b/src/allmydata/scripts/tahoe_backup.py index 8b853c9e..2525b2fa 100644 --- a/src/allmydata/scripts/tahoe_backup.py +++ b/src/allmydata/scripts/tahoe_backup.py @@ -9,7 +9,8 @@ from allmydata.scripts.common import get_alias, escape_path, DEFAULT_ALIAS, \ from allmydata.scripts.common_http import do_http, HTTPError, format_http_error from allmydata.util import time_format from allmydata.scripts import backupdb -from allmydata.util.stringutils import listdir_unicode, open_unicode, quote_output, to_str +from allmydata.util.encodingutil import quote_output, to_str +from allmydata.util.fileutil import open_expanduser from allmydata.util.assertutil import precondition @@ -166,11 +167,15 @@ class BackerUpper: compare_contents = {} # childname -> rocap try: - children = listdir_unicode(localpath) + children = os.listdir(localpath) except EnvironmentError: self.directories_skipped += 1 self.warn("WARNING: permission denied on directory %s" % quote_output(localpath)) children = [] + except (UnicodeEncodeError, UnicodeDecodeError): + self.directories_skipped += 1 + self.warn("WARNING: could not list directory %s due to an encoding error" % quote_output(localpath)) + children = [] for child in self.options.filter_listdir(children): assert isinstance(child, unicode), child @@ -292,7 +297,7 @@ class BackerUpper: if must_upload: self.verboseprint("uploading %s.." % quote_output(childpath)) - infileobj = open_unicode(childpath, "rb") + infileobj = open_expanduser(childpath, "rb") url = self.options['node-url'] + "uri" resp = do_http("PUT", url, infileobj) if resp.status not in (200, 201): diff --git a/src/allmydata/scripts/tahoe_check.py b/src/allmydata/scripts/tahoe_check.py index acf70f06..adb0ccc4 100644 --- a/src/allmydata/scripts/tahoe_check.py +++ b/src/allmydata/scripts/tahoe_check.py @@ -5,7 +5,7 @@ from twisted.protocols.basic import LineOnlyReceiver from allmydata.scripts.common import get_alias, DEFAULT_ALIAS, escape_path, \ UnknownAliasError from allmydata.scripts.common_http import do_http, format_http_error -from allmydata.util.stringutils import quote_output, quote_path +from allmydata.util.encodingutil import quote_output, quote_path class Checker: pass diff --git a/src/allmydata/scripts/tahoe_cp.py b/src/allmydata/scripts/tahoe_cp.py index 59c35445..76fa95ef 100644 --- a/src/allmydata/scripts/tahoe_cp.py +++ b/src/allmydata/scripts/tahoe_cp.py @@ -8,25 +8,12 @@ from allmydata.scripts.common import get_alias, escape_path, \ DefaultAliasMarker, TahoeError from allmydata.scripts.common_http import do_http, HTTPError from allmydata import uri -from allmydata.util.stringutils import unicode_to_url, listdir_unicode, open_unicode, \ - abspath_expanduser_unicode, quote_output, to_str +from allmydata.util.encodingutil import unicode_to_url, quote_output, to_str +from allmydata.util import fileutil +from allmydata.util.fileutil import open_expanduser, abspath_expanduser from allmydata.util.assertutil import precondition -def _put_local_file(pathname, inf): - # TODO: create temporary file and move into place? - # TODO: move this to fileutil. - outf = open_unicode(pathname, "wb") - try: - while True: - data = inf.read(32768) - if not data: - break - outf.write(data) - finally: - outf.close() - - class MissingSourceError(TahoeError): def __init__(self, name): TahoeError.__init__(self, "No such file or directory %s" % quote_output(name)) @@ -81,7 +68,7 @@ class LocalFileSource: return True def open(self, caps_only): - return open_unicode(self.pathname, "rb") + return open_expanduser(self.pathname, "rb") class LocalFileTarget: @@ -90,7 +77,7 @@ class LocalFileTarget: self.pathname = pathname def put_file(self, inf): - _put_local_file(self.pathname, inf) + fileutil.put_file(self.pathname, inf) class LocalMissingTarget: @@ -99,7 +86,7 @@ class LocalMissingTarget: self.pathname = pathname def put_file(self, inf): - _put_local_file(self.pathname, inf) + fileutil.put_file(self.pathname, inf) class LocalDirectorySource: @@ -114,7 +101,7 @@ class LocalDirectorySource: if self.children is not None: return self.children = {} - children = listdir_unicode(self.pathname) + children = os.listdir(self.pathname) for i,n in enumerate(children): self.progressfunc("examining %d of %d" % (i, len(children))) pn = os.path.join(self.pathname, n) @@ -142,7 +129,7 @@ class LocalDirectoryTarget: if self.children is not None: return self.children = {} - children = listdir_unicode(self.pathname) + children = os.listdir(self.pathname) for i,n in enumerate(children): self.progressfunc("examining %d of %d" % (i, len(children))) n = unicode(n) @@ -168,7 +155,7 @@ class LocalDirectoryTarget: def put_file(self, name, inf): precondition(isinstance(name, unicode), name) pathname = os.path.join(self.pathname, name) - _put_local_file(pathname, inf) + fileutil.put_file(pathname, inf) def set_children(self): pass @@ -525,7 +512,7 @@ class Copier: rootcap, path = get_alias(self.aliases, destination_spec, None) if rootcap == DefaultAliasMarker: # no alias, so this is a local file - pathname = abspath_expanduser_unicode(path.decode('utf-8')) + pathname = abspath_expanduser(path.decode('utf-8')) if not os.path.exists(pathname): t = LocalMissingTarget(pathname) elif os.path.isdir(pathname): @@ -565,7 +552,7 @@ class Copier: rootcap, path = get_alias(self.aliases, source_spec, None) if rootcap == DefaultAliasMarker: # no alias, so this is a local file - pathname = abspath_expanduser_unicode(path.decode('utf-8')) + pathname = abspath_expanduser(path.decode('utf-8')) name = os.path.basename(pathname) if not os.path.exists(pathname): raise MissingSourceError(source_spec) diff --git a/src/allmydata/scripts/tahoe_get.py b/src/allmydata/scripts/tahoe_get.py index dfc36644..63032ed4 100644 --- a/src/allmydata/scripts/tahoe_get.py +++ b/src/allmydata/scripts/tahoe_get.py @@ -3,7 +3,7 @@ import urllib from allmydata.scripts.common import get_alias, DEFAULT_ALIAS, escape_path, \ UnknownAliasError from allmydata.scripts.common_http import do_http, format_http_error -from allmydata.util.stringutils import open_unicode +from allmydata.util.fileutil import open_expanduser def get(options): nodeurl = options['node-url'] @@ -27,7 +27,7 @@ def get(options): resp = do_http("GET", url) if resp.status in (200, 201,): if to_file: - outf = open_unicode(to_file, "wb") + outf = open_expanduser(to_file, "wb") else: outf = stdout while True: diff --git a/src/allmydata/scripts/tahoe_ls.py b/src/allmydata/scripts/tahoe_ls.py index d7c7ef83..89ef3347 100644 --- a/src/allmydata/scripts/tahoe_ls.py +++ b/src/allmydata/scripts/tahoe_ls.py @@ -4,7 +4,7 @@ import simplejson from allmydata.scripts.common import get_alias, DEFAULT_ALIAS, escape_path, \ UnknownAliasError from allmydata.scripts.common_http import do_http, format_http_error -from allmydata.util.stringutils import unicode_to_output, quote_output, is_printable_ascii, to_str +from allmydata.util.encodingutil import unicode_to_output, quote_output, is_printable_ascii, to_str def list(options): nodeurl = options['node-url'] diff --git a/src/allmydata/scripts/tahoe_manifest.py b/src/allmydata/scripts/tahoe_manifest.py index db422082..0b9c64f6 100644 --- a/src/allmydata/scripts/tahoe_manifest.py +++ b/src/allmydata/scripts/tahoe_manifest.py @@ -6,7 +6,7 @@ from allmydata.scripts.slow_operation import SlowOperationRunner from allmydata.scripts.common import get_alias, DEFAULT_ALIAS, escape_path, \ UnknownAliasError from allmydata.scripts.common_http import do_http, format_http_error -from allmydata.util.stringutils import quote_output, quote_path +from allmydata.util.encodingutil import quote_output, quote_path class FakeTransport: disconnecting = False diff --git a/src/allmydata/scripts/tahoe_mkdir.py b/src/allmydata/scripts/tahoe_mkdir.py index 7ff8a99a..dbcabac6 100644 --- a/src/allmydata/scripts/tahoe_mkdir.py +++ b/src/allmydata/scripts/tahoe_mkdir.py @@ -2,7 +2,7 @@ import urllib from allmydata.scripts.common_http import do_http, check_http_error from allmydata.scripts.common import get_alias, DEFAULT_ALIAS, UnknownAliasError -from allmydata.util.stringutils import quote_output +from allmydata.util.encodingutil import quote_output def mkdir(options): nodeurl = options['node-url'] diff --git a/src/allmydata/scripts/tahoe_mv.py b/src/allmydata/scripts/tahoe_mv.py index d336d804..3be219d6 100644 --- a/src/allmydata/scripts/tahoe_mv.py +++ b/src/allmydata/scripts/tahoe_mv.py @@ -5,7 +5,7 @@ import simplejson from allmydata.scripts.common import get_alias, DEFAULT_ALIAS, escape_path, \ UnknownAliasError from allmydata.scripts.common_http import do_http, format_http_error -from allmydata.util.stringutils import to_str +from allmydata.util.encodingutil import to_str # this script is used for both 'mv' and 'ln' diff --git a/src/allmydata/scripts/tahoe_put.py b/src/allmydata/scripts/tahoe_put.py index 82a9e4cc..d646110e 100644 --- a/src/allmydata/scripts/tahoe_put.py +++ b/src/allmydata/scripts/tahoe_put.py @@ -4,7 +4,8 @@ import urllib from allmydata.scripts.common_http import do_http, format_http_success, format_http_error from allmydata.scripts.common import get_alias, DEFAULT_ALIAS, escape_path, \ UnknownAliasError -from allmydata.util.stringutils import quote_output, open_unicode +from allmydata.util.encodingutil import quote_output +from allmydata.util.fileutil import open_expanduser def put(options): """ @@ -64,7 +65,7 @@ def put(options): if mutable: url += "?mutable=true" if from_file: - infileobj = open_unicode(from_file, "rb") + infileobj = open_expanduser(from_file, "rb") else: # do_http() can't use stdin directly: for one thing, we need a # Content-Length field. So we currently must copy it. diff --git a/src/allmydata/test/test_backupdb.py b/src/allmydata/test/test_backupdb.py index 13c86792..1d6d9734 100644 --- a/src/allmydata/test/test_backupdb.py +++ b/src/allmydata/test/test_backupdb.py @@ -4,7 +4,7 @@ from StringIO import StringIO from twisted.trial import unittest from allmydata.util import fileutil -from allmydata.util.stringutils import listdir_unicode, get_filesystem_encoding, unicode_platform +from allmydata.util.encodingutil import get_filesystem_encoding, unicode_platform from allmydata.util.assertutil import precondition from allmydata.scripts import backupdb @@ -249,7 +249,7 @@ class BackupDB(unittest.TestCase): self.failUnless(bdb) self.writeto(u"f\u00f6\u00f6.txt", "foo.txt") - files = [fn for fn in listdir_unicode(unicode(basedir)) if fn.endswith(".txt")] + files = [fn for fn in os.listdir(unicode(basedir)) if fn.endswith(".txt")] self.failUnlessEqual(len(files), 1) foo_fn = os.path.join(basedir, files[0]) #print foo_fn, type(foo_fn) diff --git a/src/allmydata/test/test_cli.py b/src/allmydata/test/test_cli.py index ef6090aa..70746b7d 100644 --- a/src/allmydata/test/test_cli.py +++ b/src/allmydata/test/test_cli.py @@ -31,8 +31,8 @@ from twisted.internet import threads # CLI tests use deferToThread from twisted.python import usage from allmydata.util.assertutil import precondition -from allmydata.util.stringutils import listdir_unicode, open_unicode, unicode_platform, \ - quote_output, get_output_encoding, get_argv_encoding, get_filesystem_encoding, \ +from allmydata.util.encodingutil import unicode_platform, quote_output, \ + get_output_encoding, get_argv_encoding, get_filesystem_encoding, \ unicode_to_output, to_str timeout = 480 # deep_check takes 360s on Zandr's linksys box, others take > 240s @@ -439,9 +439,9 @@ class CLI(CLITestMixin, unittest.TestCase): fileutil.make_dirs(basedir) for name in filenames: - open_unicode(os.path.join(unicode(basedir), name), "wb").close() + open(os.path.join(unicode(basedir), name), "wb").close() - for file in listdir_unicode(unicode(basedir)): + for file in os.listdir(unicode(basedir)): self.failUnlessIn(normalize(file), filenames) @@ -974,11 +974,7 @@ class Put(GridTestMixin, CLITestMixin, unittest.TestCase): rel_fn = os.path.join(unicode(self.basedir), u"à trier.txt") # we make the file small enough to fit in a LIT file, for speed DATA = "short file" - f = open_unicode(rel_fn, "wb") - try: - f.write(DATA) - finally: - f.close() + fileutil.write(rel_fn, DATA) d = self.do_cli("create-alias", "tahoe") @@ -1349,11 +1345,7 @@ class Cp(GridTestMixin, CLITestMixin, unittest.TestCase): self.set_up_grid() DATA1 = "unicode file content" - f = open_unicode(fn1, "wb") - try: - f.write(DATA1) - finally: - f.close() + fileutil.write(fn1, DATA1) fn2 = os.path.join(self.basedir, "Metallica") DATA2 = "non-unicode file content" diff --git a/src/allmydata/test/test_stringutils.py b/src/allmydata/test/test_encodingutil.py similarity index 52% rename from src/allmydata/test/test_stringutils.py rename to src/allmydata/test/test_encodingutil.py index 8013c23d..a287bf41 100644 --- a/src/allmydata/test/test_stringutils.py +++ b/src/allmydata/test/test_encodingutil.py @@ -13,9 +13,7 @@ TEST_FILENAMES = ( # systems. if __name__ == "__main__": - import sys, os - import tempfile - import shutil + import sys import platform if len(sys.argv) != 2: @@ -31,24 +29,8 @@ if __name__ == "__main__": print " filesystem_encoding = '%s'" % sys.getfilesystemencoding() print " output_encoding = '%s'" % sys.stdout.encoding print " argv_encoding = '%s'" % (sys.platform == "win32" and 'ascii' or sys.stdout.encoding) - - try: - tmpdir = tempfile.mkdtemp() - for fname in TEST_FILENAMES: - open(os.path.join(tmpdir, fname), 'w').close() - - # Use Unicode API under Windows or MacOS X - if sys.platform in ('win32', 'darwin'): - dirlist = os.listdir(unicode(tmpdir)) - else: - dirlist = os.listdir(tmpdir) - - print " dirlist = %s" % repr(dirlist) - except: - print " # Oops, I cannot write filenames containing non-ascii characters" print - shutil.rmtree(tmpdir) sys.exit(0) from twisted.trial import unittest @@ -56,10 +38,8 @@ from mock import patch import sys from allmydata.test.common_util import ReallyEqualMixin -from allmydata.util.stringutils import argv_to_unicode, unicode_to_url, \ - unicode_to_output, unicode_platform, listdir_unicode, open_unicode, \ - FilenameEncodingError, get_output_encoding, _reload -from allmydata.dirnode import normalize +from allmydata.util.encodingutil import argv_to_unicode, unicode_to_url, \ + unicode_to_output, unicode_platform, get_output_encoding, _reload from twisted.python import usage @@ -102,61 +82,6 @@ class StringUtilsErrors(ReallyEqualMixin, unittest.TestCase): _reload() self.failUnlessRaises(UnicodeEncodeError, unicode_to_output, lumiere_nfc) - @patch('os.listdir') - def test_no_unicode_normalization(self, mock): - # Pretend to run on a Unicode platform. - # We normalized to NFC in 1.7beta, but we now don't. - orig_platform = sys.platform - try: - sys.platform = 'darwin' - mock.return_value = [Artonwall_nfd] - _reload() - self.failUnlessReallyEqual(listdir_unicode(u'/dummy'), [Artonwall_nfd]) - finally: - sys.platform = orig_platform - -# The following tests applies only to platforms which don't store filenames as -# Unicode entities on the filesystem. -class StringUtilsNonUnicodePlatform(unittest.TestCase): - def setUp(self): - # Mock sys.platform because unicode_platform() uses it - self.original_platform = sys.platform - sys.platform = 'linux' - - def tearDown(self): - sys.platform = self.original_platform - _reload() - - @patch('sys.getfilesystemencoding') - @patch('os.listdir') - def test_listdir_unicode(self, mock_listdir, mock_getfilesystemencoding): - # What happens if latin1-encoded filenames are encountered on an UTF-8 - # filesystem? - mock_listdir.return_value = [ - lumiere_nfc.encode('utf-8'), - lumiere_nfc.encode('latin1')] - - mock_getfilesystemencoding.return_value = 'utf-8' - _reload() - self.failUnlessRaises(FilenameEncodingError, - listdir_unicode, - u'/dummy') - - # We're trying to list a directory whose name cannot be represented in - # the filesystem encoding. This should fail. - mock_getfilesystemencoding.return_value = 'ascii' - _reload() - self.failUnlessRaises(FilenameEncodingError, - listdir_unicode, - u'/' + lumiere_nfc) - - @patch('sys.getfilesystemencoding') - def test_open_unicode(self, mock): - mock.return_value = 'ascii' - _reload() - self.failUnlessRaises(FilenameEncodingError, - open_unicode, - lumiere_nfc, 'rb') class StringUtils(ReallyEqualMixin): def setUp(self): @@ -202,56 +127,6 @@ class StringUtils(ReallyEqualMixin): _reload() self.failUnlessReallyEqual(unicode_platform(), matrix[self.platform]) - @patch('sys.getfilesystemencoding') - @patch('os.listdir') - def test_listdir_unicode(self, mock_listdir, mock_getfilesystemencoding): - if 'dirlist' not in dir(self): - return - - try: - u"test".encode(self.filesystem_encoding) - except (LookupError, AttributeError): - raise unittest.SkipTest("This platform does not support the '%s' filesystem encoding " - "that we are testing for the benefit of a different platform." - % (self.filesystem_encoding,)) - - mock_listdir.return_value = self.dirlist - mock_getfilesystemencoding.return_value = self.filesystem_encoding - - _reload() - filenames = listdir_unicode(u'/dummy') - - self.failUnlessEqual(set([normalize(fname) for fname in filenames]), - set(TEST_FILENAMES)) - - @patch('sys.getfilesystemencoding') - @patch('__builtin__.open') - def test_open_unicode(self, mock_open, mock_getfilesystemencoding): - mock_getfilesystemencoding.return_value = self.filesystem_encoding - fn = u'/dummy_directory/" + lumiere_nfc + ".txt' - - try: - u"test".encode(self.filesystem_encoding) - except (LookupError, AttributeError): - raise unittest.SkipTest("This platform does not support the '%s' filesystem encoding " - "that we are testing for the benefit of a different platform." - % (self.filesystem_encoding,)) - - _reload() - try: - open_unicode(fn, 'rb') - except FilenameEncodingError: - return - - # Pass Unicode string to open() on Unicode platforms - if unicode_platform(): - mock_open.assert_called_with(fn, 'rb') - - # Pass correctly encoded bytestrings to open() on non-Unicode platforms - else: - fn_bytestring = fn.encode(self.filesystem_encoding) - mock_open.assert_called_with(fn_bytestring, 'rb') - class UbuntuKarmicUTF8(StringUtils, unittest.TestCase): uname = 'Linux korn 2.6.31-14-generic #48-Ubuntu SMP Fri Oct 16 14:05:01 UTC 2009 x86_64' @@ -261,7 +136,6 @@ class UbuntuKarmicUTF8(StringUtils, unittest.TestCase): filesystem_encoding = 'UTF-8' output_encoding = 'UTF-8' argv_encoding = 'UTF-8' - dirlist = ['test_file', '\xc3\x84rtonwall.mp3', 'Blah blah.txt'] class UbuntuKarmicLatin1(StringUtils, unittest.TestCase): uname = 'Linux korn 2.6.31-14-generic #48-Ubuntu SMP Fri Oct 16 14:05:01 UTC 2009 x86_64' @@ -271,7 +145,6 @@ class UbuntuKarmicLatin1(StringUtils, unittest.TestCase): filesystem_encoding = 'ISO-8859-1' output_encoding = 'ISO-8859-1' argv_encoding = 'ISO-8859-1' - dirlist = ['test_file', 'Blah blah.txt', '\xc4rtonwall.mp3'] class WindowsXP(StringUtils, unittest.TestCase): uname = 'Windows XP 5.1.2600 x86 x86 Family 15 Model 75 Step ping 2, AuthenticAMD' @@ -280,7 +153,6 @@ class WindowsXP(StringUtils, unittest.TestCase): filesystem_encoding = 'mbcs' output_encoding = 'cp850' argv_encoding = 'ascii' - dirlist = [u'Blah blah.txt', u'test_file', u'\xc4rtonwall.mp3'] class WindowsXP_UTF8(StringUtils, unittest.TestCase): uname = 'Windows XP 5.1.2600 x86 x86 Family 15 Model 75 Step ping 2, AuthenticAMD' @@ -289,7 +161,6 @@ class WindowsXP_UTF8(StringUtils, unittest.TestCase): filesystem_encoding = 'mbcs' output_encoding = 'cp65001' argv_encoding = 'ascii' - dirlist = [u'Blah blah.txt', u'test_file', u'\xc4rtonwall.mp3'] class WindowsVista(StringUtils, unittest.TestCase): uname = 'Windows Vista 6.0.6000 x86 x86 Family 6 Model 15 Stepping 11, GenuineIntel' @@ -298,7 +169,6 @@ class WindowsVista(StringUtils, unittest.TestCase): filesystem_encoding = 'mbcs' output_encoding = 'cp850' argv_encoding = 'ascii' - dirlist = [u'Blah blah.txt', u'test_file', u'\xc4rtonwall.mp3'] class MacOSXLeopard(StringUtils, unittest.TestCase): uname = 'Darwin g5.local 9.8.0 Darwin Kernel Version 9.8.0: Wed Jul 15 16:57:01 PDT 2009; root:xnu-1228.15.4~1/RELEASE_PPC Power Macintosh powerpc' @@ -308,7 +178,6 @@ class MacOSXLeopard(StringUtils, unittest.TestCase): filesystem_encoding = 'utf-8' output_encoding = 'UTF-8' argv_encoding = 'UTF-8' - dirlist = [u'A\u0308rtonwall.mp3', u'Blah blah.txt', u'test_file'] class MacOSXLeopard7bit(StringUtils, unittest.TestCase): uname = 'Darwin g5.local 9.8.0 Darwin Kernel Version 9.8.0: Wed Jul 15 16:57:01 PDT 2009; root:xnu-1228.15.4~1/RELEASE_PPC Power Macintosh powerpc' @@ -316,7 +185,6 @@ class MacOSXLeopard7bit(StringUtils, unittest.TestCase): filesystem_encoding = 'utf-8' output_encoding = 'US-ASCII' argv_encoding = 'US-ASCII' - dirlist = [u'A\u0308rtonwall.mp3', u'Blah blah.txt', u'test_file'] class OpenBSD(StringUtils, unittest.TestCase): uname = 'OpenBSD 4.1 GENERIC#187 i386 Intel(R) Celeron(R) CPU 2.80GHz ("GenuineIntel" 686-class)' diff --git a/src/allmydata/util/stringutils.py b/src/allmydata/util/encodingutil.py similarity index 70% rename from src/allmydata/util/stringutils.py rename to src/allmydata/util/encodingutil.py index e0983404..61d58fef 100644 --- a/src/allmydata/util/stringutils.py +++ b/src/allmydata/util/encodingutil.py @@ -4,7 +4,6 @@ unicode and back. """ import sys -import os import re from allmydata.util.assertutil import precondition from twisted.python import usage @@ -174,71 +173,3 @@ def unicode_platform(): Does the current platform handle Unicode filenames natively? """ return is_unicode_platform - -class FilenameEncodingError(Exception): - """ - Filename cannot be encoded using the current encoding of your filesystem - (%s). Please configure your locale correctly or rename this file. - """ - pass - -def listdir_unicode_fallback(path): - """ - This function emulates a fallback Unicode API similar to one available - under Windows or MacOS X. - - If badly encoded filenames are encountered, an exception is raised. - """ - precondition(isinstance(path, unicode), path) - - try: - byte_path = path.encode(filesystem_encoding) - except (UnicodeEncodeError, UnicodeDecodeError): - raise FilenameEncodingError(path) - - try: - return [unicode(fn, filesystem_encoding) for fn in os.listdir(byte_path)] - except UnicodeDecodeError: - raise FilenameEncodingError(fn) - -def listdir_unicode(path): - """ - Wrapper around listdir() which provides safe access to the convenient - Unicode API even under platforms that don't provide one natively. - """ - precondition(isinstance(path, unicode), path) - - # On Windows and MacOS X, the Unicode API is used - # On other platforms (ie. Unix systems), the byte-level API is used - - if is_unicode_platform: - return os.listdir(path) - else: - return listdir_unicode_fallback(path) - -def open_unicode(path, mode): - """ - Wrapper around open() which provides safe access to the convenient Unicode - API even under Unix. - """ - precondition(isinstance(path, unicode), path) - - if is_unicode_platform: - return open(os.path.expanduser(path), mode) - else: - try: - return open(os.path.expanduser(path.encode(filesystem_encoding)), mode) - except UnicodeEncodeError: - raise FilenameEncodingError(path) - -def abspath_expanduser_unicode(path): - precondition(isinstance(path, unicode), path) - - if is_unicode_platform: - return os.path.abspath(os.path.expanduser(path)) - else: - try: - pathstr = path.encode(filesystem_encoding) - return os.path.abspath(os.path.expanduser(pathstr)).decode(filesystem_encoding) - except (UnicodeEncodeError, UnicodeDecodeError): - raise FilenameEncodingError(path) diff --git a/src/allmydata/util/fileutil.py b/src/allmydata/util/fileutil.py index 740e5093..bd9deb43 100644 --- a/src/allmydata/util/fileutil.py +++ b/src/allmydata/util/fileutil.py @@ -208,3 +208,23 @@ def read(path): return rf.read() finally: rf.close() + +def put_file(pathname, inf): + # TODO: create temporary file and move into place? + outf = open_expanduser(pathname, "wb") + try: + while True: + data = inf.read(32768) + if not data: + break + outf.write(data) + finally: + outf.close() + +def open_expanduser(path, mode): + assert isinstance(path, unicode), path + return open(os.path.expanduser(path), mode) + +def abspath_expanduser(path): + assert isinstance(path, unicode), path + return os.path.abspath(os.path.expanduser(path)) diff --git a/src/allmydata/web/common.py b/src/allmydata/web/common.py index f2fb43cb..d65e1867 100644 --- a/src/allmydata/web/common.py +++ b/src/allmydata/web/common.py @@ -12,7 +12,7 @@ from allmydata.interfaces import ExistingChildError, NoSuchChildError, \ MustBeReadonlyError, MustNotBeUnknownRWError from allmydata.mutable.common import UnrecoverableFileError from allmydata.util import abbreviate -from allmydata.util.stringutils import to_str +from allmydata.util.encodingutil import to_str class IOpHandleTable(Interface): pass -- 2.37.2