From 11077ea74de4d59aa58acd2c48cdcd9ef44cdd5c Mon Sep 17 00:00:00 2001
From: david-sarah <david-sarah@jacaranda.org>
Date: Sun, 11 Jul 2010 17:30:15 -0700
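Subject: [PATCH] Rename stringutils to encodingutil, and drop listdir_unicode
 and open_unicode (since the Python stdlib functions work fine with Unicode
 paths). Also move some utility functions to fileutil (put_file, plus
 open_expanduser and abspath_expanduser as the replacements for open_unicode
 and abspath_expanduser_unicode).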

---
 src/allmydata/dirnode.py                      |   2 +-
 src/allmydata/scripts/cli.py                  |   2 +-
 src/allmydata/scripts/common.py               |   2 +-
 src/allmydata/scripts/common_http.py          |   2 +-
 src/allmydata/scripts/slow_operation.py       |   2 +-
 src/allmydata/scripts/tahoe_add_alias.py      |   2 +-
 src/allmydata/scripts/tahoe_backup.py         |  11 +-
 src/allmydata/scripts/tahoe_check.py          |   2 +-
 src/allmydata/scripts/tahoe_cp.py             |  35 ++---
 src/allmydata/scripts/tahoe_get.py            |   4 +-
 src/allmydata/scripts/tahoe_ls.py             |   2 +-
 src/allmydata/scripts/tahoe_manifest.py       |   2 +-
 src/allmydata/scripts/tahoe_mkdir.py          |   2 +-
 src/allmydata/scripts/tahoe_mv.py             |   2 +-
 src/allmydata/scripts/tahoe_put.py            |   5 +-
 src/allmydata/test/test_backupdb.py           |   4 +-
 src/allmydata/test/test_cli.py                |  20 +--
 ...st_stringutils.py => test_encodingutil.py} | 138 +-----------------
 .../util/{stringutils.py => encodingutil.py}  |  69 ---------
 src/allmydata/util/fileutil.py                |  20 +++
 src/allmydata/web/common.py                   |   2 +-
 21 files changed, 67 insertions(+), 263 deletions(-)
 rename src/allmydata/test/{test_stringutils.py => test_encodingutil.py} (52%)
 rename src/allmydata/util/{stringutils.py => encodingutil.py} (70%)

diff --git a/src/allmydata/dirnode.py b/src/allmydata/dirnode.py
index 56fd56b2..9d41af19 100644
--- a/src/allmydata/dirnode.py
+++ b/src/allmydata/dirnode.py
@@ -16,7 +16,7 @@ from allmydata.check_results import DeepCheckResults, \
      DeepCheckAndRepairResults
 from allmydata.monitor import Monitor
 from allmydata.util import hashutil, mathutil, base32, log
-from allmydata.util.stringutils import quote_output
+from allmydata.util.encodingutil import quote_output
 from allmydata.util.assertutil import precondition
 from allmydata.util.netstring import netstring, split_netstring
 from allmydata.util.consumer import download_to_data
diff --git a/src/allmydata/scripts/cli.py b/src/allmydata/scripts/cli.py
index 446c6b5a..833d5d08 100644
--- a/src/allmydata/scripts/cli.py
+++ b/src/allmydata/scripts/cli.py
@@ -1,7 +1,7 @@
 import os.path, re, sys, fnmatch
 from twisted.python import usage
 from allmydata.scripts.common import BaseOptions, get_aliases
-from allmydata.util.stringutils import argv_to_unicode
+from allmydata.util.encodingutil import argv_to_unicode
 
 NODEURL_RE=re.compile("http(s?)://([^:]*)(:([1-9][0-9]*))?")
 
diff --git a/src/allmydata/scripts/common.py b/src/allmydata/scripts/common.py
index 1116e1db..88fdd286 100644
--- a/src/allmydata/scripts/common.py
+++ b/src/allmydata/scripts/common.py
@@ -2,7 +2,7 @@
 import os, sys, urllib
 import codecs
 from twisted.python import usage
-from allmydata.util.stringutils import unicode_to_url, quote_output
+from allmydata.util.encodingutil import unicode_to_url, quote_output
 from allmydata.util.assertutil import precondition
 
 class BaseOptions:
diff --git a/src/allmydata/scripts/common_http.py b/src/allmydata/scripts/common_http.py
index 1cf76d02..9ceb7a79 100644
--- a/src/allmydata/scripts/common_http.py
+++ b/src/allmydata/scripts/common_http.py
@@ -3,7 +3,7 @@ from cStringIO import StringIO
 import urlparse, httplib
 import allmydata # for __full_version__
 
-from allmydata.util.stringutils import quote_output
+from allmydata.util.encodingutil import quote_output
 from allmydata.scripts.common import TahoeError
 
 
diff --git a/src/allmydata/scripts/slow_operation.py b/src/allmydata/scripts/slow_operation.py
index f47c0412..3da511f9 100644
--- a/src/allmydata/scripts/slow_operation.py
+++ b/src/allmydata/scripts/slow_operation.py
@@ -4,7 +4,7 @@ from allmydata.scripts.common import get_alias, DEFAULT_ALIAS, escape_path, \
                                      UnknownAliasError
 from allmydata.scripts.common_http import do_http, format_http_error
 from allmydata.util import base32
-from allmydata.util.stringutils import quote_output, is_printable_ascii
+from allmydata.util.encodingutil import quote_output, is_printable_ascii
 import urllib
 import simplejson
 
diff --git a/src/allmydata/scripts/tahoe_add_alias.py b/src/allmydata/scripts/tahoe_add_alias.py
index 984719a4..a3ae1843 100644
--- a/src/allmydata/scripts/tahoe_add_alias.py
+++ b/src/allmydata/scripts/tahoe_add_alias.py
@@ -5,7 +5,7 @@ from allmydata import uri
 from allmydata.scripts.common_http import do_http, check_http_error
 from allmydata.scripts.common import get_aliases
 from allmydata.util.fileutil import move_into_place
-from allmydata.util.stringutils import unicode_to_output, quote_output
+from allmydata.util.encodingutil import unicode_to_output, quote_output
 
 
 def add_line_to_aliasfile(aliasfile, alias, cap):
diff --git a/src/allmydata/scripts/tahoe_backup.py b/src/allmydata/scripts/tahoe_backup.py
index 8b853c9e..2525b2fa 100644
--- a/src/allmydata/scripts/tahoe_backup.py
+++ b/src/allmydata/scripts/tahoe_backup.py
@@ -9,7 +9,8 @@ from allmydata.scripts.common import get_alias, escape_path, DEFAULT_ALIAS, \
 from allmydata.scripts.common_http import do_http, HTTPError, format_http_error
 from allmydata.util import time_format
 from allmydata.scripts import backupdb
-from allmydata.util.stringutils import listdir_unicode, open_unicode, quote_output, to_str
+from allmydata.util.encodingutil import quote_output, to_str
+from allmydata.util.fileutil import open_expanduser
 from allmydata.util.assertutil import precondition
 
 
@@ -166,11 +167,15 @@ class BackerUpper:
         compare_contents = {} # childname -> rocap
 
         try:
-            children = listdir_unicode(localpath)
+            children = os.listdir(localpath)
         except EnvironmentError:
             self.directories_skipped += 1
             self.warn("WARNING: permission denied on directory %s" % quote_output(localpath))
             children = []
+        except (UnicodeEncodeError, UnicodeDecodeError):
+            self.directories_skipped += 1
+            self.warn("WARNING: could not list directory %s due to an encoding error" % quote_output(localpath))
+            children = []
 
         for child in self.options.filter_listdir(children):
             assert isinstance(child, unicode), child
@@ -292,7 +297,7 @@ class BackerUpper:
 
         if must_upload:
             self.verboseprint("uploading %s.." % quote_output(childpath))
-            infileobj = open_unicode(childpath, "rb")
+            infileobj = open_expanduser(childpath, "rb")
             url = self.options['node-url'] + "uri"
             resp = do_http("PUT", url, infileobj)
             if resp.status not in (200, 201):
diff --git a/src/allmydata/scripts/tahoe_check.py b/src/allmydata/scripts/tahoe_check.py
index acf70f06..adb0ccc4 100644
--- a/src/allmydata/scripts/tahoe_check.py
+++ b/src/allmydata/scripts/tahoe_check.py
@@ -5,7 +5,7 @@ from twisted.protocols.basic import LineOnlyReceiver
 from allmydata.scripts.common import get_alias, DEFAULT_ALIAS, escape_path, \
                                      UnknownAliasError
 from allmydata.scripts.common_http import do_http, format_http_error
-from allmydata.util.stringutils import quote_output, quote_path
+from allmydata.util.encodingutil import quote_output, quote_path
 
 class Checker:
     pass
diff --git a/src/allmydata/scripts/tahoe_cp.py b/src/allmydata/scripts/tahoe_cp.py
index 59c35445..76fa95ef 100644
--- a/src/allmydata/scripts/tahoe_cp.py
+++ b/src/allmydata/scripts/tahoe_cp.py
@@ -8,25 +8,12 @@ from allmydata.scripts.common import get_alias, escape_path, \
                                      DefaultAliasMarker, TahoeError
 from allmydata.scripts.common_http import do_http, HTTPError
 from allmydata import uri
-from allmydata.util.stringutils import unicode_to_url, listdir_unicode, open_unicode, \
-    abspath_expanduser_unicode, quote_output, to_str
+from allmydata.util.encodingutil import unicode_to_url, quote_output, to_str
+from allmydata.util import fileutil
+from allmydata.util.fileutil import open_expanduser, abspath_expanduser
 from allmydata.util.assertutil import precondition
 
 
-def _put_local_file(pathname, inf):
-    # TODO: create temporary file and move into place?
-    # TODO: move this to fileutil.
-    outf = open_unicode(pathname, "wb")
-    try:
-        while True:
-            data = inf.read(32768)
-            if not data:
-                break
-            outf.write(data)
-    finally:
-        outf.close()
-
-
 class MissingSourceError(TahoeError):
     def __init__(self, name):
         TahoeError.__init__(self, "No such file or directory %s" % quote_output(name))
@@ -81,7 +68,7 @@ class LocalFileSource:
         return True
 
     def open(self, caps_only):
-        return open_unicode(self.pathname, "rb")
+        return open_expanduser(self.pathname, "rb")
 
 
 class LocalFileTarget:
@@ -90,7 +77,7 @@ class LocalFileTarget:
         self.pathname = pathname
 
     def put_file(self, inf):
-        _put_local_file(self.pathname, inf)
+        fileutil.put_file(self.pathname, inf)
 
 
 class LocalMissingTarget:
@@ -99,7 +86,7 @@ class LocalMissingTarget:
         self.pathname = pathname
 
     def put_file(self, inf):
-        _put_local_file(self.pathname, inf)
+        fileutil.put_file(self.pathname, inf)
 
 
 class LocalDirectorySource:
@@ -114,7 +101,7 @@ class LocalDirectorySource:
         if self.children is not None:
             return
         self.children = {}
-        children = listdir_unicode(self.pathname)
+        children = os.listdir(self.pathname)
         for i,n in enumerate(children):
             self.progressfunc("examining %d of %d" % (i, len(children)))
             pn = os.path.join(self.pathname, n)
@@ -142,7 +129,7 @@ class LocalDirectoryTarget:
         if self.children is not None:
             return
         self.children = {}
-        children = listdir_unicode(self.pathname)
+        children = os.listdir(self.pathname)
         for i,n in enumerate(children):
             self.progressfunc("examining %d of %d" % (i, len(children)))
             n = unicode(n)
@@ -168,7 +155,7 @@ class LocalDirectoryTarget:
     def put_file(self, name, inf):
         precondition(isinstance(name, unicode), name)
         pathname = os.path.join(self.pathname, name)
-        _put_local_file(pathname, inf)
+        fileutil.put_file(pathname, inf)
 
     def set_children(self):
         pass
@@ -525,7 +512,7 @@ class Copier:
         rootcap, path = get_alias(self.aliases, destination_spec, None)
         if rootcap == DefaultAliasMarker:
             # no alias, so this is a local file
-            pathname = abspath_expanduser_unicode(path.decode('utf-8'))
+            pathname = abspath_expanduser(path.decode('utf-8'))
             if not os.path.exists(pathname):
                 t = LocalMissingTarget(pathname)
             elif os.path.isdir(pathname):
@@ -565,7 +552,7 @@ class Copier:
         rootcap, path = get_alias(self.aliases, source_spec, None)
         if rootcap == DefaultAliasMarker:
             # no alias, so this is a local file
-            pathname = abspath_expanduser_unicode(path.decode('utf-8'))
+            pathname = abspath_expanduser(path.decode('utf-8'))
             name = os.path.basename(pathname)
             if not os.path.exists(pathname):
                 raise MissingSourceError(source_spec)
diff --git a/src/allmydata/scripts/tahoe_get.py b/src/allmydata/scripts/tahoe_get.py
index dfc36644..63032ed4 100644
--- a/src/allmydata/scripts/tahoe_get.py
+++ b/src/allmydata/scripts/tahoe_get.py
@@ -3,7 +3,7 @@ import urllib
 from allmydata.scripts.common import get_alias, DEFAULT_ALIAS, escape_path, \
                                      UnknownAliasError
 from allmydata.scripts.common_http import do_http, format_http_error
-from allmydata.util.stringutils import open_unicode
+from allmydata.util.fileutil import open_expanduser
 
 def get(options):
     nodeurl = options['node-url']
@@ -27,7 +27,7 @@ def get(options):
     resp = do_http("GET", url)
     if resp.status in (200, 201,):
         if to_file:
-            outf = open_unicode(to_file, "wb")
+            outf = open_expanduser(to_file, "wb")
         else:
             outf = stdout
         while True:
diff --git a/src/allmydata/scripts/tahoe_ls.py b/src/allmydata/scripts/tahoe_ls.py
index d7c7ef83..89ef3347 100644
--- a/src/allmydata/scripts/tahoe_ls.py
+++ b/src/allmydata/scripts/tahoe_ls.py
@@ -4,7 +4,7 @@ import simplejson
 from allmydata.scripts.common import get_alias, DEFAULT_ALIAS, escape_path, \
                                      UnknownAliasError
 from allmydata.scripts.common_http import do_http, format_http_error
-from allmydata.util.stringutils import unicode_to_output, quote_output, is_printable_ascii, to_str
+from allmydata.util.encodingutil import unicode_to_output, quote_output, is_printable_ascii, to_str
 
 def list(options):
     nodeurl = options['node-url']
diff --git a/src/allmydata/scripts/tahoe_manifest.py b/src/allmydata/scripts/tahoe_manifest.py
index db422082..0b9c64f6 100644
--- a/src/allmydata/scripts/tahoe_manifest.py
+++ b/src/allmydata/scripts/tahoe_manifest.py
@@ -6,7 +6,7 @@ from allmydata.scripts.slow_operation import SlowOperationRunner
 from allmydata.scripts.common import get_alias, DEFAULT_ALIAS, escape_path, \
                                      UnknownAliasError
 from allmydata.scripts.common_http import do_http, format_http_error
-from allmydata.util.stringutils import quote_output, quote_path
+from allmydata.util.encodingutil import quote_output, quote_path
 
 class FakeTransport:
     disconnecting = False
diff --git a/src/allmydata/scripts/tahoe_mkdir.py b/src/allmydata/scripts/tahoe_mkdir.py
index 7ff8a99a..dbcabac6 100644
--- a/src/allmydata/scripts/tahoe_mkdir.py
+++ b/src/allmydata/scripts/tahoe_mkdir.py
@@ -2,7 +2,7 @@
 import urllib
 from allmydata.scripts.common_http import do_http, check_http_error
 from allmydata.scripts.common import get_alias, DEFAULT_ALIAS, UnknownAliasError
-from allmydata.util.stringutils import quote_output
+from allmydata.util.encodingutil import quote_output
 
 def mkdir(options):
     nodeurl = options['node-url']
diff --git a/src/allmydata/scripts/tahoe_mv.py b/src/allmydata/scripts/tahoe_mv.py
index d336d804..3be219d6 100644
--- a/src/allmydata/scripts/tahoe_mv.py
+++ b/src/allmydata/scripts/tahoe_mv.py
@@ -5,7 +5,7 @@ import simplejson
 from allmydata.scripts.common import get_alias, DEFAULT_ALIAS, escape_path, \
                                      UnknownAliasError
 from allmydata.scripts.common_http import do_http, format_http_error
-from allmydata.util.stringutils import to_str
+from allmydata.util.encodingutil import to_str
 
 # this script is used for both 'mv' and 'ln'
 
diff --git a/src/allmydata/scripts/tahoe_put.py b/src/allmydata/scripts/tahoe_put.py
index 82a9e4cc..d646110e 100644
--- a/src/allmydata/scripts/tahoe_put.py
+++ b/src/allmydata/scripts/tahoe_put.py
@@ -4,7 +4,8 @@ import urllib
 from allmydata.scripts.common_http import do_http, format_http_success, format_http_error
 from allmydata.scripts.common import get_alias, DEFAULT_ALIAS, escape_path, \
                                      UnknownAliasError
-from allmydata.util.stringutils import quote_output, open_unicode
+from allmydata.util.encodingutil import quote_output
+from allmydata.util.fileutil import open_expanduser
 
 def put(options):
     """
@@ -64,7 +65,7 @@ def put(options):
     if mutable:
         url += "?mutable=true"
     if from_file:
-        infileobj = open_unicode(from_file, "rb")
+        infileobj = open_expanduser(from_file, "rb")
     else:
         # do_http() can't use stdin directly: for one thing, we need a
         # Content-Length field. So we currently must copy it.
diff --git a/src/allmydata/test/test_backupdb.py b/src/allmydata/test/test_backupdb.py
index 13c86792..1d6d9734 100644
--- a/src/allmydata/test/test_backupdb.py
+++ b/src/allmydata/test/test_backupdb.py
@@ -4,7 +4,7 @@ from StringIO import StringIO
 from twisted.trial import unittest
 
 from allmydata.util import fileutil
-from allmydata.util.stringutils import listdir_unicode, get_filesystem_encoding, unicode_platform
+from allmydata.util.encodingutil import get_filesystem_encoding, unicode_platform
 from allmydata.util.assertutil import precondition
 from allmydata.scripts import backupdb
 
@@ -249,7 +249,7 @@ class BackupDB(unittest.TestCase):
         self.failUnless(bdb)
 
         self.writeto(u"f\u00f6\u00f6.txt", "foo.txt")
-        files = [fn for fn in listdir_unicode(unicode(basedir)) if fn.endswith(".txt")]
+        files = [fn for fn in os.listdir(unicode(basedir)) if fn.endswith(".txt")]
         self.failUnlessEqual(len(files), 1)
         foo_fn = os.path.join(basedir, files[0])
         #print foo_fn, type(foo_fn)
diff --git a/src/allmydata/test/test_cli.py b/src/allmydata/test/test_cli.py
index ef6090aa..70746b7d 100644
--- a/src/allmydata/test/test_cli.py
+++ b/src/allmydata/test/test_cli.py
@@ -31,8 +31,8 @@ from twisted.internet import threads # CLI tests use deferToThread
 from twisted.python import usage
 
 from allmydata.util.assertutil import precondition
-from allmydata.util.stringutils import listdir_unicode, open_unicode, unicode_platform, \
-    quote_output, get_output_encoding, get_argv_encoding, get_filesystem_encoding, \
+from allmydata.util.encodingutil import unicode_platform, quote_output, \
+    get_output_encoding, get_argv_encoding, get_filesystem_encoding, \
     unicode_to_output, to_str
 
 timeout = 480 # deep_check takes 360s on Zandr's linksys box, others take > 240s
@@ -439,9 +439,9 @@ class CLI(CLITestMixin, unittest.TestCase):
         fileutil.make_dirs(basedir)
 
         for name in filenames:
-            open_unicode(os.path.join(unicode(basedir), name), "wb").close()
+            open(os.path.join(unicode(basedir), name), "wb").close()
 
-        for file in listdir_unicode(unicode(basedir)):
+        for file in os.listdir(unicode(basedir)):
             self.failUnlessIn(normalize(file), filenames)
 
 
@@ -974,11 +974,7 @@ class Put(GridTestMixin, CLITestMixin, unittest.TestCase):
         rel_fn = os.path.join(unicode(self.basedir), u"à trier.txt")
         # we make the file small enough to fit in a LIT file, for speed
         DATA = "short file"
-        f = open_unicode(rel_fn, "wb")
-        try:
-            f.write(DATA)
-        finally:
-            f.close()
+        fileutil.write(rel_fn, DATA)
 
         d = self.do_cli("create-alias", "tahoe")
 
@@ -1349,11 +1345,7 @@ class Cp(GridTestMixin, CLITestMixin, unittest.TestCase):
         self.set_up_grid()
 
         DATA1 = "unicode file content"
-        f = open_unicode(fn1, "wb")
-        try:
-            f.write(DATA1)
-        finally:
-            f.close()
+        fileutil.write(fn1, DATA1)
 
         fn2 = os.path.join(self.basedir, "Metallica")
         DATA2 = "non-unicode file content"
diff --git a/src/allmydata/test/test_stringutils.py b/src/allmydata/test/test_encodingutil.py
similarity index 52%
rename from src/allmydata/test/test_stringutils.py
rename to src/allmydata/test/test_encodingutil.py
index 8013c23d..a287bf41 100644
--- a/src/allmydata/test/test_stringutils.py
+++ b/src/allmydata/test/test_encodingutil.py
@@ -13,9 +13,7 @@ TEST_FILENAMES = (
 # systems.
 
 if __name__ == "__main__":
-    import sys, os
-    import tempfile
-    import shutil
+    import sys
     import platform
 
     if len(sys.argv) != 2:
@@ -31,24 +29,8 @@ if __name__ == "__main__":
     print "    filesystem_encoding = '%s'" % sys.getfilesystemencoding()
     print "    output_encoding = '%s'" % sys.stdout.encoding
     print "    argv_encoding = '%s'" % (sys.platform == "win32" and 'ascii' or sys.stdout.encoding)
-
-    try:
-        tmpdir = tempfile.mkdtemp()
-        for fname in TEST_FILENAMES:
-            open(os.path.join(tmpdir, fname), 'w').close() 
-
-        # Use Unicode API under Windows or MacOS X
-        if sys.platform in ('win32', 'darwin'):
-            dirlist = os.listdir(unicode(tmpdir))
-        else:
-            dirlist = os.listdir(tmpdir)
-
-        print "    dirlist = %s" % repr(dirlist)
-    except:
-        print "    # Oops, I cannot write filenames containing non-ascii characters"
     print
 
-    shutil.rmtree(tmpdir)
     sys.exit(0)
 
 from twisted.trial import unittest
@@ -56,10 +38,8 @@ from mock import patch
 import sys
 
 from allmydata.test.common_util import ReallyEqualMixin
-from allmydata.util.stringutils import argv_to_unicode, unicode_to_url, \
-    unicode_to_output, unicode_platform, listdir_unicode, open_unicode, \
-    FilenameEncodingError, get_output_encoding, _reload
-from allmydata.dirnode import normalize
+from allmydata.util.encodingutil import argv_to_unicode, unicode_to_url, \
+    unicode_to_output, unicode_platform, get_output_encoding, _reload
 
 from twisted.python import usage
 
@@ -102,61 +82,6 @@ class StringUtilsErrors(ReallyEqualMixin, unittest.TestCase):
         _reload()
         self.failUnlessRaises(UnicodeEncodeError, unicode_to_output, lumiere_nfc)
 
-    @patch('os.listdir')
-    def test_no_unicode_normalization(self, mock):
-        # Pretend to run on a Unicode platform.
-        # We normalized to NFC in 1.7beta, but we now don't.
-        orig_platform = sys.platform
-        try:
-            sys.platform = 'darwin'
-            mock.return_value = [Artonwall_nfd]
-            _reload()
-            self.failUnlessReallyEqual(listdir_unicode(u'/dummy'), [Artonwall_nfd])
-        finally:
-            sys.platform = orig_platform
-
-# The following tests applies only to platforms which don't store filenames as
-# Unicode entities on the filesystem.
-class StringUtilsNonUnicodePlatform(unittest.TestCase):
-    def setUp(self):
-        # Mock sys.platform because unicode_platform() uses it
-        self.original_platform = sys.platform
-        sys.platform = 'linux'
-
-    def tearDown(self):
-        sys.platform = self.original_platform
-        _reload()
-
-    @patch('sys.getfilesystemencoding')
-    @patch('os.listdir')
-    def test_listdir_unicode(self, mock_listdir, mock_getfilesystemencoding):
-        # What happens if latin1-encoded filenames are encountered on an UTF-8
-        # filesystem?
-        mock_listdir.return_value = [
-            lumiere_nfc.encode('utf-8'),
-            lumiere_nfc.encode('latin1')]
-
-        mock_getfilesystemencoding.return_value = 'utf-8'
-        _reload()
-        self.failUnlessRaises(FilenameEncodingError,
-                              listdir_unicode,
-                              u'/dummy')
-        
-        # We're trying to list a directory whose name cannot be represented in
-        # the filesystem encoding.  This should fail.
-        mock_getfilesystemencoding.return_value = 'ascii'
-        _reload()
-        self.failUnlessRaises(FilenameEncodingError,
-                              listdir_unicode,
-                              u'/' + lumiere_nfc)
-
-    @patch('sys.getfilesystemencoding')
-    def test_open_unicode(self, mock):
-        mock.return_value = 'ascii'
-        _reload()
-        self.failUnlessRaises(FilenameEncodingError,
-                              open_unicode,
-                              lumiere_nfc, 'rb')
 
 class StringUtils(ReallyEqualMixin):
     def setUp(self):
@@ -202,56 +127,6 @@ class StringUtils(ReallyEqualMixin):
         _reload()
         self.failUnlessReallyEqual(unicode_platform(), matrix[self.platform])
  
-    @patch('sys.getfilesystemencoding')
-    @patch('os.listdir')
-    def test_listdir_unicode(self, mock_listdir, mock_getfilesystemencoding):
-        if 'dirlist' not in dir(self):
-            return
-
-        try:
-            u"test".encode(self.filesystem_encoding)
-        except (LookupError, AttributeError):
-            raise unittest.SkipTest("This platform does not support the '%s' filesystem encoding "
-                                    "that we are testing for the benefit of a different platform."
-                                    % (self.filesystem_encoding,))
-
-        mock_listdir.return_value = self.dirlist
-        mock_getfilesystemencoding.return_value = self.filesystem_encoding
-       
-        _reload()
-        filenames = listdir_unicode(u'/dummy')
-
-        self.failUnlessEqual(set([normalize(fname) for fname in filenames]),
-                             set(TEST_FILENAMES))
-
-    @patch('sys.getfilesystemencoding')
-    @patch('__builtin__.open')
-    def test_open_unicode(self, mock_open, mock_getfilesystemencoding):
-        mock_getfilesystemencoding.return_value = self.filesystem_encoding
-        fn = u'/dummy_directory/" + lumiere_nfc + ".txt'
-
-        try:
-            u"test".encode(self.filesystem_encoding)
-        except (LookupError, AttributeError):
-            raise unittest.SkipTest("This platform does not support the '%s' filesystem encoding "
-                                    "that we are testing for the benefit of a different platform."
-                                    % (self.filesystem_encoding,))
-
-        _reload()
-        try:
-            open_unicode(fn, 'rb')
-        except FilenameEncodingError:
-            return
-
-        # Pass Unicode string to open() on Unicode platforms
-        if unicode_platform():
-            mock_open.assert_called_with(fn, 'rb')
-
-        # Pass correctly encoded bytestrings to open() on non-Unicode platforms
-        else:
-            fn_bytestring = fn.encode(self.filesystem_encoding)
-            mock_open.assert_called_with(fn_bytestring, 'rb')
-
 
 class UbuntuKarmicUTF8(StringUtils, unittest.TestCase):
     uname = 'Linux korn 2.6.31-14-generic #48-Ubuntu SMP Fri Oct 16 14:05:01 UTC 2009 x86_64'
@@ -261,7 +136,6 @@ class UbuntuKarmicUTF8(StringUtils, unittest.TestCase):
     filesystem_encoding = 'UTF-8'
     output_encoding = 'UTF-8'
     argv_encoding = 'UTF-8'
-    dirlist = ['test_file', '\xc3\x84rtonwall.mp3', 'Blah blah.txt']
 
 class UbuntuKarmicLatin1(StringUtils, unittest.TestCase):
     uname = 'Linux korn 2.6.31-14-generic #48-Ubuntu SMP Fri Oct 16 14:05:01 UTC 2009 x86_64'
@@ -271,7 +145,6 @@ class UbuntuKarmicLatin1(StringUtils, unittest.TestCase):
     filesystem_encoding = 'ISO-8859-1'
     output_encoding = 'ISO-8859-1'
     argv_encoding = 'ISO-8859-1'
-    dirlist = ['test_file', 'Blah blah.txt', '\xc4rtonwall.mp3']
 
 class WindowsXP(StringUtils, unittest.TestCase):
     uname = 'Windows XP 5.1.2600 x86 x86 Family 15 Model 75 Step ping 2, AuthenticAMD'
@@ -280,7 +153,6 @@ class WindowsXP(StringUtils, unittest.TestCase):
     filesystem_encoding = 'mbcs'
     output_encoding = 'cp850'
     argv_encoding = 'ascii'
-    dirlist = [u'Blah blah.txt', u'test_file', u'\xc4rtonwall.mp3']
 
 class WindowsXP_UTF8(StringUtils, unittest.TestCase):
     uname = 'Windows XP 5.1.2600 x86 x86 Family 15 Model 75 Step ping 2, AuthenticAMD'
@@ -289,7 +161,6 @@ class WindowsXP_UTF8(StringUtils, unittest.TestCase):
     filesystem_encoding = 'mbcs'
     output_encoding = 'cp65001'
     argv_encoding = 'ascii'
-    dirlist = [u'Blah blah.txt', u'test_file', u'\xc4rtonwall.mp3']
 
 class WindowsVista(StringUtils, unittest.TestCase):
     uname = 'Windows Vista 6.0.6000 x86 x86 Family 6 Model 15 Stepping 11, GenuineIntel'
@@ -298,7 +169,6 @@ class WindowsVista(StringUtils, unittest.TestCase):
     filesystem_encoding = 'mbcs'
     output_encoding = 'cp850'
     argv_encoding = 'ascii'
-    dirlist = [u'Blah blah.txt', u'test_file', u'\xc4rtonwall.mp3']
 
 class MacOSXLeopard(StringUtils, unittest.TestCase):
     uname = 'Darwin g5.local 9.8.0 Darwin Kernel Version 9.8.0: Wed Jul 15 16:57:01 PDT 2009; root:xnu-1228.15.4~1/RELEASE_PPC Power Macintosh powerpc'
@@ -308,7 +178,6 @@ class MacOSXLeopard(StringUtils, unittest.TestCase):
     filesystem_encoding = 'utf-8'
     output_encoding = 'UTF-8'
     argv_encoding = 'UTF-8'
-    dirlist = [u'A\u0308rtonwall.mp3', u'Blah blah.txt', u'test_file']
 
 class MacOSXLeopard7bit(StringUtils, unittest.TestCase):
     uname = 'Darwin g5.local 9.8.0 Darwin Kernel Version 9.8.0: Wed Jul 15 16:57:01 PDT 2009; root:xnu-1228.15.4~1/RELEASE_PPC Power Macintosh powerpc'
@@ -316,7 +185,6 @@ class MacOSXLeopard7bit(StringUtils, unittest.TestCase):
     filesystem_encoding = 'utf-8'
     output_encoding = 'US-ASCII'
     argv_encoding = 'US-ASCII'
-    dirlist = [u'A\u0308rtonwall.mp3', u'Blah blah.txt', u'test_file']
 
 class OpenBSD(StringUtils, unittest.TestCase):
     uname = 'OpenBSD 4.1 GENERIC#187 i386 Intel(R) Celeron(R) CPU 2.80GHz ("GenuineIntel" 686-class)'
diff --git a/src/allmydata/util/stringutils.py b/src/allmydata/util/encodingutil.py
similarity index 70%
rename from src/allmydata/util/stringutils.py
rename to src/allmydata/util/encodingutil.py
index e0983404..61d58fef 100644
--- a/src/allmydata/util/stringutils.py
+++ b/src/allmydata/util/encodingutil.py
@@ -4,7 +4,6 @@ unicode and back.
 """
 
 import sys
-import os
 import re
 from allmydata.util.assertutil import precondition
 from twisted.python import usage
@@ -174,71 +173,3 @@ def unicode_platform():
     Does the current platform handle Unicode filenames natively?
     """
     return is_unicode_platform
-
-class FilenameEncodingError(Exception):
-    """
-    Filename cannot be encoded using the current encoding of your filesystem
-    (%s). Please configure your locale correctly or rename this file.
-    """
-    pass
-
-def listdir_unicode_fallback(path):
-    """
-    This function emulates a fallback Unicode API similar to one available
-    under Windows or MacOS X.
-
-    If badly encoded filenames are encountered, an exception is raised.
-    """
-    precondition(isinstance(path, unicode), path)
-
-    try:
-        byte_path = path.encode(filesystem_encoding)
-    except (UnicodeEncodeError, UnicodeDecodeError):
-        raise FilenameEncodingError(path)
-
-    try:
-        return [unicode(fn, filesystem_encoding) for fn in os.listdir(byte_path)]
-    except UnicodeDecodeError:
-        raise FilenameEncodingError(fn)
-
-def listdir_unicode(path):
-    """
-    Wrapper around listdir() which provides safe access to the convenient
-    Unicode API even under platforms that don't provide one natively.
-    """
-    precondition(isinstance(path, unicode), path)
-
-    # On Windows and MacOS X, the Unicode API is used
-    # On other platforms (ie. Unix systems), the byte-level API is used
-
-    if is_unicode_platform:
-        return os.listdir(path)
-    else:
-        return listdir_unicode_fallback(path)
-
-def open_unicode(path, mode):
-    """
-    Wrapper around open() which provides safe access to the convenient Unicode
-    API even under Unix.
-    """
-    precondition(isinstance(path, unicode), path)
-
-    if is_unicode_platform:
-        return open(os.path.expanduser(path), mode)
-    else:
-        try:
-            return open(os.path.expanduser(path.encode(filesystem_encoding)), mode)
-        except UnicodeEncodeError:
-            raise FilenameEncodingError(path)
-
-def abspath_expanduser_unicode(path):
-    precondition(isinstance(path, unicode), path)
-
-    if is_unicode_platform:
-        return os.path.abspath(os.path.expanduser(path))
-    else:
-        try:
-            pathstr = path.encode(filesystem_encoding)
-            return os.path.abspath(os.path.expanduser(pathstr)).decode(filesystem_encoding)
-        except (UnicodeEncodeError, UnicodeDecodeError):
-            raise FilenameEncodingError(path)
diff --git a/src/allmydata/util/fileutil.py b/src/allmydata/util/fileutil.py
index 740e5093..bd9deb43 100644
--- a/src/allmydata/util/fileutil.py
+++ b/src/allmydata/util/fileutil.py
@@ -208,3 +208,23 @@ def read(path):
         return rf.read()
     finally:
         rf.close()
+
+def put_file(pathname, inf):
+    # TODO: create temporary file and move into place?
+    outf = open_expanduser(pathname, "wb")
+    try:
+        while True:
+            data = inf.read(32768)
+            if not data:
+                break
+            outf.write(data)
+    finally:
+        outf.close()
+
+def open_expanduser(path, mode):
+    assert isinstance(path, unicode), path
+    return open(os.path.expanduser(path), mode)
+
+def abspath_expanduser(path):
+    assert isinstance(path, unicode), path
+    return os.path.abspath(os.path.expanduser(path))
diff --git a/src/allmydata/web/common.py b/src/allmydata/web/common.py
index f2fb43cb..d65e1867 100644
--- a/src/allmydata/web/common.py
+++ b/src/allmydata/web/common.py
@@ -12,7 +12,7 @@ from allmydata.interfaces import ExistingChildError, NoSuchChildError, \
      MustBeReadonlyError, MustNotBeUnknownRWError
 from allmydata.mutable.common import UnrecoverableFileError
 from allmydata.util import abbreviate
-from allmydata.util.stringutils import to_str
+from allmydata.util.encodingutil import to_str
 
 class IOpHandleTable(Interface):
     pass
-- 
2.45.2