]> git.rkrishnan.org Git - tahoe-lafs/tahoe-lafs.git/blobdiff - src/allmydata/scripts/tahoe_cp.py
tahoe cp: ignore trailing slash on source arguments
[tahoe-lafs/tahoe-lafs.git] / src / allmydata / scripts / tahoe_cp.py
index 661272afd3a9315ad713f3a0826df9d7edd98f08..a5cfe16a1a25b7b65e9ca9d9856256457b7da9a4 100644 (file)
@@ -2,27 +2,31 @@
 import os.path
 import urllib
 import simplejson
-from allmydata.scripts.common import get_alias, escape_path, DefaultAliasMarker
-from allmydata.scripts.common_http import do_http
+from collections import defaultdict
+from cStringIO import StringIO
+from twisted.python.failure import Failure
+from allmydata.scripts.common import get_alias, escape_path, \
+                                     DefaultAliasMarker, TahoeError
+from allmydata.scripts.common_http import do_http, HTTPError
 from allmydata import uri
+from allmydata.util import fileutil
+from allmydata.util.fileutil import abspath_expanduser_unicode, precondition_abspath
+from allmydata.util.encodingutil import unicode_to_url, listdir_unicode, quote_output, \
+    quote_local_unicode_path, to_str
+from allmydata.util.assertutil import precondition, _assert
 
-def ascii_or_none(s):
-    if s is None:
-        return s
-    return str(s)
 
-class WriteError(Exception):
-    pass
-class ReadError(Exception):
-    pass
+class MissingSourceError(TahoeError):
+    def __init__(self, name, quotefn=quote_output):
+        TahoeError.__init__(self, "No such file or directory %s" % quotefn(name))
+
 
 def GET_to_file(url):
     resp = do_http("GET", url)
     if resp.status == 200:
         return resp
-    raise ReadError("Error during GET: %s %s %s" % (resp.status,
-                                                    resp.reason,
-                                                    resp.read()))
+    raise HTTPError("Error during GET", resp)
+
 def GET_to_string(url):
     f = GET_to_file(url)
     return f.read()
@@ -31,117 +35,161 @@ def PUT(url, data):
     resp = do_http("PUT", url, data)
     if resp.status in (200, 201):
         return resp.read()
-    raise WriteError("Error during PUT: %s %s %s" % (resp.status, resp.reason,
-                                                     resp.read()))
+    raise HTTPError("Error during PUT", resp)
+
+def POST(url, data):
+    resp = do_http("POST", url, data)
+    if resp.status in (200, 201):
+        return resp.read()
+    raise HTTPError("Error during POST", resp)
 
 def mkdir(targeturl):
-    resp = do_http("POST", targeturl)
+    url = targeturl + "?t=mkdir"
+    resp = do_http("POST", url)
     if resp.status in (200, 201):
         return resp.read().strip()
-    raise WriteError("Error during mkdir: %s %s %s" % (resp.status, resp.reason,
-                                                       resp.read()))
+    raise HTTPError("Error during mkdir", resp)
 
 def make_tahoe_subdirectory(nodeurl, parent_writecap, name):
     url = nodeurl + "/".join(["uri",
                               urllib.quote(parent_writecap),
-                              urllib.quote(name),
+                              urllib.quote(unicode_to_url(name)),
                               ]) + "?t=mkdir"
     resp = do_http("POST", url)
     if resp.status in (200, 201):
         return resp.read().strip()
-    raise WriteError("Error during mkdir: %s %s %s" % (resp.status, resp.reason,
-                                                       resp.read()))
+    raise HTTPError("Error during mkdir", resp)
 
 
 class LocalFileSource:
-    def __init__(self, pathname):
+    def __init__(self, pathname, basename):
+        precondition_abspath(pathname)
         self.pathname = pathname
+        self._basename = basename
+
+    def basename(self):
+        return self._basename
 
     def need_to_copy_bytes(self):
         return True
 
-    def open(self):
+    def open(self, caps_only):
         return open(self.pathname, "rb")
 
 class LocalFileTarget:
     def __init__(self, pathname):
+        precondition_abspath(pathname)
         self.pathname = pathname
 
+    def put_file(self, inf):
+        fileutil.put_file(self.pathname, inf)
+
+class LocalMissingTarget:
+    def __init__(self, pathname):
+        precondition_abspath(pathname)
+        self.pathname = pathname
+
+    def put_file(self, inf):
+        fileutil.put_file(self.pathname, inf)
+
 class LocalDirectorySource:
-    def __init__(self, progressfunc, pathname):
+    def __init__(self, progressfunc, pathname, basename):
+        precondition_abspath(pathname)
+
         self.progressfunc = progressfunc
         self.pathname = pathname
         self.children = None
+        self._basename = basename
+
+    def basename(self):
+        return self._basename
 
     def populate(self, recurse):
-        children = os.listdir(self.pathname)
+        if self.children is not None:
+            return
+        self.children = {}
+        children = listdir_unicode(self.pathname)
         for i,n in enumerate(children):
-            self.progressfunc("examining %d of %d" % (i, len(children)))
+            self.progressfunc("examining %d of %d" % (i+1, len(children)))
             pn = os.path.join(self.pathname, n)
             if os.path.isdir(pn):
-                child = LocalDirectorySource(self.progressfunc, pn)
+                child = LocalDirectorySource(self.progressfunc, pn, n)
                 self.children[n] = child
                 if recurse:
-                    child.populate(True)
+                    child.populate(recurse=True)
+            elif os.path.isfile(pn):
+                self.children[n] = LocalFileSource(pn, n)
             else:
-                assert os.path.isfile(pn)
-                self.children[n] = LocalFileSource(pn)
+                # Could be dangling symlink; probably not copy-able.
+                # TODO: output a warning
+                pass
 
 class LocalDirectoryTarget:
     def __init__(self, progressfunc, pathname):
+        precondition_abspath(pathname)
+
         self.progressfunc = progressfunc
         self.pathname = pathname
         self.children = None
 
     def populate(self, recurse):
-        children = os.listdir(self.pathname)
+        if self.children is not None:
+            return
+        self.children = {}
+        children = listdir_unicode(self.pathname)
         for i,n in enumerate(children):
-            self.progressfunc("examining %d of %d" % (i, len(children)))
+            self.progressfunc("examining %d of %d" % (i+1, len(children)))
             pn = os.path.join(self.pathname, n)
             if os.path.isdir(pn):
                 child = LocalDirectoryTarget(self.progressfunc, pn)
                 self.children[n] = child
                 if recurse:
-                    child.populate(True)
+                    child.populate(recurse=True)
             else:
                 assert os.path.isfile(pn)
                 self.children[n] = LocalFileTarget(pn)
 
     def get_child_target(self, name):
+        precondition(isinstance(name, unicode), name)
+        precondition(len(name), name) # don't want ""
         if self.children is None:
-            self.populate(False)
+            self.populate(recurse=False)
         if name in self.children:
             return self.children[name]
         pathname = os.path.join(self.pathname, name)
         os.makedirs(pathname)
-        return LocalDirectoryTarget(self.progressfunc, pathname)
+        child = LocalDirectoryTarget(self.progressfunc, pathname)
+        self.children[name] = child
+        return child
 
     def put_file(self, name, inf):
+        precondition(isinstance(name, unicode), name)
         pathname = os.path.join(self.pathname, name)
-        outf = open(pathname, "wb")
-        while True:
-            data = inf.read(32768)
-            if not data:
-                break
-            outf.write(data)
-        outf.close()
+        fileutil.put_file(pathname, inf)
 
     def set_children(self):
         pass
 
+
 class TahoeFileSource:
-    def __init__(self, nodeurl, mutable, writecap, readcap):
+    def __init__(self, nodeurl, mutable, writecap, readcap, basename):
         self.nodeurl = nodeurl
         self.mutable = mutable
         self.writecap = writecap
         self.readcap = readcap
+        self._basename = basename # unicode, or None for raw filecaps
+
+    def basename(self):
+        return self._basename
 
     def need_to_copy_bytes(self):
         if self.mutable:
             return True
         return False
 
-    def open(self):
+    def open(self, caps_only):
+        if caps_only:
+            return StringIO(self.readcap)
         url = self.nodeurl + "uri/" + urllib.quote(self.readcap)
         return GET_to_file(url)
 
@@ -149,17 +197,34 @@ class TahoeFileSource:
         return self.writecap or self.readcap
 
 class TahoeFileTarget:
-    def __init__(self, nodeurl, mutable, writecap, readcap):
+    def __init__(self, nodeurl, mutable, writecap, readcap, url):
         self.nodeurl = nodeurl
         self.mutable = mutable
         self.writecap = writecap
         self.readcap = readcap
+        self.url = url
+
+    def put_file(self, inf):
+        # We want to replace this object in-place.
+        assert self.url
+        # our do_http() call currently requires a string or a filehandle with
+        # a real .seek
+        if not hasattr(inf, "seek"):
+            inf = inf.read()
+        PUT(self.url, inf)
+        # TODO: this always creates immutable files. We might want an option
+        # to always create mutable files, or to copy mutable files into new
+        # mutable files. ticket #835
 
 class TahoeDirectorySource:
-    def __init__(self, nodeurl, cache, progressfunc):
+    def __init__(self, nodeurl, cache, progressfunc, basename):
         self.nodeurl = nodeurl
         self.cache = cache
         self.progressfunc = progressfunc
+        self._basename = basename # unicode, or None for raw dircaps
+
+    def basename(self):
+        return self._basename
 
     def init_from_grid(self, writecap, readcap):
         self.writecap = writecap
@@ -167,43 +232,79 @@ class TahoeDirectorySource:
         bestcap = writecap or readcap
         url = self.nodeurl + "uri/%s" % urllib.quote(bestcap)
         resp = do_http("GET", url + "?t=json")
-        assert resp.status == 200
+        if resp.status != 200:
+            raise HTTPError("Error examining source directory", resp)
         parsed = simplejson.loads(resp.read())
         nodetype, d = parsed
         assert nodetype == "dirnode"
         self.mutable = d.get("mutable", False) # older nodes don't provide it
-        self.children_d = d["children"]
+        self.children_d = dict( [(unicode(name),value)
+                                 for (name,value)
+                                 in d["children"].iteritems()] )
+        self.children = None
+
+    def init_from_parsed(self, parsed):
+        nodetype, d = parsed
+        self.writecap = to_str(d.get("rw_uri"))
+        self.readcap = to_str(d.get("ro_uri"))
+        self.mutable = d.get("mutable", False) # older nodes don't provide it
+        self.children_d = dict( [(unicode(name),value)
+                                 for (name,value)
+                                 in d["children"].iteritems()] )
         self.children = None
 
     def populate(self, recurse):
+        if self.children is not None:
+            return
         self.children = {}
-        for i,(name, data) in enumerate(self.children_d):
-            self.progressfunc("examining %d of %d" % (i, len(self.children_d)))
+        for i,(name, data) in enumerate(self.children_d.items()):
+            self.progressfunc("examining %d of %d" % (i+1, len(self.children_d)))
             if data[0] == "filenode":
                 mutable = data[1].get("mutable", False)
-                writecap = ascii_or_none(data[1].get("rw_uri"))
-                readcap = ascii_or_none(data[1].get("ro_uri"))
+                writecap = to_str(data[1].get("rw_uri"))
+                readcap = to_str(data[1].get("ro_uri"))
                 self.children[name] = TahoeFileSource(self.nodeurl, mutable,
-                                                      writecap, readcap)
-            else:
-                assert data[0] == "dirnode"
-                writecap = ascii_or_none(data[1].get("rw_uri"))
-                readcap = ascii_or_none(data[1].get("ro_uri"))
+                                                      writecap, readcap, name)
+            elif data[0] == "dirnode":
+                writecap = to_str(data[1].get("rw_uri"))
+                readcap = to_str(data[1].get("ro_uri"))
                 if writecap and writecap in self.cache:
                     child = self.cache[writecap]
                 elif readcap and readcap in self.cache:
                     child = self.cache[readcap]
                 else:
                     child = TahoeDirectorySource(self.nodeurl, self.cache,
-                                                 self.progressfunc)
+                                                 self.progressfunc, name)
                     child.init_from_grid(writecap, readcap)
                     if writecap:
                         self.cache[writecap] = child
                     if readcap:
                         self.cache[readcap] = child
                     if recurse:
-                        child.populate(True)
+                        child.populate(recurse=True)
                 self.children[name] = child
+            else:
+                # TODO: there should be an option to skip unknown nodes.
+                raise TahoeError("Cannot copy unknown nodes (ticket #839). "
+                                 "You probably need to use a later version of "
+                                 "Tahoe-LAFS to copy this directory.")
+
+class TahoeMissingTarget:
+    def __init__(self, url):
+        self.url = url
+
+    def put_file(self, inf):
+        # We want to replace this object in-place.
+        if not hasattr(inf, "seek"):
+            inf = inf.read()
+        PUT(self.url, inf)
+        # TODO: this always creates immutable files. We might want an option
+        # to always create mutable files, or to copy mutable files into new
+        # mutable files.
+
+    def put_uri(self, filecap):
+        # I'm not sure this will always work
+        return PUT(self.url + "?t=uri", filecap)
 
 class TahoeDirectoryTarget:
     def __init__(self, nodeurl, cache, progressfunc):
@@ -212,41 +313,61 @@ class TahoeDirectoryTarget:
         self.progressfunc = progressfunc
         self.new_children = {}
 
+    def init_from_parsed(self, parsed):
+        nodetype, d = parsed
+        self.writecap = to_str(d.get("rw_uri"))
+        self.readcap = to_str(d.get("ro_uri"))
+        self.mutable = d.get("mutable", False) # older nodes don't provide it
+        self.children_d = dict( [(unicode(name),value)
+                                 for (name,value)
+                                 in d["children"].iteritems()] )
+        self.children = None
+
     def init_from_grid(self, writecap, readcap):
         self.writecap = writecap
         self.readcap = readcap
         bestcap = writecap or readcap
         url = self.nodeurl + "uri/%s" % urllib.quote(bestcap)
         resp = do_http("GET", url + "?t=json")
-        assert resp.status == 200
+        if resp.status != 200:
+            raise HTTPError("Error examining target directory", resp)
         parsed = simplejson.loads(resp.read())
         nodetype, d = parsed
         assert nodetype == "dirnode"
         self.mutable = d.get("mutable", False) # older nodes don't provide it
-        self.children_d = d["children"]
+        self.children_d = dict( [(unicode(name),value)
+                                 for (name,value)
+                                 in d["children"].iteritems()] )
         self.children = None
 
     def just_created(self, writecap):
+        # TODO: maybe integrate this with the constructor
         self.writecap = writecap
-        self.readcap = uri.from_string().get_readonly().to_string()
+        self.readcap = uri.from_string(writecap).get_readonly().to_string()
         self.mutable = True
         self.children_d = {}
         self.children = {}
 
     def populate(self, recurse):
+        if self.children is not None:
+            return
         self.children = {}
-        for i,(name, data) in enumerate(self.children_d):
-            self.progressfunc("examining %d of %d" % (i, len(self.children_d)))
+        for i,(name, data) in enumerate(self.children_d.items()):
+            self.progressfunc("examining %d of %d" % (i+1, len(self.children_d)))
             if data[0] == "filenode":
                 mutable = data[1].get("mutable", False)
-                writecap = ascii_or_none(data[1].get("rw_uri"))
-                readcap = ascii_or_none(data[1].get("ro_uri"))
+                writecap = to_str(data[1].get("rw_uri"))
+                readcap = to_str(data[1].get("ro_uri"))
+                url = None
+                if self.writecap:
+                    url = self.nodeurl + "/".join(["uri",
+                                                   urllib.quote(self.writecap),
+                                                   urllib.quote(unicode_to_url(name))])
                 self.children[name] = TahoeFileTarget(self.nodeurl, mutable,
-                                                      writecap, readcap)
-            else:
-                assert data[0] == "dirnode"
-                writecap = ascii_or_none(data[1].get("rw_uri"))
-                readcap = ascii_or_none(data[1].get("ro_uri"))
+                                                      writecap, readcap, url)
+            elif data[0] == "dirnode":
+                writecap = to_str(data[1].get("rw_uri"))
+                readcap = to_str(data[1].get("ro_uri"))
                 if writecap and writecap in self.cache:
                     child = self.cache[writecap]
                 elif readcap and readcap in self.cache:
@@ -260,13 +381,19 @@ class TahoeDirectoryTarget:
                     if readcap:
                         self.cache[readcap] = child
                     if recurse:
-                        child.populate(True)
+                        child.populate(recurse=True)
                 self.children[name] = child
+            else:
+                # TODO: there should be an option to skip unknown nodes.
+                raise TahoeError("Cannot copy unknown nodes (ticket #839). "
+                                 "You probably need to use a later version of "
+                                 "Tahoe-LAFS to copy this directory.")
 
     def get_child_target(self, name):
         # return a new target for a named subdirectory of this dir
+        precondition(isinstance(name, unicode), name)
         if self.children is None:
-            self.populate(False)
+            self.populate(recurse=False)
         if name in self.children:
             return self.children[name]
         writecap = make_tahoe_subdirectory(self.nodeurl, self.writecap, name)
@@ -277,293 +404,412 @@ class TahoeDirectoryTarget:
         return child
 
     def put_file(self, name, inf):
+        precondition(isinstance(name, unicode), name)
         url = self.nodeurl + "uri"
-        # I'm not sure this will work: we might not have .seek, so if not:
-        #inf = inf.read()
+        if not hasattr(inf, "seek"):
+            inf = inf.read()
 
-        # TODO: this always creates immutable files. We might want an option
-        # to always create mutable files, or to copy mutable files into new
-        # mutable files.
-        resp = do_http("PUT", url, inf)
-        filecap = check_PUT(resp)
-        self.new_children[name] = filecap
+        if self.children is None:
+            self.populate(recurse=False)
+
+        # Check to see if we already have a mutable file by this name.
+        # If so, overwrite that file in place.
+        if name in self.children and self.children[name].mutable:
+            self.children[name].put_file(inf)
+        else:
+            filecap = PUT(url, inf)
+            # TODO: this always creates immutable files. We might want an option
+            # to always create mutable files, or to copy mutable files into new
+            # mutable files.
+            self.new_children[name] = filecap
 
     def put_uri(self, name, filecap):
+        precondition(isinstance(name, unicode), name)
         self.new_children[name] = filecap
 
     def set_children(self):
         if not self.new_children:
             return
-        # XXX TODO t=set_children
+        url = (self.nodeurl + "uri/" + urllib.quote(self.writecap)
+               + "?t=set_children")
+        set_data = {}
+        for (name, filecap) in self.new_children.items():
+            # it just so happens that ?t=set_children will accept both file
+            # read-caps and write-caps as ['rw_uri'], and will handle either
+            # correctly. So don't bother trying to figure out whether the one
+            # we have is read-only or read-write.
+            # TODO: think about how this affects forward-compatibility for
+            # unknown caps
+            set_data[name] = ["filenode", {"rw_uri": filecap}]
+        body = simplejson.dumps(set_data)
+        POST(url, body)
+
+FileSources = (LocalFileSource, TahoeFileSource)
+DirectorySources = (LocalDirectorySource, TahoeDirectorySource)
+FileTargets = (LocalFileTarget, TahoeFileTarget)
+DirectoryTargets = (LocalDirectoryTarget, TahoeDirectoryTarget)
+MissingTargets = (LocalMissingTarget, TahoeMissingTarget)
 
 class Copier:
-    def __init__(self, nodeurl, config, aliases,
-                 verbosity, stdout, stderr,
-                 progressfunc=None):
+
+    def do_copy(self, options, progressfunc=None):
+        if options['quiet']:
+            verbosity = 0
+        elif options['verbose']:
+            verbosity = 2
+        else:
+            verbosity = 1
+
+        nodeurl = options['node-url']
         if nodeurl[-1] != "/":
             nodeurl += "/"
         self.nodeurl = nodeurl
         self.progressfunc = progressfunc
-        self.config = config
-        self.aliases = aliases
+        self.options = options
+        self.aliases = options.aliases
         self.verbosity = verbosity
-        self.stdout = stdout
-        self.stderr = stderr
-
-    def to_stderr(self, text):
-        print >>self.stderr, text
-
-    def do_copy(self, sources, destination):
-        recursive = self.config["recursive"]
-
-        #print "sources:", sources
-        #print "dest:", destination
-
-        target = self.get_info(destination)
-        #print target
+        self.stdout = options.stdout
+        self.stderr = options.stderr
+        if verbosity >= 2 and not self.progressfunc:
+            def progress(message):
+                print >>self.stderr, message
+            self.progressfunc = progress
+        self.caps_only = options["caps-only"]
+        self.cache = {}
+        try:
+            status = self.try_copy()
+            return status
+        except TahoeError, te:
+            if verbosity >= 2:
+                Failure().printTraceback(self.stderr)
+                print >>self.stderr
+            te.display(self.stderr)
+            return 1
 
-        source_info = dict([(self.get_info(source), source)
-                            for source in sources])
-        source_files = [s for s in source_info if s[0] == "file"]
-        source_dirs = [s for s in source_info if s[0] == "directory"]
-        empty_sources = [s for s in source_info if s[0] == "empty"]
-        if empty_sources:
-            for s in empty_sources:
-                self.to_stderr("no such file or directory %s" % source_info[s])
+    def try_copy(self):
+        """
+        All usage errors are caught here, not in a subroutine. This bottoms
+        out in copy_file_to_file() or copy_things_to_directory().
+        """
+        source_specs = self.options.sources
+        destination_spec = self.options.destination
+        recursive = self.options["recursive"]
+
+        target = self.get_target_info(destination_spec)
+        precondition(isinstance(target, FileTargets + DirectoryTargets + MissingTargets), target)
+        target_has_trailing_slash = destination_spec.endswith("/")
+
+        sources = [] # list of source objects
+        for ss in source_specs:
+            si = self.get_source_info(ss)
+            precondition(isinstance(si, FileSources + DirectorySources), si)
+            sources.append(si)
+
+        # if any source is a directory, must use -r
+        # if target is missing:
+        #    if source is a single file, target will be a file
+        #    else target will be a directory, so mkdir it
+        # if there are multiple sources, target must be a dir
+        # if target is a file, source must be a single file
+        # if target is directory, sources must be named or a dir
+
+        have_source_dirs = any([isinstance(s, DirectorySources)
+                                for s in sources])
+        if have_source_dirs and not recursive:
+            # 'cp dir target' without -r: error
+            self.to_stderr("cannot copy directories without --recursive")
             return 1
+        del recursive # -r is only used for signalling errors
+
+        if isinstance(target, FileTargets):
+            target_is_file = True
+        elif isinstance(target, DirectoryTargets):
+            target_is_file = False
+        else: # isinstance(target, MissingTargets)
+            if len(sources) == 1 and isinstance(sources[0], FileSources):
+                target_is_file = True
+            else:
+                target_is_file = False
 
-        #print "source_files", " ".join([source_info[s] for s in source_files])
-        #print "source_dirs", " ".join([source_info[s] for s in source_dirs])
+        if target_is_file and target_has_trailing_slash:
+            self.to_stderr("target is not a directory, but ends with a slash")
+            return 1
 
-        if source_dirs and not recursive:
-            self.to_stderr("cannot copy directories without --recursive")
+        if len(sources) > 1 and target_is_file:
+            self.to_stderr("copying multiple things requires target be a directory")
             return 1
 
-        if target[0] == "file":
-            # cp STUFF foo.txt, where foo.txt already exists. This limits the
-            # possibilities considerably.
-            if len(sources) > 1:
-                self.to_stderr("target '%s' is not a directory" % destination)
-                return 1
-            if source_dirs:
+        if target_is_file:
+            _assert(len(sources) == 1, sources)
+            if not isinstance(sources[0], FileSources):
+                # 'cp -r dir existingfile': error
                 self.to_stderr("cannot copy directory into a file")
                 return 1
-            return self.copy_to_file(source_files[0], target)
-
-        if target[0] == "empty":
-            if recursive:
-                return self.copy_to_directory(source_files, source_dirs, target)
-            if len(sources) > 1:
-                # if we have -r, we'll auto-create the target directory. Without
-                # it, we'll only create a file.
-                self.to_stderr("cannot copy multiple files into a file without -r")
+            return self.copy_file_to_file(sources[0], target)
+
+        # else target is a directory, so each source must be one of:
+        # * a named file (copied to a new file under the target)
+        # * a named directory (causes a new directory of the same name to be
+        #   created under the target, then the contents of the source are
+        #   copied into that directory)
+        # * an unnamed directory (the contents of the source are copied into
+        #   the target, without a new directory being made)
+        #
+        # If any source is an unnamed file, throw an error, since we have no
+        # way to name the output file.
+        _assert(isinstance(target, DirectoryTargets + MissingTargets), target)
+
+        for source in sources:
+            if isinstance(source, FileSources) and source.basename() is None:
+                self.to_stderr("when copying into a directory, all source files must have names, but %s is unnamed" % quote_output(source_specs[0]))
                 return 1
-            # cp file1 newfile
-            return self.copy_to_file(source_files[0], target)
+        return self.copy_things_to_directory(sources, target)
 
-        if target[0] == "directory":
-            return self.copy_to_directory(source_files, source_dirs, target)
+    def to_stderr(self, text):
+        print >>self.stderr, text
 
-        self.to_stderr("unknown target")
-        return 1
+    # FIXME reduce the amount of near-duplicate code between get_target_info
+    # and get_source_info.
 
-    def get_info(self, target):
-        rootcap, path = get_alias(self.aliases, target, None)
+    def get_target_info(self, destination_spec):
+        precondition(isinstance(destination_spec, unicode), destination_spec)
+        rootcap, path_utf8 = get_alias(self.aliases, destination_spec, None)
+        path = path_utf8.decode("utf-8")
         if rootcap == DefaultAliasMarker:
-            # this is a local file
-            pathname = os.path.abspath(os.path.expanduser(path))
+            # no alias, so this is a local file
+            pathname = abspath_expanduser_unicode(path)
             if not os.path.exists(pathname):
-                name = os.path.basename(pathname)
-                return ("empty", "local", name, pathname)
+                t = LocalMissingTarget(pathname)
+            elif os.path.isdir(pathname):
+                t = LocalDirectoryTarget(self.progress, pathname)
+            else:
+                # TODO: should this be _assert? what happens if the target is
+                # a special file?
+                assert os.path.isfile(pathname), pathname
+                t = LocalFileTarget(pathname) # non-empty
+        else:
+            # this is a tahoe object
+            url = self.nodeurl + "uri/%s" % urllib.quote(rootcap)
+            if path:
+                url += "/" + escape_path(path)
+
+            resp = do_http("GET", url + "?t=json")
+            if resp.status == 404:
+                # doesn't exist yet
+                t = TahoeMissingTarget(url)
+            elif resp.status == 200:
+                parsed = simplejson.loads(resp.read())
+                nodetype, d = parsed
+                if nodetype == "dirnode":
+                    t = TahoeDirectoryTarget(self.nodeurl, self.cache,
+                                             self.progress)
+                    t.init_from_parsed(parsed)
+                else:
+                    writecap = to_str(d.get("rw_uri"))
+                    readcap = to_str(d.get("ro_uri"))
+                    mutable = d.get("mutable", False)
+                    t = TahoeFileTarget(self.nodeurl, mutable,
+                                        writecap, readcap, url)
+            else:
+                raise HTTPError("Error examining target %s"
+                                 % quote_output(destination_spec), resp)
+        return t
+
+    def get_source_info(self, source_spec):
+        """
+        This turns an argv string into a (Local|Tahoe)(File|Directory)Source.
+        """
+        precondition(isinstance(source_spec, unicode), source_spec)
+        rootcap, path_utf8 = get_alias(self.aliases, source_spec, None)
+        path = path_utf8.decode("utf-8")
+        if rootcap == DefaultAliasMarker:
+            # no alias, so this is a local file
+            pathname = abspath_expanduser_unicode(path)
+            name = os.path.basename(pathname)
+            if not os.path.exists(pathname):
+                raise MissingSourceError(source_spec, quotefn=quote_local_unicode_path)
             if os.path.isdir(pathname):
-                return ("directory", "local", pathname)
+                t = LocalDirectorySource(self.progress, pathname, name)
             else:
                 assert os.path.isfile(pathname)
-                name = os.path.basename(pathname)
-                return ("file", "local", name, pathname)
+                t = LocalFileSource(pathname, name) # non-empty
         else:
             # this is a tahoe object
             url = self.nodeurl + "uri/%s" % urllib.quote(rootcap)
             name = None
             if path:
+                if path.endswith("/"):
+                    path = path[:-1]
                 url += "/" + escape_path(path)
-                last_slash = path.rfind("/")
+                last_slash = path.rfind(u"/")
                 name = path
-                if last_slash:
+                if last_slash != -1:
                     name = path[last_slash+1:]
-            return self.get_info_tahoe_dirnode(url, name)
-
-    def get_info_tahoe_dirnode(self, url, name):
-        resp = do_http("GET", url + "?t=json")
-        if resp.status == 404:
-            # doesn't exist yet
-            return ("empty", "tahoe", False, name, None, None, url)
-        parsed = simplejson.loads(resp.read())
-        nodetype, d = parsed
-        mutable = d.get("mutable", False) # older nodes don't provide 'mutable'
-        rw_uri = ascii_or_none(d.get("rw_uri"))
-        ro_uri = ascii_or_none(d.get("ro_uri"))
-        if nodetype == "dirnode":
-            return ("directory", "tahoe", mutable, name, rw_uri, ro_uri,
-                    d["children"], url)
-        else:
-            return ("file", "tahoe", mutable, name, rw_uri, ro_uri, url)
-
-
-    def get_file_data(self, source):
-        assert source[0] == "file"
-        if source[1] == "local":
-            (ig1, ig2, name, pathname) = source
-            return open(pathname, "rb").read()
-        (ig1, ig2, mutable, name, writecap, readcap, url) = source
-        return GET_to_string(url)
-
-    def put_file_data(self, data, target):
-        assert target[0] in ("file", "empty")
-        if target[1] == "local":
-            (ig1, ig2, name, pathname) = target
-            open(pathname, "wb").write(data)
-            return True
-        (ig1, ig2, mutable, name, writecap, readcap, url) = target
-        return PUT(url, data)
 
-    def put_uri(self, uri, targeturl):
-        return PUT(targeturl + "?t=uri", uri)
-
-    def upload_data(self, data):
-        url = self.nodeurl + "uri"
-        return PUT(url, data)
-
-    def copy_to_file(self, source, target):
-        assert source[0] == "file"
-        # do we need to copy bytes?
-        if source[1] == "local" or source[2] == True or target[1] == "local":
-            # yes
-            data = self.get_file_data(source)
-            self.put_file_data(data, target)
-            return
-        # no, we're getting data from an immutable source, and we're copying
-        # into the tahoe grid, so we can just copy the URI.
-        uri = source[3] or source[4] # prefer rw_uri, fall back to ro_uri
-        # TODO: if the original was mutable, and we're creating the target,
-        # should be we create a mutable file to match? At the moment we always
-        # create immutable files.
-        self.put_uri(uri, target[-1])
-
-    def copy_to_directory(self, source_file_infos, source_dir_infos,
-                          target_info):
-        # step one: build a graph of the source tree. This returns a dictionary,
-        # with child names as keys, and values that are either Directory or File
-        # instances (local or tahoe).
-        source_dirs = self.build_graphs(source_dir_infos)
-
-        # step two: create the top-level target directory object
-        assert target_info[0] in ("empty", "directory")
-        if target_info[1] == "local":
-            pathname = target_info[-1]
-            if not os.path.exists(pathname):
-                os.makedirs(pathname)
-            assert os.path.isdir(pathname)
-            target = LocalDirectoryTarget(self.progressfunc, target_info[-1])
-        else:
-            assert target_info[1] == "tahoe"
-            target = TahoeDirectoryTarget(self.nodeurl, self.cache,
-                                          self.progressfunc)
-            if target_info[0] == "empty":
-                writecap = mkdir(target_info[-1])
-                target.just_created(writecap)
+            resp = do_http("GET", url + "?t=json")
+            if resp.status == 404:
+                raise MissingSourceError(source_spec)
+            elif resp.status != 200:
+                raise HTTPError("Error examining source %s" % quote_output(source_spec),
+                                resp)
+            parsed = simplejson.loads(resp.read())
+            nodetype, d = parsed
+            if nodetype == "dirnode":
+                t = TahoeDirectorySource(self.nodeurl, self.cache,
+                                         self.progress, name)
+                t.init_from_parsed(parsed)
             else:
-                (ig1, ig2, mutable, name, writecap, readcap, url) = target_info
-                target.init_from_grid(writecap, readcap)
-
-        # step three: find a target for each source node, creating
-        # directories as necessary. 'targetmap' is a dictionary that uses
-        # target Directory instances as keys, and has values of
-        # (name->sourceobject) dicts for all the files that need to wind up
-        # there.
+                writecap = to_str(d.get("rw_uri"))
+                readcap = to_str(d.get("ro_uri"))
+                mutable = d.get("mutable", False) # older nodes don't provide it
+                t = TahoeFileSource(self.nodeurl, mutable, writecap, readcap, name)
+        return t
 
-        # sources are all LocalFile/LocalDirectory/TahoeFile/TahoeDirectory
-        # target is LocalDirectory/TahoeDirectory
 
-        self.targetmap = {}
-        self.files_to_copy = 0
+    def need_to_copy_bytes(self, source, target):
+        if source.need_to_copy_bytes:
+            # mutable tahoe files, and local files
+            return True
+        if isinstance(target, (LocalFileTarget, LocalDirectoryTarget)):
+            return True
+        return False
 
-        for source in source_file_infos:
-            if source[1] == "local":
-                (ig1, ig2, name, pathname) = source
-                s = LocalFileSource(pathname)
-            else:
-                assert source[1] == "tahoe"
-                (ig1, ig2, mutable, name, writecap, readcap, url) = source
-                s = TahoeFileSource(self.nodeurl, mutable,
-                                    writecap, readcap)
-            self.attach_to_target(s, name, target)
-            self.files_to_copy += 1
+    def announce_success(self, msg):
+        if self.verbosity >= 1:
+            print >>self.stdout, "Success: %s" % msg
+        return 0
 
-        for source in source_dirs:
-            self.assign_targets(source, target)
+    def copy_file_to_file(self, source, target):
+        precondition(isinstance(source, FileSources), source)
+        precondition(isinstance(target, FileTargets + MissingTargets), target)
+        if self.need_to_copy_bytes(source, target):
+            # if the target is a local directory, this will just write the
+            # bytes to disk. If it is a tahoe directory, it will upload the
+            # data, and stash the new filecap for a later set_children call.
+            f = source.open(self.caps_only)
+            target.put_file(f)
+            return self.announce_success("file copied")
+        # otherwise we're copying tahoe to tahoe, and using immutable files,
+        # so we can just make a link. TODO: this probably won't always work:
+        # need to enumerate the cases and analyze them.
+        target.put_uri(source.bestcap())
+        return self.announce_success("file linked")
+
+    def copy_things_to_directory(self, sources, target):
+        # step one: if the target is missing, we should mkdir it
+        target = self.maybe_create_target(target)
+        target.populate(recurse=False)
+
+        # step two: scan any source dirs, recursively, to find children
+        for s in sources:
+            if isinstance(s, DirectorySources):
+                s.populate(recurse=True)
+            if isinstance(s, FileSources):
+                # each source must have a name, or be a directory
+                _assert(s.basename() is not None, s)
 
-        self.progress("starting copy, %d files, %d directories" %
-                      (self.files_to_copy, len(self.targets)))
-        self.files_copied = 0
-        self.targets_finished = 0
+        # step three: find a target for each source node, creating
+        # directories as necessary. 'targetmap' is a dictionary that uses
+        # target Directory instances as keys, and has values of (name:
+        # sourceobject) dicts for all the files that need to wind up there.
+        targetmap = self.build_targetmap(sources, target)
 
         # step four: walk through the list of targets. For each one, copy all
         # the files. If the target is a TahoeDirectory, upload and create
         # read-caps, then do a set_children to the target directory.
+        self.copy_to_targetmap(targetmap)
 
-        for target in self.targets:
-            self.copy_files(self.targets[target], target)
-            self.targets_finished += 1
-            self.progress("%d/%d directories" %
-                          (self.targets_finished, len(self.targets)))
+        return self.announce_success("files copied")
 
-    def attach_to_target(self, source, name, target):
-        if target not in self.targets:
-            self.targets[target] = {}
-        self.targets[target][name] = source
-        self.files_to_copy += 1
+    def maybe_create_target(self, target):
+        if isinstance(target, LocalMissingTarget):
+            os.makedirs(target.pathname)
+            target = LocalDirectoryTarget(self.progress, target.pathname)
+        elif isinstance(target, TahoeMissingTarget):
+            writecap = mkdir(target.url)
+            target = TahoeDirectoryTarget(self.nodeurl, self.cache,
+                                          self.progress)
+            target.just_created(writecap)
+        # afterwards, or otherwise, it will be a directory
+        precondition(isinstance(target, DirectoryTargets), target)
+        return target
+
+    def build_targetmap(self, sources, target):
+        num_source_files = len([s for s in sources
+                                if isinstance(s, FileSources)])
+        num_source_dirs = len([s for s in sources
+                               if isinstance(s, DirectorySources)])
+        self.progress("attaching sources to targets, "
+                      "%d files / %d dirs in root" %
+                      (num_source_files, num_source_dirs))
+
+        # this maps each target directory to a list of source files that need
+        # to be copied into it. All source files have names.
+        targetmap = defaultdict(list)
+
+        for s in sources:
+            if isinstance(s, FileSources):
+                targetmap[target].append(s)
+            else:
+                _assert(isinstance(s, DirectorySources), s)
+                name = s.basename()
+                if name is not None:
+                    # named sources get a new directory. see #2329
+                    new_target = target.get_child_target(name)
+                else:
+                    # unnamed sources have their contents copied directly
+                    new_target = target
+                self.assign_targets(targetmap, s, new_target)
 
-    def assign_targets(self, source, target):
-        # copy everything in s to the target
-        assert isinstance(source, (LocalDirectorySource, TahoeDirectorySource))
+        self.progress("targets assigned, %s dirs, %s files" %
+                      (len(targetmap), self.count_files_to_copy(targetmap)))
+        return targetmap
 
+    def assign_targets(self, targetmap, source, target):
+        # copy everything in the source into the target
+        precondition(isinstance(source, DirectorySources), source)
         for name, child in source.children.items():
-            if isinstance(child, (LocalDirectorySource, TahoeDirectorySource)):
+            if isinstance(child, DirectorySources):
                 # we will need a target directory for this one
                 subtarget = target.get_child_target(name)
-                self.assign_targets(source, subtarget)
+                self.assign_targets(targetmap, child, subtarget)
             else:
-                assert isinstance(child, (LocalFileSource, TahoeFileSource))
-                self.attach_to_target(source, name, target)
-
+                _assert(isinstance(child, FileSources), child)
+                targetmap[target].append(child)
 
+    def copy_to_targetmap(self, targetmap):
+        files_to_copy = self.count_files_to_copy(targetmap)
+        self.progress("starting copy, %d files, %d directories" %
+                      (files_to_copy, len(targetmap)))
+        files_copied = 0
+        targets_finished = 0
+
+        for target, sources in targetmap.items():
+            _assert(isinstance(target, DirectoryTargets), target)
+            for source in sources:
+                _assert(isinstance(source, FileSources), source)
+                self.copy_file_into_dir(source, source.basename(), target)
+                files_copied += 1
+                self.progress("%d/%d files, %d/%d directories" %
+                              (files_copied, files_to_copy,
+                               targets_finished, len(targetmap)))
+            target.set_children()
+            targets_finished += 1
+            self.progress("%d/%d directories" %
+                          (targets_finished, len(targetmap)))
 
-    def copy_files(self, targetmap, target):
-        for name, source in targetmap.items():
-            assert isinstance(source, (LocalFileSource, TahoeFileSource))
-            self.copy_file(source, name, target)
-            self.files_copied += 1
-            self.progress("%d/%d files, %d/%d directories" %
-                          (self.files_copied, self.files_to_copy,
-                           self.targets_finished, len(self.targets)))
-        target.set_children()
-
-    def need_to_copy_bytes(self, source, target):
-        if source.need_to_copy_bytes:
-            # mutable tahoe files, and local files
-            return True
-        if isinstance(target, LocalDirectoryTarget):
-            return True
-        return False
+    def count_files_to_copy(self, targetmap):
+        return sum([len(sources) for sources in targetmap.values()])
 
-    def copy_file(self, source, name, target):
-        assert isinstance(source, (LocalFileSource, TahoeFileSource))
+    def copy_file_into_dir(self, source, name, target):
+        precondition(isinstance(source, FileSources), source)
+        precondition(isinstance(target, DirectoryTargets), target)
+        precondition(isinstance(name, unicode), name)
         if self.need_to_copy_bytes(source, target):
             # if the target is a local directory, this will just write the
             # bytes to disk. If it is a tahoe directory, it will upload the
             # data, and stash the new filecap for a later set_children call.
-            f = source.open()
+            f = source.open(self.caps_only)
             target.put_file(name, f)
             return
         # otherwise we're copying tahoe to tahoe, and using immutable files,
@@ -572,29 +818,30 @@ class Copier:
 
 
     def progress(self, message):
-        print message
+        #print message
         if self.progressfunc:
             self.progressfunc(message)
 
-    def build_graphs(self, sources):
-        cache = {}
-        graphs = []
-        for source in sources:
-            assert source[0] == "directory"
-            if source[1] == "local":
-                root = LocalDirectorySource(self.progress, source[-1])
-                root.populate(True)
-            else:
-                assert source[1] == "tahoe"
-                (ig1, ig2, mutable, name, writecap, readcap, url) = source
-                root = TahoeDirectorySource(self.nodeurl, cache, self.progress)
-                root.init_from_grid(writecap, readcap)
-                root.populate(True)
-            graphs.append(root)
-        return graphs
-
-
-def copy(nodeurl, config, aliases, sources, destination,
-         verbosity, stdout, stderr):
-    c = Copier(nodeurl, config, aliases, verbosity, stdout, stderr)
-    return c.do_copy(sources, destination)
+
+def copy(options):
+    return Copier().do_copy(options)
+
+# error cases that need improvement:
+#  local-file-in-the-way
+#   touch proposed
+#   tahoe cp -r my:docs/proposed/denver.txt proposed/denver.txt
+#  handling of unknown nodes
+
+# things that maybe should be errors but aren't
+#  local-dir-in-the-way
+#   mkdir denver.txt
+#   tahoe cp -r my:docs/proposed/denver.txt denver.txt
+#   (creates denver.txt/denver.txt)
+
+# error cases that look good:
+#  tahoe cp -r my:docs/missing missing
+#  disconnect servers
+#   tahoe cp -r my:docs/missing missing  -> No JSON object could be decoded
+#  tahoe-file-in-the-way (when we want to make a directory)
+#   tahoe put README my:docs
+#   tahoe cp -r docs/proposed my:docs/proposed