git.rkrishnan.org Git - tahoe-lafs/tahoe-lafs.git/blobdiff - src/allmydata/scripts/tahoe_cp.py
Avoid double-counting source files in 'tahoe cp --verbose'. fixes #1783
[tahoe-lafs/tahoe-lafs.git] / src / allmydata / scripts / tahoe_cp.py
index 479996a935e0938adc24eac5d72471abf09fd1d3..1ad460d897403bec53ebc147dc1ecc8c95336b05 100644 (file)
@@ -2,42 +2,28 @@
 import os.path
 import urllib
 import simplejson
-import sys
 from cStringIO import StringIO
 from twisted.python.failure import Failure
 from allmydata.scripts.common import get_alias, escape_path, \
-                                     DefaultAliasMarker, UnknownAliasError
-from allmydata.scripts.common_http import do_http
+                                     DefaultAliasMarker, TahoeError
+from allmydata.scripts.common_http import do_http, HTTPError
 from allmydata import uri
-from twisted.python import usage
-from allmydata.util.stringutils import unicode_to_url, listdir_unicode, open_unicode
+from allmydata.util import fileutil
+from allmydata.util.fileutil import abspath_expanduser_unicode
+from allmydata.util.encodingutil import unicode_to_url, listdir_unicode, quote_output, to_str
 from allmydata.util.assertutil import precondition
 
 
-def ascii_or_none(s):
-    if s is None:
-        return s
-    return str(s)
+class MissingSourceError(TahoeError):
+    def __init__(self, name):
+        TahoeError.__init__(self, "No such file or directory %s" % quote_output(name))
 
-class TahoeError(Exception):
-    def __init__(self, msg, resp):
-        self.msg = msg
-        self.status = resp.status
-        self.reason = resp.reason
-        self.body = resp.read()
-
-    def display(self, err):
-        print >>err, "%s: %s %s" % (self.msg, self.status, self.reason)
-        print >>err, self.body
-
-class MissingSourceError(Exception):
-    pass
 
 def GET_to_file(url):
     resp = do_http("GET", url)
     if resp.status == 200:
         return resp
-    raise TahoeError("Error during GET", resp)
+    raise HTTPError("Error during GET", resp)
 
 def GET_to_string(url):
     f = GET_to_file(url)
@@ -47,30 +33,30 @@ def PUT(url, data):
     resp = do_http("PUT", url, data)
     if resp.status in (200, 201):
         return resp.read()
-    raise TahoeError("Error during PUT", resp)
+    raise HTTPError("Error during PUT", resp)
 
 def POST(url, data):
     resp = do_http("POST", url, data)
     if resp.status in (200, 201):
         return resp.read()
-    raise TahoeError("Error during POST", resp)
+    raise HTTPError("Error during POST", resp)
 
 def mkdir(targeturl):
     url = targeturl + "?t=mkdir"
     resp = do_http("POST", url)
     if resp.status in (200, 201):
         return resp.read().strip()
-    raise TahoeError("Error during mkdir", resp)
+    raise HTTPError("Error during mkdir", resp)
 
 def make_tahoe_subdirectory(nodeurl, parent_writecap, name):
     url = nodeurl + "/".join(["uri",
                               urllib.quote(parent_writecap),
-                              urllib.quote(name),
+                              urllib.quote(unicode_to_url(name)),
                               ]) + "?t=mkdir"
     resp = do_http("POST", url)
     if resp.status in (200, 201):
         return resp.read().strip()
-    raise TahoeError("Error during mkdir", resp)
+    raise HTTPError("Error during mkdir", resp)
 
 
 class LocalFileSource:
@@ -82,20 +68,17 @@ class LocalFileSource:
         return True
 
     def open(self, caps_only):
-        return open(self.pathname, "rb")
+        return open(os.path.expanduser(self.pathname), "rb")
+
 
 class LocalFileTarget:
     def __init__(self, pathname):
         precondition(isinstance(pathname, unicode), pathname)
         self.pathname = pathname
+
     def put_file(self, inf):
-        outf = open(self.pathname, "wb")
-        while True:
-            data = inf.read(32768)
-            if not data:
-                break
-            outf.write(data)
-        outf.close()
+        fileutil.put_file(self.pathname, inf)
+
 
 class LocalMissingTarget:
     def __init__(self, pathname):
@@ -103,13 +86,8 @@ class LocalMissingTarget:
         self.pathname = pathname
 
     def put_file(self, inf):
-        outf = open(self.pathname, "wb")
-        while True:
-            data = inf.read(32768)
-            if not data:
-                break
-            outf.write(data)
-        outf.close()
+        fileutil.put_file(self.pathname, inf)
+
 
 class LocalDirectorySource:
     def __init__(self, progressfunc, pathname):
@@ -125,7 +103,7 @@ class LocalDirectorySource:
         self.children = {}
         children = listdir_unicode(self.pathname)
         for i,n in enumerate(children):
-            self.progressfunc("examining %d of %d" % (i, len(children)))
+            self.progressfunc("examining %d of %d" % (i+1, len(children)))
             pn = os.path.join(self.pathname, n)
             if os.path.isdir(pn):
                 child = LocalDirectorySource(self.progressfunc, pn)
@@ -136,6 +114,7 @@ class LocalDirectorySource:
                 self.children[n] = LocalFileSource(pn)
             else:
                 # Could be dangling symlink; probably not copy-able.
+                # TODO: output a warning
                 pass
 
 class LocalDirectoryTarget:
@@ -152,7 +131,8 @@ class LocalDirectoryTarget:
         self.children = {}
         children = listdir_unicode(self.pathname)
         for i,n in enumerate(children):
-            self.progressfunc("examining %d of %d" % (i, len(children)))
+            self.progressfunc("examining %d of %d" % (i+1, len(children)))
+            n = unicode(n)
             pn = os.path.join(self.pathname, n)
             if os.path.isdir(pn):
                 child = LocalDirectoryTarget(self.progressfunc, pn)
@@ -175,13 +155,7 @@ class LocalDirectoryTarget:
     def put_file(self, name, inf):
         precondition(isinstance(name, unicode), name)
         pathname = os.path.join(self.pathname, name)
-        outf = open_unicode(pathname, "wb")
-        while True:
-            data = inf.read(32768)
-            if not data:
-                break
-            outf.write(data)
-        outf.close()
+        fileutil.put_file(pathname, inf)
 
     def set_children(self):
         pass
@@ -240,7 +214,7 @@ class TahoeDirectorySource:
         url = self.nodeurl + "uri/%s" % urllib.quote(bestcap)
         resp = do_http("GET", url + "?t=json")
         if resp.status != 200:
-            raise TahoeError("Error examining source directory", resp)
+            raise HTTPError("Error examining source directory", resp)
         parsed = simplejson.loads(resp.read())
         nodetype, d = parsed
         assert nodetype == "dirnode"
@@ -252,8 +226,8 @@ class TahoeDirectorySource:
 
     def init_from_parsed(self, parsed):
         nodetype, d = parsed
-        self.writecap = ascii_or_none(d.get("rw_uri"))
-        self.readcap = ascii_or_none(d.get("ro_uri"))
+        self.writecap = to_str(d.get("rw_uri"))
+        self.readcap = to_str(d.get("ro_uri"))
         self.mutable = d.get("mutable", False) # older nodes don't provide it
         self.children_d = dict( [(unicode(name),value)
                                  for (name,value)
@@ -265,16 +239,16 @@ class TahoeDirectorySource:
             return
         self.children = {}
         for i,(name, data) in enumerate(self.children_d.items()):
-            self.progressfunc("examining %d of %d" % (i, len(self.children_d)))
+            self.progressfunc("examining %d of %d" % (i+1, len(self.children_d)))
             if data[0] == "filenode":
                 mutable = data[1].get("mutable", False)
-                writecap = ascii_or_none(data[1].get("rw_uri"))
-                readcap = ascii_or_none(data[1].get("ro_uri"))
+                writecap = to_str(data[1].get("rw_uri"))
+                readcap = to_str(data[1].get("ro_uri"))
                 self.children[name] = TahoeFileSource(self.nodeurl, mutable,
                                                       writecap, readcap)
             elif data[0] == "dirnode":
-                writecap = ascii_or_none(data[1].get("rw_uri"))
-                readcap = ascii_or_none(data[1].get("ro_uri"))
+                writecap = to_str(data[1].get("rw_uri"))
+                readcap = to_str(data[1].get("ro_uri"))
                 if writecap and writecap in self.cache:
                     child = self.cache[writecap]
                 elif readcap and readcap in self.cache:
@@ -322,8 +296,8 @@ class TahoeDirectoryTarget:
 
     def init_from_parsed(self, parsed):
         nodetype, d = parsed
-        self.writecap = ascii_or_none(d.get("rw_uri"))
-        self.readcap = ascii_or_none(d.get("ro_uri"))
+        self.writecap = to_str(d.get("rw_uri"))
+        self.readcap = to_str(d.get("ro_uri"))
         self.mutable = d.get("mutable", False) # older nodes don't provide it
         self.children_d = dict( [(unicode(name),value)
                                  for (name,value)
@@ -337,7 +311,7 @@ class TahoeDirectoryTarget:
         url = self.nodeurl + "uri/%s" % urllib.quote(bestcap)
         resp = do_http("GET", url + "?t=json")
         if resp.status != 200:
-            raise TahoeError("Error examining target directory", resp)
+            raise HTTPError("Error examining target directory", resp)
         parsed = simplejson.loads(resp.read())
         nodetype, d = parsed
         assert nodetype == "dirnode"
@@ -359,11 +333,11 @@ class TahoeDirectoryTarget:
             return
         self.children = {}
         for i,(name, data) in enumerate(self.children_d.items()):
-            self.progressfunc("examining %d of %d" % (i, len(self.children_d)))
+            self.progressfunc("examining %d of %d" % (i+1, len(self.children_d)))
             if data[0] == "filenode":
                 mutable = data[1].get("mutable", False)
-                writecap = ascii_or_none(data[1].get("rw_uri"))
-                readcap = ascii_or_none(data[1].get("ro_uri"))
+                writecap = to_str(data[1].get("rw_uri"))
+                readcap = to_str(data[1].get("ro_uri"))
                 url = None
                 if self.writecap:
                     url = self.nodeurl + "/".join(["uri",
@@ -372,8 +346,8 @@ class TahoeDirectoryTarget:
                 self.children[name] = TahoeFileTarget(self.nodeurl, mutable,
                                                       writecap, readcap, url)
             elif data[0] == "dirnode":
-                writecap = ascii_or_none(data[1].get("rw_uri"))
-                readcap = ascii_or_none(data[1].get("ro_uri"))
+                writecap = to_str(data[1].get("rw_uri"))
+                readcap = to_str(data[1].get("ro_uri"))
                 if writecap and writecap in self.cache:
                     child = self.cache[writecap]
                 elif readcap and readcap in self.cache:
@@ -412,11 +386,20 @@ class TahoeDirectoryTarget:
         url = self.nodeurl + "uri"
         if not hasattr(inf, "seek"):
             inf = inf.read()
-        filecap = PUT(url, inf)
-        # TODO: this always creates immutable files. We might want an option
-        # to always create mutable files, or to copy mutable files into new
-        # mutable files.
-        self.new_children[name] = filecap
+
+        if self.children is None:
+            self.populate(False)
+
+        # Check to see if we already have a mutable file by this name.
+        # If so, overwrite that file in place.
+        if name in self.children and self.children[name].mutable:
+            self.children[name].put_file(inf)
+        else:
+            filecap = PUT(url, inf)
+            # TODO: this always creates immutable files. We might want an option
+            # to always create mutable files, or to copy mutable files into new
+            # mutable files.
+            self.new_children[name] = filecap
 
     def put_uri(self, name, filecap):
         self.new_children[name] = filecap
@@ -468,8 +451,9 @@ class Copier:
             status = self.try_copy()
             return status
         except TahoeError, te:
-            Failure().printTraceback(self.stderr)
-            print >>self.stderr
+            if verbosity >= 2:
+                Failure().printTraceback(self.stderr)
+                print >>self.stderr
             te.display(self.stderr)
             return 1
 
@@ -478,23 +462,12 @@ class Copier:
         destination_spec = self.options.destination
         recursive = self.options["recursive"]
 
-        try:
-            target = self.get_target_info(destination_spec)
-        except UnknownAliasError, e:
-            self.to_stderr("error: %s" % e.args[0])
-            return 1
+        target = self.get_target_info(destination_spec)
 
-        try:
-            sources = [] # list of (name, source object)
-            for ss in source_specs:
-                name, source = self.get_source_info(ss)
-                sources.append( (name, source) )
-        except MissingSourceError, e:
-            self.to_stderr("No such file or directory %s" % e.args[0])
-            return 1
-        except UnknownAliasError, e:
-            self.to_stderr("error: %s" % e.args[0])
-            return 1
+        sources = [] # list of (name, source object)
+        for ss in source_specs:
+            name, source = self.get_source_info(ss)
+            sources.append( (name, source) )
 
         have_source_dirs = bool([s for (name,s) in sources
                                  if isinstance(s, (LocalDirectorySource,
@@ -508,7 +481,7 @@ class Copier:
             # cp STUFF foo.txt, where foo.txt already exists. This limits the
             # possibilities considerably.
             if len(sources) > 1:
-                self.to_stderr("target '%s' is not a directory" % destination_spec)
+                self.to_stderr("target %s is not a directory" % quote_output(destination_spec))
                 return 1
             if have_source_dirs:
                 self.to_stderr("cannot copy directory into a file")
@@ -529,7 +502,7 @@ class Copier:
             return self.copy_file(source, target)
 
         if isinstance(target, (LocalDirectoryTarget, TahoeDirectoryTarget)):
-            # We're copying to an existing directory -- make sure that we 
+            # We're copying to an existing directory -- make sure that we
             # have target names for everything
             for (name, source) in sources:
                 if name is None and isinstance(source, TahoeFileSource):
@@ -548,7 +521,7 @@ class Copier:
         rootcap, path = get_alias(self.aliases, destination_spec, None)
         if rootcap == DefaultAliasMarker:
             # no alias, so this is a local file
-            pathname = os.path.abspath(os.path.expanduser(path))
+            pathname = abspath_expanduser_unicode(path.decode('utf-8'))
             if not os.path.exists(pathname):
                 t = LocalMissingTarget(pathname)
             elif os.path.isdir(pathname):
@@ -574,21 +547,21 @@ class Copier:
                                              self.progress)
                     t.init_from_parsed(parsed)
                 else:
-                    writecap = ascii_or_none(d.get("rw_uri"))
-                    readcap = ascii_or_none(d.get("ro_uri"))
+                    writecap = to_str(d.get("rw_uri"))
+                    readcap = to_str(d.get("ro_uri"))
                     mutable = d.get("mutable", False)
                     t = TahoeFileTarget(self.nodeurl, mutable,
                                         writecap, readcap, url)
             else:
-                raise TahoeError("Error examining target '%s'"
-                                 % destination_spec, resp)
+                raise HTTPError("Error examining target %s"
+                                 % quote_output(destination_spec), resp)
         return t
 
     def get_source_info(self, source_spec):
         rootcap, path = get_alias(self.aliases, source_spec, None)
         if rootcap == DefaultAliasMarker:
             # no alias, so this is a local file
-            pathname = os.path.abspath(os.path.expanduser(path))
+            pathname = abspath_expanduser_unicode(path.decode('utf-8'))
             name = os.path.basename(pathname)
             if not os.path.exists(pathname):
                 raise MissingSourceError(source_spec)
@@ -612,8 +585,8 @@ class Copier:
             if resp.status == 404:
                 raise MissingSourceError(source_spec)
             elif resp.status != 200:
-                raise TahoeError("Error examining source '%s'" % source_spec,
-                                 resp)
+                raise HTTPError("Error examining source %s" % quote_output(source_spec),
+                                resp)
             parsed = simplejson.loads(resp.read())
             nodetype, d = parsed
             if nodetype == "dirnode":
@@ -621,8 +594,8 @@ class Copier:
                                          self.progress)
                 t.init_from_parsed(parsed)
             else:
-                writecap = ascii_or_none(d.get("rw_uri"))
-                readcap = ascii_or_none(d.get("ro_uri"))
+                writecap = to_str(d.get("rw_uri"))
+                readcap = to_str(d.get("ro_uri"))
                 mutable = d.get("mutable", False) # older nodes don't provide it
                 if source_spec.rfind('/') != -1:
                     name = source_spec[source_spec.rfind('/')+1:]
@@ -632,7 +605,7 @@ class Copier:
 
     def dump_graph(self, s, indent=" "):
         for name, child in s.children.items():
-            print indent + name + ":" + str(child)
+            print "%s%s: %r" % (indent, quote_output(name), child)
             if isinstance(child, (LocalDirectorySource, TahoeDirectorySource)):
                 self.dump_graph(child, indent+"  ")
 
@@ -679,7 +652,6 @@ class Copier:
 
         for (name,s) in source_files:
             self.attach_to_target(s, name, target)
-            self.files_to_copy += 1
 
         for source in source_dirs:
             self.assign_targets(source, target)