CLI: implement the easy part of cp (no -r, only two arguments)

author Brian Warner <warner@allmydata.com>

Tue, 20 May 2008 23:56:03 +0000 (16:56 -0700)

committer Brian Warner <warner@allmydata.com>

Tue, 20 May 2008 23:56:03 +0000 (16:56 -0700)
author Brian Warner <warner@allmydata.com>
Tue, 20 May 2008 23:56:03 +0000 (16:56 -0700)
committer Brian Warner <warner@allmydata.com>
Tue, 20 May 2008 23:56:03 +0000 (16:56 -0700)
diff --git a/src/allmydata/scripts/cli.py b/src/allmydata/scripts/cli.py

index 9d0ea68f36ff2a29e57deb7a4203f52c63ec2954..c5751e2a034592182946905fd8748e31915e6300 100644 (file)
--- a/src/allmydata/scripts/cli.py
+++ b/src/allmydata/scripts/cli.py
@@ -122,6 +122,16 @@ class PutOptions(VDriveOptions):
      contents from the local filesystem). LOCAL_FILE is required to be a
      local file (it can't be stdin)."""
  
+class CpOptions(VDriveOptions):
+    # Options for the "cp" subcommand: one or more SOURCE arguments followed
+    # by exactly one DESTINATION argument.
+    optFlags = [
+        ("recursive", "r", "Copy source directory recursively."),
+        ]
+    def parseArgs(self, *args):
+        # The last positional argument is the destination; everything before
+        # it is a source, so fewer than two arguments is a usage error.
+        if len(args) <= 1:
+            raise usage.UsageError("cp requires at least two arguments")
+        self.destination = args[-1]
+        self.sources = args[:-1]
+
  class RmOptions(VDriveOptions):
      def parseArgs(self, where):
          self.where = where
@@ -161,6 +171,7 @@ subCommands = [
      ["ls", None, ListOptions, "List a directory"],
      ["get", None, GetOptions, "Retrieve a file from the virtual drive."],
      ["put", None, PutOptions, "Upload a file into the virtual drive."],
+    ["cp", None, CpOptions, "Copy one or more files."],
      ["rm", None, RmOptions, "Unlink a file or directory in the virtual drive."],
      ["mv", None, MvOptions, "Move a file within the virtual drive."],
      ["ln", None, LnOptions, "Make an additional link to an existing file."],
@@ -232,6 +243,21 @@ def put(config, stdout, stderr, stdin=sys.stdin):
                         stdin, stdout, stderr)
      return rc
  
+def cp(config, stdout, stderr):
+    # Dispatch target for "cp": pass the parsed options on to
+    # tahoe_cp.copy() and return its result code.
+    from allmydata.scripts import tahoe_cp
+    # --quiet maps to verbosity 0, anything else to verbosity 2
+    verbosity = 2
+    if config['quiet']:
+        verbosity = 0
+    return tahoe_cp.copy(config['node-url'],
+                         config,
+                         config.aliases,
+                         config.sources,
+                         config.destination,
+                         verbosity,
+                         stdout, stderr)
+
  def rm(config, stdout, stderr):
      from allmydata.scripts import tahoe_rm
      if config['quiet']:
@@ -287,6 +313,7 @@ dispatch = {
      "ls": list,
      "get": get,
      "put": put,
+    "cp": cp,
      "rm": rm,
      "mv": mv,
      "ln": ln,
diff --git a/src/allmydata/scripts/common.py b/src/allmydata/scripts/common.py

index 44e8950afa2c5813a82080c31a7b6b229fdbf9c0..2fa146174310bde3d6cee385cf0c44f7ec152138 100644 (file)
--- a/src/allmydata/scripts/common.py
+++ b/src/allmydata/scripts/common.py
@@ -91,9 +91,14 @@ def get_aliases(nodedir):
          pass
      return aliases
  
+class DefaultAliasMarker:
+    # Sentinel returned by get_alias() in place of a rootcap when the path
+    # names no alias and the caller passed default=None.
+    pass
+
  def get_alias(aliases, path, default):
-    # transform "work:path/filename" into (aliases["work"], "path/filename")
-    # We special-case URI:
+    # transform "work:path/filename" into (aliases["work"], "path/filename").
+    # If default=None, then an empty alias is indicated by returning
+    # DefaultAliasMarker. We special-case "URI:" to make it easy to access
+    # specific files/directories by their read-cap.
      if path.startswith("URI:"):
          # The only way to get a sub-path is to use URI:blah:./foo, and we
          # strip out the :./ sequence.
@@ -104,11 +109,15 @@ def get_alias(aliases, path, default):
      colon = path.find(":")
      if colon == -1:
          # no alias
+        if default == None:
+            return DefaultAliasMarker, path
          return aliases[default], path
      alias = path[:colon]
      if "/" in alias:
          # no alias, but there's a colon in a dirname/filename, like
          # "foo/bar:7"
+        if default == None:
+            return DefaultAliasMarker, path
          return aliases[default], path
      return aliases[alias], path[colon+1:]
  
diff --git a/src/allmydata/scripts/tahoe_cp.py b/src/allmydata/scripts/tahoe_cp.py

new file mode 100644 (file)

index 0000000..b904b17
--- /dev/null
+++ b/src/allmydata/scripts/tahoe_cp.py
@@ -0,0 +1,145 @@
+
+import os.path
+import urllib
+import simplejson
+from allmydata.scripts.common import get_alias, escape_path, DefaultAliasMarker
+from allmydata.scripts.common_http import do_http
+
+def ascii_or_none(s):
+    # None passes through untouched; anything else is coerced with str().
+    if s is not None:
+        return str(s)
+    return None
+
+def get_info(nodeurl, aliases, target):
+    """Classify one cp source or destination argument.
+
+    'target' is resolved with get_alias(aliases, target, None). A path that
+    names no alias is treated as a local filesystem path; anything else is
+    looked up through the node's web API. 'nodeurl' is concatenated directly
+    with "uri/...", so it is expected to already end with "/".
+
+    The first two elements of the returned tuple are always (kind, where):
+      kind  is "empty" (nothing exists there yet), "directory", or "file"
+      where is "local" or "tahoe"
+    Local results are (kind, "local", absolute_pathname).
+    Tahoe results are (kind, "tahoe", mutable, rw_uri, ro_uri, url); for
+    "empty" the middle three are (False, None, None).
+    """
+    rootcap, path = get_alias(aliases, target, None)
+    # DefaultAliasMarker is a sentinel class: compare by identity, not ==
+    if rootcap is DefaultAliasMarker:
+        # this is a local file
+        pathname = os.path.abspath(os.path.expanduser(path))
+        if not os.path.exists(pathname):
+            return ("empty", "local", pathname)
+        if os.path.isdir(pathname):
+            return ("directory", "local", pathname)
+        else:
+            # anything that exists and is not a directory is expected to be
+            # a regular file
+            assert os.path.isfile(pathname)
+            return ("file", "local", pathname)
+    else:
+        # this is a tahoe object
+        url = nodeurl + "uri/%s" % urllib.quote(rootcap)
+        if path:
+            url += "/" + escape_path(path)
+        resp = do_http("GET", url + "?t=json")
+        if resp.status == 404:
+            # doesn't exist yet
+            return ("empty", "tahoe", False, None, None, url)
+        # NOTE(review): any other non-200 status falls through to the JSON
+        # parser below -- confirm whether that needs explicit handling.
+        parsed = simplejson.loads(resp.read())
+        nodetype, d = parsed
+        mutable = d.get("mutable", False) # older nodes don't provide 'mutable'
+        rw_uri = ascii_or_none(d.get("rw_uri"))
+        ro_uri = ascii_or_none(d.get("ro_uri"))
+        if nodetype == "dirnode":
+            return ("directory", "tahoe", mutable, rw_uri, ro_uri, url)
+        else:
+            return ("file", "tahoe", mutable, rw_uri, ro_uri, url)
+
+def copy(nodeurl, config, aliases, sources, destination,
+         verbosity, stdout, stderr):
+    """Implement "cp SOURCE.. DESTINATION".
+
+    Each source and the destination is classified with get_info(), then the
+    work is handed to copy_to_file() or copy_to_directory() depending on
+    what the destination is (existing file, nothing yet, or directory).
+
+    Returns 1 after printing a message to stderr when the request cannot be
+    satisfied (missing source, directory source without --recursive, several
+    sources aimed at a file). Otherwise returns whatever the copy_to_*
+    helper returns. 'verbosity' and 'stdout' are accepted but not used here.
+    """
+    # get_info() appends "uri/..." directly, so make sure there is a slash
+    if nodeurl[-1] != "/":
+        nodeurl += "/"
+    recursive = config["recursive"]
+
+    #print "sources:", sources
+    #print "dest:", destination
+
+    target = get_info(nodeurl, aliases, destination)
+    #print target
+
+    # map each source's info tuple back to the argument the user typed, so
+    # error messages can quote the original spelling
+    source_info = dict([(get_info(nodeurl, aliases, source), source)
+                        for source in sources])
+    source_files = [s for s in source_info if s[0] == "file"]
+    source_dirs = [s for s in source_info if s[0] == "directory"]
+    empty_sources = [s for s in source_info if s[0] == "empty"]
+    if empty_sources:
+        # report every missing source before giving up
+        for s in empty_sources:
+            print >>stderr, "no such file or directory %s" % source_info[s]
+        return 1
+
+    #print "source_files", " ".join([source_info[s] for s in source_files])
+    #print "source_dirs", " ".join([source_info[s] for s in source_dirs])
+
+    if source_dirs and not recursive:
+        print >>stderr, "cannot copy directories without --recursive"
+        return 1
+
+    if target[0] == "file":
+        # cp STUFF foo.txt, where foo.txt already exists. This limits the
+        # possibilities considerably.
+        if len(sources) > 1:
+            print >>stderr, "target '%s' is not a directory" % destination
+            return 1
+        if source_dirs:
+            print >>stderr, "cannot copy directory into a file"
+            return 1
+        return copy_to_file(source_files[0], target)
+
+    if target[0] == "empty":
+        if recursive:
+            return copy_to_directory(source_files, source_dirs, target)
+        if len(sources) > 1:
+            # if we have -r, we'll auto-create the target directory. Without
+            # it, we'll only create a file.
+            print >>stderr, "cannot copy multiple files into a file without -r"
+            return 1
+        # cp file1 newfile
+        return copy_to_file(source_files[0], target)
+
+    if target[0] == "directory":
+        return copy_to_directory(source_files, source_dirs, target)
+
+    # get_info() only produces the three kinds handled above
+    print >>stderr, "unknown target"
+    return 1
+
+
+def get_file_data(source):
+    """Return the complete contents of a "file" source as one string.
+
+    'source' is an info tuple as built by get_info(): a local source is read
+    from the pathname in source[2], any other source is fetched with a GET
+    of the URL in its last element. The whole file is held in memory.
+    """
+    assert source[0] == "file"
+    if source[1] == "local":
+        # close the handle explicitly instead of leaving it to the garbage
+        # collector
+        f = open(source[2], "rb")
+        try:
+            return f.read()
+        finally:
+            f.close()
+    return do_http("GET", source[-1]).read()
+
+class WriteError(Exception):
+    # Raised by check_PUT() when a PUT response status is not 200 or 201.
+    pass
+
+def check_PUT(resp):
+    # 200 and 201 count as success. Any other status becomes a WriteError
+    # carrying the status, the reason and the response body.
+    if resp.status not in (200, 201):
+        details = (resp.status, resp.reason, resp.read())
+        raise WriteError("Error during PUT: %s %s %s" % details)
+    return True
+
+def put_file_data(data, target):
+    """Write 'data' to the location described by the info tuple 'target'.
+
+    A local target is written to the pathname in target[2]; any other
+    target receives a PUT to the URL in its last element. Returns True on
+    success; a PUT that does not answer 200/201 raises WriteError (via
+    check_PUT).
+    """
+    if target[1] == "local":
+        # close explicitly so the data is flushed to disk before we report
+        # success, rather than whenever the handle is garbage-collected
+        f = open(target[2], "wb")
+        try:
+            f.write(data)
+        finally:
+            f.close()
+        return True
+    resp = do_http("PUT", target[-1], data)
+    return check_PUT(resp)
+
+def put_uri(uri, target):
+    # PUT the URI string to the target's URL with "?t=uri" appended, and
+    # let check_PUT() judge the response.
+    uri_url = target[-1] + "?t=uri"
+    return check_PUT(do_http("PUT", uri_url, uri))
+
+def copy_to_file(source, target):
+    """Copy a single "file" source onto 'target'.
+
+    Both arguments are info tuples as built by get_info(). Returns 0 on
+    success, because copy() hands this value back as the command's result
+    code. A failed PUT raises WriteError (via check_PUT).
+    """
+    assert source[0] == "file"
+    # do we need to copy bytes? Yes if either end is on local disk, or if
+    # the tahoe source is mutable. (source[2] is the 'mutable' flag only for
+    # tahoe sources; local sources are caught by the first test.)
+    if source[1] == "local" or target[1] == "local" or source[2]:
+        data = get_file_data(source)
+        put_file_data(data, target)
+        return 0
+    # no, we're getting data from an immutable source, and we're copying into
+    # the tahoe grid, so we can just copy the URI.
+    uri = source[3] or source[4] # prefer rw_uri, fall back to ro_uri
+    # TODO: if the original was mutable, and we're creating the target,
+    # should we create a mutable file to match? At the moment we always
+    # create immutable files.
+    put_uri(uri, target)
+    return 0
+
+def copy_to_directory(source_files, source_dirs, target):
+    # Copying into a directory (which includes every -r case) is not written
+    # yet. Raise the exception: naming it as a bare expression does nothing,
+    # so the command would return silently as if it had succeeded.
+    raise NotImplementedError("cp into a directory is not implemented yet")
diff --git a/src/allmydata/test/test_system.py b/src/allmydata/test/test_system.py

index ef1899687becee17413371a420a651474e696fb1..fbe341133f52a155db6a94e0da5adada20cb3a80 100644 (file)
--- a/src/allmydata/test/test_system.py
+++ b/src/allmydata/test/test_system.py
@@ -1603,6 +1603,10 @@ class SystemTest(testutil.SignalMixin, testutil.PollMixin, testutil.StallMixin,
              datas.append(data)
              open(fn,"wb").write(data)
  
+        def _check_stdout_against((out,err), filenum):
+            self.failUnlessEqual(err, "")
+            self.failUnlessEqual(out, datas[filenum])
+
          # test both forms of put: from a file, and from stdin
          #  tahoe put bar FOO
          d.addCallback(run, "put", files[0], "tahoe-file0")
@@ -1621,6 +1625,8 @@ class SystemTest(testutil.SignalMixin, testutil.PollMixin, testutil.StallMixin,
          def _check_put_mutable((out,err)):
              self._mutable_file3_uri = out.strip()
          d.addCallback(_check_put_mutable)
+        d.addCallback(run, "get", "tahoe:file3")
+        d.addCallback(_check_stdout_against, 3)
  
          def _put_from_stdin(res, data, *args):
              args = nodeargs + list(args)
@@ -1659,11 +1665,9 @@ class SystemTest(testutil.SignalMixin, testutil.PollMixin, testutil.StallMixin,
  
          # tahoe get: (to stdout and to a file)
          d.addCallback(run, "get", "tahoe-file0")
-        d.addCallback(lambda (out,err):
-                      self.failUnlessEqual(out, "data to be uploaded: file0\n"))
+        d.addCallback(_check_stdout_against, 0)
          d.addCallback(run, "get", "tahoe:subdir/tahoe-file1")
-        d.addCallback(lambda (out,err):
-                      self.failUnlessEqual(out, "data to be uploaded: file1\n"))
+        d.addCallback(_check_stdout_against, 1)
          outfile0 = os.path.join(self.basedir, "outfile0")
          d.addCallback(run, "get", "file2", outfile0)
          def _check_outfile0((out,err)):
@@ -1721,6 +1725,59 @@ class SystemTest(testutil.SignalMixin, testutil.PollMixin, testutil.StallMixin,
          d.addCallback(run, "ls")
          d.addCallback(_check_ls, ["tahoe-moved", "newlink"])
  
+        d.addCallback(run, "cp", "tahoe:file3", "tahoe:file3-copy")
+        d.addCallback(run, "ls")
+        d.addCallback(_check_ls, ["file3", "file3-copy"])
+        d.addCallback(run, "get", "tahoe:file3-copy")
+        d.addCallback(_check_stdout_against, 3)
+
+        # copy from disk into tahoe
+        d.addCallback(run, "cp", files[4], "tahoe:file4")
+        d.addCallback(run, "ls")
+        d.addCallback(_check_ls, ["file3", "file3-copy", "file4"])
+        d.addCallback(run, "get", "tahoe:file4")
+        d.addCallback(_check_stdout_against, 4)
+
+        # copy from tahoe into disk
+        target_filename = os.path.join(self.basedir, "file-out")
+        d.addCallback(run, "cp", "tahoe:file4", target_filename)
+        def _check_cp_out((out,err)):
+            self.failUnless(os.path.exists(target_filename))
+            got = open(target_filename,"rb").read()
+            self.failUnlessEqual(got, datas[4])
+        d.addCallback(_check_cp_out)
+
+        # copy from disk to disk (silly case)
+        target2_filename = os.path.join(self.basedir, "file-out-copy")
+        d.addCallback(run, "cp", target_filename, target2_filename)
+        def _check_cp_out2((out,err)):
+            self.failUnless(os.path.exists(target2_filename))
+            got = open(target2_filename,"rb").read()
+            self.failUnlessEqual(got, datas[4])
+        d.addCallback(_check_cp_out2)
+
+        # copy from tahoe into disk, overwriting an existing file
+        d.addCallback(run, "cp", "tahoe:file3", target_filename)
+        def _check_cp_out3((out,err)):
+            self.failUnless(os.path.exists(target_filename))
+            got = open(target_filename,"rb").read()
+            self.failUnlessEqual(got, datas[3])
+        d.addCallback(_check_cp_out3)
+
+        # copy from disk into tahoe, overwriting an existing immutable file
+        d.addCallback(run, "cp", files[5], "tahoe:file4")
+        d.addCallback(run, "ls")
+        d.addCallback(_check_ls, ["file3", "file3-copy", "file4"])
+        d.addCallback(run, "get", "tahoe:file4")
+        d.addCallback(_check_stdout_against, 5)
+
+        # copy from disk into tahoe, overwriting an existing mutable file
+        d.addCallback(run, "cp", files[5], "tahoe:file3")
+        d.addCallback(run, "ls")
+        d.addCallback(_check_ls, ["file3", "file3-copy", "file4"])
+        d.addCallback(run, "get", "tahoe:file3")
+        d.addCallback(_check_stdout_against, 5)
+
          # tahoe_ls doesn't currently handle the error correctly: it tries to
          # JSON-parse a traceback.
  ##         def _ls_missing(res):
author	Brian Warner <warner@allmydata.com>
	Tue, 20 May 2008 23:56:03 +0000 (16:56 -0700)
committer	Brian Warner <warner@allmydata.com>
	Tue, 20 May 2008 23:56:03 +0000 (16:56 -0700)
src/allmydata/scripts/cli.py		patch \| blob \| history
src/allmydata/scripts/common.py		patch \| blob \| history
src/allmydata/scripts/tahoe_cp.py	[new file with mode: 0644]	patch \| blob
src/allmydata/test/test_system.py		patch \| blob \| history