From 448278e807e489e44f539e4eb5993fa991349895 Mon Sep 17 00:00:00 2001
From: Kevan Carstensen <kevan@isnotajoke.com>
Date: Fri, 29 Jul 2011 13:20:39 -0700
Subject: [PATCH] cli: make 'tahoe cp' overwrite mutable files in-place

---
 src/allmydata/scripts/tahoe_cp.py |  19 +-
 src/allmydata/test/test_cli.py    | 351 ++++++++++++++++++++++++++++++
 2 files changed, 365 insertions(+), 5 deletions(-)

diff --git a/src/allmydata/scripts/tahoe_cp.py b/src/allmydata/scripts/tahoe_cp.py
index 9f910042..694bf353 100644
--- a/src/allmydata/scripts/tahoe_cp.py
+++ b/src/allmydata/scripts/tahoe_cp.py
@@ -386,11 +386,20 @@ class TahoeDirectoryTarget:
         url = self.nodeurl + "uri"
         if not hasattr(inf, "seek"):
             inf = inf.read()
-        filecap = PUT(url, inf)
-        # TODO: this always creates immutable files. We might want an option
-        # to always create mutable files, or to copy mutable files into new
-        # mutable files.
-        self.new_children[name] = filecap
+
+        if self.children is None:
+            self.populate(False)
+
+        # Check to see if we already have a mutable file by this name.
+        # If so, overwrite that file in place.
+        if name in self.children and self.children[name].mutable:
+            self.children[name].put_file(inf)
+        else:
+            filecap = PUT(url, inf)
+            # TODO: this always creates immutable files. We might want an option
+            # to always create mutable files, or to copy mutable files into new
+            # mutable files.
+            self.new_children[name] = filecap
 
     def put_uri(self, name, filecap):
         self.new_children[name] = filecap
diff --git a/src/allmydata/test/test_cli.py b/src/allmydata/test/test_cli.py
index 33d00ba1..4fd4bfab 100644
--- a/src/allmydata/test/test_cli.py
+++ b/src/allmydata/test/test_cli.py
@@ -1625,6 +1625,357 @@ class Cp(GridTestMixin, CLITestMixin, unittest.TestCase):
 
         return d
 
+    def test_cp_replaces_mutable_file_contents(self):
+        self.basedir = "cli/Cp/cp_replaces_mutable_file_contents"
+        self.set_up_grid()
+
+        # Write a test file, which we'll copy to the grid.
+        test_txt_path = os.path.join(self.basedir, "test.txt")
+        test_txt_contents = "foo bar baz"
+        f = open(test_txt_path, "w")
+        f.write(test_txt_contents)
+        f.close()
+
+        d = self.do_cli("create-alias", "tahoe")
+        d.addCallback(lambda ignored:
+            self.do_cli("mkdir", "tahoe:test"))
+        # We have to use 'tahoe put' here because 'tahoe cp' doesn't
+        # know how to make mutable files at the destination.
+        d.addCallback(lambda ignored:
+            self.do_cli("put", "--mutable", test_txt_path, "tahoe:test/test.txt"))
+        d.addCallback(lambda ignored:
+            self.do_cli("get", "tahoe:test/test.txt"))
+        def _check((rc, out, err)):
+            self.failUnlessEqual(rc, 0)
+            self.failUnlessEqual(out, test_txt_contents)
+        d.addCallback(_check)
+
+        # We'll do ls --json to get the read uri and write uri for the
+        # file we've just uploaded.
+        d.addCallback(lambda ignored:
+            self.do_cli("ls", "--json", "tahoe:test/test.txt"))
+        def _get_test_txt_uris((rc, out, err)):
+            self.failUnlessEqual(rc, 0)
+            filetype, data = simplejson.loads(out)
+
+            self.failUnlessEqual(filetype, "filenode")
+            self.failUnless(data['mutable'])
+
+            self.failUnlessIn("rw_uri", data)
+            self.rw_uri = data["rw_uri"]
+            self.failUnlessIn("ro_uri", data)
+            self.ro_uri = data['ro_uri']
+        d.addCallback(_get_test_txt_uris)
+
+        # Now make a new file to copy in place of test.txt.
+        new_txt_path = os.path.join(self.basedir, "new.txt")
+        new_txt_contents = "baz bar foo" * 100000
+        f = open(new_txt_path, "w")
+        f.write(new_txt_contents)
+        f.close()
+
+        # Copy the new file on top of the old file.
+        d.addCallback(lambda ignored:
+            self.do_cli("cp", new_txt_path, "tahoe:test/test.txt"))
+
+        # If we get test.txt now, we should see the new data.
+        d.addCallback(lambda ignored:
+            self.do_cli("get", "tahoe:test/test.txt"))
+        d.addCallback(lambda (rc, out, err):
+            self.failUnlessEqual(out, new_txt_contents))
+        # If we get the json of the new file, we should see that the old
+        # uri is there
+        d.addCallback(lambda ignored:
+            self.do_cli("ls", "--json", "tahoe:test/test.txt"))
+        def _check_json((rc, out, err)):
+            self.failUnlessEqual(rc, 0)
+            filetype, data = simplejson.loads(out)
+
+            self.failUnlessEqual(filetype, "filenode")
+            self.failUnless(data['mutable'])
+
+            self.failUnlessIn("ro_uri", data)
+            self.failUnlessEqual(data["ro_uri"], self.ro_uri)
+            self.failUnlessIn("rw_uri", data)
+            self.failUnlessEqual(data['rw_uri'], self.rw_uri)
+        d.addCallback(_check_json)
+
+        # and, finally, doing a GET directly on one of the old uris
+        # should give us the new contents.
+        d.addCallback(lambda ignored:
+            self.do_cli("get", self.rw_uri))
+        d.addCallback(lambda (rc, out, err):
+            self.failUnlessEqual(out, new_txt_contents))
+        # Now copy the old test.txt without an explicit destination
+        # file. tahoe cp will match it to the existing file and
+        # overwrite it appropriately.
+        d.addCallback(lambda ignored:
+            self.do_cli("cp", test_txt_path, "tahoe:test"))
+        d.addCallback(lambda ignored:
+            self.do_cli("get", "tahoe:test/test.txt"))
+        d.addCallback(lambda (rc, out, err):
+            self.failUnlessEqual(out, test_txt_contents))
+        d.addCallback(lambda ignored:
+            self.do_cli("ls", "--json", "tahoe:test/test.txt"))
+        d.addCallback(_check_json)
+        d.addCallback(lambda ignored:
+            self.do_cli("get", self.rw_uri))
+        d.addCallback(lambda (rc, out, err):
+            self.failUnlessEqual(out, test_txt_contents))
+
+        # Now we'll make a more complicated directory structure.
+        # test2/
+        # test2/mutable1
+        # test2/mutable2
+        # test2/imm1
+        # test2/imm2
+        imm_test_txt_path = os.path.join(self.basedir, "imm_test.txt")
+        imm_test_txt_contents = test_txt_contents * 10000
+        fileutil.write(imm_test_txt_path, imm_test_txt_contents)
+        d.addCallback(lambda ignored:
+            self.do_cli("mkdir", "tahoe:test2"))
+        d.addCallback(lambda ignored:
+            self.do_cli("put", "--mutable", new_txt_path,
+                        "tahoe:test2/mutable1"))
+        d.addCallback(lambda ignored:
+            self.do_cli("put", "--mutable", new_txt_path,
+                        "tahoe:test2/mutable2"))
+        d.addCallback(lambda ignored:
+            self.do_cli('put', new_txt_path, "tahoe:test2/imm1"))
+        d.addCallback(lambda ignored:
+            self.do_cli("put", imm_test_txt_path, "tahoe:test2/imm2"))
+        d.addCallback(lambda ignored:
+            self.do_cli("ls", "--json", "tahoe:test2"))
+        def _process_directory_json((rc, out, err)):
+            self.failUnlessEqual(rc, 0)
+
+            filetype, data = simplejson.loads(out)
+            self.failUnlessEqual(filetype, "dirnode")
+            self.failUnless(data['mutable'])
+            self.failUnlessIn("children", data)
+            children = data['children']
+
+            # Store the URIs for later use.
+            self.childuris = {}
+            for k in ["mutable1", "mutable2", "imm1", "imm2"]:
+                self.failUnlessIn(k, children)
+                childtype, childdata = children[k]
+                self.failUnlessEqual(childtype, "filenode")
+                if "mutable" in k:
+                    self.failUnless(childdata['mutable'])
+                    self.failUnlessIn("rw_uri", childdata)
+                    uri_key = "rw_uri"
+                else:
+                    self.failIf(childdata['mutable'])
+                    self.failUnlessIn("ro_uri", childdata)
+                    uri_key = "ro_uri"
+                self.childuris[k] = childdata[uri_key]
+        d.addCallback(_process_directory_json)
+        # Now build a local directory to copy into place, like the following:
+        # source1/
+        # source1/mutable1
+        # source1/mutable2
+        # source1/imm1
+        # source1/imm3
+        def _build_local_directory(ignored):
+            source1_path = os.path.join(self.basedir, "source1")
+            fileutil.make_dirs(source1_path)
+            for fn in ("mutable1", "mutable2", "imm1", "imm3"):
+                fileutil.write(os.path.join(source1_path, fn), fn * 1000)
+            self.source1_path = source1_path
+        d.addCallback(_build_local_directory)
+        d.addCallback(lambda ignored:
+            self.do_cli("cp", "-r", self.source1_path, "tahoe:test2"))
+
+        # We expect that mutable1 and mutable2 are overwritten in-place,
+        # so they'll retain their URIs but have different content.
+        def _process_file_json((rc, out, err), fn):
+            self.failUnlessEqual(rc, 0)
+            filetype, data = simplejson.loads(out)
+            self.failUnlessEqual(filetype, "filenode")
+
+            if "mutable" in fn:
+                self.failUnless(data['mutable'])
+                self.failUnlessIn("rw_uri", data)
+                self.failUnlessEqual(data["rw_uri"], self.childuris[fn])
+            else:
+                self.failIf(data['mutable'])
+                self.failUnlessIn("ro_uri", data)
+                self.failIfEqual(data["ro_uri"], self.childuris[fn])
+
+        for fn in ("mutable1", "mutable2"):
+            d.addCallback(lambda ignored, fn=fn:
+                self.do_cli("get", "tahoe:test2/%s" % fn))
+            d.addCallback(lambda (rc, out, err), fn=fn:
+                self.failUnlessEqual(out, fn * 1000))
+            d.addCallback(lambda ignored, fn=fn:
+                self.do_cli("ls", "--json", "tahoe:test2/%s" % fn))
+            d.addCallback(_process_file_json, fn=fn)
+
+        # imm1 should have been replaced, so both its uri and content
+        # should be different.
+        d.addCallback(lambda ignored:
+            self.do_cli("get", "tahoe:test2/imm1"))
+        d.addCallback(lambda (rc, out, err):
+            self.failUnlessEqual(out, "imm1" * 1000))
+        d.addCallback(lambda ignored:
+            self.do_cli("ls", "--json", "tahoe:test2/imm1"))
+        d.addCallback(_process_file_json, fn="imm1")
+
+        # imm3 should have been created.
+        d.addCallback(lambda ignored:
+            self.do_cli("get", "tahoe:test2/imm3"))
+        d.addCallback(lambda (rc, out, err):
+            self.failUnlessEqual(out, "imm3" * 1000))
+
+        # imm2 should be exactly as we left it, since our newly-copied
+        # directory didn't contain an imm2 entry.
+        d.addCallback(lambda ignored:
+            self.do_cli("get", "tahoe:test2/imm2"))
+        d.addCallback(lambda (rc, out, err):
+            self.failUnlessEqual(out, imm_test_txt_contents))
+        d.addCallback(lambda ignored:
+            self.do_cli("ls", "--json", "tahoe:test2/imm2"))
+        def _process_imm2_json((rc, out, err)):
+            self.failUnlessEqual(rc, 0)
+            filetype, data = simplejson.loads(out)
+            self.failUnlessEqual(filetype, "filenode")
+            self.failIf(data['mutable'])
+            self.failUnlessIn("ro_uri", data)
+            self.failUnlessEqual(data['ro_uri'], self.childuris["imm2"])
+        d.addCallback(_process_imm2_json)
+        return d
+
+    def test_cp_overwrite_readonly_mutable_file(self):
+        # tahoe cp should print an error when asked to overwrite a
+        # mutable file that it can't overwrite.
+        self.basedir = "cli/Cp/overwrite_readonly_mutable_file"
+        self.set_up_grid()
+
+        # This is our initial file. We'll link its readcap into the
+        # tahoe: alias.
+        test_file_path = os.path.join(self.basedir, "test_file.txt")
+        test_file_contents = "This is a test file."
+        fileutil.write(test_file_path, test_file_contents)
+
+        # This is our replacement file. We'll try and fail to upload it
+        # over the readcap that we linked into the tahoe: alias.
+        replacement_file_path = os.path.join(self.basedir, "replacement.txt")
+        replacement_file_contents = "These are new contents."
+        fileutil.write(replacement_file_path, replacement_file_contents)
+
+        d = self.do_cli("create-alias", "tahoe:")
+        d.addCallback(lambda ignored:
+            self.do_cli("put", "--mutable", test_file_path))
+        def _get_test_uri((rc, out, err)):
+            self.failUnlessEqual(rc, 0)
+            # this should be a write uri
+            self._test_write_uri = out
+        d.addCallback(_get_test_uri)
+        d.addCallback(lambda ignored:
+            self.do_cli("ls", "--json", self._test_write_uri))
+        def _process_test_json((rc, out, err)):
+            self.failUnlessEqual(rc, 0)
+            filetype, data = simplejson.loads(out)
+
+            self.failUnlessEqual(filetype, "filenode")
+            self.failUnless(data['mutable'])
+            self.failUnlessIn("ro_uri", data)
+            self._test_read_uri = data['ro_uri']
+        d.addCallback(_process_test_json)
+        # Now we'll link the readonly URI into the tahoe: alias.
+        d.addCallback(lambda ignored:
+            self.do_cli("ln", self._test_read_uri, "tahoe:test_file.txt"))
+        d.addCallback(lambda (rc, out, err):
+            self.failUnlessEqual(rc, 0))
+        # Let's grab the json of that to make sure that we did it right.
+        d.addCallback(lambda ignored:
+            self.do_cli("ls", "--json", "tahoe:"))
+        def _process_tahoe_json((rc, out, err)):
+            self.failUnlessEqual(rc, 0)
+
+            filetype, data = simplejson.loads(out)
+            self.failUnlessEqual(filetype, "dirnode")
+            self.failUnlessIn("children", data)
+            kiddata = data['children']
+
+            self.failUnlessIn("test_file.txt", kiddata)
+            testtype, testdata = kiddata['test_file.txt']
+            self.failUnlessEqual(testtype, "filenode")
+            self.failUnless(testdata['mutable'])
+            self.failUnlessIn("ro_uri", testdata)
+            self.failUnlessEqual(testdata['ro_uri'], self._test_read_uri)
+            self.failIfIn("rw_uri", testdata)
+        d.addCallback(_process_tahoe_json)
+        # Okay, now we're going to try uploading another mutable file in
+        # place of that one. We should get an error.
+        d.addCallback(lambda ignored:
+            self.do_cli("cp", replacement_file_path, "tahoe:test_file.txt"))
+        def _check_error_message((rc, out, err)):
+            self.failUnlessEqual(rc, 1)
+            self.failUnlessIn("need write capability to publish", err)
+        d.addCallback(_check_error_message)
+        # Make extra sure that that didn't work.
+        d.addCallback(lambda ignored:
+            self.do_cli("get", "tahoe:test_file.txt"))
+        d.addCallback(lambda (rc, out, err):
+            self.failUnlessEqual(out, test_file_contents))
+        d.addCallback(lambda ignored:
+            self.do_cli("get", self._test_read_uri))
+        d.addCallback(lambda (rc, out, err):
+            self.failUnlessEqual(out, test_file_contents))
+        # Now we'll do it without an explicit destination.
+        d.addCallback(lambda ignored:
+            self.do_cli("cp", test_file_path, "tahoe:"))
+        d.addCallback(_check_error_message)
+        d.addCallback(lambda ignored:
+            self.do_cli("get", "tahoe:test_file.txt"))
+        d.addCallback(lambda (rc, out, err):
+            self.failUnlessEqual(out, test_file_contents))
+        d.addCallback(lambda ignored:
+            self.do_cli("get", self._test_read_uri))
+        d.addCallback(lambda (rc, out, err):
+            self.failUnlessEqual(out, test_file_contents))
+        # Now we'll link a readonly file into a subdirectory.
+        d.addCallback(lambda ignored:
+            self.do_cli("mkdir", "tahoe:testdir"))
+        d.addCallback(lambda (rc, out, err):
+            self.failUnlessEqual(rc, 0))
+        d.addCallback(lambda ignored:
+            self.do_cli("ln", self._test_read_uri, "tahoe:test/file2.txt"))
+        d.addCallback(lambda (rc, out, err):
+            self.failUnlessEqual(rc, 0))
+
+        test_dir_path = os.path.join(self.basedir, "test")
+        fileutil.make_dirs(test_dir_path)
+        for f in ("file1.txt", "file2.txt"):
+            fileutil.write(os.path.join(test_dir_path, f), f * 10000)
+
+        d.addCallback(lambda ignored:
+            self.do_cli("cp", "-r", test_dir_path, "tahoe:test"))
+        d.addCallback(_check_error_message)
+        d.addCallback(lambda ignored:
+            self.do_cli("ls", "--json", "tahoe:test"))
+        def _got_testdir_json((rc, out, err)):
+            self.failUnlessEqual(rc, 0)
+
+            filetype, data = simplejson.loads(out)
+            self.failUnlessEqual(filetype, "dirnode")
+
+            self.failUnlessIn("children", data)
+            childdata = data['children']
+
+            self.failUnlessIn("file2.txt", childdata)
+            file2type, file2data = childdata['file2.txt']
+            self.failUnlessEqual(file2type, "filenode")
+            self.failUnless(file2data['mutable'])
+            self.failUnlessIn("ro_uri", file2data)
+            self.failUnlessEqual(file2data['ro_uri'], self._test_read_uri)
+            self.failIfIn("rw_uri", file2data)
+        d.addCallback(_got_testdir_json)
+        return d
+
+
 class Backup(GridTestMixin, CLITestMixin, StallMixin, unittest.TestCase):
 
     def writeto(self, path, data):
-- 
2.45.2