From 0df663b7aefbc4204963cb5d5b0cb46f4098a3d8 Mon Sep 17 00:00:00 2001 From: Brian Warner Date: Tue, 20 May 2008 16:56:03 -0700 Subject: [PATCH] CLI: implement the easy part of cp (no -r, only two arguments) --- src/allmydata/scripts/cli.py | 27 ++++++ src/allmydata/scripts/common.py | 13 ++- src/allmydata/scripts/tahoe_cp.py | 145 ++++++++++++++++++++++++++++++ src/allmydata/test/test_system.py | 65 +++++++++++++- 4 files changed, 244 insertions(+), 6 deletions(-) create mode 100644 src/allmydata/scripts/tahoe_cp.py diff --git a/src/allmydata/scripts/cli.py b/src/allmydata/scripts/cli.py index 9d0ea68f..c5751e2a 100644 --- a/src/allmydata/scripts/cli.py +++ b/src/allmydata/scripts/cli.py @@ -122,6 +122,16 @@ class PutOptions(VDriveOptions): contents from the local filesystem). LOCAL_FILE is required to be a local file (it can't be stdin).""" +class CpOptions(VDriveOptions): + optFlags = [ + ("recursive", "r", "Copy source directory recursively."), + ] + def parseArgs(self, *args): + if len(args) < 2: + raise usage.UsageError("cp requires at least two arguments") + self.sources = args[:-1] + self.destination = args[-1] + class RmOptions(VDriveOptions): def parseArgs(self, where): self.where = where @@ -161,6 +171,7 @@ subCommands = [ ["ls", None, ListOptions, "List a directory"], ["get", None, GetOptions, "Retrieve a file from the virtual drive."], ["put", None, PutOptions, "Upload a file into the virtual drive."], + ["cp", None, CpOptions, "Copy one or more files."], ["rm", None, RmOptions, "Unlink a file or directory in the virtual drive."], ["mv", None, MvOptions, "Move a file within the virtual drive."], ["ln", None, LnOptions, "Make an additional link to an existing file."], @@ -232,6 +243,21 @@ def put(config, stdout, stderr, stdin=sys.stdin): stdin, stdout, stderr) return rc +def cp(config, stdout, stderr): + from allmydata.scripts import tahoe_cp + if config['quiet']: + verbosity = 0 + else: + verbosity = 2 + rc = tahoe_cp.copy(config['node-url'], + config, 
+ config.aliases, + config.sources, + config.destination, + verbosity, + stdout, stderr) + return rc + def rm(config, stdout, stderr): from allmydata.scripts import tahoe_rm if config['quiet']: @@ -287,6 +313,7 @@ dispatch = { "ls": list, "get": get, "put": put, + "cp": cp, "rm": rm, "mv": mv, "ln": ln, diff --git a/src/allmydata/scripts/common.py b/src/allmydata/scripts/common.py index 44e8950a..2fa14617 100644 --- a/src/allmydata/scripts/common.py +++ b/src/allmydata/scripts/common.py @@ -91,9 +91,14 @@ def get_aliases(nodedir): pass return aliases +class DefaultAliasMarker: + pass + def get_alias(aliases, path, default): - # transform "work:path/filename" into (aliases["work"], "path/filename") - # We special-case URI: + # transform "work:path/filename" into (aliases["work"], "path/filename"). + # If default=None, then an empty alias is indicated by returning + # DefaultAliasMarker. We special-case "URI:" to make it easy to access + # specific files/directories by their read-cap. if path.startswith("URI:"): # The only way to get a sub-path is to use URI:blah:./foo, and we # strip out the :./ sequence. 
@@ -104,11 +109,15 @@ def get_alias(aliases, path, default): colon = path.find(":") if colon == -1: # no alias + if default == None: + return DefaultAliasMarker, path return aliases[default], path alias = path[:colon] if "/" in alias: # no alias, but there's a colon in a dirname/filename, like # "foo/bar:7" + if default == None: + return DefaultAliasMarker, path return aliases[default], path return aliases[alias], path[colon+1:] diff --git a/src/allmydata/scripts/tahoe_cp.py b/src/allmydata/scripts/tahoe_cp.py new file mode 100644 index 00000000..b904b179 --- /dev/null +++ b/src/allmydata/scripts/tahoe_cp.py @@ -0,0 +1,145 @@ + +import os.path +import urllib +import simplejson +from allmydata.scripts.common import get_alias, escape_path, DefaultAliasMarker +from allmydata.scripts.common_http import do_http + +def ascii_or_none(s): + if s is None: + return s + return str(s) + +def get_info(nodeurl, aliases, target): + rootcap, path = get_alias(aliases, target, None) + if rootcap == DefaultAliasMarker: + # this is a local file + pathname = os.path.abspath(os.path.expanduser(path)) + if not os.path.exists(pathname): + return ("empty", "local", pathname) + if os.path.isdir(pathname): + return ("directory", "local", pathname) + else: + assert os.path.isfile(pathname) + return ("file", "local", pathname) + else: + # this is a tahoe object + url = nodeurl + "uri/%s" % urllib.quote(rootcap) + if path: + url += "/" + escape_path(path) + resp = do_http("GET", url + "?t=json") + if resp.status == 404: + # doesn't exist yet + return ("empty", "tahoe", False, None, None, url) + parsed = simplejson.loads(resp.read()) + nodetype, d = parsed + mutable = d.get("mutable", False) # older nodes don't provide 'mutable' + rw_uri = ascii_or_none(d.get("rw_uri")) + ro_uri = ascii_or_none(d.get("ro_uri")) + if nodetype == "dirnode": + return ("directory", "tahoe", mutable, rw_uri, ro_uri, url) + else: + return ("file", "tahoe", mutable, rw_uri, ro_uri, url) + +def copy(nodeurl, config, 
aliases, sources, destination, + verbosity, stdout, stderr): + if nodeurl[-1] != "/": + nodeurl += "/" + recursive = config["recursive"] + + #print "sources:", sources + #print "dest:", destination + + target = get_info(nodeurl, aliases, destination) + #print target + + source_info = dict([(get_info(nodeurl, aliases, source), source) + for source in sources]) + source_files = [s for s in source_info if s[0] == "file"] + source_dirs = [s for s in source_info if s[0] == "directory"] + empty_sources = [s for s in source_info if s[0] == "empty"] + if empty_sources: + for s in empty_sources: + print >>stderr, "no such file or directory %s" % source_info[s] + return 1 + + #print "source_files", " ".join([source_info[s] for s in source_files]) + #print "source_dirs", " ".join([source_info[s] for s in source_dirs]) + + if source_dirs and not recursive: + print >>stderr, "cannot copy directories without --recursive" + return 1 + + if target[0] == "file": + # cp STUFF foo.txt, where foo.txt already exists. This limits the + # possibilities considerably. + if len(sources) > 1: + print >>stderr, "target '%s' is not a directory" % destination + return 1 + if source_dirs: + print >>stderr, "cannot copy directory into a file" + return 1 + return copy_to_file(source_files[0], target) + + if target[0] == "empty": + if recursive: + return copy_to_directory(source_files, source_dirs, target) + if len(sources) > 1: + # if we have -r, we'll auto-create the target directory. Without + # it, we'll only create a file. 
+ print >>stderr, "cannot copy multiple files into a file without -r" + return 1 + # cp file1 newfile + return copy_to_file(source_files[0], target) + + if target[0] == "directory": + return copy_to_directory(source_files, source_dirs, target) + + print >>stderr, "unknown target" + return 1 + + +def get_file_data(source): + assert source[0] == "file" + if source[1] == "local": + return open(source[2], "rb").read() + return do_http("GET", source[-1]).read() + +class WriteError(Exception): + pass + +def check_PUT(resp): + if resp.status in (200, 201): + return True + raise WriteError("Error during PUT: %s %s %s" % (resp.status, resp.reason, + resp.read())) + +def put_file_data(data, target): + if target[1] == "local": + open(target[2], "wb").write(data) + return True + resp = do_http("PUT", target[-1], data) + return check_PUT(resp) + +def put_uri(uri, target): + resp = do_http("PUT", target[-1] + "?t=uri", uri) + return check_PUT(resp) + +def copy_to_file(source, target): + assert source[0] == "file" + # do we need to copy bytes? + if source[1] == "local" or source[2] == True or target[1] == "local": + # yes + data = get_file_data(source) + put_file_data(data, target) + return + # no, we're getting data from an immutable source, and we're copying into + # the tahoe grid, so we can just copy the URI. + uri = source[3] or source[4] # prefer rw_uri, fall back to ro_uri + # TODO: if the original was mutable, and we're creating the target, + # should we create a mutable file to match? At the moment we always + # create immutable files. 
+ put_uri(uri, target) + +def copy_to_directory(source_files, source_dirs, target): + NotImplementedError diff --git a/src/allmydata/test/test_system.py b/src/allmydata/test/test_system.py index ef189968..fbe34113 100644 --- a/src/allmydata/test/test_system.py +++ b/src/allmydata/test/test_system.py @@ -1603,6 +1603,10 @@ class SystemTest(testutil.SignalMixin, testutil.PollMixin, testutil.StallMixin, datas.append(data) open(fn,"wb").write(data) + def _check_stdout_against((out,err), filenum): + self.failUnlessEqual(err, "") + self.failUnlessEqual(out, datas[filenum]) + # test all both forms of put: from a file, and from stdin # tahoe put bar FOO d.addCallback(run, "put", files[0], "tahoe-file0") @@ -1621,6 +1625,8 @@ class SystemTest(testutil.SignalMixin, testutil.PollMixin, testutil.StallMixin, def _check_put_mutable((out,err)): self._mutable_file3_uri = out.strip() d.addCallback(_check_put_mutable) + d.addCallback(run, "get", "tahoe:file3") + d.addCallback(_check_stdout_against, 3) def _put_from_stdin(res, data, *args): args = nodeargs + list(args) @@ -1659,11 +1665,9 @@ class SystemTest(testutil.SignalMixin, testutil.PollMixin, testutil.StallMixin, # tahoe get: (to stdin and to a file) d.addCallback(run, "get", "tahoe-file0") - d.addCallback(lambda (out,err): - self.failUnlessEqual(out, "data to be uploaded: file0\n")) + d.addCallback(_check_stdout_against, 0) d.addCallback(run, "get", "tahoe:subdir/tahoe-file1") - d.addCallback(lambda (out,err): - self.failUnlessEqual(out, "data to be uploaded: file1\n")) + d.addCallback(_check_stdout_against, 1) outfile0 = os.path.join(self.basedir, "outfile0") d.addCallback(run, "get", "file2", outfile0) def _check_outfile0((out,err)): @@ -1721,6 +1725,59 @@ class SystemTest(testutil.SignalMixin, testutil.PollMixin, testutil.StallMixin, d.addCallback(run, "ls") d.addCallback(_check_ls, ["tahoe-moved", "newlink"]) + d.addCallback(run, "cp", "tahoe:file3", "tahoe:file3-copy") + d.addCallback(run, "ls") + 
d.addCallback(_check_ls, ["file3", "file3-copy"]) + d.addCallback(run, "get", "tahoe:file3-copy") + d.addCallback(_check_stdout_against, 3) + + # copy from disk into tahoe + d.addCallback(run, "cp", files[4], "tahoe:file4") + d.addCallback(run, "ls") + d.addCallback(_check_ls, ["file3", "file3-copy", "file4"]) + d.addCallback(run, "get", "tahoe:file4") + d.addCallback(_check_stdout_against, 4) + + # copy from tahoe into disk + target_filename = os.path.join(self.basedir, "file-out") + d.addCallback(run, "cp", "tahoe:file4", target_filename) + def _check_cp_out((out,err)): + self.failUnless(os.path.exists(target_filename)) + got = open(target_filename,"rb").read() + self.failUnlessEqual(got, datas[4]) + d.addCallback(_check_cp_out) + + # copy from disk to disk (silly case) + target2_filename = os.path.join(self.basedir, "file-out-copy") + d.addCallback(run, "cp", target_filename, target2_filename) + def _check_cp_out2((out,err)): + self.failUnless(os.path.exists(target2_filename)) + got = open(target2_filename,"rb").read() + self.failUnlessEqual(got, datas[4]) + d.addCallback(_check_cp_out2) + + # copy from tahoe into disk, overwriting an existing file + d.addCallback(run, "cp", "tahoe:file3", target_filename) + def _check_cp_out3((out,err)): + self.failUnless(os.path.exists(target_filename)) + got = open(target_filename,"rb").read() + self.failUnlessEqual(got, datas[3]) + d.addCallback(_check_cp_out3) + + # copy from disk into tahoe, overwriting an existing immutable file + d.addCallback(run, "cp", files[5], "tahoe:file4") + d.addCallback(run, "ls") + d.addCallback(_check_ls, ["file3", "file3-copy", "file4"]) + d.addCallback(run, "get", "tahoe:file4") + d.addCallback(_check_stdout_against, 5) + + # copy from disk into tahoe, overwriting an existing mutable file + d.addCallback(run, "cp", files[5], "tahoe:file3") + d.addCallback(run, "ls") + d.addCallback(_check_ls, ["file3", "file3-copy", "file4"]) + d.addCallback(run, "get", "tahoe:file3") + 
d.addCallback(_check_stdout_against, 5) + # tahoe_ls doesn't currently handle the error correctly: it tries to # JSON-parse a traceback. ## def _ls_missing(res): -- 2.45.2