From: Brian Warner Date: Thu, 13 Nov 2008 03:17:25 +0000 (-0700) Subject: CLI: add 'tahoe manifest', which takes a directory and returns a list of things you... X-Git-Url: https://git.rkrishnan.org/components/%22news.html/architecture.txt?a=commitdiff_plain;h=0d93d6244e7a5e6ec9dee4e306baba2d3a5101ec;p=tahoe-lafs%2Ftahoe-lafs.git CLI: add 'tahoe manifest', which takes a directory and returns a list of things you can reach from it --- diff --git a/docs/CLI.txt b/docs/CLI.txt index b581f6d6..5a8a6815 100644 --- a/docs/CLI.txt +++ b/docs/CLI.txt @@ -325,6 +325,21 @@ tahoe mv tahoe:uploaded.txt fun:uploaded.txt These move a file from your tahoe root directory to the virtual directory set up earlier with "tahoe add-alias fun DIRCAP" +== Virtual Drive Maintenance == + +tahoe manifest tahoe: +tahoe manifest --storage-index tahoe: + + This performs a recursive walk of the given directory, visiting every file + and directory that can be reached from that point. It then emits one line to + stdout for each object it encounters. + + The default behavior is to print the access cap string (like URI:CHK:.. or + URI:DIR2:..), followed by a space, followed by the full path name. + + If --storage-index is added, each line will instead contain the object's + storage index. This (string) value is useful to determine which share files + (on the server) are associated with this directory tree. == Debugging == diff --git a/src/allmydata/scripts/cli.py b/src/allmydata/scripts/cli.py index ec74dd8a..c108c4cd 100644 --- a/src/allmydata/scripts/cli.py +++ b/src/allmydata/scripts/cli.py @@ -197,6 +197,18 @@ class WebopenOptions(VDriveOptions): longdesc = """Opens a webbrowser to the contents of some portion of the virtual drive.""" +class ManifestOptions(VDriveOptions): + optFlags = [ + ("storage-index", "s", "Only print storage index strings, not pathname+cap"), + ] + def parseArgs(self, where=''): + self.where = where + + def getSynopsis(self): + return "%s manifest [ALIAS:PATH]" % (os.path.basename(sys.argv[0]),) + + longdesc = """Print a list of all files/directories reachable from the given starting point.""" + subCommands = [ ["mkdir", None, MakeDirectoryOptions, "Create a new directory"], ["add-alias", None, AddAliasOptions, "Add a new alias cap"], @@ -210,6 +222,7 @@ subCommands = [ ["mv", None, MvOptions, "Move a file within the virtual drive."], ["ln", None, LnOptions, "Make an additional link to an existing file."], ["webopen", None, WebopenOptions, "Open a webbrowser to the root_dir"], + ["manifest", None, ManifestOptions, "List all files/dirs in a subtree"], ] def mkdir(options): @@ -281,6 +294,11 @@ def webopen(options, opener=None): rc = tahoe_webopen.webopen(options, opener=opener) return rc +def manifest(options): + from allmydata.scripts import tahoe_manifest + rc = tahoe_manifest.manifest(options) + return rc + dispatch = { "mkdir": mkdir, "add-alias": add_alias, @@ -294,5 +312,6 @@ dispatch = { "mv": mv, "ln": ln, "webopen": webopen, + "manifest": manifest, } diff --git a/src/allmydata/scripts/tahoe_manifest.py b/src/allmydata/scripts/tahoe_manifest.py new file mode 100644 index 00000000..8428cfce --- /dev/null +++ b/src/allmydata/scripts/tahoe_manifest.py @@ -0,0 +1,91 @@ + +import os, time +from allmydata.scripts.common import get_alias, DEFAULT_ALIAS, escape_path +from allmydata.scripts.common_http import do_http +from allmydata.util import base32 +from allmydata import uri +import urllib +import simplejson + +class ManifestGrabber: + def run(self, options): + stderr = options.stderr + self.options = options + self.ophandle = ophandle = base32.b2a(os.urandom(16)) + nodeurl = options['node-url'] + if not nodeurl.endswith("/"): + nodeurl += "/" + self.nodeurl = nodeurl + where = options.where + rootcap, path = get_alias(options.aliases, where, DEFAULT_ALIAS) + if path == '/': + path = '' + url = nodeurl + "uri/%s" % urllib.quote(rootcap) + if path: + url += "/" + escape_path(path) + # todo: should it end with a slash? + url += "?t=start-manifest&ophandle=" + ophandle + resp = do_http("POST", url) + if resp.status not in (200, 302): + print >>stderr, "ERROR", resp.status, resp.reason, resp.read() + return 1 + # now we poll for results. We nominally poll at t=1, 5, 10, 30, 60, + # 90, k*120 seconds, but if the poll takes non-zero time, that will + # be slightly longer. I'm not worried about trying to make up for + # that time. + + return self.wait_for_results() + + def poll_times(self): + for i in (1,5,10,30,60,90): + yield i + i = 120 + while True: + yield i + i += 120 + + def wait_for_results(self): + last = 0 + for next in self.poll_times(): + delay = next - last + time.sleep(delay) + last = next + if self.poll(): + return 0 + + def poll(self): + url = self.nodeurl + "operations/" + self.ophandle + url += "?t=status&output=JSON&release-after-complete=true" + stdout = self.options.stdout + stderr = self.options.stderr + resp = do_http("GET", url) + if resp.status != 200: + print >>stderr, "ERROR", resp.status, resp.reason, resp.read() + return True + data = simplejson.loads(resp.read()) + if not data["finished"]: + return False + self.write_results(data) + return True + + def write_results(self, data): + stdout = self.options.stdout + stderr = self.options.stderr + if self.options["storage-index"]: + for (path, cap) in data["manifest"]: + u = uri.from_string(str(cap)) + si = u.get_storage_index() + if si is not None: + print >>stdout, base32.b2a(si) + else: + for (path, cap) in data["manifest"]: + try: + print >>stdout, cap, "/".join(path) + except UnicodeEncodeError: + print >>stdout, cap, "/".join([p.encode("utf-8") + for p in path]) + + + +def manifest(options): + return ManifestGrabber().run(options) diff --git a/src/allmydata/test/test_system.py b/src/allmydata/test/test_system.py index da0d620e..7e68de37 100644 --- a/src/allmydata/test/test_system.py +++ b/src/allmydata/test/test_system.py @@ -2068,6 +2068,7 @@ class DeepCheckWebGood(DeepCheckBase, unittest.TestCase): d.addCallback(self.do_stats) d.addCallback(self.do_test_check_good) d.addCallback(self.do_test_web_good) + d.addCallback(self.do_test_cli_good) d.addErrback(self.explain_web_error) d.addErrback(self.explain_error) return d @@ -2345,6 +2346,50 @@ class DeepCheckWebGood(DeepCheckBase, unittest.TestCase): return d + def _run_cli(self, argv, stdin=""): + #print "CLI:", argv + stdout, stderr = StringIO(), StringIO() + d = threads.deferToThread(runner.runner, argv, run_by_human=False, + stdin=StringIO(stdin), + stdout=stdout, stderr=stderr) + def _done(res): + return stdout.getvalue(), stderr.getvalue() + d.addCallback(_done) + return d + + def do_test_cli_good(self, ignored): + d = self._run_cli(["manifest", "-u", self.webish_url, self.root_uri]) + def _check((out,err)): + lines = [l for l in out.split("\n") if l] + self.failUnlessEqual(len(lines), 4) + caps = {} + for l in lines: + try: + cap, path = l.split(None, 1) + except ValueError: + cap = l.strip() + path = "" + caps[cap] = path + self.failUnless(self.root.get_uri() in caps) + self.failUnlessEqual(caps[self.root.get_uri()], "") + self.failUnlessEqual(caps[self.mutable.get_uri()], "mutable") + self.failUnlessEqual(caps[self.large.get_uri()], "large") + self.failUnlessEqual(caps[self.small.get_uri()], "small") + d.addCallback(_check) + + d.addCallback(lambda res: + self._run_cli(["manifest", "-u", self.webish_url, + "--storage-index", self.root_uri])) + def _check2((out,err)): + lines = [l for l in out.split("\n") if l] + self.failUnlessEqual(len(lines), 3) + self.failUnless(base32.b2a(self.root.get_storage_index()) in lines) + self.failUnless(base32.b2a(self.mutable.get_storage_index()) in lines) + self.failUnless(base32.b2a(self.large.get_storage_index()) in lines) + d.addCallback(_check2) + return d + + class DeepCheckWebBad(DeepCheckBase, unittest.TestCase): def test_bad(self): diff --git a/src/allmydata/uri.py b/src/allmydata/uri.py index c917ed38..1d09e62d 100644 --- a/src/allmydata/uri.py +++ b/src/allmydata/uri.py @@ -38,6 +38,9 @@ class _BaseURI: def to_human_encoding(self): return 'http://127.0.0.1:8123/uri/'+self.to_string() + def get_storage_index(self): + return self.storage_index + class CHKFileURI(_BaseURI): implements(IURI, IFileURI) @@ -179,6 +182,8 @@ class LiteralFileURI(_BaseURI): return False def get_readonly(self): return self + def get_storage_index(self): + return None def get_verifier(self): # LIT files need no verification, all the data is present in the URI @@ -359,6 +364,9 @@ class _NewDirectoryBaseURI(_BaseURI): def get_verifier(self): return NewDirectoryURIVerifier(self._filenode_uri.get_verifier()) + def get_storage_index(self): + return self._filenode_uri.get_storage_index() + class NewDirectoryURI(_NewDirectoryBaseURI): implements(INewDirectoryURI)