5 from cStringIO import StringIO
6 from twisted.python.failure import Failure
7 from allmydata.scripts.common import get_alias, escape_path, \
8 DefaultAliasMarker, UnknownAliasError
9 from allmydata.scripts.common_http import do_http
10 from allmydata import uri
11 from allmydata.util.stringutils import unicode_to_url, listdir_unicode, open_unicode
12 from allmydata.util.assertutil import precondition
20 class TahoeError(Exception):
21 def __init__(self, msg, resp):
23 self.status = resp.status
24 self.reason = resp.reason
25 self.body = resp.read()
def display(self, err):
    """Write this error to the file-like object `err`.

    The first line combines the message with the status and reason taken
    from the response; the second line is the response body.
    """
    summary = "%s: %s %s" % (self.msg, self.status, self.reason)
    print >>err, summary
    print >>err, self.body
31 class MissingSourceError(Exception):
35 resp = do_http("GET", url)
36 if resp.status == 200:
38 raise TahoeError("Error during GET", resp)
40 def GET_to_string(url):
45 resp = do_http("PUT", url, data)
46 if resp.status in (200, 201):
48 raise TahoeError("Error during PUT", resp)
51 resp = do_http("POST", url, data)
52 if resp.status in (200, 201):
54 raise TahoeError("Error during POST", resp)
57 url = targeturl + "?t=mkdir"
58 resp = do_http("POST", url)
59 if resp.status in (200, 201):
60 return resp.read().strip()
61 raise TahoeError("Error during mkdir", resp)
63 def make_tahoe_subdirectory(nodeurl, parent_writecap, name):
64 url = nodeurl + "/".join(["uri",
65 urllib.quote(parent_writecap),
68 resp = do_http("POST", url)
69 if resp.status in (200, 201):
70 return resp.read().strip()
71 raise TahoeError("Error during mkdir", resp)
74 class LocalFileSource:
def __init__(self, pathname):
    """Remember the local path this file source will read from.

    `pathname` is checked with precondition() to be a unicode object.
    """
    path_is_unicode = isinstance(pathname, unicode)
    precondition(path_is_unicode, pathname)
    self.pathname = pathname
79 def need_to_copy_bytes(self):
82 def open(self, caps_only):
83 return open(self.pathname, "rb")
85 class LocalFileTarget:
def __init__(self, pathname):
    """Remember the local path of the existing file this target overwrites.

    `pathname` is checked with precondition() to be a unicode object.
    """
    path_is_unicode = isinstance(pathname, unicode)
    precondition(path_is_unicode, pathname)
    self.pathname = pathname
89 def put_file(self, inf):
90 outf = open(self.pathname, "wb")
92 data = inf.read(32768)
98 class LocalMissingTarget:
def __init__(self, pathname):
    """Remember the local path at which this target will be created.

    `pathname` is checked with precondition() to be a unicode object.
    """
    path_is_unicode = isinstance(pathname, unicode)
    precondition(path_is_unicode, pathname)
    self.pathname = pathname
103 def put_file(self, inf):
104 outf = open(self.pathname, "wb")
106 data = inf.read(32768)
112 class LocalDirectorySource:
113 def __init__(self, progressfunc, pathname):
114 precondition(isinstance(pathname, unicode), pathname)
116 self.progressfunc = progressfunc
117 self.pathname = pathname
120 def populate(self, recurse):
121 if self.children is not None:
124 children = listdir_unicode(self.pathname)
125 for i,n in enumerate(children):
126 self.progressfunc("examining %d of %d" % (i, len(children)))
127 pn = os.path.join(self.pathname, n)
128 if os.path.isdir(pn):
129 child = LocalDirectorySource(self.progressfunc, pn)
130 self.children[n] = child
133 elif os.path.isfile(pn):
134 self.children[n] = LocalFileSource(pn)
136 # Could be dangling symlink; probably not copy-able.
139 class LocalDirectoryTarget:
140 def __init__(self, progressfunc, pathname):
141 precondition(isinstance(pathname, unicode), pathname)
143 self.progressfunc = progressfunc
144 self.pathname = pathname
147 def populate(self, recurse):
148 if self.children is not None:
151 children = listdir_unicode(self.pathname)
152 for i,n in enumerate(children):
153 self.progressfunc("examining %d of %d" % (i, len(children)))
154 pn = os.path.join(self.pathname, n)
155 if os.path.isdir(pn):
156 child = LocalDirectoryTarget(self.progressfunc, pn)
157 self.children[n] = child
161 assert os.path.isfile(pn)
162 self.children[n] = LocalFileTarget(pn)
164 def get_child_target(self, name):
165 if self.children is None:
167 if name in self.children:
168 return self.children[name]
169 pathname = os.path.join(self.pathname, name)
170 os.makedirs(pathname)
171 return LocalDirectoryTarget(self.progressfunc, pathname)
173 def put_file(self, name, inf):
174 precondition(isinstance(name, unicode), name)
175 pathname = os.path.join(self.pathname, name)
176 outf = open_unicode(pathname, "wb")
178 data = inf.read(32768)
184 def set_children(self):
187 class TahoeFileSource:
188 def __init__(self, nodeurl, mutable, writecap, readcap):
189 self.nodeurl = nodeurl
190 self.mutable = mutable
191 self.writecap = writecap
192 self.readcap = readcap
194 def need_to_copy_bytes(self):
199 def open(self, caps_only):
201 return StringIO(self.readcap)
202 url = self.nodeurl + "uri/" + urllib.quote(self.readcap)
203 return GET_to_file(url)
206 return self.writecap or self.readcap
208 class TahoeFileTarget:
209 def __init__(self, nodeurl, mutable, writecap, readcap, url):
210 self.nodeurl = nodeurl
211 self.mutable = mutable
212 self.writecap = writecap
213 self.readcap = readcap
216 def put_file(self, inf):
217 # We want to replace this object in-place.
219 # our do_http() call currently requires a string or a filehandle with
221 if not hasattr(inf, "seek"):
224 # TODO: this always creates immutable files. We might want an option
225 # to always create mutable files, or to copy mutable files into new
226 # mutable files. ticket #835
228 class TahoeDirectorySource:
229 def __init__(self, nodeurl, cache, progressfunc):
230 self.nodeurl = nodeurl
232 self.progressfunc = progressfunc
234 def init_from_grid(self, writecap, readcap):
235 self.writecap = writecap
236 self.readcap = readcap
237 bestcap = writecap or readcap
238 url = self.nodeurl + "uri/%s" % urllib.quote(bestcap)
239 resp = do_http("GET", url + "?t=json")
240 if resp.status != 200:
241 raise TahoeError("Error examining source directory", resp)
242 parsed = simplejson.loads(resp.read())
244 assert nodetype == "dirnode"
245 self.mutable = d.get("mutable", False) # older nodes don't provide it
246 self.children_d = dict( [(unicode(name),value)
248 in d["children"].iteritems()] )
251 def init_from_parsed(self, parsed):
253 self.writecap = ascii_or_none(d.get("rw_uri"))
254 self.readcap = ascii_or_none(d.get("ro_uri"))
255 self.mutable = d.get("mutable", False) # older nodes don't provide it
256 self.children_d = dict( [(unicode(name),value)
258 in d["children"].iteritems()] )
261 def populate(self, recurse):
262 if self.children is not None:
265 for i,(name, data) in enumerate(self.children_d.items()):
266 self.progressfunc("examining %d of %d" % (i, len(self.children_d)))
267 if data[0] == "filenode":
268 mutable = data[1].get("mutable", False)
269 writecap = ascii_or_none(data[1].get("rw_uri"))
270 readcap = ascii_or_none(data[1].get("ro_uri"))
271 self.children[name] = TahoeFileSource(self.nodeurl, mutable,
273 elif data[0] == "dirnode":
274 writecap = ascii_or_none(data[1].get("rw_uri"))
275 readcap = ascii_or_none(data[1].get("ro_uri"))
276 if writecap and writecap in self.cache:
277 child = self.cache[writecap]
278 elif readcap and readcap in self.cache:
279 child = self.cache[readcap]
281 child = TahoeDirectorySource(self.nodeurl, self.cache,
283 child.init_from_grid(writecap, readcap)
285 self.cache[writecap] = child
287 self.cache[readcap] = child
290 self.children[name] = child
292 # TODO: there should be an option to skip unknown nodes.
293 raise TahoeError("Cannot copy unknown nodes (ticket #839). "
294 "You probably need to use a later version of "
295 "Tahoe-LAFS to copy this directory.")
297 class TahoeMissingTarget:
298 def __init__(self, url):
301 def put_file(self, inf):
302 # We want to replace this object in-place.
303 if not hasattr(inf, "seek"):
306 # TODO: this always creates immutable files. We might want an option
307 # to always create mutable files, or to copy mutable files into new
def put_uri(self, filecap):
    """Link an existing `filecap` at this target's URL.

    Sends the cap to the target URL with "?t=uri" appended and returns
    whatever PUT() returns.
    """
    # I'm not sure this will always work
    link_url = self.url + "?t=uri"
    return PUT(link_url, filecap)
314 class TahoeDirectoryTarget:
315 def __init__(self, nodeurl, cache, progressfunc):
316 self.nodeurl = nodeurl
318 self.progressfunc = progressfunc
319 self.new_children = {}
321 def init_from_parsed(self, parsed):
323 self.writecap = ascii_or_none(d.get("rw_uri"))
324 self.readcap = ascii_or_none(d.get("ro_uri"))
325 self.mutable = d.get("mutable", False) # older nodes don't provide it
326 self.children_d = dict( [(unicode(name),value)
328 in d["children"].iteritems()] )
331 def init_from_grid(self, writecap, readcap):
332 self.writecap = writecap
333 self.readcap = readcap
334 bestcap = writecap or readcap
335 url = self.nodeurl + "uri/%s" % urllib.quote(bestcap)
336 resp = do_http("GET", url + "?t=json")
337 if resp.status != 200:
338 raise TahoeError("Error examining target directory", resp)
339 parsed = simplejson.loads(resp.read())
341 assert nodetype == "dirnode"
342 self.mutable = d.get("mutable", False) # older nodes don't provide it
343 self.children_d = dict( [(unicode(name),value)
345 in d["children"].iteritems()] )
348 def just_created(self, writecap):
349 self.writecap = writecap
350 self.readcap = uri.from_string(writecap).get_readonly().to_string()
355 def populate(self, recurse):
356 if self.children is not None:
359 for i,(name, data) in enumerate(self.children_d.items()):
360 self.progressfunc("examining %d of %d" % (i, len(self.children_d)))
361 if data[0] == "filenode":
362 mutable = data[1].get("mutable", False)
363 writecap = ascii_or_none(data[1].get("rw_uri"))
364 readcap = ascii_or_none(data[1].get("ro_uri"))
367 url = self.nodeurl + "/".join(["uri",
368 urllib.quote(self.writecap),
369 urllib.quote(unicode_to_url(name))])
370 self.children[name] = TahoeFileTarget(self.nodeurl, mutable,
371 writecap, readcap, url)
372 elif data[0] == "dirnode":
373 writecap = ascii_or_none(data[1].get("rw_uri"))
374 readcap = ascii_or_none(data[1].get("ro_uri"))
375 if writecap and writecap in self.cache:
376 child = self.cache[writecap]
377 elif readcap and readcap in self.cache:
378 child = self.cache[readcap]
380 child = TahoeDirectoryTarget(self.nodeurl, self.cache,
382 child.init_from_grid(writecap, readcap)
384 self.cache[writecap] = child
386 self.cache[readcap] = child
389 self.children[name] = child
391 # TODO: there should be an option to skip unknown nodes.
392 raise TahoeError("Cannot copy unknown nodes (ticket #839). "
393 "You probably need to use a later version of "
394 "Tahoe-LAFS to copy this directory.")
396 def get_child_target(self, name):
397 # return a new target for a named subdirectory of this dir
398 if self.children is None:
400 if name in self.children:
401 return self.children[name]
402 writecap = make_tahoe_subdirectory(self.nodeurl, self.writecap, name)
403 child = TahoeDirectoryTarget(self.nodeurl, self.cache,
405 child.just_created(writecap)
406 self.children[name] = child
409 def put_file(self, name, inf):
410 url = self.nodeurl + "uri"
411 if not hasattr(inf, "seek"):
413 filecap = PUT(url, inf)
414 # TODO: this always creates immutable files. We might want an option
415 # to always create mutable files, or to copy mutable files into new
417 self.new_children[name] = filecap
419 def put_uri(self, name, filecap):
420 self.new_children[name] = filecap
422 def set_children(self):
423 if not self.new_children:
425 url = (self.nodeurl + "uri/" + urllib.quote(self.writecap)
428 for (name, filecap) in self.new_children.items():
429 # it just so happens that ?t=set_children will accept both file
430 # read-caps and write-caps as ['rw_uri'], and will handle either
431 # correctly. So don't bother trying to figure out whether the one
432 # we have is read-only or read-write.
433 # TODO: think about how this affects forward-compatibility for
435 set_data[name] = ["filenode", {"rw_uri": filecap}]
436 body = simplejson.dumps(set_data)
441 def do_copy(self, options, progressfunc=None):
444 elif options['verbose']:
449 nodeurl = options['node-url']
450 if nodeurl[-1] != "/":
452 self.nodeurl = nodeurl
453 self.progressfunc = progressfunc
454 self.options = options
455 self.aliases = options.aliases
456 self.verbosity = verbosity
457 self.stdout = options.stdout
458 self.stderr = options.stderr
459 if verbosity >= 2 and not self.progressfunc:
460 def progress(message):
461 print >>self.stderr, message
462 self.progressfunc = progress
463 self.caps_only = options["caps-only"]
466 status = self.try_copy()
468 except TahoeError, te:
469 Failure().printTraceback(self.stderr)
471 te.display(self.stderr)
475 source_specs = self.options.sources
476 destination_spec = self.options.destination
477 recursive = self.options["recursive"]
480 target = self.get_target_info(destination_spec)
481 except UnknownAliasError, e:
482 self.to_stderr("error: %s" % e.args[0])
486 sources = [] # list of (name, source object)
487 for ss in source_specs:
488 name, source = self.get_source_info(ss)
489 sources.append( (name, source) )
490 except MissingSourceError, e:
491 self.to_stderr("No such file or directory %s" % e.args[0])
493 except UnknownAliasError, e:
494 self.to_stderr("error: %s" % e.args[0])
497 have_source_dirs = bool([s for (name,s) in sources
498 if isinstance(s, (LocalDirectorySource,
499 TahoeDirectorySource))])
501 if have_source_dirs and not recursive:
502 self.to_stderr("cannot copy directories without --recursive")
505 if isinstance(target, (LocalFileTarget, TahoeFileTarget)):
506 # cp STUFF foo.txt, where foo.txt already exists. This limits the
507 # possibilities considerably.
509 self.to_stderr("target '%s' is not a directory" % destination_spec)
512 self.to_stderr("cannot copy directory into a file")
514 name, source = sources[0]
515 return self.copy_file(source, target)
517 if isinstance(target, (LocalMissingTarget, TahoeMissingTarget)):
519 return self.copy_to_directory(sources, target)
521 # if we have -r, we'll auto-create the target directory. Without
522 # it, we'll only create a file.
523 self.to_stderr("cannot copy multiple files into a file without -r")
526 name, source = sources[0]
527 return self.copy_file(source, target)
529 if isinstance(target, (LocalDirectoryTarget, TahoeDirectoryTarget)):
530 # We're copying to an existing directory -- make sure that we
531 # have target names for everything
532 for (name, source) in sources:
533 if name is None and isinstance(source, TahoeFileSource):
535 "error: you must specify a destination filename")
537 return self.copy_to_directory(sources, target)
539 self.to_stderr("unknown target")
def to_stderr(self, text):
    """Print `text` as one line on this object's stderr stream."""
    err_stream = self.stderr
    print >>err_stream, text
545 def get_target_info(self, destination_spec):
546 rootcap, path = get_alias(self.aliases, destination_spec, None)
547 if rootcap == DefaultAliasMarker:
548 # no alias, so this is a local file
549 pathname = os.path.abspath(os.path.expanduser(path))
550 if not os.path.exists(pathname):
551 t = LocalMissingTarget(pathname)
552 elif os.path.isdir(pathname):
553 t = LocalDirectoryTarget(self.progress, pathname)
555 assert os.path.isfile(pathname), pathname
556 t = LocalFileTarget(pathname) # non-empty
558 # this is a tahoe object
559 url = self.nodeurl + "uri/%s" % urllib.quote(rootcap)
561 url += "/" + escape_path(path)
563 resp = do_http("GET", url + "?t=json")
564 if resp.status == 404:
566 t = TahoeMissingTarget(url)
567 elif resp.status == 200:
568 parsed = simplejson.loads(resp.read())
570 if nodetype == "dirnode":
571 t = TahoeDirectoryTarget(self.nodeurl, self.cache,
573 t.init_from_parsed(parsed)
575 writecap = ascii_or_none(d.get("rw_uri"))
576 readcap = ascii_or_none(d.get("ro_uri"))
577 mutable = d.get("mutable", False)
578 t = TahoeFileTarget(self.nodeurl, mutable,
579 writecap, readcap, url)
581 raise TahoeError("Error examining target '%s'"
582 % destination_spec, resp)
585 def get_source_info(self, source_spec):
586 rootcap, path = get_alias(self.aliases, source_spec, None)
587 if rootcap == DefaultAliasMarker:
588 # no alias, so this is a local file
589 pathname = os.path.abspath(os.path.expanduser(path))
590 name = os.path.basename(pathname)
591 if not os.path.exists(pathname):
592 raise MissingSourceError(source_spec)
593 if os.path.isdir(pathname):
594 t = LocalDirectorySource(self.progress, pathname)
596 assert os.path.isfile(pathname)
597 t = LocalFileSource(pathname) # non-empty
599 # this is a tahoe object
600 url = self.nodeurl + "uri/%s" % urllib.quote(rootcap)
603 url += "/" + escape_path(path)
604 last_slash = path.rfind("/")
607 name = path[last_slash+1:]
609 resp = do_http("GET", url + "?t=json")
610 if resp.status == 404:
611 raise MissingSourceError(source_spec)
612 elif resp.status != 200:
613 raise TahoeError("Error examining source '%s'" % source_spec,
615 parsed = simplejson.loads(resp.read())
617 if nodetype == "dirnode":
618 t = TahoeDirectorySource(self.nodeurl, self.cache,
620 t.init_from_parsed(parsed)
622 writecap = ascii_or_none(d.get("rw_uri"))
623 readcap = ascii_or_none(d.get("ro_uri"))
624 mutable = d.get("mutable", False) # older nodes don't provide it
625 if source_spec.rfind('/') != -1:
626 name = source_spec[source_spec.rfind('/')+1:]
627 t = TahoeFileSource(self.nodeurl, mutable, writecap, readcap)
631 def dump_graph(self, s, indent=" "):
632 for name, child in s.children.items():
633 print indent + name + ":" + str(child)
634 if isinstance(child, (LocalDirectorySource, TahoeDirectorySource)):
635 self.dump_graph(child, indent+" ")
637 def copy_to_directory(self, source_infos, target):
638 # step one: build a recursive graph of the source tree. This returns
639 # a dictionary, with child names as keys, and values that are either
640 # Directory or File instances (local or tahoe).
641 source_dirs = self.build_graphs(source_infos)
642 source_files = [source for source in source_infos
643 if isinstance(source[1], (LocalFileSource,
647 #for s in source_dirs:
650 # step two: create the top-level target directory object
651 if isinstance(target, LocalMissingTarget):
652 os.makedirs(target.pathname)
653 target = LocalDirectoryTarget(self.progress, target.pathname)
654 elif isinstance(target, TahoeMissingTarget):
655 writecap = mkdir(target.url)
656 target = TahoeDirectoryTarget(self.nodeurl, self.cache,
658 target.just_created(writecap)
659 assert isinstance(target, (LocalDirectoryTarget, TahoeDirectoryTarget))
660 target.populate(False)
662 # step three: find a target for each source node, creating
663 # directories as necessary. 'targetmap' is a dictionary that uses
664 # target Directory instances as keys, and has values of
665 # (name->sourceobject) dicts for all the files that need to wind up
668 # sources are all LocalFile/LocalDirectory/TahoeFile/TahoeDirectory
669 # target is LocalDirectory/TahoeDirectory
671 self.progress("attaching sources to targets, "
672 "%d files / %d dirs in root" %
673 (len(source_files), len(source_dirs)))
676 self.files_to_copy = 0
678 for (name,s) in source_files:
679 self.attach_to_target(s, name, target)
680 self.files_to_copy += 1
682 for source in source_dirs:
683 self.assign_targets(source, target)
685 self.progress("targets assigned, %s dirs, %s files" %
686 (len(self.targetmap), self.files_to_copy))
688 self.progress("starting copy, %d files, %d directories" %
689 (self.files_to_copy, len(self.targetmap)))
690 self.files_copied = 0
691 self.targets_finished = 0
693 # step four: walk through the list of targets. For each one, copy all
694 # the files. If the target is a TahoeDirectory, upload and create
695 # read-caps, then do a set_children to the target directory.
697 for target in self.targetmap:
698 self.copy_files_to_target(self.targetmap[target], target)
699 self.targets_finished += 1
700 self.progress("%d/%d directories" %
701 (self.targets_finished, len(self.targetmap)))
703 return self.announce_success("files copied")
def attach_to_target(self, source, name, target):
    """Schedule `source` to be copied into `target` under `name`.

    `targetmap` maps each target to a name->source dict; the inner dict is
    created the first time a target is seen. Each call also increments
    `files_to_copy` by one.
    """
    per_target = self.targetmap.setdefault(target, {})
    per_target[name] = source
    self.files_to_copy += 1
711 def assign_targets(self, source, target):
712 # copy everything in the source into the target
713 assert isinstance(source, (LocalDirectorySource, TahoeDirectorySource))
715 for name, child in source.children.items():
716 if isinstance(child, (LocalDirectorySource, TahoeDirectorySource)):
717 # we will need a target directory for this one
718 subtarget = target.get_child_target(name)
719 self.assign_targets(child, subtarget)
721 assert isinstance(child, (LocalFileSource, TahoeFileSource))
722 self.attach_to_target(child, name, target)
def copy_files_to_target(self, targetmap, target):
    """Copy every file assigned to `target`, then finalize the target.

    `targetmap` here is the name->source dict for this single target (not
    the whole `self.targetmap`). Each file is handed to copy_file_into(),
    the copied-file counter is bumped and a progress line is reported.
    """
    file_types = (LocalFileSource, TahoeFileSource)
    for child_name, file_source in targetmap.items():
        assert isinstance(file_source, file_types)
        self.copy_file_into(file_source, child_name, target)
        self.files_copied += 1
        counters = (self.files_copied, self.files_to_copy,
                    self.targets_finished, len(self.targetmap))
        self.progress("%d/%d files, %d/%d directories" % counters)
    # called once per target, after all of its files have been handled
    target.set_children()
736 def need_to_copy_bytes(self, source, target):
737 if source.need_to_copy_bytes:
738 # mutable tahoe files, and local files
740 if isinstance(target, (LocalFileTarget, LocalDirectoryTarget)):
744 def announce_success(self, msg):
745 if self.verbosity >= 1:
746 print >>self.stdout, "Success: %s" % msg
749 def copy_file(self, source, target):
750 assert isinstance(source, (LocalFileSource, TahoeFileSource))
751 assert isinstance(target, (LocalFileTarget, TahoeFileTarget,
752 LocalMissingTarget, TahoeMissingTarget))
753 if self.need_to_copy_bytes(source, target):
754 # if the target is a local directory, this will just write the
755 # bytes to disk. If it is a tahoe directory, it will upload the
756 # data, and stash the new filecap for a later set_children call.
757 f = source.open(self.caps_only)
759 return self.announce_success("file copied")
760 # otherwise we're copying tahoe to tahoe, and using immutable files,
761 # so we can just make a link. TODO: this probably won't always work:
762 # need to enumerate the cases and analyze them.
763 target.put_uri(source.bestcap())
764 return self.announce_success("file linked")
766 def copy_file_into(self, source, name, target):
767 assert isinstance(source, (LocalFileSource, TahoeFileSource))
768 assert isinstance(target, (LocalDirectoryTarget, TahoeDirectoryTarget))
769 if self.need_to_copy_bytes(source, target):
770 # if the target is a local directory, this will just write the
771 # bytes to disk. If it is a tahoe directory, it will upload the
772 # data, and stash the new filecap for a later set_children call.
773 f = source.open(self.caps_only)
774 target.put_file(name, f)
776 # otherwise we're copying tahoe to tahoe, and using immutable files,
777 # so we can just make a link
778 target.put_uri(name, source.bestcap())
781 def progress(self, message):
783 if self.progressfunc:
784 self.progressfunc(message)
786 def build_graphs(self, source_infos):
788 for name,source in source_infos:
789 if isinstance(source, (LocalDirectorySource, TahoeDirectorySource)):
790 source.populate(True)
791 graphs.append(source)
796 return Copier().do_copy(options)
798 # error cases that need improvement:
799 # local-file-in-the-way
801 # tahoe cp -r my:docs/proposed/denver.txt proposed/denver.txt
802 # handling of unknown nodes
804 # things that maybe should be errors but aren't
805 # local-dir-in-the-way
807 # tahoe cp -r my:docs/proposed/denver.txt denver.txt
808 # (creates denver.txt/denver.txt)
810 # error cases that look good:
811 # tahoe cp -r my:docs/missing missing
813 # tahoe cp -r my:docs/missing missing -> No JSON object could be decoded
814 # tahoe-file-in-the-way (when we want to make a directory)
815 # tahoe put README my:docs
816 # tahoe cp -r docs/proposed my:docs/proposed