6 from cStringIO import StringIO
7 from twisted.python.failure import Failure
8 from allmydata.scripts.common import get_alias, escape_path, \
9 DefaultAliasMarker, UnknownAliasError
10 from allmydata.scripts.common_http import do_http
11 from allmydata import uri
12 from twisted.python import usage
13 from allmydata.util.stringutils import unicode_to_url, listdir_unicode, open_unicode
14 from allmydata.util.assertutil import precondition
class TahoeError(Exception):
    """Error raised while talking to the Tahoe node, or by the copier itself.

    msg is a short human-readable description of what failed. resp, when
    given, is the HTTP response object for the failed request; its status,
    reason, and body are captured at construction time (the body is read
    immediately via resp.read()). resp may be omitted for errors that do not
    come from an HTTP response, in which case status, reason, and body are
    all None.
    """
    def __init__(self, msg, resp=None):
        # display() needs the message, so it must be stored on the instance.
        self.msg = msg
        if resp is None:
            # Some callers raise TahoeError with only a message; accept that
            # instead of failing with a TypeError before the real error can
            # be reported.
            self.status = None
            self.reason = None
            self.body = None
        else:
            self.status = resp.status
            self.reason = resp.reason
            self.body = resp.read()

    def display(self, err):
        """Write a description of this error to the file-like object err."""
        if self.status is None:
            # No HTTP response was attached: only the message is meaningful.
            print >>err, self.msg
            return
        print >>err, "%s: %s %s" % (self.msg, self.status, self.reason)
        print >>err, self.body
33 class MissingSourceError(Exception):
37 resp = do_http("GET", url)
38 if resp.status == 200:
40 raise TahoeError("Error during GET", resp)
42 def GET_to_string(url):
47 resp = do_http("PUT", url, data)
48 if resp.status in (200, 201):
50 raise TahoeError("Error during PUT", resp)
53 resp = do_http("POST", url, data)
54 if resp.status in (200, 201):
56 raise TahoeError("Error during POST", resp)
59 url = targeturl + "?t=mkdir"
60 resp = do_http("POST", url)
61 if resp.status in (200, 201):
62 return resp.read().strip()
63 raise TahoeError("Error during mkdir", resp)
65 def make_tahoe_subdirectory(nodeurl, parent_writecap, name):
66 url = nodeurl + "/".join(["uri",
67 urllib.quote(parent_writecap),
70 resp = do_http("POST", url)
71 if resp.status in (200, 201):
72 return resp.read().strip()
73 raise TahoeError("Error during mkdir", resp)
76 class LocalFileSource:
77 def __init__(self, pathname):
78 precondition(isinstance(pathname, unicode), pathname)
79 self.pathname = pathname
81 def need_to_copy_bytes(self):
84 def open(self, caps_only):
85 return open(self.pathname, "rb")
87 class LocalFileTarget:
88 def __init__(self, pathname):
89 precondition(isinstance(pathname, unicode), pathname)
90 self.pathname = pathname
91 def put_file(self, inf):
92 outf = open(self.pathname, "wb")
94 data = inf.read(32768)
100 class LocalMissingTarget:
101 def __init__(self, pathname):
102 precondition(isinstance(pathname, unicode), pathname)
103 self.pathname = pathname
105 def put_file(self, inf):
106 outf = open(self.pathname, "wb")
108 data = inf.read(32768)
114 class LocalDirectorySource:
115 def __init__(self, progressfunc, pathname):
116 precondition(isinstance(pathname, unicode), pathname)
118 self.progressfunc = progressfunc
119 self.pathname = pathname
122 def populate(self, recurse):
123 if self.children is not None:
126 children = listdir_unicode(self.pathname)
127 for i,n in enumerate(children):
128 self.progressfunc("examining %d of %d" % (i, len(children)))
129 pn = os.path.join(self.pathname, n)
130 if os.path.isdir(pn):
131 child = LocalDirectorySource(self.progressfunc, pn)
132 self.children[n] = child
135 elif os.path.isfile(pn):
136 self.children[n] = LocalFileSource(pn)
138 # Could be dangling symlink; probably not copy-able.
141 class LocalDirectoryTarget:
142 def __init__(self, progressfunc, pathname):
143 precondition(isinstance(pathname, unicode), pathname)
145 self.progressfunc = progressfunc
146 self.pathname = pathname
149 def populate(self, recurse):
150 if self.children is not None:
153 children = listdir_unicode(self.pathname)
154 for i,n in enumerate(children):
155 self.progressfunc("examining %d of %d" % (i, len(children)))
156 pn = os.path.join(self.pathname, n)
157 if os.path.isdir(pn):
158 child = LocalDirectoryTarget(self.progressfunc, pn)
159 self.children[n] = child
163 assert os.path.isfile(pn)
164 self.children[n] = LocalFileTarget(pn)
166 def get_child_target(self, name):
167 if self.children is None:
169 if name in self.children:
170 return self.children[name]
171 pathname = os.path.join(self.pathname, name)
172 os.makedirs(pathname)
173 return LocalDirectoryTarget(self.progressfunc, pathname)
175 def put_file(self, name, inf):
176 precondition(isinstance(name, unicode), name)
177 pathname = os.path.join(self.pathname, name)
178 outf = open_unicode(pathname, "wb")
180 data = inf.read(32768)
186 def set_children(self):
189 class TahoeFileSource:
190 def __init__(self, nodeurl, mutable, writecap, readcap):
191 self.nodeurl = nodeurl
192 self.mutable = mutable
193 self.writecap = writecap
194 self.readcap = readcap
196 def need_to_copy_bytes(self):
201 def open(self, caps_only):
203 return StringIO(self.readcap)
204 url = self.nodeurl + "uri/" + urllib.quote(self.readcap)
205 return GET_to_file(url)
208 return self.writecap or self.readcap
210 class TahoeFileTarget:
211 def __init__(self, nodeurl, mutable, writecap, readcap, url):
212 self.nodeurl = nodeurl
213 self.mutable = mutable
214 self.writecap = writecap
215 self.readcap = readcap
218 def put_file(self, inf):
219 # We want to replace this object in-place.
221 # our do_http() call currently requires a string or a filehandle with
223 if not hasattr(inf, "seek"):
226 # TODO: this always creates immutable files. We might want an option
227 # to always create mutable files, or to copy mutable files into new
228 # mutable files. ticket #835
230 class TahoeDirectorySource:
231 def __init__(self, nodeurl, cache, progressfunc):
232 self.nodeurl = nodeurl
234 self.progressfunc = progressfunc
236 def init_from_grid(self, writecap, readcap):
237 self.writecap = writecap
238 self.readcap = readcap
239 bestcap = writecap or readcap
240 url = self.nodeurl + "uri/%s" % urllib.quote(bestcap)
241 resp = do_http("GET", url + "?t=json")
242 if resp.status != 200:
243 raise TahoeError("Error examining source directory", resp)
244 parsed = simplejson.loads(resp.read())
246 assert nodetype == "dirnode"
247 self.mutable = d.get("mutable", False) # older nodes don't provide it
248 self.children_d = dict( [(unicode(name),value)
250 in d["children"].iteritems()] )
253 def init_from_parsed(self, parsed):
255 self.writecap = ascii_or_none(d.get("rw_uri"))
256 self.readcap = ascii_or_none(d.get("ro_uri"))
257 self.mutable = d.get("mutable", False) # older nodes don't provide it
258 self.children_d = dict( [(unicode(name),value)
260 in d["children"].iteritems()] )
263 def populate(self, recurse):
264 if self.children is not None:
267 for i,(name, data) in enumerate(self.children_d.items()):
268 self.progressfunc("examining %d of %d" % (i, len(self.children_d)))
269 if data[0] == "filenode":
270 mutable = data[1].get("mutable", False)
271 writecap = ascii_or_none(data[1].get("rw_uri"))
272 readcap = ascii_or_none(data[1].get("ro_uri"))
273 self.children[name] = TahoeFileSource(self.nodeurl, mutable,
275 elif data[0] == "dirnode":
276 writecap = ascii_or_none(data[1].get("rw_uri"))
277 readcap = ascii_or_none(data[1].get("ro_uri"))
278 if writecap and writecap in self.cache:
279 child = self.cache[writecap]
280 elif readcap and readcap in self.cache:
281 child = self.cache[readcap]
283 child = TahoeDirectorySource(self.nodeurl, self.cache,
285 child.init_from_grid(writecap, readcap)
287 self.cache[writecap] = child
289 self.cache[readcap] = child
292 self.children[name] = child
294 # TODO: there should be an option to skip unknown nodes.
295 raise TahoeError("Cannot copy unknown nodes (ticket #839). "
296 "You probably need to use a later version of "
297 "Tahoe-LAFS to copy this directory.")
299 class TahoeMissingTarget:
300 def __init__(self, url):
303 def put_file(self, inf):
304 # We want to replace this object in-place.
305 if not hasattr(inf, "seek"):
308 # TODO: this always creates immutable files. We might want an option
309 # to always create mutable files, or to copy mutable files into new
def put_uri(self, filecap):
    """PUT filecap to this target's URL with "?t=uri" appended.

    Returns whatever PUT() returns.
    """
    # I'm not sure this will always work
    link_url = self.url + "?t=uri"
    return PUT(link_url, filecap)
316 class TahoeDirectoryTarget:
317 def __init__(self, nodeurl, cache, progressfunc):
318 self.nodeurl = nodeurl
320 self.progressfunc = progressfunc
321 self.new_children = {}
323 def init_from_parsed(self, parsed):
325 self.writecap = ascii_or_none(d.get("rw_uri"))
326 self.readcap = ascii_or_none(d.get("ro_uri"))
327 self.mutable = d.get("mutable", False) # older nodes don't provide it
328 self.children_d = dict( [(unicode(name),value)
330 in d["children"].iteritems()] )
333 def init_from_grid(self, writecap, readcap):
334 self.writecap = writecap
335 self.readcap = readcap
336 bestcap = writecap or readcap
337 url = self.nodeurl + "uri/%s" % urllib.quote(bestcap)
338 resp = do_http("GET", url + "?t=json")
339 if resp.status != 200:
340 raise TahoeError("Error examining target directory", resp)
341 parsed = simplejson.loads(resp.read())
343 assert nodetype == "dirnode"
344 self.mutable = d.get("mutable", False) # older nodes don't provide it
345 self.children_d = dict( [(unicode(name),value)
347 in d["children"].iteritems()] )
350 def just_created(self, writecap):
351 self.writecap = writecap
352 self.readcap = uri.from_string(writecap).get_readonly().to_string()
357 def populate(self, recurse):
358 if self.children is not None:
361 for i,(name, data) in enumerate(self.children_d.items()):
362 self.progressfunc("examining %d of %d" % (i, len(self.children_d)))
363 if data[0] == "filenode":
364 mutable = data[1].get("mutable", False)
365 writecap = ascii_or_none(data[1].get("rw_uri"))
366 readcap = ascii_or_none(data[1].get("ro_uri"))
369 url = self.nodeurl + "/".join(["uri",
370 urllib.quote(self.writecap),
371 urllib.quote(unicode_to_url(name))])
372 self.children[name] = TahoeFileTarget(self.nodeurl, mutable,
373 writecap, readcap, url)
374 elif data[0] == "dirnode":
375 writecap = ascii_or_none(data[1].get("rw_uri"))
376 readcap = ascii_or_none(data[1].get("ro_uri"))
377 if writecap and writecap in self.cache:
378 child = self.cache[writecap]
379 elif readcap and readcap in self.cache:
380 child = self.cache[readcap]
382 child = TahoeDirectoryTarget(self.nodeurl, self.cache,
384 child.init_from_grid(writecap, readcap)
386 self.cache[writecap] = child
388 self.cache[readcap] = child
391 self.children[name] = child
393 # TODO: there should be an option to skip unknown nodes.
394 raise TahoeError("Cannot copy unknown nodes (ticket #839). "
395 "You probably need to use a later version of "
396 "Tahoe-LAFS to copy this directory.")
398 def get_child_target(self, name):
399 # return a new target for a named subdirectory of this dir
400 if self.children is None:
402 if name in self.children:
403 return self.children[name]
404 writecap = make_tahoe_subdirectory(self.nodeurl, self.writecap, name)
405 child = TahoeDirectoryTarget(self.nodeurl, self.cache,
407 child.just_created(writecap)
408 self.children[name] = child
411 def put_file(self, name, inf):
412 url = self.nodeurl + "uri"
413 if not hasattr(inf, "seek"):
415 filecap = PUT(url, inf)
416 # TODO: this always creates immutable files. We might want an option
417 # to always create mutable files, or to copy mutable files into new
419 self.new_children[name] = filecap
def put_uri(self, name, filecap):
    """Remember filecap under name in self.new_children.

    Only the in-memory mapping is updated here.
    """
    pending_children = self.new_children
    pending_children[name] = filecap
424 def set_children(self):
425 if not self.new_children:
427 url = (self.nodeurl + "uri/" + urllib.quote(self.writecap)
430 for (name, filecap) in self.new_children.items():
431 # it just so happens that ?t=set_children will accept both file
432 # read-caps and write-caps as ['rw_uri'], and will handle either
433 # correctly. So don't bother trying to figure out whether the one
434 # we have is read-only or read-write.
435 # TODO: think about how this affects forward-compatibility for
437 set_data[name] = ["filenode", {"rw_uri": filecap}]
438 body = simplejson.dumps(set_data)
443 def do_copy(self, options, progressfunc=None):
446 elif options['verbose']:
451 nodeurl = options['node-url']
452 if nodeurl[-1] != "/":
454 self.nodeurl = nodeurl
455 self.progressfunc = progressfunc
456 self.options = options
457 self.aliases = options.aliases
458 self.verbosity = verbosity
459 self.stdout = options.stdout
460 self.stderr = options.stderr
461 if verbosity >= 2 and not self.progressfunc:
462 def progress(message):
463 print >>self.stderr, message
464 self.progressfunc = progress
465 self.caps_only = options["caps-only"]
468 status = self.try_copy()
470 except TahoeError, te:
471 Failure().printTraceback(self.stderr)
473 te.display(self.stderr)
477 source_specs = self.options.sources
478 destination_spec = self.options.destination
479 recursive = self.options["recursive"]
482 target = self.get_target_info(destination_spec)
483 except UnknownAliasError, e:
484 self.to_stderr("error: %s" % e.args[0])
488 sources = [] # list of (name, source object)
489 for ss in source_specs:
490 name, source = self.get_source_info(ss)
491 sources.append( (name, source) )
492 except MissingSourceError, e:
493 self.to_stderr("No such file or directory %s" % e.args[0])
495 except UnknownAliasError, e:
496 self.to_stderr("error: %s" % e.args[0])
499 have_source_dirs = bool([s for (name,s) in sources
500 if isinstance(s, (LocalDirectorySource,
501 TahoeDirectorySource))])
503 if have_source_dirs and not recursive:
504 self.to_stderr("cannot copy directories without --recursive")
507 if isinstance(target, (LocalFileTarget, TahoeFileTarget)):
508 # cp STUFF foo.txt, where foo.txt already exists. This limits the
509 # possibilities considerably.
511 self.to_stderr("target '%s' is not a directory" % destination_spec)
514 self.to_stderr("cannot copy directory into a file")
516 name, source = sources[0]
517 return self.copy_file(source, target)
519 if isinstance(target, (LocalMissingTarget, TahoeMissingTarget)):
521 return self.copy_to_directory(sources, target)
523 # if we have -r, we'll auto-create the target directory. Without
524 # it, we'll only create a file.
525 self.to_stderr("cannot copy multiple files into a file without -r")
528 name, source = sources[0]
529 return self.copy_file(source, target)
531 if isinstance(target, (LocalDirectoryTarget, TahoeDirectoryTarget)):
532 # We're copying to an existing directory -- make sure that we
533 # have target names for everything
534 for (name, source) in sources:
535 if name is None and isinstance(source, TahoeFileSource):
537 "error: you must specify a destination filename")
539 return self.copy_to_directory(sources, target)
541 self.to_stderr("unknown target")
def to_stderr(self, text):
    """Print text as a single line on this object's stderr stream."""
    err_stream = self.stderr
    print >>err_stream, text
547 def get_target_info(self, destination_spec):
548 rootcap, path = get_alias(self.aliases, destination_spec, None)
549 if rootcap == DefaultAliasMarker:
550 # no alias, so this is a local file
551 pathname = os.path.abspath(os.path.expanduser(path))
552 if not os.path.exists(pathname):
553 t = LocalMissingTarget(pathname)
554 elif os.path.isdir(pathname):
555 t = LocalDirectoryTarget(self.progress, pathname)
557 assert os.path.isfile(pathname), pathname
558 t = LocalFileTarget(pathname) # non-empty
560 # this is a tahoe object
561 url = self.nodeurl + "uri/%s" % urllib.quote(rootcap)
563 url += "/" + escape_path(path)
565 resp = do_http("GET", url + "?t=json")
566 if resp.status == 404:
568 t = TahoeMissingTarget(url)
569 elif resp.status == 200:
570 parsed = simplejson.loads(resp.read())
572 if nodetype == "dirnode":
573 t = TahoeDirectoryTarget(self.nodeurl, self.cache,
575 t.init_from_parsed(parsed)
577 writecap = ascii_or_none(d.get("rw_uri"))
578 readcap = ascii_or_none(d.get("ro_uri"))
579 mutable = d.get("mutable", False)
580 t = TahoeFileTarget(self.nodeurl, mutable,
581 writecap, readcap, url)
583 raise TahoeError("Error examining target '%s'"
584 % destination_spec, resp)
587 def get_source_info(self, source_spec):
588 rootcap, path = get_alias(self.aliases, source_spec, None)
589 if rootcap == DefaultAliasMarker:
590 # no alias, so this is a local file
591 pathname = os.path.abspath(os.path.expanduser(path))
592 name = os.path.basename(pathname)
593 if not os.path.exists(pathname):
594 raise MissingSourceError(source_spec)
595 if os.path.isdir(pathname):
596 t = LocalDirectorySource(self.progress, pathname)
598 assert os.path.isfile(pathname)
599 t = LocalFileSource(pathname) # non-empty
601 # this is a tahoe object
602 url = self.nodeurl + "uri/%s" % urllib.quote(rootcap)
605 url += "/" + escape_path(path)
606 last_slash = path.rfind("/")
609 name = path[last_slash+1:]
611 resp = do_http("GET", url + "?t=json")
612 if resp.status == 404:
613 raise MissingSourceError(source_spec)
614 elif resp.status != 200:
615 raise TahoeError("Error examining source '%s'" % source_spec,
617 parsed = simplejson.loads(resp.read())
619 if nodetype == "dirnode":
620 t = TahoeDirectorySource(self.nodeurl, self.cache,
622 t.init_from_parsed(parsed)
624 writecap = ascii_or_none(d.get("rw_uri"))
625 readcap = ascii_or_none(d.get("ro_uri"))
626 mutable = d.get("mutable", False) # older nodes don't provide it
627 if source_spec.rfind('/') != -1:
628 name = source_spec[source_spec.rfind('/')+1:]
629 t = TahoeFileSource(self.nodeurl, mutable, writecap, readcap)
633 def dump_graph(self, s, indent=" "):
634 for name, child in s.children.items():
635 print indent + name + ":" + str(child)
636 if isinstance(child, (LocalDirectorySource, TahoeDirectorySource)):
637 self.dump_graph(child, indent+" ")
639 def copy_to_directory(self, source_infos, target):
640 # step one: build a recursive graph of the source tree. This returns
641 # a dictionary, with child names as keys, and values that are either
642 # Directory or File instances (local or tahoe).
643 source_dirs = self.build_graphs(source_infos)
644 source_files = [source for source in source_infos
645 if isinstance(source[1], (LocalFileSource,
649 #for s in source_dirs:
652 # step two: create the top-level target directory object
653 if isinstance(target, LocalMissingTarget):
654 os.makedirs(target.pathname)
655 target = LocalDirectoryTarget(self.progress, target.pathname)
656 elif isinstance(target, TahoeMissingTarget):
657 writecap = mkdir(target.url)
658 target = TahoeDirectoryTarget(self.nodeurl, self.cache,
660 target.just_created(writecap)
661 assert isinstance(target, (LocalDirectoryTarget, TahoeDirectoryTarget))
662 target.populate(False)
664 # step three: find a target for each source node, creating
665 # directories as necessary. 'targetmap' is a dictionary that uses
666 # target Directory instances as keys, and has values of
667 # (name->sourceobject) dicts for all the files that need to wind up
670 # sources are all LocalFile/LocalDirectory/TahoeFile/TahoeDirectory
671 # target is LocalDirectory/TahoeDirectory
673 self.progress("attaching sources to targets, "
674 "%d files / %d dirs in root" %
675 (len(source_files), len(source_dirs)))
678 self.files_to_copy = 0
680 for (name,s) in source_files:
681 self.attach_to_target(s, name, target)
682 self.files_to_copy += 1
684 for source in source_dirs:
685 self.assign_targets(source, target)
687 self.progress("targets assigned, %s dirs, %s files" %
688 (len(self.targetmap), self.files_to_copy))
690 self.progress("starting copy, %d files, %d directories" %
691 (self.files_to_copy, len(self.targetmap)))
692 self.files_copied = 0
693 self.targets_finished = 0
695 # step four: walk through the list of targets. For each one, copy all
696 # the files. If the target is a TahoeDirectory, upload and create
697 # read-caps, then do a set_children to the target directory.
699 for target in self.targetmap:
700 self.copy_files_to_target(self.targetmap[target], target)
701 self.targets_finished += 1
702 self.progress("%d/%d directories" %
703 (self.targets_finished, len(self.targetmap)))
705 return self.announce_success("files copied")
def attach_to_target(self, source, name, target):
    """Schedule source to be copied into target under the given name.

    self.targetmap maps each target to a {name: source} dict; the entry for
    target is created on first use. Also bumps self.files_to_copy by one.
    """
    scheduled = self.targetmap.setdefault(target, {})
    scheduled[name] = source
    self.files_to_copy = self.files_to_copy + 1
713 def assign_targets(self, source, target):
714 # copy everything in the source into the target
715 assert isinstance(source, (LocalDirectorySource, TahoeDirectorySource))
717 for name, child in source.children.items():
718 if isinstance(child, (LocalDirectorySource, TahoeDirectorySource)):
719 # we will need a target directory for this one
720 subtarget = target.get_child_target(name)
721 self.assign_targets(child, subtarget)
723 assert isinstance(child, (LocalFileSource, TahoeFileSource))
724 self.attach_to_target(child, name, target)
def copy_files_to_target(self, targetmap, target):
    """Copy every file listed in targetmap into target, then finalize it.

    targetmap is a {name: source} dict. Each source is handed to
    self.copy_file_into(); after each one the copied-files counter is bumped
    and a progress message is emitted. target.set_children() is called once
    at the end, even when targetmap is empty.
    """
    for child_name, child_source in targetmap.items():
        assert isinstance(child_source, (LocalFileSource, TahoeFileSource))
        self.copy_file_into(child_source, child_name, target)
        self.files_copied += 1
        counts = (self.files_copied, self.files_to_copy,
                  self.targets_finished, len(self.targetmap))
        self.progress("%d/%d files, %d/%d directories" % counts)
    target.set_children()
738 def need_to_copy_bytes(self, source, target):
739 if source.need_to_copy_bytes:
740 # mutable tahoe files, and local files
742 if isinstance(target, (LocalFileTarget, LocalDirectoryTarget)):
746 def announce_success(self, msg):
747 if self.verbosity >= 1:
748 print >>self.stdout, "Success: %s" % msg
751 def copy_file(self, source, target):
752 assert isinstance(source, (LocalFileSource, TahoeFileSource))
753 assert isinstance(target, (LocalFileTarget, TahoeFileTarget,
754 LocalMissingTarget, TahoeMissingTarget))
755 if self.need_to_copy_bytes(source, target):
756 # if the target is a local directory, this will just write the
757 # bytes to disk. If it is a tahoe directory, it will upload the
758 # data, and stash the new filecap for a later set_children call.
759 f = source.open(self.caps_only)
761 return self.announce_success("file copied")
762 # otherwise we're copying tahoe to tahoe, and using immutable files,
763 # so we can just make a link. TODO: this probably won't always work:
764 # need to enumerate the cases and analyze them.
765 target.put_uri(source.bestcap())
766 return self.announce_success("file linked")
768 def copy_file_into(self, source, name, target):
769 assert isinstance(source, (LocalFileSource, TahoeFileSource))
770 assert isinstance(target, (LocalDirectoryTarget, TahoeDirectoryTarget))
771 if self.need_to_copy_bytes(source, target):
772 # if the target is a local directory, this will just write the
773 # bytes to disk. If it is a tahoe directory, it will upload the
774 # data, and stash the new filecap for a later set_children call.
775 f = source.open(self.caps_only)
776 target.put_file(name, f)
778 # otherwise we're copying tahoe to tahoe, and using immutable files,
779 # so we can just make a link
780 target.put_uri(name, source.bestcap())
def progress(self, message):
    """Pass message to self.progressfunc, if one is set; otherwise do nothing."""
    callback = self.progressfunc
    if not callback:
        return
    callback(message)
788 def build_graphs(self, source_infos):
790 for name,source in source_infos:
791 if isinstance(source, (LocalDirectorySource, TahoeDirectorySource)):
792 source.populate(True)
793 graphs.append(source)
798 return Copier().do_copy(options)
800 # error cases that need improvement:
801 # local-file-in-the-way
803 # tahoe cp -r my:docs/proposed/denver.txt proposed/denver.txt
804 # handling of unknown nodes
806 # things that maybe should be errors but aren't
807 # local-dir-in-the-way
809 # tahoe cp -r my:docs/proposed/denver.txt denver.txt
810 # (creates denver.txt/denver.txt)
812 # error cases that look good:
813 # tahoe cp -r my:docs/missing missing
815 # tahoe cp -r my:docs/missing missing -> No JSON object could be decoded
816 # tahoe-file-in-the-way (when we want to make a directory)
817 # tahoe put README my:docs
818 # tahoe cp -r docs/proposed my:docs/proposed