5 from cStringIO import StringIO
6 from twisted.python.failure import Failure
7 from allmydata.scripts.common import get_alias, escape_path, \
8 DefaultAliasMarker, TahoeError
9 from allmydata.scripts.common_http import do_http, HTTPError
10 from allmydata import uri
11 from allmydata.util import fileutil
12 from allmydata.util.fileutil import abspath_expanduser_unicode
13 from allmydata.util.encodingutil import unicode_to_url, listdir_unicode, quote_output, \
14 quote_local_unicode_path, to_str
15 from allmydata.util.assertutil import precondition
class MissingSourceError(TahoeError):
    """Error for a copy source that does not exist.

    The message is "No such file or directory <name>", where <name> is the
    result of passing `name` through `quotefn` (quote_output by default).
    """
    def __init__(self, name, quotefn=quote_output):
        # Quote first, then format, so the quoting function sees the raw name.
        quoted_name = quotefn(name)
        message = "No such file or directory %s" % quoted_name
        TahoeError.__init__(self, message)
24 resp = do_http("GET", url)
25 if resp.status == 200:
27 raise HTTPError("Error during GET", resp)
29 def GET_to_string(url):
34 resp = do_http("PUT", url, data)
35 if resp.status in (200, 201):
37 raise HTTPError("Error during PUT", resp)
40 resp = do_http("POST", url, data)
41 if resp.status in (200, 201):
43 raise HTTPError("Error during POST", resp)
46 url = targeturl + "?t=mkdir"
47 resp = do_http("POST", url)
48 if resp.status in (200, 201):
49 return resp.read().strip()
50 raise HTTPError("Error during mkdir", resp)
52 def make_tahoe_subdirectory(nodeurl, parent_writecap, name):
53 url = nodeurl + "/".join(["uri",
54 urllib.quote(parent_writecap),
55 urllib.quote(unicode_to_url(name)),
57 resp = do_http("POST", url)
58 if resp.status in (200, 201):
59 return resp.read().strip()
60 raise HTTPError("Error during mkdir", resp)
63 class LocalFileSource:
64 def __init__(self, pathname):
65 precondition(isinstance(pathname, unicode), pathname)
66 self.pathname = pathname
68 def need_to_copy_bytes(self):
71 def open(self, caps_only):
72 return open(os.path.expanduser(self.pathname), "rb")
class LocalFileTarget:
    """Copy target that wraps a local pathname.

    `pathname` must be a unicode object; this is enforced with
    precondition() at construction time.
    """
    def __init__(self, pathname):
        is_unicode_path = isinstance(pathname, unicode)
        precondition(is_unicode_path, pathname)
        self.pathname = pathname

    def put_file(self, inf):
        """Hand `inf` to fileutil.put_file, to be stored at self.pathname.

        NOTE(review): `inf` is presumably a readable file-like object --
        confirm against fileutil.put_file.
        """
        destination = self.pathname
        fileutil.put_file(destination, inf)
class LocalMissingTarget:
    """Copy target that wraps a local pathname which does not exist yet.

    `pathname` must be a unicode object; this is enforced with
    precondition() at construction time.
    """
    def __init__(self, pathname):
        is_unicode_path = isinstance(pathname, unicode)
        precondition(is_unicode_path, pathname)
        self.pathname = pathname

    def put_file(self, inf):
        """Hand `inf` to fileutil.put_file, to be stored at self.pathname.

        NOTE(review): `inf` is presumably a readable file-like object --
        confirm against fileutil.put_file.
        """
        destination = self.pathname
        fileutil.put_file(destination, inf)
93 class LocalDirectorySource:
94 def __init__(self, progressfunc, pathname):
95 precondition(isinstance(pathname, unicode), pathname)
97 self.progressfunc = progressfunc
98 self.pathname = pathname
101 def populate(self, recurse):
102 if self.children is not None:
105 children = listdir_unicode(self.pathname)
106 for i,n in enumerate(children):
107 self.progressfunc("examining %d of %d" % (i+1, len(children)))
108 pn = os.path.join(self.pathname, n)
109 if os.path.isdir(pn):
110 child = LocalDirectorySource(self.progressfunc, pn)
111 self.children[n] = child
114 elif os.path.isfile(pn):
115 self.children[n] = LocalFileSource(pn)
117 # Could be dangling symlink; probably not copy-able.
118 # TODO: output a warning
121 class LocalDirectoryTarget:
122 def __init__(self, progressfunc, pathname):
123 precondition(isinstance(pathname, unicode), pathname)
125 self.progressfunc = progressfunc
126 self.pathname = pathname
129 def populate(self, recurse):
130 if self.children is not None:
133 children = listdir_unicode(self.pathname)
134 for i,n in enumerate(children):
135 self.progressfunc("examining %d of %d" % (i+1, len(children)))
137 pn = os.path.join(self.pathname, n)
138 if os.path.isdir(pn):
139 child = LocalDirectoryTarget(self.progressfunc, pn)
140 self.children[n] = child
144 assert os.path.isfile(pn)
145 self.children[n] = LocalFileTarget(pn)
147 def get_child_target(self, name):
148 if self.children is None:
150 if name in self.children:
151 return self.children[name]
152 pathname = os.path.join(self.pathname, name)
153 os.makedirs(pathname)
154 return LocalDirectoryTarget(self.progressfunc, pathname)
def put_file(self, name, inf):
    """Store `inf` as the child called `name` inside this local directory.

    `name` must be a unicode object (checked with precondition()). The
    destination path is self.pathname joined with `name`.
    """
    name_is_unicode = isinstance(name, unicode)
    precondition(name_is_unicode, name)
    destination = os.path.join(self.pathname, name)
    fileutil.put_file(destination, inf)
161 def set_children(self):
164 class TahoeFileSource:
165 def __init__(self, nodeurl, mutable, writecap, readcap):
166 self.nodeurl = nodeurl
167 self.mutable = mutable
168 self.writecap = writecap
169 self.readcap = readcap
171 def need_to_copy_bytes(self):
176 def open(self, caps_only):
178 return StringIO(self.readcap)
179 url = self.nodeurl + "uri/" + urllib.quote(self.readcap)
180 return GET_to_file(url)
183 return self.writecap or self.readcap
185 class TahoeFileTarget:
186 def __init__(self, nodeurl, mutable, writecap, readcap, url):
187 self.nodeurl = nodeurl
188 self.mutable = mutable
189 self.writecap = writecap
190 self.readcap = readcap
193 def put_file(self, inf):
194 # We want to replace this object in-place.
196 # our do_http() call currently requires a string or a filehandle with
198 if not hasattr(inf, "seek"):
201 # TODO: this always creates immutable files. We might want an option
202 # to always create mutable files, or to copy mutable files into new
203 # mutable files. ticket #835
205 class TahoeDirectorySource:
206 def __init__(self, nodeurl, cache, progressfunc):
207 self.nodeurl = nodeurl
209 self.progressfunc = progressfunc
211 def init_from_grid(self, writecap, readcap):
212 self.writecap = writecap
213 self.readcap = readcap
214 bestcap = writecap or readcap
215 url = self.nodeurl + "uri/%s" % urllib.quote(bestcap)
216 resp = do_http("GET", url + "?t=json")
217 if resp.status != 200:
218 raise HTTPError("Error examining source directory", resp)
219 parsed = simplejson.loads(resp.read())
221 assert nodetype == "dirnode"
222 self.mutable = d.get("mutable", False) # older nodes don't provide it
223 self.children_d = dict( [(unicode(name),value)
225 in d["children"].iteritems()] )
228 def init_from_parsed(self, parsed):
230 self.writecap = to_str(d.get("rw_uri"))
231 self.readcap = to_str(d.get("ro_uri"))
232 self.mutable = d.get("mutable", False) # older nodes don't provide it
233 self.children_d = dict( [(unicode(name),value)
235 in d["children"].iteritems()] )
238 def populate(self, recurse):
239 if self.children is not None:
242 for i,(name, data) in enumerate(self.children_d.items()):
243 self.progressfunc("examining %d of %d" % (i+1, len(self.children_d)))
244 if data[0] == "filenode":
245 mutable = data[1].get("mutable", False)
246 writecap = to_str(data[1].get("rw_uri"))
247 readcap = to_str(data[1].get("ro_uri"))
248 self.children[name] = TahoeFileSource(self.nodeurl, mutable,
250 elif data[0] == "dirnode":
251 writecap = to_str(data[1].get("rw_uri"))
252 readcap = to_str(data[1].get("ro_uri"))
253 if writecap and writecap in self.cache:
254 child = self.cache[writecap]
255 elif readcap and readcap in self.cache:
256 child = self.cache[readcap]
258 child = TahoeDirectorySource(self.nodeurl, self.cache,
260 child.init_from_grid(writecap, readcap)
262 self.cache[writecap] = child
264 self.cache[readcap] = child
267 self.children[name] = child
269 # TODO: there should be an option to skip unknown nodes.
270 raise TahoeError("Cannot copy unknown nodes (ticket #839). "
271 "You probably need to use a later version of "
272 "Tahoe-LAFS to copy this directory.")
274 class TahoeMissingTarget:
275 def __init__(self, url):
278 def put_file(self, inf):
279 # We want to replace this object in-place.
280 if not hasattr(inf, "seek"):
283 # TODO: this always creates immutable files. We might want an option
284 # to always create mutable files, or to copy mutable files into new
def put_uri(self, filecap):
    """PUT `filecap` to this target's URL with "?t=uri" appended.

    Returns whatever PUT() returns.
    """
    # I'm not sure this will always work
    link_url = self.url + "?t=uri"
    return PUT(link_url, filecap)
291 class TahoeDirectoryTarget:
292 def __init__(self, nodeurl, cache, progressfunc):
293 self.nodeurl = nodeurl
295 self.progressfunc = progressfunc
296 self.new_children = {}
298 def init_from_parsed(self, parsed):
300 self.writecap = to_str(d.get("rw_uri"))
301 self.readcap = to_str(d.get("ro_uri"))
302 self.mutable = d.get("mutable", False) # older nodes don't provide it
303 self.children_d = dict( [(unicode(name),value)
305 in d["children"].iteritems()] )
308 def init_from_grid(self, writecap, readcap):
309 self.writecap = writecap
310 self.readcap = readcap
311 bestcap = writecap or readcap
312 url = self.nodeurl + "uri/%s" % urllib.quote(bestcap)
313 resp = do_http("GET", url + "?t=json")
314 if resp.status != 200:
315 raise HTTPError("Error examining target directory", resp)
316 parsed = simplejson.loads(resp.read())
318 assert nodetype == "dirnode"
319 self.mutable = d.get("mutable", False) # older nodes don't provide it
320 self.children_d = dict( [(unicode(name),value)
322 in d["children"].iteritems()] )
325 def just_created(self, writecap):
326 self.writecap = writecap
327 self.readcap = uri.from_string(writecap).get_readonly().to_string()
332 def populate(self, recurse):
333 if self.children is not None:
336 for i,(name, data) in enumerate(self.children_d.items()):
337 self.progressfunc("examining %d of %d" % (i+1, len(self.children_d)))
338 if data[0] == "filenode":
339 mutable = data[1].get("mutable", False)
340 writecap = to_str(data[1].get("rw_uri"))
341 readcap = to_str(data[1].get("ro_uri"))
344 url = self.nodeurl + "/".join(["uri",
345 urllib.quote(self.writecap),
346 urllib.quote(unicode_to_url(name))])
347 self.children[name] = TahoeFileTarget(self.nodeurl, mutable,
348 writecap, readcap, url)
349 elif data[0] == "dirnode":
350 writecap = to_str(data[1].get("rw_uri"))
351 readcap = to_str(data[1].get("ro_uri"))
352 if writecap and writecap in self.cache:
353 child = self.cache[writecap]
354 elif readcap and readcap in self.cache:
355 child = self.cache[readcap]
357 child = TahoeDirectoryTarget(self.nodeurl, self.cache,
359 child.init_from_grid(writecap, readcap)
361 self.cache[writecap] = child
363 self.cache[readcap] = child
366 self.children[name] = child
368 # TODO: there should be an option to skip unknown nodes.
369 raise TahoeError("Cannot copy unknown nodes (ticket #839). "
370 "You probably need to use a later version of "
371 "Tahoe-LAFS to copy this directory.")
373 def get_child_target(self, name):
374 # return a new target for a named subdirectory of this dir
375 if self.children is None:
377 if name in self.children:
378 return self.children[name]
379 writecap = make_tahoe_subdirectory(self.nodeurl, self.writecap, name)
380 child = TahoeDirectoryTarget(self.nodeurl, self.cache,
382 child.just_created(writecap)
383 self.children[name] = child
386 def put_file(self, name, inf):
387 url = self.nodeurl + "uri"
388 if not hasattr(inf, "seek"):
391 if self.children is None:
394 # Check to see if we already have a mutable file by this name.
395 # If so, overwrite that file in place.
396 if name in self.children and self.children[name].mutable:
397 self.children[name].put_file(inf)
399 filecap = PUT(url, inf)
400 # TODO: this always creates immutable files. We might want an option
401 # to always create mutable files, or to copy mutable files into new
403 self.new_children[name] = filecap
def put_uri(self, name, filecap):
    """Record `filecap` under `name` in self.new_children.

    Nothing is sent to the node here; set_children() iterates over
    self.new_children later.
    """
    pending_children = self.new_children
    pending_children[name] = filecap
408 def set_children(self):
409 if not self.new_children:
411 url = (self.nodeurl + "uri/" + urllib.quote(self.writecap)
414 for (name, filecap) in self.new_children.items():
415 # it just so happens that ?t=set_children will accept both file
416 # read-caps and write-caps as ['rw_uri'], and will handle either
417 # correctly. So don't bother trying to figure out whether the one
418 # we have is read-only or read-write.
419 # TODO: think about how this affects forward-compatibility for
421 set_data[name] = ["filenode", {"rw_uri": filecap}]
422 body = simplejson.dumps(set_data)
427 def do_copy(self, options, progressfunc=None):
430 elif options['verbose']:
435 nodeurl = options['node-url']
436 if nodeurl[-1] != "/":
438 self.nodeurl = nodeurl
439 self.progressfunc = progressfunc
440 self.options = options
441 self.aliases = options.aliases
442 self.verbosity = verbosity
443 self.stdout = options.stdout
444 self.stderr = options.stderr
445 if verbosity >= 2 and not self.progressfunc:
446 def progress(message):
447 print >>self.stderr, message
448 self.progressfunc = progress
449 self.caps_only = options["caps-only"]
452 status = self.try_copy()
454 except TahoeError, te:
456 Failure().printTraceback(self.stderr)
458 te.display(self.stderr)
462 source_specs = self.options.sources
463 destination_spec = self.options.destination
464 recursive = self.options["recursive"]
466 target = self.get_target_info(destination_spec)
468 sources = [] # list of (name, source object)
469 for ss in source_specs:
470 name, source = self.get_source_info(ss)
471 sources.append( (name, source) )
474 have_source_dirs = bool([s for (name,s) in sources
475 if isinstance(s, (LocalDirectorySource,
476 TahoeDirectorySource))])
478 if have_source_dirs and not recursive:
479 self.to_stderr("cannot copy directories without --recursive")
482 if isinstance(target, (LocalFileTarget, TahoeFileTarget)):
483 # cp STUFF foo.txt, where foo.txt already exists. This limits the
484 # possibilities considerably.
486 self.to_stderr("target %s is not a directory" % quote_output(destination_spec))
489 self.to_stderr("cannot copy directory into a file")
491 name, source = sources[0]
492 return self.copy_file(source, target)
494 if isinstance(target, (LocalMissingTarget, TahoeMissingTarget)):
496 return self.copy_to_directory(sources, target)
498 # if we have -r, we'll auto-create the target directory. Without
499 # it, we'll only create a file.
500 self.to_stderr("cannot copy multiple files into a file without -r")
503 name, source = sources[0]
504 return self.copy_file(source, target)
506 if isinstance(target, (LocalDirectoryTarget, TahoeDirectoryTarget)):
507 # We're copying to an existing directory -- make sure that we
508 # have target names for everything
509 for (name, source) in sources:
510 if name is None and isinstance(source, TahoeFileSource):
512 "error: you must specify a destination filename")
514 return self.copy_to_directory(sources, target)
516 self.to_stderr("unknown target")
def to_stderr(self, text):
    """Print `text` to the stream stored in self.stderr."""
    error_stream = self.stderr
    print >>error_stream, text
522 def get_target_info(self, destination_spec):
523 rootcap, path = get_alias(self.aliases, destination_spec, None)
524 if rootcap == DefaultAliasMarker:
525 # no alias, so this is a local file
526 pathname = abspath_expanduser_unicode(path.decode('utf-8'))
527 if not os.path.exists(pathname):
528 t = LocalMissingTarget(pathname)
529 elif os.path.isdir(pathname):
530 t = LocalDirectoryTarget(self.progress, pathname)
532 assert os.path.isfile(pathname), pathname
533 t = LocalFileTarget(pathname) # non-empty
535 # this is a tahoe object
536 url = self.nodeurl + "uri/%s" % urllib.quote(rootcap)
538 url += "/" + escape_path(path)
540 resp = do_http("GET", url + "?t=json")
541 if resp.status == 404:
543 t = TahoeMissingTarget(url)
544 elif resp.status == 200:
545 parsed = simplejson.loads(resp.read())
547 if nodetype == "dirnode":
548 t = TahoeDirectoryTarget(self.nodeurl, self.cache,
550 t.init_from_parsed(parsed)
552 writecap = to_str(d.get("rw_uri"))
553 readcap = to_str(d.get("ro_uri"))
554 mutable = d.get("mutable", False)
555 t = TahoeFileTarget(self.nodeurl, mutable,
556 writecap, readcap, url)
558 raise HTTPError("Error examining target %s"
559 % quote_output(destination_spec), resp)
562 def get_source_info(self, source_spec):
563 rootcap, path = get_alias(self.aliases, source_spec, None)
564 if rootcap == DefaultAliasMarker:
565 # no alias, so this is a local file
566 pathname = abspath_expanduser_unicode(path.decode('utf-8'))
567 name = os.path.basename(pathname)
568 if not os.path.exists(pathname):
569 raise MissingSourceError(source_spec, quotefn=quote_local_unicode_path)
570 if os.path.isdir(pathname):
571 t = LocalDirectorySource(self.progress, pathname)
573 assert os.path.isfile(pathname)
574 t = LocalFileSource(pathname) # non-empty
576 # this is a tahoe object
577 url = self.nodeurl + "uri/%s" % urllib.quote(rootcap)
580 url += "/" + escape_path(path)
581 last_slash = path.rfind("/")
584 name = path[last_slash+1:]
586 resp = do_http("GET", url + "?t=json")
587 if resp.status == 404:
588 raise MissingSourceError(source_spec)
589 elif resp.status != 200:
590 raise HTTPError("Error examining source %s" % quote_output(source_spec),
592 parsed = simplejson.loads(resp.read())
594 if nodetype == "dirnode":
595 t = TahoeDirectorySource(self.nodeurl, self.cache,
597 t.init_from_parsed(parsed)
599 writecap = to_str(d.get("rw_uri"))
600 readcap = to_str(d.get("ro_uri"))
601 mutable = d.get("mutable", False) # older nodes don't provide it
602 if source_spec.rfind('/') != -1:
603 name = source_spec[source_spec.rfind('/')+1:]
604 t = TahoeFileSource(self.nodeurl, mutable, writecap, readcap)
608 def dump_graph(self, s, indent=" "):
609 for name, child in s.children.items():
610 print "%s%s: %r" % (indent, quote_output(name), child)
611 if isinstance(child, (LocalDirectorySource, TahoeDirectorySource)):
612 self.dump_graph(child, indent+" ")
614 def copy_to_directory(self, source_infos, target):
615 # step one: build a recursive graph of the source tree. This returns
616 # a dictionary, with child names as keys, and values that are either
617 # Directory or File instances (local or tahoe).
618 source_dirs = self.build_graphs(source_infos)
619 source_files = [source for source in source_infos
620 if isinstance(source[1], (LocalFileSource,
624 #for s in source_dirs:
627 # step two: create the top-level target directory object
628 if isinstance(target, LocalMissingTarget):
629 os.makedirs(target.pathname)
630 target = LocalDirectoryTarget(self.progress, target.pathname)
631 elif isinstance(target, TahoeMissingTarget):
632 writecap = mkdir(target.url)
633 target = TahoeDirectoryTarget(self.nodeurl, self.cache,
635 target.just_created(writecap)
636 assert isinstance(target, (LocalDirectoryTarget, TahoeDirectoryTarget))
637 target.populate(False)
639 # step three: find a target for each source node, creating
640 # directories as necessary. 'targetmap' is a dictionary that uses
641 # target Directory instances as keys, and has values of
642 # (name->sourceobject) dicts for all the files that need to wind up
645 # sources are all LocalFile/LocalDirectory/TahoeFile/TahoeDirectory
646 # target is LocalDirectory/TahoeDirectory
648 self.progress("attaching sources to targets, "
649 "%d files / %d dirs in root" %
650 (len(source_files), len(source_dirs)))
653 self.files_to_copy = 0
655 for (name,s) in source_files:
656 self.attach_to_target(s, name, target)
658 for (name, source) in source_dirs:
659 new_target = target.get_child_target(name)
660 self.assign_targets(source, new_target)
662 self.progress("targets assigned, %s dirs, %s files" %
663 (len(self.targetmap), self.files_to_copy))
665 self.progress("starting copy, %d files, %d directories" %
666 (self.files_to_copy, len(self.targetmap)))
667 self.files_copied = 0
668 self.targets_finished = 0
670 # step four: walk through the list of targets. For each one, copy all
671 # the files. If the target is a TahoeDirectory, upload and create
672 # read-caps, then do a set_children to the target directory.
674 for target in self.targetmap:
675 self.copy_files_to_target(self.targetmap[target], target)
676 self.targets_finished += 1
677 self.progress("%d/%d directories" %
678 (self.targets_finished, len(self.targetmap)))
680 return self.announce_success("files copied")
def attach_to_target(self, source, name, target):
    """Register `source` to be copied into `target` under `name`.

    self.targetmap maps each target to a {name: source} dict; the inner
    dict is created the first time a target is seen. Every call also
    increments self.files_to_copy.
    """
    files_for_target = self.targetmap.setdefault(target, {})
    files_for_target[name] = source
    self.files_to_copy += 1
688 def assign_targets(self, source, target):
689 # copy everything in the source into the target
690 assert isinstance(source, (LocalDirectorySource, TahoeDirectorySource))
692 for name, child in source.children.items():
693 if isinstance(child, (LocalDirectorySource, TahoeDirectorySource)):
694 # we will need a target directory for this one
695 subtarget = target.get_child_target(name)
696 self.assign_targets(child, subtarget)
698 assert isinstance(child, (LocalFileSource, TahoeFileSource))
699 self.attach_to_target(child, name, target)
def copy_files_to_target(self, targetmap, target):
    """Copy every file listed in `targetmap` into the directory `target`.

    `targetmap` here is the {name: source} dict for this one target (it
    shadows self.targetmap, which is still used for the directory total).
    After each file, self.files_copied is incremented and a progress
    message is emitted. Once all files are done, target.set_children()
    is called.
    """
    file_source_types = (LocalFileSource, TahoeFileSource)
    for child_name, file_source in targetmap.items():
        assert isinstance(file_source, file_source_types)
        self.copy_file_into(file_source, child_name, target)
        self.files_copied += 1
        counters = (self.files_copied, self.files_to_copy,
                    self.targets_finished, len(self.targetmap))
        self.progress("%d/%d files, %d/%d directories" % counters)
    target.set_children()
713 def need_to_copy_bytes(self, source, target):
714 if source.need_to_copy_bytes:
715 # mutable tahoe files, and local files
717 if isinstance(target, (LocalFileTarget, LocalDirectoryTarget)):
721 def announce_success(self, msg):
722 if self.verbosity >= 1:
723 print >>self.stdout, "Success: %s" % msg
726 def copy_file(self, source, target):
727 assert isinstance(source, (LocalFileSource, TahoeFileSource))
728 assert isinstance(target, (LocalFileTarget, TahoeFileTarget,
729 LocalMissingTarget, TahoeMissingTarget))
730 if self.need_to_copy_bytes(source, target):
731 # if the target is a local directory, this will just write the
732 # bytes to disk. If it is a tahoe directory, it will upload the
733 # data, and stash the new filecap for a later set_children call.
734 f = source.open(self.caps_only)
736 return self.announce_success("file copied")
737 # otherwise we're copying tahoe to tahoe, and using immutable files,
738 # so we can just make a link. TODO: this probably won't always work:
739 # need to enumerate the cases and analyze them.
740 target.put_uri(source.bestcap())
741 return self.announce_success("file linked")
743 def copy_file_into(self, source, name, target):
744 assert isinstance(source, (LocalFileSource, TahoeFileSource))
745 assert isinstance(target, (LocalDirectoryTarget, TahoeDirectoryTarget))
746 if self.need_to_copy_bytes(source, target):
747 # if the target is a local directory, this will just write the
748 # bytes to disk. If it is a tahoe directory, it will upload the
749 # data, and stash the new filecap for a later set_children call.
750 f = source.open(self.caps_only)
751 target.put_file(name, f)
753 # otherwise we're copying tahoe to tahoe, and using immutable files,
754 # so we can just make a link
755 target.put_uri(name, source.bestcap())
def progress(self, message):
    """Pass `message` to self.progressfunc, if one is set.

    Does nothing when self.progressfunc is falsy (e.g. None).
    """
    if not self.progressfunc:
        return
    self.progressfunc(message)
763 def build_graphs(self, source_infos):
765 for name,source in source_infos:
766 if isinstance(source, (LocalDirectorySource, TahoeDirectorySource)):
767 source.populate(True)
768 # Remove trailing slash (if applicable) and get dir name
769 name = os.path.basename(os.path.normpath(name))
770 graphs.append((name, source))
775 return Copier().do_copy(options)
777 # error cases that need improvement:
778 # local-file-in-the-way
780 # tahoe cp -r my:docs/proposed/denver.txt proposed/denver.txt
781 # handling of unknown nodes
783 # things that maybe should be errors but aren't
784 # local-dir-in-the-way
786 # tahoe cp -r my:docs/proposed/denver.txt denver.txt
787 # (creates denver.txt/denver.txt)
789 # error cases that look good:
790 # tahoe cp -r my:docs/missing missing
792 # tahoe cp -r my:docs/missing missing -> No JSON object could be decoded
793 # tahoe-file-in-the-way (when we want to make a directory)
794 # tahoe put README my:docs
795 # tahoe cp -r docs/proposed my:docs/proposed