5 from cStringIO import StringIO
6 from twisted.python.failure import Failure
7 from allmydata.scripts.common import get_alias, escape_path, \
8 DefaultAliasMarker, TahoeError
9 from allmydata.scripts.common_http import do_http, HTTPError
10 from allmydata import uri
11 from allmydata.util.stringutils import unicode_to_url, listdir_unicode, open_unicode, \
12 abspath_expanduser_unicode, quote_output, to_str
13 from allmydata.util.assertutil import precondition
16 def _put_local_file(pathname, inf):
17 # TODO: create temporary file and move into place?
18 # TODO: move this to fileutil.
19 outf = open_unicode(pathname, "wb")
22 data = inf.read(32768)
class MissingSourceError(TahoeError):
    """Error raised when a named copy source cannot be found.

    The message embeds the offending name, passed through quote_output().
    """

    def __init__(self, name):
        # Build the message first, then hand it to the base-class initializer.
        message = "No such file or directory %s" % quote_output(name)
        TahoeError.__init__(self, message)
36 resp = do_http("GET", url)
37 if resp.status == 200:
39 raise HTTPError("Error during GET", resp)
41 def GET_to_string(url):
46 resp = do_http("PUT", url, data)
47 if resp.status in (200, 201):
49 raise HTTPError("Error during PUT", resp)
52 resp = do_http("POST", url, data)
53 if resp.status in (200, 201):
55 raise HTTPError("Error during POST", resp)
58 url = targeturl + "?t=mkdir"
59 resp = do_http("POST", url)
60 if resp.status in (200, 201):
61 return resp.read().strip()
62 raise HTTPError("Error during mkdir", resp)
64 def make_tahoe_subdirectory(nodeurl, parent_writecap, name):
65 url = nodeurl + "/".join(["uri",
66 urllib.quote(parent_writecap),
69 resp = do_http("POST", url)
70 if resp.status in (200, 201):
71 return resp.read().strip()
72 raise HTTPError("Error during mkdir", resp)
75 class LocalFileSource:
76 def __init__(self, pathname):
77 precondition(isinstance(pathname, unicode), pathname)
78 self.pathname = pathname
80 def need_to_copy_bytes(self):
83 def open(self, caps_only):
84 return open_unicode(self.pathname, "rb")
class LocalFileTarget:
    """Copy target that refers to a file on the local filesystem.

    `pathname` must be a unicode string; this is enforced with precondition().
    """

    def __init__(self, pathname):
        is_unicode = isinstance(pathname, unicode)
        precondition(is_unicode, pathname)
        self.pathname = pathname

    def put_file(self, inf):
        """Write the contents of the file-like object `inf` to self.pathname."""
        destination = self.pathname
        _put_local_file(destination, inf)
class LocalMissingTarget:
    """Copy target for a local path at which nothing exists yet.

    `pathname` must be a unicode string; this is enforced with precondition().
    """

    def __init__(self, pathname):
        is_unicode = isinstance(pathname, unicode)
        precondition(is_unicode, pathname)
        self.pathname = pathname

    def put_file(self, inf):
        """Write the contents of the file-like object `inf` to self.pathname."""
        destination = self.pathname
        _put_local_file(destination, inf)
105 class LocalDirectorySource:
106 def __init__(self, progressfunc, pathname):
107 precondition(isinstance(pathname, unicode), pathname)
109 self.progressfunc = progressfunc
110 self.pathname = pathname
113 def populate(self, recurse):
114 if self.children is not None:
117 children = listdir_unicode(self.pathname)
118 for i,n in enumerate(children):
119 self.progressfunc("examining %d of %d" % (i, len(children)))
120 pn = os.path.join(self.pathname, n)
121 if os.path.isdir(pn):
122 child = LocalDirectorySource(self.progressfunc, pn)
123 self.children[n] = child
126 elif os.path.isfile(pn):
127 self.children[n] = LocalFileSource(pn)
129 # Could be dangling symlink; probably not copy-able.
130 # TODO: output a warning
133 class LocalDirectoryTarget:
134 def __init__(self, progressfunc, pathname):
135 precondition(isinstance(pathname, unicode), pathname)
137 self.progressfunc = progressfunc
138 self.pathname = pathname
141 def populate(self, recurse):
142 if self.children is not None:
145 children = listdir_unicode(self.pathname)
146 for i,n in enumerate(children):
147 self.progressfunc("examining %d of %d" % (i, len(children)))
149 pn = os.path.join(self.pathname, n)
150 if os.path.isdir(pn):
151 child = LocalDirectoryTarget(self.progressfunc, pn)
152 self.children[n] = child
156 assert os.path.isfile(pn)
157 self.children[n] = LocalFileTarget(pn)
159 def get_child_target(self, name):
160 if self.children is None:
162 if name in self.children:
163 return self.children[name]
164 pathname = os.path.join(self.pathname, name)
165 os.makedirs(pathname)
166 return LocalDirectoryTarget(self.progressfunc, pathname)
168 def put_file(self, name, inf):
169 precondition(isinstance(name, unicode), name)
170 pathname = os.path.join(self.pathname, name)
171 _put_local_file(pathname, inf)
173 def set_children(self):
176 class TahoeFileSource:
177 def __init__(self, nodeurl, mutable, writecap, readcap):
178 self.nodeurl = nodeurl
179 self.mutable = mutable
180 self.writecap = writecap
181 self.readcap = readcap
183 def need_to_copy_bytes(self):
188 def open(self, caps_only):
190 return StringIO(self.readcap)
191 url = self.nodeurl + "uri/" + urllib.quote(self.readcap)
192 return GET_to_file(url)
195 return self.writecap or self.readcap
197 class TahoeFileTarget:
198 def __init__(self, nodeurl, mutable, writecap, readcap, url):
199 self.nodeurl = nodeurl
200 self.mutable = mutable
201 self.writecap = writecap
202 self.readcap = readcap
205 def put_file(self, inf):
206 # We want to replace this object in-place.
208 # our do_http() call currently requires a string or a filehandle with
210 if not hasattr(inf, "seek"):
213 # TODO: this always creates immutable files. We might want an option
214 # to always create mutable files, or to copy mutable files into new
215 # mutable files. ticket #835
217 class TahoeDirectorySource:
218 def __init__(self, nodeurl, cache, progressfunc):
219 self.nodeurl = nodeurl
221 self.progressfunc = progressfunc
223 def init_from_grid(self, writecap, readcap):
224 self.writecap = writecap
225 self.readcap = readcap
226 bestcap = writecap or readcap
227 url = self.nodeurl + "uri/%s" % urllib.quote(bestcap)
228 resp = do_http("GET", url + "?t=json")
229 if resp.status != 200:
230 raise HTTPError("Error examining source directory", resp)
231 parsed = simplejson.loads(resp.read())
233 assert nodetype == "dirnode"
234 self.mutable = d.get("mutable", False) # older nodes don't provide it
235 self.children_d = dict( [(unicode(name),value)
237 in d["children"].iteritems()] )
240 def init_from_parsed(self, parsed):
242 self.writecap = to_str(d.get("rw_uri"))
243 self.readcap = to_str(d.get("ro_uri"))
244 self.mutable = d.get("mutable", False) # older nodes don't provide it
245 self.children_d = dict( [(unicode(name),value)
247 in d["children"].iteritems()] )
250 def populate(self, recurse):
251 if self.children is not None:
254 for i,(name, data) in enumerate(self.children_d.items()):
255 self.progressfunc("examining %d of %d" % (i, len(self.children_d)))
256 if data[0] == "filenode":
257 mutable = data[1].get("mutable", False)
258 writecap = to_str(data[1].get("rw_uri"))
259 readcap = to_str(data[1].get("ro_uri"))
260 self.children[name] = TahoeFileSource(self.nodeurl, mutable,
262 elif data[0] == "dirnode":
263 writecap = to_str(data[1].get("rw_uri"))
264 readcap = to_str(data[1].get("ro_uri"))
265 if writecap and writecap in self.cache:
266 child = self.cache[writecap]
267 elif readcap and readcap in self.cache:
268 child = self.cache[readcap]
270 child = TahoeDirectorySource(self.nodeurl, self.cache,
272 child.init_from_grid(writecap, readcap)
274 self.cache[writecap] = child
276 self.cache[readcap] = child
279 self.children[name] = child
281 # TODO: there should be an option to skip unknown nodes.
282 raise TahoeError("Cannot copy unknown nodes (ticket #839). "
283 "You probably need to use a later version of "
284 "Tahoe-LAFS to copy this directory.")
286 class TahoeMissingTarget:
287 def __init__(self, url):
290 def put_file(self, inf):
291 # We want to replace this object in-place.
292 if not hasattr(inf, "seek"):
295 # TODO: this always creates immutable files. We might want an option
296 # to always create mutable files, or to copy mutable files into new
299 def put_uri(self, filecap):
300 # I'm not sure this will always work
301 return PUT(self.url + "?t=uri", filecap)
303 class TahoeDirectoryTarget:
304 def __init__(self, nodeurl, cache, progressfunc):
305 self.nodeurl = nodeurl
307 self.progressfunc = progressfunc
308 self.new_children = {}
310 def init_from_parsed(self, parsed):
312 self.writecap = to_str(d.get("rw_uri"))
313 self.readcap = to_str(d.get("ro_uri"))
314 self.mutable = d.get("mutable", False) # older nodes don't provide it
315 self.children_d = dict( [(unicode(name),value)
317 in d["children"].iteritems()] )
320 def init_from_grid(self, writecap, readcap):
321 self.writecap = writecap
322 self.readcap = readcap
323 bestcap = writecap or readcap
324 url = self.nodeurl + "uri/%s" % urllib.quote(bestcap)
325 resp = do_http("GET", url + "?t=json")
326 if resp.status != 200:
327 raise HTTPError("Error examining target directory", resp)
328 parsed = simplejson.loads(resp.read())
330 assert nodetype == "dirnode"
331 self.mutable = d.get("mutable", False) # older nodes don't provide it
332 self.children_d = dict( [(unicode(name),value)
334 in d["children"].iteritems()] )
337 def just_created(self, writecap):
338 self.writecap = writecap
339 self.readcap = uri.from_string(writecap).get_readonly().to_string()
344 def populate(self, recurse):
345 if self.children is not None:
348 for i,(name, data) in enumerate(self.children_d.items()):
349 self.progressfunc("examining %d of %d" % (i, len(self.children_d)))
350 if data[0] == "filenode":
351 mutable = data[1].get("mutable", False)
352 writecap = to_str(data[1].get("rw_uri"))
353 readcap = to_str(data[1].get("ro_uri"))
356 url = self.nodeurl + "/".join(["uri",
357 urllib.quote(self.writecap),
358 urllib.quote(unicode_to_url(name))])
359 self.children[name] = TahoeFileTarget(self.nodeurl, mutable,
360 writecap, readcap, url)
361 elif data[0] == "dirnode":
362 writecap = to_str(data[1].get("rw_uri"))
363 readcap = to_str(data[1].get("ro_uri"))
364 if writecap and writecap in self.cache:
365 child = self.cache[writecap]
366 elif readcap and readcap in self.cache:
367 child = self.cache[readcap]
369 child = TahoeDirectoryTarget(self.nodeurl, self.cache,
371 child.init_from_grid(writecap, readcap)
373 self.cache[writecap] = child
375 self.cache[readcap] = child
378 self.children[name] = child
380 # TODO: there should be an option to skip unknown nodes.
381 raise TahoeError("Cannot copy unknown nodes (ticket #839). "
382 "You probably need to use a later version of "
383 "Tahoe-LAFS to copy this directory.")
385 def get_child_target(self, name):
386 # return a new target for a named subdirectory of this dir
387 if self.children is None:
389 if name in self.children:
390 return self.children[name]
391 writecap = make_tahoe_subdirectory(self.nodeurl, self.writecap, name)
392 child = TahoeDirectoryTarget(self.nodeurl, self.cache,
394 child.just_created(writecap)
395 self.children[name] = child
398 def put_file(self, name, inf):
399 url = self.nodeurl + "uri"
400 if not hasattr(inf, "seek"):
402 filecap = PUT(url, inf)
403 # TODO: this always creates immutable files. We might want an option
404 # to always create mutable files, or to copy mutable files into new
406 self.new_children[name] = filecap
408 def put_uri(self, name, filecap):
409 self.new_children[name] = filecap
411 def set_children(self):
412 if not self.new_children:
414 url = (self.nodeurl + "uri/" + urllib.quote(self.writecap)
417 for (name, filecap) in self.new_children.items():
418 # it just so happens that ?t=set_children will accept both file
419 # read-caps and write-caps as ['rw_uri'], and will handle either
420 # correctly. So don't bother trying to figure out whether the one
421 # we have is read-only or read-write.
422 # TODO: think about how this affects forward-compatibility for
424 set_data[name] = ["filenode", {"rw_uri": filecap}]
425 body = simplejson.dumps(set_data)
430 def do_copy(self, options, progressfunc=None):
433 elif options['verbose']:
438 nodeurl = options['node-url']
439 if nodeurl[-1] != "/":
441 self.nodeurl = nodeurl
442 self.progressfunc = progressfunc
443 self.options = options
444 self.aliases = options.aliases
445 self.verbosity = verbosity
446 self.stdout = options.stdout
447 self.stderr = options.stderr
448 if verbosity >= 2 and not self.progressfunc:
449 def progress(message):
450 print >>self.stderr, message
451 self.progressfunc = progress
452 self.caps_only = options["caps-only"]
455 status = self.try_copy()
457 except TahoeError, te:
459 Failure().printTraceback(self.stderr)
461 te.display(self.stderr)
465 source_specs = self.options.sources
466 destination_spec = self.options.destination
467 recursive = self.options["recursive"]
469 target = self.get_target_info(destination_spec)
471 sources = [] # list of (name, source object)
472 for ss in source_specs:
473 name, source = self.get_source_info(ss)
474 sources.append( (name, source) )
476 have_source_dirs = bool([s for (name,s) in sources
477 if isinstance(s, (LocalDirectorySource,
478 TahoeDirectorySource))])
480 if have_source_dirs and not recursive:
481 self.to_stderr("cannot copy directories without --recursive")
484 if isinstance(target, (LocalFileTarget, TahoeFileTarget)):
485 # cp STUFF foo.txt, where foo.txt already exists. This limits the
486 # possibilities considerably.
488 self.to_stderr("target %s is not a directory" % quote_output(destination_spec))
491 self.to_stderr("cannot copy directory into a file")
493 name, source = sources[0]
494 return self.copy_file(source, target)
496 if isinstance(target, (LocalMissingTarget, TahoeMissingTarget)):
498 return self.copy_to_directory(sources, target)
500 # if we have -r, we'll auto-create the target directory. Without
501 # it, we'll only create a file.
502 self.to_stderr("cannot copy multiple files into a file without -r")
505 name, source = sources[0]
506 return self.copy_file(source, target)
508 if isinstance(target, (LocalDirectoryTarget, TahoeDirectoryTarget)):
509 # We're copying to an existing directory -- make sure that we
510 # have target names for everything
511 for (name, source) in sources:
512 if name is None and isinstance(source, TahoeFileSource):
514 "error: you must specify a destination filename")
516 return self.copy_to_directory(sources, target)
518 self.to_stderr("unknown target")
def to_stderr(self, text):
    """Print `text` (followed by a newline) to this object's stderr stream."""
    err_stream = self.stderr
    print >>err_stream, text
524 def get_target_info(self, destination_spec):
525 rootcap, path = get_alias(self.aliases, destination_spec, None)
526 if rootcap == DefaultAliasMarker:
527 # no alias, so this is a local file
528 pathname = abspath_expanduser_unicode(path.decode('utf-8'))
529 if not os.path.exists(pathname):
530 t = LocalMissingTarget(pathname)
531 elif os.path.isdir(pathname):
532 t = LocalDirectoryTarget(self.progress, pathname)
534 assert os.path.isfile(pathname), pathname
535 t = LocalFileTarget(pathname) # non-empty
537 # this is a tahoe object
538 url = self.nodeurl + "uri/%s" % urllib.quote(rootcap)
540 url += "/" + escape_path(path)
542 resp = do_http("GET", url + "?t=json")
543 if resp.status == 404:
545 t = TahoeMissingTarget(url)
546 elif resp.status == 200:
547 parsed = simplejson.loads(resp.read())
549 if nodetype == "dirnode":
550 t = TahoeDirectoryTarget(self.nodeurl, self.cache,
552 t.init_from_parsed(parsed)
554 writecap = to_str(d.get("rw_uri"))
555 readcap = to_str(d.get("ro_uri"))
556 mutable = d.get("mutable", False)
557 t = TahoeFileTarget(self.nodeurl, mutable,
558 writecap, readcap, url)
560 raise HTTPError("Error examining target %s"
561 % quote_output(destination_spec), resp)
564 def get_source_info(self, source_spec):
565 rootcap, path = get_alias(self.aliases, source_spec, None)
566 if rootcap == DefaultAliasMarker:
567 # no alias, so this is a local file
568 pathname = abspath_expanduser_unicode(path.decode('utf-8'))
569 name = os.path.basename(pathname)
570 if not os.path.exists(pathname):
571 raise MissingSourceError(source_spec)
572 if os.path.isdir(pathname):
573 t = LocalDirectorySource(self.progress, pathname)
575 assert os.path.isfile(pathname)
576 t = LocalFileSource(pathname) # non-empty
578 # this is a tahoe object
579 url = self.nodeurl + "uri/%s" % urllib.quote(rootcap)
582 url += "/" + escape_path(path)
583 last_slash = path.rfind("/")
586 name = path[last_slash+1:]
588 resp = do_http("GET", url + "?t=json")
589 if resp.status == 404:
590 raise MissingSourceError(source_spec)
591 elif resp.status != 200:
592 raise HTTPError("Error examining source %s" % quote_output(source_spec),
594 parsed = simplejson.loads(resp.read())
596 if nodetype == "dirnode":
597 t = TahoeDirectorySource(self.nodeurl, self.cache,
599 t.init_from_parsed(parsed)
601 writecap = to_str(d.get("rw_uri"))
602 readcap = to_str(d.get("ro_uri"))
603 mutable = d.get("mutable", False) # older nodes don't provide it
604 if source_spec.rfind('/') != -1:
605 name = source_spec[source_spec.rfind('/')+1:]
606 t = TahoeFileSource(self.nodeurl, mutable, writecap, readcap)
610 def dump_graph(self, s, indent=" "):
611 for name, child in s.children.items():
612 print "%s%s: %r" % (indent, quote_output(name), child)
613 if isinstance(child, (LocalDirectorySource, TahoeDirectorySource)):
614 self.dump_graph(child, indent+" ")
616 def copy_to_directory(self, source_infos, target):
617 # step one: build a recursive graph of the source tree. This returns
618 # a dictionary, with child names as keys, and values that are either
619 # Directory or File instances (local or tahoe).
620 source_dirs = self.build_graphs(source_infos)
621 source_files = [source for source in source_infos
622 if isinstance(source[1], (LocalFileSource,
626 #for s in source_dirs:
629 # step two: create the top-level target directory object
630 if isinstance(target, LocalMissingTarget):
631 os.makedirs(target.pathname)
632 target = LocalDirectoryTarget(self.progress, target.pathname)
633 elif isinstance(target, TahoeMissingTarget):
634 writecap = mkdir(target.url)
635 target = TahoeDirectoryTarget(self.nodeurl, self.cache,
637 target.just_created(writecap)
638 assert isinstance(target, (LocalDirectoryTarget, TahoeDirectoryTarget))
639 target.populate(False)
641 # step three: find a target for each source node, creating
642 # directories as necessary. 'targetmap' is a dictionary that uses
643 # target Directory instances as keys, and has values of
644 # (name->sourceobject) dicts for all the files that need to wind up
647 # sources are all LocalFile/LocalDirectory/TahoeFile/TahoeDirectory
648 # target is LocalDirectory/TahoeDirectory
650 self.progress("attaching sources to targets, "
651 "%d files / %d dirs in root" %
652 (len(source_files), len(source_dirs)))
655 self.files_to_copy = 0
657 for (name,s) in source_files:
658 self.attach_to_target(s, name, target)
659 self.files_to_copy += 1
661 for source in source_dirs:
662 self.assign_targets(source, target)
664 self.progress("targets assigned, %s dirs, %s files" %
665 (len(self.targetmap), self.files_to_copy))
667 self.progress("starting copy, %d files, %d directories" %
668 (self.files_to_copy, len(self.targetmap)))
669 self.files_copied = 0
670 self.targets_finished = 0
672 # step four: walk through the list of targets. For each one, copy all
673 # the files. If the target is a TahoeDirectory, upload and create
674 # read-caps, then do a set_children to the target directory.
676 for target in self.targetmap:
677 self.copy_files_to_target(self.targetmap[target], target)
678 self.targets_finished += 1
679 self.progress("%d/%d directories" %
680 (self.targets_finished, len(self.targetmap)))
682 return self.announce_success("files copied")
def attach_to_target(self, source, name, target):
    """Record that `source` must be copied into `target` under `name`.

    self.targetmap maps each target to a {name: source} dict; the entry for
    `target` is created on first use. Also bumps the self.files_to_copy
    counter by one.
    """
    per_target = self.targetmap.setdefault(target, {})
    per_target[name] = source
    self.files_to_copy = self.files_to_copy + 1
690 def assign_targets(self, source, target):
691 # copy everything in the source into the target
692 assert isinstance(source, (LocalDirectorySource, TahoeDirectorySource))
694 for name, child in source.children.items():
695 if isinstance(child, (LocalDirectorySource, TahoeDirectorySource)):
696 # we will need a target directory for this one
697 subtarget = target.get_child_target(name)
698 self.assign_targets(child, subtarget)
700 assert isinstance(child, (LocalFileSource, TahoeFileSource))
701 self.attach_to_target(child, name, target)
def copy_files_to_target(self, targetmap, target):
    """Copy every file listed in `targetmap` into the directory `target`.

    `targetmap` maps child names to file source objects. Each entry is copied
    with self.copy_file_into(), the self.files_copied counter is advanced and
    a progress message is emitted. After all entries have been handled,
    target.set_children() is called once.
    """
    for entry in targetmap.items():
        child_name, child_source = entry
        assert isinstance(child_source, (LocalFileSource, TahoeFileSource))
        self.copy_file_into(child_source, child_name, target)
        self.files_copied = self.files_copied + 1
        counters = (self.files_copied, self.files_to_copy,
                    self.targets_finished, len(self.targetmap))
        self.progress("%d/%d files, %d/%d directories" % counters)
    # Runs once per target, after the per-file loop has finished.
    target.set_children()
715 def need_to_copy_bytes(self, source, target):
716 if source.need_to_copy_bytes:
717 # mutable tahoe files, and local files
719 if isinstance(target, (LocalFileTarget, LocalDirectoryTarget)):
723 def announce_success(self, msg):
724 if self.verbosity >= 1:
725 print >>self.stdout, "Success: %s" % msg
728 def copy_file(self, source, target):
729 assert isinstance(source, (LocalFileSource, TahoeFileSource))
730 assert isinstance(target, (LocalFileTarget, TahoeFileTarget,
731 LocalMissingTarget, TahoeMissingTarget))
732 if self.need_to_copy_bytes(source, target):
733 # if the target is a local directory, this will just write the
734 # bytes to disk. If it is a tahoe directory, it will upload the
735 # data, and stash the new filecap for a later set_children call.
736 f = source.open(self.caps_only)
738 return self.announce_success("file copied")
739 # otherwise we're copying tahoe to tahoe, and using immutable files,
740 # so we can just make a link. TODO: this probably won't always work:
741 # need to enumerate the cases and analyze them.
742 target.put_uri(source.bestcap())
743 return self.announce_success("file linked")
745 def copy_file_into(self, source, name, target):
746 assert isinstance(source, (LocalFileSource, TahoeFileSource))
747 assert isinstance(target, (LocalDirectoryTarget, TahoeDirectoryTarget))
748 if self.need_to_copy_bytes(source, target):
749 # if the target is a local directory, this will just write the
750 # bytes to disk. If it is a tahoe directory, it will upload the
751 # data, and stash the new filecap for a later set_children call.
752 f = source.open(self.caps_only)
753 target.put_file(name, f)
755 # otherwise we're copying tahoe to tahoe, and using immutable files,
756 # so we can just make a link
757 target.put_uri(name, source.bestcap())
760 def progress(self, message):
762 if self.progressfunc:
763 self.progressfunc(message)
765 def build_graphs(self, source_infos):
767 for name,source in source_infos:
768 if isinstance(source, (LocalDirectorySource, TahoeDirectorySource)):
769 source.populate(True)
770 graphs.append(source)
775 return Copier().do_copy(options)
777 # error cases that need improvement:
778 # local-file-in-the-way
780 # tahoe cp -r my:docs/proposed/denver.txt proposed/denver.txt
781 # handling of unknown nodes
783 # things that maybe should be errors but aren't
784 # local-dir-in-the-way
786 # tahoe cp -r my:docs/proposed/denver.txt denver.txt
787 # (creates denver.txt/denver.txt)
789 # error cases that look good:
790 # tahoe cp -r my:docs/missing missing
792 # tahoe cp -r my:docs/missing missing -> No JSON object could be decoded
793 # tahoe-file-in-the-way (when we want to make a directory)
794 # tahoe put README my:docs
795 # tahoe cp -r docs/proposed my:docs/proposed