5 from cStringIO import StringIO
6 from twisted.python.failure import Failure
7 from allmydata.scripts.common import get_alias, escape_path, \
8 DefaultAliasMarker, TahoeError
9 from allmydata.scripts.common_http import do_http, HTTPError
10 from allmydata import uri
11 from allmydata.util import fileutil
12 from allmydata.util.fileutil import abspath_expanduser_unicode
13 from allmydata.util.encodingutil import unicode_to_url, listdir_unicode, quote_output, to_str
14 from allmydata.util.assertutil import precondition
class MissingSourceError(TahoeError):
    """Error raised when a copy source cannot be found."""

    def __init__(self, name):
        # Build the message first, then hand it to the base class.
        message = "No such file or directory %s" % quote_output(name)
        TahoeError.__init__(self, message)
23 resp = do_http("GET", url)
24 if resp.status == 200:
26 raise HTTPError("Error during GET", resp)
28 def GET_to_string(url):
33 resp = do_http("PUT", url, data)
34 if resp.status in (200, 201):
36 raise HTTPError("Error during PUT", resp)
39 resp = do_http("POST", url, data)
40 if resp.status in (200, 201):
42 raise HTTPError("Error during POST", resp)
45 url = targeturl + "?t=mkdir"
46 resp = do_http("POST", url)
47 if resp.status in (200, 201):
48 return resp.read().strip()
49 raise HTTPError("Error during mkdir", resp)
51 def make_tahoe_subdirectory(nodeurl, parent_writecap, name):
52 url = nodeurl + "/".join(["uri",
53 urllib.quote(parent_writecap),
54 urllib.quote(unicode_to_url(name)),
56 resp = do_http("POST", url)
57 if resp.status in (200, 201):
58 return resp.read().strip()
59 raise HTTPError("Error during mkdir", resp)
62 class LocalFileSource:
63 def __init__(self, pathname):
64 precondition(isinstance(pathname, unicode), pathname)
65 self.pathname = pathname
67 def need_to_copy_bytes(self):
70 def open(self, caps_only):
71 return open(os.path.expanduser(self.pathname), "rb")
class LocalFileTarget:
    """Copy target backed by a file path on the local filesystem.

    ``pathname`` must be a unicode string; this is enforced with
    precondition() at construction time.
    """

    def __init__(self, pathname):
        is_unicode = isinstance(pathname, unicode)
        precondition(is_unicode, pathname)
        self.pathname = pathname

    def put_file(self, inf):
        """Hand ``inf`` to fileutil.put_file() for this target's pathname."""
        destination = self.pathname
        fileutil.put_file(destination, inf)
class LocalMissingTarget:
    """Copy target for a local path that does not exist yet.

    ``pathname`` must be a unicode string; this is enforced with
    precondition() at construction time.
    """

    def __init__(self, pathname):
        is_unicode = isinstance(pathname, unicode)
        precondition(is_unicode, pathname)
        self.pathname = pathname

    def put_file(self, inf):
        """Hand ``inf`` to fileutil.put_file() for this target's pathname."""
        destination = self.pathname
        fileutil.put_file(destination, inf)
92 class LocalDirectorySource:
93 def __init__(self, progressfunc, pathname):
94 precondition(isinstance(pathname, unicode), pathname)
96 self.progressfunc = progressfunc
97 self.pathname = pathname
100 def populate(self, recurse):
101 if self.children is not None:
104 children = listdir_unicode(self.pathname)
105 for i,n in enumerate(children):
106 self.progressfunc("examining %d of %d" % (i+1, len(children)))
107 pn = os.path.join(self.pathname, n)
108 if os.path.isdir(pn):
109 child = LocalDirectorySource(self.progressfunc, pn)
110 self.children[n] = child
113 elif os.path.isfile(pn):
114 self.children[n] = LocalFileSource(pn)
116 # Could be dangling symlink; probably not copy-able.
117 # TODO: output a warning
120 class LocalDirectoryTarget:
121 def __init__(self, progressfunc, pathname):
122 precondition(isinstance(pathname, unicode), pathname)
124 self.progressfunc = progressfunc
125 self.pathname = pathname
128 def populate(self, recurse):
129 if self.children is not None:
132 children = listdir_unicode(self.pathname)
133 for i,n in enumerate(children):
134 self.progressfunc("examining %d of %d" % (i+1, len(children)))
136 pn = os.path.join(self.pathname, n)
137 if os.path.isdir(pn):
138 child = LocalDirectoryTarget(self.progressfunc, pn)
139 self.children[n] = child
143 assert os.path.isfile(pn)
144 self.children[n] = LocalFileTarget(pn)
146 def get_child_target(self, name):
147 if self.children is None:
149 if name in self.children:
150 return self.children[name]
151 pathname = os.path.join(self.pathname, name)
152 os.makedirs(pathname)
153 return LocalDirectoryTarget(self.progressfunc, pathname)
def put_file(self, name, inf):
    """Write ``inf`` as the child called ``name`` of this local directory.

    ``name`` must be a unicode string. The destination is this directory's
    pathname joined with ``name``; the write itself is delegated to
    fileutil.put_file().
    """
    name_is_unicode = isinstance(name, unicode)
    precondition(name_is_unicode, name)
    destination = os.path.join(self.pathname, name)
    fileutil.put_file(destination, inf)
160 def set_children(self):
163 class TahoeFileSource:
164 def __init__(self, nodeurl, mutable, writecap, readcap):
165 self.nodeurl = nodeurl
166 self.mutable = mutable
167 self.writecap = writecap
168 self.readcap = readcap
170 def need_to_copy_bytes(self):
175 def open(self, caps_only):
177 return StringIO(self.readcap)
178 url = self.nodeurl + "uri/" + urllib.quote(self.readcap)
179 return GET_to_file(url)
182 return self.writecap or self.readcap
184 class TahoeFileTarget:
185 def __init__(self, nodeurl, mutable, writecap, readcap, url):
186 self.nodeurl = nodeurl
187 self.mutable = mutable
188 self.writecap = writecap
189 self.readcap = readcap
192 def put_file(self, inf):
193 # We want to replace this object in-place.
195 # our do_http() call currently requires a string or a filehandle with
197 if not hasattr(inf, "seek"):
200 # TODO: this always creates immutable files. We might want an option
201 # to always create mutable files, or to copy mutable files into new
202 # mutable files. ticket #835
204 class TahoeDirectorySource:
205 def __init__(self, nodeurl, cache, progressfunc):
206 self.nodeurl = nodeurl
208 self.progressfunc = progressfunc
210 def init_from_grid(self, writecap, readcap):
211 self.writecap = writecap
212 self.readcap = readcap
213 bestcap = writecap or readcap
214 url = self.nodeurl + "uri/%s" % urllib.quote(bestcap)
215 resp = do_http("GET", url + "?t=json")
216 if resp.status != 200:
217 raise HTTPError("Error examining source directory", resp)
218 parsed = simplejson.loads(resp.read())
220 assert nodetype == "dirnode"
221 self.mutable = d.get("mutable", False) # older nodes don't provide it
222 self.children_d = dict( [(unicode(name),value)
224 in d["children"].iteritems()] )
227 def init_from_parsed(self, parsed):
229 self.writecap = to_str(d.get("rw_uri"))
230 self.readcap = to_str(d.get("ro_uri"))
231 self.mutable = d.get("mutable", False) # older nodes don't provide it
232 self.children_d = dict( [(unicode(name),value)
234 in d["children"].iteritems()] )
237 def populate(self, recurse):
238 if self.children is not None:
241 for i,(name, data) in enumerate(self.children_d.items()):
242 self.progressfunc("examining %d of %d" % (i+1, len(self.children_d)))
243 if data[0] == "filenode":
244 mutable = data[1].get("mutable", False)
245 writecap = to_str(data[1].get("rw_uri"))
246 readcap = to_str(data[1].get("ro_uri"))
247 self.children[name] = TahoeFileSource(self.nodeurl, mutable,
249 elif data[0] == "dirnode":
250 writecap = to_str(data[1].get("rw_uri"))
251 readcap = to_str(data[1].get("ro_uri"))
252 if writecap and writecap in self.cache:
253 child = self.cache[writecap]
254 elif readcap and readcap in self.cache:
255 child = self.cache[readcap]
257 child = TahoeDirectorySource(self.nodeurl, self.cache,
259 child.init_from_grid(writecap, readcap)
261 self.cache[writecap] = child
263 self.cache[readcap] = child
266 self.children[name] = child
268 # TODO: there should be an option to skip unknown nodes.
269 raise TahoeError("Cannot copy unknown nodes (ticket #839). "
270 "You probably need to use a later version of "
271 "Tahoe-LAFS to copy this directory.")
273 class TahoeMissingTarget:
274 def __init__(self, url):
277 def put_file(self, inf):
278 # We want to replace this object in-place.
279 if not hasattr(inf, "seek"):
282 # TODO: this always creates immutable files. We might want an option
283 # to always create mutable files, or to copy mutable files into new
def put_uri(self, filecap):
    """PUT ``filecap`` to this target's URL with ``?t=uri`` appended.

    Returns whatever PUT() returns.
    """
    # I'm not sure this will always work
    link_url = self.url + "?t=uri"
    return PUT(link_url, filecap)
290 class TahoeDirectoryTarget:
291 def __init__(self, nodeurl, cache, progressfunc):
292 self.nodeurl = nodeurl
294 self.progressfunc = progressfunc
295 self.new_children = {}
297 def init_from_parsed(self, parsed):
299 self.writecap = to_str(d.get("rw_uri"))
300 self.readcap = to_str(d.get("ro_uri"))
301 self.mutable = d.get("mutable", False) # older nodes don't provide it
302 self.children_d = dict( [(unicode(name),value)
304 in d["children"].iteritems()] )
307 def init_from_grid(self, writecap, readcap):
308 self.writecap = writecap
309 self.readcap = readcap
310 bestcap = writecap or readcap
311 url = self.nodeurl + "uri/%s" % urllib.quote(bestcap)
312 resp = do_http("GET", url + "?t=json")
313 if resp.status != 200:
314 raise HTTPError("Error examining target directory", resp)
315 parsed = simplejson.loads(resp.read())
317 assert nodetype == "dirnode"
318 self.mutable = d.get("mutable", False) # older nodes don't provide it
319 self.children_d = dict( [(unicode(name),value)
321 in d["children"].iteritems()] )
324 def just_created(self, writecap):
325 self.writecap = writecap
326 self.readcap = uri.from_string(writecap).get_readonly().to_string()
331 def populate(self, recurse):
332 if self.children is not None:
335 for i,(name, data) in enumerate(self.children_d.items()):
336 self.progressfunc("examining %d of %d" % (i+1, len(self.children_d)))
337 if data[0] == "filenode":
338 mutable = data[1].get("mutable", False)
339 writecap = to_str(data[1].get("rw_uri"))
340 readcap = to_str(data[1].get("ro_uri"))
343 url = self.nodeurl + "/".join(["uri",
344 urllib.quote(self.writecap),
345 urllib.quote(unicode_to_url(name))])
346 self.children[name] = TahoeFileTarget(self.nodeurl, mutable,
347 writecap, readcap, url)
348 elif data[0] == "dirnode":
349 writecap = to_str(data[1].get("rw_uri"))
350 readcap = to_str(data[1].get("ro_uri"))
351 if writecap and writecap in self.cache:
352 child = self.cache[writecap]
353 elif readcap and readcap in self.cache:
354 child = self.cache[readcap]
356 child = TahoeDirectoryTarget(self.nodeurl, self.cache,
358 child.init_from_grid(writecap, readcap)
360 self.cache[writecap] = child
362 self.cache[readcap] = child
365 self.children[name] = child
367 # TODO: there should be an option to skip unknown nodes.
368 raise TahoeError("Cannot copy unknown nodes (ticket #839). "
369 "You probably need to use a later version of "
370 "Tahoe-LAFS to copy this directory.")
372 def get_child_target(self, name):
373 # return a new target for a named subdirectory of this dir
374 if self.children is None:
376 if name in self.children:
377 return self.children[name]
378 writecap = make_tahoe_subdirectory(self.nodeurl, self.writecap, name)
379 child = TahoeDirectoryTarget(self.nodeurl, self.cache,
381 child.just_created(writecap)
382 self.children[name] = child
385 def put_file(self, name, inf):
386 url = self.nodeurl + "uri"
387 if not hasattr(inf, "seek"):
390 if self.children is None:
393 # Check to see if we already have a mutable file by this name.
394 # If so, overwrite that file in place.
395 if name in self.children and self.children[name].mutable:
396 self.children[name].put_file(inf)
398 filecap = PUT(url, inf)
399 # TODO: this always creates immutable files. We might want an option
400 # to always create mutable files, or to copy mutable files into new
402 self.new_children[name] = filecap
def put_uri(self, name, filecap):
    """Remember ``filecap`` as the new child ``name`` of this directory.

    The pair is only stored in ``self.new_children``; any earlier entry
    recorded under the same name is replaced.
    """
    pending = self.new_children
    pending[name] = filecap
407 def set_children(self):
408 if not self.new_children:
410 url = (self.nodeurl + "uri/" + urllib.quote(self.writecap)
413 for (name, filecap) in self.new_children.items():
414 # it just so happens that ?t=set_children will accept both file
415 # read-caps and write-caps as ['rw_uri'], and will handle either
416 # correctly. So don't bother trying to figure out whether the one
417 # we have is read-only or read-write.
418 # TODO: think about how this affects forward-compatibility for
420 set_data[name] = ["filenode", {"rw_uri": filecap}]
421 body = simplejson.dumps(set_data)
426 def do_copy(self, options, progressfunc=None):
429 elif options['verbose']:
434 nodeurl = options['node-url']
435 if nodeurl[-1] != "/":
437 self.nodeurl = nodeurl
438 self.progressfunc = progressfunc
439 self.options = options
440 self.aliases = options.aliases
441 self.verbosity = verbosity
442 self.stdout = options.stdout
443 self.stderr = options.stderr
444 if verbosity >= 2 and not self.progressfunc:
445 def progress(message):
446 print >>self.stderr, message
447 self.progressfunc = progress
448 self.caps_only = options["caps-only"]
451 status = self.try_copy()
453 except TahoeError, te:
455 Failure().printTraceback(self.stderr)
457 te.display(self.stderr)
461 source_specs = self.options.sources
462 destination_spec = self.options.destination
463 recursive = self.options["recursive"]
465 target = self.get_target_info(destination_spec)
467 sources = [] # list of (name, source object)
468 for ss in source_specs:
469 name, source = self.get_source_info(ss)
470 sources.append( (name, source) )
472 have_source_dirs = bool([s for (name,s) in sources
473 if isinstance(s, (LocalDirectorySource,
474 TahoeDirectorySource))])
476 if have_source_dirs and not recursive:
477 self.to_stderr("cannot copy directories without --recursive")
480 if isinstance(target, (LocalFileTarget, TahoeFileTarget)):
481 # cp STUFF foo.txt, where foo.txt already exists. This limits the
482 # possibilities considerably.
484 self.to_stderr("target %s is not a directory" % quote_output(destination_spec))
487 self.to_stderr("cannot copy directory into a file")
489 name, source = sources[0]
490 return self.copy_file(source, target)
492 if isinstance(target, (LocalMissingTarget, TahoeMissingTarget)):
494 return self.copy_to_directory(sources, target)
496 # if we have -r, we'll auto-create the target directory. Without
497 # it, we'll only create a file.
498 self.to_stderr("cannot copy multiple files into a file without -r")
501 name, source = sources[0]
502 return self.copy_file(source, target)
504 if isinstance(target, (LocalDirectoryTarget, TahoeDirectoryTarget)):
505 # We're copying to an existing directory -- make sure that we
506 # have target names for everything
507 for (name, source) in sources:
508 if name is None and isinstance(source, TahoeFileSource):
510 "error: you must specify a destination filename")
512 return self.copy_to_directory(sources, target)
514 self.to_stderr("unknown target")
def to_stderr(self, text):
    """Print ``text`` to this object's ``stderr`` stream."""
    err = self.stderr
    print >>err, text
520 def get_target_info(self, destination_spec):
521 rootcap, path = get_alias(self.aliases, destination_spec, None)
522 if rootcap == DefaultAliasMarker:
523 # no alias, so this is a local file
524 pathname = abspath_expanduser_unicode(path.decode('utf-8'))
525 if not os.path.exists(pathname):
526 t = LocalMissingTarget(pathname)
527 elif os.path.isdir(pathname):
528 t = LocalDirectoryTarget(self.progress, pathname)
530 assert os.path.isfile(pathname), pathname
531 t = LocalFileTarget(pathname) # non-empty
533 # this is a tahoe object
534 url = self.nodeurl + "uri/%s" % urllib.quote(rootcap)
536 url += "/" + escape_path(path)
538 resp = do_http("GET", url + "?t=json")
539 if resp.status == 404:
541 t = TahoeMissingTarget(url)
542 elif resp.status == 200:
543 parsed = simplejson.loads(resp.read())
545 if nodetype == "dirnode":
546 t = TahoeDirectoryTarget(self.nodeurl, self.cache,
548 t.init_from_parsed(parsed)
550 writecap = to_str(d.get("rw_uri"))
551 readcap = to_str(d.get("ro_uri"))
552 mutable = d.get("mutable", False)
553 t = TahoeFileTarget(self.nodeurl, mutable,
554 writecap, readcap, url)
556 raise HTTPError("Error examining target %s"
557 % quote_output(destination_spec), resp)
560 def get_source_info(self, source_spec):
561 rootcap, path = get_alias(self.aliases, source_spec, None)
562 if rootcap == DefaultAliasMarker:
563 # no alias, so this is a local file
564 pathname = abspath_expanduser_unicode(path.decode('utf-8'))
565 name = os.path.basename(pathname)
566 if not os.path.exists(pathname):
567 raise MissingSourceError(source_spec)
568 if os.path.isdir(pathname):
569 t = LocalDirectorySource(self.progress, pathname)
571 assert os.path.isfile(pathname)
572 t = LocalFileSource(pathname) # non-empty
574 # this is a tahoe object
575 url = self.nodeurl + "uri/%s" % urllib.quote(rootcap)
578 url += "/" + escape_path(path)
579 last_slash = path.rfind("/")
582 name = path[last_slash+1:]
584 resp = do_http("GET", url + "?t=json")
585 if resp.status == 404:
586 raise MissingSourceError(source_spec)
587 elif resp.status != 200:
588 raise HTTPError("Error examining source %s" % quote_output(source_spec),
590 parsed = simplejson.loads(resp.read())
592 if nodetype == "dirnode":
593 t = TahoeDirectorySource(self.nodeurl, self.cache,
595 t.init_from_parsed(parsed)
597 writecap = to_str(d.get("rw_uri"))
598 readcap = to_str(d.get("ro_uri"))
599 mutable = d.get("mutable", False) # older nodes don't provide it
600 if source_spec.rfind('/') != -1:
601 name = source_spec[source_spec.rfind('/')+1:]
602 t = TahoeFileSource(self.nodeurl, mutable, writecap, readcap)
def dump_graph(self, s, indent=" "):
    """Print the tree of children below the source directory ``s``.

    Each child is printed as ``<indent><quoted name>: <repr of child>``.
    Children that are directory sources are descended into recursively,
    with one more space of indentation.
    """
    directory_types = (LocalDirectorySource, TahoeDirectorySource)
    for childname, node in s.children.items():
        line = "%s%s: %r" % (indent, quote_output(childname), node)
        print(line)
        if not isinstance(node, directory_types):
            continue
        self.dump_graph(node, indent+" ")
612 def copy_to_directory(self, source_infos, target):
613 # step one: build a recursive graph of the source tree. This returns
614 # a dictionary, with child names as keys, and values that are either
615 # Directory or File instances (local or tahoe).
616 source_dirs = self.build_graphs(source_infos)
617 source_files = [source for source in source_infos
618 if isinstance(source[1], (LocalFileSource,
622 #for s in source_dirs:
625 # step two: create the top-level target directory object
626 if isinstance(target, LocalMissingTarget):
627 os.makedirs(target.pathname)
628 target = LocalDirectoryTarget(self.progress, target.pathname)
629 elif isinstance(target, TahoeMissingTarget):
630 writecap = mkdir(target.url)
631 target = TahoeDirectoryTarget(self.nodeurl, self.cache,
633 target.just_created(writecap)
634 assert isinstance(target, (LocalDirectoryTarget, TahoeDirectoryTarget))
635 target.populate(False)
637 # step three: find a target for each source node, creating
638 # directories as necessary. 'targetmap' is a dictionary that uses
639 # target Directory instances as keys, and has values of
640 # (name->sourceobject) dicts for all the files that need to wind up
643 # sources are all LocalFile/LocalDirectory/TahoeFile/TahoeDirectory
644 # target is LocalDirectory/TahoeDirectory
646 self.progress("attaching sources to targets, "
647 "%d files / %d dirs in root" %
648 (len(source_files), len(source_dirs)))
651 self.files_to_copy = 0
653 for (name,s) in source_files:
654 self.attach_to_target(s, name, target)
655 self.files_to_copy += 1
657 for source in source_dirs:
658 self.assign_targets(source, target)
660 self.progress("targets assigned, %s dirs, %s files" %
661 (len(self.targetmap), self.files_to_copy))
663 self.progress("starting copy, %d files, %d directories" %
664 (self.files_to_copy, len(self.targetmap)))
665 self.files_copied = 0
666 self.targets_finished = 0
668 # step four: walk through the list of targets. For each one, copy all
669 # the files. If the target is a TahoeDirectory, upload and create
670 # read-caps, then do a set_children to the target directory.
672 for target in self.targetmap:
673 self.copy_files_to_target(self.targetmap[target], target)
674 self.targets_finished += 1
675 self.progress("%d/%d directories" %
676 (self.targets_finished, len(self.targetmap)))
678 return self.announce_success("files copied")
def attach_to_target(self, source, name, target):
    """Record that ``source`` is to be copied into ``target`` as ``name``.

    ``self.targetmap`` maps each target to a ``{name: source}`` dict; the
    inner dict is created the first time a target is seen.
    ``self.files_to_copy`` is incremented by one on every call.
    """
    pending = self.targetmap.setdefault(target, {})
    pending[name] = source
    self.files_to_copy = self.files_to_copy + 1
686 def assign_targets(self, source, target):
687 # copy everything in the source into the target
688 assert isinstance(source, (LocalDirectorySource, TahoeDirectorySource))
690 for name, child in source.children.items():
691 if isinstance(child, (LocalDirectorySource, TahoeDirectorySource)):
692 # we will need a target directory for this one
693 subtarget = target.get_child_target(name)
694 self.assign_targets(child, subtarget)
696 assert isinstance(child, (LocalFileSource, TahoeFileSource))
697 self.attach_to_target(child, name, target)
def copy_files_to_target(self, targetmap, target):
    """Copy every file listed in ``targetmap`` into ``target``.

    ``targetmap`` maps child names to file source objects. After each
    file is copied, ``self.files_copied`` is incremented and a progress
    message is emitted. Once all files have been handled,
    ``target.set_children()`` is called.
    """
    file_types = (LocalFileSource, TahoeFileSource)
    for childname, src in targetmap.items():
        assert isinstance(src, file_types)
        self.copy_file_into(src, childname, target)
        self.files_copied = self.files_copied + 1
        counts = (self.files_copied, self.files_to_copy,
                  self.targets_finished, len(self.targetmap))
        self.progress("%d/%d files, %d/%d directories" % counts)
    target.set_children()
711 def need_to_copy_bytes(self, source, target):
712 if source.need_to_copy_bytes:
713 # mutable tahoe files, and local files
715 if isinstance(target, (LocalFileTarget, LocalDirectoryTarget)):
719 def announce_success(self, msg):
720 if self.verbosity >= 1:
721 print >>self.stdout, "Success: %s" % msg
724 def copy_file(self, source, target):
725 assert isinstance(source, (LocalFileSource, TahoeFileSource))
726 assert isinstance(target, (LocalFileTarget, TahoeFileTarget,
727 LocalMissingTarget, TahoeMissingTarget))
728 if self.need_to_copy_bytes(source, target):
729 # if the target is a local directory, this will just write the
730 # bytes to disk. If it is a tahoe directory, it will upload the
731 # data, and stash the new filecap for a later set_children call.
732 f = source.open(self.caps_only)
734 return self.announce_success("file copied")
735 # otherwise we're copying tahoe to tahoe, and using immutable files,
736 # so we can just make a link. TODO: this probably won't always work:
737 # need to enumerate the cases and analyze them.
738 target.put_uri(source.bestcap())
739 return self.announce_success("file linked")
741 def copy_file_into(self, source, name, target):
742 assert isinstance(source, (LocalFileSource, TahoeFileSource))
743 assert isinstance(target, (LocalDirectoryTarget, TahoeDirectoryTarget))
744 if self.need_to_copy_bytes(source, target):
745 # if the target is a local directory, this will just write the
746 # bytes to disk. If it is a tahoe directory, it will upload the
747 # data, and stash the new filecap for a later set_children call.
748 f = source.open(self.caps_only)
749 target.put_file(name, f)
751 # otherwise we're copying tahoe to tahoe, and using immutable files,
752 # so we can just make a link
753 target.put_uri(name, source.bestcap())
756 def progress(self, message):
758 if self.progressfunc:
759 self.progressfunc(message)
761 def build_graphs(self, source_infos):
763 for name,source in source_infos:
764 if isinstance(source, (LocalDirectorySource, TahoeDirectorySource)):
765 source.populate(True)
766 graphs.append(source)
771 return Copier().do_copy(options)
773 # error cases that need improvement:
774 # local-file-in-the-way
776 # tahoe cp -r my:docs/proposed/denver.txt proposed/denver.txt
777 # handling of unknown nodes
779 # things that maybe should be errors but aren't
780 # local-dir-in-the-way
782 # tahoe cp -r my:docs/proposed/denver.txt denver.txt
783 # (creates denver.txt/denver.txt)
785 # error cases that look good:
786 # tahoe cp -r my:docs/missing missing
788 # tahoe cp -r my:docs/missing missing -> No JSON object could be decoded
789 # tahoe-file-in-the-way (when we want to make a directory)
790 # tahoe put README my:docs
791 # tahoe cp -r docs/proposed my:docs/proposed