5 from cStringIO import StringIO
6 from twisted.python.failure import Failure
7 from allmydata.scripts.common import get_alias, escape_path, \
8 DefaultAliasMarker, TahoeError
9 from allmydata.scripts.common_http import do_http, HTTPError
10 from allmydata import uri
11 from allmydata.util import fileutil
12 from allmydata.util.encodingutil import unicode_to_url, listdir_unicode, quote_output, to_str
13 from allmydata.util.assertutil import precondition
class MissingSourceError(TahoeError):
    """Raised when a copy source cannot be found, locally or on the grid."""

    def __init__(self, name):
        # The name is passed through quote_output before being embedded in
        # the message handed to the TahoeError base class.
        quoted_name = quote_output(name)
        message = "No such file or directory %s" % quoted_name
        TahoeError.__init__(self, message)
22 resp = do_http("GET", url)
23 if resp.status == 200:
25 raise HTTPError("Error during GET", resp)
27 def GET_to_string(url):
32 resp = do_http("PUT", url, data)
33 if resp.status in (200, 201):
35 raise HTTPError("Error during PUT", resp)
38 resp = do_http("POST", url, data)
39 if resp.status in (200, 201):
41 raise HTTPError("Error during POST", resp)
44 url = targeturl + "?t=mkdir"
45 resp = do_http("POST", url)
46 if resp.status in (200, 201):
47 return resp.read().strip()
48 raise HTTPError("Error during mkdir", resp)
50 def make_tahoe_subdirectory(nodeurl, parent_writecap, name):
51 url = nodeurl + "/".join(["uri",
52 urllib.quote(parent_writecap),
55 resp = do_http("POST", url)
56 if resp.status in (200, 201):
57 return resp.read().strip()
58 raise HTTPError("Error during mkdir", resp)
61 class LocalFileSource:
def __init__(self, pathname):
    """Remember the local path of a source file; pathname must be unicode."""
    is_unicode = isinstance(pathname, unicode)
    precondition(is_unicode, pathname)
    self.pathname = pathname
66 def need_to_copy_bytes(self):
69 def open(self, caps_only):
70 return open(os.path.expanduser(self.pathname), "rb")
class LocalFileTarget:
    """Copy destination that is an existing regular file on the local disk."""

    def __init__(self, pathname):
        # Local paths are carried as unicode; reject anything else up front.
        is_unicode = isinstance(pathname, unicode)
        precondition(is_unicode, pathname)
        self.pathname = pathname

    def put_file(self, inf):
        """Hand the file-like object inf to fileutil.put_file for this path."""
        destination = self.pathname
        fileutil.put_file(destination, inf)
class LocalMissingTarget:
    """Copy destination on the local disk that does not exist yet."""

    def __init__(self, pathname):
        # Local paths are carried as unicode; reject anything else up front.
        is_unicode = isinstance(pathname, unicode)
        precondition(is_unicode, pathname)
        self.pathname = pathname

    def put_file(self, inf):
        """Hand the file-like object inf to fileutil.put_file for this path."""
        destination = self.pathname
        fileutil.put_file(destination, inf)
91 class LocalDirectorySource:
92 def __init__(self, progressfunc, pathname):
93 precondition(isinstance(pathname, unicode), pathname)
95 self.progressfunc = progressfunc
96 self.pathname = pathname
99 def populate(self, recurse):
100 if self.children is not None:
103 children = listdir_unicode(self.pathname)
104 for i,n in enumerate(children):
105 self.progressfunc("examining %d of %d" % (i, len(children)))
106 pn = os.path.join(self.pathname, n)
107 if os.path.isdir(pn):
108 child = LocalDirectorySource(self.progressfunc, pn)
109 self.children[n] = child
112 elif os.path.isfile(pn):
113 self.children[n] = LocalFileSource(pn)
115 # Could be dangling symlink; probably not copy-able.
116 # TODO: output a warning
119 class LocalDirectoryTarget:
120 def __init__(self, progressfunc, pathname):
121 precondition(isinstance(pathname, unicode), pathname)
123 self.progressfunc = progressfunc
124 self.pathname = pathname
127 def populate(self, recurse):
128 if self.children is not None:
131 children = listdir_unicode(self.pathname)
132 for i,n in enumerate(children):
133 self.progressfunc("examining %d of %d" % (i, len(children)))
135 pn = os.path.join(self.pathname, n)
136 if os.path.isdir(pn):
137 child = LocalDirectoryTarget(self.progressfunc, pn)
138 self.children[n] = child
142 assert os.path.isfile(pn)
143 self.children[n] = LocalFileTarget(pn)
145 def get_child_target(self, name):
146 if self.children is None:
148 if name in self.children:
149 return self.children[name]
150 pathname = os.path.join(self.pathname, name)
151 os.makedirs(pathname)
152 return LocalDirectoryTarget(self.progressfunc, pathname)
def put_file(self, name, inf):
    """Store the file-like object inf as the child called name.

    name must be unicode. It is joined onto this directory's pathname and
    the resulting path is handed, together with inf, to fileutil.put_file.
    """
    precondition(isinstance(name, unicode), name)
    fileutil.put_file(os.path.join(self.pathname, name), inf)
159 def set_children(self):
162 class TahoeFileSource:
def __init__(self, nodeurl, mutable, writecap, readcap):
    """Record the node URL, mutability flag and caps of a grid source file."""
    self.nodeurl, self.mutable = nodeurl, mutable
    self.writecap, self.readcap = writecap, readcap
169 def need_to_copy_bytes(self):
174 def open(self, caps_only):
176 return StringIO(self.readcap)
177 url = self.nodeurl + "uri/" + urllib.quote(self.readcap)
178 return GET_to_file(url)
181 return self.writecap or self.readcap
183 class TahoeFileTarget:
184 def __init__(self, nodeurl, mutable, writecap, readcap, url):
185 self.nodeurl = nodeurl
186 self.mutable = mutable
187 self.writecap = writecap
188 self.readcap = readcap
191 def put_file(self, inf):
192 # We want to replace this object in-place.
194 # our do_http() call currently requires a string or a filehandle with
196 if not hasattr(inf, "seek"):
199 # TODO: this always creates immutable files. We might want an option
200 # to always create mutable files, or to copy mutable files into new
201 # mutable files. ticket #835
203 class TahoeDirectorySource:
204 def __init__(self, nodeurl, cache, progressfunc):
205 self.nodeurl = nodeurl
207 self.progressfunc = progressfunc
209 def init_from_grid(self, writecap, readcap):
210 self.writecap = writecap
211 self.readcap = readcap
212 bestcap = writecap or readcap
213 url = self.nodeurl + "uri/%s" % urllib.quote(bestcap)
214 resp = do_http("GET", url + "?t=json")
215 if resp.status != 200:
216 raise HTTPError("Error examining source directory", resp)
217 parsed = simplejson.loads(resp.read())
219 assert nodetype == "dirnode"
220 self.mutable = d.get("mutable", False) # older nodes don't provide it
221 self.children_d = dict( [(unicode(name),value)
223 in d["children"].iteritems()] )
226 def init_from_parsed(self, parsed):
228 self.writecap = to_str(d.get("rw_uri"))
229 self.readcap = to_str(d.get("ro_uri"))
230 self.mutable = d.get("mutable", False) # older nodes don't provide it
231 self.children_d = dict( [(unicode(name),value)
233 in d["children"].iteritems()] )
236 def populate(self, recurse):
237 if self.children is not None:
240 for i,(name, data) in enumerate(self.children_d.items()):
241 self.progressfunc("examining %d of %d" % (i, len(self.children_d)))
242 if data[0] == "filenode":
243 mutable = data[1].get("mutable", False)
244 writecap = to_str(data[1].get("rw_uri"))
245 readcap = to_str(data[1].get("ro_uri"))
246 self.children[name] = TahoeFileSource(self.nodeurl, mutable,
248 elif data[0] == "dirnode":
249 writecap = to_str(data[1].get("rw_uri"))
250 readcap = to_str(data[1].get("ro_uri"))
251 if writecap and writecap in self.cache:
252 child = self.cache[writecap]
253 elif readcap and readcap in self.cache:
254 child = self.cache[readcap]
256 child = TahoeDirectorySource(self.nodeurl, self.cache,
258 child.init_from_grid(writecap, readcap)
260 self.cache[writecap] = child
262 self.cache[readcap] = child
265 self.children[name] = child
267 # TODO: there should be an option to skip unknown nodes.
268 raise TahoeError("Cannot copy unknown nodes (ticket #839). "
269 "You probably need to use a later version of "
270 "Tahoe-LAFS to copy this directory.")
272 class TahoeMissingTarget:
273 def __init__(self, url):
276 def put_file(self, inf):
277 # We want to replace this object in-place.
278 if not hasattr(inf, "seek"):
281 # TODO: this always creates immutable files. We might want an option
282 # to always create mutable files, or to copy mutable files into new
def put_uri(self, filecap):
    """Send filecap to this target's URL with "?t=uri" appended.

    Returns whatever the PUT helper returns for that request.
    """
    # I'm not sure this will always work
    link_url = self.url + "?t=uri"
    return PUT(link_url, filecap)
289 class TahoeDirectoryTarget:
290 def __init__(self, nodeurl, cache, progressfunc):
291 self.nodeurl = nodeurl
293 self.progressfunc = progressfunc
294 self.new_children = {}
296 def init_from_parsed(self, parsed):
298 self.writecap = to_str(d.get("rw_uri"))
299 self.readcap = to_str(d.get("ro_uri"))
300 self.mutable = d.get("mutable", False) # older nodes don't provide it
301 self.children_d = dict( [(unicode(name),value)
303 in d["children"].iteritems()] )
306 def init_from_grid(self, writecap, readcap):
307 self.writecap = writecap
308 self.readcap = readcap
309 bestcap = writecap or readcap
310 url = self.nodeurl + "uri/%s" % urllib.quote(bestcap)
311 resp = do_http("GET", url + "?t=json")
312 if resp.status != 200:
313 raise HTTPError("Error examining target directory", resp)
314 parsed = simplejson.loads(resp.read())
316 assert nodetype == "dirnode"
317 self.mutable = d.get("mutable", False) # older nodes don't provide it
318 self.children_d = dict( [(unicode(name),value)
320 in d["children"].iteritems()] )
323 def just_created(self, writecap):
324 self.writecap = writecap
325 self.readcap = uri.from_string(writecap).get_readonly().to_string()
330 def populate(self, recurse):
331 if self.children is not None:
334 for i,(name, data) in enumerate(self.children_d.items()):
335 self.progressfunc("examining %d of %d" % (i, len(self.children_d)))
336 if data[0] == "filenode":
337 mutable = data[1].get("mutable", False)
338 writecap = to_str(data[1].get("rw_uri"))
339 readcap = to_str(data[1].get("ro_uri"))
342 url = self.nodeurl + "/".join(["uri",
343 urllib.quote(self.writecap),
344 urllib.quote(unicode_to_url(name))])
345 self.children[name] = TahoeFileTarget(self.nodeurl, mutable,
346 writecap, readcap, url)
347 elif data[0] == "dirnode":
348 writecap = to_str(data[1].get("rw_uri"))
349 readcap = to_str(data[1].get("ro_uri"))
350 if writecap and writecap in self.cache:
351 child = self.cache[writecap]
352 elif readcap and readcap in self.cache:
353 child = self.cache[readcap]
355 child = TahoeDirectoryTarget(self.nodeurl, self.cache,
357 child.init_from_grid(writecap, readcap)
359 self.cache[writecap] = child
361 self.cache[readcap] = child
364 self.children[name] = child
366 # TODO: there should be an option to skip unknown nodes.
367 raise TahoeError("Cannot copy unknown nodes (ticket #839). "
368 "You probably need to use a later version of "
369 "Tahoe-LAFS to copy this directory.")
371 def get_child_target(self, name):
372 # return a new target for a named subdirectory of this dir
373 if self.children is None:
375 if name in self.children:
376 return self.children[name]
377 writecap = make_tahoe_subdirectory(self.nodeurl, self.writecap, name)
378 child = TahoeDirectoryTarget(self.nodeurl, self.cache,
380 child.just_created(writecap)
381 self.children[name] = child
384 def put_file(self, name, inf):
385 url = self.nodeurl + "uri"
386 if not hasattr(inf, "seek"):
388 filecap = PUT(url, inf)
389 # TODO: this always creates immutable files. We might want an option
390 # to always create mutable files, or to copy mutable files into new
392 self.new_children[name] = filecap
def put_uri(self, name, filecap):
    """Queue an existing filecap to be linked under name.

    Nothing is sent here; the pair is only recorded in self.new_children,
    replacing any entry already queued under the same name.
    """
    self.new_children.update({name: filecap})
397 def set_children(self):
398 if not self.new_children:
400 url = (self.nodeurl + "uri/" + urllib.quote(self.writecap)
403 for (name, filecap) in self.new_children.items():
404 # it just so happens that ?t=set_children will accept both file
405 # read-caps and write-caps as ['rw_uri'], and will handle either
406 # correctly. So don't bother trying to figure out whether the one
407 # we have is read-only or read-write.
408 # TODO: think about how this affects forward-compatibility for
410 set_data[name] = ["filenode", {"rw_uri": filecap}]
411 body = simplejson.dumps(set_data)
416 def do_copy(self, options, progressfunc=None):
419 elif options['verbose']:
424 nodeurl = options['node-url']
425 if nodeurl[-1] != "/":
427 self.nodeurl = nodeurl
428 self.progressfunc = progressfunc
429 self.options = options
430 self.aliases = options.aliases
431 self.verbosity = verbosity
432 self.stdout = options.stdout
433 self.stderr = options.stderr
434 if verbosity >= 2 and not self.progressfunc:
435 def progress(message):
436 print >>self.stderr, message
437 self.progressfunc = progress
438 self.caps_only = options["caps-only"]
441 status = self.try_copy()
443 except TahoeError, te:
445 Failure().printTraceback(self.stderr)
447 te.display(self.stderr)
451 source_specs = self.options.sources
452 destination_spec = self.options.destination
453 recursive = self.options["recursive"]
455 target = self.get_target_info(destination_spec)
457 sources = [] # list of (name, source object)
458 for ss in source_specs:
459 name, source = self.get_source_info(ss)
460 sources.append( (name, source) )
462 have_source_dirs = bool([s for (name,s) in sources
463 if isinstance(s, (LocalDirectorySource,
464 TahoeDirectorySource))])
466 if have_source_dirs and not recursive:
467 self.to_stderr("cannot copy directories without --recursive")
470 if isinstance(target, (LocalFileTarget, TahoeFileTarget)):
471 # cp STUFF foo.txt, where foo.txt already exists. This limits the
472 # possibilities considerably.
474 self.to_stderr("target %s is not a directory" % quote_output(destination_spec))
477 self.to_stderr("cannot copy directory into a file")
479 name, source = sources[0]
480 return self.copy_file(source, target)
482 if isinstance(target, (LocalMissingTarget, TahoeMissingTarget)):
484 return self.copy_to_directory(sources, target)
486 # if we have -r, we'll auto-create the target directory. Without
487 # it, we'll only create a file.
488 self.to_stderr("cannot copy multiple files into a file without -r")
491 name, source = sources[0]
492 return self.copy_file(source, target)
494 if isinstance(target, (LocalDirectoryTarget, TahoeDirectoryTarget)):
495 # We're copying to an existing directory -- make sure that we
496 # have target names for everything
497 for (name, source) in sources:
498 if name is None and isinstance(source, TahoeFileSource):
500 "error: you must specify a destination filename")
502 return self.copy_to_directory(sources, target)
504 self.to_stderr("unknown target")
def to_stderr(self, text):
    """Print text to the stream stored in self.stderr."""
    err_stream = self.stderr
    print >>err_stream, text
510 def get_target_info(self, destination_spec):
511 rootcap, path = get_alias(self.aliases, destination_spec, None)
512 if rootcap == DefaultAliasMarker:
513 # no alias, so this is a local file
514 pathname = os.path.abspath(os.path.expanduser(path.decode('utf-8')))
515 if not os.path.exists(pathname):
516 t = LocalMissingTarget(pathname)
517 elif os.path.isdir(pathname):
518 t = LocalDirectoryTarget(self.progress, pathname)
520 assert os.path.isfile(pathname), pathname
521 t = LocalFileTarget(pathname) # non-empty
523 # this is a tahoe object
524 url = self.nodeurl + "uri/%s" % urllib.quote(rootcap)
526 url += "/" + escape_path(path)
528 resp = do_http("GET", url + "?t=json")
529 if resp.status == 404:
531 t = TahoeMissingTarget(url)
532 elif resp.status == 200:
533 parsed = simplejson.loads(resp.read())
535 if nodetype == "dirnode":
536 t = TahoeDirectoryTarget(self.nodeurl, self.cache,
538 t.init_from_parsed(parsed)
540 writecap = to_str(d.get("rw_uri"))
541 readcap = to_str(d.get("ro_uri"))
542 mutable = d.get("mutable", False)
543 t = TahoeFileTarget(self.nodeurl, mutable,
544 writecap, readcap, url)
546 raise HTTPError("Error examining target %s"
547 % quote_output(destination_spec), resp)
550 def get_source_info(self, source_spec):
551 rootcap, path = get_alias(self.aliases, source_spec, None)
552 if rootcap == DefaultAliasMarker:
553 # no alias, so this is a local file
554 pathname = os.path.abspath(os.path.expanduser(path.decode('utf-8')))
555 name = os.path.basename(pathname)
556 if not os.path.exists(pathname):
557 raise MissingSourceError(source_spec)
558 if os.path.isdir(pathname):
559 t = LocalDirectorySource(self.progress, pathname)
561 assert os.path.isfile(pathname)
562 t = LocalFileSource(pathname) # non-empty
564 # this is a tahoe object
565 url = self.nodeurl + "uri/%s" % urllib.quote(rootcap)
568 url += "/" + escape_path(path)
569 last_slash = path.rfind("/")
572 name = path[last_slash+1:]
574 resp = do_http("GET", url + "?t=json")
575 if resp.status == 404:
576 raise MissingSourceError(source_spec)
577 elif resp.status != 200:
578 raise HTTPError("Error examining source %s" % quote_output(source_spec),
580 parsed = simplejson.loads(resp.read())
582 if nodetype == "dirnode":
583 t = TahoeDirectorySource(self.nodeurl, self.cache,
585 t.init_from_parsed(parsed)
587 writecap = to_str(d.get("rw_uri"))
588 readcap = to_str(d.get("ro_uri"))
589 mutable = d.get("mutable", False) # older nodes don't provide it
590 if source_spec.rfind('/') != -1:
591 name = source_spec[source_spec.rfind('/')+1:]
592 t = TahoeFileSource(self.nodeurl, mutable, writecap, readcap)
596 def dump_graph(self, s, indent=" "):
597 for name, child in s.children.items():
598 print "%s%s: %r" % (indent, quote_output(name), child)
599 if isinstance(child, (LocalDirectorySource, TahoeDirectorySource)):
600 self.dump_graph(child, indent+" ")
602 def copy_to_directory(self, source_infos, target):
603 # step one: build a recursive graph of the source tree. This returns
604 # a dictionary, with child names as keys, and values that are either
605 # Directory or File instances (local or tahoe).
606 source_dirs = self.build_graphs(source_infos)
607 source_files = [source for source in source_infos
608 if isinstance(source[1], (LocalFileSource,
612 #for s in source_dirs:
615 # step two: create the top-level target directory object
616 if isinstance(target, LocalMissingTarget):
617 os.makedirs(target.pathname)
618 target = LocalDirectoryTarget(self.progress, target.pathname)
619 elif isinstance(target, TahoeMissingTarget):
620 writecap = mkdir(target.url)
621 target = TahoeDirectoryTarget(self.nodeurl, self.cache,
623 target.just_created(writecap)
624 assert isinstance(target, (LocalDirectoryTarget, TahoeDirectoryTarget))
625 target.populate(False)
627 # step three: find a target for each source node, creating
628 # directories as necessary. 'targetmap' is a dictionary that uses
629 # target Directory instances as keys, and has values of
630 # (name->sourceobject) dicts for all the files that need to wind up
633 # sources are all LocalFile/LocalDirectory/TahoeFile/TahoeDirectory
634 # target is LocalDirectory/TahoeDirectory
636 self.progress("attaching sources to targets, "
637 "%d files / %d dirs in root" %
638 (len(source_files), len(source_dirs)))
641 self.files_to_copy = 0
643 for (name,s) in source_files:
644 self.attach_to_target(s, name, target)
645 self.files_to_copy += 1
647 for source in source_dirs:
648 self.assign_targets(source, target)
650 self.progress("targets assigned, %s dirs, %s files" %
651 (len(self.targetmap), self.files_to_copy))
653 self.progress("starting copy, %d files, %d directories" %
654 (self.files_to_copy, len(self.targetmap)))
655 self.files_copied = 0
656 self.targets_finished = 0
658 # step four: walk through the list of targets. For each one, copy all
659 # the files. If the target is a TahoeDirectory, upload and create
660 # read-caps, then do a set_children to the target directory.
662 for target in self.targetmap:
663 self.copy_files_to_target(self.targetmap[target], target)
664 self.targets_finished += 1
665 self.progress("%d/%d directories" %
666 (self.targets_finished, len(self.targetmap)))
668 return self.announce_success("files copied")
def attach_to_target(self, source, name, target):
    """Schedule source to be copied into target under name.

    self.targetmap maps each target to a {name: source} dict; the inner
    dict is created the first time a target is seen. Each call also adds
    one to self.files_to_copy.
    """
    per_target = self.targetmap.setdefault(target, {})
    per_target[name] = source
    self.files_to_copy = self.files_to_copy + 1
676 def assign_targets(self, source, target):
677 # copy everything in the source into the target
678 assert isinstance(source, (LocalDirectorySource, TahoeDirectorySource))
680 for name, child in source.children.items():
681 if isinstance(child, (LocalDirectorySource, TahoeDirectorySource)):
682 # we will need a target directory for this one
683 subtarget = target.get_child_target(name)
684 self.assign_targets(child, subtarget)
686 assert isinstance(child, (LocalFileSource, TahoeFileSource))
687 self.attach_to_target(child, name, target)
def copy_files_to_target(self, targetmap, target):
    """Copy every file listed in targetmap into target, then finalize it.

    targetmap here is the {name: source} dict for this one target. It
    shadows self.targetmap, which is still consulted for the directory
    total shown in the progress message. After all files are handled,
    target.set_children() is called once.
    """
    for child_name, file_source in targetmap.items():
        assert isinstance(file_source, (LocalFileSource, TahoeFileSource))
        self.copy_file_into(file_source, child_name, target)
        self.files_copied += 1
        counters = (self.files_copied, self.files_to_copy,
                    self.targets_finished, len(self.targetmap))
        self.progress("%d/%d files, %d/%d directories" % counters)
    target.set_children()
701 def need_to_copy_bytes(self, source, target):
702 if source.need_to_copy_bytes:
703 # mutable tahoe files, and local files
705 if isinstance(target, (LocalFileTarget, LocalDirectoryTarget)):
709 def announce_success(self, msg):
710 if self.verbosity >= 1:
711 print >>self.stdout, "Success: %s" % msg
714 def copy_file(self, source, target):
715 assert isinstance(source, (LocalFileSource, TahoeFileSource))
716 assert isinstance(target, (LocalFileTarget, TahoeFileTarget,
717 LocalMissingTarget, TahoeMissingTarget))
718 if self.need_to_copy_bytes(source, target):
719 # if the target is a local directory, this will just write the
720 # bytes to disk. If it is a tahoe directory, it will upload the
721 # data, and stash the new filecap for a later set_children call.
722 f = source.open(self.caps_only)
724 return self.announce_success("file copied")
725 # otherwise we're copying tahoe to tahoe, and using immutable files,
726 # so we can just make a link. TODO: this probably won't always work:
727 # need to enumerate the cases and analyze them.
728 target.put_uri(source.bestcap())
729 return self.announce_success("file linked")
731 def copy_file_into(self, source, name, target):
732 assert isinstance(source, (LocalFileSource, TahoeFileSource))
733 assert isinstance(target, (LocalDirectoryTarget, TahoeDirectoryTarget))
734 if self.need_to_copy_bytes(source, target):
735 # if the target is a local directory, this will just write the
736 # bytes to disk. If it is a tahoe directory, it will upload the
737 # data, and stash the new filecap for a later set_children call.
738 f = source.open(self.caps_only)
739 target.put_file(name, f)
741 # otherwise we're copying tahoe to tahoe, and using immutable files,
742 # so we can just make a link
743 target.put_uri(name, source.bestcap())
746 def progress(self, message):
748 if self.progressfunc:
749 self.progressfunc(message)
751 def build_graphs(self, source_infos):
753 for name,source in source_infos:
754 if isinstance(source, (LocalDirectorySource, TahoeDirectorySource)):
755 source.populate(True)
756 graphs.append(source)
761 return Copier().do_copy(options)
763 # error cases that need improvement:
764 # local-file-in-the-way
766 # tahoe cp -r my:docs/proposed/denver.txt proposed/denver.txt
767 # handling of unknown nodes
769 # things that maybe should be errors but aren't
770 # local-dir-in-the-way
772 # tahoe cp -r my:docs/proposed/denver.txt denver.txt
773 # (creates denver.txt/denver.txt)
775 # error cases that look good:
776 # tahoe cp -r my:docs/missing missing
778 # tahoe cp -r my:docs/missing missing -> No JSON object could be decoded
779 # tahoe-file-in-the-way (when we want to make a directory)
780 # tahoe put README my:docs
781 # tahoe cp -r docs/proposed my:docs/proposed