5 from allmydata.scripts.common import get_alias, escape_path, DefaultAliasMarker
6 from allmydata.scripts.common_http import do_http
7 from allmydata import uri
14 class WriteError(Exception):
16 class ReadError(Exception):
18 class MissingSourceError(Exception):
22 resp = do_http("GET", url)
23 if resp.status == 200:
25 raise ReadError("Error during GET: %s %s %s" % (resp.status,
28 def GET_to_string(url):
33 resp = do_http("PUT", url, data)
34 if resp.status in (200, 201):
36 raise WriteError("Error during PUT: %s %s %s" % (resp.status, resp.reason,
40 resp = do_http("POST", url, data)
41 if resp.status in (200, 201):
43 raise WriteError("Error during POST: %s %s %s" % (resp.status, resp.reason,
47 url = targeturl + "?t=mkdir"
48 resp = do_http("POST", url)
49 if resp.status in (200, 201):
50 return resp.read().strip()
51 raise WriteError("Error during mkdir: %s %s %s" % (resp.status, resp.reason,
54 def make_tahoe_subdirectory(nodeurl, parent_writecap, name):
55 url = nodeurl + "/".join(["uri",
56 urllib.quote(parent_writecap),
59 resp = do_http("POST", url)
60 if resp.status in (200, 201):
61 return resp.read().strip()
62 raise WriteError("Error during mkdir: %s %s %s" % (resp.status, resp.reason,
66 class LocalFileSource:
67 def __init__(self, pathname):
68 self.pathname = pathname
70 def need_to_copy_bytes(self):
74 return open(self.pathname, "rb")
76 class LocalFileTarget:
77 def __init__(self, pathname):
78 self.pathname = pathname
79 def put_file(self, inf):
80 outf = open(self.pathname, "wb")
82 data = inf.read(32768)
88 class LocalMissingTarget:
89 def __init__(self, pathname):
90 self.pathname = pathname
92 def put_file(self, inf):
93 outf = open(self.pathname, "wb")
95 data = inf.read(32768)
101 class LocalDirectorySource:
102 def __init__(self, progressfunc, pathname):
103 self.progressfunc = progressfunc
104 self.pathname = pathname
107 def populate(self, recurse):
108 if self.children is not None:
111 children = os.listdir(self.pathname)
112 for i,n in enumerate(children):
113 self.progressfunc("examining %d of %d" % (i, len(children)))
114 pn = os.path.join(self.pathname, n)
115 if os.path.isdir(pn):
116 child = LocalDirectorySource(self.progressfunc, pn)
117 self.children[n] = child
120 elif os.path.isfile(pn):
121 self.children[n] = LocalFileSource(pn)
123 # Could be dangling symlink; probably not copy-able.
126 class LocalDirectoryTarget:
127 def __init__(self, progressfunc, pathname):
128 self.progressfunc = progressfunc
129 self.pathname = pathname
132 def populate(self, recurse):
133 if self.children is not None:
136 children = os.listdir(self.pathname)
137 for i,n in enumerate(children):
138 self.progressfunc("examining %d of %d" % (i, len(children)))
139 pn = os.path.join(self.pathname, n)
140 if os.path.isdir(pn):
141 child = LocalDirectoryTarget(self.progressfunc, pn)
142 self.children[n] = child
146 assert os.path.isfile(pn)
147 self.children[n] = LocalFileTarget(pn)
149 def get_child_target(self, name):
150 if self.children is None:
152 if name in self.children:
153 return self.children[name]
154 pathname = os.path.join(self.pathname, name)
155 os.makedirs(pathname)
156 return LocalDirectoryTarget(self.progressfunc, pathname)
158 def put_file(self, name, inf):
159 pathname = os.path.join(self.pathname, name)
160 outf = open(pathname, "wb")
162 data = inf.read(32768)
168 def set_children(self):
171 class TahoeFileSource:
172 def __init__(self, nodeurl, mutable, writecap, readcap):
173 self.nodeurl = nodeurl
174 self.mutable = mutable
175 self.writecap = writecap
176 self.readcap = readcap
178 def need_to_copy_bytes(self):
184 url = self.nodeurl + "uri/" + urllib.quote(self.readcap)
185 return GET_to_file(url)
188 return self.writecap or self.readcap
190 class TahoeFileTarget:
191 def __init__(self, nodeurl, mutable, writecap, readcap, url):
192 self.nodeurl = nodeurl
193 self.mutable = mutable
194 self.writecap = writecap
195 self.readcap = readcap
198 def put_file(self, inf):
199 # We want to replace this object in-place.
201 # our do_http() call currently requires a string or a filehandle with
203 if not hasattr(inf, "seek"):
206 # TODO: this always creates immutable files. We might want an option
207 # to always create mutable files, or to copy mutable files into new
210 class TahoeDirectorySource:
211 def __init__(self, nodeurl, cache, progressfunc):
212 self.nodeurl = nodeurl
214 self.progressfunc = progressfunc
216 def init_from_grid(self, writecap, readcap):
217 self.writecap = writecap
218 self.readcap = readcap
219 bestcap = writecap or readcap
220 url = self.nodeurl + "uri/%s" % urllib.quote(bestcap)
221 resp = do_http("GET", url + "?t=json")
222 assert resp.status == 200
223 parsed = simplejson.loads(resp.read())
225 assert nodetype == "dirnode"
226 self.mutable = d.get("mutable", False) # older nodes don't provide it
227 self.children_d = dict( [(unicode(name),value)
229 in d["children"].iteritems()] )
232 def init_from_parsed(self, parsed):
234 self.writecap = ascii_or_none(d.get("rw_uri"))
235 self.readcap = ascii_or_none(d.get("ro_uri"))
236 self.mutable = d.get("mutable", False) # older nodes don't provide it
237 self.children_d = dict( [(unicode(name),value)
239 in d["children"].iteritems()] )
242 def populate(self, recurse):
243 if self.children is not None:
246 for i,(name, data) in enumerate(self.children_d.items()):
247 self.progressfunc("examining %d of %d" % (i, len(self.children_d)))
248 if data[0] == "filenode":
249 mutable = data[1].get("mutable", False)
250 writecap = ascii_or_none(data[1].get("rw_uri"))
251 readcap = ascii_or_none(data[1].get("ro_uri"))
252 self.children[name] = TahoeFileSource(self.nodeurl, mutable,
255 assert data[0] == "dirnode"
256 writecap = ascii_or_none(data[1].get("rw_uri"))
257 readcap = ascii_or_none(data[1].get("ro_uri"))
258 if writecap and writecap in self.cache:
259 child = self.cache[writecap]
260 elif readcap and readcap in self.cache:
261 child = self.cache[readcap]
263 child = TahoeDirectorySource(self.nodeurl, self.cache,
265 child.init_from_grid(writecap, readcap)
267 self.cache[writecap] = child
269 self.cache[readcap] = child
272 self.children[name] = child
274 class TahoeMissingTarget:
275 def __init__(self, url):
278 def put_file(self, inf):
279 # We want to replace this object in-place.
280 if not hasattr(inf, "seek"):
283 # TODO: this always creates immutable files. We might want an option
284 # to always create mutable files, or to copy mutable files into new
287 def put_uri(self, filecap):
288 # I'm not sure this will always work
289 return PUT(self.url + "?t=uri", filecap)
291 class TahoeDirectoryTarget:
292 def __init__(self, nodeurl, cache, progressfunc):
293 self.nodeurl = nodeurl
295 self.progressfunc = progressfunc
296 self.new_children = {}
298 def init_from_parsed(self, parsed):
300 self.writecap = ascii_or_none(d.get("rw_uri"))
301 self.readcap = ascii_or_none(d.get("ro_uri"))
302 self.mutable = d.get("mutable", False) # older nodes don't provide it
303 self.children_d = dict( [(unicode(name),value)
305 in d["children"].iteritems()] )
308 def init_from_grid(self, writecap, readcap):
309 self.writecap = writecap
310 self.readcap = readcap
311 bestcap = writecap or readcap
312 url = self.nodeurl + "uri/%s" % urllib.quote(bestcap)
313 resp = do_http("GET", url + "?t=json")
314 assert resp.status == 200
315 parsed = simplejson.loads(resp.read())
317 assert nodetype == "dirnode"
318 self.mutable = d.get("mutable", False) # older nodes don't provide it
319 self.children_d = dict( [(unicode(name),value)
321 in d["children"].iteritems()] )
324 def just_created(self, writecap):
325 self.writecap = writecap
326 self.readcap = uri.from_string(writecap).get_readonly().to_string()
331 def populate(self, recurse):
332 if self.children is not None:
335 for i,(name, data) in enumerate(self.children_d.items()):
336 self.progressfunc("examining %d of %d" % (i, len(self.children_d)))
337 if data[0] == "filenode":
338 mutable = data[1].get("mutable", False)
339 writecap = ascii_or_none(data[1].get("rw_uri"))
340 readcap = ascii_or_none(data[1].get("ro_uri"))
343 url = self.nodeurl + "/".join(["uri",
344 urllib.quote(self.writecap),
345 urllib.quote(name.encode('utf-8'))])
346 self.children[name] = TahoeFileTarget(self.nodeurl, mutable,
347 writecap, readcap, url)
349 assert data[0] == "dirnode"
350 writecap = ascii_or_none(data[1].get("rw_uri"))
351 readcap = ascii_or_none(data[1].get("ro_uri"))
352 if writecap and writecap in self.cache:
353 child = self.cache[writecap]
354 elif readcap and readcap in self.cache:
355 child = self.cache[readcap]
357 child = TahoeDirectoryTarget(self.nodeurl, self.cache,
359 child.init_from_grid(writecap, readcap)
361 self.cache[writecap] = child
363 self.cache[readcap] = child
366 self.children[name] = child
368 def get_child_target(self, name):
369 # return a new target for a named subdirectory of this dir
370 if self.children is None:
372 if name in self.children:
373 return self.children[name]
374 writecap = make_tahoe_subdirectory(self.nodeurl, self.writecap, name)
375 child = TahoeDirectoryTarget(self.nodeurl, self.cache,
377 child.just_created(writecap)
378 self.children[name] = child
381 def put_file(self, name, inf):
382 url = self.nodeurl + "uri"
383 if not hasattr(inf, "seek"):
385 filecap = PUT(url, inf)
386 # TODO: this always creates immutable files. We might want an option
387 # to always create mutable files, or to copy mutable files into new
389 self.new_children[name] = filecap
391 def put_uri(self, name, filecap):
392 self.new_children[name] = filecap
394 def set_children(self):
395 if not self.new_children:
397 url = (self.nodeurl + "uri/" + urllib.quote(self.writecap)
400 for (name, filecap) in self.new_children.items():
401 # it just so happens that ?t=set_children will accept both file
402 # read-caps and write-caps as ['rw_uri'], and will handle either
403 # correctly. So don't bother trying to figure out whether the one
404 # we have is read-only or read-write.
405 set_data[name] = ["filenode", {"rw_uri": filecap}]
406 body = simplejson.dumps(set_data)
411 def do_copy(self, options, progressfunc=None):
414 elif options['verbose']:
419 nodeurl = options['node-url']
420 if nodeurl[-1] != "/":
422 self.nodeurl = nodeurl
423 self.progressfunc = progressfunc
424 self.options = options
425 self.aliases = options.aliases
426 self.verbosity = verbosity
427 self.stdout = options.stdout
428 self.stderr = options.stderr
429 if verbosity >= 2 and not self.progressfunc:
430 def progress(message):
431 print >>self.stderr, message
432 self.progressfunc = progress
434 source_specs = options.sources
435 destination_spec = options.destination
436 recursive = self.options["recursive"]
438 target = self.get_target_info(destination_spec)
441 sources = [] # list of (name, source object)
442 for ss in source_specs:
443 name, source = self.get_source_info(ss)
444 sources.append( (name, source) )
445 except MissingSourceError, e:
446 self.to_stderr("No such file or directory %s" % e.args[0])
449 have_source_dirs = bool([s for (name,s) in sources
450 if isinstance(s, (LocalDirectorySource,
451 TahoeDirectorySource))])
453 if have_source_dirs and not recursive:
454 self.to_stderr("cannot copy directories without --recursive")
457 if isinstance(target, (LocalFileTarget, TahoeFileTarget)):
458 # cp STUFF foo.txt, where foo.txt already exists. This limits the
459 # possibilities considerably.
461 self.to_stderr("target '%s' is not a directory" % destination_spec)
464 self.to_stderr("cannot copy directory into a file")
466 name, source = sources[0]
467 return self.copy_file(source, target)
469 if isinstance(target, (LocalMissingTarget, TahoeMissingTarget)):
471 return self.copy_to_directory(sources, target)
473 # if we have -r, we'll auto-create the target directory. Without
474 # it, we'll only create a file.
475 self.to_stderr("cannot copy multiple files into a file without -r")
478 name, source = sources[0]
479 return self.copy_file(source, target)
481 if isinstance(target, (LocalDirectoryTarget, TahoeDirectoryTarget)):
482 return self.copy_to_directory(sources, target)
484 self.to_stderr("unknown target")
def to_stderr(self, text):
    """Print `text`, followed by a newline, to this object's `stderr` stream."""
    print >>self.stderr, text
490 def get_target_info(self, destination_spec):
491 rootcap, path = get_alias(self.aliases, destination_spec, None)
492 if rootcap == DefaultAliasMarker:
493 # no alias, so this is a local file
494 pathname = os.path.abspath(os.path.expanduser(path))
495 if not os.path.exists(pathname):
496 t = LocalMissingTarget(pathname)
497 elif os.path.isdir(pathname):
498 t = LocalDirectoryTarget(self.progress, pathname)
500 assert os.path.isfile(pathname), pathname
501 t = LocalFileTarget(pathname) # non-empty
503 # this is a tahoe object
504 url = self.nodeurl + "uri/%s" % urllib.quote(rootcap)
506 url += "/" + escape_path(path)
507 last_slash = path.rfind("/")
509 resp = do_http("GET", url + "?t=json")
510 if resp.status == 404:
512 t = TahoeMissingTarget(url)
514 parsed = simplejson.loads(resp.read())
516 if nodetype == "dirnode":
517 t = TahoeDirectoryTarget(self.nodeurl, self.cache,
519 t.init_from_parsed(parsed)
521 writecap = ascii_or_none(d.get("rw_uri"))
522 readcap = ascii_or_none(d.get("ro_uri"))
523 mutable = d.get("mutable", False)
524 t = TahoeFileTarget(self.nodeurl, mutable,
525 writecap, readcap, url)
528 def get_source_info(self, source_spec):
529 rootcap, path = get_alias(self.aliases, source_spec, None)
530 if rootcap == DefaultAliasMarker:
531 # no alias, so this is a local file
532 pathname = os.path.abspath(os.path.expanduser(path))
533 name = os.path.basename(pathname)
534 if not os.path.exists(pathname):
535 raise MissingSourceError(source_spec)
536 if os.path.isdir(pathname):
537 t = LocalDirectorySource(self.progress, pathname)
539 assert os.path.isfile(pathname)
540 t = LocalFileSource(pathname) # non-empty
542 # this is a tahoe object
543 url = self.nodeurl + "uri/%s" % urllib.quote(rootcap)
546 url += "/" + escape_path(path)
547 last_slash = path.rfind("/")
550 name = path[last_slash+1:]
552 resp = do_http("GET", url + "?t=json")
553 if resp.status == 404:
554 raise MissingSourceError(source_spec)
555 parsed = simplejson.loads(resp.read())
557 if nodetype == "dirnode":
558 t = TahoeDirectorySource(self.nodeurl, self.cache,
560 t.init_from_parsed(parsed)
562 writecap = ascii_or_none(d.get("rw_uri"))
563 readcap = ascii_or_none(d.get("ro_uri"))
564 mutable = d.get("mutable", False) # older nodes don't provide it
565 t = TahoeFileSource(self.nodeurl, mutable, writecap, readcap)
569 def dump_graph(self, s, indent=" "):
570 for name, child in s.children.items():
571 print indent + name + ":" + str(child)
572 if isinstance(child, (LocalDirectorySource, TahoeDirectorySource)):
573 self.dump_graph(child, indent+" ")
def copy_to_directory(self, source_infos, target):
    """Copy every source in `source_infos` into the directory `target`.

    `source_infos` is a sequence of (name, source object) pairs. `target`
    is either an existing directory target or a "missing" target, in which
    case the directory is created first.

    Returns whatever `announce_success()` returns.
    """
    # step one: build a recursive graph of the source tree. Directory
    # sources get populated; plain file sources are collected separately.
    source_dirs = self.build_graphs(source_infos)
    source_files = [source for source in source_infos
                    if isinstance(source[1], (LocalFileSource,
                                              TahoeFileSource))]

    # step two: create the top-level target directory object
    if isinstance(target, LocalMissingTarget):
        os.makedirs(target.pathname)
        target = LocalDirectoryTarget(self.progress, target.pathname)
    elif isinstance(target, TahoeMissingTarget):
        writecap = mkdir(target.url)
        target = TahoeDirectoryTarget(self.nodeurl, self.cache,
                                      self.progress)
        target.just_created(writecap)
    assert isinstance(target, (LocalDirectoryTarget, TahoeDirectoryTarget))
    target.populate(False)

    # step three: find a target for each source node, creating
    # directories as necessary. 'targetmap' is a dictionary that uses
    # target Directory instances as keys, and has values of
    # (name->sourceobject) dicts for all the files that need to wind up
    # in that directory.

    # sources are all LocalFile/LocalDirectory/TahoeFile/TahoeDirectory
    # target is LocalDirectory/TahoeDirectory

    self.progress("attaching sources to targets, "
                  "%d files / %d dirs in root" %
                  (len(source_files), len(source_dirs)))

    self.targetmap = {}
    self.files_to_copy = 0

    for (name, s) in source_files:
        # attach_to_target() already bumps self.files_to_copy; counting
        # again here would report every top-level file twice.
        self.attach_to_target(s, name, target)

    for source in source_dirs:
        self.assign_targets(source, target)

    self.progress("targets assigned, %s dirs, %s files" %
                  (len(self.targetmap), self.files_to_copy))

    self.progress("starting copy, %d files, %d directories" %
                  (self.files_to_copy, len(self.targetmap)))
    self.files_copied = 0
    self.targets_finished = 0

    # step four: walk through the list of targets. For each one, copy all
    # the files. If the target is a TahoeDirectory, upload and create
    # read-caps, then do a set_children to the target directory.

    for directory in self.targetmap:
        self.copy_files_to_target(self.targetmap[directory], directory)
        self.targets_finished += 1
        self.progress("%d/%d directories" %
                      (self.targets_finished, len(self.targetmap)))

    return self.announce_success("files copied")
def attach_to_target(self, source, name, target):
    """Schedule `source` to be copied into `target` under the name `name`.

    The pending work is kept in `self.targetmap`, keyed by target, and the
    running total `self.files_to_copy` is incremented by one.
    """
    pending = self.targetmap.setdefault(target, {})
    pending[name] = source
    self.files_to_copy += 1
def assign_targets(self, source, target):
    """Map everything beneath directory `source` onto directory `target`.

    File children are registered with `attach_to_target()`. Directory
    children obtain a matching sub-target from `target.get_child_target()`
    and are processed recursively.
    """
    directory_types = (LocalDirectorySource, TahoeDirectorySource)
    assert isinstance(source, directory_types)

    for child_name, node in source.children.items():
        if not isinstance(node, directory_types):
            assert isinstance(node, (LocalFileSource, TahoeFileSource))
            self.attach_to_target(node, child_name, target)
            continue
        # a sub-directory needs its own directory on the target side
        subtarget = target.get_child_target(child_name)
        self.assign_targets(node, subtarget)
def copy_files_to_target(self, targetmap, target):
    """Copy each file listed in `targetmap` into the directory `target`.

    `targetmap` maps child names to file source objects. After each file
    the counters are updated and a progress message is emitted; once all
    files are handled, `target.set_children()` is called.
    """
    for child_name, file_source in targetmap.items():
        assert isinstance(file_source, (LocalFileSource, TahoeFileSource))
        self.copy_file_into(file_source, child_name, target)
        self.files_copied += 1
        counters = (self.files_copied, self.files_to_copy,
                    self.targets_finished, len(self.targetmap))
        self.progress("%d/%d files, %d/%d directories" % counters)
    target.set_children()
def need_to_copy_bytes(self, source, target):
    """Decide whether copying `source` to `target` must transfer file bytes.

    Returns True when the contents have to be read and written, and False
    when attaching the source's existing cap to the target (a link) is
    enough.
    """
    # The source's need_to_copy_bytes is a method and must be *called*: the
    # bare attribute is a bound method object, which is always true and
    # would make every copy transfer bytes.
    if source.need_to_copy_bytes():
        # mutable tahoe files, and local files
        return True
    if isinstance(target, (LocalFileTarget, LocalDirectoryTarget,
                           LocalMissingTarget)):
        # anything that ends up on local disk needs the actual bytes
        return True
    if not hasattr(target, "put_uri"):
        # this target cannot accept a link, so fall back to copying bytes
        return True
    return False
682 def announce_success(self, msg):
683 if self.verbosity >= 1:
684 print >>self.stdout, "Success: %s" % msg
687 def copy_file(self, source, target):
688 assert isinstance(source, (LocalFileSource, TahoeFileSource))
689 assert isinstance(target, (LocalFileTarget, TahoeFileTarget,
690 LocalMissingTarget, TahoeMissingTarget))
691 if self.need_to_copy_bytes(source, target):
692 # if the target is a local directory, this will just write the
693 # bytes to disk. If it is a tahoe directory, it will upload the
694 # data, and stash the new filecap for a later set_children call.
697 return self.announce_success("file copied")
698 # otherwise we're copying tahoe to tahoe, and using immutable files,
699 # so we can just make a link. TODO: this probably won't always work:
700 # need to enumerate the cases and analyze them.
701 target.put_uri(source.bestcap())
702 return self.announce_success("file linked")
704 def copy_file_into(self, source, name, target):
705 assert isinstance(source, (LocalFileSource, TahoeFileSource))
706 assert isinstance(target, (LocalDirectoryTarget, TahoeDirectoryTarget))
707 if self.need_to_copy_bytes(source, target):
708 # if the target is a local directory, this will just write the
709 # bytes to disk. If it is a tahoe directory, it will upload the
710 # data, and stash the new filecap for a later set_children call.
712 target.put_file(name, f)
714 # otherwise we're copying tahoe to tahoe, and using immutable files,
715 # so we can just make a link
716 target.put_uri(name, source.bestcap())
def progress(self, message):
    """Pass `message` to the configured progress callback, if there is one."""
    callback = self.progressfunc
    if not callback:
        # no callback configured: progress reporting is silently skipped
        return
    callback(message)
def build_graphs(self, source_infos):
    """Populate every directory source in `source_infos` and return them.

    `source_infos` is a sequence of (name, source object) pairs. Each
    directory source is populated recursively via ``populate(True)`` and
    collected, in order, into the returned list. Other sources are ignored.
    """
    populated = []
    for _name, candidate in source_infos:
        if not isinstance(candidate, (LocalDirectorySource, TahoeDirectorySource)):
            continue
        candidate.populate(True)
        populated.append(candidate)
    return populated
734 return Copier().do_copy(options)