5 from allmydata.scripts.common import get_alias, escape_path, DefaultAliasMarker
6 from allmydata.scripts.common_http import do_http
7 from allmydata import uri
# Raised by the PUT / POST / mkdir helpers in this file when the node does not
# answer with status 200 or 201. (The class body is not shown in this listing.)
14 class WriteError(Exception):
# Raised by the GET helper in this file when the node does not answer with
# status 200. (The class body is not shown in this listing.)
16 class ReadError(Exception):
# Raised by get_source_info() when a source (a local path, or an object on the
# grid that answers 404) does not exist; do_copy() catches it and reports
# "No such file or directory". (The class body is not shown in this listing.)
18 class MissingSourceError(Exception):
# NOTE(review): the `def` line of this helper is not present in this listing;
# it is presumably the GET_to_file(url) that TahoeFileSource calls -- confirm.
# Issues an HTTP GET for `url`. Status 200 is the success case (what is
# returned then is not shown); otherwise ReadError is raised with the status
# as the first format argument (the remaining arguments are not shown).
22 resp = do_http("GET", url)
23 if resp.status == 200:
25 raise ReadError("Error during GET: %s %s %s" % (resp.status,
# NOTE(review): the body of this function is not present in this listing.
# Going by the name it presumably performs a GET on `url` and returns the
# response body as a string -- TODO confirm against the full file.
28 def GET_to_string(url):
# NOTE(review): the `def` line is not present in this listing; presumably this
# is the PUT(url, data) helper called by the Tahoe target classes -- confirm.
# Sends `data` to `url` with HTTP PUT. 200 and 201 are the success statuses
# (the success return is not shown; callers use the result as a filecap);
# otherwise WriteError is raised with the status and reason.
33 resp = do_http("PUT", url, data)
34 if resp.status in (200, 201):
36 raise WriteError("Error during PUT: %s %s %s" % (resp.status, resp.reason,
# NOTE(review): the `def` line is not present in this listing; presumably a
# POST(url, data) helper mirroring the PUT helper -- confirm.
# Sends `data` to `url` with HTTP POST. 200 and 201 are the success statuses
# (the success return is not shown); otherwise WriteError is raised with the
# status and reason.
40 resp = do_http("POST", url, data)
41 if resp.status in (200, 201):
43 raise WriteError("Error during POST: %s %s %s" % (resp.status, resp.reason,
# NOTE(review): the `def` line is not present in this listing; presumably this
# is the mkdir(targeturl) helper that copy_to_directory() calls -- confirm.
# POSTs to "<targeturl>?t=mkdir". On status 200/201 the response body, with
# surrounding whitespace stripped, is returned (the caller treats it as the
# new directory's writecap). Any other status raises WriteError.
47 url = targeturl + "?t=mkdir"
48 resp = do_http("POST", url)
49 if resp.status in (200, 201):
50 return resp.read().strip()
51 raise WriteError("Error during mkdir: %s %s %s" % (resp.status, resp.reason,
# Create a child directory called `name` inside the grid directory identified
# by `parent_writecap`, talking to the node at `nodeurl`.
# Returns the stripped response body on status 200/201 (the caller,
# TahoeDirectoryTarget.get_child_target, uses it as the new writecap).
# Raises WriteError for any other status.
54 def make_tahoe_subdirectory(nodeurl, parent_writecap, name):
# The URL is "<nodeurl>uri/<quoted parent writecap>/..."; the remaining path
# components and query string are on lines not shown in this listing.
55 url = nodeurl + "/".join(["uri",
56 urllib.quote(parent_writecap),
59 resp = do_http("POST", url)
60 if resp.status in (200, 201):
61 return resp.read().strip()
62 raise WriteError("Error during mkdir: %s %s %s" % (resp.status, resp.reason,
# A regular file on the local filesystem, used as the source of a copy.
# get_source_info() creates one for a local path that is not a directory, and
# LocalDirectorySource.populate() creates one per regular file it finds.
66 class LocalFileSource:
67 def __init__(self, pathname):
68 self.pathname = pathname
# NOTE(review): the body of need_to_copy_bytes() is not shown in this listing.
70 def need_to_copy_bytes(self):
# NOTE(review): the `def` line of the method below is not shown (presumably
# open()). It returns the file opened for binary reading.
74 return open(self.pathname, "rb")
# An already-existing regular file on the local filesystem, used as the
# destination of a single-file copy (see get_target_info()).
76 class LocalFileTarget:
77 def __init__(self, pathname):
78 self.pathname = pathname
# Overwrite the file at self.pathname with the contents of the file-like
# object `inf`, reading it in 32768-byte chunks. The loop header, the write
# and any close() are on lines not shown in this listing.
79 def put_file(self, inf):
80 outf = open(self.pathname, "wb")
82 data = inf.read(32768)
# A local destination path that does not exist yet (get_target_info() picks
# this when os.path.exists() is false). copy_to_directory() turns it into a
# directory with os.makedirs(); a single-file copy writes it via put_file().
88 class LocalMissingTarget:
89 def __init__(self, pathname):
90 self.pathname = pathname
# Create the file at self.pathname from the file-like object `inf`, reading
# it in 32768-byte chunks. The loop header, the write and any close() are on
# lines not shown in this listing.
92 def put_file(self, inf):
93 outf = open(self.pathname, "wb")
95 data = inf.read(32768)
# A directory on the local filesystem, used as the source of a recursive copy.
# `progressfunc` is a callable taking one message string.
101 class LocalDirectorySource:
102 def __init__(self, progressfunc, pathname):
103 self.progressfunc = progressfunc
104 self.pathname = pathname
# Fill self.children with one entry per directory entry: sub-directories
# become LocalDirectorySource objects, regular files become LocalFileSource.
# NOTE(review): self.children is initialised on a line not shown, and how
# `recurse` is used (presumably to populate sub-directories) is also not
# shown in this listing.
107 def populate(self, recurse):
# Already populated -- presumably returns early here (line not shown).
108 if self.children is not None:
111 children = os.listdir(self.pathname)
112 for i,n in enumerate(children):
# `i` comes from enumerate() and is 0-based, so the first message reads
# "examining 0 of N".
113 self.progressfunc("examining %d of %d" % (i, len(children)))
114 pn = os.path.join(self.pathname, n)
115 if os.path.isdir(pn):
116 child = LocalDirectorySource(self.progressfunc, pn)
117 self.children[n] = child
# NOTE(review): an entry that is neither a directory nor a regular file fails
# this assert (and the check disappears entirely under `python -O`).
121 assert os.path.isfile(pn)
122 self.children[n] = LocalFileSource(pn)
# An existing directory on the local filesystem, used as the destination of a
# copy. `progressfunc` is a callable taking one message string.
124 class LocalDirectoryTarget:
125 def __init__(self, progressfunc, pathname):
126 self.progressfunc = progressfunc
127 self.pathname = pathname
# Fill self.children with one entry per directory entry: sub-directories
# become LocalDirectoryTarget objects, regular files become LocalFileTarget.
# NOTE(review): self.children is initialised on a line not shown, and how
# `recurse` is used is also not shown in this listing.
130 def populate(self, recurse):
# Already populated -- presumably returns early here (line not shown).
131 if self.children is not None:
134 children = os.listdir(self.pathname)
135 for i,n in enumerate(children):
# `i` is 0-based, so the first message reads "examining 0 of N".
136 self.progressfunc("examining %d of %d" % (i, len(children)))
137 pn = os.path.join(self.pathname, n)
138 if os.path.isdir(pn):
139 child = LocalDirectoryTarget(self.progressfunc, pn)
140 self.children[n] = child
# NOTE(review): an entry that is neither a directory nor a regular file fails
# this assert.
144 assert os.path.isfile(pn)
145 self.children[n] = LocalFileTarget(pn)
# Return the target object for the sub-directory `name`, creating the
# directory on disk when it is not already a known child.
147 def get_child_target(self, name):
# Presumably populates first when children have not been listed yet (the
# statement under this `if` is not shown).
148 if self.children is None:
150 if name in self.children:
151 return self.children[name]
152 pathname = os.path.join(self.pathname, name)
153 os.makedirs(pathname)
# NOTE(review): unlike TahoeDirectoryTarget.get_child_target(), the new
# target is returned without being stored in self.children.
154 return LocalDirectoryTarget(self.progressfunc, pathname)
# Write the contents of the file-like object `inf` to <self.pathname>/<name>,
# reading it in 32768-byte chunks. The loop header, the write and any close()
# are on lines not shown in this listing.
156 def put_file(self, name, inf):
157 pathname = os.path.join(self.pathname, name)
158 outf = open(pathname, "wb")
160 data = inf.read(32768)
# Called by Copier.copy_files_to_target() after all files for this directory
# have been copied. NOTE(review): the body is not shown in this listing.
166 def set_children(self):
# A file stored on the Tahoe grid, used as the source of a copy. It is
# described by the node URL, a `mutable` flag and its write/read caps (either
# cap may be None when the directory listing did not provide it).
169 class TahoeFileSource:
170 def __init__(self, nodeurl, mutable, writecap, readcap):
171 self.nodeurl = nodeurl
172 self.mutable = mutable
173 self.writecap = writecap
174 self.readcap = readcap
# NOTE(review): the body of need_to_copy_bytes() is not shown in this listing.
176 def need_to_copy_bytes(self):
# NOTE(review): the `def` line of the method below is not shown (presumably
# open()). It fetches the file through "<nodeurl>uri/<quoted readcap>" and
# returns whatever GET_to_file() returns.
182 url = self.nodeurl + "uri/" + urllib.quote(self.readcap)
183 return GET_to_file(url)
# NOTE(review): the `def` line of the method below is not shown; Copier calls
# source.bestcap(), so this is presumably bestcap(). It prefers the writecap
# and falls back to the readcap.
186 return self.writecap or self.readcap
# An existing file on the Tahoe grid, used as the destination of a copy.
# `url` is the address of the file within its parent directory.
# NOTE(review): the line(s) that store `url` on the instance are not shown.
188 class TahoeFileTarget:
189 def __init__(self, nodeurl, mutable, writecap, readcap, url):
190 self.nodeurl = nodeurl
191 self.mutable = mutable
192 self.writecap = writecap
193 self.readcap = readcap
# Replace the file's contents with those of the file-like object `inf`.
# NOTE(review): the upload call itself, and what is done when `inf` has no
# seek() method, are on lines not shown in this listing.
196 def put_file(self, inf):
197 # We want to replace this object in-place.
199 # our do_http() call currently requires a string or a filehandle with
201 if not hasattr(inf, "seek"):
204 # TODO: this always creates immutable files. We might want an option
205 # to always create mutable files, or to copy mutable files into new
# A directory on the Tahoe grid, used as the source of a recursive copy.
# `cache` is a dict shared between directory objects, keyed by writecap or
# readcap. NOTE(review): the line that stores `cache` on the instance, and
# the initialisation of self.children, are not shown in this listing.
208 class TahoeDirectorySource:
209 def __init__(self, nodeurl, cache, progressfunc):
210 self.nodeurl = nodeurl
212 self.progressfunc = progressfunc
# Initialise from the grid: fetch the directory's JSON description using the
# best available cap (writecap if present, else readcap).
214 def init_from_grid(self, writecap, readcap):
215 self.writecap = writecap
216 self.readcap = readcap
217 bestcap = writecap or readcap
218 url = self.nodeurl + "uri/%s" % urllib.quote(bestcap)
219 resp = do_http("GET", url + "?t=json")
# NOTE(review): a non-200 answer surfaces as AssertionError, and the check is
# skipped entirely under `python -O`.
220 assert resp.status == 200
221 parsed = simplejson.loads(resp.read())
# `nodetype` and `d` are unpacked from `parsed` on a line not shown.
223 assert nodetype == "dirnode"
224 self.mutable = d.get("mutable", False) # older nodes don't provide it
225 self.children_d = d["children"]
# Initialise from an already-parsed JSON description (as get_source_info()
# obtains when it probes the source). `d` comes from `parsed` on a line not
# shown.
228 def init_from_parsed(self, parsed):
230 self.writecap = ascii_or_none(d.get("rw_uri"))
231 self.readcap = ascii_or_none(d.get("ro_uri"))
232 self.mutable = d.get("mutable", False) # older nodes don't provide it
233 self.children_d = d["children"]
# Build self.children from self.children_d: "filenode" entries become
# TahoeFileSource objects and "dirnode" entries become TahoeDirectorySource
# objects, reused from self.cache when their cap has been seen before.
# NOTE(review): how `recurse` is used is on lines not shown.
236 def populate(self, recurse):
# Already populated -- presumably returns early here (line not shown).
237 if self.children is not None:
240 for i,(name, data) in enumerate(self.children_d.items()):
# `i` is 0-based, so the first message reads "examining 0 of N".
241 self.progressfunc("examining %d of %d" % (i, len(self.children_d)))
# Each entry is a (nodetype, metadata-dict) pair.
242 if data[0] == "filenode":
243 mutable = data[1].get("mutable", False)
244 writecap = ascii_or_none(data[1].get("rw_uri"))
245 readcap = ascii_or_none(data[1].get("ro_uri"))
246 self.children[name] = TahoeFileSource(self.nodeurl, mutable,
249 assert data[0] == "dirnode"
250 writecap = ascii_or_none(data[1].get("rw_uri"))
251 readcap = ascii_or_none(data[1].get("ro_uri"))
# Look the directory up by writecap first, then by readcap.
252 if writecap and writecap in self.cache:
253 child = self.cache[writecap]
254 elif readcap and readcap in self.cache:
255 child = self.cache[readcap]
# Not cached: create it, load it from the grid, and remember it under its
# caps (the guarding conditions are on lines not shown).
257 child = TahoeDirectorySource(self.nodeurl, self.cache,
259 child.init_from_grid(writecap, readcap)
261 self.cache[writecap] = child
263 self.cache[readcap] = child
266 self.children[name] = child
# A destination on the Tahoe grid that does not exist yet (get_target_info()
# picks this when the node answers 404). `url` is where the new object should
# appear. NOTE(review): the body of __init__ is not shown; put_uri() reads
# self.url, so it presumably stores `url` there.
268 class TahoeMissingTarget:
269 def __init__(self, url):
# Upload the contents of the file-like object `inf` to the target URL.
# NOTE(review): the upload call itself, and what is done when `inf` has no
# seek() method, are on lines not shown in this listing.
272 def put_file(self, inf):
273 # We want to replace this object in-place.
274 if not hasattr(inf, "seek"):
277 # TODO: this always creates immutable files. We might want an option
278 # to always create mutable files, or to copy mutable files into new
# Link an existing grid file at the target URL by PUTting its cap to
# "<url>?t=uri"; returns whatever PUT() returns.
281 def put_uri(self, filecap):
282 # I'm not sure this will always work
283 return PUT(self.url + "?t=uri", filecap)
# A directory on the Tahoe grid, used as the destination of a copy.
# `cache` is a dict shared between directory objects, keyed by writecap or
# readcap. New files are collected in self.new_children (name -> filecap) and
# attached by set_children().
# NOTE(review): the line that stores `cache` on the instance, and the
# initialisation of self.children, are not shown in this listing.
285 class TahoeDirectoryTarget:
286 def __init__(self, nodeurl, cache, progressfunc):
287 self.nodeurl = nodeurl
289 self.progressfunc = progressfunc
290 self.new_children = {}
# Initialise from an already-parsed JSON description (as get_target_info()
# obtains when it probes the destination). `d` comes from `parsed` on a line
# not shown.
292 def init_from_parsed(self, parsed):
294 self.writecap = ascii_or_none(d.get("rw_uri"))
295 self.readcap = ascii_or_none(d.get("ro_uri"))
296 self.mutable = d.get("mutable", False) # older nodes don't provide it
297 self.children_d = d["children"]
# Initialise from the grid: fetch the directory's JSON description using the
# best available cap (writecap if present, else readcap).
300 def init_from_grid(self, writecap, readcap):
301 self.writecap = writecap
302 self.readcap = readcap
303 bestcap = writecap or readcap
304 url = self.nodeurl + "uri/%s" % urllib.quote(bestcap)
305 resp = do_http("GET", url + "?t=json")
# NOTE(review): a non-200 answer surfaces as AssertionError, and the check is
# skipped entirely under `python -O`.
306 assert resp.status == 200
307 parsed = simplejson.loads(resp.read())
# `nodetype` and `d` are unpacked from `parsed` on a line not shown.
309 assert nodetype == "dirnode"
310 self.mutable = d.get("mutable", False) # older nodes don't provide it
311 self.children_d = d["children"]
# Initialise for a directory this program has just created: only the writecap
# is known, so the readcap is derived from it. The remaining attribute
# assignments are on lines not shown.
314 def just_created(self, writecap):
315 self.writecap = writecap
316 self.readcap = uri.from_string(writecap).get_readonly().to_string()
# Build self.children from self.children_d: "filenode" entries become
# TahoeFileTarget objects and "dirnode" entries become TahoeDirectoryTarget
# objects, reused from self.cache when their cap has been seen before.
# NOTE(review): how `recurse` is used is on lines not shown.
321 def populate(self, recurse):
# Already populated -- presumably returns early here (line not shown).
322 if self.children is not None:
325 for i,(name, data) in enumerate(self.children_d.items()):
# `i` is 0-based, so the first message reads "examining 0 of N".
326 self.progressfunc("examining %d of %d" % (i, len(self.children_d)))
327 if data[0] == "filenode":
328 mutable = data[1].get("mutable", False)
329 writecap = ascii_or_none(data[1].get("rw_uri"))
330 readcap = ascii_or_none(data[1].get("ro_uri"))
# The file's URL is built from this directory's writecap; the last path
# component (presumably the child name) is on a line not shown.
333 url = self.nodeurl + "/".join(["uri",
334 urllib.quote(self.writecap),
336 self.children[name] = TahoeFileTarget(self.nodeurl, mutable,
337 writecap, readcap, url)
339 assert data[0] == "dirnode"
340 writecap = ascii_or_none(data[1].get("rw_uri"))
341 readcap = ascii_or_none(data[1].get("ro_uri"))
# Look the directory up by writecap first, then by readcap.
342 if writecap and writecap in self.cache:
343 child = self.cache[writecap]
344 elif readcap and readcap in self.cache:
345 child = self.cache[readcap]
# Not cached: create it, load it from the grid, and remember it under its
# caps (the guarding conditions are on lines not shown).
347 child = TahoeDirectoryTarget(self.nodeurl, self.cache,
349 child.init_from_grid(writecap, readcap)
351 self.cache[writecap] = child
353 self.cache[readcap] = child
356 self.children[name] = child
# Return the target object for the sub-directory `name`, creating the
# directory on the grid when it is not already a known child. The final
# return of the new child is on a line not shown.
358 def get_child_target(self, name):
359 # return a new target for a named subdirectory of this dir
# Presumably populates first when children have not been listed yet (the
# statement under this `if` is not shown).
360 if self.children is None:
362 if name in self.children:
363 return self.children[name]
364 writecap = make_tahoe_subdirectory(self.nodeurl, self.writecap, name)
365 child = TahoeDirectoryTarget(self.nodeurl, self.cache,
367 child.just_created(writecap)
368 self.children[name] = child
# Upload the file-like object `inf` with a PUT to "<nodeurl>uri" and remember
# the returned filecap under `name`; the file is linked into this directory
# later by set_children(). What happens when `inf` has no seek() method is on
# a line not shown.
371 def put_file(self, name, inf):
372 url = self.nodeurl + "uri"
373 if not hasattr(inf, "seek"):
375 filecap = PUT(url, inf)
376 # TODO: this always creates immutable files. We might want an option
377 # to always create mutable files, or to copy mutable files into new
379 self.new_children[name] = filecap
# Remember an existing filecap under `name` (no bytes are transferred); it is
# linked into this directory later by set_children().
381 def put_uri(self, name, filecap):
382 self.new_children[name] = filecap
# Attach everything collected in self.new_children to this directory.
# Builds a JSON body mapping each name to ["filenode", {"rw_uri": filecap}].
# NOTE(review): the rest of the URL, the creation of `set_data` and the
# request that sends `body` are on lines not shown in this listing.
384 def set_children(self):
# Nothing to attach -- presumably returns early here (line not shown).
385 if not self.new_children:
387 url = (self.nodeurl + "uri/" + urllib.quote(self.writecap)
390 for (name, filecap) in self.new_children.items():
# (The original comment below has a typo: "eithe" should read "either".)
391 # it just so happens that ?t=set_children will accept both file
392 # read-caps and write-caps as ['rw_uri'], and will handle eithe
393 # correctly. So don't bother trying to figure out whether the one
394 # we have is read-only or read-write.
395 set_data[name] = ["filenode", {"rw_uri": filecap}]
396 body = simplejson.dumps(set_data)
# Run one copy operation described by `options`.
# Stores the node URL, options, alias table and output streams on the
# instance, resolves the destination and every source, then dispatches on the
# kind of target: existing file, missing target, or directory.
# `progressfunc`, when given, is called with progress message strings.
# NOTE(review): several lines of the original (the `try:`/`else:` keywords,
# early returns, and some conditions) are not present in this listing, so the
# branch structure below is only partly visible.
401 def do_copy(self, options, progressfunc=None):
407 nodeurl = options['node-url']
# Presumably appends the missing trailing "/" (line not shown); later code
# builds URLs as nodeurl + "uri/...".
408 if nodeurl[-1] != "/":
410 self.nodeurl = nodeurl
411 self.progressfunc = progressfunc
412 self.options = options
413 self.aliases = options.aliases
# NOTE(review): `verbosity` is not assigned in the visible lines; presumably
# it is set on the elided lines just after the `def` -- confirm.
414 self.verbosity = verbosity
415 self.stdout = options.stdout
416 self.stderr = options.stderr
# When options["verbose"] is set and the caller supplied no callback, fall
# back to printing progress messages on stderr.
417 if options["verbose"] and not self.progressfunc:
418 def progress(message):
419 print >>self.stderr, message
420 self.progressfunc = progress
422 source_specs = options.sources
423 destination_spec = options.destination
424 recursive = self.options["recursive"]
426 target = self.get_target_info(destination_spec)
# Resolve every source spec; a missing source is reported on stderr (the
# matching `try:` and what follows the message are not shown). Python 2
# `except X, e` syntax.
429 sources = [] # list of (name, source object)
430 for ss in source_specs:
431 name, source = self.get_source_info(ss)
432 sources.append( (name, source) )
433 except MissingSourceError, e:
434 self.to_stderr("No such file or directory %s" % e.args[0])
# True when at least one resolved source is a directory.
437 have_source_dirs = bool([s for (name,s) in sources
438 if isinstance(s, (LocalDirectorySource,
439 TahoeDirectorySource))])
441 if have_source_dirs and not recursive:
442 self.to_stderr("cannot copy directories without --recursive")
# Case 1: the destination is an existing file. The conditions guarding the
# two error messages are on lines not shown.
445 if isinstance(target, (LocalFileTarget, TahoeFileTarget)):
446 # cp STUFF foo.txt, where foo.txt already exists. This limits the
447 # possibilities considerably.
449 self.to_stderr("target '%s' is not a directory" % destination_spec)
452 self.to_stderr("cannot copy directory into a file")
454 name, source = sources[0]
455 return self.copy_file(source, target)
# Case 2: the destination does not exist yet. The conditions choosing between
# the directory copy, the error and the single-file copy are not shown.
457 if isinstance(target, (LocalMissingTarget, TahoeMissingTarget)):
459 return self.copy_to_directory(sources, target)
461 # if we have -r, we'll auto-create the target directory. Without
462 # it, we'll only create a file.
463 self.to_stderr("cannot copy multiple files into a file without -r")
466 name, source = sources[0]
467 return self.copy_file(source, target)
# Case 3: the destination is an existing directory.
469 if isinstance(target, (LocalDirectoryTarget, TahoeDirectoryTarget)):
470 return self.copy_to_directory(sources, target)
# Fallthrough: the target matched none of the known kinds.
472 self.to_stderr("unknown target")
# Print `text`, followed by a newline, on the stderr stream taken from the
# options in do_copy(). Uses the Python 2 `print >>stream` statement form.
475 def to_stderr(self, text):
476 print >>self.stderr, text
# Classify the destination spec and build the matching target object `t`:
#   local path  -> LocalMissingTarget / LocalDirectoryTarget / LocalFileTarget
#   grid object -> TahoeMissingTarget / TahoeDirectoryTarget / TahoeFileTarget
# do_copy() uses the return value as the target.
# NOTE(review): the final return and several `else:` lines are not present in
# this listing.
478 def get_target_info(self, destination_spec):
479 rootcap, path = get_alias(self.aliases, destination_spec, None)
480 if rootcap == DefaultAliasMarker:
481 # no alias, so this is a local file
482 pathname = os.path.abspath(os.path.expanduser(path))
483 if not os.path.exists(pathname):
484 t = LocalMissingTarget(pathname)
485 elif os.path.isdir(pathname):
486 t = LocalDirectoryTarget(self.progress, pathname)
# NOTE(review): an existing path that is neither a directory nor a regular
# file fails this assert rather than producing a user-facing error.
488 assert os.path.isfile(pathname), pathname
489 t = LocalFileTarget(pathname) # non-empty
491 # this is a tahoe object
492 url = self.nodeurl + "uri/%s" % urllib.quote(rootcap)
# The condition guarding the path suffix (presumably "if path:") is on a line
# not shown. `last_slash` is not used in any visible line of this method.
494 url += "/" + escape_path(path)
495 last_slash = path.rfind("/")
# Probe the destination: 404 means it does not exist yet.
497 resp = do_http("GET", url + "?t=json")
498 if resp.status == 404:
500 t = TahoeMissingTarget(url)
# Otherwise the JSON description tells us whether it is a directory or a
# file; `nodetype` and `d` are unpacked from `parsed` on a line not shown.
# NOTE(review): no status other than 404 is checked in the visible lines.
502 parsed = simplejson.loads(resp.read())
504 if nodetype == "dirnode":
505 t = TahoeDirectoryTarget(self.nodeurl, self.cache,
507 t.init_from_parsed(parsed)
509 writecap = ascii_or_none(d.get("rw_uri"))
510 readcap = ascii_or_none(d.get("ro_uri"))
511 mutable = d.get("mutable", False)
512 t = TahoeFileTarget(self.nodeurl, mutable,
513 writecap, readcap, url)
# Classify one source spec and build the matching source object `t`:
#   local path  -> LocalDirectorySource / LocalFileSource
#   grid object -> TahoeDirectorySource / TahoeFileSource
# do_copy() unpacks the result as (name, source), where `name` is the last
# path component. Raises MissingSourceError(source_spec) when a local path
# does not exist or the node answers 404.
# NOTE(review): the final return and several `else:` lines are not present in
# this listing.
516 def get_source_info(self, source_spec):
517 rootcap, path = get_alias(self.aliases, source_spec, None)
518 if rootcap == DefaultAliasMarker:
519 # no alias, so this is a local file
520 pathname = os.path.abspath(os.path.expanduser(path))
521 name = os.path.basename(pathname)
522 if not os.path.exists(pathname):
523 raise MissingSourceError(source_spec)
524 if os.path.isdir(pathname):
525 t = LocalDirectorySource(self.progress, pathname)
# NOTE(review): an existing path that is neither a directory nor a regular
# file fails this assert rather than producing a user-facing error.
527 assert os.path.isfile(pathname)
528 t = LocalFileSource(pathname) # non-empty
530 # this is a tahoe object
531 url = self.nodeurl + "uri/%s" % urllib.quote(rootcap)
# The conditions around the path suffix and the name extraction (and the
# value of `name` when the path is empty) are on lines not shown.
534 url += "/" + escape_path(path)
535 last_slash = path.rfind("/")
# Everything after the last "/"; when there is no "/", rfind() returns -1 and
# the slice is the whole path.
538 name = path[last_slash+1:]
540 resp = do_http("GET", url + "?t=json")
541 if resp.status == 404:
542 raise MissingSourceError(source_spec)
# `nodetype` and `d` are unpacked from `parsed` on a line not shown.
# NOTE(review): no status other than 404 is checked in the visible lines.
543 parsed = simplejson.loads(resp.read())
545 if nodetype == "dirnode":
546 t = TahoeDirectorySource(self.nodeurl, self.cache,
548 t.init_from_parsed(parsed)
550 writecap = ascii_or_none(d.get("rw_uri"))
551 readcap = ascii_or_none(d.get("ro_uri"))
552 mutable = d.get("mutable", False) # older nodes don't provide it
553 t = TahoeFileSource(self.nodeurl, mutable, writecap, readcap)
# Debugging aid: print the tree below the populated directory source `s` to
# stdout, one "name:child" line per entry, recursing into sub-directories
# with a longer `indent` prefix. Uses the Python 2 print statement.
557 def dump_graph(self, s, indent=" "):
558 for name, child in s.children.items():
559 print indent + name + ":" + str(child)
560 if isinstance(child, (LocalDirectorySource, TahoeDirectorySource)):
561 self.dump_graph(child, indent+" ")
# Copy every source in `source_infos` (a list of (name, source) pairs) into
# the directory `target`, creating `target` first when it is a missing
# target. Works in four steps, marked below.
# NOTE(review): the initialisation of self.targetmap is on a line not shown.
563 def copy_to_directory(self, source_infos, target):
564 # step one: build a recursive graph of the source tree. This returns
565 # a dictionary, with child names as keys, and values that are either
566 # Directory or File instances (local or tahoe).
567 source_dirs = self.build_graphs(source_infos)
# The (name, source) pairs whose source is a plain file, local or on the grid.
568 source_files = [source for source in source_infos
569 if isinstance(source[1], (LocalFileSource,
573 #for s in source_dirs:
576 # step two: create the top-level target directory object
577 if isinstance(target, LocalMissingTarget):
578 os.makedirs(target.pathname)
579 target = LocalDirectoryTarget(self.progress, target.pathname)
580 elif isinstance(target, TahoeMissingTarget):
581 writecap = mkdir(target.url)
582 target = TahoeDirectoryTarget(self.nodeurl, self.cache,
584 target.just_created(writecap)
585 assert isinstance(target, (LocalDirectoryTarget, TahoeDirectoryTarget))
# List the target's existing children, passing recurse=False.
586 target.populate(False)
588 # step three: find a target for each source node, creating
589 # directories as necessary. 'targetmap' is a dictionary that uses
590 # target Directory instances as keys, and has values of
591 # (name->sourceobject) dicts for all the files that need to wind up
594 # sources are all LocalFile/LocalDirectory/TahoeFile/TahoeDirectory
595 # target is LocalDirectory/TahoeDirectory
597 self.progress("attaching sources to targets, "
598 "%d files / %d dirs in root" %
599 (len(source_files), len(source_dirs)))
602 self.files_to_copy = 0
604 for (name,s) in source_files:
605 self.attach_to_target(s, name, target)
# NOTE(review): attach_to_target() already increments self.files_to_copy for
# each file it attaches. Indentation is not visible in this listing, but
# whether the line below is inside or after the loop, it adds to that counter
# a second time, so the totals reported in progress messages look inflated
# for top-level files -- confirm and consider removing it.
606 self.files_to_copy += 1
608 for source in source_dirs:
609 self.assign_targets(source, target)
611 self.progress("targets assigned, %s dirs, %s files" %
612 (len(self.targetmap), self.files_to_copy))
614 self.progress("starting copy, %d files, %d directories" %
615 (self.files_to_copy, len(self.targetmap)))
616 self.files_copied = 0
617 self.targets_finished = 0
619 # step four: walk through the list of targets. For each one, copy all
620 # the files. If the target is a TahoeDirectory, upload and create
621 # read-caps, then do a set_children to the target directory.
# Note: the loop variable rebinds the `target` parameter.
623 for target in self.targetmap:
624 self.copy_files_to_target(self.targetmap[target], target)
625 self.targets_finished += 1
626 self.progress("%d/%d directories" %
627 (self.targets_finished, len(self.targetmap)))
# Record that the file `source` must be copied into the directory `target`
# under `name`: self.targetmap maps each target directory object to a
# {name: source} dict. Also counts the file in self.files_to_copy.
629 def attach_to_target(self, source, name, target):
630 if target not in self.targetmap:
631 self.targetmap[target] = {}
632 self.targetmap[target][name] = source
633 self.files_to_copy += 1
# Walk the populated directory `source` and plan the copy of its whole
# contents into the directory `target`: child directories get a matching
# sub-target (created on demand by target.get_child_target) and are handled
# recursively; child files are registered with attach_to_target().
# NOTE(review): the `else:` between the two branches is on a line not shown.
635 def assign_targets(self, source, target):
636 # copy everything in the source into the target
637 assert isinstance(source, (LocalDirectorySource, TahoeDirectorySource))
639 for name, child in source.children.items():
640 if isinstance(child, (LocalDirectorySource, TahoeDirectorySource)):
641 # we will need a target directory for this one
642 subtarget = target.get_child_target(name)
643 self.assign_targets(child, subtarget)
645 assert isinstance(child, (LocalFileSource, TahoeFileSource))
646 self.attach_to_target(child, name, target)
# Copy every file planned for one target directory. `targetmap` here is the
# {name: source} dict for this `target` (one value of self.targetmap).
# Reports progress after each file and finishes with target.set_children(),
# which is how TahoeDirectoryTarget links the files it collected.
650 def copy_files_to_target(self, targetmap, target):
651 for name, source in targetmap.items():
652 assert isinstance(source, (LocalFileSource, TahoeFileSource))
653 self.copy_file_into(source, name, target)
654 self.files_copied += 1
655 self.progress("%d/%d files, %d/%d directories" %
656 (self.files_copied, self.files_to_copy,
657 self.targets_finished, len(self.targetmap)))
658 target.set_children()
# Decide whether the file's bytes have to be transferred (as opposed to just
# linking an existing cap into the target). The return statements are on
# lines not shown in this listing.
660 def need_to_copy_bytes(self, source, target):
# NOTE(review): the source classes define need_to_copy_bytes as a method and
# it is referenced here without being called. Unless it is a property (no
# decorator is visible in this listing) the bound method is always truthy, so
# this branch is always taken -- confirm; probably meant
# source.need_to_copy_bytes().
661 if source.need_to_copy_bytes:
662 # mutable tahoe files, and local files
# A local target always needs the actual bytes (presumably returns True on
# the line not shown).
664 if isinstance(target, (LocalFileTarget, LocalDirectoryTarget)):
# Copy a single file `source` onto a file-like `target` (an existing or
# missing file, local or on the grid). When bytes must be transferred the
# data is streamed to the target; otherwise the source's best cap is linked
# at the target with put_uri().
# NOTE(review): the lines that open the source and call target.put_file(),
# and the `else:`/return separating the two paths, are not shown.
668 def copy_file(self, source, target):
669 assert isinstance(source, (LocalFileSource, TahoeFileSource))
670 assert isinstance(target, (LocalFileTarget, TahoeFileTarget,
671 LocalMissingTarget, TahoeMissingTarget))
672 if self.need_to_copy_bytes(source, target):
673 # if the target is a local directory, this will just write the
674 # bytes to disk. If it is a tahoe directory, it will upload the
675 # data, and stash the new filecap for a later set_children call.
679 # otherwise we're copying tahoe to tahoe, and using immutable files,
680 # so we can just make a link. TODO: this probably won't always work:
681 # need to enumerate the cases and analyze them.
# NOTE(review): of the target classes visible in this listing only
# TahoeMissingTarget defines put_uri(filecap); confirm the other targets
# cannot reach this line.
682 target.put_uri(source.bestcap())
# Copy the file `source` into the directory `target` under `name`. When bytes
# must be transferred the opened source `f` is handed to
# target.put_file(name, f); otherwise the source's best cap is recorded with
# target.put_uri(name, cap).
# NOTE(review): the line that opens the source into `f`, and the
# `else:`/return separating the two paths, are not shown.
684 def copy_file_into(self, source, name, target):
685 assert isinstance(source, (LocalFileSource, TahoeFileSource))
686 assert isinstance(target, (LocalDirectoryTarget, TahoeDirectoryTarget))
687 if self.need_to_copy_bytes(source, target):
688 # if the target is a local directory, this will just write the
689 # bytes to disk. If it is a tahoe directory, it will upload the
690 # data, and stash the new filecap for a later set_children call.
692 target.put_file(name, f)
694 # otherwise we're copying tahoe to tahoe, and using immutable files,
695 # so we can just make a link
# NOTE(review): LocalDirectoryTarget shows no put_uri() in this listing;
# confirm a local directory target cannot reach this line.
696 target.put_uri(name, source.bestcap())
# Forward `message` to the progress callback, if one is configured; otherwise
# do nothing visible here. (One line of the original, between the `def` and
# the `if`, is not shown in this listing.)
699 def progress(self, message):
701 if self.progressfunc:
702 self.progressfunc(message)
# For every directory source in `source_infos` (a list of (name, source)
# pairs), populate it with recurse=True and collect it in `graphs`; plain
# file sources are ignored here. copy_to_directory() uses the result as its
# list of source directories.
# NOTE(review): the creation of `graphs` and the final return are on lines
# not shown in this listing.
704 def build_graphs(self, source_infos):
706 for name,source in source_infos:
707 if isinstance(source, (LocalDirectorySource, TahoeDirectorySource)):
708 source.populate(True)
709 graphs.append(source)
# NOTE(review): the `def` line of this module-level entry point is not
# present in this listing. It builds a fresh Copier and returns the result of
# running do_copy() with the parsed `options`.
714 return Copier().do_copy(options)