git.rkrishnan.org Git - tahoe-lafs/tahoe-lafs.git/blob - src/allmydata/scripts/tahoe_cp.py
cp: trailing slash on source filename is an error, just like on targets
[tahoe-lafs/tahoe-lafs.git] / src / allmydata / scripts / tahoe_cp.py
1
2 import os.path
3 import urllib
4 import simplejson
5 from collections import defaultdict
6 from cStringIO import StringIO
7 from twisted.python.failure import Failure
8 from allmydata.scripts.common import get_alias, escape_path, \
9                                      DefaultAliasMarker, TahoeError
10 from allmydata.scripts.common_http import do_http, HTTPError
11 from allmydata import uri
12 from allmydata.util import fileutil
13 from allmydata.util.fileutil import abspath_expanduser_unicode, precondition_abspath
14 from allmydata.util.encodingutil import unicode_to_url, listdir_unicode, quote_output, \
15     quote_local_unicode_path, to_str
16 from allmydata.util.assertutil import precondition, _assert
17
18
class MissingSourceError(TahoeError):
    """Error raised when a copy source cannot be found.

    The message is "No such file or directory " followed by the name as
    rendered by quotefn (quote_output unless the caller supplies another).
    """
    def __init__(self, name, quotefn=quote_output):
        message = "No such file or directory %s" % quotefn(name)
        TahoeError.__init__(self, message)
22
class FilenameWithTrailingSlashError(TahoeError):
    """Error raised when a source names a non-directory but ends in a slash.

    The name is rendered through quotefn (quote_output by default) before
    being placed in the message.
    """
    def __init__(self, name, quotefn=quote_output):
        message = "source '%s' is not a directory, but ends with a slash" % quotefn(name)
        TahoeError.__init__(self, message)
26
27
def GET_to_file(url):
    """Issue a GET for url and return the response object.

    Only a 200 status is accepted; anything else raises HTTPError carrying
    the response.
    """
    response = do_http("GET", url)
    if response.status != 200:
        raise HTTPError("Error during GET", response)
    return response
33
def GET_to_string(url):
    """Issue a GET for url and return the whole response body.

    Errors are those of GET_to_file().
    """
    return GET_to_file(url).read()
37
def PUT(url, data):
    """PUT data to url and return the response body.

    A 200 or 201 status counts as success; any other status raises
    HTTPError carrying the response.
    """
    response = do_http("PUT", url, data)
    if response.status not in (200, 201):
        raise HTTPError("Error during PUT", response)
    return response.read()
43
def POST(url, data):
    """POST data to url and return the response body.

    A 200 or 201 status counts as success; any other status raises
    HTTPError carrying the response.
    """
    response = do_http("POST", url, data)
    if response.status not in (200, 201):
        raise HTTPError("Error during POST", response)
    return response.read()
49
def mkdir(targeturl):
    """POST a "?t=mkdir" request to targeturl.

    Returns the stripped response body on a 200 or 201 status (callers in
    this file use it as the new directory's writecap); raises HTTPError
    otherwise.
    """
    response = do_http("POST", targeturl + "?t=mkdir")
    if response.status not in (200, 201):
        raise HTTPError("Error during mkdir", response)
    return response.read().strip()
56
def make_tahoe_subdirectory(nodeurl, parent_writecap, name):
    """POST a "?t=mkdir" request for the child `name` of a parent directory.

    The URL is nodeurl + "uri/<parent_writecap>/<name>", with both the cap
    and the name URL-quoted. Returns the stripped response body on a 200 or
    201 status; raises HTTPError otherwise.
    """
    segments = [
        "uri",
        urllib.quote(parent_writecap),
        urllib.quote(unicode_to_url(name)),
    ]
    url = nodeurl + "/".join(segments) + "?t=mkdir"
    response = do_http("POST", url)
    if response.status not in (200, 201):
        raise HTTPError("Error during mkdir", response)
    return response.read().strip()
66
67
class LocalFileSource:
    """A file on the local filesystem, used as the source of a copy."""

    def __init__(self, pathname, basename):
        # pathname must satisfy precondition_abspath()
        precondition_abspath(pathname)
        self._basename = basename
        self.pathname = pathname

    def basename(self):
        """Return the name given to the constructor."""
        return self._basename

    def need_to_copy_bytes(self):
        """Local files always report True here."""
        return True

    def open(self, caps_only):
        """Return the file opened for binary reading; caps_only is ignored."""
        return open(self.pathname, "rb")
82
class LocalFileTarget:
    """A local file path used as the destination of a copy."""

    def __init__(self, pathname):
        # pathname must satisfy precondition_abspath()
        precondition_abspath(pathname)
        self.pathname = pathname

    def put_file(self, inf):
        """Write the contents of the file-like object inf to self.pathname."""
        destination = self.pathname
        fileutil.put_file(destination, inf)
90
class LocalMissingTarget:
    """A local destination path that did not exist when it was examined."""

    def __init__(self, pathname):
        # pathname must satisfy precondition_abspath()
        precondition_abspath(pathname)
        self.pathname = pathname

    def put_file(self, inf):
        """Write the contents of the file-like object inf to self.pathname."""
        destination = self.pathname
        fileutil.put_file(destination, inf)
98
class LocalDirectorySource:
    """A directory on the local filesystem, used as the source of a copy.

    self.children stays None until populate() has run; afterwards it maps
    each entry name to a LocalFileSource or LocalDirectorySource.
    """

    def __init__(self, progressfunc, pathname, basename):
        # pathname must satisfy precondition_abspath()
        precondition_abspath(pathname)

        self._basename = basename
        self.pathname = pathname
        self.progressfunc = progressfunc
        self.children = None

    def basename(self):
        """Return the name given to the constructor."""
        return self._basename

    def populate(self, recurse):
        """Scan the directory once and fill in self.children.

        Does nothing when self.children has already been set. With a true
        `recurse`, each subdirectory found is populated recursively too.
        Entries that are neither a directory nor a regular file are left
        out of self.children.
        """
        if self.children is not None:
            return
        self.children = {}
        names = listdir_unicode(self.pathname)
        total = len(names)
        for index, entry in enumerate(names):
            self.progressfunc("examining %d of %d" % (index+1, total))
            entry_path = os.path.join(self.pathname, entry)
            if os.path.isdir(entry_path):
                subdir = LocalDirectorySource(self.progressfunc, entry_path, entry)
                self.children[entry] = subdir
                if recurse:
                    subdir.populate(recurse=True)
                continue
            if os.path.isfile(entry_path):
                self.children[entry] = LocalFileSource(entry_path, entry)
            # Anything else could be a dangling symlink; probably not
            # copy-able, so it is skipped.
            # TODO: output a warning
130
class LocalDirectoryTarget:
    """A directory on the local filesystem, used as a copy destination.

    self.children stays None until populate() has run; afterwards it maps
    each entry name to a LocalFileTarget or LocalDirectoryTarget.
    """

    def __init__(self, progressfunc, pathname):
        # pathname must satisfy precondition_abspath()
        precondition_abspath(pathname)

        self.pathname = pathname
        self.progressfunc = progressfunc
        self.children = None

    def populate(self, recurse):
        """Scan the directory once and fill in self.children.

        Does nothing when self.children has already been set. With a true
        `recurse`, each subdirectory found is populated recursively too.
        Every entry that is not a directory is asserted to be a regular
        file.
        """
        if self.children is not None:
            return
        self.children = {}
        names = listdir_unicode(self.pathname)
        total = len(names)
        for index, entry in enumerate(names):
            self.progressfunc("examining %d of %d" % (index+1, total))
            entry_path = os.path.join(self.pathname, entry)
            if not os.path.isdir(entry_path):
                assert os.path.isfile(entry_path)
                self.children[entry] = LocalFileTarget(entry_path)
                continue
            subdir = LocalDirectoryTarget(self.progressfunc, entry_path)
            self.children[entry] = subdir
            if recurse:
                subdir.populate(recurse=True)

    def get_child_target(self, name):
        """Return the target registered under `name`, creating a directory if absent.

        `name` must be a non-empty unicode string. When no child of that
        name is known, the directory is created on disk with os.makedirs()
        and remembered in self.children.
        """
        precondition(isinstance(name, unicode), name)
        precondition(len(name), name) # don't want ""
        if self.children is None:
            self.populate(recurse=False)
        try:
            return self.children[name]
        except KeyError:
            pass
        new_path = os.path.join(self.pathname, name)
        os.makedirs(new_path)
        new_target = LocalDirectoryTarget(self.progressfunc, new_path)
        self.children[name] = new_target
        return new_target

    def put_file(self, name, inf):
        """Write the contents of the file-like object inf to `name` in this directory."""
        precondition(isinstance(name, unicode), name)
        fileutil.put_file(os.path.join(self.pathname, name), inf)

    def set_children(self):
        """Do nothing; local writes happen immediately in put_file()."""
        pass
176
177
class TahoeFileSource:
    """A file reachable through the Tahoe node's web API, used as a copy source."""

    def __init__(self, nodeurl, mutable, writecap, readcap, basename):
        self.nodeurl = nodeurl
        self.mutable = mutable
        self.writecap = writecap
        self.readcap = readcap
        # unicode, or None for raw filecaps
        self._basename = basename

    def basename(self):
        """Return the name given to the constructor (may be None)."""
        return self._basename

    def need_to_copy_bytes(self):
        """Return True for mutable files, False otherwise."""
        return bool(self.mutable)

    def open(self, caps_only):
        """Return a readable object for this file.

        With a true caps_only, that is a StringIO holding the readcap
        itself; otherwise it is the response of a GET on
        nodeurl + "uri/<readcap>".
        """
        if not caps_only:
            return GET_to_file(self.nodeurl + "uri/" + urllib.quote(self.readcap))
        return StringIO(self.readcap)

    def bestcap(self):
        """Return the writecap when there is one, else the readcap."""
        return self.writecap or self.readcap
202
class TahoeFileTarget:
    """An existing file in the Tahoe grid, used as a copy destination.

    `url` is the web-API URL that put_file() PUTs to; it must be truthy by
    the time put_file() is called.
    """

    def __init__(self, nodeurl, mutable, writecap, readcap, url):
        self.nodeurl = nodeurl
        self.url = url
        self.mutable = mutable
        self.writecap = writecap
        self.readcap = readcap

    def put_file(self, inf):
        """Replace this object in place by PUTting inf to self.url."""
        assert self.url
        # our do_http() call currently requires a string or a filehandle with
        # a real .seek, so anything else is read into memory first
        payload = inf if hasattr(inf, "seek") else inf.read()
        PUT(self.url, payload)
        # TODO: this always creates immutable files. We might want an option
        # to always create mutable files, or to copy mutable files into new
        # mutable files. ticket #835
222
class TahoeDirectorySource:
    """A directory in the Tahoe grid, used as the source of a copy.

    After construction, init_from_grid() or init_from_parsed() must be
    called to set writecap, readcap, mutable and children_d. populate()
    then converts children_d into source objects stored in self.children.
    `cache` is a dict shared between directory objects, keyed by cap.
    """

    def __init__(self, nodeurl, cache, progressfunc, basename):
        self.nodeurl = nodeurl
        self.cache = cache
        self.progressfunc = progressfunc
        # unicode, or None for raw dircaps
        self._basename = basename

    def basename(self):
        """Return the name given to the constructor (may be None)."""
        return self._basename

    def init_from_grid(self, writecap, readcap):
        """Fetch this directory's JSON description from the node.

        The writecap is used for the request when there is one, otherwise
        the readcap. Raises HTTPError unless the node answers with 200.
        """
        self.writecap = writecap
        self.readcap = readcap
        cap = writecap or readcap
        url = self.nodeurl + "uri/%s" % urllib.quote(cap)
        resp = do_http("GET", url + "?t=json")
        if resp.status != 200:
            raise HTTPError("Error examining source directory", resp)
        nodetype, d = simplejson.loads(resp.read())
        assert nodetype == "dirnode"
        # older nodes don't provide "mutable"
        self.mutable = d.get("mutable", False)
        self.children_d = dict((unicode(childname), value)
                               for (childname, value)
                               in d["children"].iteritems())
        self.children = None

    def init_from_parsed(self, parsed):
        """Initialize from an already-decoded (nodetype, dict) JSON pair."""
        nodetype, d = parsed
        self.writecap = to_str(d.get("rw_uri"))
        self.readcap = to_str(d.get("ro_uri"))
        # older nodes don't provide "mutable"
        self.mutable = d.get("mutable", False)
        self.children_d = dict((unicode(childname), value)
                               for (childname, value)
                               in d["children"].iteritems())
        self.children = None

    def populate(self, recurse):
        """Build self.children from self.children_d, once.

        Does nothing when self.children has already been set. File nodes
        become TahoeFileSource objects. Directory nodes are taken from
        self.cache when their writecap or readcap is already there;
        otherwise they are fetched from the grid, cached under both caps,
        and (with a true `recurse`) populated recursively. Any other node
        type raises TahoeError.
        """
        if self.children is not None:
            return
        self.children = {}
        total = len(self.children_d)
        for index, (name, data) in enumerate(self.children_d.items()):
            self.progressfunc("examining %d of %d" % (index+1, total))
            kind = data[0]
            if kind == "filenode":
                info = data[1]
                mutable = info.get("mutable", False)
                writecap = to_str(info.get("rw_uri"))
                readcap = to_str(info.get("ro_uri"))
                self.children[name] = TahoeFileSource(self.nodeurl, mutable,
                                                      writecap, readcap, name)
            elif kind == "dirnode":
                info = data[1]
                writecap = to_str(info.get("rw_uri"))
                readcap = to_str(info.get("ro_uri"))
                if writecap and writecap in self.cache:
                    subdir = self.cache[writecap]
                elif readcap and readcap in self.cache:
                    subdir = self.cache[readcap]
                else:
                    subdir = TahoeDirectorySource(self.nodeurl, self.cache,
                                                  self.progressfunc, name)
                    subdir.init_from_grid(writecap, readcap)
                    if writecap:
                        self.cache[writecap] = subdir
                    if readcap:
                        self.cache[readcap] = subdir
                    # only freshly-fetched directories are recursed into
                    if recurse:
                        subdir.populate(recurse=True)
                self.children[name] = subdir
            else:
                # TODO: there should be an option to skip unknown nodes.
                raise TahoeError("Cannot copy unknown nodes (ticket #839). "
                                 "You probably need to use a later version of "
                                 "Tahoe-LAFS to copy this directory.")
295
class TahoeMissingTarget:
    """A destination URL in the Tahoe grid at which nothing exists yet."""

    def __init__(self, url):
        self.url = url

    def put_file(self, inf):
        """PUT the contents of inf to self.url."""
        # We want to replace this object in-place. Anything without a
        # .seek is read into memory before being handed to PUT().
        payload = inf if hasattr(inf, "seek") else inf.read()
        PUT(self.url, payload)
        # TODO: this always creates immutable files. We might want an option
        # to always create mutable files, or to copy mutable files into new
        # mutable files.

    def put_uri(self, filecap):
        """PUT filecap to self.url + "?t=uri" and return the response body."""
        # I'm not sure this will always work
        link_url = self.url + "?t=uri"
        return PUT(link_url, filecap)
312
class TahoeDirectoryTarget:
    """A directory in the Tahoe grid, used as a copy destination.

    After construction, one of init_from_parsed(), init_from_grid() or
    just_created() must be called to set writecap, readcap, mutable,
    children_d and children. Files added through put_file()/put_uri() are
    collected in self.new_children and attached to the directory by
    set_children(). `cache` is a dict shared between directory objects,
    keyed by cap.
    """

    def __init__(self, nodeurl, cache, progressfunc):
        self.nodeurl = nodeurl
        self.cache = cache
        self.progressfunc = progressfunc
        self.new_children = {}

    def init_from_parsed(self, parsed):
        """Initialize from an already-decoded (nodetype, dict) JSON pair."""
        nodetype, d = parsed
        self.writecap = to_str(d.get("rw_uri"))
        self.readcap = to_str(d.get("ro_uri"))
        # older nodes don't provide "mutable"
        self.mutable = d.get("mutable", False)
        self.children_d = dict((unicode(childname), value)
                               for (childname, value)
                               in d["children"].iteritems())
        self.children = None

    def init_from_grid(self, writecap, readcap):
        """Fetch this directory's JSON description from the node.

        The writecap is used for the request when there is one, otherwise
        the readcap. Raises HTTPError unless the node answers with 200.
        """
        self.writecap = writecap
        self.readcap = readcap
        cap = writecap or readcap
        url = self.nodeurl + "uri/%s" % urllib.quote(cap)
        resp = do_http("GET", url + "?t=json")
        if resp.status != 200:
            raise HTTPError("Error examining target directory", resp)
        nodetype, d = simplejson.loads(resp.read())
        assert nodetype == "dirnode"
        # older nodes don't provide "mutable"
        self.mutable = d.get("mutable", False)
        self.children_d = dict((unicode(childname), value)
                               for (childname, value)
                               in d["children"].iteritems())
        self.children = None

    def just_created(self, writecap):
        """Initialize as a brand-new, empty, mutable directory."""
        # TODO: maybe integrate this with the constructor
        self.writecap = writecap
        self.readcap = uri.from_string(writecap).get_readonly().to_string()
        self.mutable = True
        # nothing to examine: start out already populated and empty
        self.children_d = {}
        self.children = {}

    def populate(self, recurse):
        """Build self.children from self.children_d, once.

        Does nothing when self.children has already been set. File nodes
        become TahoeFileTarget objects, whose url is None when this
        directory has no writecap. Directory nodes are taken from
        self.cache when their writecap or readcap is already there;
        otherwise they are fetched from the grid, cached under both caps,
        and (with a true `recurse`) populated recursively. Any other node
        type raises TahoeError.
        """
        if self.children is not None:
            return
        self.children = {}
        total = len(self.children_d)
        for index, (name, data) in enumerate(self.children_d.items()):
            self.progressfunc("examining %d of %d" % (index+1, total))
            kind = data[0]
            if kind == "filenode":
                info = data[1]
                mutable = info.get("mutable", False)
                writecap = to_str(info.get("rw_uri"))
                readcap = to_str(info.get("ro_uri"))
                child_url = None
                if self.writecap:
                    segments = ["uri",
                                urllib.quote(self.writecap),
                                urllib.quote(unicode_to_url(name))]
                    child_url = self.nodeurl + "/".join(segments)
                self.children[name] = TahoeFileTarget(self.nodeurl, mutable,
                                                      writecap, readcap,
                                                      child_url)
            elif kind == "dirnode":
                info = data[1]
                writecap = to_str(info.get("rw_uri"))
                readcap = to_str(info.get("ro_uri"))
                if writecap and writecap in self.cache:
                    subdir = self.cache[writecap]
                elif readcap and readcap in self.cache:
                    subdir = self.cache[readcap]
                else:
                    subdir = TahoeDirectoryTarget(self.nodeurl, self.cache,
                                                  self.progressfunc)
                    subdir.init_from_grid(writecap, readcap)
                    if writecap:
                        self.cache[writecap] = subdir
                    if readcap:
                        self.cache[readcap] = subdir
                    # only freshly-fetched directories are recursed into
                    if recurse:
                        subdir.populate(recurse=True)
                self.children[name] = subdir
            else:
                # TODO: there should be an option to skip unknown nodes.
                raise TahoeError("Cannot copy unknown nodes (ticket #839). "
                                 "You probably need to use a later version of "
                                 "Tahoe-LAFS to copy this directory.")

    def get_child_target(self, name):
        """Return a target for the child `name`, creating a subdirectory if absent.

        `name` must be unicode. An existing child is returned as-is;
        otherwise a subdirectory is created in the grid and remembered in
        self.children.
        """
        precondition(isinstance(name, unicode), name)
        if self.children is None:
            self.populate(recurse=False)
        try:
            return self.children[name]
        except KeyError:
            pass
        new_writecap = make_tahoe_subdirectory(self.nodeurl, self.writecap, name)
        new_target = TahoeDirectoryTarget(self.nodeurl, self.cache,
                                          self.progressfunc)
        new_target.just_created(new_writecap)
        self.children[name] = new_target
        return new_target

    def put_file(self, name, inf):
        """Store the contents of inf under `name` in this directory.

        When a child of that name exists and is mutable, it is overwritten
        in place. Otherwise the data is uploaded as an unlinked file and
        the resulting cap is recorded in self.new_children, to be attached
        by set_children().
        """
        precondition(isinstance(name, unicode), name)
        upload_url = self.nodeurl + "uri"
        if not hasattr(inf, "seek"):
            inf = inf.read()

        if self.children is None:
            self.populate(recurse=False)

        # Check to see if we already have a mutable file by this name.
        # If so, overwrite that file in place.
        if name in self.children:
            existing = self.children[name]
            if existing.mutable:
                existing.put_file(inf)
                return
        # TODO: this always creates immutable files. We might want an option
        # to always create mutable files, or to copy mutable files into new
        # mutable files.
        self.new_children[name] = PUT(upload_url, inf)

    def put_uri(self, name, filecap):
        """Record filecap under `name`, to be attached by set_children()."""
        precondition(isinstance(name, unicode), name)
        self.new_children[name] = filecap

    def set_children(self):
        """Attach everything in self.new_children with one t=set_children POST.

        Does nothing when self.new_children is empty.
        """
        if not self.new_children:
            return
        # it just so happens that ?t=set_children will accept both file
        # read-caps and write-caps as ['rw_uri'], and will handle either
        # correctly. So don't bother trying to figure out whether the one
        # we have is read-only or read-write.
        # TODO: think about how this affects forward-compatibility for
        # unknown caps
        set_data = dict((name, ["filenode", {"rw_uri": filecap}])
                        for (name, filecap) in self.new_children.items())
        url = (self.nodeurl + "uri/" + urllib.quote(self.writecap)
               + "?t=set_children")
        POST(url, simplejson.dumps(set_data))
450
# Class groupings used in isinstance() checks: each tuple pairs the local
# flavour of a source/target kind with its Tahoe flavour.
FileSources = (
    LocalFileSource,
    TahoeFileSource,
)
DirectorySources = (
    LocalDirectorySource,
    TahoeDirectorySource,
)
FileTargets = (
    LocalFileTarget,
    TahoeFileTarget,
)
DirectoryTargets = (
    LocalDirectoryTarget,
    TahoeDirectoryTarget,
)
MissingTargets = (
    LocalMissingTarget,
    TahoeMissingTarget,
)
456
457 class Copier:
458
459     def do_copy(self, options, progressfunc=None):
460         if options['quiet']:
461             verbosity = 0
462         elif options['verbose']:
463             verbosity = 2
464         else:
465             verbosity = 1
466
467         nodeurl = options['node-url']
468         if nodeurl[-1] != "/":
469             nodeurl += "/"
470         self.nodeurl = nodeurl
471         self.progressfunc = progressfunc
472         self.options = options
473         self.aliases = options.aliases
474         self.verbosity = verbosity
475         self.stdout = options.stdout
476         self.stderr = options.stderr
477         if verbosity >= 2 and not self.progressfunc:
478             def progress(message):
479                 print >>self.stderr, message
480             self.progressfunc = progress
481         self.caps_only = options["caps-only"]
482         self.cache = {}
483         try:
484             status = self.try_copy()
485             return status
486         except TahoeError, te:
487             if verbosity >= 2:
488                 Failure().printTraceback(self.stderr)
489                 print >>self.stderr
490             te.display(self.stderr)
491             return 1
492
493     def try_copy(self):
494         """
495         All usage errors are caught here, not in a subroutine. This bottoms
496         out in copy_file_to_file() or copy_things_to_directory().
497         """
498         source_specs = self.options.sources
499         destination_spec = self.options.destination
500         recursive = self.options["recursive"]
501
502         target = self.get_target_info(destination_spec)
503         precondition(isinstance(target, FileTargets + DirectoryTargets + MissingTargets), target)
504         target_has_trailing_slash = destination_spec.endswith("/")
505
506         sources = [] # list of source objects
507         for ss in source_specs:
508             try:
509                 si = self.get_source_info(ss)
510             except FilenameWithTrailingSlashError:
511                 self.to_stderr("source is not a directory, but ends with a slash")
512                 return 1
513             precondition(isinstance(si, FileSources + DirectorySources), si)
514             sources.append(si)
515
516         # if any source is a directory, must use -r
517         # if target is missing:
518         #    if source is a single file, target will be a file
519         #    else target will be a directory, so mkdir it
520         # if there are multiple sources, target must be a dir
521         # if target is a file, source must be a single file
522         # if target is directory, sources must be named or a dir
523
524         have_source_dirs = any([isinstance(s, DirectorySources)
525                                 for s in sources])
526         if have_source_dirs and not recursive:
527             # 'cp dir target' without -r: error
528             self.to_stderr("cannot copy directories without --recursive")
529             return 1
530         del recursive # -r is only used for signalling errors
531
532         if isinstance(target, FileTargets):
533             target_is_file = True
534         elif isinstance(target, DirectoryTargets):
535             target_is_file = False
536         else: # isinstance(target, MissingTargets)
537             if len(sources) == 1 and isinstance(sources[0], FileSources):
538                 target_is_file = True
539             else:
540                 target_is_file = False
541
542         if target_is_file and target_has_trailing_slash:
543             self.to_stderr("target is not a directory, but ends with a slash")
544             return 1
545
546         if len(sources) > 1 and target_is_file:
547             self.to_stderr("copying multiple things requires target be a directory")
548             return 1
549
550         if target_is_file:
551             _assert(len(sources) == 1, sources)
552             if not isinstance(sources[0], FileSources):
553                 # 'cp -r dir existingfile': error
554                 self.to_stderr("cannot copy directory into a file")
555                 return 1
556             return self.copy_file_to_file(sources[0], target)
557
558         # else target is a directory, so each source must be one of:
559         # * a named file (copied to a new file under the target)
560         # * a named directory (causes a new directory of the same name to be
561         #   created under the target, then the contents of the source are
562         #   copied into that directory)
563         # * an unnamed directory (the contents of the source are copied into
564         #   the target, without a new directory being made)
565         #
566         # If any source is an unnamed file, throw an error, since we have no
567         # way to name the output file.
568         _assert(isinstance(target, DirectoryTargets + MissingTargets), target)
569
570         for source in sources:
571             if isinstance(source, FileSources) and source.basename() is None:
572                 self.to_stderr("when copying into a directory, all source files must have names, but %s is unnamed" % quote_output(source_specs[0]))
573                 return 1
574         return self.copy_things_to_directory(sources, target)
575
576     def to_stderr(self, text):
577         print >>self.stderr, text
578
579     # FIXME reduce the amount of near-duplicate code between get_target_info
580     # and get_source_info.
581
582     def get_target_info(self, destination_spec):
583         precondition(isinstance(destination_spec, unicode), destination_spec)
584         rootcap, path_utf8 = get_alias(self.aliases, destination_spec, None)
585         path = path_utf8.decode("utf-8")
586         if rootcap == DefaultAliasMarker:
587             # no alias, so this is a local file
588             pathname = abspath_expanduser_unicode(path)
589             if not os.path.exists(pathname):
590                 t = LocalMissingTarget(pathname)
591             elif os.path.isdir(pathname):
592                 t = LocalDirectoryTarget(self.progress, pathname)
593             else:
594                 # TODO: should this be _assert? what happens if the target is
595                 # a special file?
596                 assert os.path.isfile(pathname), pathname
597                 t = LocalFileTarget(pathname) # non-empty
598         else:
599             # this is a tahoe object
600             url = self.nodeurl + "uri/%s" % urllib.quote(rootcap)
601             if path:
602                 url += "/" + escape_path(path)
603
604             resp = do_http("GET", url + "?t=json")
605             if resp.status == 404:
606                 # doesn't exist yet
607                 t = TahoeMissingTarget(url)
608             elif resp.status == 200:
609                 parsed = simplejson.loads(resp.read())
610                 nodetype, d = parsed
611                 if nodetype == "dirnode":
612                     t = TahoeDirectoryTarget(self.nodeurl, self.cache,
613                                              self.progress)
614                     t.init_from_parsed(parsed)
615                 else:
616                     writecap = to_str(d.get("rw_uri"))
617                     readcap = to_str(d.get("ro_uri"))
618                     mutable = d.get("mutable", False)
619                     t = TahoeFileTarget(self.nodeurl, mutable,
620                                         writecap, readcap, url)
621             else:
622                 raise HTTPError("Error examining target %s"
623                                  % quote_output(destination_spec), resp)
624         return t
625
626     def get_source_info(self, source_spec):
627         """
628         This turns an argv string into a (Local|Tahoe)(File|Directory)Source.
629         """
630         precondition(isinstance(source_spec, unicode), source_spec)
631         rootcap, path_utf8 = get_alias(self.aliases, source_spec, None)
632         path = path_utf8.decode("utf-8")
633         # any trailing slash is removed in abspath_expanduser_unicode(), so
634         # make a note of it here, to throw an error later
635         had_trailing_slash = path.endswith("/")
636         if rootcap == DefaultAliasMarker:
637             # no alias, so this is a local file
638             pathname = abspath_expanduser_unicode(path)
639             name = os.path.basename(pathname)
640             if not os.path.exists(pathname):
641                 raise MissingSourceError(source_spec, quotefn=quote_local_unicode_path)
642             if os.path.isdir(pathname):
643                 t = LocalDirectorySource(self.progress, pathname, name)
644             else:
645                 if had_trailing_slash:
646                     raise FilenameWithTrailingSlashError(source_spec)
647                 assert os.path.isfile(pathname)
648                 t = LocalFileSource(pathname, name) # non-empty
649         else:
650             # this is a tahoe object
651             url = self.nodeurl + "uri/%s" % urllib.quote(rootcap)
652             name = None
653             if path:
654                 if path.endswith("/"):
655                     path = path[:-1]
656                 url += "/" + escape_path(path)
657                 last_slash = path.rfind(u"/")
658                 name = path
659                 if last_slash != -1:
660                     name = path[last_slash+1:]
661
662             resp = do_http("GET", url + "?t=json")
663             if resp.status == 404:
664                 raise MissingSourceError(source_spec)
665             elif resp.status != 200:
666                 raise HTTPError("Error examining source %s" % quote_output(source_spec),
667                                 resp)
668             parsed = simplejson.loads(resp.read())
669             nodetype, d = parsed
670             if nodetype == "dirnode":
671                 t = TahoeDirectorySource(self.nodeurl, self.cache,
672                                          self.progress, name)
673                 t.init_from_parsed(parsed)
674             else:
675                 if had_trailing_slash:
676                     raise FilenameWithTrailingSlashError(source_spec)
677                 writecap = to_str(d.get("rw_uri"))
678                 readcap = to_str(d.get("ro_uri"))
679                 mutable = d.get("mutable", False) # older nodes don't provide it
680                 t = TahoeFileSource(self.nodeurl, mutable, writecap, readcap, name)
681         return t
682
683
684     def need_to_copy_bytes(self, source, target):
685         if source.need_to_copy_bytes:
686             # mutable tahoe files, and local files
687             return True
688         if isinstance(target, (LocalFileTarget, LocalDirectoryTarget)):
689             return True
690         return False
691
692     def announce_success(self, msg):
693         if self.verbosity >= 1:
694             print >>self.stdout, "Success: %s" % msg
695         return 0
696
697     def copy_file_to_file(self, source, target):
698         precondition(isinstance(source, FileSources), source)
699         precondition(isinstance(target, FileTargets + MissingTargets), target)
700         if self.need_to_copy_bytes(source, target):
701             # if the target is a local directory, this will just write the
702             # bytes to disk. If it is a tahoe directory, it will upload the
703             # data, and stash the new filecap for a later set_children call.
704             f = source.open(self.caps_only)
705             target.put_file(f)
706             return self.announce_success("file copied")
707         # otherwise we're copying tahoe to tahoe, and using immutable files,
708         # so we can just make a link. TODO: this probably won't always work:
709         # need to enumerate the cases and analyze them.
710         target.put_uri(source.bestcap())
711         return self.announce_success("file linked")
712
713     def copy_things_to_directory(self, sources, target):
714         # step one: if the target is missing, we should mkdir it
715         target = self.maybe_create_target(target)
716         target.populate(recurse=False)
717
718         # step two: scan any source dirs, recursively, to find children
719         for s in sources:
720             if isinstance(s, DirectorySources):
721                 s.populate(recurse=True)
722             if isinstance(s, FileSources):
723                 # each source must have a name, or be a directory
724                 _assert(s.basename() is not None, s)
725
726         # step three: find a target for each source node, creating
727         # directories as necessary. 'targetmap' is a dictionary that uses
728         # target Directory instances as keys, and has values of (name:
729         # sourceobject) dicts for all the files that need to wind up there.
730         targetmap = self.build_targetmap(sources, target)
731
732         # step four: walk through the list of targets. For each one, copy all
733         # the files. If the target is a TahoeDirectory, upload and create
734         # read-caps, then do a set_children to the target directory.
735         self.copy_to_targetmap(targetmap)
736
737         return self.announce_success("files copied")
738
739     def maybe_create_target(self, target):
740         if isinstance(target, LocalMissingTarget):
741             os.makedirs(target.pathname)
742             target = LocalDirectoryTarget(self.progress, target.pathname)
743         elif isinstance(target, TahoeMissingTarget):
744             writecap = mkdir(target.url)
745             target = TahoeDirectoryTarget(self.nodeurl, self.cache,
746                                           self.progress)
747             target.just_created(writecap)
748         # afterwards, or otherwise, it will be a directory
749         precondition(isinstance(target, DirectoryTargets), target)
750         return target
751
752     def build_targetmap(self, sources, target):
753         num_source_files = len([s for s in sources
754                                 if isinstance(s, FileSources)])
755         num_source_dirs = len([s for s in sources
756                                if isinstance(s, DirectorySources)])
757         self.progress("attaching sources to targets, "
758                       "%d files / %d dirs in root" %
759                       (num_source_files, num_source_dirs))
760
761         # this maps each target directory to a list of source files that need
762         # to be copied into it. All source files have names.
763         targetmap = defaultdict(list)
764
765         for s in sources:
766             if isinstance(s, FileSources):
767                 targetmap[target].append(s)
768             else:
769                 _assert(isinstance(s, DirectorySources), s)
770                 name = s.basename()
771                 if name is not None:
772                     # named sources get a new directory. see #2329
773                     new_target = target.get_child_target(name)
774                 else:
775                     # unnamed sources have their contents copied directly
776                     new_target = target
777                 self.assign_targets(targetmap, s, new_target)
778
779         self.progress("targets assigned, %s dirs, %s files" %
780                       (len(targetmap), self.count_files_to_copy(targetmap)))
781         return targetmap
782
783     def assign_targets(self, targetmap, source, target):
784         # copy everything in the source into the target
785         precondition(isinstance(source, DirectorySources), source)
786         for name, child in source.children.items():
787             if isinstance(child, DirectorySources):
788                 # we will need a target directory for this one
789                 subtarget = target.get_child_target(name)
790                 self.assign_targets(targetmap, child, subtarget)
791             else:
792                 _assert(isinstance(child, FileSources), child)
793                 targetmap[target].append(child)
794
795     def copy_to_targetmap(self, targetmap):
796         files_to_copy = self.count_files_to_copy(targetmap)
797         self.progress("starting copy, %d files, %d directories" %
798                       (files_to_copy, len(targetmap)))
799         files_copied = 0
800         targets_finished = 0
801
802         for target, sources in targetmap.items():
803             _assert(isinstance(target, DirectoryTargets), target)
804             for source in sources:
805                 _assert(isinstance(source, FileSources), source)
806                 self.copy_file_into_dir(source, source.basename(), target)
807                 files_copied += 1
808                 self.progress("%d/%d files, %d/%d directories" %
809                               (files_copied, files_to_copy,
810                                targets_finished, len(targetmap)))
811             target.set_children()
812             targets_finished += 1
813             self.progress("%d/%d directories" %
814                           (targets_finished, len(targetmap)))
815
816     def count_files_to_copy(self, targetmap):
817         return sum([len(sources) for sources in targetmap.values()])
818
819     def copy_file_into_dir(self, source, name, target):
820         precondition(isinstance(source, FileSources), source)
821         precondition(isinstance(target, DirectoryTargets), target)
822         precondition(isinstance(name, unicode), name)
823         if self.need_to_copy_bytes(source, target):
824             # if the target is a local directory, this will just write the
825             # bytes to disk. If it is a tahoe directory, it will upload the
826             # data, and stash the new filecap for a later set_children call.
827             f = source.open(self.caps_only)
828             target.put_file(name, f)
829             return
830         # otherwise we're copying tahoe to tahoe, and using immutable files,
831         # so we can just make a link
832         target.put_uri(name, source.bestcap())
833
834
835     def progress(self, message):
836         #print message
837         if self.progressfunc:
838             self.progressfunc(message)
839
840
def copy(options):
    """Run a copy described by 'options' using a fresh Copier.

    Returns whatever Copier.do_copy() returns.
    """
    copier = Copier()
    return copier.do_copy(options)
843
844 # error cases that need improvement:
845 #  local-file-in-the-way
846 #   touch proposed
847 #   tahoe cp -r my:docs/proposed/denver.txt proposed/denver.txt
848 #  handling of unknown nodes
849
850 # things that maybe should be errors but aren't
851 #  local-dir-in-the-way
852 #   mkdir denver.txt
853 #   tahoe cp -r my:docs/proposed/denver.txt denver.txt
854 #   (creates denver.txt/denver.txt)
855
856 # error cases that look good:
857 #  tahoe cp -r my:docs/missing missing
858 #  disconnect servers
859 #   tahoe cp -r my:docs/missing missing  -> No JSON object could be decoded
860 #  tahoe-file-in-the-way (when we want to make a directory)
861 #   tahoe put README my:docs
862 #   tahoe cp -r docs/proposed my:docs/proposed