]> git.rkrishnan.org Git - tahoe-lafs/tahoe-lafs.git/blob - src/allmydata/scripts/tahoe_cp.py
a5cfe16a1a25b7b65e9ca9d9856256457b7da9a4
[tahoe-lafs/tahoe-lafs.git] / src / allmydata / scripts / tahoe_cp.py
1
2 import os.path
3 import urllib
4 import simplejson
5 from collections import defaultdict
6 from cStringIO import StringIO
7 from twisted.python.failure import Failure
8 from allmydata.scripts.common import get_alias, escape_path, \
9                                      DefaultAliasMarker, TahoeError
10 from allmydata.scripts.common_http import do_http, HTTPError
11 from allmydata import uri
12 from allmydata.util import fileutil
13 from allmydata.util.fileutil import abspath_expanduser_unicode, precondition_abspath
14 from allmydata.util.encodingutil import unicode_to_url, listdir_unicode, quote_output, \
15     quote_local_unicode_path, to_str
16 from allmydata.util.assertutil import precondition, _assert
17
18
class MissingSourceError(TahoeError):
    """Raised when a named copy source cannot be found.

    'name' is the source as the user spelled it; 'quotefn' is applied to it
    before it is embedded in the error message.
    """

    def __init__(self, name, quotefn=quote_output):
        message = "No such file or directory %s" % quotefn(name)
        TahoeError.__init__(self, message)
22
23
def GET_to_file(url):
    """Issue a GET for 'url' and return the response object.

    Only a 200 status is accepted; anything else raises HTTPError.
    """
    response = do_http("GET", url)
    if response.status != 200:
        raise HTTPError("Error during GET", response)
    return response
29
def GET_to_string(url):
    """GET 'url' via GET_to_file() and return the whole response body."""
    return GET_to_file(url).read()
33
def PUT(url, data):
    """PUT 'data' to 'url' and return the response body.

    A 200 or 201 status counts as success; anything else raises HTTPError.
    """
    response = do_http("PUT", url, data)
    if response.status not in (200, 201):
        raise HTTPError("Error during PUT", response)
    return response.read()
39
def POST(url, data):
    """POST 'data' to 'url' and return the response body.

    A 200 or 201 status counts as success; anything else raises HTTPError.
    """
    response = do_http("POST", url, data)
    if response.status not in (200, 201):
        raise HTTPError("Error during POST", response)
    return response.read()
45
def mkdir(targeturl):
    """POST a "?t=mkdir" request to 'targeturl'.

    Returns the response body with surrounding whitespace stripped (callers
    in this file use it as the writecap of the new directory). Raises
    HTTPError unless the status is 200 or 201.
    """
    response = do_http("POST", targeturl + "?t=mkdir")
    if response.status not in (200, 201):
        raise HTTPError("Error during mkdir", response)
    return response.read().strip()
52
def make_tahoe_subdirectory(nodeurl, parent_writecap, name):
    """POST a "?t=mkdir" request for child 'name' under 'parent_writecap'.

    Returns the stripped response body (callers in this file use it as the
    writecap of the new subdirectory). Raises HTTPError unless the status is
    200 or 201.
    """
    quoted_parent = urllib.quote(parent_writecap)
    quoted_name = urllib.quote(unicode_to_url(name))
    child_path = "/".join(("uri", quoted_parent, quoted_name))
    response = do_http("POST", nodeurl + child_path + "?t=mkdir")
    if response.status not in (200, 201):
        raise HTTPError("Error during mkdir", response)
    return response.read().strip()
62
63
class LocalFileSource:
    """A copy source that is a file on the local filesystem."""

    def __init__(self, pathname, basename):
        # 'pathname' is checked by precondition_abspath before being stored.
        precondition_abspath(pathname)
        self._basename = basename
        self.pathname = pathname

    def basename(self):
        """Return the name that was given to the constructor."""
        return self._basename

    def need_to_copy_bytes(self):
        """Local files always report True here."""
        return True

    def open(self, caps_only):
        """Return the file opened for binary reading ('caps_only' is unused)."""
        source_path = self.pathname
        return open(source_path, "rb")
78
class LocalFileTarget:
    """A copy target that is an existing file on the local filesystem."""

    def __init__(self, pathname):
        precondition_abspath(pathname)
        self.pathname = pathname

    def put_file(self, inf):
        """Hand the file-like object 'inf' to fileutil.put_file for our path."""
        destination = self.pathname
        fileutil.put_file(destination, inf)
86
class LocalMissingTarget:
    """A local copy target whose path does not exist yet."""

    def __init__(self, pathname):
        precondition_abspath(pathname)
        self.pathname = pathname

    def put_file(self, inf):
        """Hand the file-like object 'inf' to fileutil.put_file for our path."""
        destination = self.pathname
        fileutil.put_file(destination, inf)
94
class LocalDirectorySource:
    """A copy source that is a directory on the local filesystem.

    'children' is None until populate() has run; afterwards it maps each
    child name to a LocalDirectorySource or LocalFileSource.
    """

    def __init__(self, progressfunc, pathname, basename):
        precondition_abspath(pathname)

        self._basename = basename
        self.pathname = pathname
        self.progressfunc = progressfunc
        self.children = None

    def basename(self):
        """Return the name that was given to the constructor."""
        return self._basename

    def populate(self, recurse):
        """List the directory once and record its children.

        Subdirectories are populated too when 'recurse' is true. Calling
        this again after the first scan is a no-op.
        """
        if self.children is not None:
            return
        self.children = {}
        names = listdir_unicode(self.pathname)
        total = len(names)
        for index, entry in enumerate(names):
            self.progressfunc("examining %d of %d" % (index+1, total))
            full_path = os.path.join(self.pathname, entry)
            if os.path.isdir(full_path):
                subdir = LocalDirectorySource(self.progressfunc, full_path, entry)
                self.children[entry] = subdir
                if recurse:
                    subdir.populate(recurse=True)
                continue
            if os.path.isfile(full_path):
                self.children[entry] = LocalFileSource(full_path, entry)
            # Anything else (could be a dangling symlink) is probably not
            # copy-able, so it is left out of 'children'.
            # TODO: output a warning
126
class LocalDirectoryTarget:
    """A copy target that is an existing directory on the local filesystem.

    'children' is None until populate() has run; afterwards it maps child
    names to LocalDirectoryTarget or LocalFileTarget instances.
    """

    def __init__(self, progressfunc, pathname):
        precondition_abspath(pathname)

        self.progressfunc = progressfunc
        self.pathname = pathname
        self.children = None

    def populate(self, recurse):
        """List the directory once and record its children.

        Subdirectories are populated too when 'recurse' is true. Entries
        that are neither directories nor regular files are skipped. Calling
        this again after the first scan is a no-op.
        """
        if self.children is not None:
            return
        self.children = {}
        children = listdir_unicode(self.pathname)
        for i,n in enumerate(children):
            self.progressfunc("examining %d of %d" % (i+1, len(children)))
            pn = os.path.join(self.pathname, n)
            if os.path.isdir(pn):
                child = LocalDirectoryTarget(self.progressfunc, pn)
                self.children[n] = child
                if recurse:
                    child.populate(recurse=True)
            elif os.path.isfile(pn):
                self.children[n] = LocalFileTarget(pn)
            else:
                # Could be a dangling symlink or a special file. This used
                # to be an assert, which aborted the whole copy (or, under
                # -O, silently wrapped the entry as a file). Skip it, the
                # same way LocalDirectorySource.populate does.
                # TODO: output a warning
                pass

    def get_child_target(self, name):
        """Return the target for child 'name', creating a directory if needed.

        An already-known child is returned as-is; otherwise a new directory
        is made with os.makedirs and wrapped in a LocalDirectoryTarget.
        """
        precondition(isinstance(name, unicode), name)
        precondition(len(name), name) # don't want ""
        if self.children is None:
            self.populate(recurse=False)
        if name in self.children:
            return self.children[name]
        pathname = os.path.join(self.pathname, name)
        os.makedirs(pathname)
        child = LocalDirectoryTarget(self.progressfunc, pathname)
        self.children[name] = child
        return child

    def put_file(self, name, inf):
        """Hand 'inf' to fileutil.put_file for the child path 'name'."""
        precondition(isinstance(name, unicode), name)
        pathname = os.path.join(self.pathname, name)
        fileutil.put_file(pathname, inf)

    def set_children(self):
        """No-op counterpart of TahoeDirectoryTarget.set_children()."""
        pass
172
173
class TahoeFileSource:
    """A copy source that is a file node reachable through the Tahoe node."""

    def __init__(self, nodeurl, mutable, writecap, readcap, basename):
        self._basename = basename # unicode, or None for raw filecaps
        self.nodeurl = nodeurl
        self.mutable = mutable
        self.writecap = writecap
        self.readcap = readcap

    def basename(self):
        """Return the name given to the constructor (may be None)."""
        return self._basename

    def need_to_copy_bytes(self):
        """Return True when the file is mutable, False otherwise."""
        return bool(self.mutable)

    def open(self, caps_only):
        """Return a readable object for this file.

        With 'caps_only' true this is a StringIO wrapping the readcap
        string; otherwise it is the response of a GET on the readcap URL.
        """
        if not caps_only:
            download_url = self.nodeurl + "uri/" + urllib.quote(self.readcap)
            return GET_to_file(download_url)
        return StringIO(self.readcap)

    def bestcap(self):
        """Return the writecap if there is one, else the readcap."""
        if self.writecap:
            return self.writecap
        return self.readcap
198
class TahoeFileTarget:
    """A copy target that is an existing file node in the Tahoe grid."""

    def __init__(self, nodeurl, mutable, writecap, readcap, url):
        self.url = url
        self.nodeurl = nodeurl
        self.mutable = mutable
        self.writecap = writecap
        self.readcap = readcap

    def put_file(self, inf):
        """Replace this object in place by PUTting 'inf' to self.url."""
        assert self.url
        # do_http() currently needs either a string or a filehandle with a
        # real .seek, so anything without one is read into memory first.
        payload = inf if hasattr(inf, "seek") else inf.read()
        PUT(self.url, payload)
        # TODO: this always creates immutable files. We might want an option
        # to always create mutable files, or to copy mutable files into new
        # mutable files. ticket #835
218
class TahoeDirectorySource:
    """A copy source that is a directory node in the Tahoe grid.

    After one of the init_from_* methods has run, 'children_d' holds the
    raw child descriptions and 'children' is None; populate() turns the
    descriptions into TahoeFileSource / TahoeDirectorySource objects.
    'cache' maps directory caps to already-built TahoeDirectorySource
    instances.
    """

    def __init__(self, nodeurl, cache, progressfunc, basename):
        self._basename = basename # unicode, or None for raw dircaps
        self.nodeurl = nodeurl
        self.cache = cache
        self.progressfunc = progressfunc

    def basename(self):
        """Return the name given to the constructor (may be None)."""
        return self._basename

    def init_from_grid(self, writecap, readcap):
        """Fetch this directory's JSON description and initialise from it.

        Raises HTTPError when the GET does not return status 200.
        """
        self.writecap = writecap
        self.readcap = readcap
        bestcap = writecap or readcap
        json_url = self.nodeurl + "uri/%s" % urllib.quote(bestcap) + "?t=json"
        resp = do_http("GET", json_url)
        if resp.status != 200:
            raise HTTPError("Error examining source directory", resp)
        nodetype, d = simplejson.loads(resp.read())
        assert nodetype == "dirnode"
        self.mutable = d.get("mutable", False) # older nodes don't provide it
        self.children_d = dict((unicode(name), value)
                               for (name, value) in d["children"].iteritems())
        self.children = None

    def init_from_parsed(self, parsed):
        """Initialise from an already-decoded (nodetype, data) pair."""
        nodetype, d = parsed
        self.writecap = to_str(d.get("rw_uri"))
        self.readcap = to_str(d.get("ro_uri"))
        self.mutable = d.get("mutable", False) # older nodes don't provide it
        self.children_d = dict((unicode(name), value)
                               for (name, value) in d["children"].iteritems())
        self.children = None

    def populate(self, recurse):
        """Build source objects for every entry of 'children_d' (once).

        Child directories not found in the cache are fetched with
        init_from_grid(), cached under their caps, and populated too when
        'recurse' is true. Raises TahoeError for a child that is neither a
        "filenode" nor a "dirnode".
        """
        if self.children is not None:
            return
        self.children = {}
        total = len(self.children_d)
        for index, (name, data) in enumerate(self.children_d.items()):
            self.progressfunc("examining %d of %d" % (index+1, total))
            kind = data[0]
            if kind == "filenode":
                info = data[1]
                mutable = info.get("mutable", False)
                writecap = to_str(info.get("rw_uri"))
                readcap = to_str(info.get("ro_uri"))
                self.children[name] = TahoeFileSource(self.nodeurl, mutable,
                                                      writecap, readcap, name)
            elif kind == "dirnode":
                info = data[1]
                writecap = to_str(info.get("rw_uri"))
                readcap = to_str(info.get("ro_uri"))
                if writecap and writecap in self.cache:
                    subdir = self.cache[writecap]
                elif readcap and readcap in self.cache:
                    subdir = self.cache[readcap]
                else:
                    subdir = TahoeDirectorySource(self.nodeurl, self.cache,
                                                  self.progressfunc, name)
                    subdir.init_from_grid(writecap, readcap)
                    if writecap:
                        self.cache[writecap] = subdir
                    if readcap:
                        self.cache[readcap] = subdir
                    # only freshly-fetched directories are descended into;
                    # cached ones are used as they are
                    if recurse:
                        subdir.populate(recurse=True)
                self.children[name] = subdir
            else:
                # TODO: there should be an option to skip unknown nodes.
                raise TahoeError("Cannot copy unknown nodes (ticket #839). "
                                 "You probably need to use a later version of "
                                 "Tahoe-LAFS to copy this directory.")
291
class TahoeMissingTarget:
    """A Tahoe copy target whose URL does not refer to an existing object."""

    def __init__(self, url):
        self.url = url

    def put_file(self, inf):
        """PUT the contents of 'inf' to self.url."""
        # Objects without a .seek are read into a string before the PUT.
        payload = inf if hasattr(inf, "seek") else inf.read()
        PUT(self.url, payload)
        # TODO: this always creates immutable files. We might want an option
        # to always create mutable files, or to copy mutable files into new
        # mutable files.

    def put_uri(self, filecap):
        """PUT 'filecap' to self.url + "?t=uri" and return the response body."""
        # I'm not sure this will always work
        link_url = self.url + "?t=uri"
        return PUT(link_url, filecap)
308
class TahoeDirectoryTarget:
    """A copy target that is a directory node in the Tahoe grid.

    File caps produced by put_file() / put_uri() are queued in
    'new_children' and sent to the directory in a single request by
    set_children(). 'cache' maps directory caps to already-built
    TahoeDirectoryTarget instances.
    """

    def __init__(self, nodeurl, cache, progressfunc):
        self.new_children = {}
        self.nodeurl = nodeurl
        self.cache = cache
        self.progressfunc = progressfunc

    def init_from_parsed(self, parsed):
        """Initialise from an already-decoded (nodetype, data) pair."""
        nodetype, d = parsed
        self.writecap = to_str(d.get("rw_uri"))
        self.readcap = to_str(d.get("ro_uri"))
        self.mutable = d.get("mutable", False) # older nodes don't provide it
        self.children_d = dict((unicode(name), value)
                               for (name, value) in d["children"].iteritems())
        self.children = None

    def init_from_grid(self, writecap, readcap):
        """Fetch this directory's JSON description and initialise from it.

        Raises HTTPError when the GET does not return status 200.
        """
        self.writecap = writecap
        self.readcap = readcap
        bestcap = writecap or readcap
        json_url = self.nodeurl + "uri/%s" % urllib.quote(bestcap) + "?t=json"
        resp = do_http("GET", json_url)
        if resp.status != 200:
            raise HTTPError("Error examining target directory", resp)
        nodetype, d = simplejson.loads(resp.read())
        assert nodetype == "dirnode"
        self.mutable = d.get("mutable", False) # older nodes don't provide it
        self.children_d = dict((unicode(name), value)
                               for (name, value) in d["children"].iteritems())
        self.children = None

    def just_created(self, writecap):
        """Initialise as a brand-new, empty, mutable directory."""
        # TODO: maybe integrate this with the constructor
        self.writecap = writecap
        self.readcap = uri.from_string(writecap).get_readonly().to_string()
        self.mutable = True
        self.children_d = {}
        self.children = {}

    def populate(self, recurse):
        """Build target objects for every entry of 'children_d' (once).

        Child directories not found in the cache are fetched with
        init_from_grid(), cached under their caps, and populated too when
        'recurse' is true. Raises TahoeError for a child that is neither a
        "filenode" nor a "dirnode".
        """
        if self.children is not None:
            return
        self.children = {}
        total = len(self.children_d)
        for index, (name, data) in enumerate(self.children_d.items()):
            self.progressfunc("examining %d of %d" % (index+1, total))
            kind = data[0]
            if kind == "filenode":
                info = data[1]
                mutable = info.get("mutable", False)
                writecap = to_str(info.get("rw_uri"))
                readcap = to_str(info.get("ro_uri"))
                # a child URL can only be formed when we hold a writecap
                # for this directory
                if self.writecap:
                    child_path = "/".join(["uri",
                                           urllib.quote(self.writecap),
                                           urllib.quote(unicode_to_url(name))])
                    url = self.nodeurl + child_path
                else:
                    url = None
                self.children[name] = TahoeFileTarget(self.nodeurl, mutable,
                                                      writecap, readcap, url)
            elif kind == "dirnode":
                info = data[1]
                writecap = to_str(info.get("rw_uri"))
                readcap = to_str(info.get("ro_uri"))
                if writecap and writecap in self.cache:
                    subdir = self.cache[writecap]
                elif readcap and readcap in self.cache:
                    subdir = self.cache[readcap]
                else:
                    subdir = TahoeDirectoryTarget(self.nodeurl, self.cache,
                                                  self.progressfunc)
                    subdir.init_from_grid(writecap, readcap)
                    if writecap:
                        self.cache[writecap] = subdir
                    if readcap:
                        self.cache[readcap] = subdir
                    # only freshly-fetched directories are descended into;
                    # cached ones are used as they are
                    if recurse:
                        subdir.populate(recurse=True)
                self.children[name] = subdir
            else:
                # TODO: there should be an option to skip unknown nodes.
                raise TahoeError("Cannot copy unknown nodes (ticket #839). "
                                 "You probably need to use a later version of "
                                 "Tahoe-LAFS to copy this directory.")

    def get_child_target(self, name):
        """Return a target for the subdirectory 'name' of this directory.

        An already-known child is returned as-is; otherwise a new
        subdirectory is created with make_tahoe_subdirectory().
        """
        precondition(isinstance(name, unicode), name)
        if self.children is None:
            self.populate(recurse=False)
        if name in self.children:
            return self.children[name]
        new_writecap = make_tahoe_subdirectory(self.nodeurl, self.writecap, name)
        subdir = TahoeDirectoryTarget(self.nodeurl, self.cache,
                                      self.progressfunc)
        subdir.just_created(new_writecap)
        self.children[name] = subdir
        return subdir

    def put_file(self, name, inf):
        """Store the contents of 'inf' as child 'name'.

        An existing mutable child of that name is overwritten in place.
        Otherwise the data is uploaded as a new unlinked file and its cap is
        queued in 'new_children' for set_children().
        """
        precondition(isinstance(name, unicode), name)
        if not hasattr(inf, "seek"):
            inf = inf.read()

        if self.children is None:
            self.populate(recurse=False)

        # Check to see if we already have a mutable file by this name.
        # If so, overwrite that file in place.
        if name in self.children and self.children[name].mutable:
            self.children[name].put_file(inf)
            return
        # TODO: this always creates immutable files. We might want an option
        # to always create mutable files, or to copy mutable files into new
        # mutable files.
        upload_url = self.nodeurl + "uri"
        self.new_children[name] = PUT(upload_url, inf)

    def put_uri(self, name, filecap):
        """Queue 'filecap' to be linked as child 'name' by set_children()."""
        precondition(isinstance(name, unicode), name)
        self.new_children[name] = filecap

    def set_children(self):
        """POST all queued children to the directory with ?t=set_children.

        Does nothing when no children have been queued.
        """
        if not self.new_children:
            return
        url = (self.nodeurl + "uri/" + urllib.quote(self.writecap)
               + "?t=set_children")
        # it just so happens that ?t=set_children will accept both file
        # read-caps and write-caps as ['rw_uri'], and will handle either
        # correctly. So don't bother trying to figure out whether the one
        # we have is read-only or read-write.
        # TODO: think about how this affects forward-compatibility for
        # unknown caps
        set_data = dict((name, ["filenode", {"rw_uri": filecap}])
                        for (name, filecap) in self.new_children.items())
        POST(url, simplejson.dumps(set_data))
446
# Groupings of the source/target classes above, for use with isinstance().
# Each is a tuple, so they can be concatenated with "+" to test for
# membership in several groups at once.
FileSources = (LocalFileSource, TahoeFileSource)
DirectorySources = (LocalDirectorySource, TahoeDirectorySource)
FileTargets = (LocalFileTarget, TahoeFileTarget)
DirectoryTargets = (LocalDirectoryTarget, TahoeDirectoryTarget)
MissingTargets = (LocalMissingTarget, TahoeMissingTarget)
452
453 class Copier:
454
455     def do_copy(self, options, progressfunc=None):
456         if options['quiet']:
457             verbosity = 0
458         elif options['verbose']:
459             verbosity = 2
460         else:
461             verbosity = 1
462
463         nodeurl = options['node-url']
464         if nodeurl[-1] != "/":
465             nodeurl += "/"
466         self.nodeurl = nodeurl
467         self.progressfunc = progressfunc
468         self.options = options
469         self.aliases = options.aliases
470         self.verbosity = verbosity
471         self.stdout = options.stdout
472         self.stderr = options.stderr
473         if verbosity >= 2 and not self.progressfunc:
474             def progress(message):
475                 print >>self.stderr, message
476             self.progressfunc = progress
477         self.caps_only = options["caps-only"]
478         self.cache = {}
479         try:
480             status = self.try_copy()
481             return status
482         except TahoeError, te:
483             if verbosity >= 2:
484                 Failure().printTraceback(self.stderr)
485                 print >>self.stderr
486             te.display(self.stderr)
487             return 1
488
489     def try_copy(self):
490         """
491         All usage errors are caught here, not in a subroutine. This bottoms
492         out in copy_file_to_file() or copy_things_to_directory().
493         """
494         source_specs = self.options.sources
495         destination_spec = self.options.destination
496         recursive = self.options["recursive"]
497
498         target = self.get_target_info(destination_spec)
499         precondition(isinstance(target, FileTargets + DirectoryTargets + MissingTargets), target)
500         target_has_trailing_slash = destination_spec.endswith("/")
501
502         sources = [] # list of source objects
503         for ss in source_specs:
504             si = self.get_source_info(ss)
505             precondition(isinstance(si, FileSources + DirectorySources), si)
506             sources.append(si)
507
508         # if any source is a directory, must use -r
509         # if target is missing:
510         #    if source is a single file, target will be a file
511         #    else target will be a directory, so mkdir it
512         # if there are multiple sources, target must be a dir
513         # if target is a file, source must be a single file
514         # if target is directory, sources must be named or a dir
515
516         have_source_dirs = any([isinstance(s, DirectorySources)
517                                 for s in sources])
518         if have_source_dirs and not recursive:
519             # 'cp dir target' without -r: error
520             self.to_stderr("cannot copy directories without --recursive")
521             return 1
522         del recursive # -r is only used for signalling errors
523
524         if isinstance(target, FileTargets):
525             target_is_file = True
526         elif isinstance(target, DirectoryTargets):
527             target_is_file = False
528         else: # isinstance(target, MissingTargets)
529             if len(sources) == 1 and isinstance(sources[0], FileSources):
530                 target_is_file = True
531             else:
532                 target_is_file = False
533
534         if target_is_file and target_has_trailing_slash:
535             self.to_stderr("target is not a directory, but ends with a slash")
536             return 1
537
538         if len(sources) > 1 and target_is_file:
539             self.to_stderr("copying multiple things requires target be a directory")
540             return 1
541
542         if target_is_file:
543             _assert(len(sources) == 1, sources)
544             if not isinstance(sources[0], FileSources):
545                 # 'cp -r dir existingfile': error
546                 self.to_stderr("cannot copy directory into a file")
547                 return 1
548             return self.copy_file_to_file(sources[0], target)
549
550         # else target is a directory, so each source must be one of:
551         # * a named file (copied to a new file under the target)
552         # * a named directory (causes a new directory of the same name to be
553         #   created under the target, then the contents of the source are
554         #   copied into that directory)
555         # * an unnamed directory (the contents of the source are copied into
556         #   the target, without a new directory being made)
557         #
558         # If any source is an unnamed file, throw an error, since we have no
559         # way to name the output file.
560         _assert(isinstance(target, DirectoryTargets + MissingTargets), target)
561
562         for source in sources:
563             if isinstance(source, FileSources) and source.basename() is None:
564                 self.to_stderr("when copying into a directory, all source files must have names, but %s is unnamed" % quote_output(source_specs[0]))
565                 return 1
566         return self.copy_things_to_directory(sources, target)
567
568     def to_stderr(self, text):
569         print >>self.stderr, text
570
571     # FIXME reduce the amount of near-duplicate code between get_target_info
572     # and get_source_info.
573
574     def get_target_info(self, destination_spec):
575         precondition(isinstance(destination_spec, unicode), destination_spec)
576         rootcap, path_utf8 = get_alias(self.aliases, destination_spec, None)
577         path = path_utf8.decode("utf-8")
578         if rootcap == DefaultAliasMarker:
579             # no alias, so this is a local file
580             pathname = abspath_expanduser_unicode(path)
581             if not os.path.exists(pathname):
582                 t = LocalMissingTarget(pathname)
583             elif os.path.isdir(pathname):
584                 t = LocalDirectoryTarget(self.progress, pathname)
585             else:
586                 # TODO: should this be _assert? what happens if the target is
587                 # a special file?
588                 assert os.path.isfile(pathname), pathname
589                 t = LocalFileTarget(pathname) # non-empty
590         else:
591             # this is a tahoe object
592             url = self.nodeurl + "uri/%s" % urllib.quote(rootcap)
593             if path:
594                 url += "/" + escape_path(path)
595
596             resp = do_http("GET", url + "?t=json")
597             if resp.status == 404:
598                 # doesn't exist yet
599                 t = TahoeMissingTarget(url)
600             elif resp.status == 200:
601                 parsed = simplejson.loads(resp.read())
602                 nodetype, d = parsed
603                 if nodetype == "dirnode":
604                     t = TahoeDirectoryTarget(self.nodeurl, self.cache,
605                                              self.progress)
606                     t.init_from_parsed(parsed)
607                 else:
608                     writecap = to_str(d.get("rw_uri"))
609                     readcap = to_str(d.get("ro_uri"))
610                     mutable = d.get("mutable", False)
611                     t = TahoeFileTarget(self.nodeurl, mutable,
612                                         writecap, readcap, url)
613             else:
614                 raise HTTPError("Error examining target %s"
615                                  % quote_output(destination_spec), resp)
616         return t
617
618     def get_source_info(self, source_spec):
619         """
620         This turns an argv string into a (Local|Tahoe)(File|Directory)Source.
621         """
622         precondition(isinstance(source_spec, unicode), source_spec)
623         rootcap, path_utf8 = get_alias(self.aliases, source_spec, None)
624         path = path_utf8.decode("utf-8")
625         if rootcap == DefaultAliasMarker:
626             # no alias, so this is a local file
627             pathname = abspath_expanduser_unicode(path)
628             name = os.path.basename(pathname)
629             if not os.path.exists(pathname):
630                 raise MissingSourceError(source_spec, quotefn=quote_local_unicode_path)
631             if os.path.isdir(pathname):
632                 t = LocalDirectorySource(self.progress, pathname, name)
633             else:
634                 assert os.path.isfile(pathname)
635                 t = LocalFileSource(pathname, name) # non-empty
636         else:
637             # this is a tahoe object
638             url = self.nodeurl + "uri/%s" % urllib.quote(rootcap)
639             name = None
640             if path:
641                 if path.endswith("/"):
642                     path = path[:-1]
643                 url += "/" + escape_path(path)
644                 last_slash = path.rfind(u"/")
645                 name = path
646                 if last_slash != -1:
647                     name = path[last_slash+1:]
648
649             resp = do_http("GET", url + "?t=json")
650             if resp.status == 404:
651                 raise MissingSourceError(source_spec)
652             elif resp.status != 200:
653                 raise HTTPError("Error examining source %s" % quote_output(source_spec),
654                                 resp)
655             parsed = simplejson.loads(resp.read())
656             nodetype, d = parsed
657             if nodetype == "dirnode":
658                 t = TahoeDirectorySource(self.nodeurl, self.cache,
659                                          self.progress, name)
660                 t.init_from_parsed(parsed)
661             else:
662                 writecap = to_str(d.get("rw_uri"))
663                 readcap = to_str(d.get("ro_uri"))
664                 mutable = d.get("mutable", False) # older nodes don't provide it
665                 t = TahoeFileSource(self.nodeurl, mutable, writecap, readcap, name)
666         return t
667
668
669     def need_to_copy_bytes(self, source, target):
670         if source.need_to_copy_bytes:
671             # mutable tahoe files, and local files
672             return True
673         if isinstance(target, (LocalFileTarget, LocalDirectoryTarget)):
674             return True
675         return False
676
677     def announce_success(self, msg):
678         if self.verbosity >= 1:
679             print >>self.stdout, "Success: %s" % msg
680         return 0
681
682     def copy_file_to_file(self, source, target):
683         precondition(isinstance(source, FileSources), source)
684         precondition(isinstance(target, FileTargets + MissingTargets), target)
685         if self.need_to_copy_bytes(source, target):
686             # if the target is a local directory, this will just write the
687             # bytes to disk. If it is a tahoe directory, it will upload the
688             # data, and stash the new filecap for a later set_children call.
689             f = source.open(self.caps_only)
690             target.put_file(f)
691             return self.announce_success("file copied")
692         # otherwise we're copying tahoe to tahoe, and using immutable files,
693         # so we can just make a link. TODO: this probably won't always work:
694         # need to enumerate the cases and analyze them.
695         target.put_uri(source.bestcap())
696         return self.announce_success("file linked")
697
698     def copy_things_to_directory(self, sources, target):
699         # step one: if the target is missing, we should mkdir it
700         target = self.maybe_create_target(target)
701         target.populate(recurse=False)
702
703         # step two: scan any source dirs, recursively, to find children
704         for s in sources:
705             if isinstance(s, DirectorySources):
706                 s.populate(recurse=True)
707             if isinstance(s, FileSources):
708                 # each source must have a name, or be a directory
709                 _assert(s.basename() is not None, s)
710
711         # step three: find a target for each source node, creating
712         # directories as necessary. 'targetmap' is a dictionary that uses
713         # target Directory instances as keys, and has values of (name:
714         # sourceobject) dicts for all the files that need to wind up there.
715         targetmap = self.build_targetmap(sources, target)
716
717         # step four: walk through the list of targets. For each one, copy all
718         # the files. If the target is a TahoeDirectory, upload and create
719         # read-caps, then do a set_children to the target directory.
720         self.copy_to_targetmap(targetmap)
721
722         return self.announce_success("files copied")
723
724     def maybe_create_target(self, target):
725         if isinstance(target, LocalMissingTarget):
726             os.makedirs(target.pathname)
727             target = LocalDirectoryTarget(self.progress, target.pathname)
728         elif isinstance(target, TahoeMissingTarget):
729             writecap = mkdir(target.url)
730             target = TahoeDirectoryTarget(self.nodeurl, self.cache,
731                                           self.progress)
732             target.just_created(writecap)
733         # afterwards, or otherwise, it will be a directory
734         precondition(isinstance(target, DirectoryTargets), target)
735         return target
736
737     def build_targetmap(self, sources, target):
738         num_source_files = len([s for s in sources
739                                 if isinstance(s, FileSources)])
740         num_source_dirs = len([s for s in sources
741                                if isinstance(s, DirectorySources)])
742         self.progress("attaching sources to targets, "
743                       "%d files / %d dirs in root" %
744                       (num_source_files, num_source_dirs))
745
746         # this maps each target directory to a list of source files that need
747         # to be copied into it. All source files have names.
748         targetmap = defaultdict(list)
749
750         for s in sources:
751             if isinstance(s, FileSources):
752                 targetmap[target].append(s)
753             else:
754                 _assert(isinstance(s, DirectorySources), s)
755                 name = s.basename()
756                 if name is not None:
757                     # named sources get a new directory. see #2329
758                     new_target = target.get_child_target(name)
759                 else:
760                     # unnamed sources have their contents copied directly
761                     new_target = target
762                 self.assign_targets(targetmap, s, new_target)
763
764         self.progress("targets assigned, %s dirs, %s files" %
765                       (len(targetmap), self.count_files_to_copy(targetmap)))
766         return targetmap
767
768     def assign_targets(self, targetmap, source, target):
769         # copy everything in the source into the target
770         precondition(isinstance(source, DirectorySources), source)
771         for name, child in source.children.items():
772             if isinstance(child, DirectorySources):
773                 # we will need a target directory for this one
774                 subtarget = target.get_child_target(name)
775                 self.assign_targets(targetmap, child, subtarget)
776             else:
777                 _assert(isinstance(child, FileSources), child)
778                 targetmap[target].append(child)
779
780     def copy_to_targetmap(self, targetmap):
781         files_to_copy = self.count_files_to_copy(targetmap)
782         self.progress("starting copy, %d files, %d directories" %
783                       (files_to_copy, len(targetmap)))
784         files_copied = 0
785         targets_finished = 0
786
787         for target, sources in targetmap.items():
788             _assert(isinstance(target, DirectoryTargets), target)
789             for source in sources:
790                 _assert(isinstance(source, FileSources), source)
791                 self.copy_file_into_dir(source, source.basename(), target)
792                 files_copied += 1
793                 self.progress("%d/%d files, %d/%d directories" %
794                               (files_copied, files_to_copy,
795                                targets_finished, len(targetmap)))
796             target.set_children()
797             targets_finished += 1
798             self.progress("%d/%d directories" %
799                           (targets_finished, len(targetmap)))
800
801     def count_files_to_copy(self, targetmap):
802         return sum([len(sources) for sources in targetmap.values()])
803
804     def copy_file_into_dir(self, source, name, target):
805         precondition(isinstance(source, FileSources), source)
806         precondition(isinstance(target, DirectoryTargets), target)
807         precondition(isinstance(name, unicode), name)
808         if self.need_to_copy_bytes(source, target):
809             # if the target is a local directory, this will just write the
810             # bytes to disk. If it is a tahoe directory, it will upload the
811             # data, and stash the new filecap for a later set_children call.
812             f = source.open(self.caps_only)
813             target.put_file(name, f)
814             return
815         # otherwise we're copying tahoe to tahoe, and using immutable files,
816         # so we can just make a link
817         target.put_uri(name, source.bestcap())
818
819
820     def progress(self, message):
821         #print message
822         if self.progressfunc:
823             self.progressfunc(message)
824
825
def copy(options):
    """Run a copy described by 'options' on a fresh Copier.

    Returns whatever Copier.do_copy() returns.
    """
    copier = Copier()
    return copier.do_copy(options)
828
829 # error cases that need improvement:
830 #  local-file-in-the-way
831 #   touch proposed
832 #   tahoe cp -r my:docs/proposed/denver.txt proposed/denver.txt
833 #  handling of unknown nodes
834
835 # things that maybe should be errors but aren't
836 #  local-dir-in-the-way
837 #   mkdir denver.txt
838 #   tahoe cp -r my:docs/proposed/denver.txt denver.txt
839 #   (creates denver.txt/denver.txt)
840
841 # error cases that look good:
842 #  tahoe cp -r my:docs/missing missing
843 #  disconnect servers
844 #   tahoe cp -r my:docs/missing missing  -> No JSON object could be decoded
845 #  tahoe-file-in-the-way (when we want to make a directory)
846 #   tahoe put README my:docs
847 #   tahoe cp -r docs/proposed my:docs/proposed