git.rkrishnan.org Git - tahoe-lafs/tahoe-lafs.git/blob - src/allmydata/scripts/tahoe_cp.py
cp: error on target-filename collisions, rather than overwrite
[tahoe-lafs/tahoe-lafs.git] / src / allmydata / scripts / tahoe_cp.py
1
2 import os.path
3 import urllib
4 import simplejson
5 from collections import defaultdict
6 from cStringIO import StringIO
7 from twisted.python.failure import Failure
8 from allmydata.scripts.common import get_alias, escape_path, \
9                                      DefaultAliasMarker, TahoeError
10 from allmydata.scripts.common_http import do_http, HTTPError
11 from allmydata import uri
12 from allmydata.util import fileutil
13 from allmydata.util.fileutil import abspath_expanduser_unicode, precondition_abspath
14 from allmydata.util.encodingutil import unicode_to_url, listdir_unicode, quote_output, \
15     quote_local_unicode_path, to_str
16 from allmydata.util.assertutil import precondition, _assert
17
18
class MissingSourceError(TahoeError):
    """Error reporting that a named file or directory does not exist."""

    def __init__(self, name, quotefn=quote_output):
        # quotefn controls how the offending name is rendered in the message.
        message = "No such file or directory %s" % quotefn(name)
        TahoeError.__init__(self, message)
22
class FilenameWithTrailingSlashError(TahoeError):
    """Error reporting a non-directory source whose name ends with a slash."""

    def __init__(self, name, quotefn=quote_output):
        # quotefn controls how the offending name is rendered in the message.
        message = ("source '%s' is not a directory, but ends with a slash"
                   % quotefn(name))
        TahoeError.__init__(self, message)
26
class WeirdSourceError(TahoeError):
    """Error reporting a local source that is neither a file nor a directory."""

    def __init__(self, absname):
        # The path is always rendered with the local-path quoting helper.
        message = ("source '%s' is neither a file nor a directory, I can't handle it"
                   % quote_local_unicode_path(absname))
        TahoeError.__init__(self, message)
31
def GET_to_file(url):
    """Issue an HTTP GET for `url` and return the response object.

    Only a 200 status is accepted; any other status raises HTTPError
    carrying the response.
    """
    response = do_http("GET", url)
    if response.status != 200:
        raise HTTPError("Error during GET", response)
    return response
37
def GET_to_string(url):
    """Fetch `url` via GET_to_file() and return the result of read() on it."""
    return GET_to_file(url).read()
41
def PUT(url, data):
    """Send `data` to `url` with an HTTP PUT and return the response body.

    Statuses 200 and 201 count as success; anything else raises HTTPError
    carrying the response.
    """
    response = do_http("PUT", url, data)
    if response.status not in (200, 201):
        raise HTTPError("Error during PUT", response)
    return response.read()
47
def POST(url, data):
    """Send `data` to `url` with an HTTP POST and return the response body.

    Statuses 200 and 201 count as success; anything else raises HTTPError
    carrying the response.
    """
    response = do_http("POST", url, data)
    if response.status not in (200, 201):
        raise HTTPError("Error during POST", response)
    return response.read()
53
def mkdir(targeturl):
    """POST a `?t=mkdir` request to `targeturl`.

    Returns the response body with surrounding whitespace stripped. Raises
    HTTPError unless the status is 200 or 201.
    """
    response = do_http("POST", targeturl + "?t=mkdir")
    if response.status not in (200, 201):
        raise HTTPError("Error during mkdir", response)
    return response.read().strip()
60
def make_tahoe_subdirectory(nodeurl, parent_writecap, name):
    """POST a `?t=mkdir` request for child `name` under `parent_writecap`.

    The URL is `<nodeurl>uri/<quoted cap>/<quoted name>?t=mkdir`. Returns the
    stripped response body (callers in this file treat it as the new
    directory's write-cap). Raises HTTPError unless the status is 200 or 201.
    """
    quoted_cap = urllib.quote(parent_writecap)
    quoted_name = urllib.quote(unicode_to_url(name))
    url = nodeurl + "uri/" + quoted_cap + "/" + quoted_name + "?t=mkdir"
    response = do_http("POST", url)
    if response.status not in (200, 201):
        raise HTTPError("Error during mkdir", response)
    return response.read().strip()
70
71
class LocalFileSource:
    """A copy source that is a regular file on the local filesystem."""

    def __init__(self, pathname, basename):
        # pathname must satisfy precondition_abspath(); basename is the name
        # reported to callers via basename().
        precondition_abspath(pathname)
        self._basename = basename
        self.pathname = pathname

    def basename(self):
        """Return the name given to the constructor."""
        return self._basename

    def need_to_copy_bytes(self):
        """Local files always report True here."""
        return True

    def open(self, caps_only):
        """Return the file opened for binary reading; `caps_only` is unused."""
        handle = open(self.pathname, "rb")
        return handle
86
class LocalFileTarget:
    """A copy destination that is an existing file on the local filesystem."""

    def __init__(self, pathname):
        # pathname must satisfy precondition_abspath().
        precondition_abspath(pathname)
        self.pathname = pathname

    def put_file(self, inf):
        """Hand `inf` to fileutil.put_file() for writing at self.pathname."""
        destination = self.pathname
        fileutil.put_file(destination, inf)
94
class LocalMissingTarget:
    """A local copy destination path that does not exist yet."""

    def __init__(self, pathname):
        # pathname must satisfy precondition_abspath().
        precondition_abspath(pathname)
        self.pathname = pathname

    def put_file(self, inf):
        """Hand `inf` to fileutil.put_file() for writing at self.pathname."""
        destination = self.pathname
        fileutil.put_file(destination, inf)
102
class LocalDirectorySource:
    """A copy source that is a directory on the local filesystem."""

    def __init__(self, progressfunc, pathname, basename):
        precondition_abspath(pathname)

        self._basename = basename
        self.pathname = pathname
        self.progressfunc = progressfunc
        # None until populate() runs; afterwards a dict of name -> source.
        self.children = None

    def basename(self):
        """Return the name given to the constructor."""
        return self._basename

    def populate(self, recurse):
        """Scan the directory once and fill self.children.

        Subdirectories become LocalDirectorySource children (populated too
        when `recurse` is true) and regular files become LocalFileSource
        children. A second call is a no-op.
        """
        if self.children is not None:
            return
        self.children = {}
        entries = listdir_unicode(self.pathname)
        total = len(entries)
        for index, entry in enumerate(entries):
            self.progressfunc("examining %d of %d" % (index + 1, total))
            full_path = os.path.join(self.pathname, entry)
            if os.path.isdir(full_path):
                subdir = LocalDirectorySource(self.progressfunc, full_path, entry)
                self.children[entry] = subdir
                if recurse:
                    subdir.populate(recurse=True)
                continue
            if os.path.isfile(full_path):
                self.children[entry] = LocalFileSource(full_path, entry)
            # Anything else (could be a dangling symlink) is probably not
            # copy-able, so it is left out.
            # TODO: output a warning
134
class LocalDirectoryTarget:
    """A copy destination that is an existing directory on local disk."""

    def __init__(self, progressfunc, pathname):
        precondition_abspath(pathname)

        self.progressfunc = progressfunc
        self.pathname = pathname
        # None until populate() runs; afterwards a dict of name -> target.
        self.children = None

    def populate(self, recurse):
        """Scan the directory once and fill self.children.

        Subdirectories become LocalDirectoryTarget children (populated too
        when `recurse` is true) and regular files become LocalFileTarget
        children. Entries that are neither are skipped. A second call is a
        no-op.
        """
        if self.children is not None:
            return
        self.children = {}
        children = listdir_unicode(self.pathname)
        for i,n in enumerate(children):
            self.progressfunc("examining %d of %d" % (i+1, len(children)))
            pn = os.path.join(self.pathname, n)
            if os.path.isdir(pn):
                child = LocalDirectoryTarget(self.progressfunc, pn)
                self.children[n] = child
                if recurse:
                    child.populate(recurse=True)
            elif os.path.isfile(pn):
                self.children[n] = LocalFileTarget(pn)
            else:
                # Neither a directory nor a regular file (could be a dangling
                # symlink or a special file). An unrelated entry like this
                # must not abort the whole copy, so it is skipped, matching
                # LocalDirectorySource.populate().
                pass

    def get_child_target(self, name):
        """Return the target registered under `name`, creating it if absent.

        When no child of that name is known, the subdirectory is created on
        disk with os.makedirs() and a new LocalDirectoryTarget is recorded
        and returned.
        """
        precondition(isinstance(name, unicode), name)
        precondition(len(name), name) # don't want ""
        if self.children is None:
            self.populate(recurse=False)
        if name in self.children:
            return self.children[name]
        pathname = os.path.join(self.pathname, name)
        os.makedirs(pathname)
        child = LocalDirectoryTarget(self.progressfunc, pathname)
        self.children[name] = child
        return child

    def put_file(self, name, inf):
        """Hand `inf` to fileutil.put_file() for the child path `name`."""
        precondition(isinstance(name, unicode), name)
        pathname = os.path.join(self.pathname, name)
        fileutil.put_file(pathname, inf)

    def set_children(self):
        """No-op; present so local and Tahoe directory targets share an API."""
        pass
180
181
class TahoeFileSource:
    """A copy source that is a file node reached through the Tahoe node URL."""

    def __init__(self, nodeurl, mutable, writecap, readcap, basename):
        self._basename = basename # unicode, or None for raw filecaps
        self.nodeurl = nodeurl
        self.mutable = mutable
        self.writecap = writecap
        self.readcap = readcap

    def basename(self):
        """Return the name given to the constructor (may be None)."""
        return self._basename

    def need_to_copy_bytes(self):
        """Return True when the file is marked mutable, False otherwise."""
        return True if self.mutable else False

    def open(self, caps_only):
        """Return a readable object for this file.

        With `caps_only` true this is a StringIO holding the read-cap itself;
        otherwise it is the response from GET_to_file() on the read-cap URL.
        """
        if not caps_only:
            return GET_to_file(self.nodeurl + "uri/" + urllib.quote(self.readcap))
        return StringIO(self.readcap)

    def bestcap(self):
        """Return the write-cap when there is one, else the read-cap."""
        if self.writecap:
            return self.writecap
        return self.readcap
206
class TahoeFileTarget:
    """A copy destination that is an existing file node at a known URL."""

    def __init__(self, nodeurl, mutable, writecap, readcap, url):
        self.url = url
        self.nodeurl = nodeurl
        self.mutable = mutable
        self.writecap = writecap
        self.readcap = readcap

    def put_file(self, inf):
        """PUT the contents of `inf` to self.url, replacing it in place."""
        assert self.url
        # do_http() currently requires a string or a filehandle with a real
        # .seek, so anything else is read into memory first.
        payload = inf if hasattr(inf, "seek") else inf.read()
        PUT(self.url, payload)
        # TODO: this always creates immutable files. We might want an option
        # to always create mutable files, or to copy mutable files into new
        # mutable files. ticket #835
226
class TahoeDirectorySource:
    """A copy source that is a directory node reached through the node URL.

    One of init_from_grid() or init_from_parsed() must run before populate().
    """

    def __init__(self, nodeurl, cache, progressfunc, basename):
        self._basename = basename # unicode, or None for raw dircaps
        self.nodeurl = nodeurl
        self.cache = cache
        self.progressfunc = progressfunc

    def basename(self):
        """Return the name given to the constructor (may be None)."""
        return self._basename

    def _record_dirnode(self, d):
        # Shared tail of both init_* methods: remember mutability and the raw
        # child table, and mark the children as not yet populated.
        self.mutable = d.get("mutable", False) # older nodes don't provide it
        self.children_d = dict((unicode(childname), value)
                               for (childname, value)
                               in d["children"].iteritems())
        self.children = None

    def init_from_grid(self, writecap, readcap):
        """Fetch this directory's `?t=json` description and record it.

        Uses the write-cap when present, else the read-cap. Raises HTTPError
        unless the status is 200.
        """
        self.writecap = writecap
        self.readcap = readcap
        bestcap = writecap or readcap
        url = self.nodeurl + "uri/%s" % urllib.quote(bestcap)
        resp = do_http("GET", url + "?t=json")
        if resp.status != 200:
            raise HTTPError("Error examining source directory", resp)
        nodetype, d = simplejson.loads(resp.read())
        assert nodetype == "dirnode"
        self._record_dirnode(d)

    def init_from_parsed(self, parsed):
        """Record an already-decoded `(nodetype, dict)` description."""
        nodetype, d = parsed
        self.writecap = to_str(d.get("rw_uri"))
        self.readcap = to_str(d.get("ro_uri"))
        self._record_dirnode(d)

    def populate(self, recurse):
        """Turn the raw child table into source objects, once.

        File nodes become TahoeFileSource children. Directory nodes are taken
        from self.cache when their cap is already there; otherwise they are
        fetched, cached under both caps, and (when `recurse` is true)
        populated too. Any other node type raises TahoeError.
        """
        if self.children is not None:
            return
        self.children = {}
        total = len(self.children_d)
        for index, (name, data) in enumerate(self.children_d.items()):
            self.progressfunc("examining %d of %d" % (index + 1, total))
            nodetype = data[0]
            if nodetype == "filenode":
                info = data[1]
                mutable = info.get("mutable", False)
                writecap = to_str(info.get("rw_uri"))
                readcap = to_str(info.get("ro_uri"))
                self.children[name] = TahoeFileSource(self.nodeurl, mutable,
                                                      writecap, readcap, name)
                continue
            if nodetype != "dirnode":
                # TODO: there should be an option to skip unknown nodes.
                raise TahoeError("Cannot copy unknown nodes (ticket #839). "
                                 "You probably need to use a later version of "
                                 "Tahoe-LAFS to copy this directory.")
            info = data[1]
            writecap = to_str(info.get("rw_uri"))
            readcap = to_str(info.get("ro_uri"))
            if writecap and writecap in self.cache:
                subdir = self.cache[writecap]
            elif readcap and readcap in self.cache:
                subdir = self.cache[readcap]
            else:
                subdir = TahoeDirectorySource(self.nodeurl, self.cache,
                                              self.progressfunc, name)
                subdir.init_from_grid(writecap, readcap)
                if writecap:
                    self.cache[writecap] = subdir
                if readcap:
                    self.cache[readcap] = subdir
                # Only freshly fetched directories are descended into here.
                if recurse:
                    subdir.populate(recurse=True)
            self.children[name] = subdir
299
class TahoeMissingTarget:
    """A copy destination URL at which nothing exists yet."""

    def __init__(self, url):
        self.url = url

    def put_file(self, inf):
        """PUT the contents of `inf` to self.url."""
        # Objects without a .seek are read into memory before the PUT.
        payload = inf if hasattr(inf, "seek") else inf.read()
        PUT(self.url, payload)
        # TODO: this always creates immutable files. We might want an option
        # to always create mutable files, or to copy mutable files into new
        # mutable files.

    def put_uri(self, filecap):
        """PUT `filecap` to self.url with `?t=uri` and return PUT()'s result."""
        # I'm not sure this will always work
        link_url = self.url + "?t=uri"
        return PUT(link_url, filecap)
316
class TahoeDirectoryTarget:
    """A copy destination that is a directory node reached via the node URL.

    One of init_from_parsed(), init_from_grid() or just_created() must run
    before the other methods. Uploaded files are collected in
    self.new_children and linked in by set_children().
    """

    def __init__(self, nodeurl, cache, progressfunc):
        self.nodeurl = nodeurl
        self.cache = cache
        self.progressfunc = progressfunc
        # name -> filecap for files waiting to be linked by set_children()
        self.new_children = {}

    def init_from_parsed(self, parsed):
        """Record an already-decoded `(nodetype, dict)` description."""
        nodetype, d = parsed
        self.writecap = to_str(d.get("rw_uri"))
        self.readcap = to_str(d.get("ro_uri"))
        self.mutable = d.get("mutable", False) # older nodes don't provide it
        self.children_d = dict( [(unicode(name),value)
                                 for (name,value)
                                 in d["children"].iteritems()] )
        self.children = None

    def init_from_grid(self, writecap, readcap):
        """Fetch this directory's `?t=json` description and record it.

        Uses the write-cap when present, else the read-cap. Raises HTTPError
        unless the status is 200.
        """
        self.writecap = writecap
        self.readcap = readcap
        bestcap = writecap or readcap
        url = self.nodeurl + "uri/%s" % urllib.quote(bestcap)
        resp = do_http("GET", url + "?t=json")
        if resp.status != 200:
            raise HTTPError("Error examining target directory", resp)
        parsed = simplejson.loads(resp.read())
        nodetype, d = parsed
        assert nodetype == "dirnode"
        self.mutable = d.get("mutable", False) # older nodes don't provide it
        self.children_d = dict( [(unicode(name),value)
                                 for (name,value)
                                 in d["children"].iteritems()] )
        self.children = None

    def just_created(self, writecap):
        """Initialise as a brand-new, empty, mutable directory."""
        # TODO: maybe integrate this with the constructor
        self.writecap = writecap
        self.readcap = uri.from_string(writecap).get_readonly().to_string()
        self.mutable = True
        self.children_d = {}
        self.children = {}

    def populate(self, recurse):
        """Turn the raw child table into target objects, once.

        File nodes become TahoeFileTarget children (with a URL only when this
        directory has a write-cap). Directory nodes are taken from self.cache
        when their cap is already there; otherwise they are fetched, cached
        under both caps, and (when `recurse` is true) populated too. Any
        other node type raises TahoeError.
        """
        if self.children is not None:
            return
        self.children = {}
        for i,(name, data) in enumerate(self.children_d.items()):
            self.progressfunc("examining %d of %d" % (i+1, len(self.children_d)))
            if data[0] == "filenode":
                mutable = data[1].get("mutable", False)
                writecap = to_str(data[1].get("rw_uri"))
                readcap = to_str(data[1].get("ro_uri"))
                url = None
                if self.writecap:
                    url = self.nodeurl + "/".join(["uri",
                                                   urllib.quote(self.writecap),
                                                   urllib.quote(unicode_to_url(name))])
                self.children[name] = TahoeFileTarget(self.nodeurl, mutable,
                                                      writecap, readcap, url)
            elif data[0] == "dirnode":
                writecap = to_str(data[1].get("rw_uri"))
                readcap = to_str(data[1].get("ro_uri"))
                if writecap and writecap in self.cache:
                    child = self.cache[writecap]
                elif readcap and readcap in self.cache:
                    child = self.cache[readcap]
                else:
                    child = TahoeDirectoryTarget(self.nodeurl, self.cache,
                                                 self.progressfunc)
                    child.init_from_grid(writecap, readcap)
                    if writecap:
                        self.cache[writecap] = child
                    if readcap:
                        self.cache[readcap] = child
                    if recurse:
                        child.populate(recurse=True)
                self.children[name] = child
            else:
                # TODO: there should be an option to skip unknown nodes.
                raise TahoeError("Cannot copy unknown nodes (ticket #839). "
                                 "You probably need to use a later version of "
                                 "Tahoe-LAFS to copy this directory.")

    def get_child_target(self, name):
        """Return the target for child `name`, creating a subdirectory if absent."""
        # return a new target for a named subdirectory of this dir
        precondition(isinstance(name, unicode), name)
        if self.children is None:
            self.populate(recurse=False)
        if name in self.children:
            return self.children[name]
        writecap = make_tahoe_subdirectory(self.nodeurl, self.writecap, name)
        child = TahoeDirectoryTarget(self.nodeurl, self.cache,
                                     self.progressfunc)
        child.just_created(writecap)
        self.children[name] = child
        return child

    def put_file(self, name, inf):
        """Upload `inf` as child `name` of this directory.

        An existing mutable file of that name is overwritten in place.
        Otherwise the data is uploaded as a new file and its cap is queued in
        self.new_children for set_children().

        Raises TahoeError if `name` is already a subdirectory here.
        """
        precondition(isinstance(name, unicode), name)
        url = self.nodeurl + "uri"
        if not hasattr(inf, "seek"):
            inf = inf.read()

        if self.children is None:
            self.populate(recurse=False)

        existing = self.children.get(name)
        if isinstance(existing, TahoeDirectoryTarget):
            # A directory child also carries a .mutable flag, but its
            # put_file() takes (name, inf), so treating it like a mutable
            # file would fail with a TypeError. Refuse explicitly instead of
            # crashing or replacing the directory link with a file.
            raise TahoeError("cannot overwrite directory %s with a file"
                             % quote_output(name))

        # Check to see if we already have a mutable file by this name.
        # If so, overwrite that file in place.
        if existing is not None and existing.mutable:
            existing.put_file(inf)
        else:
            filecap = PUT(url, inf)
            # TODO: this always creates immutable files. We might want an option
            # to always create mutable files, or to copy mutable files into new
            # mutable files.
            self.new_children[name] = filecap

    def put_uri(self, name, filecap):
        """Queue `filecap` to be linked as child `name` by set_children()."""
        precondition(isinstance(name, unicode), name)
        self.new_children[name] = filecap

    def set_children(self):
        """Link every queued child with a single `?t=set_children` POST.

        Does nothing when no children are queued.
        """
        if not self.new_children:
            return
        url = (self.nodeurl + "uri/" + urllib.quote(self.writecap)
               + "?t=set_children")
        set_data = {}
        for (name, filecap) in self.new_children.items():
            # it just so happens that ?t=set_children will accept both file
            # read-caps and write-caps as ['rw_uri'], and will handle either
            # correctly. So don't bother trying to figure out whether the one
            # we have is read-only or read-write.
            # TODO: think about how this affects forward-compatibility for
            # unknown caps
            set_data[name] = ["filenode", {"rw_uri": filecap}]
        body = simplejson.dumps(set_data)
        POST(url, body)
454
# The concrete source/target classes grouped by role. These tuples are passed
# to isinstance() (sometimes concatenated) to classify an object without
# caring whether it is local or lives in the Tahoe grid.
FileSources = (LocalFileSource, TahoeFileSource)
DirectorySources = (LocalDirectorySource, TahoeDirectorySource)
FileTargets = (LocalFileTarget, TahoeFileTarget)
DirectoryTargets = (LocalDirectoryTarget, TahoeDirectoryTarget)
MissingTargets = (LocalMissingTarget, TahoeMissingTarget)
460
461 class Copier:
462
463     def do_copy(self, options, progressfunc=None):
464         if options['quiet']:
465             verbosity = 0
466         elif options['verbose']:
467             verbosity = 2
468         else:
469             verbosity = 1
470
471         nodeurl = options['node-url']
472         if nodeurl[-1] != "/":
473             nodeurl += "/"
474         self.nodeurl = nodeurl
475         self.progressfunc = progressfunc
476         self.options = options
477         self.aliases = options.aliases
478         self.verbosity = verbosity
479         self.stdout = options.stdout
480         self.stderr = options.stderr
481         if verbosity >= 2 and not self.progressfunc:
482             def progress(message):
483                 print >>self.stderr, message
484             self.progressfunc = progress
485         self.caps_only = options["caps-only"]
486         self.cache = {}
487         try:
488             status = self.try_copy()
489             return status
490         except TahoeError, te:
491             if verbosity >= 2:
492                 Failure().printTraceback(self.stderr)
493                 print >>self.stderr
494             te.display(self.stderr)
495             return 1
496
497     def try_copy(self):
498         """
499         All usage errors (except for target filename collisions) are caught
500         here, not in a subroutine. This bottoms out in copy_file_to_file() or
501         copy_things_to_directory().
502         """
503         source_specs = self.options.sources
504         destination_spec = self.options.destination
505         recursive = self.options["recursive"]
506
507         target = self.get_target_info(destination_spec)
508         precondition(isinstance(target, FileTargets + DirectoryTargets + MissingTargets), target)
509         target_has_trailing_slash = destination_spec.endswith("/")
510
511         sources = [] # list of source objects
512         for ss in source_specs:
513             try:
514                 si = self.get_source_info(ss)
515             except FilenameWithTrailingSlashError as e:
516                 self.to_stderr(str(e))
517                 return 1
518             precondition(isinstance(si, FileSources + DirectorySources), si)
519             sources.append(si)
520
521         # if any source is a directory, must use -r
522         # if target is missing:
523         #    if source is a single file, target will be a file
524         #    else target will be a directory, so mkdir it
525         # if there are multiple sources, target must be a dir
526         # if target is a file, source must be a single file
527         # if target is directory, sources must be named or a dir
528
529         have_source_dirs = any([isinstance(s, DirectorySources)
530                                 for s in sources])
531         if have_source_dirs and not recursive:
532             # 'cp dir target' without -r: error
533             self.to_stderr("cannot copy directories without --recursive")
534             return 1
535         del recursive # -r is only used for signalling errors
536
537         if isinstance(target, FileTargets):
538             target_is_file = True
539         elif isinstance(target, DirectoryTargets):
540             target_is_file = False
541         else: # isinstance(target, MissingTargets)
542             if len(sources) == 1 and isinstance(sources[0], FileSources):
543                 target_is_file = True
544             else:
545                 target_is_file = False
546
547         if target_is_file and target_has_trailing_slash:
548             self.to_stderr("target is not a directory, but ends with a slash")
549             return 1
550
551         if len(sources) > 1 and target_is_file:
552             self.to_stderr("copying multiple things requires target be a directory")
553             return 1
554
555         if target_is_file:
556             _assert(len(sources) == 1, sources)
557             if not isinstance(sources[0], FileSources):
558                 # 'cp -r dir existingfile': error
559                 self.to_stderr("cannot copy directory into a file")
560                 return 1
561             return self.copy_file_to_file(sources[0], target)
562
563         # else target is a directory, so each source must be one of:
564         # * a named file (copied to a new file under the target)
565         # * a named directory (causes a new directory of the same name to be
566         #   created under the target, then the contents of the source are
567         #   copied into that directory)
568         # * an unnamed directory (the contents of the source are copied into
569         #   the target, without a new directory being made)
570         #
571         # If any source is an unnamed file, throw an error, since we have no
572         # way to name the output file.
573         _assert(isinstance(target, DirectoryTargets + MissingTargets), target)
574
575         for source in sources:
576             if isinstance(source, FileSources) and source.basename() is None:
577                 self.to_stderr("when copying into a directory, all source files must have names, but %s is unnamed" % quote_output(source_specs[0]))
578                 return 1
579         return self.copy_things_to_directory(sources, target)
580
    def to_stderr(self, text):
        """Print `text` as one line on self.stderr."""
        print >>self.stderr, text
583
584     # FIXME reduce the amount of near-duplicate code between get_target_info
585     # and get_source_info.
586
587     def get_target_info(self, destination_spec):
588         precondition(isinstance(destination_spec, unicode), destination_spec)
589         rootcap, path_utf8 = get_alias(self.aliases, destination_spec, None)
590         path = path_utf8.decode("utf-8")
591         if rootcap == DefaultAliasMarker:
592             # no alias, so this is a local file
593             pathname = abspath_expanduser_unicode(path)
594             if not os.path.exists(pathname):
595                 t = LocalMissingTarget(pathname)
596             elif os.path.isdir(pathname):
597                 t = LocalDirectoryTarget(self.progress, pathname)
598             else:
599                 # TODO: should this be _assert? what happens if the target is
600                 # a special file?
601                 assert os.path.isfile(pathname), pathname
602                 t = LocalFileTarget(pathname) # non-empty
603         else:
604             # this is a tahoe object
605             url = self.nodeurl + "uri/%s" % urllib.quote(rootcap)
606             if path:
607                 url += "/" + escape_path(path)
608
609             resp = do_http("GET", url + "?t=json")
610             if resp.status == 404:
611                 # doesn't exist yet
612                 t = TahoeMissingTarget(url)
613             elif resp.status == 200:
614                 parsed = simplejson.loads(resp.read())
615                 nodetype, d = parsed
616                 if nodetype == "dirnode":
617                     t = TahoeDirectoryTarget(self.nodeurl, self.cache,
618                                              self.progress)
619                     t.init_from_parsed(parsed)
620                 else:
621                     writecap = to_str(d.get("rw_uri"))
622                     readcap = to_str(d.get("ro_uri"))
623                     mutable = d.get("mutable", False)
624                     t = TahoeFileTarget(self.nodeurl, mutable,
625                                         writecap, readcap, url)
626             else:
627                 raise HTTPError("Error examining target %s"
628                                  % quote_output(destination_spec), resp)
629         return t
630
631     def get_source_info(self, source_spec):
632         """
633         This turns an argv string into a (Local|Tahoe)(File|Directory)Source.
634         """
635         precondition(isinstance(source_spec, unicode), source_spec)
636         rootcap, path_utf8 = get_alias(self.aliases, source_spec, None)
637         path = path_utf8.decode("utf-8")
638         # any trailing slash is removed in abspath_expanduser_unicode(), so
639         # make a note of it here, to throw an error later
640         had_trailing_slash = path.endswith("/")
641         if rootcap == DefaultAliasMarker:
642             # no alias, so this is a local file
643             pathname = abspath_expanduser_unicode(path)
644             name = os.path.basename(pathname)
645             if not os.path.exists(pathname):
646                 raise MissingSourceError(source_spec, quotefn=quote_local_unicode_path)
647             if os.path.isdir(pathname):
648                 t = LocalDirectorySource(self.progress, pathname, name)
649             else:
650                 if had_trailing_slash:
651                     raise FilenameWithTrailingSlashError(source_spec,
652                                                          quotefn=quote_local_unicode_path)
653                 if not os.path.isfile(pathname):
654                     raise WeirdSourceError(pathname)
655                 t = LocalFileSource(pathname, name) # non-empty
656         else:
657             # this is a tahoe object
658             url = self.nodeurl + "uri/%s" % urllib.quote(rootcap)
659             name = None
660             if path:
661                 if path.endswith("/"):
662                     path = path[:-1]
663                 url += "/" + escape_path(path)
664                 last_slash = path.rfind(u"/")
665                 name = path
666                 if last_slash != -1:
667                     name = path[last_slash+1:]
668
669             resp = do_http("GET", url + "?t=json")
670             if resp.status == 404:
671                 raise MissingSourceError(source_spec)
672             elif resp.status != 200:
673                 raise HTTPError("Error examining source %s" % quote_output(source_spec),
674                                 resp)
675             parsed = simplejson.loads(resp.read())
676             nodetype, d = parsed
677             if nodetype == "dirnode":
678                 t = TahoeDirectorySource(self.nodeurl, self.cache,
679                                          self.progress, name)
680                 t.init_from_parsed(parsed)
681             else:
682                 if had_trailing_slash:
683                     raise FilenameWithTrailingSlashError(source_spec)
684                 writecap = to_str(d.get("rw_uri"))
685                 readcap = to_str(d.get("ro_uri"))
686                 mutable = d.get("mutable", False) # older nodes don't provide it
687                 t = TahoeFileSource(self.nodeurl, mutable, writecap, readcap, name)
688         return t
689
690
    def need_to_copy_bytes(self, source, target):
        """Return True when the file contents must be transferred, False when
        linking the source's cap would do."""
        # NOTE(review): this tests the bound method object itself rather than
        # calling it (no parentheses), so the condition is always true and
        # this method currently always returns True. Adding the call would
        # send some copies down the put_uri() path in copy_file_to_file(),
        # and TahoeFileTarget and LocalMissingTarget define no put_uri(), so
        # those paths need attention before this is changed.
        if source.need_to_copy_bytes:
            # mutable tahoe files, and local files
            return True
        # Local targets have nowhere to store a cap, so they need the bytes.
        if isinstance(target, (LocalFileTarget, LocalDirectoryTarget)):
            return True
        return False
698
699     def announce_success(self, msg):
700         if self.verbosity >= 1:
701             print >>self.stdout, "Success: %s" % msg
702         return 0
703
704     def copy_file_to_file(self, source, target):
705         precondition(isinstance(source, FileSources), source)
706         precondition(isinstance(target, FileTargets + MissingTargets), target)
707         if self.need_to_copy_bytes(source, target):
708             # if the target is a local directory, this will just write the
709             # bytes to disk. If it is a tahoe directory, it will upload the
710             # data, and stash the new filecap for a later set_children call.
711             f = source.open(self.caps_only)
712             target.put_file(f)
713             return self.announce_success("file copied")
714         # otherwise we're copying tahoe to tahoe, and using immutable files,
715         # so we can just make a link. TODO: this probably won't always work:
716         # need to enumerate the cases and analyze them.
717         target.put_uri(source.bestcap())
718         return self.announce_success("file linked")
719
720     def copy_things_to_directory(self, sources, target):
721         # step one: if the target is missing, we should mkdir it
722         target = self.maybe_create_target(target)
723         target.populate(recurse=False)
724
725         # step two: scan any source dirs, recursively, to find children
726         for s in sources:
727             if isinstance(s, DirectorySources):
728                 s.populate(recurse=True)
729             if isinstance(s, FileSources):
730                 # each source must have a name, or be a directory
731                 _assert(s.basename() is not None, s)
732
733         # step three: find a target for each source node, creating
734         # directories as necessary. 'targetmap' is a dictionary that uses
735         # target Directory instances as keys, and has values of (name:
736         # sourceobject) dicts for all the files that need to wind up there.
737         targetmap = self.build_targetmap(sources, target)
738
739         # target name collisions are an error
740         collisions = []
741         for target, sources in targetmap.items():
742             target_names = {}
743             for source in sources:
744                 name = source.basename()
745                 if name in target_names:
746                     collisions.append((target, source, target_names[name]))
747                 else:
748                     target_names[name] = source
749         if collisions:
750             self.to_stderr("cannot copy multiple files with the same name into the same target directory")
751             # I'm not sure how to show where the collisions are coming from
752             #for (target, source1, source2) in collisions:
753             #    self.to_stderr(source1.basename())
754             return 1
755
756         # step four: walk through the list of targets. For each one, copy all
757         # the files. If the target is a TahoeDirectory, upload and create
758         # read-caps, then do a set_children to the target directory.
759         self.copy_to_targetmap(targetmap)
760
761         return self.announce_success("files copied")
762
763     def maybe_create_target(self, target):
764         if isinstance(target, LocalMissingTarget):
765             os.makedirs(target.pathname)
766             target = LocalDirectoryTarget(self.progress, target.pathname)
767         elif isinstance(target, TahoeMissingTarget):
768             writecap = mkdir(target.url)
769             target = TahoeDirectoryTarget(self.nodeurl, self.cache,
770                                           self.progress)
771             target.just_created(writecap)
772         # afterwards, or otherwise, it will be a directory
773         precondition(isinstance(target, DirectoryTargets), target)
774         return target
775
776     def build_targetmap(self, sources, target):
777         num_source_files = len([s for s in sources
778                                 if isinstance(s, FileSources)])
779         num_source_dirs = len([s for s in sources
780                                if isinstance(s, DirectorySources)])
781         self.progress("attaching sources to targets, "
782                       "%d files / %d dirs in root" %
783                       (num_source_files, num_source_dirs))
784
785         # this maps each target directory to a list of source files that need
786         # to be copied into it. All source files have names.
787         targetmap = defaultdict(list)
788
789         for s in sources:
790             if isinstance(s, FileSources):
791                 targetmap[target].append(s)
792             else:
793                 _assert(isinstance(s, DirectorySources), s)
794                 name = s.basename()
795                 if name is not None:
796                     # named sources get a new directory. see #2329
797                     new_target = target.get_child_target(name)
798                 else:
799                     # unnamed sources have their contents copied directly
800                     new_target = target
801                 self.assign_targets(targetmap, s, new_target)
802
803         self.progress("targets assigned, %s dirs, %s files" %
804                       (len(targetmap), self.count_files_to_copy(targetmap)))
805         return targetmap
806
807     def assign_targets(self, targetmap, source, target):
808         # copy everything in the source into the target
809         precondition(isinstance(source, DirectorySources), source)
810         for name, child in source.children.items():
811             if isinstance(child, DirectorySources):
812                 # we will need a target directory for this one
813                 subtarget = target.get_child_target(name)
814                 self.assign_targets(targetmap, child, subtarget)
815             else:
816                 _assert(isinstance(child, FileSources), child)
817                 targetmap[target].append(child)
818
819     def copy_to_targetmap(self, targetmap):
820         files_to_copy = self.count_files_to_copy(targetmap)
821         self.progress("starting copy, %d files, %d directories" %
822                       (files_to_copy, len(targetmap)))
823         files_copied = 0
824         targets_finished = 0
825
826         for target, sources in targetmap.items():
827             _assert(isinstance(target, DirectoryTargets), target)
828             for source in sources:
829                 _assert(isinstance(source, FileSources), source)
830                 self.copy_file_into_dir(source, source.basename(), target)
831                 files_copied += 1
832                 self.progress("%d/%d files, %d/%d directories" %
833                               (files_copied, files_to_copy,
834                                targets_finished, len(targetmap)))
835             target.set_children()
836             targets_finished += 1
837             self.progress("%d/%d directories" %
838                           (targets_finished, len(targetmap)))
839
840     def count_files_to_copy(self, targetmap):
841         return sum([len(sources) for sources in targetmap.values()])
842
843     def copy_file_into_dir(self, source, name, target):
844         precondition(isinstance(source, FileSources), source)
845         precondition(isinstance(target, DirectoryTargets), target)
846         precondition(isinstance(name, unicode), name)
847         if self.need_to_copy_bytes(source, target):
848             # if the target is a local directory, this will just write the
849             # bytes to disk. If it is a tahoe directory, it will upload the
850             # data, and stash the new filecap for a later set_children call.
851             f = source.open(self.caps_only)
852             target.put_file(name, f)
853             return
854         # otherwise we're copying tahoe to tahoe, and using immutable files,
855         # so we can just make a link
856         target.put_uri(name, source.bestcap())
857
858
859     def progress(self, message):
860         #print message
861         if self.progressfunc:
862             self.progressfunc(message)
863
864
def copy(options):
    """Run a copy with a fresh Copier and return do_copy()'s result."""
    copier = Copier()
    return copier.do_copy(options)
867
868 # error cases that need improvement:
869 #  local-file-in-the-way
870 #   touch proposed
871 #   tahoe cp -r my:docs/proposed/denver.txt proposed/denver.txt
872 #  handling of unknown nodes
873
874 # things that maybe should be errors but aren't
875 #  local-dir-in-the-way
876 #   mkdir denver.txt
877 #   tahoe cp -r my:docs/proposed/denver.txt denver.txt
878 #   (creates denver.txt/denver.txt)
879
880 # error cases that look good:
881 #  tahoe cp -r my:docs/missing missing
882 #  disconnect servers
883 #   tahoe cp -r my:docs/missing missing  -> No JSON object could be decoded
884 #  tahoe-file-in-the-way (when we want to make a directory)
885 #   tahoe put README my:docs
886 #   tahoe cp -r docs/proposed my:docs/proposed