]> git.rkrishnan.org Git - tahoe-lafs/tahoe-lafs.git/blob - src/allmydata/scripts/tahoe_cp.py
tahoe_cp: minor cleanups, no behavior changes
[tahoe-lafs/tahoe-lafs.git] / src / allmydata / scripts / tahoe_cp.py
1
2 import os.path
3 import urllib
4 import simplejson
5 from collections import defaultdict
6 from cStringIO import StringIO
7 from twisted.python.failure import Failure
8 from allmydata.scripts.common import get_alias, escape_path, \
9                                      DefaultAliasMarker, TahoeError
10 from allmydata.scripts.common_http import do_http, HTTPError
11 from allmydata import uri
12 from allmydata.util import fileutil
13 from allmydata.util.fileutil import abspath_expanduser_unicode, precondition_abspath
14 from allmydata.util.encodingutil import unicode_to_url, listdir_unicode, quote_output, \
15     quote_local_unicode_path, to_str
16 from allmydata.util.assertutil import precondition, _assert
17
18
class MissingSourceError(TahoeError):
    """Error reported when a copy source cannot be found."""

    def __init__(self, name, quotefn=quote_output):
        # 'quotefn' renders 'name' for display; callers may pass a different
        # quoting function than the default quote_output.
        quoted_name = quotefn(name)
        message = "No such file or directory %s" % quoted_name
        TahoeError.__init__(self, message)
22
23
def GET_to_file(url):
    """Issue an HTTP GET for 'url' and return the response object.

    The response is handed back without being read. Any status other than
    200 raises HTTPError.
    """
    response = do_http("GET", url)
    if response.status != 200:
        raise HTTPError("Error during GET", response)
    return response
29
def GET_to_string(url):
    """Fetch 'url' via GET_to_file() and return everything read() yields."""
    return GET_to_file(url).read()
33
def PUT(url, data):
    """Send 'data' to 'url' with HTTP PUT and return the response body.

    Statuses 200 and 201 count as success; anything else raises HTTPError.
    """
    response = do_http("PUT", url, data)
    if response.status not in (200, 201):
        raise HTTPError("Error during PUT", response)
    return response.read()
39
def POST(url, data):
    """Send 'data' to 'url' with HTTP POST and return the response body.

    Statuses 200 and 201 count as success; anything else raises HTTPError.
    """
    response = do_http("POST", url, data)
    if response.status not in (200, 201):
        raise HTTPError("Error during POST", response)
    return response.read()
45
def mkdir(targeturl):
    """POST a '?t=mkdir' request to 'targeturl'.

    Returns the response body with surrounding whitespace stripped (callers
    in this file use it as the new directory's writecap). Statuses other
    than 200/201 raise HTTPError.
    """
    response = do_http("POST", targeturl + "?t=mkdir")
    if response.status not in (200, 201):
        raise HTTPError("Error during mkdir", response)
    return response.read().strip()
52
def make_tahoe_subdirectory(nodeurl, parent_writecap, name):
    """POST a '?t=mkdir' request for child 'name' under 'parent_writecap'.

    The request URL is <nodeurl>uri/<quoted parent cap>/<quoted name>.
    Returns the stripped response body (used by the caller in this file as
    the new directory's writecap); statuses other than 200/201 raise
    HTTPError.
    """
    quoted_parent = urllib.quote(parent_writecap)
    quoted_name = urllib.quote(unicode_to_url(name))
    child_path = "/".join(["uri", quoted_parent, quoted_name])
    url = nodeurl + child_path + "?t=mkdir"

    response = do_http("POST", url)
    if response.status not in (200, 201):
        raise HTTPError("Error during mkdir", response)
    return response.read().strip()
62
63
class LocalFileSource:
    """Copy source backed by a file on the local filesystem."""

    def __init__(self, pathname, basename):
        # 'pathname' is checked by precondition_abspath(); 'basename' is
        # the name this source reports through basename().
        precondition_abspath(pathname)
        self._basename = basename
        self.pathname = pathname

    def basename(self):
        """Return the name given to the constructor."""
        return self._basename

    def need_to_copy_bytes(self):
        """Local files always report that their bytes must be copied."""
        return True

    def open(self, caps_only):
        """Open the file for binary reading ('caps_only' is ignored here)."""
        handle = open(self.pathname, "rb")
        return handle
78
class LocalFileTarget:
    """Copy target that is an existing file on the local filesystem."""

    def __init__(self, pathname):
        precondition_abspath(pathname)
        self.pathname = pathname

    def put_file(self, inf):
        """Write the contents of file-like 'inf' to this target's path."""
        destination = self.pathname
        fileutil.put_file(destination, inf)
86
class LocalMissingTarget:
    """Copy target for a local path that does not exist yet."""

    def __init__(self, pathname):
        precondition_abspath(pathname)
        self.pathname = pathname

    def put_file(self, inf):
        """Write the contents of file-like 'inf' to this target's path."""
        destination = self.pathname
        fileutil.put_file(destination, inf)
94
class LocalDirectorySource:
    """Copy source backed by a directory on the local filesystem.

    'children' stays None until populate() runs; afterwards it maps entry
    names to LocalDirectorySource / LocalFileSource objects.
    """

    def __init__(self, progressfunc, pathname, basename):
        precondition_abspath(pathname)

        self._basename = basename
        self.pathname = pathname
        self.progressfunc = progressfunc
        self.children = None

    def basename(self):
        """Return the name given to the constructor."""
        return self._basename

    def populate(self, recurse):
        """List the directory once and fill in self.children.

        Later calls are no-ops. With 'recurse' true, subdirectories are
        populated as well.
        """
        if self.children is not None:
            return
        self.children = {}

        names = listdir_unicode(self.pathname)
        total = len(names)
        for index, entry in enumerate(names):
            self.progressfunc("examining %d of %d" % (index+1, total))
            entry_path = os.path.join(self.pathname, entry)

            if os.path.isdir(entry_path):
                subdir = LocalDirectorySource(self.progressfunc,
                                              entry_path, entry)
                self.children[entry] = subdir
                if recurse:
                    subdir.populate(recurse=True)
                continue

            if os.path.isfile(entry_path):
                self.children[entry] = LocalFileSource(entry_path, entry)

            # Anything else could be a dangling symlink; probably not
            # copy-able, so it is skipped.
            # TODO: output a warning
126
class LocalDirectoryTarget:
    """Copy target that is an existing directory on the local filesystem.

    'children' stays None until populate() runs; afterwards it maps entry
    names to LocalDirectoryTarget / LocalFileTarget objects.
    """

    def __init__(self, progressfunc, pathname):
        precondition_abspath(pathname)

        self.progressfunc = progressfunc
        self.pathname = pathname
        self.children = None

    def populate(self, recurse):
        """List the directory once and fill in self.children.

        Later calls are no-ops. With 'recurse' true, subdirectories are
        populated as well. Entries that are neither directories nor regular
        files are left out of self.children.
        """
        if self.children is not None:
            return
        self.children = {}
        children = listdir_unicode(self.pathname)
        for i,n in enumerate(children):
            self.progressfunc("examining %d of %d" % (i+1, len(children)))
            pn = os.path.join(self.pathname, n)
            if os.path.isdir(pn):
                child = LocalDirectoryTarget(self.progressfunc, pn)
                self.children[n] = child
                if recurse:
                    child.populate(recurse=True)
            elif os.path.isfile(pn):
                self.children[n] = LocalFileTarget(pn)
            else:
                # Could be a dangling symlink or a special file. This used
                # to be an 'assert', which aborted the whole copy (or, under
                # -O, silently wrapped the entry as a file). Skip it, as
                # LocalDirectorySource.populate does.
                pass

    def get_child_target(self, name):
        """Return the child target called 'name', creating a directory for
        it on disk (os.makedirs) if no such child is known."""
        precondition(isinstance(name, unicode), name)
        if self.children is None:
            self.populate(recurse=False)
        if name in self.children:
            return self.children[name]
        pathname = os.path.join(self.pathname, name)
        os.makedirs(pathname)
        child = LocalDirectoryTarget(self.progressfunc, pathname)
        self.children[name] = child
        return child

    def put_file(self, name, inf):
        """Write the contents of file-like 'inf' to 'name' in this directory."""
        precondition(isinstance(name, unicode), name)
        pathname = os.path.join(self.pathname, name)
        fileutil.put_file(pathname, inf)

    def set_children(self):
        """Nothing to do for local directories."""
        pass
171
172
class TahoeFileSource:
    """Copy source that is a file node reached through the Tahoe node at
    'nodeurl'."""

    def __init__(self, nodeurl, mutable, writecap, readcap, basename):
        self._basename = basename # unicode, or None for raw filecaps
        self.nodeurl = nodeurl
        self.mutable = mutable
        self.readcap = readcap
        self.writecap = writecap

    def basename(self):
        """Return the name given to the constructor (may be None)."""
        return self._basename

    def need_to_copy_bytes(self):
        """Return True for mutable files, False otherwise."""
        return bool(self.mutable)

    def open(self, caps_only):
        """Return a readable object for this file.

        With 'caps_only' true the object just yields the readcap string;
        otherwise it is the response of a GET on <nodeurl>uri/<readcap>.
        """
        if not caps_only:
            download_url = self.nodeurl + "uri/" + urllib.quote(self.readcap)
            return GET_to_file(download_url)
        return StringIO(self.readcap)

    def bestcap(self):
        """Return the writecap if there is one, else the readcap."""
        return self.writecap or self.readcap
197
class TahoeFileTarget:
    """Copy target that is an existing file node, overwritten through 'url'."""

    def __init__(self, nodeurl, mutable, writecap, readcap, url):
        self.url = url
        self.nodeurl = nodeurl
        self.mutable = mutable
        self.readcap = readcap
        self.writecap = writecap

    def put_file(self, inf):
        """PUT the contents of 'inf' to self.url, replacing this object
        in-place."""
        assert self.url
        # our do_http() call currently requires a string or a filehandle with
        # a real .seek, so anything else is read into memory first
        payload = inf if hasattr(inf, "seek") else inf.read()
        PUT(self.url, payload)
        # TODO: this always creates immutable files. We might want an option
        # to always create mutable files, or to copy mutable files into new
        # mutable files. ticket #835
217
class TahoeDirectorySource:
    """Copy source that is a directory node reached through 'nodeurl'.

    One of init_from_grid() / init_from_parsed() must run before
    populate(); they set the caps, 'mutable', 'children_d' (the raw child
    descriptions) and reset 'children' to None.
    """

    def __init__(self, nodeurl, cache, progressfunc, basename):
        self._basename = basename # unicode, or None for raw dircaps
        self.nodeurl = nodeurl
        self.cache = cache
        self.progressfunc = progressfunc

    def basename(self):
        """Return the name given to the constructor (may be None)."""
        return self._basename

    def init_from_grid(self, writecap, readcap):
        """Fetch this directory's '?t=json' description using the writecap
        if present, else the readcap. A non-200 status raises HTTPError."""
        self.writecap = writecap
        self.readcap = readcap
        bestcap = writecap or readcap
        json_url = self.nodeurl + "uri/%s" % urllib.quote(bestcap) + "?t=json"
        resp = do_http("GET", json_url)
        if resp.status != 200:
            raise HTTPError("Error examining source directory", resp)
        nodetype, d = simplejson.loads(resp.read())
        assert nodetype == "dirnode"
        self.mutable = d.get("mutable", False) # older nodes don't provide it
        self.children_d = dict((unicode(name), value)
                               for (name, value) in d["children"].iteritems())
        self.children = None

    def init_from_parsed(self, parsed):
        """Initialise from an already-decoded (nodetype, description) pair."""
        _nodetype, d = parsed
        self.writecap = to_str(d.get("rw_uri"))
        self.readcap = to_str(d.get("ro_uri"))
        self.mutable = d.get("mutable", False) # older nodes don't provide it
        self.children_d = dict((unicode(name), value)
                               for (name, value) in d["children"].iteritems())
        self.children = None

    def populate(self, recurse):
        """Turn children_d into source objects stored in self.children.

        Later calls are no-ops. Child directories already present in the
        cache (under their writecap or readcap) are reused; others are
        fetched with init_from_grid(), added to the cache, and populated
        too when 'recurse' is true. Unknown node types raise TahoeError.
        """
        if self.children is not None:
            return
        self.children = {}

        total = len(self.children_d)
        for index, (name, data) in enumerate(self.children_d.items()):
            self.progressfunc("examining %d of %d" % (index+1, total))
            nodetype = data[0]

            if nodetype == "filenode":
                info = data[1]
                mutable = info.get("mutable", False)
                writecap = to_str(info.get("rw_uri"))
                readcap = to_str(info.get("ro_uri"))
                self.children[name] = TahoeFileSource(self.nodeurl, mutable,
                                                      writecap, readcap, name)
                continue

            if nodetype != "dirnode":
                # TODO: there should be an option to skip unknown nodes.
                raise TahoeError("Cannot copy unknown nodes (ticket #839). "
                                 "You probably need to use a later version of "
                                 "Tahoe-LAFS to copy this directory.")

            info = data[1]
            writecap = to_str(info.get("rw_uri"))
            readcap = to_str(info.get("ro_uri"))
            if writecap and writecap in self.cache:
                subdir = self.cache[writecap]
            elif readcap and readcap in self.cache:
                subdir = self.cache[readcap]
            else:
                subdir = TahoeDirectorySource(self.nodeurl, self.cache,
                                              self.progressfunc, name)
                subdir.init_from_grid(writecap, readcap)
                if writecap:
                    self.cache[writecap] = subdir
                if readcap:
                    self.cache[readcap] = subdir
                if recurse:
                    subdir.populate(recurse=True)
            self.children[name] = subdir
290
class TahoeMissingTarget:
    """Copy target for a grid URL that does not exist yet."""

    def __init__(self, url):
        self.url = url

    def put_file(self, inf):
        """PUT the contents of 'inf' to self.url."""
        # PUT() is given either a string or something with .seek, so other
        # objects are read into memory first.
        payload = inf if hasattr(inf, "seek") else inf.read()
        PUT(self.url, payload)
        # TODO: this always creates immutable files. We might want an option
        # to always create mutable files, or to copy mutable files into new
        # mutable files.

    def put_uri(self, filecap):
        """PUT 'filecap' to self.url + '?t=uri' and return the response body."""
        # I'm not sure this will always work
        link_url = self.url + "?t=uri"
        return PUT(link_url, filecap)
307
class TahoeDirectoryTarget:
    """Copy target that is a directory node reached through 'nodeurl'.

    One of init_from_parsed() / init_from_grid() / just_created() must run
    before the directory is used. File caps queued by put_file()/put_uri()
    accumulate in 'new_children' until set_children() sends them.
    """

    def __init__(self, nodeurl, cache, progressfunc):
        self.new_children = {}
        self.nodeurl = nodeurl
        self.cache = cache
        self.progressfunc = progressfunc

    def init_from_parsed(self, parsed):
        """Initialise from an already-decoded (nodetype, description) pair."""
        _nodetype, d = parsed
        self.writecap = to_str(d.get("rw_uri"))
        self.readcap = to_str(d.get("ro_uri"))
        self.mutable = d.get("mutable", False) # older nodes don't provide it
        self.children_d = dict((unicode(name), value)
                               for (name, value) in d["children"].iteritems())
        self.children = None

    def init_from_grid(self, writecap, readcap):
        """Fetch this directory's '?t=json' description using the writecap
        if present, else the readcap. A non-200 status raises HTTPError."""
        self.writecap = writecap
        self.readcap = readcap
        bestcap = writecap or readcap
        json_url = self.nodeurl + "uri/%s" % urllib.quote(bestcap) + "?t=json"
        resp = do_http("GET", json_url)
        if resp.status != 200:
            raise HTTPError("Error examining target directory", resp)
        nodetype, d = simplejson.loads(resp.read())
        assert nodetype == "dirnode"
        self.mutable = d.get("mutable", False) # older nodes don't provide it
        self.children_d = dict((unicode(name), value)
                               for (name, value) in d["children"].iteritems())
        self.children = None

    def just_created(self, writecap):
        """Initialise as a freshly made, empty, mutable directory."""
        # TODO: maybe integrate this with the constructor
        self.writecap = writecap
        self.readcap = uri.from_string(writecap).get_readonly().to_string()
        self.mutable = True
        self.children_d = {}
        self.children = {}

    def populate(self, recurse):
        """Turn children_d into target objects stored in self.children.

        Later calls are no-ops. File children get a per-name URL under this
        directory's writecap (None when there is no writecap). Child
        directories are reused from the cache when present, otherwise
        fetched with init_from_grid(), cached, and populated too when
        'recurse' is true. Unknown node types raise TahoeError.
        """
        if self.children is not None:
            return
        self.children = {}

        total = len(self.children_d)
        for index, (name, data) in enumerate(self.children_d.items()):
            self.progressfunc("examining %d of %d" % (index+1, total))
            nodetype = data[0]

            if nodetype == "filenode":
                info = data[1]
                mutable = info.get("mutable", False)
                writecap = to_str(info.get("rw_uri"))
                readcap = to_str(info.get("ro_uri"))
                url = None
                if self.writecap:
                    segments = ["uri",
                                urllib.quote(self.writecap),
                                urllib.quote(unicode_to_url(name))]
                    url = self.nodeurl + "/".join(segments)
                self.children[name] = TahoeFileTarget(self.nodeurl, mutable,
                                                      writecap, readcap, url)
                continue

            if nodetype != "dirnode":
                # TODO: there should be an option to skip unknown nodes.
                raise TahoeError("Cannot copy unknown nodes (ticket #839). "
                                 "You probably need to use a later version of "
                                 "Tahoe-LAFS to copy this directory.")

            info = data[1]
            writecap = to_str(info.get("rw_uri"))
            readcap = to_str(info.get("ro_uri"))
            if writecap and writecap in self.cache:
                subdir = self.cache[writecap]
            elif readcap and readcap in self.cache:
                subdir = self.cache[readcap]
            else:
                subdir = TahoeDirectoryTarget(self.nodeurl, self.cache,
                                              self.progressfunc)
                subdir.init_from_grid(writecap, readcap)
                if writecap:
                    self.cache[writecap] = subdir
                if readcap:
                    self.cache[readcap] = subdir
                if recurse:
                    subdir.populate(recurse=True)
            self.children[name] = subdir

    def get_child_target(self, name):
        """Return the child called 'name'; if there is none, make a new
        subdirectory on the grid and return a target for it."""
        precondition(isinstance(name, unicode), name)
        if self.children is None:
            self.populate(recurse=False)
        try:
            return self.children[name]
        except KeyError:
            pass
        new_writecap = make_tahoe_subdirectory(self.nodeurl, self.writecap,
                                               name)
        subdir = TahoeDirectoryTarget(self.nodeurl, self.cache,
                                      self.progressfunc)
        subdir.just_created(new_writecap)
        self.children[name] = subdir
        return subdir

    def put_file(self, name, inf):
        """Store the contents of 'inf' as child 'name'.

        An existing child marked mutable is overwritten through its own
        put_file(); otherwise the data is PUT to <nodeurl>uri and the
        returned cap is queued in new_children for set_children().
        """
        precondition(isinstance(name, unicode), name)
        upload_url = self.nodeurl + "uri"
        if not hasattr(inf, "seek"):
            inf = inf.read()

        if self.children is None:
            self.populate(recurse=False)

        # Check to see if we already have a mutable file by this name.
        # If so, overwrite that file in place.
        # NOTE(review): a child that is a TahoeDirectoryTarget also has a
        # 'mutable' attribute, but its put_file() takes (name, inf) --
        # confirm that case cannot reach the call below.
        existing = self.children.get(name)
        if name in self.children and existing.mutable:
            existing.put_file(inf)
            return

        # TODO: this always creates immutable files. We might want an option
        # to always create mutable files, or to copy mutable files into new
        # mutable files.
        self.new_children[name] = PUT(upload_url, inf)

    def put_uri(self, name, filecap):
        """Queue 'filecap' to be linked as child 'name' by set_children()."""
        precondition(isinstance(name, unicode), name)
        self.new_children[name] = filecap

    def set_children(self):
        """POST all queued children to this directory in one
        '?t=set_children' request; does nothing when none are queued."""
        if not self.new_children:
            return
        url = (self.nodeurl + "uri/" + urllib.quote(self.writecap)
               + "?t=set_children")
        # it just so happens that ?t=set_children will accept both file
        # read-caps and write-caps as ['rw_uri'], and will handle either
        # correctly. So don't bother trying to figure out whether the one
        # we have is read-only or read-write.
        # TODO: think about how this affects forward-compatibility for
        # unknown caps
        set_data = dict((name, ["filenode", {"rw_uri": filecap}])
                        for (name, filecap) in self.new_children.items())
        POST(url, simplejson.dumps(set_data))
445
# Groupings of the concrete source/target classes defined above, used with
# isinstance() (and concatenated with '+') to classify copy endpoints.
FileSources = (LocalFileSource, TahoeFileSource)
DirectorySources = (LocalDirectorySource, TahoeDirectorySource)
FileTargets = (LocalFileTarget, TahoeFileTarget)
DirectoryTargets = (LocalDirectoryTarget, TahoeDirectoryTarget)
# Targets whose path/URL does not exist yet.
MissingTargets = (LocalMissingTarget, TahoeMissingTarget)
451
452 class Copier:
453
454     def do_copy(self, options, progressfunc=None):
455         if options['quiet']:
456             verbosity = 0
457         elif options['verbose']:
458             verbosity = 2
459         else:
460             verbosity = 1
461
462         nodeurl = options['node-url']
463         if nodeurl[-1] != "/":
464             nodeurl += "/"
465         self.nodeurl = nodeurl
466         self.progressfunc = progressfunc
467         self.options = options
468         self.aliases = options.aliases
469         self.verbosity = verbosity
470         self.stdout = options.stdout
471         self.stderr = options.stderr
472         if verbosity >= 2 and not self.progressfunc:
473             def progress(message):
474                 print >>self.stderr, message
475             self.progressfunc = progress
476         self.caps_only = options["caps-only"]
477         self.cache = {}
478         try:
479             status = self.try_copy()
480             return status
481         except TahoeError, te:
482             if verbosity >= 2:
483                 Failure().printTraceback(self.stderr)
484                 print >>self.stderr
485             te.display(self.stderr)
486             return 1
487
488     def try_copy(self):
489         """
490         All usage errors are caught here, not in a subroutine. This bottoms
491         out in copy_file_to_file() or copy_things_to_directory().
492         """
493         source_specs = self.options.sources
494         destination_spec = self.options.destination
495         recursive = self.options["recursive"]
496
497         target = self.get_target_info(destination_spec)
498         precondition(isinstance(target, FileTargets + DirectoryTargets + MissingTargets), target)
499         target_has_trailing_slash = destination_spec.endswith("/")
500
501         sources = [] # list of source objects
502         for ss in source_specs:
503             si = self.get_source_info(ss)
504             precondition(isinstance(si, FileSources + DirectorySources), si)
505             sources.append(si)
506
507         # if any source is a directory, must use -r
508         # if target is missing:
509         #    if source is a single file, target will be a file
510         #    else target will be a directory, so mkdir it
511         # if there are multiple sources, target must be a dir
512         # if target is a file, source must be a single file
513         # if target is directory, sources must be named or a dir
514
515         have_source_dirs = any([isinstance(s, DirectorySources)
516                                 for s in sources])
517         if have_source_dirs and not recursive:
518             # 'cp dir target' without -r: error
519             self.to_stderr("cannot copy directories without --recursive")
520             return 1
521         del recursive # -r is only used for signalling errors
522
523         if isinstance(target, FileTargets):
524             target_is_file = True
525         elif isinstance(target, DirectoryTargets):
526             target_is_file = False
527         else: # isinstance(target, MissingTargets)
528             if len(sources) == 1 and isinstance(sources[0], FileSources):
529                 target_is_file = True
530             else:
531                 target_is_file = False
532
533         if target_is_file and target_has_trailing_slash:
534             self.to_stderr("target is not a directory, but ends with a slash")
535             return 1
536
537         if len(sources) > 1 and target_is_file:
538             self.to_stderr("copying multiple things requires target be a directory")
539             return 1
540
541         if target_is_file:
542             _assert(len(sources) == 1, sources)
543             if not isinstance(sources[0], FileSources):
544                 # 'cp -r dir existingfile': error
545                 self.to_stderr("cannot copy directory into a file")
546                 return 1
547             return self.copy_file_to_file(sources[0], target)
548
549         # else target is a directory, so each source must be one of:
550         # * a named file (copied to a new file under the target)
551         # * a named directory (causes a new directory of the same name to be
552         #   created under the target, then the contents of the source are
553         #   copied into that directory)
554         # * an unnamed directory (the contents of the source are copied into
555         #   the target, without a new directory being made)
556         #
557         # If any source is an unnamed file, throw an error, since we have no
558         # way to name the output file.
559         _assert(isinstance(target, DirectoryTargets + MissingTargets), target)
560
561         for source in sources:
562             if isinstance(source, FileSources) and not source.basename():
563                 self.to_stderr("when copying into a directory, all source files must have names, but %s is unnamed" % quote_output(source_specs[0]))
564                 return 1
565         return self.copy_things_to_directory(sources, target)
566
567     def to_stderr(self, text):
568         print >>self.stderr, text
569
570     # FIXME reduce the amount of near-duplicate code between get_target_info
571     # and get_source_info.
572
573     def get_target_info(self, destination_spec):
574         precondition(isinstance(destination_spec, unicode), destination_spec)
575         rootcap, path_utf8 = get_alias(self.aliases, destination_spec, None)
576         path = path_utf8.decode("utf-8")
577         if rootcap == DefaultAliasMarker:
578             # no alias, so this is a local file
579             pathname = abspath_expanduser_unicode(path)
580             if not os.path.exists(pathname):
581                 t = LocalMissingTarget(pathname)
582             elif os.path.isdir(pathname):
583                 t = LocalDirectoryTarget(self.progress, pathname)
584             else:
585                 # TODO: should this be _assert? what happens if the target is
586                 # a special file?
587                 assert os.path.isfile(pathname), pathname
588                 t = LocalFileTarget(pathname) # non-empty
589         else:
590             # this is a tahoe object
591             url = self.nodeurl + "uri/%s" % urllib.quote(rootcap)
592             if path:
593                 url += "/" + escape_path(path)
594
595             resp = do_http("GET", url + "?t=json")
596             if resp.status == 404:
597                 # doesn't exist yet
598                 t = TahoeMissingTarget(url)
599             elif resp.status == 200:
600                 parsed = simplejson.loads(resp.read())
601                 nodetype, d = parsed
602                 if nodetype == "dirnode":
603                     t = TahoeDirectoryTarget(self.nodeurl, self.cache,
604                                              self.progress)
605                     t.init_from_parsed(parsed)
606                 else:
607                     writecap = to_str(d.get("rw_uri"))
608                     readcap = to_str(d.get("ro_uri"))
609                     mutable = d.get("mutable", False)
610                     t = TahoeFileTarget(self.nodeurl, mutable,
611                                         writecap, readcap, url)
612             else:
613                 raise HTTPError("Error examining target %s"
614                                  % quote_output(destination_spec), resp)
615         return t
616
617     def get_source_info(self, source_spec):
618         """
619         This turns an argv string into a (Local|Tahoe)(File|Directory)Source.
620         """
621         precondition(isinstance(source_spec, unicode), source_spec)
622         rootcap, path_utf8 = get_alias(self.aliases, source_spec, None)
623         path = path_utf8.decode("utf-8")
624         if rootcap == DefaultAliasMarker:
625             # no alias, so this is a local file
626             pathname = abspath_expanduser_unicode(path)
627             name = os.path.basename(pathname)
628             if not os.path.exists(pathname):
629                 raise MissingSourceError(source_spec, quotefn=quote_local_unicode_path)
630             if os.path.isdir(pathname):
631                 t = LocalDirectorySource(self.progress, pathname, name)
632             else:
633                 assert os.path.isfile(pathname)
634                 t = LocalFileSource(pathname, name) # non-empty
635         else:
636             # this is a tahoe object
637             url = self.nodeurl + "uri/%s" % urllib.quote(rootcap)
638             name = None
639             if path:
640                 url += "/" + escape_path(path)
641                 last_slash = path.rfind(u"/")
642                 name = path
643                 if last_slash != -1:
644                     name = path[last_slash+1:]
645
646             resp = do_http("GET", url + "?t=json")
647             if resp.status == 404:
648                 raise MissingSourceError(source_spec)
649             elif resp.status != 200:
650                 raise HTTPError("Error examining source %s" % quote_output(source_spec),
651                                 resp)
652             parsed = simplejson.loads(resp.read())
653             nodetype, d = parsed
654             if nodetype == "dirnode":
655                 t = TahoeDirectorySource(self.nodeurl, self.cache,
656                                          self.progress, name)
657                 t.init_from_parsed(parsed)
658             else:
659                 writecap = to_str(d.get("rw_uri"))
660                 readcap = to_str(d.get("ro_uri"))
661                 mutable = d.get("mutable", False) # older nodes don't provide it
662
663                 last_slash = source_spec.rfind(u"/")
664                 if last_slash != -1:
665                     # TODO: this looks funny and redundant with the 'name'
666                     # assignment above. cf #2329
667                     name = source_spec[last_slash+1:]
668
669                 t = TahoeFileSource(self.nodeurl, mutable, writecap, readcap, name)
670         return t
671
672
673     def need_to_copy_bytes(self, source, target):
674         if source.need_to_copy_bytes:
675             # mutable tahoe files, and local files
676             return True
677         if isinstance(target, (LocalFileTarget, LocalDirectoryTarget)):
678             return True
679         return False
680
681     def announce_success(self, msg):
682         if self.verbosity >= 1:
683             print >>self.stdout, "Success: %s" % msg
684         return 0
685
686     def copy_file_to_file(self, source, target):
687         precondition(isinstance(source, FileSources), source)
688         precondition(isinstance(target, FileTargets + MissingTargets), target)
689         if self.need_to_copy_bytes(source, target):
690             # if the target is a local directory, this will just write the
691             # bytes to disk. If it is a tahoe directory, it will upload the
692             # data, and stash the new filecap for a later set_children call.
693             f = source.open(self.caps_only)
694             target.put_file(f)
695             return self.announce_success("file copied")
696         # otherwise we're copying tahoe to tahoe, and using immutable files,
697         # so we can just make a link. TODO: this probably won't always work:
698         # need to enumerate the cases and analyze them.
699         target.put_uri(source.bestcap())
700         return self.announce_success("file linked")
701
702     def copy_things_to_directory(self, sources, target):
703         # step one: if the target is missing, we should mkdir it
704         target = self.maybe_create_target(target)
705         target.populate(recurse=False)
706
707         # step two: scan any source dirs, recursively, to find children
708         for s in sources:
709             if isinstance(s, DirectorySources):
710                 s.populate(recurse=True)
711             if isinstance(s, FileSources):
712                 # each source must have a name, or be a directory
713                 _assert(s.basename() is not None, s)
714
715         # step three: find a target for each source node, creating
716         # directories as necessary. 'targetmap' is a dictionary that uses
717         # target Directory instances as keys, and has values of (name:
718         # sourceobject) dicts for all the files that need to wind up there.
719         targetmap = self.build_targetmap(sources, target)
720
721         # step four: walk through the list of targets. For each one, copy all
722         # the files. If the target is a TahoeDirectory, upload and create
723         # read-caps, then do a set_children to the target directory.
724         self.copy_to_targetmap(targetmap)
725
726         return self.announce_success("files copied")
727
728     def maybe_create_target(self, target):
729         if isinstance(target, LocalMissingTarget):
730             os.makedirs(target.pathname)
731             target = LocalDirectoryTarget(self.progress, target.pathname)
732         elif isinstance(target, TahoeMissingTarget):
733             writecap = mkdir(target.url)
734             target = TahoeDirectoryTarget(self.nodeurl, self.cache,
735                                           self.progress)
736             target.just_created(writecap)
737         # afterwards, or otherwise, it will be a directory
738         precondition(isinstance(target, DirectoryTargets), target)
739         return target
740
741     def build_targetmap(self, sources, target):
742         num_source_files = len([s for s in sources
743                                 if isinstance(s, FileSources)])
744         num_source_dirs = len([s for s in sources
745                                if isinstance(s, DirectorySources)])
746         self.progress("attaching sources to targets, "
747                       "%d files / %d dirs in root" %
748                       (num_source_files, num_source_dirs))
749
750         # this maps each target directory to a list of source files that need
751         # to be copied into it. All source files have names.
752         targetmap = defaultdict(list)
753
754         for s in sources:
755             if isinstance(s, FileSources):
756                 targetmap[target].append(s)
757             else:
758                 _assert(isinstance(s, DirectorySources), s)
759                 name = s.basename()
760                 if name is not None:
761                     # named sources get a new directory. see #2329
762                     new_target = target.get_child_target(name)
763                 else:
764                     # unnamed sources have their contents copied directly
765                     new_target = target
766                 self.assign_targets(targetmap, s, new_target)
767
768         self.progress("targets assigned, %s dirs, %s files" %
769                       (len(targetmap), self.count_files_to_copy(targetmap)))
770         return targetmap
771
772     def assign_targets(self, targetmap, source, target):
773         # copy everything in the source into the target
774         precondition(isinstance(source, DirectorySources), source)
775         for name, child in source.children.items():
776             if isinstance(child, DirectorySources):
777                 # we will need a target directory for this one
778                 subtarget = target.get_child_target(name)
779                 self.assign_targets(targetmap, child, subtarget)
780             else:
781                 _assert(isinstance(child, FileSources), child)
782                 targetmap[target].append(child)
783
784     def copy_to_targetmap(self, targetmap):
785         files_to_copy = self.count_files_to_copy(targetmap)
786         self.progress("starting copy, %d files, %d directories" %
787                       (files_to_copy, len(targetmap)))
788         files_copied = 0
789         targets_finished = 0
790
791         for target, sources in targetmap.items():
792             _assert(isinstance(target, DirectoryTargets), target)
793             for source in sources:
794                 _assert(isinstance(source, FileSources), source)
795                 self.copy_file_into_dir(source, source.basename(), target)
796                 files_copied += 1
797                 self.progress("%d/%d files, %d/%d directories" %
798                               (files_copied, files_to_copy,
799                                targets_finished, len(targetmap)))
800             target.set_children()
801             targets_finished += 1
802             self.progress("%d/%d directories" %
803                           (targets_finished, len(targetmap)))
804
805     def count_files_to_copy(self, targetmap):
806         return sum([len(sources) for sources in targetmap.values()])
807
808     def copy_file_into_dir(self, source, name, target):
809         precondition(isinstance(source, FileSources), source)
810         precondition(isinstance(target, DirectoryTargets), target)
811         precondition(isinstance(name, unicode), name)
812         if self.need_to_copy_bytes(source, target):
813             # if the target is a local directory, this will just write the
814             # bytes to disk. If it is a tahoe directory, it will upload the
815             # data, and stash the new filecap for a later set_children call.
816             f = source.open(self.caps_only)
817             target.put_file(name, f)
818             return
819         # otherwise we're copying tahoe to tahoe, and using immutable files,
820         # so we can just make a link
821         target.put_uri(name, source.bestcap())
822
823
824     def progress(self, message):
825         #print message
826         if self.progressfunc:
827             self.progressfunc(message)
828
829
def copy(options):
    """Run a copy described by 'options' on a fresh Copier and return its result."""
    copier = Copier()
    return copier.do_copy(options)
832
833 # error cases that need improvement:
834 #  local-file-in-the-way
835 #   touch proposed
836 #   tahoe cp -r my:docs/proposed/denver.txt proposed/denver.txt
837 #  handling of unknown nodes
838
839 # things that maybe should be errors but aren't
840 #  local-dir-in-the-way
841 #   mkdir denver.txt
842 #   tahoe cp -r my:docs/proposed/denver.txt denver.txt
843 #   (creates denver.txt/denver.txt)
844
845 # error cases that look good:
846 #  tahoe cp -r my:docs/missing missing
847 #  disconnect servers
848 #   tahoe cp -r my:docs/missing missing  -> No JSON object could be decoded
849 #  tahoe-file-in-the-way (when we want to make a directory)
850 #   tahoe put README my:docs
851 #   tahoe cp -r docs/proposed my:docs/proposed