git.rkrishnan.org Git - tahoe-lafs/tahoe-lafs.git/blobdiff - src/allmydata/frontends/magic_folder.py
WIP: exclude own dirnode from scan. This is not quite right; we shouldn't exclude...
[tahoe-lafs/tahoe-lafs.git] / src / allmydata / frontends / magic_folder.py
index a53cd3752b830c2cdb05ef39f124cfccf447a4b2..9f5af7022ffb4e777278076280e2aad8855ae669 100644 (file)
@@ -12,8 +12,8 @@ from twisted.application import service
 from allmydata.util import fileutil
 from allmydata.interfaces import IDirectoryNode
 from allmydata.util import log
-from allmydata.util.fileutil import precondition_abspath, get_pathinfo
-from allmydata.util.assertutil import precondition
+from allmydata.util.fileutil import precondition_abspath, get_pathinfo, ConflictError
+from allmydata.util.assertutil import precondition, _assert
 from allmydata.util.deferredutil import HookMixin
 from allmydata.util.encodingutil import listdir_filepath, to_filepath, \
      extend_filepath, unicode_from_filepath, unicode_segments_from, \
@@ -61,8 +61,11 @@ class MagicFolder(service.MultiService):
 
         self.is_ready = False
 
-        self.uploader = Uploader(client, local_path_u, db, upload_dircap, pending_delay, clock)
-        self.downloader = Downloader(client, local_path_u, db, collective_dircap, clock)
+        upload_dirnode = self._client.create_node_from_uri(upload_dircap)
+        collective_dirnode = self._client.create_node_from_uri(collective_dircap)
+
+        self.uploader = Uploader(client, local_path_u, db, upload_dirnode, pending_delay, clock)
+        self.downloader = Downloader(client, local_path_u, db, collective_dirnode, upload_dirnode.get_readonly_uri(), clock)
 
     def startService(self):
         # TODO: why is this being called more than once?
@@ -133,6 +136,10 @@ class QueueMixin(HookMixin):
         self._log("%s += %r" % (counter_name, delta))
         self._client.stats_provider.count(ctr, delta)
 
+    def _logcb(self, res, msg):  # Deferred pass-through: logs "<msg>: <res>" and returns res unchanged
+        self._log("%s: %r" % (msg, res))
+        return res
+
     def _log(self, msg):
         s = "Magic Folder %s %s: %s" % (quote_output(self._client.nickname), self._name, msg)
         self._client.log(s)
@@ -150,10 +157,13 @@ class QueueMixin(HookMixin):
             self._clock.callLater(0, self._turn_deque)
 
     def _turn_deque(self):
+        self._log("_turn_deque")
         if self._stopped:
+            self._log("stopped")
             return
         try:
             item = self._deque.pop()
+            self._log("popped %r" % (item,))
             self._count('objects_queued', -1)
         except IndexError:
             self._log("deque is now empty")
@@ -166,18 +176,19 @@ class QueueMixin(HookMixin):
 
 
 class Uploader(QueueMixin):
-    def __init__(self, client, local_path_u, db, upload_dircap, pending_delay, clock):
+    def __init__(self, client, local_path_u, db, upload_dirnode, pending_delay, clock):
         QueueMixin.__init__(self, client, local_path_u, db, 'uploader', clock)
 
         self.is_ready = False
 
-        # TODO: allow a path rather than a cap URI.
-        self._upload_dirnode = self._client.create_node_from_uri(upload_dircap)
-        if not IDirectoryNode.providedBy(self._upload_dirnode):
-            raise AssertionError("The URI in 'private/magic_folder_dircap' does not refer to a directory.")
-        if self._upload_dirnode.is_unknown() or self._upload_dirnode.is_readonly():
-            raise AssertionError("The URI in 'private/magic_folder_dircap' is not a writecap to a directory.")
+        if not IDirectoryNode.providedBy(upload_dirnode):
+            raise AssertionError("The URI in '%s' does not refer to a directory."
+                                 % os.path.join('private', 'magic_folder_dircap'))
+        if upload_dirnode.is_unknown() or upload_dirnode.is_readonly():
+            raise AssertionError("The URI in '%s' is not a writecap to a directory."
+                                 % os.path.join('private', 'magic_folder_dircap'))
 
+        self._upload_dirnode = upload_dirnode
         self._inotify = get_inotify_module()
         self._notifier = self._inotify.INotify()
 
@@ -245,7 +256,7 @@ class Uploader(QueueMixin):
 
         d = defer.succeed(None)
         for child in children:
-            assert isinstance(child, unicode), child
+            _assert(isinstance(child, unicode), child=child)
             d.addCallback(lambda ign, child=child:
                           ("%s/%s" % (reldir_u, child) if reldir_u else child))
             def _add_pending(relpath_u):
@@ -314,8 +325,12 @@ class Uploader(QueueMixin):
                 current_version = self._db.get_local_file_version(relpath_u)
                 if current_version is None:
                     new_version = 0
-                else:
+                elif self._db.is_new_file(pathinfo, relpath_u):
                     new_version = current_version + 1
+                else:
+                    self._log("Not uploading %r" % (relpath_u,))
+                    self._count('objects_not_uploaded')
+                    return
 
                 metadata = { 'version': new_version,
                              'deleted': True,
@@ -338,7 +353,9 @@ class Uploader(QueueMixin):
                 self.warn("WARNING: cannot upload symlink %s" % quote_filepath(fp))
                 return None
             elif pathinfo.isdir:
-                self._notifier.watch(fp, mask=self.mask, callbacks=[self._notify], recursive=True)
+                if not getattr(self._notifier, 'recursive_includes_new_subdirectories', False):
+                    self._notifier.watch(fp, mask=self.mask, callbacks=[self._notify], recursive=True)
+
                 uploadable = Data("", self._client.convergence)
                 encoded_path_u += magicpath.path2magic(u"/")
                 upload_d = self._upload_dirnode.add_file(encoded_path_u, uploadable, metadata={"version":0}, overwrite=True)
@@ -361,6 +378,8 @@ class Uploader(QueueMixin):
                 elif self._db.is_new_file(pathinfo, relpath_u):
                     new_version = current_version + 1
                 else:
+                    self._log("Not uploading %r" % (relpath_u,))
+                    self._count('objects_not_uploaded')
                     return None
 
                 metadata = { 'version': new_version,
@@ -391,7 +410,7 @@ class Uploader(QueueMixin):
             return res
         def _failed(f):
             self._count('objects_failed')
-            self._log("%r while processing %r" % (f, relpath_u))
+            self._log("%s while processing %r" % (f, relpath_u))
             return f
         d.addCallbacks(_succeeded, _failed)
         return d
@@ -414,6 +433,9 @@ class Uploader(QueueMixin):
 class WriteFileMixin(object):
     FUDGE_SECONDS = 10.0
 
+    def _get_conflicted_filename(self, abspath_u):  # path used for the conflict copy: abspath_u with ".conflict" appended
+        return abspath_u + u".conflict"
+
     def _write_downloaded_file(self, abspath_u, file_contents, is_conflict=False, now=None):
         self._log("_write_downloaded_file(%r, <%d bytes>, is_conflict=%r, now=%r)"
                   % (abspath_u, len(file_contents), is_conflict, now))
@@ -442,6 +464,7 @@ class WriteFileMixin(object):
         fileutil.write(replacement_path_u, file_contents)
         os.utime(replacement_path_u, (now, now - self.FUDGE_SECONDS))
         if is_conflict:
+            print "0x00 ------------ <><> is conflict; calling _rename_conflicted_file... %r %r" % (abspath_u, replacement_path_u)
             return self._rename_conflicted_file(abspath_u, replacement_path_u)
         else:
             try:
@@ -453,24 +476,41 @@ class WriteFileMixin(object):
     def _rename_conflicted_file(self, abspath_u, replacement_path_u):
         self._log("_rename_conflicted_file(%r, %r)" % (abspath_u, replacement_path_u))
 
-        conflict_path_u = abspath_u + u".conflict"
+        conflict_path_u = self._get_conflicted_filename(abspath_u)
+        print "XXX rename %r %r" % (replacement_path_u, conflict_path_u)
+        if os.path.isfile(replacement_path_u):
+            print "%r exists" % (replacement_path_u,)
+        if os.path.isfile(conflict_path_u):
+            print "%r exists" % (conflict_path_u,)
+
         fileutil.rename_no_overwrite(replacement_path_u, conflict_path_u)
         return conflict_path_u
 
+    def _rename_deleted_file(self, abspath_u):  # move the local file aside to '<path>.backup'; always returns abspath_u
+        self._log('renaming deleted file to backup: %s' % (abspath_u,))
+        try:
+            fileutil.rename_no_overwrite(abspath_u, abspath_u + u'.backup')
+        except IOError:
+            # NOTE(review): unverified that a missing source raises IOError here; os-level renames usually raise OSError -- confirm.
+            self._log("Already gone: '%s'" % (abspath_u,))
+        return abspath_u
+
 
 class Downloader(QueueMixin, WriteFileMixin):
     REMOTE_SCAN_INTERVAL = 3  # facilitates tests
 
-    def __init__(self, client, local_path_u, db, collective_dircap, clock):
+    def __init__(self, client, local_path_u, db, collective_dirnode, upload_readonly_dircap, clock):
         QueueMixin.__init__(self, client, local_path_u, db, 'downloader', clock)
 
-        # TODO: allow a path rather than a cap URI.
-        self._collective_dirnode = self._client.create_node_from_uri(collective_dircap)
+        if not IDirectoryNode.providedBy(collective_dirnode):
+            raise AssertionError("The URI in '%s' does not refer to a directory."
+                                 % os.path.join('private', 'collective_dircap'))
+        if collective_dirnode.is_unknown() or not collective_dirnode.is_readonly():
+            raise AssertionError("The URI in '%s' is not a readonly cap to a directory."
+                                 % os.path.join('private', 'collective_dircap'))
 
-        if not IDirectoryNode.providedBy(self._collective_dirnode):
-            raise AssertionError("The URI in 'private/collective_dircap' does not refer to a directory.")
-        if self._collective_dirnode.is_unknown() or not self._collective_dirnode.is_readonly():
-            raise AssertionError("The URI in 'private/collective_dircap' is not a readonly cap to a directory.")
+        self._collective_dirnode = collective_dirnode
+        self._upload_readonly_dircap = upload_readonly_dircap
 
         self._turn_delay = self.REMOTE_SCAN_INTERVAL
         self._download_scan_batch = {} # path -> [(filenode, metadata)]
@@ -496,9 +536,13 @@ class Downloader(QueueMixin, WriteFileMixin):
         We check the remote metadata version against our magic-folder db version number;
         latest version wins.
         """
+        self._log("_should_download(%r, %r)" % (relpath_u, remote_version))
         if magicpath.should_ignore_file(relpath_u):
+            self._log("nope")
             return False
+        self._log("yep")
         v = self._db.get_local_file_version(relpath_u)
+        self._log("v = %r" % (v,))
         return (v is None or v < remote_version)
 
     def _get_local_latest(self, relpath_u):
@@ -564,30 +608,29 @@ class Downloader(QueueMixin, WriteFileMixin):
                     self._log("%r added to download queue" % (relpath_u,))
                     self._append_to_batch(relpath_u, file_node, metadata)
         d.addCallback(scan_listing)
+        d.addBoth(self._logcb, "end of _scan_remote")
         return d
 
     def _scan_remote_collective(self):
         self._log("_scan_remote_collective")
         self._download_scan_batch = {} # XXX
 
-        if self._collective_dirnode is None:
-            return
-        collective_dirmap_d = self._collective_dirnode.list()
-        def do_list(result):
-            others = [x for x in result.keys()]
-            return result, others
-        collective_dirmap_d.addCallback(do_list)
-        def scan_collective(result):
-            d = defer.succeed(None)
-            collective_dirmap, others_list = result
-            for dir_name in others_list:
-                d.addCallback(lambda x, dir_name=dir_name: self._scan_remote(dir_name, collective_dirmap[dir_name][0]))
-                # XXX todo add errback
-            return d
-        collective_dirmap_d.addCallback(scan_collective)
-        collective_dirmap_d.addCallback(self._filter_scan_batch)
-        collective_dirmap_d.addCallback(self._add_batch_to_download_queue)
-        return collective_dirmap_d
+        d = self._collective_dirnode.list()
+        def scan_collective(dirmap):
+            d2 = defer.succeed(None)
+            for dir_name in dirmap:
+                (dirnode, metadata) = dirmap[dir_name]
+                if dirnode.get_readonly_uri() != self._upload_readonly_dircap:
+                    d2.addCallback(lambda ign, dir_name=dir_name: self._scan_remote(dir_name, dirnode))
+                    def _err(f):
+                        self._log("failed to scan DMD for client %r: %s" % (dir_name, f))
+                        # XXX what should we do to make this failure more visible to users?
+                    d2.addErrback(_err)
+            return d2
+        d.addCallback(scan_collective)
+        d.addCallback(self._filter_scan_batch)
+        d.addCallback(self._add_batch_to_download_queue)
+        return d
 
     def _add_batch_to_download_queue(self, result):
         self._log("result = %r" % (result,))
@@ -600,6 +643,7 @@ class Downloader(QueueMixin, WriteFileMixin):
         self._log("pending after = %r" % (self._pending,))
 
     def _filter_scan_batch(self, result):
+        self._log("_filter_scan_batch")
         extension = [] # consider whether this should be a dict
         for relpath_u in self._download_scan_batch.keys():
             if relpath_u in self._pending:
@@ -607,10 +651,15 @@ class Downloader(QueueMixin, WriteFileMixin):
             file_node, metadata = max(self._download_scan_batch[relpath_u], key=lambda x: x[1]['version'])
             if self._should_download(relpath_u, metadata['version']):
                 extension += [(relpath_u, file_node, metadata)]
+            else:
+                self._log("Excluding %r" % (relpath_u,))
+                self._count('objects_excluded')
+                self._call_hook(None, 'processed')
         return extension
 
     def _when_queue_is_empty(self):
         d = task.deferLater(self._clock, self._turn_delay, self._scan_remote_collective)
+        d.addBoth(self._logcb, "after _scan_remote_collective")  # log the scan outcome (success or failure); _logcb returns it unchanged
         d.addCallback(lambda ign: self._turn_deque())
         return d
 
@@ -621,15 +670,8 @@ class Downloader(QueueMixin, WriteFileMixin):
         (relpath_u, file_node, metadata) = item
         fp = self._get_filepath(relpath_u)
         abspath_u = unicode_from_filepath(fp)
-
+        conflict_path_u = self._get_conflicted_filename(abspath_u)
         d = defer.succeed(None)
-        if relpath_u.endswith(u"/"):
-            self._log("mkdir(%r)" % (abspath_u,))
-            d.addCallback(lambda ign: fileutil.make_dirs(abspath_u))
-            d.addCallback(lambda ign: abspath_u)
-        else:
-            d.addCallback(lambda ign: file_node.download_best_version())
-            d.addCallback(lambda contents: self._write_downloaded_file(abspath_u, contents, is_conflict=False))
 
         def do_update_db(written_abspath_u):
             filecap = file_node.get_uri()
@@ -637,7 +679,8 @@ class Downloader(QueueMixin, WriteFileMixin):
             last_downloaded_uri = filecap
             last_downloaded_timestamp = now
             written_pathinfo = get_pathinfo(written_abspath_u)
-            if not written_pathinfo.exists:
+
+            if not written_pathinfo.exists and not metadata.get('deleted', False):
                 raise Exception("downloaded object %s disappeared" % quote_local_unicode_path(written_abspath_u))
 
             self._db.did_upload_version(relpath_u, metadata['version'], last_uploaded_uri,
@@ -647,9 +690,49 @@ class Downloader(QueueMixin, WriteFileMixin):
             self._log("download failed: %s" % (str(f),))
             self._count('objects_failed')
             return f
+
+        if os.path.isfile(conflict_path_u):
+            def fail(res):
+                raise ConflictError("download failed: already conflicted: %r" % (relpath_u,))
+            d.addCallback(fail)
+        else:
+            is_conflict = False
+            if self._db.check_file_db_exists(relpath_u):
+                dmd_last_downloaded_uri = metadata.get('last_downloaded_uri', None)
+                local_last_downloaded_uri = self._db.get_last_downloaded_uri(relpath_u)
+                print "metadata %r" % (metadata,)
+                print "<<<<--- if %r != %r" % (dmd_last_downloaded_uri, local_last_downloaded_uri)
+                if dmd_last_downloaded_uri is not None and local_last_downloaded_uri is not None:
+                    if dmd_last_downloaded_uri != local_last_downloaded_uri:
+                        is_conflict = True
+                        self._count('objects_conflicted')
+
+                #dmd_last_uploaded_uri = metadata.get('last_uploaded_uri', None)
+                #local_last_uploaded_uri = ...
+
+            if relpath_u.endswith(u"/"):
+                if metadata.get('deleted', False):
+                    self._log("rmdir(%r) ignored" % (abspath_u,))
+                else:
+                    self._log("mkdir(%r)" % (abspath_u,))
+                    d.addCallback(lambda ign: fileutil.make_dirs(abspath_u))
+                    d.addCallback(lambda ign: abspath_u)
+            else:
+                if metadata.get('deleted', False):
+                    d.addCallback(lambda ign: self._rename_deleted_file(abspath_u))
+                else:
+                    d.addCallback(lambda ign: file_node.download_best_version())
+                    d.addCallback(lambda contents: self._write_downloaded_file(abspath_u, contents,
+                                                                               is_conflict=is_conflict))
+
         d.addCallbacks(do_update_db, failed)
+
         def remove_from_pending(res):
             self._pending.remove(relpath_u)
             return res
         d.addBoth(remove_from_pending)
+        def trap_conflicts(f):
+            f.trap(ConflictError)
+            return None
+        d.addErrback(trap_conflicts)
         return d