]> git.rkrishnan.org Git - tahoe-lafs/tahoe-lafs.git/blobdiff - src/allmydata/frontends/magic_folder.py
Daira's excellent fixes to the uploader from our pairing session
[tahoe-lafs/tahoe-lafs.git] / src / allmydata / frontends / magic_folder.py
index 1be3e683adf3b9e8eb003093982b0c18521613e0..315ee70470da411ad09a828c1ef1ad3d2293e8c2 100644 (file)
@@ -42,10 +42,21 @@ def get_inotify_module():
         raise
 
 
+def is_new_file(pathinfo, db_entry):
+    if db_entry is None:
+        return True
+
+    if not pathinfo.exists and db_entry.size is None:
+        return False
+
+    return ((pathinfo.size, pathinfo.ctime, pathinfo.mtime) !=
+            (db_entry.size, db_entry.ctime, db_entry.mtime))
+
+
 class MagicFolder(service.MultiService):
     name = 'magic-folder'
 
-    def __init__(self, client, upload_dircap, collective_dircap, local_path_u, dbfile,
+    def __init__(self, client, upload_dircap, collective_dircap, local_path_u, dbfile, umask,
                  pending_delay=1.0, clock=None):
         precondition_abspath(local_path_u)
 
@@ -65,7 +76,8 @@ class MagicFolder(service.MultiService):
         collective_dirnode = self._client.create_node_from_uri(collective_dircap)
 
         self.uploader = Uploader(client, local_path_u, db, upload_dirnode, pending_delay, clock, immediate)
-        self.downloader = Downloader(client, local_path_u, db, collective_dirnode, upload_dirnode.get_readonly_uri(), clock)
+        self.downloader = Downloader(client, local_path_u, db, collective_dirnode,
+                                     upload_dirnode.get_readonly_uri(), clock, self.uploader.is_pending, umask)
 
     def startService(self):
         # TODO: why is this being called more than once?
@@ -79,10 +91,8 @@ class MagicFolder(service.MultiService):
         """ready is used to signal us to start
         processing the upload and download items...
         """
-        d = self.uploader.start_scanning()
-        d2 = self.downloader.start_scanning()
-        d.addCallback(lambda ign: d2)
-        return d
+        self.uploader.start_uploading()  # synchronous
+        return self.downloader.start_downloading()
 
     def finish(self):
         print "finish"
@@ -182,7 +192,7 @@ class Uploader(QueueMixin):
         self._upload_dirnode = upload_dirnode
         self._inotify = get_inotify_module()
         self._notifier = self._inotify.INotify()
-        self._pending = set()
+        self._pending = set()  # of unicode relpaths
 
         if hasattr(self._notifier, 'set_pending_delay'):
             self._notifier.set_pending_delay(pending_delay)
@@ -219,22 +229,43 @@ class Uploader(QueueMixin):
         d.addCallback(lambda ign: self._lazy_tail)
         return d
 
-    def start_scanning(self):
-        self._log("start_scanning")
+    def start_uploading(self):
+        self._log("start_uploading")
         self.is_ready = True
-        self._pending = self._db.get_all_relpaths()
+
+        all_relpaths = self._db.get_all_relpaths()
+        self._log("all relpaths: %r" % (all_relpaths,))
+
+        for relpath_u in all_relpaths:
+            self._add_pending(relpath_u)
+
+        self._full_scan()
+        self._extend_queue_and_keep_going(self._pending)
+
+    def _extend_queue_and_keep_going(self, relpaths_u):
+        self._log("queueing %r" % (relpaths_u,))
+        self._deque.extend(relpaths_u)
+        self._count('objects_queued', len(relpaths_u))
+
+        if self.is_ready:
+            if self._immediate:  # for tests
+                self._turn_deque()
+            else:
+                self._clock.callLater(0, self._turn_deque)
+
+    def _full_scan(self):
+        print "FULL SCAN"
         self._log("all_files %r" % (self._pending))
-        d = self._scan(u"")
-        def _add_pending(ign):
-            # This adds all of the files that were in the db but not already processed
-            # (normally because they have been deleted on disk).
-            self._log("adding %r" % (self._pending))
-            self._deque.extend(self._pending)
-        d.addCallback(_add_pending)
-        d.addCallback(lambda ign: self._turn_deque())
-        return d
+        self._scan(u"")
+
+    def _add_pending(self, relpath_u):
+        if not magicpath.should_ignore_file(relpath_u):
+            self._pending.add(relpath_u)
 
     def _scan(self, reldir_u):
+        # Scan a directory by (synchronously) adding the paths of all its children to self._pending.
+        # Note that this doesn't add them to the deque -- that will
+
         self._log("scan %r" % (reldir_u,))
         fp = self._get_filepath(reldir_u)
         try:
@@ -246,24 +277,12 @@ class Uploader(QueueMixin):
             raise Exception("WARNING: magic folder: could not list directory %s due to a filename encoding error"
                             % quote_filepath(fp))
 
-        d = defer.succeed(None)
         for child in children:
             _assert(isinstance(child, unicode), child=child)
-            d.addCallback(lambda ign, child=child:
-                          ("%s/%s" % (reldir_u, child) if reldir_u else child))
-            def _add_pending(relpath_u):
-                if magicpath.should_ignore_file(relpath_u):
-                    return None
+            self._add_pending("%s/%s" % (reldir_u, child) if reldir_u != u"" else child)
 
-                self._pending.add(relpath_u)
-                return relpath_u
-            d.addCallback(_add_pending)
-            # This call to _process doesn't go through the deque, and probably should.
-            d.addCallback(self._process)
-            d.addBoth(self._call_hook, 'processed')
-            d.addErrback(log.err)
-
-        return d
+    def is_pending(self, relpath_u):
+        return relpath_u in self._pending
 
     def _notify(self, opaque, path, events_mask):
         self._log("inotify event %r, %r, %r\n" % (opaque, path, ', '.join(self._inotify.humanReadableMask(events_mask))))
@@ -281,21 +300,14 @@ class Uploader(QueueMixin):
             self._log("ignoring event for %r (creation of non-directory)\n" % (relpath_u,))
             return
         if relpath_u in self._pending:
-            self._log("ignoring event for %r (already pending)" % (relpath_u,))
+            self._log("not queueing %r because it is already pending" % (relpath_u,))
             return
         if magicpath.should_ignore_file(relpath_u):
             self._log("ignoring event for %r (ignorable path)" % (relpath_u,))
             return
 
-        self._log("appending %r to deque" % (relpath_u,))
-        self._deque.append(relpath_u)
         self._pending.add(relpath_u)
-        self._count('objects_queued')
-        if self.is_ready:
-            if self._immediate:  # for tests
-                self._turn_deque()
-            else:
-                self._clock.callLater(0, self._turn_deque)
+        self._extend_queue_and_keep_going([relpath_u])
 
     def _when_queue_is_empty(self):
         return defer.succeed(None)
@@ -325,17 +337,15 @@ class Uploader(QueueMixin):
                 # FIXME merge this with the 'isfile' case.
                 self._log("notified object %s disappeared (this is normal)" % quote_filepath(fp))
                 self._count('objects_disappeared')
-                if not self._db.check_file_db_exists(relpath_u):
+
+                db_entry = self._db.get_db_entry(relpath_u)
+                if db_entry is None:
                     return None
 
-                last_downloaded_timestamp = now
-                last_downloaded_uri = self._db.get_last_downloaded_uri(relpath_u)
+                last_downloaded_timestamp = now  # is this correct?
 
-                current_version = self._db.get_local_file_version(relpath_u)
-                if current_version is None:
-                    new_version = 0
-                elif self._db.is_new_file(pathinfo, relpath_u):
-                    new_version = current_version + 1
+                if is_new_file(pathinfo, db_entry):
+                    new_version = db_entry.version + 1
                 else:
                     self._log("Not uploading %r" % (relpath_u,))
                     self._count('objects_not_uploaded')
@@ -344,8 +354,8 @@ class Uploader(QueueMixin):
                 metadata = { 'version': new_version,
                              'deleted': True,
                              'last_downloaded_timestamp': last_downloaded_timestamp }
-                if last_downloaded_uri is not None:
-                    metadata['last_downloaded_uri'] = last_downloaded_uri
+                if db_entry.last_downloaded_uri is not None:
+                    metadata['last_downloaded_uri'] = db_entry.last_downloaded_uri
 
                 empty_uploadable = Data("", self._client.convergence)
                 d2 = self._upload_dirnode.add_file(encoded_path_u, empty_uploadable,
@@ -353,8 +363,10 @@ class Uploader(QueueMixin):
 
                 def _add_db_entry(filenode):
                     filecap = filenode.get_uri()
+                    last_downloaded_uri = metadata.get('last_downloaded_uri', None)
                     self._db.did_upload_version(relpath_u, new_version, filecap,
-                                                last_downloaded_uri, last_downloaded_timestamp, pathinfo)
+                                                last_downloaded_uri, last_downloaded_timestamp,
+                                                pathinfo)
                     self._count('files_uploaded')
                 d2.addCallback(_add_db_entry)
                 return d2
@@ -379,14 +391,14 @@ class Uploader(QueueMixin):
                 upload_d.addCallback(lambda ign: self._scan(relpath_u))
                 return upload_d
             elif pathinfo.isfile:
-                last_downloaded_uri = self._db.get_last_downloaded_uri(relpath_u)
+                db_entry = self._db.get_db_entry(relpath_u)
+
                 last_downloaded_timestamp = now
 
-                current_version = self._db.get_local_file_version(relpath_u)
-                if current_version is None:
+                if db_entry is None:
                     new_version = 0
-                elif self._db.is_new_file(pathinfo, relpath_u):
-                    new_version = current_version + 1
+                elif is_new_file(pathinfo, db_entry):
+                    new_version = db_entry.version + 1
                 else:
                     self._log("Not uploading %r" % (relpath_u,))
                     self._count('objects_not_uploaded')
@@ -394,8 +406,8 @@ class Uploader(QueueMixin):
 
                 metadata = { 'version': new_version,
                              'last_downloaded_timestamp': last_downloaded_timestamp }
-                if last_downloaded_uri is not None:
-                    metadata['last_downloaded_uri'] = last_downloaded_uri
+                if db_entry is not None and db_entry.last_downloaded_uri is not None:
+                    metadata['last_downloaded_uri'] = db_entry.last_downloaded_uri
 
                 uploadable = FileName(unicode_from_filepath(fp), self._client.convergence)
                 d2 = self._upload_dirnode.add_file(encoded_path_u, uploadable,
@@ -405,7 +417,8 @@ class Uploader(QueueMixin):
                     filecap = filenode.get_uri()
                     last_downloaded_uri = metadata.get('last_downloaded_uri', None)
                     self._db.did_upload_version(relpath_u, new_version, filecap,
-                                                last_downloaded_uri, last_downloaded_timestamp, pathinfo)
+                                                last_downloaded_uri, last_downloaded_timestamp,
+                                                pathinfo)
                     self._count('files_uploaded')
                 d2.addCallback(_add_db_entry)
                 return d2
@@ -468,10 +481,14 @@ class WriteFileMixin(object):
 
         # ensure parent directory exists
         head, tail = os.path.split(abspath_u)
-        mode = 0777 # XXX
-        fileutil.make_dirs(head, mode)
 
-        fileutil.write(replacement_path_u, file_contents)
+        old_mask = os.umask(self._umask)
+        try:
+            fileutil.make_dirs(head, (~ self._umask) & 0777)
+            fileutil.write(replacement_path_u, file_contents)
+        finally:
+            os.umask(old_mask)
+
         os.utime(replacement_path_u, (now, now - self.FUDGE_SECONDS))
         if is_conflict:
             print "0x00 ------------ <><> is conflict; calling _rename_conflicted_file... %r %r" % (abspath_u, replacement_path_u)
@@ -500,8 +517,7 @@ class WriteFileMixin(object):
         self._log('renaming deleted file to backup: %s' % (abspath_u,))
         try:
             fileutil.rename_no_overwrite(abspath_u, abspath_u + u'.backup')
-        except IOError:
-            # XXX is this the correct error?
+        except OSError:
             self._log("Already gone: '%s'" % (abspath_u,))
         return abspath_u
 
@@ -509,7 +525,8 @@ class WriteFileMixin(object):
 class Downloader(QueueMixin, WriteFileMixin):
     REMOTE_SCAN_INTERVAL = 3  # facilitates tests
 
-    def __init__(self, client, local_path_u, db, collective_dirnode, upload_readonly_dircap, clock):
+    def __init__(self, client, local_path_u, db, collective_dirnode,
+                 upload_readonly_dircap, clock, is_upload_pending, umask):
         QueueMixin.__init__(self, client, local_path_u, db, 'downloader', clock)
 
         if not IDirectoryNode.providedBy(collective_dirnode):
@@ -521,15 +538,15 @@ class Downloader(QueueMixin, WriteFileMixin):
 
         self._collective_dirnode = collective_dirnode
         self._upload_readonly_dircap = upload_readonly_dircap
+        self._is_upload_pending = is_upload_pending
+        self._umask = umask
 
-        self._turn_delay = self.REMOTE_SCAN_INTERVAL
-
-    def start_scanning(self):
-        self._log("start_scanning")
+    def start_downloading(self):
+        self._log("start_downloading")
         files = self._db.get_all_relpaths()
         self._log("all files %s" % files)
 
-        d = self._scan_remote_collective()
+        d = self._scan_remote_collective(scan_self=True)
         d.addBoth(self._logcb, "after _scan_remote_collective 0")
         self._turn_deque()
         return d
@@ -551,9 +568,11 @@ class Downloader(QueueMixin, WriteFileMixin):
             self._log("nope")
             return False
         self._log("yep")
-        v = self._db.get_local_file_version(relpath_u)
-        self._log("v = %r" % (v,))
-        return (v is None or v < remote_version)
+        db_entry = self._db.get_db_entry(relpath_u)
+        if db_entry is None:
+            return True
+        self._log("version %r" % (db_entry.version,))
+        return (db_entry.version < remote_version)
 
     def _get_local_latest(self, relpath_u):
         """
@@ -563,7 +582,8 @@ class Downloader(QueueMixin, WriteFileMixin):
         """
         if not self._get_filepath(relpath_u).exists():
             return None
-        return self._db.get_local_file_version(relpath_u)
+        db_entry = self._db.get_db_entry(relpath_u)
+        return None if db_entry is None else db_entry.version
 
     def _get_collective_latest_file(self, filename):
         """
@@ -620,7 +640,7 @@ class Downloader(QueueMixin, WriteFileMixin):
         d.addBoth(self._logcb, "end of _scan_remote_dmd")
         return d
 
-    def _scan_remote_collective(self):
+    def _scan_remote_collective(self, scan_self=False):
         self._log("_scan_remote_collective")
         scan_batch = {}  # path -> [(filenode, metadata)]
 
@@ -629,7 +649,7 @@ class Downloader(QueueMixin, WriteFileMixin):
             d2 = defer.succeed(None)
             for dir_name in dirmap:
                 (dirnode, metadata) = dirmap[dir_name]
-                if dirnode.get_readonly_uri() != self._upload_readonly_dircap:
+                if scan_self or dirnode.get_readonly_uri() != self._upload_readonly_dircap:
                     d2.addCallback(lambda ign, dir_name=dir_name, dirnode=dirnode:
                                    self._scan_remote_dmd(dir_name, dirnode, scan_batch))
                     def _err(f, dir_name=dir_name):
@@ -649,7 +669,6 @@ class Downloader(QueueMixin, WriteFileMixin):
                     self._deque.append( (relpath_u, file_node, metadata) )
                 else:
                     self._log("Excluding %r" % (relpath_u,))
-                    self._count('objects_excluded')
                     self._call_hook(None, 'processed')
 
             self._log("deque after = %r" % (self._deque,))
@@ -657,7 +676,7 @@ class Downloader(QueueMixin, WriteFileMixin):
         return d
 
     def _when_queue_is_empty(self):
-        d = task.deferLater(self._clock, self._turn_delay, self._scan_remote_collective)
+        d = task.deferLater(self._clock, self.REMOTE_SCAN_INTERVAL, self._scan_remote_collective)
         d.addBoth(self._logcb, "after _scan_remote_collective 1")
         d.addCallback(lambda ign: self._turn_deque())
         return d
@@ -698,18 +717,20 @@ class Downloader(QueueMixin, WriteFileMixin):
             d.addCallback(fail)
         else:
             is_conflict = False
-            if self._db.check_file_db_exists(relpath_u):
-                dmd_last_downloaded_uri = metadata.get('last_downloaded_uri', None)
-                local_last_downloaded_uri = self._db.get_last_downloaded_uri(relpath_u)
-                print "metadata %r" % (metadata,)
-                print "<<<<--- if %r != %r" % (dmd_last_downloaded_uri, local_last_downloaded_uri)
-                if dmd_last_downloaded_uri is not None and local_last_downloaded_uri is not None:
-                    if dmd_last_downloaded_uri != local_last_downloaded_uri:
+            db_entry = self._db.get_db_entry(relpath_u)
+            dmd_last_downloaded_uri = metadata.get('last_downloaded_uri', None)
+            dmd_last_uploaded_uri = metadata.get('last_uploaded_uri', None)
+            if db_entry:
+                if dmd_last_downloaded_uri is not None and db_entry.last_downloaded_uri is not None:
+                    if dmd_last_downloaded_uri != db_entry.last_downloaded_uri:
                         is_conflict = True
                         self._count('objects_conflicted')
-
-                #dmd_last_uploaded_uri = metadata.get('last_uploaded_uri', None)
-                #local_last_uploaded_uri = ...
+                elif dmd_last_uploaded_uri is not None and dmd_last_uploaded_uri != db_entry.last_uploaded_uri:
+                    is_conflict = True
+                    self._count('objects_conflicted')
+                elif self._is_upload_pending(relpath_u):
+                    is_conflict = True
+                    self._count('objects_conflicted')
 
             if relpath_u.endswith(u"/"):
                 if metadata.get('deleted', False):