]> git.rkrishnan.org Git - tahoe-lafs/tahoe-lafs.git/blobdiff - src/allmydata/frontends/magic_folder.py
add docstrings
[tahoe-lafs/tahoe-lafs.git] / src / allmydata / frontends / magic_folder.py
index 15ebf518aaff298210a752da4712c2a8d8c57cd3..c075baa53e453f70128a5c123b9799c6f3f8b446 100644 (file)
@@ -9,19 +9,22 @@ from twisted.python.failure import Failure
 from twisted.python import runtime
 from twisted.application import service
 
+from zope.interface import Interface, Attribute, implementer
+
 from allmydata.util import fileutil
 from allmydata.interfaces import IDirectoryNode
 from allmydata.util import log
 from allmydata.util.fileutil import precondition_abspath, get_pathinfo, ConflictError
 from allmydata.util.assertutil import precondition, _assert
 from allmydata.util.deferredutil import HookMixin
+from allmydata.util.progress import PercentProgress
 from allmydata.util.encodingutil import listdir_filepath, to_filepath, \
      extend_filepath, unicode_from_filepath, unicode_segments_from, \
      quote_filepath, quote_local_unicode_path, quote_output, FilenameEncodingError
 from allmydata.immutable.upload import FileName, Data
 from allmydata import magicfolderdb, magicpath
 
-
+defer.setDebugging(True)
 IN_EXCL_UNLINK = 0x04000000L
 
 def get_inotify_module():
@@ -91,10 +94,8 @@ class MagicFolder(service.MultiService):
         """ready is used to signal us to start
         processing the upload and download items...
         """
-        d = self.uploader.start_scanning()
-        d2 = self.downloader.start_scanning()
-        d.addCallback(lambda ign: d2)
-        return d
+        self.uploader.start_uploading()  # synchronous
+        return self.downloader.start_downloading()
 
     def finish(self):
         print "finish"
@@ -128,10 +129,21 @@ class QueueMixin(HookMixin):
                                  % quote_local_unicode_path(self._local_path_u))
 
         self._deque = deque()
+        # do we also want to bound on "maximum age"?
+        self._process_history = deque(maxlen=20)
         self._lazy_tail = defer.succeed(None)
         self._stopped = False
         self._turn_delay = 0
 
+    def get_status(self):
+        """
+        Returns an iterable of instances that implement IQueuedItem
+        """
+        for item in self._deque:
+            yield item
+        for item in self._process_history:
+            yield item
+
     def _get_filepath(self, relpath_u):
         self._log("_get_filepath(%r)" % (relpath_u,))
         return extend_filepath(self._local_filepath, relpath_u.split(u"/"))
@@ -158,22 +170,90 @@ class QueueMixin(HookMixin):
         #open("events", "ab+").write(msg)
 
     def _turn_deque(self):
-        self._log("_turn_deque")
-        if self._stopped:
-            self._log("stopped")
-            return
         try:
-            item = self._deque.pop()
-            self._log("popped %r" % (item,))
-            self._count('objects_queued', -1)
-        except IndexError:
-            self._log("deque is now empty")
-            self._lazy_tail.addCallback(lambda ign: self._when_queue_is_empty())
-        else:
-            self._lazy_tail.addCallback(lambda ign: self._process(item))
-            self._lazy_tail.addBoth(self._call_hook, 'processed')
-            self._lazy_tail.addErrback(log.err)
-            self._lazy_tail.addCallback(lambda ign: task.deferLater(self._clock, self._turn_delay, self._turn_deque))
+            self._log("_turn_deque")
+            if self._stopped:
+                self._log("stopped")
+                return
+            try:
+                item = IQueuedItem(self._deque.pop())
+                self._process_history.append(item)
+
+                self._log("popped %r, now have %d" % (item, len(self._deque)))
+                self._count('objects_queued', -1)
+            except IndexError:
+                self._log("deque is now empty")
+                self._lazy_tail.addBoth(self._logcb, "whawhat empty")
+                self._lazy_tail.addCallback(lambda ign: self._when_queue_is_empty())
+                self._lazy_tail.addBoth(self._logcb, "got past _when_queue_is_empty")
+            else:
+                self._log("_turn_deque else clause")
+                self._lazy_tail.addBoth(self._logcb, "whawhat else %r" % (item,))
+                self._lazy_tail.addCallback(lambda ign: self._process(item))
+                self._lazy_tail.addBoth(self._logcb, "got past _process")
+                self._lazy_tail.addBoth(self._call_hook, 'processed', async=True)
+                self._lazy_tail.addBoth(self._logcb, "got past _call_hook (turn_delay = %r)" % (self._turn_delay,))
+                self._lazy_tail.addErrback(log.err)
+                self._lazy_tail.addCallback(lambda ign: task.deferLater(self._clock, self._turn_delay, self._turn_deque))
+                self._lazy_tail.addBoth(self._logcb, "got past deferLater")
+        except Exception as e:
+            self._log("---- turn deque exception %s" % (e,))
+            raise
+
+
+# this isn't in interfaces.py because it's very specific to QueueMixin
+class IQueuedItem(Interface):
+    relpath_u = Attribute("The path this item represents")
+    progress = Attribute("A PercentProgress instance")
+
+    def set_status(self, status, current_time=None):
+        """
+        """
+
+    def status_time(self, state):
+        """
+        Get the time of particular state change, or None
+        """
+
+    def status_history(self):
+        """
+        All status changes, sorted latest -> oldest
+        """
+
+
+@implementer(IQueuedItem)
+class QueuedItem(object):
+    def __init__(self, relpath_u, progress):
+        self.relpath_u = relpath_u
+        self.progress = progress
+        self._status_history = dict()
+
+    def set_status(self, status, current_time=None):
+        if current_time is None:
+            current_time = time.time()
+        self._status_history[status] = current_time
+
+    def status_time(self, state):
+        """
+        Returns None if there's no status-update for 'state', else returns
+        the timestamp when that state was reached.
+        """
+        return self._status_history.get(state, None)
+
+    def status_history(self):
+        """
+        Returns a list of 2-tuples of (state, timestamp) sorted by timestamp
+        """
+        hist = self._status_history.items()
+        hist.sort(lambda a, b: cmp(a[1], b[1]))
+        return hist
+
+
+class UploadItem(QueuedItem):
+    """
+    Represents a single item the _deque of the Uploader
+    """
+    pass
 
 
 class Uploader(QueueMixin):
@@ -194,7 +274,9 @@ class Uploader(QueueMixin):
         self._upload_dirnode = upload_dirnode
         self._inotify = get_inotify_module()
         self._notifier = self._inotify.INotify()
-        self._pending = set()
+        self._pending = set()  # of unicode relpaths
+
+        self._periodic_full_scan_duration = 10 * 60 # perform a full scan every 10 minutes
 
         if hasattr(self._notifier, 'set_pending_delay'):
             self._notifier.set_pending_delay(pending_delay)
@@ -224,6 +306,7 @@ class Uploader(QueueMixin):
         self._log("stop")
         self._notifier.stopReading()
         self._count('dirs_monitored', -1)
+        self.periodic_callid.cancel()
         if hasattr(self._notifier, 'wait_until_stopped'):
             d = self._notifier.wait_until_stopped()
         else:
@@ -231,22 +314,50 @@ class Uploader(QueueMixin):
         d.addCallback(lambda ign: self._lazy_tail)
         return d
 
-    def start_scanning(self):
-        self._log("start_scanning")
+    def start_uploading(self):
+        self._log("start_uploading")
         self.is_ready = True
-        self._pending = self._db.get_all_relpaths()
-        self._log("all_files %r" % (self._pending))
-        d = self._scan(u"")
-        def _add_pending(ign):
-            # This adds all of the files that were in the db but not already processed
-            # (normally because they have been deleted on disk).
-            self._log("adding %r" % (self._pending))
-            self._deque.extend(self._pending)
-        d.addCallback(_add_pending)
-        d.addCallback(lambda ign: self._turn_deque())
-        return d
+
+        all_relpaths = self._db.get_all_relpaths()
+        self._log("all relpaths: %r" % (all_relpaths,))
+
+        for relpath_u in all_relpaths:
+            self._add_pending(relpath_u)
+
+        self._full_scan()
+
+    def _extend_queue_and_keep_going(self, relpaths_u):
+        self._log("_extend_queue_and_keep_going %r" % (relpaths_u,))
+        for relpath_u in relpaths_u:
+            progress = PercentProgress()
+            item = UploadItem(relpath_u, progress)
+            item.set_status('queued', self._clock.seconds())
+            self._deque.append(item)
+
+        self._count('objects_queued', len(relpaths_u))
+
+        if self.is_ready:
+            if self._immediate:  # for tests
+                self._turn_deque()
+            else:
+                self._clock.callLater(0, self._turn_deque)
+
+    def _full_scan(self):
+        self.periodic_callid = self._clock.callLater(self._periodic_full_scan_duration, self._full_scan)
+        print "FULL SCAN"
+        self._log("_pending %r" % (self._pending))
+        self._scan(u"")
+        self._extend_queue_and_keep_going(self._pending)
+
+    def _add_pending(self, relpath_u):
+        self._log("add pending %r" % (relpath_u,))
+        if not magicpath.should_ignore_file(relpath_u):
+            self._pending.add(relpath_u)
 
     def _scan(self, reldir_u):
+        # Scan a directory by (synchronously) adding the paths of all its children to self._pending.
+        # Note that this doesn't add them to the deque -- that will
+
         self._log("scan %r" % (reldir_u,))
         fp = self._get_filepath(reldir_u)
         try:
@@ -258,24 +369,9 @@ class Uploader(QueueMixin):
             raise Exception("WARNING: magic folder: could not list directory %s due to a filename encoding error"
                             % quote_filepath(fp))
 
-        d = defer.succeed(None)
         for child in children:
             _assert(isinstance(child, unicode), child=child)
-            d.addCallback(lambda ign, child=child:
-                          ("%s/%s" % (reldir_u, child) if reldir_u else child))
-            def _add_pending(relpath_u):
-                if magicpath.should_ignore_file(relpath_u):
-                    return None
-
-                self._pending.add(relpath_u)
-                return relpath_u
-            d.addCallback(_add_pending)
-            # This call to _process doesn't go through the deque, and probably should.
-            d.addCallback(self._process)
-            d.addBoth(self._call_hook, 'processed')
-            d.addErrback(log.err)
-
-        return d
+            self._add_pending("%s/%s" % (reldir_u, child) if reldir_u != u"" else child)
 
     def is_pending(self, relpath_u):
         return relpath_u in self._pending
@@ -296,36 +392,34 @@ class Uploader(QueueMixin):
             self._log("ignoring event for %r (creation of non-directory)\n" % (relpath_u,))
             return
         if relpath_u in self._pending:
-            self._log("ignoring event for %r (already pending)" % (relpath_u,))
+            self._log("not queueing %r because it is already pending" % (relpath_u,))
             return
         if magicpath.should_ignore_file(relpath_u):
             self._log("ignoring event for %r (ignorable path)" % (relpath_u,))
             return
 
-        self._log("appending %r to deque" % (relpath_u,))
-        self._deque.append(relpath_u)
         self._pending.add(relpath_u)
-        self._count('objects_queued')
-        if self.is_ready:
-            if self._immediate:  # for tests
-                self._turn_deque()
-            else:
-                self._clock.callLater(0, self._turn_deque)
+        self._extend_queue_and_keep_going([relpath_u])
 
     def _when_queue_is_empty(self):
         return defer.succeed(None)
 
-    def _process(self, relpath_u):
+    def _process(self, item):
         # Uploader
+        relpath_u = item.relpath_u
         self._log("_process(%r)" % (relpath_u,))
+        item.set_status('started', self._clock.seconds())
+
         if relpath_u is None:
+            item.set_status('invalid_path', self._clock.seconds())
             return
         precondition(isinstance(relpath_u, unicode), relpath_u)
         precondition(not relpath_u.endswith(u'/'), relpath_u)
 
         d = defer.succeed(None)
 
-        def _maybe_upload(val, now=None):
+        def _maybe_upload(ign, now=None):
+            self._log("_maybe_upload: relpath_u=%r, now=%r" % (relpath_u, now))
             if now is None:
                 now = time.time()
             fp = self._get_filepath(relpath_u)
@@ -361,8 +455,12 @@ class Uploader(QueueMixin):
                     metadata['last_downloaded_uri'] = db_entry.last_downloaded_uri
 
                 empty_uploadable = Data("", self._client.convergence)
-                d2 = self._upload_dirnode.add_file(encoded_path_u, empty_uploadable,
-                                                   metadata=metadata, overwrite=True)
+                d2 = self._upload_dirnode.add_file(
+                    encoded_path_u, empty_uploadable,
+                    metadata=metadata,
+                    overwrite=True,
+                    progress=item.progress,
+                )
 
                 def _add_db_entry(filenode):
                     filecap = filenode.get_uri()
@@ -377,21 +475,28 @@ class Uploader(QueueMixin):
                 self.warn("WARNING: cannot upload symlink %s" % quote_filepath(fp))
                 return None
             elif pathinfo.isdir:
+                print "ISDIR "
                 if not getattr(self._notifier, 'recursive_includes_new_subdirectories', False):
                     self._notifier.watch(fp, mask=self.mask, callbacks=[self._notify], recursive=True)
 
                 uploadable = Data("", self._client.convergence)
                 encoded_path_u += magicpath.path2magic(u"/")
                 self._log("encoded_path_u =  %r" % (encoded_path_u,))
-                upload_d = self._upload_dirnode.add_file(encoded_path_u, uploadable, metadata={"version":0}, overwrite=True)
-                def _succeeded(ign):
+                upload_d = self._upload_dirnode.add_file(
+                    encoded_path_u, uploadable,
+                    metadata={"version": 0},
+                    overwrite=True,
+                    progress=item.progress,
+                )
+                def _dir_succeeded(ign):
                     self._log("created subdirectory %r" % (relpath_u,))
                     self._count('directories_created')
-                def _failed(f):
+                def _dir_failed(f):
                     self._log("failed to create subdirectory %r" % (relpath_u,))
                     return f
-                upload_d.addCallbacks(_succeeded, _failed)
+                upload_d.addCallbacks(_dir_succeeded, _dir_failed)
                 upload_d.addCallback(lambda ign: self._scan(relpath_u))
+                upload_d.addCallback(lambda ign: self._extend_queue_and_keep_going(self._pending))
                 return upload_d
             elif pathinfo.isfile:
                 db_entry = self._db.get_db_entry(relpath_u)
@@ -413,8 +518,12 @@ class Uploader(QueueMixin):
                     metadata['last_downloaded_uri'] = db_entry.last_downloaded_uri
 
                 uploadable = FileName(unicode_from_filepath(fp), self._client.convergence)
-                d2 = self._upload_dirnode.add_file(encoded_path_u, uploadable,
-                                                   metadata=metadata, overwrite=True)
+                d2 = self._upload_dirnode.add_file(
+                    encoded_path_u, uploadable,
+                    metadata=metadata,
+                    overwrite=True,
+                    progress=item.progress,
+                )
 
                 def _add_db_entry(filenode):
                     filecap = filenode.get_uri()
@@ -433,10 +542,12 @@ class Uploader(QueueMixin):
 
         def _succeeded(res):
             self._count('objects_succeeded')
+            item.set_status('success', self._clock.seconds())
             return res
         def _failed(f):
             self._count('objects_failed')
             self._log("%s while processing %r" % (f, relpath_u))
+            item.set_status('failure', self._clock.seconds())
             return f
         d.addCallbacks(_succeeded, _failed)
         return d
@@ -485,8 +596,8 @@ class WriteFileMixin(object):
         # ensure parent directory exists
         head, tail = os.path.split(abspath_u)
 
+        old_mask = os.umask(self._umask)
         try:
-            old_mask = os.umask(self._umask)
             fileutil.make_dirs(head, (~ self._umask) & 0777)
             fileutil.write(replacement_path_u, file_contents)
         finally:
@@ -525,6 +636,16 @@ class WriteFileMixin(object):
         return abspath_u
 
 
+class DownloadItem(QueuedItem):
+    """
+    Represents a single item in the _deque of the Downloader
+    """
+    def __init__(self, relpath_u, progress, filenode, metadata):
+        super(DownloadItem, self).__init__(relpath_u, progress)
+        self.file_node = filenode
+        self.metadata = metadata
+
+
 class Downloader(QueueMixin, WriteFileMixin):
     REMOTE_SCAN_INTERVAL = 3  # facilitates tests
 
@@ -543,10 +664,10 @@ class Downloader(QueueMixin, WriteFileMixin):
         self._upload_readonly_dircap = upload_readonly_dircap
         self._is_upload_pending = is_upload_pending
         self._umask = umask
-        self._turn_delay = self.REMOTE_SCAN_INTERVAL
 
-    def start_scanning(self):
-        self._log("start_scanning")
+    def start_downloading(self):
+        self._log("start_downloading")
+        self._turn_delay = self.REMOTE_SCAN_INTERVAL
         files = self._db.get_all_relpaths()
         self._log("all files %s" % files)
 
@@ -556,6 +677,7 @@ class Downloader(QueueMixin, WriteFileMixin):
         return d
 
     def stop(self):
+        self._log("stop")
         self._stopped = True
         d = defer.succeed(None)
         d.addCallback(lambda ign: self._lazy_tail)
@@ -670,17 +792,24 @@ class Downloader(QueueMixin, WriteFileMixin):
                 file_node, metadata = max(scan_batch[relpath_u], key=lambda x: x[1]['version'])
 
                 if self._should_download(relpath_u, metadata['version']):
-                    self._deque.append( (relpath_u, file_node, metadata) )
+                    to_dl = DownloadItem(
+                        relpath_u,
+                        PercentProgress(file_node.get_size()),
+                        file_node,
+                        metadata,
+                    )
+                    to_dl.set_status('queued', self._clock.seconds())
+                    self._deque.append(to_dl)
                 else:
                     self._log("Excluding %r" % (relpath_u,))
-                    self._call_hook(None, 'processed')
+                    self._call_hook(None, 'processed', async=True)
 
             self._log("deque after = %r" % (self._deque,))
         d.addCallback(_filter_batch_to_deque)
         return d
 
     def _when_queue_is_empty(self):
-        d = task.deferLater(self._clock, self._turn_delay, self._scan_remote_collective)
+        d = task.deferLater(self._clock, self.REMOTE_SCAN_INTERVAL, self._scan_remote_collective)
         d.addBoth(self._logcb, "after _scan_remote_collective 1")
         d.addCallback(lambda ign: self._turn_deque())
         return d
@@ -688,42 +817,49 @@ class Downloader(QueueMixin, WriteFileMixin):
     def _process(self, item, now=None):
         # Downloader
         self._log("_process(%r)" % (item,))
-        if now is None:
-            now = time.time()
-        (relpath_u, file_node, metadata) = item
-        fp = self._get_filepath(relpath_u)
+        if now is None:  # XXX why can we pass in now?
+            now = time.time()  # self._clock.seconds()
+
+        self._log("started! %s" % (now,))
+        item.set_status('started', now)
+        fp = self._get_filepath(item.relpath_u)
         abspath_u = unicode_from_filepath(fp)
         conflict_path_u = self._get_conflicted_filename(abspath_u)
 
         d = defer.succeed(None)
 
         def do_update_db(written_abspath_u):
-            filecap = file_node.get_uri()
-            last_uploaded_uri = metadata.get('last_uploaded_uri', None)
+            filecap = item.file_node.get_uri()
+            last_uploaded_uri = item.metadata.get('last_uploaded_uri', None)
             last_downloaded_uri = filecap
             last_downloaded_timestamp = now
             written_pathinfo = get_pathinfo(written_abspath_u)
 
-            if not written_pathinfo.exists and not metadata.get('deleted', False):
+            if not written_pathinfo.exists and not item.metadata.get('deleted', False):
                 raise Exception("downloaded object %s disappeared" % quote_local_unicode_path(written_abspath_u))
 
-            self._db.did_upload_version(relpath_u, metadata['version'], last_uploaded_uri,
-                                        last_downloaded_uri, last_downloaded_timestamp, written_pathinfo)
+            self._db.did_upload_version(
+                item.relpath_u, item.metadata['version'], last_uploaded_uri,
+                last_downloaded_uri, last_downloaded_timestamp, written_pathinfo,
+            )
             self._count('objects_downloaded')
+            item.set_status('success', self._clock.seconds())
+
         def failed(f):
+            item.set_status('failure', self._clock.seconds())
             self._log("download failed: %s" % (str(f),))
             self._count('objects_failed')
             return f
 
         if os.path.isfile(conflict_path_u):
             def fail(res):
-                raise ConflictError("download failed: already conflicted: %r" % (relpath_u,))
+                raise ConflictError("download failed: already conflicted: %r" % (item.relpath_u,))
             d.addCallback(fail)
         else:
             is_conflict = False
-            db_entry = self._db.get_db_entry(relpath_u)
-            dmd_last_downloaded_uri = metadata.get('last_downloaded_uri', None)
-            dmd_last_uploaded_uri = metadata.get('last_uploaded_uri', None)
+            db_entry = self._db.get_db_entry(item.relpath_u)
+            dmd_last_downloaded_uri = item.metadata.get('last_downloaded_uri', None)
+            dmd_last_uploaded_uri = item.metadata.get('last_uploaded_uri', None)
             if db_entry:
                 if dmd_last_downloaded_uri is not None and db_entry.last_downloaded_uri is not None:
                     if dmd_last_downloaded_uri != db_entry.last_downloaded_uri:
@@ -732,22 +868,22 @@ class Downloader(QueueMixin, WriteFileMixin):
                 elif dmd_last_uploaded_uri is not None and dmd_last_uploaded_uri != db_entry.last_uploaded_uri:
                     is_conflict = True
                     self._count('objects_conflicted')
-                elif self._is_upload_pending(relpath_u):
+                elif self._is_upload_pending(item.relpath_u):
                     is_conflict = True
                     self._count('objects_conflicted')
 
-            if relpath_u.endswith(u"/"):
-                if metadata.get('deleted', False):
+            if item.relpath_u.endswith(u"/"):
+                if item.metadata.get('deleted', False):
                     self._log("rmdir(%r) ignored" % (abspath_u,))
                 else:
                     self._log("mkdir(%r)" % (abspath_u,))
                     d.addCallback(lambda ign: fileutil.make_dirs(abspath_u))
                     d.addCallback(lambda ign: abspath_u)
             else:
-                if metadata.get('deleted', False):
+                if item.metadata.get('deleted', False):
                     d.addCallback(lambda ign: self._rename_deleted_file(abspath_u))
                 else:
-                    d.addCallback(lambda ign: file_node.download_best_version())
+                    d.addCallback(lambda ign: item.file_node.download_best_version(progress=item.progress))
                     d.addCallback(lambda contents: self._write_downloaded_file(abspath_u, contents,
                                                                                is_conflict=is_conflict))