]> git.rkrishnan.org Git - tahoe-lafs/tahoe-lafs.git/blobdiff - src/allmydata/frontends/magic_folder.py
Add basic bob upload test and fix conflict detect
[tahoe-lafs/tahoe-lafs.git] / src / allmydata / frontends / magic_folder.py
index 91b92f5d2ba8a9e80715707e6ef0edd4662b2b8f..82780a02f658d462157b2ce550340ec5f9b9b239 100644 (file)
@@ -1,6 +1,7 @@
 
 import sys, os
 import os.path
+import shutil
 from collections import deque
 import time
 
@@ -12,7 +13,7 @@ from twisted.application import service
 from allmydata.util import fileutil
 from allmydata.interfaces import IDirectoryNode
 from allmydata.util import log
-from allmydata.util.fileutil import precondition_abspath, get_pathinfo
+from allmydata.util.fileutil import precondition_abspath, get_pathinfo, ConflictError
 from allmydata.util.assertutil import precondition
 from allmydata.util.deferredutil import HookMixin
 from allmydata.util.encodingutil import listdir_filepath, to_filepath, \
@@ -133,6 +134,10 @@ class QueueMixin(HookMixin):
         self._log("%s += %r" % (counter_name, delta))
         self._client.stats_provider.count(ctr, delta)
 
+    def _logcb(self, res, msg):
+        self._log("%s: %r" % (msg, res))
+        return res
+
     def _log(self, msg):
         s = "Magic Folder %s %s: %s" % (quote_output(self._client.nickname), self._name, msg)
         self._client.log(s)
@@ -150,10 +155,13 @@ class QueueMixin(HookMixin):
             self._clock.callLater(0, self._turn_deque)
 
     def _turn_deque(self):
+        self._log("_turn_deque")
         if self._stopped:
+            self._log("stopped")
             return
         try:
             item = self._deque.pop()
+            self._log("popped %r" % (item,))
             self._count('objects_queued', -1)
         except IndexError:
             self._log("deque is now empty")
@@ -184,12 +192,10 @@ class Uploader(QueueMixin):
         if hasattr(self._notifier, 'set_pending_delay'):
             self._notifier.set_pending_delay(pending_delay)
 
-        # We don't watch for IN_CREATE, because that would cause us to read and upload a
-        # possibly-incomplete file before the application has closed it. There should always
-        # be an IN_CLOSE_WRITE after an IN_CREATE (I think).
         # TODO: what about IN_MOVE_SELF, IN_MOVED_FROM, or IN_UNMOUNT?
         #
-        self.mask = ( self._inotify.IN_CLOSE_WRITE
+        self.mask = ( self._inotify.IN_CREATE
+                    | self._inotify.IN_CLOSE_WRITE
                     | self._inotify.IN_MOVED_TO
                     | self._inotify.IN_MOVED_FROM
                     | self._inotify.IN_DELETE
@@ -266,6 +272,19 @@ class Uploader(QueueMixin):
 
     def _notify(self, opaque, path, events_mask):
         self._log("inotify event %r, %r, %r\n" % (opaque, path, ', '.join(self._inotify.humanReadableMask(events_mask))))
+
+        # We filter out IN_CREATE events not associated with a directory.
+        # Acting on IN_CREATE for files could cause us to read and upload
+        # a possibly-incomplete file before the application has closed it.
+        # There should always be an IN_CLOSE_WRITE after an IN_CREATE, I think.
+        # It isn't possible to avoid watching for IN_CREATE at all, because
+        # it is the only event notified for a directory creation.
+
+        if ((events_mask & self._inotify.IN_CREATE) != 0 and
+            (events_mask & self._inotify.IN_ISDIR) == 0):
+            self._log("ignoring inotify event for creation of file %r\n" % (path,))
+            return
+
         relpath_u = self._get_relpath(path)
         self._append_to_deque(relpath_u)
 
@@ -291,33 +310,41 @@ class Uploader(QueueMixin):
             encoded_path_u = magicpath.path2magic(relpath_u)
 
             if not pathinfo.exists:
+                # FIXME merge this with the 'isfile' case.
                 self._log("notified object %s disappeared (this is normal)" % quote_filepath(fp))
                 self._count('objects_disappeared')
-                d2 = defer.succeed(None)
-                if self._db.check_file_db_exists(relpath_u):
-                    last_downloaded_timestamp = now
-                    d2.addCallback(lambda ign: self._get_metadata(encoded_path_u))
-                    current_version = self._db.get_local_file_version(relpath_u) + 1
-                    new_metadata = {}
-                    def set_deleted(metadata):
-                        last_downloaded_uri = metadata.get('last_downloaded_uri', None)
-                        new_metadata['last_downloaded_uri'] = last_downloaded_uri # XXX this has got to be wrong
-                        new_metadata['version'] = current_version
-                        new_metadata['deleted'] = True
-                        empty_uploadable = Data("", self._client.convergence)
-                        return self._upload_dirnode.add_file(encoded_path_u, empty_uploadable, overwrite=True, metadata=metadata)
-                    d2.addCallback(set_deleted)
-                    def add_db_entry(filenode):
-                        filecap = filenode.get_uri()
-
-                        self._db.did_upload_version(relpath_u, current_version, filecap, last_downloaded_uri, last_downloaded_timestamp, pathinfo)
-                        self._count('files_uploaded')
-
-                    # FIXME consider whether it's correct to retrieve the filenode again.
-                    d2.addCallback(lambda x: self._get_filenode(encoded_path_u))
-                    d2.addCallback(add_db_entry)
-
-                d2.addCallback(lambda x: Exception("file does not exist"))  # FIXME wrong
+                if not self._db.check_file_db_exists(relpath_u):
+                    return None
+
+                last_downloaded_timestamp = now
+                last_downloaded_uri = self._db.get_last_downloaded_uri(relpath_u)
+
+                current_version = self._db.get_local_file_version(relpath_u)
+                if current_version is None:
+                    new_version = 0
+                elif self._db.is_new_file(pathinfo, relpath_u):
+                    new_version = current_version + 1
+                else:
+                    self._log("Not uploading %r" % (relpath_u,))
+                    self._count('objects_not_uploaded')
+                    return
+
+                metadata = { 'version': new_version,
+                             'deleted': True,
+                             'last_downloaded_timestamp': last_downloaded_timestamp }
+                if last_downloaded_uri is not None:
+                    metadata['last_downloaded_uri'] = last_downloaded_uri
+
+                empty_uploadable = Data("", self._client.convergence)
+                d2 = self._upload_dirnode.add_file(encoded_path_u, empty_uploadable,
+                                                   metadata=metadata, overwrite=True)
+
+                def _add_db_entry(filenode):
+                    filecap = filenode.get_uri()
+                    self._db.did_upload_version(relpath_u, new_version, filecap,
+                                                last_downloaded_uri, last_downloaded_timestamp, pathinfo)
+                    self._count('files_uploaded')
+                d2.addCallback(_add_db_entry)
                 return d2
             elif pathinfo.islink:
                 self.warn("WARNING: cannot upload symlink %s" % quote_filepath(fp))
@@ -337,28 +364,35 @@ class Uploader(QueueMixin):
                 upload_d.addCallback(lambda ign: self._scan(relpath_u))
                 return upload_d
             elif pathinfo.isfile:
-                version = self._db.get_local_file_version(relpath_u)
                 last_downloaded_uri = self._db.get_last_downloaded_uri(relpath_u)
-                if version is None:
-                    version = 0
+                last_downloaded_timestamp = now
+
+                current_version = self._db.get_local_file_version(relpath_u)
+                if current_version is None:
+                    new_version = 0
                 elif self._db.is_new_file(pathinfo, relpath_u):
-                    version += 1
+                    new_version = current_version + 1
                 else:
+                    self._log("Not uploading %r" % (relpath_u,))
+                    self._count('objects_not_uploaded')
                     return None
 
-                uploadable = FileName(unicode_from_filepath(fp), self._client.convergence)
-                metadata = { "version":version }
+                metadata = { 'version': new_version,
+                             'last_downloaded_timestamp': last_downloaded_timestamp }
                 if last_downloaded_uri is not None:
-                    metadata["last_downloaded_uri"] = last_downloaded_uri
-                    metadata["last_downloaded_timestamp"] = now
-                d2 = self._upload_dirnode.add_file(encoded_path_u, uploadable, metadata=metadata, overwrite=True)
-                def add_db_entry(filenode):
+                    metadata['last_downloaded_uri'] = last_downloaded_uri
+
+                uploadable = FileName(unicode_from_filepath(fp), self._client.convergence)
+                d2 = self._upload_dirnode.add_file(encoded_path_u, uploadable,
+                                                   metadata=metadata, overwrite=True)
+
+                def _add_db_entry(filenode):
                     filecap = filenode.get_uri()
                     last_downloaded_uri = metadata.get('last_downloaded_uri', None)
-                    last_downloaded_timestamp = now
-                    self._db.did_upload_version(relpath_u, version, filecap, last_downloaded_uri, last_downloaded_timestamp, pathinfo)
+                    self._db.did_upload_version(relpath_u, new_version, filecap,
+                                                last_downloaded_uri, last_downloaded_timestamp, pathinfo)
                     self._count('files_uploaded')
-                d2.addCallback(add_db_entry)
+                d2.addCallback(_add_db_entry)
                 return d2
             else:
                 self.warn("WARNING: cannot process special file %s" % quote_filepath(fp))
@@ -391,7 +425,64 @@ class Uploader(QueueMixin):
         return d
 
 
-class Downloader(QueueMixin):
+class WriteFileMixin(object):
+    FUDGE_SECONDS = 10.0
+
+    def _get_conflicted_filename(self, abspath_u):
+        return abspath_u + u".conflict"
+
+    def _write_downloaded_file(self, abspath_u, file_contents, is_conflict=False, now=None):
+        self._log("_write_downloaded_file(%r, <%d bytes>, is_conflict=%r, now=%r)"
+                  % (abspath_u, len(file_contents), is_conflict, now))
+
+        # 1. Write a temporary file, say .foo.tmp.
+        # 2. is_conflict determines whether this is an overwrite or a conflict.
+        # 3. Set the mtime of the replacement file to be T seconds before the
+        #    current local time.
+        # 4. Perform a file replacement with backup filename foo.backup,
+        #    replaced file foo, and replacement file .foo.tmp. If any step of
+        #    this operation fails, reclassify as a conflict and stop.
+        #
+        # Returns the path of the destination file.
+
+        precondition_abspath(abspath_u)
+        replacement_path_u = abspath_u + u".tmp"  # FIXME more unique
+        backup_path_u = abspath_u + u".backup"
+        if now is None:
+            now = time.time()
+
+        # ensure parent directory exists
+        head, tail = os.path.split(abspath_u)
+        mode = 0777 # XXX
+        fileutil.make_dirs(head, mode)
+
+        fileutil.write(replacement_path_u, file_contents)
+        os.utime(replacement_path_u, (now, now - self.FUDGE_SECONDS))
+        if is_conflict:
+            print "0x00 ------------ <><> is conflict; calling _rename_conflicted_file... %r %r" % (abspath_u, replacement_path_u)
+            return self._rename_conflicted_file(abspath_u, replacement_path_u)
+        else:
+            try:
+                fileutil.replace_file(abspath_u, replacement_path_u, backup_path_u)
+                return abspath_u
+            except fileutil.ConflictError:
+                return self._rename_conflicted_file(abspath_u, replacement_path_u)
+
+    def _rename_conflicted_file(self, abspath_u, replacement_path_u):
+        self._log("_rename_conflicted_file(%r, %r)" % (abspath_u, replacement_path_u))
+
+        conflict_path_u = self._get_conflicted_filename(abspath_u)
+        print "XXX rename %r %r" % (replacement_path_u, conflict_path_u)
+        if os.path.isfile(replacement_path_u):
+            print "%r exists" % (replacement_path_u,)
+        if os.path.isfile(conflict_path_u):
+            print "%r exists" % (conflict_path_u,)
+
+        fileutil.rename_no_overwrite(replacement_path_u, conflict_path_u)
+        return conflict_path_u
+
+
+class Downloader(QueueMixin, WriteFileMixin):
     REMOTE_SCAN_INTERVAL = 3  # facilitates tests
 
     def __init__(self, client, local_path_u, db, collective_dircap, clock):
@@ -429,9 +520,13 @@ class Downloader(QueueMixin):
         We check the remote metadata version against our magic-folder db version number;
         latest version wins.
         """
+        self._log("_should_download(%r, %r)" % (relpath_u, remote_version))
         if magicpath.should_ignore_file(relpath_u):
+            self._log("nope")
             return False
+        self._log("yep")
         v = self._db.get_local_file_version(relpath_u)
+        self._log("v = %r" % (v,))
         return (v is None or v < remote_version)
 
     def _get_local_latest(self, relpath_u):
@@ -485,15 +580,19 @@ class Downloader(QueueMixin):
         self._log("_scan_remote nickname %r" % (nickname,))
         d = dirnode.list()
         def scan_listing(listing_map):
-            for name in listing_map.keys():
-                file_node, metadata = listing_map[name]
-                local_version = self._get_local_latest(name)
+            for encoded_relpath_u in listing_map.keys():
+                relpath_u = magicpath.magic2path(encoded_relpath_u)
+                self._log("found %r" % (relpath_u,))
+
+                file_node, metadata = listing_map[encoded_relpath_u]
+                local_version = self._get_local_latest(relpath_u)
                 remote_version = metadata.get('version', None)
-                self._log("%r has local version %r, remote version %r" % (name, local_version, remote_version))
+                self._log("%r has local version %r, remote version %r" % (relpath_u, local_version, remote_version))
                 if local_version is None or remote_version is None or local_version < remote_version:
-                    self._log("added to download queue\n")
-                    self._append_to_batch(name, file_node, metadata)
+                    self._log("%r added to download queue" % (relpath_u,))
+                    self._append_to_batch(relpath_u, file_node, metadata)
         d.addCallback(scan_listing)
+        d.addBoth(self._logcb, "end of _scan_remote")
         return d
 
     def _scan_remote_collective(self):
@@ -530,6 +629,7 @@ class Downloader(QueueMixin):
         self._log("pending after = %r" % (self._pending,))
 
     def _filter_scan_batch(self, result):
+        self._log("_filter_scan_batch")
         extension = [] # consider whether this should be a dict
         for relpath_u in self._download_scan_batch.keys():
             if relpath_u in self._pending:
@@ -537,10 +637,15 @@ class Downloader(QueueMixin):
             file_node, metadata = max(self._download_scan_batch[relpath_u], key=lambda x: x[1]['version'])
             if self._should_download(relpath_u, metadata['version']):
                 extension += [(relpath_u, file_node, metadata)]
+            else:
+                self._log("Excluding %r" % (relpath_u,))
+                self._count('objects_excluded')
+                self._call_hook(None, 'processed')
         return extension
 
     def _when_queue_is_empty(self):
         d = task.deferLater(self._clock, self._turn_delay, self._scan_remote_collective)
+        d.addBoth(self._logcb, "after _scan_remote_collective")
         d.addCallback(lambda ign: self._turn_deque())
         return d
 
@@ -549,76 +654,76 @@ class Downloader(QueueMixin):
         if now is None:
             now = time.time()
         (relpath_u, file_node, metadata) = item
-        d = file_node.download_best_version()
-        def succeeded(res):
-            fp = self._get_filepath(relpath_u)
-            abspath_u = unicode_from_filepath(fp)
-            d2 = defer.succeed(res)
-            d2.addCallback(lambda result: self._write_downloaded_file(abspath_u, result, is_conflict=False))
-            def do_update_db(written_abspath_u):
-                filecap = file_node.get_uri()
-                last_uploaded_uri = metadata.get('last_uploaded_uri', None)
-                last_downloaded_uri = filecap
-                last_downloaded_timestamp = now
-                written_pathinfo = get_pathinfo(written_abspath_u)
-                if not written_pathinfo.exists:
-                    raise Exception("downloaded file %s disappeared" % quote_local_unicode_path(written_abspath_u))
-
-                self._db.did_upload_version(relpath_u, metadata['version'], last_uploaded_uri,
-                                            last_downloaded_uri, last_downloaded_timestamp, written_pathinfo)
-            d2.addCallback(do_update_db)
-            # XXX handle failure here with addErrback...
+        fp = self._get_filepath(relpath_u)
+        abspath_u = unicode_from_filepath(fp)
+        conflict_path_u = self._get_conflicted_filename(abspath_u)
+        d = defer.succeed(None)
+
+        def do_update_db(written_abspath_u):
+            filecap = file_node.get_uri()
+            last_uploaded_uri = metadata.get('last_uploaded_uri', None)
+            last_downloaded_uri = filecap
+            last_downloaded_timestamp = now
+            written_pathinfo = get_pathinfo(written_abspath_u)
+
+            if not written_pathinfo.exists and not metadata.get('deleted', False):
+                raise Exception("downloaded object %s disappeared" % quote_local_unicode_path(written_abspath_u))
+
+            self._db.did_upload_version(relpath_u, metadata['version'], last_uploaded_uri,
+                                        last_downloaded_uri, last_downloaded_timestamp, written_pathinfo)
             self._count('objects_downloaded')
-            return d2
         def failed(f):
             self._log("download failed: %s" % (str(f),))
-            self._count('objects_download_failed')
+            self._count('objects_failed')
             return f
-        d.addCallbacks(succeeded, failed)
+
+        if os.path.isfile(conflict_path_u):
+            def fail(res):
+                raise ConflictError("download failed: already conflicted: %r" % (relpath_u,))
+            d.addCallback(fail)
+        else:
+            is_conflict = False
+            if self._db.check_file_db_exists(relpath_u):
+                dmd_last_downloaded_uri = metadata.get('last_downloaded_uri', None)
+                local_last_downloaded_uri = self._db.get_last_downloaded_uri(relpath_u)
+                print "metadata %r" % (metadata,)
+                print "<<<<--- if %r != %r" % (dmd_last_downloaded_uri, local_last_downloaded_uri)
+                if dmd_last_downloaded_uri is not None and local_last_downloaded_uri is not None:
+                    if dmd_last_downloaded_uri != local_last_downloaded_uri:
+                        is_conflict = True
+                        self._count('objects_conflicted')
+
+                #dmd_last_uploaded_uri = metadata.get('last_uploaded_uri', None)
+                #local_last_uploaded_uri = ...
+
+            if relpath_u.endswith(u"/"):
+                self._log("mkdir(%r)" % (abspath_u,))
+                d.addCallback(lambda ign: fileutil.make_dirs(abspath_u))
+                d.addCallback(lambda ign: abspath_u)
+            else:
+                d.addCallback(lambda ign: file_node.download_best_version())
+                if metadata.get('deleted', False):
+                    d.addCallback(lambda result: self._unlink_deleted_file(abspath_u, result))
+                else:
+                    d.addCallback(lambda contents: self._write_downloaded_file(abspath_u, contents,
+                                                                               is_conflict=is_conflict))
+
+        d.addCallbacks(do_update_db, failed)
+
         def remove_from_pending(res):
             self._pending.remove(relpath_u)
             return res
         d.addBoth(remove_from_pending)
+        def trap_conflicts(f):
+            f.trap(ConflictError)
+            return None
+        d.addErrback(trap_conflicts)
         return d
 
-    FUDGE_SECONDS = 10.0
-
-    @classmethod
-    def _write_downloaded_file(cls, abspath_u, file_contents, is_conflict=False, now=None):
-        # 1. Write a temporary file, say .foo.tmp.
-        # 2. is_conflict determines whether this is an overwrite or a conflict.
-        # 3. Set the mtime of the replacement file to be T seconds before the
-        #    current local time.
-        # 4. Perform a file replacement with backup filename foo.backup,
-        #    replaced file foo, and replacement file .foo.tmp. If any step of
-        #    this operation fails, reclassify as a conflict and stop.
-        #
-        # Returns the path of the destination file.
-
-        precondition_abspath(abspath_u)
-        replacement_path_u = abspath_u + u".tmp"  # FIXME more unique
-        backup_path_u = abspath_u + u".backup"
-        if now is None:
-            now = time.time()
-
-        # ensure parent directory exists
-        head, tail = os.path.split(abspath_u)
-        mode = 0777 # XXX
-        fileutil.make_dirs(head, mode)
-
-        fileutil.write(replacement_path_u, file_contents)
-        os.utime(replacement_path_u, (now, now - cls.FUDGE_SECONDS))
-        if is_conflict:
-            return cls._rename_conflicted_file(abspath_u, replacement_path_u)
-        else:
-            try:
-                fileutil.replace_file(abspath_u, replacement_path_u, backup_path_u)
-                return abspath_u
-            except fileutil.ConflictError:
-                return cls._rename_conflicted_file(abspath_u, replacement_path_u)
-
-    @classmethod
-    def _rename_conflicted_file(self, abspath_u, replacement_path_u):
-        conflict_path_u = abspath_u + u".conflict"
-        fileutil.rename_no_overwrite(replacement_path_u, conflict_path_u)
-        return conflict_path_u
+    def _unlink_deleted_file(self, abspath_u, result):
+        try:
+            self._log('unlinking: %s' % (abspath_u,))
+            shutil.move(abspath_u, abspath_u + '.backup')
+        except IOError:
+            self._log("Already gone: '%s'" % (abspath_u,))
+        return abspath_u