Retrieve: merge _validate_active_prefixes into _add_active_peers
[tahoe-lafs/tahoe-lafs.git] / src / allmydata / mutable / retrieve.py
index f6c99d0ff10224fb1b956bc7791aca5fc9bf7d3a..cb4ba352e2b78b54c74b9047c82bdc2952448edd 100644 (file)
@@ -222,11 +222,38 @@ class Retrieve:
             self._consumer.registerProducer(self, streaming=True)
 
         self._done_deferred = defer.Deferred()
+        self._offset = offset
+        self._read_length = size
+        self._setup_download()
+        self._setup_encoding_parameters()
+        self.log("starting download")
+        self._started_fetching = time.time()
+        d = self._add_active_peers()
+        # ...
+        # The download process beyond this is a state machine.
+        # _add_active_peers will select the peers that we want to use
+        # for the download, and then attempt to start downloading. After
+        # each segment, it will check for doneness, reacting to broken
+        # peers and corrupt shares as necessary. If it runs out of good
+        # peers before downloading all of the segments, _done_deferred
+        # will errback.  Otherwise, it will eventually callback with the
+        # contents of the mutable file.
+        return self._done_deferred
+
+    def _setup_download(self):
         self._started = time.time()
         self._status.set_status("Retrieving Shares")
 
-        self._offset = offset
-        self._read_length = size
+        # how many shares do we need?
+        (seqnum,
+         root_hash,
+         IV,
+         segsize,
+         datalength,
+         k,
+         N,
+         prefix,
+         offsets_tuple) = self.verinfo
 
         # first, which servers can we use?
         versionmap = self.servermap.make_versionmap()
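
The comment in the new download() body describes the retrieval loop as a
Deferred-driven state machine: pick enough readers, fetch a segment, check for
doneness, and eventually fire _done_deferred with the plaintext or an error.
A minimal, self-contained sketch of that per-segment loop, assuming only
Twisted's defer module; download_all_segments and fetch_segment are
illustrative names, not Retrieve methods:

    from twisted.internet import defer

    def download_all_segments(fetch_segment, num_segments):
        # Illustrative only: fetch segments one at a time, errback on the
        # first failure, and fire a single Deferred with the joined result.
        # fetch_segment(segnum) must return a Deferred that fires with bytes.
        done = defer.Deferred()
        pieces = []

        def _fetch(segnum):
            if segnum == num_segments:
                done.callback(b"".join(pieces))
                return
            d = fetch_segment(segnum)
            d.addCallback(_got_segment, segnum)
            d.addErrback(done.errback)

        def _got_segment(data, segnum):
            pieces.append(data)
            _fetch(segnum + 1)

        _fetch(0)
        return done

    # e.g.: d = download_all_segments(lambda n: defer.succeed(b"segment "), 3)
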
@@ -248,7 +275,7 @@ class Retrieve:
                                        any_cache)
             reader.peerid = peerid
             self.readers[shnum] = reader
-
+        assert len(self.remaining_sharemap) >= k
 
         self.shares = {} # maps shnum to validated blocks
         self._active_readers = [] # list of active readers for this dl.
@@ -256,46 +283,12 @@ class Retrieve:
                                         # validated the prefix of
         self._block_hash_trees = {} # shnum => hashtree
 
-        # how many shares do we need?
-        (seqnum,
-         root_hash,
-         IV,
-         segsize,
-         datalength,
-         k,
-         N,
-         prefix,
-         offsets_tuple) = self.verinfo
-
-
         # We need one share hash tree for the entire file; its leaves
         # are the roots of the block hash trees for the shares that
         # comprise it, and its root is in the verinfo.
         self.share_hash_tree = hashtree.IncompleteHashTree(N)
         self.share_hash_tree.set_hashes({0: root_hash})
 
-        # This will set up both the segment decoder and the tail segment
-        # decoder, as well as a variety of other instance variables that
-        # the download process will use.
-        self._setup_encoding_parameters()
-        assert len(self.remaining_sharemap) >= k
-
-        self.log("starting download")
-        self._started_fetching = time.time()
-
-        self._add_active_peers()
-
-        # The download process beyond this is a state machine.
-        # _add_active_peers will select the peers that we want to use
-        # for the download, and then attempt to start downloading. After
-        # each segment, it will check for doneness, reacting to broken
-        # peers and corrupt shares as necessary. If it runs out of good
-        # peers before downloading all of the segments, _done_deferred
-        # will errback.  Otherwise, it will eventually callback with the
-        # contents of the mutable file.
-        return self._done_deferred
-
-
     def decode(self, blocks_and_salts, segnum):
         """
         I am a helper method that the mutable file update process uses
@@ -322,7 +315,7 @@ class Retrieve:
     def _setup_encoding_parameters(self):
         """
         I set up the encoding parameters, including k, n, the number
-        of segments associated with this file, and the segment decoder.
+        of segments associated with this file, and the segment decoders.
         """
         (seqnum,
          root_hash,
@@ -481,95 +474,20 @@ class Retrieve:
             self._active_readers.append(self.readers[shnum])
             self.log("added reader for share %d" % shnum)
         assert len(self._active_readers) >= self._required_shares
-        # Conceptually, this is part of the _add_active_peers step. It
-        # validates the prefixes of newly added readers to make sure
-        # that they match what we are expecting for self.verinfo. If
-        # validation is successful, _validate_active_prefixes will call
-        # _download_current_segment for us. If validation is
-        # unsuccessful, then _validate_prefixes will remove the peer and
-        # call _add_active_peers again, where we will attempt to rectify
-        # the problem by choosing another peer.
-        return self._validate_active_prefixes()
-
-
-    def _validate_active_prefixes(self):
-        """
-        I check to make sure that the prefixes on the peers that I am
-        currently reading from match the prefix that we want to see, as
-        said in self.verinfo.
-
-        If I find that all of the active peers have acceptable prefixes,
-        I pass control to _download_current_segment, which will use
-        those peers to do cool things. If I find that some of the active
-        peers have unacceptable prefixes, I will remove them from active
-        peers (and from further consideration) and call
-        _add_active_peers to attempt to rectify the situation. I keep
-        track of which peers I have already validated so that I don't
-        need to do so again.
-        """
-        assert self._active_readers, "No more active readers"
-
-        ds = []
         new_readers = set(self._active_readers) - self._validated_readers
-        self.log('validating %d newly-added active readers' % len(new_readers))
 
         for reader in new_readers:
-            # We force a remote read here -- otherwise, we are relying
-            # on cached data that we already verified as valid, and we
-            # won't detect an uncoordinated write that has occurred
-            # since the last servermap update.
-            d = reader.get_prefix(force_remote=True)
-            d.addCallback(self._try_to_validate_prefix, reader)
-            ds.append(d)
-        dl = defer.DeferredList(ds, consumeErrors=True)
-        def _check_results(results):
-            # Each result in results will be of the form (success, msg).
-            # We don't care about msg, but success will tell us whether
-            # or not the checkstring validated. If it didn't, we need to
-            # remove the offending (peer,share) from our active readers,
-            # and ensure that active readers is again populated.
-            bad_readers = []
-            for i, result in enumerate(results):
-                if not result[0]:
-                    reader = self._active_readers[i]
-                    f = result[1]
-                    assert isinstance(f, failure.Failure)
-
-                    self.log("The reader %s failed to "
-                             "properly validate: %s" % \
-                             (reader, str(f.value)))
-                    bad_readers.append((reader, f))
-                else:
-                    reader = self._active_readers[i]
-                    self.log("the reader %s checks out, so we'll use it" % \
-                             reader)
-                    self._validated_readers.add(reader)
-                    # Each time we validate a reader, we check to see if
-                    # we need the private key. If we do, we politely ask
-                    # for it and then continue computing. If we find
-                    # that we haven't gotten it at the end of
-                    # segment decoding, then we'll take more drastic
-                    # measures.
-                    if self._need_privkey and not self._node.is_readonly():
-                        d = reader.get_encprivkey()
-                        d.addCallback(self._try_to_validate_privkey, reader)
-            if bad_readers:
-                # We do them all at once, or else we screw up list indexing.
-                for (reader, f) in bad_readers:
-                    self._mark_bad_share(reader, f)
-                if self._verify:
-                    if len(self._active_readers) >= self._required_shares:
-                        return self._download_current_segment()
-                    else:
-                        return self._failed()
-                else:
-                    return self._add_active_peers()
-            else:
-                return self._download_current_segment()
-            # The next step will assert that it has enough active
-            # readers to fetch shares; we just need to remove it.
-        dl.addCallback(_check_results)
-        return dl
+            self._validated_readers.add(reader)
+            # Each time we validate a reader, we check to see if we need the
+            # private key. If we do, we politely ask for it and then continue
+            # computing. If we find that we haven't gotten it at the end of
+            # segment decoding, then we'll take more drastic measures.
+            if self._need_privkey and not self._node.is_readonly():
+                d = reader.get_encprivkey()
+                d.addCallback(self._try_to_validate_privkey, reader)
+                # XXX: don't just drop the Deferred. We need error-reporting
+                # but not flow-control here.
+        return self._download_current_segment()
 
 
     def _try_to_validate_prefix(self, prefix, reader):
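
The XXX note in the new _add_active_peers body asks for error-reporting
without flow control on the privkey fetch. One possible pattern (not what
this commit implements) is to attach a logging errback and otherwise ignore
the Deferred; fire_and_forget is a hypothetical helper:

    from twisted.python import log

    def fire_and_forget(d, what):
        # Report failures from a Deferred we deliberately do not wait on:
        # errors get logged, but the caller's control flow never blocks on
        # or depends upon this Deferred's result.
        def _report(f):
            log.err(f, "background operation %r failed" % (what,))
            return None  # swallow the failure after reporting it
        d.addErrback(_report)
        return d

    # hypothetical use at the site of the XXX:
    #   d = reader.get_encprivkey()
    #   d.addCallback(self._try_to_validate_privkey, reader)
    #   fire_and_forget(d, "encprivkey fetch")
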
@@ -707,13 +625,12 @@ class Retrieve:
         ds = []
         for reader in self._active_readers:
             started = time.time()
-            d = reader.get_block_and_salt(segnum, queue=True)
+            d = reader.get_block_and_salt(segnum)
             d2 = self._get_needed_hashes(reader, segnum)
             dl = defer.DeferredList([d, d2], consumeErrors=True)
             dl.addCallback(self._validate_block, segnum, reader, started)
             dl.addErrback(self._validation_or_decoding_failed, [reader])
             ds.append(dl)
-            reader.flush()
         dl = defer.DeferredList(ds)
         if self._verify:
             dl.addCallback(lambda ignored: "")
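
This hunk drops the queue=True/flush() batching: each reader now gets a plain
get_block_and_salt() call, paired with its hash fetch in a per-reader
DeferredList. A sketch of that fan-out, with illustrative callable names
standing in for the reader and Retrieve methods:

    from twisted.internet import defer

    def fetch_from_readers(readers, fetch_block, fetch_hashes, validate, on_failure):
        # Mirrors the loop above: pair the block fetch with the hash fetch,
        # validate the pair, and report per-reader failures without letting
        # one bad reader break the others.
        per_reader = []
        for reader in readers:
            d_block = fetch_block(reader)    # like reader.get_block_and_salt(segnum)
            d_hashes = fetch_hashes(reader)  # like self._get_needed_hashes(reader, segnum)
            dl = defer.DeferredList([d_block, d_hashes], consumeErrors=True)
            dl.addCallback(validate, reader)     # like self._validate_block(...)
            dl.addErrback(on_failure, [reader])  # like self._validation_or_decoding_failed(...)
            per_reader.append(dl)
        return defer.DeferredList(per_reader)
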
@@ -917,12 +834,12 @@ class Retrieve:
         #needed.discard(0)
         self.log("getting blockhashes for segment %d, share %d: %s" % \
                  (segnum, reader.shnum, str(needed)))
-        d1 = reader.get_blockhashes(needed, queue=True, force_remote=True)
+        d1 = reader.get_blockhashes(needed, force_remote=True)
         if self.share_hash_tree.needed_hashes(reader.shnum):
             need = self.share_hash_tree.needed_hashes(reader.shnum)
             self.log("also need sharehashes for share %d: %s" % (reader.shnum,
                                                                  str(need)))
-            d2 = reader.get_sharehashes(need, queue=True, force_remote=True)
+            d2 = reader.get_sharehashes(need, force_remote=True)
         else:
             d2 = defer.succeed({}) # the logic in the next method
                                    # expects a dict
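
For reference, the share hash tree built in _setup_download (one
IncompleteHashTree whose N leaves are the block-hash-tree roots, seeded with
the root hash from verinfo) is what drives the needed_hashes() queries above.
A small standalone sketch, assuming the module is importable as
allmydata.hashtree and using placeholder values:

    import hashlib
    from allmydata import hashtree   # referenced above simply as `hashtree`

    N = 10                                               # one leaf per share
    root_hash = hashlib.sha256(b"placeholder").digest()  # stands in for the verinfo root

    share_hash_tree = hashtree.IncompleteHashTree(N)
    share_hash_tree.set_hashes({0: root_hash})  # node 0 is the root of the whole tree

    # Which internal nodes are still missing before share 3 can be validated?
    # This is the set handed to reader.get_sharehashes(need, force_remote=True).
    need = share_hash_tree.needed_hashes(3)
    print(need)
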