From: Daira Hopwood Date: Thu, 5 Sep 2013 16:38:50 +0000 (+0100) Subject: Sat Jun 16 19:24:03 BST 2012 david-sarah@jacaranda.org X-Git-Url: https://git.rkrishnan.org/components/%22news.html/reliability?a=commitdiff_plain;h=33a042fd750e27bc86e278967d1c02ee1f5ca61e;p=tahoe-lafs%2Ftahoe-lafs.git Sat Jun 16 19:24:03 BST 2012 david-sarah@jacaranda.org * [rebased for 1.9.2] After a server disconnects, make the IServer retain the dead RemoteReference, and continue to return it to anyone who calls get_rref(). This removes the need for callers to guard against receiving a None (as long as the server was connected at least once, which is always the case for servers returned by get_servers_for_psi(), which is how all upload/download code gets servers). Includes test, which is now the same as on trunk. fixes #1636 for 1.9.2. --- diff --git a/src/allmydata/interfaces.py b/src/allmydata/interfaces.py index facfc922..87679e6c 100644 --- a/src/allmydata/interfaces.py +++ b/src/allmydata/interfaces.py @@ -435,7 +435,12 @@ class IServer(Interface): def get_nickname(): pass def get_rref(): - pass + """Once a server is connected, I return a RemoteReference. + Before a server is connected for the first time, I return None. + + Note that the rref I return will start producing DeadReferenceErrors + once the connection is lost. + """ class IMutableSlotWriter(Interface): diff --git a/src/allmydata/storage_client.py b/src/allmydata/storage_client.py index 425d2e48..8a78100c 100644 --- a/src/allmydata/storage_client.py +++ b/src/allmydata/storage_client.py @@ -77,6 +77,7 @@ class StorageFarmBroker: def test_add_rref(self, serverid, rref): s = NativeStorageServer(serverid, {}) s.rref = rref + s._is_connected = True self.servers[serverid] = s def test_add_server(self, serverid, s): @@ -127,7 +128,7 @@ class StorageFarmBroker: return frozenset(self.servers.keys()) def get_connected_servers(self): - return frozenset([s for s in self.servers.values() if s.get_rref()]) + return frozenset([s for s in self.servers.values() if s.is_connected()]) def get_known_servers(self): return frozenset(self.servers.values()) @@ -177,6 +178,7 @@ class NativeStorageServer: self.last_loss_time = None self.remote_host = None self.rref = None + self._is_connected = False self._reconnector = None self._trigger_cb = None @@ -205,6 +207,8 @@ class NativeStorageServer: return self.announcement def get_remote_host(self): return self.remote_host + def is_connected(self): + return self._is_connected def get_last_connect_time(self): return self.last_connect_time def get_last_loss_time(self): @@ -238,6 +242,7 @@ class NativeStorageServer: self.last_connect_time = time.time() self.remote_host = rref.getPeer() self.rref = rref + self._is_connected = True rref.notifyOnDisconnect(self._lost) def get_rref(self): @@ -247,7 +252,12 @@ class NativeStorageServer: log.msg(format="lost connection to %(name)s", name=self.get_name(), facility="tahoe.storage_broker", umid="zbRllw") self.last_loss_time = time.time() - self.rref = None + # self.rref is now stale: all callRemote()s will get a + # DeadReferenceError. We leave the stale reference in place so that + # uploader/downloader code (which received this IServer through + # get_connected_servers() or get_servers_for_psi()) can continue to + # use s.get_rref().callRemote() and not worry about it being None. + self._is_connected = False self.remote_host = None def stop_connecting(self): diff --git a/src/allmydata/test/test_system.py b/src/allmydata/test/test_system.py index 40a28989..8c053ee1 100644 --- a/src/allmydata/test/test_system.py +++ b/src/allmydata/test/test_system.py @@ -14,6 +14,7 @@ from allmydata.immutable.literal import LiteralFileNode from allmydata.immutable.filenode import ImmutableFileNode from allmydata.util import idlib, mathutil from allmydata.util import log, base32 +from allmydata.util.verlib import NormalizedVersion from allmydata.util.encodingutil import quote_output, unicode_to_argv, get_filesystem_encoding from allmydata.util.fileutil import abspath_expanduser_unicode from allmydata.util.consumer import MemoryConsumer, download_to_data @@ -24,6 +25,8 @@ from allmydata.monitor import Monitor from allmydata.mutable.common import NotWriteableError from allmydata.mutable import layout as mutable_layout from allmydata.mutable.publish import MutableData + +import foolscap from foolscap.api import DeadReferenceError, fireEventually from twisted.python.failure import Failure from twisted.web.client import getPage @@ -1868,3 +1871,39 @@ class SystemTest(SystemTestMixin, RunBinTahoeMixin, unittest.TestCase): return d d.addCallback(_got_lit_filenode) return d + + +class Connections(SystemTestMixin, unittest.TestCase): + def test_rref(self): + if NormalizedVersion(foolscap.__version__) < NormalizedVersion('0.6.4'): + raise unittest.SkipTest("skipped due to http://foolscap.lothar.com/trac/ticket/196 " + "(which does not affect normal usage of Tahoe-LAFS)") + + self.basedir = "system/Connections/rref" + d = self.set_up_nodes(2) + def _start(ign): + self.c0 = self.clients[0] + nonclients = [s for s in self.c0.storage_broker.get_connected_servers() + if s.get_serverid() != self.c0.nodeid] + self.failUnlessEqual(len(nonclients), 1) + + self.s1 = nonclients[0] # s1 is the server, not c0 + self.s1_rref = self.s1.get_rref() + self.failIfEqual(self.s1_rref, None) + self.failUnless(self.s1.is_connected()) + d.addCallback(_start) + + # now shut down the server + d.addCallback(lambda ign: self.clients[1].disownServiceParent()) + # and wait for the client to notice + def _poll(): + return len(self.c0.storage_broker.get_connected_servers()) < 2 + d.addCallback(lambda ign: self.poll(_poll)) + + def _down(ign): + self.failIf(self.s1.is_connected()) + rref = self.s1.get_rref() + self.failUnless(rref) + self.failUnlessIdentical(rref, self.s1_rref) + d.addCallback(_down) + return d