From: Brian Warner
Date: Mon, 1 Jun 2009 21:04:07 +0000 (-0700)
Subject: mutable: catch and display first error, so code bugs which break all servers get...
X-Git-Tag: trac-3900~8
X-Git-Url: https://git.rkrishnan.org/%5B/%5D%20/uri/flags/reliability?a=commitdiff_plain;h=d29281c9c57f1b8bcb91450d45375d7bbf5505d0;p=tahoe-lafs%2Ftahoe-lafs.git

mutable: catch and display first error, so code bugs which break all servers get displayed better
---

diff --git a/src/allmydata/mutable/common.py b/src/allmydata/mutable/common.py
index 6504909b..869b27d3 100644
--- a/src/allmydata/mutable/common.py
+++ b/src/allmydata/mutable/common.py
@@ -33,7 +33,13 @@ class UnrecoverableFileError(Exception):
 
 class NotEnoughServersError(Exception):
     """There were not enough functioning servers available to place shares
-    upon."""
+    upon. This might result from all servers being full or having an error, a
+    local bug which causes all server requests to fail in the same way, or
+    from there being zero servers. The first error received (if any) is
+    stored in my .first_error attribute."""
+    def __init__(self, why, first_error=None):
+        Exception.__init__(self, why, first_error)
+        self.first_error = first_error
 
 class CorruptShareError(Exception):
     def __init__(self, peerid, shnum, reason):
diff --git a/src/allmydata/mutable/publish.py b/src/allmydata/mutable/publish.py
index 34fad3b8..e446eeae 100644
--- a/src/allmydata/mutable/publish.py
+++ b/src/allmydata/mutable/publish.py
@@ -108,6 +108,7 @@ class Publish:
         num = self._node._client.log("Publish(%s): starting" % prefix)
         self._log_number = num
         self._running = True
+        self._first_write_error = None
 
         self._status = PublishStatus()
         self._status.set_storage_index(self._storage_index)
@@ -382,7 +383,10 @@ class Publish:
         peerlist.sort()
 
         if not peerlist:
-            raise NotEnoughServersError("Ran out of non-bad servers")
+            raise NotEnoughServersError("Ran out of non-bad servers, "
+                                        "first_error=%s" %
+                                        str(self._first_write_error),
+                                        self._first_write_error)
 
         new_assignments = []
         # we then index this peerlist with an integer, because we may have to
@@ -801,6 +805,8 @@ class Publish:
         for shnum in shnums:
             self.outstanding.discard( (peerid, shnum) )
         self.bad_peers.add(peerid)
+        if self._first_write_error is None:
+            self._first_write_error = f
         self.log(format="error while writing shares %(shnums)s to peerid %(peerid)s",
                  shnums=list(shnums), peerid=idlib.shortnodeid_b2a(peerid),
                  failure=f,