From: Brian Warner <warner@allmydata.com>
Date: Tue, 26 Aug 2008 00:51:55 +0000 (-0700)
Subject: logging cleanups: lower DeadReferenceError from WEIRD (which provokes Incidents)... 
X-Git-Url: https://git.rkrishnan.org/%5B/%5D%20/uri/frontends/COPYING.GPL?a=commitdiff_plain;h=735aa895b91b44ae450768ec7d17b18a8628e57a;p=tahoe-lafs%2Ftahoe-lafs.git

logging cleanups: lower DeadReferenceError from WEIRD (which provokes Incidents) to merely UNUSUAL, don't pre-format Failures in others
---

diff --git a/src/allmydata/immutable/download.py b/src/allmydata/immutable/download.py
index 878d38d0..c719739b 100644
--- a/src/allmydata/immutable/download.py
+++ b/src/allmydata/immutable/download.py
@@ -4,6 +4,7 @@ from zope.interface import implements
 from twisted.internet import defer
 from twisted.internet.interfaces import IPushProducer, IConsumer
 from twisted.application import service
+from foolscap import DeadReferenceError
 from foolscap.eventual import eventually
 
 from allmydata.util import base32, mathutil, hashutil, log
@@ -264,10 +265,10 @@ class BlockDownloader:
         self.results = results
         self._log_number = self.parent.log("starting block %d" % blocknum)
 
-    def log(self, msg, parent=None):
-        if parent is None:
-            parent = self._log_number
-        return self.parent.log(msg, parent=parent)
+    def log(self, *args, **kwargs):
+        if "parent" not in kwargs:
+            kwargs["parent"] = self._log_number
+        return self.parent.log(*args, **kwargs)
 
     def start(self, segnum):
         lognum = self.log("get_block(segnum=%d)" % segnum)
@@ -288,8 +289,11 @@ class BlockDownloader:
         self.parent.hold_block(self.blocknum, data)
 
     def _got_block_error(self, f, lognum):
-        self.log("BlockDownloader[%d] got error: %s" % (self.blocknum, f),
-                 parent=lognum)
+        level = log.WEIRD
+        if f.check(DeadReferenceError):
+            level = log.UNUSUAL
+        self.log("BlockDownloader[%d] got error" % self.blocknum,
+                 failure=f, level=level, parent=lognum)
         if self.results:
             peerid = self.vbucket.bucket.get_peerid()
             self.results.server_problems[peerid] = str(f)
@@ -311,10 +315,10 @@ class SegmentDownloader:
         self._log_number = self.parent.log("starting segment %d" %
                                            segmentnumber)
 
-    def log(self, msg, parent=None):
-        if parent is None:
-            parent = self._log_number
-        return self.parent.log(msg, parent=parent)
+    def log(self, *args, **kwargs):
+        if "parent" not in kwargs:
+            kwargs["parent"] = self._log_number
+        return self.parent.log(*args, **kwargs)
 
     def start(self):
         return self._download()
@@ -588,7 +592,10 @@ class FileDownloader:
         self._share_buckets.append( (sharenum, bucket) )
 
     def _got_error(self, f):
-        self._client.log("Somebody failed. -- %s" % (f,))
+        level = log.WEIRD
+        if f.check(DeadReferenceError):
+            level = log.UNUSUAL
+        self._client.log("Error during get_buckets", failure=f, level=level)
 
     def bucket_failed(self, vbucket):
         shnum = vbucket.sharenum
@@ -656,8 +663,12 @@ class FileDownloader:
         d.addCallback(lambda res: getattr(bucket, methname)(*args))
         d.addCallback(validatorfunc, bucket)
         def _bad(f):
-            self.log("%s from vbucket %s failed:" % (name, bucket),
-                     failure=f, level=log.WEIRD)
+            level = log.WEIRD
+            if f.check(DeadReferenceError):
+                level = log.UNUSUAL
+            self.log(format="operation %(op)s from vbucket %(vbucket)s failed",
+                     op=name, vbucket=str(bucket),
+                     failure=f, level=level)
             if not sources:
                 raise NotEnoughSharesError("ran out of peers, last error was %s"
                                           % (f,))
diff --git a/src/allmydata/mutable/publish.py b/src/allmydata/mutable/publish.py
index 15df0919..22a496ad 100644
--- a/src/allmydata/mutable/publish.py
+++ b/src/allmydata/mutable/publish.py
@@ -117,6 +117,8 @@ class Publish:
     def log(self, *args, **kwargs):
         if 'parent' not in kwargs:
             kwargs['parent'] = self._log_number
+        if "facility" not in kwargs:
+            kwargs["facility"] = "tahoe.mutable.publish"
         return log.msg(*args, **kwargs)
 
     def publish(self, newdata):
@@ -239,7 +241,7 @@ class Publish:
             self.bad_share_checkstrings[ (peerid, shnum) ] = old_checkstring
             self.connections[peerid] = self._servermap.connections[peerid]
 
-        # create the shares. We'll discard these as they are delivered. SMDF:
+        # create the shares. We'll discard these as they are delivered. SDMF:
         # we're allowed to hold everything in memory.
 
         self._status.timings["setup"] = time.time() - self._started
@@ -268,7 +270,7 @@ class Publish:
         assert self.num_segments in [0, 1,] # SDMF restrictions
 
     def _fatal_error(self, f):
-        self.log("error during loop", failure=f, level=log.SCARY)
+        self.log("error during loop", failure=f, level=log.UNUSUAL)
         self._done(f)
 
     def _update_status(self):
diff --git a/src/allmydata/mutable/retrieve.py b/src/allmydata/mutable/retrieve.py
index c93d1d28..9000f421 100644
--- a/src/allmydata/mutable/retrieve.py
+++ b/src/allmydata/mutable/retrieve.py
@@ -4,6 +4,7 @@ from itertools import count
 from zope.interface import implements
 from twisted.internet import defer
 from twisted.python import failure
+from foolscap import DeadReferenceError
 from foolscap.eventual import eventually, fireEventually
 from allmydata.interfaces import IRetrieveStatus
 from allmydata.util import hashutil, idlib, log
@@ -113,6 +114,8 @@ class Retrieve:
     def log(self, *args, **kwargs):
         if "parent" not in kwargs:
             kwargs["parent"] = self._log_number
+        if "facility" not in kwargs:
+            kwargs["facility"] = "tahoe.mutable.retrieve"
         return log.msg(*args, **kwargs)
 
     def download(self):
@@ -247,7 +250,8 @@ class Retrieve:
             except CorruptShareError, e:
                 # log it and give the other shares a chance to be processed
                 f = failure.Failure()
-                self.log("bad share: %s %s" % (f, f.value), level=log.WEIRD)
+                self.log(format="bad share: %(f_value)s",
+                         f_value=str(f.value), failure=f, level=log.WEIRD)
                 self.remove_peer(peerid)
                 self.servermap.mark_bad_share(peerid, shnum, prefix)
                 self._bad_shares.add( (peerid, shnum) )
@@ -301,7 +305,11 @@ class Retrieve:
             return
         self._last_failure = f
         self.remove_peer(peerid)
-        self.log("error during query: %s %s" % (f, f.value), level=log.WEIRD)
+        level = log.WEIRD
+        if f.check(DeadReferenceError):
+            level = log.UNUSUAL
+        self.log(format="error during query: %(f_value)s",
+                 f_value=str(f.value), failure=f, level=level)
 
     def _check_for_done(self, res):
         # exit paths:
diff --git a/src/allmydata/mutable/servermap.py b/src/allmydata/mutable/servermap.py
index 2e2e12a9..69b75b3e 100644
--- a/src/allmydata/mutable/servermap.py
+++ b/src/allmydata/mutable/servermap.py
@@ -360,6 +360,8 @@ class ServermapUpdater:
     def log(self, *args, **kwargs):
         if "parent" not in kwargs:
             kwargs["parent"] = self._log_number
+        if "facility" not in kwargs:
+            kwargs["facility"] = "tahoe.mutable.mapupdate"
         return log.msg(*args, **kwargs)
 
     def update(self):
@@ -537,8 +539,8 @@ class ServermapUpdater:
             except CorruptShareError, e:
                 # log it and give the other shares a chance to be processed
                 f = failure.Failure()
-                self.log("bad share: %s %s" % (f, f.value),
-                         parent=lp, level=log.WEIRD)
+                self.log(format="bad share: %(f_value)s", f_value=str(f.value),
+                         failure=f, parent=lp, level=log.WEIRD)
                 self._bad_peers.add(peerid)
                 self._last_failure = f
                 checkstring = data[:SIGNED_PREFIX_LENGTH]
@@ -689,7 +691,8 @@ class ServermapUpdater:
 
 
     def _query_failed(self, f, peerid):
-        self.log("error during query: %s %s" % (f, f.value), level=log.WEIRD)
+        self.log(format="error during query: %(f_value)s",
+                 f_value=str(f.value), failure=f, level=log.WEIRD)
         if not self._running:
             return
         self._must_query.discard(peerid)
@@ -716,7 +719,8 @@ class ServermapUpdater:
 
     def _privkey_query_failed(self, f, peerid, shnum, lp):
         self._queries_outstanding.discard(peerid)
-        self.log("error during privkey query: %s %s" % (f, f.value),
+        self.log(format="error during privkey query: %(f_value)s",
+                 f_value=str(f.value), failure=f,
                  parent=lp, level=log.WEIRD)
         if not self._running:
             return