From: Brian Warner Date: Tue, 26 Aug 2008 01:57:59 +0000 (-0700) Subject: logging: add 'unique-message-ids' (or 'umids') to each WEIRD-or-higher log.msg call... X-Git-Url: https://git.rkrishnan.org/listings/reliability?a=commitdiff_plain;h=a94af879ffcfcf3d80aef9b9941d32889b6dbe1d;p=tahoe-lafs%2Ftahoe-lafs.git logging: add 'unique-message-ids' (or 'umids') to each WEIRD-or-higher log.msg call, to make it easier to correlate log message with source code --- diff --git a/src/allmydata/client.py b/src/allmydata/client.py index e8f5e57b..87f49833 100644 --- a/src/allmydata/client.py +++ b/src/allmydata/client.py @@ -111,7 +111,8 @@ class Client(node.Node, testutil.PollMixin): # nodes that want to upload and download will need storage servers ic.subscribe_to("storage") d.addCallback(_start_introducer_client) - d.addErrback(log.err, facility="tahoe.init", level=log.BAD) + d.addErrback(log.err, facility="tahoe.init", + level=log.BAD, umid="URyI5w") def init_stats_provider(self): gatherer_furl = self.get_config('stats_gatherer.furl') @@ -165,7 +166,8 @@ class Client(node.Node, testutil.PollMixin): ri_name = RIStorageServer.__remote_name__ self.introducer_client.publish(furl, "storage", ri_name) d.addCallback(_publish) - d.addErrback(log.err, facility="tahoe.init", level=log.BAD) + d.addErrback(log.err, facility="tahoe.init", + level=log.BAD, umid="aLGBKw") def init_client(self): helper_furl = self.get_config("helper.furl") @@ -185,7 +187,8 @@ class Client(node.Node, testutil.PollMixin): self.introducer_client.publish(furl, "stub_client", ri_name) d = self.when_tub_ready() d.addCallback(_publish) - d.addErrback(log.err, facility="tahoe.init", level=log.BAD) + d.addErrback(log.err, facility="tahoe.init", + level=log.BAD, umid="OEHq3g") def init_control(self): d = self.when_tub_ready() @@ -195,7 +198,8 @@ class Client(node.Node, testutil.PollMixin): control_url = self.tub.registerReference(c) self.write_private_config("control.furl", control_url + "\n") d.addCallback(_publish) - d.addErrback(log.err, facility="tahoe.init", level=log.BAD) + d.addErrback(log.err, facility="tahoe.init", + level=log.BAD, umid="d3tNXA") def init_helper(self): d = self.when_tub_ready() @@ -211,14 +215,16 @@ class Client(node.Node, testutil.PollMixin): "private", "helper.furl") self.tub.registerReference(h, furlFile=helper_furlfile) d.addCallback(_publish) - d.addErrback(log.err, facility="tahoe.init", level=log.BAD) + d.addErrback(log.err, facility="tahoe.init", + level=log.BAD, umid="K0mW5w") def init_key_gen(self, key_gen_furl): d = self.when_tub_ready() def _subscribe(self): self.tub.connectTo(key_gen_furl, self._got_key_generator) d.addCallback(_subscribe) - d.addErrback(log.err, facility="tahoe.init", level=log.BAD) + d.addErrback(log.err, facility="tahoe.init", + level=log.BAD, umid="z9DMzw") def _got_key_generator(self, key_generator): self._key_generator = key_generator diff --git a/src/allmydata/immutable/download.py b/src/allmydata/immutable/download.py index c719739b..52decdfc 100644 --- a/src/allmydata/immutable/download.py +++ b/src/allmydata/immutable/download.py @@ -121,7 +121,8 @@ class Output: # download, not so scary self.log("download stopped", level=log.UNUSUAL) else: - self.log("download failed!", failure=why, level=log.SCARY) + self.log("download failed!", failure=why, + level=log.SCARY, umid="lp1vaQ") self.downloadable.fail(why) def close(self): @@ -293,7 +294,7 @@ class BlockDownloader: if f.check(DeadReferenceError): level = log.UNUSUAL self.log("BlockDownloader[%d] got error" % self.blocknum, - failure=f, level=level, parent=lognum) + failure=f, level=level, parent=lognum, umid="5Z4uHQ") if self.results: peerid = self.vbucket.bucket.get_peerid() self.results.server_problems[peerid] = str(f) @@ -595,7 +596,8 @@ class FileDownloader: level = log.WEIRD if f.check(DeadReferenceError): level = log.UNUSUAL - self._client.log("Error during get_buckets", failure=f, level=level) + self._client.log("Error during get_buckets", failure=f, level=level, + umid="3uuBUQ") def bucket_failed(self, vbucket): shnum = vbucket.sharenum @@ -643,7 +645,7 @@ class FileDownloader: (bucket, base32.b2a(self._uri_extension_hash), base32.b2a(h))) - self.log(msg, level=log.SCARY) + self.log(msg, level=log.SCARY, umid="jnkTtQ") raise BadURIExtensionHashValue(msg) return self._unpack_uri_extension_data(proposal) return self._obtain_validated_thing(None, @@ -668,7 +670,7 @@ class FileDownloader: level = log.UNUSUAL self.log(format="operation %(op)s from vbucket %(vbucket)s failed", op=name, vbucket=str(bucket), - failure=f, level=level) + failure=f, level=level, umid="JGXxBA") if not sources: raise NotEnoughSharesError("ran out of peers, last error was %s" % (f,)) diff --git a/src/allmydata/immutable/encode.py b/src/allmydata/immutable/encode.py index 766292fe..1982567c 100644 --- a/src/allmydata/immutable/encode.py +++ b/src/allmydata/immutable/encode.py @@ -404,7 +404,8 @@ class Encoder(object): # non-tail segments should be the full segment size if length != input_chunk_size: log.msg("non-tail segment should be full segment size: %d!=%d" - % (length, input_chunk_size), level=log.BAD) + % (length, input_chunk_size), + level=log.BAD, umid="jNk5Yw") precondition(length == input_chunk_size, "length=%d != input_chunk_size=%d" % (length, input_chunk_size)) @@ -486,7 +487,7 @@ class Encoder(object): else: # even more UNUSUAL self.log("they weren't in our list of landlords", parent=ln, - level=log.WEIRD) + level=log.WEIRD, umid="TQGFRw") if len(self.landlords) < self.shares_of_happiness: msg = "lost too many shareholders during upload: %s" % why raise NotEnoughSharesError(msg) diff --git a/src/allmydata/introducer/client.py b/src/allmydata/introducer/client.py index 59a74341..bc7d5fd3 100644 --- a/src/allmydata/introducer/client.py +++ b/src/allmydata/introducer/client.py @@ -123,7 +123,7 @@ class IntroducerClient(service.Service, Referenceable): self._introducer_reconnector = rc def connect_failed(failure): self.log("Initial Introducer connection failed: perhaps it's down", - level=log.WEIRD, failure=failure) + level=log.WEIRD, failure=failure, umid="c5MqUQ") d = self._tub.getReference(self.introducer_furl) d.addErrback(connect_failed) @@ -175,7 +175,7 @@ class IntroducerClient(service.Service, Referenceable): self._subscriptions.add(service_name) d = self._publisher.callRemote("subscribe", self, service_name) d.addErrback(log.err, facility="tahoe.introducer", - level=log.WEIRD) + level=log.WEIRD, umid="2uMScQ") def _maybe_publish(self): if not self._publisher: @@ -185,7 +185,7 @@ class IntroducerClient(service.Service, Referenceable): for ann in self._published_announcements: d = self._publisher.callRemote("publish", ann) d.addErrback(log.err, facility="tahoe.introducer", - level=log.WEIRD) + level=log.WEIRD, umid="xs9pVQ") @@ -195,7 +195,7 @@ class IntroducerClient(service.Service, Referenceable): (furl, service_name, ri_name, nickname, ver, oldest) = ann if service_name not in self._subscribed_service_names: self.log("announcement for a service we don't care about [%s]" - % (service_name,), level=log.WEIRD) + % (service_name,), level=log.UNUSUAL, umid="dIpGNA") continue if ann in self._received_announcements: self.log("ignoring old announcement: %s" % (ann,), diff --git a/src/allmydata/introducer/server.py b/src/allmydata/introducer/server.py index a6d028b8..b0b9b652 100644 --- a/src/allmydata/introducer/server.py +++ b/src/allmydata/introducer/server.py @@ -31,7 +31,8 @@ class IntroducerNode(node.Node): self.log(" introducer is at %s" % self.introducer_url) self.write_config("introducer.furl", self.introducer_url + "\n") d.addCallback(_publish) - d.addErrback(log.err, facility="tahoe.init", level=log.BAD) + d.addErrback(log.err, facility="tahoe.init", + level=log.BAD, umid="UaNs9A") def init_web(self, webport): self.log("init_web(webport=%s)", args=(webport,)) diff --git a/src/allmydata/mutable/checker.py b/src/allmydata/mutable/checker.py index 375a1fbc..ef457523 100644 --- a/src/allmydata/mutable/checker.py +++ b/src/allmydata/mutable/checker.py @@ -203,7 +203,7 @@ class MutableChecker: si=base32.b2a(self._storage_index), shnum=shnum, where=ft, - level=log.WEIRD) + level=log.WEIRD, umid="EkK8QA") self.results.status_report = "\n".join(report) + "\n" diff --git a/src/allmydata/mutable/publish.py b/src/allmydata/mutable/publish.py index 22a496ad..04621559 100644 --- a/src/allmydata/mutable/publish.py +++ b/src/allmydata/mutable/publish.py @@ -665,7 +665,7 @@ class Publish: if surprise_shares: self.log("they had shares %s that we didn't know about" % (list(surprise_shares),), - parent=lp, level=log.WEIRD) + parent=lp, level=log.WEIRD, umid="un9CSQ") self.surprised = True if not wrote: @@ -685,7 +685,7 @@ class Publish: # bother adding to self.bad_peers . self.log("our testv failed, so the write did not happen", - parent=lp, level=log.WEIRD) + parent=lp, level=log.WEIRD, umid="8sc26g") self.surprised = True self.bad_peers.add(peerid) # don't ask them again # use the checkstring to add information to the log message @@ -741,7 +741,8 @@ class Publish: self._status.timings["total"] = now - self._started self._status.set_active(False) if isinstance(res, failure.Failure): - self.log("Publish done, with failure", failure=res, level=log.WEIRD) + self.log("Publish done, with failure", failure=res, + level=log.WEIRD, umid="nRsR9Q") self._status.set_status("Failed") elif self.surprised: self.log("Publish done, UncoordinatedWriteError", level=log.UNUSUAL) diff --git a/src/allmydata/mutable/retrieve.py b/src/allmydata/mutable/retrieve.py index 9000f421..ed4b9073 100644 --- a/src/allmydata/mutable/retrieve.py +++ b/src/allmydata/mutable/retrieve.py @@ -206,7 +206,7 @@ class Retrieve: shnum=shnum, peerid=idlib.shortnodeid_b2a(peerid), failure=f, - level=log.WEIRD) + level=log.WEIRD, umid="W0xnQA") d.addErrback(_oops) d.addBoth(self._check_for_done) # any error during _check_for_done means the download fails. If the @@ -251,7 +251,8 @@ class Retrieve: # log it and give the other shares a chance to be processed f = failure.Failure() self.log(format="bad share: %(f_value)s", - f_value=str(f.value), failure=f, level=log.WEIRD) + f_value=str(f.value), failure=f, + level=log.WEIRD, umid="7fzWZw") self.remove_peer(peerid) self.servermap.mark_bad_share(peerid, shnum, prefix) self._bad_shares.add( (peerid, shnum) ) @@ -309,7 +310,7 @@ class Retrieve: if f.check(DeadReferenceError): level = log.UNUSUAL self.log(format="error during query: %(f_value)s", - f_value=str(f.value), failure=f, level=level) + f_value=str(f.value), failure=f, level=level, umid="gOJB5g") def _check_for_done(self, res): # exit paths: @@ -411,14 +412,14 @@ class Retrieve: "bad": len(self._bad_shares), } self.log(format=format, - level=log.WEIRD, **args) + level=log.WEIRD, umid="ezTfjw", **args) err = NotEnoughSharesError("%s, last failure: %s" % (format % args, self._last_failure)) if self._bad_shares: self.log("We found some bad shares this pass. You should " "update the servermap and try again to check " "more peers", - level=log.WEIRD) + level=log.WEIRD, umid="EFkOlA") err.servermap = self.servermap raise err diff --git a/src/allmydata/mutable/servermap.py b/src/allmydata/mutable/servermap.py index 69b75b3e..4ec40cc6 100644 --- a/src/allmydata/mutable/servermap.py +++ b/src/allmydata/mutable/servermap.py @@ -540,7 +540,7 @@ class ServermapUpdater: # log it and give the other shares a chance to be processed f = failure.Failure() self.log(format="bad share: %(f_value)s", f_value=str(f.value), - failure=f, parent=lp, level=log.WEIRD) + failure=f, parent=lp, level=log.WEIRD, umid="h5llHg") self._bad_peers.add(peerid) self._last_failure = f checkstring = data[:SIGNED_PREFIX_LENGTH] @@ -676,7 +676,7 @@ class ServermapUpdater: if alleged_writekey != self._node.get_writekey(): self.log("invalid privkey from %s shnum %d" % (idlib.nodeid_b2a(peerid)[:8], shnum), - parent=lp, level=log.WEIRD) + parent=lp, level=log.WEIRD, umid="aJVccw") return # it's good @@ -692,7 +692,8 @@ class ServermapUpdater: def _query_failed(self, f, peerid): self.log(format="error during query: %(f_value)s", - f_value=str(f.value), failure=f, level=log.WEIRD) + f_value=str(f.value), failure=f, + level=log.WEIRD, umid="IHXuQg") if not self._running: return self._must_query.discard(peerid) @@ -711,7 +712,8 @@ class ServermapUpdater: if not self._need_privkey: return if shnum not in datavs: - self.log("privkey wasn't there when we asked it", level=log.WEIRD) + self.log("privkey wasn't there when we asked it", + level=log.WEIRD, umid="VA9uDQ") return datav = datavs[shnum] enc_privkey = datav[0] @@ -721,7 +723,7 @@ class ServermapUpdater: self._queries_outstanding.discard(peerid) self.log(format="error during privkey query: %(f_value)s", f_value=str(f.value), failure=f, - parent=lp, level=log.WEIRD) + parent=lp, level=log.WEIRD, umid="McoJ5w") if not self._running: return self._queries_outstanding.discard(peerid) @@ -942,7 +944,7 @@ class ServermapUpdater: eventually(self._done_deferred.callback, self._servermap) def _fatal_error(self, f): - self.log("fatal error", failure=f, level=log.WEIRD) + self.log("fatal error", failure=f, level=log.WEIRD, umid="1cNvlw") self._done_deferred.errback(f) diff --git a/src/allmydata/offloaded.py b/src/allmydata/offloaded.py index 06a5345b..8ed82e26 100644 --- a/src/allmydata/offloaded.py +++ b/src/allmydata/offloaded.py @@ -101,7 +101,7 @@ class CHKCheckerAndUEBFetcher: def _ueb_error(self, f): # an error means the file is unavailable, but the overall check # shouldn't fail. - self.log("UEB fetch failed", failure=f, level=log.WEIRD) + self.log("UEB fetch failed", failure=f, level=log.WEIRD, umid="sJLKVg") return None def _done(self, res): @@ -598,7 +598,7 @@ class Helper(Referenceable, service.MultiService): d.addCallback(_checked) def _err(f): self.log("error while checking for chk-already-in-grid", - failure=f, level=log.WEIRD, parent=lp) + failure=f, level=log.WEIRD, parent=lp, umid="jDtxZg") return f d.addErrback(_err) return d diff --git a/src/allmydata/storage.py b/src/allmydata/storage.py index 72cbdadd..b2e9685b 100644 --- a/src/allmydata/storage.py +++ b/src/allmydata/storage.py @@ -694,7 +694,7 @@ class MutableShareFile: self.log(format="bad write enabler on SI %(si)s," " recorded by nodeid %(nodeid)s", facility="tahoe.storage", - level=log.WEIRD, + level=log.WEIRD, umid="cE1eBQ", si=si_s, nodeid=idlib.nodeid_b2a(write_enabler_nodeid)) msg = "The write enabler was recorded by nodeid '%s'." % \ (idlib.nodeid_b2a(write_enabler_nodeid),) diff --git a/src/allmydata/web/root.py b/src/allmydata/web/root.py index 7326a17f..c5057f48 100644 --- a/src/allmydata/web/root.py +++ b/src/allmydata/web/root.py @@ -109,7 +109,7 @@ class IncidentReporter(RenderMixin, rend.Page): req = IRequest(ctx) log.msg(format="User reports incident through web page: %(details)s", details=get_arg(req, "details", ""), - level=log.WEIRD) + level=log.WEIRD, umid="LkD9Pw") req.setHeader("content-type", "text/plain") return "Thank you for your report!"