From: Brian Warner <warner@allmydata.com>
Date: Wed, 13 Feb 2008 02:31:23 +0000 (-0700)
Subject: webish: censor all caps before logging the HTTP request, to preserve user privacy
X-Git-Tag: allmydata-tahoe-0.8.0~84
X-Git-Url: https://git.rkrishnan.org/Site/Content/Exhibitors/%22news.html/?a=commitdiff_plain;h=94348b7182699f71cf9a83b49d5393a1488505e9;p=tahoe-lafs%2Ftahoe-lafs.git

webish: censor all caps before logging the HTTP request, to preserve user privacy
---

Review notes (added during review; not part of the original commit message):

* Formatting: this copy of the patch had its line breaks and indentation
  collapsed. The line breaks, the Python indentation and the two blank
  context lines around "self.process()" below are reconstructed from the
  code's syntax. Hunk headers are copied unchanged, so their line counts may
  not match this reconstruction. Re-fetch the commit named in X-Git-Url
  before applying.

* First hunk: removes "from twisted.python import log" and adds "log" to the
  "from allmydata.util import ..." line, so the "log" name used by the new
  _logger() refers to allmydata.util.log.

* MyRequest._escape(s): takes repr(s) and strips the first and last
  character. When repr() chose single quotes it also backslash-escapes every
  '"' and turns "\'" back into "'", so the result reads as if the string had
  been written inside double quotes. Nothing else in this patch calls it.

* MyRequest._logger(): splits self.uri at the first "?". A path that begins
  with "/uri" is replaced by the fixed text "/uri/[CENSORED]..". A query
  string that begins with "uri=" is replaced, as a whole, by
  "[uri=CENSORED]"; any other query string is kept unchanged. The result is
  sent to log.msg() together with self.getClientIP(), self.method, self.code
  and self.sentLength (or "-" when that is falsy), using
  facility="tahoe.webish" and level=log.OPERATIONAL.

* NOTE(review): the query string is censored only when it *starts* with
  "uri=". With any other argument first (for example "?t=json&uri=...") the
  uri value would be logged unchanged. Confirm whether such requests occur
  and whether every uri= argument should be censored.

* NOTE(review): path.startswith("/uri") is a plain prefix test, so every
  path beginning with those four characters is replaced, and paths under
  any other prefix are logged as-is. Presumably only /uri paths carry caps;
  verify against the URL layout.

diff --git a/src/allmydata/webish.py b/src/allmydata/webish.py
index e5abc8aa..acd37c94 100644
--- a/src/allmydata/webish.py
+++ b/src/allmydata/webish.py
@@ -2,12 +2,11 @@
 import time, os.path
 from twisted.application import service, strports, internet
 from twisted.web import static, resource, server, html, http
-from twisted.python import log
 from twisted.internet import defer, address
 from twisted.internet.interfaces import IConsumer
 from nevow import inevow, rend, loaders, appserver, url, tags as T
 from nevow.static import File as nevow_File # TODO: merge with static.File?
-from allmydata.util import fileutil, idlib, observer
+from allmydata.util import fileutil, idlib, observer, log
 import simplejson
 from allmydata.interfaces import IDownloadTarget, IDirectoryNode, IFileNode, \
      IMutableFileNode
@@ -124,6 +123,48 @@ class MyRequest(appserver.NevowRequest):
 
         self.process()
 
+    def _escape(self, s):
+        # pain in the ass. Return a string like python repr, but always
+        # escaped as if surrounding quotes were "".
+        r = repr(s)
+        if r[0] == "'":
+            return r[1:-1].replace('"', '\\"').replace("\\'", "'")
+        return r[1:-1]
+
+    def _logger(self):
+        # we build up a log string that hides most of the cap, to preserve
+        # user privacy. We retain the query args so we can identify things
+        # like t=json. Then we send it to the flog. We make no attempt to
+        # match apache formatting. TODO: when we move to DSA dirnodes and
+        # shorter caps, consider exposing a few characters of the cap, or
+        # maybe a few characters of its hash.
+        x = self.uri.split("?", 1)
+        if len(x) == 1:
+            # no query args
+            path = self.uri
+            queryargs = ""
+        else:
+            path, queryargs = x
+            # there is a form handler which redirects POST /uri?uri=FOO into
+            # GET /uri/FOO so folks can paste in non-HTTP-prefixed uris. Make
+            # sure we censor these too.
+            if queryargs.startswith("uri="):
+                queryargs = "[uri=CENSORED]"
+            queryargs = "?" + queryargs
+        if path.startswith("/uri"):
+            path = "/uri/[CENSORED].."
+        uri = path + queryargs
+
+        log.msg(format="web: %(clientip)s %(method)s %(uri)s %(code)s %(length)s",
+                clientip=self.getClientIP(),
+                method=self.method,
+                uri=uri,
+                code=self.code,
+                length=(self.sentLength or "-"),
+                facility="tahoe.webish",
+                level=log.OPERATIONAL,
+                )
+
+
 class Directory(rend.Page):
     addSlash = True
     docFactory = getxmlfile("directory.xhtml")