From: david-sarah Date: Wed, 24 Aug 2011 15:59:28 +0000 (-0700) Subject: Implementation, tests and docs for blacklists. This version allows listing directorie... X-Git-Tag: allmydata-tahoe-1.9.0a1~4 X-Git-Url: https://git.rkrishnan.org/(%5B%5E?a=commitdiff_plain;h=3d7a32647c4313856a92b0ac556e7706022cb97d;p=tahoe-lafs%2Ftahoe-lafs.git Implementation, tests and docs for blacklists. This version allows listing directories containing a blacklisted child. Inclusion of blacklist.py fixed. fixes #1425 --- diff --git a/docs/configuration.rst b/docs/configuration.rst index b9f14458..d8c189d0 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -606,6 +606,13 @@ Other files "which peers am I connected to" list), and the shortened form (the first few characters) is recorded in various log messages. +``access.blacklist`` + + Gateway nodes may find it necessary to prohibit access to certain files. The + web-API has a facility to block access to filecaps by their storage index, + returning a 403 "Forbidden" error instead of the original file. For more + details, see the "Access Blacklist" section of ``_. + Example ======= diff --git a/docs/frontends/webapi.rst b/docs/frontends/webapi.rst index 8e7b2265..1545a164 100644 --- a/docs/frontends/webapi.rst +++ b/docs/frontends/webapi.rst @@ -36,6 +36,7 @@ The Tahoe REST-ful Web API 8. `Static Files in /public_html`_ 9. `Safety and Security Issues -- Names vs. URIs`_ 10. `Concurrency Issues`_ +11. `Access Blacklist`_ Enabling the web-API port @@ -1955,6 +1956,51 @@ For more details, please see the "Consistency vs Availability" and "The Prime Coordination Directive" sections of `mutable.rst <../specifications/mutable.rst>`_. +Access Blacklist +================ + +Gateway nodes may find it necessary to prohibit access to certain files. The +web-API has a facility to block access to filecaps by their storage index, +returning a 403 "Forbidden" error instead of the original file. + +This blacklist is recorded in $NODEDIR/access.blacklist, and contains one +blocked file per line. Comment lines (starting with ``#``) are ignored. Each +line consists of the storage-index (in the usual base32 format as displayed +by the "More Info" page, or by the "tahoe debug dump-cap" command), followed +by whitespace, followed by a reason string, which will be included in the 403 +error message. This could hold a URL to a page that explains why the file is +blocked, for example. + +So for example, if you found a need to block access to a file with filecap +``URI:CHK:n7r3m6wmomelk4sep3kw5cvduq:os7ijw5c3maek7pg65e5254k2fzjflavtpejjyhshpsxuqzhcwwq:3:20:14861``, +you could do the following:: + + tahoe debug dump-cap URI:CHK:n7r3m6wmomelk4sep3kw5cvduq:os7ijw5c3maek7pg65e5254k2fzjflavtpejjyhshpsxuqzhcwwq:3:20:14861 + -> storage index: whpepioyrnff7orecjolvbudeu + echo "whpepioyrnff7orecjolvbudeu my puppy told me to" >>$NODEDIR/access.blacklist + tahoe restart $NODEDIR + tahoe get URI:CHK:n7r3m6wmomelk4sep3kw5cvduq:os7ijw5c3maek7pg65e5254k2fzjflavtpejjyhshpsxuqzhcwwq:3:20:14861 + -> error, 403 Access Prohibited: my puppy told me to + +The ``access.blacklist`` file will be checked each time a file or directory +is accessed: the file's ``mtime`` is used to decide whether it need to be +reloaded. Therefore no node restart is necessary when creating the initial +blacklist, nor when adding second, third, or additional entries to the list. +When modifying the file, be careful to update it atomically, otherwise a +request may arrive while the file is only halfway written, and the partial +file may be incorrectly parsed. + +The blacklist is applied to all access paths (including FTP, SFTP, and CLI +operations), not just the web-API. The blacklist also applies to directories. +If a directory is blacklisted, the gateway will refuse access to both that +directory and any child files/directories underneath it, when accessed via +"DIRCAP/SUBDIR/FILENAME" -style URLs. Users who go directly to the child +file/dir will bypass the blacklist. + +The node will log the SI of the file being blocked, and the reason code, into +the ``logs/twistd.log`` file. + + .. [1] URLs and HTTP and UTF-8, Oh My HTTP does not provide a mechanism to specify the character set used to diff --git a/src/allmydata/blacklist.py b/src/allmydata/blacklist.py new file mode 100644 index 00000000..c13edf0d --- /dev/null +++ b/src/allmydata/blacklist.py @@ -0,0 +1,155 @@ + +import os + +from zope.interface import implements +from twisted.internet import defer +from twisted.python import log as twisted_log + +from allmydata.interfaces import IFileNode, IFilesystemNode +from allmydata.util import base32 +from allmydata.util.encodingutil import quote_output + + +class FileProhibited(Exception): + """This client has been configured to prohibit access to this object.""" + def __init__(self, reason): + Exception.__init__(self, "Access Prohibited: %s" % quote_output(reason, encoding='utf-8', quotemarks=False)) + self.reason = reason + + +class Blacklist: + def __init__(self, blacklist_fn): + self.blacklist_fn = blacklist_fn + self.last_mtime = None + self.entries = {} + self.read_blacklist() # sets .last_mtime and .entries + + def read_blacklist(self): + try: + current_mtime = os.stat(self.blacklist_fn).st_mtime + except EnvironmentError: + # unreadable blacklist file means no blacklist + self.entries.clear() + return + try: + if self.last_mtime is None or current_mtime > self.last_mtime: + self.entries.clear() + for line in open(self.blacklist_fn, "r").readlines(): + line = line.strip() + if not line or line.startswith("#"): + continue + si_s, reason = line.split(None, 1) + si = base32.a2b(si_s) # must be valid base32 + self.entries[si] = reason + self.last_mtime = current_mtime + except Exception, e: + twisted_log.err(e, "unparseable blacklist file") + raise + + def check_storageindex(self, si): + self.read_blacklist() + reason = self.entries.get(si, None) + if reason is not None: + # log this to logs/twistd.log, since web logs go there too + twisted_log.msg("blacklist prohibited access to SI %s: %s" % + (base32.b2a(si), reason)) + return reason + + +class ProhibitedNode: + implements(IFileNode) + + def __init__(self, wrapped_node, reason): + assert IFilesystemNode.providedBy(wrapped_node), wrapped_node + self.wrapped_node = wrapped_node + self.reason = reason + + def get_cap(self): + return self.wrapped_node.get_cap() + + def get_readcap(self): + return self.wrapped_node.get_readcap() + + def is_readonly(self): + return self.wrapped_node.is_readonly() + + def is_mutable(self): + return self.wrapped_node.is_mutable() + + def is_unknown(self): + return self.wrapped_node.is_unknown() + + def is_allowed_in_immutable_directory(self): + return self.wrapped_node.is_allowed_in_immutable_directory() + + def is_alleged_immutable(self): + return self.wrapped_node.is_alleged_immutable() + + def raise_error(self): + # We don't raise an exception here because that would prevent the node from being listed. + pass + + def get_uri(self): + return self.wrapped_node.get_uri() + + def get_write_uri(self): + return self.wrapped_node.get_write_uri() + + def get_readonly_uri(self): + return self.wrapped_node.get_readonly_uri() + + def get_storage_index(self): + return self.wrapped_node.get_storage_index() + + def get_verify_cap(self): + return self.wrapped_node.get_verify_cap() + + def get_repair_cap(self): + return self.wrapped_node.get_repair_cap() + + def get_size(self): + return None + + def get_current_size(self): + return defer.succeed(None) + + def get_size_of_best_version(self): + return defer.succeed(None) + + def check(self, monitor, verify, add_lease): + return defer.succeed(None) + + def check_and_repair(self, monitor, verify, add_lease): + return defer.succeed(None) + + def get_version(self): + return None + + # Omitting any of these methods would fail safe; they are just to ensure correct error reporting. + + def get_best_readable_version(self): + raise FileProhibited(self.reason) + + def download_best_version(self): + raise FileProhibited(self.reason) + + def get_best_mutable_version(self): + raise FileProhibited(self.reason) + + def overwrite(self, new_contents): + raise FileProhibited(self.reason) + + def modify(self, modifier_cb): + raise FileProhibited(self.reason) + + def get_servermap(self, mode): + raise FileProhibited(self.reason) + + def download_version(self, servermap, version): + raise FileProhibited(self.reason) + + def upload(self, new_contents, servermap): + raise FileProhibited(self.reason) + + def get_writekey(self): + raise FileProhibited(self.reason) diff --git a/src/allmydata/client.py b/src/allmydata/client.py index 3cb4cd66..b030867b 100644 --- a/src/allmydata/client.py +++ b/src/allmydata/client.py @@ -25,6 +25,7 @@ from allmydata.history import History from allmydata.interfaces import IStatsProducer, RIStubClient, \ SDMF_VERSION, MDMF_VERSION from allmydata.nodemaker import NodeMaker +from allmydata.blacklist import Blacklist KiB=1024 @@ -279,6 +280,7 @@ class Client(node.Node, pollmixin.PollMixin): self.terminator.setServiceParent(self) self.add_service(Uploader(helper_furl, self.stats_provider)) self.init_stub_client() + self.init_blacklist() self.init_nodemaker() def init_client_storage_broker(self): @@ -331,6 +333,10 @@ class Client(node.Node, pollmixin.PollMixin): d.addErrback(log.err, facility="tahoe.init", level=log.BAD, umid="OEHq3g") + def init_blacklist(self): + fn = os.path.join(self.basedir, "access.blacklist") + self.blacklist = Blacklist(fn) + def init_nodemaker(self): self.nodemaker = NodeMaker(self.storage_broker, self._secret_holder, @@ -338,7 +344,8 @@ class Client(node.Node, pollmixin.PollMixin): self.getServiceNamed("uploader"), self.terminator, self.get_encoding_parameters(), - self._key_generator) + self._key_generator, + self.blacklist) default = self.get_config("client", "mutable.format", default="sdmf") if default == "mdmf": self.mutable_file_default = MDMF_VERSION diff --git a/src/allmydata/interfaces.py b/src/allmydata/interfaces.py index a25bb91e..69ca6239 100644 --- a/src/allmydata/interfaces.py +++ b/src/allmydata/interfaces.py @@ -784,7 +784,7 @@ class IFilesystemNode(Interface): read-only access with others, use get_readonly_uri(). """ - def get_write_uri(n): + def get_write_uri(): """Return the URI string that can be used by others to get write access to this node, if it is writeable. If this is a read-only node, return None.""" diff --git a/src/allmydata/nodemaker.py b/src/allmydata/nodemaker.py index fb69ea58..fbf527c2 100644 --- a/src/allmydata/nodemaker.py +++ b/src/allmydata/nodemaker.py @@ -9,14 +9,17 @@ from allmydata.mutable.filenode import MutableFileNode from allmydata.mutable.publish import MutableData from allmydata.dirnode import DirectoryNode, pack_children from allmydata.unknown import UnknownNode +from allmydata.blacklist import ProhibitedNode from allmydata import uri + class NodeMaker: implements(INodeMaker) def __init__(self, storage_broker, secret_holder, history, uploader, terminator, - default_encoding_parameters, key_generator): + default_encoding_parameters, key_generator, + blacklist=None): self.storage_broker = storage_broker self.secret_holder = secret_holder self.history = history @@ -24,6 +27,7 @@ class NodeMaker: self.terminator = terminator self.default_encoding_parameters = default_encoding_parameters self.key_generator = key_generator + self.blacklist = blacklist self._node_cache = weakref.WeakValueDictionary() # uri -> node @@ -62,14 +66,27 @@ class NodeMaker: else: memokey = "M" + bigcap if memokey in self._node_cache: - return self._node_cache[memokey] - cap = uri.from_string(bigcap, deep_immutable=deep_immutable, name=name) - node = self._create_from_single_cap(cap) - if node: - self._node_cache[memokey] = node # note: WeakValueDictionary + node = self._node_cache[memokey] else: - # don't cache UnknownNode - node = UnknownNode(writecap, readcap, deep_immutable=deep_immutable, name=name) + cap = uri.from_string(bigcap, deep_immutable=deep_immutable, + name=name) + node = self._create_from_single_cap(cap) + if node: + self._node_cache[memokey] = node # note: WeakValueDictionary + else: + # don't cache UnknownNode + node = UnknownNode(writecap, readcap, + deep_immutable=deep_immutable, name=name) + + if self.blacklist: + si = node.get_storage_index() + # if this node is blacklisted, return the reason, otherwise return None + reason = self.blacklist.check_storageindex(si) + if reason is not None: + # The original node object is cached above, not the ProhibitedNode wrapper. + # This ensures that removing the blacklist entry will make the node + # accessible if create_from_cap is called again. + node = ProhibitedNode(node, reason) return node def _create_from_single_cap(self, cap): diff --git a/src/allmydata/test/test_web.py b/src/allmydata/test/test_web.py index 109ebdc4..b964c7fe 100644 --- a/src/allmydata/test/test_web.py +++ b/src/allmydata/test/test_web.py @@ -170,6 +170,7 @@ class FakeClient(Client): self.history = FakeHistory() self.uploader = FakeUploader() self.uploader.setServiceParent(self) + self.blacklist = None self.nodemaker = FakeNodeMaker(None, self._secret_holder, None, self.uploader, None, None, None) @@ -5262,6 +5263,146 @@ class Grid(GridTestMixin, WebErrorMixin, ShouldFailMixin, testutil.ReallyEqualMi return d + def test_blacklist(self): + # download from a blacklisted URI, get an error + self.basedir = "web/Grid/blacklist" + self.set_up_grid() + c0 = self.g.clients[0] + c0_basedir = c0.basedir + fn = os.path.join(c0_basedir, "access.blacklist") + self.uris = {} + DATA = "off-limits " * 50 + + d = c0.upload(upload.Data(DATA, convergence="")) + def _stash_uri_and_create_dir(ur): + self.uri = ur.uri + self.url = "uri/"+self.uri + u = uri.from_string_filenode(self.uri) + self.si = u.get_storage_index() + childnode = c0.create_node_from_uri(self.uri, None) + return c0.create_dirnode({u"blacklisted.txt": (childnode,{}) }) + d.addCallback(_stash_uri_and_create_dir) + def _stash_dir(node): + self.dir_node = node + self.dir_uri = node.get_uri() + self.dir_url = "uri/"+self.dir_uri + d.addCallback(_stash_dir) + d.addCallback(lambda ign: self.GET(self.dir_url, followRedirect=True)) + def _check_dir_html(body): + self.failUnlessIn("", body) + self.failUnlessIn("blacklisted.txt", body) + d.addCallback(_check_dir_html) + d.addCallback(lambda ign: self.GET(self.url)) + d.addCallback(lambda body: self.failUnlessEqual(DATA, body)) + + def _blacklist(ign): + f = open(fn, "w") + f.write(" # this is a comment\n") + f.write(" \n") + f.write("\n") # also exercise blank lines + f.write("%s %s\n" % (base32.b2a(self.si), "off-limits to you")) + f.close() + # clients should be checking the blacklist each time, so we don't + # need to restart the client + d.addCallback(_blacklist) + d.addCallback(lambda ign: self.shouldHTTPError("get_from_blacklisted_uri", + 403, "Forbidden", + "Access Prohibited: off-limits", + self.GET, self.url)) + + # We should still be able to list the parent directory, in HTML... + d.addCallback(lambda ign: self.GET(self.dir_url, followRedirect=True)) + def _check_dir_html2(body): + self.failUnlessIn("", body) + self.failUnlessIn("blacklisted.txt", body) + d.addCallback(_check_dir_html2) + + # ... and in JSON (used by CLI). + d.addCallback(lambda ign: self.GET(self.dir_url+"?t=json", followRedirect=True)) + def _check_dir_json(res): + data = simplejson.loads(res) + self.failUnless(isinstance(data, list), data) + self.failUnlessEqual(data[0], "dirnode") + self.failUnless(isinstance(data[1], dict), data) + self.failUnlessIn("children", data[1]) + self.failUnlessIn("blacklisted.txt", data[1]["children"]) + childdata = data[1]["children"]["blacklisted.txt"] + self.failUnless(isinstance(childdata, list), data) + self.failUnlessEqual(childdata[0], "filenode") + self.failUnless(isinstance(childdata[1], dict), data) + d.addCallback(_check_dir_json) + + def _unblacklist(ign): + open(fn, "w").close() + # the Blacklist object watches mtime to tell when the file has + # changed, but on windows this test will run faster than the + # filesystem's mtime resolution. So we edit Blacklist.last_mtime + # to force a reload. + self.g.clients[0].blacklist.last_mtime -= 2.0 + d.addCallback(_unblacklist) + + # now a read should work + d.addCallback(lambda ign: self.GET(self.url)) + d.addCallback(lambda body: self.failUnlessEqual(DATA, body)) + + # read again to exercise the blacklist-is-unchanged logic + d.addCallback(lambda ign: self.GET(self.url)) + d.addCallback(lambda body: self.failUnlessEqual(DATA, body)) + + # now add a blacklisted directory, and make sure files under it are + # refused too + def _add_dir(ign): + childnode = c0.create_node_from_uri(self.uri, None) + return c0.create_dirnode({u"child": (childnode,{}) }) + d.addCallback(_add_dir) + def _get_dircap(dn): + self.dir_si_b32 = base32.b2a(dn.get_storage_index()) + self.dir_url_base = "uri/"+dn.get_write_uri() + self.dir_url_json1 = "uri/"+dn.get_write_uri()+"?t=json" + self.dir_url_json2 = "uri/"+dn.get_write_uri()+"/?t=json" + self.dir_url_json_ro = "uri/"+dn.get_readonly_uri()+"/?t=json" + self.child_url = "uri/"+dn.get_readonly_uri()+"/child" + d.addCallback(_get_dircap) + d.addCallback(lambda ign: self.GET(self.dir_url_base, followRedirect=True)) + d.addCallback(lambda body: self.failUnlessIn("", body)) + d.addCallback(lambda ign: self.GET(self.dir_url_json1)) + d.addCallback(lambda res: simplejson.loads(res)) # just check it decodes + d.addCallback(lambda ign: self.GET(self.dir_url_json2)) + d.addCallback(lambda res: simplejson.loads(res)) # just check it decodes + d.addCallback(lambda ign: self.GET(self.dir_url_json_ro)) + d.addCallback(lambda res: simplejson.loads(res)) # just check it decodes + d.addCallback(lambda ign: self.GET(self.child_url)) + d.addCallback(lambda body: self.failUnlessEqual(DATA, body)) + + def _block_dir(ign): + f = open(fn, "w") + f.write("%s %s\n" % (self.dir_si_b32, "dir-off-limits to you")) + f.close() + self.g.clients[0].blacklist.last_mtime -= 2.0 + d.addCallback(_block_dir) + d.addCallback(lambda ign: self.shouldHTTPError("get_from_blacklisted_dir base", + 403, "Forbidden", + "Access Prohibited: dir-off-limits", + self.GET, self.dir_url_base)) + d.addCallback(lambda ign: self.shouldHTTPError("get_from_blacklisted_dir json1", + 403, "Forbidden", + "Access Prohibited: dir-off-limits", + self.GET, self.dir_url_json1)) + d.addCallback(lambda ign: self.shouldHTTPError("get_from_blacklisted_dir json2", + 403, "Forbidden", + "Access Prohibited: dir-off-limits", + self.GET, self.dir_url_json2)) + d.addCallback(lambda ign: self.shouldHTTPError("get_from_blacklisted_dir json_ro", + 403, "Forbidden", + "Access Prohibited: dir-off-limits", + self.GET, self.dir_url_json_ro)) + d.addCallback(lambda ign: self.shouldHTTPError("get_from_blacklisted_dir child", + 403, "Forbidden", + "Access Prohibited: dir-off-limits", + self.GET, self.child_url)) + return d + + class CompletelyUnhandledError(Exception): pass class ErrorBoom(rend.Page): diff --git a/src/allmydata/web/common.py b/src/allmydata/web/common.py index 6e905544..0a14bf30 100644 --- a/src/allmydata/web/common.py +++ b/src/allmydata/web/common.py @@ -6,6 +6,7 @@ from zope.interface import Interface from nevow import loaders, appserver from nevow.inevow import IRequest from nevow.util import resource_filename +from allmydata import blacklist from allmydata.interfaces import ExistingChildError, NoSuchChildError, \ FileTooLargeError, NotEnoughSharesError, NoSharesError, \ EmptyPathnameComponentError, MustBeDeepImmutableError, \ @@ -257,6 +258,9 @@ def humanize_failure(f): "The cap is being passed in a read slot (ro_uri), or was retrieved " "from a read slot as an unknown cap.") % quoted_name return (t, http.BAD_REQUEST) + if f.check(blacklist.FileProhibited): + t = "Access Prohibited: %s" % quote_output(f.value.reason, encoding="utf-8", quotemarks=False) + return (t, http.FORBIDDEN) if f.check(WebError): return (f.value.text, f.value.code) if f.check(FileTooLargeError): diff --git a/src/allmydata/web/directory.py b/src/allmydata/web/directory.py index d5d20294..b04cbe75 100644 --- a/src/allmydata/web/directory.py +++ b/src/allmydata/web/directory.py @@ -17,6 +17,7 @@ from allmydata.uri import from_string_dirnode from allmydata.interfaces import IDirectoryNode, IFileNode, IFilesystemNode, \ IImmutableFileNode, IMutableFileNode, ExistingChildError, \ NoSuchChildError, EmptyPathnameComponentError, SDMF_VERSION, MDMF_VERSION +from allmydata.blacklist import ProhibitedNode from allmydata.monitor import Monitor, OperationCancelledError from allmydata import dirnode from allmydata.web.common import text_plain, WebError, \ @@ -764,6 +765,17 @@ class DirectoryAsHTML(rend.Page): ctx.fillSlots("size", "-") info_link = "%s/uri/%s/?t=info" % (root, quoted_uri) + elif isinstance(target, ProhibitedNode): + ctx.fillSlots("filename", T.strike[name]) + if IDirectoryNode.providedBy(target.wrapped_node): + blacklisted_type = "DIR-BLACKLISTED" + else: + blacklisted_type = "BLACKLISTED" + ctx.fillSlots("type", blacklisted_type) + ctx.fillSlots("size", "-") + info_link = None + ctx.fillSlots("info", ["Access Prohibited:", T.br, target.reason]) + else: # unknown ctx.fillSlots("filename", html.escape(name)) @@ -779,7 +791,8 @@ class DirectoryAsHTML(rend.Page): # writecap and the readcap info_link = "%s?t=info" % urllib.quote(name) - ctx.fillSlots("info", T.a(href=info_link)["More Info"]) + if info_link: + ctx.fillSlots("info", T.a(href=info_link)["More Info"]) return ctx.tag diff --git a/src/allmydata/web/filenode.py b/src/allmydata/web/filenode.py index d5f40bc4..ef8d3bae 100644 --- a/src/allmydata/web/filenode.py +++ b/src/allmydata/web/filenode.py @@ -12,6 +12,8 @@ from allmydata.immutable.upload import FileHandle from allmydata.mutable.publish import MutableFileHandle from allmydata.mutable.common import MODE_READ from allmydata.util import log, base32 +from allmydata.util.encodingutil import quote_output +from allmydata.blacklist import FileProhibited, ProhibitedNode from allmydata.web.common import text_plain, WebError, RenderMixin, \ boolean_of_arg, get_arg, should_create_intermediate_directories, \ @@ -152,11 +154,13 @@ class FileNodeHandler(RenderMixin, rend.Page, ReplaceMeMixin): def childFactory(self, ctx, name): req = IRequest(ctx) + if isinstance(self.node, ProhibitedNode): + raise FileProhibited(self.node.reason) if should_create_intermediate_directories(req): - raise WebError("Cannot create directory '%s', because its " - "parent is a file, not a directory" % name) - raise WebError("Files have no children, certainly not named '%s'" - % name) + raise WebError("Cannot create directory %s, because its " + "parent is a file, not a directory" % quote_output(name, encoding='utf-8')) + raise WebError("Files have no children, certainly not named %s" + % quote_output(name, encoding='utf-8')) def render_GET(self, ctx): req = IRequest(ctx) diff --git a/src/allmydata/webish.py b/src/allmydata/webish.py index 03ca3bad..a8e0bff8 100644 --- a/src/allmydata/webish.py +++ b/src/allmydata/webish.py @@ -129,7 +129,7 @@ class WebishServer(service.MultiService): name = "webish" def __init__(self, client, webport, nodeurl_path=None, staticdir=None, - clock=None): + clock=None): service.MultiService.__init__(self) # the 'data' argument to all render() methods default to the Client # the 'clock' argument to root.Root is, if set, a