From 1df7f114b7094dab8d7ffea8b390b10c0070b0fd Mon Sep 17 00:00:00 2001 From: David-Sarah Hopwood <david-sarah@jacaranda.org> Date: Thu, 25 Oct 2012 01:01:25 +0100 Subject: [PATCH] src/allmydata/web/directory.py: fix HTML double-encoding issue for filenames. Nevow automatically HTML-escapes strings passed in stan without a raw marker. Written by MK_FG. fixes #1143 Signed-off-by: David-Sarah Hopwood <david-sarah@jacaranda.org> --- src/allmydata/test/test_web.py | 83 +++++++++++++++++++++++----------- src/allmydata/web/directory.py | 13 ++---- 2 files changed, 62 insertions(+), 34 deletions(-) diff --git a/src/allmydata/test/test_web.py b/src/allmydata/test/test_web.py index 2483236d..d52ce676 100644 --- a/src/allmydata/test/test_web.py +++ b/src/allmydata/test/test_web.py @@ -6,11 +6,12 @@ from twisted.application import service from twisted.trial import unittest from twisted.internet import defer, reactor from twisted.internet.task import Clock -from twisted.web import client, error, http +from twisted.web import client, error, http, html from twisted.python import failure, log from foolscap.api import fireEventually, flushEventualQueue +from nevow.util import escapeToXML from nevow import rend from allmydata import interfaces, uri, webish, dirnode @@ -256,6 +257,16 @@ class WebMixin(object): _ign, n, blocking_uri = self.makefile(1) foo.set_uri(u"blockingfile", blocking_uri, blocking_uri) + # filenode to test for html encoding issues + self._htmlname_unicode = u"<&weirdly'named\"file>>>_<iframe />.txt" + self._htmlname_raw = self._htmlname_unicode.encode('utf-8') + self._htmlname_urlencoded = urllib.quote(self._htmlname_raw, '') + self._htmlname_escaped = escapeToXML(self._htmlname_raw) + self._htmlname_escaped_attr = html.escape(self._htmlname_raw) + self._htmlname_escaped_double = escapeToXML(html.escape(self._htmlname_raw)) + self.HTMLNAME_CONTENTS, n, self._htmlname_txt_uri = self.makefile(0) + foo.set_uri(self._htmlname_unicode, self._htmlname_txt_uri, self._htmlname_txt_uri) + unicode_filename = u"n\u00fc.txt" # n u-umlaut . t x t # ok, unicode calls it LATIN SMALL LETTER U WITH DIAERESIS but I # still think of it as an umlaut @@ -280,6 +291,7 @@ class WebMixin(object): # public/foo/baz.txt # public/foo/quux.txt # public/foo/blockingfile + # public/foo/<&weirdly'named\"file>>>_<iframe />.txt # public/foo/empty/ # public/foo/sub/ # public/foo/sub/baz.txt @@ -365,8 +377,8 @@ class WebMixin(object): kidnames = sorted([unicode(n) for n in data[1]["children"]]) self.failUnlessEqual(kidnames, - [u"bar.txt", u"baz.txt", u"blockingfile", - u"empty", u"n\u00fc.txt", u"quux.txt", u"sub"]) + [self._htmlname_unicode, u"bar.txt", u"baz.txt", + u"blockingfile", u"empty", u"n\u00fc.txt", u"quux.txt", u"sub"]) kids = dict( [(unicode(name),value) for (name,value) in data[1]["children"].iteritems()] ) @@ -1341,6 +1353,24 @@ class Web(WebMixin, WebErrorMixin, testutil.StallMixin, testutil.ReallyEqualMixi d.addCallback(_check) return d + def test_GET_DIRECTORY_html_filenode_encoding(self): + d = self.GET(self.public_url + "/foo", followRedirect=True) + def _check(html): + # Check if encoded entries are there + self.failUnlessIn('@@named=/' + self._htmlname_urlencoded + '">' + + self._htmlname_escaped + '</a>', html) + self.failUnlessIn('value="' + self._htmlname_escaped_attr + '"', html) + self.failIfIn(self._htmlname_escaped_double, html) + # Make sure that Nevow escaping actually works by checking for unsafe characters + # and that '&' is escaped. + for entity in '<>': + self.failUnlessIn(entity, self._htmlname_raw) + self.failIfIn(entity, self._htmlname_escaped) + self.failUnlessIn('&', re.sub(r'&(amp|lt|gt|quot|apos);', '', self._htmlname_raw)) + self.failIfIn('&', re.sub(r'&(amp|lt|gt|quot|apos);', '', self._htmlname_escaped)) + d.addCallback(_check) + return d + def test_GET_root_html(self): d = self.GET("/") d.addCallback(self._check_upload_and_mkdir_forms) @@ -1563,16 +1593,16 @@ class Web(WebMixin, WebErrorMixin, testutil.StallMixin, testutil.ReallyEqualMixi d.addCallback(self.wait_for_operation, "127") d.addCallback(self.get_operation_results, "127", "json") def _got_json(stats): - expected = {"count-immutable-files": 3, + expected = {"count-immutable-files": 4, "count-mutable-files": 2, "count-literal-files": 0, - "count-files": 5, + "count-files": 6, "count-directories": 3, - "size-immutable-files": 57, + "size-immutable-files": 76, "size-literal-files": 0, #"size-directories": 1912, # varies #"largest-directory": 1590, - "largest-directory-children": 7, + "largest-directory-children": 8, "largest-immutable-file": 19, } for k,v in expected.iteritems(): @@ -1580,7 +1610,7 @@ class Web(WebMixin, WebErrorMixin, testutil.StallMixin, testutil.ReallyEqualMixi "stats[%s] was %s, not %s" % (k, stats[k], v)) self.failUnlessReallyEqual(stats["size-files-histogram"], - [ [11, 31, 3] ]) + [ [11, 31, 4] ]) d.addCallback(_got_json) return d @@ -1589,7 +1619,7 @@ class Web(WebMixin, WebErrorMixin, testutil.StallMixin, testutil.ReallyEqualMixi def _check(res): self.failUnless(res.endswith("\n")) units = [simplejson.loads(t) for t in res[:-1].split("\n")] - self.failUnlessReallyEqual(len(units), 9) + self.failUnlessReallyEqual(len(units), 10) self.failUnlessEqual(units[-1]["type"], "stats") first = units[0] self.failUnlessEqual(first["path"], []) @@ -2434,7 +2464,8 @@ class Web(WebMixin, WebErrorMixin, testutil.StallMixin, testutil.ReallyEqualMixi # make sure that nothing was added d.addCallback(lambda res: self.failUnlessNodeKeysAre(self._foo_node, - [u"bar.txt", u"baz.txt", u"blockingfile", + [self._htmlname_unicode, + u"bar.txt", u"baz.txt", u"blockingfile", u"empty", u"n\u00fc.txt", u"quux.txt", u"sub"])) return d @@ -2626,13 +2657,13 @@ class Web(WebMixin, WebErrorMixin, testutil.StallMixin, testutil.ReallyEqualMixi d.addCallback(self.wait_for_operation, "123") def _check_json(data): self.failUnlessReallyEqual(data["finished"], True) - self.failUnlessReallyEqual(data["count-objects-checked"], 10) - self.failUnlessReallyEqual(data["count-objects-healthy"], 10) + self.failUnlessReallyEqual(data["count-objects-checked"], 11) + self.failUnlessReallyEqual(data["count-objects-healthy"], 11) d.addCallback(_check_json) d.addCallback(self.get_operation_results, "123", "html") def _check_html(res): - self.failUnlessIn("Objects Checked: <span>10</span>", res) - self.failUnlessIn("Objects Healthy: <span>10</span>", res) + self.failUnlessIn("Objects Checked: <span>11</span>", res) + self.failUnlessIn("Objects Healthy: <span>11</span>", res) self.failUnlessIn(FAVICON_MARKUP, res) d.addCallback(_check_html) @@ -2662,22 +2693,22 @@ class Web(WebMixin, WebErrorMixin, testutil.StallMixin, testutil.ReallyEqualMixi d.addCallback(self.wait_for_operation, "124") def _check_json(data): self.failUnlessReallyEqual(data["finished"], True) - self.failUnlessReallyEqual(data["count-objects-checked"], 10) - self.failUnlessReallyEqual(data["count-objects-healthy-pre-repair"], 10) + self.failUnlessReallyEqual(data["count-objects-checked"], 11) + self.failUnlessReallyEqual(data["count-objects-healthy-pre-repair"], 11) self.failUnlessReallyEqual(data["count-objects-unhealthy-pre-repair"], 0) self.failUnlessReallyEqual(data["count-corrupt-shares-pre-repair"], 0) self.failUnlessReallyEqual(data["count-repairs-attempted"], 0) self.failUnlessReallyEqual(data["count-repairs-successful"], 0) self.failUnlessReallyEqual(data["count-repairs-unsuccessful"], 0) - self.failUnlessReallyEqual(data["count-objects-healthy-post-repair"], 10) + self.failUnlessReallyEqual(data["count-objects-healthy-post-repair"], 11) self.failUnlessReallyEqual(data["count-objects-unhealthy-post-repair"], 0) self.failUnlessReallyEqual(data["count-corrupt-shares-post-repair"], 0) d.addCallback(_check_json) d.addCallback(self.get_operation_results, "124", "html") def _check_html(res): - self.failUnlessIn("Objects Checked: <span>10</span>", res) + self.failUnlessIn("Objects Checked: <span>11</span>", res) - self.failUnlessIn("Objects Healthy (before repair): <span>10</span>", res) + self.failUnlessIn("Objects Healthy (before repair): <span>11</span>", res) self.failUnlessIn("Objects Unhealthy (before repair): <span>0</span>", res) self.failUnlessIn("Corrupt Shares (before repair): <span>0</span>", res) @@ -2685,7 +2716,7 @@ class Web(WebMixin, WebErrorMixin, testutil.StallMixin, testutil.ReallyEqualMixi self.failUnlessIn("Repairs Successful: <span>0</span>", res) self.failUnlessIn("Repairs Unsuccessful: <span>0</span>", res) - self.failUnlessIn("Objects Healthy (after repair): <span>10</span>", res) + self.failUnlessIn("Objects Healthy (after repair): <span>11</span>", res) self.failUnlessIn("Objects Unhealthy (after repair): <span>0</span>", res) self.failUnlessIn("Corrupt Shares (after repair): <span>0</span>", res) @@ -3678,12 +3709,12 @@ class Web(WebMixin, WebErrorMixin, testutil.StallMixin, testutil.ReallyEqualMixi return d def test_PUT_NEWFILEURL_bad_format(self): - new_contents = self.NEWFILE_CONTENTS * 300000 - return self.shouldHTTPError("PUT_NEWFILEURL_bad_format", - 400, "Bad Request", "Unknown format: foo", - self.PUT, self.public_url + \ - "/foo/foo.txt?format=foo", - new_contents) + new_contents = self.NEWFILE_CONTENTS * 300000 + return self.shouldHTTPError("PUT_NEWFILEURL_bad_format", + 400, "Bad Request", "Unknown format: foo", + self.PUT, self.public_url + \ + "/foo/foo.txt?format=foo", + new_contents) def test_PUT_NEWFILEURL_uri_replace(self): contents, n, new_uri = self.makefile(8) diff --git a/src/allmydata/web/directory.py b/src/allmydata/web/directory.py index cf1d4d38..33ca2599 100644 --- a/src/allmydata/web/directory.py +++ b/src/allmydata/web/directory.py @@ -6,7 +6,7 @@ from zope.interface import implements from twisted.internet import defer from twisted.internet.interfaces import IPushProducer from twisted.python.failure import Failure -from twisted.web import http, html +from twisted.web import http from nevow import url, rend, inevow, tags as T from nevow.inevow import IRequest @@ -750,8 +750,7 @@ class DirectoryAsHTML(rend.Page): # page that doesn't know about the directory at all dlurl = "%s/file/%s/@@named=/%s" % (root, quoted_uri, nameurl) - ctx.fillSlots("filename", - T.a(href=dlurl)[html.escape(name)]) + ctx.fillSlots("filename", T.a(href=dlurl)[name]) ctx.fillSlots("type", "SSK") ctx.fillSlots("size", "?") @@ -761,8 +760,7 @@ class DirectoryAsHTML(rend.Page): elif IImmutableFileNode.providedBy(target): dlurl = "%s/file/%s/@@named=/%s" % (root, quoted_uri, nameurl) - ctx.fillSlots("filename", - T.a(href=dlurl)[html.escape(name)]) + ctx.fillSlots("filename", T.a(href=dlurl)[name]) ctx.fillSlots("type", "FILE") ctx.fillSlots("size", target.get_size()) @@ -772,8 +770,7 @@ class DirectoryAsHTML(rend.Page): elif IDirectoryNode.providedBy(target): # directory uri_link = "%s/uri/%s/" % (root, urllib.quote(target_uri)) - ctx.fillSlots("filename", - T.a(href=uri_link)[html.escape(name)]) + ctx.fillSlots("filename", T.a(href=uri_link)[name]) if not target.is_mutable(): dirtype = "DIR-IMM" elif target.is_readonly(): @@ -797,7 +794,7 @@ class DirectoryAsHTML(rend.Page): else: # unknown - ctx.fillSlots("filename", html.escape(name)) + ctx.fillSlots("filename", name) if target.get_write_uri() is not None: unknowntype = "?" elif not self.node.is_mutable() or target.is_alleged_immutable(): -- 2.45.2