From 1df7f114b7094dab8d7ffea8b390b10c0070b0fd Mon Sep 17 00:00:00 2001
From: David-Sarah Hopwood <david-sarah@jacaranda.org>
Date: Thu, 25 Oct 2012 01:01:25 +0100
Subject: [PATCH] src/allmydata/web/directory.py: fix HTML double-encoding
 issue for filenames.

Nevow automatically HTML-escapes strings passed in stan without a raw marker.
Written by MK_FG. fixes #1143

Signed-off-by: David-Sarah Hopwood <david-sarah@jacaranda.org>
---
 src/allmydata/test/test_web.py | 83 +++++++++++++++++++++++-----------
 src/allmydata/web/directory.py | 13 ++----
 2 files changed, 62 insertions(+), 34 deletions(-)

diff --git a/src/allmydata/test/test_web.py b/src/allmydata/test/test_web.py
index 2483236d..d52ce676 100644
--- a/src/allmydata/test/test_web.py
+++ b/src/allmydata/test/test_web.py
@@ -6,11 +6,12 @@ from twisted.application import service
 from twisted.trial import unittest
 from twisted.internet import defer, reactor
 from twisted.internet.task import Clock
-from twisted.web import client, error, http
+from twisted.web import client, error, http, html
 from twisted.python import failure, log
 
 from foolscap.api import fireEventually, flushEventualQueue
 
+from nevow.util import escapeToXML
 from nevow import rend
 
 from allmydata import interfaces, uri, webish, dirnode
@@ -256,6 +257,16 @@ class WebMixin(object):
             _ign, n, blocking_uri = self.makefile(1)
             foo.set_uri(u"blockingfile", blocking_uri, blocking_uri)
 
+            # filenode to test for html encoding issues
+            self._htmlname_unicode = u"<&weirdly'named\"file>>>_<iframe />.txt"
+            self._htmlname_raw = self._htmlname_unicode.encode('utf-8')
+            self._htmlname_urlencoded = urllib.quote(self._htmlname_raw, '')
+            self._htmlname_escaped = escapeToXML(self._htmlname_raw)
+            self._htmlname_escaped_attr = html.escape(self._htmlname_raw)
+            self._htmlname_escaped_double = escapeToXML(html.escape(self._htmlname_raw))
+            self.HTMLNAME_CONTENTS, n, self._htmlname_txt_uri = self.makefile(0)
+            foo.set_uri(self._htmlname_unicode, self._htmlname_txt_uri, self._htmlname_txt_uri)
+
             unicode_filename = u"n\u00fc.txt" # n u-umlaut . t x t
             # ok, unicode calls it LATIN SMALL LETTER U WITH DIAERESIS but I
             # still think of it as an umlaut
@@ -280,6 +291,7 @@ class WebMixin(object):
             # public/foo/baz.txt
             # public/foo/quux.txt
             # public/foo/blockingfile
+            # public/foo/<&weirdly'named\"file>>>_<iframe />.txt
             # public/foo/empty/
             # public/foo/sub/
             # public/foo/sub/baz.txt
@@ -365,8 +377,8 @@ class WebMixin(object):
 
         kidnames = sorted([unicode(n) for n in data[1]["children"]])
         self.failUnlessEqual(kidnames,
-                             [u"bar.txt", u"baz.txt", u"blockingfile",
-                              u"empty", u"n\u00fc.txt", u"quux.txt", u"sub"])
+                             [self._htmlname_unicode, u"bar.txt", u"baz.txt",
+                              u"blockingfile", u"empty", u"n\u00fc.txt", u"quux.txt", u"sub"])
         kids = dict( [(unicode(name),value)
                       for (name,value)
                       in data[1]["children"].iteritems()] )
@@ -1341,6 +1353,24 @@ class Web(WebMixin, WebErrorMixin, testutil.StallMixin, testutil.ReallyEqualMixi
         d.addCallback(_check)
         return d
 
+    def test_GET_DIRECTORY_html_filenode_encoding(self):
+        d = self.GET(self.public_url + "/foo", followRedirect=True)
+        def _check(html):
+            # Check if encoded entries are there
+            self.failUnlessIn('@@named=/' + self._htmlname_urlencoded + '">'
+                              + self._htmlname_escaped + '</a>', html)
+            self.failUnlessIn('value="' + self._htmlname_escaped_attr + '"', html)
+            self.failIfIn(self._htmlname_escaped_double, html)
+            # Make sure that Nevow escaping actually works by checking for unsafe characters
+            # and that '&' is escaped.
+            for entity in '<>':
+                self.failUnlessIn(entity, self._htmlname_raw)
+                self.failIfIn(entity, self._htmlname_escaped)
+            self.failUnlessIn('&', re.sub(r'&(amp|lt|gt|quot|apos);', '', self._htmlname_raw))
+            self.failIfIn('&', re.sub(r'&(amp|lt|gt|quot|apos);', '', self._htmlname_escaped))
+        d.addCallback(_check)
+        return d
+
     def test_GET_root_html(self):
         d = self.GET("/")
         d.addCallback(self._check_upload_and_mkdir_forms)
@@ -1563,16 +1593,16 @@ class Web(WebMixin, WebErrorMixin, testutil.StallMixin, testutil.ReallyEqualMixi
         d.addCallback(self.wait_for_operation, "127")
         d.addCallback(self.get_operation_results, "127", "json")
         def _got_json(stats):
-            expected = {"count-immutable-files": 3,
+            expected = {"count-immutable-files": 4,
                         "count-mutable-files": 2,
                         "count-literal-files": 0,
-                        "count-files": 5,
+                        "count-files": 6,
                         "count-directories": 3,
-                        "size-immutable-files": 57,
+                        "size-immutable-files": 76,
                         "size-literal-files": 0,
                         #"size-directories": 1912, # varies
                         #"largest-directory": 1590,
-                        "largest-directory-children": 7,
+                        "largest-directory-children": 8,
                         "largest-immutable-file": 19,
                         }
             for k,v in expected.iteritems():
@@ -1580,7 +1610,7 @@ class Web(WebMixin, WebErrorMixin, testutil.StallMixin, testutil.ReallyEqualMixi
                                            "stats[%s] was %s, not %s" %
                                            (k, stats[k], v))
             self.failUnlessReallyEqual(stats["size-files-histogram"],
-                                       [ [11, 31, 3] ])
+                                       [ [11, 31, 4] ])
         d.addCallback(_got_json)
         return d
 
@@ -1589,7 +1619,7 @@ class Web(WebMixin, WebErrorMixin, testutil.StallMixin, testutil.ReallyEqualMixi
         def _check(res):
             self.failUnless(res.endswith("\n"))
             units = [simplejson.loads(t) for t in res[:-1].split("\n")]
-            self.failUnlessReallyEqual(len(units), 9)
+            self.failUnlessReallyEqual(len(units), 10)
             self.failUnlessEqual(units[-1]["type"], "stats")
             first = units[0]
             self.failUnlessEqual(first["path"], [])
@@ -2434,7 +2464,8 @@ class Web(WebMixin, WebErrorMixin, testutil.StallMixin, testutil.ReallyEqualMixi
         # make sure that nothing was added
         d.addCallback(lambda res:
                       self.failUnlessNodeKeysAre(self._foo_node,
-                                                 [u"bar.txt", u"baz.txt", u"blockingfile",
+                                                 [self._htmlname_unicode,
+                                                  u"bar.txt", u"baz.txt", u"blockingfile",
                                                   u"empty", u"n\u00fc.txt", u"quux.txt",
                                                   u"sub"]))
         return d
@@ -2626,13 +2657,13 @@ class Web(WebMixin, WebErrorMixin, testutil.StallMixin, testutil.ReallyEqualMixi
         d.addCallback(self.wait_for_operation, "123")
         def _check_json(data):
             self.failUnlessReallyEqual(data["finished"], True)
-            self.failUnlessReallyEqual(data["count-objects-checked"], 10)
-            self.failUnlessReallyEqual(data["count-objects-healthy"], 10)
+            self.failUnlessReallyEqual(data["count-objects-checked"], 11)
+            self.failUnlessReallyEqual(data["count-objects-healthy"], 11)
         d.addCallback(_check_json)
         d.addCallback(self.get_operation_results, "123", "html")
         def _check_html(res):
-            self.failUnlessIn("Objects Checked: <span>10</span>", res)
-            self.failUnlessIn("Objects Healthy: <span>10</span>", res)
+            self.failUnlessIn("Objects Checked: <span>11</span>", res)
+            self.failUnlessIn("Objects Healthy: <span>11</span>", res)
             self.failUnlessIn(FAVICON_MARKUP, res)
         d.addCallback(_check_html)
 
@@ -2662,22 +2693,22 @@ class Web(WebMixin, WebErrorMixin, testutil.StallMixin, testutil.ReallyEqualMixi
         d.addCallback(self.wait_for_operation, "124")
         def _check_json(data):
             self.failUnlessReallyEqual(data["finished"], True)
-            self.failUnlessReallyEqual(data["count-objects-checked"], 10)
-            self.failUnlessReallyEqual(data["count-objects-healthy-pre-repair"], 10)
+            self.failUnlessReallyEqual(data["count-objects-checked"], 11)
+            self.failUnlessReallyEqual(data["count-objects-healthy-pre-repair"], 11)
             self.failUnlessReallyEqual(data["count-objects-unhealthy-pre-repair"], 0)
             self.failUnlessReallyEqual(data["count-corrupt-shares-pre-repair"], 0)
             self.failUnlessReallyEqual(data["count-repairs-attempted"], 0)
             self.failUnlessReallyEqual(data["count-repairs-successful"], 0)
             self.failUnlessReallyEqual(data["count-repairs-unsuccessful"], 0)
-            self.failUnlessReallyEqual(data["count-objects-healthy-post-repair"], 10)
+            self.failUnlessReallyEqual(data["count-objects-healthy-post-repair"], 11)
             self.failUnlessReallyEqual(data["count-objects-unhealthy-post-repair"], 0)
             self.failUnlessReallyEqual(data["count-corrupt-shares-post-repair"], 0)
         d.addCallback(_check_json)
         d.addCallback(self.get_operation_results, "124", "html")
         def _check_html(res):
-            self.failUnlessIn("Objects Checked: <span>10</span>", res)
+            self.failUnlessIn("Objects Checked: <span>11</span>", res)
 
-            self.failUnlessIn("Objects Healthy (before repair): <span>10</span>", res)
+            self.failUnlessIn("Objects Healthy (before repair): <span>11</span>", res)
             self.failUnlessIn("Objects Unhealthy (before repair): <span>0</span>", res)
             self.failUnlessIn("Corrupt Shares (before repair): <span>0</span>", res)
 
@@ -2685,7 +2716,7 @@ class Web(WebMixin, WebErrorMixin, testutil.StallMixin, testutil.ReallyEqualMixi
             self.failUnlessIn("Repairs Successful: <span>0</span>", res)
             self.failUnlessIn("Repairs Unsuccessful: <span>0</span>", res)
 
-            self.failUnlessIn("Objects Healthy (after repair): <span>10</span>", res)
+            self.failUnlessIn("Objects Healthy (after repair): <span>11</span>", res)
             self.failUnlessIn("Objects Unhealthy (after repair): <span>0</span>", res)
             self.failUnlessIn("Corrupt Shares (after repair): <span>0</span>", res)
 
@@ -3678,12 +3709,12 @@ class Web(WebMixin, WebErrorMixin, testutil.StallMixin, testutil.ReallyEqualMixi
         return d
 
     def test_PUT_NEWFILEURL_bad_format(self):
-       new_contents = self.NEWFILE_CONTENTS * 300000
-       return self.shouldHTTPError("PUT_NEWFILEURL_bad_format",
-                                   400, "Bad Request", "Unknown format: foo",
-                                   self.PUT, self.public_url + \
-                                   "/foo/foo.txt?format=foo",
-                                   new_contents)
+        new_contents = self.NEWFILE_CONTENTS * 300000
+        return self.shouldHTTPError("PUT_NEWFILEURL_bad_format",
+                                    400, "Bad Request", "Unknown format: foo",
+                                    self.PUT, self.public_url + \
+                                    "/foo/foo.txt?format=foo",
+                                    new_contents)
 
     def test_PUT_NEWFILEURL_uri_replace(self):
         contents, n, new_uri = self.makefile(8)
diff --git a/src/allmydata/web/directory.py b/src/allmydata/web/directory.py
index cf1d4d38..33ca2599 100644
--- a/src/allmydata/web/directory.py
+++ b/src/allmydata/web/directory.py
@@ -6,7 +6,7 @@ from zope.interface import implements
 from twisted.internet import defer
 from twisted.internet.interfaces import IPushProducer
 from twisted.python.failure import Failure
-from twisted.web import http, html
+from twisted.web import http
 from nevow import url, rend, inevow, tags as T
 from nevow.inevow import IRequest
 
@@ -750,8 +750,7 @@ class DirectoryAsHTML(rend.Page):
             # page that doesn't know about the directory at all
             dlurl = "%s/file/%s/@@named=/%s" % (root, quoted_uri, nameurl)
 
-            ctx.fillSlots("filename",
-                          T.a(href=dlurl)[html.escape(name)])
+            ctx.fillSlots("filename", T.a(href=dlurl)[name])
             ctx.fillSlots("type", "SSK")
 
             ctx.fillSlots("size", "?")
@@ -761,8 +760,7 @@ class DirectoryAsHTML(rend.Page):
         elif IImmutableFileNode.providedBy(target):
             dlurl = "%s/file/%s/@@named=/%s" % (root, quoted_uri, nameurl)
 
-            ctx.fillSlots("filename",
-                          T.a(href=dlurl)[html.escape(name)])
+            ctx.fillSlots("filename", T.a(href=dlurl)[name])
             ctx.fillSlots("type", "FILE")
 
             ctx.fillSlots("size", target.get_size())
@@ -772,8 +770,7 @@ class DirectoryAsHTML(rend.Page):
         elif IDirectoryNode.providedBy(target):
             # directory
             uri_link = "%s/uri/%s/" % (root, urllib.quote(target_uri))
-            ctx.fillSlots("filename",
-                          T.a(href=uri_link)[html.escape(name)])
+            ctx.fillSlots("filename", T.a(href=uri_link)[name])
             if not target.is_mutable():
                 dirtype = "DIR-IMM"
             elif target.is_readonly():
@@ -797,7 +794,7 @@ class DirectoryAsHTML(rend.Page):
 
         else:
             # unknown
-            ctx.fillSlots("filename", html.escape(name))
+            ctx.fillSlots("filename", name)
             if target.get_write_uri() is not None:
                 unknowntype = "?"
             elif not self.node.is_mutable() or target.is_alleged_immutable():
-- 
2.45.2