git.rkrishnan.org Git - tahoe-lafs/tahoe-lafs.git/blobdiff - src/allmydata/web/filenode.py
Implementation, tests and docs for blacklists. This version allows listing directorie...
[tahoe-lafs/tahoe-lafs.git] / src / allmydata / web / filenode.py
index 4c20079449e26471b73b82b4788725d1fb4aca58..ef8d3bae0a21ecd42e996479b4e211c86d448b05 100644 (file)
@@ -6,29 +6,35 @@ from twisted.internet import defer
 from nevow import url, rend
 from nevow.inevow import IRequest
 
-from allmydata.interfaces import ExistingChildError
+from allmydata.interfaces import ExistingChildError, SDMF_VERSION, MDMF_VERSION
 from allmydata.monitor import Monitor
 from allmydata.immutable.upload import FileHandle
-from allmydata.immutable.filenode import LiteralFileNode
+from allmydata.mutable.publish import MutableFileHandle
+from allmydata.mutable.common import MODE_READ
 from allmydata.util import log, base32
-
-from allmydata.web.common import text_plain, WebError, IClient, RenderMixin, \
-     boolean_of_arg, get_arg, should_create_intermediate_directories
-from allmydata.web.checker_results import CheckerResults, \
-     CheckAndRepairResults, LiteralCheckerResults
+from allmydata.util.encodingutil import quote_output
+from allmydata.blacklist import FileProhibited, ProhibitedNode
+
+from allmydata.web.common import text_plain, WebError, RenderMixin, \
+     boolean_of_arg, get_arg, should_create_intermediate_directories, \
+     MyExceptionHandler, parse_replace_arg, parse_offset_arg, \
+     parse_mutable_type_arg
+from allmydata.web.check_results import CheckResults, \
+     CheckAndRepairResults, LiteralCheckResults
 from allmydata.web.info import MoreInfo
 
 class ReplaceMeMixin:
-
-    def replace_me_with_a_child(self, ctx, replace):
+    def replace_me_with_a_child(self, req, client, replace):
         # a new file is being uploaded in our place.
-        req = IRequest(ctx)
-        client = IClient(ctx)
         mutable = boolean_of_arg(get_arg(req, "mutable", "false"))
         if mutable:
-            req.content.seek(0)
-            data = req.content.read()
-            d = client.create_mutable_file(data)
+            arg = get_arg(req, "mutable-type", None)
+            mutable_type = parse_mutable_type_arg(arg)
+            if mutable_type is "invalid":
+                raise WebError("Unknown type: %s" % arg, http.BAD_REQUEST)
+
+            data = MutableFileHandle(req.content)
+            d = client.create_mutable_file(data, version=mutable_type)
             def _uploaded(newnode):
                 d2 = self.parentnode.set_node(self.name, newnode,
                                               overwrite=replace)
@@ -41,7 +47,7 @@ class ReplaceMeMixin:
                                          overwrite=replace)
         def _done(filenode):
             log.msg("webish upload complete",
-                    facility="tahoe.webish", level=log.NOISY)
+                    facility="tahoe.webish", level=log.NOISY, umid="TCjBGQ")
             if self.node:
                 # we've replaced an existing file (or modified a mutable
                 # file), so the response code is 200
@@ -53,33 +59,28 @@ class ReplaceMeMixin:
         d.addCallback(_done)
         return d
 
-    def replace_me_with_a_childcap(self, ctx, replace):
-        req = IRequest(ctx)
+    def replace_me_with_a_childcap(self, req, client, replace):
         req.content.seek(0)
         childcap = req.content.read()
-        client = IClient(ctx)
-        childnode = client.create_node_from_uri(childcap)
+        childnode = client.create_node_from_uri(childcap, None, name=self.name)
         d = self.parentnode.set_node(self.name, childnode, overwrite=replace)
         d.addCallback(lambda res: childnode.get_uri())
         return d
 
-    def _read_data_from_formpost(self, req):
-        # SDMF: files are small, and we can only upload data, so we read
-        # the whole file into memory before uploading.
-        contents = req.fields["file"]
-        contents.file.seek(0)
-        data = contents.file.read()
-        return data
 
-    def replace_me_with_a_formpost(self, ctx, replace):
+    def replace_me_with_a_formpost(self, req, client, replace):
         # create a new file, maybe mutable, maybe immutable
-        req = IRequest(ctx)
-        client = IClient(ctx)
         mutable = boolean_of_arg(get_arg(req, "mutable", "false"))
 
+        # fetch the uploaded form field; both the mutable and immutable paths use it
+        contents = req.fields["file"]
         if mutable:
-            data = self._read_data_from_formpost(req)
-            d = client.create_mutable_file(data)
+            arg = get_arg(req, "mutable-type", None)
+            mutable_type = parse_mutable_type_arg(arg)
+            if mutable_type is "invalid":
+                raise WebError("Unknown type: %s" % arg, http.BAD_REQUEST)
+            uploadable = MutableFileHandle(contents.file)
+            d = client.create_mutable_file(uploadable, version=mutable_type)
             def _uploaded(newnode):
                 d2 = self.parentnode.set_node(self.name, newnode,
                                               overwrite=replace)
@@ -87,16 +88,17 @@ class ReplaceMeMixin:
                 return d2
             d.addCallback(_uploaded)
             return d
-        # create an immutable file
-        contents = req.fields["file"]
+
         uploadable = FileHandle(contents.file, convergence=client.convergence)
         d = self.parentnode.add_file(self.name, uploadable, overwrite=replace)
         d.addCallback(lambda newnode: newnode.get_uri())
         return d
 
+
 class PlaceHolderNodeHandler(RenderMixin, rend.Page, ReplaceMeMixin):
-    def __init__(self, parentnode, name):
+    def __init__(self, client, parentnode, name):
         rend.Page.__init__(self)
+        self.client = client
         assert parentnode
         self.parentnode = parentnode
         self.name = name
@@ -105,15 +107,16 @@ class PlaceHolderNodeHandler(RenderMixin, rend.Page, ReplaceMeMixin):
     def render_PUT(self, ctx):
         req = IRequest(ctx)
         t = get_arg(req, "t", "").strip()
-        replace = boolean_of_arg(get_arg(req, "replace", "true"))
+        replace = parse_replace_arg(get_arg(req, "replace", "true"))
+
         assert self.parentnode and self.name
         if req.getHeader("content-range"):
             raise WebError("Content-Range in PUT not yet supported",
                            http.NOT_IMPLEMENTED)
         if not t:
-            return self.replace_me_with_a_child(ctx, replace)
+            return self.replace_me_with_a_child(req, self.client, replace)
         if t == "uri":
-            return self.replace_me_with_a_childcap(ctx, replace)
+            return self.replace_me_with_a_childcap(req, self.client, replace)
 
         raise WebError("PUT to a file: bad t=%s" % t)
 
@@ -127,7 +130,7 @@ class PlaceHolderNodeHandler(RenderMixin, rend.Page, ReplaceMeMixin):
             # or POST /uri/path/file?t=upload, or
             # POST /uri/path/dir?t=upload&name=foo . All have the same
             # behavior, we just ignore any name= argument
-            d = self.replace_me_with_a_formpost(ctx, replace)
+            d = self.replace_me_with_a_formpost(req, self.client, replace)
         else:
             # t=mkdir is handled in DirectoryNodeHandler._POST_mkdir, so
             # there are no other t= values left to be handled by the
@@ -141,8 +144,9 @@ class PlaceHolderNodeHandler(RenderMixin, rend.Page, ReplaceMeMixin):
 
 
 class FileNodeHandler(RenderMixin, rend.Page, ReplaceMeMixin):
-    def __init__(self, node, parentnode=None, name=None):
+    def __init__(self, client, node, parentnode=None, name=None):
         rend.Page.__init__(self)
+        self.client = client
         assert node
         self.node = node
         self.parentnode = parentnode
@@ -150,11 +154,13 @@ class FileNodeHandler(RenderMixin, rend.Page, ReplaceMeMixin):
 
     def childFactory(self, ctx, name):
         req = IRequest(ctx)
+        if isinstance(self.node, ProhibitedNode):
+            raise FileProhibited(self.node.reason)
         if should_create_intermediate_directories(req):
-            raise WebError("Cannot create directory '%s', because its "
-                           "parent is a file, not a directory" % name)
-        raise WebError("Files have no children, certainly not named '%s'"
-                       % name)
+            raise WebError("Cannot create directory %s, because its "
+                           "parent is a file, not a directory" % quote_output(name, encoding='utf-8'))
+        raise WebError("Files have no children, certainly not named %s"
+                       % quote_output(name, encoding='utf-8'))
 
     def render_GET(self, ctx):
         req = IRequest(ctx)
@@ -173,15 +179,29 @@ class FileNodeHandler(RenderMixin, rend.Page, ReplaceMeMixin):
             # properly. So we assume that at least the browser will agree
             # with itself, and echo back the same bytes that we were given.
             filename = get_arg(req, "filename", self.name) or "unknown"
-            if self.node.is_mutable():
-                # some day: d = self.node.get_best_version()
-                d = makeMutableDownloadable(self.node)
-            else:
-                d = defer.succeed(self.node)
+            d = self.node.get_best_readable_version()
             d.addCallback(lambda dn: FileDownloader(dn, filename))
             return d
         if t == "json":
-            return FileJSONMetadata(ctx, self.node)
+            # We do this to make sure that fields like size and
+            # mutable-type (which depend on the file on the grid and not
+            # just on the cap) are filled in. The latter gets used in
+            # tests, in particular.
+            #
+            # TODO: Make it so that the servermap knows how to update in
+            # a mode specifically designed to fill in these fields, and
+            # then update it in that mode.
+            if self.node.is_mutable():
+                d = self.node.get_servermap(MODE_READ)
+            else:
+                d = defer.succeed(None)
+            if self.parentnode and self.name:
+                d.addCallback(lambda ignored:
+                    self.parentnode.get_metadata_for(self.name))
+            else:
+                d.addCallback(lambda ignored: None)
+            d.addCallback(lambda md: FileJSONMetadata(ctx, self.node, md))
+            return d
         if t == "info":
             return MoreInfo(self.node)
         if t == "uri":
@@ -196,33 +216,50 @@ class FileNodeHandler(RenderMixin, rend.Page, ReplaceMeMixin):
         if t:
             raise WebError("GET file: bad t=%s" % t)
         filename = get_arg(req, "filename", self.name) or "unknown"
-        if self.node.is_mutable():
-            # some day: d = self.node.get_best_version()
-            d = makeMutableDownloadable(self.node)
-        else:
-            d = defer.succeed(self.node)
+        d = self.node.get_best_readable_version()
         d.addCallback(lambda dn: FileDownloader(dn, filename))
         return d
 
     def render_PUT(self, ctx):
         req = IRequest(ctx)
         t = get_arg(req, "t", "").strip()
-        replace = boolean_of_arg(get_arg(req, "replace", "true"))
+        replace = parse_replace_arg(get_arg(req, "replace", "true"))
+        offset = parse_offset_arg(get_arg(req, "offset", None))
+
         if not t:
-            if self.node.is_mutable():
-                return self.replace_my_contents(ctx)
             if not replace:
                 # this is the early trap: if someone else modifies the
                 # directory while we're uploading, the add_file(overwrite=)
                 # call in replace_me_with_a_child will do the late trap.
                 raise ExistingChildError()
-            assert self.parentnode and self.name
-            return self.replace_me_with_a_child(ctx, replace)
+
+            if self.node.is_mutable():
+                # Are we a readonly filenode? We shouldn't allow callers
+                # to try to replace us if we are.
+                if self.node.is_readonly():
+                    raise WebError("PUT to a mutable file: replace or update"
+                                   " requested with read-only cap")
+                if offset is None:
+                    return self.replace_my_contents(req)
+
+                if offset >= 0:
+                    return self.update_my_contents(req, offset)
+
+                raise WebError("PUT to a mutable file: Invalid offset")
+
+            else:
+                if offset is not None:
+                    raise WebError("PUT to a file: append operation invoked "
+                                   "on an immutable cap")
+
+                assert self.parentnode and self.name
+                return self.replace_me_with_a_child(req, self.client, replace)
+
         if t == "uri":
             if not replace:
                 raise ExistingChildError()
             assert self.parentnode and self.name
-            return self.replace_me_with_a_childcap(ctx, replace)
+            return self.replace_me_with_a_childcap(req, self.client, replace)
 
         raise WebError("PUT to a file: bad t=%s" % t)
 
@@ -239,12 +276,12 @@ class FileNodeHandler(RenderMixin, rend.Page, ReplaceMeMixin):
             # POST /uri/path/dir?t=upload&name=foo . All have the same
             # behavior, we just ignore any name= argument
             if self.node.is_mutable():
-                d = self.replace_my_contents_with_a_formpost(ctx)
+                d = self.replace_my_contents_with_a_formpost(req)
             else:
                 if not replace:
                     raise ExistingChildError()
                 assert self.parentnode and self.name
-                d = self.replace_me_with_a_formpost(ctx, replace)
+                d = self.replace_me_with_a_formpost(req, self.client, replace)
         else:
             raise WebError("POST to file: bad t=%s" % t)
 
@@ -253,17 +290,21 @@ class FileNodeHandler(RenderMixin, rend.Page, ReplaceMeMixin):
             d.addCallback(lambda res: url.URL.fromString(when_done))
         return d
 
+    def _maybe_literal(self, res, Results_Class):
+        if res:
+            return Results_Class(self.client, res)
+        return LiteralCheckResults(self.client)
+
     def _POST_check(self, req):
         verify = boolean_of_arg(get_arg(req, "verify", "false"))
         repair = boolean_of_arg(get_arg(req, "repair", "false"))
-        if isinstance(self.node, LiteralFileNode):
-            return defer.succeed(LiteralCheckerResults())
+        add_lease = boolean_of_arg(get_arg(req, "add-lease", "false"))
         if repair:
-            d = self.node.check_and_repair(Monitor(), verify)
-            d.addCallback(lambda res: CheckAndRepairResults(res))
+            d = self.node.check_and_repair(Monitor(), verify, add_lease)
+            d.addCallback(self._maybe_literal, CheckAndRepairResults)
         else:
-            d = self.node.check(Monitor(), verify)
-            d.addCallback(lambda res: CheckerResults(res))
+            d = self.node.check(Monitor(), verify, add_lease)
+            d.addCallback(self._maybe_literal, CheckResults)
         return d
 
     def render_DELETE(self, ctx):
@@ -272,65 +313,91 @@ class FileNodeHandler(RenderMixin, rend.Page, ReplaceMeMixin):
         d.addCallback(lambda res: self.node.get_uri())
         return d
 
-    def replace_my_contents(self, ctx):
-        req = IRequest(ctx)
+    def replace_my_contents(self, req):
         req.content.seek(0)
-        new_contents = req.content.read()
+        new_contents = MutableFileHandle(req.content)
         d = self.node.overwrite(new_contents)
         d.addCallback(lambda res: self.node.get_uri())
         return d
 
-    def replace_my_contents_with_a_formpost(self, ctx):
+
+    def update_my_contents(self, req, offset):
+        req.content.seek(0)
+        added_contents = MutableFileHandle(req.content)
+
+        d = self.node.get_best_mutable_version()
+        d.addCallback(lambda mv:
+            mv.update(added_contents, offset))
+        d.addCallback(lambda ignored:
+            self.node.get_uri())
+        return d
+
+
+    def replace_my_contents_with_a_formpost(self, req):
         # we have a mutable file. Get the data from the formpost, and replace
         # the mutable file's contents with it.
-        req = IRequest(ctx)
-        new_contents = self._read_data_from_formpost(req)
+        new_contents = req.fields['file']
+        new_contents = MutableFileHandle(new_contents.file)
+
         d = self.node.overwrite(new_contents)
         d.addCallback(lambda res: self.node.get_uri())
         return d
 
-class MutableDownloadable:
-    #implements(IDownloadable)
-    def __init__(self, size, node):
-        self.size = size
-        self.node = node
-    def get_size(self):
-        return self.size
-    def is_mutable(self):
-        return True
-    def read(self, consumer, offset=0, size=None):
-        d = self.node.download_best_version()
-        d.addCallback(self._got_data, consumer, offset, size)
-        return d
-    def _got_data(self, contents, consumer, offset, size):
-        start = offset
-        if size is not None:
-            end = offset+size
-        else:
-            end = self.size
-        # SDMF: we can write the whole file in one big chunk
-        consumer.write(contents[start:end])
-        return consumer
-
-def makeMutableDownloadable(n):
-    d = defer.maybeDeferred(n.get_size_of_best_version)
-    d.addCallback(MutableDownloadable, n)
-    return d
 
 class FileDownloader(rend.Page):
-    # since we override the rendering process (to let the tahoe Downloader
-    # drive things), we must inherit from regular old twisted.web.resource
-    # instead of nevow.rend.Page . Nevow will use adapters to wrap a
-    # nevow.appserver.OldResourceAdapter around any
-    # twisted.web.resource.IResource that it is given. TODO: it looks like
-    # that wrapper would allow us to return a Deferred from render(), which
-    # might could simplify the implementation of WebDownloadTarget.
-
     def __init__(self, filenode, filename):
         rend.Page.__init__(self)
         self.filenode = filenode
         self.filename = filename
 
+    def parse_range_header(self, range):
+        # Parse a set of byte ranges according to RFC 2616 "14.35.1 Byte
+        # Ranges".  Returns None if the range doesn't make sense so it
+        # can be ignored (per the spec).  When successful, returns a
+        # list of (first,last) inclusive range tuples.
+
+        filesize = self.filenode.get_size()
+        assert isinstance(filesize, (int,long)), filesize
+
+        try:
+            # byte-ranges-specifier
+            units, rangeset = range.split('=', 1)
+            if units != 'bytes':
+                return None     # nothing else supported
+
+            def parse_range(r):
+                first, last = r.split('-', 1)
+
+                if first is '':
+                    # suffix-byte-range-spec
+                    first = filesize - long(last)
+                    last = filesize - 1
+                else:
+                    # byte-range-spec
+
+                    # first-byte-pos
+                    first = long(first)
+
+                    # last-byte-pos
+                    if last is '':
+                        last = filesize - 1
+                    else:
+                        last = long(last)
+
+                if last < first:
+                    raise ValueError
+
+                return (first, last)
+
+            # byte-range-set
+            #
+            # Note: the spec uses "1#" for the list of ranges, which
+            # implicitly allows whitespace around the ',' separators,
+            # so strip it.
+            return [ parse_range(r.strip()) for r in rangeset.split(',') ]
+        except ValueError:
+            return None
+
     def renderHTTP(self, ctx):
         req = IRequest(ctx)
         gte = static.getTypeAndEncoding
@@ -342,7 +409,6 @@ class FileDownloader(rend.Page):
         if encoding:
             req.setHeader("content-encoding", encoding)
 
-        save_to_filename = None
         if boolean_of_arg(get_arg(req, "save", "False")):
             # tell the browser to save the file rather display it we don't
             # try to encode the filename, instead we echo back the exact same
@@ -353,7 +419,7 @@ class FileDownloader(rend.Page):
 
         filesize = self.filenode.get_size()
         assert isinstance(filesize, (int,long)), filesize
-        offset, size = 0, None
+        first, size = 0, None
         contentsize = filesize
         req.setHeader("accept-ranges", "bytes")
         if not self.filenode.is_mutable():
@@ -367,24 +433,54 @@ class FileDownloader(rend.Page):
         # or maybe just use the URI for CHK and LIT.
         rangeheader = req.getHeader('range')
         if rangeheader:
-            # adapted from nevow.static.File
-            bytesrange = rangeheader.split('=')
-            if bytesrange[0] != 'bytes':
-                raise WebError("Syntactically invalid http range header!")
-            start, end = bytesrange[1].split('-')
-            if start:
-                offset = int(start)
-            if end:
-                size = int(end) - offset + 1
-            req.setResponseCode(http.PARTIAL_CONTENT)
-            req.setHeader('content-range',"bytes %s-%s/%s" %
-                          (str(offset), str(offset+size-1), str(filesize)))
-            contentsize = size
+            ranges = self.parse_range_header(rangeheader)
+
+            # ranges = None means the header didn't parse, so ignore
+            # the header as if it didn't exist.  If there is more than one
+            # range, then just return the first for now, until we can
+            # generate multipart/byteranges.
+            if ranges is not None:
+                first, last = ranges[0]
+
+                if first >= filesize:
+                    raise WebError('First beyond end of file',
+                                   http.REQUESTED_RANGE_NOT_SATISFIABLE)
+                else:
+                    first = max(0, first)
+                    last = min(filesize-1, last)
+
+                    req.setResponseCode(http.PARTIAL_CONTENT)
+                    req.setHeader('content-range',"bytes %s-%s/%s" %
+                                  (str(first), str(last),
+                                   str(filesize)))
+                    contentsize = last - first + 1
+                    size = contentsize
+
         req.setHeader("content-length", str(contentsize))
         if req.method == "HEAD":
             return ""
-        d = self.filenode.read(req, offset, size)
+
+        # Twisted >=9.0 throws an error if we call req.finish() on a closed
+        # HTTP connection. It also has req.notifyFinish() to help avoid it.
+        finished = []
+        def _request_finished(ign):
+            finished.append(True)
+        if hasattr(req, "notifyFinish"):
+            req.notifyFinish().addBoth(_request_finished)
+
+        d = self.filenode.read(req, first, size)
+
+        def _finished(ign):
+            if not finished:
+                req.finish()
         def _error(f):
+            lp = log.msg("error during GET", facility="tahoe.webish", failure=f,
+                         level=log.UNUSUAL, umid="xSiF3w")
+            if finished:
+                log.msg("but it's too late to tell them", parent=lp,
+                        level=log.UNUSUAL, umid="j1xIbw")
+                return
+            req._tahoe_request_had_error = f # for HTTP-style logging
             if req.startedWriting:
                 # The content-type is already set, and the response code has
                 # already been sent, so we can't provide a clean error
@@ -396,36 +492,41 @@ class FileDownloader(rend.Page):
                 #
                 # We don't have a lot of options, unfortunately.
                 req.write("problem during download\n")
+                req.finish()
             else:
                 # We haven't written anything yet, so we can provide a
                 # sensible error message.
-                msg = str(f.type)
-                msg.replace("\n", "|")
-                req.setResponseCode(http.GONE, msg)
-                req.setHeader("content-type", "text/plain")
-                req.responseHeaders.setRawHeaders("content-encoding", [])
-                req.responseHeaders.setRawHeaders("content-disposition", [])
-                # TODO: HTML-formatted exception?
-                req.write(str(f))
-        d.addErrback(_error)
-        d.addBoth(lambda ign: req.finish())
+                eh = MyExceptionHandler()
+                eh.renderHTTP_exception(ctx, f)
+        d.addCallbacks(_finished, _error)
         return req.deferred
 
 
-def FileJSONMetadata(ctx, filenode):
-    if filenode.is_readonly():
-        rw_uri = None
-        ro_uri = filenode.get_uri()
-    else:
-        rw_uri = filenode.get_uri()
-        ro_uri = filenode.get_readonly_uri()
+def FileJSONMetadata(ctx, filenode, edge_metadata):
+    rw_uri = filenode.get_write_uri()
+    ro_uri = filenode.get_readonly_uri()
     data = ("filenode", {})
     data[1]['size'] = filenode.get_size()
     if ro_uri:
         data[1]['ro_uri'] = ro_uri
     if rw_uri:
         data[1]['rw_uri'] = rw_uri
+    verifycap = filenode.get_verify_cap()
+    if verifycap:
+        data[1]['verify_uri'] = verifycap.to_string()
     data[1]['mutable'] = filenode.is_mutable()
+    if edge_metadata is not None:
+        data[1]['metadata'] = edge_metadata
+
+    if filenode.is_mutable() and filenode.get_version() is not None:
+        mutable_type = filenode.get_version()
+        assert mutable_type in (MDMF_VERSION, SDMF_VERSION)
+        if mutable_type == MDMF_VERSION:
+            mutable_type = "mdmf"
+        else:
+            mutable_type = "sdmf"
+        data[1]['mutable-type'] = mutable_type
+
     return text_plain(simplejson.dumps(data, indent=1) + "\n", ctx)
 
 def FileURI(ctx, filenode):
@@ -438,4 +539,4 @@ def FileReadOnlyURI(ctx, filenode):
 
 class FileNodeDownloadHandler(FileNodeHandler):
     def childFactory(self, ctx, name):
-        return FileNodeDownloadHandler(self.node, name=name)
+        return FileNodeDownloadHandler(self.client, self.node, name=name)