git.rkrishnan.org Git - tahoe-lafs/tahoe-lafs.git/blobdiff - src/allmydata/web/filenode.py
Implementation, tests and docs for blacklists. This version allows listing directorie...
[tahoe-lafs/tahoe-lafs.git] / src / allmydata / web / filenode.py
index 9fd44022a61404a2ac843140a2a0bb313676623b..ef8d3bae0a21ecd42e996479b4e211c86d448b05 100644 (file)
@@ -6,28 +6,35 @@ from twisted.internet import defer
 from nevow import url, rend
 from nevow.inevow import IRequest
 
-from allmydata.interfaces import ExistingChildError, CannotPackUnknownNodeError
+from allmydata.interfaces import ExistingChildError, SDMF_VERSION, MDMF_VERSION
 from allmydata.monitor import Monitor
 from allmydata.immutable.upload import FileHandle
-from allmydata.unknown import UnknownNode
+from allmydata.mutable.publish import MutableFileHandle
+from allmydata.mutable.common import MODE_READ
 from allmydata.util import log, base32
+from allmydata.util.encodingutil import quote_output
+from allmydata.blacklist import FileProhibited, ProhibitedNode
 
 from allmydata.web.common import text_plain, WebError, RenderMixin, \
      boolean_of_arg, get_arg, should_create_intermediate_directories, \
-     MyExceptionHandler, parse_replace_arg
+     MyExceptionHandler, parse_replace_arg, parse_offset_arg, \
+     parse_mutable_type_arg
 from allmydata.web.check_results import CheckResults, \
      CheckAndRepairResults, LiteralCheckResults
 from allmydata.web.info import MoreInfo
 
 class ReplaceMeMixin:
-
     def replace_me_with_a_child(self, req, client, replace):
         # a new file is being uploaded in our place.
         mutable = boolean_of_arg(get_arg(req, "mutable", "false"))
         if mutable:
-            req.content.seek(0)
-            data = req.content.read()
-            d = client.create_mutable_file(data)
+            arg = get_arg(req, "mutable-type", None)
+            mutable_type = parse_mutable_type_arg(arg)
+            if mutable_type is "invalid":
+                raise WebError("Unknown type: %s" % arg, http.BAD_REQUEST)
+
+            data = MutableFileHandle(req.content)
+            d = client.create_mutable_file(data, version=mutable_type)
             def _uploaded(newnode):
                 d2 = self.parentnode.set_node(self.name, newnode,
                                               overwrite=replace)
@@ -40,7 +47,7 @@ class ReplaceMeMixin:
                                          overwrite=replace)
         def _done(filenode):
             log.msg("webish upload complete",
-                    facility="tahoe.webish", level=log.NOISY)
+                    facility="tahoe.webish", level=log.NOISY, umid="TCjBGQ")
             if self.node:
                 # we've replaced an existing file (or modified a mutable
                 # file), so the response code is 200
@@ -55,33 +62,25 @@ class ReplaceMeMixin:
     def replace_me_with_a_childcap(self, req, client, replace):
         req.content.seek(0)
         childcap = req.content.read()
-        childnode = client.create_node_from_uri(childcap, childcap+"readonly")
-        if isinstance(childnode, UnknownNode):
-            # don't be willing to pack unknown nodes: we might accidentally
-            # put some write-authority into the rocap slot because we don't
-            # know how to diminish the URI they gave us. We don't even know
-            # if they gave us a readcap or a writecap.
-            msg = "cannot attach unknown node as child %s" % str(self.name)
-            raise CannotPackUnknownNodeError(msg)
+        childnode = client.create_node_from_uri(childcap, None, name=self.name)
         d = self.parentnode.set_node(self.name, childnode, overwrite=replace)
         d.addCallback(lambda res: childnode.get_uri())
         return d
 
-    def _read_data_from_formpost(self, req):
-        # SDMF: files are small, and we can only upload data, so we read
-        # the whole file into memory before uploading.
-        contents = req.fields["file"]
-        contents.file.seek(0)
-        data = contents.file.read()
-        return data
 
     def replace_me_with_a_formpost(self, req, client, replace):
         # create a new file, maybe mutable, maybe immutable
         mutable = boolean_of_arg(get_arg(req, "mutable", "false"))
 
+        # the "file" form field supplies the contents for either kind
+        contents = req.fields["file"]
         if mutable:
-            data = self._read_data_from_formpost(req)
-            d = client.create_mutable_file(data)
+            arg = get_arg(req, "mutable-type", None)
+            mutable_type = parse_mutable_type_arg(arg)
+            if mutable_type is "invalid":
+                raise WebError("Unknown type: %s" % arg, http.BAD_REQUEST)
+            uploadable = MutableFileHandle(contents.file)
+            d = client.create_mutable_file(uploadable, version=mutable_type)
             def _uploaded(newnode):
                 d2 = self.parentnode.set_node(self.name, newnode,
                                               overwrite=replace)
@@ -89,13 +88,13 @@ class ReplaceMeMixin:
                 return d2
             d.addCallback(_uploaded)
             return d
-        # create an immutable file
-        contents = req.fields["file"]
+
         uploadable = FileHandle(contents.file, convergence=client.convergence)
         d = self.parentnode.add_file(self.name, uploadable, overwrite=replace)
         d.addCallback(lambda newnode: newnode.get_uri())
         return d
 
+
 class PlaceHolderNodeHandler(RenderMixin, rend.Page, ReplaceMeMixin):
     def __init__(self, client, parentnode, name):
         rend.Page.__init__(self)
@@ -155,11 +154,13 @@ class FileNodeHandler(RenderMixin, rend.Page, ReplaceMeMixin):
 
     def childFactory(self, ctx, name):
         req = IRequest(ctx)
+        if isinstance(self.node, ProhibitedNode):
+            raise FileProhibited(self.node.reason)
         if should_create_intermediate_directories(req):
-            raise WebError("Cannot create directory '%s', because its "
-                           "parent is a file, not a directory" % name)
-        raise WebError("Files have no children, certainly not named '%s'"
-                       % name)
+            raise WebError("Cannot create directory %s, because its "
+                           "parent is a file, not a directory" % quote_output(name, encoding='utf-8'))
+        raise WebError("Files have no children, certainly not named %s"
+                       % quote_output(name, encoding='utf-8'))
 
     def render_GET(self, ctx):
         req = IRequest(ctx)
@@ -178,18 +179,27 @@ class FileNodeHandler(RenderMixin, rend.Page, ReplaceMeMixin):
             # properly. So we assume that at least the browser will agree
             # with itself, and echo back the same bytes that we were given.
             filename = get_arg(req, "filename", self.name) or "unknown"
-            if self.node.is_mutable():
-                # some day: d = self.node.get_best_version()
-                d = makeMutableDownloadable(self.node)
-            else:
-                d = defer.succeed(self.node)
+            d = self.node.get_best_readable_version()
             d.addCallback(lambda dn: FileDownloader(dn, filename))
             return d
         if t == "json":
-            if self.parentnode and self.name:
-                d = self.parentnode.get_metadata_for(self.name)
+            # We do this to make sure that fields like size and
+            # mutable-type (which depend on the file on the grid and not
+            # just on the cap) are filled in. The latter gets used in
+            # tests, in particular.
+            #
+            # TODO: Make it so that the servermap knows how to update in
+            # a mode specifically designed to fill in these fields, and
+            # then update it in that mode.
+            if self.node.is_mutable():
+                d = self.node.get_servermap(MODE_READ)
             else:
                 d = defer.succeed(None)
+            if self.parentnode and self.name:
+                d.addCallback(lambda ignored:
+                    self.parentnode.get_metadata_for(self.name))
+            else:
+                d.addCallback(lambda ignored: None)
             d.addCallback(lambda md: FileJSONMetadata(ctx, self.node, md))
             return d
         if t == "info":
@@ -206,11 +216,7 @@ class FileNodeHandler(RenderMixin, rend.Page, ReplaceMeMixin):
         if t:
             raise WebError("GET file: bad t=%s" % t)
         filename = get_arg(req, "filename", self.name) or "unknown"
-        if self.node.is_mutable():
-            # some day: d = self.node.get_best_version()
-            d = makeMutableDownloadable(self.node)
-        else:
-            d = defer.succeed(self.node)
+        d = self.node.get_best_readable_version()
         d.addCallback(lambda dn: FileDownloader(dn, filename))
         return d
 
@@ -218,17 +224,37 @@ class FileNodeHandler(RenderMixin, rend.Page, ReplaceMeMixin):
         req = IRequest(ctx)
         t = get_arg(req, "t", "").strip()
         replace = parse_replace_arg(get_arg(req, "replace", "true"))
+        offset = parse_offset_arg(get_arg(req, "offset", None))
 
         if not t:
-            if self.node.is_mutable():
-                return self.replace_my_contents(req)
             if not replace:
                 # this is the early trap: if someone else modifies the
                 # directory while we're uploading, the add_file(overwrite=)
                 # call in replace_me_with_a_child will do the late trap.
                 raise ExistingChildError()
-            assert self.parentnode and self.name
-            return self.replace_me_with_a_child(req, self.client, replace)
+
+            if self.node.is_mutable():
+                # Are we a readonly filenode? We shouldn't allow callers
+                # to try to replace us if we are.
+                if self.node.is_readonly():
+                    raise WebError("PUT to a mutable file: replace or update"
+                                   " requested with read-only cap")
+                if offset is None:
+                    return self.replace_my_contents(req)
+
+                if offset >= 0:
+                    return self.update_my_contents(req, offset)
+
+                raise WebError("PUT to a mutable file: Invalid offset")
+
+            else:
+                if offset is not None:
+                    raise WebError("PUT to a file: append operation invoked "
+                                   "on an immutable cap")
+
+                assert self.parentnode and self.name
+                return self.replace_me_with_a_child(req, self.client, replace)
+
         if t == "uri":
             if not replace:
                 raise ExistingChildError()
@@ -289,61 +315,89 @@ class FileNodeHandler(RenderMixin, rend.Page, ReplaceMeMixin):
 
     def replace_my_contents(self, req):
         req.content.seek(0)
-        new_contents = req.content.read()
+        new_contents = MutableFileHandle(req.content)
         d = self.node.overwrite(new_contents)
         d.addCallback(lambda res: self.node.get_uri())
         return d
 
+
+    def update_my_contents(self, req, offset):
+        # Splice the request body into the file at offset; fires with our URI.
+        req.content.seek(0)
+        uploadable = MutableFileHandle(req.content)
+        d = self.node.get_best_mutable_version()
+        def _apply_update(version):
+            return version.update(uploadable, offset)
+        d.addCallback(_apply_update)
+        d.addCallback(lambda ignored: self.node.get_uri())
+        return d
+
+
     def replace_my_contents_with_a_formpost(self, req):
         # we have a mutable file. Get the data from the formpost, and replace
         # the mutable file's contents with it.
-        new_contents = self._read_data_from_formpost(req)
+        new_contents = req.fields['file']
+        new_contents = MutableFileHandle(new_contents.file)
+
         d = self.node.overwrite(new_contents)
         d.addCallback(lambda res: self.node.get_uri())
         return d
 
-class MutableDownloadable:
-    #implements(IDownloadable)
-    def __init__(self, size, node):
-        self.size = size
-        self.node = node
-    def get_size(self):
-        return self.size
-    def is_mutable(self):
-        return True
-    def read(self, consumer, offset=0, size=None):
-        d = self.node.download_best_version()
-        d.addCallback(self._got_data, consumer, offset, size)
-        return d
-    def _got_data(self, contents, consumer, offset, size):
-        start = offset
-        if size is not None:
-            end = offset+size
-        else:
-            end = self.size
-        # SDMF: we can write the whole file in one big chunk
-        consumer.write(contents[start:end])
-        return consumer
-
-def makeMutableDownloadable(n):
-    d = defer.maybeDeferred(n.get_size_of_best_version)
-    d.addCallback(MutableDownloadable, n)
-    return d
 
 class FileDownloader(rend.Page):
-    # since we override the rendering process (to let the tahoe Downloader
-    # drive things), we must inherit from regular old twisted.web.resource
-    # instead of nevow.rend.Page . Nevow will use adapters to wrap a
-    # nevow.appserver.OldResourceAdapter around any
-    # twisted.web.resource.IResource that it is given. TODO: it looks like
-    # that wrapper would allow us to return a Deferred from render(), which
-    # might could simplify the implementation of WebDownloadTarget.
-
     def __init__(self, filenode, filename):
         rend.Page.__init__(self)
         self.filenode = filenode
         self.filename = filename
 
+    def parse_range_header(self, range):
+        """Parse a Range header per RFC 2616 "14.35.1 Byte Ranges".
+
+        Returns a list of inclusive (first, last) tuples, or None if the
+        header doesn't make sense and should be ignored (per the spec).
+        """
+        filesize = self.filenode.get_size()
+        assert isinstance(filesize, (int,long)), filesize
+
+        try:
+            # byte-ranges-specifier
+            units, rangeset = range.split('=', 1)
+            if units != 'bytes':
+                return None     # nothing else supported
+
+            def parse_range(r):
+                first, last = r.split('-', 1)
+
+                if first == '':
+                    # suffix-byte-range-spec: the final `last` bytes of the file
+                    first = filesize - long(last)
+                    last = filesize - 1
+                else:
+                    # byte-range-spec
+
+                    # first-byte-pos
+                    first = long(first)
+
+                    # last-byte-pos
+                    if last == '':
+                        last = filesize - 1
+                    else:
+                        last = long(last)
+                # an inverted range is rejected via the ValueError handler below
+                if last < first:
+                    raise ValueError
+
+                return (first, last)
+
+            # byte-range-set
+            #
+            # Note: the spec uses "1#" for the list of ranges, which
+            # implicitly allows whitespace around the ',' separators,
+            # so strip it.
+            return [ parse_range(r.strip()) for r in rangeset.split(',') ]
+        except ValueError:
+            return None
+
     def renderHTTP(self, ctx):
         req = IRequest(ctx)
         gte = static.getTypeAndEncoding
@@ -355,7 +409,6 @@ class FileDownloader(rend.Page):
         if encoding:
             req.setHeader("content-encoding", encoding)
 
-        save_to_filename = None
         if boolean_of_arg(get_arg(req, "save", "False")):
             # tell the browser to save the file rather display it we don't
             # try to encode the filename, instead we echo back the exact same
@@ -366,7 +419,7 @@ class FileDownloader(rend.Page):
 
         filesize = self.filenode.get_size()
         assert isinstance(filesize, (int,long)), filesize
-        offset, size = 0, None
+        first, size = 0, None
         contentsize = filesize
         req.setHeader("accept-ranges", "bytes")
         if not self.filenode.is_mutable():
@@ -380,31 +433,54 @@ class FileDownloader(rend.Page):
         # or maybe just use the URI for CHK and LIT.
         rangeheader = req.getHeader('range')
         if rangeheader:
-            # adapted from nevow.static.File
-            bytesrange = rangeheader.split('=')
-            if bytesrange[0] != 'bytes':
-                raise WebError("Syntactically invalid http range header!")
-            start, end = bytesrange[1].split('-')
-            if start:
-                offset = int(start)
-                if not end:
-                    # RFC 2616 says:
-                    #
-                    # "If the last-byte-pos value is absent, or if the value is
-                    # greater than or equal to the current length of the
-                    # entity-body, last-byte-pos is taken to be equal to one less
-                    # than the current length of the entity- body in bytes."
-                    end = filesize - 1
-                size = int(end) - offset + 1
-            req.setResponseCode(http.PARTIAL_CONTENT)
-            req.setHeader('content-range',"bytes %s-%s/%s" %
-                          (str(offset), str(offset+size-1), str(filesize)))
-            contentsize = size
+            ranges = self.parse_range_header(rangeheader)
+
+            # ranges = None means the header didn't parse, so ignore
+            # the header as if it didn't exist.  If there is more than one
+            # range, then just return the first for now, until we can
+            # generate multipart/byteranges.
+            if ranges is not None:
+                first, last = ranges[0]
+
+                if first >= filesize:
+                    raise WebError('First beyond end of file',
+                                   http.REQUESTED_RANGE_NOT_SATISFIABLE)
+                else:
+                    first = max(0, first)
+                    last = min(filesize-1, last)
+
+                    req.setResponseCode(http.PARTIAL_CONTENT)
+                    req.setHeader('content-range',"bytes %s-%s/%s" %
+                                  (str(first), str(last),
+                                   str(filesize)))
+                    contentsize = last - first + 1
+                    size = contentsize
+
         req.setHeader("content-length", str(contentsize))
         if req.method == "HEAD":
             return ""
-        d = self.filenode.read(req, offset, size)
+
+        # Twisted >=9.0 throws an error if we call req.finish() on a closed
+        # HTTP connection. It also has req.notifyFinish() to help avoid it.
+        finished = []
+        def _request_finished(ign):
+            finished.append(True)
+        if hasattr(req, "notifyFinish"):
+            req.notifyFinish().addBoth(_request_finished)
+
+        d = self.filenode.read(req, first, size)
+
+        def _finished(ign):
+            if not finished:
+                req.finish()
         def _error(f):
+            lp = log.msg("error during GET", facility="tahoe.webish", failure=f,
+                         level=log.UNUSUAL, umid="xSiF3w")
+            if finished:
+                log.msg("but it's too late to tell them", parent=lp,
+                        level=log.UNUSUAL, umid="j1xIbw")
+                return
+            req._tahoe_request_had_error = f # for HTTP-style logging
             if req.startedWriting:
                 # The content-type is already set, and the response code has
                 # already been sent, so we can't provide a clean error
@@ -422,17 +498,13 @@ class FileDownloader(rend.Page):
                 # sensible error message.
                 eh = MyExceptionHandler()
                 eh.renderHTTP_exception(ctx, f)
-        d.addCallbacks(lambda ign: req.finish(), _error)
+        d.addCallbacks(_finished, _error)
         return req.deferred
 
 
 def FileJSONMetadata(ctx, filenode, edge_metadata):
-    if filenode.is_readonly():
-        rw_uri = None
-        ro_uri = filenode.get_uri()
-    else:
-        rw_uri = filenode.get_uri()
-        ro_uri = filenode.get_readonly_uri()
+    rw_uri = filenode.get_write_uri()
+    ro_uri = filenode.get_readonly_uri()
     data = ("filenode", {})
     data[1]['size'] = filenode.get_size()
     if ro_uri:
@@ -445,6 +517,16 @@ def FileJSONMetadata(ctx, filenode, edge_metadata):
     data[1]['mutable'] = filenode.is_mutable()
     if edge_metadata is not None:
         data[1]['metadata'] = edge_metadata
+
+    if filenode.is_mutable() and filenode.get_version() is not None:
+        mutable_type = filenode.get_version()
+        assert mutable_type in (MDMF_VERSION, SDMF_VERSION)
+        if mutable_type == MDMF_VERSION:
+            mutable_type = "mdmf"
+        else:
+            mutable_type = "sdmf"
+        data[1]['mutable-type'] = mutable_type
+
     return text_plain(simplejson.dumps(data, indent=1) + "\n", ctx)
 
 def FileURI(ctx, filenode):