From 32f80625c912be45ddc879b5bb780961f7392a99 Mon Sep 17 00:00:00 2001
From: Zooko O'Whielacronx <zooko@zooko.com>
Date: Mon, 12 Sep 2011 15:26:55 -0700
Subject: [PATCH] storage: more paranoid handling of bounds and palimpsests in
 mutable share files

* storage server ignores requests to extend shares by sending a new_length
* storage server fills exposed holes (created by sending a write vector
  whose offset begins after the end of the current data) with 0 to avoid
  "palimpsest" exposure of previous contents
* storage server zeroes out lease info at the old location when moving it
  to a new location

ref. #1528

---
 src/allmydata/interfaces.py      | 31 +++++++++++++++++++++++------
 src/allmydata/storage/mutable.py | 34 ++++++++++++++++++++++++++------
 src/allmydata/storage/server.py  |  1 +
 3 files changed, 54 insertions(+), 12 deletions(-)

diff --git a/src/allmydata/interfaces.py b/src/allmydata/interfaces.py
index 982c8d94..acea3b04 100644
--- a/src/allmydata/interfaces.py
+++ b/src/allmydata/interfaces.py
@@ -209,12 +209,31 @@ class RIStorageServer(RemoteInterface):
         necessary. A write vector applied to a share number that did not
         exist previously will cause that share to be created.
 
-        Each write vector is accompanied by a 'new_length' argument. If
-        new_length is not None, use it to set the size of the container. This
-        can be used to pre-allocate space for a series of upcoming writes, or
-        truncate existing data. If the container is growing, new_length will
-        be applied before datav. If the container is shrinking, it will be
-        applied afterwards. If new_length==0, the share will be deleted.
+        In Tahoe-LAFS v1.8.3 or later (except 1.9.0a1), if you send a write
+        vector whose offset is beyond the end of the current data, the space
+        between the end of the current data and the beginning of the write
+        vector will be filled with zero bytes. In earlier versions the
+        contents of this space were unspecified (and might end up containing
+        secrets).
+
+        Each write vector is accompanied by a 'new_length' argument, which
+        can be used to truncate the data. If new_length is not None and it is
+        less than the current size of the data (after applying all write
+        vectors), then the data will be truncated to new_length. If
+        new_length==0, the share will be deleted.
+
+        In Tahoe-LAFS v1.8.2 and earlier, new_length could also be used to
+        enlarge the file by sending a number larger than the size of the data
+        after applying all write vectors. That behavior went unused; as of
+        Tahoe-LAFS v1.8.3 it no longer works, and new_length is simply
+        ignored in that case.
+
+        If a storage client can rely on the server running v1.8.3 or
+        later, it can extend the file efficiently by writing a single zero
+        byte just before the new end-of-file. Otherwise it must explicitly
+        write zeroes to all bytes between the old and new end-of-file. In any
+        case it should avoid sending new_length larger than the size of the
+        data after applying all write vectors.
 
         The read vector is used to extract data from all known shares,
         *before* any writes have been applied. The same vector is used for
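
A minimal client-side sketch of the idiom described in the docstring above
(it assumes the slot_testv_and_readv_and_writev signature declared elsewhere
in this interface, and that rref, storage_index, secrets, sharenum, and
new_size are already in hand):

    # Sketch: extend a mutable share to new_size bytes on a v1.8.3+ server
    # by writing a single zero byte just before the new end-of-file; the
    # server fills the intervening hole with zero bytes.
    datav = [(new_size - 1, b"\x00")]           # one one-byte write vector
    tw_vectors = {sharenum: ([], datav, None)}  # no test vector, no new_length
    d = rref.callRemote("slot_testv_and_readv_and_writev",
                        storage_index, secrets, tw_vectors, [])
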
diff --git a/src/allmydata/storage/mutable.py b/src/allmydata/storage/mutable.py
index 447e6bcc..dc057db4 100644
--- a/src/allmydata/storage/mutable.py
+++ b/src/allmydata/storage/mutable.py
@@ -146,11 +146,19 @@ class MutableShareFile:
             return
         num_extra_leases = self._read_num_extra_leases(f)
         f.seek(old_extra_lease_offset)
-        extra_lease_data = f.read(4 + num_extra_leases * self.LEASE_SIZE)
+        leases_size = 4 + num_extra_leases * self.LEASE_SIZE
+        extra_lease_data = f.read(leases_size)
+
+        # Zero out the old lease info (in order to minimize the chance that
+        # it could accidentally be exposed to a reader later, re #1528).
+        f.seek(old_extra_lease_offset)
+        f.write('\x00' * leases_size)
+        f.flush()
+
+        # An interrupt here will corrupt the leases.
+
         f.seek(new_extra_lease_offset)
         f.write(extra_lease_data)
-        # an interrupt here will corrupt the leases, iff the move caused the
-        # extra leases to overlap.
         self._write_extra_lease_offset(f, new_extra_lease_offset)
 
     def _write_share_data(self, f, offset, data):
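
The read/zero/rewrite sequence above, reduced to a standalone sketch (plain
file I/O; the helper name is invented for illustration):

    def move_region(f, old_offset, new_offset, size):
        f.seek(old_offset)
        data = f.read(size)
        # Blank the old location first, so the old bytes can never be
        # exposed through a later hole (re #1528).
        f.seek(old_offset)
        f.write(b"\x00" * size)
        f.flush()
        # An interrupt here loses the region, mirroring the caveat above.
        f.seek(new_offset)
        f.write(data)
        f.flush()
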
@@ -161,7 +169,11 @@ class MutableShareFile:
 
         if offset+length >= data_length:
             # They are expanding their data size.
+
             if self.DATA_OFFSET+offset+length > extra_lease_offset:
+                # TODO: allow containers to shrink. For now, they remain
+                # large.
+
                 # Their new data won't fit in the current container, so we
                 # have to move the leases. With luck, they're expanding it
                 # more than the size of the extra lease block, which will
@@ -175,6 +187,13 @@ class MutableShareFile:
             assert self.DATA_OFFSET+offset+length <= extra_lease_offset
             # Their data now fits in the current container. We must write
             # their new data and modify the recorded data size.
+
+            # Fill any newly exposed empty space with 0's.
+            if offset > data_length:
+                f.seek(self.DATA_OFFSET+data_length)
+                f.write('\x00'*(offset - data_length))
+                f.flush()
+
             new_data_length = offset+length
             self._write_data_length(f, new_data_length)
             # an interrupt here will result in a corrupted share
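
The hole-filling rule is easiest to see on an in-memory buffer; a sketch
with illustrative names, not code from this patch:

    def apply_write(data, offset, piece):
        # Pad any gap between the current end and the write's offset
        # with zero bytes, so stale contents can never show through.
        if offset > len(data):
            data += b"\x00" * (offset - len(data))
        return data[:offset] + piece + data[offset + len(piece):]

    assert apply_write(b"abc", 5, b"XY") == b"abc\x00\x00XY"
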
@@ -398,9 +417,12 @@ class MutableShareFile:
         for (offset, data) in datav:
             self._write_share_data(f, offset, data)
         if new_length is not None:
-            self._change_container_size(f, new_length)
-            f.seek(self.DATA_LENGTH_OFFSET)
-            f.write(struct.pack(">Q", new_length))
+            cur_length = self._read_data_length(f)
+            if new_length < cur_length:
+                self._write_data_length(f, new_length)
+                # TODO: if we're going to shrink the share file when the
+                # share data has shrunk, then call
+                # self._change_container_size() here.
         f.close()
 
 def testv_compare(a, op, b):
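
And the new new_length handling in writev, as a behavioral sketch
(illustrative names):

    def apply_new_length(data, new_length):
        # Truncate on request; silently ignore attempts to enlarge,
        # matching the v1.8.3 semantics documented in interfaces.py.
        if new_length is not None and new_length < len(data):
            return data[:new_length]
        return data

    assert apply_new_length(b"abcdef", 3) == b"abc"
    assert apply_new_length(b"abc", 10) == b"abc"  # enlargement ignored
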
diff --git a/src/allmydata/storage/server.py b/src/allmydata/storage/server.py
index 7dd3cb47..1f39c9ca 100644
--- a/src/allmydata/storage/server.py
+++ b/src/allmydata/storage/server.py
@@ -222,6 +222,7 @@ class StorageServer(service.MultiService, Referenceable):
                     { "maximum-immutable-share-size": remaining_space,
                       "tolerates-immutable-read-overrun": True,
                       "delete-mutable-shares-with-zero-length-writev": True,
+                      "fills-holes-with-zero-bytes": True,
                       "prevents-read-past-end-of-share-data": True,
                       },
                     "application-version": str(allmydata.__full_version__),
-- 
2.45.2