storage: more paranoid handling of bounds and palimpsests in mutable share files

author Zooko O'Whielacronx <zooko@zooko.com>

Mon, 12 Sep 2011 22:26:55 +0000 (15:26 -0700)

committer Zooko O'Whielacronx <zooko@zooko.com>

Mon, 12 Sep 2011 22:26:55 +0000 (15:26 -0700)
author Zooko O'Whielacronx <zooko@zooko.com>
Mon, 12 Sep 2011 22:26:55 +0000 (15:26 -0700)
committer Zooko O'Whielacronx <zooko@zooko.com>
Mon, 12 Sep 2011 22:26:55 +0000 (15:26 -0700)
diff --git a/src/allmydata/interfaces.py b/src/allmydata/interfaces.py

index 982c8d94634e91c62af13d7ae8f2d1d214d2294f..acea3b04839942636b231e123b15348c53e273de 100644 (file)
--- a/src/allmydata/interfaces.py
+++ b/src/allmydata/interfaces.py
@@ -209,12 +209,31 @@ class RIStorageServer(RemoteInterface):
          necessary. A write vector applied to a share number that did not
          exist previously will cause that share to be created.
  
-        Each write vector is accompanied by a 'new_length' argument. If
-        new_length is not None, use it to set the size of the container. This
-        can be used to pre-allocate space for a series of upcoming writes, or
-        truncate existing data. If the container is growing, new_length will
-        be applied before datav. If the container is shrinking, it will be
-        applied afterwards. If new_length==0, the share will be deleted.
+        In Tahoe-LAFS v1.8.3 or later (except 1.9.0a1), if you send a write
+        vector whose offset is beyond the end of the current data, the space
+        between the end of the current data and the beginning of the write
+        vector will be filled with zero bytes. In earlier versions the
+        contents of this space was unspecified (and might end up containing
+        secrets).
+
+        Each write vector is accompanied by a 'new_length' argument, which
+        can be used to truncate the data. If new_length is not None and it is
+        less than the current size of the data (after applying all write
+        vectors), then the data will be truncated to new_length. If
+        new_length==0, the share will be deleted.
+
+        In Tahoe-LAFS v1.8.2 and earlier, new_length could also be used to
+        enlarge the file by sending a number larger than the size of the data
+        after applying all write vectors. That behavior was not used, and as
+        of Tahoe-LAFS v1.8.3 it no longer works and the new_length is ignored
+        in that case.
+
+        If a storage client can rely on a server being of version v1.8.3 or
+        later, it can extend the file efficiently by writing a single zero
+        byte just before the new end-of-file. Otherwise it must explicitly
+        write zeroes to all bytes between the old and new end-of-file. In any
+        case it should avoid sending new_length larger than the size of the
+        data after applying all write vectors.
  
          The read vector is used to extract data from all known shares,
          *before* any writes have been applied. The same vector is used for
diff --git a/src/allmydata/storage/mutable.py b/src/allmydata/storage/mutable.py

index 447e6bcc2c8172b0024156f2a58f439db9590e0b..dc057db442e0c7550ab8379d6875a5ae1d98aaa2 100644 (file)
--- a/src/allmydata/storage/mutable.py
+++ b/src/allmydata/storage/mutable.py
@@ -146,11 +146,19 @@ class MutableShareFile:
              return
          num_extra_leases = self._read_num_extra_leases(f)
          f.seek(old_extra_lease_offset)
-        extra_lease_data = f.read(4 + num_extra_leases * self.LEASE_SIZE)
+        leases_size = 4 + num_extra_leases * self.LEASE_SIZE
+        extra_lease_data = f.read(leases_size)
+
+        # Zero out the old lease info (in order to minimize the chance that
+        # it could accidentally be exposed to a reader later, re #1528).
+        f.seek(old_extra_lease_offset)
+        f.write('\x00' * leases_size)
+        f.flush()
+
+        # An interrupt here will corrupt the leases.
+
          f.seek(new_extra_lease_offset)
          f.write(extra_lease_data)
-        # an interrupt here will corrupt the leases, iff the move caused the
-        # extra leases to overlap.
          self._write_extra_lease_offset(f, new_extra_lease_offset)
  
      def _write_share_data(self, f, offset, data):
@@ -161,7 +169,11 @@ class MutableShareFile:
  
          if offset+length >= data_length:
              # They are expanding their data size.
+
              if self.DATA_OFFSET+offset+length > extra_lease_offset:
+                # TODO: allow containers to shrink. For now, they remain
+                # large.
+
                  # Their new data won't fit in the current container, so we
                  # have to move the leases. With luck, they're expanding it
                  # more than the size of the extra lease block, which will
@@ -175,6 +187,13 @@ class MutableShareFile:
              assert self.DATA_OFFSET+offset+length <= extra_lease_offset
              # Their data now fits in the current container. We must write
              # their new data and modify the recorded data size.
+
+            # Fill any newly exposed empty space with 0's.
+            if offset > data_length:
+                f.seek(self.DATA_OFFSET+data_length)
+                f.write('\x00'*(offset - data_length))
+                f.flush()
+
              new_data_length = offset+length
              self._write_data_length(f, new_data_length)
              # an interrupt here will result in a corrupted share
@@ -398,9 +417,12 @@ class MutableShareFile:
          for (offset, data) in datav:
              self._write_share_data(f, offset, data)
          if new_length is not None:
-            self._change_container_size(f, new_length)
-            f.seek(self.DATA_LENGTH_OFFSET)
-            f.write(struct.pack(">Q", new_length))
+            cur_length = self._read_data_length(f)
+            if new_length < cur_length:
+                self._write_data_length(f, new_length)
+                # TODO: if we're going to shrink the share file when the
+                # share data has shrunk, then call
+                # self._change_container_size() here.
          f.close()
  
  def testv_compare(a, op, b):
diff --git a/src/allmydata/storage/server.py b/src/allmydata/storage/server.py

index 7dd3cb4722b3036697ce5f733acbaafd1c85c711..1f39c9ca14dd10c4010c942c2c661ef2321b0238 100644 (file)
--- a/src/allmydata/storage/server.py
+++ b/src/allmydata/storage/server.py
@@ -222,6 +222,7 @@ class StorageServer(service.MultiService, Referenceable):
                      { "maximum-immutable-share-size": remaining_space,
                        "tolerates-immutable-read-overrun": True,
                        "delete-mutable-shares-with-zero-length-writev": True,
+                      "fills-holes-with-zero-bytes": True,
                        "prevents-read-past-end-of-share-data": True,
                        },
                      "application-version": str(allmydata.__full_version__),
author	Zooko O'Whielacronx <zooko@zooko.com>
	Mon, 12 Sep 2011 22:26:55 +0000 (15:26 -0700)
committer	Zooko O'Whielacronx <zooko@zooko.com>
	Mon, 12 Sep 2011 22:26:55 +0000 (15:26 -0700)
src/allmydata/interfaces.py		patch \| blob \| history
src/allmydata/storage/mutable.py		patch \| blob \| history
src/allmydata/storage/server.py		patch \| blob \| history