From: Zooko O'Whielacronx Date: Mon, 12 Sep 2011 22:26:55 +0000 (-0700) Subject: storage: more paranoid handling of bounds and palimpsests in mutable share files X-Git-Tag: allmydata-tahoe-1.9.0a2~43 X-Git-Url: https://git.rkrishnan.org/?p=tahoe-lafs%2Ftahoe-lafs.git;a=commitdiff_plain;h=32f80625c912be45ddc879b5bb780961f7392a99 storage: more paranoid handling of bounds and palimpsests in mutable share files * storage server ignores requests to extend shares by sending a new_length * storage server fills exposed holes (created by sending a write vector whose offset begins after the end of the current data) with 0 to avoid "palimpsest" exposure of previous contents * storage server zeroes out lease info at the old location when moving it to a new location ref. #1528 --- diff --git a/src/allmydata/interfaces.py b/src/allmydata/interfaces.py index 982c8d94..acea3b04 100644 --- a/src/allmydata/interfaces.py +++ b/src/allmydata/interfaces.py @@ -209,12 +209,31 @@ class RIStorageServer(RemoteInterface): necessary. A write vector applied to a share number that did not exist previously will cause that share to be created. - Each write vector is accompanied by a 'new_length' argument. If - new_length is not None, use it to set the size of the container. This - can be used to pre-allocate space for a series of upcoming writes, or - truncate existing data. If the container is growing, new_length will - be applied before datav. If the container is shrinking, it will be - applied afterwards. If new_length==0, the share will be deleted. + In Tahoe-LAFS v1.8.3 or later (except 1.9.0a1), if you send a write + vector whose offset is beyond the end of the current data, the space + between the end of the current data and the beginning of the write + vector will be filled with zero bytes. In earlier versions the + contents of this space was unspecified (and might end up containing + secrets). + + Each write vector is accompanied by a 'new_length' argument, which + can be used to truncate the data. If new_length is not None and it is + less than the current size of the data (after applying all write + vectors), then the data will be truncated to new_length. If + new_length==0, the share will be deleted. + + In Tahoe-LAFS v1.8.2 and earlier, new_length could also be used to + enlarge the file by sending a number larger than the size of the data + after applying all write vectors. That behavior was not used, and as + of Tahoe-LAFS v1.8.3 it no longer works and the new_length is ignored + in that case. + + If a storage client can rely on a server being of version v1.8.3 or + later, it can extend the file efficiently by writing a single zero + byte just before the new end-of-file. Otherwise it must explicitly + write zeroes to all bytes between the old and new end-of-file. In any + case it should avoid sending new_length larger than the size of the + data after applying all write vectors. The read vector is used to extract data from all known shares, *before* any writes have been applied. The same vector is used for diff --git a/src/allmydata/storage/mutable.py b/src/allmydata/storage/mutable.py index 447e6bcc..dc057db4 100644 --- a/src/allmydata/storage/mutable.py +++ b/src/allmydata/storage/mutable.py @@ -146,11 +146,19 @@ class MutableShareFile: return num_extra_leases = self._read_num_extra_leases(f) f.seek(old_extra_lease_offset) - extra_lease_data = f.read(4 + num_extra_leases * self.LEASE_SIZE) + leases_size = 4 + num_extra_leases * self.LEASE_SIZE + extra_lease_data = f.read(leases_size) + + # Zero out the old lease info (in order to minimize the chance that + # it could accidentally be exposed to a reader later, re #1528). + f.seek(old_extra_lease_offset) + f.write('\x00' * leases_size) + f.flush() + + # An interrupt here will corrupt the leases. + f.seek(new_extra_lease_offset) f.write(extra_lease_data) - # an interrupt here will corrupt the leases, iff the move caused the - # extra leases to overlap. self._write_extra_lease_offset(f, new_extra_lease_offset) def _write_share_data(self, f, offset, data): @@ -161,7 +169,11 @@ class MutableShareFile: if offset+length >= data_length: # They are expanding their data size. + if self.DATA_OFFSET+offset+length > extra_lease_offset: + # TODO: allow containers to shrink. For now, they remain + # large. + # Their new data won't fit in the current container, so we # have to move the leases. With luck, they're expanding it # more than the size of the extra lease block, which will @@ -175,6 +187,13 @@ class MutableShareFile: assert self.DATA_OFFSET+offset+length <= extra_lease_offset # Their data now fits in the current container. We must write # their new data and modify the recorded data size. + + # Fill any newly exposed empty space with 0's. + if offset > data_length: + f.seek(self.DATA_OFFSET+data_length) + f.write('\x00'*(offset - data_length)) + f.flush() + new_data_length = offset+length self._write_data_length(f, new_data_length) # an interrupt here will result in a corrupted share @@ -398,9 +417,12 @@ class MutableShareFile: for (offset, data) in datav: self._write_share_data(f, offset, data) if new_length is not None: - self._change_container_size(f, new_length) - f.seek(self.DATA_LENGTH_OFFSET) - f.write(struct.pack(">Q", new_length)) + cur_length = self._read_data_length(f) + if new_length < cur_length: + self._write_data_length(f, new_length) + # TODO: if we're going to shrink the share file when the + # share data has shrunk, then call + # self._change_container_size() here. f.close() def testv_compare(a, op, b): diff --git a/src/allmydata/storage/server.py b/src/allmydata/storage/server.py index 7dd3cb47..1f39c9ca 100644 --- a/src/allmydata/storage/server.py +++ b/src/allmydata/storage/server.py @@ -222,6 +222,7 @@ class StorageServer(service.MultiService, Referenceable): { "maximum-immutable-share-size": remaining_space, "tolerates-immutable-read-overrun": True, "delete-mutable-shares-with-zero-length-writev": True, + "fills-holes-with-zero-bytes": True, "prevents-read-past-end-of-share-data": True, }, "application-version": str(allmydata.__full_version__),