git.rkrishnan.org Git - tahoe-lafs/tahoe-lafs.git/commitdiff
immutable: stop reading past the end of the sharefile in the process of optimizing...
author    Zooko O'Whielacronx <zooko@zooko.com>
          Mon, 5 Jan 2009 20:40:57 +0000 (13:40 -0700)
committer Zooko O'Whielacronx <zooko@zooko.com>
          Mon, 5 Jan 2009 20:40:57 +0000 (13:40 -0700)
src/allmydata/immutable/layout.py
src/allmydata/test/test_immutable.py

diff --git a/src/allmydata/immutable/layout.py b/src/allmydata/immutable/layout.py
index f64b388fe9fab095a7f708d3e6b22e87b1b31735..07d1131f77eacc2dcbe570c2bf4189f81afcfb15 100644
--- a/src/allmydata/immutable/layout.py
+++ b/src/allmydata/immutable/layout.py
@@ -285,8 +285,10 @@ class ReadBucketProxy:
         # TODO: for small shares, read the whole bucket in _start()
         d = self._fetch_header()
         d.addCallback(self._parse_offsets)
-        d.addCallback(self._fetch_sharehashtree_and_ueb)
-        d.addCallback(self._parse_sharehashtree_and_ueb)
+        # XXX The following two callbacks implement a slightly faster/nicer way to get the ueb
+        # and sharehashtree, but it requires that the storage server be >= v1.3.0.
+        # d.addCallback(self._fetch_sharehashtree_and_ueb)
+        # d.addCallback(self._parse_sharehashtree_and_ueb)
         def _fail_waiters(f):
             self._ready.fire(f)
         def _notify_waiters(result):
@@ -396,6 +398,10 @@ class ReadBucketProxy:
             return defer.succeed([])
 
     def _get_share_hashes(self, unused=None):
+        if hasattr(self, '_share_hashes'):
+            return self._share_hashes
+        else:
+            return self._get_share_hashes_the_old_way()
         return self._share_hashes
 
     def get_share_hashes(self):
@@ -403,8 +409,48 @@ class ReadBucketProxy:
         d.addCallback(self._get_share_hashes)
         return d
 
+    def _get_share_hashes_the_old_way(self):
+        """ Tahoe storage servers < v1.3.0 would return an error if you tried to read past the
+        end of the share, so we need to use the offset and read just that much."""
+        offset = self._offsets['share_hashes']
+        size = self._offsets['uri_extension'] - offset
+        assert size % (2+HASH_SIZE) == 0
+        d = self._read(offset, size)
+        def _unpack_share_hashes(data):
+            assert len(data) == size
+            hashes = []
+            for i in range(0, size, 2+HASH_SIZE):
+                hashnum = struct.unpack(">H", data[i:i+2])[0]
+                hashvalue = data[i+2:i+2+HASH_SIZE]
+                hashes.append( (hashnum, hashvalue) )
+            return hashes
+        d.addCallback(_unpack_share_hashes)
+        return d
+
+    def _get_uri_extension_the_old_way(self, unused=None):
+        """ Tahoe storage servers < v1.3.0 would return an error if you tried to read past the
+        end of the share, so we need to fetch the UEB size and then read just that much."""
+        offset = self._offsets['uri_extension']
+        d = self._read(offset, self._fieldsize)
+        def _got_length(data):
+            if len(data) != self._fieldsize:
+                raise LayoutInvalid("not enough bytes to encode URI length -- should be %d bytes long, not %d " % (self._fieldsize, len(data),))
+            length = struct.unpack(self._fieldstruct, data)[0]
+            if length >= 2**31:
+                # URI extension blocks are around 419 bytes long, so this must be corrupted.
+                # Anyway, the foolscap interface schema for "read" will not allow >= 2**31 bytes
+                # length.
+                raise RidiculouslyLargeURIExtensionBlock(length)
+
+            return self._read(offset+self._fieldsize, length)
+        d.addCallback(_got_length)
+        return d
+
     def _get_uri_extension(self, unused=None):
-        return self._ueb_data
+        if hasattr(self, '_ueb_data'):
+            return self._ueb_data
+        else:
+            return self._get_uri_extension_the_old_way()
 
     def get_uri_extension(self):
         d = self._start_if_needed()
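
For reference, the share_hashes region that _get_share_hashes_the_old_way walks is a flat sequence of (hashnum, hashvalue) records, each a 2-byte big-endian hash number followed by the hash itself. A minimal standalone sketch of that unpacking, assuming HASH_SIZE = 32 (Tahoe's SHA-256-based hashes) and an in-memory buffer in place of a live server read:

import struct

HASH_SIZE = 32   # assumed: Tahoe's share hashes are 32-byte SHA-256-based digests

def unpack_share_hashes(data):
    """Parse a raw share_hashes region into (hashnum, hashvalue) pairs."""
    assert len(data) % (2 + HASH_SIZE) == 0
    hashes = []
    for i in range(0, len(data), 2 + HASH_SIZE):
        (hashnum,) = struct.unpack(">H", data[i:i+2])  # 2-byte big-endian hash number
        hashes.append((hashnum, data[i+2:i+2+HASH_SIZE]))
    return hashes

# round-trip example with two fabricated records
records = [(0, b"\x11" * HASH_SIZE), (3, b"\x22" * HASH_SIZE)]
packed = b"".join(struct.pack(">H", n) + h for n, h in records)
assert unpack_share_hashes(packed) == records
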
diff --git a/src/allmydata/test/test_immutable.py b/src/allmydata/test/test_immutable.py
index e9bdf0dc60f79b5f5994f244d279b1e978cc0215..d9e2e30391844967dc71cee9f51793922c35a8ee 100644
--- a/src/allmydata/test/test_immutable.py
+++ b/src/allmydata/test/test_immutable.py
@@ -383,7 +383,7 @@ class Test(ShareManglingMixin, unittest.TestCase):
             # To pass this test, you have to download the file using only 10 reads total: 3 to
             # get the headers from each share, 3 to get the share hash trees and uebs from each
             # share, 1 to get the crypttext hashes, and 3 to get the block data from each share.
-            self.failIf(after_download_reads-before_download_reads > 10, (after_download_reads, before_download_reads))
+            self.failIf(after_download_reads-before_download_reads > 12, (after_download_reads, before_download_reads))
         d.addCallback(self._download_and_check_plaintext)
         d.addCallback(_after_download)
         return d
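
The relaxed read budget (12 instead of 10) leaves room for the extra reads the fallback path costs: _get_uri_extension_the_old_way needs two reads per UEB (one for the length field, one for the body) where the commented-out batched path needed only one. A self-contained sketch of that two-step read, assuming a v1 share layout with a 4-byte ">L" length field (v2 layouts use a wider field) and a hypothetical read(offset, length) callable standing in for the remote read:

import struct

FIELDSTRUCT = ">L"                        # assumed v1 layout: 4-byte big-endian length
FIELDSIZE = struct.calcsize(FIELDSTRUCT)

def read_ueb_the_old_way(read, ueb_offset):
    """Fetch the URI extension block with two bounded reads, never past its end.
    `read(offset, length) -> bytes` stands in for the remote read call."""
    length_data = read(ueb_offset, FIELDSIZE)             # read 1: the length field
    (length,) = struct.unpack(FIELDSTRUCT, length_data)
    if length >= 2**31:
        # mirrors the RidiculouslyLargeURIExtensionBlock guard in the diff above
        raise ValueError("implausible UEB length: %d" % length)
    return read(ueb_offset + FIELDSIZE, length)           # read 2: the block itself

# usage against an in-memory share whose UEB sits at offset 100
share = b"\x00" * 100 + struct.pack(FIELDSTRUCT, 3) + b"ueb"
assert read_ueb_the_old_way(lambda o, n: share[o:o+n], 100) == b"ueb"
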