finish renaming 'subshare' to 'block' in immutable/encode.py and in docs/
author Zooko O'Whielacronx <zooko@zooko.com>
Tue, 9 Dec 2008 23:33:18 +0000 (16:33 -0700)
committer Zooko O'Whielacronx <zooko@zooko.com>
Tue, 9 Dec 2008 23:33:18 +0000 (16:33 -0700)
docs/file-encoding.txt
docs/file-encoding3.svg
docs/file-encoding4.svg
docs/file-encoding5.svg
docs/file-encoding6.svg
src/allmydata/immutable/encode.py

index 4b0572d5d2bf4490fb384973b75dca00a13a3fb4..23862eadc2dfb32180336bf9a770d23ddb5328d4 100644 (file)
@@ -48,33 +48,33 @@ ciphertexts are not secret.
 
 The ciphertext file is then broken up into segments. The last segment is
 likely to be shorter than the rest. Each segment is erasure-coded into a
-number of "subshares". This takes place one segment at a time. (In fact,
+number of "blocks". This takes place one segment at a time. (In fact,
 encryption and erasure-coding take place at the same time, once per plaintext
 segment). Larger segment sizes result in less overhead overall, but increase
 both the memory footprint and the "alacrity" (the number of bytes we have to
 receive before we can deliver validated plaintext to the user). The current
 default segment size is 128KiB.
 
-One subshare from each segment is sent to each shareholder (aka leaseholder,
+One block from each segment is sent to each shareholder (aka leaseholder,
 aka landlord, aka storage node, aka peer). The "share" held by each remote
-shareholder is nominally just a collection of these subshares. The file will
+shareholder is nominally just a collection of these blocks. The file will
 be recoverable when a certain number of shares have been retrieved.
 
 [[Image(file-encoding2.png)]]
 
-The subshares are hashed as they are generated and transmitted. These
-subshare hashes are put into a Merkle hash tree. When the last share has been
+The blocks are hashed as they are generated and transmitted. These
+block hashes are put into a Merkle hash tree. When the last share has been
 created, the merkle tree is completed and delivered to the peer. Later, when
-we retrieve these subshares, the peer will send many of the merkle hash tree
-nodes ahead of time, so we can validate each subshare independently.
+we retrieve these blocks, the peer will send many of the merkle hash tree
+nodes ahead of time, so we can validate each block independently.
 
-The root of this subshare hash tree is called the "subshare root hash" and
+The root of this block hash tree is called the "block root hash" and is
 used in the next step.
 
 [[Image(file-encoding3.png)]]
 
 There is a higher-level Merkle tree called the "share hash tree". Its leaves
-are the subshare root hashes from each share. The root of this tree is called
+are the block root hashes from each share. The root of this tree is called
 the "share root hash" and is included in the "URI Extension Block", aka UEB.
 The ciphertext hash and Merkle tree are also put here, along with the
 original file size, and the encoding parameters. The UEB contains all the
@@ -97,10 +97,10 @@ represented as a (relatively) short printable string like so:
 
 During download, when a peer begins to transmit a share, it first transmits
 all of the parts of the share hash tree that are necessary to validate its
-subshare root hash. Then it transmits the portions of the subshare hash tree
-that are necessary to validate the first subshare. Then it transmits the
-first subshare. It then continues this loop: transmitting any portions of the
-subshare hash tree to validate subshare#N, then sending subshare#N.
+block root hash. Then it transmits the portions of the block hash tree
+that are necessary to validate the first block. Then it transmits the
+first block. It then continues this loop: transmitting any portions of the
+block hash tree to validate block#N, then sending block#N.
 
 [[Image(file-encoding5.png)]]
 
@@ -108,19 +108,19 @@ So the "share" that is sent to the remote peer actually consists of three
 pieces, sent in a specific order as they become available, and retrieved
 during download in a different order according to when they are needed.
 
-The first piece is the subshares themselves, one per segment. The last
-subshare will likely be shorter than the rest, because the last segment is
-probably shorter than the rest. The second piece is the subshare hash tree,
-consisting of a total of two SHA-1 hashes per subshare. The third piece is a
+The first piece is the blocks themselves, one per segment. The last
+block will likely be shorter than the rest, because the last segment is
+probably shorter than the rest. The second piece is the block hash tree,
+consisting of a total of two SHA-1 hashes per block. The third piece is a
 hash chain from the share hash tree, consisting of log2(numshares) hashes.
 
-During upload, all subshares are sent first, followed by the subshare hash
+During upload, all blocks are sent first, followed by the block hash
 tree, followed by the share hash chain. During download, the share hash chain
-is delivered first, followed by the subshare root hash. The client then uses
-the hash chain to validate the subshare root hash. Then the peer delivers
-enough of the subshare hash tree to validate the first subshare, followed by
-the first subshare itself. The subshare hash chain is used to validate the
-subshare, then it is passed (along with the first subshare from several other
+is delivered first, followed by the block root hash. The client then uses
+the hash chain to validate the block root hash. Then the peer delivers
+enough of the block hash tree to validate the first block, followed by
+the first block itself. The block hash chain is used to validate the
+block, which is then passed (along with the first block from several other
 peers) into decoding, to produce the first segment of crypttext, which is
 then decrypted to produce the first segment of plaintext, which is finally
 delivered to the user.
index c18c3438c687c4218e59e5591f584e2dc4a2ad72..fb5fd4c07ac9baf6e2374e7d8578b89c565ea886 100644 (file)
          sodipodi:role="line"
          id="tspan3344"
          x="68.653069"
-         y="466.45987">subshare hash tree</tspan></text>
+         y="466.45987">block hash tree</tspan></text>
     <text
        xml:space="preserve"
        style="font-size:20px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:start;line-height:100%;writing-mode:lr-tb;text-anchor:start;fill:black;fill-opacity:1;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;font-family:Bitstream Charter"
          sodipodi:role="line"
          id="tspan3352"
          x="321.63498"
-         y="405.33044">&quot;subshare root hash&quot;</tspan></text>
+         y="405.33044">&quot;block root hash&quot;</tspan></text>
   </g>
 </svg>
index 5b6f91c368364045277fd1b01970f0b325a1a9fe..f4b21d02381e6c8bdf5d13083c2fa9a8f6aa6435 100644 (file)
@@ -84,7 +84,7 @@
          sodipodi:role="line"
          id="tspan3634"
          x="212.25175"
-         y="109.45165">subshare</tspan><tspan
+         y="109.45165">block</tspan><tspan
          sodipodi:role="line"
          x="212.25175"
          y="129.45165"
index 7a5a78cf636436f12a4134736f49762245caee51..a20a1369d336cf91645f0284d4351863af564cd9 100644 (file)
@@ -75,7 +75,7 @@
          sodipodi:role="line"
          id="tspan3634"
          x="93.755287"
-         y="98.537506">subshare</tspan><tspan
+         y="98.537506">block</tspan><tspan
          sodipodi:role="line"
          x="93.755287"
          y="118.53751"
index f15ee49e7eb97fc1a7abd7125cc33e1e517da70b..09ced3feca4e9814752ea03665986295f0ff8331 100644 (file)
          sodipodi:role="line"
          id="tspan3344"
          x="68.653069"
-         y="466.45987">subshare hash tree</tspan></text>
+         y="466.45987">block hash tree</tspan></text>
     <text
        xml:space="preserve"
        style="font-size:20px;font-style:normal;font-variant:normal;font-weight:normal;font-stretch:normal;text-align:start;line-height:100%;writing-mode:lr-tb;text-anchor:start;fill:black;fill-opacity:1;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;font-family:Bitstream Charter"
          sodipodi:role="line"
          id="tspan3352"
          x="321.63498"
-         y="405.33044">&quot;subshare root hash&quot;</tspan></text>
+         y="405.33044">&quot;block root hash&quot;</tspan></text>
     <text
        xml:space="preserve"
        style="font-size:20px;font-style:normal;font-variant:normal;font-weight:bold;font-stretch:normal;text-align:start;line-height:100%;writing-mode:lr-tb;text-anchor:start;fill:black;fill-opacity:1;stroke:none;stroke-width:1px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;font-family:Bitstream Charter"
          sodipodi:role="line"
          id="tspan3634"
          x="88.648125"
-         y="598.72913">subshare</tspan><tspan
+         y="598.72913">block</tspan><tspan
          sodipodi:role="line"
          x="88.648125"
          y="618.72913"
index 4e46854e10f937e36f6ff74c4dbceda98ab56bc1..0bdb5e1d84ffb3e975cc64358705db905423b6d6 100644 (file)
@@ -204,8 +204,8 @@ class Encoder(object):
         self._crypttext_hasher = hashutil.crypttext_hasher()
         self._crypttext_hashes = []
         self.segment_num = 0
-        self.subshare_hashes = [[] for x in range(self.num_shares)]
-        # subshare_hashes[i] is a list that will be accumulated and then send
+        self.block_hashes = [[] for x in range(self.num_shares)]
+        # block_hashes[i] is a list that will be accumulated and then sent
         # to landlord[i]. This list contains a hash of each segment_share
         # that we sent to that landlord.
         self.share_root_hashes = [None] * self.num_shares
@@ -242,7 +242,7 @@ class Encoder(object):
                           self.send_plaintext_hash_tree_to_all_shareholders())
         d.addCallback(lambda res:
                       self.send_crypttext_hash_tree_to_all_shareholders())
-        d.addCallback(lambda res: self.send_all_subshare_hash_trees())
+        d.addCallback(lambda res: self.send_all_block_hash_trees())
         d.addCallback(lambda res: self.send_all_share_hash_trees())
         d.addCallback(lambda res: self.send_uri_extension_to_all_shareholders())
 
@@ -431,17 +431,17 @@ class Encoder(object):
         self.set_encode_and_push_progress(segnum)
         lognum = self.log("send_segment(%d)" % segnum, level=log.NOISY)
         for i in range(len(shares)):
-            subshare = shares[i]
+            block = shares[i]
             shareid = shareids[i]
-            d = self.send_subshare(shareid, segnum, subshare, lognum)
+            d = self.send_block(shareid, segnum, block, lognum)
             dl.append(d)
-            subshare_hash = hashutil.block_hash(subshare)
+            block_hash = hashutil.block_hash(block)
             #from allmydata.util import base32
             #log.msg("creating block (shareid=%d, blocknum=%d) "
             #        "len=%d %r .. %r: %s" %
-            #        (shareid, segnum, len(subshare),
-            #         subshare[:50], subshare[-50:], base32.b2a(subshare_hash)))
-            self.subshare_hashes[shareid].append(subshare_hash)
+            #        (shareid, segnum, len(block),
+            #         block[:50], block[-50:], base32.b2a(block_hash)))
+            self.block_hashes[shareid].append(block_hash)
 
         dl = self._gather_responses(dl)
         def _logit(res):
@@ -458,13 +458,13 @@ class Encoder(object):
         dl.addCallback(_logit)
         return dl
 
-    def send_subshare(self, shareid, segment_num, subshare, lognum):
+    def send_block(self, shareid, segment_num, block, lognum):
         if shareid not in self.landlords:
             return defer.succeed(None)
         sh = self.landlords[shareid]
         lognum2 = self.log("put_block to %s" % self.landlords[shareid],
                            parent=lognum, level=log.NOISY)
-        d = sh.put_block(segment_num, subshare)
+        d = sh.put_block(segment_num, block)
         def _done(res):
             self.log("put_block done", parent=lognum2, level=log.NOISY)
             return res
@@ -577,19 +577,19 @@ class Encoder(object):
         d.addErrback(self._remove_shareholder, shareid, "put_crypttext_hashes")
         return d
 
-    def send_all_subshare_hash_trees(self):
-        self.log("sending subshare hash trees", level=log.NOISY)
+    def send_all_block_hash_trees(self):
+        self.log("sending block hash trees", level=log.NOISY)
         self.set_status("Sending Subshare Hash Trees")
         self.set_encode_and_push_progress(extra=0.4)
         dl = []
-        for shareid,hashes in enumerate(self.subshare_hashes):
-            # hashes is a list of the hashes of all subshares that were sent
+        for shareid,hashes in enumerate(self.block_hashes):
+            # hashes is a list of the hashes of all blocks that were sent
             # to shareholder[shareid].
-            dl.append(self.send_one_subshare_hash_tree(shareid, hashes))
+            dl.append(self.send_one_block_hash_tree(shareid, hashes))
         return self._gather_responses(dl)
 
-    def send_one_subshare_hash_tree(self, shareid, subshare_hashes):
-        t = HashTree(subshare_hashes)
+    def send_one_block_hash_tree(self, shareid, block_hashes):
+        t = HashTree(block_hashes)
         all_hashes = list(t)
         # all_hashes[0] is the root hash, == hash(ah[1]+ah[2])
         # all_hashes[1] is the left child, == hash(ah[3]+ah[4])