From cae54ab118553e4e41bafbdade902603a84d85f6 Mon Sep 17 00:00:00 2001
From: Brian Warner <warner@allmydata.com>
Date: Tue, 18 Nov 2008 00:29:44 -0700
Subject: [PATCH] tahoe.cfg: add controls for k and N (and shares-of-happiness)

---
 NEWS                                 |  3 +++
 docs/configuration.txt               | 34 ++++++++++++++++++++++++++++
 src/allmydata/client.py              |  5 ++++
 src/allmydata/scripts/create_node.py |  3 +++
 src/allmydata/test/common.py         |  8 +++++++
 src/allmydata/test/test_upload.py    | 28 ++++++++++++++++++++++-
 6 files changed, 80 insertions(+), 1 deletion(-)

diff --git a/NEWS b/NEWS
index 3302133c..91717f05 100644
--- a/NEWS
+++ b/NEWS
@@ -61,6 +61,9 @@ commented out. (ticket #518)
 tahoe.cfg now has controls for the foolscap "keepalive" and "disconnect"
 timeouts (#521).
 
+tahoe.cfg now has controls for the encoding parameters: "shares.needed",
+"shares.total", and "shares.happy" in the "[client]" section. The default
+parameters are still 3-of-10 (with shares.happy = 7).
 
 ** CLI Changes
 
diff --git a/docs/configuration.txt b/docs/configuration.txt
index 2d92fef1..5cc658ca 100644
--- a/docs/configuration.txt
+++ b/docs/configuration.txt
@@ -232,6 +232,40 @@ stats_gatherer.furl = (FURL string, optional)
  If provided, the node will connect to the given stats gatherer and provide
  it with operational statistics.
 
+shares.needed = (int, optional) aka "k"
+shares.total = (int, optional) aka "N", N >= k
+shares.happy = (int, optional) k <= happy <= N
+
+ These three values set the default encoding parameters. Each time a new file
+ is uploaded, erasure-coding is used to break the ciphertext into separate
+ pieces. There will be "N" (i.e. shares.total) pieces created, and the file
+ will be recoverable if any "k" (i.e. shares.needed) pieces are retrieved.
+ The default values are 3-of-10 (i.e. shares.needed = 3, shares.total = 10).
+ Setting k to 1 is equivalent to simple replication (uploading N copies of
+ the file).
+
+ These values control the tradeoff between storage overhead, performance, and
+ reliability. To a first approximation, a 1MB file will use (1MB*N/k) of
+ backend storage space (the actual value will be a bit more, because of other
+ forms of overhead). Up to N-k shares can be lost before the file becomes
+ unrecoverable, so assuming there are at least N servers, up to N-k servers
+ can be offline without losing the file. So large N/k ratios are more
+ reliable, and small N/k ratios use less disk space. Clearly, k must never be
+ larger than N.
+
+ Large values of N will slow down upload operations slightly, since more
+ servers must be involved, and will slightly increase storage overhead due to
+ the hash trees that are created. Large values of k will cause downloads to
+ be marginally slower, because more servers must be involved. N cannot be
+ larger than 256, because of the 8-bit erasure-coding algorithm that Tahoe
+ uses.
+
+ If servers are lost during an upload, shares.happy determines whether the
+ upload is considered successful or not. If at least "shares.happy" shares
+ were placed, the upload is declared a success, otherwise it is declared a
+ failure. The default value is 7. This value must not be smaller than k nor
+ larger than N.
+
 
 == Storage Server Configuration ==
 
diff --git a/src/allmydata/client.py b/src/allmydata/client.py
index 5f048bfa..cde8037d 100644
--- a/src/allmydata/client.py
+++ b/src/allmydata/client.py
@@ -65,6 +65,7 @@ class Client(node.Node, pollmixin.PollMixin):
         node.Node.__init__(self, basedir)
         self.started_timestamp = time.time()
         self.logSource="Client"
+        self.DEFAULT_ENCODING_PARAMETERS = self.DEFAULT_ENCODING_PARAMETERS.copy()
         self.init_introducer_client()
         self.init_stats_provider()
         self.init_lease_secret()
@@ -185,6 +186,10 @@ class Client(node.Node, pollmixin.PollMixin):
 
     def init_client(self):
         helper_furl = self.get_config("client", "helper.furl", None)
+        DEP = self.DEFAULT_ENCODING_PARAMETERS
+        DEP["k"] = int(self.get_config("client", "shares.needed", DEP["k"]))
+        DEP["n"] = int(self.get_config("client", "shares.total", DEP["n"]))
+        DEP["happy"] = int(self.get_config("client", "shares.happy", DEP["happy"]))
         convergence_s = self.get_or_create_private_config('convergence', _make_secret)
         self.convergence = base32.a2b(convergence_s)
         self._node_cache = weakref.WeakValueDictionary() # uri -> node
diff --git a/src/allmydata/scripts/create_node.py b/src/allmydata/scripts/create_node.py
index 95049cf4..e0a7bf09 100644
--- a/src/allmydata/scripts/create_node.py
+++ b/src/allmydata/scripts/create_node.py
@@ -104,6 +104,9 @@ def create_client(basedir, config, out=sys.stdout, err=sys.stderr):
     c.write("helper.furl =\n")
     c.write("#key_generator.furl =\n")
     c.write("#stats_gatherer.furl =\n")
+    c.write("#shares.needed = 3\n")
+    c.write("#shares.happy = 7\n")
+    c.write("#shares.total = 10\n")
     c.write("\n")
 
     boolstr = {True:"true", False:"false"}
diff --git a/src/allmydata/test/common.py b/src/allmydata/test/common.py
index 63ebf2dd..e95a59d1 100644
--- a/src/allmydata/test/common.py
+++ b/src/allmydata/test/common.py
@@ -418,6 +418,10 @@ class SystemTestMixin(pollmixin.PollMixin, testutil.StallMixin):
             if self.stats_gatherer_furl:
                 write("stats_gatherer.furl", self.stats_gatherer_furl)
 
+        # give subclasses a chance to append lines to the node's tahoe.cfg
+        # files before they are launched.
+        self._set_up_nodes_extra_config()
+
         # start client[0], wait for it's tub to be ready (at which point it
         # will have registered the helper furl).
         c = self.add_service(client.Client(basedir=basedirs[0]))
@@ -452,6 +456,10 @@ class SystemTestMixin(pollmixin.PollMixin, testutil.StallMixin):
         d.addCallback(_connected)
         return d
 
+    def _set_up_nodes_extra_config(self):
+        # for overriding by subclasses
+        pass
+
     def _grab_stats(self, res):
         d = self.stats_gatherer.poll()
         return d
diff --git a/src/allmydata/test/test_upload.py b/src/allmydata/test/test_upload.py
index 475051a8..6671b361 100644
--- a/src/allmydata/test/test_upload.py
+++ b/src/allmydata/test/test_upload.py
@@ -7,11 +7,12 @@ from twisted.python import log
 from twisted.internet import defer
 from foolscap import eventual
 
-from allmydata import uri
+from allmydata import uri, monitor
 from allmydata.immutable import upload
 from allmydata.interfaces import IFileURI, FileTooLargeError, NotEnoughSharesError
 from allmydata.util.assertutil import precondition
 from allmydata.util.deferredutil import DeferredListShouldSucceed
+from common import SystemTestMixin
 from common_util import ShouldFailMixin
 
 MiB = 1024*1024
@@ -542,6 +543,31 @@ class StorageIndex(unittest.TestCase):
         d.addCallback(_done)
         return d
 
+class EncodingParameters(SystemTestMixin, unittest.TestCase):
+    def test_configure_parameters(self):
+        self.basedir = self.mktemp()
+        DATA = "data" * 100
+        u = upload.Data(DATA, convergence="")
+        d = self.set_up_nodes()
+        d.addCallback(lambda res: self.clients[0].upload(u))
+        d.addCallback(lambda ur: self.clients[0].create_node_from_uri(ur.uri))
+        m = monitor.Monitor()
+        d.addCallback(lambda fn: fn.check(m))
+        def _check(cr):
+            data = cr.get_data()
+            self.failUnlessEqual(data["count-shares-needed"], 7)
+            self.failUnlessEqual(data["count-shares-expected"], 12)
+        d.addCallback(_check)
+        return d
+
+    def _set_up_nodes_extra_config(self):
+        f = open(os.path.join(self.getdir("client0"), "tahoe.cfg"), "wt")
+        f.write("\n")
+        f.write("[client]\n")
+        f.write("shares.needed = 7\n")
+        f.write("shares.total = 12\n")
+        f.write("\n")
+        f.close()
 
 # TODO:
 #  upload with exactly 75 peers (shares_of_happiness)
-- 
2.45.2