From 96eaca6a6dbea0f743694a9836993694f14b2f35 Mon Sep 17 00:00:00 2001
From: Leif Ryge <leif@synthesize.us>
Date: Tue, 1 Dec 2015 18:47:50 +0000
Subject: [PATCH] new feature: preferred storage servers

this includes a squash merge of dca1de6856d0bacb0d57cedcf1a81896910f4efd which
was previously seen in pull request #128, as well as daira's suggested changes
from pull request #204.
---
 docs/configuration.rst            | 25 +++++++++++++++++++++++++
 src/allmydata/client.py           |  4 +++-
 src/allmydata/storage_client.py   | 10 +++++++---
 src/allmydata/test/test_client.py | 12 ++++++++++++
 4 files changed, 47 insertions(+), 4 deletions(-)

diff --git a/docs/configuration.rst b/docs/configuration.rst
index ac75cca7..f9179376 100644
--- a/docs/configuration.rst
+++ b/docs/configuration.rst
@@ -396,6 +396,31 @@ Client Configuration
 .. _performance.rst: performance.rst
 .. _mutable.rst: specifications/mutable.rst
 
+``peers.preferred = (string, optional)``
+
+    This is an optional comma-separated list of Node IDs of servers that will
+    be tried first when selecting storage servers for reading or writing.
+
+    Servers should be identified here by their Node ID as it appears in the web
+    UI, underneath the server's nickname. For storage servers running Tahoe
+    versions >=1.10 (if the introducer is also running Tahoe >=1.10) this will
+    be a "Node Key" (which is prefixed with 'v0-'). For older nodes, it will be
+    a TubID instead. When a preferred server (and/or the introducer) is
+    upgraded to 1.10 or later, clients must adjust their configs accordingly.
+
+    Every node selected for upload, whether preferred or not, will still
+    receive the same number of shares (one, if there are ``N`` or more servers
+    accepting uploads). Preferred nodes are simply moved to the front of the
+    server selection lists computed for each file.
+
+    This is useful if a subset of your nodes has different availability or
+    connectivity characteristics than the rest of the grid. For instance, if
+    there are more than ``N`` servers on the grid, and ``K`` or more of them
+    are at a single physical location, it would make sense for clients at that
+    location to prefer their local servers so that they can maintain access to
+    all of their uploads without using the internet.
+
+
 Frontend Configuration
 ======================
 
diff --git a/src/allmydata/client.py b/src/allmydata/client.py
index bb6dce23..41840e85 100644
--- a/src/allmydata/client.py
+++ b/src/allmydata/client.py
@@ -344,7 +344,9 @@ class Client(node.Node, pollmixin.PollMixin):
     def init_client_storage_broker(self):
         # create a StorageFarmBroker object, for use by Uploader/Downloader
         # (and everybody else who wants to use storage servers)
-        sb = storage_client.StorageFarmBroker(self.tub, permute_peers=True)
+        ps = self.get_config("client", "peers.preferred", "").split(",")
+        preferred_peers = tuple([p.strip() for p in ps if p.strip()])  # drop blank and whitespace-only entries
+        sb = storage_client.StorageFarmBroker(self.tub, permute_peers=True, preferred_peers=preferred_peers)
         self.storage_broker = sb
 
         # load static server specifications from tahoe.cfg, if any.
diff --git a/src/allmydata/storage_client.py b/src/allmydata/storage_client.py
index e532db1a..dd9780f2 100644
--- a/src/allmydata/storage_client.py
+++ b/src/allmydata/storage_client.py
@@ -62,10 +62,11 @@ class StorageFarmBroker:
     I'm also responsible for subscribing to the IntroducerClient to find out
     about new servers as they are announced by the Introducer.
     """
-    def __init__(self, tub, permute_peers):
+    def __init__(self, tub, permute_peers, preferred_peers=()):
         self.tub = tub
         assert permute_peers # False not implemented yet
         self.permute_peers = permute_peers
+        self.preferred_peers = preferred_peers
         # self.servers maps serverid -> IServer, and keeps track of all the
         # storage servers that we've heard about. Each descriptor manages its
         # own Reconnector, and will give us a RemoteReference when we ask
@@ -121,10 +122,13 @@ class StorageFarmBroker:
     def get_servers_for_psi(self, peer_selection_index):
         # return a list of server objects (IServers)
         assert self.permute_peers == True
+        connected_servers = self.get_connected_servers()  # fetched once, reused for the sort below
+        preferred_servers = frozenset(s for s in connected_servers if s.get_longname() in self.preferred_peers)
         def _permuted(server):
             seed = server.get_permutation_seed()
-            return sha1(peer_selection_index + seed).digest()
-        return sorted(self.get_connected_servers(), key=_permuted)
+            is_unpreferred = server not in preferred_servers  # False sorts before True: preferred first
+            return (is_unpreferred, sha1(peer_selection_index + seed).digest())
+        return sorted(connected_servers, key=_permuted)
 
     def get_all_serverids(self):
         return frozenset(self.servers.keys())
diff --git a/src/allmydata/test/test_client.py b/src/allmydata/test/test_client.py
index dfd9b8de..1819fa4a 100644
--- a/src/allmydata/test/test_client.py
+++ b/src/allmydata/test/test_client.py
@@ -262,6 +262,18 @@ class Basic(testutil.ReallyEqualMixin, unittest.TestCase):
         sb.servers.clear()
         self.failUnlessReallyEqual(self._permute(sb, "one"), [])
 
+    def test_permute_with_preferred(self):
+        sb = StorageFarmBroker(None, True, ['1','4'])  # '1' and '4' are the preferred peers
+        for k in ["%d" % i for i in range(5)]:
+            ann = {"anonymous-storage-FURL": "pb://abcde@nowhere/fake",
+                   "permutation-seed-base32": base32.b2a(k) }
+            sb.test_add_rref(k, "rref", ann)
+
+        self.failUnlessReallyEqual(self._permute(sb, "one"), ['1','4','3','0','2'])  # expected: '1' and '4' lead
+        self.failUnlessReallyEqual(self._permute(sb, "two"), ['4','1','0','2','3'])  # expected: '4' and '1' lead
+        sb.servers.clear()
+        self.failUnlessReallyEqual(self._permute(sb, "one"), [])
+
     def test_versions(self):
         basedir = "test_client.Basic.test_versions"
         os.mkdir(basedir)
-- 
2.45.2