remove 'provisioning'/'reliability' from WUI, add to misc/operations_helpers
author	Brian Warner <warner@lothar.com>
	Wed, 15 Feb 2012 18:18:53 +0000 (18:18 +0000)
committer	Brian Warner <warner@lothar.com>
	Thu, 16 Feb 2012 22:29:05 +0000 (22:29 +0000)
Also remove docs related to reliability/provisioning pages

17 files changed:
docs/architecture.rst
docs/frontends/webapi.rst
misc/operations_helpers/provisioning/provisioning.py [new file with mode: 0644]
misc/operations_helpers/provisioning/provisioning.xhtml [new file with mode: 0644]
misc/operations_helpers/provisioning/reliability.py [new file with mode: 0644]
misc/operations_helpers/provisioning/reliability.xhtml [new file with mode: 0644]
misc/operations_helpers/provisioning/test_provisioning.py [new file with mode: 0644]
misc/operations_helpers/provisioning/web_reliability.py [new file with mode: 0644]
src/allmydata/provisioning.py [deleted file]
src/allmydata/reliability.py [deleted file]
src/allmydata/test/test_provisioning.py [deleted file]
src/allmydata/test/test_web.py
src/allmydata/web/provisioning.xhtml [deleted file]
src/allmydata/web/reliability.py [deleted file]
src/allmydata/web/reliability.xhtml [deleted file]
src/allmydata/web/root.py
src/allmydata/web/welcome.xhtml

diff --git a/docs/architecture.rst b/docs/architecture.rst
index 3a9e08fbc0cc3eb92ed680019c3dc15ffc775e8a..362a179208ac229faf80dc9f65d22820d0e9d79e 100644 (file)
@@ -553,9 +553,3 @@ will be able to reduce the expansion factor down to a bare minimum while
 still retaining high reliability, but large unstable grids (where nodes are
 coming and going very quickly) may require more repair/verification bandwidth
 than actual upload/download traffic.
-
-Tahoe-LAFS nodes that run a webserver have a page dedicated to provisioning
-decisions: this tool may help you evaluate different expansion factors and
-view the disk consumption of each. It is also acquiring some sections with
-availability/reliability numbers, as well as preliminary cost analysis data.
-This tool will continue to evolve as our analysis improves.
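
The "expansion factor" mentioned above is simply N/k for k-of-N erasure
coding. A minimal sketch of the disk-consumption arithmetic the removed page
performed (illustrative names; not code from this commit):

    def share_bytes(plaintext_bytes, k=3, n=10):
        # raw storage consumed by k-of-n encoding, ignoring per-share
        # overhead (hashes, URI extension, lease data)
        expansion = float(n) / k
        return plaintext_bytes * expansion

    # 1TB of plaintext at the default 3-of-10 encoding:
    print share_bytes(1e12)   # ~3.33e12 bytes of share data
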
diff --git a/docs/frontends/webapi.rst b/docs/frontends/webapi.rst
index 47ab75429ecccdf642c55e9d720c363043a3a9d1..b67ee33450935e95b8a33b1d99f57aa510fe0a3e 100644 (file)
@@ -1805,17 +1805,6 @@ This is the "Welcome Page", and contains a few distinct sections::
  implementation hashes synchronously, so clients will probably never see
  progress-hash!=1.0).
 
-``GET /provisioning/``
-
- This page provides a basic tool to predict the likely storage and bandwidth
- requirements of a large Tahoe grid. It provides forms to input things like
- total number of users, number of files per user, average file size, number
- of servers, expansion ratio, hard drive failure rate, etc. It then provides
- numbers like how many disks per server will be needed, how many read
- operations per second should be expected, and the likely MTBF for files in
- the grid. This information is very preliminary, and the model upon which it
- is based still needs a lot of work.
-
 ``GET /helper_status/``
 
  If the node is running a helper (i.e. if [helper]enabled is set to True in
diff --git a/misc/operations_helpers/provisioning/provisioning.py b/misc/operations_helpers/provisioning/provisioning.py
new file mode 100644 (file)
index 0000000..9d9af0e
--- /dev/null
@@ -0,0 +1,772 @@
+
+from nevow import inevow, rend, tags as T
+import math
+from allmydata.util import mathutil
+from allmydata.web.common import getxmlfile
+
+# factorial and binomial copied from
+# http://mail.python.org/pipermail/python-list/2007-April/435718.html
+
+def factorial(n):
+    """factorial(n): return the factorial of the integer n.
+    factorial(0) = 1
+    factorial(n) with n<0 is -factorial(abs(n))
+    """
+    result = 1
+    for i in xrange(1, abs(n)+1):
+        result *= i
+    assert n >= 0
+    return result
+
+def binomial(n, k):
+    assert 0 <= k <= n
+    if k == 0 or k == n:
+        return 1
+    # calculate n!/k! as one product, avoiding factors that
+    # just get canceled
+    P = k+1
+    for i in xrange(k+2, n+1):
+        P *= i
+    # if you are paranoid:
+    # C, rem = divmod(P, factorial(n-k))
+    # assert rem == 0
+    # return C
+    return P//factorial(n-k)
+
+class ProvisioningTool(rend.Page):
+    addSlash = True
+    docFactory = getxmlfile("provisioning.xhtml")
+
+    def render_forms(self, ctx, data):
+        req = inevow.IRequest(ctx)
+
+        def getarg(name, astype=int):
+            if req.method != "POST":
+                return None
+            if name in req.fields:
+                return astype(req.fields[name].value)
+            return None
+        return self.do_forms(getarg)
+
+
+    def do_forms(self, getarg):
+        filled = getarg("filled", bool)
+
+        def get_and_set(name, options, default=None, astype=int):
+            current_value = getarg(name, astype)
+            i_select = T.select(name=name)
+            for (count, description) in options:
+                count = astype(count)
+                if ((current_value is not None and count == current_value) or
+                    (current_value is None and count == default)):
+                    o = T.option(value=str(count), selected="true")[description]
+                else:
+                    o = T.option(value=str(count))[description]
+                i_select = i_select[o]
+            if current_value is None:
+                current_value = default
+            return current_value, i_select
+
+        sections = {}
+        def add_input(section, text, entry):
+            if section not in sections:
+                sections[section] = []
+            sections[section].extend([T.div[text, ": ", entry], "\n"])
+
+        def add_output(section, entry):
+            if section not in sections:
+                sections[section] = []
+            sections[section].extend([entry, "\n"])
+
+        def build_section(section):
+            return T.fieldset[T.legend[section], sections[section]]
+
+        def number(value, suffix=""):
+            scaling = 1
+            if value < 1:
+                fmt = "%1.2g%s"
+            elif value < 100:
+                fmt = "%.1f%s"
+            elif value < 1000:
+                fmt = "%d%s"
+            elif value < 1e6:
+                fmt = "%.2fk%s"; scaling = 1e3
+            elif value < 1e9:
+                fmt = "%.2fM%s"; scaling = 1e6
+            elif value < 1e12:
+                fmt = "%.2fG%s"; scaling = 1e9
+            elif value < 1e15:
+                fmt = "%.2fT%s"; scaling = 1e12
+            elif value < 1e18:
+                fmt = "%.2fP%s"; scaling = 1e15
+            else:
+                fmt = "huge! %g%s"
+            return fmt % (value / scaling, suffix)
+
+        user_counts = [(5, "5 users"),
+                       (50, "50 users"),
+                       (200, "200 users"),
+                       (1000, "1k users"),
+                       (10000, "10k users"),
+                       (50000, "50k users"),
+                       (100000, "100k users"),
+                       (500000, "500k users"),
+                       (1000000, "1M users"),
+                       ]
+        num_users, i_num_users = get_and_set("num_users", user_counts, 50000)
+        add_input("Users",
+                  "How many users are on this network?", i_num_users)
+
+        files_per_user_counts = [(100, "100 files"),
+                                 (1000, "1k files"),
+                                 (10000, "10k files"),
+                                 (100000, "100k files"),
+                                 (1e6, "1M files"),
+                                 ]
+        files_per_user, i_files_per_user = get_and_set("files_per_user",
+                                                       files_per_user_counts,
+                                                       1000)
+        add_input("Users",
+                  "How many files for each user? (avg)",
+                  i_files_per_user)
+
+        space_per_user_sizes = [(1e6, "1MB"),
+                                (10e6, "10MB"),
+                                (100e6, "100MB"),
+                                (200e6, "200MB"),
+                                (1e9, "1GB"),
+                                (2e9, "2GB"),
+                                (5e9, "5GB"),
+                                (10e9, "10GB"),
+                                (100e9, "100GB"),
+                                (1e12, "1TB"),
+                                (2e12, "2TB"),
+                                (5e12, "5TB"),
+                                ]
+        # Estimate ~5 GB per user as a more realistic case
+        space_per_user, i_space_per_user = get_and_set("space_per_user",
+                                                       space_per_user_sizes,
+                                                       5e9)
+        add_input("Users",
+                  "How much data for each user? (avg)",
+                  i_space_per_user)
+
+        sharing_ratios = [(1.0, "1.0x"),
+                          (1.1, "1.1x"),
+                          (2.0, "2.0x"),
+                          ]
+        sharing_ratio, i_sharing_ratio = get_and_set("sharing_ratio",
+                                                     sharing_ratios, 1.0,
+                                                     float)
+        add_input("Users",
+                  "What is the sharing ratio? (1.0x is no-sharing and"
+                  " no convergence)", i_sharing_ratio)
+
+        # Encoding parameters
+        encoding_choices = [("3-of-10-5", "3.3x (3-of-10, repair below 5)"),
+                            ("3-of-10-8", "3.3x (3-of-10, repair below 8)"),
+                            ("5-of-10-7", "2x (5-of-10, repair below 7)"),
+                            ("8-of-10-9", "1.25x (8-of-10, repair below 9)"),
+                            ("27-of-30-28", "1.1x (27-of-30, repair below 28"),
+                            ("25-of-100-50", "4x (25-of-100, repair below 50)"),
+                            ]
+        encoding_parameters, i_encoding_parameters = \
+                             get_and_set("encoding_parameters",
+                                         encoding_choices, "3-of-10-5", str)
+        encoding_pieces = encoding_parameters.split("-")
+        k = int(encoding_pieces[0])
+        assert encoding_pieces[1] == "of"
+        n = int(encoding_pieces[2])
+        # we repair the file when the number of available shares drops below
+        # this value
+        repair_threshold = int(encoding_pieces[3])
+
+        add_input("Servers",
+                  "What are the default encoding parameters?",
+                  i_encoding_parameters)
+
+        # Server info
+        num_server_choices = [ (5, "5 servers"),
+                               (10, "10 servers"),
+                               (15, "15 servers"),
+                               (30, "30 servers"),
+                               (50, "50 servers"),
+                               (100, "100 servers"),
+                               (200, "200 servers"),
+                               (300, "300 servers"),
+                               (500, "500 servers"),
+                               (1000, "1k servers"),
+                               (2000, "2k servers"),
+                               (5000, "5k servers"),
+                               (10e3, "10k servers"),
+                               (100e3, "100k servers"),
+                               (1e6, "1M servers"),
+                               ]
+        num_servers, i_num_servers = \
+                     get_and_set("num_servers", num_server_choices, 30, int)
+        add_input("Servers",
+                  "How many servers are there?", i_num_servers)
+
+        # availability is measured in dBA = -dBF, where 0dBF is 100% failure,
+        # 10dBF is 10% failure, 20dBF is 1% failure, etc
+        server_dBA_choices = [ (10, "90% [10dBA] (2.4hr/day)"),
+                               (13, "95% [13dBA] (1.2hr/day)"),
+                               (20, "99% [20dBA] (14min/day or 3.5days/year)"),
+                               (23, "99.5% [23dBA] (7min/day or 1.75days/year)"),
+                               (30, "99.9% [30dBA] (87sec/day or 9hours/year)"),
+                               (40, "99.99% [40dBA] (60sec/week or 53min/year)"),
+                               (50, "99.999% [50dBA] (5min per year)"),
+                               ]
+        server_dBA, i_server_availability = \
+                    get_and_set("server_availability",
+                                server_dBA_choices,
+                                20, int)
+        add_input("Servers",
+                  "What is the server availability?", i_server_availability)
+
+        drive_MTBF_choices = [ (40, "40,000 Hours"),
+                               ]
+        drive_MTBF, i_drive_MTBF = \
+                    get_and_set("drive_MTBF", drive_MTBF_choices, 40, int)
+        add_input("Drives",
+                  "What is the hard drive MTBF?", i_drive_MTBF)
+        # http://www.tgdaily.com/content/view/30990/113/
+        # http://labs.google.com/papers/disk_failures.pdf
+        # google sees:
+        #  1.7% of the drives they replaced were 0-1 years old
+        #  8% of the drives they replaced were 1-2 years old
+        #  8.6% were 2-3 years old
+        #  6% were 3-4 years old, about 8% were 4-5 years old
+
+        drive_size_choices = [ (100, "100 GB"),
+                               (250, "250 GB"),
+                               (500, "500 GB"),
+                               (750, "750 GB"),
+                               (1000, "1000 GB"),
+                               (2000, "2000 GB"),
+                               (3000, "3000 GB"),
+                               ]
+        drive_size, i_drive_size = \
+                    get_and_set("drive_size", drive_size_choices, 3000, int)
+        drive_size = drive_size * 1e9
+        add_input("Drives",
+                  "What is the capacity of each hard drive?", i_drive_size)
+        drive_failure_model_choices = [ ("E", "Exponential"),
+                                        ("U", "Uniform"),
+                                        ]
+        drive_failure_model, i_drive_failure_model = \
+                             get_and_set("drive_failure_model",
+                                         drive_failure_model_choices,
+                                         "E", str)
+        add_input("Drives",
+                  "How should we model drive failures?", i_drive_failure_model)
+
+        # drive_failure_rate is in failures per second
+        if drive_failure_model == "E":
+            drive_failure_rate = 1.0 / (drive_MTBF * 1000 * 3600)
+        else:
+            drive_failure_rate = 0.5 / (drive_MTBF * 1000 * 3600)
+
+        # deletion/gc/ownership mode
+        ownership_choices = [ ("A", "no deletion, no gc, no owners"),
+                              ("B", "deletion, no gc, no owners"),
+                              ("C", "deletion, share timers, no owners"),
+                              ("D", "deletion, no gc, yes owners"),
+                              ("E", "deletion, owner timers"),
+                              ]
+        ownership_mode, i_ownership_mode = \
+                        get_and_set("ownership_mode", ownership_choices,
+                                    "A", str)
+        add_input("Servers",
+                  "What is the ownership mode?", i_ownership_mode)
+
+        # client access behavior
+        access_rates = [ (1, "one file per day"),
+                         (10, "10 files per day"),
+                         (100, "100 files per day"),
+                         (1000, "1k files per day"),
+                         (10e3, "10k files per day"),
+                         (100e3, "100k files per day"),
+                         ]
+        download_files_per_day, i_download_rate = \
+                                get_and_set("download_rate", access_rates,
+                                            100, int)
+        add_input("Users",
+                  "How many files are downloaded per day?", i_download_rate)
+        download_rate = 1.0 * download_files_per_day / (24*60*60)
+
+        upload_files_per_day, i_upload_rate = \
+                              get_and_set("upload_rate", access_rates,
+                                          10, int)
+        add_input("Users",
+                  "How many files are uploaded per day?", i_upload_rate)
+        upload_rate = 1.0 * upload_files_per_day / (24*60*60)
+
+        delete_files_per_day, i_delete_rate = \
+                              get_and_set("delete_rate", access_rates,
+                                          10, int)
+        add_input("Users",
+                  "How many files are deleted per day?", i_delete_rate)
+        delete_rate = 1.0 * delete_files_per_day / (24*60*60)
+
+
+        # the value is in days
+        lease_timers = [ (1, "one refresh per day"),
+                         (7, "one refresh per week"),
+                         ]
+        lease_timer, i_lease = \
+                     get_and_set("lease_timer", lease_timers,
+                                 7, int)
+        add_input("Users",
+                  "How frequently do clients refresh files or accounts? "
+                  "(if necessary)",
+                  i_lease)
+        seconds_per_lease = 24*60*60*lease_timer
+
+        check_timer_choices = [ (1, "every week"),
+                                (4, "every month"),
+                                (8, "every two months"),
+                                (16, "every four months"),
+                                ]
+        check_timer, i_check_timer = \
+                     get_and_set("check_timer", check_timer_choices, 4, int)
+        add_input("Users",
+                  "How frequently should we check on each file?",
+                  i_check_timer)
+        file_check_interval = check_timer * 7 * 24 * 3600
+
+
+        if filled:
+            add_output("Users", T.div["Total users: %s" % number(num_users)])
+            add_output("Users",
+                       T.div["Files per user: %s" % number(files_per_user)])
+            file_size = 1.0 * space_per_user / files_per_user
+            add_output("Users",
+                       T.div["Average file size: ", number(file_size)])
+            total_files = num_users * files_per_user / sharing_ratio
+
+            add_output("Grid",
+                       T.div["Total number of files in grid: ",
+                             number(total_files)])
+            total_space = num_users * space_per_user / sharing_ratio
+            add_output("Grid",
+                       T.div["Total volume of plaintext in grid: ",
+                             number(total_space, "B")])
+
+            total_shares = n * total_files
+            add_output("Grid",
+                       T.div["Total shares in grid: ", number(total_shares)])
+            expansion = float(n) / float(k)
+
+            total_usage = expansion * total_space
+            add_output("Grid",
+                       T.div["Share data in grid: ", number(total_usage, "B")])
+
+            if n > num_servers:
+                # silly configuration, causes Tahoe2 to wrap and put multiple
+                # shares on some servers.
+                add_output("Servers",
+                           T.div["non-ideal: more shares than servers"
+                                 " (n=%d, servers=%d)" % (n, num_servers)])
+                # every file has at least one share on every server
+                buckets_per_server = total_files
+                shares_per_server = total_files * ((1.0 * n) / num_servers)
+            else:
+                # if nobody is full, then no lease requests will be turned
+                # down for lack of space, and no two shares for the same file
+                # will share a server. Therefore the chance that any given
+                # file has a share on any given server is n/num_servers.
+                buckets_per_server = total_files * ((1.0 * n) / num_servers)
+                # since each such represented file only puts one share on a
+                # server, the total number of shares per server is the same.
+                shares_per_server = buckets_per_server
+            add_output("Servers",
+                       T.div["Buckets per server: ",
+                             number(buckets_per_server)])
+            add_output("Servers",
+                       T.div["Shares per server: ",
+                             number(shares_per_server)])
+
+            # how much space is used on the storage servers for the shares?
+            #  the share data itself
+            share_data_per_server = total_usage / num_servers
+            add_output("Servers",
+                       T.div["Share data per server: ",
+                             number(share_data_per_server, "B")])
+            # this is determined empirically. H=hashsize=32, for a one-segment
+            # file and 3-of-10 encoding
+            share_validation_per_server = 266 * shares_per_server
+            # this could be 423*buckets_per_server, if we moved the URI
+            # extension into a separate file, but that would actually consume
+            # *more* space (minimum filesize is 4KiB), unless we moved all
+            # shares for a given bucket into a single file.
+            share_uri_extension_per_server = 423 * shares_per_server
+
+            # ownership mode adds per-bucket data
+            H = 32 # depends upon the desired security of delete/refresh caps
+            # bucket_lease_size is the amount of data needed to keep track of
+            # the delete/refresh caps for each bucket.
+            bucket_lease_size = 0
+            client_bucket_refresh_rate = 0
+            owner_table_size = 0
+            if ownership_mode in ("B", "C", "D", "E"):
+                bucket_lease_size = sharing_ratio * 1.0 * H
+            if ownership_mode in ("B", "C"):
+                # refreshes per second per client
+                client_bucket_refresh_rate = (1.0 * n * files_per_user /
+                                              seconds_per_lease)
+                add_output("Users",
+                           T.div["Client share refresh rate (outbound): ",
+                                 number(client_bucket_refresh_rate, "Hz")])
+                server_bucket_refresh_rate = (client_bucket_refresh_rate *
+                                              num_users / num_servers)
+                add_output("Servers",
+                           T.div["Server share refresh rate (inbound): ",
+                                 number(server_bucket_refresh_rate, "Hz")])
+            if ownership_mode in ("D", "E"):
+                # each server must maintain a bidirectional mapping from
+                # buckets to owners. One way to implement this would be to
+                # put a list of four-byte owner numbers into each bucket, and
+                # a list of four-byte share numbers into each owner (although
+                # of course we'd really just throw it into a database and let
+                # the experts take care of the details).
+                owner_table_size = 2*(buckets_per_server * sharing_ratio * 4)
+
+            if ownership_mode in ("E",):
+                # in this mode, clients must refresh one timer per server
+                client_account_refresh_rate = (1.0 * num_servers /
+                                               seconds_per_lease)
+                add_output("Users",
+                           T.div["Client account refresh rate (outbound): ",
+                                 number(client_account_refresh_rate, "Hz")])
+                server_account_refresh_rate = (client_account_refresh_rate *
+                                              num_users / num_servers)
+                add_output("Servers",
+                           T.div["Server account refresh rate (inbound): ",
+                                 number(server_account_refresh_rate, "Hz")])
+
+            # TODO: buckets vs shares here is a bit wonky, but in
+            # non-wrapping grids it shouldn't matter
+            share_lease_per_server = bucket_lease_size * buckets_per_server
+            share_ownertable_per_server = owner_table_size
+
+            share_space_per_server = (share_data_per_server +
+                                      share_validation_per_server +
+                                      share_uri_extension_per_server +
+                                      share_lease_per_server +
+                                      share_ownertable_per_server)
+            add_output("Servers",
+                       T.div["Share space per server: ",
+                             number(share_space_per_server, "B"),
+                             " (data ",
+                             number(share_data_per_server, "B"),
+                             ", validation ",
+                             number(share_validation_per_server, "B"),
+                             ", UEB ",
+                             number(share_uri_extension_per_server, "B"),
+                             ", lease ",
+                             number(share_lease_per_server, "B"),
+                             ", ownertable ",
+                             number(share_ownertable_per_server, "B"),
+                             ")",
+                             ])
+
+
+            # rates
+            client_download_share_rate = download_rate * k
+            client_download_byte_rate = download_rate * file_size
+            add_output("Users",
+                       T.div["download rate: shares = ",
+                             number(client_download_share_rate, "Hz"),
+                             " , bytes = ",
+                             number(client_download_byte_rate, "Bps"),
+                             ])
+            total_file_check_rate = 1.0 * total_files / file_check_interval
+            client_check_share_rate = total_file_check_rate / num_users
+            add_output("Users",
+                       T.div["file check rate: shares = ",
+                             number(client_check_share_rate, "Hz"),
+                             " (interval = %s)" %
+                             number(1 / client_check_share_rate, "s"),
+                             ])
+
+            client_upload_share_rate = upload_rate * n
+            # TODO: doesn't include overhead
+            client_upload_byte_rate = upload_rate * file_size * expansion
+            add_output("Users",
+                       T.div["upload rate: shares = ",
+                             number(client_upload_share_rate, "Hz"),
+                             " , bytes = ",
+                             number(client_upload_byte_rate, "Bps"),
+                             ])
+            client_delete_share_rate = delete_rate * n
+
+            server_inbound_share_rate = (client_upload_share_rate *
+                                         num_users / num_servers)
+            server_inbound_byte_rate = (client_upload_byte_rate *
+                                        num_users / num_servers)
+            add_output("Servers",
+                       T.div["upload rate (inbound): shares = ",
+                             number(server_inbound_share_rate, "Hz"),
+                             " , bytes = ",
+                              number(server_inbound_byte_rate, "Bps"),
+                             ])
+            add_output("Servers",
+                       T.div["share check rate (inbound): ",
+                             number(total_file_check_rate * n / num_servers,
+                                    "Hz"),
+                             ])
+
+            server_share_modify_rate = ((client_upload_share_rate +
+                                         client_delete_share_rate) *
+                                         num_users / num_servers)
+            add_output("Servers",
+                       T.div["share modify rate: shares = ",
+                             number(server_share_modify_rate, "Hz"),
+                             ])
+
+            server_outbound_share_rate = (client_download_share_rate *
+                                          num_users / num_servers)
+            server_outbound_byte_rate = (client_download_byte_rate *
+                                         num_users / num_servers)
+            add_output("Servers",
+                       T.div["download rate (outbound): shares = ",
+                             number(server_outbound_share_rate, "Hz"),
+                             " , bytes = ",
+                              number(server_outbound_byte_rate, "Bps"),
+                             ])
+
+
+            total_share_space = num_servers * share_space_per_server
+            add_output("Grid",
+                       T.div["Share space consumed: ",
+                             number(total_share_space, "B")])
+            add_output("Grid",
+                       T.div[" %% validation: %.2f%%" %
+                             (100.0 * share_validation_per_server /
+                              share_space_per_server)])
+            add_output("Grid",
+                       T.div[" %% uri-extension: %.2f%%" %
+                             (100.0 * share_uri_extension_per_server /
+                              share_space_per_server)])
+            add_output("Grid",
+                       T.div[" %% lease data: %.2f%%" %
+                             (100.0 * share_lease_per_server /
+                              share_space_per_server)])
+            add_output("Grid",
+                       T.div[" %% owner data: %.2f%%" %
+                             (100.0 * share_ownertable_per_server /
+                              share_space_per_server)])
+            add_output("Grid",
+                       T.div[" %% share data: %.2f%%" %
+                             (100.0 * share_data_per_server /
+                              share_space_per_server)])
+            add_output("Grid",
+                       T.div["file check rate: ",
+                             number(total_file_check_rate,
+                                    "Hz")])
+
+            total_drives = max(mathutil.div_ceil(int(total_share_space),
+                                                 int(drive_size)),
+                               num_servers)
+            add_output("Drives",
+                       T.div["Total drives: ", number(total_drives), " drives"])
+            drives_per_server = mathutil.div_ceil(total_drives, num_servers)
+            add_output("Servers",
+                       T.div["Drives per server: ", drives_per_server])
+
+            # costs
+            if drive_size == 3000 * 1e9:
+                add_output("Servers", T.div["3000GB drive: $250 each"])
+                drive_cost = 250
+            else:
+                add_output("Servers",
+                           T.div[T.b["unknown cost per drive, assuming $100"]])
+                drive_cost = 100
+
+            if drives_per_server <= 4:
+                add_output("Servers", T.div["1U box with <= 4 drives: $1500"])
+                server_cost = 1500 # typical 1U box
+            elif drives_per_server <= 12:
+                add_output("Servers", T.div["2U box with <= 12 drives: $2500"])
+                server_cost = 2500 # 2U box
+            else:
+                add_output("Servers",
+                           T.div[T.b["Note: too many drives per server, "
+                                     "assuming $3000"]])
+                server_cost = 3000
+
+            server_capital_cost = (server_cost + drives_per_server * drive_cost)
+            total_server_cost = float(num_servers * server_capital_cost)
+            add_output("Servers", T.div["Capital cost per server: $",
+                                        server_capital_cost])
+            add_output("Grid", T.div["Capital cost for all servers: $",
+                                     number(total_server_cost)])
+            # $70/Mbps/mo
+            # $44/server/mo power+space
+            server_bandwidth = max(server_inbound_byte_rate,
+                                   server_outbound_byte_rate)
+            server_bandwidth_mbps = mathutil.div_ceil(int(server_bandwidth*8),
+                                                      int(1e6))
+            server_monthly_cost = 70*server_bandwidth_mbps + 44
+            add_output("Servers", T.div["Monthly cost per server: $",
+                                        server_monthly_cost])
+            add_output("Users", T.div["Capital cost per user: $",
+                                      number(total_server_cost / num_users)])
+
+            # reliability
+            any_drive_failure_rate = total_drives * drive_failure_rate
+            any_drive_MTBF = 1 // any_drive_failure_rate  # in seconds
+            any_drive_MTBF_days = any_drive_MTBF / 86400
+            add_output("Drives",
+                       T.div["MTBF (any drive): ",
+                             number(any_drive_MTBF_days), " days"])
+            drive_replacement_monthly_cost = (float(drive_cost)
+                                              * any_drive_failure_rate
+                                              *30*86400)
+            add_output("Grid",
+                       T.div["Monthly cost of replacing drives: $",
+                             number(drive_replacement_monthly_cost)])
+
+            total_server_monthly_cost = float(num_servers * server_monthly_cost
+                                              + drive_replacement_monthly_cost)
+
+            add_output("Grid", T.div["Monthly cost for all servers: $",
+                                     number(total_server_monthly_cost)])
+            add_output("Users",
+                       T.div["Monthly cost per user: $",
+                             number(total_server_monthly_cost / num_users)])
+
+            # availability
+            file_dBA = self.file_availability(k, n, server_dBA)
+            user_files_dBA = self.many_files_availability(file_dBA,
+                                                          files_per_user)
+            all_files_dBA = self.many_files_availability(file_dBA, total_files)
+            add_output("Users",
+                       T.div["availability of: ",
+                             "arbitrary file = %d dBA, " % file_dBA,
+                             "all files of user1 = %d dBA, " % user_files_dBA,
+                             "all files in grid = %d dBA" % all_files_dBA,
+                             ],
+                       )
+
+            time_until_files_lost = (n-k+1) / any_drive_failure_rate
+            add_output("Grid",
+                       T.div["avg time until files are lost: ",
+                             number(time_until_files_lost, "s"), ", ",
+                             number(time_until_files_lost/86400, " days"),
+                             ])
+
+            share_data_loss_rate = any_drive_failure_rate * drive_size
+            add_output("Grid",
+                       T.div["share data loss rate: ",
+                             number(share_data_loss_rate,"Bps")])
+
+            # the worst-case survival numbers occur when we do a file check
+            # and the file is just above the threshold for repair (so we
+            # decide to not repair it). The question is then: what is the
+            # chance that the file will decay so badly before the next check
+            # that we can't recover it? The resulting probability is per
+            # check interval.
+            # Note that the chances of us getting into this situation are low.
+            P_disk_failure_during_interval = (drive_failure_rate *
+                                              file_check_interval)
+            disk_failure_dBF = 10*math.log10(P_disk_failure_during_interval)
+            disk_failure_dBA = -disk_failure_dBF
+            file_survives_dBA = self.file_availability(k, repair_threshold,
+                                                       disk_failure_dBA)
+            user_files_survives_dBA = self.many_files_availability( \
+                file_survives_dBA, files_per_user)
+            all_files_survives_dBA = self.many_files_availability( \
+                file_survives_dBA, total_files)
+            add_output("Users",
+                       T.div["survival of: ",
+                             "arbitrary file = %d dBA, " % file_survives_dBA,
+                             "all files of user1 = %d dBA, " %
+                             user_files_survives_dBA,
+                             "all files in grid = %d dBA" %
+                             all_files_survives_dBA,
+                             " (per worst-case check interval)",
+                             ])
+
+
+
+        all_sections = []
+        all_sections.append(build_section("Users"))
+        all_sections.append(build_section("Servers"))
+        all_sections.append(build_section("Drives"))
+        if "Grid" in sections:
+            all_sections.append(build_section("Grid"))
+
+        f = T.form(action=".", method="post", enctype="multipart/form-data")
+
+        if filled:
+            action = "Recompute"
+        else:
+            action = "Compute"
+
+        f = f[T.input(type="hidden", name="filled", value="true"),
+              T.input(type="submit", value=action),
+              all_sections,
+              ]
+
+        try:
+            import reliability
+            # we import this just to test to see if the page is available
+            _hush_pyflakes = reliability
+            del _hush_pyflakes
+            f = [T.div[T.a(href="../reliability")["Reliability Math"]], f]
+        except ImportError:
+            pass
+
+        return f
+
+    def file_availability(self, k, n, server_dBA):
+        """
+        The full formula for the availability of a specific file is::
+
+         1 - sum([choose(N,i) * p**i * (1-p)**(N-i) for i in range(k)])
+
+        Where choose(N,i) = N! / ( i! * (N-i)! ) . Note that each term of
+        this summation is the probability that there are exactly 'i' servers
+        available, and what we're doing is adding up the cases where i is too
+        low.
+
+        This is a nuisance to calculate at all accurately, especially once N
+        gets large, and when p is close to unity. So we make an engineering
+        approximation: if (1-p) is very small, then each [i] term is much
+        larger than the [i-1] term, and the sum is dominated by the i=k-1
+        term. This only works for (1-p) < 10%, and when the choose() function
+        doesn't rise fast enough to compensate. For high-expansion encodings
+        (3-of-10, 25-of-100), the choose() function is rising at the same
+        time as the (1-p)**(N-i) term, so that's not an issue. For
+        low-expansion encodings (7-of-10, 75-of-100) the two values are
+        moving in opposite directions, so more care must be taken.
+
+        Note that the p**i term has only a minor effect as long as (1-p)*N is
+        small, and even then the effect is attenuated by the 1-p term.
+        """
+
+        assert server_dBA > 9  # >=90% availability to use the approximation
+        factor = binomial(n, k-1)
+        factor_dBA = 10 * math.log10(factor)
+        exponent = n - k + 1
+        file_dBA = server_dBA * exponent - factor_dBA
+        return file_dBA
+
+    def many_files_availability(self, file_dBA, num_files):
+        """The probability that 'num_files' independent bernoulli trials will
+        succeed (i.e. we can recover all files in the grid at any given
+        moment) is p**num_files . Since p is close to unity, we express p
+        in dBA instead, so we can get useful precision on q (=1-p), and then
+        the formula becomes::
+
+         P_some_files_unavailable = 1 - (1 - q)**num_files
+
+        That (1-q)**n expands with the usual binomial sequence, 1 - nq +
+        Xq**2 ... + Xq**n . We use the same approximation as before, since we
+        know q is close to zero, and we get to ignore all the terms past -nq.
+        """
+
+        many_files_dBA = file_dBA - 10 * math.log10(num_files)
+        return many_files_dBA
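
The dominant-term approximation in file_availability() above can be
sanity-checked against the full binomial sum given in its docstring. A
standalone sketch (not part of the moved file; the dBA conversion follows
the comments above):

    import math

    def binomial(n, k):
        return math.factorial(n) // (math.factorial(k) * math.factorial(n-k))

    def exact_file_dBA(k, n, server_dBA):
        p = 1 - 10**(-server_dBA/10.0)       # dBA -> availability fraction
        # chance that fewer than k of the n servers are up
        p_fail = sum(binomial(n, i) * p**i * (1-p)**(n-i) for i in range(k))
        return -10 * math.log10(p_fail)

    def approx_file_dBA(k, n, server_dBA):
        # keep only the dominant i=k-1 term, as file_availability() does
        return server_dBA * (n-k+1) - 10*math.log10(binomial(n, k-1))

    print exact_file_dBA(3, 10, 20)    # ~143.5 dBA
    print approx_file_dBA(3, 10, 20)   # ~143.5 dBA
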
diff --git a/misc/operations_helpers/provisioning/provisioning.xhtml b/misc/operations_helpers/provisioning/provisioning.xhtml
new file mode 100644 (file)
index 0000000..bfa4edb
--- /dev/null
@@ -0,0 +1,18 @@
+<html xmlns:n="http://nevow.com/ns/nevow/0.1">
+  <head>
+    <title>Tahoe-LAFS - Provisioning Tool</title>
+    <link href="/tahoe.css" rel="stylesheet" type="text/css"/>
+    <link href="/icon.png" rel="shortcut icon" />
+    <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+  </head>
+  <body>
+
+<h1>Tahoe-LAFS Provisioning Tool</h1>
+
+<p>This page will help you determine how much disk space and network
+bandwidth will be required by various sizes and types of Tahoe-LAFS networks.</p>
+
+<div n:render="forms" />
+
+  </body>
+</html>
diff --git a/misc/operations_helpers/provisioning/reliability.py b/misc/operations_helpers/provisioning/reliability.py
new file mode 100644 (file)
index 0000000..a0d6076
--- /dev/null
@@ -0,0 +1,251 @@
+#! /usr/bin/python
+
+import math
+from allmydata.util import statistics
+from numpy import array, matrix, dot
+
+DAY=24*60*60
+MONTH=31*DAY
+YEAR=365*DAY
+
+class ReliabilityModel:
+    """Generate a model of system-wide reliability, given several input
+    parameters.
+
+    This runs a simulation in which time is quantized down to 'delta' seconds
+    (default is one month): a smaller delta will result in a more accurate
+    simulation, but will take longer to run. 'report_span' simulated seconds
+    will be run.
+
+    The encoding parameters are provided as 'k' (minimum number of shares
+    needed to recover the file) and 'N' (total number of shares generated).
+    The default parameters are 3-of-10.
+
+    The first step is to build a probability of individual drive loss during
+    any given delta. This uses a simple exponential model, in which the
+    average drive lifetime is specified by the 'drive_lifetime' parameter
+    (default is 8 years).
+
+    The second step is to calculate a 'transition matrix': a table of
+    probabilities that shows, given A shares at the start of the delta, what
+    the chances are of having B shares left at the end of the delta. The
+    current code optimistically assumes all drives are independent. A
+    subclass could override that assumption.
+
+    An additional 'repair matrix' is created to show what happens when the
+    Checker/Repairer is run. In the simulation, the Checker will be run every
+    'check_period' seconds (default is one month), and the Repairer will be
+    run if it sees fewer than 'R' shares (default 7).
+
+    The third step is to finally run the simulation. An initial probability
+    vector is created (with a 100% chance of N shares and a 0% chance of
+    fewer than N shares), then it is multiplied by the transition matrix for
+    every delta of time. Each time the Checker is to be run, the repair
+    matrix is multiplied in, and some additional stats are accumulated
+    (average number of repairs that occur, average number of shares
+    regenerated per repair).
+
+    The output is a ReliabilityReport instance, which contains a table that
+    samples the state of the simulation once each 'report_period' seconds
+    (defaults to 3 months). Each row of this table will contain the
+    probability vector for one sample period (chance of having X shares, from
+    0 to N, at the end of the period). The report will also contain other
+    information.
+
+    """
+
+    @classmethod
+    def run(klass,
+            drive_lifetime=8*YEAR,
+            k=3, R=7, N=10,
+            delta=1*MONTH,
+            check_period=1*MONTH,
+            report_period=3*MONTH,
+            report_span=5*YEAR,
+            ):
+        self = klass()
+
+        check_period = check_period-1
+        P = self.p_in_period(drive_lifetime, delta)
+
+        decay = self.build_decay_matrix(N, P)
+
+        repair = self.build_repair_matrix(k, N, R)
+
+        #print "DECAY:", decay
+        #print "OLD-POST-REPAIR:", old_post_repair
+        #print "NEW-POST-REPAIR:", decay * repair
+        #print "REPAIR:", repair
+        #print "DIFF:", (old_post_repair - decay * repair)
+
+        START = array([0]*N + [1])
+        DEAD = array([1]*k + [0]*(1+N-k))
+        REPAIRp = array([0]*k + [1]*(R-k) + [0]*(1+N-R))
+        REPAIR_newshares = array([0]*k +
+                                 [N-i for i in range(k, R)] +
+                                 [0]*(1+N-R))
+        assert REPAIR_newshares.shape[0] == N+1
+        #print "START", START
+        #print "REPAIRp", REPAIRp
+        #print "REPAIR_newshares", REPAIR_newshares
+
+        unmaintained_state = START
+        maintained_state = START
+        last_check = 0
+        last_report = 0
+        P_repaired_last_check_period = 0.0
+        needed_repairs = []
+        needed_new_shares = []
+        report = ReliabilityReport()
+
+        for t in range(0, report_span+delta, delta):
+            # the .A[0] turns the one-row matrix back into an array
+            unmaintained_state = (unmaintained_state * decay).A[0]
+            maintained_state = (maintained_state * decay).A[0]
+            if (t-last_check) > check_period:
+                last_check = t
+                # we do a check-and-repair this frequently
+                need_repair = dot(maintained_state, REPAIRp)
+
+                P_repaired_last_check_period = need_repair
+                new_shares = dot(maintained_state, REPAIR_newshares)
+                needed_repairs.append(need_repair)
+                needed_new_shares.append(new_shares)
+
+                maintained_state = (maintained_state * repair).A[0]
+
+            if (t-last_report) > report_period:
+                last_report = t
+                P_dead_unmaintained = dot(unmaintained_state, DEAD)
+                P_dead_maintained = dot(maintained_state, DEAD)
+                cumulative_number_of_repairs = sum(needed_repairs)
+                cumulative_number_of_new_shares = sum(needed_new_shares)
+                report.add_sample(t, unmaintained_state, maintained_state,
+                                  P_repaired_last_check_period,
+                                  cumulative_number_of_repairs,
+                                  cumulative_number_of_new_shares,
+                                  P_dead_unmaintained, P_dead_maintained)
+
+        # record one more sample at the end of the run
+        P_dead_unmaintained = dot(unmaintained_state, DEAD)
+        P_dead_maintained = dot(maintained_state, DEAD)
+        cumulative_number_of_repairs = sum(needed_repairs)
+        cumulative_number_of_new_shares = sum(needed_new_shares)
+        report.add_sample(t, unmaintained_state, maintained_state,
+                          P_repaired_last_check_period,
+                          cumulative_number_of_repairs,
+                          cumulative_number_of_new_shares,
+                          P_dead_unmaintained, P_dead_maintained)
+
+        #def yandm(seconds):
+        #    return "%dy.%dm" % (int(seconds/YEAR), int( (seconds%YEAR)/MONTH))
+        #needed_repairs_total = sum(needed_repairs)
+        #needed_new_shares_total = sum(needed_new_shares)
+        #print "at 2y:"
+        #print " unmaintained", unmaintained_state
+        #print " maintained", maintained_state
+        #print " number of repairs", needed_repairs_total
+        #print " new shares generated", needed_new_shares_total
+        #repair_rate_inv = report_span / needed_repairs_total
+        #print "  avg repair rate: once every %s" % yandm(repair_rate_inv)
+        #print "  avg repair download: one share every %s" % yandm(repair_rate_inv/k)
+        #print "  avg repair upload: one share every %s" % yandm(report_span / needed_new_shares_total)
+
+        return report
+
+    def p_in_period(self, avg_lifetime, period):
+        """Given an average lifetime of a disk (using an exponential model),
+        what is the chance that a live disk will survive the next 'period'
+        seconds?"""
+
+        # eg p_in_period(8*YEAR, MONTH) = 98.94%
+        return math.exp(-1.0*period/avg_lifetime)
+
+    def build_decay_matrix(self, N, P):
+        """Return a decay matrix. decay[start_shares][end_shares] is the
+        conditional probability of finishing with end_shares, given that we
+        started with start_shares."""
+        decay_rows = []
+        decay_rows.append( [0.0]*(N+1) )
+        for start_shares in range(1, (N+1)):
+            end_shares = self.build_decay_row(start_shares, P)
+            decay_row = end_shares + [0.0] * (N-start_shares)
+            assert len(decay_row) == (N+1), len(decay_row)
+            decay_rows.append(decay_row)
+
+        decay = matrix(decay_rows)
+        return decay
+
+    def build_decay_row(self, start_shares, P):
+        """Return a decay row 'end_shares'. end_shares[i] is the chance that
+        we finish with i shares, given that we started with start_shares, for
+        all i between 0 and start_shares, inclusive. This implementation
+        assumes that all shares are independent (IID), but a more complex
+        model could incorporate inter-share failure correlations like having
+        two shares on the same server."""
+        end_shares = statistics.binomial_distribution_pmf(start_shares, P)
+        return end_shares
+
+    def build_repair_matrix(self, k, N, R):
+        """Return a repair matrix. repair[start][end]: is the conditional
+        probability of the repairer finishing with 'end' shares, given that
+        it began with 'start' shares (repair if fewer than R shares). The
+        repairer's behavior is deterministic, so all values in this matrix
+        are either 0 or 1. This matrix should be applied *after* the decay
+        matrix."""
+        new_repair_rows = []
+        for start_shares in range(0, N+1):
+            new_repair_row = [0] * (N+1)
+            if start_shares < k:
+                new_repair_row[start_shares] = 1
+            elif start_shares < R:
+                new_repair_row[N] = 1
+            else:
+                new_repair_row[start_shares] = 1
+            new_repair_rows.append(new_repair_row)
+
+        repair = matrix(new_repair_rows)
+        return repair
+
+class ReliabilityReport:
+    def __init__(self):
+        self.samples = []
+
+    def add_sample(self, when, unmaintained_shareprobs, maintained_shareprobs,
+                   P_repaired_last_check_period,
+                   cumulative_number_of_repairs,
+                   cumulative_number_of_new_shares,
+                   P_dead_unmaintained, P_dead_maintained):
+        """
+        when: the timestamp at the end of the report period
+        unmaintained_shareprobs: a vector of probabilities, element[S]
+                                 is the chance that there are S shares
+                                 left at the end of the report period.
+                                 This tracks what happens if no repair
+                                 is ever done.
+        maintained_shareprobs: same, but for 'maintained' grids, where
+                               check and repair is done at the end
+                               of each check period
+        P_repaired_last_check_period: a float, with the probability
+                                      that a repair was performed
+                                      at the end of the most recent
+                                      check period.
+        cumulative_number_of_repairs: a float, with the average number
+                                      of repairs that will have been
+                                      performed by the end of the
+                                      report period
+        cumulative_number_of_new_shares: a float, with the average number
+                                         of new shares that repair processes
+                                         generated by the end of the report
+                                         period
+        P_dead_unmaintained: a float, with the chance that the file will
+                             be unrecoverable at the end of the period
+        P_dead_maintained: same, but for maintained grids
+
+        """
+        row = (when, unmaintained_shareprobs, maintained_shareprobs,
+               P_repaired_last_check_period,
+               cumulative_number_of_repairs,
+               cumulative_number_of_new_shares,
+               P_dead_unmaintained, P_dead_maintained)
+        self.samples.append(row)
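
Assuming NumPy is installed, the moved model can be driven directly from a
script. A hedged usage sketch (the bare import assumes this directory is on
sys.path; the tuple order follows add_sample() above):

    from reliability import ReliabilityModel

    DAY = 24*60*60; MONTH = 31*DAY; YEAR = 365*DAY

    report = ReliabilityModel.run(drive_lifetime=8*YEAR, k=3, R=7, N=10,
                                  delta=1*MONTH, check_period=1*MONTH,
                                  report_period=3*MONTH, report_span=5*YEAR)

    # each sample is a tuple in the add_sample() argument order
    (when, unmaintained, maintained, P_repaired, n_repairs, n_new_shares,
     P_dead_unmaintained, P_dead_maintained) = report.samples[-1]
    print "P(dead, never repaired): %g" % P_dead_unmaintained
    print "P(dead, with repair):    %g" % P_dead_maintained
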
diff --git a/misc/operations_helpers/provisioning/reliability.xhtml b/misc/operations_helpers/provisioning/reliability.xhtml
new file mode 100644 (file)
index 0000000..f8d93d1
--- /dev/null
@@ -0,0 +1,63 @@
+<html xmlns:n="http://nevow.com/ns/nevow/0.1">
+  <head>
+    <title>Tahoe-LAFS - Reliability Tool</title>
+    <link href="/tahoe.css" rel="stylesheet" type="text/css"/>
+    <link href="/icon.png" rel="shortcut icon" />
+    <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+  </head>
+  <body>
+
+<h1>Tahoe-LAFS Reliability Tool</h1>
+
+<p>Given certain assumptions, this page calculates the probability of share loss
+over time, to help make informed decisions about how much redundancy and
+repair bandwidth to configure on a Tahoe-LAFS grid.</p>
+
+<div n:render="forms" />
+
+<h2>Simulation Results</h2>
+
+<p>At the end of the report span (elapsed time <span n:render="report_span"
+/>), the simulated file had the following properties:</p>
+
+<ul>
+    <li>Probability of loss (no maintenance):
+        <span n:render="P_loss_unmaintained"/></li>
+    <li>Probability of loss (with maintenance):
+        <span n:render="P_loss_maintained"/></li>
+    <li>Average repair frequency:
+        once every <span n:render="P_repair_rate"/> secs</li>
+    <li>Average shares generated per repair:
+        <span n:render="P_repair_shares"/></li>
+</ul>
+
+<p>This table shows how the following properties change over time:</p>
+<ul>
+  <li>P_repair: the chance that a repair was performed in the most recent
+  check period.</li>
+  <li>P_dead (unmaintained): the chance that the file will be unrecoverable
+  without periodic check+repair</li>
+  <li>P_dead (maintained): the chance that the file will be unrecoverable even
+  with periodic check+repair</li>
+</ul>
+
+<div>
+<table n:render="sequence" n:data="simulation_table">
+  <tr n:pattern="header">
+    <td>t</td>
+    <td>P_repair</td>
+    <td>P_dead (unmaintained)</td>
+    <td>P_dead (maintained)</td>
+  </tr>
+  <tr n:pattern="item" n:render="simulation_row">
+    <td><n:slot name="t"/></td>
+    <td><n:slot name="P_repair"/></td>
+    <td><n:slot name="P_dead_unmaintained"/></td>
+    <td><n:slot name="P_dead_maintained"/></td>
+  </tr>
+  <tr n:pattern="empty"><td>no simulation data!</td></tr>
+</table>
+</div>
+
+  </body>
+</html>
diff --git a/misc/operations_helpers/provisioning/test_provisioning.py b/misc/operations_helpers/provisioning/test_provisioning.py
new file mode 100644 (file)
index 0000000..71bc657
--- /dev/null
@@ -0,0 +1,113 @@
+
+from twisted.trial import unittest
+import provisioning
+ReliabilityModel = None
+try:
+    from reliability import ReliabilityModel
+except ImportError:
+    pass # might not be importable, since it needs NumPy
+
+from nevow import inevow
+from zope.interface import implements
+
+class MyRequest:
+    implements(inevow.IRequest)
+    pass
+
+class Provisioning(unittest.TestCase):
+    def getarg(self, name, astype=int):
+        if name in self.fields:
+            return astype(self.fields[name])
+        return None
+
+    def test_load(self):
+        pt = provisioning.ProvisioningTool()
+        self.fields = {}
+        #r = MyRequest()
+        #r.fields = self.fields
+        #ctx = RequestContext()
+        #unfilled = pt.renderSynchronously(ctx)
+        lots_of_stan = pt.do_forms(self.getarg)
+        self.failUnless(lots_of_stan is not None)
+
+        self.fields = {'filled': True,
+                       "num_users": 50e3,
+                       "files_per_user": 1000,
+                       "space_per_user": 1e9,
+                       "sharing_ratio": 1.0,
+                       "encoding_parameters": "3-of-10-5",
+                       "num_servers": 30,
+                       "ownership_mode": "A",
+                       "download_rate": 100,
+                       "upload_rate": 10,
+                       "delete_rate": 10,
+                       "lease_timer": 7,
+                       }
+        #filled = pt.renderSynchronously(ctx)
+        more_stan = pt.do_forms(self.getarg)
+        self.failUnless(more_stan is not None)
+
+        # trigger the wraparound configuration
+        self.fields["num_servers"] = 5
+        #filled = pt.renderSynchronously(ctx)
+        more_stan = pt.do_forms(self.getarg)
+
+        # and other ownership modes
+        self.fields["ownership_mode"] = "B"
+        more_stan = pt.do_forms(self.getarg)
+        self.fields["ownership_mode"] = "E"
+        more_stan = pt.do_forms(self.getarg)
+
+    def test_provisioning_math(self):
+        self.failUnlessEqual(provisioning.binomial(10, 0), 1)
+        self.failUnlessEqual(provisioning.binomial(10, 1), 10)
+        self.failUnlessEqual(provisioning.binomial(10, 2), 45)
+        self.failUnlessEqual(provisioning.binomial(10, 9), 10)
+        self.failUnlessEqual(provisioning.binomial(10, 10), 1)
+
+DAY=24*60*60
+MONTH=31*DAY
+YEAR=365*DAY
+
+class Reliability(unittest.TestCase):
+    def test_basic(self):
+        if ReliabilityModel is None:
+            raise unittest.SkipTest("reliability model requires NumPy")
+
+        # test that numpy math works the way I think it does
+        import numpy
+        decay = numpy.matrix([[1,0,0],
+                             [.1,.9,0],
+                             [.01,.09,.9],
+                             ])
+        start = numpy.array([0,0,1])
+        g2 = (start * decay).A[0]
+        self.failUnlessEqual(repr(g2), repr(numpy.array([.01,.09,.9])))
+        g3 = (g2 * decay).A[0]
+        self.failUnlessEqual(repr(g3), repr(numpy.array([.028,.162,.81])))
+
+        # and the dot product
+        recoverable = numpy.array([0,1,1])
+        P_recoverable_g2 = numpy.dot(g2, recoverable)
+        self.failUnlessAlmostEqual(P_recoverable_g2, .9 + .09)
+        P_recoverable_g3 = numpy.dot(g3, recoverable)
+        self.failUnlessAlmostEqual(P_recoverable_g3, .81 + .162)
+
+        r = ReliabilityModel.run(delta=100000,
+                                 report_period=3*MONTH,
+                                 report_span=5*YEAR)
+        self.failUnlessEqual(len(r.samples), 20)
+
+        last_row = r.samples[-1]
+        #print last_row
+        (when, unmaintained_shareprobs, maintained_shareprobs,
+         P_repaired_last_check_period,
+         cumulative_number_of_repairs,
+         cumulative_number_of_new_shares,
+         P_dead_unmaintained, P_dead_maintained) = last_row
+        self.failUnless(isinstance(P_repaired_last_check_period, float))
+        self.failUnless(isinstance(P_dead_unmaintained, float))
+        self.failUnless(isinstance(P_dead_maintained, float))
+        self.failUnlessAlmostEqual(P_dead_unmaintained, 0.033591004555395272)
+        self.failUnlessAlmostEqual(P_dead_maintained, 3.2983995819177542e-08)
+
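The expected vectors in test_basic can be verified by hand: each simulation
step multiplies the share-probability row vector by the decay matrix, so every
output element is a sum of column products. A standalone check of that
arithmetic:

    import numpy

    decay = numpy.matrix([[1,   0,   0 ],
                          [.1,  .9,  0 ],
                          [.01, .09, .9]])
    g2 = (numpy.array([0, 0, 1]) * decay).A[0]  # selects decay's last row
    g3 = (g2 * decay).A[0]
    # by hand: g3[0] = .01*1 + .09*.1 + .9*.01 = .028
    #          g3[1] =         .09*.9 + .9*.09 = .162
    #          g3[2] =                  .9*.9  = .810
    assert numpy.allclose(g3, [.028, .162, .81])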
diff --git a/misc/operations_helpers/provisioning/web_reliability.py b/misc/operations_helpers/provisioning/web_reliability.py
new file mode 100644 (file)
index 0000000..d5d3406
--- /dev/null
@@ -0,0 +1,152 @@
+
+from nevow import rend, tags as T
+reliability = None # might not be usable
+try:
+    import reliability # moved alongside this file; requires NumPy
+except ImportError:
+    pass
+from allmydata.web.common import getxmlfile, get_arg
+
+
+DAY=24*60*60
+MONTH=31*DAY
+YEAR=365*DAY
+
+def is_available():
+    return reliability is not None
+
+def yandm(seconds):
+    return "%dy.%dm" % (int(seconds/YEAR), int( (seconds%YEAR)/MONTH))
+
+class ReliabilityTool(rend.Page):
+    addSlash = True
+    docFactory = getxmlfile("reliability.xhtml")
+
+    DEFAULT_PARAMETERS = [
+        ("drive_lifetime", "8Y", "time",
+         "Average drive lifetime"),
+        ("k", 3, "int",
+         "Minimum number of shares needed to recover the file"),
+        ("R", 7, "int",
+         "Repair threshold: repair will not occur until fewer than R shares "
+         "are left"),
+        ("N", 10, "int",
+         "Total number of shares of the file generated"),
+        ("delta", "1M", "time", "Amount of time between each simulation step"),
+        ("check_period", "1M", "time",
+         "How often to run the checker and repair if fewer than R shares"),
+        ("report_period", "3M", "time",
+         "Amount of time between result rows in this report"),
+        ("report_span", "5Y", "time",
+         "Total amount of time covered by this report"),
+        ]
+
+    def parse_time(self, s):
+        if s.endswith("M"):
+            return int(s[:-1]) * MONTH
+        if s.endswith("Y"):
+            return int(s[:-1]) * YEAR
+        return int(s)
+
+    def format_time(self, s):
+        if s%YEAR == 0:
+            return "%dY" % (s/YEAR)
+        if s%MONTH == 0:
+            return "%dM" % (s/MONTH)
+        return "%d" % s
+
+    def get_parameters(self, ctx):
+        parameters = {}
+        for (name,default,argtype,description) in self.DEFAULT_PARAMETERS:
+            v = get_arg(ctx, name, default)
+            if argtype == "time":
+                value = self.parse_time(v)
+            else:
+                value = int(v)
+            parameters[name] = value
+        return parameters
+
+    def renderHTTP(self, ctx):
+        self.parameters = self.get_parameters(ctx)
+        self.results = reliability.ReliabilityModel.run(**self.parameters)
+        return rend.Page.renderHTTP(self, ctx)
+
+    def make_input(self, name, old_value):
+        return T.input(name=name, type="text", size="5",
+                       value=self.format_time(old_value))
+
+    def render_forms(self, ctx, data):
+        f = T.form(action=".", method="get")
+        table = []
+        for (name,default_value,argtype,description) in self.DEFAULT_PARAMETERS:
+            old_value = self.parameters[name]
+            i = self.make_input(name, old_value)
+            table.append(T.tr[T.td[name+":"], T.td[i], T.td[description]])
+        go = T.input(type="submit", value="Recompute")
+        return [T.h2["Simulation Parameters:"],
+                f[T.table[table], go],
+                ]
+
+    def data_simulation_table(self, ctx, data):
+        for row in self.results.samples:
+            yield row
+
+    def render_simulation_row(self, ctx, row):
+        (when, unmaintained_shareprobs, maintained_shareprobs,
+         P_repaired_last_check_period,
+         cumulative_number_of_repairs,
+         cumulative_number_of_new_shares,
+         P_dead_unmaintained, P_dead_maintained) = row
+        ctx.fillSlots("t", yandm(when))
+        ctx.fillSlots("P_repair", "%.6f" % P_repaired_last_check_period)
+        ctx.fillSlots("P_dead_unmaintained", "%.6g" % P_dead_unmaintained)
+        ctx.fillSlots("P_dead_maintained", "%.6g" % P_dead_maintained)
+        return ctx.tag
+
+    def render_report_span(self, ctx, row):
+        (when, unmaintained_shareprobs, maintained_shareprobs,
+         P_repaired_last_check_period,
+         cumulative_number_of_repairs,
+         cumulative_number_of_new_shares,
+         P_dead_unmaintained, P_dead_maintained) = self.results.samples[-1]
+        return ctx.tag[yandm(when)]
+
+    def render_P_loss_unmaintained(self, ctx, row):
+        (when, unmaintained_shareprobs, maintained_shareprobs,
+         P_repaired_last_check_period,
+         cumulative_number_of_repairs,
+         cumulative_number_of_new_shares,
+         P_dead_unmaintained, P_dead_maintained) = self.results.samples[-1]
+        return ctx.tag["%.6g (%1.8f%%)" % (P_dead_unmaintained,
+                                           100*P_dead_unmaintained)]
+
+    def render_P_loss_maintained(self, ctx, row):
+        (when, unmaintained_shareprobs, maintained_shareprobs,
+         P_repaired_last_check_period,
+         cumulative_number_of_repairs,
+         cumulative_number_of_new_shares,
+         P_dead_unmaintained, P_dead_maintained) = self.results.samples[-1]
+        return ctx.tag["%.6g (%1.8f%%)" % (P_dead_maintained,
+                                           100*P_dead_maintained)]
+
+    def render_P_repair_rate(self, ctx, row):
+        (when, unmaintained_shareprobs, maintained_shareprobs,
+         P_repaired_last_check_period,
+         cumulative_number_of_repairs,
+         cumulative_number_of_new_shares,
+         P_dead_unmaintained, P_dead_maintained) = self.results.samples[-1]
+        freq = when / cumulative_number_of_repairs
+        return ctx.tag["%.6g" % freq]
+
+    def render_P_repair_shares(self, ctx, row):
+        (when, unmaintained_shareprobs, maintained_shareprobs,
+         P_repaired_last_check_period,
+         cumulative_number_of_repairs,
+         cumulative_number_of_new_shares,
+         P_dead_unmaintained, P_dead_maintained) = self.results.samples[-1]
+        generated_shares = cumulative_number_of_new_shares / cumulative_number_of_repairs
+        return ctx.tag["%1.2f" % generated_shares]
+
+
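The class above is a thin wrapper: get_parameters() turns form fields into
keyword arguments for ReliabilityModel.run(), and each render_* method reads
fields out of the last sample row. A sketch of driving the model directly,
without the web UI (the bare import assumes reliability.py is importable from
the same directory, as arranged by this patch):

    from reliability import ReliabilityModel

    DAY = 24*60*60; MONTH = 31*DAY; YEAR = 365*DAY
    r = ReliabilityModel.run(drive_lifetime=8*YEAR, k=3, R=7, N=10,
                             delta=1*MONTH, check_period=1*MONTH,
                             report_period=3*MONTH, report_span=5*YEAR)
    (when, unmaintained, maintained, P_repair, num_repairs, num_new_shares,
     P_dead_unmaintained, P_dead_maintained) = r.samples[-1]
    print "P(loss, maintained) after %d years:" % (when/YEAR), P_dead_maintained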
diff --git a/src/allmydata/provisioning.py b/src/allmydata/provisioning.py
deleted file mode 100644 (file)
index 9d9af0e..0000000
+++ /dev/null
@@ -1,772 +0,0 @@
-
-from nevow import inevow, rend, tags as T
-import math
-from allmydata.util import mathutil
-from allmydata.web.common import getxmlfile
-
-# factorial and binomial copied from
-# http://mail.python.org/pipermail/python-list/2007-April/435718.html
-
-def factorial(n):
-    """factorial(n): return the factorial of the integer n.
-    factorial(0) = 1
-    factorial(n) with n<0 is -factorial(abs(n))
-    """
-    result = 1
-    for i in xrange(1, abs(n)+1):
-        result *= i
-    assert n >= 0
-    return result
-
-def binomial(n, k):
-    assert 0 <= k <= n
-    if k == 0 or k == n:
-        return 1
-    # calculate n!/k! as one product, avoiding factors that
-    # just get canceled
-    P = k+1
-    for i in xrange(k+2, n+1):
-        P *= i
-    # if you are paranoid:
-    # C, rem = divmod(P, factorial(n-k))
-    # assert rem == 0
-    # return C
-    return P//factorial(n-k)
-
-class ProvisioningTool(rend.Page):
-    addSlash = True
-    docFactory = getxmlfile("provisioning.xhtml")
-
-    def render_forms(self, ctx, data):
-        req = inevow.IRequest(ctx)
-
-        def getarg(name, astype=int):
-            if req.method != "POST":
-                return None
-            if name in req.fields:
-                return astype(req.fields[name].value)
-            return None
-        return self.do_forms(getarg)
-
-
-    def do_forms(self, getarg):
-        filled = getarg("filled", bool)
-
-        def get_and_set(name, options, default=None, astype=int):
-            current_value = getarg(name, astype)
-            i_select = T.select(name=name)
-            for (count, description) in options:
-                count = astype(count)
-                if ((current_value is not None and count == current_value) or
-                    (current_value is None and count == default)):
-                    o = T.option(value=str(count), selected="true")[description]
-                else:
-                    o = T.option(value=str(count))[description]
-                i_select = i_select[o]
-            if current_value is None:
-                current_value = default
-            return current_value, i_select
-
-        sections = {}
-        def add_input(section, text, entry):
-            if section not in sections:
-                sections[section] = []
-            sections[section].extend([T.div[text, ": ", entry], "\n"])
-
-        def add_output(section, entry):
-            if section not in sections:
-                sections[section] = []
-            sections[section].extend([entry, "\n"])
-
-        def build_section(section):
-            return T.fieldset[T.legend[section], sections[section]]
-
-        def number(value, suffix=""):
-            scaling = 1
-            if value < 1:
-                fmt = "%1.2g%s"
-            elif value < 100:
-                fmt = "%.1f%s"
-            elif value < 1000:
-                fmt = "%d%s"
-            elif value < 1e6:
-                fmt = "%.2fk%s"; scaling = 1e3
-            elif value < 1e9:
-                fmt = "%.2fM%s"; scaling = 1e6
-            elif value < 1e12:
-                fmt = "%.2fG%s"; scaling = 1e9
-            elif value < 1e15:
-                fmt = "%.2fT%s"; scaling = 1e12
-            elif value < 1e18:
-                fmt = "%.2fP%s"; scaling = 1e15
-            else:
-                fmt = "huge! %g%s"
-            return fmt % (value / scaling, suffix)
-
-        user_counts = [(5, "5 users"),
-                       (50, "50 users"),
-                       (200, "200 users"),
-                       (1000, "1k users"),
-                       (10000, "10k users"),
-                       (50000, "50k users"),
-                       (100000, "100k users"),
-                       (500000, "500k users"),
-                       (1000000, "1M users"),
-                       ]
-        num_users, i_num_users = get_and_set("num_users", user_counts, 50000)
-        add_input("Users",
-                  "How many users are on this network?", i_num_users)
-
-        files_per_user_counts = [(100, "100 files"),
-                                 (1000, "1k files"),
-                                 (10000, "10k files"),
-                                 (100000, "100k files"),
-                                 (1e6, "1M files"),
-                                 ]
-        files_per_user, i_files_per_user = get_and_set("files_per_user",
-                                                       files_per_user_counts,
-                                                       1000)
-        add_input("Users",
-                  "How many files for each user? (avg)",
-                  i_files_per_user)
-
-        space_per_user_sizes = [(1e6, "1MB"),
-                                (10e6, "10MB"),
-                                (100e6, "100MB"),
-                                (200e6, "200MB"),
-                                (1e9, "1GB"),
-                                (2e9, "2GB"),
-                                (5e9, "5GB"),
-                                (10e9, "10GB"),
-                                (100e9, "100GB"),
-                                (1e12, "1TB"),
-                                (2e12, "2TB"),
-                                (5e12, "5TB"),
-                                ]
-        # Estimate ~5gb per user as a more realistic case
-        space_per_user, i_space_per_user = get_and_set("space_per_user",
-                                                       space_per_user_sizes,
-                                                       5e9)
-        add_input("Users",
-                  "How much data for each user? (avg)",
-                  i_space_per_user)
-
-        sharing_ratios = [(1.0, "1.0x"),
-                          (1.1, "1.1x"),
-                          (2.0, "2.0x"),
-                          ]
-        sharing_ratio, i_sharing_ratio = get_and_set("sharing_ratio",
-                                                     sharing_ratios, 1.0,
-                                                     float)
-        add_input("Users",
-                  "What is the sharing ratio? (1.0x is no-sharing and"
-                  " no convergence)", i_sharing_ratio)
-
-        # Encoding parameters
-        encoding_choices = [("3-of-10-5", "3.3x (3-of-10, repair below 5)"),
-                            ("3-of-10-8", "3.3x (3-of-10, repair below 8)"),
-                            ("5-of-10-7", "2x (5-of-10, repair below 7)"),
-                            ("8-of-10-9", "1.25x (8-of-10, repair below 9)"),
-                            ("27-of-30-28", "1.1x (27-of-30, repair below 28"),
-                            ("25-of-100-50", "4x (25-of-100, repair below 50)"),
-                            ]
-        encoding_parameters, i_encoding_parameters = \
-                             get_and_set("encoding_parameters",
-                                         encoding_choices, "3-of-10-5", str)
-        encoding_pieces = encoding_parameters.split("-")
-        k = int(encoding_pieces[0])
-        assert encoding_pieces[1] == "of"
-        n = int(encoding_pieces[2])
-        # we repair the file when the number of available shares drops below
-        # this value
-        repair_threshold = int(encoding_pieces[3])
-
-        add_input("Servers",
-                  "What are the default encoding parameters?",
-                  i_encoding_parameters)
-
-        # Server info
-        num_server_choices = [ (5, "5 servers"),
-                               (10, "10 servers"),
-                               (15, "15 servers"),
-                               (30, "30 servers"),
-                               (50, "50 servers"),
-                               (100, "100 servers"),
-                               (200, "200 servers"),
-                               (300, "300 servers"),
-                               (500, "500 servers"),
-                               (1000, "1k servers"),
-                               (2000, "2k servers"),
-                               (5000, "5k servers"),
-                               (10e3, "10k servers"),
-                               (100e3, "100k servers"),
-                               (1e6, "1M servers"),
-                               ]
-        num_servers, i_num_servers = \
-                     get_and_set("num_servers", num_server_choices, 30, int)
-        add_input("Servers",
-                  "How many servers are there?", i_num_servers)
-
-        # availability is measured in dBA = -dBF, where 0dBF is 100% failure,
-        # 10dBF is 10% failure, 20dBF is 1% failure, etc
-        server_dBA_choices = [ (10, "90% [10dBA] (2.4hr/day)"),
-                               (13, "95% [13dBA] (1.2hr/day)"),
-                               (20, "99% [20dBA] (14min/day or 3.5days/year)"),
-                               (23, "99.5% [23dBA] (7min/day or 1.75days/year)"),
-                               (30, "99.9% [30dBA] (87sec/day or 9hours/year)"),
-                               (40, "99.99% [40dBA] (60sec/week or 53min/year)"),
-                               (50, "99.999% [50dBA] (5min per year)"),
-                               ]
-        server_dBA, i_server_availability = \
-                    get_and_set("server_availability",
-                                server_dBA_choices,
-                                20, int)
-        add_input("Servers",
-                  "What is the server availability?", i_server_availability)
-
-        drive_MTBF_choices = [ (40, "40,000 Hours"),
-                               ]
-        drive_MTBF, i_drive_MTBF = \
-                    get_and_set("drive_MTBF", drive_MTBF_choices, 40, int)
-        add_input("Drives",
-                  "What is the hard drive MTBF?", i_drive_MTBF)
-        # http://www.tgdaily.com/content/view/30990/113/
-        # http://labs.google.com/papers/disk_failures.pdf
-        # google sees:
-        #  1.7% of the drives they replaced were 0-1 years old
-        #  8% of the drives they replaced were 1-2 years old
-        #  8.6% were 2-3 years old
-        #  6% were 3-4 years old, about 8% were 4-5 years old
-
-        drive_size_choices = [ (100, "100 GB"),
-                               (250, "250 GB"),
-                               (500, "500 GB"),
-                               (750, "750 GB"),
-                               (1000, "1000 GB"),
-                               (2000, "2000 GB"),
-                               (3000, "3000 GB"),
-                               ]
-        drive_size, i_drive_size = \
-                    get_and_set("drive_size", drive_size_choices, 3000, int)
-        drive_size = drive_size * 1e9
-        add_input("Drives",
-                  "What is the capacity of each hard drive?", i_drive_size)
-        drive_failure_model_choices = [ ("E", "Exponential"),
-                                        ("U", "Uniform"),
-                                        ]
-        drive_failure_model, i_drive_failure_model = \
-                             get_and_set("drive_failure_model",
-                                         drive_failure_model_choices,
-                                         "E", str)
-        add_input("Drives",
-                  "How should we model drive failures?", i_drive_failure_model)
-
-        # drive_failure_rate is in failures per second
-        if drive_failure_model == "E":
-            drive_failure_rate = 1.0 / (drive_MTBF * 1000 * 3600)
-        else:
-            drive_failure_rate = 0.5 / (drive_MTBF * 1000 * 3600)
-
-        # deletion/gc/ownership mode
-        ownership_choices = [ ("A", "no deletion, no gc, no owners"),
-                              ("B", "deletion, no gc, no owners"),
-                              ("C", "deletion, share timers, no owners"),
-                              ("D", "deletion, no gc, yes owners"),
-                              ("E", "deletion, owner timers"),
-                              ]
-        ownership_mode, i_ownership_mode = \
-                        get_and_set("ownership_mode", ownership_choices,
-                                    "A", str)
-        add_input("Servers",
-                  "What is the ownership mode?", i_ownership_mode)
-
-        # client access behavior
-        access_rates = [ (1, "one file per day"),
-                         (10, "10 files per day"),
-                         (100, "100 files per day"),
-                         (1000, "1k files per day"),
-                         (10e3, "10k files per day"),
-                         (100e3, "100k files per day"),
-                         ]
-        download_files_per_day, i_download_rate = \
-                                get_and_set("download_rate", access_rates,
-                                            100, int)
-        add_input("Users",
-                  "How many files are downloaded per day?", i_download_rate)
-        download_rate = 1.0 * download_files_per_day / (24*60*60)
-
-        upload_files_per_day, i_upload_rate = \
-                              get_and_set("upload_rate", access_rates,
-                                          10, int)
-        add_input("Users",
-                  "How many files are uploaded per day?", i_upload_rate)
-        upload_rate = 1.0 * upload_files_per_day / (24*60*60)
-
-        delete_files_per_day, i_delete_rate = \
-                              get_and_set("delete_rate", access_rates,
-                                          10, int)
-        add_input("Users",
-                  "How many files are deleted per day?", i_delete_rate)
-        delete_rate = 1.0 * delete_files_per_day / (24*60*60)
-
-
-        # the value is in days
-        lease_timers = [ (1, "one refresh per day"),
-                         (7, "one refresh per week"),
-                         ]
-        lease_timer, i_lease = \
-                     get_and_set("lease_timer", lease_timers,
-                                 7, int)
-        add_input("Users",
-                  "How frequently do clients refresh files or accounts? "
-                  "(if necessary)",
-                  i_lease)
-        seconds_per_lease = 24*60*60*lease_timer
-
-        check_timer_choices = [ (1, "every week"),
-                                (4, "every month"),
-                                (8, "every two months"),
-                                (16, "every four months"),
-                                ]
-        check_timer, i_check_timer = \
-                     get_and_set("check_timer", check_timer_choices, 4, int)
-        add_input("Users",
-                  "How frequently should we check on each file?",
-                  i_check_timer)
-        file_check_interval = check_timer * 7 * 24 * 3600
-
-
-        if filled:
-            add_output("Users", T.div["Total users: %s" % number(num_users)])
-            add_output("Users",
-                       T.div["Files per user: %s" % number(files_per_user)])
-            file_size = 1.0 * space_per_user / files_per_user
-            add_output("Users",
-                       T.div["Average file size: ", number(file_size)])
-            total_files = num_users * files_per_user / sharing_ratio
-
-            add_output("Grid",
-                       T.div["Total number of files in grid: ",
-                             number(total_files)])
-            total_space = num_users * space_per_user / sharing_ratio
-            add_output("Grid",
-                       T.div["Total volume of plaintext in grid: ",
-                             number(total_space, "B")])
-
-            total_shares = n * total_files
-            add_output("Grid",
-                       T.div["Total shares in grid: ", number(total_shares)])
-            expansion = float(n) / float(k)
-
-            total_usage = expansion * total_space
-            add_output("Grid",
-                       T.div["Share data in grid: ", number(total_usage, "B")])
-
-            if n > num_servers:
-                # silly configuration, causes Tahoe2 to wrap and put multiple
-                # shares on some servers.
-                add_output("Servers",
-                           T.div["non-ideal: more shares than servers"
-                                 " (n=%d, servers=%d)" % (n, num_servers)])
-                # every file has at least one share on every server
-                buckets_per_server = total_files
-                shares_per_server = total_files * ((1.0 * n) / num_servers)
-            else:
-                # if nobody is full, then no lease requests will be turned
-                # down for lack of space, and no two shares for the same file
-                # will share a server. Therefore the chance that any given
-                # file has a share on any given server is n/num_servers.
-                buckets_per_server = total_files * ((1.0 * n) / num_servers)
-                # since each such represented file only puts one share on a
-                # server, the total number of shares per server is the same.
-                shares_per_server = buckets_per_server
-            add_output("Servers",
-                       T.div["Buckets per server: ",
-                             number(buckets_per_server)])
-            add_output("Servers",
-                       T.div["Shares per server: ",
-                             number(shares_per_server)])
-
-            # how much space is used on the storage servers for the shares?
-            #  the share data itself
-            share_data_per_server = total_usage / num_servers
-            add_output("Servers",
-                       T.div["Share data per server: ",
-                             number(share_data_per_server, "B")])
-            # this is determined empirically. H=hashsize=32, for a one-segment
-            # file and 3-of-10 encoding
-            share_validation_per_server = 266 * shares_per_server
-            # this could be 423*buckets_per_server, if we moved the URI
-            # extension into a separate file, but that would actually consume
-            # *more* space (minimum filesize is 4KiB), unless we moved all
-            # shares for a given bucket into a single file.
-            share_uri_extension_per_server = 423 * shares_per_server
-
-            # ownership mode adds per-bucket data
-            H = 32 # depends upon the desired security of delete/refresh caps
-            # bucket_lease_size is the amount of data needed to keep track of
-            # the delete/refresh caps for each bucket.
-            bucket_lease_size = 0
-            client_bucket_refresh_rate = 0
-            owner_table_size = 0
-            if ownership_mode in ("B", "C", "D", "E"):
-                bucket_lease_size = sharing_ratio * 1.0 * H
-            if ownership_mode in ("B", "C"):
-                # refreshes per second per client
-                client_bucket_refresh_rate = (1.0 * n * files_per_user /
-                                              seconds_per_lease)
-                add_output("Users",
-                           T.div["Client share refresh rate (outbound): ",
-                                 number(client_bucket_refresh_rate, "Hz")])
-                server_bucket_refresh_rate = (client_bucket_refresh_rate *
-                                              num_users / num_servers)
-                add_output("Servers",
-                           T.div["Server share refresh rate (inbound): ",
-                                 number(server_bucket_refresh_rate, "Hz")])
-            if ownership_mode in ("D", "E"):
-                # each server must maintain a bidirectional mapping from
-                # buckets to owners. One way to implement this would be to
-                # put a list of four-byte owner numbers into each bucket, and
-                # a list of four-byte share numbers into each owner (although
-                # of course we'd really just throw it into a database and let
-                # the experts take care of the details).
-                owner_table_size = 2*(buckets_per_server * sharing_ratio * 4)
-
-            if ownership_mode in ("E",):
-                # in this mode, clients must refresh one timer per server
-                client_account_refresh_rate = (1.0 * num_servers /
-                                               seconds_per_lease)
-                add_output("Users",
-                           T.div["Client account refresh rate (outbound): ",
-                                 number(client_account_refresh_rate, "Hz")])
-                server_account_refresh_rate = (client_account_refresh_rate *
-                                              num_users / num_servers)
-                add_output("Servers",
-                           T.div["Server account refresh rate (inbound): ",
-                                 number(server_account_refresh_rate, "Hz")])
-
-            # TODO: buckets vs shares here is a bit wonky, but in
-            # non-wrapping grids it shouldn't matter
-            share_lease_per_server = bucket_lease_size * buckets_per_server
-            share_ownertable_per_server = owner_table_size
-
-            share_space_per_server = (share_data_per_server +
-                                      share_validation_per_server +
-                                      share_uri_extension_per_server +
-                                      share_lease_per_server +
-                                      share_ownertable_per_server)
-            add_output("Servers",
-                       T.div["Share space per server: ",
-                             number(share_space_per_server, "B"),
-                             " (data ",
-                             number(share_data_per_server, "B"),
-                             ", validation ",
-                             number(share_validation_per_server, "B"),
-                             ", UEB ",
-                             number(share_uri_extension_per_server, "B"),
-                             ", lease ",
-                             number(share_lease_per_server, "B"),
-                             ", ownertable ",
-                             number(share_ownertable_per_server, "B"),
-                             ")",
-                             ])
-
-
-            # rates
-            client_download_share_rate = download_rate * k
-            client_download_byte_rate = download_rate * file_size
-            add_output("Users",
-                       T.div["download rate: shares = ",
-                             number(client_download_share_rate, "Hz"),
-                             " , bytes = ",
-                             number(client_download_byte_rate, "Bps"),
-                             ])
-            total_file_check_rate = 1.0 * total_files / file_check_interval
-            client_check_share_rate = total_file_check_rate / num_users
-            add_output("Users",
-                       T.div["file check rate: shares = ",
-                             number(client_check_share_rate, "Hz"),
-                             " (interval = %s)" %
-                             number(1 / client_check_share_rate, "s"),
-                             ])
-
-            client_upload_share_rate = upload_rate * n
-            # TODO: doesn't include overhead
-            client_upload_byte_rate = upload_rate * file_size * expansion
-            add_output("Users",
-                       T.div["upload rate: shares = ",
-                             number(client_upload_share_rate, "Hz"),
-                             " , bytes = ",
-                             number(client_upload_byte_rate, "Bps"),
-                             ])
-            client_delete_share_rate = delete_rate * n
-
-            server_inbound_share_rate = (client_upload_share_rate *
-                                         num_users / num_servers)
-            server_inbound_byte_rate = (client_upload_byte_rate *
-                                        num_users / num_servers)
-            add_output("Servers",
-                       T.div["upload rate (inbound): shares = ",
-                             number(server_inbound_share_rate, "Hz"),
-                             " , bytes = ",
-                              number(server_inbound_byte_rate, "Bps"),
-                             ])
-            add_output("Servers",
-                       T.div["share check rate (inbound): ",
-                             number(total_file_check_rate * n / num_servers,
-                                    "Hz"),
-                             ])
-
-            server_share_modify_rate = ((client_upload_share_rate +
-                                         client_delete_share_rate) *
-                                         num_users / num_servers)
-            add_output("Servers",
-                       T.div["share modify rate: shares = ",
-                             number(server_share_modify_rate, "Hz"),
-                             ])
-
-            server_outbound_share_rate = (client_download_share_rate *
-                                          num_users / num_servers)
-            server_outbound_byte_rate = (client_download_byte_rate *
-                                         num_users / num_servers)
-            add_output("Servers",
-                       T.div["download rate (outbound): shares = ",
-                             number(server_outbound_share_rate, "Hz"),
-                             " , bytes = ",
-                              number(server_outbound_byte_rate, "Bps"),
-                             ])
-
-
-            total_share_space = num_servers * share_space_per_server
-            add_output("Grid",
-                       T.div["Share space consumed: ",
-                             number(total_share_space, "B")])
-            add_output("Grid",
-                       T.div[" %% validation: %.2f%%" %
-                             (100.0 * share_validation_per_server /
-                              share_space_per_server)])
-            add_output("Grid",
-                       T.div[" %% uri-extension: %.2f%%" %
-                             (100.0 * share_uri_extension_per_server /
-                              share_space_per_server)])
-            add_output("Grid",
-                       T.div[" %% lease data: %.2f%%" %
-                             (100.0 * share_lease_per_server /
-                              share_space_per_server)])
-            add_output("Grid",
-                       T.div[" %% owner data: %.2f%%" %
-                             (100.0 * share_ownertable_per_server /
-                              share_space_per_server)])
-            add_output("Grid",
-                       T.div[" %% share data: %.2f%%" %
-                             (100.0 * share_data_per_server /
-                              share_space_per_server)])
-            add_output("Grid",
-                       T.div["file check rate: ",
-                             number(total_file_check_rate,
-                                    "Hz")])
-
-            total_drives = max(mathutil.div_ceil(int(total_share_space),
-                                                 int(drive_size)),
-                               num_servers)
-            add_output("Drives",
-                       T.div["Total drives: ", number(total_drives), " drives"])
-            drives_per_server = mathutil.div_ceil(total_drives, num_servers)
-            add_output("Servers",
-                       T.div["Drives per server: ", drives_per_server])
-
-            # costs
-            if drive_size == 3000 * 1e9:
-                add_output("Servers", T.div["3000GB drive: $250 each"])
-                drive_cost = 250
-            else:
-                add_output("Servers",
-                           T.div[T.b["unknown cost per drive, assuming $100"]])
-                drive_cost = 100
-
-            if drives_per_server <= 4:
-                add_output("Servers", T.div["1U box with <= 4 drives: $1500"])
-                server_cost = 1500 # typical 1U box
-            elif drives_per_server <= 12:
-                add_output("Servers", T.div["2U box with <= 12 drives: $2500"])
-                server_cost = 2500 # 2U box
-            else:
-                add_output("Servers",
-                           T.div[T.b["Note: too many drives per server, "
-                                     "assuming $3000"]])
-                server_cost = 3000
-
-            server_capital_cost = (server_cost + drives_per_server * drive_cost)
-            total_server_cost = float(num_servers * server_capital_cost)
-            add_output("Servers", T.div["Capital cost per server: $",
-                                        server_capital_cost])
-            add_output("Grid", T.div["Capital cost for all servers: $",
-                                     number(total_server_cost)])
-            # $70/Mbps/mo
-            # $44/server/mo power+space
-            server_bandwidth = max(server_inbound_byte_rate,
-                                   server_outbound_byte_rate)
-            server_bandwidth_mbps = mathutil.div_ceil(int(server_bandwidth*8),
-                                                      int(1e6))
-            server_monthly_cost = 70*server_bandwidth_mbps + 44
-            add_output("Servers", T.div["Monthly cost per server: $",
-                                        server_monthly_cost])
-            add_output("Users", T.div["Capital cost per user: $",
-                                      number(total_server_cost / num_users)])
-
-            # reliability
-            any_drive_failure_rate = total_drives * drive_failure_rate
-            any_drive_MTBF = 1 // any_drive_failure_rate  # in seconds
-            any_drive_MTBF_days = any_drive_MTBF / 86400
-            add_output("Drives",
-                       T.div["MTBF (any drive): ",
-                             number(any_drive_MTBF_days), " days"])
-            drive_replacement_monthly_cost = (float(drive_cost)
-                                              * any_drive_failure_rate
-                                              *30*86400)
-            add_output("Grid",
-                       T.div["Monthly cost of replacing drives: $",
-                             number(drive_replacement_monthly_cost)])
-
-            total_server_monthly_cost = float(num_servers * server_monthly_cost
-                                              + drive_replacement_monthly_cost)
-
-            add_output("Grid", T.div["Monthly cost for all servers: $",
-                                     number(total_server_monthly_cost)])
-            add_output("Users",
-                       T.div["Monthly cost per user: $",
-                             number(total_server_monthly_cost / num_users)])
-
-            # availability
-            file_dBA = self.file_availability(k, n, server_dBA)
-            user_files_dBA = self.many_files_availability(file_dBA,
-                                                          files_per_user)
-            all_files_dBA = self.many_files_availability(file_dBA, total_files)
-            add_output("Users",
-                       T.div["availability of: ",
-                             "arbitrary file = %d dBA, " % file_dBA,
-                             "all files of user1 = %d dBA, " % user_files_dBA,
-                             "all files in grid = %d dBA" % all_files_dBA,
-                             ],
-                       )
-
-            time_until_files_lost = (n-k+1) / any_drive_failure_rate
-            add_output("Grid",
-                       T.div["avg time until files are lost: ",
-                             number(time_until_files_lost, "s"), ", ",
-                             number(time_until_files_lost/86400, " days"),
-                             ])
-
-            share_data_loss_rate = any_drive_failure_rate * drive_size
-            add_output("Grid",
-                       T.div["share data loss rate: ",
-                             number(share_data_loss_rate,"Bps")])
-
-            # the worst-case survival numbers occur when we do a file check
-            # and the file is just above the threshold for repair (so we
-            # decide to not repair it). The question is then: what is the
-            # chance that the file will decay so badly before the next check
-            # that we can't recover it? The resulting probability is per
-            # check interval.
-            # Note that the chances of us getting into this situation are low.
-            P_disk_failure_during_interval = (drive_failure_rate *
-                                              file_check_interval)
-            disk_failure_dBF = 10*math.log10(P_disk_failure_during_interval)
-            disk_failure_dBA = -disk_failure_dBF
-            file_survives_dBA = self.file_availability(k, repair_threshold,
-                                                       disk_failure_dBA)
-            user_files_survives_dBA = self.many_files_availability( \
-                file_survives_dBA, files_per_user)
-            all_files_survives_dBA = self.many_files_availability( \
-                file_survives_dBA, total_files)
-            add_output("Users",
-                       T.div["survival of: ",
-                             "arbitrary file = %d dBA, " % file_survives_dBA,
-                             "all files of user1 = %d dBA, " %
-                             user_files_survives_dBA,
-                             "all files in grid = %d dBA" %
-                             all_files_survives_dBA,
-                             " (per worst-case check interval)",
-                             ])
-
-
-
-        all_sections = []
-        all_sections.append(build_section("Users"))
-        all_sections.append(build_section("Servers"))
-        all_sections.append(build_section("Drives"))
-        if "Grid" in sections:
-            all_sections.append(build_section("Grid"))
-
-        f = T.form(action=".", method="post", enctype="multipart/form-data")
-
-        if filled:
-            action = "Recompute"
-        else:
-            action = "Compute"
-
-        f = f[T.input(type="hidden", name="filled", value="true"),
-              T.input(type="submit", value=action),
-              all_sections,
-              ]
-
-        try:
-            from allmydata import reliability
-            # we import this just to test to see if the page is available
-            _hush_pyflakes = reliability
-            del _hush_pyflakes
-            f = [T.div[T.a(href="../reliability")["Reliability Math"]], f]
-        except ImportError:
-            pass
-
-        return f
-
-    def file_availability(self, k, n, server_dBA):
-        """
-        The full formula for the availability of a specific file is::
-
-         1 - sum([choose(N,i) * p**i * (1-p)**(N-i) for i in range(k)])
-
-        Where choose(N,i) = N! / ( i! * (N-i)! ) . Note that each term of
-        this summation is the probability that there are exactly 'i' servers
-        available, and what we're doing is adding up the cases where i is too
-        low.
-
-        This is a nuisance to calculate at all accurately, especially once N
-        gets large, and when p is close to unity. So we make an engineering
-        approximation: if (1-p) is very small, then each [i] term is much
-        larger than the [i-1] term, and the sum is dominated by the i=k-1
-        term. This only works for (1-p) < 10%, and when the choose() function
-        doesn't rise fast enough to compensate. For high-expansion encodings
-        (3-of-10, 25-of-100), the choose() function is rising at the same
-        time as the (1-p)**(N-i) term, so that's not an issue. For
-        low-expansion encodings (7-of-10, 75-of-100) the two values are
-        moving in opposite directions, so more care must be taken.
-
-        Note that the p**i term has only a minor effect as long as (1-p)*N is
-        small, and even then the effect is attenuated by the 1-p term.
-        """
-
-        assert server_dBA > 9  # >=90% availability to use the approximation
-        factor = binomial(n, k-1)
-        factor_dBA = 10 * math.log10(factor)
-        exponent = n - k + 1
-        file_dBA = server_dBA * exponent - factor_dBA
-        return file_dBA
-
-    def many_files_availability(self, file_dBA, num_files):
-        """The probability that 'num_files' independent bernoulli trials will
-        succeed (i.e. we can recover all files in the grid at any given
-        moment) is p**num_files . Since p is close to unity, we express in p
-        in dBA instead, so we can get useful precision on q (=1-p), and then
-        the formula becomes::
-
-         P_some_files_unavailable = 1 - (1 - q)**num_files
-
-        That (1-q)**n expands with the usual binomial sequence, 1 - nq +
-        Xq**2 ... + Xq**n . We use the same approximation as before, since we
-        know q is close to zero, and we get to ignore all the terms past -nq.
-        """
-
-        many_files_dBA = file_dBA - 10 * math.log10(num_files)
-        return many_files_dBA
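
The dBA approximations in file_availability() and many_files_availability()
reduce to quick arithmetic. A worked check for the tool's default 3-of-10
encoding on 99%-available (20 dBA) servers, with a local binomial() standing
in for the module's own:

    import math

    def binomial(n, k):
        return math.factorial(n) // (math.factorial(k) * math.factorial(n-k))

    k, n, server_dBA = 3, 10, 20                      # 3-of-10, 99% servers
    factor_dBA = 10 * math.log10(binomial(n, k-1))    # choose(10,2)=45 -> ~16.5
    file_dBA = server_dBA * (n - k + 1) - factor_dBA  # 160 - 16.5 = ~143.5 dBA
    user_dBA = file_dBA - 10 * math.log10(1000)       # 1000 files -> ~113.5 dBA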
diff --git a/src/allmydata/reliability.py b/src/allmydata/reliability.py
deleted file mode 100644 (file)
index a0d6076..0000000
+++ /dev/null
@@ -1,251 +0,0 @@
-#! /usr/bin/python
-
-import math
-from allmydata.util import statistics
-from numpy import array, matrix, dot
-
-DAY=24*60*60
-MONTH=31*DAY
-YEAR=365*DAY
-
-class ReliabilityModel:
-    """Generate a model of system-wide reliability, given several input
-    parameters.
-
-    This runs a simulation in which time is quantized down to 'delta' seconds
-    (default is one month): a smaller delta will result in a more accurate
-    simulation, but will take longer to run. 'report_span' simulated seconds
-    will be run.
-
-    The encoding parameters are provided as 'k' (minimum number of shares
-    needed to recover the file) and 'N' (total number of shares generated).
-    The default parameters are 3-of-10.
-
-    The first step is to build a probability of individual drive loss during
-    any given delta. This uses a simple exponential model, in which the
-    average drive lifetime is specified by the 'drive_lifetime' parameter
-    (default is 8 years).
-
-    The second step is to calculate a 'transition matrix': a table of
-    probabilities that shows, given A shares at the start of the delta, what
-    the chances are of having B shares left at the end of the delta. The
-    current code optimistically assumes all drives are independent. A
-    subclass could override that assumption.
-
-    An additional 'repair matrix' is created to show what happens when the
-    Checker/Repairer is run. In the simulation, the Checker will be run every
-    'check_period' seconds (default is one month), and the Repairer will be
-    run if it sees fewer than 'R' shares (default 7).
-
-    The third step is to finally run the simulation. An initial probability
-    vector is created (with a 100% chance of N shares and a 0% chance of
-    fewer than N shares), then it is multiplied by the transition matrix for
-    every delta of time. Each time the Checker is to be run, the repair
-    matrix is multiplied in, and some additional stats are accumulated
-    (average number of repairs that occur, average number of shares
-    regenerated per repair).
-
-    The output is a ReliabilityReport instance, which contains a table that
-    samples the state of the simulation once each 'report_period' seconds
-    (defaults to 3 months). Each row of this table will contain the
-    probability vector for one sample period (chance of having X shares, from
-    0 to N, at the end of the period). The report will also contain other
-    information.
-
-    """
-
-    @classmethod
-    def run(klass,
-            drive_lifetime=8*YEAR,
-            k=3, R=7, N=10,
-            delta=1*MONTH,
-            check_period=1*MONTH,
-            report_period=3*MONTH,
-            report_span=5*YEAR,
-            ):
-        self = klass()
-
-        check_period = check_period-1
-        P = self.p_in_period(drive_lifetime, delta)
-
-        decay = self.build_decay_matrix(N, P)
-
-        repair = self.build_repair_matrix(k, N, R)
-
-        #print "DECAY:", decay
-        #print "OLD-POST-REPAIR:", old_post_repair
-        #print "NEW-POST-REPAIR:", decay * repair
-        #print "REPAIR:", repair
-        #print "DIFF:", (old_post_repair - decay * repair)
-
-        START = array([0]*N + [1])
-        DEAD = array([1]*k + [0]*(1+N-k))
-        REPAIRp = array([0]*k + [1]*(R-k) + [0]*(1+N-R))
-        REPAIR_newshares = array([0]*k +
-                                 [N-i for i in range(k, R)] +
-                                 [0]*(1+N-R))
-        assert REPAIR_newshares.shape[0] == N+1
-        #print "START", START
-        #print "REPAIRp", REPAIRp
-        #print "REPAIR_newshares", REPAIR_newshares
-
-        unmaintained_state = START
-        maintained_state = START
-        last_check = 0
-        last_report = 0
-        P_repaired_last_check_period = 0.0
-        needed_repairs = []
-        needed_new_shares = []
-        report = ReliabilityReport()
-
-        for t in range(0, report_span+delta, delta):
-            # the .A[0] turns the one-row matrix back into an array
-            unmaintained_state = (unmaintained_state * decay).A[0]
-            maintained_state = (maintained_state * decay).A[0]
-            if (t-last_check) > check_period:
-                last_check = t
-                # we do a check-and-repair this frequently
-                need_repair = dot(maintained_state, REPAIRp)
-
-                P_repaired_last_check_period = need_repair
-                new_shares = dot(maintained_state, REPAIR_newshares)
-                needed_repairs.append(need_repair)
-                needed_new_shares.append(new_shares)
-
-                maintained_state = (maintained_state * repair).A[0]
-
-            if (t-last_report) > report_period:
-                last_report = t
-                P_dead_unmaintained = dot(unmaintained_state, DEAD)
-                P_dead_maintained = dot(maintained_state, DEAD)
-                cumulative_number_of_repairs = sum(needed_repairs)
-                cumulative_number_of_new_shares = sum(needed_new_shares)
-                report.add_sample(t, unmaintained_state, maintained_state,
-                                  P_repaired_last_check_period,
-                                  cumulative_number_of_repairs,
-                                  cumulative_number_of_new_shares,
-                                  P_dead_unmaintained, P_dead_maintained)
-
-        # record one more sample at the end of the run
-        P_dead_unmaintained = dot(unmaintained_state, DEAD)
-        P_dead_maintained = dot(maintained_state, DEAD)
-        cumulative_number_of_repairs = sum(needed_repairs)
-        cumulative_number_of_new_shares = sum(needed_new_shares)
-        report.add_sample(t, unmaintained_state, maintained_state,
-                          P_repaired_last_check_period,
-                          cumulative_number_of_repairs,
-                          cumulative_number_of_new_shares,
-                          P_dead_unmaintained, P_dead_maintained)
-
-        #def yandm(seconds):
-        #    return "%dy.%dm" % (int(seconds/YEAR), int( (seconds%YEAR)/MONTH))
-        #needed_repairs_total = sum(needed_repairs)
-        #needed_new_shares_total = sum(needed_new_shares)
-        #print "at 2y:"
-        #print " unmaintained", unmaintained_state
-        #print " maintained", maintained_state
-        #print " number of repairs", needed_repairs_total
-        #print " new shares generated", needed_new_shares_total
-        #repair_rate_inv = report_span / needed_repairs_total
-        #print "  avg repair rate: once every %s" % yandm(repair_rate_inv)
-        #print "  avg repair download: one share every %s" % yandm(repair_rate_inv/k)
-        #print "  avg repair upload: one share every %s" % yandm(report_span / needed_new_shares_total)
-
-        return report
-
-    def p_in_period(self, avg_lifetime, period):
-        """Given an average lifetime of a disk (using an exponential model),
-        what is the chance that a live disk will survive the next 'period'
-        seconds?"""
-
-        # eg p_in_period(8*YEAR, MONTH) = 98.94%
-        return math.exp(-1.0*period/avg_lifetime)
-
-    def build_decay_matrix(self, N, P):
-        """Return a decay matrix. decay[start_shares][end_shares] is the
-        conditional probability of finishing with end_shares, given that we
-        started with start_shares."""
-        decay_rows = []
-        decay_rows.append( [0.0]*(N+1) )
-        for start_shares in range(1, (N+1)):
-            end_shares = self.build_decay_row(start_shares, P)
-            decay_row = end_shares + [0.0] * (N-start_shares)
-            assert len(decay_row) == (N+1), len(decay_row)
-            decay_rows.append(decay_row)
-
-        decay = matrix(decay_rows)
-        return decay
-
-    def build_decay_row(self, start_shares, P):
-        """Return a decay row 'end_shares'. end_shares[i] is the chance that
-        we finish with i shares, given that we started with start_shares, for
-        all i between 0 and start_shares, inclusive. This implementation
-        assumes that all shares are independent (IID), but a more complex
-        model could incorporate inter-share failure correlations like having
-        two shares on the same server."""
-        end_shares = statistics.binomial_distribution_pmf(start_shares, P)
-        return end_shares
-
-    def build_repair_matrix(self, k, N, R):
-        """Return a repair matrix. repair[start][end]: is the conditional
-        probability of the repairer finishing with 'end' shares, given that
-        it began with 'start' shares (repair if fewer than R shares). The
-        repairer's behavior is deterministic, so all values in this matrix
-        are either 0 or 1. This matrix should be applied *after* the decay
-        matrix."""
-        new_repair_rows = []
-        for start_shares in range(0, N+1):
-            new_repair_row = [0] * (N+1)
-            if start_shares < k:
-                new_repair_row[start_shares] = 1
-            elif start_shares < R:
-                new_repair_row[N] = 1
-            else:
-                new_repair_row[start_shares] = 1
-            new_repair_rows.append(new_repair_row)
-
-        repair = matrix(new_repair_rows)
-        return repair
-
-class ReliabilityReport:
-    def __init__(self):
-        self.samples = []
-
-    def add_sample(self, when, unmaintained_shareprobs, maintained_shareprobs,
-                   P_repaired_last_check_period,
-                   cumulative_number_of_repairs,
-                   cumulative_number_of_new_shares,
-                   P_dead_unmaintained, P_dead_maintained):
-        """
-        when: the timestamp at the end of the report period
-        unmaintained_shareprobs: a vector of probabilities, element[S]
-                                 is the chance that there are S shares
-                                 left at the end of the report period.
-                                 This tracks what happens if no repair
-                                 is ever done.
-        maintained_shareprobs: same, but for 'maintained' grids, where
-                               check and repair is done at the end
-                               of each check period
-        P_repaired_last_check_period: a float, with the probability
-                                      that a repair was performed
-                                      at the end of the most recent
-                                      check period.
-        cumulative_number_of_repairs: a float, with the average number
-                                      of repairs that will have been
-                                      performed by the end of the
-                                      report period
-        cumulative_number_of_new_shares: a float, with the average number
-                                         of new shares that repair processes
-                                         generated by the end of the report
-                                         period
-        P_dead_unmaintained: a float, with the chance that the file will
-                             be unrecoverable at the end of the period
-        P_dead_maintained: same, but for maintained grids
-
-        """
-        row = (when, unmaintained_shareprobs, maintained_shareprobs,
-               P_repaired_last_check_period,
-               cumulative_number_of_repairs,
-               cumulative_number_of_new_shares,
-               P_dead_unmaintained, P_dead_maintained)
-        self.samples.append(row)
diff --git a/src/allmydata/test/test_provisioning.py b/src/allmydata/test/test_provisioning.py
deleted file mode 100644 (file)
index 71bc657..0000000
+++ /dev/null
@@ -1,113 +0,0 @@
-
-from twisted.trial import unittest
-from allmydata import provisioning
-ReliabilityModel = None
-try:
-    from allmydata.reliability import ReliabilityModel
-except ImportError:
-    pass # might not be importable, since it needs NumPy
-
-from nevow import inevow
-from zope.interface import implements
-
-class MyRequest:
-    implements(inevow.IRequest)
-    pass
-
-class Provisioning(unittest.TestCase):
-    def getarg(self, name, astype=int):
-        if name in self.fields:
-            return astype(self.fields[name])
-        return None
-
-    def test_load(self):
-        pt = provisioning.ProvisioningTool()
-        self.fields = {}
-        #r = MyRequest()
-        #r.fields = self.fields
-        #ctx = RequestContext()
-        #unfilled = pt.renderSynchronously(ctx)
-        lots_of_stan = pt.do_forms(self.getarg)
-        self.failUnless(lots_of_stan is not None)
-
-        self.fields = {'filled': True,
-                       "num_users": 50e3,
-                       "files_per_user": 1000,
-                       "space_per_user": 1e9,
-                       "sharing_ratio": 1.0,
-                       "encoding_parameters": "3-of-10-5",
-                       "num_servers": 30,
-                       "ownership_mode": "A",
-                       "download_rate": 100,
-                       "upload_rate": 10,
-                       "delete_rate": 10,
-                       "lease_timer": 7,
-                       }
-        #filled = pt.renderSynchronously(ctx)
-        more_stan = pt.do_forms(self.getarg)
-        self.failUnless(more_stan is not None)
-
-        # trigger the wraparound configuration
-        self.fields["num_servers"] = 5
-        #filled = pt.renderSynchronously(ctx)
-        more_stan = pt.do_forms(self.getarg)
-
-        # and other ownership modes
-        self.fields["ownership_mode"] = "B"
-        more_stan = pt.do_forms(self.getarg)
-        self.fields["ownership_mode"] = "E"
-        more_stan = pt.do_forms(self.getarg)
-
-    def test_provisioning_math(self):
-        self.failUnlessEqual(provisioning.binomial(10, 0), 1)
-        self.failUnlessEqual(provisioning.binomial(10, 1), 10)
-        self.failUnlessEqual(provisioning.binomial(10, 2), 45)
-        self.failUnlessEqual(provisioning.binomial(10, 9), 10)
-        self.failUnlessEqual(provisioning.binomial(10, 10), 1)
-
-DAY=24*60*60
-MONTH=31*DAY
-YEAR=365*DAY
-
-class Reliability(unittest.TestCase):
-    def test_basic(self):
-        if ReliabilityModel is None:
-            raise unittest.SkipTest("reliability model requires NumPy")
-
-        # test that numpy math works the way I think it does
-        import numpy
-        decay = numpy.matrix([[1,0,0],
-                              [.1,.9,0],
-                              [.01,.09,.9],
-                              ])
-        start = numpy.array([0,0,1])
-        g2 = (start * decay).A[0]
-        self.failUnlessEqual(repr(g2), repr(numpy.array([.01,.09,.9])))
-        g3 = (g2 * decay).A[0]
-        self.failUnlessEqual(repr(g3), repr(numpy.array([.028,.162,.81])))
-
-        # and the dot product
-        recoverable = numpy.array([0,1,1])
-        P_recoverable_g2 = numpy.dot(g2, recoverable)
-        self.failUnlessAlmostEqual(P_recoverable_g2, .9 + .09)
-        P_recoverable_g3 = numpy.dot(g3, recoverable)
-        self.failUnlessAlmostEqual(P_recoverable_g3, .81 + .162)
-
-        r = ReliabilityModel.run(delta=100000,
-                                 report_period=3*MONTH,
-                                 report_span=5*YEAR)
-        self.failUnlessEqual(len(r.samples), 20)
-
-        last_row = r.samples[-1]
-        #print last_row
-        (when, unmaintained_shareprobs, maintained_shareprobs,
-         P_repaired_last_check_period,
-         cumulative_number_of_repairs,
-         cumulative_number_of_new_shares,
-         P_dead_unmaintained, P_dead_maintained) = last_row
-        self.failUnless(isinstance(P_repaired_last_check_period, float))
-        self.failUnless(isinstance(P_dead_unmaintained, float))
-        self.failUnless(isinstance(P_dead_maintained, float))
-        self.failUnlessAlmostEqual(P_dead_unmaintained, 0.033591004555395272)
-        self.failUnlessAlmostEqual(P_dead_maintained, 3.2983995819177542e-08)
-
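Two of the magic numbers above are easy to double-check by hand (a Python 3 sketch; the 20-sample count assumes one report row per report_period, which is what the assertion implies):

    import math

    DAY = 24 * 60 * 60
    MONTH = 31 * DAY
    YEAR = 365 * DAY

    # the coefficients asserted in test_provisioning_math
    assert [math.comb(10, i) for i in (0, 1, 2, 9, 10)] == [1, 10, 45, 10, 1]

    # report_span / report_period: 5*365 days / (3*31 days) = 19.6..., i.e. 20 rows
    print(math.ceil(5 * YEAR / (3 * MONTH)))  # -> 20
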
index d918bc15412a0cdb972a10061537d40012546f96..edafd24250f1279f464e9e420b614b98c73560a5 100644 (file)
@@ -512,90 +512,6 @@ class Web(WebMixin, WebErrorMixin, testutil.StallMixin, testutil.ReallyEqualMixi
         d.addCallback(_check)
         return d
 
-    def test_provisioning(self):
-        d = self.GET("/provisioning/")
-        def _check(res):
-            self.failUnlessIn('Provisioning Tool', res)
-            self.failUnlessIn(FAVICON_MARKUP, res)
-
-            fields = {'filled': True,
-                      "num_users": int(50e3),
-                      "files_per_user": 1000,
-                      "space_per_user": int(1e9),
-                      "sharing_ratio": 1.0,
-                      "encoding_parameters": "3-of-10-5",
-                      "num_servers": 30,
-                      "ownership_mode": "A",
-                      "download_rate": 100,
-                      "upload_rate": 10,
-                      "delete_rate": 10,
-                      "lease_timer": 7,
-                      }
-            return self.POST("/provisioning/", **fields)
-
-        d.addCallback(_check)
-        def _check2(res):
-            self.failUnlessIn('Provisioning Tool', res)
-            self.failUnlessIn(FAVICON_MARKUP, res)
-            self.failUnlessIn("Share space consumed: 167.01TB", res)
-
-            fields = {'filled': True,
-                      "num_users": int(50e6),
-                      "files_per_user": 1000,
-                      "space_per_user": int(5e9),
-                      "sharing_ratio": 1.0,
-                      "encoding_parameters": "25-of-100-50",
-                      "num_servers": 30000,
-                      "ownership_mode": "E",
-                      "drive_failure_model": "U",
-                      "drive_size": 1000,
-                      "download_rate": 1000,
-                      "upload_rate": 100,
-                      "delete_rate": 100,
-                      "lease_timer": 7,
-                      }
-            return self.POST("/provisioning/", **fields)
-        d.addCallback(_check2)
-        def _check3(res):
-            self.failUnlessIn("Share space consumed: huge!", res)
-            fields = {'filled': True}
-            return self.POST("/provisioning/", **fields)
-        d.addCallback(_check3)
-        def _check4(res):
-            self.failUnlessIn("Share space consumed:", res)
-        d.addCallback(_check4)
-        return d
-
-    def test_reliability_tool(self):
-        try:
-            from allmydata import reliability
-            _hush_pyflakes = reliability
-            del _hush_pyflakes
-        except:
-            raise unittest.SkipTest("reliability tool requires NumPy")
-
-        d = self.GET("/reliability/")
-        def _check(res):
-            self.failUnlessIn('Reliability Tool', res)
-            fields = {'drive_lifetime': "8Y",
-                      "k": "3",
-                      "R": "7",
-                      "N": "10",
-                      "delta": "100000",
-                      "check_period": "1M",
-                      "report_period": "3M",
-                      "report_span": "5Y",
-                      }
-            return self.POST("/reliability/", **fields)
-
-        d.addCallback(_check)
-        def _check2(res):
-            self.failUnlessIn('Reliability Tool', res)
-            r = r'Probability of loss \(no maintenance\):\s+<span>0.033591'
-            self.failUnless(re.search(r, res), res)
-        d.addCallback(_check2)
-        return d
-
     def test_status(self):
         h = self.s.get_history()
         dl_num = h.list_all_download_statuses()[0].get_counter()
diff --git a/src/allmydata/web/provisioning.xhtml b/src/allmydata/web/provisioning.xhtml
deleted file mode 100644 (file)
index bfa4edb..0000000
+++ /dev/null
@@ -1,18 +0,0 @@
-<html xmlns:n="http://nevow.com/ns/nevow/0.1">
-  <head>
-    <title>Tahoe-LAFS - Provisioning Tool</title>
-    <link href="/tahoe.css" rel="stylesheet" type="text/css"/>
-    <link href="/icon.png" rel="shortcut icon" />
-    <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
-  </head>
-  <body>
-
-<h1>Tahoe-LAFS Provisioning Tool</h1>
-
-<p>This page will help you determine how much disk space and network
-bandwidth will be required by various sizes and types of Tahoe-LAFS networks.</p>
-
-<div n:render="forms" />
-
-  </body>
-</html>
diff --git a/src/allmydata/web/reliability.py b/src/allmydata/web/reliability.py
deleted file mode 100644 (file)
index d5d3406..0000000
+++ /dev/null
@@ -1,152 +0,0 @@
-
-from nevow import rend, tags as T
-reliability = None # might not be usable
-try:
-    from allmydata import reliability # requires NumPy
-except ImportError:
-    pass
-from allmydata.web.common import getxmlfile, get_arg
-
-
-DAY=24*60*60
-MONTH=31*DAY
-YEAR=365*DAY
-
-def is_available():
-    if reliability:
-        return True
-    return False
-
-def yandm(seconds):
-    return "%dy.%dm" % (int(seconds/YEAR), int( (seconds%YEAR)/MONTH))
-
-class ReliabilityTool(rend.Page):
-    addSlash = True
-    docFactory = getxmlfile("reliability.xhtml")
-
-    DEFAULT_PARAMETERS = [
-        ("drive_lifetime", "8Y", "time",
-         "Average drive lifetime"),
-        ("k", 3, "int",
-         "Minimum number of shares needed to recover the file"),
-        ("R", 7, "int",
-         "Repair threshold: repair will not occur until fewer than R shares "
-         "are left"),
-        ("N", 10, "int",
-         "Total number of shares of the file generated"),
-        ("delta", "1M", "time", "Amount of time between each simulation step"),
-        ("check_period", "1M", "time",
-         "How often to run the checker and repair if fewer than R shares"),
-        ("report_period", "3M", "time",
-         "Amount of time between result rows in this report"),
-        ("report_span", "5Y", "time",
-         "Total amount of time covered by this report"),
-        ]
-
-    def parse_time(self, s):
-        if s.endswith("M"):
-            return int(s[:-1]) * MONTH
-        if s.endswith("Y"):
-            return int(s[:-1]) * YEAR
-        return int(s)
-
-    def format_time(self, s):
-        if s%YEAR == 0:
-            return "%dY" % (s/YEAR)
-        if s%MONTH == 0:
-            return "%dM" % (s/MONTH)
-        return "%d" % s
-
-    def get_parameters(self, ctx):
-        parameters = {}
-        for (name,default,argtype,description) in self.DEFAULT_PARAMETERS:
-            v = get_arg(ctx, name, default)
-            if argtype == "time":
-                value = self.parse_time(v)
-            else:
-                value = int(v)
-            parameters[name] = value
-        return parameters
-
-    def renderHTTP(self, ctx):
-        self.parameters = self.get_parameters(ctx)
-        self.results = reliability.ReliabilityModel.run(**self.parameters)
-        return rend.Page.renderHTTP(self, ctx)
-
-    def make_input(self, name, old_value):
-        return T.input(name=name, type="text", size="5",
-                       value=self.format_time(old_value))
-
-    def render_forms(self, ctx, data):
-        f = T.form(action=".", method="get")
-        table = []
-        for (name,default_value,argtype,description) in self.DEFAULT_PARAMETERS:
-            old_value = self.parameters[name]
-            i = self.make_input(name, old_value)
-            table.append(T.tr[T.td[name+":"], T.td[i], T.td[description]])
-        go = T.input(type="submit", value="Recompute")
-        return [T.h2["Simulation Parameters:"],
-                f[T.table[table], go],
-                ]
-
-    def data_simulation_table(self, ctx, data):
-        for row in self.results.samples:
-            yield row
-
-    def render_simulation_row(self, ctx, row):
-        (when, unmaintained_shareprobs, maintained_shareprobs,
-         P_repaired_last_check_period,
-         cumulative_number_of_repairs,
-         cumulative_number_of_new_shares,
-         P_dead_unmaintained, P_dead_maintained) = row
-        ctx.fillSlots("t", yandm(when))
-        ctx.fillSlots("P_repair", "%.6f" % P_repaired_last_check_period)
-        ctx.fillSlots("P_dead_unmaintained", "%.6g" % P_dead_unmaintained)
-        ctx.fillSlots("P_dead_maintained", "%.6g" % P_dead_maintained)
-        return ctx.tag
-
-    def render_report_span(self, ctx, row):
-        (when, unmaintained_shareprobs, maintained_shareprobs,
-         P_repaired_last_check_period,
-         cumulative_number_of_repairs,
-         cumulative_number_of_new_shares,
-         P_dead_unmaintained, P_dead_maintained) = self.results.samples[-1]
-        return ctx.tag[yandm(when)]
-
-    def render_P_loss_unmaintained(self, ctx, row):
-        (when, unmaintained_shareprobs, maintained_shareprobs,
-         P_repaired_last_check_period,
-         cumulative_number_of_repairs,
-         cumulative_number_of_new_shares,
-         P_dead_unmaintained, P_dead_maintained) = self.results.samples[-1]
-        return ctx.tag["%.6g (%1.8f%%)" % (P_dead_unmaintained,
-                                           100*P_dead_unmaintained)]
-
-    def render_P_loss_maintained(self, ctx, row):
-        (when, unmaintained_shareprobs, maintained_shareprobs,
-         P_repaired_last_check_period,
-         cumulative_number_of_repairs,
-         cumulative_number_of_new_shares,
-         P_dead_unmaintained, P_dead_maintained) = self.results.samples[-1]
-        return ctx.tag["%.6g (%1.8f%%)" % (P_dead_maintained,
-                                           100*P_dead_maintained)]
-
-    def render_P_repair_rate(self, ctx, row):
-        (when, unmaintained_shareprobs, maintained_shareprobs,
-         P_repaired_last_check_period,
-         cumulative_number_of_repairs,
-         cumulative_number_of_new_shares,
-         P_dead_unmaintained, P_dead_maintained) = self.results.samples[-1]
-        freq = when / cumulative_number_of_repairs
-        return ctx.tag["%.6g" % freq]
-
-    def render_P_repair_shares(self, ctx, row):
-        (when, unmaintained_shareprobs, maintained_shareprobs,
-         P_repaired_last_check_period,
-         cumulative_number_of_repairs,
-         cumulative_number_of_new_shares,
-         P_dead_unmaintained, P_dead_maintained) = self.results.samples[-1]
-        generated_shares = cumulative_number_of_new_shares / cumulative_number_of_repairs
-        return ctx.tag["%1.2f" % generated_shares]
-
-
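All four summary renderers unpack the final sample row and reduce it with the same small bits of arithmetic; a standalone sketch with made-up inputs (the numbers are illustrative, not simulation output):

    DAY = 24 * 60 * 60
    MONTH = 31 * DAY
    YEAR = 365 * DAY

    def yandm(seconds):
        # whole years, then whole leftover months, as in the helper above
        return "%dy.%dm" % (seconds // YEAR, (seconds % YEAR) // MONTH)

    when = 5 * YEAR                          # made-up final-row values
    cumulative_number_of_repairs = 4.0
    cumulative_number_of_new_shares = 22.0

    print(yandm(when))                                     # '5y.0m'
    print("%.6g" % (when / cumulative_number_of_repairs))  # secs between repairs
    print("%1.2f" % (cumulative_number_of_new_shares
                     / cumulative_number_of_repairs))      # shares per repair
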
diff --git a/src/allmydata/web/reliability.xhtml b/src/allmydata/web/reliability.xhtml
deleted file mode 100644 (file)
index f8d93d1..0000000
+++ /dev/null
@@ -1,63 +0,0 @@
-<html xmlns:n="http://nevow.com/ns/nevow/0.1">
-  <head>
-    <title>Tahoe-LAFS - Reliability Tool</title>
-    <link href="/tahoe.css" rel="stylesheet" type="text/css"/>
-    <link href="/icon.png" rel="shortcut icon" />
-    <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
-  </head>
-  <body>
-
-<h1>Tahoe-LAFS Reliability Tool</h1>
-
-<p>Given certain assumptions, this page calculates the probability of share loss
-over time, to help make informed decisions about how much redundancy and
-repair bandwidth to configure on a Tahoe-LAFS grid.</p>
-
-<div n:render="forms" />
-
-<h2>Simulation Results</h2>
-
-<p>At the end of the report span (elapsed time <span n:render="report_span"
-/>), the simulated file had the following properties:</p>
-
-<ul>
-    <li>Probability of loss (no maintenance):
-        <span n:render="P_loss_unmaintained"/></li>
-    <li>Probability of loss (with maintenance):
-        <span n:render="P_loss_maintained"/></li>
-    <li>Average repair frequency:
-        once every <span n:render="P_repair_rate"/> secs</li>
-    <li>Average shares generated per repair:
-        <span n:render="P_repair_shares"/></li>
-</ul>
-
-<p>This table shows how the following properties change over time:</p>
-<ul>
-  <li>P_repair: the chance that a repair was performed in the most recent
-  check period.</li>
-  <li>P_dead (unmaintained): the chance that the file will be unrecoverable
-  without periodic check+repair</li>
-  <li>P_dead (maintained): the chance that the file will be unrecoverable even
-  with periodic check+repair</li>
-</ul>
-
-<div>
-<table n:render="sequence" n:data="simulation_table">
-  <tr n:pattern="header">
-    <td>t</td>
-    <td>P_repair</td>
-    <td>P_dead (unmaintained)</td>
-    <td>P_dead (maintained)</td>
-  </tr>
-  <tr n:pattern="item" n:render="simulation_row">
-    <td><n:slot name="t"/></td>
-    <td><n:slot name="P_repair"/></td>
-    <td><n:slot name="P_dead_unmaintained"/></td>
-    <td><n:slot name="P_dead_maintained"/></td>
-  </tr>
-  <tr n:pattern="empty"><td>no simulation data!</td></tr>
-</table>
-</div>
-
-  </body>
-</html>
index 615f98d1d6f21b956f4877822d3b09e126825062..47793201d72ab60c61efdbc3cf108371c55124a0 100644 (file)
@@ -2,18 +2,17 @@ import time, os
 
 from twisted.internet import address
 from twisted.web import http
-from nevow import rend, url, loaders, tags as T
+from nevow import rend, url, tags as T
 from nevow.inevow import IRequest
 from nevow.static import File as nevow_File # TODO: merge with static.File?
 from nevow.util import resource_filename
 
 import allmydata # to display import path
 from allmydata import get_package_versions_string
-from allmydata import provisioning
 from allmydata.util import idlib, log
 from allmydata.interfaces import IFileNode
 from allmydata.web import filenode, directory, unlinked, status, operations
-from allmydata.web import reliability, storage
+from allmydata.web import storage
 from allmydata.web.common import abbreviate_size, getxmlfile, WebError, \
      get_arg, RenderMixin, get_format, get_mutable_type
 
@@ -126,20 +125,6 @@ class IncidentReporter(RenderMixin, rend.Page):
         req.setHeader("content-type", "text/plain")
         return "Thank you for your report!"
 
-class NoReliability(rend.Page):
-    docFactory = loaders.xmlstr('''\
-<html xmlns:n="http://nevow.com/ns/nevow/0.1">
-  <head>
-    <title>AllMyData - Tahoe</title>
-    <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
-  </head>
-  <body>
-  <h2>"Reliability" page not available</h2>
-  <p>Please install the python "NumPy" module to enable this page.</p>
-  </body>
-</html>
-''')
-
 SPACE = u"\u00A0"*2
 
 class Root(rend.Page):
@@ -175,12 +160,6 @@ class Root(rend.Page):
         # needs to be created on each request
         return status.HelperStatus(self.client.helper)
 
-    child_provisioning = provisioning.ProvisioningTool()
-    if reliability.is_available():
-        child_reliability = reliability.ReliabilityTool()
-    else:
-        child_reliability = NoReliability()
-
     child_report_incident = IncidentReporter()
     #child_server # let's reserve this for storage-server-over-HTTP
 
index b5a191e4b7e98f50ea2ec19f46b2efe060678524..6bf1debab8dce4dbf35943ba6363e458e47d7c66 100644 (file)
@@ -91,9 +91,6 @@
   <div>Please visit the <a target="_blank" href="http://tahoe-lafs.org">Tahoe-LAFS home page</a> for
   code updates and bug reporting.</div>
 
-  <div>The <a href="provisioning">provisioning tool</a> and <a
-  href="reliability">reliability calculator</a> may also be useful.</div>
-
   <div n:render="incident_button" />
 </div>