2 from nevow import inevow, rend, loaders, tags as T
6 # factorial and binomial copied from
7 # http://mail.python.org/pipermail/python-list/2007-April/435718.html
def div_ceil(n, d):
    """
    Return the smallest integer k such that k*d >= n (ceiling division).
    """
    # -(-n // d) is exact integer ceiling division. The original
    # (n/d) + (n%d != 0) depended on Python 2's truncating '/': under true
    # division it returns a float (and adds a bool to it), which is wrong.
    return -(-n // d)
# factorial and binomial copied from
# http://mail.python.org/pipermail/python-list/2007-April/435718.html

def factorial(n):
    """factorial(n): return the factorial of the integer n.

    factorial(0) = 1
    factorial(n) with n<0 is -factorial(abs(n))
    """
    result = 1
    # range() works on both Python 2 and Python 3; xrange is Python-2-only.
    for i in range(1, abs(n)+1):
        result *= i
    if n >= 0:
        return result
    else:
        return -result
def binomial(n, k):
    """binomial(n, k): return the binomial coefficient C(n, k)."""
    assert 0 <= k <= n
    if k == 0 or k == n:
        return 1
    # calculate n!/k! as one product, avoiding factors that
    # just get canceled
    P = k+1
    for i in range(k+2, n+1):
        P *= i
    # Divide out the remaining (n-k)! exactly; P is always divisible.
    # math.factorial is the C-accelerated stdlib routine, and the assert
    # above guarantees n-k >= 0, so it is safe here.
    return P // math.factorial(n-k)
# NOTE(review): garbled extraction -- the original source line numbers are
# fused onto each line and many lines are missing; restore this file from
# version control before editing further.
41 class ProvisioningTool(rend.Page):
# Nevow page; its template is the provisioning.xhtml file shipped next to
# this module (util.sibling resolves relative to this source file).
43 docFactory = loaders.xmlfile(util.sibling("provisioning.xhtml"))
def render_forms(self, ctx, data):
    """Nevow renderer: extract POSTed form values and delegate to do_forms().

    The local getarg(name, astype) returns the submitted field coerced via
    astype, or None when the request is not a POST or the field is absent,
    so do_forms() can fall back to its defaults.
    """
    req = inevow.IRequest(ctx)

    def getarg(name, astype=int):
        # Only a POST carries form data; a bare GET renders the defaults.
        if req.method != "POST":
            return None
        if name in req.fields:
            return astype(req.fields[name].value)
        return None
    return self.do_forms(getarg)
# NOTE(review): garbled extraction -- original source line numbers are fused
# into each line and many lines are missing; restore this region from VCS.
57 def do_forms(self, getarg):
58 filled = getarg("filled", bool)
# get_and_set: render a <select> named `name`, marking the submitted (or
# default) choice as selected; returns (chosen value, select widget).
60 def get_and_set(name, options, default=None, astype=int):
61 current_value = getarg(name, astype)
62 i_select = T.select(name=name)
63 for (count, description) in options:
65 if ((current_value is not None and count == current_value) or
66 (current_value is None and count == default)):
67 o = T.option(value=str(count), selected="true")[description]
69 o = T.option(value=str(count))[description]
70 i_select = i_select[o]
71 if current_value is None:
72 current_value = default
73 return current_value, i_select
# add_input/add_output accumulate widgets per named section; `sections` is
# presumably a dict initialized on a missing line -- TODO confirm from VCS.
76 def add_input(section, text, entry):
77 if section not in sections:
78 sections[section] = []
79 sections[section].extend([T.div[text, ": ", entry], "\n"])
81 def add_output(section, entry):
82 if section not in sections:
83 sections[section] = []
84 sections[section].extend([entry, "\n"])
86 def build_section(section):
87 return T.fieldset[T.legend[section], sections[section]]
# number(): human-readable SI formatting (k/M/G/T/P). The branch conditions
# selecting each scale are on missing lines.
89 def number(value, suffix=""):
98 fmt = "%.2fk%s"; scaling = 1e3
100 fmt = "%.2fM%s"; scaling = 1e6
102 fmt = "%.2fG%s"; scaling = 1e9
104 fmt = "%.2fT%s"; scaling = 1e12
106 fmt = "%.2fP%s"; scaling = 1e15
109 return fmt % (value / scaling, suffix)
# NOTE(review): garbled extraction -- original source line numbers are fused
# into each line and many lines are missing; restore this region from VCS.
# Input widgets: network size, per-user file count/space, sharing ratio,
# and erasure-coding parameters.
111 user_counts = [(5, "5 users"),
115 (10000, "10k users"),
116 (50000, "50k users"),
117 (100000, "100k users"),
118 (500000, "500k users"),
119 (1000000, "1M users"),
121 num_users, i_num_users = get_and_set("num_users", user_counts, 50000)
123 "How many users are on this network?", i_num_users)
125 files_per_user_counts = [(100, "100 files"),
127 (10000, "10k files"),
128 (100000, "100k files"),
131 files_per_user, i_files_per_user = get_and_set("files_per_user",
132 files_per_user_counts,
135 "How many files for each user? (avg)",
138 space_per_user_sizes = [(1e6, "1MB"),
151 # Estimate ~5gb per user as a more realistic case
152 space_per_user, i_space_per_user = get_and_set("space_per_user",
153 space_per_user_sizes,
156 "How much data for each user? (avg)",
159 sharing_ratios = [(1.0, "1.0x"),
163 sharing_ratio, i_sharing_ratio = get_and_set("sharing_ratio",
167 "What is the sharing ratio? (1.0x is no-sharing and"
168 " no convergence)", i_sharing_ratio)
170 # Encoding parameters
171 encoding_choices = [("3-of-10-5", "3.3x (3-of-10, repair below 5)"),
172 ("3-of-10-8", "3.3x (3-of-10, repair below 8)"),
173 ("5-of-10-7", "2x (5-of-10, repair below 7)"),
174 ("8-of-10-9", "1.25x (8-of-10, repair below 9)"),
# NOTE(review): the "repair below 28" label below is missing its closing ")".
175 ("27-of-30-28", "1.1x (27-of-30, repair below 28"),
176 ("25-of-100-50", "4x (25-of-100, repair below 50)"),
178 encoding_parameters, i_encoding_parameters = \
179 get_and_set("encoding_parameters",
180 encoding_choices, "3-of-10-5", str)
# The encoding string "k-of-n-r" is parsed into k shares needed, n shares
# produced, and r the repair threshold.
181 encoding_pieces = encoding_parameters.split("-")
182 k = int(encoding_pieces[0])
183 assert encoding_pieces[1] == "of"
184 n = int(encoding_pieces[2])
185 # we repair the file when the number of available shares drops below
187 repair_threshold = int(encoding_pieces[3])
190 "What are the default encoding parameters?",
191 i_encoding_parameters)
# NOTE(review): garbled extraction -- original source line numbers are fused
# into each line and many lines are missing; restore this region from VCS.
# Input widgets: server count, availability, drive MTBF/size/failure model,
# ownership mode, client access rates, lease and check timers.
194 num_server_choices = [ (5, "5 servers"),
199 (100, "100 servers"),
200 (200, "200 servers"),
201 (300, "300 servers"),
202 (500, "500 servers"),
203 (1000, "1k servers"),
204 (2000, "2k servers"),
205 (5000, "5k servers"),
206 (10e3, "10k servers"),
207 (100e3, "100k servers"),
210 num_servers, i_num_servers = \
211 get_and_set("num_servers", num_server_choices, 30, int)
213 "How many servers are there?", i_num_servers)
215 # availability is measured in dBA = -dBF, where 0dBF is 100% failure,
216 # 10dBF is 10% failure, 20dBF is 1% failure, etc
217 server_dBA_choices = [ (10, "90% [10dBA] (2.4hr/day)"),
218 (13, "95% [13dBA] (1.2hr/day)"),
219 (20, "99% [20dBA] (14min/day or 3.5days/year)"),
220 (23, "99.5% [23dBA] (7min/day or 1.75days/year)"),
221 (30, "99.9% [30dBA] (87sec/day or 9hours/year)"),
222 (40, "99.99% [40dBA] (60sec/week or 53min/year)"),
223 (50, "99.999% [50dBA] (5min per year)"),
225 server_dBA, i_server_availability = \
226 get_and_set("server_availability",
230 "What is the server availability?", i_server_availability)
232 drive_MTBF_choices = [ (40, "40,000 Hours"),
234 drive_MTBF, i_drive_MTBF = \
235 get_and_set("drive_MTBF", drive_MTBF_choices, 40, int)
237 "What is the hard drive MTBF?", i_drive_MTBF)
238 # http://www.tgdaily.com/content/view/30990/113/
239 # http://labs.google.com/papers/disk_failures.pdf
241 # 1.7% of the drives they replaced were 0-1 years old
242 # 8% of the drives they repalced were 1-2 years old
243 # 8.6% were 2-3 years old
244 # 6% were 3-4 years old, about 8% were 4-5 years old
246 drive_size_choices = [ (100, "100 GB"),
254 drive_size, i_drive_size = \
255 get_and_set("drive_size", drive_size_choices, 3000, int)
# drive_size is converted from GB (widget units) to bytes here.
256 drive_size = drive_size * 1e9
258 "What is the capacity of each hard drive?", i_drive_size)
259 drive_failure_model_choices = [ ("E", "Exponential"),
262 drive_failure_model, i_drive_failure_model = \
263 get_and_set("drive_failure_model",
264 drive_failure_model_choices,
267 "How should we model drive failures?", i_drive_failure_model)
269 # drive_failure_rate is in failures per second
# drive_MTBF is in thousands of hours (e.g. 40 -> 40,000h), hence *1000*3600.
270 if drive_failure_model == "E":
271 drive_failure_rate = 1.0 / (drive_MTBF * 1000 * 3600)
273 drive_failure_rate = 0.5 / (drive_MTBF * 1000 * 3600)
275 # deletion/gc/ownership mode
276 ownership_choices = [ ("A", "no deletion, no gc, no owners"),
277 ("B", "deletion, no gc, no owners"),
278 ("C", "deletion, share timers, no owners"),
279 ("D", "deletion, no gc, yes owners"),
280 ("E", "deletion, owner timers"),
282 ownership_mode, i_ownership_mode = \
283 get_and_set("ownership_mode", ownership_choices,
286 "What is the ownership mode?", i_ownership_mode)
288 # client access behavior
289 access_rates = [ (1, "one file per day"),
290 (10, "10 files per day"),
291 (100, "100 files per day"),
292 (1000, "1k files per day"),
293 (10e3, "10k files per day"),
294 (100e3, "100k files per day"),
296 download_files_per_day, i_download_rate = \
297 get_and_set("download_rate", access_rates,
300 "How many files are downloaded per day?", i_download_rate)
# per-day counts are converted to per-second rates (Hz) below.
301 download_rate = 1.0 * download_files_per_day / (24*60*60)
303 upload_files_per_day, i_upload_rate = \
304 get_and_set("upload_rate", access_rates,
307 "How many files are uploaded per day?", i_upload_rate)
308 upload_rate = 1.0 * upload_files_per_day / (24*60*60)
310 delete_files_per_day, i_delete_rate = \
311 get_and_set("delete_rate", access_rates,
314 "How many files are deleted per day?", i_delete_rate)
315 delete_rate = 1.0 * delete_files_per_day / (24*60*60)
318 # the value is in days
319 lease_timers = [ (1, "one refresh per day"),
320 (7, "one refresh per week"),
322 lease_timer, i_lease = \
323 get_and_set("lease_timer", lease_timers,
326 "How frequently do clients refresh files or accounts? "
329 seconds_per_lease = 24*60*60*lease_timer
# check_timer is in weeks (see *7*24*3600 conversion below).
331 check_timer_choices = [ (1, "every week"),
333 (8, "every two months"),
334 (16, "every four months"),
336 check_timer, i_check_timer = \
337 get_and_set("check_timer", check_timer_choices, 4, int)
339 "How frequently should we check on each file?",
341 file_check_interval = check_timer * 7 * 24 * 3600
# NOTE(review): garbled extraction -- original source line numbers are fused
# into each line and many lines are missing; restore this region from VCS.
# Output section: derived grid-wide totals (files, plaintext, shares) and
# per-server storage load.
345 add_output("Users", T.div["Total users: %s" % number(num_users)])
347 T.div["Files per user: %s" % number(files_per_user)])
348 file_size = 1.0 * space_per_user / files_per_user
350 T.div["Average file size: ", number(file_size)])
# sharing_ratio > 1 means users share/converge on the same files, shrinking
# the distinct-file and distinct-byte totals.
351 total_files = num_users * files_per_user / sharing_ratio
354 T.div["Total number of files in grid: ",
355 number(total_files)])
356 total_space = num_users * space_per_user / sharing_ratio
358 T.div["Total volume of plaintext in grid: ",
359 number(total_space, "B")])
361 total_shares = n * total_files
363 T.div["Total shares in grid: ", number(total_shares)])
364 expansion = float(n) / float(k)
366 total_usage = expansion * total_space
368 T.div["Share data in grid: ", number(total_usage, "B")])
# NOTE(review): the two buckets/shares assignments below appear to be the
# two branches of an n-vs-num_servers conditional whose 'if'/'else' lines
# are missing -- confirm against VCS.
371 # silly configuration, causes Tahoe2 to wrap and put multiple
372 # shares on some servers.
373 add_output("Servers",
374 T.div["non-ideal: more shares than servers"
375 " (n=%d, servers=%d)" % (n, num_servers)])
376 # every file has at least one share on every server
377 buckets_per_server = total_files
378 shares_per_server = total_files * ((1.0 * n) / num_servers)
380 # if nobody is full, then no lease requests will be turned
381 # down for lack of space, and no two shares for the same file
382 # will share a server. Therefore the chance that any given
383 # file has a share on any given server is n/num_servers.
384 buckets_per_server = total_files * ((1.0 * n) / num_servers)
385 # since each such represented file only puts one share on a
386 # server, the total number of shares per server is the same.
387 shares_per_server = buckets_per_server
388 add_output("Servers",
389 T.div["Buckets per server: ",
390 number(buckets_per_server)])
391 add_output("Servers",
392 T.div["Shares per server: ",
393 number(shares_per_server)])
395 # how much space is used on the storage servers for the shares?
396 # the share data itself
397 share_data_per_server = total_usage / num_servers
398 add_output("Servers",
399 T.div["Share data per server: ",
400 number(share_data_per_server, "B")])
401 # this is determined empirically. H=hashsize=32, for a one-segment
402 # file and 3-of-10 encoding
403 share_validation_per_server = 266 * shares_per_server
404 # this could be 423*buckets_per_server, if we moved the URI
405 # extension into a separate file, but that would actually consume
406 # *more* space (minimum filesize is 4KiB), unless we moved all
407 # shares for a given bucket into a single file.
408 share_uri_extension_per_server = 423 * shares_per_server
# NOTE(review): garbled extraction -- original source line numbers are fused
# into each line and many lines are missing; restore this region from VCS.
# Output section: per-bucket lease/ownership bookkeeping costs, then the
# total per-server share space broken down by component.
410 # ownership mode adds per-bucket data
411 H = 32 # depends upon the desired security of delete/refresh caps
412 # bucket_lease_size is the amount of data needed to keep track of
413 # the delete/refresh caps for each bucket.
414 bucket_lease_size = 0
415 client_bucket_refresh_rate = 0
# NOTE(review): owner_table_size is only assigned inside the ("D","E")
# branch below; its zero-initialization is presumably on a missing line.
417 if ownership_mode in ("B", "C", "D", "E"):
418 bucket_lease_size = sharing_ratio * 1.0 * H
419 if ownership_mode in ("B", "C"):
420 # refreshes per second per client
421 client_bucket_refresh_rate = (1.0 * n * files_per_user /
424 T.div["Client share refresh rate (outbound): ",
425 number(client_bucket_refresh_rate, "Hz")])
426 server_bucket_refresh_rate = (client_bucket_refresh_rate *
427 num_users / num_servers)
428 add_output("Servers",
429 T.div["Server share refresh rate (inbound): ",
430 number(server_bucket_refresh_rate, "Hz")])
431 if ownership_mode in ("D", "E"):
432 # each server must maintain a bidirectional mapping from
433 # buckets to owners. One way to implement this would be to
434 # put a list of four-byte owner numbers into each bucket, and
435 # a list of four-byte share numbers into each owner (although
436 # of course we'd really just throw it into a database and let
437 # the experts take care of the details).
438 owner_table_size = 2*(buckets_per_server * sharing_ratio * 4)
440 if ownership_mode in ("E",):
441 # in this mode, clients must refresh one timer per server
442 client_account_refresh_rate = (1.0 * num_servers /
445 T.div["Client account refresh rate (outbound): ",
446 number(client_account_refresh_rate, "Hz")])
447 server_account_refresh_rate = (client_account_refresh_rate *
448 num_users / num_servers)
449 add_output("Servers",
450 T.div["Server account refresh rate (inbound): ",
451 number(server_account_refresh_rate, "Hz")])
453 # TODO: buckets vs shares here is a bit wonky, but in
454 # non-wrapping grids it shouldn't matter
455 share_lease_per_server = bucket_lease_size * buckets_per_server
456 share_ownertable_per_server = owner_table_size
458 share_space_per_server = (share_data_per_server +
459 share_validation_per_server +
460 share_uri_extension_per_server +
461 share_lease_per_server +
462 share_ownertable_per_server)
463 add_output("Servers",
464 T.div["Share space per server: ",
465 number(share_space_per_server, "B"),
467 number(share_data_per_server, "B"),
469 number(share_validation_per_server, "B"),
471 number(share_uri_extension_per_server, "B"),
473 number(share_lease_per_server, "B"),
475 number(share_ownertable_per_server, "B"),
# NOTE(review): garbled extraction -- original source line numbers are fused
# into each line and many lines are missing; restore this region from VCS.
# Output section: per-client and per-server transfer/check/modify rates,
# total share space, percentage breakdown, and drive-count estimates.
481 client_download_share_rate = download_rate * k
482 client_download_byte_rate = download_rate * file_size
484 T.div["download rate: shares = ",
485 number(client_download_share_rate, "Hz"),
487 number(client_download_byte_rate, "Bps"),
489 total_file_check_rate = 1.0 * total_files / file_check_interval
490 client_check_share_rate = total_file_check_rate / num_users
492 T.div["file check rate: shares = ",
493 number(client_check_share_rate, "Hz"),
495 number(1 / client_check_share_rate, "s"),
# uploads send all n shares; downloads need only k of them.
498 client_upload_share_rate = upload_rate * n
499 # TODO: doesn't include overhead
500 client_upload_byte_rate = upload_rate * file_size * expansion
502 T.div["upload rate: shares = ",
503 number(client_upload_share_rate, "Hz"),
505 number(client_upload_byte_rate, "Bps"),
507 client_delete_share_rate = delete_rate * n
509 server_inbound_share_rate = (client_upload_share_rate *
510 num_users / num_servers)
511 server_inbound_byte_rate = (client_upload_byte_rate *
512 num_users / num_servers)
513 add_output("Servers",
514 T.div["upload rate (inbound): shares = ",
515 number(server_inbound_share_rate, "Hz"),
517 number(server_inbound_byte_rate, "Bps"),
519 add_output("Servers",
520 T.div["share check rate (inbound): ",
521 number(total_file_check_rate * n / num_servers,
525 server_share_modify_rate = ((client_upload_share_rate +
526 client_delete_share_rate) *
527 num_users / num_servers)
528 add_output("Servers",
529 T.div["share modify rate: shares = ",
530 number(server_share_modify_rate, "Hz"),
533 server_outbound_share_rate = (client_download_share_rate *
534 num_users / num_servers)
535 server_outbound_byte_rate = (client_download_byte_rate *
536 num_users / num_servers)
537 add_output("Servers",
538 T.div["download rate (outbound): shares = ",
539 number(server_outbound_share_rate, "Hz"),
541 number(server_outbound_byte_rate, "Bps"),
545 total_share_space = num_servers * share_space_per_server
547 T.div["Share space consumed: ",
548 number(total_share_space, "B")])
# "%%" in these %-formatted strings is the escape for a literal percent sign.
550 T.div[" %% validation: %.2f%%" %
551 (100.0 * share_validation_per_server /
552 share_space_per_server)])
554 T.div[" %% uri-extension: %.2f%%" %
555 (100.0 * share_uri_extension_per_server /
556 share_space_per_server)])
558 T.div[" %% lease data: %.2f%%" %
559 (100.0 * share_lease_per_server /
560 share_space_per_server)])
562 T.div[" %% owner data: %.2f%%" %
563 (100.0 * share_ownertable_per_server /
564 share_space_per_server)])
566 T.div[" %% share data: %.2f%%" %
567 (100.0 * share_data_per_server /
568 share_space_per_server)])
570 T.div["file check rate: ",
571 number(total_file_check_rate,
574 total_drives = max(div_ceil(int(total_share_space),
578 T.div["Total drives: ", number(total_drives), " drives"])
579 drives_per_server = div_ceil(total_drives, num_servers)
580 add_output("Servers",
581 T.div["Drives per server: ", drives_per_server])
# NOTE(review): garbled extraction -- original source line numbers are fused
# into each line and many lines are missing; restore this region from VCS.
# Output section: hardware capital costs and monthly operating costs
# (bandwidth, power/space, drive replacement).
584 if drive_size == 3000 * 1e9:
585 add_output("Servers", T.div["3000GB drive: $250 each"])
# NOTE(review): drive_cost for the fallback $100 case is presumably assigned
# on a missing line near here -- confirm against VCS.
588 add_output("Servers",
589 T.div[T.b["unknown cost per drive, assuming $100"]])
592 if drives_per_server <= 4:
593 add_output("Servers", T.div["1U box with <= 4 drives: $1500"])
594 server_cost = 1500 # typical 1U box
595 elif drives_per_server <= 12:
596 add_output("Servers", T.div["2U box with <= 12 drives: $2500"])
597 server_cost = 2500 # 2U box
599 add_output("Servers",
600 T.div[T.b["Note: too many drives per server, "
604 server_capital_cost = (server_cost + drives_per_server * drive_cost)
605 total_server_cost = float(num_servers * server_capital_cost)
606 add_output("Servers", T.div["Capital cost per server: $",
607 server_capital_cost])
608 add_output("Grid", T.div["Capital cost for all servers: $",
609 number(total_server_cost)])
611 # $44/server/mo power+space
612 server_bandwidth = max(server_inbound_byte_rate,
613 server_outbound_byte_rate)
614 server_bandwidth_mbps = div_ceil(int(server_bandwidth*8), int(1e6))
615 server_monthly_cost = 70*server_bandwidth_mbps + 44
616 add_output("Servers", T.div["Monthly cost per server: $",
617 server_monthly_cost])
618 add_output("Users", T.div["Capital cost per user: $",
619 number(total_server_cost / num_users)])
622 any_drive_failure_rate = total_drives * drive_failure_rate
# NOTE(review): 1 // <float rate> performs a float floor-division; the
# result is a whole-number-valued float MTBF in seconds.
623 any_drive_MTBF = 1 // any_drive_failure_rate # in seconds
624 any_drive_MTBF_days = any_drive_MTBF / 86400
626 T.div["MTBF (any drive): ",
627 number(any_drive_MTBF_days), " days"])
628 drive_replacement_monthly_cost = (float(drive_cost)
629 * any_drive_failure_rate
632 T.div["Monthly cost of replacing drives: $",
633 number(drive_replacement_monthly_cost)])
635 total_server_monthly_cost = float(num_servers * server_monthly_cost
636 + drive_replacement_monthly_cost)
638 add_output("Grid", T.div["Monthly cost for all servers: $",
639 number(total_server_monthly_cost)])
641 T.div["Monthly cost per user: $",
642 number(total_server_monthly_cost / num_users)])
# NOTE(review): garbled extraction -- original source line numbers are fused
# into each line and many lines are missing; restore this region from VCS.
# Output section: availability and worst-case survival estimates (in dBA,
# see file_availability/many_files_availability), then the form assembly.
645 file_dBA = self.file_availability(k, n, server_dBA)
646 user_files_dBA = self.many_files_availability(file_dBA,
648 all_files_dBA = self.many_files_availability(file_dBA, total_files)
650 T.div["availability of: ",
651 "arbitrary file = %d dBA, " % file_dBA,
652 "all files of user1 = %d dBA, " % user_files_dBA,
653 "all files in grid = %d dBA" % all_files_dBA,
# n-k+1 drive losses is the minimum that can take a file below k shares.
657 time_until_files_lost = (n-k+1) / any_drive_failure_rate
659 T.div["avg time until files are lost: ",
660 number(time_until_files_lost, "s"), ", ",
661 number(time_until_files_lost/86400, " days"),
664 share_data_loss_rate = any_drive_failure_rate * drive_size
666 T.div["share data loss rate: ",
667 number(share_data_loss_rate,"Bps")])
669 # the worst-case survival numbers occur when we do a file check
670 # and the file is just above the threshold for repair (so we
671 # decide to not repair it). The question is then: what is the
672 # chance that the file will decay so badly before the next check
673 # that we can't recover it? The resulting probability is per
675 # Note that the chances of us getting into this situation are low.
676 P_disk_failure_during_interval = (drive_failure_rate *
678 disk_failure_dBF = 10*math.log10(P_disk_failure_during_interval)
679 disk_failure_dBA = -disk_failure_dBF
680 file_survives_dBA = self.file_availability(k, repair_threshold,
682 user_files_survives_dBA = self.many_files_availability( \
683 file_survives_dBA, files_per_user)
684 all_files_survives_dBA = self.many_files_availability( \
685 file_survives_dBA, total_files)
687 T.div["survival of: ",
688 "arbitrary file = %d dBA, " % file_survives_dBA,
689 "all files of user1 = %d dBA, " %
690 user_files_survives_dBA,
691 "all files in grid = %d dBA" %
692 all_files_survives_dBA,
693 " (per worst-case check interval)",
# Assemble the accumulated sections into the final multipart POST form.
699 all_sections.append(build_section("Users"))
700 all_sections.append(build_section("Servers"))
701 all_sections.append(build_section("Drives"))
702 if "Grid" in sections:
703 all_sections.append(build_section("Grid"))
705 f = T.form(action=".", method="post", enctype="multipart/form-data")
712 f = f[T.input(type="hidden", name="filled", value="true"),
713 T.input(type="submit", value=action),
# The reliability page link is added only if the optional module imports;
# the enclosing try/except lines are missing from this extraction.
718 from allmydata import reliability
719 # we import this just to test to see if the page is available
720 _hush_pyflakes = reliability
722 f = [T.div[T.a(href="../reliability")["Reliability Math"]], f]
def file_availability(self, k, n, server_dBA):
    """
    Return the availability (in dBA) of a single k-of-n encoded file,
    given per-server availability server_dBA.

    The full formula for the availability of a specific file is::

     1 - sum([choose(N,i) * p**i * (1-p)**(N-i)] for i in range(k)])

    Where choose(N,i) = N! / ( i! * (N-i)! ) . Note that each term of
    this summation is the probability that there are exactly 'i' servers
    available, and what we're doing is adding up the cases where i is too
    low.

    This is a nuisance to calculate at all accurately, especially once N
    gets large, and when p is close to unity. So we make an engineering
    approximation: if (1-p) is very small, then each [i] term is much
    larger than the [i-1] term, and the sum is dominated by the i=k-1
    term. This only works for (1-p) < 10%, and when the choose() function
    doesn't rise fast enough to compensate. For high-expansion encodings
    (3-of-10, 25-of-100), the choose() function is rising at the same
    time as the (1-p)**(N-i) term, so that's not an issue. For
    low-expansion encodings (7-of-10, 75-of-100) the two values are
    moving in opposite directions, so more care must be taken.

    Note that the p**i term has only a minor effect as long as (1-p)*N is
    small, and even then the effect is attenuated by the 1-p term.
    """

    assert server_dBA > 9  # >=90% availability to use the approximation
    # dominant i=k-1 term: choose(n, k-1) * (1-p)**(n-k+1), expressed in dB
    factor = binomial(n, k-1)
    factor_dBA = 10 * math.log10(factor)
    exponent = n - k + 1
    file_dBA = server_dBA * exponent - factor_dBA
    return file_dBA
def many_files_availability(self, file_dBA, num_files):
    """The probability that 'num_files' independent bernoulli trials will
    succeed (i.e. we can recover all files in the grid at any given
    moment) is p**num_files . Since p is close to unity, we express in p
    in dBA instead, so we can get useful precision on q (=1-p), and then
    the formula becomes::

     P_some_files_unavailable = 1 - (1 - q)**num_files

    That (1-q)**n expands with the usual binomial sequence, 1 - nq +
    Xq**2 ... + Xq**n . We use the same approximation as before, since we
    know q is close to zero, and we get to ignore all the terms past -nq.
    """
    # q_total ~= num_files * q, i.e. subtract 10*log10(num_files) dBA.
    many_files_dBA = file_dBA - 10 * math.log10(num_files)
    return many_files_dBA