from nevow import inevow, rend, tags as T
import math
from allmydata.util import mathutil
from allmydata.web.common import getxmlfile
# factorial and binomial copied from
# http://mail.python.org/pipermail/python-list/2007-April/435718.html

def factorial(n):
    """factorial(n): return the factorial of the integer n.
    factorial(n) with n<0 is -factorial(abs(n))
    """
    result = 1
    for i in xrange(1, abs(n)+1):
        result *= i
    if n < 0:
        return -result
    return result

def binomial(n, k):
    assert 0 <= k <= n
    if k == 0 or k == n:
        return 1
    # calculate n!/k! as one product, avoiding factors that
    # just get canceled
    P = k+1
    for i in xrange(k+2, n+1):
        P *= i
    # if you are paranoid:
    # C, rem = divmod(P, factorial(n-k))
    return P//factorial(n-k)
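
# A quick spot-check of the product trick above (added for illustration, not
# part of the original source): binomial(10, 3) computes the single product
# 4*5*6*7*8*9*10 = 604800 and divides by factorial(7) = 5040, giving the
# expected C(10,3) = 120.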

class ProvisioningTool(rend.Page):
    docFactory = getxmlfile("provisioning.xhtml")

    def render_forms(self, ctx, data):
        req = inevow.IRequest(ctx)

        def getarg(name, astype=int):
            if req.method != "POST":
                return None
            if name in req.fields:
                return astype(req.fields[name].value)
            return None
        return self.do_forms(getarg)

    def do_forms(self, getarg):
        filled = getarg("filled", bool)

        def get_and_set(name, options, default=None, astype=int):
            current_value = getarg(name, astype)
            i_select = T.select(name=name)
            for (count, description) in options:
                if ((current_value is not None and count == current_value) or
                    (current_value is None and count == default)):
                    o = T.option(value=str(count), selected="true")[description]
                else:
                    o = T.option(value=str(count))[description]
                i_select = i_select[o]
            if current_value is None:
                current_value = default
            return current_value, i_select

        sections = {}

        def add_input(section, text, entry):
            if section not in sections:
                sections[section] = []
            sections[section].extend([T.div[text, ": ", entry], "\n"])

        def add_output(section, entry):
            if section not in sections:
                sections[section] = []
            sections[section].extend([entry, "\n"])

        def build_section(section):
            return T.fieldset[T.legend[section], sections[section]]

        def number(value, suffix=""):
            scaling = 1
            if value < 1e3:
                fmt = "%g%s"
            elif value < 1e6:
                fmt = "%.2fk%s"; scaling = 1e3
            elif value < 1e9:
                fmt = "%.2fM%s"; scaling = 1e6
            elif value < 1e12:
                fmt = "%.2fG%s"; scaling = 1e9
            elif value < 1e15:
                fmt = "%.2fT%s"; scaling = 1e12
            else:
                fmt = "%.2fP%s"; scaling = 1e15
            return fmt % (value / scaling, suffix)
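
        # Illustrative values only, assuming the threshold reconstruction
        # above: number(50000) renders as "50.00k" and number(3e12, "B")
        # renders as "3.00TB".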

        user_counts = [(5, "5 users"),
                       (10000, "10k users"),
                       (50000, "50k users"),
                       (100000, "100k users"),
                       (500000, "500k users"),
                       (1000000, "1M users"),
                       ]
        num_users, i_num_users = get_and_set("num_users", user_counts, 50000)
        add_input("Users",
                  "How many users are on this network?", i_num_users)

        files_per_user_counts = [(100, "100 files"),
                                 (10000, "10k files"),
                                 (100000, "100k files"),
                                 ]
        files_per_user, i_files_per_user = get_and_set("files_per_user",
                                                       files_per_user_counts)
        add_input("Users",
                  "How many files for each user? (avg)",
                  i_files_per_user)

        space_per_user_sizes = [(1e6, "1MB"),
                                ]
        # Estimate ~5gb per user as a more realistic case
        space_per_user, i_space_per_user = get_and_set("space_per_user",
                                                       space_per_user_sizes,
                                                       5e9)
        add_input("Users",
                  "How much data for each user? (avg)",
                  i_space_per_user)

        sharing_ratios = [(1.0, "1.0x"),
                          ]
        sharing_ratio, i_sharing_ratio = get_and_set("sharing_ratio",
                                                     sharing_ratios,
                                                     astype=float)
        add_input("Users",
                  "What is the sharing ratio? (1.0x is no-sharing and"
                  " no convergence)", i_sharing_ratio)

        # Encoding parameters
        encoding_choices = [("3-of-10-5", "3.3x (3-of-10, repair below 5)"),
                            ("3-of-10-8", "3.3x (3-of-10, repair below 8)"),
                            ("5-of-10-7", "2x (5-of-10, repair below 7)"),
                            ("8-of-10-9", "1.25x (8-of-10, repair below 9)"),
                            ("27-of-30-28", "1.1x (27-of-30, repair below 28)"),
                            ("25-of-100-50", "4x (25-of-100, repair below 50)"),
                            ]
        encoding_parameters, i_encoding_parameters = \
                             get_and_set("encoding_parameters",
                                         encoding_choices, "3-of-10-5", str)
        encoding_pieces = encoding_parameters.split("-")
        k = int(encoding_pieces[0])
        assert encoding_pieces[1] == "of"
        n = int(encoding_pieces[2])
        # we repair the file when the number of available shares drops below
        # this threshold
        repair_threshold = int(encoding_pieces[3])

        add_input("Servers",
                  "What are the default encoding parameters?",
                  i_encoding_parameters)

        num_server_choices = [ (5, "5 servers"),
                               (100, "100 servers"),
                               (200, "200 servers"),
                               (300, "300 servers"),
                               (500, "500 servers"),
                               (1000, "1k servers"),
                               (2000, "2k servers"),
                               (5000, "5k servers"),
                               (10e3, "10k servers"),
                               (100e3, "100k servers"),
                               ]
        num_servers, i_num_servers = \
                     get_and_set("num_servers", num_server_choices, 30, int)
        add_input("Servers",
                  "How many servers are there?", i_num_servers)

        # availability is measured in dBA = -dBF, where 0 dBA is 100% failure,
        # 10 dBA is 10% failure, 20 dBA is 1% failure, etc
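        # Worked example (illustration only): a server that is 99% available
        # fails with probability 0.01, so dBA = -10*log10(0.01) = 20, which
        # matches the "99% [20dBA]" choice below.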
        server_dBA_choices = [ (10, "90% [10dBA] (2.4hr/day)"),
                               (13, "95% [13dBA] (1.2hr/day)"),
                               (20, "99% [20dBA] (14min/day or 3.5days/year)"),
                               (23, "99.5% [23dBA] (7min/day or 1.75days/year)"),
                               (30, "99.9% [30dBA] (87sec/day or 9hours/year)"),
                               (40, "99.99% [40dBA] (60sec/week or 53min/year)"),
                               (50, "99.999% [50dBA] (5min per year)"),
                               ]
        server_dBA, i_server_availability = \
                    get_and_set("server_availability",
                                server_dBA_choices)
        add_input("Servers",
                  "What is the server availability?", i_server_availability)

        drive_MTBF_choices = [ (40, "40,000 Hours"),
                               ]
        drive_MTBF, i_drive_MTBF = \
                    get_and_set("drive_MTBF", drive_MTBF_choices, 40, int)
        add_input("Drives",
                  "What is the hard drive MTBF?", i_drive_MTBF)
        # http://www.tgdaily.com/content/view/30990/113/
        # http://labs.google.com/papers/disk_failures.pdf
        # 1.7% of the drives they replaced were 0-1 years old
        # 8% of the drives they replaced were 1-2 years old
        # 8.6% were 2-3 years old
        # 6% were 3-4 years old, about 8% were 4-5 years old

        drive_size_choices = [ (100, "100 GB"),
                               ]
        drive_size, i_drive_size = \
                    get_and_set("drive_size", drive_size_choices, 3000, int)
        drive_size = drive_size * 1e9
        add_input("Drives",
                  "What is the capacity of each hard drive?", i_drive_size)

        drive_failure_model_choices = [ ("E", "Exponential"),
                                        ]
        drive_failure_model, i_drive_failure_model = \
                             get_and_set("drive_failure_model",
                                         drive_failure_model_choices,
                                         astype=str)
        add_input("Drives",
                  "How should we model drive failures?", i_drive_failure_model)

        # drive_failure_rate is in failures per second
        if drive_failure_model == "E":
            drive_failure_rate = 1.0 / (drive_MTBF * 1000 * 3600)
        else:
            drive_failure_rate = 0.5 / (drive_MTBF * 1000 * 3600)
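        # Illustration (not part of the original source): with the exponential
        # model and the 40,000-hour MTBF choice above, the rate is
        # 1.0 / (40 * 1000 * 3600) ~= 6.9e-9 failures/second, i.e. one
        # expected failure per drive every ~4.6 years.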

        # deletion/gc/ownership mode
        ownership_choices = [ ("A", "no deletion, no gc, no owners"),
                              ("B", "deletion, no gc, no owners"),
                              ("C", "deletion, share timers, no owners"),
                              ("D", "deletion, no gc, yes owners"),
                              ("E", "deletion, owner timers"),
                              ]
        ownership_mode, i_ownership_mode = \
                        get_and_set("ownership_mode", ownership_choices,
                                    astype=str)
        add_input("Servers",
                  "What is the ownership mode?", i_ownership_mode)

        # client access behavior
        access_rates = [ (1, "one file per day"),
                         (10, "10 files per day"),
                         (100, "100 files per day"),
                         (1000, "1k files per day"),
                         (10e3, "10k files per day"),
                         (100e3, "100k files per day"),
                         ]
        download_files_per_day, i_download_rate = \
                                get_and_set("download_rate", access_rates)
        add_input("Users",
                  "How many files are downloaded per day?", i_download_rate)
        download_rate = 1.0 * download_files_per_day / (24*60*60)

        upload_files_per_day, i_upload_rate = \
                              get_and_set("upload_rate", access_rates)
        add_input("Users",
                  "How many files are uploaded per day?", i_upload_rate)
        upload_rate = 1.0 * upload_files_per_day / (24*60*60)

        delete_files_per_day, i_delete_rate = \
                              get_and_set("delete_rate", access_rates)
        add_input("Users",
                  "How many files are deleted per day?", i_delete_rate)
        delete_rate = 1.0 * delete_files_per_day / (24*60*60)

        # the value is in days
        lease_timers = [ (1, "one refresh per day"),
                         (7, "one refresh per week"),
                         ]
        lease_timer, i_lease = \
                     get_and_set("lease_timer", lease_timers)
        add_input("Users",
                  "How frequently do clients refresh files or accounts? ",
                  i_lease)
        seconds_per_lease = 24*60*60*lease_timer

        check_timer_choices = [ (1, "every week"),
                                (8, "every two months"),
                                (16, "every four months"),
                                ]
        check_timer, i_check_timer = \
                     get_and_set("check_timer", check_timer_choices, 4, int)
        add_input("Users",
                  "How frequently should we check on each file?",
                  i_check_timer)
        file_check_interval = check_timer * 7 * 24 * 3600
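        # Example (illustrative): check_timer is expressed in weeks, so the
        # default of 4 gives file_check_interval = 4*7*24*3600 = 2,419,200
        # seconds, i.e. each file is checked roughly once a month.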

        if filled:
            add_output("Users", T.div["Total users: %s" % number(num_users)])
            add_output("Users",
                       T.div["Files per user: %s" % number(files_per_user)])
            file_size = 1.0 * space_per_user / files_per_user
            add_output("Users",
                       T.div["Average file size: ", number(file_size)])
            total_files = num_users * files_per_user / sharing_ratio

            add_output("Grid",
                       T.div["Total number of files in grid: ",
                             number(total_files)])
            total_space = num_users * space_per_user / sharing_ratio
            add_output("Grid",
                       T.div["Total volume of plaintext in grid: ",
                             number(total_space, "B")])

            total_shares = n * total_files
            add_output("Grid",
                       T.div["Total shares in grid: ", number(total_shares)])
            expansion = float(n) / float(k)

            total_usage = expansion * total_space
            add_output("Grid",
                       T.div["Share data in grid: ", number(total_usage, "B")])
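
            # Purely hypothetical illustration of the totals above: 50k users
            # with 1000 files of ~5MB each (5GB per user) at a sharing ratio
            # of 1.0 give 5e7 files and 250TB of plaintext; with 3-of-10
            # encoding (expansion 3.33x) that becomes 5e8 shares and roughly
            # 833TB of share data.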

            if num_servers < n:
                # silly configuration, causes Tahoe2 to wrap and put multiple
                # shares on some servers.
                add_output("Servers",
                           T.div["non-ideal: more shares than servers"
                                 " (n=%d, servers=%d)" % (n, num_servers)])
                # every file has at least one share on every server
                buckets_per_server = total_files
                shares_per_server = total_files * ((1.0 * n) / num_servers)
            else:
                # if nobody is full, then no lease requests will be turned
                # down for lack of space, and no two shares for the same file
                # will share a server. Therefore the chance that any given
                # file has a share on any given server is n/num_servers.
                buckets_per_server = total_files * ((1.0 * n) / num_servers)
                # since each such represented file only puts one share on a
                # server, the total number of shares per server is the same.
                shares_per_server = buckets_per_server
            add_output("Servers",
                       T.div["Buckets per server: ",
                             number(buckets_per_server)])
            add_output("Servers",
                       T.div["Shares per server: ",
                             number(shares_per_server)])
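
            # Example of the non-wrapping case (hypothetical numbers): with
            # n=10 shares per file and 200 servers, a given server holds a
            # share of any particular file with probability 10/200 = 5%, so
            # buckets_per_server ~= 0.05 * total_files.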

            # how much space is used on the storage servers for the shares?
            # the share data itself
            share_data_per_server = total_usage / num_servers
            add_output("Servers",
                       T.div["Share data per server: ",
                             number(share_data_per_server, "B")])
            # this is determined empirically. H=hashsize=32, for a one-segment
            # file and 3-of-10 encoding
            share_validation_per_server = 266 * shares_per_server
            # this could be 423*buckets_per_server, if we moved the URI
            # extension into a separate file, but that would actually consume
            # *more* space (minimum filesize is 4KiB), unless we moved all
            # shares for a given bucket into a single file.
            share_uri_extension_per_server = 423 * shares_per_server
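            # Taken together, the two empirical constants above amount to
            # roughly 266 + 423 = 689 bytes of static overhead per share,
            # independent of the share data itself.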

            # ownership mode adds per-bucket data
            H = 32 # depends upon the desired security of delete/refresh caps
            # bucket_lease_size is the amount of data needed to keep track of
            # the delete/refresh caps for each bucket.
            bucket_lease_size = 0
            client_bucket_refresh_rate = 0
            owner_table_size = 0
            if ownership_mode in ("B", "C", "D", "E"):
                bucket_lease_size = sharing_ratio * 1.0 * H
            if ownership_mode in ("B", "C"):
                # refreshes per second per client
                client_bucket_refresh_rate = (1.0 * n * files_per_user /
                                              seconds_per_lease)
                add_output("Users",
                           T.div["Client share refresh rate (outbound): ",
                                 number(client_bucket_refresh_rate, "Hz")])
                server_bucket_refresh_rate = (client_bucket_refresh_rate *
                                              num_users / num_servers)
                add_output("Servers",
                           T.div["Server share refresh rate (inbound): ",
                                 number(server_bucket_refresh_rate, "Hz")])
            if ownership_mode in ("D", "E"):
                # each server must maintain a bidirectional mapping from
                # buckets to owners. One way to implement this would be to
                # put a list of four-byte owner numbers into each bucket, and
                # a list of four-byte share numbers into each owner (although
                # of course we'd really just throw it into a database and let
                # the experts take care of the details).
                owner_table_size = 2*(buckets_per_server * sharing_ratio * 4)

            if ownership_mode in ("E",):
                # in this mode, clients must refresh one timer per server
                client_account_refresh_rate = (1.0 * num_servers /
                                               seconds_per_lease)
                add_output("Users",
                           T.div["Client account refresh rate (outbound): ",
                                 number(client_account_refresh_rate, "Hz")])
                server_account_refresh_rate = (client_account_refresh_rate *
                                               num_users / num_servers)
                add_output("Servers",
                           T.div["Server account refresh rate (inbound): ",
                                 number(server_account_refresh_rate, "Hz")])

            # TODO: buckets vs shares here is a bit wonky, but in
            # non-wrapping grids it shouldn't matter
            share_lease_per_server = bucket_lease_size * buckets_per_server
            share_ownertable_per_server = owner_table_size

            share_space_per_server = (share_data_per_server +
                                      share_validation_per_server +
                                      share_uri_extension_per_server +
                                      share_lease_per_server +
                                      share_ownertable_per_server)
            add_output("Servers",
                       T.div["Share space per server: ",
                             number(share_space_per_server, "B"),
                             " (share data ",
                             number(share_data_per_server, "B"),
                             ", validation ",
                             number(share_validation_per_server, "B"),
                             ", uri-extension ",
                             number(share_uri_extension_per_server, "B"),
                             ", lease data ",
                             number(share_lease_per_server, "B"),
                             ", owner data ",
                             number(share_ownertable_per_server, "B"),
                             ")",
                             ])

            client_download_share_rate = download_rate * k
            client_download_byte_rate = download_rate * file_size
            add_output("Users",
                       T.div["download rate: shares = ",
                             number(client_download_share_rate, "Hz"),
                             ", bytes = ",
                             number(client_download_byte_rate, "Bps"),
                             ])

            total_file_check_rate = 1.0 * total_files / file_check_interval
            client_check_share_rate = total_file_check_rate / num_users
            add_output("Users",
                       T.div["file check rate: shares = ",
                             number(client_check_share_rate, "Hz"),
                             ", interval = ",
                             number(1 / client_check_share_rate, "s"),
                             ])

            client_upload_share_rate = upload_rate * n
            # TODO: doesn't include overhead
            client_upload_byte_rate = upload_rate * file_size * expansion
            add_output("Users",
                       T.div["upload rate: shares = ",
                             number(client_upload_share_rate, "Hz"),
                             ", bytes = ",
                             number(client_upload_byte_rate, "Bps"),
                             ])

            client_delete_share_rate = delete_rate * n

            server_inbound_share_rate = (client_upload_share_rate *
                                         num_users / num_servers)
            server_inbound_byte_rate = (client_upload_byte_rate *
                                        num_users / num_servers)
            add_output("Servers",
                       T.div["upload rate (inbound): shares = ",
                             number(server_inbound_share_rate, "Hz"),
                             ", bytes = ",
                             number(server_inbound_byte_rate, "Bps"),
                             ])
            add_output("Servers",
                       T.div["share check rate (inbound): ",
                             number(total_file_check_rate * n / num_servers,
                                    "Hz"),
                             ])

            server_share_modify_rate = ((client_upload_share_rate +
                                         client_delete_share_rate) *
                                        num_users / num_servers)
            add_output("Servers",
                       T.div["share modify rate: shares = ",
                             number(server_share_modify_rate, "Hz"),
                             ])

            server_outbound_share_rate = (client_download_share_rate *
                                          num_users / num_servers)
            server_outbound_byte_rate = (client_download_byte_rate *
                                         num_users / num_servers)
            add_output("Servers",
                       T.div["download rate (outbound): shares = ",
                             number(server_outbound_share_rate, "Hz"),
                             ", bytes = ",
                             number(server_outbound_byte_rate, "Bps"),
                             ])

            total_share_space = num_servers * share_space_per_server
            add_output("Grid",
                       T.div["Share space consumed: ",
                             number(total_share_space, "B")])
            add_output("Grid",
                       T.div[" %% validation: %.2f%%" %
                             (100.0 * share_validation_per_server /
                              share_space_per_server)])
            add_output("Grid",
                       T.div[" %% uri-extension: %.2f%%" %
                             (100.0 * share_uri_extension_per_server /
                              share_space_per_server)])
            add_output("Grid",
                       T.div[" %% lease data: %.2f%%" %
                             (100.0 * share_lease_per_server /
                              share_space_per_server)])
            add_output("Grid",
                       T.div[" %% owner data: %.2f%%" %
                             (100.0 * share_ownertable_per_server /
                              share_space_per_server)])
            add_output("Grid",
                       T.div[" %% share data: %.2f%%" %
                             (100.0 * share_data_per_server /
                              share_space_per_server)])
            add_output("Grid",
                       T.div["file check rate: ",
                             number(total_file_check_rate, "Hz")])

            total_drives = max(mathutil.div_ceil(int(total_share_space),
                                                 int(drive_size)),
                               num_servers)
            add_output("Drives",
                       T.div["Total drives: ", number(total_drives), " drives"])
            drives_per_server = mathutil.div_ceil(total_drives, num_servers)
            add_output("Servers",
                       T.div["Drives per server: ", drives_per_server])

            if drive_size == 3000 * 1e9:
                add_output("Servers", T.div["3000GB drive: $250 each"])
                drive_cost = 250
            else:
                add_output("Servers",
                           T.div[T.b["unknown cost per drive, assuming $100"]])
                drive_cost = 100

            if drives_per_server <= 4:
                add_output("Servers", T.div["1U box with <= 4 drives: $1500"])
                server_cost = 1500 # typical 1U box
            elif drives_per_server <= 12:
                add_output("Servers", T.div["2U box with <= 12 drives: $2500"])
                server_cost = 2500 # 2U box
            else:
                add_output("Servers",
                           T.div[T.b["Note: too many drives per server, "]])

            server_capital_cost = (server_cost + drives_per_server * drive_cost)
            total_server_cost = float(num_servers * server_capital_cost)
            add_output("Servers", T.div["Capital cost per server: $",
                                        server_capital_cost])
            add_output("Grid", T.div["Capital cost for all servers: $",
                                     number(total_server_cost)])

            # $44/server/mo power+space
            server_bandwidth = max(server_inbound_byte_rate,
                                   server_outbound_byte_rate)
            server_bandwidth_mbps = mathutil.div_ceil(int(server_bandwidth*8),
                                                      int(1e6))
            server_monthly_cost = 70*server_bandwidth_mbps + 44
            add_output("Servers", T.div["Monthly cost per server: $",
                                        server_monthly_cost])
            add_output("Users", T.div["Capital cost per user: $",
                                      number(total_server_cost / num_users)])

            any_drive_failure_rate = total_drives * drive_failure_rate
            any_drive_MTBF = 1 // any_drive_failure_rate # in seconds
            any_drive_MTBF_days = any_drive_MTBF / 86400
            add_output("Drives",
                       T.div["MTBF (any drive): ",
                             number(any_drive_MTBF_days), " days"])
            drive_replacement_monthly_cost = (float(drive_cost)
                                              * any_drive_failure_rate
                                              * 30*86400) # ~one month of seconds
            add_output("Grid",
                       T.div["Monthly cost of replacing drives: $",
                             number(drive_replacement_monthly_cost)])

            total_server_monthly_cost = float(num_servers * server_monthly_cost
                                              + drive_replacement_monthly_cost)

            add_output("Grid", T.div["Monthly cost for all servers: $",
                                     number(total_server_monthly_cost)])
            add_output("Users",
                       T.div["Monthly cost per user: $",
                             number(total_server_monthly_cost / num_users)])

            file_dBA = self.file_availability(k, n, server_dBA)
            user_files_dBA = self.many_files_availability(file_dBA,
                                                          files_per_user)
            all_files_dBA = self.many_files_availability(file_dBA, total_files)
            add_output("Grid",
                       T.div["availability of: ",
                             "arbitrary file = %d dBA, " % file_dBA,
                             "all files of user1 = %d dBA, " % user_files_dBA,
                             "all files in grid = %d dBA" % all_files_dBA,
                             ])

            time_until_files_lost = (n-k+1) / any_drive_failure_rate
            add_output("Grid",
                       T.div["avg time until files are lost: ",
                             number(time_until_files_lost, "s"), ", ",
                             number(time_until_files_lost/86400, " days"),
                             ])

            share_data_loss_rate = any_drive_failure_rate * drive_size
            add_output("Grid",
                       T.div["share data loss rate: ",
                             number(share_data_loss_rate, "Bps")])

            # the worst-case survival numbers occur when we do a file check
            # and the file is just above the threshold for repair (so we
            # decide to not repair it). The question is then: what is the
            # chance that the file will decay so badly before the next check
            # that we can't recover it? The resulting probability is per
            # check interval.
            # Note that the chances of us getting into this situation are low.
            P_disk_failure_during_interval = (drive_failure_rate *
                                              file_check_interval)
            disk_failure_dBF = 10*math.log10(P_disk_failure_during_interval)
            disk_failure_dBA = -disk_failure_dBF
            file_survives_dBA = self.file_availability(k, repair_threshold,
                                                       disk_failure_dBA)
            user_files_survives_dBA = self.many_files_availability(
                file_survives_dBA, files_per_user)
            all_files_survives_dBA = self.many_files_availability(
                file_survives_dBA, total_files)
            add_output("Grid",
                       T.div["survival of: ",
                             "arbitrary file = %d dBA, " % file_survives_dBA,
                             "all files of user1 = %d dBA, " %
                             user_files_survives_dBA,
                             "all files in grid = %d dBA" %
                             all_files_survives_dBA,
                             " (per worst-case check interval)",
                             ])

        all_sections = []
        all_sections.append(build_section("Users"))
        all_sections.append(build_section("Servers"))
        all_sections.append(build_section("Drives"))
        if "Grid" in sections:
            all_sections.append(build_section("Grid"))

        f = T.form(action=".", method="post", enctype="multipart/form-data")
        action = "Calculate"
        f = f[T.input(type="hidden", name="filled", value="true"),
              T.input(type="submit", value=action),
              all_sections,
              ]

        try:
            from allmydata import reliability
            # we import this just to test to see if the page is available
            _hush_pyflakes = reliability
            f = [T.div[T.a(href="../reliability")["Reliability Math"]], f]
        except ImportError:
            pass

        return f

    def file_availability(self, k, n, server_dBA):
        """
        The full formula for the availability of a specific file is::

         1 - sum([choose(N,i) * p**i * (1-p)**(N-i) for i in range(k)])

        Where choose(N,i) = N! / ( i! * (N-i)! ). Note that each term of
        this summation is the probability that there are exactly 'i' servers
        available, and what we're doing is adding up the cases where i is too
        low.

        This is a nuisance to calculate at all accurately, especially once N
        gets large, and when p is close to unity. So we make an engineering
        approximation: if (1-p) is very small, then each [i] term is much
        larger than the [i-1] term, and the sum is dominated by the i=k-1
        term. This only works for (1-p) < 10%, and when the choose() function
        doesn't rise fast enough to compensate. For high-expansion encodings
        (3-of-10, 25-of-100), the choose() function is rising at the same
        time as the (1-p)**(N-i) term, so that's not an issue. For
        low-expansion encodings (7-of-10, 75-of-100) the two values are
        moving in opposite directions, so more care must be taken.

        Note that the p**i term has only a minor effect as long as (1-p)*N is
        small, and even then the effect is attenuated by the 1-p term.
        """

        assert server_dBA > 9 # >=90% availability to use the approximation
        factor = binomial(n, k-1)
        factor_dBA = 10 * math.log10(factor)
        exponent = n - k + 1
        file_dBA = server_dBA * exponent - factor_dBA
        return file_dBA
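
    # Worked example of the approximation above (illustration only): for
    # 3-of-10 encoding with servers at 20 dBA (99% available), factor =
    # binomial(10, 2) = 45, factor_dBA = 10*log10(45) ~= 16.5, and exponent =
    # 10 - 3 + 1 = 8, so file_dBA ~= 20*8 - 16.5 = 143.5 dBA.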

    def many_files_availability(self, file_dBA, num_files):
        """The probability that 'num_files' independent Bernoulli trials will
        all succeed (i.e. we can recover all files in the grid at any given
        moment) is p**num_files. Since p is close to unity, we express p in
        dBA instead, so we can get useful precision on q (=1-p), and then
        the formula becomes::

         P_some_files_unavailable = 1 - (1 - q)**num_files

        That (1-q)**n expands with the usual binomial sequence, 1 - nq +
        Xq**2 ... + Xq**n . We use the same approximation as before, since we
        know q is close to zero, and we get to ignore all the terms past -nq.
        """
        many_files_dBA = file_dBA - 10 * math.log10(num_files)
        return many_files_dBA
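
    # Continuing the example above (illustration only): if a single file sits
    # at ~143 dBA and the grid holds 1e6 files, the availability of the whole
    # set is about 143 - 10*log10(1e6) = 143 - 60 = 83 dBA.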