]> git.rkrishnan.org Git - tahoe-lafs/tahoe-lafs.git/blob - misc/operations_helpers/provisioning/provisioning.py
9d9af0ea6e92a7998debe24e6c1d6c1414589050
[tahoe-lafs/tahoe-lafs.git] / misc / operations_helpers / provisioning / provisioning.py
1
2 from nevow import inevow, rend, tags as T
3 import math
4 from allmydata.util import mathutil
5 from allmydata.web.common import getxmlfile
6
7 # factorial and binomial copied from
8 # http://mail.python.org/pipermail/python-list/2007-April/435718.html
9
def factorial(n):
    """factorial(n): return the factorial of the non-negative integer n.

    factorial(0) == 1. Raises AssertionError for n < 0.

    NOTE: the previous docstring claimed factorial(n) with n < 0 returned
    -factorial(abs(n)), but the assertion below has always rejected
    negative inputs; the docstring now matches actual behavior.
    """
    # Validate before doing any work -- the old code asserted only AFTER
    # the whole product had been computed.
    assert n >= 0
    result = 1
    # Start at 2: multiplying by 1 is a no-op.
    for i in range(2, n + 1):
        result *= i
    return result
20
def binomial(n, k):
    """Return C(n, k): the number of k-element subsets of an n-element set.

    Requires 0 <= k <= n (enforced by assertion, as before).
    Exact integer arithmetic throughout.
    """
    assert 0 <= k <= n
    # Symmetry: C(n, k) == C(n, n-k), so loop only min(k, n-k) times.
    # The old code looped up to n-k times and then divided by a freshly
    # recomputed factorial(n-k); it also used the Python-2-only xrange.
    k = min(k, n - k)
    if k == 0:
        return 1
    result = 1
    # Multiplicative formula: after i steps, result == C(n-k+i, i),
    # so every floor-division below is exact.
    for i in range(1, k + 1):
        result = result * (n - k + i) // i
    return result
35
36 class ProvisioningTool(rend.Page):
37     addSlash = True
38     docFactory = getxmlfile("provisioning.xhtml")
39
40     def render_forms(self, ctx, data):
41         req = inevow.IRequest(ctx)
42
43         def getarg(name, astype=int):
44             if req.method != "POST":
45                 return None
46             if name in req.fields:
47                 return astype(req.fields[name].value)
48             return None
49         return self.do_forms(getarg)
50
51
52     def do_forms(self, getarg):
53         filled = getarg("filled", bool)
54
55         def get_and_set(name, options, default=None, astype=int):
56             current_value = getarg(name, astype)
57             i_select = T.select(name=name)
58             for (count, description) in options:
59                 count = astype(count)
60                 if ((current_value is not None and count == current_value) or
61                     (current_value is None and count == default)):
62                     o = T.option(value=str(count), selected="true")[description]
63                 else:
64                     o = T.option(value=str(count))[description]
65                 i_select = i_select[o]
66             if current_value is None:
67                 current_value = default
68             return current_value, i_select
69
70         sections = {}
71         def add_input(section, text, entry):
72             if section not in sections:
73                 sections[section] = []
74             sections[section].extend([T.div[text, ": ", entry], "\n"])
75
76         def add_output(section, entry):
77             if section not in sections:
78                 sections[section] = []
79             sections[section].extend([entry, "\n"])
80
81         def build_section(section):
82             return T.fieldset[T.legend[section], sections[section]]
83
84         def number(value, suffix=""):
85             scaling = 1
86             if value < 1:
87                 fmt = "%1.2g%s"
88             elif value < 100:
89                 fmt = "%.1f%s"
90             elif value < 1000:
91                 fmt = "%d%s"
92             elif value < 1e6:
93                 fmt = "%.2fk%s"; scaling = 1e3
94             elif value < 1e9:
95                 fmt = "%.2fM%s"; scaling = 1e6
96             elif value < 1e12:
97                 fmt = "%.2fG%s"; scaling = 1e9
98             elif value < 1e15:
99                 fmt = "%.2fT%s"; scaling = 1e12
100             elif value < 1e18:
101                 fmt = "%.2fP%s"; scaling = 1e15
102             else:
103                 fmt = "huge! %g%s"
104             return fmt % (value / scaling, suffix)
105
106         user_counts = [(5, "5 users"),
107                        (50, "50 users"),
108                        (200, "200 users"),
109                        (1000, "1k users"),
110                        (10000, "10k users"),
111                        (50000, "50k users"),
112                        (100000, "100k users"),
113                        (500000, "500k users"),
114                        (1000000, "1M users"),
115                        ]
116         num_users, i_num_users = get_and_set("num_users", user_counts, 50000)
117         add_input("Users",
118                   "How many users are on this network?", i_num_users)
119
120         files_per_user_counts = [(100, "100 files"),
121                                  (1000, "1k files"),
122                                  (10000, "10k files"),
123                                  (100000, "100k files"),
124                                  (1e6, "1M files"),
125                                  ]
126         files_per_user, i_files_per_user = get_and_set("files_per_user",
127                                                        files_per_user_counts,
128                                                        1000)
129         add_input("Users",
130                   "How many files for each user? (avg)",
131                   i_files_per_user)
132
133         space_per_user_sizes = [(1e6, "1MB"),
134                                 (10e6, "10MB"),
135                                 (100e6, "100MB"),
136                                 (200e6, "200MB"),
137                                 (1e9, "1GB"),
138                                 (2e9, "2GB"),
139                                 (5e9, "5GB"),
140                                 (10e9, "10GB"),
141                                 (100e9, "100GB"),
142                                 (1e12, "1TB"),
143                                 (2e12, "2TB"),
144                                 (5e12, "5TB"),
145                                 ]
146         # Estimate ~5gb per user as a more realistic case
147         space_per_user, i_space_per_user = get_and_set("space_per_user",
148                                                        space_per_user_sizes,
149                                                        5e9)
150         add_input("Users",
151                   "How much data for each user? (avg)",
152                   i_space_per_user)
153
154         sharing_ratios = [(1.0, "1.0x"),
155                           (1.1, "1.1x"),
156                           (2.0, "2.0x"),
157                           ]
158         sharing_ratio, i_sharing_ratio = get_and_set("sharing_ratio",
159                                                      sharing_ratios, 1.0,
160                                                      float)
161         add_input("Users",
162                   "What is the sharing ratio? (1.0x is no-sharing and"
163                   " no convergence)", i_sharing_ratio)
164
165         # Encoding parameters
166         encoding_choices = [("3-of-10-5", "3.3x (3-of-10, repair below 5)"),
167                             ("3-of-10-8", "3.3x (3-of-10, repair below 8)"),
168                             ("5-of-10-7", "2x (5-of-10, repair below 7)"),
169                             ("8-of-10-9", "1.25x (8-of-10, repair below 9)"),
170                             ("27-of-30-28", "1.1x (27-of-30, repair below 28"),
171                             ("25-of-100-50", "4x (25-of-100, repair below 50)"),
172                             ]
173         encoding_parameters, i_encoding_parameters = \
174                              get_and_set("encoding_parameters",
175                                          encoding_choices, "3-of-10-5", str)
176         encoding_pieces = encoding_parameters.split("-")
177         k = int(encoding_pieces[0])
178         assert encoding_pieces[1] == "of"
179         n = int(encoding_pieces[2])
180         # we repair the file when the number of available shares drops below
181         # this value
182         repair_threshold = int(encoding_pieces[3])
183
184         add_input("Servers",
185                   "What are the default encoding parameters?",
186                   i_encoding_parameters)
187
188         # Server info
189         num_server_choices = [ (5, "5 servers"),
190                                (10, "10 servers"),
191                                (15, "15 servers"),
192                                (30, "30 servers"),
193                                (50, "50 servers"),
194                                (100, "100 servers"),
195                                (200, "200 servers"),
196                                (300, "300 servers"),
197                                (500, "500 servers"),
198                                (1000, "1k servers"),
199                                (2000, "2k servers"),
200                                (5000, "5k servers"),
201                                (10e3, "10k servers"),
202                                (100e3, "100k servers"),
203                                (1e6, "1M servers"),
204                                ]
205         num_servers, i_num_servers = \
206                      get_and_set("num_servers", num_server_choices, 30, int)
207         add_input("Servers",
208                   "How many servers are there?", i_num_servers)
209
210         # availability is measured in dBA = -dBF, where 0dBF is 100% failure,
211         # 10dBF is 10% failure, 20dBF is 1% failure, etc
212         server_dBA_choices = [ (10, "90% [10dBA] (2.4hr/day)"),
213                                (13, "95% [13dBA] (1.2hr/day)"),
214                                (20, "99% [20dBA] (14min/day or 3.5days/year)"),
215                                (23, "99.5% [23dBA] (7min/day or 1.75days/year)"),
216                                (30, "99.9% [30dBA] (87sec/day or 9hours/year)"),
217                                (40, "99.99% [40dBA] (60sec/week or 53min/year)"),
218                                (50, "99.999% [50dBA] (5min per year)"),
219                                ]
220         server_dBA, i_server_availability = \
221                     get_and_set("server_availability",
222                                 server_dBA_choices,
223                                 20, int)
224         add_input("Servers",
225                   "What is the server availability?", i_server_availability)
226
227         drive_MTBF_choices = [ (40, "40,000 Hours"),
228                                ]
229         drive_MTBF, i_drive_MTBF = \
230                     get_and_set("drive_MTBF", drive_MTBF_choices, 40, int)
231         add_input("Drives",
232                   "What is the hard drive MTBF?", i_drive_MTBF)
233         # http://www.tgdaily.com/content/view/30990/113/
234         # http://labs.google.com/papers/disk_failures.pdf
235         # google sees:
236         #  1.7% of the drives they replaced were 0-1 years old
237         #  8% of the drives they repalced were 1-2 years old
238         #  8.6% were 2-3 years old
239         #  6% were 3-4 years old, about 8% were 4-5 years old
240
241         drive_size_choices = [ (100, "100 GB"),
242                                (250, "250 GB"),
243                                (500, "500 GB"),
244                                (750, "750 GB"),
245                                (1000, "1000 GB"),
246                                (2000, "2000 GB"),
247                                (3000, "3000 GB"),
248                                ]
249         drive_size, i_drive_size = \
250                     get_and_set("drive_size", drive_size_choices, 3000, int)
251         drive_size = drive_size * 1e9
252         add_input("Drives",
253                   "What is the capacity of each hard drive?", i_drive_size)
254         drive_failure_model_choices = [ ("E", "Exponential"),
255                                         ("U", "Uniform"),
256                                         ]
257         drive_failure_model, i_drive_failure_model = \
258                              get_and_set("drive_failure_model",
259                                          drive_failure_model_choices,
260                                          "E", str)
261         add_input("Drives",
262                   "How should we model drive failures?", i_drive_failure_model)
263
264         # drive_failure_rate is in failures per second
265         if drive_failure_model == "E":
266             drive_failure_rate = 1.0 / (drive_MTBF * 1000 * 3600)
267         else:
268             drive_failure_rate = 0.5 / (drive_MTBF * 1000 * 3600)
269
270         # deletion/gc/ownership mode
271         ownership_choices = [ ("A", "no deletion, no gc, no owners"),
272                               ("B", "deletion, no gc, no owners"),
273                               ("C", "deletion, share timers, no owners"),
274                               ("D", "deletion, no gc, yes owners"),
275                               ("E", "deletion, owner timers"),
276                               ]
277         ownership_mode, i_ownership_mode = \
278                         get_and_set("ownership_mode", ownership_choices,
279                                     "A", str)
280         add_input("Servers",
281                   "What is the ownership mode?", i_ownership_mode)
282
283         # client access behavior
284         access_rates = [ (1, "one file per day"),
285                          (10, "10 files per day"),
286                          (100, "100 files per day"),
287                          (1000, "1k files per day"),
288                          (10e3, "10k files per day"),
289                          (100e3, "100k files per day"),
290                          ]
291         download_files_per_day, i_download_rate = \
292                                 get_and_set("download_rate", access_rates,
293                                             100, int)
294         add_input("Users",
295                   "How many files are downloaded per day?", i_download_rate)
296         download_rate = 1.0 * download_files_per_day / (24*60*60)
297
298         upload_files_per_day, i_upload_rate = \
299                               get_and_set("upload_rate", access_rates,
300                                           10, int)
301         add_input("Users",
302                   "How many files are uploaded per day?", i_upload_rate)
303         upload_rate = 1.0 * upload_files_per_day / (24*60*60)
304
305         delete_files_per_day, i_delete_rate = \
306                               get_and_set("delete_rate", access_rates,
307                                           10, int)
308         add_input("Users",
309                   "How many files are deleted per day?", i_delete_rate)
310         delete_rate = 1.0 * delete_files_per_day / (24*60*60)
311
312
313         # the value is in days
314         lease_timers = [ (1, "one refresh per day"),
315                          (7, "one refresh per week"),
316                          ]
317         lease_timer, i_lease = \
318                      get_and_set("lease_timer", lease_timers,
319                                  7, int)
320         add_input("Users",
321                   "How frequently do clients refresh files or accounts? "
322                   "(if necessary)",
323                   i_lease)
324         seconds_per_lease = 24*60*60*lease_timer
325
326         check_timer_choices = [ (1, "every week"),
327                                 (4, "every month"),
328                                 (8, "every two months"),
329                                 (16, "every four months"),
330                                 ]
331         check_timer, i_check_timer = \
332                      get_and_set("check_timer", check_timer_choices, 4, int)
333         add_input("Users",
334                   "How frequently should we check on each file?",
335                   i_check_timer)
336         file_check_interval = check_timer * 7 * 24 * 3600
337
338
339         if filled:
340             add_output("Users", T.div["Total users: %s" % number(num_users)])
341             add_output("Users",
342                        T.div["Files per user: %s" % number(files_per_user)])
343             file_size = 1.0 * space_per_user / files_per_user
344             add_output("Users",
345                        T.div["Average file size: ", number(file_size)])
346             total_files = num_users * files_per_user / sharing_ratio
347
348             add_output("Grid",
349                        T.div["Total number of files in grid: ",
350                              number(total_files)])
351             total_space = num_users * space_per_user / sharing_ratio
352             add_output("Grid",
353                        T.div["Total volume of plaintext in grid: ",
354                              number(total_space, "B")])
355
356             total_shares = n * total_files
357             add_output("Grid",
358                        T.div["Total shares in grid: ", number(total_shares)])
359             expansion = float(n) / float(k)
360
361             total_usage = expansion * total_space
362             add_output("Grid",
363                        T.div["Share data in grid: ", number(total_usage, "B")])
364
365             if n > num_servers:
366                 # silly configuration, causes Tahoe2 to wrap and put multiple
367                 # shares on some servers.
368                 add_output("Servers",
369                            T.div["non-ideal: more shares than servers"
370                                  " (n=%d, servers=%d)" % (n, num_servers)])
371                 # every file has at least one share on every server
372                 buckets_per_server = total_files
373                 shares_per_server = total_files * ((1.0 * n) / num_servers)
374             else:
375                 # if nobody is full, then no lease requests will be turned
376                 # down for lack of space, and no two shares for the same file
377                 # will share a server. Therefore the chance that any given
378                 # file has a share on any given server is n/num_servers.
379                 buckets_per_server = total_files * ((1.0 * n) / num_servers)
380                 # since each such represented file only puts one share on a
381                 # server, the total number of shares per server is the same.
382                 shares_per_server = buckets_per_server
383             add_output("Servers",
384                        T.div["Buckets per server: ",
385                              number(buckets_per_server)])
386             add_output("Servers",
387                        T.div["Shares per server: ",
388                              number(shares_per_server)])
389
390             # how much space is used on the storage servers for the shares?
391             #  the share data itself
392             share_data_per_server = total_usage / num_servers
393             add_output("Servers",
394                        T.div["Share data per server: ",
395                              number(share_data_per_server, "B")])
396             # this is determined empirically. H=hashsize=32, for a one-segment
397             # file and 3-of-10 encoding
398             share_validation_per_server = 266 * shares_per_server
399             # this could be 423*buckets_per_server, if we moved the URI
400             # extension into a separate file, but that would actually consume
401             # *more* space (minimum filesize is 4KiB), unless we moved all
402             # shares for a given bucket into a single file.
403             share_uri_extension_per_server = 423 * shares_per_server
404
405             # ownership mode adds per-bucket data
406             H = 32 # depends upon the desired security of delete/refresh caps
407             # bucket_lease_size is the amount of data needed to keep track of
408             # the delete/refresh caps for each bucket.
409             bucket_lease_size = 0
410             client_bucket_refresh_rate = 0
411             owner_table_size = 0
412             if ownership_mode in ("B", "C", "D", "E"):
413                 bucket_lease_size = sharing_ratio * 1.0 * H
414             if ownership_mode in ("B", "C"):
415                 # refreshes per second per client
416                 client_bucket_refresh_rate = (1.0 * n * files_per_user /
417                                               seconds_per_lease)
418                 add_output("Users",
419                            T.div["Client share refresh rate (outbound): ",
420                                  number(client_bucket_refresh_rate, "Hz")])
421                 server_bucket_refresh_rate = (client_bucket_refresh_rate *
422                                               num_users / num_servers)
423                 add_output("Servers",
424                            T.div["Server share refresh rate (inbound): ",
425                                  number(server_bucket_refresh_rate, "Hz")])
426             if ownership_mode in ("D", "E"):
427                 # each server must maintain a bidirectional mapping from
428                 # buckets to owners. One way to implement this would be to
429                 # put a list of four-byte owner numbers into each bucket, and
430                 # a list of four-byte share numbers into each owner (although
431                 # of course we'd really just throw it into a database and let
432                 # the experts take care of the details).
433                 owner_table_size = 2*(buckets_per_server * sharing_ratio * 4)
434
435             if ownership_mode in ("E",):
436                 # in this mode, clients must refresh one timer per server
437                 client_account_refresh_rate = (1.0 * num_servers /
438                                                seconds_per_lease)
439                 add_output("Users",
440                            T.div["Client account refresh rate (outbound): ",
441                                  number(client_account_refresh_rate, "Hz")])
442                 server_account_refresh_rate = (client_account_refresh_rate *
443                                               num_users / num_servers)
444                 add_output("Servers",
445                            T.div["Server account refresh rate (inbound): ",
446                                  number(server_account_refresh_rate, "Hz")])
447
448             # TODO: buckets vs shares here is a bit wonky, but in
449             # non-wrapping grids it shouldn't matter
450             share_lease_per_server = bucket_lease_size * buckets_per_server
451             share_ownertable_per_server = owner_table_size
452
453             share_space_per_server = (share_data_per_server +
454                                       share_validation_per_server +
455                                       share_uri_extension_per_server +
456                                       share_lease_per_server +
457                                       share_ownertable_per_server)
458             add_output("Servers",
459                        T.div["Share space per server: ",
460                              number(share_space_per_server, "B"),
461                              " (data ",
462                              number(share_data_per_server, "B"),
463                              ", validation ",
464                              number(share_validation_per_server, "B"),
465                              ", UEB ",
466                              number(share_uri_extension_per_server, "B"),
467                              ", lease ",
468                              number(share_lease_per_server, "B"),
469                              ", ownertable ",
470                              number(share_ownertable_per_server, "B"),
471                              ")",
472                              ])
473
474
475             # rates
476             client_download_share_rate = download_rate * k
477             client_download_byte_rate = download_rate * file_size
478             add_output("Users",
479                        T.div["download rate: shares = ",
480                              number(client_download_share_rate, "Hz"),
481                              " , bytes = ",
482                              number(client_download_byte_rate, "Bps"),
483                              ])
484             total_file_check_rate = 1.0 * total_files / file_check_interval
485             client_check_share_rate = total_file_check_rate / num_users
486             add_output("Users",
487                        T.div["file check rate: shares = ",
488                              number(client_check_share_rate, "Hz"),
489                              " (interval = %s)" %
490                              number(1 / client_check_share_rate, "s"),
491                              ])
492
493             client_upload_share_rate = upload_rate * n
494             # TODO: doesn't include overhead
495             client_upload_byte_rate = upload_rate * file_size * expansion
496             add_output("Users",
497                        T.div["upload rate: shares = ",
498                              number(client_upload_share_rate, "Hz"),
499                              " , bytes = ",
500                              number(client_upload_byte_rate, "Bps"),
501                              ])
502             client_delete_share_rate = delete_rate * n
503
504             server_inbound_share_rate = (client_upload_share_rate *
505                                          num_users / num_servers)
506             server_inbound_byte_rate = (client_upload_byte_rate *
507                                         num_users / num_servers)
508             add_output("Servers",
509                        T.div["upload rate (inbound): shares = ",
510                              number(server_inbound_share_rate, "Hz"),
511                              " , bytes = ",
512                               number(server_inbound_byte_rate, "Bps"),
513                              ])
514             add_output("Servers",
515                        T.div["share check rate (inbound): ",
516                              number(total_file_check_rate * n / num_servers,
517                                     "Hz"),
518                              ])
519
520             server_share_modify_rate = ((client_upload_share_rate +
521                                          client_delete_share_rate) *
522                                          num_users / num_servers)
523             add_output("Servers",
524                        T.div["share modify rate: shares = ",
525                              number(server_share_modify_rate, "Hz"),
526                              ])
527
528             server_outbound_share_rate = (client_download_share_rate *
529                                           num_users / num_servers)
530             server_outbound_byte_rate = (client_download_byte_rate *
531                                          num_users / num_servers)
532             add_output("Servers",
533                        T.div["download rate (outbound): shares = ",
534                              number(server_outbound_share_rate, "Hz"),
535                              " , bytes = ",
536                               number(server_outbound_byte_rate, "Bps"),
537                              ])
538
539
540             total_share_space = num_servers * share_space_per_server
541             add_output("Grid",
542                        T.div["Share space consumed: ",
543                              number(total_share_space, "B")])
544             add_output("Grid",
545                        T.div[" %% validation: %.2f%%" %
546                              (100.0 * share_validation_per_server /
547                               share_space_per_server)])
548             add_output("Grid",
549                        T.div[" %% uri-extension: %.2f%%" %
550                              (100.0 * share_uri_extension_per_server /
551                               share_space_per_server)])
552             add_output("Grid",
553                        T.div[" %% lease data: %.2f%%" %
554                              (100.0 * share_lease_per_server /
555                               share_space_per_server)])
556             add_output("Grid",
557                        T.div[" %% owner data: %.2f%%" %
558                              (100.0 * share_ownertable_per_server /
559                               share_space_per_server)])
560             add_output("Grid",
561                        T.div[" %% share data: %.2f%%" %
562                              (100.0 * share_data_per_server /
563                               share_space_per_server)])
564             add_output("Grid",
565                        T.div["file check rate: ",
566                              number(total_file_check_rate,
567                                     "Hz")])
568
569             total_drives = max(mathutil.div_ceil(int(total_share_space),
570                                                  int(drive_size)),
571                                num_servers)
572             add_output("Drives",
573                        T.div["Total drives: ", number(total_drives), " drives"])
574             drives_per_server = mathutil.div_ceil(total_drives, num_servers)
575             add_output("Servers",
576                        T.div["Drives per server: ", drives_per_server])
577
578             # costs
579             if drive_size == 3000 * 1e9:
580                 add_output("Servers", T.div["3000GB drive: $250 each"])
581                 drive_cost = 250
582             else:
583                 add_output("Servers",
584                            T.div[T.b["unknown cost per drive, assuming $100"]])
585                 drive_cost = 100
586
587             if drives_per_server <= 4:
588                 add_output("Servers", T.div["1U box with <= 4 drives: $1500"])
589                 server_cost = 1500 # typical 1U box
590             elif drives_per_server <= 12:
591                 add_output("Servers", T.div["2U box with <= 12 drives: $2500"])
592                 server_cost = 2500 # 2U box
593             else:
594                 add_output("Servers",
595                            T.div[T.b["Note: too many drives per server, "
596                                      "assuming $3000"]])
597                 server_cost = 3000
598
599             server_capital_cost = (server_cost + drives_per_server * drive_cost)
600             total_server_cost = float(num_servers * server_capital_cost)
601             add_output("Servers", T.div["Capital cost per server: $",
602                                         server_capital_cost])
603             add_output("Grid", T.div["Capital cost for all servers: $",
604                                      number(total_server_cost)])
605             # $70/Mbps/mo
606             # $44/server/mo power+space
607             server_bandwidth = max(server_inbound_byte_rate,
608                                    server_outbound_byte_rate)
609             server_bandwidth_mbps = mathutil.div_ceil(int(server_bandwidth*8),
610                                                       int(1e6))
611             server_monthly_cost = 70*server_bandwidth_mbps + 44
612             add_output("Servers", T.div["Monthly cost per server: $",
613                                         server_monthly_cost])
614             add_output("Users", T.div["Capital cost per user: $",
615                                       number(total_server_cost / num_users)])
616
617             # reliability
618             any_drive_failure_rate = total_drives * drive_failure_rate
619             any_drive_MTBF = 1 // any_drive_failure_rate  # in seconds
620             any_drive_MTBF_days = any_drive_MTBF / 86400
621             add_output("Drives",
622                        T.div["MTBF (any drive): ",
623                              number(any_drive_MTBF_days), " days"])
624             drive_replacement_monthly_cost = (float(drive_cost)
625                                               * any_drive_failure_rate
626                                               *30*86400)
627             add_output("Grid",
628                        T.div["Monthly cost of replacing drives: $",
629                              number(drive_replacement_monthly_cost)])
630
631             total_server_monthly_cost = float(num_servers * server_monthly_cost
632                                               + drive_replacement_monthly_cost)
633
634             add_output("Grid", T.div["Monthly cost for all servers: $",
635                                      number(total_server_monthly_cost)])
636             add_output("Users",
637                        T.div["Monthly cost per user: $",
638                              number(total_server_monthly_cost / num_users)])
639
640             # availability
641             file_dBA = self.file_availability(k, n, server_dBA)
642             user_files_dBA = self.many_files_availability(file_dBA,
643                                                           files_per_user)
644             all_files_dBA = self.many_files_availability(file_dBA, total_files)
645             add_output("Users",
646                        T.div["availability of: ",
647                              "arbitrary file = %d dBA, " % file_dBA,
648                              "all files of user1 = %d dBA, " % user_files_dBA,
649                              "all files in grid = %d dBA" % all_files_dBA,
650                              ],
651                        )
652
653             time_until_files_lost = (n-k+1) / any_drive_failure_rate
654             add_output("Grid",
655                        T.div["avg time until files are lost: ",
656                              number(time_until_files_lost, "s"), ", ",
657                              number(time_until_files_lost/86400, " days"),
658                              ])
659
660             share_data_loss_rate = any_drive_failure_rate * drive_size
661             add_output("Grid",
662                        T.div["share data loss rate: ",
663                              number(share_data_loss_rate,"Bps")])
664
665             # the worst-case survival numbers occur when we do a file check
666             # and the file is just above the threshold for repair (so we
667             # decide to not repair it). The question is then: what is the
668             # chance that the file will decay so badly before the next check
669             # that we can't recover it? The resulting probability is per
670             # check interval.
671             # Note that the chances of us getting into this situation are low.
672             P_disk_failure_during_interval = (drive_failure_rate *
673                                               file_check_interval)
674             disk_failure_dBF = 10*math.log10(P_disk_failure_during_interval)
675             disk_failure_dBA = -disk_failure_dBF
676             file_survives_dBA = self.file_availability(k, repair_threshold,
677                                                        disk_failure_dBA)
678             user_files_survives_dBA = self.many_files_availability( \
679                 file_survives_dBA, files_per_user)
680             all_files_survives_dBA = self.many_files_availability( \
681                 file_survives_dBA, total_files)
682             add_output("Users",
683                        T.div["survival of: ",
684                              "arbitrary file = %d dBA, " % file_survives_dBA,
685                              "all files of user1 = %d dBA, " %
686                              user_files_survives_dBA,
687                              "all files in grid = %d dBA" %
688                              all_files_survives_dBA,
689                              " (per worst-case check interval)",
690                              ])
691
692
693
694         all_sections = []
695         all_sections.append(build_section("Users"))
696         all_sections.append(build_section("Servers"))
697         all_sections.append(build_section("Drives"))
698         if "Grid" in sections:
699             all_sections.append(build_section("Grid"))
700
701         f = T.form(action=".", method="post", enctype="multipart/form-data")
702
703         if filled:
704             action = "Recompute"
705         else:
706             action = "Compute"
707
708         f = f[T.input(type="hidden", name="filled", value="true"),
709               T.input(type="submit", value=action),
710               all_sections,
711               ]
712
713         try:
714             from allmydata import reliability
715             # we import this just to test to see if the page is available
716             _hush_pyflakes = reliability
717             del _hush_pyflakes
718             f = [T.div[T.a(href="../reliability")["Reliability Math"]], f]
719         except ImportError:
720             pass
721
722         return f
723
724     def file_availability(self, k, n, server_dBA):
725         """
726         The full formula for the availability of a specific file is::
727
728          1 - sum([choose(N,i) * p**i * (1-p)**(N-i)] for i in range(k)])
729
730         Where choose(N,i) = N! / ( i! * (N-i)! ) . Note that each term of
731         this summation is the probability that there are exactly 'i' servers
732         available, and what we're doing is adding up the cases where i is too
733         low.
734
735         This is a nuisance to calculate at all accurately, especially once N
736         gets large, and when p is close to unity. So we make an engineering
737         approximation: if (1-p) is very small, then each [i] term is much
738         larger than the [i-1] term, and the sum is dominated by the i=k-1
739         term. This only works for (1-p) < 10%, and when the choose() function
740         doesn't rise fast enough to compensate. For high-expansion encodings
741         (3-of-10, 25-of-100), the choose() function is rising at the same
742         time as the (1-p)**(N-i) term, so that's not an issue. For
743         low-expansion encodings (7-of-10, 75-of-100) the two values are
744         moving in opposite directions, so more care must be taken.
745
746         Note that the p**i term has only a minor effect as long as (1-p)*N is
747         small, and even then the effect is attenuated by the 1-p term.
748         """
749
750         assert server_dBA > 9  # >=90% availability to use the approximation
751         factor = binomial(n, k-1)
752         factor_dBA = 10 * math.log10(factor)
753         exponent = n - k + 1
754         file_dBA = server_dBA * exponent - factor_dBA
755         return file_dBA
756
757     def many_files_availability(self, file_dBA, num_files):
758         """The probability that 'num_files' independent bernoulli trials will
759         succeed (i.e. we can recover all files in the grid at any given
760         moment) is p**num_files . Since p is close to unity, we express in p
761         in dBA instead, so we can get useful precision on q (=1-p), and then
762         the formula becomes::
763
764          P_some_files_unavailable = 1 - (1 - q)**num_files
765
766         That (1-q)**n expands with the usual binomial sequence, 1 - nq +
767         Xq**2 ... + Xq**n . We use the same approximation as before, since we
768         know q is close to zero, and we get to ignore all the terms past -nq.
769         """
770
771         many_files_dBA = file_dBA - 10 * math.log10(num_files)
772         return many_files_dBA