From: Brian Warner Date: Wed, 15 Feb 2012 18:18:53 +0000 (+0000) Subject: remove 'provisioning'/'reliability' from WUI, add to misc/operations_helpers X-Git-Url: https://git.rkrishnan.org/components/com_hotproperty/simplejson/configuration.rst?a=commitdiff_plain;h=11f939470467863f2bd200aeada7c7c58f55ee6e;p=tahoe-lafs%2Ftahoe-lafs.git remove 'provisioning'/'reliability' from WUI, add to misc/operations_helpers Also remove docs related to reliability/provisioning pages --- diff --git a/docs/architecture.rst b/docs/architecture.rst index 3a9e08fb..362a1792 100644 --- a/docs/architecture.rst +++ b/docs/architecture.rst @@ -553,9 +553,3 @@ will be able to reduce the expansion factor down to a bare minimum while still retaining high reliability, but large unstable grids (where nodes are coming and going very quickly) may require more repair/verification bandwidth than actual upload/download traffic. - -Tahoe-LAFS nodes that run a webserver have a page dedicated to provisioning -decisions: this tool may help you evaluate different expansion factors and -view the disk consumption of each. It is also acquiring some sections with -availability/reliability numbers, as well as preliminary cost analysis data. -This tool will continue to evolve as our analysis improves. diff --git a/docs/frontends/webapi.rst b/docs/frontends/webapi.rst index 47ab7542..b67ee334 100644 --- a/docs/frontends/webapi.rst +++ b/docs/frontends/webapi.rst @@ -1805,17 +1805,6 @@ This is the "Welcome Page", and contains a few distinct sections:: implementation hashes synchronously, so clients will probably never see progress-hash!=1.0). -``GET /provisioning/`` - - This page provides a basic tool to predict the likely storage and bandwidth - requirements of a large Tahoe grid. It provides forms to input things like - total number of users, number of files per user, average file size, number - of servers, expansion ratio, hard drive failure rate, etc. It then provides - numbers like how many disks per server will be needed, how many read - operations per second should be expected, and the likely MTBF for files in - the grid. This information is very preliminary, and the model upon which it - is based still needs a lot of work. - ``GET /helper_status/`` If the node is running a helper (i.e. if [helper]enabled is set to True in diff --git a/misc/operations_helpers/provisioning/provisioning.py b/misc/operations_helpers/provisioning/provisioning.py new file mode 100644 index 00000000..9d9af0ea --- /dev/null +++ b/misc/operations_helpers/provisioning/provisioning.py @@ -0,0 +1,772 @@ + +from nevow import inevow, rend, tags as T +import math +from allmydata.util import mathutil +from allmydata.web.common import getxmlfile + +# factorial and binomial copied from +# http://mail.python.org/pipermail/python-list/2007-April/435718.html + +def factorial(n): + """factorial(n): return the factorial of the integer n. + factorial(0) = 1 + factorial(n) with n<0 is -factorial(abs(n)) + """ + result = 1 + for i in xrange(1, abs(n)+1): + result *= i + assert n >= 0 + return result + +def binomial(n, k): + assert 0 <= k <= n + if k == 0 or k == n: + return 1 + # calculate n!/k! as one product, avoiding factors that + # just get canceled + P = k+1 + for i in xrange(k+2, n+1): + P *= i + # if you are paranoid: + # C, rem = divmod(P, factorial(n-k)) + # assert rem == 0 + # return C + return P//factorial(n-k) + +class ProvisioningTool(rend.Page): + addSlash = True + docFactory = getxmlfile("provisioning.xhtml") + + def render_forms(self, ctx, data): + req = inevow.IRequest(ctx) + + def getarg(name, astype=int): + if req.method != "POST": + return None + if name in req.fields: + return astype(req.fields[name].value) + return None + return self.do_forms(getarg) + + + def do_forms(self, getarg): + filled = getarg("filled", bool) + + def get_and_set(name, options, default=None, astype=int): + current_value = getarg(name, astype) + i_select = T.select(name=name) + for (count, description) in options: + count = astype(count) + if ((current_value is not None and count == current_value) or + (current_value is None and count == default)): + o = T.option(value=str(count), selected="true")[description] + else: + o = T.option(value=str(count))[description] + i_select = i_select[o] + if current_value is None: + current_value = default + return current_value, i_select + + sections = {} + def add_input(section, text, entry): + if section not in sections: + sections[section] = [] + sections[section].extend([T.div[text, ": ", entry], "\n"]) + + def add_output(section, entry): + if section not in sections: + sections[section] = [] + sections[section].extend([entry, "\n"]) + + def build_section(section): + return T.fieldset[T.legend[section], sections[section]] + + def number(value, suffix=""): + scaling = 1 + if value < 1: + fmt = "%1.2g%s" + elif value < 100: + fmt = "%.1f%s" + elif value < 1000: + fmt = "%d%s" + elif value < 1e6: + fmt = "%.2fk%s"; scaling = 1e3 + elif value < 1e9: + fmt = "%.2fM%s"; scaling = 1e6 + elif value < 1e12: + fmt = "%.2fG%s"; scaling = 1e9 + elif value < 1e15: + fmt = "%.2fT%s"; scaling = 1e12 + elif value < 1e18: + fmt = "%.2fP%s"; scaling = 1e15 + else: + fmt = "huge! %g%s" + return fmt % (value / scaling, suffix) + + user_counts = [(5, "5 users"), + (50, "50 users"), + (200, "200 users"), + (1000, "1k users"), + (10000, "10k users"), + (50000, "50k users"), + (100000, "100k users"), + (500000, "500k users"), + (1000000, "1M users"), + ] + num_users, i_num_users = get_and_set("num_users", user_counts, 50000) + add_input("Users", + "How many users are on this network?", i_num_users) + + files_per_user_counts = [(100, "100 files"), + (1000, "1k files"), + (10000, "10k files"), + (100000, "100k files"), + (1e6, "1M files"), + ] + files_per_user, i_files_per_user = get_and_set("files_per_user", + files_per_user_counts, + 1000) + add_input("Users", + "How many files for each user? (avg)", + i_files_per_user) + + space_per_user_sizes = [(1e6, "1MB"), + (10e6, "10MB"), + (100e6, "100MB"), + (200e6, "200MB"), + (1e9, "1GB"), + (2e9, "2GB"), + (5e9, "5GB"), + (10e9, "10GB"), + (100e9, "100GB"), + (1e12, "1TB"), + (2e12, "2TB"), + (5e12, "5TB"), + ] + # Estimate ~5gb per user as a more realistic case + space_per_user, i_space_per_user = get_and_set("space_per_user", + space_per_user_sizes, + 5e9) + add_input("Users", + "How much data for each user? (avg)", + i_space_per_user) + + sharing_ratios = [(1.0, "1.0x"), + (1.1, "1.1x"), + (2.0, "2.0x"), + ] + sharing_ratio, i_sharing_ratio = get_and_set("sharing_ratio", + sharing_ratios, 1.0, + float) + add_input("Users", + "What is the sharing ratio? (1.0x is no-sharing and" + " no convergence)", i_sharing_ratio) + + # Encoding parameters + encoding_choices = [("3-of-10-5", "3.3x (3-of-10, repair below 5)"), + ("3-of-10-8", "3.3x (3-of-10, repair below 8)"), + ("5-of-10-7", "2x (5-of-10, repair below 7)"), + ("8-of-10-9", "1.25x (8-of-10, repair below 9)"), + ("27-of-30-28", "1.1x (27-of-30, repair below 28"), + ("25-of-100-50", "4x (25-of-100, repair below 50)"), + ] + encoding_parameters, i_encoding_parameters = \ + get_and_set("encoding_parameters", + encoding_choices, "3-of-10-5", str) + encoding_pieces = encoding_parameters.split("-") + k = int(encoding_pieces[0]) + assert encoding_pieces[1] == "of" + n = int(encoding_pieces[2]) + # we repair the file when the number of available shares drops below + # this value + repair_threshold = int(encoding_pieces[3]) + + add_input("Servers", + "What are the default encoding parameters?", + i_encoding_parameters) + + # Server info + num_server_choices = [ (5, "5 servers"), + (10, "10 servers"), + (15, "15 servers"), + (30, "30 servers"), + (50, "50 servers"), + (100, "100 servers"), + (200, "200 servers"), + (300, "300 servers"), + (500, "500 servers"), + (1000, "1k servers"), + (2000, "2k servers"), + (5000, "5k servers"), + (10e3, "10k servers"), + (100e3, "100k servers"), + (1e6, "1M servers"), + ] + num_servers, i_num_servers = \ + get_and_set("num_servers", num_server_choices, 30, int) + add_input("Servers", + "How many servers are there?", i_num_servers) + + # availability is measured in dBA = -dBF, where 0dBF is 100% failure, + # 10dBF is 10% failure, 20dBF is 1% failure, etc + server_dBA_choices = [ (10, "90% [10dBA] (2.4hr/day)"), + (13, "95% [13dBA] (1.2hr/day)"), + (20, "99% [20dBA] (14min/day or 3.5days/year)"), + (23, "99.5% [23dBA] (7min/day or 1.75days/year)"), + (30, "99.9% [30dBA] (87sec/day or 9hours/year)"), + (40, "99.99% [40dBA] (60sec/week or 53min/year)"), + (50, "99.999% [50dBA] (5min per year)"), + ] + server_dBA, i_server_availability = \ + get_and_set("server_availability", + server_dBA_choices, + 20, int) + add_input("Servers", + "What is the server availability?", i_server_availability) + + drive_MTBF_choices = [ (40, "40,000 Hours"), + ] + drive_MTBF, i_drive_MTBF = \ + get_and_set("drive_MTBF", drive_MTBF_choices, 40, int) + add_input("Drives", + "What is the hard drive MTBF?", i_drive_MTBF) + # http://www.tgdaily.com/content/view/30990/113/ + # http://labs.google.com/papers/disk_failures.pdf + # google sees: + # 1.7% of the drives they replaced were 0-1 years old + # 8% of the drives they repalced were 1-2 years old + # 8.6% were 2-3 years old + # 6% were 3-4 years old, about 8% were 4-5 years old + + drive_size_choices = [ (100, "100 GB"), + (250, "250 GB"), + (500, "500 GB"), + (750, "750 GB"), + (1000, "1000 GB"), + (2000, "2000 GB"), + (3000, "3000 GB"), + ] + drive_size, i_drive_size = \ + get_and_set("drive_size", drive_size_choices, 3000, int) + drive_size = drive_size * 1e9 + add_input("Drives", + "What is the capacity of each hard drive?", i_drive_size) + drive_failure_model_choices = [ ("E", "Exponential"), + ("U", "Uniform"), + ] + drive_failure_model, i_drive_failure_model = \ + get_and_set("drive_failure_model", + drive_failure_model_choices, + "E", str) + add_input("Drives", + "How should we model drive failures?", i_drive_failure_model) + + # drive_failure_rate is in failures per second + if drive_failure_model == "E": + drive_failure_rate = 1.0 / (drive_MTBF * 1000 * 3600) + else: + drive_failure_rate = 0.5 / (drive_MTBF * 1000 * 3600) + + # deletion/gc/ownership mode + ownership_choices = [ ("A", "no deletion, no gc, no owners"), + ("B", "deletion, no gc, no owners"), + ("C", "deletion, share timers, no owners"), + ("D", "deletion, no gc, yes owners"), + ("E", "deletion, owner timers"), + ] + ownership_mode, i_ownership_mode = \ + get_and_set("ownership_mode", ownership_choices, + "A", str) + add_input("Servers", + "What is the ownership mode?", i_ownership_mode) + + # client access behavior + access_rates = [ (1, "one file per day"), + (10, "10 files per day"), + (100, "100 files per day"), + (1000, "1k files per day"), + (10e3, "10k files per day"), + (100e3, "100k files per day"), + ] + download_files_per_day, i_download_rate = \ + get_and_set("download_rate", access_rates, + 100, int) + add_input("Users", + "How many files are downloaded per day?", i_download_rate) + download_rate = 1.0 * download_files_per_day / (24*60*60) + + upload_files_per_day, i_upload_rate = \ + get_and_set("upload_rate", access_rates, + 10, int) + add_input("Users", + "How many files are uploaded per day?", i_upload_rate) + upload_rate = 1.0 * upload_files_per_day / (24*60*60) + + delete_files_per_day, i_delete_rate = \ + get_and_set("delete_rate", access_rates, + 10, int) + add_input("Users", + "How many files are deleted per day?", i_delete_rate) + delete_rate = 1.0 * delete_files_per_day / (24*60*60) + + + # the value is in days + lease_timers = [ (1, "one refresh per day"), + (7, "one refresh per week"), + ] + lease_timer, i_lease = \ + get_and_set("lease_timer", lease_timers, + 7, int) + add_input("Users", + "How frequently do clients refresh files or accounts? " + "(if necessary)", + i_lease) + seconds_per_lease = 24*60*60*lease_timer + + check_timer_choices = [ (1, "every week"), + (4, "every month"), + (8, "every two months"), + (16, "every four months"), + ] + check_timer, i_check_timer = \ + get_and_set("check_timer", check_timer_choices, 4, int) + add_input("Users", + "How frequently should we check on each file?", + i_check_timer) + file_check_interval = check_timer * 7 * 24 * 3600 + + + if filled: + add_output("Users", T.div["Total users: %s" % number(num_users)]) + add_output("Users", + T.div["Files per user: %s" % number(files_per_user)]) + file_size = 1.0 * space_per_user / files_per_user + add_output("Users", + T.div["Average file size: ", number(file_size)]) + total_files = num_users * files_per_user / sharing_ratio + + add_output("Grid", + T.div["Total number of files in grid: ", + number(total_files)]) + total_space = num_users * space_per_user / sharing_ratio + add_output("Grid", + T.div["Total volume of plaintext in grid: ", + number(total_space, "B")]) + + total_shares = n * total_files + add_output("Grid", + T.div["Total shares in grid: ", number(total_shares)]) + expansion = float(n) / float(k) + + total_usage = expansion * total_space + add_output("Grid", + T.div["Share data in grid: ", number(total_usage, "B")]) + + if n > num_servers: + # silly configuration, causes Tahoe2 to wrap and put multiple + # shares on some servers. + add_output("Servers", + T.div["non-ideal: more shares than servers" + " (n=%d, servers=%d)" % (n, num_servers)]) + # every file has at least one share on every server + buckets_per_server = total_files + shares_per_server = total_files * ((1.0 * n) / num_servers) + else: + # if nobody is full, then no lease requests will be turned + # down for lack of space, and no two shares for the same file + # will share a server. Therefore the chance that any given + # file has a share on any given server is n/num_servers. + buckets_per_server = total_files * ((1.0 * n) / num_servers) + # since each such represented file only puts one share on a + # server, the total number of shares per server is the same. + shares_per_server = buckets_per_server + add_output("Servers", + T.div["Buckets per server: ", + number(buckets_per_server)]) + add_output("Servers", + T.div["Shares per server: ", + number(shares_per_server)]) + + # how much space is used on the storage servers for the shares? + # the share data itself + share_data_per_server = total_usage / num_servers + add_output("Servers", + T.div["Share data per server: ", + number(share_data_per_server, "B")]) + # this is determined empirically. H=hashsize=32, for a one-segment + # file and 3-of-10 encoding + share_validation_per_server = 266 * shares_per_server + # this could be 423*buckets_per_server, if we moved the URI + # extension into a separate file, but that would actually consume + # *more* space (minimum filesize is 4KiB), unless we moved all + # shares for a given bucket into a single file. + share_uri_extension_per_server = 423 * shares_per_server + + # ownership mode adds per-bucket data + H = 32 # depends upon the desired security of delete/refresh caps + # bucket_lease_size is the amount of data needed to keep track of + # the delete/refresh caps for each bucket. + bucket_lease_size = 0 + client_bucket_refresh_rate = 0 + owner_table_size = 0 + if ownership_mode in ("B", "C", "D", "E"): + bucket_lease_size = sharing_ratio * 1.0 * H + if ownership_mode in ("B", "C"): + # refreshes per second per client + client_bucket_refresh_rate = (1.0 * n * files_per_user / + seconds_per_lease) + add_output("Users", + T.div["Client share refresh rate (outbound): ", + number(client_bucket_refresh_rate, "Hz")]) + server_bucket_refresh_rate = (client_bucket_refresh_rate * + num_users / num_servers) + add_output("Servers", + T.div["Server share refresh rate (inbound): ", + number(server_bucket_refresh_rate, "Hz")]) + if ownership_mode in ("D", "E"): + # each server must maintain a bidirectional mapping from + # buckets to owners. One way to implement this would be to + # put a list of four-byte owner numbers into each bucket, and + # a list of four-byte share numbers into each owner (although + # of course we'd really just throw it into a database and let + # the experts take care of the details). + owner_table_size = 2*(buckets_per_server * sharing_ratio * 4) + + if ownership_mode in ("E",): + # in this mode, clients must refresh one timer per server + client_account_refresh_rate = (1.0 * num_servers / + seconds_per_lease) + add_output("Users", + T.div["Client account refresh rate (outbound): ", + number(client_account_refresh_rate, "Hz")]) + server_account_refresh_rate = (client_account_refresh_rate * + num_users / num_servers) + add_output("Servers", + T.div["Server account refresh rate (inbound): ", + number(server_account_refresh_rate, "Hz")]) + + # TODO: buckets vs shares here is a bit wonky, but in + # non-wrapping grids it shouldn't matter + share_lease_per_server = bucket_lease_size * buckets_per_server + share_ownertable_per_server = owner_table_size + + share_space_per_server = (share_data_per_server + + share_validation_per_server + + share_uri_extension_per_server + + share_lease_per_server + + share_ownertable_per_server) + add_output("Servers", + T.div["Share space per server: ", + number(share_space_per_server, "B"), + " (data ", + number(share_data_per_server, "B"), + ", validation ", + number(share_validation_per_server, "B"), + ", UEB ", + number(share_uri_extension_per_server, "B"), + ", lease ", + number(share_lease_per_server, "B"), + ", ownertable ", + number(share_ownertable_per_server, "B"), + ")", + ]) + + + # rates + client_download_share_rate = download_rate * k + client_download_byte_rate = download_rate * file_size + add_output("Users", + T.div["download rate: shares = ", + number(client_download_share_rate, "Hz"), + " , bytes = ", + number(client_download_byte_rate, "Bps"), + ]) + total_file_check_rate = 1.0 * total_files / file_check_interval + client_check_share_rate = total_file_check_rate / num_users + add_output("Users", + T.div["file check rate: shares = ", + number(client_check_share_rate, "Hz"), + " (interval = %s)" % + number(1 / client_check_share_rate, "s"), + ]) + + client_upload_share_rate = upload_rate * n + # TODO: doesn't include overhead + client_upload_byte_rate = upload_rate * file_size * expansion + add_output("Users", + T.div["upload rate: shares = ", + number(client_upload_share_rate, "Hz"), + " , bytes = ", + number(client_upload_byte_rate, "Bps"), + ]) + client_delete_share_rate = delete_rate * n + + server_inbound_share_rate = (client_upload_share_rate * + num_users / num_servers) + server_inbound_byte_rate = (client_upload_byte_rate * + num_users / num_servers) + add_output("Servers", + T.div["upload rate (inbound): shares = ", + number(server_inbound_share_rate, "Hz"), + " , bytes = ", + number(server_inbound_byte_rate, "Bps"), + ]) + add_output("Servers", + T.div["share check rate (inbound): ", + number(total_file_check_rate * n / num_servers, + "Hz"), + ]) + + server_share_modify_rate = ((client_upload_share_rate + + client_delete_share_rate) * + num_users / num_servers) + add_output("Servers", + T.div["share modify rate: shares = ", + number(server_share_modify_rate, "Hz"), + ]) + + server_outbound_share_rate = (client_download_share_rate * + num_users / num_servers) + server_outbound_byte_rate = (client_download_byte_rate * + num_users / num_servers) + add_output("Servers", + T.div["download rate (outbound): shares = ", + number(server_outbound_share_rate, "Hz"), + " , bytes = ", + number(server_outbound_byte_rate, "Bps"), + ]) + + + total_share_space = num_servers * share_space_per_server + add_output("Grid", + T.div["Share space consumed: ", + number(total_share_space, "B")]) + add_output("Grid", + T.div[" %% validation: %.2f%%" % + (100.0 * share_validation_per_server / + share_space_per_server)]) + add_output("Grid", + T.div[" %% uri-extension: %.2f%%" % + (100.0 * share_uri_extension_per_server / + share_space_per_server)]) + add_output("Grid", + T.div[" %% lease data: %.2f%%" % + (100.0 * share_lease_per_server / + share_space_per_server)]) + add_output("Grid", + T.div[" %% owner data: %.2f%%" % + (100.0 * share_ownertable_per_server / + share_space_per_server)]) + add_output("Grid", + T.div[" %% share data: %.2f%%" % + (100.0 * share_data_per_server / + share_space_per_server)]) + add_output("Grid", + T.div["file check rate: ", + number(total_file_check_rate, + "Hz")]) + + total_drives = max(mathutil.div_ceil(int(total_share_space), + int(drive_size)), + num_servers) + add_output("Drives", + T.div["Total drives: ", number(total_drives), " drives"]) + drives_per_server = mathutil.div_ceil(total_drives, num_servers) + add_output("Servers", + T.div["Drives per server: ", drives_per_server]) + + # costs + if drive_size == 3000 * 1e9: + add_output("Servers", T.div["3000GB drive: $250 each"]) + drive_cost = 250 + else: + add_output("Servers", + T.div[T.b["unknown cost per drive, assuming $100"]]) + drive_cost = 100 + + if drives_per_server <= 4: + add_output("Servers", T.div["1U box with <= 4 drives: $1500"]) + server_cost = 1500 # typical 1U box + elif drives_per_server <= 12: + add_output("Servers", T.div["2U box with <= 12 drives: $2500"]) + server_cost = 2500 # 2U box + else: + add_output("Servers", + T.div[T.b["Note: too many drives per server, " + "assuming $3000"]]) + server_cost = 3000 + + server_capital_cost = (server_cost + drives_per_server * drive_cost) + total_server_cost = float(num_servers * server_capital_cost) + add_output("Servers", T.div["Capital cost per server: $", + server_capital_cost]) + add_output("Grid", T.div["Capital cost for all servers: $", + number(total_server_cost)]) + # $70/Mbps/mo + # $44/server/mo power+space + server_bandwidth = max(server_inbound_byte_rate, + server_outbound_byte_rate) + server_bandwidth_mbps = mathutil.div_ceil(int(server_bandwidth*8), + int(1e6)) + server_monthly_cost = 70*server_bandwidth_mbps + 44 + add_output("Servers", T.div["Monthly cost per server: $", + server_monthly_cost]) + add_output("Users", T.div["Capital cost per user: $", + number(total_server_cost / num_users)]) + + # reliability + any_drive_failure_rate = total_drives * drive_failure_rate + any_drive_MTBF = 1 // any_drive_failure_rate # in seconds + any_drive_MTBF_days = any_drive_MTBF / 86400 + add_output("Drives", + T.div["MTBF (any drive): ", + number(any_drive_MTBF_days), " days"]) + drive_replacement_monthly_cost = (float(drive_cost) + * any_drive_failure_rate + *30*86400) + add_output("Grid", + T.div["Monthly cost of replacing drives: $", + number(drive_replacement_monthly_cost)]) + + total_server_monthly_cost = float(num_servers * server_monthly_cost + + drive_replacement_monthly_cost) + + add_output("Grid", T.div["Monthly cost for all servers: $", + number(total_server_monthly_cost)]) + add_output("Users", + T.div["Monthly cost per user: $", + number(total_server_monthly_cost / num_users)]) + + # availability + file_dBA = self.file_availability(k, n, server_dBA) + user_files_dBA = self.many_files_availability(file_dBA, + files_per_user) + all_files_dBA = self.many_files_availability(file_dBA, total_files) + add_output("Users", + T.div["availability of: ", + "arbitrary file = %d dBA, " % file_dBA, + "all files of user1 = %d dBA, " % user_files_dBA, + "all files in grid = %d dBA" % all_files_dBA, + ], + ) + + time_until_files_lost = (n-k+1) / any_drive_failure_rate + add_output("Grid", + T.div["avg time until files are lost: ", + number(time_until_files_lost, "s"), ", ", + number(time_until_files_lost/86400, " days"), + ]) + + share_data_loss_rate = any_drive_failure_rate * drive_size + add_output("Grid", + T.div["share data loss rate: ", + number(share_data_loss_rate,"Bps")]) + + # the worst-case survival numbers occur when we do a file check + # and the file is just above the threshold for repair (so we + # decide to not repair it). The question is then: what is the + # chance that the file will decay so badly before the next check + # that we can't recover it? The resulting probability is per + # check interval. + # Note that the chances of us getting into this situation are low. + P_disk_failure_during_interval = (drive_failure_rate * + file_check_interval) + disk_failure_dBF = 10*math.log10(P_disk_failure_during_interval) + disk_failure_dBA = -disk_failure_dBF + file_survives_dBA = self.file_availability(k, repair_threshold, + disk_failure_dBA) + user_files_survives_dBA = self.many_files_availability( \ + file_survives_dBA, files_per_user) + all_files_survives_dBA = self.many_files_availability( \ + file_survives_dBA, total_files) + add_output("Users", + T.div["survival of: ", + "arbitrary file = %d dBA, " % file_survives_dBA, + "all files of user1 = %d dBA, " % + user_files_survives_dBA, + "all files in grid = %d dBA" % + all_files_survives_dBA, + " (per worst-case check interval)", + ]) + + + + all_sections = [] + all_sections.append(build_section("Users")) + all_sections.append(build_section("Servers")) + all_sections.append(build_section("Drives")) + if "Grid" in sections: + all_sections.append(build_section("Grid")) + + f = T.form(action=".", method="post", enctype="multipart/form-data") + + if filled: + action = "Recompute" + else: + action = "Compute" + + f = f[T.input(type="hidden", name="filled", value="true"), + T.input(type="submit", value=action), + all_sections, + ] + + try: + from allmydata import reliability + # we import this just to test to see if the page is available + _hush_pyflakes = reliability + del _hush_pyflakes + f = [T.div[T.a(href="../reliability")["Reliability Math"]], f] + except ImportError: + pass + + return f + + def file_availability(self, k, n, server_dBA): + """ + The full formula for the availability of a specific file is:: + + 1 - sum([choose(N,i) * p**i * (1-p)**(N-i)] for i in range(k)]) + + Where choose(N,i) = N! / ( i! * (N-i)! ) . Note that each term of + this summation is the probability that there are exactly 'i' servers + available, and what we're doing is adding up the cases where i is too + low. + + This is a nuisance to calculate at all accurately, especially once N + gets large, and when p is close to unity. So we make an engineering + approximation: if (1-p) is very small, then each [i] term is much + larger than the [i-1] term, and the sum is dominated by the i=k-1 + term. This only works for (1-p) < 10%, and when the choose() function + doesn't rise fast enough to compensate. For high-expansion encodings + (3-of-10, 25-of-100), the choose() function is rising at the same + time as the (1-p)**(N-i) term, so that's not an issue. For + low-expansion encodings (7-of-10, 75-of-100) the two values are + moving in opposite directions, so more care must be taken. + + Note that the p**i term has only a minor effect as long as (1-p)*N is + small, and even then the effect is attenuated by the 1-p term. + """ + + assert server_dBA > 9 # >=90% availability to use the approximation + factor = binomial(n, k-1) + factor_dBA = 10 * math.log10(factor) + exponent = n - k + 1 + file_dBA = server_dBA * exponent - factor_dBA + return file_dBA + + def many_files_availability(self, file_dBA, num_files): + """The probability that 'num_files' independent bernoulli trials will + succeed (i.e. we can recover all files in the grid at any given + moment) is p**num_files . Since p is close to unity, we express in p + in dBA instead, so we can get useful precision on q (=1-p), and then + the formula becomes:: + + P_some_files_unavailable = 1 - (1 - q)**num_files + + That (1-q)**n expands with the usual binomial sequence, 1 - nq + + Xq**2 ... + Xq**n . We use the same approximation as before, since we + know q is close to zero, and we get to ignore all the terms past -nq. + """ + + many_files_dBA = file_dBA - 10 * math.log10(num_files) + return many_files_dBA diff --git a/misc/operations_helpers/provisioning/provisioning.xhtml b/misc/operations_helpers/provisioning/provisioning.xhtml new file mode 100644 index 00000000..bfa4edb7 --- /dev/null +++ b/misc/operations_helpers/provisioning/provisioning.xhtml @@ -0,0 +1,18 @@ + + + Tahoe-LAFS - Provisioning Tool + + + + + + +

Tahoe-LAFS Provisioning Tool

+ +

This page will help you determine how much disk space and network +bandwidth will be required by various sizes and types of Tahoe-LAFS networks.

+ +
+ + + diff --git a/misc/operations_helpers/provisioning/reliability.py b/misc/operations_helpers/provisioning/reliability.py new file mode 100644 index 00000000..a0d60769 --- /dev/null +++ b/misc/operations_helpers/provisioning/reliability.py @@ -0,0 +1,251 @@ +#! /usr/bin/python + +import math +from allmydata.util import statistics +from numpy import array, matrix, dot + +DAY=24*60*60 +MONTH=31*DAY +YEAR=365*DAY + +class ReliabilityModel: + """Generate a model of system-wide reliability, given several input + parameters. + + This runs a simulation in which time is quantized down to 'delta' seconds + (default is one month): a smaller delta will result in a more accurate + simulation, but will take longer to run. 'report_span' simulated seconds + will be run. + + The encoding parameters are provided as 'k' (minimum number of shares + needed to recover the file) and 'N' (total number of shares generated). + The default parameters are 3-of-10. + + The first step is to build a probability of individual drive loss during + any given delta. This uses a simple exponential model, in which the + average drive lifetime is specified by the 'drive_lifetime' parameter + (default is 8 years). + + The second step is to calculate a 'transition matrix': a table of + probabilities that shows, given A shares at the start of the delta, what + the chances are of having B shares left at the end of the delta. The + current code optimistically assumes all drives are independent. A + subclass could override that assumption. + + An additional 'repair matrix' is created to show what happens when the + Checker/Repairer is run. In the simulation, the Checker will be run every + 'check_period' seconds (default is one month), and the Repairer will be + run if it sees fewer than 'R' shares (default 7). + + The third step is to finally run the simulation. An initial probability + vector is created (with a 100% chance of N shares and a 0% chance of + fewer than N shares), then it is multiplied by the transition matrix for + every delta of time. Each time the Checker is to be run, the repair + matrix is multiplied in, and some additional stats are accumulated + (average number of repairs that occur, average number of shares + regenerated per repair). + + The output is a ReliabilityReport instance, which contains a table that + samples the state of the simulation once each 'report_period' seconds + (defaults to 3 months). Each row of this table will contain the + probability vector for one sample period (chance of having X shares, from + 0 to N, at the end of the period). The report will also contain other + information. + + """ + + @classmethod + def run(klass, + drive_lifetime=8*YEAR, + k=3, R=7, N=10, + delta=1*MONTH, + check_period=1*MONTH, + report_period=3*MONTH, + report_span=5*YEAR, + ): + self = klass() + + check_period = check_period-1 + P = self.p_in_period(drive_lifetime, delta) + + decay = self.build_decay_matrix(N, P) + + repair = self.build_repair_matrix(k, N, R) + + #print "DECAY:", decay + #print "OLD-POST-REPAIR:", old_post_repair + #print "NEW-POST-REPAIR:", decay * repair + #print "REPAIR:", repair + #print "DIFF:", (old_post_repair - decay * repair) + + START = array([0]*N + [1]) + DEAD = array([1]*k + [0]*(1+N-k)) + REPAIRp = array([0]*k + [1]*(R-k) + [0]*(1+N-R)) + REPAIR_newshares = array([0]*k + + [N-i for i in range(k, R)] + + [0]*(1+N-R)) + assert REPAIR_newshares.shape[0] == N+1 + #print "START", START + #print "REPAIRp", REPAIRp + #print "REPAIR_newshares", REPAIR_newshares + + unmaintained_state = START + maintained_state = START + last_check = 0 + last_report = 0 + P_repaired_last_check_period = 0.0 + needed_repairs = [] + needed_new_shares = [] + report = ReliabilityReport() + + for t in range(0, report_span+delta, delta): + # the .A[0] turns the one-row matrix back into an array + unmaintained_state = (unmaintained_state * decay).A[0] + maintained_state = (maintained_state * decay).A[0] + if (t-last_check) > check_period: + last_check = t + # we do a check-and-repair this frequently + need_repair = dot(maintained_state, REPAIRp) + + P_repaired_last_check_period = need_repair + new_shares = dot(maintained_state, REPAIR_newshares) + needed_repairs.append(need_repair) + needed_new_shares.append(new_shares) + + maintained_state = (maintained_state * repair).A[0] + + if (t-last_report) > report_period: + last_report = t + P_dead_unmaintained = dot(unmaintained_state, DEAD) + P_dead_maintained = dot(maintained_state, DEAD) + cumulative_number_of_repairs = sum(needed_repairs) + cumulative_number_of_new_shares = sum(needed_new_shares) + report.add_sample(t, unmaintained_state, maintained_state, + P_repaired_last_check_period, + cumulative_number_of_repairs, + cumulative_number_of_new_shares, + P_dead_unmaintained, P_dead_maintained) + + # record one more sample at the end of the run + P_dead_unmaintained = dot(unmaintained_state, DEAD) + P_dead_maintained = dot(maintained_state, DEAD) + cumulative_number_of_repairs = sum(needed_repairs) + cumulative_number_of_new_shares = sum(needed_new_shares) + report.add_sample(t, unmaintained_state, maintained_state, + P_repaired_last_check_period, + cumulative_number_of_repairs, + cumulative_number_of_new_shares, + P_dead_unmaintained, P_dead_maintained) + + #def yandm(seconds): + # return "%dy.%dm" % (int(seconds/YEAR), int( (seconds%YEAR)/MONTH)) + #needed_repairs_total = sum(needed_repairs) + #needed_new_shares_total = sum(needed_new_shares) + #print "at 2y:" + #print " unmaintained", unmaintained_state + #print " maintained", maintained_state + #print " number of repairs", needed_repairs_total + #print " new shares generated", needed_new_shares_total + #repair_rate_inv = report_span / needed_repairs_total + #print " avg repair rate: once every %s" % yandm(repair_rate_inv) + #print " avg repair download: one share every %s" % yandm(repair_rate_inv/k) + #print " avg repair upload: one share every %s" % yandm(report_span / needed_new_shares_total) + + return report + + def p_in_period(self, avg_lifetime, period): + """Given an average lifetime of a disk (using an exponential model), + what is the chance that a live disk will survive the next 'period' + seconds?""" + + # eg p_in_period(8*YEAR, MONTH) = 98.94% + return math.exp(-1.0*period/avg_lifetime) + + def build_decay_matrix(self, N, P): + """Return a decay matrix. decay[start_shares][end_shares] is the + conditional probability of finishing with end_shares, given that we + started with start_shares.""" + decay_rows = [] + decay_rows.append( [0.0]*(N+1) ) + for start_shares in range(1, (N+1)): + end_shares = self.build_decay_row(start_shares, P) + decay_row = end_shares + [0.0] * (N-start_shares) + assert len(decay_row) == (N+1), len(decay_row) + decay_rows.append(decay_row) + + decay = matrix(decay_rows) + return decay + + def build_decay_row(self, start_shares, P): + """Return a decay row 'end_shares'. end_shares[i] is the chance that + we finish with i shares, given that we started with start_shares, for + all i between 0 and start_shares, inclusive. This implementation + assumes that all shares are independent (IID), but a more complex + model could incorporate inter-share failure correlations like having + two shares on the same server.""" + end_shares = statistics.binomial_distribution_pmf(start_shares, P) + return end_shares + + def build_repair_matrix(self, k, N, R): + """Return a repair matrix. repair[start][end]: is the conditional + probability of the repairer finishing with 'end' shares, given that + it began with 'start' shares (repair if fewer than R shares). The + repairer's behavior is deterministic, so all values in this matrix + are either 0 or 1. This matrix should be applied *after* the decay + matrix.""" + new_repair_rows = [] + for start_shares in range(0, N+1): + new_repair_row = [0] * (N+1) + if start_shares < k: + new_repair_row[start_shares] = 1 + elif start_shares < R: + new_repair_row[N] = 1 + else: + new_repair_row[start_shares] = 1 + new_repair_rows.append(new_repair_row) + + repair = matrix(new_repair_rows) + return repair + +class ReliabilityReport: + def __init__(self): + self.samples = [] + + def add_sample(self, when, unmaintained_shareprobs, maintained_shareprobs, + P_repaired_last_check_period, + cumulative_number_of_repairs, + cumulative_number_of_new_shares, + P_dead_unmaintained, P_dead_maintained): + """ + when: the timestamp at the end of the report period + unmaintained_shareprobs: a vector of probabilities, element[S] + is the chance that there are S shares + left at the end of the report period. + This tracks what happens if no repair + is ever done. + maintained_shareprobs: same, but for 'maintained' grids, where + check and repair is done at the end + of each check period + P_repaired_last_check_period: a float, with the probability + that a repair was performed + at the end of the most recent + check period. + cumulative_number_of_repairs: a float, with the average number + of repairs that will have been + performed by the end of the + report period + cumulative_number_of_new_shares: a float, with the average number + of new shares that repair proceses + generated by the end of the report + period + P_dead_unmaintained: a float, with the chance that the file will + be unrecoverable at the end of the period + P_dead_maintained: same, but for maintained grids + + """ + row = (when, unmaintained_shareprobs, maintained_shareprobs, + P_repaired_last_check_period, + cumulative_number_of_repairs, + cumulative_number_of_new_shares, + P_dead_unmaintained, P_dead_maintained) + self.samples.append(row) diff --git a/misc/operations_helpers/provisioning/reliability.xhtml b/misc/operations_helpers/provisioning/reliability.xhtml new file mode 100644 index 00000000..f8d93d15 --- /dev/null +++ b/misc/operations_helpers/provisioning/reliability.xhtml @@ -0,0 +1,63 @@ + + + Tahoe-LAFS - Reliability Tool + + + + + + +

Tahoe-LAFS Reliability Tool

+ +

Given certain assumptions, this page calculates probability of share loss +over time, to help make informed decisions about how much redundancy and +repair bandwidth to configure on a Tahoe-LAFS grid.

+ +
+ +

Simulation Results

+ +

At the end of the report span (elapsed time ), the simulated file had the following properties:

+ +
    +
  • Probability of loss (no maintenance): +
  • +
  • Probability of loss (with maintenance): +
  • +
  • Average repair frequency: + once every secs
  • +
  • Average shares generated per repair: +
  • +
+ +

This table shows how the following properties change over time:

+
    +
  • P_repair: the chance that a repair was performed in the most recent + check period.
  • +
  • P_dead (unmaintained): the chance that the file will be unrecoverable + without periodic check+repair
  • +
  • P_dead (maintained): the chance that the file will be unrecoverable even + with periodic check+repair
  • +
+ +
+ + + + + + + + + + + + + + +
tP_repairP_dead (unmaintained)P_dead (maintained)
no simulation data!
+
+ + + diff --git a/misc/operations_helpers/provisioning/test_provisioning.py b/misc/operations_helpers/provisioning/test_provisioning.py new file mode 100644 index 00000000..71bc6570 --- /dev/null +++ b/misc/operations_helpers/provisioning/test_provisioning.py @@ -0,0 +1,113 @@ + +from twisted.trial import unittest +from allmydata import provisioning +ReliabilityModel = None +try: + from allmydata.reliability import ReliabilityModel +except ImportError: + pass # might not be importable, since it needs NumPy + +from nevow import inevow +from zope.interface import implements + +class MyRequest: + implements(inevow.IRequest) + pass + +class Provisioning(unittest.TestCase): + def getarg(self, name, astype=int): + if name in self.fields: + return astype(self.fields[name]) + return None + + def test_load(self): + pt = provisioning.ProvisioningTool() + self.fields = {} + #r = MyRequest() + #r.fields = self.fields + #ctx = RequestContext() + #unfilled = pt.renderSynchronously(ctx) + lots_of_stan = pt.do_forms(self.getarg) + self.failUnless(lots_of_stan is not None) + + self.fields = {'filled': True, + "num_users": 50e3, + "files_per_user": 1000, + "space_per_user": 1e9, + "sharing_ratio": 1.0, + "encoding_parameters": "3-of-10-5", + "num_servers": 30, + "ownership_mode": "A", + "download_rate": 100, + "upload_rate": 10, + "delete_rate": 10, + "lease_timer": 7, + } + #filled = pt.renderSynchronously(ctx) + more_stan = pt.do_forms(self.getarg) + self.failUnless(more_stan is not None) + + # trigger the wraparound configuration + self.fields["num_servers"] = 5 + #filled = pt.renderSynchronously(ctx) + more_stan = pt.do_forms(self.getarg) + + # and other ownership modes + self.fields["ownership_mode"] = "B" + more_stan = pt.do_forms(self.getarg) + self.fields["ownership_mode"] = "E" + more_stan = pt.do_forms(self.getarg) + + def test_provisioning_math(self): + self.failUnlessEqual(provisioning.binomial(10, 0), 1) + self.failUnlessEqual(provisioning.binomial(10, 1), 10) + self.failUnlessEqual(provisioning.binomial(10, 2), 45) + self.failUnlessEqual(provisioning.binomial(10, 9), 10) + self.failUnlessEqual(provisioning.binomial(10, 10), 1) + +DAY=24*60*60 +MONTH=31*DAY +YEAR=365*DAY + +class Reliability(unittest.TestCase): + def test_basic(self): + if ReliabilityModel is None: + raise unittest.SkipTest("reliability model requires NumPy") + + # test that numpy math works the way I think it does + import numpy + decay = numpy.matrix([[1,0,0], + [.1,.9,0], + [.01,.09,.9], + ]) + start = numpy.array([0,0,1]) + g2 = (start * decay).A[0] + self.failUnlessEqual(repr(g2), repr(numpy.array([.01,.09,.9]))) + g3 = (g2 * decay).A[0] + self.failUnlessEqual(repr(g3), repr(numpy.array([.028,.162,.81]))) + + # and the dot product + recoverable = numpy.array([0,1,1]) + P_recoverable_g2 = numpy.dot(g2, recoverable) + self.failUnlessAlmostEqual(P_recoverable_g2, .9 + .09) + P_recoverable_g3 = numpy.dot(g3, recoverable) + self.failUnlessAlmostEqual(P_recoverable_g3, .81 + .162) + + r = ReliabilityModel.run(delta=100000, + report_period=3*MONTH, + report_span=5*YEAR) + self.failUnlessEqual(len(r.samples), 20) + + last_row = r.samples[-1] + #print last_row + (when, unmaintained_shareprobs, maintained_shareprobs, + P_repaired_last_check_period, + cumulative_number_of_repairs, + cumulative_number_of_new_shares, + P_dead_unmaintained, P_dead_maintained) = last_row + self.failUnless(isinstance(P_repaired_last_check_period, float)) + self.failUnless(isinstance(P_dead_unmaintained, float)) + self.failUnless(isinstance(P_dead_maintained, float)) + self.failUnlessAlmostEqual(P_dead_unmaintained, 0.033591004555395272) + self.failUnlessAlmostEqual(P_dead_maintained, 3.2983995819177542e-08) + diff --git a/misc/operations_helpers/provisioning/web_reliability.py b/misc/operations_helpers/provisioning/web_reliability.py new file mode 100644 index 00000000..d5d34061 --- /dev/null +++ b/misc/operations_helpers/provisioning/web_reliability.py @@ -0,0 +1,152 @@ + +from nevow import rend, tags as T +reliability = None # might not be usable +try: + from allmydata import reliability # requires NumPy +except ImportError: + pass +from allmydata.web.common import getxmlfile, get_arg + + +DAY=24*60*60 +MONTH=31*DAY +YEAR=365*DAY + +def is_available(): + if reliability: + return True + return False + +def yandm(seconds): + return "%dy.%dm" % (int(seconds/YEAR), int( (seconds%YEAR)/MONTH)) + +class ReliabilityTool(rend.Page): + addSlash = True + docFactory = getxmlfile("reliability.xhtml") + + DEFAULT_PARAMETERS = [ + ("drive_lifetime", "8Y", "time", + "Average drive lifetime"), + ("k", 3, "int", + "Minimum number of shares needed to recover the file"), + ("R", 7, "int", + "Repair threshold: repair will not occur until fewer than R shares " + "are left"), + ("N", 10, "int", + "Total number of shares of the file generated"), + ("delta", "1M", "time", "Amount of time between each simulation step"), + ("check_period", "1M", "time", + "How often to run the checker and repair if fewer than R shares"), + ("report_period", "3M", "time", + "Amount of time between result rows in this report"), + ("report_span", "5Y", "time", + "Total amount of time covered by this report"), + ] + + def parse_time(self, s): + if s.endswith("M"): + return int(s[:-1]) * MONTH + if s.endswith("Y"): + return int(s[:-1]) * YEAR + return int(s) + + def format_time(self, s): + if s%YEAR == 0: + return "%dY" % (s/YEAR) + if s%MONTH == 0: + return "%dM" % (s/MONTH) + return "%d" % s + + def get_parameters(self, ctx): + parameters = {} + for (name,default,argtype,description) in self.DEFAULT_PARAMETERS: + v = get_arg(ctx, name, default) + if argtype == "time": + value = self.parse_time(v) + else: + value = int(v) + parameters[name] = value + return parameters + + def renderHTTP(self, ctx): + self.parameters = self.get_parameters(ctx) + self.results = reliability.ReliabilityModel.run(**self.parameters) + return rend.Page.renderHTTP(self, ctx) + + def make_input(self, name, old_value): + return T.input(name=name, type="text", size="5", + value=self.format_time(old_value)) + + def render_forms(self, ctx, data): + f = T.form(action=".", method="get") + table = [] + for (name,default_value,argtype,description) in self.DEFAULT_PARAMETERS: + old_value = self.parameters[name] + i = self.make_input(name, old_value) + table.append(T.tr[T.td[name+":"], T.td[i], T.td[description]]) + go = T.input(type="submit", value="Recompute") + return [T.h2["Simulation Parameters:"], + f[T.table[table], go], + ] + + def data_simulation_table(self, ctx, data): + for row in self.results.samples: + yield row + + def render_simulation_row(self, ctx, row): + (when, unmaintained_shareprobs, maintained_shareprobs, + P_repaired_last_check_period, + cumulative_number_of_repairs, + cumulative_number_of_new_shares, + P_dead_unmaintained, P_dead_maintained) = row + ctx.fillSlots("t", yandm(when)) + ctx.fillSlots("P_repair", "%.6f" % P_repaired_last_check_period) + ctx.fillSlots("P_dead_unmaintained", "%.6g" % P_dead_unmaintained) + ctx.fillSlots("P_dead_maintained", "%.6g" % P_dead_maintained) + return ctx.tag + + def render_report_span(self, ctx, row): + (when, unmaintained_shareprobs, maintained_shareprobs, + P_repaired_last_check_period, + cumulative_number_of_repairs, + cumulative_number_of_new_shares, + P_dead_unmaintained, P_dead_maintained) = self.results.samples[-1] + return ctx.tag[yandm(when)] + + def render_P_loss_unmaintained(self, ctx, row): + (when, unmaintained_shareprobs, maintained_shareprobs, + P_repaired_last_check_period, + cumulative_number_of_repairs, + cumulative_number_of_new_shares, + P_dead_unmaintained, P_dead_maintained) = self.results.samples[-1] + return ctx.tag["%.6g (%1.8f%%)" % (P_dead_unmaintained, + 100*P_dead_unmaintained)] + + def render_P_loss_maintained(self, ctx, row): + (when, unmaintained_shareprobs, maintained_shareprobs, + P_repaired_last_check_period, + cumulative_number_of_repairs, + cumulative_number_of_new_shares, + P_dead_unmaintained, P_dead_maintained) = self.results.samples[-1] + return ctx.tag["%.6g (%1.8f%%)" % (P_dead_maintained, + 100*P_dead_maintained)] + + def render_P_repair_rate(self, ctx, row): + (when, unmaintained_shareprobs, maintained_shareprobs, + P_repaired_last_check_period, + cumulative_number_of_repairs, + cumulative_number_of_new_shares, + P_dead_unmaintained, P_dead_maintained) = self.results.samples[-1] + freq = when / cumulative_number_of_repairs + return ctx.tag["%.6g" % freq] + + def render_P_repair_shares(self, ctx, row): + (when, unmaintained_shareprobs, maintained_shareprobs, + P_repaired_last_check_period, + cumulative_number_of_repairs, + cumulative_number_of_new_shares, + P_dead_unmaintained, P_dead_maintained) = self.results.samples[-1] + generated_shares = cumulative_number_of_new_shares / cumulative_number_of_repairs + return ctx.tag["%1.2f" % generated_shares] + + diff --git a/src/allmydata/provisioning.py b/src/allmydata/provisioning.py deleted file mode 100644 index 9d9af0ea..00000000 --- a/src/allmydata/provisioning.py +++ /dev/null @@ -1,772 +0,0 @@ - -from nevow import inevow, rend, tags as T -import math -from allmydata.util import mathutil -from allmydata.web.common import getxmlfile - -# factorial and binomial copied from -# http://mail.python.org/pipermail/python-list/2007-April/435718.html - -def factorial(n): - """factorial(n): return the factorial of the integer n. - factorial(0) = 1 - factorial(n) with n<0 is -factorial(abs(n)) - """ - result = 1 - for i in xrange(1, abs(n)+1): - result *= i - assert n >= 0 - return result - -def binomial(n, k): - assert 0 <= k <= n - if k == 0 or k == n: - return 1 - # calculate n!/k! as one product, avoiding factors that - # just get canceled - P = k+1 - for i in xrange(k+2, n+1): - P *= i - # if you are paranoid: - # C, rem = divmod(P, factorial(n-k)) - # assert rem == 0 - # return C - return P//factorial(n-k) - -class ProvisioningTool(rend.Page): - addSlash = True - docFactory = getxmlfile("provisioning.xhtml") - - def render_forms(self, ctx, data): - req = inevow.IRequest(ctx) - - def getarg(name, astype=int): - if req.method != "POST": - return None - if name in req.fields: - return astype(req.fields[name].value) - return None - return self.do_forms(getarg) - - - def do_forms(self, getarg): - filled = getarg("filled", bool) - - def get_and_set(name, options, default=None, astype=int): - current_value = getarg(name, astype) - i_select = T.select(name=name) - for (count, description) in options: - count = astype(count) - if ((current_value is not None and count == current_value) or - (current_value is None and count == default)): - o = T.option(value=str(count), selected="true")[description] - else: - o = T.option(value=str(count))[description] - i_select = i_select[o] - if current_value is None: - current_value = default - return current_value, i_select - - sections = {} - def add_input(section, text, entry): - if section not in sections: - sections[section] = [] - sections[section].extend([T.div[text, ": ", entry], "\n"]) - - def add_output(section, entry): - if section not in sections: - sections[section] = [] - sections[section].extend([entry, "\n"]) - - def build_section(section): - return T.fieldset[T.legend[section], sections[section]] - - def number(value, suffix=""): - scaling = 1 - if value < 1: - fmt = "%1.2g%s" - elif value < 100: - fmt = "%.1f%s" - elif value < 1000: - fmt = "%d%s" - elif value < 1e6: - fmt = "%.2fk%s"; scaling = 1e3 - elif value < 1e9: - fmt = "%.2fM%s"; scaling = 1e6 - elif value < 1e12: - fmt = "%.2fG%s"; scaling = 1e9 - elif value < 1e15: - fmt = "%.2fT%s"; scaling = 1e12 - elif value < 1e18: - fmt = "%.2fP%s"; scaling = 1e15 - else: - fmt = "huge! %g%s" - return fmt % (value / scaling, suffix) - - user_counts = [(5, "5 users"), - (50, "50 users"), - (200, "200 users"), - (1000, "1k users"), - (10000, "10k users"), - (50000, "50k users"), - (100000, "100k users"), - (500000, "500k users"), - (1000000, "1M users"), - ] - num_users, i_num_users = get_and_set("num_users", user_counts, 50000) - add_input("Users", - "How many users are on this network?", i_num_users) - - files_per_user_counts = [(100, "100 files"), - (1000, "1k files"), - (10000, "10k files"), - (100000, "100k files"), - (1e6, "1M files"), - ] - files_per_user, i_files_per_user = get_and_set("files_per_user", - files_per_user_counts, - 1000) - add_input("Users", - "How many files for each user? (avg)", - i_files_per_user) - - space_per_user_sizes = [(1e6, "1MB"), - (10e6, "10MB"), - (100e6, "100MB"), - (200e6, "200MB"), - (1e9, "1GB"), - (2e9, "2GB"), - (5e9, "5GB"), - (10e9, "10GB"), - (100e9, "100GB"), - (1e12, "1TB"), - (2e12, "2TB"), - (5e12, "5TB"), - ] - # Estimate ~5gb per user as a more realistic case - space_per_user, i_space_per_user = get_and_set("space_per_user", - space_per_user_sizes, - 5e9) - add_input("Users", - "How much data for each user? (avg)", - i_space_per_user) - - sharing_ratios = [(1.0, "1.0x"), - (1.1, "1.1x"), - (2.0, "2.0x"), - ] - sharing_ratio, i_sharing_ratio = get_and_set("sharing_ratio", - sharing_ratios, 1.0, - float) - add_input("Users", - "What is the sharing ratio? (1.0x is no-sharing and" - " no convergence)", i_sharing_ratio) - - # Encoding parameters - encoding_choices = [("3-of-10-5", "3.3x (3-of-10, repair below 5)"), - ("3-of-10-8", "3.3x (3-of-10, repair below 8)"), - ("5-of-10-7", "2x (5-of-10, repair below 7)"), - ("8-of-10-9", "1.25x (8-of-10, repair below 9)"), - ("27-of-30-28", "1.1x (27-of-30, repair below 28"), - ("25-of-100-50", "4x (25-of-100, repair below 50)"), - ] - encoding_parameters, i_encoding_parameters = \ - get_and_set("encoding_parameters", - encoding_choices, "3-of-10-5", str) - encoding_pieces = encoding_parameters.split("-") - k = int(encoding_pieces[0]) - assert encoding_pieces[1] == "of" - n = int(encoding_pieces[2]) - # we repair the file when the number of available shares drops below - # this value - repair_threshold = int(encoding_pieces[3]) - - add_input("Servers", - "What are the default encoding parameters?", - i_encoding_parameters) - - # Server info - num_server_choices = [ (5, "5 servers"), - (10, "10 servers"), - (15, "15 servers"), - (30, "30 servers"), - (50, "50 servers"), - (100, "100 servers"), - (200, "200 servers"), - (300, "300 servers"), - (500, "500 servers"), - (1000, "1k servers"), - (2000, "2k servers"), - (5000, "5k servers"), - (10e3, "10k servers"), - (100e3, "100k servers"), - (1e6, "1M servers"), - ] - num_servers, i_num_servers = \ - get_and_set("num_servers", num_server_choices, 30, int) - add_input("Servers", - "How many servers are there?", i_num_servers) - - # availability is measured in dBA = -dBF, where 0dBF is 100% failure, - # 10dBF is 10% failure, 20dBF is 1% failure, etc - server_dBA_choices = [ (10, "90% [10dBA] (2.4hr/day)"), - (13, "95% [13dBA] (1.2hr/day)"), - (20, "99% [20dBA] (14min/day or 3.5days/year)"), - (23, "99.5% [23dBA] (7min/day or 1.75days/year)"), - (30, "99.9% [30dBA] (87sec/day or 9hours/year)"), - (40, "99.99% [40dBA] (60sec/week or 53min/year)"), - (50, "99.999% [50dBA] (5min per year)"), - ] - server_dBA, i_server_availability = \ - get_and_set("server_availability", - server_dBA_choices, - 20, int) - add_input("Servers", - "What is the server availability?", i_server_availability) - - drive_MTBF_choices = [ (40, "40,000 Hours"), - ] - drive_MTBF, i_drive_MTBF = \ - get_and_set("drive_MTBF", drive_MTBF_choices, 40, int) - add_input("Drives", - "What is the hard drive MTBF?", i_drive_MTBF) - # http://www.tgdaily.com/content/view/30990/113/ - # http://labs.google.com/papers/disk_failures.pdf - # google sees: - # 1.7% of the drives they replaced were 0-1 years old - # 8% of the drives they repalced were 1-2 years old - # 8.6% were 2-3 years old - # 6% were 3-4 years old, about 8% were 4-5 years old - - drive_size_choices = [ (100, "100 GB"), - (250, "250 GB"), - (500, "500 GB"), - (750, "750 GB"), - (1000, "1000 GB"), - (2000, "2000 GB"), - (3000, "3000 GB"), - ] - drive_size, i_drive_size = \ - get_and_set("drive_size", drive_size_choices, 3000, int) - drive_size = drive_size * 1e9 - add_input("Drives", - "What is the capacity of each hard drive?", i_drive_size) - drive_failure_model_choices = [ ("E", "Exponential"), - ("U", "Uniform"), - ] - drive_failure_model, i_drive_failure_model = \ - get_and_set("drive_failure_model", - drive_failure_model_choices, - "E", str) - add_input("Drives", - "How should we model drive failures?", i_drive_failure_model) - - # drive_failure_rate is in failures per second - if drive_failure_model == "E": - drive_failure_rate = 1.0 / (drive_MTBF * 1000 * 3600) - else: - drive_failure_rate = 0.5 / (drive_MTBF * 1000 * 3600) - - # deletion/gc/ownership mode - ownership_choices = [ ("A", "no deletion, no gc, no owners"), - ("B", "deletion, no gc, no owners"), - ("C", "deletion, share timers, no owners"), - ("D", "deletion, no gc, yes owners"), - ("E", "deletion, owner timers"), - ] - ownership_mode, i_ownership_mode = \ - get_and_set("ownership_mode", ownership_choices, - "A", str) - add_input("Servers", - "What is the ownership mode?", i_ownership_mode) - - # client access behavior - access_rates = [ (1, "one file per day"), - (10, "10 files per day"), - (100, "100 files per day"), - (1000, "1k files per day"), - (10e3, "10k files per day"), - (100e3, "100k files per day"), - ] - download_files_per_day, i_download_rate = \ - get_and_set("download_rate", access_rates, - 100, int) - add_input("Users", - "How many files are downloaded per day?", i_download_rate) - download_rate = 1.0 * download_files_per_day / (24*60*60) - - upload_files_per_day, i_upload_rate = \ - get_and_set("upload_rate", access_rates, - 10, int) - add_input("Users", - "How many files are uploaded per day?", i_upload_rate) - upload_rate = 1.0 * upload_files_per_day / (24*60*60) - - delete_files_per_day, i_delete_rate = \ - get_and_set("delete_rate", access_rates, - 10, int) - add_input("Users", - "How many files are deleted per day?", i_delete_rate) - delete_rate = 1.0 * delete_files_per_day / (24*60*60) - - - # the value is in days - lease_timers = [ (1, "one refresh per day"), - (7, "one refresh per week"), - ] - lease_timer, i_lease = \ - get_and_set("lease_timer", lease_timers, - 7, int) - add_input("Users", - "How frequently do clients refresh files or accounts? " - "(if necessary)", - i_lease) - seconds_per_lease = 24*60*60*lease_timer - - check_timer_choices = [ (1, "every week"), - (4, "every month"), - (8, "every two months"), - (16, "every four months"), - ] - check_timer, i_check_timer = \ - get_and_set("check_timer", check_timer_choices, 4, int) - add_input("Users", - "How frequently should we check on each file?", - i_check_timer) - file_check_interval = check_timer * 7 * 24 * 3600 - - - if filled: - add_output("Users", T.div["Total users: %s" % number(num_users)]) - add_output("Users", - T.div["Files per user: %s" % number(files_per_user)]) - file_size = 1.0 * space_per_user / files_per_user - add_output("Users", - T.div["Average file size: ", number(file_size)]) - total_files = num_users * files_per_user / sharing_ratio - - add_output("Grid", - T.div["Total number of files in grid: ", - number(total_files)]) - total_space = num_users * space_per_user / sharing_ratio - add_output("Grid", - T.div["Total volume of plaintext in grid: ", - number(total_space, "B")]) - - total_shares = n * total_files - add_output("Grid", - T.div["Total shares in grid: ", number(total_shares)]) - expansion = float(n) / float(k) - - total_usage = expansion * total_space - add_output("Grid", - T.div["Share data in grid: ", number(total_usage, "B")]) - - if n > num_servers: - # silly configuration, causes Tahoe2 to wrap and put multiple - # shares on some servers. - add_output("Servers", - T.div["non-ideal: more shares than servers" - " (n=%d, servers=%d)" % (n, num_servers)]) - # every file has at least one share on every server - buckets_per_server = total_files - shares_per_server = total_files * ((1.0 * n) / num_servers) - else: - # if nobody is full, then no lease requests will be turned - # down for lack of space, and no two shares for the same file - # will share a server. Therefore the chance that any given - # file has a share on any given server is n/num_servers. - buckets_per_server = total_files * ((1.0 * n) / num_servers) - # since each such represented file only puts one share on a - # server, the total number of shares per server is the same. - shares_per_server = buckets_per_server - add_output("Servers", - T.div["Buckets per server: ", - number(buckets_per_server)]) - add_output("Servers", - T.div["Shares per server: ", - number(shares_per_server)]) - - # how much space is used on the storage servers for the shares? - # the share data itself - share_data_per_server = total_usage / num_servers - add_output("Servers", - T.div["Share data per server: ", - number(share_data_per_server, "B")]) - # this is determined empirically. H=hashsize=32, for a one-segment - # file and 3-of-10 encoding - share_validation_per_server = 266 * shares_per_server - # this could be 423*buckets_per_server, if we moved the URI - # extension into a separate file, but that would actually consume - # *more* space (minimum filesize is 4KiB), unless we moved all - # shares for a given bucket into a single file. - share_uri_extension_per_server = 423 * shares_per_server - - # ownership mode adds per-bucket data - H = 32 # depends upon the desired security of delete/refresh caps - # bucket_lease_size is the amount of data needed to keep track of - # the delete/refresh caps for each bucket. - bucket_lease_size = 0 - client_bucket_refresh_rate = 0 - owner_table_size = 0 - if ownership_mode in ("B", "C", "D", "E"): - bucket_lease_size = sharing_ratio * 1.0 * H - if ownership_mode in ("B", "C"): - # refreshes per second per client - client_bucket_refresh_rate = (1.0 * n * files_per_user / - seconds_per_lease) - add_output("Users", - T.div["Client share refresh rate (outbound): ", - number(client_bucket_refresh_rate, "Hz")]) - server_bucket_refresh_rate = (client_bucket_refresh_rate * - num_users / num_servers) - add_output("Servers", - T.div["Server share refresh rate (inbound): ", - number(server_bucket_refresh_rate, "Hz")]) - if ownership_mode in ("D", "E"): - # each server must maintain a bidirectional mapping from - # buckets to owners. One way to implement this would be to - # put a list of four-byte owner numbers into each bucket, and - # a list of four-byte share numbers into each owner (although - # of course we'd really just throw it into a database and let - # the experts take care of the details). - owner_table_size = 2*(buckets_per_server * sharing_ratio * 4) - - if ownership_mode in ("E",): - # in this mode, clients must refresh one timer per server - client_account_refresh_rate = (1.0 * num_servers / - seconds_per_lease) - add_output("Users", - T.div["Client account refresh rate (outbound): ", - number(client_account_refresh_rate, "Hz")]) - server_account_refresh_rate = (client_account_refresh_rate * - num_users / num_servers) - add_output("Servers", - T.div["Server account refresh rate (inbound): ", - number(server_account_refresh_rate, "Hz")]) - - # TODO: buckets vs shares here is a bit wonky, but in - # non-wrapping grids it shouldn't matter - share_lease_per_server = bucket_lease_size * buckets_per_server - share_ownertable_per_server = owner_table_size - - share_space_per_server = (share_data_per_server + - share_validation_per_server + - share_uri_extension_per_server + - share_lease_per_server + - share_ownertable_per_server) - add_output("Servers", - T.div["Share space per server: ", - number(share_space_per_server, "B"), - " (data ", - number(share_data_per_server, "B"), - ", validation ", - number(share_validation_per_server, "B"), - ", UEB ", - number(share_uri_extension_per_server, "B"), - ", lease ", - number(share_lease_per_server, "B"), - ", ownertable ", - number(share_ownertable_per_server, "B"), - ")", - ]) - - - # rates - client_download_share_rate = download_rate * k - client_download_byte_rate = download_rate * file_size - add_output("Users", - T.div["download rate: shares = ", - number(client_download_share_rate, "Hz"), - " , bytes = ", - number(client_download_byte_rate, "Bps"), - ]) - total_file_check_rate = 1.0 * total_files / file_check_interval - client_check_share_rate = total_file_check_rate / num_users - add_output("Users", - T.div["file check rate: shares = ", - number(client_check_share_rate, "Hz"), - " (interval = %s)" % - number(1 / client_check_share_rate, "s"), - ]) - - client_upload_share_rate = upload_rate * n - # TODO: doesn't include overhead - client_upload_byte_rate = upload_rate * file_size * expansion - add_output("Users", - T.div["upload rate: shares = ", - number(client_upload_share_rate, "Hz"), - " , bytes = ", - number(client_upload_byte_rate, "Bps"), - ]) - client_delete_share_rate = delete_rate * n - - server_inbound_share_rate = (client_upload_share_rate * - num_users / num_servers) - server_inbound_byte_rate = (client_upload_byte_rate * - num_users / num_servers) - add_output("Servers", - T.div["upload rate (inbound): shares = ", - number(server_inbound_share_rate, "Hz"), - " , bytes = ", - number(server_inbound_byte_rate, "Bps"), - ]) - add_output("Servers", - T.div["share check rate (inbound): ", - number(total_file_check_rate * n / num_servers, - "Hz"), - ]) - - server_share_modify_rate = ((client_upload_share_rate + - client_delete_share_rate) * - num_users / num_servers) - add_output("Servers", - T.div["share modify rate: shares = ", - number(server_share_modify_rate, "Hz"), - ]) - - server_outbound_share_rate = (client_download_share_rate * - num_users / num_servers) - server_outbound_byte_rate = (client_download_byte_rate * - num_users / num_servers) - add_output("Servers", - T.div["download rate (outbound): shares = ", - number(server_outbound_share_rate, "Hz"), - " , bytes = ", - number(server_outbound_byte_rate, "Bps"), - ]) - - - total_share_space = num_servers * share_space_per_server - add_output("Grid", - T.div["Share space consumed: ", - number(total_share_space, "B")]) - add_output("Grid", - T.div[" %% validation: %.2f%%" % - (100.0 * share_validation_per_server / - share_space_per_server)]) - add_output("Grid", - T.div[" %% uri-extension: %.2f%%" % - (100.0 * share_uri_extension_per_server / - share_space_per_server)]) - add_output("Grid", - T.div[" %% lease data: %.2f%%" % - (100.0 * share_lease_per_server / - share_space_per_server)]) - add_output("Grid", - T.div[" %% owner data: %.2f%%" % - (100.0 * share_ownertable_per_server / - share_space_per_server)]) - add_output("Grid", - T.div[" %% share data: %.2f%%" % - (100.0 * share_data_per_server / - share_space_per_server)]) - add_output("Grid", - T.div["file check rate: ", - number(total_file_check_rate, - "Hz")]) - - total_drives = max(mathutil.div_ceil(int(total_share_space), - int(drive_size)), - num_servers) - add_output("Drives", - T.div["Total drives: ", number(total_drives), " drives"]) - drives_per_server = mathutil.div_ceil(total_drives, num_servers) - add_output("Servers", - T.div["Drives per server: ", drives_per_server]) - - # costs - if drive_size == 3000 * 1e9: - add_output("Servers", T.div["3000GB drive: $250 each"]) - drive_cost = 250 - else: - add_output("Servers", - T.div[T.b["unknown cost per drive, assuming $100"]]) - drive_cost = 100 - - if drives_per_server <= 4: - add_output("Servers", T.div["1U box with <= 4 drives: $1500"]) - server_cost = 1500 # typical 1U box - elif drives_per_server <= 12: - add_output("Servers", T.div["2U box with <= 12 drives: $2500"]) - server_cost = 2500 # 2U box - else: - add_output("Servers", - T.div[T.b["Note: too many drives per server, " - "assuming $3000"]]) - server_cost = 3000 - - server_capital_cost = (server_cost + drives_per_server * drive_cost) - total_server_cost = float(num_servers * server_capital_cost) - add_output("Servers", T.div["Capital cost per server: $", - server_capital_cost]) - add_output("Grid", T.div["Capital cost for all servers: $", - number(total_server_cost)]) - # $70/Mbps/mo - # $44/server/mo power+space - server_bandwidth = max(server_inbound_byte_rate, - server_outbound_byte_rate) - server_bandwidth_mbps = mathutil.div_ceil(int(server_bandwidth*8), - int(1e6)) - server_monthly_cost = 70*server_bandwidth_mbps + 44 - add_output("Servers", T.div["Monthly cost per server: $", - server_monthly_cost]) - add_output("Users", T.div["Capital cost per user: $", - number(total_server_cost / num_users)]) - - # reliability - any_drive_failure_rate = total_drives * drive_failure_rate - any_drive_MTBF = 1 // any_drive_failure_rate # in seconds - any_drive_MTBF_days = any_drive_MTBF / 86400 - add_output("Drives", - T.div["MTBF (any drive): ", - number(any_drive_MTBF_days), " days"]) - drive_replacement_monthly_cost = (float(drive_cost) - * any_drive_failure_rate - *30*86400) - add_output("Grid", - T.div["Monthly cost of replacing drives: $", - number(drive_replacement_monthly_cost)]) - - total_server_monthly_cost = float(num_servers * server_monthly_cost - + drive_replacement_monthly_cost) - - add_output("Grid", T.div["Monthly cost for all servers: $", - number(total_server_monthly_cost)]) - add_output("Users", - T.div["Monthly cost per user: $", - number(total_server_monthly_cost / num_users)]) - - # availability - file_dBA = self.file_availability(k, n, server_dBA) - user_files_dBA = self.many_files_availability(file_dBA, - files_per_user) - all_files_dBA = self.many_files_availability(file_dBA, total_files) - add_output("Users", - T.div["availability of: ", - "arbitrary file = %d dBA, " % file_dBA, - "all files of user1 = %d dBA, " % user_files_dBA, - "all files in grid = %d dBA" % all_files_dBA, - ], - ) - - time_until_files_lost = (n-k+1) / any_drive_failure_rate - add_output("Grid", - T.div["avg time until files are lost: ", - number(time_until_files_lost, "s"), ", ", - number(time_until_files_lost/86400, " days"), - ]) - - share_data_loss_rate = any_drive_failure_rate * drive_size - add_output("Grid", - T.div["share data loss rate: ", - number(share_data_loss_rate,"Bps")]) - - # the worst-case survival numbers occur when we do a file check - # and the file is just above the threshold for repair (so we - # decide to not repair it). The question is then: what is the - # chance that the file will decay so badly before the next check - # that we can't recover it? The resulting probability is per - # check interval. - # Note that the chances of us getting into this situation are low. - P_disk_failure_during_interval = (drive_failure_rate * - file_check_interval) - disk_failure_dBF = 10*math.log10(P_disk_failure_during_interval) - disk_failure_dBA = -disk_failure_dBF - file_survives_dBA = self.file_availability(k, repair_threshold, - disk_failure_dBA) - user_files_survives_dBA = self.many_files_availability( \ - file_survives_dBA, files_per_user) - all_files_survives_dBA = self.many_files_availability( \ - file_survives_dBA, total_files) - add_output("Users", - T.div["survival of: ", - "arbitrary file = %d dBA, " % file_survives_dBA, - "all files of user1 = %d dBA, " % - user_files_survives_dBA, - "all files in grid = %d dBA" % - all_files_survives_dBA, - " (per worst-case check interval)", - ]) - - - - all_sections = [] - all_sections.append(build_section("Users")) - all_sections.append(build_section("Servers")) - all_sections.append(build_section("Drives")) - if "Grid" in sections: - all_sections.append(build_section("Grid")) - - f = T.form(action=".", method="post", enctype="multipart/form-data") - - if filled: - action = "Recompute" - else: - action = "Compute" - - f = f[T.input(type="hidden", name="filled", value="true"), - T.input(type="submit", value=action), - all_sections, - ] - - try: - from allmydata import reliability - # we import this just to test to see if the page is available - _hush_pyflakes = reliability - del _hush_pyflakes - f = [T.div[T.a(href="../reliability")["Reliability Math"]], f] - except ImportError: - pass - - return f - - def file_availability(self, k, n, server_dBA): - """ - The full formula for the availability of a specific file is:: - - 1 - sum([choose(N,i) * p**i * (1-p)**(N-i)] for i in range(k)]) - - Where choose(N,i) = N! / ( i! * (N-i)! ) . Note that each term of - this summation is the probability that there are exactly 'i' servers - available, and what we're doing is adding up the cases where i is too - low. - - This is a nuisance to calculate at all accurately, especially once N - gets large, and when p is close to unity. So we make an engineering - approximation: if (1-p) is very small, then each [i] term is much - larger than the [i-1] term, and the sum is dominated by the i=k-1 - term. This only works for (1-p) < 10%, and when the choose() function - doesn't rise fast enough to compensate. For high-expansion encodings - (3-of-10, 25-of-100), the choose() function is rising at the same - time as the (1-p)**(N-i) term, so that's not an issue. For - low-expansion encodings (7-of-10, 75-of-100) the two values are - moving in opposite directions, so more care must be taken. - - Note that the p**i term has only a minor effect as long as (1-p)*N is - small, and even then the effect is attenuated by the 1-p term. - """ - - assert server_dBA > 9 # >=90% availability to use the approximation - factor = binomial(n, k-1) - factor_dBA = 10 * math.log10(factor) - exponent = n - k + 1 - file_dBA = server_dBA * exponent - factor_dBA - return file_dBA - - def many_files_availability(self, file_dBA, num_files): - """The probability that 'num_files' independent bernoulli trials will - succeed (i.e. we can recover all files in the grid at any given - moment) is p**num_files . Since p is close to unity, we express in p - in dBA instead, so we can get useful precision on q (=1-p), and then - the formula becomes:: - - P_some_files_unavailable = 1 - (1 - q)**num_files - - That (1-q)**n expands with the usual binomial sequence, 1 - nq + - Xq**2 ... + Xq**n . We use the same approximation as before, since we - know q is close to zero, and we get to ignore all the terms past -nq. - """ - - many_files_dBA = file_dBA - 10 * math.log10(num_files) - return many_files_dBA diff --git a/src/allmydata/reliability.py b/src/allmydata/reliability.py deleted file mode 100644 index a0d60769..00000000 --- a/src/allmydata/reliability.py +++ /dev/null @@ -1,251 +0,0 @@ -#! /usr/bin/python - -import math -from allmydata.util import statistics -from numpy import array, matrix, dot - -DAY=24*60*60 -MONTH=31*DAY -YEAR=365*DAY - -class ReliabilityModel: - """Generate a model of system-wide reliability, given several input - parameters. - - This runs a simulation in which time is quantized down to 'delta' seconds - (default is one month): a smaller delta will result in a more accurate - simulation, but will take longer to run. 'report_span' simulated seconds - will be run. - - The encoding parameters are provided as 'k' (minimum number of shares - needed to recover the file) and 'N' (total number of shares generated). - The default parameters are 3-of-10. - - The first step is to build a probability of individual drive loss during - any given delta. This uses a simple exponential model, in which the - average drive lifetime is specified by the 'drive_lifetime' parameter - (default is 8 years). - - The second step is to calculate a 'transition matrix': a table of - probabilities that shows, given A shares at the start of the delta, what - the chances are of having B shares left at the end of the delta. The - current code optimistically assumes all drives are independent. A - subclass could override that assumption. - - An additional 'repair matrix' is created to show what happens when the - Checker/Repairer is run. In the simulation, the Checker will be run every - 'check_period' seconds (default is one month), and the Repairer will be - run if it sees fewer than 'R' shares (default 7). - - The third step is to finally run the simulation. An initial probability - vector is created (with a 100% chance of N shares and a 0% chance of - fewer than N shares), then it is multiplied by the transition matrix for - every delta of time. Each time the Checker is to be run, the repair - matrix is multiplied in, and some additional stats are accumulated - (average number of repairs that occur, average number of shares - regenerated per repair). - - The output is a ReliabilityReport instance, which contains a table that - samples the state of the simulation once each 'report_period' seconds - (defaults to 3 months). Each row of this table will contain the - probability vector for one sample period (chance of having X shares, from - 0 to N, at the end of the period). The report will also contain other - information. - - """ - - @classmethod - def run(klass, - drive_lifetime=8*YEAR, - k=3, R=7, N=10, - delta=1*MONTH, - check_period=1*MONTH, - report_period=3*MONTH, - report_span=5*YEAR, - ): - self = klass() - - check_period = check_period-1 - P = self.p_in_period(drive_lifetime, delta) - - decay = self.build_decay_matrix(N, P) - - repair = self.build_repair_matrix(k, N, R) - - #print "DECAY:", decay - #print "OLD-POST-REPAIR:", old_post_repair - #print "NEW-POST-REPAIR:", decay * repair - #print "REPAIR:", repair - #print "DIFF:", (old_post_repair - decay * repair) - - START = array([0]*N + [1]) - DEAD = array([1]*k + [0]*(1+N-k)) - REPAIRp = array([0]*k + [1]*(R-k) + [0]*(1+N-R)) - REPAIR_newshares = array([0]*k + - [N-i for i in range(k, R)] + - [0]*(1+N-R)) - assert REPAIR_newshares.shape[0] == N+1 - #print "START", START - #print "REPAIRp", REPAIRp - #print "REPAIR_newshares", REPAIR_newshares - - unmaintained_state = START - maintained_state = START - last_check = 0 - last_report = 0 - P_repaired_last_check_period = 0.0 - needed_repairs = [] - needed_new_shares = [] - report = ReliabilityReport() - - for t in range(0, report_span+delta, delta): - # the .A[0] turns the one-row matrix back into an array - unmaintained_state = (unmaintained_state * decay).A[0] - maintained_state = (maintained_state * decay).A[0] - if (t-last_check) > check_period: - last_check = t - # we do a check-and-repair this frequently - need_repair = dot(maintained_state, REPAIRp) - - P_repaired_last_check_period = need_repair - new_shares = dot(maintained_state, REPAIR_newshares) - needed_repairs.append(need_repair) - needed_new_shares.append(new_shares) - - maintained_state = (maintained_state * repair).A[0] - - if (t-last_report) > report_period: - last_report = t - P_dead_unmaintained = dot(unmaintained_state, DEAD) - P_dead_maintained = dot(maintained_state, DEAD) - cumulative_number_of_repairs = sum(needed_repairs) - cumulative_number_of_new_shares = sum(needed_new_shares) - report.add_sample(t, unmaintained_state, maintained_state, - P_repaired_last_check_period, - cumulative_number_of_repairs, - cumulative_number_of_new_shares, - P_dead_unmaintained, P_dead_maintained) - - # record one more sample at the end of the run - P_dead_unmaintained = dot(unmaintained_state, DEAD) - P_dead_maintained = dot(maintained_state, DEAD) - cumulative_number_of_repairs = sum(needed_repairs) - cumulative_number_of_new_shares = sum(needed_new_shares) - report.add_sample(t, unmaintained_state, maintained_state, - P_repaired_last_check_period, - cumulative_number_of_repairs, - cumulative_number_of_new_shares, - P_dead_unmaintained, P_dead_maintained) - - #def yandm(seconds): - # return "%dy.%dm" % (int(seconds/YEAR), int( (seconds%YEAR)/MONTH)) - #needed_repairs_total = sum(needed_repairs) - #needed_new_shares_total = sum(needed_new_shares) - #print "at 2y:" - #print " unmaintained", unmaintained_state - #print " maintained", maintained_state - #print " number of repairs", needed_repairs_total - #print " new shares generated", needed_new_shares_total - #repair_rate_inv = report_span / needed_repairs_total - #print " avg repair rate: once every %s" % yandm(repair_rate_inv) - #print " avg repair download: one share every %s" % yandm(repair_rate_inv/k) - #print " avg repair upload: one share every %s" % yandm(report_span / needed_new_shares_total) - - return report - - def p_in_period(self, avg_lifetime, period): - """Given an average lifetime of a disk (using an exponential model), - what is the chance that a live disk will survive the next 'period' - seconds?""" - - # eg p_in_period(8*YEAR, MONTH) = 98.94% - return math.exp(-1.0*period/avg_lifetime) - - def build_decay_matrix(self, N, P): - """Return a decay matrix. decay[start_shares][end_shares] is the - conditional probability of finishing with end_shares, given that we - started with start_shares.""" - decay_rows = [] - decay_rows.append( [0.0]*(N+1) ) - for start_shares in range(1, (N+1)): - end_shares = self.build_decay_row(start_shares, P) - decay_row = end_shares + [0.0] * (N-start_shares) - assert len(decay_row) == (N+1), len(decay_row) - decay_rows.append(decay_row) - - decay = matrix(decay_rows) - return decay - - def build_decay_row(self, start_shares, P): - """Return a decay row 'end_shares'. end_shares[i] is the chance that - we finish with i shares, given that we started with start_shares, for - all i between 0 and start_shares, inclusive. This implementation - assumes that all shares are independent (IID), but a more complex - model could incorporate inter-share failure correlations like having - two shares on the same server.""" - end_shares = statistics.binomial_distribution_pmf(start_shares, P) - return end_shares - - def build_repair_matrix(self, k, N, R): - """Return a repair matrix. repair[start][end]: is the conditional - probability of the repairer finishing with 'end' shares, given that - it began with 'start' shares (repair if fewer than R shares). The - repairer's behavior is deterministic, so all values in this matrix - are either 0 or 1. This matrix should be applied *after* the decay - matrix.""" - new_repair_rows = [] - for start_shares in range(0, N+1): - new_repair_row = [0] * (N+1) - if start_shares < k: - new_repair_row[start_shares] = 1 - elif start_shares < R: - new_repair_row[N] = 1 - else: - new_repair_row[start_shares] = 1 - new_repair_rows.append(new_repair_row) - - repair = matrix(new_repair_rows) - return repair - -class ReliabilityReport: - def __init__(self): - self.samples = [] - - def add_sample(self, when, unmaintained_shareprobs, maintained_shareprobs, - P_repaired_last_check_period, - cumulative_number_of_repairs, - cumulative_number_of_new_shares, - P_dead_unmaintained, P_dead_maintained): - """ - when: the timestamp at the end of the report period - unmaintained_shareprobs: a vector of probabilities, element[S] - is the chance that there are S shares - left at the end of the report period. - This tracks what happens if no repair - is ever done. - maintained_shareprobs: same, but for 'maintained' grids, where - check and repair is done at the end - of each check period - P_repaired_last_check_period: a float, with the probability - that a repair was performed - at the end of the most recent - check period. - cumulative_number_of_repairs: a float, with the average number - of repairs that will have been - performed by the end of the - report period - cumulative_number_of_new_shares: a float, with the average number - of new shares that repair proceses - generated by the end of the report - period - P_dead_unmaintained: a float, with the chance that the file will - be unrecoverable at the end of the period - P_dead_maintained: same, but for maintained grids - - """ - row = (when, unmaintained_shareprobs, maintained_shareprobs, - P_repaired_last_check_period, - cumulative_number_of_repairs, - cumulative_number_of_new_shares, - P_dead_unmaintained, P_dead_maintained) - self.samples.append(row) diff --git a/src/allmydata/test/test_provisioning.py b/src/allmydata/test/test_provisioning.py deleted file mode 100644 index 71bc6570..00000000 --- a/src/allmydata/test/test_provisioning.py +++ /dev/null @@ -1,113 +0,0 @@ - -from twisted.trial import unittest -from allmydata import provisioning -ReliabilityModel = None -try: - from allmydata.reliability import ReliabilityModel -except ImportError: - pass # might not be importable, since it needs NumPy - -from nevow import inevow -from zope.interface import implements - -class MyRequest: - implements(inevow.IRequest) - pass - -class Provisioning(unittest.TestCase): - def getarg(self, name, astype=int): - if name in self.fields: - return astype(self.fields[name]) - return None - - def test_load(self): - pt = provisioning.ProvisioningTool() - self.fields = {} - #r = MyRequest() - #r.fields = self.fields - #ctx = RequestContext() - #unfilled = pt.renderSynchronously(ctx) - lots_of_stan = pt.do_forms(self.getarg) - self.failUnless(lots_of_stan is not None) - - self.fields = {'filled': True, - "num_users": 50e3, - "files_per_user": 1000, - "space_per_user": 1e9, - "sharing_ratio": 1.0, - "encoding_parameters": "3-of-10-5", - "num_servers": 30, - "ownership_mode": "A", - "download_rate": 100, - "upload_rate": 10, - "delete_rate": 10, - "lease_timer": 7, - } - #filled = pt.renderSynchronously(ctx) - more_stan = pt.do_forms(self.getarg) - self.failUnless(more_stan is not None) - - # trigger the wraparound configuration - self.fields["num_servers"] = 5 - #filled = pt.renderSynchronously(ctx) - more_stan = pt.do_forms(self.getarg) - - # and other ownership modes - self.fields["ownership_mode"] = "B" - more_stan = pt.do_forms(self.getarg) - self.fields["ownership_mode"] = "E" - more_stan = pt.do_forms(self.getarg) - - def test_provisioning_math(self): - self.failUnlessEqual(provisioning.binomial(10, 0), 1) - self.failUnlessEqual(provisioning.binomial(10, 1), 10) - self.failUnlessEqual(provisioning.binomial(10, 2), 45) - self.failUnlessEqual(provisioning.binomial(10, 9), 10) - self.failUnlessEqual(provisioning.binomial(10, 10), 1) - -DAY=24*60*60 -MONTH=31*DAY -YEAR=365*DAY - -class Reliability(unittest.TestCase): - def test_basic(self): - if ReliabilityModel is None: - raise unittest.SkipTest("reliability model requires NumPy") - - # test that numpy math works the way I think it does - import numpy - decay = numpy.matrix([[1,0,0], - [.1,.9,0], - [.01,.09,.9], - ]) - start = numpy.array([0,0,1]) - g2 = (start * decay).A[0] - self.failUnlessEqual(repr(g2), repr(numpy.array([.01,.09,.9]))) - g3 = (g2 * decay).A[0] - self.failUnlessEqual(repr(g3), repr(numpy.array([.028,.162,.81]))) - - # and the dot product - recoverable = numpy.array([0,1,1]) - P_recoverable_g2 = numpy.dot(g2, recoverable) - self.failUnlessAlmostEqual(P_recoverable_g2, .9 + .09) - P_recoverable_g3 = numpy.dot(g3, recoverable) - self.failUnlessAlmostEqual(P_recoverable_g3, .81 + .162) - - r = ReliabilityModel.run(delta=100000, - report_period=3*MONTH, - report_span=5*YEAR) - self.failUnlessEqual(len(r.samples), 20) - - last_row = r.samples[-1] - #print last_row - (when, unmaintained_shareprobs, maintained_shareprobs, - P_repaired_last_check_period, - cumulative_number_of_repairs, - cumulative_number_of_new_shares, - P_dead_unmaintained, P_dead_maintained) = last_row - self.failUnless(isinstance(P_repaired_last_check_period, float)) - self.failUnless(isinstance(P_dead_unmaintained, float)) - self.failUnless(isinstance(P_dead_maintained, float)) - self.failUnlessAlmostEqual(P_dead_unmaintained, 0.033591004555395272) - self.failUnlessAlmostEqual(P_dead_maintained, 3.2983995819177542e-08) - diff --git a/src/allmydata/test/test_web.py b/src/allmydata/test/test_web.py index d918bc15..edafd242 100644 --- a/src/allmydata/test/test_web.py +++ b/src/allmydata/test/test_web.py @@ -512,90 +512,6 @@ class Web(WebMixin, WebErrorMixin, testutil.StallMixin, testutil.ReallyEqualMixi d.addCallback(_check) return d - def test_provisioning(self): - d = self.GET("/provisioning/") - def _check(res): - self.failUnlessIn('Provisioning Tool', res) - self.failUnlessIn(FAVICON_MARKUP, res) - - fields = {'filled': True, - "num_users": int(50e3), - "files_per_user": 1000, - "space_per_user": int(1e9), - "sharing_ratio": 1.0, - "encoding_parameters": "3-of-10-5", - "num_servers": 30, - "ownership_mode": "A", - "download_rate": 100, - "upload_rate": 10, - "delete_rate": 10, - "lease_timer": 7, - } - return self.POST("/provisioning/", **fields) - - d.addCallback(_check) - def _check2(res): - self.failUnlessIn('Provisioning Tool', res) - self.failUnlessIn(FAVICON_MARKUP, res) - self.failUnlessIn("Share space consumed: 167.01TB", res) - - fields = {'filled': True, - "num_users": int(50e6), - "files_per_user": 1000, - "space_per_user": int(5e9), - "sharing_ratio": 1.0, - "encoding_parameters": "25-of-100-50", - "num_servers": 30000, - "ownership_mode": "E", - "drive_failure_model": "U", - "drive_size": 1000, - "download_rate": 1000, - "upload_rate": 100, - "delete_rate": 100, - "lease_timer": 7, - } - return self.POST("/provisioning/", **fields) - d.addCallback(_check2) - def _check3(res): - self.failUnlessIn("Share space consumed: huge!", res) - fields = {'filled': True} - return self.POST("/provisioning/", **fields) - d.addCallback(_check3) - def _check4(res): - self.failUnlessIn("Share space consumed:", res) - d.addCallback(_check4) - return d - - def test_reliability_tool(self): - try: - from allmydata import reliability - _hush_pyflakes = reliability - del _hush_pyflakes - except: - raise unittest.SkipTest("reliability tool requires NumPy") - - d = self.GET("/reliability/") - def _check(res): - self.failUnlessIn('Reliability Tool', res) - fields = {'drive_lifetime': "8Y", - "k": "3", - "R": "7", - "N": "10", - "delta": "100000", - "check_period": "1M", - "report_period": "3M", - "report_span": "5Y", - } - return self.POST("/reliability/", **fields) - - d.addCallback(_check) - def _check2(res): - self.failUnlessIn('Reliability Tool', res) - r = r'Probability of loss \(no maintenance\):\s+0.033591' - self.failUnless(re.search(r, res), res) - d.addCallback(_check2) - return d - def test_status(self): h = self.s.get_history() dl_num = h.list_all_download_statuses()[0].get_counter() diff --git a/src/allmydata/web/provisioning.xhtml b/src/allmydata/web/provisioning.xhtml deleted file mode 100644 index bfa4edb7..00000000 --- a/src/allmydata/web/provisioning.xhtml +++ /dev/null @@ -1,18 +0,0 @@ - - - Tahoe-LAFS - Provisioning Tool - - - - - - -

Tahoe-LAFS Provisioning Tool

- -

This page will help you determine how much disk space and network -bandwidth will be required by various sizes and types of Tahoe-LAFS networks.

- -
- - - diff --git a/src/allmydata/web/reliability.py b/src/allmydata/web/reliability.py deleted file mode 100644 index d5d34061..00000000 --- a/src/allmydata/web/reliability.py +++ /dev/null @@ -1,152 +0,0 @@ - -from nevow import rend, tags as T -reliability = None # might not be usable -try: - from allmydata import reliability # requires NumPy -except ImportError: - pass -from allmydata.web.common import getxmlfile, get_arg - - -DAY=24*60*60 -MONTH=31*DAY -YEAR=365*DAY - -def is_available(): - if reliability: - return True - return False - -def yandm(seconds): - return "%dy.%dm" % (int(seconds/YEAR), int( (seconds%YEAR)/MONTH)) - -class ReliabilityTool(rend.Page): - addSlash = True - docFactory = getxmlfile("reliability.xhtml") - - DEFAULT_PARAMETERS = [ - ("drive_lifetime", "8Y", "time", - "Average drive lifetime"), - ("k", 3, "int", - "Minimum number of shares needed to recover the file"), - ("R", 7, "int", - "Repair threshold: repair will not occur until fewer than R shares " - "are left"), - ("N", 10, "int", - "Total number of shares of the file generated"), - ("delta", "1M", "time", "Amount of time between each simulation step"), - ("check_period", "1M", "time", - "How often to run the checker and repair if fewer than R shares"), - ("report_period", "3M", "time", - "Amount of time between result rows in this report"), - ("report_span", "5Y", "time", - "Total amount of time covered by this report"), - ] - - def parse_time(self, s): - if s.endswith("M"): - return int(s[:-1]) * MONTH - if s.endswith("Y"): - return int(s[:-1]) * YEAR - return int(s) - - def format_time(self, s): - if s%YEAR == 0: - return "%dY" % (s/YEAR) - if s%MONTH == 0: - return "%dM" % (s/MONTH) - return "%d" % s - - def get_parameters(self, ctx): - parameters = {} - for (name,default,argtype,description) in self.DEFAULT_PARAMETERS: - v = get_arg(ctx, name, default) - if argtype == "time": - value = self.parse_time(v) - else: - value = int(v) - parameters[name] = value - return parameters - - def renderHTTP(self, ctx): - self.parameters = self.get_parameters(ctx) - self.results = reliability.ReliabilityModel.run(**self.parameters) - return rend.Page.renderHTTP(self, ctx) - - def make_input(self, name, old_value): - return T.input(name=name, type="text", size="5", - value=self.format_time(old_value)) - - def render_forms(self, ctx, data): - f = T.form(action=".", method="get") - table = [] - for (name,default_value,argtype,description) in self.DEFAULT_PARAMETERS: - old_value = self.parameters[name] - i = self.make_input(name, old_value) - table.append(T.tr[T.td[name+":"], T.td[i], T.td[description]]) - go = T.input(type="submit", value="Recompute") - return [T.h2["Simulation Parameters:"], - f[T.table[table], go], - ] - - def data_simulation_table(self, ctx, data): - for row in self.results.samples: - yield row - - def render_simulation_row(self, ctx, row): - (when, unmaintained_shareprobs, maintained_shareprobs, - P_repaired_last_check_period, - cumulative_number_of_repairs, - cumulative_number_of_new_shares, - P_dead_unmaintained, P_dead_maintained) = row - ctx.fillSlots("t", yandm(when)) - ctx.fillSlots("P_repair", "%.6f" % P_repaired_last_check_period) - ctx.fillSlots("P_dead_unmaintained", "%.6g" % P_dead_unmaintained) - ctx.fillSlots("P_dead_maintained", "%.6g" % P_dead_maintained) - return ctx.tag - - def render_report_span(self, ctx, row): - (when, unmaintained_shareprobs, maintained_shareprobs, - P_repaired_last_check_period, - cumulative_number_of_repairs, - cumulative_number_of_new_shares, - P_dead_unmaintained, P_dead_maintained) = self.results.samples[-1] - return ctx.tag[yandm(when)] - - def render_P_loss_unmaintained(self, ctx, row): - (when, unmaintained_shareprobs, maintained_shareprobs, - P_repaired_last_check_period, - cumulative_number_of_repairs, - cumulative_number_of_new_shares, - P_dead_unmaintained, P_dead_maintained) = self.results.samples[-1] - return ctx.tag["%.6g (%1.8f%%)" % (P_dead_unmaintained, - 100*P_dead_unmaintained)] - - def render_P_loss_maintained(self, ctx, row): - (when, unmaintained_shareprobs, maintained_shareprobs, - P_repaired_last_check_period, - cumulative_number_of_repairs, - cumulative_number_of_new_shares, - P_dead_unmaintained, P_dead_maintained) = self.results.samples[-1] - return ctx.tag["%.6g (%1.8f%%)" % (P_dead_maintained, - 100*P_dead_maintained)] - - def render_P_repair_rate(self, ctx, row): - (when, unmaintained_shareprobs, maintained_shareprobs, - P_repaired_last_check_period, - cumulative_number_of_repairs, - cumulative_number_of_new_shares, - P_dead_unmaintained, P_dead_maintained) = self.results.samples[-1] - freq = when / cumulative_number_of_repairs - return ctx.tag["%.6g" % freq] - - def render_P_repair_shares(self, ctx, row): - (when, unmaintained_shareprobs, maintained_shareprobs, - P_repaired_last_check_period, - cumulative_number_of_repairs, - cumulative_number_of_new_shares, - P_dead_unmaintained, P_dead_maintained) = self.results.samples[-1] - generated_shares = cumulative_number_of_new_shares / cumulative_number_of_repairs - return ctx.tag["%1.2f" % generated_shares] - - diff --git a/src/allmydata/web/reliability.xhtml b/src/allmydata/web/reliability.xhtml deleted file mode 100644 index f8d93d15..00000000 --- a/src/allmydata/web/reliability.xhtml +++ /dev/null @@ -1,63 +0,0 @@ - - - Tahoe-LAFS - Reliability Tool - - - - - - -

Tahoe-LAFS Reliability Tool

- -

Given certain assumptions, this page calculates probability of share loss -over time, to help make informed decisions about how much redundancy and -repair bandwidth to configure on a Tahoe-LAFS grid.

- -
- -

Simulation Results

- -

At the end of the report span (elapsed time ), the simulated file had the following properties:

- -
    -
  • Probability of loss (no maintenance): -
  • -
  • Probability of loss (with maintenance): -
  • -
  • Average repair frequency: - once every secs
  • -
  • Average shares generated per repair: -
  • -
- -

This table shows how the following properties change over time:

-
    -
  • P_repair: the chance that a repair was performed in the most recent - check period.
  • -
  • P_dead (unmaintained): the chance that the file will be unrecoverable - without periodic check+repair
  • -
  • P_dead (maintained): the chance that the file will be unrecoverable even - with periodic check+repair
  • -
- -
- - - - - - - - - - - - - - -
tP_repairP_dead (unmaintained)P_dead (maintained)
no simulation data!
-
- - - diff --git a/src/allmydata/web/root.py b/src/allmydata/web/root.py index 615f98d1..47793201 100644 --- a/src/allmydata/web/root.py +++ b/src/allmydata/web/root.py @@ -2,18 +2,17 @@ import time, os from twisted.internet import address from twisted.web import http -from nevow import rend, url, loaders, tags as T +from nevow import rend, url, tags as T from nevow.inevow import IRequest from nevow.static import File as nevow_File # TODO: merge with static.File? from nevow.util import resource_filename import allmydata # to display import path from allmydata import get_package_versions_string -from allmydata import provisioning from allmydata.util import idlib, log from allmydata.interfaces import IFileNode from allmydata.web import filenode, directory, unlinked, status, operations -from allmydata.web import reliability, storage +from allmydata.web import storage from allmydata.web.common import abbreviate_size, getxmlfile, WebError, \ get_arg, RenderMixin, get_format, get_mutable_type @@ -126,20 +125,6 @@ class IncidentReporter(RenderMixin, rend.Page): req.setHeader("content-type", "text/plain") return "Thank you for your report!" -class NoReliability(rend.Page): - docFactory = loaders.xmlstr('''\ - - - AllMyData - Tahoe - - - -

"Reliability" page not available

-

Please install the python "NumPy" module to enable this page.

- - -''') - SPACE = u"\u00A0"*2 class Root(rend.Page): @@ -175,12 +160,6 @@ class Root(rend.Page): # needs to created on each request return status.HelperStatus(self.client.helper) - child_provisioning = provisioning.ProvisioningTool() - if reliability.is_available(): - child_reliability = reliability.ReliabilityTool() - else: - child_reliability = NoReliability() - child_report_incident = IncidentReporter() #child_server # let's reserve this for storage-server-over-HTTP diff --git a/src/allmydata/web/welcome.xhtml b/src/allmydata/web/welcome.xhtml index b5a191e4..6bf1deba 100644 --- a/src/allmydata/web/welcome.xhtml +++ b/src/allmydata/web/welcome.xhtml @@ -91,9 +91,6 @@
Please visit the Tahoe-LAFS home page for code updates and bug reporting.
-
The provisioning tool and reliability calculator may also be useful.
-