From c7254c5f1d43e6e98cb432c23c65820d03e9f35d Mon Sep 17 00:00:00 2001
From: Brian Warner
Date: Mon, 16 Mar 2009 22:10:41 -0700
Subject: [PATCH] GC: add date-cutoff-based expiration, add proposed docs

---
 docs/proposed/garbage-collection.txt   | 280 +++++++++++++++++++++++++
 src/allmydata/storage/expirer.py       |  49 +++--
 src/allmydata/storage/server.py        |  11 +-
 src/allmydata/test/test_storage.py     | 177 +++++++++++++++-
 src/allmydata/web/storage.py           |  23 +-
 src/allmydata/web/storage_status.xhtml |   2 +-
 6 files changed, 500 insertions(+), 42 deletions(-)
 create mode 100644 docs/proposed/garbage-collection.txt

diff --git a/docs/proposed/garbage-collection.txt b/docs/proposed/garbage-collection.txt
new file mode 100644
index 00000000..51998d67
--- /dev/null
+++ b/docs/proposed/garbage-collection.txt
@@ -0,0 +1,280 @@

= Garbage Collection in Tahoe =

When a file or directory in the virtual filesystem is no longer referenced,
the space that its shares occupied on each storage server can be freed,
making room for other shares. Tahoe currently uses a garbage collection
("GC") mechanism to implement this space-reclamation process. Each share has
one or more "leases", which are managed by clients who want the
file/directory to be retained. The storage server accepts each share for a
pre-defined period of time, and is allowed to delete the share if all of the
leases are cancelled or allowed to expire.

Garbage collection is not enabled by default: storage servers will not
delete shares without being explicitly configured to do so. When GC is
enabled, clients are responsible for renewing their leases periodically,
frequently enough that no lease expires before the next renewal pass.

There are several tradeoffs to consider when choosing the renewal timer and
the lease duration, and there is no single optimal pair of values. See the
"lease-tradeoffs.svg" diagram to get an idea of the tradeoffs involved. If
lease renewal occurs quickly and with 100% reliability, then any renewal
time that is shorter than the lease duration will suffice, but a larger
ratio of duration-over-renewal-time will be more robust in the face of
occasional delays or failures.

The current recommended values for a small Tahoe grid are to renew the
leases once a week, and to give each lease a duration of 31 days. Renewing
leases can be expected to take about one second per file/directory,
depending upon the number of servers and the network speeds involved. Note
that in the current release, the server code enforces a 31-day lease
duration: there is not yet a way for the client to request a different
duration (however, the server can use the "expire.override_lease_duration"
configuration setting to increase or decrease the effective duration to
something other than 31 days).

== Client-side Renewal ==

If all of the files and directories which you care about are reachable from
a single starting point (usually referred to as a "rootcap"), and you store
that rootcap as an alias (via "tahoe create-alias"), then the simplest way
to renew these leases is with the following CLI command:

  tahoe deep-check --add-lease ALIAS:

This will recursively walk every directory under the given alias and renew
the leases on all files and directories. (You may want to add the --repair
flag to perform repair at the same time.) Simply run this command once a
week (or whatever other renewal period your grid recommends) and make sure
it completes successfully.
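If your platform provides cron, one way to automate this weekly pass is a
small wrapper script such as the following sketch (the alias name "backup:"
and the error handling are examples only, not part of Tahoe):

  #!/usr/bin/env python
  # renew-leases.py: run once a week (e.g. from cron) to renew the leases
  # on everything reachable from an alias.
  import subprocess, sys

  rc = subprocess.call(["tahoe", "deep-check", "--add-lease", "backup:"])
  if rc != 0:
      # one failed pass is survivable, but repeated failures will let
      # leases expire, so make sure somebody notices
      sys.stderr.write("lease renewal failed (exit code %d)\n" % rc)
      sys.exit(rc)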
As a side effect of this command, a manifest of all unique files and
directories will be emitted to stdout, along with a summary of file sizes
and counts. It may be useful to track these statistics over time.

Note that newly uploaded files (and newly created directories) get an
initial lease too: the --add-lease process is only needed to ensure that all
older objects have up-to-date leases on them.

For larger systems (such as a commercial grid), a separate "maintenance
daemon" is under development. This daemon will acquire manifests from
rootcaps on a periodic basis, keep track of checker results, manage
lease-addition, and prioritize repair needs, using multiple worker nodes to
perform these jobs in parallel. Eventually, this daemon will be made
appropriate for use by individual users as well, and may be incorporated
directly into the client node.

== Server-side Expiration ==

Expiration must be explicitly enabled on each storage server, since the
default behavior is to never expire shares. Expiration is enabled by adding
config keys to the "[storage]" section of the tahoe.cfg file (as described
below) and restarting the server node.

Each lease has two parameters: a create/renew timestamp and a duration. The
timestamp is set when the share is first uploaded (i.e. the file or
directory is created), and updated again each time the lease is renewed
(i.e. "tahoe check --add-lease" is performed). The duration is currently
fixed at 31 days, and the "nominal lease expiration time" is simply
$duration seconds after the $create_renew timestamp. (In a future release of
Tahoe, the client will get to request a specific duration, and the server
will accept or reject the request depending upon its local configuration, so
that servers can achieve better control over their storage obligations.)

The lease-expiration code has two modes of operation. The first is
age-based: leases are expired when their age is greater than their duration.
This is the preferred mode: as long as clients renew their leases
periodically, and the renewal period is shorter than the lease duration, all
active files and directories will be preserved, and garbage will be
collected in a timely fashion.

Since there is not yet a way for clients to request a lease duration other
than 31 days, there is a tahoe.cfg setting to override the duration of all
leases. If, for example, this alternative duration is set to 60 days, then
clients could safely renew their leases with an add-lease operation perhaps
once every 50 days: even though nominally their leases would expire 31 days
after the renewal, the server would not actually expire the leases until 60
days after renewal.

The other mode is an absolute date-cutoff: it compares the create/renew
timestamp against some absolute date, and expires any lease which was not
created or renewed since the cutoff date. If all clients have performed an
add-lease some time after March 20th, you could tell the storage server to
expire all leases that were created or last renewed on March 19th or
earlier. This is most useful if you have a manual (non-periodic) add-lease
process. Note that there is not much point to running a storage server in
this mode for a long period of time: once the lease-checker has examined all
shares and expired whatever it is going to expire, the second and subsequent
passes are not going to find any new leases to remove.
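To make the difference between the two modes concrete, here is a sketch of
the decision each one makes for a single lease (the names are illustrative
only; this is not the actual server implementation):

  import time

  def lease_is_expired(create_renew, duration, mode, now=None):
      # 'create_renew' and a date-cutoff are in seconds-since-epoch;
      # 'duration' is in seconds. 'mode' is either
      # ("age", override_duration_or_None) or ("date-cutoff", cutoff).
      if now is None:
          now = time.time()
      kind, value = mode
      if kind == "age":
          if value is not None:
              duration = value  # expire.override_lease_duration
          return (create_renew + duration) < now
      assert kind == "date-cutoff"
      return create_renew < value  # not renewed since the cutoff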
The tahoe.cfg file uses the following keys to control lease expiration:

[storage]

expire.enabled = (boolean, optional)

 If this is True, the storage server will delete shares on which all leases
 have expired. Other controls dictate when leases are considered to have
 expired. The default is False.

expire.mode = (string, "age" or "date-cutoff", required if expiration enabled)

 If this string is "age", the age-based expiration scheme is used, and the
 "expire.override_lease_duration" setting can be provided to influence the
 lease ages. If it is "date-cutoff", the absolute-date-cutoff mode is used,
 and the "expire.cutoff_date" setting must be provided to specify the cutoff
 date. The mode setting currently has no default: you must provide a value.

 In a future release, this setting is likely to default to "age", but in
 this release it was deemed safer to require an explicit mode specification.

expire.override_lease_duration = (duration string, optional)

 When age-based expiration is in use, a lease will be expired if its
 "lease.create_renew" timestamp plus its "lease.duration" time is
 earlier/older than the current time. This key, if present, overrides the
 duration value for all leases, changing the algorithm from:

   if (lease.create_renew_timestamp + lease.duration) < now:
       expire_lease()

 to:

   if (lease.create_renew_timestamp + override_lease_duration) < now:
       expire_lease()

 The value of this setting is a "duration string", which is a number of
 days, months, or years, followed by a units suffix, and optionally
 separated by a space, such as one of the following:

   7days
   31day
   60 days
   2mo
   3 month
   12 months
   2years

 This key is meant to compensate for the fact that clients do not yet have
 the ability to ask for leases that last longer than 31 days. A grid which
 wants to use faster or slower GC than a 31-day lease timer permits can use
 this parameter to implement it. The current fixed 31-day lease duration
 makes the server behave as if "expire.override_lease_duration = 31days"
 had been passed.

 This key is only valid when age-based expiration is in use (i.e. when
 "expire.mode = age" is used). It will be rejected if date-cutoff expiration
 is in use.

expire.cutoff_date = (date string, required if mode=date-cutoff)

 When date-cutoff expiration is in use, a lease will be expired if its
 create/renew timestamp is older than the cutoff date. This string will be
 a date in the following format:

   16-Jan-2009
   02-Feb-2008
   25-Dec-2007

 The actual cutoff time is midnight UTC at the beginning of the given day.
 Lease timers should naturally be generous enough not to depend upon
 differences in timezone: there should be at least a few days between the
 last renewal time and the cutoff date.

 This key is only valid when cutoff-based expiration is in use (i.e. when
 "expire.mode = date-cutoff"). It will be rejected if age-based expiration
 is in use.

expire.immutable = (boolean, optional)

 If this is False, then immutable shares will never be deleted, even if
 their leases have expired. This can be used in special situations to
 perform GC on mutable files but not immutable ones. The default is True.

expire.mutable = (boolean, optional)

 If this is False, then mutable shares will never be deleted, even if their
 leases have expired. This can be used in special situations to perform GC
 on immutable files but not mutable ones. The default is True.
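Putting these keys together, a complete expiration stanza might look like
one of the following (the specific values are examples only; choose numbers
that match your grid's renewal policy):

  [storage]
  expire.enabled = true
  expire.mode = age
  expire.override_lease_duration = 60 days

or, for a one-shot cleanup after a manual add-lease pass:

  [storage]
  expire.enabled = true
  expire.mode = date-cutoff
  expire.cutoff_date = 19-Mar-2009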
== Expiration Progress ==

In the current release, leases are stored as metadata in each share file,
and no separate database is maintained. As a result, checking and expiring
leases on a large server may require multiple reads from each of several
million share files. This process can take a long time and be very
disk-intensive, so a "share crawler" is used. The crawler limits the time it
spends examining shares to a small fraction of the node's overall runtime:
by default it uses no more than 10% of the CPU, yielding to other code after
each 100ms slice. A typical server with 1.1M shares was observed to take 3.5
days to perform this rate-limited crawl through the whole set of shares,
with expiration disabled. It is expected to take perhaps 4 or 5 days to do
the crawl with expiration turned on.

The crawler's status is displayed on the "Storage Server Status Page", a web
page dedicated to the storage server. This page resides at $NODEURL/storage,
and there is a link to it from the front "welcome" page. The "Lease
Expiration crawler" section of the status page shows the progress of the
current crawler cycle, the expected completion time, the amount of space
recovered, and details of how many shares have been examined.

The crawler's state is persistent: restarting the node will not cause it to
lose significant progress. The state is stored in two files
($BASEDIR/storage/lease_checker.state and lease_checker.history), and the
crawler can be forcibly reset by stopping the node, deleting these two
files, then restarting the node.

== Future Directions ==

Tahoe's GC mechanism is undergoing significant changes. The global
mark-and-sweep garbage-collection scheme can require considerable network
traffic for large grids, interfering with the bandwidth available for
regular uploads and downloads (and for non-Tahoe users of the network).

A preferable method might be to have a timer per client instead of a timer
per lease: the leases would not be expired until/unless the client had not
checked in with the server for a pre-determined duration. This would reduce
the network traffic considerably (one message per week instead of
thousands), but retain the same general failure characteristics.

In addition, using timers is not fail-safe (from the client's point of
view), in that a client which leaves the network for an extended period of
time may return to discover that all of its files have been
garbage-collected. (It *is* fail-safe from the server's point of view, in
that a server is not obligated to provide disk space in perpetuity to an
unresponsive client.) It may be useful to create a "renewal agent" to which
a client can pass a list of renewal-caps: the agent then takes the
responsibility for keeping these leases renewed, so the client can go
offline safely. Of course, this requires a certain amount of coordination:
the renewal agent should not be keeping files alive that the client has
actually deleted. The client can send the renewal agent a manifest of
renewal caps, and each new manifest should replace the previous set.

The GC mechanism is also not immediate: a client which deletes a file will
nevertheless be consuming extra disk space (and might be charged or
otherwise held accountable for it) until the ex-file's leases finally expire
on their own. If the client is certain that it has removed its last
reference to the file, it can accelerate the GC process by cancelling its
lease.
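As a rough illustration, an explicit client-side cancellation might look
like the following sketch (the helper is hypothetical, and the actual
remote-method name and signature may differ):

  # 'server' is a Foolscap RemoteReference to the storage server. The
  # cancel secret is the one used when the lease was created. Only cancel
  # a lease if nothing else you control still references the file.
  def cancel_my_lease(server, storage_index, cancel_secret):
      return server.callRemote("cancel_lease", storage_index, cancel_secret)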
The current storage server API provides just such a cancel method, but the
client must be careful to coordinate with anyone else who might be
referencing the same lease (perhaps a second directory in the same virtual
drive); otherwise they might accidentally remove a lease that should have
been retained.

In the current release, these leases are each associated with a single
"node secret" (stored in $BASEDIR/private/secret), which is used to
generate renewal and cancel secrets for each lease. Two nodes with
different secrets will produce separate leases, and will not be able to
renew or cancel each other's leases.

Once the Accounting project is in place, leases will be scoped by a
sub-delegatable "account id" instead of a node secret, so clients will be
able to manage multiple leases per file. In addition, servers will be able
to identify which shares are leased by which clients, so that clients can
safely reconcile their idea of which files/directories are active against
the server's list, and explicitly cancel leases on objects that aren't on
the active list.

Reducing the size of the "lease scope" makes this coordination problem
easier. In general, mark-and-sweep is easier to implement (it requires mere
vigilance, rather than coordination), so unless the space consumed by
deleted files is not being reclaimed quickly enough, the renew/expire
timed-lease approach is recommended.

diff --git a/src/allmydata/storage/expirer.py b/src/allmydata/storage/expirer.py
index 9a7d13c1..714a63ac 100644
--- a/src/allmydata/storage/expirer.py
+++ b/src/allmydata/storage/expirer.py
@@ -15,7 +15,7 @@ class LeaseCheckingCrawler(ShareCrawler):
     status page, including::

      Space recovered during this cycle-so-far:
-      actual (only if expire_leases=True):
+      actual (only if expiration_enabled=True):
        num-buckets, num-shares, sum of share sizes, real disk usage
        ('real disk usage' means we use stat(fn).st_blocks*512 and
        include any space used by the directory)
@@ -37,7 +37,7 @@ class LeaseCheckingCrawler(ShareCrawler):
     Histogram of leases-per-share:
      this-cycle-to-date
      last 10 cycles <-- separate pickle
-    Histogram of lease ages, buckets = expiration_time/10
+    Histogram of lease ages, buckets = 1day
      cycle-to-date
      last 10 cycles <-- separate pickle

@@ -49,10 +49,16 @@ class LeaseCheckingCrawler(ShareCrawler):
     minimum_cycle_time = 12*60*60 # not more than twice per day

     def __init__(self, server, statefile, historyfile,
-                 expire_leases, expiration_time):
+                 expiration_enabled, expiration_mode):
         self.historyfile = historyfile
-        self.expire_leases = expire_leases
-        self.age_limit = expiration_time
+        self.expiration_enabled = expiration_enabled
+        self.mode = expiration_mode
+        if self.mode[0] not in ("age", "date-cutoff"):
+            raise ValueError("garbage-collection mode '%s' must be 'age' or 'date-cutoff'" % self.mode[0])
+        if self.mode[0] == "age":
+            assert isinstance(expiration_mode[1], int) # seconds
+        elif self.mode[0] == "date-cutoff":
+            assert isinstance(expiration_mode[1], int) # seconds-since-epoch
         ShareCrawler.__init__(self, server, statefile)

     def add_initial_state(self):
@@ -159,10 +165,19 @@ class LeaseCheckingCrawler(ShareCrawler):
                 num_valid_leases_original += 1

             # expired-or-not according to our configured age limit
-            if age < self.age_limit:
-                num_valid_leases_configured += 1
+            if self.mode[0] == "age":
+                age_limit = self.mode[1]
+                if age < age_limit:
+                    num_valid_leases_configured += 1
+                else:
+                    expired_leases_configured.append(li)
             else:
-                expired_leases_configured.append(li)
+                assert self.mode[0] == "date-cutoff"
== "date-cutoff" + date_cutoff = self.mode[1] + if grant_renew_time > date_cutoff: + num_valid_leases_configured += 1 + else: + expired_leases_configured.append(li) so_far = self.state["cycle-to-date"] self.increment(so_far["leases-per-share-histogram"], num_leases, 1) @@ -172,7 +187,7 @@ class LeaseCheckingCrawler(ShareCrawler): would_keep_share = [1, 1, 1] - if self.expire_leases: + if self.expiration_enabled: for li in expired_leases_configured: sf.cancel_lease(li.cancel_secret) @@ -183,7 +198,7 @@ class LeaseCheckingCrawler(ShareCrawler): if num_valid_leases_configured == 0: would_keep_share[1] = 0 self.increment_space("configured-leasetimer", s) - if self.expire_leases: + if self.expiration_enabled: would_keep_share[2] = 0 self.increment_space("actual", s) @@ -211,7 +226,7 @@ class LeaseCheckingCrawler(ShareCrawler): d[k] += delta def add_lease_age_to_histogram(self, age): - bucket_interval = self.age_limit / 10.0 + bucket_interval = 24*60*60 bucket_number = int(age/bucket_interval) bucket_start = bucket_number * bucket_interval bucket_end = bucket_start + bucket_interval @@ -235,8 +250,8 @@ class LeaseCheckingCrawler(ShareCrawler): start = self.state["current-cycle-start-time"] now = time.time() h["cycle-start-finish-times"] = (start, now) - h["expiration-enabled"] = self.expire_leases - h["configured-expiration-time"] = self.age_limit + h["expiration-enabled"] = self.expiration_enabled + h["configured-expiration-mode"] = self.mode s = self.state["cycle-to-date"] @@ -277,7 +292,7 @@ class LeaseCheckingCrawler(ShareCrawler): cycle-to-date: expiration-enabled - configured-expiration-time + configured-expiration-mode lease-age-histogram (list of (minage,maxage,sharecount) tuples) leases-per-share-histogram corrupt-shares (list of (si_b32,shnum) tuples, minimal verification) @@ -302,7 +317,7 @@ class LeaseCheckingCrawler(ShareCrawler): history: maps cyclenum to a dict with the following keys: cycle-start-finish-times expiration-enabled - configured-expiration-time + configured-expiration-mode lease-age-histogram leases-per-share-histogram corrupt-shares @@ -344,8 +359,8 @@ class LeaseCheckingCrawler(ShareCrawler): lah = so_far["lease-age-histogram"] so_far["lease-age-histogram"] = self.convert_lease_age_histogram(lah) - so_far["expiration-enabled"] = self.expire_leases - so_far["configured-expiration-time"] = self.age_limit + so_far["expiration-enabled"] = self.expiration_enabled + so_far["configured-expiration-mode"] = self.mode so_far_sr = so_far["space-recovered"] remaining_sr = {} diff --git a/src/allmydata/storage/server.py b/src/allmydata/storage/server.py index e922854a..1f07ed7e 100644 --- a/src/allmydata/storage/server.py +++ b/src/allmydata/storage/server.py @@ -40,7 +40,8 @@ class StorageServer(service.MultiService, Referenceable): def __init__(self, storedir, nodeid, reserved_space=0, discard_storage=False, readonly_storage=False, stats_provider=None, - expire_leases=False, expiration_time=31*24*60*60): + expiration_enabled=False, + expiration_mode=("age", 31*24*60*60)): service.MultiService.__init__(self) assert isinstance(nodeid, str) assert len(nodeid) == 20 @@ -81,20 +82,20 @@ class StorageServer(service.MultiService, Referenceable): "cancel": [], } self.add_bucket_counter() - self.add_lease_checker(expire_leases, expiration_time) + self.add_lease_checker(expiration_enabled, expiration_mode) def add_bucket_counter(self): statefile = os.path.join(self.storedir, "bucket_counter.state") self.bucket_counter = BucketCountingCrawler(self, statefile) 
         self.bucket_counter.setServiceParent(self)

-    def add_lease_checker(self, expire_leases, expiration_time):
+    def add_lease_checker(self, expiration_enabled, expiration_mode):
         statefile = os.path.join(self.storedir, "lease_checker.state")
         historyfile = os.path.join(self.storedir, "lease_checker.history")
         klass = self.LeaseCheckerClass
         self.lease_checker = klass(self, statefile, historyfile,
-                                   expire_leases=expire_leases,
-                                   expiration_time=expiration_time)
+                                   expiration_enabled=expiration_enabled,
+                                   expiration_mode=expiration_mode)
         self.lease_checker.setServiceParent(self)

     def count(self, name, delta=1):
diff --git a/src/allmydata/test/test_storage.py b/src/allmydata/test/test_storage.py
index 286efe11..85d39033 100644
--- a/src/allmydata/test/test_storage.py
+++ b/src/allmydata/test/test_storage.py
@@ -1611,6 +1611,8 @@ class LeaseCrawler(unittest.TestCase, pollmixin.PollMixin, WebRenderingMixin):

         ss.setServiceParent(self.s)

+        DAY = 24*60*60
+
         d = eventual.fireEventually()

         # now examine the state right after the first bucket has been
@@ -1625,12 +1627,12 @@ class LeaseCrawler(unittest.TestCase, pollmixin.PollMixin, WebRenderingMixin):
             so_far = initial_state["cycle-to-date"]
             self.failUnlessEqual(so_far["expiration-enabled"], False)
-            self.failUnless("configured-expiration-time" in so_far)
+            self.failUnless("configured-expiration-mode" in so_far)
             self.failUnless("lease-age-histogram" in so_far)
             lah = so_far["lease-age-histogram"]
             self.failUnlessEqual(type(lah), list)
             self.failUnlessEqual(len(lah), 1)
-            self.failUnlessEqual(lah, [ (0.0, lc.age_limit/10.0, 1) ] )
+            self.failUnlessEqual(lah, [ (0.0, DAY, 1) ] )
             self.failUnlessEqual(so_far["leases-per-share-histogram"], {1: 1})
             self.failUnlessEqual(so_far["buckets-examined"], 1)
             self.failUnlessEqual(so_far["shares-examined"], 1)
@@ -1684,13 +1686,13 @@ class LeaseCrawler(unittest.TestCase, pollmixin.PollMixin, WebRenderingMixin):
             self.failUnless("cycle-start-finish-times" in last)
             self.failUnlessEqual(type(last["cycle-start-finish-times"]), tuple)
             self.failUnlessEqual(last["expiration-enabled"], False)
-            self.failUnless("configured-expiration-time" in last)
+            self.failUnless("configured-expiration-mode" in last)

             self.failUnless("lease-age-histogram" in last)
             lah = last["lease-age-histogram"]
             self.failUnlessEqual(type(lah), list)
             self.failUnlessEqual(len(lah), 1)
-            self.failUnlessEqual(lah, [ (0.0, lc.age_limit/10.0, 6) ] )
+            self.failUnlessEqual(lah, [ (0.0, DAY, 6) ] )

             self.failUnlessEqual(last["leases-per-share-histogram"], {1: 2, 2: 2})
@@ -1742,14 +1744,14 @@ class LeaseCrawler(unittest.TestCase, pollmixin.PollMixin, WebRenderingMixin):
                     return
         raise IndexError("unable to renew non-existent lease")

-    def test_expire(self):
-        basedir = "storage/LeaseCrawler/expire"
+    def test_expire_age(self):
+        basedir = "storage/LeaseCrawler/expire_age"
         fileutil.make_dirs(basedir)
         # setting expiration_time to 2000 means that any lease which is more
         # than 2000s old will be expired.
         ss = InstrumentedStorageServer(basedir, "\x00" * 20,
-                                       expire_leases=True,
-                                       expiration_time=2000)
+                                       expiration_enabled=True,
+                                       expiration_mode=("age",2000))
         # make it start sooner than usual.
        lc = ss.lease_checker
        lc.slow_start = 0
@@ -1841,7 +1843,8 @@ class LeaseCrawler(unittest.TestCase, pollmixin.PollMixin, WebRenderingMixin):
             last = s["history"][0]

             self.failUnlessEqual(last["expiration-enabled"], True)
-            self.failUnlessEqual(last["configured-expiration-time"], 2000)
+            self.failUnlessEqual(last["configured-expiration-mode"],
+                                 ("age",2000))
             self.failUnlessEqual(last["buckets-examined"], 4)
             self.failUnlessEqual(last["shares-examined"], 4)
             self.failUnlessEqual(last["leases-per-share-histogram"],
@@ -1871,10 +1874,164 @@ class LeaseCrawler(unittest.TestCase, pollmixin.PollMixin, WebRenderingMixin):
         def _check_html(html):
             s = remove_tags(html)
             self.failUnlessIn("Expiration Enabled: expired leases will be removed", s)
+            self.failUnlessIn("leases created or last renewed more than 33 minutes ago will be considered expired", s)
+            self.failUnlessIn(" recovered: 2 shares, 2 buckets, ", s)
+        d.addCallback(_check_html)
+        return d
+
+    def test_expire_date_cutoff(self):
+        basedir = "storage/LeaseCrawler/expire_date_cutoff"
+        fileutil.make_dirs(basedir)
+        # setting date-cutoff to 2000 seconds ago means that any lease which
+        # is more than 2000s old will be expired.
+        now = time.time()
+        then = int(now - 2000)
+        ss = InstrumentedStorageServer(basedir, "\x00" * 20,
+                                       expiration_enabled=True,
+                                       expiration_mode=("date-cutoff",then))
+        # make it start sooner than usual.
+        lc = ss.lease_checker
+        lc.slow_start = 0
+        lc.stop_after_first_bucket = True
+        webstatus = StorageStatus(ss)
+
+        # create a few shares, with some leases on them
+        self.make_shares(ss)
+        [immutable_si_0, immutable_si_1, mutable_si_2, mutable_si_3] = self.sis
+
+        def count_shares(si):
+            return len(list(ss._iter_share_files(si)))
+        def _get_sharefile(si):
+            return list(ss._iter_share_files(si))[0]
+        def count_leases(si):
+            return len(list(_get_sharefile(si).get_leases()))
+
+        self.failUnlessEqual(count_shares(immutable_si_0), 1)
+        self.failUnlessEqual(count_leases(immutable_si_0), 1)
+        self.failUnlessEqual(count_shares(immutable_si_1), 1)
+        self.failUnlessEqual(count_leases(immutable_si_1), 2)
+        self.failUnlessEqual(count_shares(mutable_si_2), 1)
+        self.failUnlessEqual(count_leases(mutable_si_2), 1)
+        self.failUnlessEqual(count_shares(mutable_si_3), 1)
+        self.failUnlessEqual(count_leases(mutable_si_3), 2)
+
+        # artificially crank back the expiration time on the first lease of
+        # each share, to make it look like it was renewed 3000s ago. To
+        # achieve this, we need to set the expiration time to
+        # now-3000+31days. This will change when the lease format is
+        # improved to contain both create/renew time and duration.
+        new_expiration_time = now - 3000 + 31*24*60*60
+
+        # Some shares have an extra lease which is set to expire at the
+        # default time in 31 days from now (age=31days). We then run the
+        # crawler, which will expire the first lease, making some shares get
+        # deleted and others stay alive (with one remaining lease)
+
+        sf0 = _get_sharefile(immutable_si_0)
+        self.backdate_lease(sf0, self.renew_secrets[0], new_expiration_time)
+        sf0_size = os.stat(sf0.home).st_size
+
+        # immutable_si_1 gets an extra lease
+        sf1 = _get_sharefile(immutable_si_1)
+        self.backdate_lease(sf1, self.renew_secrets[1], new_expiration_time)
+
+        sf2 = _get_sharefile(mutable_si_2)
+        self.backdate_lease(sf2, self.renew_secrets[3], new_expiration_time)
+        sf2_size = os.stat(sf2.home).st_size
+
+        # mutable_si_3 gets an extra lease
+        sf3 = _get_sharefile(mutable_si_3)
+        self.backdate_lease(sf3, self.renew_secrets[4], new_expiration_time)
+
+        ss.setServiceParent(self.s)
+
+        d = eventual.fireEventually()
+        # examine the state right after the first bucket has been processed
+        def _after_first_bucket(ignored):
+            p = lc.get_progress()
+            self.failUnless(p["cycle-in-progress"])
+        d.addCallback(_after_first_bucket)
+        d.addCallback(lambda ign: self.render1(webstatus))
+        def _check_html_in_cycle(html):
+            s = remove_tags(html)
+            # the first bucket encountered gets deleted, and its prefix
+            # happens to be about 1/5th of the way through the ring, so the
+            # predictor thinks we'll have 5 shares and that we'll delete
+            # them all. This part of the test depends upon the SIs landing
+            # right where they do now.
+            self.failUnlessIn("The remainder of this cycle is expected to "
+                              "recover: 4 shares, 4 buckets", s)
+            self.failUnlessIn("The whole cycle is expected to examine "
+                              "5 shares in 5 buckets and to recover: "
+                              "5 shares, 5 buckets", s)
+        d.addCallback(_check_html_in_cycle)
+
+        # wait for the crawler to finish the first cycle. Two shares should
+        # have been removed
+        def _wait():
+            return bool(lc.get_state()["last-cycle-finished"] is not None)
+        d.addCallback(lambda ign: self.poll(_wait))
+
+        def _after_first_cycle(ignored):
+            self.failUnlessEqual(count_shares(immutable_si_0), 0)
+            self.failUnlessEqual(count_shares(immutable_si_1), 1)
+            self.failUnlessEqual(count_leases(immutable_si_1), 1)
+            self.failUnlessEqual(count_shares(mutable_si_2), 0)
+            self.failUnlessEqual(count_shares(mutable_si_3), 1)
+            self.failUnlessEqual(count_leases(mutable_si_3), 1)
+
+            s = lc.get_state()
+            last = s["history"][0]
+
+            self.failUnlessEqual(last["expiration-enabled"], True)
+            self.failUnlessEqual(last["configured-expiration-mode"],
+                                 ("date-cutoff",then))
+            self.failUnlessEqual(last["buckets-examined"], 4)
+            self.failUnlessEqual(last["shares-examined"], 4)
+            self.failUnlessEqual(last["leases-per-share-histogram"],
+                                 {1: 2, 2: 2})
+
+            rec = last["space-recovered"]
+            self.failUnlessEqual(rec["actual-numbuckets"], 2)
+            self.failUnlessEqual(rec["original-leasetimer-numbuckets"], 0)
+            self.failUnlessEqual(rec["configured-leasetimer-numbuckets"], 2)
+            self.failUnlessEqual(rec["actual-numshares"], 2)
+            self.failUnlessEqual(rec["original-leasetimer-numshares"], 0)
+            self.failUnlessEqual(rec["configured-leasetimer-numshares"], 2)
+            size = sf0_size + sf2_size
+            self.failUnlessEqual(rec["actual-sharebytes"], size)
+            self.failUnlessEqual(rec["original-leasetimer-sharebytes"], 0)
+            self.failUnlessEqual(rec["configured-leasetimer-sharebytes"], size)
+            # different platforms have different notions of "blocks used by
+            # this file", so merely assert that it's a number
+            self.failUnless(rec["actual-diskbytes"] >= 0,
+                            rec["actual-diskbytes"])
+            self.failUnless(rec["original-leasetimer-diskbytes"] >= 0,
rec["original-leasetimer-diskbytes"]) + self.failUnless(rec["configured-leasetimer-diskbytes"] >= 0, + rec["configured-leasetimer-diskbytes"]) + d.addCallback(_after_first_cycle) + d.addCallback(lambda ign: self.render1(webstatus)) + def _check_html(html): + s = remove_tags(html) + self.failUnlessIn("Expiration Enabled:" + " expired leases will be removed", s) + date = time.strftime("%d-%b-%Y", time.gmtime(then)) + self.failUnlessIn("leases created or last renewed before %s" + " will be considered expired" % date, s) self.failUnlessIn(" recovered: 2 shares, 2 buckets, ", s) d.addCallback(_check_html) return d + def test_bad_mode(self): + basedir = "storage/LeaseCrawler/bad_mode" + fileutil.make_dirs(basedir) + e = self.failUnlessRaises(ValueError, + StorageServer, basedir, "\x00" * 20, + expiration_mode=("bogus", 0)) + self.failUnless("garbage-collection mode 'bogus'" + " must be 'age' or 'date-cutoff'" in str(e), str(e)) + def test_limited_history(self): basedir = "storage/LeaseCrawler/limited_history" fileutil.make_dirs(basedir) @@ -1970,7 +2127,7 @@ class LeaseCrawler(unittest.TestCase, pollmixin.PollMixin, WebRenderingMixin): basedir = "storage/LeaseCrawler/no_st_blocks" fileutil.make_dirs(basedir) ss = No_ST_BLOCKS_StorageServer(basedir, "\x00" * 20, - expiration_time=-1000) + expiration_mode=("age",-1000)) # a negative expiration_time= means the "configured-leasetimer-" # space-recovered counts will be non-zero, since all shares will have # expired by then diff --git a/src/allmydata/web/storage.py b/src/allmydata/web/storage.py index f827bb58..dec972ba 100644 --- a/src/allmydata/web/storage.py +++ b/src/allmydata/web/storage.py @@ -122,16 +122,22 @@ class StorageStatus(rend.Page): def render_lease_expiration_enabled(self, ctx, data): lc = self.storage.lease_checker - if lc.expire_leases: + if lc.expiration_enabled: return ctx.tag["Enabled: expired leases will be removed"] else: return ctx.tag["Disabled: scan-only mode, no leases will be removed"] - def render_lease_expiration_age_limit(self, ctx, data): - lc = self.storage.lease_checker - return ctx.tag["leases created or last renewed more than %s ago " - "will be considered expired" - % abbreviate_time(lc.age_limit)] + def render_lease_expiration_mode(self, ctx, data): + mode = self.storage.lease_checker.mode + if mode[0] == "age": + return ctx.tag["leases created or last renewed more than %s ago " + "will be considered expired" + % abbreviate_time(mode[1])] + else: + assert mode[0] == "date-cutoff" + date = time.strftime("%d-%b-%Y", time.gmtime(mode[1])) + return ctx.tag["leases created or last renewed before %s " + "will be considered expired" % date] def format_recovered(self, sr, a): def maybe(d): @@ -189,9 +195,8 @@ class StorageStatus(rend.Page): self.format_recovered(ecr, "configured-leasetimer")) add("if we were using each lease's default 31-day lease lifetime " - "(instead of our configured %s lifetime), " - "this cycle would be expected to recover: " - % abbreviate_time(so_far["configured-expiration-time"]), + "(instead of our configured node), " + "this cycle would be expected to recover: ", self.format_recovered(ecr, "original-leasetimer")) if so_far["corrupt-shares"]: diff --git a/src/allmydata/web/storage_status.xhtml b/src/allmydata/web/storage_status.xhtml index 16c4307c..7579e591 100644 --- a/src/allmydata/web/storage_status.xhtml +++ b/src/allmydata/web/storage_status.xhtml @@ -73,7 +73,7 @@