servers = {} # {serverid: set(sharenums)}
corruptshare_locators = [] # (serverid, storageindex, sharenum)
incompatibleshare_locators = [] # (serverid, storageindex, sharenum)
+ servers_responding = set() # serverid
for verified, server, corrupt, incompatible, responded in results:
server_id = server.get_serverid()
corruptshare_locators.append((server_id, SI, sharenum))
for sharenum in incompatible:
incompatibleshare_locators.append((server_id, SI, sharenum))
+ if responded:
+ servers_responding.add(server_id)
d['count-shares-good'] = len(verifiedshares)
d['count-good-share-hosts'] = len([s for s in servers.keys() if servers[s]])
d['count-recoverable-versions'] = 0
d['count-unrecoverable-versions'] = 1
- d['servers-responding'] = list(servers)
+ d['servers-responding'] = list(servers_responding)
d['sharemap'] = verifiedshares
# no such thing as wrong shares of an immutable file
d['count-wrong-shares'] = 0
assert isinstance(sm, DictOfSets), sm
sm.update(ur.sharemap)
servers_responding = set(prr.data['servers-responding'])
- servers_responding.union(ur.sharemap.iterkeys())
- prr.data['servers-responding'] = list(servers_responding)
+ for shnum, serverids in ur.sharemap.items():
+ servers_responding.update(serverids)
+ servers_responding = sorted(servers_responding)
+ prr.data['servers-responding'] = servers_responding
prr.data['count-shares-good'] = len(sm)
prr.data['count-good-share-hosts'] = len(sm)
is_healthy = bool(len(sm) >= verifycap.total_shares)
def _call():
if self.broken:
+ if self.broken is not True: # a counter, not boolean
+ self.broken -= 1
raise IntentionalError("I was asked to break")
if self.hung_until:
d2 = defer.Deferred()
del self.proxies_by_id[serverid]
self.rebuild_serverlist()
- def break_server(self, serverid):
+ def break_server(self, serverid, count=True):
# mark the given server as broken, so it will throw exceptions when
- # asked to hold a share or serve a share
- self.wrappers_by_id[serverid].broken = True
+ # asked to hold a share or serve a share. count=True (the default) is
+ # stored as-is; a number N means: throw N exceptions, then work again.
+ self.wrappers_by_id[serverid].broken = count
def hang_server(self, serverid):
# hang the given server
self.failUnless(data['count-shares-needed'] == 3, data)
self.failUnless(data['count-shares-expected'] == 10, data)
self.failUnless(data['count-good-share-hosts'] == 9, data)
- self.failUnless(len(data['servers-responding']) == 10, data)
+ self.failUnless(len(data['servers-responding']) == 9, data)
self.failUnless(len(data['list-corrupt-shares']) == 0, data)
def test_corrupt_file_verno(self):
d.addCallback(_check)
return d
+ def test_servers_responding(self):
+ self.basedir = "repairer/Repairer/servers_responding"
+ self.set_up_grid(num_clients=2)
+ d = self.upload_and_stash()
+ # break server 0 for a single call (count=1), so that it fails the
+ # pre-repair filecheck but *does* respond to the post-repair filecheck
+ def _then(ign):
+ ss = self.g.servers_by_number[0]
+ self.g.break_server(ss.my_nodeid, count=1)
+ self.delete_shares_numbered(self.uri, [9])
+ return self.c0_filenode.check_and_repair(Monitor())
+ d.addCallback(_then)
+ def _check(rr):
+ # regression check: servers-responding used to omit servers that
+ # responded to the Repair but not to the pre-repair filecheck, so the
+ # post-repair results must list every server id known to the grid
+ prr = rr.get_post_repair_results()
+ expected = set(self.g.get_all_serverids())
+ self.failUnlessEqual(expected, set(prr.data["servers-responding"]))
+ d.addCallback(_check)
+ return d
# XXX extend these tests to show that the checker detects which specific
# share on which specific server is broken -- this is necessary so that the