]> git.rkrishnan.org Git - tahoe-lafs/tahoe-lafs.git/blob - src/allmydata/scripts/tahoe_check.py
tahoe_check.py: tolerate missing fields in check results for LIT files/dirs. fixes...
[tahoe-lafs/tahoe-lafs.git] / src / allmydata / scripts / tahoe_check.py
1
2 import urllib
3 import simplejson
4 from twisted.protocols.basic import LineOnlyReceiver
5 from allmydata.scripts.common import get_alias, DEFAULT_ALIAS, escape_path, \
6                                      UnknownAliasError
7 from allmydata.scripts.common_http import do_http, format_http_error
8 from allmydata.util.encodingutil import quote_output, quote_path
9
class Checker:
    """Empty placeholder class.

    It defines no attributes or methods, and nothing in the visible part of
    this module refers to it.
    """
12
def _quote_serverid_index_share(serverid, storage_index, sharenum):
    """Format one share location as a single display string.

    The server id and the storage index are each passed through
    quote_output() with quote marks disabled; the share number is rendered
    with %r.
    """
    shown_server = quote_output(serverid, quotemarks=False)
    shown_index = quote_output(storage_index, quotemarks=False)
    return "server %s, SI %s, shnum %r" % (shown_server, shown_index, sharenum)
17
18 def check(options):
19     stdout = options.stdout
20     stderr = options.stderr
21     nodeurl = options['node-url']
22     if not nodeurl.endswith("/"):
23         nodeurl += "/"
24     where = options.where
25     try:
26         rootcap, path = get_alias(options.aliases, where, DEFAULT_ALIAS)
27     except UnknownAliasError, e:
28         e.display(stderr)
29         return 1
30     if path == '/':
31         path = ''
32     url = nodeurl + "uri/%s" % urllib.quote(rootcap)
33     if path:
34         url += "/" + escape_path(path)
35     # todo: should it end with a slash?
36     url += "?t=check&output=JSON"
37     if options["verify"]:
38         url += "&verify=true"
39     if options["repair"]:
40         url += "&repair=true"
41     if options["add-lease"]:
42         url += "&add-lease=true"
43
44     resp = do_http("POST", url)
45     if resp.status != 200:
46         print >>stderr, format_http_error("ERROR", resp)
47         return 1
48     jdata = resp.read()
49     if options.get("raw"):
50         stdout.write(jdata)
51         stdout.write("\n")
52         return 0
53     data = simplejson.loads(jdata)
54
55     if options["repair"]:
56         # show repair status
57         if data["pre-repair-results"]["results"]["healthy"]:
58             summary = "healthy"
59         else:
60             summary = "not healthy"
61         stdout.write("Summary: %s\n" % summary)
62         cr = data["pre-repair-results"]["results"]
63         stdout.write(" storage index: %s\n" % quote_output(data["storage-index"], quotemarks=False))
64         stdout.write(" good-shares: %r (encoding is %r-of-%r)\n"
65                      % (cr["count-shares-good"],
66                         cr["count-shares-needed"],
67                         cr["count-shares-expected"]))
68         stdout.write(" wrong-shares: %r\n" % cr["count-wrong-shares"])
69         corrupt = cr["list-corrupt-shares"]
70         if corrupt:
71             stdout.write(" corrupt shares:\n")
72             for (serverid, storage_index, sharenum) in corrupt:
73                 stdout.write("  %s\n" % _quote_serverid_index_share(serverid, storage_index, sharenum))
74         if data["repair-attempted"]:
75             if data["repair-successful"]:
76                 stdout.write(" repair successful\n")
77             else:
78                 stdout.write(" repair failed\n")
79     else:
80         # LIT files and directories do not have a "summary" field.
81         summary = data.get("summary", "Healthy (LIT)")
82         stdout.write("Summary: %s\n" % quote_output(summary, quotemarks=False))
83         cr = data["results"]
84         stdout.write(" storage index: %s\n" % quote_output(data["storage-index"], quotemarks=False))
85
86         if all([field in cr for field in ("count-shares-good", "count-shares-needed",
87                                           "count-shares-expected", "count-wrong-shares")]):
88             stdout.write(" good-shares: %r (encoding is %r-of-%r)\n"
89                          % (cr["count-shares-good"],
90                             cr["count-shares-needed"],
91                             cr["count-shares-expected"]))
92             stdout.write(" wrong-shares: %r\n" % cr["count-wrong-shares"])
93
94         corrupt = cr.get("list-corrupt-shares", [])
95         if corrupt:
96             stdout.write(" corrupt shares:\n")
97             for (serverid, storage_index, sharenum) in corrupt:
98                 stdout.write("  %s\n" % _quote_serverid_index_share(serverid, storage_index, sharenum))
99
100     return 0
101
102
class FakeTransport:
    """Minimal stand-in used as the `transport` of this module's receivers.

    DeepCheckOutput and DeepCheckAndRepairOutput are fed data by direct
    dataReceived() calls rather than by a real connection, so each installs
    an instance of this class as its `transport` attribute.
    """
    # NOTE(review): presumably required because LineOnlyReceiver.dataReceived
    # consults transport.disconnecting -- confirm against Twisted.
    disconnecting = False
105
class DeepCheckOutput(LineOnlyReceiver):
    """Turn a stream-deep-check response into a human-readable report.

    Input arrives as newline-delimited text. Each line is a JSON unit,
    except that once a line starting with "ERROR:" is seen, that line and
    every later one are copied to stderr and the streamer's `rc` is set to 1.
    """
    delimiter = "\n"

    def __init__(self, streamer, options):
        """Remember the owning streamer and the output settings.

        `options` supplies "verbose" plus the stdout/stderr file objects.
        """
        self.streamer = streamer
        self.transport = FakeTransport()

        self.verbose = bool(options["verbose"])
        self.stdout = options.stdout
        self.stderr = options.stderr
        # running tallies, reported by done()
        self.num_objects = 0
        self.files_healthy = 0
        self.files_unhealthy = 0
        self.in_error = False

    def lineReceived(self, line):
        """Process one line of the response."""
        # The first "ERROR:" line switches this receiver into error mode.
        if not self.in_error and line.startswith("ERROR:"):
            self.in_error = True
            self.streamer.rc = 1
        if self.in_error:
            print >>self.stderr, quote_output(line, quotemarks=False)
            return

        unit = simplejson.loads(line)
        out = self.stdout
        # only file and directory units are counted and reported
        if unit["type"] not in ("file", "directory"):
            return

        self.num_objects += 1
        # progress marker every 100 objects
        if self.num_objects % 100 == 0:
            print >>out, "%d objects checked.." % self.num_objects

        check_results = unit["check-results"]
        if check_results["results"]["healthy"]:
            self.files_healthy += 1
        else:
            self.files_unhealthy += 1

        if self.verbose:
            # verbose mode adds one line per object; an empty path is
            # displayed as "<root>"
            shown_path = unit["path"] or ["<root>"]

            # LIT files and directories do not have a "summary" field.
            summary = check_results.get("summary", "Healthy (LIT)")
            print >>out, "%s: %s" % (quote_path(shown_path),
                                     quote_output(summary, quotemarks=False))

        # corrupt shares are listed regardless of verbosity
        corrupt = check_results["results"].get("list-corrupt-shares", [])
        for (serverid, storage_index, sharenum) in corrupt:
            print >>out, " corrupt: %s" % _quote_serverid_index_share(serverid, storage_index, sharenum)

    def done(self):
        """Print the final tally, unless an error line was seen."""
        if not self.in_error:
            totals = (self.num_objects, self.files_healthy, self.files_unhealthy)
            print >>self.stdout, "done: %d objects checked, %d healthy, %d unhealthy" % totals
164
class DeepCheckAndRepairOutput(LineOnlyReceiver):
    """Turn a stream-deep-check response with repair into a readable report.

    Input arrives as newline-delimited text. Each line is a JSON unit,
    except that once a line starting with "ERROR:" is seen, that line and
    every later one are copied to stderr and the streamer's `rc` is set to 1.
    """
    delimiter = "\n"

    def __init__(self, streamer, options):
        """Remember the owning streamer and the output settings.

        `options` supplies "verbose" plus the stdout/stderr file objects.
        """
        self.streamer = streamer
        self.transport = FakeTransport()

        self.verbose = bool(options["verbose"])
        self.stdout = options.stdout
        self.stderr = options.stderr
        # running tallies, reported by done()
        self.num_objects = 0
        self.pre_repair_files_healthy = 0
        self.pre_repair_files_unhealthy = 0
        self.repairs_attempted = 0
        self.repairs_successful = 0
        self.post_repair_files_healthy = 0
        self.post_repair_files_unhealthy = 0
        self.in_error = False

    def lineReceived(self, line):
        """Process one line of the response."""
        # The first "ERROR:" line switches this receiver into error mode.
        if not self.in_error and line.startswith("ERROR:"):
            self.in_error = True
            self.streamer.rc = 1
        if self.in_error:
            print >>self.stderr, quote_output(line, quotemarks=False)
            return

        unit = simplejson.loads(line)
        out = self.stdout
        # only file and directory units are counted and reported
        if unit["type"] not in ("file", "directory"):
            return

        self.num_objects += 1
        # progress marker every 100 objects
        if self.num_objects % 100 == 0:
            print >>out, "%d objects checked.." % self.num_objects

        crr = unit["check-and-repair-results"]
        if not unit["storage-index"]:
            # LIT file: counted as healthy both before and after repair
            was_healthy = True
            self.pre_repair_files_healthy += 1
            self.post_repair_files_healthy += 1
        else:
            was_healthy = bool(crr["pre-repair-results"]["results"]["healthy"])
            if was_healthy:
                self.pre_repair_files_healthy += 1
            else:
                self.pre_repair_files_unhealthy += 1
            if crr["post-repair-results"]["results"]["healthy"]:
                self.post_repair_files_healthy += 1
            else:
                self.post_repair_files_unhealthy += 1

        attempted = crr["repair-attempted"]
        succeeded = False
        if attempted:
            self.repairs_attempted += 1
            succeeded = crr["repair-successful"]
            if succeeded:
                self.repairs_successful += 1

        if self.verbose:
            # verbose mode adds one line per object; an empty path is
            # displayed as "<root>"
            shown_path = unit["path"] or ["<root>"]
            # the summary is built here from the pre-repair health
            summary = "healthy" if was_healthy else "not healthy"
            print >>out, "%s: %s" % (quote_path(shown_path), summary)

        # corrupt shares are listed regardless of verbosity
        pre_repair = crr.get("pre-repair-results", {})
        corrupt = pre_repair.get("results", {}).get("list-corrupt-shares", [])
        for (serverid, storage_index, sharenum) in corrupt:
            print >>out, " corrupt: %s" % _quote_serverid_index_share(serverid, storage_index, sharenum)

        # repair outcomes are reported regardless of verbosity
        if attempted:
            if succeeded:
                print >>out, " repair successful"
            else:
                print >>out, " repair failed"

    def done(self):
        """Print the final tallies, unless an error line was seen."""
        if self.in_error:
            return
        out = self.stdout
        failed = self.repairs_attempted - self.repairs_successful
        print >>out, "done: %d objects checked" % self.num_objects
        print >>out, " pre-repair: %d healthy, %d unhealthy" % (
            self.pre_repair_files_healthy, self.pre_repair_files_unhealthy)
        print >>out, " %d repairs attempted, %d successful, %d failed" % (
            self.repairs_attempted, self.repairs_successful, failed)
        print >>out, " post-repair: %d healthy, %d unhealthy" % (
            self.post_repair_files_healthy, self.post_repair_files_unhealthy)
262
263 class DeepCheckStreamer(LineOnlyReceiver):
264
265     def run(self, options):
266         stdout = options.stdout
267         stderr = options.stderr
268         self.rc = 0
269         self.options = options
270         nodeurl = options['node-url']
271         if not nodeurl.endswith("/"):
272             nodeurl += "/"
273         self.nodeurl = nodeurl
274         where = options.where
275         try:
276             rootcap, path = get_alias(options.aliases, where, DEFAULT_ALIAS)
277         except UnknownAliasError, e:
278             e.display(stderr)
279             return 1
280         if path == '/':
281             path = ''
282         url = nodeurl + "uri/%s" % urllib.quote(rootcap)
283         if path:
284             url += "/" + escape_path(path)
285         # todo: should it end with a slash?
286         url += "?t=stream-deep-check"
287         if options["verify"]:
288             url += "&verify=true"
289         if options["repair"]:
290             url += "&repair=true"
291             output = DeepCheckAndRepairOutput(self, options)
292         else:
293             output = DeepCheckOutput(self, options)
294         if options["add-lease"]:
295             url += "&add-lease=true"
296         resp = do_http("POST", url)
297         if resp.status not in (200, 302):
298             print >>stderr, format_http_error("ERROR", resp)
299             return 1
300
301         # use Twisted to split this into lines
302         while True:
303             chunk = resp.read(100)
304             if not chunk:
305                 break
306             if self.options["raw"]:
307                 stdout.write(chunk)
308             else:
309                 output.dataReceived(chunk)
310         if not self.options["raw"]:
311             output.done()
312         return self.rc
313
def deepcheck(options):
    """Run a deep check with a fresh DeepCheckStreamer and return its result."""
    streamer = DeepCheckStreamer()
    return streamer.run(options)