# do not import any allmydata modules at this level. Do that from inside
# individual functions instead.
import sys, struct, time, os
from twisted.python import usage
class DumpOptions(usage.Options):
    def getSynopsis(self):
        return "Usage: tahoe debug dump-share SHARE_FILENAME"

    def getUsage(self, width=None):
        t = usage.Options.getUsage(self, width)
        t += """
Print lots of information about the given share, by parsing the share's
contents. This includes share type, lease information, encoding parameters,
hash-tree roots, public keys, and segment sizes. This command also emits a
verify-cap for the file that uses the share.

 tahoe debug dump-share testgrid/node-3/storage/shares/4v/4vozh77tsrw7mdhnj7qvp5ky74/0
"""
        return t

    def parseArgs(self, filename):
        self['filename'] = filename
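# Immutable share files begin with a four-byte big-endian version number
# (see the struct.unpack check in describe_share() below), while mutable
# share files begin with storage.MutableShareFile.MAGIC. dump_share() sniffs
# the start of the file to decide which parser to use.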
def dump_share(config, out=sys.stdout, err=sys.stderr):
    from allmydata import uri, storage
    from allmydata.util import base32

    # check the version, to see if we have a mutable or immutable share
    print >>out, "share filename: %s" % config['filename']

    f = open(config['filename'], "rb")
    prefix = f.read(32)
    f.close()
    if prefix == storage.MutableShareFile.MAGIC:
        return dump_mutable_share(config, out, err)
    # otherwise assume it's immutable
    f = storage.ShareFile(config['filename'])
    # use a ReadBucketProxy to parse the bucket and find the uri extension
    bp = storage.ReadBucketProxy(None)
    offsets = bp._parse_offsets(f.read_share_data(0, 0x24))
    seek = offsets['uri_extension']
    length = struct.unpack(">L", f.read_share_data(seek, 4))[0]
    # the URI extension block is stored as a four-byte big-endian length
    # followed by the data itself, so skip past the length field
    seek += 4
    UEB_data = f.read_share_data(seek, length)
    unpacked = uri.unpack_extension_readable(UEB_data)
    keys1 = ("size", "num_segments", "segment_size",
             "needed_shares", "total_shares")
    keys2 = ("codec_name", "codec_params", "tail_codec_params")
    keys3 = ("plaintext_hash", "plaintext_root_hash",
             "crypttext_hash", "crypttext_root_hash",
             "share_root_hash", "UEB_hash")
    display_keys = {"size": "file_size"}
    for k in keys1:
        if k in unpacked:
            dk = display_keys.get(k, k)
            print >>out, "%20s: %s" % (dk, unpacked[k])
    print >>out
    for k in keys2:
        if k in unpacked:
            dk = display_keys.get(k, k)
            print >>out, "%20s: %s" % (dk, unpacked[k])
    print >>out
    for k in keys3:
        if k in unpacked:
            dk = display_keys.get(k, k)
            print >>out, "%20s: %s" % (dk, unpacked[k])

    leftover = set(unpacked.keys()) - set(keys1 + keys2 + keys3)
    if leftover:
        print >>out
        print >>out, "LEFTOVER:"
        for k in sorted(leftover):
            print >>out, "%20s: %s" % (k, unpacked[k])
    # the storage index isn't stored in the share itself, so we depend upon
    # knowing the parent directory name to get it
    pieces = config['filename'].split(os.sep)
    if len(pieces) >= 2 and base32.could_be_base32_encoded(pieces[-2]):
        storage_index = base32.a2b(pieces[-2])
        uri_extension_hash = base32.a2b(unpacked["UEB_hash"])
        u = uri.CHKFileVerifierURI(storage_index, uri_extension_hash,
                                   unpacked["needed_shares"],
                                   unpacked["total_shares"], unpacked["size"])
        verify_cap = u.to_string()
        print >>out, "%20s: %s" % ("verify-cap", verify_cap)
    sizes = {}
    sizes['data'] = bp._data_size
    # 'validation' covers the hash trees that sit between the share data and
    # the URI extension block
    sizes['validation'] = (offsets['uri_extension'] -
                           offsets['plaintext_hash_tree'])
    sizes['uri-extension'] = len(UEB_data)
    print >>out
    print >>out, " Size of data within the share:"
    for k in sorted(sizes):
        print >>out, "%20s: %s" % (k, sizes[k])
    # display lease information too
    leases = list(f.iter_leases())
    if leases:
        for i,lease in enumerate(leases):
            when = format_expiration_time(lease.expiration_time)
            print >>out, " Lease #%d: owner=%d, expire in %s" \
                  % (i, lease.owner_num, when)
    else:
        print >>out, " No leases."

    print >>out
    return 0
def format_expiration_time(expiration_time):
    now = time.time()
    remains = expiration_time - now
    when = "%ds" % remains
    if remains > 24*3600:
        when += " (%d days)" % (remains / (24*3600))
    elif remains > 3600:
        when += " (%d hours)" % (remains / 3600)
    return when
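# For example, a lease expiring 90000 seconds from now is rendered as
# "90000s (1 days)", and one expiring in 7200 seconds as "7200s (2 hours)".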
def dump_mutable_share(config, out, err):
    from allmydata import storage
    from allmydata.util import base32, idlib
    m = storage.MutableShareFile(config['filename'])
    f = open(config['filename'], "rb")
    WE, nodeid = m._read_write_enabler_and_nodeid(f)
    num_extra_leases = m._read_num_extra_leases(f)
    data_length = m._read_data_length(f)
    extra_lease_offset = m._read_extra_lease_offset(f)
    container_size = extra_lease_offset - m.DATA_OFFSET
    leases = list(m._enumerate_leases(f))

    share_type = "unknown"
    f.seek(m.DATA_OFFSET)
    if f.read(1) == "\x00":
        # this slot contains an SDMF share
        share_type = "SDMF"
    f.close()

    print >>out
    print >>out, "Mutable slot found:"
    print >>out, " share_type: %s" % share_type
    print >>out, " write_enabler: %s" % base32.b2a(WE)
    print >>out, " WE for nodeid: %s" % idlib.nodeid_b2a(nodeid)
    print >>out, " num_extra_leases: %d" % num_extra_leases
    print >>out, " container_size: %d" % container_size
    print >>out, " data_length: %d" % data_length
    if leases:
        for (leasenum, lease) in leases:
            print >>out
            print >>out, " Lease #%d:" % leasenum
            print >>out, "  ownerid: %d" % lease.owner_num
            when = format_expiration_time(lease.expiration_time)
            print >>out, "  expires in %s" % when
            print >>out, "  renew_secret: %s" % base32.b2a(lease.renew_secret)
            print >>out, "  cancel_secret: %s" % base32.b2a(lease.cancel_secret)
            print >>out, "  secrets are for nodeid: %s" % idlib.nodeid_b2a(lease.nodeid)
    else:
        print >>out, "No leases."
    print >>out
    if share_type == "SDMF":
        dump_SDMF_share(m.DATA_OFFSET, data_length, config, out, err)

    return 0
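# unpack_share() parses the SDMF on-disk layout: the fixed-size header fields
# (version byte, sequence number, root hash, IV, encoding parameters) come
# first, and the variable-length fields are located through an offset table.
# A too-small initial read therefore raises NeedMoreDataError, whose
# needed_bytes attribute says how much to read on the retry below; see
# allmydata.mutable.layout for the authoritative structure.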
def dump_SDMF_share(offset, length, config, out, err):
    from allmydata.mutable.layout import unpack_share
    from allmydata.mutable.common import NeedMoreDataError
    from allmydata.util import base32, hashutil
    from allmydata.uri import SSKVerifierURI

    f = open(config['filename'], "rb")
    f.seek(offset)
    data = f.read(min(length, 2000))
    f.close()

    try:
        pieces = unpack_share(data)
    except NeedMoreDataError, e:
        # retry once with the larger size
        size = e.needed_bytes
        f = open(config['filename'], "rb")
        f.seek(offset)
        data = f.read(min(length, size))
        f.close()
        pieces = unpack_share(data)
    (seqnum, root_hash, IV, k, N, segsize, datalen,
     pubkey, signature, share_hash_chain, block_hash_tree,
     share_data, enc_privkey) = pieces

    print >>out, " SDMF contents:"
    print >>out, "  seqnum: %d" % seqnum
    print >>out, "  root_hash: %s" % base32.b2a(root_hash)
    print >>out, "  IV: %s" % base32.b2a(IV)
    print >>out, "  required_shares: %d" % k
    print >>out, "  total_shares: %d" % N
    print >>out, "  segsize: %d" % segsize
    print >>out, "  datalen: %d" % datalen
    print >>out, "  enc_privkey: %d bytes" % len(enc_privkey)
    print >>out, "  pubkey: %d bytes" % len(pubkey)
    print >>out, "  signature: %d bytes" % len(signature)
    share_hash_ids = ",".join(sorted([str(hid)
                                      for hid in share_hash_chain.keys()]))
    print >>out, "  share_hash_chain: %s" % share_hash_ids
    print >>out, "  block_hash_tree: %d nodes" % len(block_hash_tree)
    # the storage index isn't stored in the share itself, so we depend upon
    # knowing the parent directory name to get it
    pieces = config['filename'].split(os.sep)
    if len(pieces) >= 2 and base32.could_be_base32_encoded(pieces[-2]):
        storage_index = base32.a2b(pieces[-2])
        fingerprint = hashutil.ssk_pubkey_fingerprint_hash(pubkey)
        u = SSKVerifierURI(storage_index, fingerprint)
        verify_cap = u.to_string()
        print >>out, "  verify-cap:", verify_cap

    print >>out
class DumpCapOptions(usage.Options):
    def getSynopsis(self):
        return "Usage: tahoe debug dump-cap [options] FILECAP"
    optParameters = [
        ["nodeid", "n",
         None, "storage server nodeid (ascii), to construct WE and secrets."],
        ["client-secret", "c", None,
         "client's base secret (ascii), to construct secrets"],
        ["client-dir", "d", None,
         "client's base directory, from which a -c secret will be read"],
        ]
    def parseArgs(self, cap):
        self.cap = cap

    def getUsage(self, width=None):
        t = usage.Options.getUsage(self, width)
        t += """
Print information about the given cap-string (aka: URI, file-cap, dir-cap,
read-cap, write-cap). The URI string is parsed and unpacked. This prints the
type of the cap, its storage index, and any derived keys.

 tahoe debug dump-cap URI:SSK-Verifier:4vozh77tsrw7mdhnj7qvp5ky74:q7f3dwz76sjys4kqfdt3ocur2pay3a6rftnkqmi2uxu3vqsdsofq

This may be useful to determine if a read-cap and a write-cap refer to the
same file, or to extract the storage-index from a file-cap (to then use with
find-shares).

If additional information is provided (storage server nodeid and/or client
base secret), this command will compute the shared secrets used for the
write-enabler and for lease-renewal.
"""
        return t
def dump_cap(config, out=sys.stdout, err=sys.stderr):
    from allmydata import uri
    from allmydata.util import base32
    from base64 import b32decode
    import urlparse, urllib

    cap = config.cap
    nodeid = None
    if config['nodeid']:
        nodeid = b32decode(config['nodeid'].upper())
    secret = None
    if config['client-secret']:
        secret = base32.a2b(config['client-secret'])
    elif config['client-dir']:
        secretfile = os.path.join(config['client-dir'], "private", "secret")
        try:
            secret = base32.a2b(open(secretfile, "r").read().strip())
        except EnvironmentError:
            pass

    # accept a full webapi URL and pull the cap out of its /uri/ path
    if cap.startswith("http"):
        scheme, netloc, path, params, query, fragment = urlparse.urlparse(cap)
        assert path.startswith("/uri/")
        cap = urllib.unquote(path[len("/uri/"):])

    u = uri.from_string(cap)

    print >>out
    dump_uri_instance(u, nodeid, secret, out, err)
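# Lease secrets are derived in a chain: the client's base secret yields a
# per-client secret, which combined with the storage index yields a per-file
# secret, which combined with a server's nodeid yields the per-bucket secret
# actually held by that server (so each server only ever sees a secret that
# is useless elsewhere). _dump_secrets() walks this chain, printing whatever
# can be computed from the inputs provided.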
def _dump_secrets(storage_index, secret, nodeid, out):
    from allmydata.util import hashutil
    from allmydata.util import base32

    if secret:
        crs = hashutil.my_renewal_secret_hash(secret)
        print >>out, " client renewal secret:", base32.b2a(crs)
        frs = hashutil.file_renewal_secret_hash(crs, storage_index)
        print >>out, " file renewal secret:", base32.b2a(frs)
        if nodeid:
            renew = hashutil.bucket_renewal_secret_hash(frs, nodeid)
            print >>out, " lease renewal secret:", base32.b2a(renew)
        ccs = hashutil.my_cancel_secret_hash(secret)
        print >>out, " client cancel secret:", base32.b2a(ccs)
        fcs = hashutil.file_cancel_secret_hash(ccs, storage_index)
        print >>out, " file cancel secret:", base32.b2a(fcs)
        if nodeid:
            cancel = hashutil.bucket_cancel_secret_hash(fcs, nodeid)
            print >>out, " lease cancel secret:", base32.b2a(cancel)
def dump_uri_instance(u, nodeid, secret, out, err, show_header=True):
    from allmydata import storage, uri
    from allmydata.util import base32, hashutil

    if isinstance(u, uri.CHKFileURI):
        if show_header:
            print >>out, "CHK File:"
        print >>out, " key:", base32.b2a(u.key)
        print >>out, " UEB hash:", base32.b2a(u.uri_extension_hash)
        print >>out, " size:", u.size
        print >>out, " k/N: %d/%d" % (u.needed_shares, u.total_shares)
        print >>out, " storage index:", storage.si_b2a(u.storage_index)
        _dump_secrets(u.storage_index, secret, nodeid, out)
    elif isinstance(u, uri.CHKFileVerifierURI):
        if show_header:
            print >>out, "CHK Verifier URI:"
        print >>out, " UEB hash:", base32.b2a(u.uri_extension_hash)
        print >>out, " size:", u.size
        print >>out, " k/N: %d/%d" % (u.needed_shares, u.total_shares)
        print >>out, " storage index:", storage.si_b2a(u.storage_index)

    elif isinstance(u, uri.LiteralFileURI):
        if show_header:
            print >>out, "Literal File URI:"
        print >>out, " data:", u.data

    elif isinstance(u, uri.WriteableSSKFileURI):
        if show_header:
            print >>out, "SSK Writeable URI:"
        print >>out, " writekey:", base32.b2a(u.writekey)
        print >>out, " readkey:", base32.b2a(u.readkey)
        print >>out, " storage index:", storage.si_b2a(u.storage_index)
        print >>out, " fingerprint:", base32.b2a(u.fingerprint)
        print >>out
        if nodeid:
            we = hashutil.ssk_write_enabler_hash(u.writekey, nodeid)
            print >>out, " write_enabler:", base32.b2a(we)
            print >>out
        _dump_secrets(u.storage_index, secret, nodeid, out)

    elif isinstance(u, uri.ReadonlySSKFileURI):
        if show_header:
            print >>out, "SSK Read-only URI:"
        print >>out, " readkey:", base32.b2a(u.readkey)
        print >>out, " storage index:", storage.si_b2a(u.storage_index)
        print >>out, " fingerprint:", base32.b2a(u.fingerprint)
    elif isinstance(u, uri.SSKVerifierURI):
        if show_header:
            print >>out, "SSK Verifier URI:"
        print >>out, " storage index:", storage.si_b2a(u.storage_index)
        print >>out, " fingerprint:", base32.b2a(u.fingerprint)

    elif isinstance(u, uri.NewDirectoryURI):
        if show_header:
            print >>out, "Directory Writeable URI:"
        dump_uri_instance(u._filenode_uri, nodeid, secret, out, err, False)
    elif isinstance(u, uri.ReadonlyNewDirectoryURI):
        if show_header:
            print >>out, "Directory Read-only URI:"
        dump_uri_instance(u._filenode_uri, nodeid, secret, out, err, False)
    elif isinstance(u, uri.NewDirectoryURIVerifier):
        if show_header:
            print >>out, "Directory Verifier URI:"
        dump_uri_instance(u._filenode_uri, nodeid, secret, out, err, False)
    else:
        print >>out, "unknown cap type"
class FindSharesOptions(usage.Options):
    def getSynopsis(self):
        return "Usage: tahoe debug find-shares STORAGE_INDEX NODEDIRS.."
    def parseArgs(self, storage_index_s, *nodedirs):
        self.si_s = storage_index_s
        self.nodedirs = nodedirs
    def getUsage(self, width=None):
        t = usage.Options.getUsage(self, width)
        t += """
Locate all shares for the given storage index. This command looks through one
or more node directories to find the shares. It returns a list of filenames,
one per line, for each share file found.

 tahoe debug find-shares 4vozh77tsrw7mdhnj7qvp5ky74 testgrid/node-*

It may be useful during testing, when running a test grid in which all the
nodes are on a local disk. The share files thus located can be counted,
examined (with dump-share), or corrupted/deleted to test checker/repairer.
"""
        return t
def find_shares(config, out=sys.stdout, err=sys.stderr):
    """Given a storage index and a list of node directories, emit a list of
    all matching shares to stdout, one per line. For example:

     find-shares.py 44kai1tui348689nrw8fjegc8c ~/testnet/node-*

    gives:

    /home/warner/testnet/node-1/storage/shares/44k/44kai1tui348689nrw8fjegc8c/5
    /home/warner/testnet/node-1/storage/shares/44k/44kai1tui348689nrw8fjegc8c/9
    /home/warner/testnet/node-2/storage/shares/44k/44kai1tui348689nrw8fjegc8c/2
    """
    from allmydata import storage

    sharedir = storage.storage_index_to_dir(storage.si_a2b(config.si_s))
    for d in config.nodedirs:
        d = os.path.join(os.path.expanduser(d), "storage/shares", sharedir)
        if os.path.exists(d):
            for shnum in os.listdir(d):
                print >>out, os.path.join(d, shnum)

    return 0
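# storage.storage_index_to_dir() maps a storage index to its relative path
# under NODEDIR/storage/shares (an abbreviated prefix directory containing a
# directory named after the full base32 storage index, as in the docstring
# example above), so the loop only needs to join that path onto each node
# directory and list the share numbers found inside.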
class CatalogSharesOptions(usage.Options):
    def parseArgs(self, *nodedirs):
        self.nodedirs = nodedirs
        if not nodedirs:
            raise usage.UsageError("must specify at least one node directory")

    def getSynopsis(self):
        return "Usage: tahoe debug catalog-shares NODEDIRS.."

    def getUsage(self, width=None):
        t = usage.Options.getUsage(self, width)
        t += """
Locate all shares in the given node directories, and emit a one-line summary
of each share. Run it like this:

 tahoe debug catalog-shares testgrid/node-* >allshares.txt

The lines it emits will look like the following:

 CHK $SI $k/$N $filesize $UEB_hash $expiration $abspath_sharefile
 SDMF $SI $k/$N $filesize $seqnum/$roothash $expiration $abspath_sharefile
 UNKNOWN $abspath_sharefile

This command can be used to build up a catalog of shares from many storage
servers and then sort the results to compare all shares for the same file. If
you see shares with the same SI but different parameters/filesize/UEB_hash,
then something is wrong. The misc/find-share/anomalies.py script may be
useful for this.
"""
        return t
def describe_share(abs_sharefile, si_s, shnum_s, now, out, err):
    from allmydata import uri, storage
    from allmydata.mutable.layout import unpack_share
    from allmydata.mutable.common import NeedMoreDataError
    from allmydata.util import base32

    f = open(abs_sharefile, "rb")
    prefix = f.read(32)

    if prefix == storage.MutableShareFile.MAGIC:
        # mutable share
        m = storage.MutableShareFile(abs_sharefile)
        WE, nodeid = m._read_write_enabler_and_nodeid(f)
        num_extra_leases = m._read_num_extra_leases(f)
        data_length = m._read_data_length(f)
        extra_lease_offset = m._read_extra_lease_offset(f)
        container_size = extra_lease_offset - m.DATA_OFFSET
        leases = list(m._enumerate_leases(f))
        expiration_time = min( [lease[1].expiration_time
                                for lease in leases] )
        expiration = max(0, expiration_time - now)
        share_type = "unknown"
        f.seek(m.DATA_OFFSET)
        if f.read(1) == "\x00":
            # this slot contains an SDMF share
            share_type = "SDMF"

        if share_type == "SDMF":
            f.seek(m.DATA_OFFSET)
            data = f.read(min(data_length, 2000))

            try:
                pieces = unpack_share(data)
            except NeedMoreDataError, e:
                # retry once with the larger size
                size = e.needed_bytes
                f.seek(m.DATA_OFFSET)
                data = f.read(min(data_length, size))
                pieces = unpack_share(data)
            (seqnum, root_hash, IV, k, N, segsize, datalen,
             pubkey, signature, share_hash_chain, block_hash_tree,
             share_data, enc_privkey) = pieces

            print >>out, "SDMF %s %d/%d %d #%d:%s %d %s" % \
                  (si_s, k, N, datalen,
                   seqnum, base32.b2a(root_hash),
                   expiration, abs_sharefile)
        else:
            print >>out, "UNKNOWN mutable %s" % (abs_sharefile,)
    elif struct.unpack(">L", prefix[:4]) == (1,):
        # immutable share: these files start with a four-byte big-endian
        # version number, currently 1

        sf = storage.ShareFile(abs_sharefile)
        # use a ReadBucketProxy to parse the bucket and find the uri extension
        bp = storage.ReadBucketProxy(None)
        offsets = bp._parse_offsets(sf.read_share_data(0, 0x24))
        seek = offsets['uri_extension']
        length = struct.unpack(">L", sf.read_share_data(seek, 4))[0]
        seek += 4
        UEB_data = sf.read_share_data(seek, length)
        expiration_time = min( [lease.expiration_time
                                for lease in sf.iter_leases()] )
        expiration = max(0, expiration_time - now)

        unpacked = uri.unpack_extension_readable(UEB_data)
        k = unpacked["needed_shares"]
        N = unpacked["total_shares"]
        filesize = unpacked["size"]
        ueb_hash = unpacked["UEB_hash"]

        print >>out, "CHK %s %d/%d %d %s %d %s" % (si_s, k, N, filesize,
                                                   ueb_hash, expiration,
                                                   abs_sharefile)

    else:
        print >>out, "UNKNOWN really-unknown %s" % (abs_sharefile,)

    f.close()
def catalog_shares(config, out=sys.stdout, err=sys.stderr):
    now = time.time()
    for d in config.nodedirs:
        d = os.path.join(os.path.expanduser(d), "storage/shares")
        try:
            abbrevs = os.listdir(d)
        except EnvironmentError:
            # ignore nodes that have storage turned off altogether
            pass
        else:
            for abbrevdir in abbrevs:
                if abbrevdir == "incoming":
                    continue
                abbrevdir = os.path.join(d, abbrevdir)
                for si_s in os.listdir(abbrevdir):
                    si_dir = os.path.join(abbrevdir, si_s)
                    for shnum_s in os.listdir(si_dir):
                        abs_sharefile = os.path.join(si_dir, shnum_s)
                        abs_sharefile = os.path.abspath(abs_sharefile)
                        assert os.path.isfile(abs_sharefile)
                        describe_share(abs_sharefile, si_s, shnum_s, now,
                                       out, err)
    return 0
class ReplOptions(usage.Options):
    pass

def repl(options, out=sys.stdout, err=sys.stderr):
    import code
    return code.interact()
class DebugCommand(usage.Options):
    subCommands = [
        ["dump-share", None, DumpOptions,
         "Unpack and display the contents of a share (uri_extension and leases)."],
        ["dump-cap", None, DumpCapOptions, "Unpack a read-cap or write-cap"],
        ["find-shares", None, FindSharesOptions, "Locate sharefiles in node dirs"],
        ["catalog-shares", None, CatalogSharesOptions, "Describe shares in node dirs"],
        ["repl", None, ReplOptions, "Open a python interpreter"],
        ]
    def postOptions(self):
        if not hasattr(self, 'subOptions'):
            raise usage.UsageError("must specify a subcommand")
    def getSynopsis(self):
        return "Usage: tahoe debug SUBCOMMAND"
    def getUsage(self, width=None):
        #t = usage.Options.getUsage(self, width)
        t = """Subcommands:
    tahoe debug dump-share      Unpack and display the contents of a share
    tahoe debug dump-cap        Unpack a read-cap or write-cap
    tahoe debug find-shares     Locate sharefiles in node directories
    tahoe debug catalog-shares  Describe all shares in node dirs

Please run e.g. 'tahoe debug dump-share --help' for more details on each
subcommand.
"""
        return t
602 "dump-share": dump_share,
603 "dump-cap": dump_cap,
604 "find-shares": find_shares,
605 "catalog-shares": catalog_shares,
610 def do_debug(options):
611 so = options.subOptions
612 f = subDispatch[options.subCommand]
613 return f(so, options.stdout, options.stderr)
617 ["debug", None, DebugCommand, "debug subcommands: use 'tahoe debug' for a list"],