2 # do not import any allmydata modules at this level. Do that from inside
3 # individual functions instead.
4 import struct, time, os
5 from twisted.python import usage
class DumpOptions(usage.Options):
    # Option parser for 'tahoe debug dump-share': one positional argument
    # (the share file) plus an --offsets flag.
    # NOTE(review): several lines (the getSynopsis 'def' line, the optFlags
    # assignment, the docstring delimiters) are not visible in this view.
        return "Usage: tahoe debug dump-share SHARE_FILENAME"
        ["offsets", None, "Display a table of section offsets"],
    def getUsage(self, width=None):
        # Append the long-form help text to twisted's auto-generated usage.
        t = usage.Options.getUsage(self, width)
Print lots of information about the given share, by parsing the share's
contents. This includes share type, lease information, encoding parameters,
hash-tree roots, public keys, and segment sizes. This command also emits a
verify-cap for the file that uses the share.
 tahoe debug dump-share testgrid/node-3/storage/shares/4v/4vozh77tsrw7mdhnj7qvp5ky74/0
    def parseArgs(self, filename):
        # Record the share file path for later use by dump_share().
        self['filename'] = filename
def dump_share(options):
    # Entry point for 'tahoe debug dump-share': sniff the file's magic
    # prefix and dispatch to the mutable or immutable dumper.
    from allmydata import storage
    # check the version, to see if we have a mutable or immutable share
    print >>out, "share filename: %s" % options['filename']
    f = open(options['filename'], "rb")
    # NOTE(review): the read that binds 'prefix' (and the 'out' binding) are
    # not visible in this view.
    if prefix == storage.MutableShareFile.MAGIC:
        return dump_mutable_share(options)
    # otherwise assume it's immutable
    return dump_immutable_share(options)
def dump_immutable_share(options):
    # Parse an immutable (CHK) share file: print the URI-extension-block
    # fields, a derived verify-cap, size breakdown, optional section
    # offsets, and lease information.
    from allmydata import uri, storage
    from allmydata.util import base32
    f = storage.ShareFile(options['filename'])
    # use a ReadBucketProxy to parse the bucket and find the uri extension
    bp = storage.ReadBucketProxy(None)
    offsets = bp._parse_offsets(f.read_share_data(0, 0x24))
    seek = offsets['uri_extension']
    # the UEB is stored as a 4-byte big-endian length followed by the data
    length = struct.unpack(">L", f.read_share_data(seek, 4))[0]
    UEB_data = f.read_share_data(seek, length)
    unpacked = uri.unpack_extension_readable(UEB_data)
    # the known UEB keys, grouped for display: sizes, codec, then hashes
    keys1 = ("size", "num_segments", "segment_size",
             "needed_shares", "total_shares")
    keys2 = ("codec_name", "codec_params", "tail_codec_params")
    keys3 = ("plaintext_hash", "plaintext_root_hash",
             "crypttext_hash", "crypttext_root_hash",
             "share_root_hash", "UEB_hash")
    # some keys are renamed for display
    display_keys = {"size": "file_size"}
    # NOTE(review): the three 'for k in keysN:' loop headers are not
    # visible in this view; each group is printed with the same format.
        dk = display_keys.get(k, k)
        print >>out, "%20s: %s" % (dk, unpacked[k])
        dk = display_keys.get(k, k)
        print >>out, "%20s: %s" % (dk, unpacked[k])
        dk = display_keys.get(k, k)
        print >>out, "%20s: %s" % (dk, unpacked[k])
    # any UEB keys not covered by the groups above
    leftover = set(unpacked.keys()) - set(keys1 + keys2 + keys3)
    print >>out, "LEFTOVER:"
    for k in sorted(leftover):
        print >>out, "%20s: %s" % (k, unpacked[k])
    # the storage index isn't stored in the share itself, so we depend upon
    # knowing the parent directory name to get it
    pieces = options['filename'].split(os.sep)
    if len(pieces) >= 2 and base32.could_be_base32_encoded(pieces[-2]):
        storage_index = base32.a2b(pieces[-2])
        uri_extension_hash = base32.a2b(unpacked["UEB_hash"])
        u = uri.CHKFileVerifierURI(storage_index, uri_extension_hash,
                                   unpacked["needed_shares"],
                                   unpacked["total_shares"], unpacked["size"])
        verify_cap = u.to_string()
        print >>out, "%20s: %s" % ("verify-cap", verify_cap)
    # summarize how the share's bytes are apportioned
    # NOTE(review): the 'sizes = {}' initialization is not visible here.
    sizes['data'] = bp._data_size
    sizes['validation'] = (offsets['uri_extension'] -
                           offsets['plaintext_hash_tree'])
    sizes['uri-extension'] = len(UEB_data)
    print >>out, " Size of data within the share:"
    for k in sorted(sizes):
        print >>out, "%20s: %s" % (k, sizes[k])
    if options['offsets']:
        print >>out, " Section Offsets:"
        print >>out, "%20s: %s" % ("share data", f._data_offset)
        for k in ["data", "plaintext_hash_tree", "crypttext_hash_tree",
                  "block_hashes", "share_hashes", "uri_extension"]:
            name = {"data": "block data"}.get(k,k)
            # offsets[] values are relative to the start of the share data
            offset = f._data_offset + offsets[k]
            print >>out, " %20s: %s (0x%x)" % (name, offset, offset)
        print >>out, "%20s: %s" % ("leases", f._lease_offset)
    # display lease information too
    leases = list(f.iter_leases())
    for i,lease in enumerate(leases):
        when = format_expiration_time(lease.expiration_time)
        print >>out, " Lease #%d: owner=%d, expire in %s" \
              % (i, lease.owner_num, when)
    # NOTE(review): this line is presumably an 'else:' branch of an
    # 'if leases:' test not visible in this view.
    print >>out, " No leases."
def format_expiration_time(expiration_time):
    """Return a human-readable description of when a lease expires.

    expiration_time is an absolute POSIX timestamp. The result is the
    number of seconds remaining (e.g. "86430s"), annotated with a rough
    day or hour count when the remainder exceeds one hour.
    """
    # Defect in the viewed code: 'now' was never bound, the hours clause
    # was not an 'elif' (so it ran unconditionally), and nothing was
    # returned. Restored here.
    now = time.time()
    remains = expiration_time - now
    when = "%ds" % remains
    if remains > 24*3600:
        when += " (%d days)" % (remains / (24*3600))
    elif remains > 3600:
        when += " (%d hours)" % (remains / 3600)
    return when
def dump_mutable_share(options):
    # Print the container header fields, per-lease details, and (for SDMF
    # slots) the unpacked share contents of a mutable share file.
    from allmydata import storage
    from allmydata.util import base32, idlib
    m = storage.MutableShareFile(options['filename'])
    f = open(options['filename'], "rb")
    WE, nodeid = m._read_write_enabler_and_nodeid(f)
    num_extra_leases = m._read_num_extra_leases(f)
    data_length = m._read_data_length(f)
    extra_lease_offset = m._read_extra_lease_offset(f)
    container_size = extra_lease_offset - m.DATA_OFFSET
    leases = list(m._enumerate_leases(f))
    share_type = "unknown"
    f.seek(m.DATA_OFFSET)
    # a leading version byte of 0x00 marks an SDMF share
    if f.read(1) == "\x00":
        # this slot contains an SMDF share
        # NOTE(review): the 'share_type = "SDMF"' assignment is not visible
        # in this view, but is implied by the dispatch below.
    print >>out, "Mutable slot found:"
    print >>out, " share_type: %s" % share_type
    print >>out, " write_enabler: %s" % base32.b2a(WE)
    print >>out, " WE for nodeid: %s" % idlib.nodeid_b2a(nodeid)
    print >>out, " num_extra_leases: %d" % num_extra_leases
    print >>out, " container_size: %d" % container_size
    print >>out, " data_length: %d" % data_length
    # leases are (slot-number, LeaseInfo) pairs from _enumerate_leases
    for (leasenum, lease) in leases:
        print >>out, " Lease #%d:" % leasenum
        print >>out, " ownerid: %d" % lease.owner_num
        when = format_expiration_time(lease.expiration_time)
        print >>out, " expires in %s" % when
        print >>out, " renew_secret: %s" % base32.b2a(lease.renew_secret)
        print >>out, " cancel_secret: %s" % base32.b2a(lease.cancel_secret)
        print >>out, " secrets are for nodeid: %s" % idlib.nodeid_b2a(lease.nodeid)
    # NOTE(review): presumably the 'else:' branch of an 'if leases:' test.
    print >>out, "No leases."
    if share_type == "SDMF":
        dump_SDMF_share(m, data_length, options)
def dump_SDMF_share(m, length, options):
    # Unpack and display the contents of an SDMF mutable share: encoding
    # parameters, hash-chain info, a derived verify-cap, and (optionally)
    # a table of section offsets.
    from allmydata.mutable.layout import unpack_share, unpack_header
    from allmydata.mutable.common import NeedMoreDataError
    from allmydata.util import base32, hashutil
    from allmydata.uri import SSKVerifierURI
    offset = m.DATA_OFFSET
    f = open(options['filename'], "rb")
    # read an initial 2000-byte guess; if unpack_share says it needs more,
    # retry once with the size it requested
    data = f.read(min(length, 2000))
    # NOTE(review): the 'try:' line guarding this unpack is not visible in
    # this view.
    pieces = unpack_share(data)
    except NeedMoreDataError, e:
        # retry once with the larger size
        size = e.needed_bytes
        f = open(options['filename'], "rb")
        data = f.read(min(length, size))
        pieces = unpack_share(data)
    (seqnum, root_hash, IV, k, N, segsize, datalen,
     pubkey, signature, share_hash_chain, block_hash_tree,
     share_data, enc_privkey) = pieces
    # the header carries the offsets table; other header fields are ignored
    (ig_version, ig_seqnum, ig_roothash, ig_IV, ig_k, ig_N, ig_segsize,
     ig_datalen, offsets) = unpack_header(data)
    print >>out, " SDMF contents:"
    print >>out, " seqnum: %d" % seqnum
    print >>out, " root_hash: %s" % base32.b2a(root_hash)
    print >>out, " IV: %s" % base32.b2a(IV)
    print >>out, " required_shares: %d" % k
    print >>out, " total_shares: %d" % N
    print >>out, " segsize: %d" % segsize
    print >>out, " datalen: %d" % datalen
    print >>out, " enc_privkey: %d bytes" % len(enc_privkey)
    print >>out, " pubkey: %d bytes" % len(pubkey)
    print >>out, " signature: %d bytes" % len(signature)
    share_hash_ids = ",".join(sorted([str(hid)
                                      for hid in share_hash_chain.keys()]))
    print >>out, " share_hash_chain: %s" % share_hash_ids
    print >>out, " block_hash_tree: %d nodes" % len(block_hash_tree)
    # the storage index isn't stored in the share itself, so we depend upon
    # knowing the parent directory name to get it
    pieces = options['filename'].split(os.sep)
    if len(pieces) >= 2 and base32.could_be_base32_encoded(pieces[-2]):
        storage_index = base32.a2b(pieces[-2])
        fingerprint = hashutil.ssk_pubkey_fingerprint_hash(pubkey)
        u = SSKVerifierURI(storage_index, fingerprint)
        verify_cap = u.to_string()
        print >>out, " verify-cap:", verify_cap
    if options['offsets']:
        # NOTE: this offset-calculation code is fragile, and needs to be
        # merged with MutableShareFile's internals.
        print >>out, " Section Offsets:"
        def printoffset(name, value, shift=0):
            print >>out, "%s%20s: %s (0x%x)" % (" "*shift, name, value, value)
        printoffset("first lease", m.HEADER_SIZE)
        printoffset("share data", m.DATA_OFFSET)
        # seqnum follows the 1-byte version field; root_hash follows the
        # 8-byte seqnum
        o_seqnum = m.DATA_OFFSET + struct.calcsize(">B")
        printoffset("seqnum", o_seqnum, 2)
        o_root_hash = m.DATA_OFFSET + struct.calcsize(">BQ")
        printoffset("root_hash", o_root_hash, 2)
        for k in ["signature", "share_hash_chain", "block_hash_tree",
                  "enc_privkey", "EOF"]:
            name = {"share_data": "block data",
                    "EOF": "end of share data"}.get(k,k)
            # offsets[] values are relative to the start of the share data
            offset = m.DATA_OFFSET + offsets[k]
            printoffset(name, offset, 2)
        f = open(options['filename'], "rb")
        printoffset("extra leases", m._read_extra_lease_offset(f) + 4)
class DumpCapOptions(usage.Options):
    # Option parser for 'tahoe debug dump-cap': one cap string plus
    # optional nodeid/client-secret inputs used to derive shared secrets.
    def getSynopsis(self):
        return "Usage: tahoe debug dump-cap [options] FILECAP"
    # NOTE(review): the optParameters assignment line and the first entry's
    # opening bracket are not visible in this view.
        None, "storage server nodeid (ascii), to construct WE and secrets."],
        ["client-secret", "c", None,
         "client's base secret (ascii), to construct secrets"],
        ["client-dir", "d", None,
         "client's base directory, from which a -c secret will be read"],
    def parseArgs(self, cap):
        # NOTE(review): the assignment storing 'cap' is not visible here.
    def getUsage(self, width=None):
        # Append the long-form help text to twisted's auto-generated usage.
        t = usage.Options.getUsage(self, width)
Print information about the given cap-string (aka: URI, file-cap, dir-cap,
read-cap, write-cap). The URI string is parsed and unpacked. This prints the
type of the cap, its storage index, and any derived keys.
 tahoe debug dump-cap URI:SSK-Verifier:4vozh77tsrw7mdhnj7qvp5ky74:q7f3dwz76sjys4kqfdt3ocur2pay3a6rftnkqmi2uxu3vqsdsofq
This may be useful to determine if a read-cap and a write-cap refer to the
same time, or to extract the storage-index from a file-cap (to then use with
If additional information is provided (storage server nodeid and/or client
base secret), this command will compute the shared secrets used for the
write-enabler and for lease-renewal.
def dump_cap(options):
    # Entry point for 'tahoe debug dump-cap': parse the cap string (or a
    # gateway URL containing one) and describe it, optionally deriving
    # write-enabler/lease secrets from a nodeid and client secret.
    from allmydata import uri
    from allmydata.util import base32
    from base64 import b32decode
    import urlparse, urllib
    # NOTE(review): the lines binding 'out', 'cap', and the nodeid/secret
    # defaults are not visible in this view.
    if options['nodeid']:
        # nodeid is given as RFC3548 base32; b32decode wants upper-case
        nodeid = b32decode(options['nodeid'].upper())
    if options['client-secret']:
        secret = base32.a2b(options['client-secret'])
    elif options['client-dir']:
        # fall back to reading the client's base secret from its node dir
        secretfile = os.path.join(options['client-dir'], "private", "secret")
        secret = base32.a2b(open(secretfile, "r").read().strip())
    except EnvironmentError:
        # NOTE(review): the 'try:' line and handler body for this clause
        # are not visible in this view.
    if cap.startswith("http"):
        # allow pasting a full webapi URL; extract the cap from the path
        scheme, netloc, path, params, query, fragment = urlparse.urlparse(cap)
        assert path.startswith("/uri/")
        cap = urllib.unquote(path[len("/uri/"):])
    u = uri.from_string(cap)
    dump_uri_instance(u, nodeid, secret, out)
def _dump_secrets(storage_index, secret, nodeid, out):
    # Derive and print the renewal/cancel secret chain for one file:
    # client secret -> per-file secret -> per-bucket (per-server) secret.
    # Each per-bucket step additionally requires the server's nodeid.
    from allmydata.util import hashutil
    from allmydata.util import base32
    # NOTE(review): the 'if secret:' / 'if nodeid:' guard lines around
    # these derivations are not visible in this view.
    crs = hashutil.my_renewal_secret_hash(secret)
    print >>out, " client renewal secret:", base32.b2a(crs)
    frs = hashutil.file_renewal_secret_hash(crs, storage_index)
    print >>out, " file renewal secret:", base32.b2a(frs)
    renew = hashutil.bucket_renewal_secret_hash(frs, nodeid)
    print >>out, " lease renewal secret:", base32.b2a(renew)
    ccs = hashutil.my_cancel_secret_hash(secret)
    print >>out, " client cancel secret:", base32.b2a(ccs)
    fcs = hashutil.file_cancel_secret_hash(ccs, storage_index)
    print >>out, " file cancel secret:", base32.b2a(fcs)
    cancel = hashutil.bucket_cancel_secret_hash(fcs, nodeid)
    print >>out, " lease cancel secret:", base32.b2a(cancel)
def dump_uri_instance(u, nodeid, secret, out, show_header=True):
    # Describe one parsed URI object, dispatching on its concrete type.
    # Directory URIs recurse into their wrapped filenode URI (with
    # show_header=False to suppress the repeated banner).
    from allmydata import storage, uri
    from allmydata.util import base32, hashutil
    if isinstance(u, uri.CHKFileURI):
        print >>out, "CHK File:"
        print >>out, " key:", base32.b2a(u.key)
        print >>out, " UEB hash:", base32.b2a(u.uri_extension_hash)
        print >>out, " size:", u.size
        print >>out, " k/N: %d/%d" % (u.needed_shares, u.total_shares)
        print >>out, " storage index:", storage.si_b2a(u.storage_index)
        _dump_secrets(u.storage_index, secret, nodeid, out)
    elif isinstance(u, uri.CHKFileVerifierURI):
        print >>out, "CHK Verifier URI:"
        print >>out, " UEB hash:", base32.b2a(u.uri_extension_hash)
        print >>out, " size:", u.size
        print >>out, " k/N: %d/%d" % (u.needed_shares, u.total_shares)
        print >>out, " storage index:", storage.si_b2a(u.storage_index)
    elif isinstance(u, uri.LiteralFileURI):
        # literal caps embed the file data directly; nothing to derive
        print >>out, "Literal File URI:"
        print >>out, " data:", u.data
    elif isinstance(u, uri.WriteableSSKFileURI):
        print >>out, "SSK Writeable URI:"
        print >>out, " writekey:", base32.b2a(u.writekey)
        print >>out, " readkey:", base32.b2a(u.readkey)
        print >>out, " storage index:", storage.si_b2a(u.storage_index)
        print >>out, " fingerprint:", base32.b2a(u.fingerprint)
        # NOTE(review): the 'if nodeid:' guard around the write-enabler
        # derivation is not visible in this view.
        we = hashutil.ssk_write_enabler_hash(u.writekey, nodeid)
        print >>out, " write_enabler:", base32.b2a(we)
        _dump_secrets(u.storage_index, secret, nodeid, out)
    elif isinstance(u, uri.ReadonlySSKFileURI):
        print >>out, "SSK Read-only URI:"
        print >>out, " readkey:", base32.b2a(u.readkey)
        print >>out, " storage index:", storage.si_b2a(u.storage_index)
        print >>out, " fingerprint:", base32.b2a(u.fingerprint)
    elif isinstance(u, uri.SSKVerifierURI):
        print >>out, "SSK Verifier URI:"
        print >>out, " storage index:", storage.si_b2a(u.storage_index)
        print >>out, " fingerprint:", base32.b2a(u.fingerprint)
    elif isinstance(u, uri.NewDirectoryURI):
        print >>out, "Directory Writeable URI:"
        dump_uri_instance(u._filenode_uri, nodeid, secret, out, False)
    elif isinstance(u, uri.ReadonlyNewDirectoryURI):
        print >>out, "Directory Read-only URI:"
        dump_uri_instance(u._filenode_uri, nodeid, secret, out, False)
    elif isinstance(u, uri.NewDirectoryURIVerifier):
        print >>out, "Directory Verifier URI:"
        dump_uri_instance(u._filenode_uri, nodeid, secret, out, False)
    # NOTE(review): presumably the 'else:' branch of the chain above.
    print >>out, "unknown cap type"
class FindSharesOptions(usage.Options):
    # Option parser for 'tahoe debug find-shares': a storage index string
    # followed by one or more node directories to search.
    def getSynopsis(self):
        return "Usage: tahoe debug find-shares STORAGE_INDEX NODEDIRS.."
    def parseArgs(self, storage_index_s, *nodedirs):
        # Keep the SI in its ascii form; si_a2b happens in find_shares().
        self.si_s = storage_index_s
        self.nodedirs = nodedirs
    def getUsage(self, width=None):
        # Append the long-form help text to twisted's auto-generated usage.
        t = usage.Options.getUsage(self, width)
Locate all shares for the given storage index. This command looks through one
or more node directories to find the shares. It returns a list of filenames,
one per line, for each share file found.
 tahoe debug find-shares 4vozh77tsrw7mdhnj7qvp5ky74 testgrid/node-*
It may be useful during testing, when running a test grid in which all the
nodes are on a local disk. The share files thus located can be counted,
examined (with dump-share), or corrupted/deleted to test checker/repairer.
def find_shares(options):
    """Given a storage index and a list of node directories, emit a list of
    all matching shares to stdout, one per line. For example:

     find-shares.py 44kai1tui348689nrw8fjegc8c ~/testnet/node-*

     /home/warner/testnet/node-1/storage/shares/44k/44kai1tui348689nrw8fjegc8c/5
     /home/warner/testnet/node-1/storage/shares/44k/44kai1tui348689nrw8fjegc8c/9
     /home/warner/testnet/node-2/storage/shares/44k/44kai1tui348689nrw8fjegc8c/2
    """
    from allmydata import storage
    # map the SI to its two-level abbreviated shares/ subdirectory
    sharedir = storage.storage_index_to_dir(storage.si_a2b(options.si_s))
    for d in options.nodedirs:
        d = os.path.join(os.path.expanduser(d), "storage/shares", sharedir)
        if os.path.exists(d):
            # each entry in the SI directory is one share, named by shnum
            for shnum in os.listdir(d):
                print >>out, os.path.join(d, shnum)
class CatalogSharesOptions(usage.Options):
    # Option parser for 'tahoe debug catalog-shares': one or more node
    # directories whose shares will be summarized one line each.
    def parseArgs(self, *nodedirs):
        self.nodedirs = nodedirs
        # NOTE(review): the 'if not nodedirs:' guard line is not visible in
        # this view.
        raise usage.UsageError("must specify at least one node directory")
    def getSynopsis(self):
        return "Usage: tahoe debug catalog-shares NODEDIRS.."
    def getUsage(self, width=None):
        # Append the long-form help text to twisted's auto-generated usage.
        t = usage.Options.getUsage(self, width)
Locate all shares in the given node directories, and emit a one-line summary
of each share. Run it like this:
 tahoe debug catalog-shares testgrid/node-* >allshares.txt
The lines it emits will look like the following:
 CHK $SI $k/$N $filesize $UEB_hash $expiration $abspath_sharefile
 SDMF $SI $k/$N $filesize $seqnum/$roothash $expiration $abspath_sharefile
 UNKNOWN $abspath_sharefile
This command can be used to build up a catalog of shares from many storage
servers and then sort the results to compare all shares for the same file. If
you see shares with the same SI but different parameters/filesize/UEB_hash,
then something is wrong. The misc/find-share/anomalies.py script may be
def describe_share(abs_sharefile, si_s, shnum_s, now, out):
    # Emit a one-line summary for a single share file: "SDMF ...",
    # "CHK ...", or an UNKNOWN line. 'now' is a POSIX timestamp used to
    # compute the remaining lease lifetime.
    from allmydata import uri, storage
    from allmydata.mutable.layout import unpack_share
    from allmydata.mutable.common import NeedMoreDataError
    from allmydata.util import base32
    f = open(abs_sharefile, "rb")
    # NOTE(review): the read that binds 'prefix' is not visible here.
    if prefix == storage.MutableShareFile.MAGIC:
        # mutable share container
        m = storage.MutableShareFile(abs_sharefile)
        WE, nodeid = m._read_write_enabler_and_nodeid(f)
        num_extra_leases = m._read_num_extra_leases(f)
        data_length = m._read_data_length(f)
        extra_lease_offset = m._read_extra_lease_offset(f)
        container_size = extra_lease_offset - m.DATA_OFFSET
        leases = list(m._enumerate_leases(f))
        # report the soonest-expiring lease
        expiration_time = min( [lease[1].expiration_time
                                for lease in leases] )
        expiration = max(0, expiration_time - now)
        share_type = "unknown"
        f.seek(m.DATA_OFFSET)
        # a leading version byte of 0x00 marks an SDMF share
        if f.read(1) == "\x00":
            # this slot contains an SMDF share
            # NOTE(review): the 'share_type = "SDMF"' assignment is not
            # visible in this view.
        if share_type == "SDMF":
            f.seek(m.DATA_OFFSET)
            # read a 2000-byte guess; retry once if unpack_share needs more
            data = f.read(min(data_length, 2000))
            # NOTE(review): the 'try:' line guarding this unpack is not
            # visible in this view.
            pieces = unpack_share(data)
            except NeedMoreDataError, e:
                # retry once with the larger size
                size = e.needed_bytes
                f.seek(m.DATA_OFFSET)
                data = f.read(min(data_length, size))
                pieces = unpack_share(data)
            (seqnum, root_hash, IV, k, N, segsize, datalen,
             pubkey, signature, share_hash_chain, block_hash_tree,
             share_data, enc_privkey) = pieces
            print >>out, "SDMF %s %d/%d %d #%d:%s %d %s" % \
                  (si_s, k, N, datalen,
                   seqnum, base32.b2a(root_hash),
                   expiration, abs_sharefile)
        # NOTE(review): presumably the 'else:' of the SDMF check above.
        print >>out, "UNKNOWN mutable %s" % (abs_sharefile,)
    elif struct.unpack(">L", prefix[:4]) == (1,):
        # immutable share container, version 1
        sf = storage.ShareFile(abs_sharefile)
        # use a ReadBucketProxy to parse the bucket and find the uri extension
        bp = storage.ReadBucketProxy(None)
        offsets = bp._parse_offsets(sf.read_share_data(0, 0x24))
        seek = offsets['uri_extension']
        length = struct.unpack(">L", sf.read_share_data(seek, 4))[0]
        UEB_data = sf.read_share_data(seek, length)
        # report the soonest-expiring lease
        expiration_time = min( [lease.expiration_time
                                for lease in sf.iter_leases()] )
        expiration = max(0, expiration_time - now)
        unpacked = uri.unpack_extension_readable(UEB_data)
        k = unpacked["needed_shares"]
        N = unpacked["total_shares"]
        filesize = unpacked["size"]
        ueb_hash = unpacked["UEB_hash"]
        print >>out, "CHK %s %d/%d %d %s %d %s" % (si_s, k, N, filesize,
                                                   ueb_hash, expiration,
        # NOTE(review): the closing argument/paren of the print above and
        # the final 'else:' line are not visible in this view.
        print >>out, "UNKNOWN really-unknown %s" % (abs_sharefile,)
def catalog_shares(options):
    # Walk every node directory's storage/shares tree and describe each
    # share file found, one line per share.
    # NOTE(review): the lines binding 'out' and 'now' are not visible in
    # this view.
    for d in options.nodedirs:
        d = os.path.join(os.path.expanduser(d), "storage/shares")
        # NOTE(review): the 'try:' line guarding this listdir is not
        # visible in this view.
        abbrevs = os.listdir(d)
        except EnvironmentError:
            # ignore nodes that have storage turned off altogether
        # layout: shares/<abbrev>/<storage-index>/<shnum>
        for abbrevdir in abbrevs:
            if abbrevdir == "incoming":
                # skip partially-uploaded shares
            abbrevdir = os.path.join(d, abbrevdir)
            for si_s in os.listdir(abbrevdir):
                si_dir = os.path.join(abbrevdir, si_s)
                for shnum_s in os.listdir(si_dir):
                    abs_sharefile = os.path.join(si_dir, shnum_s)
                    abs_sharefile = os.path.abspath(abs_sharefile)
                    assert os.path.isfile(abs_sharefile)
                    describe_share(abs_sharefile, si_s, shnum_s, now, out)
# 'tahoe debug repl' takes no options of its own.
class ReplOptions(usage.Options):
# NOTE(review): the enclosing 'def repl(options):' line is not visible in
# this view; this statement starts an interactive python interpreter.
    return code.interact()
class DebugCommand(usage.Options):
    # Parent option parser for 'tahoe debug': maps each subcommand name to
    # its Options class and provides the summary usage text.
    # NOTE(review): the 'subCommands = [' opening line is not visible here.
        ["dump-share", None, DumpOptions,
         "Unpack and display the contents of a share (uri_extension and leases)."],
        ["dump-cap", None, DumpCapOptions, "Unpack a read-cap or write-cap"],
        ["find-shares", None, FindSharesOptions, "Locate sharefiles in node dirs"],
        ["catalog-shares", None, CatalogSharesOptions, "Describe shares in node dirs"],
        ["repl", None, ReplOptions, "Open a python interpreter"],
    def postOptions(self):
        # twisted sets subOptions only when a subcommand was parsed
        if not hasattr(self, 'subOptions'):
            raise usage.UsageError("must specify a subcommand")
    def getSynopsis(self):
        return "Usage: tahoe debug SUBCOMMAND"
    def getUsage(self, width=None):
        # hand-written summary instead of twisted's auto-generated text
        #t = usage.Options.getUsage(self, width)
 tahoe debug dump-share Unpack and display the contents of a share
 tahoe debug dump-cap Unpack a read-cap or write-cap
 tahoe debug find-shares Locate sharefiles in node directories
 tahoe debug catalog-shares Describe all shares in node dirs
Please run e.g. 'tahoe debug dump-share --help' for more details on each
# Dispatch table from subcommand name to its implementation function.
# NOTE(review): the 'subDispatch = {' opening line and closing brace are
# not visible in this view.
    "dump-share": dump_share,
    "dump-cap": dump_cap,
    "find-shares": find_shares,
    "catalog-shares": catalog_shares,
def do_debug(options):
    # Top-level 'tahoe debug' runner: forward our stdout/stderr to the
    # subcommand's options object, then invoke the matching function.
    so = options.subOptions
    so.stdout = options.stdout
    so.stderr = options.stderr
    f = subDispatch[options.subCommand]
    # NOTE(review): the final 'return f(so)' call is not visible in this
    # view.
677 ["debug", None, DebugCommand, "debug subcommands: use 'tahoe debug' for a list"],