From 094c0eebe36993781043590282d52ae6e5dcc60f Mon Sep 17 00:00:00 2001 From: Daira Hopwood Date: Fri, 16 Oct 2015 17:25:01 +0100 Subject: [PATCH] Changes to debug.py. Signed-off-by: Daira Hopwood --- src/allmydata/scripts/debug.py | 247 ++++++++++++++++++--------------- 1 file changed, 133 insertions(+), 114 deletions(-) diff --git a/src/allmydata/scripts/debug.py b/src/allmydata/scripts/debug.py index b01ca32a..7598c752 100644 --- a/src/allmydata/scripts/debug.py +++ b/src/allmydata/scripts/debug.py @@ -2,9 +2,11 @@ # do not import any allmydata modules at this level. Do that from inside # individual functions instead. import struct, time, os, sys + from twisted.python import usage, failure from twisted.internet import defer from twisted.scripts import trial as twisted_trial + from foolscap.logging import cli as foolscap_cli from allmydata.scripts.common import BaseOptions @@ -30,33 +32,34 @@ verify-cap for the file that uses the share. from allmydata.util.encodingutil import argv_to_abspath self['filename'] = argv_to_abspath(filename) + def dump_share(options): - from allmydata.storage.backends.disk.mutable import MutableShareFile + from allmydata.storage.backends.disk.disk_backend import get_disk_share from allmydata.util.encodingutil import quote_output out = options.stdout + filename = options['filename'] # check the version, to see if we have a mutable or immutable share - print >>out, "share filename: %s" % quote_output(options['filename']) + print >>out, "share filename: %s" % quote_output(filename) - f = open(options['filename'], "rb") - prefix = f.read(32) - f.close() - if prefix == MutableShareFile.MAGIC: - return dump_mutable_share(options) - # otherwise assume it's immutable - return dump_immutable_share(options) + share = get_disk_share(filename) + + if share.sharetype == "mutable": + return dump_mutable_share(options, share) + else: + assert share.sharetype == "immutable", share.sharetype + return dump_immutable_share(options, share) -def dump_immutable_share(options): - from allmydata.storage.backends.disk.immutable import ShareFile +def dump_immutable_share(options, share): out = options.stdout - f = ShareFile(options['filename']) - dump_immutable_chk_share(f, out, options) + dump_immutable_chk_share(share, out, options) print >>out return 0 -def dump_immutable_chk_share(f, out, options): + +def dump_immutable_chk_share(share, out, options): from allmydata import uri from allmydata.util import base32 from allmydata.immutable.layout import ReadBucketProxy @@ -64,13 +67,18 @@ def dump_immutable_chk_share(f, out, options): # use a ReadBucketProxy to parse the bucket and find the uri extension bp = ReadBucketProxy(None, None, '') - offsets = bp._parse_offsets(f.read_share_data(0, 0x44)) + f = open(share._get_path(), "rb") + # XXX yuck, private API + def read_share_data(offset, length): + return share._read_share_data(f, offset, length) + + offsets = bp._parse_offsets(read_share_data(0, 0x44)) print >>out, "%20s: %d" % ("version", bp._version) seek = offsets['uri_extension'] length = struct.unpack(bp._fieldstruct, - f.read_share_data(seek, bp._fieldsize))[0] + read_share_data(seek, bp._fieldsize))[0] seek += bp._fieldsize - UEB_data = f.read_share_data(seek, length) + UEB_data = read_share_data(seek, length) unpacked = uri.unpack_extension_readable(UEB_data) keys1 = ("size", "num_segments", "segment_size", @@ -130,13 +138,14 @@ def dump_immutable_chk_share(f, out, options): if options['offsets']: print >>out print >>out, " Section Offsets:" - print >>out, "%20s: %s" % ("share data", f.DATA_OFFSET) + print >>out, "%20s: %s" % ("share data", share.DATA_OFFSET) for k in ["data", "plaintext_hash_tree", "crypttext_hash_tree", "block_hashes", "share_hashes", "uri_extension"]: name = {"data": "block data"}.get(k,k) - offset = f.DATA_OFFSET + offsets[k] + offset = share.DATA_OFFSET + offsets[k] print >>out, " %20s: %s (0x%x)" % (name, offset, offset) + def format_expiration_time(expiration_time): now = time.time() remains = expiration_time - now @@ -148,11 +157,9 @@ def format_expiration_time(expiration_time): return when -def dump_mutable_share(options): - from allmydata.storage.backends.disk.mutable import MutableShareFile +def dump_mutable_share(options, m): from allmydata.util import base32, idlib out = options.stdout - m = MutableShareFile(options['filename']) f = open(options['filename'], "rb") WE, nodeid = m._read_write_enabler_and_nodeid(f) data_length = m._read_data_length(f) @@ -184,6 +191,7 @@ def dump_mutable_share(options): return 0 + def dump_SDMF_share(m, length, options): from allmydata.mutable.layout import unpack_share, unpack_header from allmydata.mutable.common import NeedMoreDataError @@ -247,7 +255,7 @@ def dump_SDMF_share(m, length, options): if options['offsets']: # NOTE: this offset-calculation code is fragile, and needs to be - # merged with MutableShareFile's internals. + # merged with MutableDiskShare's internals. print >>out print >>out, " Section Offsets:" def printoffset(name, value, shift=0): @@ -270,6 +278,7 @@ def dump_SDMF_share(m, length, options): print >>out + def dump_MDMF_share(m, length, options): from allmydata.mutable.layout import MDMFSlotReadProxy from allmydata.util import base32, hashutil @@ -340,7 +349,7 @@ def dump_MDMF_share(m, length, options): if options['offsets']: # NOTE: this offset-calculation code is fragile, and needs to be - # merged with MutableShareFile's internals. + # merged with MutableDiskShare's internals. print >>out print >>out, " Section Offsets:" @@ -367,7 +376,6 @@ def dump_MDMF_share(m, length, options): print >>out - class DumpCapOptions(BaseOptions): def getSynopsis(self): return "Usage: tahoe [global-options] debug dump-cap [options] FILECAP" @@ -490,7 +498,6 @@ def dump_uri_instance(u, nodeid, out, show_header=True): print >>out, " storage index:", si_b2a(u.get_storage_index()) print >>out, " fingerprint:", base32.b2a(u.fingerprint) - elif isinstance(u, uri.ImmutableDirectoryURI): # CHK-based directory if show_header: print >>out, "CHK Directory URI:" @@ -529,6 +536,7 @@ def dump_uri_instance(u, nodeid, out, show_header=True): else: print >>out, "unknown cap type" + class FindSharesOptions(BaseOptions): def getSynopsis(self): return "Usage: tahoe [global-options] debug find-shares STORAGE_INDEX NODEDIRS.." @@ -562,16 +570,18 @@ def find_shares(options): /home/warner/testnet/node-1/storage/shares/44k/44kai1tui348689nrw8fjegc8c/9 /home/warner/testnet/node-2/storage/shares/44k/44kai1tui348689nrw8fjegc8c/2 """ - from allmydata.storage.server import si_a2b, storage_index_to_dir - from allmydata.util.encodingutil import listdir_unicode, quote_local_unicode_path + from allmydata.storage.common import si_a2b, NUM_RE + from allmydata.storage.backends.disk.disk_backend import si_si2dir + from allmydata.util import fileutil + from allmydata.util.encodingutil import quote_local_unicode_path out = options.stdout - sharedir = storage_index_to_dir(si_a2b(options.si_s)) - for d in options.nodedirs: - d = os.path.join(d, "storage", "shares", sharedir) - if os.path.exists(d): - for shnum in listdir_unicode(d): - print >>out, quote_local_unicode_path(os.path.join(d, shnum), quotemarks=False) + si = si_a2b(options.si_s) + for nodedir in options.nodedirs: + sharedir = si_si2dir(os.path.join(nodedir, "storage", "shares"), si) + for shnumstr in fileutil.listdir(sharedir, filter=NUM_RE): + sharefile = os.path.join(sharedir, shnumstr) + print >>out, quote_local_unicode_path(sharefile, quotemarks=False) return 0 @@ -603,9 +613,10 @@ This command can be used to build up a catalog of shares from many storage servers and then sort the results to compare all shares for the same file. If you see shares with the same SI but different parameters/filesize/UEB_hash, then something is wrong. The misc/find-share/anomalies.py script may be -useful for purpose. +useful for that purpose. """ + def call(c, *args, **kwargs): # take advantage of the fact that ImmediateReadBucketProxy returns # Deferreds that are already fired @@ -617,28 +628,34 @@ def call(c, *args, **kwargs): failures[0].raiseException() return results[0] + def describe_share(abs_sharefile, si_s, shnum_s, now, out): from allmydata import uri - from allmydata.storage.backends.disk.mutable import MutableShareFile - from allmydata.storage.backends.disk.immutable import ShareFile + from allmydata.storage.backends.disk.disk_backend import get_disk_share + from allmydata.storage.common import UnknownMutableContainerVersionError, UnknownImmutableContainerVersionError from allmydata.mutable.layout import unpack_share from allmydata.mutable.common import NeedMoreDataError from allmydata.immutable.layout import ReadBucketProxy from allmydata.util import base32 from allmydata.util.encodingutil import quote_output - import struct + + try: + share = get_disk_share(abs_sharefile) + except UnknownMutableContainerVersionError: + print >>out, "UNKNOWN mutable %s" % quote_output(abs_sharefile) + return + except UnknownImmutableContainerVersionError: + print >>out, "UNKNOWN really-unknown %s" % quote_output(abs_sharefile) + return f = open(abs_sharefile, "rb") - prefix = f.read(32) - if prefix == MutableShareFile.MAGIC: - # mutable share - m = MutableShareFile(abs_sharefile) - WE, nodeid = m._read_write_enabler_and_nodeid(f) - data_length = m._read_data_length(f) + if share.sharetype == "mutable": + WE, nodeid = share._read_write_enabler_and_nodeid(f) + data_length = share._read_data_length(f) share_type = "unknown" - f.seek(m.DATA_OFFSET) + f.seek(share.DATA_OFFSET) version = f.read(1) if version == "\x00": # this slot contains an SMDF share @@ -647,7 +664,7 @@ def describe_share(abs_sharefile, si_s, shnum_s, now, out): share_type = "MDMF" if share_type == "SDMF": - f.seek(m.DATA_OFFSET) + f.seek(share.DATA_OFFSET) data = f.read(min(data_length, 2000)) try: @@ -655,7 +672,7 @@ def describe_share(abs_sharefile, si_s, shnum_s, now, out): except NeedMoreDataError, e: # retry once with the larger size size = e.needed_bytes - f.seek(m.DATA_OFFSET) + f.seek(share.DATA_OFFSET) data = f.read(min(data_length, size)) pieces = unpack_share(data) (seqnum, root_hash, IV, k, N, segsize, datalen, @@ -674,7 +691,7 @@ def describe_share(abs_sharefile, si_s, shnum_s, now, out): def _read(self, readvs, force_remote=False, queue=False): data = [] for (where,length) in readvs: - f.seek(m.DATA_OFFSET+where) + f.seek(share.DATA_OFFSET+where) data.append(f.read(length)) return defer.succeed({fake_shnum: data}) @@ -698,21 +715,20 @@ def describe_share(abs_sharefile, si_s, shnum_s, now, out): else: print >>out, "UNKNOWN mutable %s" % quote_output(abs_sharefile) - elif struct.unpack(">L", prefix[:4]) == (1,): + else: # immutable class ImmediateReadBucketProxy(ReadBucketProxy): - def __init__(self, sf): - self.sf = sf + def __init__(self, share): + self.share = share ReadBucketProxy.__init__(self, None, None, "") def __repr__(self): return "" def _read(self, offset, size): - return defer.succeed(sf.read_share_data(offset, size)) + return defer.maybeDeferred(self.share.read_share_data, offset, size) # use a ReadBucketProxy to parse the bucket and find the uri extension - sf = ShareFile(abs_sharefile) - bp = ImmediateReadBucketProxy(sf) + bp = ImmediateReadBucketProxy(share) UEB_data = call(bp.get_uri_extension) unpacked = uri.unpack_extension_readable(UEB_data) @@ -725,59 +741,53 @@ def describe_share(abs_sharefile, si_s, shnum_s, now, out): print >>out, "CHK %s %d/%d %d %s - %s" % (si_s, k, N, filesize, ueb_hash, quote_output(abs_sharefile)) - else: - print >>out, "UNKNOWN really-unknown %s" % quote_output(abs_sharefile) - f.close() + def catalog_shares(options): - from allmydata.util.encodingutil import listdir_unicode, quote_output + from allmydata.util import fileutil + from allmydata.util.encodingutil import quote_output out = options.stdout err = options.stderr now = time.time() - for d in options.nodedirs: - d = os.path.join(d, "storage", "shares") + for node_dir in options.nodedirs: + shares_dir = os.path.join(node_dir, "storage", "shares") try: - abbrevs = listdir_unicode(d) + prefixes = fileutil.listdir(shares_dir) except EnvironmentError: # ignore nodes that have storage turned off altogether pass else: - for abbrevdir in sorted(abbrevs): - if abbrevdir == "incoming": + for prefix in sorted(prefixes): + if prefix == "incoming": continue - abbrevdir = os.path.join(d, abbrevdir) + prefix_dir = os.path.join(shares_dir, prefix) # this tool may get run against bad disks, so we can't assume - # that listdir_unicode will always succeed. Try to catalog as much + # that fileutil.listdir will always succeed. Try to catalog as much # as possible. try: - sharedirs = listdir_unicode(abbrevdir) - for si_s in sorted(sharedirs): - si_dir = os.path.join(abbrevdir, si_s) - catalog_shares_one_abbrevdir(si_s, si_dir, now, out,err) + share_dirs = fileutil.listdir(prefix_dir) + for si_s in sorted(share_dirs): + si_dir = os.path.join(prefix_dir, si_s) + catalog_shareset(si_s, si_dir, now, out, err) except: - print >>err, "Error processing %s" % quote_output(abbrevdir) + print >>err, "Error processing %s" % quote_output(prefix_dir) failure.Failure().printTraceback(err) return 0 -def _as_number(s): - try: - return int(s) - except ValueError: - return "not int" - -def catalog_shares_one_abbrevdir(si_s, si_dir, now, out, err): - from allmydata.util.encodingutil import listdir_unicode, quote_output +def catalog_shareset(si_s, si_dir, now, out, err): + from allmydata.storage.common import NUM_RE + from allmydata.util import fileutil + from allmydata.util.encodingutil import quote_output try: - for shnum_s in sorted(listdir_unicode(si_dir), key=_as_number): + for shnum_s in sorted(fileutil.listdir(si_dir, filter=NUM_RE), key=int): abs_sharefile = os.path.join(si_dir, shnum_s) assert os.path.isfile(abs_sharefile) try: - describe_share(abs_sharefile, si_s, shnum_s, now, - out) + describe_share(abs_sharefile, si_s, shnum_s, now, out) except: print >>err, "Error processing %s" % quote_output(abs_sharefile) failure.Failure().printTraceback(err) @@ -785,6 +795,7 @@ def catalog_shares_one_abbrevdir(si_s, si_dir, now, out, err): print >>err, "Error processing %s" % quote_output(si_dir) failure.Failure().printTraceback(err) + class CorruptShareOptions(BaseOptions): def getSynopsis(self): return "Usage: tahoe [global-options] debug corrupt-share SHARE_FILENAME" @@ -805,63 +816,71 @@ to flip a single random bit of the block data. Obviously, this command should not be used in normal operation. """ + def parseArgs(self, filename): self['filename'] = filename + def corrupt_share(options): + do_corrupt_share(options.stdout, options['filename'], options['offset']) + +def do_corrupt_share(out, filename, offset="block-random"): import random - from allmydata.storage.backends.disk.mutable import MutableShareFile - from allmydata.storage.backends.disk.immutable import ShareFile + from allmydata.storage.backends.disk.disk_backend import get_disk_share from allmydata.mutable.layout import unpack_header from allmydata.immutable.layout import ReadBucketProxy - out = options.stdout - fn = options['filename'] - assert options["offset"] == "block-random", "other offsets not implemented" - # first, what kind of share is it? + + assert offset == "block-random", "other offsets not implemented" def flip_bit(start, end): offset = random.randrange(start, end) bit = random.randrange(0, 8) print >>out, "[%d..%d): %d.b%d" % (start, end, offset, bit) - f = open(fn, "rb+") - f.seek(offset) - d = f.read(1) - d = chr(ord(d) ^ 0x01) - f.seek(offset) - f.write(d) - f.close() - - f = open(fn, "rb") - prefix = f.read(32) - f.close() - if prefix == MutableShareFile.MAGIC: - # mutable - m = MutableShareFile(fn) - f = open(fn, "rb") - f.seek(m.DATA_OFFSET) - data = f.read(2000) - # make sure this slot contains an SMDF share - assert data[0] == "\x00", "non-SDMF mutable shares not supported" - f.close() + f = open(filename, "rb+") + try: + f.seek(offset) + d = f.read(1) + d = chr(ord(d) ^ 0x01) + f.seek(offset) + f.write(d) + finally: + f.close() + + # what kind of share is it? + + share = get_disk_share(filename) + if share.sharetype == "mutable": + f = open(filename, "rb") + try: + f.seek(share.DATA_OFFSET) + data = f.read(2000) + # make sure this slot contains an SMDF share + assert data[0] == "\x00", "non-SDMF mutable shares not supported" + finally: + f.close() (version, ig_seqnum, ig_roothash, ig_IV, ig_k, ig_N, ig_segsize, ig_datalen, offsets) = unpack_header(data) assert version == 0, "we only handle v0 SDMF files" - start = m.DATA_OFFSET + offsets["share_data"] - end = m.DATA_OFFSET + offsets["enc_privkey"] + start = share.DATA_OFFSET + offsets["share_data"] + end = share.DATA_OFFSET + offsets["enc_privkey"] flip_bit(start, end) else: # otherwise assume it's immutable - f = ShareFile(fn) bp = ReadBucketProxy(None, None, '') - offsets = bp._parse_offsets(f.read_share_data(0, 0x24)) - start = f.DATA_OFFSET + offsets["data"] - end = f.DATA_OFFSET + offsets["plaintext_hash_tree"] + f = open(filename, "rb") + try: + # XXX yuck, private API + header = share._read_share_data(f, 0, 0x24) + finally: + f.close() + offsets = bp._parse_offsets(header) + start = share.DATA_OFFSET + offsets["data"] + end = share.DATA_OFFSET + offsets["plaintext_hash_tree"] flip_bit(start, end) - class ReplOptions(BaseOptions): def getSynopsis(self): return "Usage: tahoe [global-options] debug repl" -- 2.45.2