The filesystem which gets my vote for most undeservedly popular is ext3, and it has a hard limit of 32,000 entries in a directory. Many other filesystems (even ones that I like more than I like ext3) have either hard limits or bad performance consequences or weird edge cases when you get too many entries in a single directory.
This patch makes it so that there is a layer of intermediate directories between the "shares" directory and the actual storage-index directory (the one whose name contains the entire storage index (z-base-32 encoded) and which contains one or more share files named by their share number).
The intermediate directories are named by the first 14 bits of the storage index, which means there are at most 16384 of them. (This also means that the intermediate directory names are not a leading prefix of the storage-index directory names -- to do that would have required us to have intermediate directories limited to either 1024 (2-char), which is too few, or 32768 (3-chars of a full 5 bits each), which would overrun ext3's funny hard limit of 32,000.)
This closes #150, and please see the "convertshares.py" script attached to #150 to convert your old tahoe-0.7.0 storage/shares directory into a new tahoe-0.8.0 storage/shares directory.
# storage/
# storage/shares/incoming
-# incoming/ holds temp dirs named $STORAGEINDEX/$SHARENUM which will be
-# moved to storage/shares/$STORAGEINDEX/$SHARENUM upon success
-# storage/shares/$STORAGEINDEX
-# storage/shares/$STORAGEINDEX/$SHARENUM
+# incoming/ holds temp dirs named $START/$STORAGEINDEX/$SHARENUM which will
+# be moved to storage/shares/$START/$STORAGEINDEX/$SHARENUM upon success
+# storage/shares/$START/$STORAGEINDEX
+# storage/shares/$START/$STORAGEINDEX/$SHARENUM
+
+# Where "$START" denotes the first 14 bits worth of $STORAGEINDEX (that's 3
+# base-32 chars, but the last one has only 4 bits in it -- i.e. only 16 possible
+# chars in the last position).
# $SHARENUM matches this regex:
NUM_RE=re.compile("^[0-9]+$")
# B+0x44: expiration time, 4 bytes big-endian seconds-since-epoch
# B+0x48: next lease, or end of record
+def storage_index_to_dir(storageindex):
+ return os.path.join(idlib.b2a_l(storageindex[:2], 14), idlib.b2a(storageindex))
+
class ShareFile:
LEASE_SIZE = struct.calcsize(">L32s32sL")
self.closed = False
self.throw_out_all_data = False
# touch the file, so later callers will see that we're working on it.
+ assert not os.path.exists(incominghome)
+ fileutil.make_dirs(os.path.dirname(incominghome))
# Also construct the metadata.
- assert not os.path.exists(self.incominghome)
- f = open(self.incominghome, 'wb')
+ f = open(incominghome, 'wb')
f.write(struct.pack(">LLL", 1, size, 0))
f.close()
- self._sharefile = ShareFile(self.incominghome)
+ self._sharefile = ShareFile(incominghome)
# also, add our lease to the file now, so that other ones can be
# added by simultaneous uploaders
self._sharefile.add_lease(lease_info)
def remote_close(self):
precondition(not self.closed)
+
+ fileutil.make_dirs(os.path.dirname(self.finalhome))
fileutil.rename(self.incominghome, self.finalhome)
+ try:
+ os.rmdir(os.path.dirname(self.incominghome))
+ os.rmdir(os.path.dirname(os.path.dirname(self.incominghome)))
+ os.rmdir(os.path.dirname(os.path.dirname(os.path.dirname(self.incominghome))))
+ except EnvironmentError:
+ pass
self._sharefile = None
self.closed = True
self._canary.dontNotifyOnDisconnect(self._disconnect_marker)
filelen = os.stat(self.finalhome)[stat.ST_SIZE]
self.ss.bucket_writer_closed(self, filelen)
- # if we were the last share to be moved, remove the incoming/
- # directory that was our parent
- parentdir = os.path.split(self.incominghome)[0]
- if not os.listdir(parentdir):
- os.rmdir(parentdir)
-
def _disconnected(self):
if not self.closed:
self._abort()
class BucketReader(Referenceable):
implements(RIBucketReader)
- def __init__(self, home):
- self._share_file = ShareFile(home)
+ def __init__(self, sharefname):
+ self._share_file = ShareFile(sharefname)
def remote_read(self, offset, length):
return self._share_file.read_share_data(offset, length)
# to a particular owner.
alreadygot = set()
bucketwriters = {} # k: shnum, v: BucketWriter
- si_s = idlib.b2a(storage_index)
+ si_s = storage_index_to_dir(storage_index)
# in this implementation, the lease information (including secrets)
# goes into the share files themselves. It could also be put into a
pass
elif no_limits or remaining_space >= space_per_bucket:
# ok! we need to create the new share file.
- fileutil.make_dirs(os.path.join(self.incomingdir, si_s))
bw = BucketWriter(self, incominghome, finalhome,
space_per_bucket, lease_info, canary)
if self.no_storage:
raise IndexError("no such lease to renew")
def remote_cancel_lease(self, storage_index, cancel_secret):
- storagedir = os.path.join(self.sharedir, idlib.b2a(storage_index))
+ storagedir = os.path.join(self.sharedir, storage_index_to_dir(storage_index))
remaining_files = 0
total_space_freed = 0
"""Return a list of (shnum, pathname) tuples for files that hold
shares for this storage_index. In each tuple, 'shnum' will always be
the integer form of the last component of 'pathname'."""
- storagedir = os.path.join(self.sharedir, idlib.b2a(storage_index))
+ storagedir = os.path.join(self.sharedir, storage_index_to_dir(storage_index))
try:
for f in os.listdir(storagedir):
if NUM_RE.match(f):
pass
def _get_incoming_shares(self, storage_index):
- incomingdir = os.path.join(self.incomingdir, idlib.b2a(storage_index))
+ incomingdir = os.path.join(self.incomingdir, storage_index_to_dir(storage_index))
try:
for f in os.listdir(incomingdir):
if NUM_RE.match(f):
secrets,
test_and_write_vectors,
read_vector):
- si_s = idlib.b2a(storage_index)
+ si_s = storage_index_to_dir(storage_index)
(write_enabler, renew_secret, cancel_secret) = secrets
# shares exist if there is a file for them
bucketdir = os.path.join(self.sharedir, si_s)
return share
def remote_slot_readv(self, storage_index, shares, readv):
- si_s = idlib.b2a(storage_index)
+ si_s = storage_index_to_dir(storage_index)
# shares exist if there is a file for them
bucketdir = os.path.join(self.sharedir, si_s)
if not os.path.isdir(bucketdir):
from allmydata import interfaces
from allmydata.util import fileutil, hashutil, idlib
from allmydata.storage import BucketWriter, BucketReader, \
- WriteBucketProxy, ReadBucketProxy, StorageServer, MutableShareFile
+ WriteBucketProxy, ReadBucketProxy, StorageServer, MutableShareFile, \
+ storage_index_to_dir
from allmydata.interfaces import BadWriteEnablerError
from allmydata.test.common import LoggingServiceParent
bw.remote_close()
# now read from it
- br = BucketReader(final)
+ br = BucketReader(bw.finalhome)
self.failUnlessEqual(br.remote_read(0, 25), "a"*25)
self.failUnlessEqual(br.remote_read(25, 25), "b"*25)
self.failUnlessEqual(br.remote_read(50, 7), "c"*7)
pass
def test_create(self):
- bw, rb, final = self.make_bucket("test_create", 500)
+ bw, rb, sharefname = self.make_bucket("test_create", 500)
bp = WriteBucketProxy(rb,
data_size=300,
segment_size=10,
for i in (1,9,13)]
uri_extension = "s" + "E"*498 + "e"
- bw, rb, final = self.make_bucket("test_readwrite", 1414)
+ bw, rb, sharefname = self.make_bucket("test_readwrite", 1414)
bp = WriteBucketProxy(rb,
data_size=95,
segment_size=25,
# now read everything back
def _start_reading(res):
- br = BucketReader(final)
+ br = BucketReader(sharefname)
rb = RemoteBucket()
rb.target = br
rbp = ReadBucketProxy(rb)
renew_secret, cancel_secret,
sharenums, size, FakeCanary())
+ def test_dont_overfill_dirs(self):
+ """
+ This test asserts that if you add a second share whose storage index
+ share lots of leading bits with an extant share (but isn't the exact
+ same storage index), this won't add an entry to the share directory.
+ """
+ ss = self.create("test_dont_overfill_dirs")
+ already, writers = self.allocate(ss, "storageindex", [0], 10)
+ for i, wb in writers.items():
+ wb.remote_write(0, "%10d" % i)
+ wb.remote_close()
+ storedir = os.path.join(self.workdir("test_dont_overfill_dirs"),
+ "shares")
+ children_of_storedir = set(os.listdir(storedir))
+
+ # Now store another one under another storageindex that has leading
+ # chars the same as the first storageindex.
+ already, writers = self.allocate(ss, "storageindey", [0], 10)
+ for i, wb in writers.items():
+ wb.remote_write(0, "%10d" % i)
+ wb.remote_close()
+ storedir = os.path.join(self.workdir("test_dont_overfill_dirs"),
+ "shares")
+ new_children_of_storedir = set(os.listdir(storedir))
+ self.failUnlessEqual(children_of_storedir, new_children_of_storedir)
+
def test_remove_incoming(self):
ss = self.create("test_remove_incoming")
already, writers = self.allocate(ss, "vid", range(3), 10)
for i,wb in writers.items():
wb.remote_write(0, "%10d" % i)
wb.remote_close()
- incomingdir = os.path.join(self.workdir("test_remove_incoming"),
- "shares", "incoming")
- leftover_dirs = os.listdir(incomingdir)
- self.failUnlessEqual(leftover_dirs, [])
+ incomingdir = os.path.dirname(os.path.dirname(os.path.dirname(wb.incominghome)))
+ self.failIf(os.path.exists(incomingdir))
def test_allocate(self):
ss = self.create("test_allocate")
# create a random non-numeric file in the bucket directory, to
# exercise the code that's supposed to ignore those.
bucket_dir = os.path.join(self.workdir("test_leases"),
- "shares", idlib.b2a("si1"))
+ "shares", storage_index_to_dir("si1"))
f = open(os.path.join(bucket_dir, "ignore_me.txt"), "w")
f.write("you ought to be ignoring me\n")
f.close()
for i in range(self.numclients):
incdir = os.path.join(self.getdir("client%d" % i),
"storage", "shares", "incoming")
- self.failUnlessEqual(os.listdir(incdir), [])
+ self.failIf(os.path.exists(incdir) and os.listdir(incdir))
d.addCallback(_disconnected)
def _wait_for_reconnect(res):
if not filenames:
continue
pieces = dirpath.split(os.sep)
- if pieces[-3] == "storage" and pieces[-2] == "shares":
- # we're sitting in .../storage/shares/$SINDEX , and there
+ if pieces[-4] == "storage" and pieces[-3] == "shares":
+ # we're sitting in .../storage/shares/$START/$SINDEX , and there
# are sharefiles here
- assert pieces[-4].startswith("client")
- client_num = int(pieces[-4][-1])
+ assert pieces[-5].startswith("client")
+ client_num = int(pieces[-5][-1])
storage_index_s = pieces[-1]
storage_index = idlib.a2b(storage_index_s)
for sharename in filenames:
if not filenames:
continue
pieces = dirpath.split(os.sep)
- if pieces[-3] == "storage" and pieces[-2] == "shares":
- # we're sitting in .../storage/shares/$SINDEX , and there are
- # sharefiles here
+ if pieces[-4] == "storage" and pieces[-3] == "shares":
+ # we're sitting in .../storage/shares/$START/$SINDEX , and there
+ # are sharefiles here
filename = os.path.join(dirpath, filenames[0])
# peek at the magic to see if it is a chk share
magic = open(filename, "rb").read(4)