From: Brian Warner
Date: Thu, 12 Jul 2007 20:22:36 +0000 (-0700)
Subject: implement URI:LIT, which stores small (<55B) files inside the URI itself. Fixes #81.
X-Git-Url: https://git.rkrishnan.org/%5B/%5D%20/FOOURL?a=commitdiff_plain;h=def63d193eea0233530fc0aecba5368f8c9956dc;p=tahoe-lafs%2Ftahoe-lafs.git

implement URI:LIT, which stores small (<55B) files inside the URI itself. Fixes #81.
---

diff --git a/src/allmydata/test/test_system.py b/src/allmydata/test/test_system.py
index cbc39651..066746cc 100644
--- a/src/allmydata/test/test_system.py
+++ b/src/allmydata/test/test_system.py
@@ -23,6 +23,11 @@ def flush_but_dont_ignore(res):
     d.addCallback(_done)
     return d
 
+LARGE_DATA = """
+This is some data to publish to the virtual drive, which needs to be large
+enough to not fit inside a LIT uri.
+"""
+
 class SystemTest(testutil.SignalMixin, unittest.TestCase):
 
     def setUp(self):
@@ -234,7 +239,7 @@ class SystemTest(testutil.SignalMixin, unittest.TestCase):
 
     def test_vdrive(self):
         self.basedir = "system/SystemTest/test_vdrive"
-        self.data = DATA = "Some data to publish to the virtual drive\n"
+        self.data = LARGE_DATA
         d = self.set_up_nodes()
         d.addCallback(self.log, "starting publish")
         d.addCallback(self._do_publish)
@@ -568,7 +573,7 @@ class SystemTest(testutil.SignalMixin, unittest.TestCase):
         self.failUnless("size: %d\n" % len(self.data) in output)
         self.failUnless("num_segments: 1\n" in output)
         # segment_size is always a multiple of needed_shares
-        self.failUnless("segment_size: 50\n" in output)
+        self.failUnless("segment_size: 125\n" in output)
         self.failUnless("total_shares: 100\n" in output)
         # keys which are supposed to be present
         for key in ("size", "num_segments", "segment_size",
diff --git a/src/allmydata/test/test_upload.py b/src/allmydata/test/test_upload.py
index f1372d5e..43548831 100644
--- a/src/allmydata/test/test_upload.py
+++ b/src/allmydata/test/test_upload.py
@@ -4,7 +4,7 @@ from twisted.python.failure import Failure
 from cStringIO import StringIO
 
 from
allmydata import upload, encode
-from allmydata.uri import unpack_uri
+from allmydata.uri import unpack_uri, unpack_lit
 
 from test_encode import FakePeer
 
@@ -15,6 +15,18 @@ class FakeClient:
         return [ ("%20d"%fakeid, "%20d"%fakeid, FakePeer(self.mode),)
                  for fakeid in range(50) ]
 
+DATA = """
+Once upon a time, there was a beautiful princess named Buttercup. She lived
+in a magical land where every file was stored securely among millions of
+machines, and nobody ever worried about their data being lost ever again.
+The End.
+"""
+assert len(DATA) > upload.Uploader.URI_LIT_SIZE_THRESHOLD
+
+SIZE_ZERO = 0
+SIZE_SMALL = 16
+SIZE_LARGE = len(DATA)
+
 class GoodServer(unittest.TestCase):
     def setUp(self):
         self.node = FakeClient(mode="good")
@@ -22,7 +34,13 @@ class GoodServer(unittest.TestCase):
         self.u.running = True
         self.u.parent = self.node
 
-    def _check(self, uri):
+    def _check_small(self, uri, size):
+        self.failUnless(isinstance(uri, str))
+        self.failUnless(uri.startswith("URI:LIT:"))
+        d = unpack_lit(uri)
+        self.failUnlessEqual(len(d), size)
+
+    def _check_large(self, uri, size):
         self.failUnless(isinstance(uri, str))
         self.failUnless(uri.startswith("URI:"))
         d = unpack_uri(uri)
@@ -30,31 +48,76 @@ class GoodServer(unittest.TestCase):
         self.failUnlessEqual(len(d['storage_index']), 32)
         self.failUnless(isinstance(d['key'], str))
         self.failUnlessEqual(len(d['key']), 16)
+        self.failUnlessEqual(d['size'], size)
+
+    def get_data(self, size):
+        return DATA[:size]
+
+    def test_data_zero(self):
+        data = self.get_data(SIZE_ZERO)
+        d = self.u.upload_data(data)
+        d.addCallback(self._check_small, SIZE_ZERO)
+        return d
 
-    def testData(self):
-        data = "This is some data to upload"
+    def test_data_small(self):
+        data = self.get_data(SIZE_SMALL)
         d = self.u.upload_data(data)
-        d.addCallback(self._check)
+        d.addCallback(self._check_small, SIZE_SMALL)
         return d
-    testData.timeout = 300
 
-    def testFileHandle(self):
-        data = "This is some data to upload"
+    def test_data_large(self):
+        data =
self.get_data(SIZE_LARGE)
+        d = self.u.upload_data(data)
+        d.addCallback(self._check_large, SIZE_LARGE)
+        return d
+
+    def test_filehandle_zero(self):
+        data = self.get_data(SIZE_ZERO)
         d = self.u.upload_filehandle(StringIO(data))
-        d.addCallback(self._check)
+        d.addCallback(self._check_small, SIZE_ZERO)
+        return d
+
+    def test_filehandle_small(self):
+        data = self.get_data(SIZE_SMALL)
+        d = self.u.upload_filehandle(StringIO(data))
+        d.addCallback(self._check_small, SIZE_SMALL)
+        return d
+
+    def test_filehandle_large(self):
+        data = self.get_data(SIZE_LARGE)
+        d = self.u.upload_filehandle(StringIO(data))
+        d.addCallback(self._check_large, SIZE_LARGE)
+        return d
+
+    def test_filename_zero(self):
+        fn = "Uploader-test_filename_zero.data"
+        f = open(fn, "wb")
+        data = self.get_data(SIZE_ZERO)
+        f.write(data)
+        f.close()
+        d = self.u.upload_filename(fn)
+        d.addCallback(self._check_small, SIZE_ZERO)
+        return d
+
+    def test_filename_small(self):
+        fn = "Uploader-test_filename_small.data"
+        f = open(fn, "wb")
+        data = self.get_data(SIZE_SMALL)
+        f.write(data)
+        f.close()
+        d = self.u.upload_filename(fn)
+        d.addCallback(self._check_small, SIZE_SMALL)
         return d
-    testFileHandle.timeout = 300
 
-    def testFilename(self):
-        fn = "Uploader-testFilename.data"
+    def test_filename_large(self):
+        fn = "Uploader-test_filename_large.data"
         f = open(fn, "wb")
-        data = "This is some data to upload"
+        data = self.get_data(SIZE_LARGE)
         f.write(data)
         f.close()
         d = self.u.upload_filename(fn)
-        d.addCallback(self._check)
+        d.addCallback(self._check_large, SIZE_LARGE)
         return d
-    testFilename.test = 300
 
 class FullServer(unittest.TestCase):
     def setUp(self):
@@ -66,8 +129,8 @@ class FullServer(unittest.TestCase):
     def _should_fail(self, f):
         self.failUnless(isinstance(f, Failure) and f.check(encode.NotEnoughPeersError))
 
-    def testData(self):
-        data = "This is some data to upload"
+    def test_data_large(self):
+        data = DATA
         d = self.u.upload_data(data)
         d.addBoth(self._should_fail)
         return d
diff --git
a/src/allmydata/test/test_uri.py b/src/allmydata/test/test_uri.py
new file mode 100644
index 00000000..6f2c64e9
--- /dev/null
+++ b/src/allmydata/test/test_uri.py
@@ -0,0 +1,84 @@
+
+from twisted.trial import unittest
+from allmydata import uri
+from allmydata.util import hashutil
+
+class LIT(unittest.TestCase):
+    def test_pack(self):
+        data = "This is some small data"
+        u = uri.pack_lit(data)
+        self.failUnlessEqual(uri.get_uri_type(u), "LIT")
+        self.failUnlessEqual(uri.unpack_lit(u), data)
+
+    def test_nonascii(self):
+        data = "This contains \x00 and URI:LIT: and \n, oh my."
+        u = uri.pack_lit(data)
+        self.failUnlessEqual(uri.get_uri_type(u), "LIT")
+        self.failUnlessEqual(uri.unpack_lit(u), data)
+
+class CHK(unittest.TestCase):
+    def test_pack(self):
+        storage_index = hashutil.tagged_hash("foo", "bar")
+        key = "\x00" * 16
+        uri_extension_hash = hashutil.uri_extension_hash("stuff")
+        needed_shares = 25
+        total_shares = 100
+        size = 1234
+        u = uri.pack_uri(storage_index=storage_index,
+                         key=key,
+                         uri_extension_hash=uri_extension_hash,
+                         needed_shares=needed_shares,
+                         total_shares=total_shares,
+                         size=size)
+        self.failUnlessEqual(uri.get_uri_type(u), "CHK")
+        d = uri.unpack_uri(u)
+        self.failUnlessEqual(d['storage_index'], storage_index)
+        self.failUnlessEqual(d['key'], key)
+        self.failUnlessEqual(d['uri_extension_hash'], uri_extension_hash)
+        self.failUnlessEqual(d['needed_shares'], needed_shares)
+        self.failUnlessEqual(d['total_shares'], total_shares)
+        self.failUnlessEqual(d['size'], size)
+
+class Extension(unittest.TestCase):
+    def test_pack(self):
+        data = {"stuff": "value",
+                "size": 12,
+                "needed_shares": 3,
+                "big_hash": hashutil.tagged_hash("foo", "bar"),
+                }
+        ext = uri.pack_extension(data)
+        d = uri.unpack_extension(ext)
+        self.failUnlessEqual(d["stuff"], "value")
+        self.failUnlessEqual(d["size"], 12)
+        self.failUnlessEqual(d["big_hash"], hashutil.tagged_hash("foo", "bar"))
+
+        readable = uri.unpack_extension_readable(ext)
+
+class
Dirnode(unittest.TestCase):
+    def test_pack(self):
+        furl = "pb://stuff@morestuff:stuff/andstuff"
+        writekey = "\x01" * 16
+
+        u = uri.pack_dirnode_uri(furl, writekey)
+        self.failUnless(uri.is_dirnode_uri(u))
+        self.failIf(uri.is_dirnode_uri("NOT A DIRNODE URI"))
+        self.failIf(uri.is_dirnode_uri("URI:stuff"))
+        self.failUnless(uri.is_mutable_dirnode_uri(u))
+        self.failIf(uri.is_mutable_dirnode_uri("NOT A DIRNODE URI"))
+        self.failIf(uri.is_mutable_dirnode_uri("URI:stuff"))
+        self.failUnlessEqual(uri.get_uri_type(u), "DIR")
+
+        rou = uri.make_immutable_dirnode_uri(u)
+        self.failUnless(uri.is_dirnode_uri(rou))
+        self.failIf(uri.is_mutable_dirnode_uri(rou))
+        self.failUnlessEqual(uri.get_uri_type(rou), "DIR-RO")
+
+        d = uri.unpack_dirnode_uri(u)
+        self.failUnlessEqual(d[0], furl)
+        self.failUnlessEqual(d[1], writekey)
+
+        d2 = uri.unpack_dirnode_uri(rou)
+        self.failUnlessEqual(d2[0], furl)
+        rk = hashutil.dir_read_key_hash(writekey)
+        self.failUnlessEqual(d2[1], rk)
+
diff --git a/src/allmydata/upload.py b/src/allmydata/upload.py
index 6b42ac3e..7b5a6b06 100644
--- a/src/allmydata/upload.py
+++ b/src/allmydata/upload.py
@@ -6,13 +6,14 @@ from foolscap import Referenceable
 
 from allmydata.util import idlib, hashutil
 from allmydata import encode, storageserver
-from allmydata.uri import pack_uri
+from allmydata.uri import pack_uri, pack_lit
 from allmydata.interfaces import IUploadable, IUploader
 from allmydata.Crypto.Cipher import AES
 
 from cStringIO import StringIO
 import collections, random
 
+
 class HaveAllPeersError(Exception):
     # we use this to jump out of the loop
     pass
@@ -261,6 +262,20 @@ class FileUploader:
                         size=self._size,
                         )
 
+class LiteralUploader:
+
+    def __init__(self, client, options={}):
+        self._client = client
+        self._options = options
+
+    def set_filehandle(self, filehandle):
+        self._filehandle = filehandle
+
+    def start(self):
+        self._filehandle.seek(0)
+        data = self._filehandle.read()
+        return defer.succeed(pack_lit(data))
+
 class FileName:
     implements(IUploadable)
 
@@ -296,6 +311,7 @@ class Uploader(service.MultiService):
     implements(IUploader)
     name = "uploader"
     uploader_class = FileUploader
+    URI_LIT_SIZE_THRESHOLD = 55
 
     needed_shares = 25 # Number of shares required to reconstruct a file.
     desired_shares = 75 # We will abort an upload unless we can allocate space for at least this many.
@@ -341,12 +357,20 @@ class Uploader(service.MultiService):
         assert self.running
         f = IUploadable(f)
         fh = f.get_filehandle()
-        u = self.uploader_class(self.parent, options)
-        u.set_filehandle(fh)
-        u.set_params(self.needed_shares, self.desired_shares, self.total_shares)
-        plaintext_hash, key, crypttext_hash = self.compute_id_strings(fh)
-        u.set_encryption_key(key)
-        u.set_id_strings(crypttext_hash, plaintext_hash)
+        fh.seek(0,2)
+        size = fh.tell()
+        fh.seek(0)
+        if size <= self.URI_LIT_SIZE_THRESHOLD:
+            u = LiteralUploader(self.parent, options)
+            u.set_filehandle(fh)
+        else:
+            u = self.uploader_class(self.parent, options)
+            u.set_filehandle(fh)
+            u.set_params(self.needed_shares, self.desired_shares,
+                         self.total_shares)
+            plaintext_hash, key, crypttext_hash = self.compute_id_strings(fh)
+            u.set_encryption_key(key)
+            u.set_id_strings(crypttext_hash, plaintext_hash)
         d = u.start()
         def _done(res):
             f.close_filehandle(fh)
diff --git a/src/allmydata/uri.py b/src/allmydata/uri.py
index f50736d2..02a2725a 100644
--- a/src/allmydata/uri.py
+++ b/src/allmydata/uri.py
@@ -2,6 +2,16 @@
 import re
 from allmydata.util import idlib, hashutil
 
+def get_uri_type(uri):
+    assert uri.startswith("URI:")
+    if uri.startswith("URI:DIR:"):
+        return "DIR"
+    if uri.startswith("URI:DIR-RO:"):
+        return "DIR-RO"
+    if uri.startswith("URI:LIT:"):
+        return "LIT"
+    return "CHK"
+
 # the URI shall be an ascii representation of the file. It shall contain
 # enough information to retrieve and validate the contents. It shall be
 # expressed in a limited character set (namely [TODO]).
@@ -87,6 +97,15 @@ def unpack_extension_readable(data):
             unpacked[k] = idlib.b2a(unpacked[k])
     return unpacked
 
+def pack_lit(data):
+    return "URI:LIT:%s" % idlib.b2a(data)
+
+def unpack_lit(uri):
+    assert uri.startswith("URI:LIT:")
+    data_s = uri[len("URI:LIT:"):]
+    return idlib.a2b(data_s)
+
+
 def is_dirnode_uri(uri):
     return uri.startswith("URI:DIR:") or uri.startswith("URI:DIR-RO:")
 def is_mutable_dirnode_uri(uri):