implement URI:LIT, which stores small (<=55B) files inside the URI itself. Fixes #81.
author Brian Warner <warner@allmydata.com>
Thu, 12 Jul 2007 20:22:36 +0000 (13:22 -0700)
committer Brian Warner <warner@allmydata.com>
Thu, 12 Jul 2007 20:22:36 +0000 (13:22 -0700)
src/allmydata/test/test_system.py
src/allmydata/test/test_upload.py
src/allmydata/test/test_uri.py [new file with mode: 0644]
src/allmydata/upload.py
src/allmydata/uri.py

index cbc3965146d743c68c37071fe81f41ddac66d735..066746cce2134698ecc1e96a96349530c6613f10 100644 (file)
@@ -23,6 +23,11 @@ def flush_but_dont_ignore(res):
     d.addCallback(_done)
     return d
 
+LARGE_DATA = """
+This is some data to publish to the virtual drive, which needs to be large
+enough to not fit inside a LIT uri.
+"""
+
 class SystemTest(testutil.SignalMixin, unittest.TestCase):
 
     def setUp(self):
@@ -234,7 +239,7 @@ class SystemTest(testutil.SignalMixin, unittest.TestCase):
 
     def test_vdrive(self):
         self.basedir = "system/SystemTest/test_vdrive"
-        self.data = DATA = "Some data to publish to the virtual drive\n"
+        self.data = LARGE_DATA
         d = self.set_up_nodes()
         d.addCallback(self.log, "starting publish")
         d.addCallback(self._do_publish)
@@ -568,7 +573,7 @@ class SystemTest(testutil.SignalMixin, unittest.TestCase):
         self.failUnless("size: %d\n" % len(self.data) in output)
         self.failUnless("num_segments: 1\n" in output)
         # segment_size is always a multiple of needed_shares
-        self.failUnless("segment_size: 50\n" in output)
+        self.failUnless("segment_size: 125\n" in output)
         self.failUnless("total_shares: 100\n" in output)
         # keys which are supposed to be present
         for key in ("size", "num_segments", "segment_size",
index f1372d5e91eec863e3941b7f300a8677125b88be..43548831bfbc5ea9663876fa6fb6d9ceb2b1fc41 100644 (file)
@@ -4,7 +4,7 @@ from twisted.python.failure import Failure
 from cStringIO import StringIO
 
 from allmydata import upload, encode
-from allmydata.uri import unpack_uri
+from allmydata.uri import unpack_uri, unpack_lit
 
 from test_encode import FakePeer
 
@@ -15,6 +15,18 @@ class FakeClient:
         return [ ("%20d"%fakeid, "%20d"%fakeid, FakePeer(self.mode),)
                  for fakeid in range(50) ]
 
+DATA = """
+Once upon a time, there was a beautiful princess named Buttercup. She lived
+in a magical land where every file was stored securely among millions of
+machines, and nobody ever worried about their data being lost ever again.
+The End.
+"""
+assert len(DATA) > upload.Uploader.URI_LIT_SIZE_THRESHOLD
+
+SIZE_ZERO = 0
+SIZE_SMALL = 16
+SIZE_LARGE = len(DATA)
+
 class GoodServer(unittest.TestCase):
     def setUp(self):
         self.node = FakeClient(mode="good")
@@ -22,7 +34,13 @@ class GoodServer(unittest.TestCase):
         self.u.running = True
         self.u.parent = self.node
 
-    def _check(self, uri):
+    def _check_small(self, uri, size):
+        self.failUnless(isinstance(uri, str))
+        self.failUnless(uri.startswith("URI:LIT:"))
+        d = unpack_lit(uri)
+        self.failUnlessEqual(len(d), size)
+
+    def _check_large(self, uri, size):
         self.failUnless(isinstance(uri, str))
         self.failUnless(uri.startswith("URI:"))
         d = unpack_uri(uri)
@@ -30,31 +48,76 @@ class GoodServer(unittest.TestCase):
         self.failUnlessEqual(len(d['storage_index']), 32)
         self.failUnless(isinstance(d['key'], str))
         self.failUnlessEqual(len(d['key']), 16)
+        self.failUnlessEqual(d['size'], size)
+
+    def get_data(self, size):
+        return DATA[:size]
+
+    def test_data_zero(self):
+        data = self.get_data(SIZE_ZERO)
+        d = self.u.upload_data(data)
+        d.addCallback(self._check_small, SIZE_ZERO)
+        return d
 
-    def testData(self):
-        data = "This is some data to upload"
+    def test_data_small(self):
+        data = self.get_data(SIZE_SMALL)
         d = self.u.upload_data(data)
-        d.addCallback(self._check)
+        d.addCallback(self._check_small, SIZE_SMALL)
         return d
-    testData.timeout = 300
 
-    def testFileHandle(self):
-        data = "This is some data to upload"
+    def test_data_large(self):
+        data = self.get_data(SIZE_LARGE)
+        d = self.u.upload_data(data)
+        d.addCallback(self._check_large, SIZE_LARGE)
+        return d
+
+    def test_filehandle_zero(self):
+        data = self.get_data(SIZE_ZERO)
         d = self.u.upload_filehandle(StringIO(data))
-        d.addCallback(self._check)
+        d.addCallback(self._check_small, SIZE_ZERO)
+        return d
+
+    def test_filehandle_small(self):
+        data = self.get_data(SIZE_SMALL)
+        d = self.u.upload_filehandle(StringIO(data))
+        d.addCallback(self._check_small, SIZE_SMALL)
+        return d
+
+    def test_filehandle_large(self):
+        data = self.get_data(SIZE_LARGE)
+        d = self.u.upload_filehandle(StringIO(data))
+        d.addCallback(self._check_large, SIZE_LARGE)
+        return d
+
+    def test_filename_zero(self):
+        fn = "Uploader-test_filename_zero.data"
+        f = open(fn, "wb")
+        data = self.get_data(SIZE_ZERO)
+        f.write(data)
+        f.close()
+        d = self.u.upload_filename(fn)
+        d.addCallback(self._check_small, SIZE_ZERO)
+        return d
+
+    def test_filename_small(self):
+        fn = "Uploader-test_filename_small.data"
+        f = open(fn, "wb")
+        data = self.get_data(SIZE_SMALL)
+        f.write(data)
+        f.close()
+        d = self.u.upload_filename(fn)
+        d.addCallback(self._check_small, SIZE_SMALL)
         return d
-    testFileHandle.timeout = 300
 
-    def testFilename(self):
-        fn = "Uploader-testFilename.data"
+    def test_filename_large(self):
+        fn = "Uploader-test_filename_large.data"
         f = open(fn, "wb")
-        data = "This is some data to upload"
+        data = self.get_data(SIZE_LARGE)
         f.write(data)
         f.close()
         d = self.u.upload_filename(fn)
-        d.addCallback(self._check)
+        d.addCallback(self._check_large, SIZE_LARGE)
         return d
-    testFilename.test = 300
 
 class FullServer(unittest.TestCase):
     def setUp(self):
@@ -66,8 +129,8 @@ class FullServer(unittest.TestCase):
     def _should_fail(self, f):
         self.failUnless(isinstance(f, Failure) and f.check(encode.NotEnoughPeersError))
 
-    def testData(self):
-        data = "This is some data to upload"
+    def test_data_large(self):
+        data = DATA
         d = self.u.upload_data(data)
         d.addBoth(self._should_fail)
         return d
diff --git a/src/allmydata/test/test_uri.py b/src/allmydata/test/test_uri.py
new file mode 100644 (file)
index 0000000..6f2c64e
--- /dev/null
@@ -0,0 +1,84 @@
+
+from twisted.trial import unittest
+from allmydata import uri
+from allmydata.util import hashutil
+
+class LIT(unittest.TestCase):
+    def test_pack(self):
+        data = "This is some small data"
+        u = uri.pack_lit(data)
+        self.failUnlessEqual(uri.get_uri_type(u), "LIT")
+        self.failUnlessEqual(uri.unpack_lit(u), data)
+
+    def test_nonascii(self):
+        data = "This contains \x00 and URI:LIT: and \n, oh my."
+        u = uri.pack_lit(data)
+        self.failUnlessEqual(uri.get_uri_type(u), "LIT")
+        self.failUnlessEqual(uri.unpack_lit(u), data)
+
+class CHK(unittest.TestCase):
+    def test_pack(self):
+        storage_index = hashutil.tagged_hash("foo", "bar")
+        key = "\x00" * 16
+        uri_extension_hash = hashutil.uri_extension_hash("stuff")
+        needed_shares = 25
+        total_shares = 100
+        size = 1234
+        u = uri.pack_uri(storage_index=storage_index,
+                         key=key,
+                         uri_extension_hash=uri_extension_hash,
+                         needed_shares=needed_shares,
+                         total_shares=total_shares,
+                         size=size)
+        self.failUnlessEqual(uri.get_uri_type(u), "CHK")
+        d = uri.unpack_uri(u)
+        self.failUnlessEqual(d['storage_index'], storage_index)
+        self.failUnlessEqual(d['key'], key)
+        self.failUnlessEqual(d['uri_extension_hash'], uri_extension_hash)
+        self.failUnlessEqual(d['needed_shares'], needed_shares)
+        self.failUnlessEqual(d['total_shares'], total_shares)
+        self.failUnlessEqual(d['size'], size)
+
+class Extension(unittest.TestCase):
+    def test_pack(self):
+        data = {"stuff": "value",
+                "size": 12,
+                "needed_shares": 3,
+                "big_hash": hashutil.tagged_hash("foo", "bar"),
+                }
+        ext = uri.pack_extension(data)
+        d = uri.unpack_extension(ext)
+        self.failUnlessEqual(d["stuff"], "value")
+        self.failUnlessEqual(d["size"], 12)
+        self.failUnlessEqual(d["big_hash"], hashutil.tagged_hash("foo", "bar"))
+
+        readable = uri.unpack_extension_readable(ext)
+
+class Dirnode(unittest.TestCase):
+    def test_pack(self):
+        furl = "pb://stuff@morestuff:stuff/andstuff"
+        writekey = "\x01" * 16
+
+        u = uri.pack_dirnode_uri(furl, writekey)
+        self.failUnless(uri.is_dirnode_uri(u))
+        self.failIf(uri.is_dirnode_uri("NOT A DIRNODE URI"))
+        self.failIf(uri.is_dirnode_uri("URI:stuff"))
+        self.failUnless(uri.is_mutable_dirnode_uri(u))
+        self.failIf(uri.is_mutable_dirnode_uri("NOT A DIRNODE URI"))
+        self.failIf(uri.is_mutable_dirnode_uri("URI:stuff"))
+        self.failUnlessEqual(uri.get_uri_type(u), "DIR")
+
+        rou = uri.make_immutable_dirnode_uri(u)
+        self.failUnless(uri.is_dirnode_uri(rou))
+        self.failIf(uri.is_mutable_dirnode_uri(rou))
+        self.failUnlessEqual(uri.get_uri_type(rou), "DIR-RO")
+
+        d = uri.unpack_dirnode_uri(u)
+        self.failUnlessEqual(d[0], furl)
+        self.failUnlessEqual(d[1], writekey)
+
+        d2 = uri.unpack_dirnode_uri(rou)
+        self.failUnlessEqual(d2[0], furl)
+        rk = hashutil.dir_read_key_hash(writekey)
+        self.failUnlessEqual(d2[1], rk)
+
index 6b42ac3ec5ccfec9d5788420f14acef2f09aa852..7b5a6b060eb0f35deba365d754cb3678d011153c 100644 (file)
@@ -6,13 +6,14 @@ from foolscap import Referenceable
 
 from allmydata.util import idlib, hashutil
 from allmydata import encode, storageserver
-from allmydata.uri import pack_uri
+from allmydata.uri import pack_uri, pack_lit
 from allmydata.interfaces import IUploadable, IUploader
 from allmydata.Crypto.Cipher import AES
 
 from cStringIO import StringIO
 import collections, random
 
+
 class HaveAllPeersError(Exception):
     # we use this to jump out of the loop
     pass
@@ -261,6 +262,20 @@ class FileUploader:
                         size=self._size,
                         )
 
+class LiteralUploader:
+
+    def __init__(self, client, options={}):
+        self._client = client
+        self._options = options
+
+    def set_filehandle(self, filehandle):
+        self._filehandle = filehandle
+
+    def start(self):
+        self._filehandle.seek(0)
+        data = self._filehandle.read()
+        return defer.succeed(pack_lit(data))
+
 
 class FileName:
     implements(IUploadable)
@@ -296,6 +311,7 @@ class Uploader(service.MultiService):
     implements(IUploader)
     name = "uploader"
     uploader_class = FileUploader
+    URI_LIT_SIZE_THRESHOLD = 55
 
     needed_shares = 25 # Number of shares required to reconstruct a file.
     desired_shares = 75 # We will abort an upload unless we can allocate space for at least this many.
@@ -341,12 +357,20 @@ class Uploader(service.MultiService):
         assert self.running
         f = IUploadable(f)
         fh = f.get_filehandle()
-        u = self.uploader_class(self.parent, options)
-        u.set_filehandle(fh)
-        u.set_params(self.needed_shares, self.desired_shares, self.total_shares)
-        plaintext_hash, key, crypttext_hash = self.compute_id_strings(fh)
-        u.set_encryption_key(key)
-        u.set_id_strings(crypttext_hash, plaintext_hash)
+        fh.seek(0,2)
+        size = fh.tell()
+        fh.seek(0)
+        if size <= self.URI_LIT_SIZE_THRESHOLD:
+            u = LiteralUploader(self.parent, options)
+            u.set_filehandle(fh)
+        else:
+            u = self.uploader_class(self.parent, options)
+            u.set_filehandle(fh)
+            u.set_params(self.needed_shares, self.desired_shares,
+                         self.total_shares)
+            plaintext_hash, key, crypttext_hash = self.compute_id_strings(fh)
+            u.set_encryption_key(key)
+            u.set_id_strings(crypttext_hash, plaintext_hash)
         d = u.start()
         def _done(res):
             f.close_filehandle(fh)
index f50736d2ef35227c733c2d01010377fd851eb51a..02a2725ae2cdabe9163c6fe57cdd515208919ce0 100644 (file)
@@ -2,6 +2,16 @@
 import re
 from allmydata.util import idlib, hashutil
 
+def get_uri_type(uri):
+    assert uri.startswith("URI:")
+    if uri.startswith("URI:DIR:"):
+        return "DIR"
+    if uri.startswith("URI:DIR-RO:"):
+        return "DIR-RO"
+    if uri.startswith("URI:LIT:"):
+        return "LIT"
+    return "CHK"
+
 # the URI shall be an ascii representation of the file. It shall contain
 # enough information to retrieve and validate the contents. It shall be
 # expressed in a limited character set (namely [TODO]).
@@ -87,6 +97,15 @@ def unpack_extension_readable(data):
             unpacked[k] = idlib.b2a(unpacked[k])
     return unpacked
 
+def pack_lit(data):
+    return "URI:LIT:%s" % idlib.b2a(data)
+
+def unpack_lit(uri):
+    assert uri.startswith("URI:LIT:")
+    data_s = uri[len("URI:LIT:"):]
+    return idlib.a2b(data_s)
+
+
 def is_dirnode_uri(uri):
     return uri.startswith("URI:DIR:") or uri.startswith("URI:DIR-RO:")
 def is_mutable_dirnode_uri(uri):