From: Brian Warner
Date: Mon, 2 Jun 2008 23:57:01 +0000 (-0700)
Subject: Don't allow uploads of large files (about 12GiB or larger), since they're doomed...
X-Git-Url: https://git.rkrishnan.org/uri?a=commitdiff_plain;h=8c37b8e3af2f4d1b5dd1825f5bf9fdf339d834d6;p=tahoe-lafs%2Ftahoe-lafs.git

Don't allow uploads of large files (about 12GiB or larger), since they're doomed
to be corrupted. Closes #439
---
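(Note below the cut line, not part of the patch: a rough sketch of where the "about 12GiB" figure
comes from. Offsets inside a share are packed as 32-bit big-endian unsigned ints, the ">L" fields
in the struct.pack() call touched below, so nothing inside a share may sit at or beyond 2**32
bytes, and with the default k=3 the file is striped across three such shares. The figures below
ignore the hash-tree and URI-extension overhead that pushes the real limit a little lower, as the
comment in test_too_large notes.)

    import struct

    MAX_OFFSET = 2**32 - 1      # largest value a ">L" (32-bit unsigned) field can hold
    k = 3                       # shares-needed, matching test_too_large below

    print(len(struct.pack(">L", MAX_OFFSET)))    # -> 4: bytes per offset field
    print(k * (MAX_OFFSET + 1) / (1024.0 ** 3))  # -> 12.0: GiB ceiling, before overhead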
diff --git a/src/allmydata/storage.py b/src/allmydata/storage.py
index 8c086dd0..44316962 100644
--- a/src/allmydata/storage.py
+++ b/src/allmydata/storage.py
@@ -1071,6 +1071,9 @@ def allocated_size(data_size, num_segments, num_share_hashes,
     uri_extension_starts_at = wbp._offsets['uri_extension']
     return uri_extension_starts_at + 4 + uri_extension_size
 
+class FileTooLargeError(Exception):
+    pass
+
 class WriteBucketProxy:
     implements(IStorageBucketWriter)
     def __init__(self, rref, data_size, segment_size, num_segments,
@@ -1081,6 +1084,9 @@ class WriteBucketProxy:
         self._num_segments = num_segments
         self._nodeid = nodeid
 
+        if segment_size >= 2**32 or data_size >= 2**32:
+            raise FileTooLargeError("This file is too large to be uploaded (data_size).")
+
         effective_segments = mathutil.next_power_of_k(num_segments,2)
         self._segment_hash_size = (2*effective_segments - 1) * HASH_SIZE
         # how many share hashes are included in each share? This will be
@@ -1103,6 +1109,9 @@ class WriteBucketProxy:
         x += self._share_hash_size
         offsets['uri_extension'] = x
 
+        if x >= 2**32:
+            raise FileTooLargeError("This file is too large to be uploaded (offsets).")
+
         offset_data = struct.pack(">LLLLLLLLL",
                                   1, # version number
                                   segment_size,
diff --git a/src/allmydata/test/test_upload.py b/src/allmydata/test/test_upload.py
index 798badb2..e0f2c93d 100644
--- a/src/allmydata/test/test_upload.py
+++ b/src/allmydata/test/test_upload.py
@@ -3,12 +3,14 @@ import os
 from twisted.trial import unittest
 from twisted.python.failure import Failure
 from twisted.python import log
+from twisted.internet import defer
 from cStringIO import StringIO
 
-from allmydata import upload, encode, uri
+from allmydata import upload, encode, uri, storage
 from allmydata.interfaces import IFileURI
 from allmydata.util.assertutil import precondition
 from allmydata.util.deferredutil import DeferredListShouldSucceed
+from allmydata.util.testutil import ShouldFailMixin
 from foolscap import eventual
 
 MiB = 1024*1024
@@ -156,6 +158,26 @@ class FakeClient:
     def get_cancel_secret(self):
         return ""
 
+class GiganticUploadable(upload.FileHandle):
+    def __init__(self, size):
+        self._size = size
+        self._fp = 0
+
+    def get_encryption_key(self):
+        return defer.succeed("\x00" * 16)
+    def get_size(self):
+        return defer.succeed(self._size)
+    def read(self, length):
+        left = self._size - self._fp
+        length = min(left, length)
+        self._fp += length
+        if self._fp > 1000000:
+            # terminate the test early.
+            raise RuntimeError("we shouldn't be allowed to get this far")
+        return defer.succeed(["\x00" * length])
+    def close(self):
+        pass
+
 DATA = """
 Once upon a time, there was a beautiful princess named Buttercup.
 She lived in a magical land where every file was stored securely among millions of
@@ -178,7 +200,7 @@ def upload_filehandle(uploader, fh):
     u = upload.FileHandle(fh, convergence=None)
     return uploader.upload(u)
 
-class GoodServer(unittest.TestCase):
+class GoodServer(unittest.TestCase, ShouldFailMixin):
     def setUp(self):
         self.node = FakeClient(mode="good")
         self.u = upload.Uploader()
@@ -210,6 +232,27 @@ class GoodServer(unittest.TestCase):
     def get_data(self, size):
         return DATA[:size]
 
+    def test_too_large(self):
+        # we currently impose a sizelimit on uploaded files, because of
+        # limitations in the share format (see ticket #346 for details). The
+        # limit is set to ensure that no share is larger than 4GiB. Make sure
+        # that we reject files larger than that.
+        k = 3; happy = 7; n = 10
+        self.set_encoding_parameters(k, happy, n)
+        data1 = GiganticUploadable(k*4*1024*1024*1024)
+        d = self.shouldFail(storage.FileTooLargeError, "test_too_large-data1",
+                            "This file is too large to be uploaded (data_size)",
+                            self.u.upload, data1)
+        data2 = GiganticUploadable(k*4*1024*1024*1024-3)
+        d.addCallback(lambda res:
+                      self.shouldFail(storage.FileTooLargeError,
+                                      "test_too_large-data2",
+                                      "This file is too large to be uploaded (offsets)",
+                                      self.u.upload, data2))
+        # I don't know where the actual limit is.. it depends upon how large
+        # the hash trees wind up. It's somewhere close to k*4GiB-ln2(size).
+        return d
+
     def test_data_zero(self):
         data = self.get_data(SIZE_ZERO)
         d = upload_data(self.u, data)
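(Note, not part of the patch: a hypothetical caller-side sketch. The helper name
upload_or_complain is made up; it assumes storage.FileTooLargeError propagates through the
Deferred returned by upload(), which is what the shouldFail() assertions in test_too_large
rely on.)

    from twisted.python import log
    from allmydata import storage

    def upload_or_complain(uploader, uploadable):
        # hypothetical helper: log and swallow oversized uploads instead of
        # letting the failure escape to the caller's errback chain
        d = uploader.upload(uploadable)
        def _too_large(f):
            f.trap(storage.FileTooLargeError)  # anything else keeps propagating
            log.msg("refusing upload: %s" % (f.value,))
        d.addErrback(_too_large)
        return d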