2 For reading and caching from slow file system (e.g. DVDs or network).
4 python mirrorfs.py <sourcedir> <cachedir> <mountpoint>
6 Makes <mountpoint> show a read-only copy of the files in <sourcedir>,
7 caching all data ever read in the <cachedir> to avoid reading it
8 twice. This script also features optimistic read-ahead: once a
9 file is accessed, and as long as no other file is accessed, the
10 whole file is read and cached as fast as the <sourcedir> will allow.
13 You have to clean up <cachedir> manually before mounting a modified
14 or different <sourcedir>.
16 import sys, os, posixpath, stat
# Use the script path from the command line as this module's __file__.
# NOTE(review): listing lines 17-20 are missing from this extract;
# presumably this assignment is a guarded fallback that only runs when
# __file__ is not already defined -- confirm against the full file.
21 __file__ = sys.argv[0]
# Absolute directory that contains this script.
22 this_dir = os.path.dirname(os.path.abspath(__file__))
24 # ____________________________________________________________
# Add the parent of this script's directory to the import path, presumably
# so that the imports that follow (blockfs, handler, greenhandler, ...)
# can be found there.
26 sys.path.append(os.path.dirname(this_dir))
27 from blockfs import valuetree
28 from handler import Handler
29 import greenhandler, greensock
30 from objectfs import ObjectFs
# Filesystem object that serves the files of a source directory through an
# on-disk cache made of a ValueTree index ('table') and a 'raw' data file
# to which readblock() appends blocks.
# NOTE(review): this listing has lost its indentation and its embedded
# listing numbers skip values, so some statements of this class are not
# visible here (some of the gaps may just be blank lines).  The comments
# below describe only the visible lines; anything about the gaps is marked
# as an assumption.
34 class MirrorFS(ObjectFs):
# Constructor.
#   srcdir   -- directory being mirrored.
#   cachedir -- directory holding the 'table' index and the 'raw' file.
# NOTE(review): listing line 38 is missing; self.srcdir is read by
# initial_read_dir() and readblock() but no visible line assigns it, so it
# is presumably set there.
37 def __init__(self, srcdir, cachedir):
39 self.cachedir = cachedir
# Index whose values are handled as 1-tuples throughout this file.
# NOTE(review): 'q' is presumably a struct-style format code for one
# 64-bit integer -- confirm in valuetree.
40 self.table = valuetree.ValueTree(os.path.join(cachedir, 'table'), 'q')
# If the index has no entry for the root path '', scan the source tree.
41 if '' not in self.table:
42 self.initial_read_dir('')
# Two ways of opening the raw cache file: 'r+b' requires an existing file
# and keeps its contents, 'w+b' creates or truncates it.
# NOTE(review): listing lines 43-44 and 46 are missing; presumably the
# second open is a fallback used only when the first one fails -- confirm.
45 self.rawfile = open(os.path.join(cachedir, 'raw'), 'r+b')
47 self.rawfile = open(os.path.join(cachedir, 'raw'), 'w+b')
# The root object handed to ObjectFs is the DirNode for the empty path.
48 ObjectFs.__init__(self, DirNode(self, ''))
# Pending read-ahead request as a (path, blocknum) pair, or None.
49 self.readahead_at = None
# Hand the read-ahead loop to greenhandler.autogreenlet.
# NOTE(review): presumably this runs it as a background greenlet -- confirm.
50 greenhandler.autogreenlet(self.readahead)
# Read-ahead loop.
# NOTE(review): the 'def' line and other lines (listing 51-56, 59, 62 and
# 64-66) are missing from this extract; the lines below are presumably the
# body of the self.readahead method passed to autogreenlet above.
57 greensock.sleep(0.001)
# Loop for as long as no read-ahead request is pending (loop body missing).
58 while not self.readahead_at:
# Take the pending request and clear the slot before servicing it.
60 path, blocknum = self.readahead_at
61 self.readahead_at = None
# NOTE(review): the use of `really` inside readblock() is not visible in
# this extract; really=False presumably asks it to cache the block without
# reading the data back -- confirm.
63 self.readblock(path, blocknum, really=False)
# Walk the source directory below the relative `path`, recursing into
# subdirectories, and store entries in self.table.
#   path -- directory to scan, relative to self.srcdir ('' for the root).
67 def initial_read_dir(self, path):
# Python 2 print statement: progress message naming the directory.
68 print 'Reading initial directory structure...', path
69 dirname = os.path.join(self.srcdir, path)
70 for name in os.listdir(dirname):
71 filename = os.path.join(dirname, name)
72 st = os.stat(filename)
73 if stat.S_ISDIR(st.st_mode):
74 self.initial_read_dir(posixpath.join(path, name))
# NOTE(review): listing lines 75-77, which must assign `q`, are missing.
# FileStream unpacks the table entry of a file as its size, so for regular
# files q is presumably st.st_size; the value stored for directories is
# not visible here.
# Table keys are '/'-joined (posixpath) paths relative to srcdir, and the
# value is stored as a 1-tuple.
78 self.table[posixpath.join(path, name)] = q,
# Look up `key` in the table between tablelock.acquire() and release().
# NOTE(review): listing lines 82 and 84 are missing; a release() placed
# after the return can only run inside try/finally, so that is presumably
# what they contain.  tablelock is not assigned on any visible line.
80 def __getitem__(self, key):
81 self.tablelock.acquire()
83 return self.table[key]
85 self.tablelock.release()
# Fetch block number `blocknum` of the file `path`.
#   path     -- file path relative to self.srcdir.
#   blocknum -- block index; the block starts at byte blocknum * BLOCKSIZE.
#   really   -- NOTE(review): only used on lines missing from this extract.
# NOTE(review): listing lines 89-92, 97-100, 105-111, 114-115 and 118-119
# are missing, including the branching between the two groups of
# statements below and the return statement.  Presumably the first group
# is the cache-miss path and the second the cache-hit path -- confirm.
87 def readblock(self, path, blocknum, really=True):
# String of the form "<path>/<blocknum>"; presumably the table key for
# this block (its use is on the missing lines).
88 s = '%s/%d' % (path, blocknum)
# Drop any pending read-ahead request, then read the block from the
# source file.
93 self.readahead_at = None
94 f = open(os.path.join(self.srcdir, path), 'rb')
95 f.seek(blocknum * BLOCKSIZE)
96 data = f.read(BLOCKSIZE)
# Pad with NUL bytes up to BLOCKSIZE (a no-op when the block is full).
101 data += '\x00' * (BLOCKSIZE - len(data))
# Append the block at the end of the raw file; q is its byte offset there.
102 self.rawfile.seek(0, 2)
103 q = self.rawfile.tell()
104 self.rawfile.write(data)
# Read one block back from byte offset q of the raw file.
112 self.rawfile.seek(q, 0)
113 data = self.rawfile.read(BLOCKSIZE)
# Unless a request is already pending, request read-ahead of the next
# block of the same file.
116 if self.readahead_at is None:
117 self.readahead_at = path, blocknum + 1
# Node constructor taking the owning filesystem object (`mfs`) and a
# relative `path`.
# NOTE(review): the enclosing class header and the body (listing lines
# 118-122 and 124-128) are missing from this extract.  Later code reads
# self.mfs and self.path, so presumably both are stored here, and
# presumably this belongs to the Node base class that FileNode derives
# from -- confirm against the full file.
123 def __init__(self, mfs, path):
# Return the node for the child called `name` below this node's path.
# NOTE(review): listing lines 132 and 134 are missing; they must hold the
# test on q that chooses between the DirNode and the FileNode return.  The
# enclosing class header is not visible either (presumably DirNode).
129 def join(self, name):
130 path = posixpath.join(self.path, name)
# Table values are 1-tuples, hence the `q, =` unpacking.
131 q, = self.mfs.table[path]
133 return DirNode(self.mfs, path)
135 return FileNode(self.mfs, path)
# Scan of the table entries obtained from iteritemsfrom(self.path).
# NOTE(review): the 'def' line (somewhere in listing lines 136-138) and
# lines 141 and 144-146 are missing, so neither the method name nor what
# is done with each matching entry is visible in this extract.
139 for key, value in self.mfs.table.iteritemsfrom(self.path):
# Keys that do not start with self.path are outside this directory.
# NOTE(review): the action taken (line 141) is missing -- presumably the
# scan stops here.
140 if not key.startswith(self.path):
# Part of the key after self.path, without leading slashes.
142 tail = key[len(self.path):].lstrip('/')
# A non-empty tail containing no '/' names a direct child rather than a
# deeper descendant.
143 if tail and '/' not in tail:
# Node subclass that hands out FileStream objects for its path.
# NOTE(review): the method headers (listing lines 148-149 and 151-153) are
# missing from this extract, so the names of the two methods whose bodies
# appear below are not visible.
147 class FileNode(Node):
# Table value for this path, unpacked from its 1-tuple; FileStream unpacks
# the same entry as the file size.
150 q, = self.mfs.table[self.path]
# Create a new FileStream for this file.
154 return FileStream(self.mfs, self.path)
# Reader that serves the contents of one file block by block through
# mfs.readblock().
# NOTE(review): listing lines 157, 159-161, 163-166, 168, 173 and 175-176
# are missing from this extract.  self.mfs, self.path, self.pos and the
# local `result` are all used below without a visible assignment, so they
# are presumably set on those lines.
156 class FileStream(object):
#   mfs  -- filesystem object providing `table` and `readblock`.
#   path -- path of the file, used as the table key.
158 def __init__(self, mfs, path):
# The table entry for the file is a 1-tuple holding its size.
162 self.size, = self.mfs.table[path]
# Read up to `count` bytes starting at self.pos, advance self.pos, and
# return the pieces joined into one string.
167 def read(self, count):
# Clamp the end of the read to the file size.
169 end = min(self.pos + count, self.size)
170 while self.pos < end:
# Number of the block containing self.pos, and the offset of self.pos
# inside that block.
171 blocknum, offset = divmod(self.pos, BLOCKSIZE)
172 data = self.mfs.readblock(self.path, blocknum)
# NOTE(review): `offset` is not used on any visible line; presumably the
# missing line 173 drops the first `offset` bytes of the block.
# Cut the piece so that the read stops at `end`.
174 data = data[:end - self.pos]
177 self.pos += len(data)
178 return ''.join(result)
180 # ____________________________________________________________
# Script entry point: build a MirrorFS for the given directories and serve
# it at the mount point through a Handler.
# NOTE(review): listing lines 183 and 186 are missing, and the block may
# continue past the last visible line of this extract.
182 if __name__ == '__main__':
# Exactly three command-line arguments are required; any other count makes
# this unpacking raise ValueError.
184 srcdir, cachedir, mountpoint = sys.argv[1:]
185 mirrorfs = MirrorFS(srcdir, cachedir)
187 handler = Handler(mountpoint, mirrorfs)
188 greenhandler.add_handler(handler)
# NOTE(review): presumably blocks while requests are being served --
# confirm in greenhandler.
189 greenhandler.mainloop()