]> git.rkrishnan.org Git - tahoe-lafs/tahoe-lafs.git/blob - contrib/fuse/impl_b/pyfuse/mirrorfs.py
add Protovis.js-based download-status timeline visualization
[tahoe-lafs/tahoe-lafs.git] / contrib / fuse / impl_b / pyfuse / mirrorfs.py
1 """
2 For reading and caching from a slow file system (e.g. DVDs or network).
3
4     python mirrorfs.py <sourcedir> <cachedir> <mountpoint>
5
6 Makes <mountpoint> show a read-only copy of the files in <sourcedir>,
7 caching all data ever read in the <cachedir> to avoid reading it
8 twice.  This script also features optimistic read-ahead: once a
9 file is accessed, and as long as no other file is accessed, the
10 whole file is read and cached as fast as the <sourcedir> will
11 provide it.
12
13 You have to clean up <cachedir> manually before mounting a modified
14 or different <sourcedir>.
15 """
16 import sys, os, posixpath, stat
17
# Some ways of running this source leave __file__ undefined; in that case
# fall back to the script path given on the command line.
if '__file__' not in globals():
    __file__ = sys.argv[0]
# Absolute path of the directory that contains this module.
this_dir = os.path.split(os.path.abspath(__file__))[0]

# ____________________________________________________________

# The imports below are looked up in the parent of this directory.
sys.path.append(os.path.split(this_dir)[0])
27 from blockfs import valuetree
28 from handler import Handler
29 import greenhandler, greensock
30 from objectfs import ObjectFs
31
# Size in bytes of one cache block: the source is read, and the raw cache
# file is written, in chunks of this size (64 KiB).
BLOCKSIZE = 64 * 1024
33
class MirrorFS(ObjectFs):
    """Object file system mirroring <srcdir>, with a persistent cache.

    Two files are kept in <cachedir>:

    * 'table' -- a ValueTree mapping string keys to 1-tuples:
        ''            -> (-1,)      written once the initial scan is done
        '<path>'      -> (-1,)      for a directory, (size,) for a file
        '<path>/<n>'  -> (offset,)  of block n inside 'raw', or (-2,) if
                                    the source had no data at that block
    * 'raw'   -- the cached blocks, BLOCKSIZE bytes each, appended in the
                 order in which they were first read.
    """
    rawfd = None
    # Optional lock used by __getitem__.  It defaults to None (no locking)
    # because nothing in this class creates one; code that assigns a lock
    # object to this attribute still gets the acquire/release behaviour.
    tablelock = None

    def __init__(self, srcdir, cachedir):
        """Open (or create) the cache in `cachedir` for the tree `srcdir`.

        The source tree is scanned only if the table has no '' key yet,
        i.e. if no earlier scan ran to completion.  Also starts the
        read-ahead greenlet.
        """
        self.srcdir = srcdir
        self.cachedir = cachedir
        self.table = valuetree.ValueTree(os.path.join(cachedir, 'table'), 'q')
        if '' not in self.table:
            self.initial_read_dir('')
            # The root entry doubles as the "scan completed" marker, which
            # is why it is written last.
            self.table[''] = -1,
        try:
            self.rawfile = open(os.path.join(cachedir, 'raw'), 'r+b')
        except IOError:
            # No usable existing file: start a new, empty one.
            self.rawfile = open(os.path.join(cachedir, 'raw'), 'w+b')
        ObjectFs.__init__(self, DirNode(self, ''))
        # Either None or a (path, blocknum) pair: the next block that the
        # read-ahead loop should fetch.
        self.readahead_at = None
        greenhandler.autogreenlet(self.readahead)

    def close(self):
        """Close the raw block file and the table."""
        try:
            # The original never closed the raw file.  It is closed first
            # so that block data is flushed before the table that refers
            # to it is closed.
            self.rawfile.close()
        finally:
            self.table.close()

    def readahead(self):
        """Endless loop fetching the block named by `self.readahead_at`.

        Each successful readblock() call schedules the following block, so
        once a file has been touched the loop keeps walking through it
        until EOFError, or until a read of another uncached block points
        `readahead_at` somewhere else.
        """
        while True:
            greensock.sleep(0.001)
            while not self.readahead_at:
                greensock.sleep(1)
            path, blocknum = self.readahead_at
            self.readahead_at = None
            try:
                self.readblock(path, blocknum, really=False)
            except EOFError:
                # End of that file: nothing more to schedule.
                pass

    def initial_read_dir(self, path):
        """Recursively record every entry below `path` in the table.

        `path` is relative to the source directory ('' for the root).
        Directories are stored as (-1,), anything else as (st_size,).
        """
        # Same output as the Python 2 statement "print <message>, path".
        print('%s %s' % ('Reading initial directory structure...', path))
        dirname = os.path.join(self.srcdir, path)
        for name in os.listdir(dirname):
            filename = os.path.join(dirname, name)
            st = os.stat(filename)
            if stat.S_ISDIR(st.st_mode):
                self.initial_read_dir(posixpath.join(path, name))
                q = -1
            else:
                q = st.st_size
            self.table[posixpath.join(path, name)] = q,

    def __getitem__(self, key):
        """Return the table entry for `key`.

        The lookup is done under `self.tablelock` when one is set.  The
        original acquired the lock unconditionally although the attribute
        was never defined, so every call failed with AttributeError.
        """
        lock = self.tablelock
        if lock is None:
            return self.table[key]
        lock.acquire()
        try:
            return self.table[key]
        finally:
            lock.release()

    def readblock(self, path, blocknum, really=True):
        """Return block number `blocknum` of the file `path`.

        If the block is not cached yet it is read from the source, padded
        with NUL bytes up to BLOCKSIZE, appended to the raw file and
        returned.  If it is already cached, it is read back from the raw
        file, unless `really` is false, in which case None is returned
        (this is what the read-ahead loop uses).

        On success the following block is scheduled for read-ahead if
        nothing else is scheduled.

        Raises EOFError if the source has no data at that block.
        """
        s = '%s/%d' % (path, blocknum)
        try:
            q, = self.table[s]
        except KeyError:
            print(s)
            # A cache miss cancels any pending read-ahead, so that the
            # file being read now is the one scheduled below.
            self.readahead_at = None
            f = open(os.path.join(self.srcdir, path), 'rb')
            try:
                f.seek(blocknum * BLOCKSIZE)
                data = f.read(BLOCKSIZE)
            finally:
                # Close the source file even if seek() or read() fails.
                f.close()
            if not data:
                q = -2
            else:
                # Every stored block is exactly BLOCKSIZE bytes long.
                data += '\x00' * (BLOCKSIZE - len(data))
                self.rawfile.seek(0, 2)
                q = self.rawfile.tell()
                self.rawfile.write(data)
            self.table[s] = q,
            if q == -2:
                raise EOFError
        else:
            if q == -2:
                raise EOFError
            if really:
                self.rawfile.seek(q, 0)
                data = self.rawfile.read(BLOCKSIZE)
            else:
                data = None
        if self.readahead_at is None:
            self.readahead_at = path, blocknum + 1
        return data
119
120
class Node(object):
    """Common base of the directory and file nodes.

    A node only remembers the file system object it belongs to (`mfs`) and
    its own `path`.
    """

    def __init__(self, mfs, path):
        self.mfs, self.path = mfs, path
126
class DirNode(Node):
    """Directory of the mirrored tree."""

    def join(self, name):
        """Return the node for the entry `name` of this directory.

        The table holds -1 for a directory; any other value yields a
        FileNode.  A missing entry raises KeyError from the table lookup.
        """
        path = posixpath.join(self.path, name)
        q, = self.mfs.table[path]
        if q == -1:
            return DirNode(self.mfs, path)
        else:
            return FileNode(self.mfs, path)

    def listdir(self):
        """Return the names of the direct children of this directory."""
        # Children are keyed '<self.path>/<name>', or just '<name>' in the
        # root, whose path is ''.
        if self.path:
            prefix = self.path + '/'
        else:
            prefix = ''
        result = []
        for key, value in self.mfs.table.iteritemsfrom(self.path):
            if not key.startswith(self.path):
                break
            if not key.startswith(prefix):
                # Either this directory's own key or a sibling whose name
                # merely begins with ours ('ab' while listing 'a').  The
                # original reported such a sibling as a child named 'b'.
                # Keep scanning, because these keys can come before the
                # real children.
                continue
            tail = key[len(prefix):].lstrip('/')
            # Keys with a further '/' are deeper entries or cached blocks.
            if tail and '/' not in tail:
                result.append(tail)
        return result
146
class FileNode(Node):
    """Regular file of the mirrored tree."""

    def size(self):
        """Return the value stored in the table for this path."""
        (length,) = self.mfs.table[self.path]
        return length

    def read(self):
        """Return a new FileStream positioned at the start of the file."""
        stream = FileStream(self.mfs, self.path)
        return stream
155
class FileStream(object):
    """Seekable reader for one file, served block by block from `mfs`."""

    def __init__(self, mfs, path):
        # The size is looked up once, when the stream is created.
        (self.size,) = mfs.table[path]
        self.pos = 0
        self.path = path
        self.mfs = mfs

    def seek(self, p):
        """Move the read position to `p`."""
        self.pos = p

    def read(self, count):
        """Return up to `count` bytes from the current position.

        The read never goes past the recorded size, and the position is
        advanced by the number of bytes returned.
        """
        stop = min(self.pos + count, self.size)
        pieces = []
        while self.pos < stop:
            blocknum, skip = divmod(self.pos, BLOCKSIZE)
            block = self.mfs.readblock(self.path, blocknum)
            # Take what is still wanted from this block, starting at the
            # offset of the current position inside it.
            piece = block[skip:skip + (stop - self.pos)]
            assert len(piece) > 0
            pieces.append(piece)
            self.pos += len(piece)
        return ''.join(pieces)
179
180 # ____________________________________________________________
181
if __name__ == '__main__':
    import sys
    # Exactly three arguments are needed.  Report a wrong count with a
    # usage line instead of the tuple-unpacking ValueError that the
    # assignment below would otherwise raise.
    if len(sys.argv) != 4:
        sys.stderr.write('usage: %s <sourcedir> <cachedir> <mountpoint>\n'
                         % sys.argv[0])
        sys.exit(2)
    srcdir, cachedir, mountpoint = sys.argv[1:]
    mirrorfs = MirrorFS(srcdir, cachedir)
    try:
        handler = Handler(mountpoint, mirrorfs)
        greenhandler.add_handler(handler)
        greenhandler.mainloop()
    finally:
        # Always close the cache, even when the main loop exits with an
        # exception.
        mirrorfs.close()