import os.path
import time
import urllib
import simplejson
import datetime
from allmydata.scripts.common import get_alias, escape_path, DEFAULT_ALIAS, \
                                     UnknownAliasError
from allmydata.scripts.common_http import do_http, HTTPError, format_http_error
from allmydata.util import time_format
from allmydata.scripts import backupdb
from allmydata.util.stringutils import listdir_unicode, open_unicode, quote_output, to_str
from allmydata.util.assertutil import precondition


def get_local_metadata(path):
    metadata = {}

    # posix stat(2) metadata, depends on the platform
    os.stat_float_times(True)
    s = os.stat(path)
    metadata["ctime"] = s.st_ctime
    metadata["mtime"] = s.st_mtime

    misc_fields = ("st_mode", "st_ino", "st_dev", "st_uid", "st_gid")
    macos_misc_fields = ("st_rsize", "st_creator", "st_type")
    for field in misc_fields + macos_misc_fields:
        if hasattr(s, field):
            metadata[field] = getattr(s, field)

    # TODO: extended attributes, like on OS-X's HFS+
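    # Illustrative result for a regular file on a POSIX system (values are
    # made up):
    #   {"ctime": 1288005600.0, "mtime": 1288005600.0, "st_mode": 33188,
    #    "st_ino": 131072, "st_dev": 2049, "st_uid": 1000, "st_gid": 1000}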
    return metadata

def mkdir(contents, options):
    kids = dict([ (childname, (contents[childname][0],
                               {"ro_uri": contents[childname][1],
                                "metadata": contents[childname][2],
                                }))
                  for childname in contents
                  ])
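    # Each (type, dict) tuple serializes as a two-element JSON list, so the
    # request body looks roughly like (caps abbreviated):
    #   {"childname": ["filenode", {"ro_uri": "URI:CHK:...",
    #                               "metadata": {"mtime": ..., ...}}]}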
    body = simplejson.dumps(kids).encode("utf-8")
    url = options['node-url'] + "uri?t=mkdir-immutable"
    resp = do_http("POST", url, body)
    if resp.status < 200 or resp.status >= 300:
        raise HTTPError("Error during mkdir", resp)

    dircap = to_str(resp.read().strip())
    return dircap

def put_child(dirurl, childname, childcap):
    assert dirurl[-1] == "/"
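    # PUT <dirurl><childname>?t=uri links an existing cap as a child of the
    # directory; only the cap string travels over the wire, not file data.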
    url = dirurl + urllib.quote(childname) + "?t=uri"
    resp = do_http("PUT", url, childcap)
    if resp.status not in (200, 201):
        raise HTTPError("Error during put_child", resp)

class BackupProcessingError(Exception):
    pass

class BackerUpper:
    def __init__(self, options):
        self.options = options
        self.files_uploaded = 0
        self.files_reused = 0
        self.files_checked = 0
        self.files_skipped = 0
        self.directories_created = 0
        self.directories_reused = 0
        self.directories_checked = 0
        self.directories_skipped = 0

    def run(self):
        options = self.options
        nodeurl = options['node-url']
        self.verbosity = 1
        if options['quiet']:
            self.verbosity = 0
        if options['verbose']:
            self.verbosity = 2
        stdout = options.stdout
        stderr = options.stderr

        start_timestamp = datetime.datetime.now()
        self.backupdb = None
        bdbfile = os.path.join(options["node-directory"],
                               "private", "backupdb.sqlite")
        bdbfile = os.path.abspath(bdbfile)
        self.backupdb = backupdb.get_backupdb(bdbfile, stderr)
        if not self.backupdb:
            print >>stderr, "ERROR: Unable to load backup db."
            return 1

        try:
            rootcap, path = get_alias(options.aliases, options.to_dir, DEFAULT_ALIAS)
        except UnknownAliasError, e:
            e.display(stderr)
            return 1
        to_url = nodeurl + "uri/%s/" % urllib.quote(rootcap)
        if path:
            to_url += escape_path(path)
        if not to_url.endswith("/"):
            to_url += "/"

        archives_url = to_url + "Archives/"

        # first step: make sure the target directory exists, as well as the
        # Archives/ subdirectory.
        resp = do_http("GET", archives_url + "?t=json")
        if resp.status == 404:
            resp = do_http("POST", archives_url + "?t=mkdir")
            if resp.status != 200:
                print >>stderr, format_http_error("Unable to create target directory", resp)
                return 1

        # second step: process the tree
        new_backup_dircap = self.process(options.from_dir)

        # third: attach the new backup to the list
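        # iso_utc(..., sep="_") yields e.g. "2011-01-01_12:00:00" (value
        # illustrative); with the trailing "Z" appended this becomes the new
        # child name under Archives/, and the "Latest" link is updated to
        # point at the same snapshot below.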
        now = time_format.iso_utc(int(time.time()), sep="_") + "Z"

        put_child(archives_url, now, new_backup_dircap)
        put_child(to_url, "Latest", new_backup_dircap)
        end_timestamp = datetime.datetime.now()
        # calc elapsed time, omitting microseconds
        elapsed_time = str(end_timestamp - start_timestamp).split('.')[0]

        if self.verbosity >= 1:
            print >>stdout, (" %d files uploaded (%d reused), "
                             "%d files skipped, "
                             "%d directories created (%d reused), "
                             "%d directories skipped"
                             % (self.files_uploaded,
                                self.files_reused,
                                self.files_skipped,
                                self.directories_created,
                                self.directories_reused,
                                self.directories_skipped))
            if self.verbosity >= 2:
                print >>stdout, (" %d files checked, %d directories checked"
                                 % (self.files_checked,
                                    self.directories_checked))
            print >>stdout, " backup done, elapsed time: %s" % elapsed_time

        # The command exits with code 2 if files or directories were skipped
        if self.files_skipped or self.directories_skipped:
            return 2

        # done!
        return 0

    def verboseprint(self, msg):
        precondition(isinstance(msg, str), msg)
        if self.verbosity >= 2:
            print >>self.options.stdout, msg

    def warn(self, msg):
        precondition(isinstance(msg, str), msg)
        print >>self.options.stderr, msg

    def process(self, localpath):
        precondition(isinstance(localpath, unicode), localpath)
        # returns newdircap

        self.verboseprint("processing %s" % quote_output(localpath))
        create_contents = {} # childname -> (type, rocap, metadata)
        compare_contents = {} # childname -> rocap

        try:
            children = listdir_unicode(localpath)
        except EnvironmentError:
            self.directories_skipped += 1
            self.warn("WARNING: permission denied on directory %s" % quote_output(localpath))
            children = []

        for child in self.options.filter_listdir(children):
            childpath = os.path.join(localpath, child)
            child = unicode(child)
            # note: symlinks to directories are both islink() and isdir()
            if os.path.isdir(childpath) and not os.path.islink(childpath):
                metadata = get_local_metadata(childpath)
                # recurse on the child directory
                childcap = self.process(childpath)
                assert isinstance(childcap, str)
                create_contents[child] = ("dirnode", childcap, metadata)
                compare_contents[child] = childcap
            elif os.path.isfile(childpath) and not os.path.islink(childpath):
                try:
                    childcap, metadata = self.upload(childpath)
                    assert isinstance(childcap, str)
                    create_contents[child] = ("filenode", childcap, metadata)
                    compare_contents[child] = childcap
                except EnvironmentError:
                    self.files_skipped += 1
                    self.warn("WARNING: permission denied on file %s" % quote_output(childpath))
            else:
                self.files_skipped += 1
                if os.path.islink(childpath):
                    self.warn("WARNING: cannot back up symlink %s" % quote_output(childpath))
                else:
                    self.warn("WARNING: cannot back up special file %s" % quote_output(childpath))

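        # check_backupdb_directory() returns (must_create, r): r is a result
        # object from the backupdb (or None when there is no backupdb). If an
        # identical directory was created by an earlier backup and is still
        # healthy, r.was_created() returns its dircap for re-use.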
        must_create, r = self.check_backupdb_directory(compare_contents)
        if must_create:
            self.verboseprint(" creating directory for %s" % quote_output(localpath))
            newdircap = mkdir(create_contents, self.options)
            assert isinstance(newdircap, str)
            if r:
                r.did_create(newdircap)
            self.directories_created += 1
            return newdircap
        else:
            self.verboseprint(" re-using old directory for %s" % quote_output(localpath))
            self.directories_reused += 1
            return r.was_created()

    def check_backupdb_file(self, childpath):
        if not self.backupdb:
            return True, None
        use_timestamps = not self.options["ignore-timestamps"]
        r = self.backupdb.check_file(childpath, use_timestamps)

        if not r.was_uploaded():
            return True, r

        if not r.should_check():
            # the file was uploaded or checked recently, so we can just use
            # it
            return False, r

        # we must check the file before using the results
        filecap = r.was_uploaded()
        self.verboseprint("checking %s" % quote_output(filecap))
        nodeurl = self.options['node-url']
        checkurl = nodeurl + "uri/%s?t=check&output=JSON" % urllib.quote(filecap)
        self.files_checked += 1
        resp = do_http("POST", checkurl)
        if resp.status != 200:
            # can't check, so we must assume it's bad
            return True, r

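        # The t=check response is a JSON "check results" object; only the
        # boolean cr["results"]["healthy"] is consulted here.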
        cr = simplejson.loads(resp.read())
        healthy = cr["results"]["healthy"]
        if not healthy:
            # must upload
            return True, r
        # file is healthy, no need to upload
        r.did_check_healthy(cr)
        return False, r

    def check_backupdb_directory(self, compare_contents):
        if not self.backupdb:
            return True, None
        r = self.backupdb.check_directory(compare_contents)

        if not r.was_created():
            return True, r

        if not r.should_check():
            # the directory was created or checked recently, so we can just
            # re-use it
            return False, r

        # we must check the directory before re-using it
        dircap = r.was_created()
        self.verboseprint("checking %s" % quote_output(dircap))
        nodeurl = self.options['node-url']
        checkurl = nodeurl + "uri/%s?t=check&output=JSON" % urllib.quote(dircap)
        self.directories_checked += 1
        resp = do_http("POST", checkurl)
        if resp.status != 200:
            # can't check, so we must assume it's bad
            return True, r

        cr = simplejson.loads(resp.read())
        healthy = cr["results"]["healthy"]
        if not healthy:
            # must create
            return True, r
        # directory is healthy, no need to re-create it
        r.did_check_healthy(cr)
        return False, r

    # This function will raise an IOError exception when called on an unreadable file
    def upload(self, childpath):
        precondition(isinstance(childpath, unicode), childpath)

        #self.verboseprint("uploading %s.." % quote_output(childpath))
        metadata = get_local_metadata(childpath)

        # we can use the backupdb here
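        # check_backupdb_file() mirrors check_backupdb_directory(): it returns
        # (must_upload, results); for an unchanged (and, when a check was due,
        # still-healthy) file, results.was_uploaded() is the old filecap.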
        must_upload, bdb_results = self.check_backupdb_file(childpath)

        if must_upload:
            self.verboseprint("uploading %s.." % quote_output(childpath))
            infileobj = open_unicode(childpath, "rb")
            url = self.options['node-url'] + "uri"
            resp = do_http("PUT", url, infileobj)
            if resp.status not in (200, 201):
                raise HTTPError("Error during file PUT", resp)

            filecap = resp.read().strip()
            self.verboseprint(" %s -> %s" % (quote_output(childpath, quotemarks=False),
                                             quote_output(filecap, quotemarks=False)))
            #self.verboseprint(" metadata: %s" % (quote_output(metadata, quotemarks=False),))

            if bdb_results:
                bdb_results.did_upload(filecap)

            self.files_uploaded += 1
            return filecap, metadata

        else:
            self.verboseprint("skipping %s.." % quote_output(childpath))
            self.files_reused += 1
            return bdb_results.was_uploaded(), metadata

def backup(options):
    bu = BackerUpper(options)
    return bu.run()
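
# Sketch of how this module is driven (the option-parsing class lives in the
# surrounding CLI code and is assumed here): "tahoe backup FROM TO" parses
# its arguments into an options object and then calls:
#
#   rc = backup(options)   # 0 = success, 1 = error, 2 = files/dirs skipped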