7 from allmydata.scripts.common import get_alias, escape_path, DEFAULT_ALIAS, \
9 from allmydata.scripts.common_http import do_http, HTTPError, format_http_error
10 from allmydata.util import time_format
11 from allmydata.scripts import backupdb
12 from allmydata.util.stringutils import listdir_unicode, open_unicode, quote_output, to_str
13 from allmydata.util.assertutil import precondition
def get_local_metadata(path):
    """Return a dict of stat(2) metadata for the local file or directory at
    'path': always "ctime" and "mtime", plus whichever of the misc stat
    fields the platform provides."""
    metadata = {}

    # posix stat(2) metadata, depends on the platform
    if hasattr(os, "stat_float_times"):
        # ask for float timestamps where the switch exists (it was removed in
        # Python 3.12, where float timestamps are already the default)
        os.stat_float_times(True)
    s = os.stat(path)
    metadata["ctime"] = s.st_ctime
    metadata["mtime"] = s.st_mtime

    misc_fields = ("st_mode", "st_ino", "st_dev", "st_uid", "st_gid")
    macos_misc_fields = ("st_rsize", "st_creator", "st_type")
    for field in misc_fields + macos_misc_fields:
        # only record fields this platform's stat result actually has
        if hasattr(s, field):
            metadata[field] = getattr(s, field)

    # TODO: extended attributes, like on OS-X's HFS+
    return metadata
def mkdir(contents, options):
    """Create an immutable directory on the grid holding 'contents' (a dict
    mapping childname -> (nodetype, rocap, metadata)) and return its
    directory cap as a native string.

    Raises HTTPError if the webapi request does not succeed (non-2xx).
    """
    # build the JSON body the webapi's t=mkdir-immutable expects:
    # childname -> (nodetype, {"ro_uri": ..., "metadata": ...})
    kids = dict([ (childname, (contents[childname][0],
                               {"ro_uri": contents[childname][1],
                                "metadata": contents[childname][2],
                                }))
                  for childname in contents
                  ])
    body = simplejson.dumps(kids).encode("utf-8")
    url = options['node-url'] + "uri?t=mkdir-immutable"
    resp = do_http("POST", url, body)
    if resp.status < 200 or resp.status >= 300:
        raise HTTPError("Error during mkdir", resp)
    # the response body is the new directory's cap
    dircap = to_str(resp.read().strip())
    return dircap
def put_child(dirurl, childname, childcap):
    """Attach 'childcap' as 'childname' inside the directory at 'dirurl'
    (which must end in "/"), using the webapi's ?t=uri PUT.

    Raises HTTPError unless the webapi reports success.
    """
    assert dirurl[-1] == "/"
    target = dirurl + urllib.quote(childname) + "?t=uri"
    resp = do_http("PUT", target, childcap)
    status = resp.status
    if status != 200 and status != 201:
        raise HTTPError("Error during put_child", resp)
class BackupProcessingError(Exception):
    """Raised when part of the backup tree cannot be processed."""
61 def __init__(self, options):
62 self.options = options
63 self.files_uploaded = 0
65 self.files_checked = 0
66 self.files_skipped = 0
67 self.directories_created = 0
68 self.directories_reused = 0
69 self.directories_checked = 0
70 self.directories_skipped = 0
        # NOTE(review): the "def run(self):" header and several statements
        # (the self.verbosity assignments, the try:/return error paths around
        # backupdb loading and get_alias, and the final return codes) are not
        # visible in this chunk; the surviving lines are annotated in place
        # and indentation of partial constructs is approximate.
        options = self.options
        nodeurl = options['node-url']
        # NOTE(review): the body of this branch is missing -- presumably it
        # raises self.verbosity (read below and by verboseprint)
        if options['verbose']:
        stdout = options.stdout
        stderr = options.stderr

        start_timestamp = datetime.datetime.now()
        # the backupdb lives in the node's private/ area
        bdbfile = os.path.join(options["node-directory"],
                               "private", "backupdb.sqlite")
        bdbfile = os.path.abspath(bdbfile)
        self.backupdb = backupdb.get_backupdb(bdbfile, stderr)
        # NOTE(review): the guard around this error message (and its error
        # return) is not visible in this chunk
        print >>stderr, "ERROR: Unable to load backup db."

        # resolve the alias/path of the backup target into a root cap
        # (the "try:" pairing with the except below is not visible here)
        rootcap, path = get_alias(options.aliases, options.to_dir, DEFAULT_ALIAS)
        except UnknownAliasError, e:
        to_url = nodeurl + "uri/%s/" % urllib.quote(rootcap)
        to_url += escape_path(path)
        # NOTE(review): the body of this branch (presumably appending "/") is
        # not visible in this chunk
        if not to_url.endswith("/"):
        archives_url = to_url + "Archives/"

        # first step: make sure the target directory exists, as well as the
        # Archives/ subdirectory.
        resp = do_http("GET", archives_url + "?t=json")
        if resp.status == 404:
            resp = do_http("POST", archives_url + "?t=mkdir")
            if resp.status != 200:
                print >>stderr, format_http_error("Unable to create target directory", resp)

        # second step: process the tree
        new_backup_dircap = self.process(options.from_dir)

        # third: attach the new backup to the list
        # timestamped child name under Archives/, e.g. "2011-01-01_12:00:00Z"
        now = time_format.iso_utc(int(time.time()), sep="_") + "Z"

        put_child(archives_url, now, new_backup_dircap)
        put_child(to_url, "Latest", new_backup_dircap)
        end_timestamp = datetime.datetime.now()
        # calc elapsed time, omitting microseconds
        elapsed_time = str(end_timestamp - start_timestamp).split('.')[0]

        if self.verbosity >= 1:
            # NOTE(review): some of the format arguments (between
            # files_uploaded and directories_created) are missing in this
            # chunk
            print >>stdout, (" %d files uploaded (%d reused), "
                             "%d directories created (%d reused), "
                             "%d directories skipped"
                             % (self.files_uploaded,
                                self.directories_created,
                                self.directories_reused,
                                self.directories_skipped))
        if self.verbosity >= 2:
            print >>stdout, (" %d files checked, %d directories checked"
                             % (self.files_checked,
                                self.directories_checked))
        print >>stdout, " backup done, elapsed time: %s" % elapsed_time

        # The command exits with code 2 if files or directories were skipped
        # NOTE(review): the branch body and the normal-path return are not
        # visible in this chunk
        if self.files_skipped or self.directories_skipped:
151 def verboseprint(self, msg):
152 precondition(isinstance(msg, str), msg)
153 if self.verbosity >= 2:
154 print >>self.options.stdout, msg
        # NOTE(review): orphaned body of what is presumably "def warn(self,
        # msg):" -- the method header is not visible in this chunk.  Writes
        # the message unconditionally to the command's stderr.
        precondition(isinstance(msg, str), msg)
        print >>self.options.stderr, msg
    def process(self, localpath):
        """Recursively back up the local directory 'localpath' and return the
        cap (a native str) of the corresponding immutable grid directory.

        NOTE(review): several structural lines of this method (the "try:"
        wrappers, an "if must_create:" line, "else:" separators and at least
        one return) are not visible in this chunk; indentation of the
        surviving lines is approximate.
        """
        precondition(isinstance(localpath, unicode), localpath)
        self.verboseprint("processing %s" % quote_output(localpath))
        create_contents = {} # childname -> (type, rocap, metadata)
        compare_contents = {} # childname -> rocap

        # an unreadable directory is counted and skipped rather than aborting
        # the whole backup (the "try:" pairing with this except is missing)
        children = listdir_unicode(localpath)
        except EnvironmentError:
        self.directories_skipped += 1
        self.warn("WARNING: permission denied on directory %s" % quote_output(localpath))

        for child in self.options.filter_listdir(children):
            childpath = os.path.join(localpath, child)
            child = unicode(child)
            # note: symlinks to directories are both islink() and isdir()
            if os.path.isdir(childpath) and not os.path.islink(childpath):
                metadata = get_local_metadata(childpath)
                # recurse on the child directory
                childcap = self.process(childpath)
                assert isinstance(childcap, str)
                create_contents[child] = ("dirnode", childcap, metadata)
                compare_contents[child] = childcap
            elif os.path.isfile(childpath) and not os.path.islink(childpath):
                # NOTE(review): the "try:" pairing with the except below is
                # not visible in this chunk
                childcap, metadata = self.upload(childpath)
                assert isinstance(childcap, str)
                create_contents[child] = ("filenode", childcap, metadata)
                compare_contents[child] = childcap
            except EnvironmentError:
                self.files_skipped += 1
                self.warn("WARNING: permission denied on file %s" % quote_output(childpath))
            # NOTE(review): an "else:" for the non-file/non-directory case is
            # presumably missing before this line
            self.files_skipped += 1
            if os.path.islink(childpath):
                self.warn("WARNING: cannot backup symlink %s" % quote_output(childpath))
            # NOTE(review): the "else:" separator is not visible here
            self.warn("WARNING: cannot backup special file %s" % quote_output(childpath))

        # ask the backupdb whether an identical directory already exists on
        # the grid; re-use it when possible, otherwise create a fresh one
        must_create, r = self.check_backupdb_directory(compare_contents)
        # NOTE(review): the "if must_create:" line is not visible here
        self.verboseprint(" creating directory for %s" % quote_output(localpath))
        newdircap = mkdir(create_contents, self.options)
        assert isinstance(newdircap, str)
        # record the new directory so a future backup can re-use it
        r.did_create(newdircap)
        self.directories_created += 1
        # NOTE(review): the "else:" separator (and the return of newdircap)
        # is not visible here
        self.verboseprint(" re-using old directory for %s" % quote_output(localpath))
        self.directories_reused += 1
        return r.was_created()
    def check_backupdb_file(self, childpath):
        """Consult the backupdb about the local file 'childpath'.

        Appears to return a (must_upload, backupdb_result) pair, but several
        "return" lines of this method are not visible in this chunk.
        """
        # without a backupdb every file must be uploaded (the return for this
        # branch is not visible here)
        if not self.backupdb:
        use_timestamps = not self.options["ignore-timestamps"]
        r = self.backupdb.check_file(childpath, use_timestamps)

        # NOTE(review): the returns for these two branches are not visible
        if not r.was_uploaded():
        if not r.should_check():
            # the file was uploaded or checked recently, so we can just use
            # NOTE(review): the rest of this comment/branch is missing here

        # we must check the file before using the results
        filecap = r.was_uploaded()
        self.verboseprint("checking %s" % quote_output(filecap))
        nodeurl = self.options['node-url']
        checkurl = nodeurl + "uri/%s?t=check&output=JSON" % urllib.quote(filecap)
        self.files_checked += 1
        resp = do_http("POST", checkurl)
        if resp.status != 200:
            # can't check, so we must assume it's bad
        cr = simplejson.loads(resp.read())
        healthy = cr["results"]["healthy"]
        # NOTE(review): the unhealthy branch is not visible in this chunk
        # file is healthy, no need to upload
        r.did_check_healthy(cr)
    def check_backupdb_directory(self, compare_contents):
        """Consult the backupdb about a directory whose children are
        'compare_contents' (childname -> rocap).

        Appears to return a (must_create, backupdb_result) pair, mirroring
        check_backupdb_file(); several "return" lines of this method are not
        visible in this chunk.
        """
        # without a backupdb every directory must be created (the return for
        # this branch is not visible here)
        if not self.backupdb:
        r = self.backupdb.check_directory(compare_contents)

        # NOTE(review): the returns for these two branches are not visible
        if not r.was_created():
        if not r.should_check():
            # the file was uploaded or checked recently, so we can just use
            # NOTE(review): the rest of this comment/branch is missing here

        # we must check the directory before re-using it
        dircap = r.was_created()
        self.verboseprint("checking %s" % quote_output(dircap))
        nodeurl = self.options['node-url']
        checkurl = nodeurl + "uri/%s?t=check&output=JSON" % urllib.quote(dircap)
        self.directories_checked += 1
        resp = do_http("POST", checkurl)
        if resp.status != 200:
            # can't check, so we must assume it's bad
        cr = simplejson.loads(resp.read())
        healthy = cr["results"]["healthy"]
        # NOTE(review): the unhealthy branch is not visible in this chunk
        # directory is healthy, no need to upload
        r.did_check_healthy(cr)
    # This function will raise an IOError exception when called on an unreadable file
    def upload(self, childpath):
        """Upload the local file 'childpath', or re-use the cap recorded in
        the backupdb, returning (filecap, metadata).

        NOTE(review): the "if must_upload:" / "else:" structure of this
        method is only partially visible in this chunk; indentation of the
        surviving lines is approximate.
        """
        precondition(isinstance(childpath, unicode), childpath)
        #self.verboseprint("uploading %s.." % quote_output(childpath))
        metadata = get_local_metadata(childpath)

        # we can use the backupdb here
        must_upload, bdb_results = self.check_backupdb_file(childpath)

        # NOTE(review): presumably guarded by "if must_upload:" (not visible)
        self.verboseprint("uploading %s.." % quote_output(childpath))
        infileobj = open_unicode(childpath, "rb")
        url = self.options['node-url'] + "uri"
        resp = do_http("PUT", url, infileobj)
        if resp.status not in (200, 201):
            raise HTTPError("Error during file PUT", resp)

        # the response body is the new file's cap
        filecap = resp.read().strip()
        self.verboseprint(" %s -> %s" % (quote_output(childpath, quotemarks=False),
                                         quote_output(filecap, quotemarks=False)))
        #self.verboseprint(" metadata: %s" % (quote_output(metadata, quotemarks=False),))

        # record the upload in the backupdb for future re-use
        # (a guard around this call may be missing from this chunk)
        bdb_results.did_upload(filecap)

        self.files_uploaded += 1
        return filecap, metadata

        # NOTE(review): the "else:" (skip/re-use) branch separator is not
        # visible in this chunk
        self.verboseprint("skipping %s.." % quote_output(childpath))
        self.files_reused += 1
        return bdb_results.was_uploaded(), metadata
    # NOTE(review): fragment of the module-level backup(options) entry point;
    # its "def" line and the trailing statement that invokes the BackerUpper
    # are not visible in this chunk.
    bu = BackerUpper(options)