import os
import time
import urllib
import datetime
import simplejson

from allmydata.scripts.common import get_alias, escape_path, DEFAULT_ALIAS
from allmydata.scripts.common_http import do_http
from allmydata import uri
from allmydata.util import time_format
from allmydata.scripts import backupdb
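
# This module implements the "tahoe backup"-style command: it copies a local
# directory tree into a Tahoe grid, creating one read-only snapshot per run
# under TODIR/Archives/<utc-timestamp> and repointing TODIR/Latest at the
# newest snapshot. Files and directories that are unchanged since the
# previous snapshot are re-used (via a local backupdb and the contents of the
# old snapshot) instead of being uploaded again.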

class HTTPError(Exception):
    pass

def raiseHTTPError(msg, resp):
    msg = msg + ": %s %s %s" % (resp.status, resp.reason, resp.read())
    raise HTTPError(msg)

def readonly(writedircap):
    return uri.from_string_dirnode(writedircap).get_readonly().to_string()

def parse_old_timestamp(s, options):
    try:
        if not s.endswith("Z"):
            raise ValueError
        # This returns seconds-since-epoch for an ISO-8601-ish-formatted UTC
        # time string. This might raise ValueError if the string is not in
        # the right format.
        when = time_format.iso_utc_time_to_seconds(s[:-1])
        return when
    except ValueError:
        pass

    # "2008-11-16 10.34 PM" or "2008-11-16 10.34.56 PM" (localtime)
    for fmt in ("%Y-%m-%d %I.%M", "%Y-%m-%d %I.%M.%S"):
        try:
            if s[-3:] in (" AM", " PM"):
                # this might raise ValueError
                when = time.strptime(s[:-3], fmt)
                # %I yields an hour in 1..12; normalize to a 24-hour clock
                # before converting to seconds-since-epoch
                hour = when.tm_hour % 12 + (12 if s.endswith(" PM") else 0)
                return time.mktime(when[:3] + (hour,) + when[4:])
        except ValueError:
            pass

    try:
        # "2008-12-31 18.21.43" (localtime)
        when = time.strptime(s, "%Y-%m-%d %H.%M.%S")
        return time.mktime(when)
    except ValueError:
        pass

    print >>options.stderr, "unable to parse old timestamp '%s', ignoring" % s
    return None
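
# For reference, the archive-name shapes the parser above accepts (anything
# else yields None and is skipped by the caller):
#   ISO-8601-ish UTC with a trailing "Z" (the names this script creates)
#   "%Y-%m-%d %I.%M AM/PM" localtime,    e.g. "2008-11-16 10.34 PM"
#   "%Y-%m-%d %I.%M.%S AM/PM" localtime, e.g. "2008-11-16 10.34.56 PM"
#   "%Y-%m-%d %H.%M.%S" localtime,       e.g. "2008-12-31 18.21.43"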

def get_local_metadata(path):
    metadata = {}

    # posix stat(2) metadata, depends on the platform
    os.stat_float_times(True)
    s = os.stat(path)
    metadata["ctime"] = s.st_ctime
    metadata["mtime"] = s.st_mtime

    misc_fields = ("st_mode", "st_ino", "st_dev", "st_uid", "st_gid")
    macos_misc_fields = ("st_rsize", "st_creator", "st_type")
    for field in misc_fields + macos_misc_fields:
        if hasattr(s, field):
            metadata[field] = getattr(s, field)

    # TODO: extended attributes, like on OS-X's HFS+
    return metadata
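
# Note: os.stat_float_times(True) above makes st_mtime/st_ctime floats, so
# the ctime/mtime recorded in each child's metadata keep sub-second
# resolution where the platform provides it; directory_is_changed() compares
# exactly these two keys when deciding whether a directory can be re-used.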

def mkdir(contents, options):
    url = options['node-url'] + "uri?t=mkdir"
    resp = do_http("POST", url)
    if resp.status < 200 or resp.status >= 300:
        raiseHTTPError("error during mkdir", resp)
    dircap = str(resp.read().strip())
    url = options['node-url'] + "uri/%s?t=set_children" % urllib.quote(dircap)
    body = dict([ (childname, (contents[childname][0],
                               {"ro_uri": contents[childname][1],
                                "metadata": contents[childname][2],
                                }))
                  for childname in contents
                  ])
    resp = do_http("POST", url, simplejson.dumps(body))
    if resp.status != 200:
        raiseHTTPError("error during set_children", resp)
    return dircap
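
# A sketch of the JSON body mkdir() sends with t=set_children: a map from
# child name to a (type, data) pair (the tuples become JSON lists), e.g.
#   {"file.txt": ["filenode", {"ro_uri": "URI:CHK:...",
#                              "metadata": {"mtime": ..., "ctime": ...}}],
#    "subdir":   ["dirnode",  {"ro_uri": "URI:DIR2-RO:...",
#                              "metadata": {...}}]}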

def put_child(dirurl, childname, childcap):
    assert dirurl[-1] == "/"
    url = dirurl + urllib.quote(childname) + "?t=uri"
    resp = do_http("PUT", url, childcap)
    if resp.status not in (200, 201):
        raiseHTTPError("error during put_child", resp)

def directory_is_changed(a, b):
    # each is a mapping from childname to (type, cap, metadata)
    significant_metadata = ("ctime", "mtime")
    # other metadata keys are preserved, but changes to them won't trigger a
    # new backup

    if set(a.keys()) != set(b.keys()):
        return True
    for childname in a:
        a_type, a_cap, a_metadata = a[childname]
        b_type, b_cap, b_metadata = b[childname]
        if a_type != b_type:
            return True
        if a_cap != b_cap:
            return True
        for k in significant_metadata:
            if a_metadata.get(k) != b_metadata.get(k):
                return True
    return False
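
# Example: listings that differ only in a non-significant metadata key (any
# key other than "ctime"/"mtime") compare as unchanged; such keys ride along
# in the stored metadata but never force a new snapshot by themselves.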

class BackupProcessingError(Exception):
    pass

class BackerUpper:
    def __init__(self, options):
        self.options = options
        self.files_uploaded = 0
        self.files_reused = 0
        self.files_checked = 0
        self.directories_read = 0
        self.directories_created = 0
        self.directories_reused = 0
        self.directories_checked = 0

    def run(self):
        options = self.options
        nodeurl = options['node-url']
        from_dir = options.from_dir
        to_dir = options.to_dir
        self.verbosity = 1
        if options['quiet']:
            self.verbosity = 0
        if options['verbose']:
            self.verbosity = 2
        stdin = options.stdin
        stdout = options.stdout
        stderr = options.stderr

        start_timestamp = datetime.datetime.now()

        bdbfile = os.path.join(options["node-directory"],
                               "private", "backupdb.sqlite")
        bdbfile = os.path.abspath(bdbfile)
        self.backupdb = backupdb.get_backupdb(bdbfile, stderr)
        if not self.backupdb:
            print >>stderr, "ERROR: Unable to load backup db."
            return 1

        rootcap, path = get_alias(options.aliases, options.to_dir, DEFAULT_ALIAS)
        to_url = nodeurl + "uri/%s/" % urllib.quote(rootcap)
        if path:
            to_url += escape_path(path)
        if not to_url.endswith("/"):
            to_url += "/"

        archives_url = to_url + "Archives/"
        latest_url = to_url + "Latest"

        # first step: make sure the target directory exists, as well as the
        # Archives/ subdirectory.
        resp = do_http("GET", archives_url + "?t=json")
        if resp.status == 404:
            resp = do_http("POST", archives_url + "?t=mkdir")
            if resp.status != 200:
                print >>stderr, "Unable to create target directory: %s %s %s" % \
                      (resp.status, resp.reason, resp.read())
                return 1
            archives_dir = {}
        else:
            jdata = simplejson.load(resp)
            (otype, attrs) = jdata
            archives_dir = attrs["children"]

        # second step: locate the most recent backup in TODIR/Archives/*
        latest_backup_time = 0
        latest_backup_name = None
        latest_backup_dircap = None

        # we have various time formats. The allmydata.com windows backup tool
        # appears to create things like "2008-11-16 10.34 PM". This script
        # creates things like "2008-11-16--17.34Z".
        for archive_name in archives_dir.keys():
            if archives_dir[archive_name][0] != "dirnode":
                continue
            when = parse_old_timestamp(archive_name, options)
            if when is not None:
                if when > latest_backup_time:
                    latest_backup_time = when
                    latest_backup_name = archive_name
                    latest_backup_dircap = str(archives_dir[archive_name][1]["ro_uri"])

        # third step: process the tree
        new_backup_dircap = self.process(options.from_dir, latest_backup_dircap)

        # fourth: attach the new backup to the list
        new_readonly_backup_dircap = readonly(new_backup_dircap)
        now = time_format.iso_utc(int(time.time()), sep="_") + "Z"
        put_child(archives_url, now, new_readonly_backup_dircap)
        put_child(to_url, "Latest", new_readonly_backup_dircap)
        end_timestamp = datetime.datetime.now()
        # calc elapsed time, omitting microseconds
        elapsed_time = str(end_timestamp - start_timestamp).split('.')[0]

        if self.verbosity >= 1:
            print >>stdout, (" %d files uploaded (%d reused), "
                             "%d directories created (%d reused)"
                             % (self.files_uploaded,
                                self.files_reused,
                                self.directories_created,
                                self.directories_reused))
        if self.verbosity >= 2:
            print >>stdout, (" %d files checked, %d directories checked, "
                             "%d directories read"
                             % (self.files_checked,
                                self.directories_checked,
                                self.directories_read))
        print >>stdout, " backup done, elapsed time: %s" % elapsed_time
        return 0
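
    # After a successful run the backup root looks like:
    #   TODIR/Archives/<utc-timestamp>Z/   one read-only snapshot per run
    #   TODIR/Latest                       points at the newest snapshot
    # If the second put_child() fails, the snapshot is still preserved under
    # Archives/; only the convenience "Latest" link is stale.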

    def verboseprint(self, msg):
        if self.verbosity >= 2:
            print >>self.options.stdout, msg

    def process(self, localpath, olddircap):
        # returns the readonly dircap of the new backup of localpath

        self.verboseprint("processing %s, olddircap %s" % (localpath, olddircap))
        olddircontents = None
        if olddircap:
            olddircontents = self.readdir(olddircap)

        newdircontents = {} # childname -> (type, rocap, metadata)
        for child in self.options.filter_listdir(os.listdir(localpath)):
            childpath = os.path.join(localpath, child)
            if os.path.isdir(childpath):
                metadata = get_local_metadata(childpath)
                oldchildcap = None
                if olddircontents is not None and child in olddircontents:
                    oldchildcap = olddircontents[child][1]
                # recurse on the child directory
                newchilddircap = self.process(childpath, oldchildcap)
                newdircontents[child] = ("dirnode", newchilddircap, metadata)
            elif os.path.isfile(childpath):
                newfilecap, metadata = self.upload(childpath)
                newdircontents[child] = ("filenode", newfilecap, metadata)
            else:
                raise BackupProcessingError("Cannot backup this file %r" % childpath)

        if (olddircap
            and olddircontents is not None
            and not directory_is_changed(newdircontents, olddircontents)
            ):
            self.verboseprint(" %s not changed, re-using old directory" % localpath)
            # yay! they're identical!
            self.directories_reused += 1
            return olddircap

        self.verboseprint(" %s changed, making new directory" % localpath)
        # something changed, or there was no previous directory, so we
        # must make a new directory
        newdircap = mkdir(newdircontents, self.options)
        self.directories_created += 1
        return readonly(newdircap)

    def check_backupdb(self, childpath):
        if not self.backupdb:
            return True, None
        use_timestamps = not self.options["ignore-timestamps"]
        r = self.backupdb.check_file(childpath, use_timestamps)

        if not r.was_uploaded():
            return True, r

        if not r.should_check():
            # the file was uploaded or checked recently, so we can just use
            # it
            return False, r

        # we must check the file before using the results
        filecap = r.was_uploaded()
        self.verboseprint("checking %s" % filecap)
        nodeurl = self.options['node-url']
        checkurl = nodeurl + "uri/%s?t=check&output=JSON" % urllib.quote(filecap)
        self.files_checked += 1
        resp = do_http("POST", checkurl)
        if resp.status != 200:
            # can't check, so we must assume it's bad
            return True, r

        cr = simplejson.loads(resp.read())
        healthy = cr["results"]["healthy"]
        if not healthy:
            # the file is unhealthy, so we must re-upload it
            return True, r
        # file is healthy, no need to upload
        r.did_check_healthy(cr)
        return False, r
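
    # Decision summary for check_backupdb() -> (must_upload, bdb_results):
    #   no backupdb loaded          -> (True, None): always upload
    #   file unknown or changed     -> (True, r):    upload, then did_upload()
    #   known and checked recently  -> (False, r):   re-use the recorded cap
    #   known but due for a check   -> ask the grid: healthy -> (False, r),
    #                                  check failed/unhealthy -> (True, r)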

    def readdir(self, dircap):
        # returns a dict of (childname: (type, readcap, metadata)), or None
        # if the dircap didn't point to a directory
        self.directories_read += 1
        url = self.options['node-url'] + "uri/%s?t=json" % urllib.quote(dircap)
        resp = do_http("GET", url)
        if resp.status != 200:
            raiseHTTPError("Error during directory GET", resp)
        jd = simplejson.load(resp)
        ntype, ndata = jd
        if ntype != "dirnode":
            return None
        contents = {}
        for (childname, (childtype, childdata)) in ndata["children"].items():
            contents[childname] = (childtype,
                                   str(childdata["ro_uri"]),
                                   childdata["metadata"])
        return contents

    def upload(self, childpath):
        #self.verboseprint("uploading %s.." % childpath)
        metadata = get_local_metadata(childpath)

        # we can use the backupdb here
        must_upload, bdb_results = self.check_backupdb(childpath)

        if must_upload:
            self.verboseprint("uploading %s.." % childpath)
            infileobj = open(os.path.expanduser(childpath), "rb")
            url = self.options['node-url'] + "uri"
            resp = do_http("PUT", url, infileobj)
            if resp.status not in (200, 201):
                raiseHTTPError("Error during file PUT", resp)
            filecap = resp.read().strip()
            self.verboseprint(" %s -> %s" % (childpath, filecap))
            #self.verboseprint(" metadata: %s" % (metadata,))

            if bdb_results:
                bdb_results.did_upload(filecap)

            self.files_uploaded += 1
            return filecap, metadata

        else:
            self.verboseprint("skipping %s.." % childpath)
            self.files_reused += 1
            return bdb_results.was_uploaded(), metadata

def backup(options):
    bu = BackerUpper(options)
    return bu.run()
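
# A minimal invocation sketch (hypothetical wiring; in the real CLI a
# dispatcher builds the options object from the command line). The options
# fields used above are: 'node-url', 'node-directory', 'verbose', 'quiet',
# 'ignore-timestamps', from_dir, to_dir, aliases, stdin/stdout/stderr, and
# filter_listdir:
#
#   rc = backup(options)   # returns 0 on success, 1 on error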