7 from allmydata.scripts.common import get_alias, escape_path, DEFAULT_ALIAS
8 from allmydata.scripts.common_http import do_http
9 from allmydata.util import time_format
10 from allmydata.scripts import backupdb
class HTTPError(Exception):
    """Raised (via raiseHTTPError) when a webapi HTTP request fails."""
    # body restored: the paste dropped the original `pass` line
    pass
def raiseHTTPError(msg, resp):
    """Raise HTTPError, annotating msg with the response status/reason/body.

    Note: resp.read() consumes the response body.
    The final `raise` line was dropped by the paste and is restored here;
    without it the function silently discarded the error.
    """
    msg = msg + ": %s %s %s" % (resp.status, resp.reason, resp.read())
    raise HTTPError(msg)
def get_local_metadata(path):
    """Return a dict of stat(2) metadata for the filesystem object at path.

    Always includes "ctime" and "mtime"; the misc/platform-specific stat
    fields are added only when the platform's stat result provides them.
    Restored lines (dropped by the paste): the dict init, the os.stat()
    call, the hasattr guard in the loop, and the return.
    """
    metadata = {}

    # posix stat(2) metadata, depends on the platform
    if hasattr(os, "stat_float_times"):
        # request float timestamps (Python 2 API; gone in Python 3.10+,
        # so guard to stay importable on newer interpreters)
        os.stat_float_times(True)
    s = os.stat(path)
    metadata["ctime"] = s.st_ctime
    metadata["mtime"] = s.st_mtime

    misc_fields = ("st_mode", "st_ino", "st_dev", "st_uid", "st_gid")
    macos_misc_fields = ("st_rsize", "st_creator", "st_type")
    for field in misc_fields + macos_misc_fields:
        # the macos fields (and some posix ones) are platform-dependent
        if hasattr(s, field):
            metadata[field] = getattr(s, field)

    # TODO: extended attributes, like on OS-X's HFS+
    return metadata
def mkdir(contents, options):
    """Create an immutable directory via the webapi; return its dircap (str).

    contents maps childname -> (nodetype, ro_uri, metadata).
    Restored lines: the closing brackets of the `kids` literal and the
    final `return dircap`, both dropped by the paste.
    """
    # shape children as t=mkdir-immutable expects:
    # childname -> (nodetype, {"ro_uri": ..., "metadata": ...})
    kids = dict([ (childname, (contents[childname][0],
                               {"ro_uri": contents[childname][1],
                                "metadata": contents[childname][2],
                                }))
                  for childname in contents
                  ])
    body = simplejson.dumps(kids).encode("utf-8")
    url = options['node-url'] + "uri?t=mkdir-immutable"
    resp = do_http("POST", url, body)
    if resp.status < 200 or resp.status >= 300:
        raiseHTTPError("error during mkdir", resp)
    dircap = str(resp.read().strip())
    return dircap
def put_child(dirurl, childname, childcap):
    """Link childcap under childname inside the directory at dirurl."""
    # caller must hand us a directory URL, i.e. one ending in a slash
    assert dirurl[-1] == "/"
    target = dirurl + urllib.quote(childname) + "?t=uri"
    response = do_http("PUT", target, childcap)
    succeeded = response.status in (200, 201)
    if not succeeded:
        raiseHTTPError("error during put_child", response)
class BackupProcessingError(Exception):
    """Raised when a child in the source tree cannot be backed up
    (e.g. it is neither a regular file nor a directory)."""
    # body restored: the paste dropped the original `pass` line
    pass
def __init__(self, options):
    # Method of BackerUpper (the class header lies outside this chunk).
    """Remember the CLI options and zero the per-run statistics counters."""
    self.options = options
    # counters reported by run() at verbosity >= 1 / >= 2
    self.files_uploaded = 0
    self.files_reused = 0  # restored: upload() increments/reads this counter
    self.files_checked = 0
    self.directories_created = 0
    self.directories_reused = 0
    self.directories_checked = 0
def run(self):
    # Method of BackerUpper. NOTE(review): the `def run(self):` line itself
    # was dropped by the paste and is reconstructed, as are the verbosity
    # setup, the backupdb guard, the `return 1` failure paths and the
    # trailing `return 0` -- verify against upstream.
    """Perform one backup: ensure the target dirs exist, snapshot the
    source tree, then link the snapshot as Archives/<timestamp> and Latest.

    Returns 0 on success, 1 on failure (no backupdb / cannot create the
    target directory).
    """
    options = self.options
    nodeurl = options['node-url']
    self.verbosity = 1
    if options['quiet']:
        self.verbosity = 0
    if options['verbose']:
        self.verbosity = 2
    stdout = options.stdout
    stderr = options.stderr

    start_timestamp = datetime.datetime.now()
    bdbfile = os.path.join(options["node-directory"],
                           "private", "backupdb.sqlite")
    bdbfile = os.path.abspath(bdbfile)
    self.backupdb = backupdb.get_backupdb(bdbfile, stderr)
    if not self.backupdb:
        print >>stderr, "ERROR: Unable to load backup db."
        return 1

    rootcap, path = get_alias(options.aliases, options.to_dir, DEFAULT_ALIAS)
    to_url = nodeurl + "uri/%s/" % urllib.quote(rootcap)
    if path:
        to_url += escape_path(path)
    if not to_url.endswith("/"):
        to_url += "/"

    archives_url = to_url + "Archives/"

    # first step: make sure the target directory exists, as well as the
    # Archives/ subdirectory.
    resp = do_http("GET", archives_url + "?t=json")
    if resp.status == 404:
        resp = do_http("POST", archives_url + "?t=mkdir")
        if resp.status != 200:
            print >>stderr, "Unable to create target directory: %s %s %s" % \
                  (resp.status, resp.reason, resp.read())
            return 1

    # second step: process the tree
    new_backup_dircap = self.process(options.from_dir)

    # third: attach the new backup to the list
    now = time_format.iso_utc(int(time.time()), sep="_") + "Z"

    put_child(archives_url, now, new_backup_dircap)
    put_child(to_url, "Latest", new_backup_dircap)
    end_timestamp = datetime.datetime.now()
    # calc elapsed time, omitting microseconds
    elapsed_time = str(end_timestamp - start_timestamp).split('.')[0]

    # NOTE(review): the paste stripped indentation, so the nesting of the
    # three report prints is reconstructed -- confirm against upstream.
    if self.verbosity >= 1:
        print >>stdout, (" %d files uploaded (%d reused), "
                         "%d directories created (%d reused)"
                         % (self.files_uploaded,
                            self.files_reused,
                            self.directories_created,
                            self.directories_reused))
        if self.verbosity >= 2:
            print >>stdout, (" %d files checked, %d directories checked"
                             % (self.files_checked,
                                self.directories_checked))
        print >>stdout, " backup done, elapsed time: %s" % elapsed_time
    return 0
def verboseprint(self, msg):
    """Emit msg to the configured stdout, but only at verbosity >= 2."""
    if self.verbosity < 2:
        return
    print >>self.options.stdout, msg
def process(self, localpath):
    # Method of BackerUpper.
    """Recursively back up the directory at localpath; return its dircap.

    Builds create_contents (children of the immutable directory we would
    mint) and compare_contents (what the backupdb compares against), then
    either creates a fresh directory or re-uses the backupdb's cached one.
    Restored lines (dropped by the paste): the `else:` before the raise,
    `if must_create:`, `return newdircap`, and the final `else:`.
    """
    self.verboseprint("processing %s" % localpath)
    create_contents = {} # childname -> (type, rocap, metadata)
    compare_contents = {} # childname -> rocap
    for child in self.options.filter_listdir(os.listdir(localpath)):
        childpath = os.path.join(localpath, child)
        child = unicode(child)
        if os.path.isdir(childpath):
            metadata = get_local_metadata(childpath)
            # recurse on the child directory
            childcap = self.process(childpath)
            assert isinstance(childcap, str)
            create_contents[child] = ("dirnode", childcap, metadata)
            compare_contents[child] = childcap
        elif os.path.isfile(childpath):
            childcap, metadata = self.upload(childpath)
            assert isinstance(childcap, str)
            create_contents[child] = ("filenode", childcap, metadata)
            compare_contents[child] = childcap
        else:
            # neither file nor directory: sockets, devices, symlink oddities
            raise BackupProcessingError("Cannot backup child %r" % childpath)

    must_create, r = self.check_backupdb_directory(compare_contents)
    if must_create:
        self.verboseprint(" creating directory for %s" % localpath)
        newdircap = mkdir(create_contents, self.options)
        assert isinstance(newdircap, str)
        r.did_create(newdircap)
        self.directories_created += 1
        return newdircap
    else:
        self.verboseprint(" re-using old directory for %s" % localpath)
        self.directories_reused += 1
        return r.was_created()
def check_backupdb_file(self, childpath):
    # Method of BackerUpper.
    """Decide whether childpath must be uploaded.

    Returns (must_upload, bdb_results); bdb_results is None when there is
    no backupdb. The `return True, None` / `return True, r` /
    `return False, r` lines were dropped by the paste and are restored.
    """
    if not self.backupdb:
        return True, None
    use_timestamps = not self.options["ignore-timestamps"]
    r = self.backupdb.check_file(childpath, use_timestamps)

    if not r.was_uploaded():
        return True, r

    if not r.should_check():
        # the file was uploaded or checked recently, so we can just use
        # it
        return False, r

    # we must check the file before using the results
    filecap = r.was_uploaded()
    self.verboseprint("checking %s" % filecap)
    nodeurl = self.options['node-url']
    checkurl = nodeurl + "uri/%s?t=check&output=JSON" % urllib.quote(filecap)
    self.files_checked += 1
    resp = do_http("POST", checkurl)
    if resp.status != 200:
        # can't check, so we must assume it's bad
        return True, r

    cr = simplejson.loads(resp.read())
    healthy = cr["results"]["healthy"]
    if not healthy:
        # not healthy: must re-upload
        return True, r
    # file is healthy, no need to upload
    r.did_check_healthy(cr)
    return False, r
def check_backupdb_directory(self, compare_contents):
    # Method of BackerUpper.
    """Decide whether a directory with these contents must be created.

    Returns (must_create, bdb_results); bdb_results is None when there is
    no backupdb. Mirrors check_backupdb_file; the dropped `return` lines
    are restored the same way.
    """
    if not self.backupdb:
        return True, None
    r = self.backupdb.check_directory(compare_contents)

    if not r.was_created():
        return True, r

    if not r.should_check():
        # the file was uploaded or checked recently, so we can just use
        # it
        return False, r

    # we must check the directory before re-using it
    dircap = r.was_created()
    self.verboseprint("checking %s" % dircap)
    nodeurl = self.options['node-url']
    checkurl = nodeurl + "uri/%s?t=check&output=JSON" % urllib.quote(dircap)
    self.directories_checked += 1
    resp = do_http("POST", checkurl)
    if resp.status != 200:
        # can't check, so we must assume it's bad
        return True, r

    cr = simplejson.loads(resp.read())
    healthy = cr["results"]["healthy"]
    if not healthy:
        # not healthy: must re-create
        return True, r
    # directory is healthy, no need to upload
    r.did_check_healthy(cr)
    return False, r
def upload(self, childpath):
    # Method of BackerUpper.
    """Upload the file at childpath (or re-use the backupdb's cached cap).

    Returns (filecap, metadata). Restored lines (dropped by the paste):
    `if must_upload:`, `if bdb_results:`, and the `else:` for the re-use
    branch. Also fixes a leak: the original never closed infileobj.
    """
    #self.verboseprint("uploading %s.." % childpath)
    metadata = get_local_metadata(childpath)

    # we can use the backupdb here
    must_upload, bdb_results = self.check_backupdb_file(childpath)

    if must_upload:
        self.verboseprint("uploading %s.." % childpath)
        infileobj = open(os.path.expanduser(childpath), "rb")
        try:
            url = self.options['node-url'] + "uri"
            resp = do_http("PUT", url, infileobj)
        finally:
            # do_http has consumed the body by now; don't leak the handle
            infileobj.close()
        if resp.status not in (200, 201):
            raiseHTTPError("Error during file PUT", resp)
        filecap = resp.read().strip()
        self.verboseprint(" %s -> %s" % (childpath, filecap))
        #self.verboseprint(" metadata: %s" % (metadata,))
        if bdb_results:
            # bdb_results is None when there is no backupdb
            bdb_results.did_upload(filecap)

        self.files_uploaded += 1
        return filecap, metadata
    else:
        self.verboseprint("skipping %s.." % childpath)
        self.files_reused += 1
        return bdb_results.was_uploaded(), metadata
278 bu = BackerUpper(options)