import urllib
import simplejson
-from allmydata.scripts.common_http import do_http
-from allmydata.scripts.tahoe_backup import parse_old_timestamp, readonly, \
- raiseHTTPError, HTTPError
-from allmydata.util import hashutil, base32
+from allmydata.scripts.common_http import do_http, HTTPError
+from allmydata.util import hashutil, base32, time_format
+from allmydata.util.stringutils import to_str, quote_output, quote_path
from allmydata.util.netstring import netstring
from allmydata.scripts.common import get_alias, DEFAULT_ALIAS
+from allmydata import uri
+
+
+def readonly(writedircap):
+    """Return the read-only cap string for the given directory writecap."""
+    node = uri.from_string_dirnode(writedircap)
+    return node.get_readonly().to_string()
+
+def parse_old_timestamp(s, options):
+    """Parse a legacy snapshot-directory name into seconds-since-epoch.
+
+    Several historical name formats are tried in turn. Returns a number
+    (seconds since the epoch, suitable for sorting and dict keys), or None
+    after printing a warning to options.stderr when no format matches.
+    """
+    try:
+        # ISO-8601-ish UTC form, e.g. "2009-03-18T12:00:00Z"
+        if not s.endswith("Z"):
+            raise ValueError
+        # this might raise ValueError if the string is not in the right
+        # format
+        return time_format.iso_utc_time_to_seconds(s[:-1])
+    except ValueError:
+        pass
+
+    # 12-hour-clock localtime forms:
+    #   "2008-11-16 10.34 PM"  and  "2008-11-16 10.34.56 PM"
+    for fmt in ("%Y-%m-%d %I.%M", "%Y-%m-%d %I.%M.%S"):
+        try:
+            if s[-3:] in (" AM", " PM"):
+                # strptime returns a struct_time; convert to seconds with
+                # mktime. (The previous code added 12h directly to the
+                # struct_time, and compared s[-3:] against "PM" instead of
+                # " PM", so the PM adjustment could never happen and a
+                # struct_time was returned instead of seconds.)
+                when = time.strptime(s[:-3], fmt)
+                seconds = time.mktime(when)
+                # %I parses "12" as hour 12, so 12 PM needs no shift and
+                # 12 AM means midnight
+                if s.endswith(" PM") and when.tm_hour != 12:
+                    seconds += 12*60*60
+                elif s.endswith(" AM") and when.tm_hour == 12:
+                    seconds -= 12*60*60
+                return seconds
+        except ValueError:
+            pass
+
+    try:
+        # 24-hour-clock localtime form: "2008-12-31 18.21.43"
+        # (convert via mktime so every branch returns seconds, not a
+        # struct_time)
+        return time.mktime(time.strptime(s, "%Y-%m-%d %H.%M.%S"))
+    except ValueError:
+        pass
+
+    print >>options.stderr, "unable to parse old timestamp '%s', ignoring" % s
+    return None
+
TAG = "consolidator_dirhash_v1"
self.rootcap, path = get_alias(options.aliases, options.where,
DEFAULT_ALIAS)
assert path == ""
+ # TODO: allow dbfile and backupfile to be Unicode
self.dbfile = options["dbfile"]
assert self.dbfile, "--dbfile is required"
self.backupfile = options["backupfile"]
url = self.nodeurl + "uri/%s?t=json" % urllib.quote(dircap)
resp = do_http("GET", url)
if resp.status != 200:
- raiseHTTPError("Error during directory GET", resp)
+ raise HTTPError("Error during directory GET", resp)
jd = simplejson.load(resp)
ntype, ndata = jd
if ntype != "dirnode":
for (childname, (childtype, childdata)) in kids.items():
if childtype != "dirnode":
continue
- potential_systems[childname] = str(childdata["rw_uri"])
+ if "rw_uri" not in childdata:
+ self.msg("%s: not writeable" % quote_output(childname))
+ continue
+ potential_systems[childname] = to_str(childdata["rw_uri"])
backup_data = {"Backups": data, "systems": {}, "archives": {}}
systems = {}
for name, sdircap in potential_systems.items():
sdata = self.read_directory_json(sdircap)
kids = sdata["children"]
if not u"Archives" in kids and not u"Latest Backup" in kids:
- self.msg("%s: not a backupdir, no 'Archives' and 'Latest'" % name)
+ self.msg("%s: not a backupdir, no 'Archives' and 'Latest'" % quote_output(name))
+ continue
+ archives_capdata = kids[u"Archives"][1]
+ if "rw_uri" not in archives_capdata:
+ self.msg("%s: /Archives is not writeable" % quote_output(name))
continue
- self.msg("%s is a system" % name)
+ self.msg("%s is a system" % quote_output(name))
backup_data["systems"][name] = sdata
- archives_dircap = kids[u"Archives"][1]["rw_uri"]
+ archives_dircap = to_str(archives_capdata["rw_uri"])
archives_data = self.read_directory_json(archives_dircap)
backup_data["archives"][name] = archives_data
systems[name] = archives_dircap
# [$NAME, writecap, $NAME-readonly, readcap] : processed, not replaced
# [None, None, $NAME, readcap] : processed and replaced
- self.msg("consolidating system %s" % system_name)
+ self.msg("consolidating system %s" % quote_output(system_name))
self.directories_reused = 0
self.directories_used_as_is = 0
self.directories_created = 0
children = sorted(data["children"].items())
for i, (childname, (childtype, childdata)) in enumerate(children):
if childtype != "dirnode":
- self.msg("non-dirnode %s in Archives/" % childname)
+ self.msg("non-dirnode %s in Archives/" % quote_output(childname))
continue
- timename = childname
- if childname.endswith("-readonly"):
- timename = childname[:-len("-readonly")]
+ timename = to_str(childname)
+ if timename.endswith("-readonly"):
+ timename = timename[:-len("-readonly")]
timestamp = parse_old_timestamp(timename, self.options)
assert timestamp is not None, timename
snapshots.setdefault(timestamp, [None, None, None, None])
# need to re-scan it
is_readonly = not childdata.has_key("rw_uri")
if is_readonly:
- readcap = str(childdata["ro_uri"])
+ readcap = to_str(childdata["ro_uri"])
if self.must_rescan_readonly_snapshots:
self.msg(" scanning old %s (%d/%d)" %
- (childname, i+1, len(children)))
- self.scan_old_directory(str(childdata["ro_uri"]))
+ (quote_output(childname), i+1, len(children)))
+ self.scan_old_directory(to_str(childdata["ro_uri"]))
snapshots[timestamp][2] = childname
snapshots[timestamp][3] = readcap
else:
- writecap = str(childdata["rw_uri"])
+ writecap = to_str(childdata["rw_uri"])
snapshots[timestamp][0] = childname
snapshots[timestamp][1] = writecap
snapshots = [ [timestamp] + values
assert roname
assert not rwname
first_snapshot = False
- self.msg(" %s already readonly" % roname)
+ self.msg(" %s already readonly" % quote_output(roname))
continue
if readcap and writecap:
# we've processed it, creating a -readonly version, but we
assert roname
assert rwname
first_snapshot = False
- self.msg(" %s processed but not yet replaced" % roname)
+ self.msg(" %s processed but not yet replaced" % quote_output(roname))
if self.options["really"]:
- self.msg(" replacing %s with %s" % (rwname, roname))
+ self.msg(" replacing %s with %s" % (quote_output(rwname), quote_output(roname)))
self.put_child(archives_dircap, rwname, readcap)
self.delete_child(archives_dircap, roname)
continue
first_snapshot = False
readcap = readonly(writecap)
self.directories_used_as_is += 1
- self.msg(" %s: oldest snapshot, using as-is" % rwname)
+ self.msg(" %s: oldest snapshot, using as-is" % quote_output(rwname))
self.scan_old_directory(readcap)
else:
# for the others, we must scan their contents and build up a new
# readonly directory (which shares common subdirs with previous
# backups)
- self.msg(" %s: processing (%d/%d)" % (rwname, i+1, len(snapshots)))
+ self.msg(" %s: processing (%d/%d)" % (quote_output(rwname), i+1, len(snapshots)))
started = time.time()
readcap = self.process_directory(readonly(writecap), (rwname,))
elapsed = time.time() - started
eta = "%ds" % (elapsed * (len(snapshots) - i-1))
if self.options["really"]:
- self.msg(" replaced %s" % rwname)
+ self.msg(" replaced %s" % quote_output(rwname))
self.put_child(archives_dircap, rwname, readcap)
else:
- self.msg(" created %s" % roname)
+ self.msg(" created %s" % quote_output(roname))
self.put_child(archives_dircap, roname, readcap)
snapshot_created = self.directories_created - start_created
snapshot_used_as_is = self.directories_used_as_is - start_used_as_is
snapshot_reused = self.directories_reused - start_reused
self.msg(" %s: done: %d dirs created, %d used as-is, %d reused, eta %s"
- % (rwname,
+ % (quote_output(rwname),
snapshot_created, snapshot_used_as_is, snapshot_reused,
eta))
# done!
# for my contents. In all cases I return a directory readcap that
# points to my contents.
- assert isinstance(readcap, str)
+ readcap = to_str(readcap)
self.directories_seen.add(readcap)
# build up contents to pass to mkdir() (which uses t=set_children)
for (childname, (childtype, childdata)) in sorted(data["children"].items()):
if childtype == "dirnode":
childpath = path + (childname,)
- old_childcap = str(childdata["ro_uri"])
+ old_childcap = to_str(childdata["ro_uri"])
childcap = self.process_directory(old_childcap, childpath)
if childcap != old_childcap:
children_modified = True
contents[childname] = ("dirnode", childcap, None)
else:
- childcap = str(childdata["ro_uri"])
+ childcap = to_str(childdata["ro_uri"])
contents[childname] = (childtype, childcap, None)
hashkids.append( (childname, childcap) )
old_dircap = self.get_old_dirhash(dirhash)
if old_dircap:
if self.options["verbose"]:
- self.msg(" %r: reused" % "/".join(path))
+ self.msg(" %s: reused" % quote_path(path))
assert isinstance(old_dircap, str)
self.directories_reused += 1
self.directories_used.add(old_dircap)
if not children_modified:
# we're allowed to use this directory as-is
if self.options["verbose"]:
- self.msg(" %r: used as-is" % "/".join(path))
+ self.msg(" %s: used as-is" % quote_path(path))
new_dircap = readonly(readcap)
assert isinstance(new_dircap, str)
self.store_dirhash(dirhash, new_dircap)
return new_dircap
# otherwise, we need to create a new directory
if self.options["verbose"]:
- self.msg(" %r: created" % "/".join(path))
+ self.msg(" %s: created" % quote_path(path))
new_dircap = readonly(self.mkdir(contents))
assert isinstance(new_dircap, str)
self.store_dirhash(dirhash, new_dircap)
urllib.quote(childname))
resp = do_http("PUT", url, childcap)
if resp.status not in (200, 201):
- raiseHTTPError("error during put_child", resp)
+ raise HTTPError("Error during put_child", resp)
def delete_child(self, dircap, childname):
url = self.nodeurl + "uri/%s/%s" % (urllib.quote(dircap),
urllib.quote(childname))
resp = do_http("DELETE", url)
if resp.status not in (200, 201):
- raiseHTTPError("error during delete_child", resp)
+ raise HTTPError("Error during delete_child", resp)
def mkdir(self, contents):
url = self.nodeurl + "uri?t=mkdir"
resp = do_http("POST", url)
if resp.status < 200 or resp.status >= 300:
- raiseHTTPError("error during mkdir", resp)
- dircap = str(resp.read().strip())
+ raise HTTPError("Error during mkdir", resp)
+ dircap = to_str(resp.read().strip())
url = self.nodeurl + "uri/%s?t=set_children" % urllib.quote(dircap)
body = dict([ (childname, (contents[childname][0],
{"ro_uri": contents[childname][1],
])
resp = do_http("POST", url, simplejson.dumps(body))
if resp.status != 200:
- raiseHTTPError("error during set_children", resp)
+ raise HTTPError("Error during set_children", resp)
return dircap
def scan_old_directory(self, dircap, ancestors=()):
data = self.read_directory_json(dircap)
kids = []
for (childname, (childtype, childdata)) in data["children"].items():
- childcap = str(childdata["ro_uri"])
+ childcap = to_str(childdata["ro_uri"])
if childtype == "dirnode":
self.scan_old_directory(childcap, ancestors)
kids.append( (childname, childcap) )
def hash_directory_contents(self, kids):
kids.sort()
- s = "".join([netstring(childname.encode("utf-8"))+netstring(childcap)
+ s = "".join([netstring(to_str(childname))+netstring(childcap)
for (childname, childcap) in kids])
return hashutil.tagged_hash(TAG, s)