From: Alberto Berti Date: Sun, 22 Feb 2009 18:08:29 +0000 (-0700) Subject: Added --exclude, --exclude-from and --exclude-vcs options to backup command. X-Git-Tag: allmydata-tahoe-1.4.0~151 X-Git-Url: https://git.rkrishnan.org/Site/Content/Exhibitors/simplejson/?a=commitdiff_plain;h=5d57da93fd336692041f1dd6dd9d0f9276a211d4;p=tahoe-lafs%2Ftahoe-lafs.git Added --exclude, --exclude-from and --exclude-vcs options to backup command. It is still impossible to specify absolute exclusion path, only relative. I must check with tar or rsync how they allow them to be specified. --- diff --git a/src/allmydata/scripts/cli.py b/src/allmydata/scripts/cli.py index dd23537e..09966a69 100644 --- a/src/allmydata/scripts/cli.py +++ b/src/allmydata/scripts/cli.py @@ -1,5 +1,5 @@ -import os.path, re, sys +import os.path, re, sys, fnmatch from twisted.python import usage from allmydata.scripts.common import BaseOptions, get_aliases @@ -200,6 +200,9 @@ class LnOptions(VDriveOptions): def getSynopsis(self): return "%s ln FROM TO" % (os.path.basename(sys.argv[0]),) +class BackupConfigurationError(Exception): + pass + class BackupOptions(VDriveOptions): optFlags = [ ("verbose", "v", "Be noisy about what is happening."), @@ -207,6 +210,15 @@ class BackupOptions(VDriveOptions): ("ignore-timestamps", None, "Do not use backupdb timestamps to decide if a local file is unchanged."), ] + vcs_patterns = ('CVS', 'RCS', 'SCCS', '.git', '.gitignore', '.cvsignore','.svn', + '.arch-ids','{arch}', '=RELEASE-ID', '=meta-update', '=update', + '.bzr', '.bzrignore', '.bzrtags', '.hg', '.hgignore', '.hgrags', + '_darcs') + + def __init__(self): + super(BackupOptions, self).__init__() + self['exclude'] = [] + def parseArgs(self, localdir, topath): self.from_dir = localdir self.to_dir = topath @@ -214,6 +226,48 @@ class BackupOptions(VDriveOptions): def getSynopsis(Self): return "%s backup FROM ALIAS:TO" % os.path.basename(sys.argv[0]) + def opt_exclude(self, pattern): + """Ignore files matching a glob pattern. You may give multiple + '--exclude' options.""" + g = pattern.strip() + if g: + exclude = self['exclude'] + if g not in exclude: + exclude.append(g) + + def opt_exclude_from(self, filepath): + """Ignore file matching glob patterns listed in file, one per + line.""" + try: + exclude_file = file(filepath) + except: + raise BackupConfigurationError('Error opening exclude file %r.' % filepath) + try: + for line in exclude_file: + self.opt_exclude(line) + finally: + exclude_file.close() + + def opt_exclude_vcs(self): + """Exclude files and directories used by following version + control systems: 'CVS', 'RCS', 'SCCS', 'SVN', 'Arch', + 'Bazaar', 'Mercurial', and 'Darcs'.""" + for pattern in self.vcs_patterns: + self.opt_exclude(pattern) + + def filter_listdir(self, listdir): + """Yields non-excluded childpaths in path.""" + exclude = self['exclude'] + excluded_dirmembers = [] + if listdir and exclude: + # expand patterns with a reduce taste + for pattern in exclude: + excluded_dirmembers += fnmatch.filter(listdir, pattern) + # do subtraction + for filename in listdir: + if filename not in excluded_dirmembers: + yield filename + longdesc = """Add a versioned backup of the local FROM directory to a timestamped subdir of the (tahoe) TO/Archives directory, sharing as many files and directories as possible with the previous backup. Creates TO/Latest as a reference to the latest backup. Behaves somewhat like 'rsync -a --link-dest=TO/Archives/(previous) FROM TO/Archives/(new); ln -sf TO/Archives/(new) TO/Latest'.""" class WebopenOptions(VDriveOptions): diff --git a/src/allmydata/scripts/tahoe_backup.py b/src/allmydata/scripts/tahoe_backup.py index bbde7623..55e07fc0 100644 --- a/src/allmydata/scripts/tahoe_backup.py +++ b/src/allmydata/scripts/tahoe_backup.py @@ -237,7 +237,7 @@ class BackerUpper: olddircontents = self.readdir(olddircap) newdircontents = {} # childname -> (type, rocap, metadata) - for child in os.listdir(localpath): + for child in self.options.filter_listdir(os.listdir(localpath)): childpath = os.path.join(localpath, child) if os.path.isdir(childpath): metadata = get_local_metadata(childpath)