#!/usr/bin/env python2.4
# svn-fast-backup: use rsync snapshots for very fast FSFS repository backup.
# Multiple FSFS backups share data via hardlinks, meaning old backups are
# almost free, since a newer revision of a repository is almost a complete
# superset of an older revision.
# This is good for replacing incremental log-dump+restore-style backups
# because it is just as space-conserving and even faster; there is no
# inter-backup state (old backups are essentially caches); each backup
# directory is self-contained. It keeps the same interface as svn-hot-backup
# (if you use --force), but only works for FSFS repositories.
# Author: Karl Chen <quarl@quarl.org>
## quarl 2005-08-17 initial version
## quarl 2005-09-01 refactor, documentation; new options: --force, --keep,
## --simulate, --trace
# $HeadURL: http://svn.apache.org/repos/asf/subversion/branches/1.6.x/contrib/server-side/svn-fast-backup $
# $LastChangedRevision: 923804 $
# $LastChangedDate: 2010-03-16 15:22:28 +0000 (Tue, 16 Mar 2010) $
# $LastChangedBy: cmpilato $
# Originally based on svn-hot-backup.py, whose copyright notice states:
# ====================================================================
# Copyright (c) 2000-2004 CollabNet. All rights reserved.
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution. The terms
# are also available at http://subversion.tigris.org/license-1.html.
# If newer versions of this license are posted there, you may use a
# newer version instead, at your option.
#
# This software consists of voluntary contributions made by many
# individuals. For exact contribution history, see the revision
# history and logs, available at http://subversion.tigris.org/.
# ====================================================================
######################################################################
import sys, os, re
import getopt
import subprocess # python2.4
######################################################################
# Global Settings
# External programs this script runs.  They are handed to subprocess as
# argv[0] without a shell, so a bare name is presumably looked up on PATH;
# replace a value with an absolute path to pin a specific binary.
svnlook = "svnlook" # Path to svnlook (used to query the youngest revision)
svnadmin = "svnadmin" # Path to svnadmin (not referenced by the code visible here)
rsync = "rsync" # Path to rsync (performs the actual copy / hard-linking)
######################################################################
# Command line arguments
def usage():
    """Abort the program with the command-line help text.

    Never returns: the help text (with the script name substituted in) is
    carried by a SystemExit exception, so the interpreter writes it to
    stderr and exits with a non-zero status.
    """
    help_template = """Syntax: %s [OPTIONS] repos_path backup_dir
Makes a hot backup of a Subversion FSFS repository at REPOS_PATH to
BACKUP_DIR/repos-rev.
If a previous version exists, make hard links of its files using rsync.
As multiple FSFS backups share data via hardlinks, old backups use
almost no space, since a newer revision of a repository is almost a complete
superset of an older revision (excluding direct repository modifications).
Keeps up to N backups and deletes the rest. (N includes the current backup.)
OPTIONS:
-h, --help This screen
-q, --quiet Quieter than usual
-k, --keep=N Keep N backups instead of 64
-k, --keep=all Keep all backups (never delete any)
-f, --force Make a new backup even if one with current revision exists
-t, --trace Show actions
-s, --simulate Don't perform actions
"""
    program_name = sys.argv[0]
    raise SystemExit(help_template % program_name)
class Options:
    """Plain attribute container for the parsed command-line settings.

    It defines nothing itself; default_options() and parse_commandline()
    attach the individual settings as instance attributes.
    """
def default_options():
    """Return a fresh Options object populated with the built-in defaults."""
    initial_values = (
        ('force', False),
        ('trace', False),
        ('simulate', False),
        ('quiet', False),
        ('keep', 64),           # Number of backups to keep around
    )
    defaults = Options()
    for attribute, value in initial_values:
        setattr(defaults, attribute, value)
    return defaults
def parse_commandline():
    """Parse sys.argv and return the resulting Options object.

    Starts from default_options() and applies every flag given on the
    command line.  On top of the flag attributes the returned object
    carries:

      repo_dir    -- the repository path exactly as given by the user
      backup_dir  -- absolute path of the directory that holds the backups
      repo        -- base name of the repository, used as backup name prefix

    Any usage problem (unknown option, malformed --keep value, wrong number
    of positional arguments) and -h/--help end the program through usage().
    """
    options = default_options()

    try:
        opts, args = getopt.getopt(
            sys.argv[1:], 'qhk:fts',
            ['quiet', 'help', 'keep=', 'force', 'trace', 'simulate'])
    except getopt.GetoptError:
        # sys.exc_info() is spelled the same on every Python version,
        # unlike the "except X, e" / "except X as e" forms.
        sys.stderr.write("Error: %s\n" % sys.exc_info()[1])
        usage()

    for (o, a) in opts:
        if o == '-h' or o == '--help':
            usage()
        elif o == '-q' or o == '--quiet':
            options.quiet = True
        elif o == '-f' or o == '--force':
            options.force = True
        elif o == '-t' or o == '--trace':
            options.trace = True
        elif o == '-s' or o == '--simulate':
            options.simulate = True
        elif o == '-k' or o == '--keep':
            if a.strip().lower() == 'all':
                # delete_old_backups() prunes nothing when keep <= 0.
                options.keep = 0
            else:
                try:
                    options.keep = int(a)
                except ValueError:
                    # Report a usage error rather than dying with a
                    # traceback on e.g. "--keep=ten".
                    sys.stderr.write(
                        "Error: invalid value for --keep: '%s'\n" % a)
                    usage()
        else:
            raise Exception("Internal error")

    if len(args) != 2:
        usage()

    # Path to repository
    options.repo_dir = args[0]

    # Where to store the repository backup.  The backup will be placed in a
    # *subdirectory* of this location, named after the youngest revision.
    # Made absolute because the script later changes its working directory.
    options.backup_dir = os.path.abspath(args[1])

    options.repo = os.path.basename(os.path.abspath(options.repo_dir))

    return options
# Trailing "-REVISION" or "-REVISION-INCREMENT" part of a backup directory name.
_BACKUP_SUFFIX_RE = re.compile("-(?P<revision>[0-9]+)(-(?P<increment>[0-9]+))?$")


def _backup_sort_key(name):
    """Return the (revision, increment) integer pair encoded in NAME.

    A name without an increment gets -1 so that it sorts before every
    explicitly numbered backup of the same revision.
    """
    match = _BACKUP_SUFFIX_RE.search(name)
    increment = match.group('increment')
    if increment is None:
        increment = -1
    else:
        increment = int(increment)
    return (int(match.group('revision')), increment)


def comparator(a, b):
    """cmp-style ordering of two backup directory names, oldest first.

    Names are ordered by revision number, then by increment; a backup
    without an increment precedes the incremented backups of the same
    revision.  Both numbers are compared numerically, not as text.

    Returns -1, 0 or 1.  0 is returned when both names encode the same
    revision and increment (e.g. "repo-5" and "repo-05"), which keeps the
    function a consistent ordering for list.sort().

    Both names must end in "-REV" or "-REV-INC"; anything else raises
    AttributeError, so callers are expected to filter names first.
    """
    key_a = _backup_sort_key(a)
    key_b = _backup_sort_key(b)
    # Spelled without the cmp() builtin, which newer Pythons no longer have.
    return (key_a > key_b) - (key_a < key_b)
def pipe(command):
    """Run COMMAND (an argv list) and return what it wrote to stdout.

    Leading and trailing whitespace is stripped from the captured output.
    The child's exit status is not examined.
    """
    child = subprocess.Popen(command, stdout=subprocess.PIPE)
    captured_stdout = child.communicate()[0]
    return captured_stdout.strip()
def readfile(filename):
    """Return the whitespace-stripped contents of FILENAME.

    Returns '' when the file cannot be opened or read (missing file,
    permission problem, path is a directory, ...).  Only I/O failures are
    treated that way; anything else -- a programming error, Ctrl-C --
    propagates to the caller instead of being silently swallowed.
    """
    try:
        f = open(filename)
        # Nested try/finally (rather than "with") keeps this valid on
        # Python 2.4 while still closing the file deterministically.
        try:
            return f.read().strip()
        finally:
            f.close()
    except (IOError, OSError):
        return ''
def runcmd(cmd):
    """Run the argv list CMD, honouring the --trace and --simulate options.

    With --trace the command is echoed to stderr first.  With --simulate
    nothing is executed and 0 (success) is returned; otherwise the return
    value is the exit status of the command.
    """
    if options.trace:
        sys.stderr.write("# %s\n" % (cmd,))
    if not options.simulate:
        return subprocess.call(cmd)
    return 0
def deltree(path):
    """Recursively delete PATH by running 'rm -r' through runcmd().

    Going through runcmd() means the deletion is traced and simulated like
    every other external command.  The exit status is not checked.
    """
    remove_command = ['rm', '-r', path]
    runcmd(remove_command)
def get_youngest_revision():
    """Return the youngest revision of the repository as a string of digits.

    The repository is addressed relative to the current working directory,
    so the caller must already have changed into the repository root.

    Exits the program (SystemExit) if db/fs-type does not say 'fsfs', or if
    svnlook does not print a revision number.
    """
    fs_type = readfile(os.path.join('db', 'fs-type'))
    if fs_type != 'fsfs':
        raise SystemExit("Path '%s' doesn't contain a FSFS repository"%options.repo_dir)

    youngest = pipe([svnlook, "youngest", "."])
    # pipe() ignores the exit status, so a failing svnlook shows up here as
    # empty (or non-numeric) output.  Stop now rather than create a backup
    # directory with a malformed name.
    if not youngest.isdigit():
        raise SystemExit("%s: '%s youngest' did not report a revision number for '%s' (got '%s')"
                         % (sys.argv[0], svnlook, options.repo_dir, youngest))
    return youngest
def list_repo_backups():
    '''Return a sorted list of backups for this repository.

    The result holds the names (not full paths) of the entries in
    options.backup_dir that look like "<repo>-<rev>" or "<repo>-<rev>-<inc>",
    ordered oldest first by comparator().  In-progress "*.tmp" directories
    do not match and are left out.
    '''
    # The repository name is literal text, not a pattern: escape it so that
    # names containing regex metacharacters ('.', '+', ...) match only
    # themselves.
    regexp = re.compile(re.escape(options.repo) + "-[0-9]+(-[0-9]+)?$")
    directory_list = [x for x in os.listdir(options.backup_dir) if regexp.match(x)]
    directory_list.sort(comparator)
    return directory_list
def delete_old_backups():
if options.keep <= 0:
return
for item in list_repo_backups()[:-options.keep]:
old_backup_subdir = os.path.join(options.backup_dir, item)
print " Removing old backup: ", old_backup_subdir
deltree(old_backup_subdir)
def find_next_backup_name(youngest):
# If there is already a backup of this revision, then append the next
# highest increment to the path. We still need to do a backup because the
# repository might have changed despite no new revision having been
# created. We find the highest increment and add one rather than start
# from 1 and increment because the starting increments may have already
# been removed due to options.keep.
regexp = re.compile(options.repo + "-" + youngest + "(-(?P<increment>[0-9]+))?$")
directory_list = os.listdir(options.backup_dir)
young_list = [ x for x in directory_list if regexp.match(x) ]
young_list.sort(comparator)
if not young_list:
return "%s-%s" %(options.repo, youngest)
# Backups for this revision exist already.
if not options.force:
if not options.quiet:
print "Backup already exists at",young_list[-1]
raise SystemExit
increment = int(regexp.match(young_list[-1]).groupdict()['increment'] or '0')
return "%s-%s-%d" %(options.repo, youngest, increment+1)
def do_rsync_backup():
youngest = get_youngest_revision()
if not options.quiet:
print "Beginning hot backup of '%s' (youngest revision is %s)..." %(options.repo, youngest),
backup_subdir = os.path.join(options.backup_dir, find_next_backup_name(youngest))
backup_tmpdir = backup_subdir + '.tmp'
if os.path.exists(backup_tmpdir):
raise SystemExit("%s: Backup in progress? '%s' exists -- aborting."%(sys.argv[0],backup_tmpdir))
if not options.simulate:
os.mkdir(backup_tmpdir) # ensures atomicity
if os.path.exists(backup_subdir):
# Check again after doing mkdir (which serves as a mutex acquire) --
# just in case another process just finished the same backup.
if not options.quiet:
print "Backup already exists at",backup_subdir
raise SystemExit
previous_backups = list_repo_backups()
### Use rsync to make a copy.
# We need to copy the 'current' file first.
# Don't copy the transactions/ directory.
# See http://svn.apache.org/repos/asf/subversion/trunk/notes/fsfs
rsync_dest = os.path.join(backup_tmpdir,'')
# copy db/current. -R tells rsync to use relative pathnames.
if runcmd([rsync, '-aR', 'db/current', rsync_dest]):
raise "%s: rsync failed" %sys.argv[0]
# Now copy everything else.
cmd = [rsync, '-a',
'--exclude', 'db/current',
'--exclude', 'db/transactions/*',
'--exclude', 'db/log.*',
'.', rsync_dest]
# If there's a previous backup, make hard links against the latest.
if previous_backups:
cmd += ['--link-dest', os.path.join(options.backup_dir, previous_backups[-1])]
if runcmd(cmd):
raise "%s: rsync failed" %sys.argv[0]
# Rename to final name.
if not options.simulate:
os.rename(backup_tmpdir, backup_subdir)
print "Finished backup to", backup_subdir
# Script entry point.  'options' has to be a module-level global: the helper
# functions above read it directly instead of taking it as a parameter.
options = parse_commandline()
# The repository is accessed through relative paths ('db/fs-type', '.',
# 'db/current'), so work from inside it.  options.backup_dir was made
# absolute while parsing and is therefore unaffected by the chdir.
os.chdir(options.repo_dir)
do_rsync_backup()
# Prune afterwards, so that the backup just made counts towards --keep.
delete_old_backups()