#!/bin/bash # # FSBackup - fsbackup.sh - use tar to back up files # # FSBackup v1.6 # # $Id: fsbackup.sh,v 1.3 2005/04/16 07:59:16 mitch Exp $ # # Copyright (c) 2003-2005, FullSpan Software (www.fullspan.com) # # Licensed under the BSD License # OSI Certified Open Source Software (www.opensource.org) # # You may not use this file except in compliance with the License. You should # have received a copy of the License with this distribution, or you can find # it at: http://www.fullspan.com/shared/license.html. # # NO WARRANTY - USE AT YOUR OWN RISK. All software and other materials # distributed under the License are provided on an "AS IS" BASIS, WITHOUT # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. ###################################################################### # # Usage messages # ###################################################################### BRIEF_USAGE=\ " Usage: fsbackup.sh type bkname defdir destdir [options] Run with --help (or see fsbackup.html) for detailed usage instructions. " # NOTE: In the usage message below, there are one or more examples like this: # # echo -e "file1\\ndir2" # # where the backslash has been doubled so that the example appears correctly in # the usage output. However, if you are reading or copying the example directly # from this script, make sure that in your own file you only use a single # backslash: # # echo -e "file1\ndir2" USAGE=$(cat <<'EOT' Usage: fsbackup.sh type bkname defdir destdir [options] FSBackup creates a .tar.gz file containing the files you specify. It uses GNU tar and gzip. REQUIRED PARAMETERS The first 4 parameters are required and must be given in the order shown above. type can be: full, diff, or incr. full is a backup of all files. diff is a differential backup: a backup of new and modified files since the most recent full backup. incr is an incremental backup: a backup of new and modified files since the most recent backup of any type. 
bkname is a descriptive name for the backup. Choose a name that is suitable for use in a filename, since this name is used as the base of the filename for the backup definition file, data file, log file, and timestamp file. defdir is the directory where the backup definition file (see below) is stored. destdir is the directory where the files created by FSBackup will be stored; it must already exist when you invoke FSBackup. DIRECTORIES FSBackup creates several directories under (if they do not exist yet): /current holds the most recent full backup for each , plus any incremental or differential backups that were done since that full backup. /archive holds previous full backups and their corresponding incremental and/or differential backups. /tstamp contains timestamp files that are used to support the incremental and differential features. By tracking the most recent backup time, FSBackup is able to instruct tar to only capture files created or updated after that time. OPERATION FSBackup creates the backup file --.tar.gz and the log file --.log in the directory /current. For example, if the backup is named 'backup-websrv', then the filenames for a full backup would look like: backup-websrv-2003-04-09-011500-full.tar.gz backup-websrv-2003-04-09-011500-full.log When a full backup is created, FSBackup examines the /current directory to see if there are any existing backups for the same . If so, those files are moved to the /archive directory before beginning the new full backup. OPTIONS The following options are supported and can be specified in any order: -maxa[rchive] number Default is 4. This is the number of prior full backup sets that FSBackup will keep in the 'archive' directory (in addition to the current backup set that is kept in the 'current' directory). After this threshold is reached, when a new full backup is created, the oldest prior backup in 'archive' is deleted. 
-c[ollect] collectdir If the collect option is specified, the backup and log file will be copied to the directory . This is useful for doing local backups on several computers, and having the files pushed to a central location, usually in preparation for copying to removable media. -maxc[ollect] number Default is 2. This is the number of prior full backup sets that FSBackup will keep in the directory. After this threshold is reached, when a new full backup is created, the oldest prior backup in is deleted. -n[otify] address If the notify option is specified, a brief summary of the backup (the number of files and size of the backup file) will be sent to the address given. Specify an email address to send the summary via email, or specify an address of '-' to print the summary to stdout. BACKUP FILE SELECTION Create a backup definition file called -list.sh in the directory. This file should contain a set of commands to generate the list of files and/or directories to back up. The definition file will be 'source'd (not invoked) to generate the list of files. It can be as simple or complex as needed. It can simply echo a list of files, or it can run one or more programs to generate the list. The list must be in the format of one file or directory per line, as consumed by the tar --files-from option. Here is a very simple definition file: echo 'somefile' echo 'somedir' Here is a more complex file: #!/bin/bash cd /home/user1/data echo -e '-C\\n/home/someuser/data' echo -e "file1\\ndir2" cd /home/user2/data echo '-C' echo '/home/user2/data' find . -maxdepth 1 -not -name 'db' -not -name '.' -printf '%P\\n' ls -t db/checkpoint.* | head -1 Here are some tips for creating a backup definition file: 1. The simplest technique is to just echo a file or directory name. You can also echo a list of names, as long as you insert newlines so that each name appears on its own line when the script is run. 2. The '#!/bin/bash' line is optional. 
Because the -list.sh file will be sourced, not invoked, it is not required. However, you may want to include this line to simplify running the -list.sh as a standalone script for testing, so you do not have to prefix the script name with 'source ' or '. '. 3. Similarly, the 'cd' commands are not needed for FSBackup, but they allow the -list.sh script to be run for testing, so you can see the file list that will be generated when FSBackup runs the script. 4. The -C commands are a tar feature that allow you to instruct tar to change directories. For example, say you need to backup two files that are in separate directory trees (i.e., their only common parent directory is /): /home/jane/a/b/c/d/hello.txt /var/db/x/y/foo.log Without the -C flag, you would have to run FSBackup from the / directory and use a backup definition file like this: echo 'home/jane/a/b/c/d/hello.txt' echo 'var/db/x/y/foo.log' The tar file will store the files with all the path information shown, that is, 'home/jane/a/b/c/d/hello.txt' and 'var/db/x/y/foo.log'. But let's say you don't want to store all the parent directories. With the -C option, you could use a definition file like this: cd /home/jane/a/b/c echo '-C' echo '/home/jane/a/b/c' echo 'd/hello.txt' cd /var/db/x echo '-C\\n/var/db/x' echo 'y/foo.log' With this file, tar will store the files as 'd/hello.txt' and 'y/foo.log'. Obviously this example is simplified, but you can see that the -C option is handy (in fact, essential) when you want to 'factor out' intervening directories. Notice that the -C needs to be specified on one line, and then the directory on the next line (or, you can use a newline character to force the directory name onto a new line). 5. tar does not support wildcards in file or directory names that you specify for inclusion in the tar file. Therefore, to use wildcards you should use ls, find, or some other command in your backup definition file, to expand the wildcards to specific names that tar will process. 
tar does support wildcards for files that you want to exclude - this is
discussed below.

6. To test your backup definition file, first run it (or source it) as a
shell script, and examine the output. The list of files and directories is
what will be fed to tar. Then, do a test backup and examine the resulting
log file and tar.gz file, to ensure that the files you intended are
included.

FILE EXCLUSION

Optionally, you can create a file called -exclude.txt in the directory.
This file should contain a list of filenames or filename patterns to
exclude from the backup. The format of the file is one pattern per line, as
consumed by the tar --exclude-from option. For example:

*.bak

EOT
)

######################################################################
#
# Functions
#
######################################################################

# Extended-regex fragment matching the timestamp portion of a backup
# filename (YYYY-MM-DD-hhmmss). It is interpolated into the awk patterns
# used by moveFilesToArchive and removeAgedFiles.
TSTAMP_AWK_PATTERN="[[:digit:]]{4}-[[:digit:]]{2}-[[:digit:]]{2}-[[:digit:]]{6}"

# errExit MESSAGE
#
# Print MESSAGE, followed by a pointer to the help output, and terminate
# the script with exit status 1.
errExit () {
    echo "Error:"
    echo " $1"
    echo "Run with no parameters for brief help, or with --help for extended help"
    exit 1
}

# getNumFiles DIR NAME_PATTERN
#
# Print the number of entries directly inside DIR whose name matches the
# find(1) -name pattern NAME_PATTERN. Errors from find (for example when
# DIR does not exist) are discarded, in which case the count is 0.
getNumFiles () {
    find "$1" -maxdepth 1 -name "$2" -printf "%f\n" 2>/dev/null | \
        wc -l | \
        awk '{ gsub(/ /, ""); print }'
}

# moveFilesToArchive DATADIR ARCHDIR BKNAME
#
# Move every backup/log file belonging to BKNAME out of DATADIR and into
# ARCHDIR (created on demand). Only names of the exact form
# BKNAME-<timestamp>-(full|diff|incr).(tar.gz|log) are moved.
moveFilesToArchive () {
    # Params: DATADIR ARCHDIR BKNAME
    MOVE_FILE_LIST=$(find "$1" -maxdepth 1 -name "$3-*" -printf "%P\n" | \
        awk --re-interval -v datadir="$1" -v bkname="$3" \
            -v tstamp_pattern="$TSTAMP_AWK_PATTERN" \
            'BEGIN {
                 pattern = "^" bkname "-(" tstamp_pattern ")-(full|diff|incr)\\.(tar\\.gz|log)$"
             }
             {
                 if (match($0, pattern))
                     print datadir "/" $0
             }')

    if [ -n "$MOVE_FILE_LIST" ]; then
        mkdir -p "$2"
        # Split on newlines only so that paths containing spaces or quotes
        # reach mv as single arguments.
        printf '%s\n' "$MOVE_FILE_LIST" | \
            xargs -d '\n' mv --target-directory="$2" --
    fi
}

# removeAgedFiles MAXARCHIVE ARCHDIR BKNAME ["trace"]
#
# Prune old backup sets for BKNAME from ARCHDIR so that at most MAXARCHIVE
# full backups (and the diff/incr/log files that are not older than the
# oldest retained full backup) remain. When MAXARCHIVE is 0, every file
# for BKNAME is deleted. If the optional 4th parameter is "trace", nothing
# is deleted; the decisions are printed instead.
removeAgedFiles () {
    # Params: MAXARCHIVE ARCHDIR BKNAME ["trace"]

    # Start from a clean slate so a value left over from an earlier call
    # can never leak into this one.
    KEEPSTAMP=""

    # Nothing to prune if the target directory does not exist. This must
    # test the directory passed in ($2), not the global ARCHDIR, so that
    # the function works for any directory it is asked to prune.
    if [ ! -e "$2" ]; then
        return 0
    fi

    if [ "$1" -gt 0 ]; then
        # Find the timestamp of the oldest full backup that we need to keep
        # This is not the filesystem timestamp (which can change if the file
        # is copied), but the timestamp portion of the filename
        KEEPSTAMP=$(find "$2" -maxdepth 1 -name "$3-*-full.tar.gz" -printf "%P\n" | \
            sort -r | \
            awk --re-interval -v maxarchive="$1" -v bkname="$3" \
                -v tstamp_pattern="$TSTAMP_AWK_PATTERN" \
                'BEGIN {
                     pattern = "^" bkname "-(" tstamp_pattern ")-full\\.tar\\.gz$"
                     numfound = 0
                 }
                 {
                     if (match($0, pattern, arr) && arr[1] != "") {
                         numfound++
                         if (numfound == maxarchive) {
                             print arr[1]
                             exit 0
                         }
                     }
                 }')
    fi

    if [ "$4" == "trace" ]; then
        if [ "$1" -le 0 ]; then
            echo "Will delete all files"
        elif [ -z "$KEEPSTAMP" ]; then
            echo "Will not delete any files"
        else
            echo "Will delete files older than KEEPSTAMP: $KEEPSTAMP"
        fi
    fi

    # Fewer than MAXARCHIVE full backups exist: nothing is old enough.
    if [ "$1" -gt 0 ] && [ -z "$KEEPSTAMP" ]; then
        return 0
    fi

    # Delete all the files for our bkname that are older than KEEPSTAMP
    DELETE_FILE_LIST=$(find "$2" -maxdepth 1 -name "$3-*" -printf "%P\n" | \
        sort -r | \
        awk --re-interval -v maxarchive="$1" -v archdir="$2" -v bkname="$3" \
            -v trace="$4" -v keepstamp="$KEEPSTAMP" \
            -v tstamp_pattern="$TSTAMP_AWK_PATTERN" \
            'BEGIN {
                 pattern = "^" bkname "-(" tstamp_pattern ")-(full|diff|incr)\\.(tar\\.gz|log)$"
                 doTrace = (trace == "trace")
             }
             # Find all the files for this bkname. Of these files:
             #   If maxarchive is 0, delete all files
             #   Otherwise, delete all files with a timestamp less than keepstamp
             #   (that is, all files older than oldest one we need to retain)
             {
                 if (match($0, pattern, arr)) {
                     doDelete = (maxarchive == 0 || (keepstamp != "" && arr[1] != "" && arr[1] < keepstamp))
                     heading = ""
                     if (doTrace)
                         heading = (doDelete ? "delete: " : "keep: ")
                     if (doDelete || doTrace)
                         print heading archdir "/" $0
                 }
             }')

    if [ -n "$DELETE_FILE_LIST" ]; then
        if [ "$4" == "trace" ]; then
            echo "$DELETE_FILE_LIST"
        else
            # Newline-delimited so that paths with spaces are removed intact.
            printf '%s\n' "$DELETE_FILE_LIST" | xargs -d '\n' rm -f --
        fi
    fi
}

######################################################################
#
# Main
#
######################################################################

# Process command line

case "$1" in
    -h* | --h*)
        echo -e "$USAGE"
        exit 0
        ;;
esac

if [ $# -lt 4 ]; then
    echo -e "$BRIEF_USAGE"
    exit 1
fi

BKTYPE="$1"
BKNAME="$2"
DEFDIR="$3"
DESTDIR="$4"

DATADIR="$DESTDIR/current"
ARCHDIR="$DESTDIR/archive"
TSTAMPDIR="$DESTDIR/tstamp"

case "$BKTYPE" in
    full | diff | incr)
        ;;
    *)
        errExit "Unknown backup type: $BKTYPE"
        ;;
esac

shift 4

NOTIFY_DEST=""
COLLECTDIR=""
MAXARCHIVE="4"
MAXCOLLECT="2"

# Each option takes exactly one value; a missing value shows up as an
# empty $2 and is reported by the corresponding errExit below.
while [ -n "$1" ]; do
    case "$1" in
        -n*)
            NOTIFY_DEST="$2"
            if [ -z "$NOTIFY_DEST" ]; then
                errExit "The notify option requires a notification address"
            fi
            shift 2
            ;;
        -c*)
            COLLECTDIR="$2"
            if [ -z "$COLLECTDIR" ]; then
                errExit "The collect option requires a collectdir parameter"
            fi
            shift 2
            ;;
        -maxa*)
            MAXARCHIVE="$2"
            if [ -z "$MAXARCHIVE" ]; then
                errExit "The maxarchive option requires a number parameter"
            fi
            # grep -E replaces the obsolescent egrep wrapper
            if ! echo "$MAXARCHIVE" | grep -Eq "^[0-9]{1,3}$"; then
                errExit "The maxarchive number parameter must be an integer between 0 and 999"
            fi
            shift 2
            ;;
        -maxc*)
            MAXCOLLECT="$2"
            if [ -z "$MAXCOLLECT" ]; then
                errExit "The maxcollect option requires a number parameter"
            fi
            if ! echo "$MAXCOLLECT" | grep -Eq "^[0-9]{1,3}$"; then
                errExit "The maxcollect number parameter must be an integer between 0 and 999"
            fi
            shift 2
            ;;
        *)
            errExit "Unrecognized option: $1"
            ;;
    esac
done

# Compute variables

# There are two timestamp formats.
#
# The internal format is recorded inside the "tstamp" file, for usage by tar:
#
#   Format:  YYYY-MM-DD hh:mm:ss ZON
#   Example: 2003-04-07 17:22:45 PDT
#
# The external format is used as part of the backup filename.
It is the same # as the tstamp format, except that the punctuation characters are changed to # make the timestamp more suitable for use in a filename, and the time zone # is dropped. # # Format: YYYY-MM-DD-hhmmss # Example: 2003-04-07-172245 TSTAMP_INTERNAL=$(date '+%Y-%m-%d %T %Z') TSTAMP_EXTERNAL=$(echo $TSTAMP_INTERNAL | awk '{ gsub(/ ...$/, ""); gsub(/ /, "-"); gsub(/:/, ""); print }') BKFILE_BASE="$BKNAME-$TSTAMP_EXTERNAL-$BKTYPE" BKFILE="$DATADIR/$BKFILE_BASE.tar" LOGFILE="$DATADIR/$BKFILE_BASE.log" INCLFILE="$DEFDIR/$BKNAME-list.sh" EXCLFILE="$DEFDIR/$BKNAME-exclude.txt" TSTAMPFILE_FULL="$TSTAMPDIR/$BKNAME-tstamp-full.txt" TSTAMPFILE_PARTIAL="$TSTAMPDIR/$BKNAME-tstamp-partial.txt" if [ "$BKTYPE" = "full" ]; then TSTAMPFILE="$TSTAMPFILE_FULL" else TSTAMPFILE="$TSTAMPFILE_PARTIAL" fi # Validate state of backup and include files if [ -e "$BKFILE" ]; then errExit "Backup file already exists: $BKFILE" fi if [ ! -e "$INCLFILE" ]; then errExit "Missing list of files to backup: $INCLFILE" fi # The EXCLUDE_CLAUSE and NEWER_CLAUSE are only needed sometimes. But because # they are quoted on the tar command line (e.g., "$EXCLUDE_CLAUSE"), tar # complains if they are empty (""). Therefore, we stuff them with a default # dummy value that is harmless: a repetition of the --create parameter that is # already being used on the tar command line. EXCLUDE_CLAUSE="--create" if [ -e "$EXCLFILE" ]; then EXCLUDE_CLAUSE="--exclude-from=$EXCLFILE" fi NEWER_CLAUSE="--create" # For a differential backup, our timestamp is from the most recent full backup if [ "$BKTYPE" = "diff" ]; then if [ ! -e "$TSTAMPFILE_FULL" ]; then errExit "You must do a full backup before you can do a differential backup" fi NEWER_CLAUSE="--newer=$(cat "$TSTAMPFILE_FULL")" # For an incremental backup, our timestamp is from the most recent backup of any kind elif [ "$BKTYPE" = "incr" ]; then if [ ! -e "$TSTAMPFILE_FULL" ] && [ ! 
-e "$TSTAMPFILE_PARTIAL" ]; then errExit "You must do a full backup before you can do an incremental backup" fi TSTAMP_FULL="" TSTAMP_PARTIAL="" if [ -e "$TSTAMPFILE_FULL" ]; then TSTAMP_FULL="$(cat "$TSTAMPFILE_FULL")" fi if [ -e "$TSTAMPFILE_PARTIAL" ]; then TSTAMP_PARTIAL="$(cat "$TSTAMPFILE_PARTIAL")" fi # If only one tstamp exists, use that one if [ -z "$TSTAMP_FULL" ]; then TSTAMP_MOST_RECENT="$TSTAMP_PARTIAL" elif [ -z "$TSTAMP_PARTIAL" ]; then TSTAMP_MOST_RECENT="$TSTAMP_FULL" else # Both files exist, use the most recent if [ "$TSTAMPFILE_FULL" -nt "$TSTAMPFILE_PARTIAL" ]; then TSTAMP_MOST_RECENT="$TSTAMP_FULL" else TSTAMP_MOST_RECENT="$TSTAMP_PARTIAL" fi fi NEWER_CLAUSE="--newer=$TSTAMP_MOST_RECENT" fi # Create the destination directories if they do not exist yet mkdir -p "$DATADIR" mkdir -p "$TSTAMPDIR" # If this is a full backup, archive the existing files (if any) for this # backup name, and remove any old archive files over the max number that # we need to keep if [ "$BKTYPE" = "full" ]; then moveFilesToArchive "$DATADIR" "$ARCHDIR" "$BKNAME" removeAgedFiles "$MAXARCHIVE" "$ARCHDIR" "$BKNAME" fi # Do the backup . "$INCLFILE" | \ tar \ --create \ --verbose \ "$EXCLUDE_CLAUSE" \ "$NEWER_CLAUSE" \ --files-from=- \ "--file=$BKFILE" \ >"$LOGFILE" 2>&1 if [ ! -e "$BKFILE" ]; then errExit "Backup failed: no file created: $BKFILE" fi # Run the gzip command separately instead of using the -z or --gzip option # on the tar command. This maintains the original name of the .tar file, # even if the .tar.gz file file is renamed. gzip "$BKFILE" BKFILE_GZ="$BKFILE.gz" if [ ! 
-e "$BKFILE_GZ" ]; then errExit "Backup failed: no gzip file created: $BKFILE_GZ" fi # Update the tstamp file (this only happens if the backup is # successful - if it failed, we would have exited before now if [ "$BKTYPE" = "full" ]; then echo "$TSTAMP_INTERNAL" >$TSTAMPFILE_FULL rm -f $TSTAMPFILE_PARTIAL else echo "$TSTAMP_INTERNAL" >$TSTAMPFILE_PARTIAL fi # For Windows / Cygwin, it takes a moment for the file size to be updated if [ ! -z "$NOTIFY_DEST" ] || [ ! -z "$COLLECTDIR" ]; then case "$OSTYPE" in cygwin* ) sleep 15 ;; esac fi if [ ! -z "$NOTIFY_DEST" ]; then # Get the number of files (not counting directories) in the tar file. # It would be much faster to do this by grepping the log file instead # of reading the tar file, but reading the tar file is a good validation # that we have created a usable file. # # Fast but non-validating: NUMFILES=$(cat "$LOGFILE" | grep -v '/$' | wc -l | awk '{ gsub(/ /, ""); print }') NUMFILES=$(tar -tzf "$BKFILE_GZ" | grep -v '/$' | wc -l | awk '{ gsub(/ /, ""); print }') # Get the file size TARSIZE=$(du -k "$BKFILE_GZ" | cut -f 1) # Report the summary SUMMARY="$BKFILE_BASE.tar.gz: $NUMFILES files, $TARSIZE KB" if [ "$NOTIFY_DEST" = "-" ]; then echo "$SUMMARY" elif [ ! -z "$FSBACKUP_MAIL" ]; then "$FSBACKUP_MAIL" "$SUMMARY" "$NOTIFY_DEST" else mail -s "$SUMMARY" "$NOTIFY_DEST" <