#!/usr/bin/env python
# Parallel cp program
# Copyright (c) Genome Research Ltd 2012
# Author Guy Coates <[email protected]>
# This program is released under GNU Public License V2 (GPLv2)
""" This program copies a directory tree in parallel.
Algorithm:
pcp runs in two phases. Phase I is a parallel walk of the file tree, involving all
MPI ranks in a peer-to-peer algorithm. The walk constructs the list of files to be
copied and creates the destination directory hierarchy.
In Phase II, the actual files are copied. Phase II uses a master-slave algorithm.
R0 is the master and dispatches file copy instructions to the slaves (R1...Rn).
Although slightly less efficient than the peer-to-peer algorithm in phase I,
using master-slave simplifies the checkpoint/restore implementation.
As stat is a relatively slow operation on lustre, the code jumps through some
hoops to avoid doing stats during Phase I.
"""
#import rpdb2
#rpdb2.start_embedded_debugger("XXXX", fAllowRemote=True,timeout=10)
import argparse
import hashlib
import fnmatch
import os
import stat
import sys
import traceback
import time
import ctypes
import sqlite3
import pickle
import math
import random
import signal
import gzip
try:
from pcplib import lustreapi
WITHLUSTRE = True
except Exception:
WITHLUSTRE = False
from pcplib import parallelwalk
from pcplib import statfs
from pcplib import safestat
from collections import deque
from mpi4py import MPI
import pkg_resources
import errno
try:
__version__ = pkg_resources.require("pcp")[0].version
except pkg_resources.DistributionNotFound:
__version__ = "UNRELEASED"
clib = ctypes.CDLL("libc.so.6", use_errno=True)
class Timer:
"""Simple timer / stopwatch class."""
def __init__(self):
self.running = False
self.elapsedtime = 0
self.stoptime = 0
self.starttime = 0
def reset(self):
"""Reset the timer to 0."""
self.__init__()
def start(self):
"""Start the timer."""
self.running = True
self.starttime = time.time()
def stop(self):
"""Stop the timer."""
self.running = False
self.stoptime = time.time()
self.elapsedtime += self.stoptime - self.starttime
def read(self):
"""Get the elapsed time."""
if self.running:
return (time.time() - self.starttime) + self.elapsedtime
else:
return self.elapsedtime
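# Typical use: t = Timer(); t.start(); ...; t.stop(); elapsed = t.read()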
def timestamp():
return time.strftime("%b %d %H:%M:%S")
def createDB():
# This database holds all the information about files to be copied,
# their checksums as well as the state of the copy.
# State
# 0 Not copied.
# 1 Dispatched for copy.
# 2 Copy complete.
# 3 Dispatched for md5
# 4 md5 complete
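# 5 Dispatched for verification md5 (-Rv runs)
# 6 Verification md5 complete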
filedb = sqlite3.connect(":memory:")
filedb.text_factory = str
filedb.execute("""CREATE TABLE FILECPY(
ID INTEGER PRIMARY KEY AUTOINCREMENT,
SORTORDER INTEGER DEFAULT -1,
FILENAME TEXT,
STATE INTEGER DEFAULT 0,
SRCMD5 TEXT,
SIZE INTEGER,
CHUNKS INTEGER DEFAULT -1,
ATTEMPTS INTEGER DEFAULT 0,
LASTRANK INTEGER DEFAULT 0)""")
filedb.execute("""CREATE INDEX COPY_IDX ON FILECPY(STATE, SORTORDER, LASTRANK)""")
# Table to hold program arguments
filedb.execute("""CREATE TABLE ARGUMENTS(
ID INTEGER PRIMARY KEY AUTOINCREMENT,
ARGS BLOB)""")
return(filedb)
# Dump the database out to disk
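# (written to a temporary "__PARTIAL__" file and renamed into place so an
# interrupted dump cannot clobber an existing checkpoint)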
def dumpDB(statedb, filename):
tmpfile = filename+"__PARTIAL__"
dbfile = gzip.open(tmpfile, "wb")
for l in statedb.iterdump():
dbfile.write(l + "\n")
dbfile.close()
os.rename(tmpfile, filename)
# Restore the database state from a previous run so we
# can resume a copy.
def restoreDB(filename):
filedb = sqlite3.connect(":memory:")
filedb.text_factory = str
dumpfile = gzip.open(filename, "rb")
filedb.executescript(dumpfile.read())
filedb.commit()
dumpfile.close()
argp = filedb.execute("SELECT ARGS FROM ARGUMENTS WHERE ID == 1").fetchone()
args = pickle.loads(argp[0])
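# Re-queue work that was in flight when the checkpoint was written:
# dispatched copies (state 1) revert to "not copied" (0) and dispatched
# md5s (state 3) revert to "copy complete" (2).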
filedb.execute("UPDATE FILECPY SET STATE = 0 WHERE STATE = 1;")
filedb.execute("UPDATE FILECPY SET STATE = 2 WHERE STATE = 3;")
filedb.execute("UPDATE FILECPY SET ATTEMPTS = 0;")
filedb.execute("UPDATE FILECPY SET LASTRANK = 0;")
return(filedb, args)
def parseargs():
parser = MPIargparse(description=
"Copy a directory tree in parallel",
formatter_class =
argparse.RawDescriptionHelpFormatter,
epilog="""
This program traverses a directory tree and copies files in the tree in
parallel. It does not copy individual files in parallel. It should be invoked
via mpirun.
If run with the -l or -lf flag, pcp will be stripe aware. -l will cause
stripe information to be copied from the source files and directories. -lf will
cause all files and directories on the destination to be striped, regardless of
the striping on the source.
Striping behaviour can be further modified with -ls and -ld. A minimum file size
can be set with -ls. Files below this size will not be striped, regardless of
the source striping. -ld will cause all directories to be unstriped.
-l requires that the source and destination filesystems must be lustre.
-lf can be used when only the destination filesystem is lustre.
For maximum efficiency, ensure tasks are spread across as many different
machines as possible to prevent local network bottlenecks.
""")
parser.add_argument("SOURCE", help="source directory", nargs="?")
parser.add_argument("DEST", help="destination directory", nargs="?")
parser.add_argument("-b",
help="Copy files larger than C Mbytes in C Mbyte chunks",
default=500, type=int, metavar="C")
parser.add_argument("-c", help="verify copy with checksum", default=False,
action="store_true")
parser.add_argument("-d", help="dead worker timeout (seconds)", default=10,
type=int)
parser.add_argument("-g", help="only copy files matching glob",
default=None)
parser.add_argument("-i",
help=("Create incremental backup with hard links to PREVBKUP."
" Files are compared by mode, owner, mtime and size."
" If a source file matches the corresponding file in directory PREVBKUP,"
" the file in PREVBKUP is hard linked to the destination."
" Otherwise a source file with no matching destination file"
" is copied from source to destination."),
type=str, metavar="PREVBKUP", default=None)
parser.add_argument("-n", "--dry-run",
help="perform a trial run with no copies made",
action="store_true", default=False)
parser.add_argument("-t",
help="retry file copies N times in case of IO errors",
type=int, metavar="N", default=3)
parser.add_argument("-p",
help=("preserve permissions and timestamps,"
" and ownership if running as root"),
default=False, action="store_true")
parser.add_argument("-v", help="verbose", default=False,
action="store_true")
parser.add_argument("-V", "--version", help="print version number",
action='version',
version=os.path.basename(sys.argv[0]) + \
" version " + __version__)
group = parser.add_mutually_exclusive_group()
group.add_argument("-l", help="copy lustre stripe information",
default=False, action="store_true")
group.add_argument("-lf",
help=("Force striping of all files and directories. Can be combined"
" with -ls and -ld."), default=False, action="store_true")
parser.add_argument("-ls",
help=("do not stripe files smaller than B "
"bytes. Implies -l. Size can be suffixed"
"with k,M,G,T,P"), metavar="B", default=0)
parser.add_argument("-ld",
help="Do not stripe diretories.", default=False,
action="store_true")
parser.add_argument("-u",
help="Copy only when the source file is newer than the destination file,"
" or the destination file is missing.", default=False, action="store_true")
parser.add_argument("-R",
help=("Restart a copy from a checkpoint file DUMPFILE."),
type=str, metavar="DUMPFILE", default=None)
parser.add_argument("-Rv",
help=("Verify previous copy from a checkpoint file DUMPFILE."),
type=str, metavar="DUMPFILE", default=None)
parser.add_argument("-K",
help=("Enable and write checkpoints to file DUMPFILE."),
type=str, metavar="DUMPFILE", default=None)
parser.add_argument("-Km",
help=("Checkpoint every N minutes."),
type=int, metavar="N", default=60)
parser.add_argument("-Kx",
help=("Checkpoint before exit to retain history of transfer"),
default=False, action="store_true")
if len(sys.argv) == 1:
parser.print_help()
Abort()
args = parser.parse_args()
# setting blocksize = 0 will disable chunk copying.
if args.b == 0:
args.b = INFINITY
if not args.SOURCE and not (args.R or args.Rv):
print "You must specify a source directory!"
parser.print_help()
Abort()
if not args.DEST and not (args.R or args.Rv):
print "You must specify a destination directory!"
parser.print_help()
Abort()
if args.ls != 0:
args.ls = SIConvert(args.ls)
if args.ls == -1:
print "Error: incorrect size specification."
Abort()
return(args)
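# Illustrative checkpoint/restart invocations (paths are examples only):
#   mpirun -np 8 pcp -K /tmp/pcp.ckpt -Km 30 /src /dest   # checkpoint every 30 minutes
#   mpirun -np 8 pcp -R /tmp/pcp.ckpt                     # resume an interrupted copy
#   mpirun -np 8 pcp -Rv /tmp/pcp.ckpt                    # verify a completed copy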
def Abort():
if rank == 0 and STARTEDCOPY and DUMPDB:
try:
print "Attempting write state database to %s..." %(DUMPDB),
dumpDB(statedb, DUMPDB)
print "Done."
except IOError as dberr:
print "FAILED!"
print dberr
"""Clean down all the MPI Processes."""
MPI.COMM_WORLD.Abort(1)
exit (1)
def sanitycheck(sourcedir, destdir):
"""Perform some sanity checks, including creating the destination
directory if it does not exist and ensuring excessive parallelism is not
used."""
realsource = os.path.realpath(sourcedir)
realdest = os.path.realpath(destdir)
if realsource == realdest:
print
print ("ERROR: Source and destination directory are the same!")
print
Abort()
if not WITHLUSTRE and (LSTRIPE or FORCESTRIPE or NODIRSTRIPE or MINSTRIPESIZE):
print
print ("Error: Lustre stripe options specified but lustreapi is not available.")
print
Abort()
# If we are using lustre stripe options, check that the source and
# destination filesystems are lustre.
if LSTRIPE:
fstype = statfs.fstype(sourcedir)
if fstype != lustreapi.LUSTREMAGIC:
print ""
print ("ERROR: You have asked me to copy lustre striping attributes, but"
" %s is not a lustre directory.") % sourcedir
print "Exiting."
Abort()
# The destination might not exist yet, so walk up the path until we find the
# mountpoint.
if LSTRIPE or FORCESTRIPE:
path = realdest
while not os.path.ismount(path):
path = os.path.dirname(path)
fstype = statfs.fstype(path)
if fstype != lustreapi.LUSTREMAGIC:
print ""
print ("ERROR: You have asked me to set lustre striping attributes, but"
"%s is not a lustre filesystem") % path
print "Exiting."
Abort()
def scantree(sourcedir, destdir, statedb):
"""walk the src file tree, create the destination directories and put the
files to be copied into the database."""
totaldirs = 0
totalscanned = 0
if rank == 0:
startime = time.time()
if not os.path.isdir(sourcedir):
print "R%i: Error: %s not a directory" % (rank, sourcedir)
Abort()
# results are ([directories][files to be copied ][total files])
# FIXME: change to a proper data structure.
walker = copydirtree(comm, results=[[],[],0])
listofpaths = walker.Execute(sourcedir)
if rank == 0:
for l in listofpaths:
for f in l[1]:
statedb.execute("""INSERT INTO FILECPY (FILENAME) VALUES (?)""",
(f,))
totaldirs += len(l[0])
totalscanned += l[2]
endtime = time.time()
walltime = endtime - startime
totalfiles = statedb.execute("SELECT COUNT(*) FROM FILECPY").fetchone()[0]
rate = (totalfiles + totaldirs) / walltime
walltime = time.strftime("%H hrs %M mins %S secs",
time.gmtime(walltime))
print ("Phase I done: Scanned %i files, %i dirs in %s"
" (%.0f items/sec)."
% (totalscanned, totaldirs, walltime, rate))
print " %i files will be copied." %totalfiles
# Shuffle rows. If we don't do this, chunks of files tend to be copied at
# the same time, causing hot OSTs in the case of unstriped files.
statedb.execute("""UPDATE FILECPY SET SORTORDER = ABS(RANDOM() % ?)""",
(totalfiles,))
return()
def fadviseSeqNoCache(fileD):
"""Advise the kernel that we are only going to access file-descriptor
fileD once, sequentially."""
POSIX_FADV_SEQUENTIAL = 2
POSIX_FADV_DONTNEED = 4
offset = ctypes.c_int64(0)
length = ctypes.c_int64(0)
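# An offset and length of 0 apply the advice to the entire file.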
clib.posix_fadvise(fileD, offset, length, POSIX_FADV_SEQUENTIAL)
clib.posix_fadvise(fileD, offset, length, POSIX_FADV_DONTNEED)
def md5copy(src, dst, blksize, MD5SUM, chunk):
"""Combined copy / md5 calcuation function. Copies data from src to dst in
blksize chunks. If MD5SUM is true, it also calculates the md5sum of the
source file. Returns the md5sum of the source and the number of bytes copied."""
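# A negative chunk means copy the whole file; chunk >= 0 copies only the
# chunk-th CHUNKSIZE-byte slice into a destination file that already exists.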
md5hash = hashlib.new("md5")
bytescopied = 0
infile = open(src, "rb")
if chunk < 0:
# Copy the file in one go:
outfile = open(dst, "wb")
fadviseSeqNoCache(infile.fileno())
fadviseSeqNoCache(outfile.fileno())
while True:
data = infile.read(blksize)
if not data:
break
outfile.write(data)
bytescopied += len(data)
if MD5SUM:
md5hash.update(data)
else:
# copy CHUNKSIZE bytes:
outfile = open(dst, "r+")
fadviseSeqNoCache(infile.fileno())
fadviseSeqNoCache(outfile.fileno())
infile.seek(chunk*CHUNKSIZE)
outfile.seek(chunk*CHUNKSIZE)
nreads, remainder = divmod(CHUNKSIZE, blksize)
for i in xrange(nreads):
data = infile.read(blksize)
outfile.write(data)
bytescopied += len(data)
if MD5SUM:
md5hash.update(data)
if remainder > 0:
data = infile.read(remainder)
outfile.write(data)
bytescopied += len(data)
if MD5SUM:
md5hash.update(data)
infile.close()
outfile.close()
digest = md5hash.hexdigest()
if not MD5SUM:
digest = None
return(digest, bytescopied)
def createstripefile(src, dst, size):
"""Create a file dst with the lustre stripe information copied from src, unless
filesystem is < size, in which case we set the striping to 1."""
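# Returned stripestatus: 1 = destination striped, 0 = striping not applied,
# -1 = file smaller than MINSTRIPESIZE so striping was skipped.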
stripestatus = 0
if LSTRIPE:
layout = lustreapi.getstripe(src)
if (LSTRIPE and layout.isstriped()) or FORCESTRIPE:
if size < MINSTRIPESIZE:
stripestatus = -1
count = 1
else:
stripestatus = 1
count = -1
else:
count = 1
if not DRYRUN:
try:
lustreapi.setstripe(dst, stripecount=count)
except IOError as error:
if error.errno == errno.EEXIST:
# file exists; blow it away and try again...
os.remove(dst)
lustreapi.setstripe(dst, stripecount=count)
else:
raise
return(stripestatus)
def calcmd5(filename, chunk):
"""calculate the md5sum of a file. Returns a tuple of (md5sum,amount of
data checksummed), or (None,0) in the case of symlinks."""
md5hash = hashlib.new("md5")
# Use the optimal blocksize for IO.
filestat = safestat.safestat(filename)
blksize = filestat.st_blksize
mode = filestat.st_mode
byteschecked = 0
if stat.S_ISLNK(mode):
return(None, byteschecked)
fh = open(filename, "rb")
fadviseSeqNoCache(fh.fileno())
# MD5 the whole file
if chunk < 0:
while True:
data = fh.read(blksize)
byteschecked += len(data)
if not data:
break
md5hash.update(data)
else:
#MD5 just our chunk
fh.seek(chunk*CHUNKSIZE)
nreads, remainder = divmod(CHUNKSIZE, blksize)
for i in xrange(nreads):
data = fh.read(blksize)
md5hash.update(data)
byteschecked += len(data)
if remainder > 0:
data = fh.read(remainder)
md5hash.update(data)
byteschecked += len(data)
fh.close()
digest = md5hash.hexdigest()
return(digest, byteschecked)
def ConsumeWork(sourcedir, destdir):
"""Listen for work from the dispatcher and copies/md5sums files as
appropriate. When send the SHUTDOWN message the worker will send
performance stats back to the master."""
filescopied = 0
md5done = 0
bytescopied = 0
byteschksummed = 0
md5timer = Timer()
copytimer = Timer()
# Poll for work.
while True:
msg = comm.recv(source=0, tag=1)
action = msg[0]
if action == "SHUTDOWN":
break
(filename, idx, chunk) = msg[1]
md5sum = None
destination = mungePath(sourcedir, destdir, filename)
if action == "COPY":
copytimer.start()
try:
size, speed, md5sum, stripestatus, status = \
copyFile(filename, destination, chunk)
except (IOError, OSError) as error:
speed = 0
size = 0
stripestatus = 0
# permission denied errors are not fatal. Skip over the file
# and carry on.
if error.errno == errno.EACCES:
status = 3
# File might have moved whilst we copied it!
elif error.errno == errno.ENOENT:
status = 5
else:
status = 1
if status == 0 or status == 4 or status == 7:
bytescopied += size
filescopied += 1
msg = ("COPYRESULT",( md5sum, idx, rank, status, speed,
size, stripestatus))
comm.send(msg, dest=0, tag=1)
copytimer.stop()
if action == "MD5":
md5timer.start()
if DRYRUN:
size = 0
status = 0
md5sum = "DEADBEAFdeadbeafDEADBEAFdeadbeaf"
else:
try:
md5sum, size = calcmd5(destination, chunk)
status = 0
except (IOError, OSError):
size = 0
status = 1
msg = ("MD5RESULT", (md5sum, idx, rank, status, None, None, None))
comm.send(msg, dest=0, tag=1)
md5done += 1
byteschksummed += size
md5timer.stop()
# Return stats
comm.gather((filescopied, md5done, bytescopied, byteschksummed,
copytimer.read(), md5timer.read()), root=0)
return(0)
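# Message protocol between the dispatcher (R0) and the workers. All messages
# are picklable tuples of (ACTION, payload):
#   master -> worker, tag=1: ("COPY"|"MD5", (filename, id, chunk)) or ("SHUTDOWN", ())
#   worker -> master, tag=1: ("COPYRESULT"|"MD5RESULT",
#                             (md5sum, id, rank, status, speed, size, stripestatus))
#   worker -> master, tag=3: ("ALIVE", (rank,)) during the start-up liveness check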
def checkAlive(rank, workers, timeout):
"""Quirky farm nodes can cause the MPI runtime to lock up during the task
spawn. This routine checks whether nodes can exchange messages. If a node
has not responded after timeout seconds we bail."""
if rank > 0:
msg = ("ALIVE", (rank,))
comm.send(msg, dest=0, tag=3)
else:
expectedworkers = set(range(1, workers))
aliveworkers = set()
giveuptime = time.time() + timeout
while time.time() < giveuptime:
if comm.Iprobe(source=MPI.ANY_SOURCE, tag=3):
msg = comm.recv(source=MPI.ANY_SOURCE, tag=3)
status = msg[0]
rank = msg[1][0]
aliveworkers.add(rank)
if len(aliveworkers) == len(expectedworkers):
print "R0: All workers have reported in."
return
print ("Error: The following workers did not report in after"
" %i seconds") % timeout
awol = expectedworkers.difference(aliveworkers)
for i in awol:
print "R%i" % i
Abort()
def DispatchWork(statedb):
"""The dispatcher sends copy/md5 tasks out to idle workers. If copy/md5
tasks fail the dispatcher will re-queue them for retries."""
global WARNINGS
global CHECKPOINTNOW
global COPYREMAINS
global MD5REMAINS
global TOTALROWS
global RVERRORS
# Queue containing workers that are ready for work.
idleworkers = deque()
idleworkers.extend(range(1, workers))
# Start the checkpoint timer
if DUMPDB:
cptimer = Timer()
cptimer.start()
TOTALROWS = statedb.execute \
("""SELECT COUNT(*) FROM FILECPY""").fetchone()[0]
if VERIFY:
COPYREMAINS = 0
MD5REMAINS = statedb.execute \
("""SELECT COUNT(*) FROM FILECPY WHERE STATE == 4""").fetchone()[0]
for errfile, chunk in statedb.execute \
("SELECT FILENAME, CHUNKS FROM FILECPY WHERE STATE < 4"):
RVERRORS += 1
destfile = mungePath(sourcedir, destdir, errfile)
if chunk < 0:
print "COPYFAIL:%s" % destfile
else:
print "COPYFAIL,%d:%s" % (chunk,destfile)
else:
COPYREMAINS = statedb.execute \
("""SELECT COUNT(*) FROM FILECPY WHERE STATE == 0""").fetchone()[0]
if MD5SUM:
MD5REMAINS = statedb.execute \
("""SELECT COUNT(*) FROM FILECPY WHERE STATE < ?""",(ENDSTATE,)).fetchone()[0]
else:
MD5REMAINS = 0
# loop until we have no more work to send.
while COPYREMAINS > 0 or MD5REMAINS > 0:
# See if we need to checkpoint
if DUMPDB and not VERIFY:
if cptimer.read() > DUMPINTERVAL:
print "RO: Writing checkpoint to %s..." %DUMPDB,
dumpDB(statedb, DUMPDB)
print "Done"
cptimer.reset()
cptimer.start()
if CHECKPOINTNOW and not VERIFY:
if not DUMPDB:
dumpfile = "pcp_checkpoint.db"
else:
dumpfile = DUMPDB
print "R0: SIGUSR1: Writing checkpoint to %s..." %dumpfile,
dumpDB(statedb, dumpfile)
print "Done"
CHECKPOINTNOW = False
# Listen for workers reporting in and deal with the results
if comm.Iprobe(source=MPI.ANY_SOURCE, tag=1):
msg = comm.recv(source=MPI.ANY_SOURCE, tag=1)
action = msg[0]
payload = msg[1]
workerrank = msg[1][2]
idleworkers.appendleft(workerrank)
if action == "COPYRESULT":
processCopy(statedb, payload)
if action == "MD5RESULT":
processMD5(statedb, payload)
# try for dispatch
if len(idleworkers) > 0:
worker = idleworkers.pop()
if VERIFY:
task = statedb.execute("SELECT FILENAME, ID, CHUNKS FROM FILECPY WHERE STATE == 4 ORDER BY SORTORDER LIMIT 1").fetchone()
if task:
statedb.execute("""UPDATE FILECPY SET STATE = 5 WHERE ID = ?""",(task[1],))
msg = ("MD5", (task[0], task[1], task[2]))
comm.send(msg, dest=worker, tag=1)
continue
else:
# 2 workers is a special case; we can't do the MD5sum or retries on
# a different node, as we only have 1 worker node.
if workers == 2:
lastrank = -1
else:
lastrank = worker
task = statedb.execute("""SELECT FILENAME, ID, CHUNKS FROM FILECPY WHERE STATE == 0 AND
LASTRANK <> ? ORDER BY SORTORDER LIMIT 1""",(lastrank, )).fetchone()
if task:
statedb.execute("""UPDATE FILECPY SET STATE = 1 WHERE ID = ?""",(task[1],))
msg = ("COPY", (task[0], task[1], task[2]))
comm.send(msg, dest=worker, tag=1)
continue
if MD5SUM:
task = statedb.execute("""SELECT FILENAME, ID, CHUNKS FROM FILECPY WHERE STATE == 2 AND
LASTRANK <> ? ORDER BY SORTORDER LIMIT 1""",(lastrank, )).fetchone()
if task:
statedb.execute("""UPDATE FILECPY SET STATE = 3 WHERE ID = ?""",(task[1],))
msg = ("MD5", (task[0], task[1], task[2]))
comm.send(msg, dest=worker, tag=1)
continue
# There is work, but not for this worker. Send to the back of the queue
idleworkers.appendleft(worker)
if VERBOSE:
print "R0: No more work to do."
def processMD5(statedb, payload):
global WARNINGS
global COPYREMAINS
global MD5REMAINS
global RVERRORS
md5sum = payload[0]
idx = payload[1]
workerrank = payload[2]
status = payload[3]
speed = payload[4]
size = payload[5]
stripestatus = payload[6]
filename, attempt, srcmd5, chunk = statedb.execute("""SELECT FILENAME, ATTEMPTS, SRCMD5,
CHUNKS FROM FILECPY WHERE ID = ?""", (idx,)).fetchone()
if status == 0:
if VERIFY:
MD5REMAINS -= 1
statedb.execute("UPDATE FILECPY SET STATE = 6 WHERE ID = ?", (idx,))
if srcmd5 != md5sum:
RVERRORS += 1
destfile = mungePath(sourcedir, destdir, filename)
if chunk < 0:
print "MD5FAIL:%s" % destfile
else:
print "MD5FAIL,%d:%s" % (chunk,destfile)
elif srcmd5 == md5sum:
statedb.execute("""UPDATE FILECPY SET STATE = 4
WHERE ID = ?""", (idx,))
MD5REMAINS -= 1
if VERBOSE:
if chunk < 0:
print "R%i: %s %s md5sum verified (%s)" \
% (workerrank, timestamp(), filename, md5sum)
else:
print "R%i: %s %s chunk %i md5sum verified (%s)" \
% (workerrank, timestamp(), filename, chunk, md5sum)
else:
# This is bad; we got an md5 mismatch, but no IO
# exceptions were thrown.
attempt += 1
statedb.execute("""UPDATE FILECPY SET STATE = 0, SRCMD5 = NULL, ATTEMPTS = ?,
LASTRANK = ? WHERE ID =?""",(attempt, workerrank, idx))
COPYREMAINS += 1
if attempt < MAXTRIES:
WARNINGS +=1
print ("R%i: %s WARNING: SILENT DATA CORRUPTION %s"
" md5sum mismatch (%s:%s). Re-queuing copy %i."
% (workerrank, timestamp(), filename, srcmd5, md5sum, attempt))
# TODO: Save corrupt segments of files.
if chunk < 0:
corruptfile = destfile+"_CORRUPTED_%i" %attempt
destfile = mungePath(sourcedir, destdir, filename)
print ("R%i: %s Renaming corrupt file as %s for later analysis."
% (workerrank, timestamp(), corruptfile))
os.rename (destfile, corruptfile)
else:
print "%s ERROR: Max number of copies reached on %s."\
%(timestamp(), filename)
Abort()
else:
# md5 calc failed due to a detected error.
if VERIFY:
MD5REMAINS -= 1
statedb.execute("UPDATE FILECPY SET STATE = 6 WHERE ID = ?", (idx,))
RVERRORS += 1
destfile = mungePath(sourcedir, destdir, filename)
if chunk < 0:
print "READFAIL:%s" % destfile
else:
print "READFAIL,%d:%s" % (chunk,destfile)
else:
attempt += 1
statedb.execute("""UPDATE FILECPY SET ATTEMPTS = ?, LASTRANK = ?, STATE = 2
WHERE ID =?""",(attempt, workerrank, idx))
if attempt < MAXTRIES:
WARNINGS += 1
print ("R%i: %s WARNING: Error calculating destination"
" md5sum of %s on attempt %i. Re-trying..." \
%(workerrank, timestamp(), filename, attempt))
else:
# Retries exceeded.
print ("R%i %s ERROR: Max number of md5 attempts reached on %s."
%(timestamp(), filename))
Abort()
return()
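# Worker status codes carried in COPYRESULT messages (as handled below):
# 0 copy OK
# 1 copy failed; re-queue for retry
# 2 non-standard file type; skipped
# 3 permission denied; skipped
# 4 copied, but permissions could not be preserved
# 5 source file missing (ENOENT); retried in case the filesystem is not mounted
# 6 large file; split into CHUNKSIZE chunks and re-queued
# 7 copied, but ownership could not be preserved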
def processCopy(statedb, payload):
global WARNINGS
global COPYREMAINS
global MD5REMAINS
global TOTALROWS
md5sum = payload[0]
idx = payload[1]
workerrank = payload[2]
status = payload[3]
speed = payload[4]
size = payload[5]
stripestatus = payload[6]
filename, attempt, chunk = statedb.execute("""SELECT FILENAME, ATTEMPTS, CHUNKS FROM FILECPY
WHERE ID = ?""",(idx, )).fetchone()
# Copy is complete.
if status == 0 or status == 4 or status == 7:
statedb.execute("""UPDATE FILECPY SET STATE = 2, SRCMD5 = ?, LASTRANK = ?,
SIZE = ? WHERE ID = ? """,(md5sum, workerrank, size, idx))
COPYREMAINS -= 1
if VERBOSE:
stripetxt = ""
if LSTRIPE or FORCESTRIPE:
if stripestatus == 1:
stripetxt = "(striped)"
elif stripestatus == 0:
stripetxt = "(unstriped)"
elif stripestatus == -1:
stripetxt = "(small file: ignored striping)"
if chunk < 0:
print "R%i: %s copied %s %s %s (%s/s)" \
% (workerrank, timestamp(), filename, stripetxt,
prettyPrint(size), prettyPrint(speed))
else:
print "R%i: %s copied %s chunk %i %s (%s/s)" \
% (workerrank, timestamp(), filename, chunk,
prettyPrint(size), prettyPrint(speed))
if status == 4:
# unable to preserve permissions
WARNINGS += 1
print ("R%i: %s WARNING: unable to preserve"
" permissions on %s") \
% (workerrank, timestamp(), filename)
elif status == 7:
# unable to preserve ownership
WARNINGS += 1
print ("R%i: %s WARNING: unable to preserve"
" ownership of %s") \
% (workerrank, timestamp(), filename)
# copy failed.
elif status == 1:
if attempt < MAXTRIES:
attempt += 1
statedb.execute("""UPDATE FILECPY SET ATTEMPTS = ?,
LASTRANK = ?, STATE = 0 WHERE ID = ? """,(attempt,
workerrank, idx))
WARNINGS += 1
print ("R%i: %s WARNING: Error copying %s on attempt %i"
" Retrying..."
% (workerrank, timestamp(), filename,
attempt))
else:
print "%s ERROR: Max number of copies reached on %s" \
%(timestamp(), filename)
Abort()
# Copy failed permanently but non-fatally. Mark as done without bothering to retry.
elif status == 2:
# nonstandard filetype
statedb.execute("""UPDATE FILECPY SET STATE = ?
WHERE ID = ? """,(ENDSTATE, idx))
COPYREMAINS -= 1
MD5REMAINS -= 1
WARNINGS +=1
print "R%i: %s WARNING: unable to copy %s (%s). Skipping..." \
% (workerrank, timestamp(), filename, md5sum)
elif status == 3:
# permission denied
statedb.execute("""UPDATE FILECPY SET STATE = ?
WHERE ID = ? """,(ENDSTATE, idx))
COPYREMAINS -= 1
MD5REMAINS -= 1
WARNINGS += 1
print "R%i: %s WARNING: permission denied on %s. Skipping..." \
% (workerrank, timestamp(), filename)
elif status == 5:
# File does not exist. We do retry here, as we might be on
# a node that does not have the FS mounted.
if attempt < MAXTRIES:
attempt += 1
statedb.execute("""UPDATE FILECPY SET STATE = 0, ATTEMPTS = ?,
LASTRANK = ? WHERE ID = ?""",
(attempt, workerrank, idx))
WARNINGS += 1
print ("R%i: %s WARNING: %s No such file or directory"
" attempt %i. Retrying..."
% (workerrank, timestamp(), filename, attempt))
else:
# Treat non-existence as a non-fatal error.
# The user might simply have moved the file during the copy
statedb.execute("""UPDATE FILECPY SET STATE = ?, SRCMD5 = ?
WHERE ID = ? """,(ENDSTATE, md5sum, idx))
COPYREMAINS -= 1
MD5REMAINS -= 1
WARNINGS += 1
print ("R%i: %s WARNING %s No such file or directory on "
"attempt %i. Maybe someone moved the file?"
" Skipping...") %(workerrank, timestamp(),
filename, attempt)
elif status == 6:
chunks = int(math.ceil(size / float(CHUNKSIZE)))
with statedb:
for i in range(chunks):
sortid = random.randint(0, TOTALROWS + chunks)
statedb.execute("INSERT INTO FILECPY (FILENAME, SORTORDER, CHUNKS) VALUES (?,?,?)",
(filename, sortid, i))
statedb.execute("DELETE FROM FILECPY WHERE ID = ?", (idx,))
COPYREMAINS += chunks-1
TOTALROWS += chunks
if MD5SUM:
MD5REMAINS += chunks-1
if VERBOSE:
stripetxt = ""
if LSTRIPE or FORCESTRIPE:
if stripestatus == 1:
stripetxt = "(striped)"
elif stripestatus == 0:
stripetxt = "(unstriped)"
elif stripestatus == -1:
stripetxt = "(small file: ignored striping)"
print ("R%i: %s Large file %s: copying in %i chunks."
%(workerrank, filename, stripetxt, chunks))
return()
def ShutdownWorkers(starttime):
"""Tell workers we have no more work for them and collate the stats"""
totalfiles = 0
totalbytes = 0
if VERBOSE:
print "R0: Sending SHUTDOWN to workers"
for r in range(1, workers):
msg = ("SHUTDOWN",())
comm.send(msg, dest=r, tag=1)
if VERBOSE:
print "rank %i shutdown" % r
if VERBOSE:
print "R0: Gathering results"
data = comm.gather(0, root=0)
# Gather the runtime statistics
endtime = time.time()
totalelapsedtime = endtime - starttime
print ""
print "Copy Statisics:"
for r in range(1, workers):
filescopied, md5done, bytescopied, byteschksummed, copytime, \
md5time = data[r]
totalfiles += filescopied
totalbytes += bytescopied