#!/bin/bash -e
#
# dbload [options] gbRoot databases timefile workdir
#
# Load genbank/refseq sequences.  This is a light weight script 
# that checks for new alignments and for lock files.  This is the
# common functionality for the *-dbload scripts
#
# timefile is the name of the time file to check for new alignments
# being ready, relative to gbRoot
#
# Options:  
#   -inclEsts - do both ESTs and mRNAs, otherwise do just mRNAs.
#   -extFileUpdate - update gbSeq/gbExtFile entries to point to the
#    latest release.  A limit on the number to update is set in this
#    file.
#   -allowLargeDeletes - see gbDbLoadStep
#   -rebuildDerived - see gbDbLoadStep
#   -delayDays=n - delay dbload until build is at least n days older
#    than last load time.
#   -grepIndexRootDir=dir - Location to build grepIndex files. overrides genbank.conf
#    grepIndex.rootDir variable.

# Stop at the first failing command.  The ERR trap reports the failure on
# stderr and exits with status 1; the host name in the message is expanded
# once, at the time the trap is installed.
set -e
trap "echo Error: $(hostname) dbload failed >&2; exit 1" ERR

# From here on, stdin is read from /dev/null.
exec < /dev/null

# Value passed as -maxExtFileUpdate= when -extFileUpdate is requested.
maxExtFileUpdate=10000000

# Defaults; the leading command line options below may override them.
#   inclEsts     - YES when -inclEsts was given
#   opts         - options accumulated for gbDbLoadStep
#   largeDeletes - YES when -allowLargeDeletes was given
#   delayDays    - value of -delayDays=n
inclEsts=NO
opts=""
largeDeletes=NO
delayDays=0

# Consume every leading argument that starts with "-", leaving only the
# positional arguments in "$@".
while [[ $1 == -* ]] ; do
    opt=$1
    shift
    case "$opt" in
        -inclEsts)
            inclEsts=YES
            ;;
        -extFileUpdate)
            opts+=" -maxExtFileUpdate=$maxExtFileUpdate"
            ;;
        -allowLargeDeletes)
            # passed through, and remembered for the reminder at the end
            largeDeletes=YES
            opts+=" $opt"
            ;;
        -rebuildDerived | -grepIndexRootDir=*)
            # passed through unchanged
            opts+=" $opt"
            ;;
        -delayDays=*)
            # keep only the value after the "="
            delayDays=${opt#-delayDays=}
            ;;
        *)
            echo "Error: invalid option $opt" >&2
            exit 1
            ;;
    esac
done

# Exactly four positional arguments must remain once the options are consumed.
if [ $# != 4 ] ; then
    echo "wrong # args: dbload [options] gbRoot databases timefile workdir" >&2
    exit 1
fi

gbRoot=$1
# kept as a single string; it is expanded unquoted when handed to gbDbLoadStep
databases="$2"
timeFile=$3
workdir=$4

# Work from gbRoot (timefile is relative to it) and source lib/gbCommon.sh.
# Quoted so a root path containing whitespace or glob characters is used
# verbatim.
cd "$gbRoot"
. "$gbRoot/lib/gbCommon.sh"

# verbosity level passed to gbDbLoadStep as -verbose=
verb=3

# data files and dirs; the paths are relative (the script has already
# changed directory to gbRoot) and are kept per host
dbloadVar=var/dbload/$(hostname)
# saved copy of the time file, named after timeFile's base name; the argument
# is quoted so a name with whitespace is not split
lastTime=$dbloadVar/$(basename "$timeFile")
# scratch copy written before the load and renamed to $lastTime afterwards
lastTimeTmp=$lastTime.tmp
# time file created once a load has finished
dbloadTime=$dbloadVar/dbload.time

# Pick the .hg.conf holding the genbank db user, based on the host this is
# running on.
# WARNING: must also be changed in etc/gbCommon.pm
case "$(hostname)" in
    hgwbeta)
        export HGDB_CONF="$gbRoot/etc/.hg.mysqlbeta.conf"
        ;;
    hgnfs1)
        export HGDB_CONF="$gbRoot/etc/.hg.mysqlrr.conf"
        # FIXME: temporary hack during the transition to Arnolds
        opts="$opts -grepIndexRootDir=/gbdb/genbank/grepIndex"
        ;;
    *)
        export HGDB_CONF="$gbRoot/etc/.hg.conf"
        ;;
esac

# make sure the per-host state directory exists
mkdir -p "$dbloadVar"

# Check for an existing lock file.  Silently exits if the lock file exists
# and is less than one day old; it is an error if it is older.
gbCheckLock "$dbloadVar/run/dbload.lock"

# Check if the build is newer than what was last loaded; when gbCmpTimeFiles
# returns non-zero there is nothing to do and the script exits successfully.
# $delayDays is deliberately left unquoted so that an empty value is dropped
# instead of being passed as an empty argument.
if ! gbCmpTimeFiles "$timeFile" "$lastTime" $delayDays ; then
    exit 0
fi

# Save the build time before the load, in a temporary file; it is renamed to
# $lastTime only after the load has succeeded.  Operands are quoted so paths
# containing whitespace are copied correctly.
cp -f "$timeFile" "$lastTimeTmp"

# With -inclEsts no -type restriction is passed to the load step;
# otherwise the load is limited to mRNAs.
case "$inclEsts" in
    YES)
        typeArg=""
        ;;
    *)
        typeArg="-type=mrna"
        ;;
esac

# Update databases.  $typeArg, $opts and $databases are intentionally left
# unquoted: each must expand to zero or more separate arguments.  The workdir
# path is quoted so it is always passed as a single argument.
nice gbDbLoadStep "-verbose=${verb}" $typeArg $opts -workdir="$workdir" $databases

# On success, record the build time we started with and the time the load
# completed.
mv -f "$lastTimeTmp" "$lastTime"
gbMkTimeFile "$dbloadTime"

# save tables statistics for all databases
nice dumpTableStats

# When -allowLargeDeletes was used, print the reminder three times so it is
# hard to miss and we remember to turn the option off again.
if [[ "$largeDeletes" == YES ]] ; then
    for _ in 1 2 3 ; do
        echo "WARNING: -allowLargeDeletes is enabled"
    done
fi

# final status line, tagged with the host name
echo "$(hostname): dbload completed"

