#!/usr/bin/perl
#
# ccdsImportStep [-verbose]
#
# Import latest CCDS dump into the ccds database.
# Uses a table ccds.importVersion to track what
# version was loaded.
#
# -verbose - print details
#
# $Id: ccdsImportStep,v 1.2 2009/09/15 06:52:29 markd Exp $
#
use strict;
use warnings;
use File::Basename;
use FindBin;
use lib "$FindBin::Bin/../lib";
use gbCommon;

# live database; new tables are renamed into it
my $ccdsDb = "ccds";
# scratch database the dump is imported into before the rename
my $ccdsTmpDb = "ccds_tmp";
# holding database that receives the previous tables during the rename;
# dropped again once the load completes
my $ccdsOldDb = "ccds_old";
# single-column (dumpDate) table recording which dump was loaded
my $ccdsVersionTbl = "importVersion";

# Find the latest CCDS dump file in the local download directory
# (data/download/ccds, relative to the current directory).  Candidates are
# ordered by plain string comparison and the last one is returned; with
# names like CCDS.20080630.tar.gz that is the most recent date.
# Dies if no dump file is present.
sub getDumpFileName() {
    my $downloadDir = "data/download/ccds";

    my @candidates = sort { $a cmp $b } glob($downloadDir . "/CCDS.*.tar.gz");
    if (!@candidates) {
        die("can't find CCDS dump files in $downloadDir");
    }
    return $candidates[-1];
}

# Parse the date out of a CCDS dump file name.
#   $dumpFile - path or bare name of the form CCDS.YYYYMMDD.tar.gz
# Returns the date formatted as "YYYY-MM-DD".
# Dies if the name does not have the expected form; otherwise the unparsed
# file name would be handed back and later compared and stored as a date.
sub parseDumpFileDate($) {
    my($dumpFile) = @_;
    # CCDS.20080630.tar.gz
    my $name = basename($dumpFile);
    if ($name !~ /^CCDS\.([0-9]{4})([0-9]{2})([0-9]{2})\.tar\.gz$/) {
        die("can't parse date from CCDS dump file name: $dumpFile");
    }
    return "$1-$2-$3";
}

# Retrieve the dump date recorded in the ccds importVersion table.
# Returns undef if the table doesn't exist or the query yields nothing.
sub getCcdsImportVersion() {
    if (!haveMysqlTbl($ccdsDb, $ccdsVersionTbl)) {
        prMsg("no $ccdsDb.$ccdsVersionTbl") if ($gbCommon::verbose);
        return undef;
    }

    my $importDate = callMysql("-Ne 'SELECT dumpDate FROM $ccdsVersionTbl' $ccdsDb");
    chomp($importDate);
    prMsg("date in $ccdsDb.$ccdsVersionTbl: $importDate") if ($gbCommon::verbose);

    # an empty result is treated the same as a missing table
    return ($importDate ne "") ? $importDate : undef;
}

# Decide whether the ccds database must be reloaded.
#   $dumpDate - date of the candidate dump, as YYYY-MM-DD
# True when no import date is recorded, or when the dump date sorts after
# the recorded one (string comparison).
sub ccdsNeedsUpdated($) {
    my($dumpDate) = @_;
    my $loadedDate = getCcdsImportVersion();
    return 1 if (!defined($loadedDate));
    return $dumpDate gt $loadedDate;
}

# Unpack the gzipped CCDS dump tarball into a tmp dir obtained from
# getTmpDir.  Returns the path of that directory.
sub ccdsExtractDmp($) {
    my($dumpFile) = @_;
    my $extractDir = getTmpDir("ccds");
    my $tarCmd = "tar -C $extractDir -zxf $dumpFile";
    runProg($tarCmd);
    return $extractDir;
}

# Create the import version table in the tmp database and record the
# date of the dump being loaded in it.
sub ccdsCreateImportVersion($) {
    my($dumpDate) = @_;
    my $createSql = "CREATE TABLE $ccdsVersionTbl (dumpDate date not null)";
    my $insertSql = "INSERT INTO $ccdsVersionTbl (dumpDate) VALUES (\"$dumpDate\")";
    foreach my $sql ($createSql, $insertSql) {
        runMysql("-Ne '$sql' $ccdsTmpDb");
    }
}

# Add every table of database $db to two hashes (passed by reference),
# keyed and valued by table name: one specific to this database and one
# accumulating tables across databases.
sub listTblsIntoHash($$$) {
    my($db, $tblHashRef, $allTblHashRef) = @_;
    my @tbls = listMysqlTbls($db);
    @{$tblHashRef}{@tbls} = @tbls;
    @{$allTblHashRef}{@tbls} = @tbls;
}

# Classify tables by which database they are found in.  The current ccds
# database is only consulted when $haveCcdsDb is true.
# Returns a list of refs to three lists:
#   [tblsInCurrentOnly, tblsInBoth, tblsInNewOnly]
sub getTableSets($) {
    my($haveCcdsDb) = @_;
    my(%currentTbls, %newTbls, %allTbls);
    listTblsIntoHash($ccdsTmpDb, \%newTbls, \%allTbls);
    listTblsIntoHash($ccdsDb, \%currentTbls, \%allTbls) if ($haveCcdsDb);

    my(@currentOnly, @both, @newOnly);
    foreach my $tbl (keys %allTbls) {
        my $isCurrent = exists($currentTbls{$tbl});
        my $isNew = exists($newTbls{$tbl});
        if (!($isCurrent || $isNew)) {
            # every key of %allTbls was added along with one of the others
            die("bad bug");
        }
        my $bucket = ($isCurrent && $isNew) ? \@both
                   : $isCurrent             ? \@currentOnly
                   :                          \@newOnly;
        push(@{$bucket}, $tbl);
    }
    return (\@currentOnly, \@both, \@newOnly);
}

# Rename tables into place from the tmp database, using one RENAME TABLE
# statement for all of them.  Tables currently in ccds are moved to the old
# database; tables in the tmp database are moved into ccds.
# (Wish there was just a rename database!)
sub renameTables($) {
    my($haveCcdsDb) = @_;
    my($inCurrentOnly, $inBoth, $inNewOnly) = getTableSets($haveCcdsDb);

    # clause moving a current table out of the way
    my $retire = sub {
        my($tbl) = @_;
        return "$ccdsDb.$tbl TO $ccdsOldDb.$tbl";
    };
    # clause moving a newly loaded table into place
    my $install = sub {
        my($tbl) = @_;
        return "$ccdsTmpDb.$tbl TO $ccdsDb.$tbl";
    };

    # build command like:
    #  RENAME TABLE OLD_TABLE TO BACKUP_TABLE, NEW_TABLE TO OLD_TABLE, ...;
    my @clauses;
    push(@clauses, map { $retire->($_) } @{$inCurrentOnly});
    push(@clauses, map { ($retire->($_), $install->($_)) } @{$inBoth});
    push(@clauses, map { $install->($_) } @{$inNewOnly});

    my $renameSql = "RENAME TABLE " . join(",", @clauses);
    runMysql("-Ne '$renameSql'");
}

# Create the database named $db.
sub createDatabase($) {
    my $db = shift;
    my $sql = "CREATE DATABASE $db";
    runMysql("-Ne '$sql'");
}

# Drop the database named $db; a missing database is not an error
# (DROP ... IF EXISTS).
sub dropDatabase($) {
    my $db = shift;
    my $sql = "DROP DATABASE IF EXISTS $db";
    runMysql("-Ne '$sql'");
}

# Load a CCDS dump and move it into place as the ccds database.
#   $dumpFile   - path of the dump tarball
#   $dumpDate   - date of the dump, recorded in the import version table
#   $haveCcdsDb - true if the ccds database already exists
# The dump is imported into the tmp database, then its tables are renamed
# into ccds; the scratch databases and extraction dir are removed afterwards.
sub ccdsDbLoad($$$) {
    my($dumpFile, $dumpDate, $haveCcdsDb) = @_;

    # clear out anything left over in the scratch databases
    foreach my $staleDb ($ccdsTmpDb, $ccdsOldDb) {
        dropDatabase($staleDb);
    }

    # load into tmp database
    my $tmpDir = ccdsExtractDmp($dumpFile);
    my @dataFiles = glob($tmpDir . "/data/*.txt");
    runProg("ccdsImport $ccdsTmpDb " . join(" ", @dataFiles));
    ccdsCreateImportVersion($dumpDate);

    # use atomic rename if ccds database exists: the old database is needed
    # to receive the current tables; otherwise ccds itself must be created
    createDatabase($haveCcdsDb ? $ccdsOldDb : $ccdsDb);
    renameTables($haveCcdsDb);

    foreach my $scratchDb ($ccdsTmpDb, $ccdsOldDb) {
        dropDatabase($scratchDb);
    }
    removeDir($tmpDir);
}

# Entry point: consume leading options, then reload ccds only if the
# database is missing or older than the latest dump.
while (@ARGV && ($ARGV[0] =~ /^-.*/)) {
    my $opt = shift(@ARGV);
    if ($opt ne "-verbose") {
        gbError("invalid option \"$opt\"");
    } else {
        $gbCommon::verbose = 1;
    }
}

# no positional arguments are accepted
die("Wrong \# args: ccdsImportStep [-verbose]") if (@ARGV);

# use different task dir to allow running parallel with genbank
beginTask("ccdsimport", "import");
my $dumpFile = getDumpFileName();
my $dumpDate = parseDumpFileDate($dumpFile);
my $haveCcdsDb = haveMysqlDb($ccdsDb);

# the import version is only checked when the database exists
my $needsLoad = (!$haveCcdsDb) || ccdsNeedsUpdated($dumpDate);
if (!$needsLoad) {
    prMsg("$ccdsDb is up-to-date with dump $dumpDate");
} else {
    ccdsDbLoad($dumpFile, $dumpDate, $haveCcdsDb);
    makeTimeFile("var/ccdsimport/ccdsimport.time");
}

endTask();
