# for emacs: -*- mode: sh; -*-

# This file describes how we make the BlastTab tables for knownGenes
#	sgdGenes flyBaseGenes wormGenes

# Run on the occasion of the Ensembl v59 release to update the danRer6
#	Ensembl blast tabs on all other organisms.  In the process, some
#	confusion was discovered in previous builds of the blastTabs,
#	therefore everything was rebuilt.

############################################################################
# DONE 2010-08-18 in this case in this danRer6 directory
#	where you can find this procedure encapsulated in scripts.
#	the copy here is merely the contents of the scripts that run
#	these procedures.

# Working directory for this build.  runDir is defined first so the path
# is written exactly once; the steps below refer to it as ${runDir}.
export runDir="/hive/data/genomes/danRer6/bed/ensGene.59/blastTab"

# -p: no error when the directory is left over from an earlier run.
mkdir -p "${runDir}"
cd "${runDir}"

############################################################################
# prepare peptide fasta files for all
# Every protein set used here is dumped into ${runDir} as
# <db>.<geneSetLabel>.faa
mkdir -p "${runDir}"
cd "${runDir}"
# columns: database, peptide table, gene set label used in the file name
# (the table is read on fd 3 so that stdin of pepPredToFa is untouched)
while read -r D pepTable label <&3
do
    pepPredToFa "${D}" "${pepTable}" "${D}.${label}.faa"
done 3<<'EOF'
danRer6 ensPep ensembl
sacCer2 sgdPep sgd
dm3 flyBasePep flyBase
ce6 sangerPep sanger
hg19 knownGenePep known
mm9 knownGenePep known
rn4 knownGenePep known
EOF

 
############################################################################
# construct each *.config.ra file
# known gene tables
# One config per target in hg19 mm9 rn4.  The queryDbs line is the full
# database list with the target itself removed; all other lines are the
# same for the three targets apart from the target name.
for D in hg19 mm9 rn4
do
    mkdir -p "${D}"
    queryLine=$(echo "queryDbs hg19 mm9 rn4 ce6 dm3 danRer6 sacCer2" \
        | sed -e "s/ ${D}//")
    cat > "${D}/${D}.config.ra" <<EOF
targetGenesetPrefix known
targetDb ${D}
${queryLine}
hg19Fa ${runDir}/hg19.known.faa
mm9Fa ${runDir}/mm9.known.faa
rn4Fa ${runDir}/rn4.known.faa
sacCer2Fa ${runDir}/sacCer2.sgd.faa
ce6Fa ${runDir}/ce6.sanger.faa
dm3Fa ${runDir}/dm3.flyBase.faa
danRer6Fa ${runDir}/danRer6.ensembl.faa
buildDir ${runDir}/${D}
scratchDir ${runDir}/${D}/tmp
EOF
done

# sgd gene tables
# sacCer2 is the target; the six other databases are the queries.
D=sacCer2
mkdir -p "${D}"
cat > "${D}/${D}.config.ra" <<EOF
targetGenesetPrefix sgd
targetDb ${D}
queryDbs hg19 mm9 rn4 ce6 dm3 danRer6
hg19Fa ${runDir}/hg19.known.faa
mm9Fa ${runDir}/mm9.known.faa
rn4Fa ${runDir}/rn4.known.faa
sacCer2Fa ${runDir}/sacCer2.sgd.faa
ce6Fa ${runDir}/ce6.sanger.faa
dm3Fa ${runDir}/dm3.flyBase.faa
danRer6Fa ${runDir}/danRer6.ensembl.faa
buildDir ${runDir}/${D}
scratchDir ${runDir}/${D}/tmp
EOF

# flyBase table
# dm3 is the target; the six other databases are the queries.
D=dm3
mkdir -p "${D}"
cat > "${D}/${D}.config.ra" <<EOF
targetGenesetPrefix flyBase
targetDb ${D}
queryDbs hg19 mm9 rn4 ce6 sacCer2 danRer6
hg19Fa ${runDir}/hg19.known.faa
mm9Fa ${runDir}/mm9.known.faa
rn4Fa ${runDir}/rn4.known.faa
sacCer2Fa ${runDir}/sacCer2.sgd.faa
ce6Fa ${runDir}/ce6.sanger.faa
dm3Fa ${runDir}/dm3.flyBase.faa
danRer6Fa ${runDir}/danRer6.ensembl.faa
buildDir ${runDir}/${D}
scratchDir ${runDir}/${D}/tmp
EOF

# sanger table
# ce6 is the target; the six other databases are the queries.
D=ce6
mkdir -p "${D}"
cat > "${D}/${D}.config.ra" <<EOF
targetGenesetPrefix sanger
targetDb ${D}
queryDbs hg19 mm9 rn4 dm3 sacCer2 danRer6
hg19Fa ${runDir}/hg19.known.faa
mm9Fa ${runDir}/mm9.known.faa
rn4Fa ${runDir}/rn4.known.faa
sacCer2Fa ${runDir}/sacCer2.sgd.faa
ce6Fa ${runDir}/ce6.sanger.faa
dm3Fa ${runDir}/dm3.flyBase.faa
danRer6Fa ${runDir}/danRer6.ensembl.faa
buildDir ${runDir}/${D}
scratchDir ${runDir}/${D}/tmp
EOF

###########################################################################
# in each directory ce6 rn4 dm3 mm9 sacCer2 hg19
#	running the kluster job
# Each target directory holds the <db>.config.ra written earlier in this
# file.  A run is started only when its cd succeeded, so that do.log is
# never written into the wrong directory.  Output of each run (stdout and
# stderr) goes to do.log in that target's directory.
cd sacCer2 \
    && time nice -n +19 doHgNearBlastp.pl -workhorse=hgwdev -clusterHub=pk \
	sacCer2.config.ra > do.log 2>&1
# timing reported for the sacCer2 run:
# real    39m3.306s
# user    0m0.589s
# sys     0m0.628s

cd ../dm3 \
    && nice -n +19 doHgNearBlastp.pl -workhorse=hgwdev -clusterHub=swarm \
	dm3.config.ra > do.log 2>&1
cd ../ce6 \
    && nice -n +19 doHgNearBlastp.pl -workhorse=hgwdev -clusterHub=pk \
	ce6.config.ra > do.log 2>&1
cd ../mm9 \
    && nice -n +19 doHgNearBlastp.pl -workhorse=hgwdev -clusterHub=swarm \
	mm9.config.ra > do.log 2>&1
cd ../rn4 \
    && nice -n +19 doHgNearBlastp.pl -workhorse=hgwdev -clusterHub=swarm \
	rn4.config.ra > do.log 2>&1
cd ../hg19 \
    && nice -n +19 doHgNearBlastp.pl -workhorse=hgwdev -clusterHub=pk \
	hg19.config.ra > do.log 2>&1

###########################################################################
# running syntenic best on human, mouse, rat combinations
# synBlastp.csh is run for every ordered pair of distinct databases, in
# the order: mm9 rn4, mm9 hg19, hg19 mm9, hg19 rn4, rn4 mm9, rn4 hg19
for db in mm9 hg19 rn4
do
    for otherDb in mm9 rn4 hg19
    do
        if [ "${db}" != "${otherDb}" ]; then
            synBlastp.csh "${db}" "${otherDb}"
        fi
    done
done
# Output recorded from each run had this shape:
#   db=<db>
#   otherDb=<otherDb>
#   genePredToFakePsl:
#   pslMap via /gbdb/<db>/liftOver/<db>To<OtherDb>.over.chain.gz :
#   hgLoadPsl:
#   Processing <db>.<otherDb>.kg.psl
#   hgMapToGene:
#   <db>.<xx>BlastTab:
#   old/new number of unique query values, old/new number of unique
#   target values
# Recorded counts of unique values:
#                                old     old     new     new
# db    otherDb  table           query   target  query   target
# mm9   rn4      mm9.rnBlastTab  33419    7090   15038    6915
# mm9   hg19     mm9.hgBlastTab  42100   22528   38924   21877
# hg19  mm9      hg19.mmBlastTab 60635   22100   57302   21627
# hg19  rn4      hg19.rnBlastTab 48744    6894   25086    6669
# rn4   mm9      rn4.mmBlastTab   8024    7095    7751    6949
# rn4   hg19     rn4.hgBlastTab   7993    6915    7489    6636

###########################################################################
#  running recip best to ce6 dm3 and sacCer2
# on human, mouse and rat, and then between ce6, sacCer2 and dm3
#
# loadRecipBest targetPrefix targetDb otherDb [otherDb ...]
#   targetPrefix  two letter table prefix of the target (hg mm rn ce sc dm)
#   targetDb      target database (hg19 mm9 rn4 ce6 sacCer2 dm3)
#   otherDb       database(s) to pair with the target
# For each otherDb, relative to the current directory:
#   1. concatenate ../<otherDb>/run.<targetDb>.<otherDb>/out/*.tab and
#      ../<otherDb>/run.<otherDb>.<targetDb>/out/*.tab into an all.tab in
#      local directories of the same run.* names
#   2. run blastRecipBest on the two all.tab files, writing a
#      recipBest.tab next to each
#   3. load <targetDb>.<xx>BlastTab (xx = prefix of otherDb) and
#      <otherDb>.<targetPrefix>BlastTab with hgLoadBlastTab
# The commands are echoed before they are run.  TARGET and TARGET_DB are
# exported, and DB, aToB, bToA and dbBlastTab are ordinary shell
# variables, all left set to the values of the last pair processed.
loadRecipBest() {
    TARGET=$1
    TARGET_DB=$2
    export TARGET TARGET_DB
    shift 2
    for DB in "$@"
    do
        echo "$DB"
        aToB="run.${TARGET_DB}.${DB}"
        bToA="run.${DB}.${TARGET_DB}"
        # -p: the same two directories are used again when the pair is
        # processed with target and other swapped (e.g. ce6/dm3, dm3/ce6)
        mkdir -p "$aToB" "$bToA"
        cat "../${DB}/${aToB}/out/"*.tab > "$aToB/all.tab"
        cat "../${DB}/${bToA}/out/"*.tab > "$bToA/all.tab"
        echo "blastRecipBest $aToB/all.tab $bToA/all.tab" \
            "$aToB/recipBest.tab $bToA/recipBest.tab"
        blastRecipBest "$aToB/all.tab" "$bToA/all.tab" \
            "$aToB/recipBest.tab" "$bToA/recipBest.tab"
        # xxBlastTab is the fallback for a database without a known prefix
        dbBlastTab=xxBlastTab
        case "$DB" in
            ce6) dbBlastTab=ceBlastTab;;
            dm3) dbBlastTab=dmBlastTab;;
            danRer6) dbBlastTab=drBlastTab;;
            sacCer2) dbBlastTab=scBlastTab;;
        esac
        echo "hgLoadBlastTab ${TARGET_DB} $dbBlastTab $aToB/recipBest.tab"
        hgLoadBlastTab "${TARGET_DB}" "$dbBlastTab" "$aToB/recipBest.tab"
        echo "hgLoadBlastTab $DB ${TARGET}BlastTab $bToA/recipBest.tab"
        hgLoadBlastTab "$DB" "${TARGET}BlastTab" "$bToA/recipBest.tab"
    done
}

loadRecipBest hg hg19 ce6 dm3 sacCer2
loadRecipBest mm mm9 ce6 dm3 sacCer2
loadRecipBest rn rn4 ce6 dm3 sacCer2
loadRecipBest ce ce6 dm3 sacCer2
loadRecipBest sc sacCer2 dm3 ce6
loadRecipBest dm dm3 sacCer2 ce6

# Reciprocal best between danRer6 and each of the other organisms.
# For every tDB the blastp results are read from
# ../<tDB>/run.danRer6.<tDB>/out and ../<tDB>/run.<tDB>.danRer6/out,
# blastRecipBest is run on the pair, and <tDB>.drBlastTab is loaded.
DB=danRer6
for tDB in hg19 mm9 rn4 ce6 dm3 sacCer2
do
    echo "$DB"
    aToB="run.${DB}.${tDB}"
    bToA="run.${tDB}.${DB}"
    # both directories receive an all.tab and a recipBest.tab below,
    # so both have to exist before the redirections
    mkdir -p "$aToB" "$bToA"
    cat "../${tDB}/${aToB}/out/"*.tab > "$aToB/all.tab"
    cat "../${tDB}/${bToA}/out/"*.tab > "$bToA/all.tab"
    blastRecipBest "$aToB/all.tab" "$bToA/all.tab" \
        "$aToB/recipBest.tab" "$bToA/recipBest.tab"
    # DB is always danRer6 here, so this selects drBlastTab
    dbBlastTab=xxBlastTab
    case "$DB" in
        ce6) dbBlastTab=ceBlastTab;;
        dm3) dbBlastTab=dmBlastTab;;
        danRer6) dbBlastTab=drBlastTab;;
        sacCer2) dbBlastTab=scBlastTab;;
    esac
    # NOTE(review): the reciprocal best steps for the other targets in this
    # file load <targetDb>.<xx>BlastTab from run.<targetDb>.<otherDb>, while
    # here <tDB>.drBlastTab comes from run.danRer6.<tDB>, and nothing is
    # loaded into danRer6 from $bToA - confirm this is intended.
    echo "hgLoadBlastTab ${tDB} $dbBlastTab $aToB/recipBest.tab"
    hgLoadBlastTab "${tDB}" "$dbBlastTab" "$aToB/recipBest.tab"
done

# hgLoadBlastTab hg19 drBlastTab run.danRer6.hg19/recipBest.tab
# Loading database with 12146 rows

# hgLoadBlastTab mm9 drBlastTab run.danRer6.mm9/recipBest.tab
# Loading database with 11997 rows

# hgLoadBlastTab rn4 drBlastTab run.danRer6.rn4/recipBest.tab
# Loading database with 5142 rows

# hgLoadBlastTab ce6 drBlastTab run.danRer6.ce6/recipBest.tab
# Loading database with 4865 rows

# hgLoadBlastTab dm3 drBlastTab run.danRer6.dm3/recipBest.tab
# Loading database with 5765 rows

# hgLoadBlastTab sacCer2 drBlastTab run.danRer6.sacCer2/recipBest.tab
# Loading database with 2174 rows

