# for emacs: -*- mode: sh; -*-


# Ciona Intestinalis V1.0 from JGI

# DOWNLOAD SEQUENCE 

    ssh eieio
    mkdir /cluster/store5/squirt
    cd /cluster/store5/squirt
    wget ftp://ftp.jgi-psf.org/pub/JGI_data/Ciona/v1.0/ciona.fasta.gz
    gunzip ciona.fasta

    # translate to nib
    ln -s /cluster/store5/squirt/ci1 ~/ci1
    cd ~/ci1
    mkdir scaffolds

    faSplit byname ciona.fasta scaffolds/

    mkdir nib
    cd nib
    foreach i (../scaffolds/*.fa)
        faToNib $i `basename $i .fa`.nib
    end

# Create the database.
    ssh hgwdev
    echo 'create database ci1' | hgsql ''
# CREATING GRP TABLE FOR TRACK GROUPING 
    echo "create table grp (PRIMARY KEY(NAME)) select * from rn1.grp" \
      | hgsql ci1

    echo 'insert into blatServers values("ci1", "blat10", "17780", "1"); \
          insert into blatServers values("ci1", "blat10", "17781", "0");' \
      | hgsql -h genome-testdb hgcentraltest

# STORING O+O SEQUENCE AND ASSEMBLY INFORMATION
    # Make symbolic links in /gbdb/ci1/nib to the real nibs.
    ssh hgwdev
    ln -s /cluster/store5/squirt/ci1/nib /gbdb/ci1/nib
    # Load /gbdb/ci1/nib paths into database 
    hgsql ci1  < ~kent/src/hg/lib/chromInfo.sql
    cd ~/ci1
    hgNibSeq -preMadeNib ci1 /gbdb/ci1/nib scaffolds/*.fa

    # make gcPercent table
    ssh hgwdev
    mkdir -p ~/ci1/bed/gcPercent
    cd ~/ci1/bed/gcPercent
    hgsql ci1  < ~/kent/src/hg/lib/gcPercent.sql
    hgGcPercent ci1 ../../nib

### AUTO UPDATE GENBANK MRNA RUN 
    # Update genbank config and source in CVS:
    cd ~/kent/src/hg/makeDb/genbank
    cvsup .

    # Edit etc/genbank.conf and add these lines:
# ci1 (ciona intestinalis)
ci1.genome = /iscratch/i/squirt/ci1/nib/Scaffold*.nib
ci1.lift = no
ci1.refseq.mrna.native.load = yes
ci1.refseq.mrna.xeno.load = yes
ci1.refseq.mrna.xeno.pslReps = -minCover=0.25 -minAli=0.15 -nearTop=0.005
ci1.genbank.mrna.xeno.load = yes
ci1.genbank.est.xeno.load = no
ci1.downloadDir = ci1
ci1.perChromTables = no
# ci1 

    cvs ci etc/genbank.conf
    make
    # Install to /cluster/data/genbank:
    make install-server

    ssh eieio
    cd /cluster/data/genbank
    # This is an -initial run, mRNA only:
    nice bin/gbAlignStep -clusterRootDir=/cluster/bluearc/genbank \
      -iserver=kkr1u00 -iserver=kkr2u00 -iserver=kkr3u00 -iserver=kkr4u00 \
      -iserver=kkr5u00 -iserver=kkr6u00 -iserver=kkr7u00 -iserver=kkr8u00 \
      -srcDb=genbank -type=mrna -verbose=1 -initial ci1
    # Load results:
    ssh hgwdev
    cd /cluster/data/genbank
    nice bin/gbDbLoadStep -verbose=1 -drop -initialLoad ci1
    # Clean up:
    rm -r /cluster/bluearc/genbank/work/initial.ci1

    ssh eieio
    # -initial for ESTs (now with /cluster/store7 and iservers):
    nice bin/gbAlignStep -clusterRootDir=/cluster/store7/genbank \
      -iserver=kkr1u00 -iserver=kkr2u00 -iserver=kkr3u00 -iserver=kkr4u00 \
      -iserver=kkr5u00 -iserver=kkr6u00 -iserver=kkr7u00 -iserver=kkr8u00 \
      -srcDb=genbank -type=est -verbose=1 -initial ci1
    # Load results:
    ssh hgwdev
    cd /cluster/data/genbank
    nice bin/gbDbLoadStep -verbose=1 ci1

    # ./bin/gbDbLoadStep -noPerChrom=ci1 -initialLoad ci1 
    # ./bin/i386/gbLoadRna -drop ci1
# drop too short indices
    foreach i (all_mrna intronEst all_est xenoMrna)
	echo "drop index tName on $i ;" | hgsql ci1
	echo "drop index tName_2 on $i ;" | hgsql ci1
	echo "drop index tName_3 on $i ;" | hgsql ci1
	echo "dropped indices on $i"
    end
    foreach i (all_mrna intronEst all_est xenoMrna)
	echo "create index tName on $i (tName(13), bin) ;" 
	echo "create index tName on $i (tName(13), bin) ;" | hgsql ci1
	echo "create index tName_2 on $i  (tName(13), tStart);"
	echo "create index tName_2 on $i  (tName(13), tStart);" | hgsql ci1
	echo "create index tName_3 on $i  (tName(13), tEnd);" 
	echo "create index tName_3 on $i  (tName(13), tEnd);" | hgsql ci1
	echo "created indices on $i"
    end

### gap and repeats tables
    ssh hgwdev
    mkdir -p /cluster/data/ci1/bed/gapRmsk
    cd /cluster/data/ci1/bed/gapRmsk
    simpleGap /cluster/data/ci1/nib gap.bed repeats.bed
    echo "drop table gap;" | hgsql ci1
    hgsql ci1 < ~/kent/src/hg/lib/gap.sql   
    hgLoadBed -oldTable ci1 gap gap.bed
    echo "drop index bin on gap ;" | hgsql ci1
    echo "create index chrom on gap (chrom(13), bin) ;" | hgsql ci1
    echo "create index chrom_2 on gap  (chrom(13), chromStart);" | hgsql ci1
    echo "create index chrom_3 on gap  (chrom(13), chromEnd);" | hgsql ci1

    # do RepeatMasking
    cd /cluster/data/ci
    echo "drop index bin on rmsk;" | hgsql ci1
    echo "drop index genoStart on rmsk;" | hgsql ci1
    echo "drop index genoEnd on rmsk;" | hgsql ci1
    echo "create index chrom_2 on rmsk  (genoName(13), genoStart);" | hgsql ci1
    echo "create index chrom_3 on rmsk  (genoName(13), genoEnd);" | hgsql ci1

    ssh eieio
    mkdir -p /cluster/data/ci1/bed/simpleRepeat
    cd /cluster/data/ci1/bed/simpleRepeat
    mkdir trf
    for i in ../../scaffolds/*.fa
    do 
    	trfBig $i /dev/null -bedAt=trf/`basename $i .fa`.bed > /dev/null 2>&1 ; echo $i;
    done
    cat trf/* > simpleRepeat.bed

    ssh hgwdev
    hgLoadBed ci1 simpleRepeat /cluster/data/ci1/bed/simpleRepeat/simpleRepeat.bed \
    	-sqlTable=$HOME/kent/src/hg/lib/simpleRepeat.sql 

    ssh eieio
    cd /cluster/data/ci1/bed/simpleRepeat
    mkdir -p trfMask
    cd trf
    for i in *.bed
    do
      awk '{if ($5 <= 12) print;}' $i > ../trfMask/$i
    done

    # use RepeatMasker and simpleRepeat to build masked fa's
    cd /cluster/data/ci1
    mkdir maskedScaffolds
    cd scaffolds
    for i in *.fa
    do
        maskOutFa -soft $i ../bed/RM/out/$i.out ../maskedScaffolds/$i 
        maskOutFa -softAdd ../maskedScaffolds/$i ../bed/simpleRepeat/trfMask/`basename $i .fa`.bed  ../maskedScaffolds/$i 
    done

    # Rebuild the nib files, using the soft masking in the fa:                                           
    mkdir -p /cluster/data/ci1/nib
    cd /cluster/data/ci1/nib/maskedScaffolds
    
    for i in *.fa
    do
      faToNib -softMask $i ../nib/`basename $i .fa`.nib
    done

    # Make one big 2bit file as well, and make a link to it in
    # /gbdb/ci1/nib because hgBlat looks there (or so says galGal2)
    faToTwoBit *.fa ../ci1.2bit                                                                     
    ln -s /cluster/data/ci1/ci1.2bit /gbdb/ci1/nib/

### JGI ciona genes
# the GFF file on the JGI web site is garbage (per Daniel Rokhsar (dsrokhsar@lbl.gov)
# build a genPred file from predicted transcripts and predicted proteins.
# first blat transcripts against ci1
    ssh kk
    mkdir /cluster/data/ci1/bed/jgiGene
    cd /cluster/data/ci1/bed/jgiGene
    wget "ftp://ftp.jgi-psf.org/pub/JGI_data/Ciona/v1.0/ciona.mrna.fasta.gz"
    gunzip ciona.mrna.fasta.gz
    cat << '_EOF_' > gsub
#LOOP
/cluster/bin/i386/blat -mask=lower -q=dna -t=dna {check in exists $(path1)} {check in line+ $(path2)} {check out line+ psl/$(root1)/$(root2).psl}
#ENDLOOP
'_EOF_'
    ls -1S /iscratch/i/squirt/ci1/nib/* > squirt.lst
    mkdir fas
    faSplit sequence *.fasta 40 fas/ci
    ls -1S fas/* > mrna.lst
    gensub2 squirt.lst mrna.lst gsub spec
    mkdir psl
    cd psl
    #!/bin/sh
    for i in `cat ../squirt.lst`
    do
	mkdir `basename $i .nib`
    done
    cd ..
    para create spec
    para push
# Completed: 100040 of 100040 jobs
# CPU time in finished jobs:      78739s    1312.32m    21.87h    0.91d  0.002 y
# IO & Wait Time:                284574s    4742.89m    79.05h    3.29d  0.009 y
# Average job time:                   4s       0.06m     0.00h    0.00d
# Longest job:                       37s       0.62m     0.01h    0.00d
# Submission to last job:          1766s      29.43m     0.49h    0.02d

    ssh eieio
    pslSort dirs raw.psl /tmp psl/*
    sed "1,5d" raw.psl | sort -rn | pslUniq stdin mrna.psl  

# now blat proteins against predicted transcripts to get CDS
    wget "ftp://ftp.jgi-psf.org/pub/JGI_data/Ciona/v1.0/ciona.prot.fasta.gz" 
    gunzip ciona.prot.fasta.gz
    blat -q=prot -t=dnax ciona.mrna.fasta ciona.prot.fasta cds.psl
    sort -rn cds.psl | pslUniq stdin stdout | awk "{printf \"%s\t%s..%s\n\",\$14,\$16+1,\$17}" > jgiGene.cds
    mrnaToGene -cdsFile=jgiGene.cds mrna.psl jgiGene.gp -genePredExt > log 2>&1 
    gzip *fasta
    rm -rf batch* err fas *.tab log psl *.psl  para* spec

    ssh hgwdev
    cd /cluster/data/ci1/bed/jgiGene
    ldHgGene ci1 jgiGene jgiGene.gp -predTab -genePredExt
    hgPepPred ci1 generic jgiGenePep /cluster/data/ci1/bed/jgiGene/ciona.prot.fasta

### SNAP GENE PREDICTIONS FROM COLIN DEWEY (DONE 4/23/04 angie)
    ssh hgwdev
    mkdir /cluster/data/ci1/bed/snap
    cd /cluster/data/ci1/bed/snap
    # contact: Colin Dewey <cdewey@eecs.berkeley.edu>
    wget http://hanuman.math.berkeley.edu/~cdewey/tracks/ci1/SNAP.gff.gz
    gunzip SNAP.gff.gz
    ldHgGene -gtf -frame -id -geneName ci1 snapGene SNAP.gff

### tBLASTn human proteins
    ssh kk
    cd /cluster/data/ci1/bed
    mkdir tblastnHg16KG
    cd tblastnHg16KG
    ls -1S /iscratch/i/squirt/ci1/blastDb/*.nsq | sed "s/.nsq//" > squirt.lst
    mkdir kgfas
    cd kgfas
    split -l 164 /cluster/data/hg16/bed/blat.hg16KG.2004-05-27/uniq.psl kg
    foreach i (kg*)
	pslxToFa $i $i.fa
    end
    cd ..
    ssh kkr1u00
    mkdir  /iscratch/i/squirt/ci1/kgfas
    cd /cluster/data/ci1/bed/tblastnHg16KG/kgfas
    cp *.fa  /iscratch/i/squirt/ci1/kgfas
    exit
    ls -1S /iscratch/i/squirt/ci1/kgfas/*.fa > kg.lst   
    # get blastGsub and blastSome from previous version
    gensub2 squirt.lst kg.lst blastGsub blastSpec

    mkdir blastOut
    cd blastOut
    foreach i (`cat ../kg.lst`)
	mkdir `basename $i .fa`
    end
    cd ..
    para create blastSpec
    para push
#  Completed: 10040 of 10040 jobs
#  CPU time in finished jobs:    2489065s   41484.41m   691.41h   28.81d  0.079 y
#  IO & Wait Time:                214441s    3574.02m    59.57h    2.48d  0.007 y
#  Average job time:                 269s       4.49m     0.07h    0.00d
#  Longest job:                      732s      12.20m     0.20h    0.01d
#  Submission to last job:          7374s     122.90m     2.05h    0.09d
    cd blastOut
    foreach i (kg??)
	cat $i/q.*.psl > q.$i.psl
	cat $i/q.*.bscore > q.$i.bscore
	cat $i/t.*.psl > t.$i.psl
	cat $i/t.*.bscore > t.$i.bscore
	scoreSort t.$i.psl t.$i.bscore ts.$i.psl ts.$i.bscore
	echo $i
    end
    cd ..
    ls -1 blastOut/q.kg??.psl > chain.lst    
    cat << '_EOF_' > chainGsub
#LOOP
./chainSome blastOut/$(root1).psl blastOut/$(root1).bscore /cluster/data/ci1/nib /cluster/data/hg16/bed/blat.hg16KG/known.fa /scratch/blast/data/BLOSUM80   blastOut/$(root1).0.psl blastOut/$(root1).0.bscore
#ENDLOOP
'_EOF_'

    gensub2 chain.lst single chainGsub chainSpec
    para create chainSpec
    ./shove 40 10
# Completed: 251 of 251 jobs
# CPU time in finished jobs:       3564s      59.40m     0.99h    0.04d  0.000 y
# IO & Wait Time:                  5806s      96.77m     1.61h    0.07d  0.000 y
# Average job time:                  37s       0.62m     0.01h    0.00d
# Longest job:                       57s       0.95m     0.02h    0.00d
# Submission to last job:          1494s      24.90m     0.41h    0.02d
    cd blastOut
    cat q.*.0.psl > allQ0.psl
    cat q.*.0.bscore > allQ0.bscore                                                                                       
    scoreSort allQ0.psl allQ0.bscore allProts.psl allProts.bscore
    pslUniq allProts.psl uniqProts.psl
    sort -n --key=16 uniqProts.psl | sort --key=14 | uniq > bestHg16KG.psl
    # cp bestHg16KG.psl blastHg16KG.psl
    protDat bestHg16KG.psl /cluster/data/hg16/bed/blat.*7/hg16KG.psl /cluster/data/hg16/bed/blat.*7/kg.mapNames blastHg16KG.psl

    hgLoadPsl ci1 blastHg16KG.psl     
    echo "drop index bin on blastHg16KG ;" | hgsql ci1
    echo "drop index tStart on blastHg16KG ;" | hgsql ci1
    echo "drop index tEnd on blastHg16KG ;" | hgsql ci1
    echo "drop index qName on blastHg16KG ;" | hgsql ci1
    echo "create index tName on blastHg16KG (tName(13), bin) ;" | hgsql ci1
    echo "create index tName_2 on blastHg16KG  (tName(13), tStart);" | hgsql ci1
    echo "create index tName_3 on blastHg16KG  (tName(13), tEnd);" | hgsql ci1
    echo "create index qName on blastHg16KG  (qName(10));" | hgsql ci1

    foreach i ( kg?? )
	findExons uniqProts.psl ts.$i.psl x.$i.psl xa.$i.psl
    end
    cat x.*.psl > allXA.psl
    liftUp -type=.psl -nohead -pslQ -isPtoG liftAllXA.psl /cluster/data/hg16/bed/blat.*7/genome.lft warn allXA.psl
    sort -n --key=16 liftAllXA.psl | sort --key=14 | uniq | sed "s/ci1.//" |  pslPosTarget stdin  tblastHg16.psl
    hgLoadPsl ci1 tblastHg16.psl

    simpleChain tblastHg16.psl chainHg16.chain
    chainSwap chainHg16.chain chainCi1.chain
    ./revChainScore chainCi1.chain chainCi1S.chain
    chainSwap chainCi1S.chain chainHg16S.chain
    hgLoadChain hg16 chainCi1ProtEx chainCi1S.chain
    hgLoadChain ci1 chainHg16ProtEx  chainHg16S.chain

### blat CioSav1
    ssh kk
    mkdir /cluster/data/ci1/bed/blatCioSav1
    cd  /cluster/data/ci1/bed/blatCioSav1

    cat << '_EOF_' > gsub
#LOOP
/cluster/bin/i386/blat -mask=lower -q=dnax -t=dnax {check in exists $(path1)} {check in line+ $(path2)} {check out line+ psl/$(root1)/$(root2).psl}
#ENDLOOP
'_EOF_'
    ls -1S /scratch/hg/ci1/nib/Scaffold_*.nib > squirt.lst
    ls -1S /iscratch/i/squirt/ci1/Savignyi/*.fa > other.lst
    gensub2 squirt.lst other.lst gsub spec
    mkdir psl
    cd psl
    for i in `cat ../squirt.lst`
    do
	mkdir `basename $i .nib`
    done
    cd ..
    para create spec
    para push

    ssh eieo
    pslSort dirs blatCioSav1.psl /tmp psl/*

    hgLoadPsl ci1 blatCioSav1.psl
### end blat

# MAKE DOWNLOADABLE SEQUENCE FILES 
    ssh kksilo
    cd /cluster/data/ci1
    #- Build the .zip files
    cat << '_EOF_' > jkStuff/zipAll.csh
rm -rf zip
mkdir zip
cd bed/RM
zip -j ../../zip/ScaffoldOut.zip out/Scaffold*.out
cd ../../
zip -j zip/ScaffoldFa.zip scaffolds/*.fa
zip -j zip/ScaffoldFaMasked.zip maskedScaffolds/*.fa
cd bed/simpleRepeat
zip -j ../../zip/ScaffoldTrf.zip trfMask/*.bed
cd ../..
cd /cluster/data/genbank
./bin/i386/gbGetSeqs -db=ci1 -native GenBank mrna /cluster/data/ci1/zip/mrna.fa
cd /cluster/data/ci1/zip
zip -j mrna.zip mrna.fa
'_EOF_'
    # << this line makes emacs coloring happy
    csh ./jkStuff/zipAll.csh |& tee zipAll.log
    cd zip
    #- Look at zipAll.log to make sure all file lists look reasonable.  
    #- Check zip file integrity:
    foreach f (*.zip)
      unzip -t $f > $f.test
      tail -1 $f.test
    end
    wc -l *.zip.test

    #- Copy the .zip files to hgwdev:/usr/local/apache/...
    ssh hgwdev
    cd /cluster/data/ci1/zip
    set gp = /usr/local/apache/htdocs/goldenPath/ci1
    mkdir -p $gp/bigZips
    cp -p *.zip $gp/bigZips
    # mkdir -p $gp/scaffolds
    # foreach f ( ../*/chr*.fa )
      # zip -j $gp/chromosomes/$f:t.zip $f
    # end

    cd $gp/bigZips
    md5sum *.zip > md5sum.txt
    # cd $gp/chromosomes
    # md5sum *.zip > md5sum.txt
    # Take a look at bigZips/* and chromosomes/*, update their README.txt's

# CREATE gc5Base wiggle TRACK (DONE, 2004-04-16, Hiram)

    # Perform a gc count with a 5 base window. 
    # Also compute a "zoomed" view for display efficiency.

    #	on the file server for this disk intensive operation
    ssh eieio
    mkdir /cluster/data/ci1/bed/gc5Base
    cd /cluster/data/ci1/bed/gc5Base

    #   in the script below, the 'grep -w GC' selects the lines of
    #   output from hgGcPercent that are real data and not just some
    #   information from hgGcPercent.  The awk computes the number
    #   of bases that hgGcPercent claimed it measured, which is not
    #   necessarily always 5 if it ran into gaps, and then the division
    #   by 10.0 scales down the numbers from hgGcPercent to the range
    #   [0-100].  Two columns come out of the awk print statement:
    #   <position> and <value> which are fed into wigAsciiToBinary through
    #   the pipe.  It is set at a dataSpan of 5 because each value
    #   represents the measurement over five bases beginning with
    #   <position>.  The result files end up in ./wigData5.
    cat << '_EOF_' > runGcPercent.sh
#!/bin/sh
mkdir -p wigData5
mkdir -p dataLimits5
for n in /cluster/data/ci1/nib/Scaff*.nib
do
        c=`basename ${n} | sed -e "s/.nib//"`
        C=`echo $c | sed -e "s/Scaffold_//"`
        echo -n "working on ${c} - ${C} ... "
        hgGcPercent -chr=${c} -doGaps \
                -file=stdout -win=5 ci1 /cluster/data/ci1/nib | grep -w GC | \
                awk '{printf "%d\t%.1f\n", $2+1, $5/10.0 }' | \
        wigAsciiToBinary \
        -dataSpan=5 -chrom=${c} -wibFile=wigData5/gc5Base_${C} \
        -name=${C} stdin 2> dataLimits5/${C}
echo "done"
done
'_EOF_'

    chmod +x runGcPercent.sh
    time ./runGcPercent.sh
#	real    11m29.211s
#	user    9m48.090s
#	sys     3m41.940s

    # load the .wig files back on hgwdev:
    ssh hgwdev
    cd /cluster/data/ci1/bed/gc5Base
    hgLoadWiggle -pathPrefix=/gbdb/ci1/wib/gc5Base ci1 gc5Base wigData5/*.wig
    # and symlink the .wib files into /gbdb
    mkdir -p /gbdb/ci1/wib/gc5Base
    #	arg list too long to do this simply
    cd wigData5
    ls *.wib | while read FN
do
    ln -s `pwd`/${FN} /gbdb/ci1/wib/gc5Base
    echo ${FN}
done
    # to speed up display for whole chromosome views, compute a "zoomed"
    # view and load that on top of the existing table.  The savings
    # comes from the number of data table rows the browser needs to load
    # for a full chromosome view.  Without the zoomed view there are
    # over 43,000 data rows for chrom 1.  With the zoomed view there are
    # only 222 rows needed for the display.  If your original data was
    # at 1 value per base the savings would be even greater.
    #   Pretty much the same data calculation
    # situation as above, although this time note the use of the
    # 'wigZoom -dataSpan=1000 stdin' in the pipeline.  This will average
    # together the data points coming out of the awk print statment over
    # a span of 1000 bases.  Thus each <position> coming out of wigZoom
    # will represent the measurement of GC in the next 1000 bases.  Note
    # the use of -dataSpan=1000 on the wigAsciiToBinary to account for
    # this type of data.  You want your dataSpan here to be an exact
    # multiple of your original dataSpan (5*200=1000) and on the order
    # of at least 1000, doesn't need to go too high.  For data that is
    # originally at 1 base per value, a convenient span is: -dataSpan=1024
    # A new set of result files ends up in ./wigData5_1K/*.wi[gb]

    cat << '_EOF_' > runZoom.sh
#!/bin/sh
                                                                                
mkdir -p wigData5_1K
mkdir -p dataLimits5_1K
                                                                                
for n in /cluster/data/ci1/nib/Scaff*.nib
do
        c=`basename ${n} | sed -e "s/.nib//"`
        C=`echo $c | sed -e "s/Scaffold_//"`
        echo -n "working on ${c} - ${C} ... "
        hgGcPercent -chr=${c} -doGaps \
                -file=stdout -win=5 ci1 /cluster/data/ci1/nib | grep -w GC | \
                awk '{printf "%d\t%.1f\n", $2+1, $5/10.0}' | \
        wigZoom -dataSpan=1000 stdin | wigAsciiToBinary \
        -dataSpan=1000 -chrom=${c} -wibFile=wigData5_1K/gc5Base_${C}_1K \
        -name=${C} stdin 2> dataLimits5_1K/${C}
echo "done"
done
'_EOF_'
    chmod +x runZoom.sh
    time ./runZoom.sh

    #   Then load these .wig files into the same database as above
    ssh hgwdev
    hgLoadWiggle -pathPrefix=/gbdb/ci1/wib/gc5Base -oldTable ci1 \
	gc5Base wigData5_1K/*.wig
    # and symlink these .wib files into /gbdb
    mkdir /gbdb/ci1/wib/gc5Base
    #	arg list too long to do this simply
    cd wigData5_1K
    ls *.wib | while read FN
do
    ln -s `pwd`/${FN} /gbdb/ci1/wib/gc5Base
    echo ${FN}
done


# SWAP CI1-CIOSAV1 BLASTZ 
    ssh kksilo
    mkdir -p /cluster/data/ci1/bed/blastz.cioSav1.swap.2004-08-03
    ln -s /cluster/data/ci1/bed/blastz.cioSav1.swap.2004-08-03 /cluster/data/ci1/bed/blastz.cioSav1
    cd /cluster/data/ci1/bed/blastz.cioSav1
    set aliDir = /cluster/data/cioSav1/bed/blastz.ci1
    cp $aliDir/S1.len S2.len
    cp $aliDir/S2.len S1.len
    cat $aliDir/axtChrom/*.axt | axtSwap stdin $aliDir/S1.len $aliDir/S2.len stdout | axtSort stdin all.axt
    nice axtChain -verbose=0 all.axt /iscratch/i/ci1/nib /iscratch/i/cioSav1/nib all.chain &
    wait

    # Load chains into database
    ssh hgwdev
    cd /cluster/data/ci1/bed/blastz.cioSav1
    hgLoadChain ci1 chainCioSav1 all.chain
    rm *.tab

# NET CIOSAV1 BLASTZ 
    ssh kksilo
    cd /cluster/data/ci1/bed/blastz.cioSav1
    chainPreNet all.chain S1.len S2.len stdout | chainNet stdin -minSpace=1 S1.len S2.len stdout /dev/null 
    	\ | netSyntenic stdin noClass.net

    # Add classification info using db tables:
    ssh hgwdev
    cd /cluster/data/ci1/bed/blastz.cioSav1
    netClass -noAr noClass.net ci1 cioSav1 cioSav1.net

    exit
    # back at kksilo
    # Make a 'syntenic' subset:
    rm noClass.net 
    netFilter -syn cioSav1.net > cioSav1Syn.net

    # Load the nets into database 
    ssh hgwdev
    cd /cluster/data/ci1/bed/blastz.cioSav1
    netFilter -minGap=10 cioSav1.net |  hgLoadNet ci1 netCioSav1 stdin
    netFilter -minGap=10 cioSav1Syn.net  | hgLoadNet ci1 netSyntenyCioSav1 stdin

# MAKE Human Proteins track (hg17)
    ssh kksilo
    mkdir -p /cluster/data/ci1/bed/tblastn.hg17KG
    cd /cluster/data/ci1/bed/tblastn.hg17KG
    ls -1S /iscratch/i/ci1/blastDb/*.nsq | sed "s/\.nsq//" > ciona.lst

    mkdir kgfa
    split -l 100 /cluster/data/hg17/bed/blat.hg17KG/hg17KG.psl kgfa/kg
    cd kgfa
    for i in *; do pslxToFa $i $i.fa; rm $i; done
    cd ..
    ls -1S kgfa/*.fa > kg.lst
    mkdir blastOut
    for i in `cat kg.lst`; do  mkdir blastOut/`basename $i .fa`; done
    cat << '_EOF_' > blastGsub
#LOOP
blastSome $(path1) {check in line $(path2)} {check out exists blastOut/$(root2)/q.$(root1).psl } 
#ENDLOOP
'_EOF_'
    cat << '_EOF_' > blastSome
#!/bin/sh
BLASTMAT=/iscratch/i/blast/data
export BLASTMAT
f=/tmp/`basename $3`
for eVal in 0.01 0.001 0.0001 0.00001 0.000001 1E-09 1E-11
do
if /scratch/blast/blastall -M BLOSUM80 -m 0 -F no -e $eVal -p tblastn -d $1 -i $2 -o $f.8
then
        mv $f.8 $f.1
        break;
fi
done
if test -f  $f.1
then
if /cluster/bin/i386/blastToPsl $f.1 $f.2
then
        liftUp -nosort -type=".psl" -pslQ -nohead $3.tmp /cluster/data/hg17/bed/blat.hg17KG/protein.lft warn $f.2
	if pslCheck -prot $3.tmp
	then
	    mv $3.tmp $3
	    rm -f $f.1 $f.2 
	fi
        exit 0
    fi
fi
rm -f $f.1 $f.2 $3.tmp $f.8
exit 1
'_EOF_'

    chmod +x blastSome
    gensub2 ciona.lst kg.lst blastGsub blastSpec

    ssh kk
    cd /cluster/data/ci1/bed/tblastn.hg17KG
    para create blastSpec
    para push
# Completed: 16520 of 16520 jobs
# CPU time in finished jobs:    2469697s   41161.62m   686.03h   28.58d  0.078 y
# IO & Wait Time:               2404362s   40072.69m   667.88h   27.83d  0.076 y
# Average job time:                 295s       4.92m     0.08h    0.00d
# Longest job:                     3067s      51.12m     0.85h    0.04d

    cat << '_EOF_' > chainGsub
#LOOP
chainSome $(path1)
#ENDLOOP
'_EOF_'

    cat << '_EOF_' > chainSome
(cd $1; cat q.*.psl | simpleChain -prot -outPsl -maxGap=7500 stdin ../c.`basename $1`.psl)
'_EOF_'
    chmod +x chainSome

    ls -1dS `pwd`/blastOut/kg?? > chain.lst
    gensub2 chain.lst single chainGsub chainSpec

    ssh kki
    cd /cluster/data/ci1/bed/tblastn.hg17KG
    para create chainSpec
    para push
# Completed: 413 of 413 jobs
# CPU time in finished jobs:         43s       0.72m     0.01h    0.00d  0.000 y
# IO & Wait Time:                  1752s      29.19m     0.49h    0.02d  0.000 y
# Average job time:                   4s       0.07m     0.00h    0.00d
# Longest job:                       12s       0.20m     0.00h    0.00d
# Submission to last job:           243s       4.05m     0.07h    0.00d

    exit
    # back to eieio
    cd /cluster/data/ci1/bed/tblastn.hg17KG/blastOut
    for i in kg??
    do 
	awk "(\$13 - \$12)/\$11 > 0.6 {print}" c.$i.psl > c60.$i.psl
	sort -rn c60.$i.psl | pslUniq stdin u.$i.psl
	awk "((\$1 / \$11) ) > 0.60 { print   }" c60.$i.psl > m60.$i.psl
	echo $i
    done

    cat u.*.psl m60* | sort -T /tmp -k 14,14 -k 17,17n -k 17,17n  | uniq  > ../blastHg17KG.psl
    cd ..

    ssh hgwdev
    cd /cluster/data/ci1/bed/tblastn.hg17KG
    hgLoadPsl ci1 blastHg17KG.psl
    echo "drop index tName on blastHg17KG ;" | hgsql ci1
    echo "drop index tName_2 on blastHg17KG ;" | hgsql ci1
    echo "drop index tName_3 on blastHg17KG ;" | hgsql ci1
    echo "create index tName on blastHg17KG (tName(13), bin) ;" | hgsql ci1
    echo "create index tName_2 on blastHg17KG  (tName(13), tStart);" | hgsql ci1
    echo "create index tName_3 on blastHg17KG  (tName(13), tEnd);" | hgsql ci1
    exit

    # back to kksilo
    rm -rf blastOut

# End tblastn
