# for emacs: -*- mode: sh; -*-

 
# This file describes building the browser database for the archaeal
# species Picrophilus torridus (database picrTorr1).

# DOWNLOAD SEQUENCE FROM GENBANK (DONE 9/30/05)

    # Create the picrTorr1 working directory on store5, expose it as
    # /cluster/data/picrTorr1 through a symlink, and copy the genome FASTA
    # into it as chr.fa.
    ssh hgwdev
    mkdir /cluster/store5/archae/picrTorr1
    ln -s /cluster/store5/archae/picrTorr1 /cluster/data/picrTorr1
    cd /cluster/data/picrTorr1
    # The FASTA is copied from the /projects/lowelab/db tree, not fetched here.
    cp /projects/lowelab/db/Bacteria/Picrophilus_torridus_DSM_9790/Picr_torr_DSM_9790.fa chr.fa
    # Manual step (no command recorded):
    # Edit header of chr.fa to '> picrTorr1'

# CREATE DATABASES AND A BUNCH OF INITIAL STUFF (DONE 9/30/05)

    # Create the picrTorr1 MySQL database, then build the .nib file and
    # chrom.sizes from chr.fa.
    echo 'create database picrTorr1' | hgsql ''
    cd /cluster/data/picrTorr1
    mkdir nib
    faToNib chr.fa nib/chr.nib
    hgNibSeq picrTorr1 /cluster/data/picrTorr1/nib chr.fa
    faSize -detailed chr.fa > chrom.sizes
    # NOTE(review): dbDb.nibPath below is /gbdb/picrTorr1/nib, but no command
    # in this section links nib/chr.nib into that directory -- confirm whether
    # a symlink is needed.
    mkdir -p /gbdb/picrTorr1/nib
    # Create the grp table as a copy of hg16.grp, keyed on NAME.
    echo "create table grp (PRIMARY KEY(NAME)) select * from hg16.grp" \
	    | hgsql picrTorr1
    # Register the assembly in hgcentraltest: one row each in dbDb, defaultDb
    # and genomeClade.
    # NOTE(review): the backslash line continuations inside the single-quoted
    # SQL look like csh/tcsh usage -- confirm the shell before pasting into sh.
    echo 'INSERT INTO dbDb \
        (name, description, nibPath, organism, \
                defaultPos, active, orderKey, genome, scientificName, \
                htmlPath, hgNearOk) values \
        ("picrTorr1", "June 2004", "/gbdb/picrTorr1/nib", "Picrophilus torridus", \
               "chr:500000-550000", 1, 282, "Picrophilus torridus", \
                "Picrophilus torridus DSM", "/gbdb/picrTorr1/html/description.html", \
                0);' \
      | hgsql hgcentraltest
    echo 'INSERT INTO defaultDb (genome, name) values ("Picrophilus torridus", "picrTorr1");' \
      | hgsql hgcentraltest
    echo 'INSERT INTO genomeClade (genome, clade, priority) values ("Picrophilus torridus", "archaea",282);'  \
      | hgsql hgcentraltest

    cd ~/kent/src/hg/makeDb/trackDb
    # add the trackDb directories
    mkdir -p archae/picrTorr1
    cvs add archae/picrTorr1
    cvs commit archae/picrTorr1

# GC20BASE (DONE - 9/30/05)
    # Build the gc20Base wiggle track: hgGcPercent output (20-base windows,
    # written to stdout) is piped into wigEncode, the .wib is linked into
    # /gbdb, and the .wig is loaded with hgLoadWiggle.
    mkdir -p /cluster/data/picrTorr1/bed/gc20Base
    cd /cluster/data/picrTorr1/bed/gc20Base
    hgGcPercent -wigOut -doGaps -file=stdout -win=20 picrTorr1 \
        /cluster/data/picrTorr1/nib | wigEncode stdin gc20Base.wig gc20Base.wib
    # The working directory is still bed/gc20Base, so `pwd` below resolves to
    # it.  -p keeps this step rerunnable if the wib directory already exists.
    mkdir -p /gbdb/picrTorr1/wib
    ln -s `pwd`/gc20Base.wib /gbdb/picrTorr1/wib
    hgLoadWiggle -pathPrefix=/gbdb/picrTorr1/wib picrTorr1 gc20Base gc20Base.wig
    #	verify index is correct: check cardinality column
    hgsql picrTorr1 -e "show index from gc20Base;"

# TANDEM REPEAT MASKER (DONE - 9/30/05)

    # Run trfBig on the genome, writing its BED output into
    # bed/simpleRepeat, then load that BED as the simpleRepeat table.
    mkdir -p /cluster/data/picrTorr1/bed/simpleRepeat
    cd /cluster/data/picrTorr1
    trfBig chr.fa /dev/null -bedAt=/cluster/data/picrTorr1/bed/simpleRepeat/chr.bed
    cd /cluster/data/picrTorr1/bed/simpleRepeat
    # Replace the first "picrTorr1" on each line with "chr", the sequence name
    # used in dbDb's defaultPos.  sed reads chr.bed directly (no cat), and
    # chr.bed is only overwritten when sed succeeds.
    sed -e 's/picrTorr1/chr/' chr.bed > temp && mv temp chr.bed
    # NOTE(review): the .sql path is under one user's home directory; point it
    # at your own kent checkout if that path is not readable.
    hgLoadBed picrTorr1 simpleRepeat *.bed -sqlTable=/cluster/home/kpollard/kent/src/hg/lib/simpleRepeat.sql

# TIGR GENES (FINISHED 10/3/05)
    # First go to http://www.tigr.org/tigr-scripts/CMR2/gene_attribute_form.dbi
    # and fill out the web form as follows:
    #   - Pick "Retrieve attributes for the specified DNA feature within a specific
    #     organism and/or a specific role category".
    #       * Pick genome, and "Primary and TIGR annotation ORFs"
    #         from the 1st and 3rd box.
    #       * Select everything from "Choose TIGR Annotation Gene Attributes"
    #       * Select "Primary Locus Name" from "Choose Primary Annotation Gene Attributes"
    #       * Select everything from "Choose Other Gene Attributes"
    #   - Click submit, and click save as tab-delimited file.

    # The saved file is expected as picrTorr1-tigr.tab in the current
    # directory.  It is converted to BED, loaded as tigrCmrGene, and the
    # table is then renamed to tigrCmrORFs.
    mkdir -p /cluster/data/picrTorr1/bed/tigrCmrORFs
    cp picrTorr1-tigr.tab /cluster/data/picrTorr1/bed/tigrCmrORFs
    cd /cluster/data/picrTorr1/bed/tigrCmrORFs
    ~aamp/bin/i386/tigrCmrToBed picrTorr1-tigr.tab picrTorr1-tigr.bed
    # Replace the first "chr1" on each line with "chr".  sed reads the BED
    # directly (no cat), and the BED is only overwritten when sed succeeds.
    sed -e 's/chr1/chr/' picrTorr1-tigr.bed > temp && mv temp picrTorr1-tigr.bed
    # NOTE(review): the .sql path is under one user's home directory; point it
    # at your own kent checkout if that path is not readable.
    hgLoadBed -tab picrTorr1 tigrCmrGene picrTorr1-tigr.bed -sqlTable=/cluster/home/kpollard/kent/src/hg/lib/tigrCmrGene.sql
    echo "rename table tigrCmrGene to tigrCmrORFs;" | hgsql picrTorr1

# DESCRIPTION PAGE (DONE 10/3/05)

    # Manual step: write the description page in the trackDb source tree.
    # Write ~/kent/src/hg/makeDb/trackDb/archae/picrTorr1/description.html
    chmod a+r ~/kent/src/hg/makeDb/trackDb/archae/picrTorr1/description.html
    # Check it in.
    # Copy the page into the data directory and link it into /gbdb, at the
    # location given as htmlPath in the dbDb row
    # (/gbdb/picrTorr1/html/description.html).
    mkdir -p /cluster/data/picrTorr1/html/
    cp ~/kent/src/hg/makeDb/trackDb/archae/picrTorr1/description.html /cluster/data/picrTorr1/html/description.html
    mkdir /gbdb/picrTorr1/html
    ln -s /cluster/data/picrTorr1/html/description.html /gbdb/picrTorr1/html/

# MULTIZ with therVolc, therAcid, and ferrAcid
# DONE (10/11/05), kpollard

    # Pairwise-align picrTorr1 against therAcid1, therVolc1 and ferrAcid1
    # (blastz -> lavToAxt -> axtBest -> axtToMaf), producing one MAF per pair.
    cd /cluster/data/picrTorr1/bed/
    mkdir conservation
    cd conservation
    # Reuse HoxD55.q (passed to blastz as Q= below) and the sequence files
    # already present in the therAcid1 conservation directory.
    # NOTE(review): the steps below read picrTorr1.chr, which nothing in this
    # section creates -- presumably it is among the copied *.chr files; confirm.
    cp /cluster/data/therAcid1/bed/conservation/HoxD55.q .
    cp /cluster/data/therAcid1/bed/conservation/*.chr .
    cp /cluster/data/therAcid1/bed/conservation/*.nib .
    cp /cluster/data/therAcid1/bed/conservation/*.2bit .

    # chrom sizes: one chrom.sizes covering every copied .chr file; it is
    # passed to axtToMaf below for both size arguments.
    faSize -detailed *.chr > chrom.sizes

    # blastz: picrTorr1 is the first sequence in each comparison.
    blastz picrTorr1.chr therAcid1.chr Q=HoxD55.q > picrTorr1-therAcid1.lav
    blastz picrTorr1.chr therVolc1.chr Q=HoxD55.q > picrTorr1-therVolc1.lav
    blastz picrTorr1.chr ferrAcid1.chr Q=HoxD55.q > picrTorr1-ferrAcid1.lav

    # Convert .lav to .axt.  Only the ferrAcid1 run is given a .2bit file;
    # the other two pass "." for both sequence arguments.
    /cluster/bin/i386/lavToAxt picrTorr1-therAcid1.lav . . picrTorr1-therAcid1.axt
    /cluster/bin/i386/lavToAxt picrTorr1-therVolc1.lav . . picrTorr1-therVolc1.axt
    /cluster/bin/i386/lavToAxt picrTorr1-ferrAcid1.lav . ferrAcid1.2bit picrTorr1-ferrAcid1.axt

    # Filter each .axt with axtBest, using the same -winSize and -minScore
    # for all three.
    axtBest picrTorr1-therAcid1.axt picrTorr1.chr -winSize=500 -minScore=5000 picrTorr1-therAcid1-best.axt
    axtBest picrTorr1-therVolc1.axt picrTorr1.chr -winSize=500 -minScore=5000 picrTorr1-therVolc1-best.axt
    axtBest picrTorr1-ferrAcid1.axt picrTorr1.chr -winSize=500 -minScore=5000 picrTorr1-ferrAcid1-best.axt

    # Convert the filtered .axt files to MAF.
    axtToMaf picrTorr1-therAcid1-best.axt chrom.sizes chrom.sizes picrTorr1-therAcid1.maf
    axtToMaf picrTorr1-therVolc1-best.axt chrom.sizes chrom.sizes picrTorr1-therVolc1.maf
    axtToMaf picrTorr1-ferrAcid1-best.axt chrom.sizes chrom.sizes picrTorr1-ferrAcid1.maf

    #multiz
    # Merge the three pairwise MAFs in two multiz passes: therAcid1 with
    # therVolc1 first, then ferrAcid1 with that result.
    #remove extra header lines
    # NOTE(review): no command here removes header lines; the comment above
    # may be left over from another make doc -- confirm.
    multiz picrTorr1-therAcid1.maf picrTorr1-therVolc1.maf - > picrTorr1-therAcid1-therVolc1.maf
    multiz picrTorr1-ferrAcid1.maf picrTorr1-therAcid1-therVolc1.maf - > picrTorr1-therAcid1-therVolc1-ferrAcid1.maf

    #phyloHMM
    # Convert the four-way MAF to SS format, fit a model on the given tree
    # (written with the PtTaTvFa prefix), then print the alignment summary.
    /cluster/bin/phast/msa_view -i MAF -M picrTorr1.chr -o SS picrTorr1-therAcid1-therVolc1-ferrAcid1.maf > picrTorr1.ss
    /cluster/bin/phast/phyloFit -i SS picrTorr1.ss -t "(ferrAcid1,(picrTorr1,(therAcid1,therVolc1)))" -o PtTaTvFa
    /cluster/bin/phast/msa_view -i SS picrTorr1.ss --summary-only
    #add GC content to next call
    # NOTE(review): the --gc value 0.3966 presumably comes from the summary
    # printed by the previous command -- confirm when reusing this recipe.
    # First phastCons pass: estimates trees under the ther-tree prefix and
    # writes no posterior probabilities.
    /cluster/bin/phast/phastCons picrTorr1.ss PtTaTvFa.mod --gc 0.3966 \
    --target-coverage 0.7 --estimate-trees ther-tree \
    --expected-lengths 25 --no-post-probs --ignore-missing \
    --nrates 1,1
    # Second pass: uses the ther-tree.cons.mod / ther-tree.noncons.mod models,
    # writes elements to picrTorr1-elements.bed and sends stdout to cons.dat.
    /cluster/bin/phast/phastCons picrTorr1.ss \
    ther-tree.cons.mod,ther-tree.noncons.mod \
    --target-coverage 0.7 --expected-lengths 25 \
    --viterbi picrTorr1-elements.bed --score \
    --require-informative 0 --seqname chr > cons.dat
    # Encode cons.dat as a wiggle (.wig/.wib pair) and draw the fitted tree.
    wigEncode cons.dat phastCons.wig phastCons.wib
    /cluster/bin/phast/draw_tree PtTaTvFa.mod > ther-tree.ps 
    #compare to therAcid1 tree.

    #move data
    # Move the phastCons wiggle files into wib/ and link the .wib into
    # /gbdb/picrTorr1/wib (the directory made in the GC20BASE section).
    mkdir wib
    mv phastCons.wib wib/phastCons.wib
    mv phastCons.wig wib/phastCons.wig
    ln -s /cluster/data/picrTorr1/bed/conservation/wib/phastCons.wib /gbdb/picrTorr1/wib
    # Give each pairwise MAF its own directory as chr.maf and link each
    # directory into /gbdb/picrTorr1/pwMaf as <db>_pwMaf.
    mkdir /gbdb/picrTorr1/pwMaf
    mkdir -p otherSpp/therAcid1 otherSpp/therVolc1 otherSpp/ferrAcid1
    mv picrTorr1-therVolc1.maf otherSpp/therVolc1/chr.maf
    mv picrTorr1-therAcid1.maf otherSpp/therAcid1/chr.maf
    mv picrTorr1-ferrAcid1.maf otherSpp/ferrAcid1/chr.maf
    ln -s /cluster/data/picrTorr1/bed/conservation/otherSpp/therVolc1 /gbdb/picrTorr1/pwMaf/therVolc1_pwMaf
    ln -s /cluster/data/picrTorr1/bed/conservation/otherSpp/ferrAcid1 /gbdb/picrTorr1/pwMaf/ferrAcid1_pwMaf
    ln -s /cluster/data/picrTorr1/bed/conservation/otherSpp/therAcid1 /gbdb/picrTorr1/pwMaf/therAcid1_pwMaf
    # The four-way MAF becomes multiz/chr.maf, linked as
    # /gbdb/picrTorr1/multizPtTaTvFa.
    mkdir multiz
    mv picrTorr1-therAcid1-therVolc1-ferrAcid1.maf multiz/chr.maf
    ln -s /cluster/data/picrTorr1/bed/conservation/multiz /gbdb/picrTorr1/multizPtTaTvFa

    #load
    # NOTE(review): unlike the gc20Base load, this hgLoadWiggle call passes no
    # -pathPrefix -- confirm the loaded table points at the /gbdb wib link.
    hgLoadWiggle picrTorr1 phastCons /cluster/data/picrTorr1/bed/conservation/wib/phastCons.wig
    # NOTE(review): no -pathPrefix is given for multizPtTaTvFa; presumably
    # hgLoadMaf defaults to /gbdb/picrTorr1/multizPtTaTvFa, the link made
    # above -- confirm.
    hgLoadMaf -warn picrTorr1 multizPtTaTvFa
    hgLoadMaf -warn picrTorr1 therVolc1_pwMaf -pathPrefix=/gbdb/picrTorr1/pwMaf/therVolc1_pwMaf
    hgLoadMaf -warn picrTorr1 ferrAcid1_pwMaf -pathPrefix=/gbdb/picrTorr1/pwMaf/ferrAcid1_pwMaf
    hgLoadMaf -warn picrTorr1 therAcid1_pwMaf -pathPrefix=/gbdb/picrTorr1/pwMaf/therAcid1_pwMaf
    # Load the elements BED written by the second phastCons pass.
    hgLoadBed picrTorr1 phastConsElements picrTorr1-elements.bed 

    #trackDb
    # Register the conservation track in this assembly's trackDb directory.
    cd ~/kent/src/hg/makeDb/trackDb/archae/picrTorr1
    #trackDb.ra entry
    # Manual step: add the stanza below (without the leading "# ") to
    # trackDb.ra before running the cvs commands.
    # track multizPtTaTvFa
    # shortLabel Conservation
    # longLabel Thermoplasma/Ferroplasma/Picrophilus multiz alignments
    # group compGeno
    # priority 10.0
    # visibility pack
    # type wigMaf 0.0 1.0
    # maxHeightPixels 100:40:11
    # wiggle phastCons
    # yLineOnOff Off
    # autoScale Off
    # pairwise pwMaf
    # speciesOrder therAcid1 therVolc1 ferrAcid1
    cvs add trackDb.ra
    cvs commit -m "New multiz track" trackDb.ra
    #html page
    # Manual step: write multizPtTaTvFa.html (the track details page) in this
    # directory before adding it.
    cvs add multizPtTaTvFa.html
    cvs commit -m "Details page for multiz track" multizPtTaTvFa.html
