This file describes how we made the browser database on the
Mouse Feb 2002 freeze assembly.

CREATING DATABASE AND STORING mRNA/EST SEQUENCE AND AUXILIARY INFO

o - Create the database.
     - ssh cc94
     - hgap
     - create database hg3;
     - quit
o - Store the mRNA (non-alignment) info in database.
     - hgLoadRna new hg3
     - hgLoadRna add hg3 /projects/cc/hg/h/mrna/mrna.fa ~/cc/h/mrna/mrna.ra
     - hgLoadRna add hg3 /projects/cc/hg/h/mrna/est.fa ~/cc/h/mrna/est.ra

STORING O+O SEQUENCE AND ASSEMBLY INFORMATION

o - Create packed chromosome sequence files and put in database
     - mysql hg3 < ~/src/hg/lib/chromInfo.sql
     - cd /projects/cc/hg/gs.3/oo.15/allctgs
     - hgNibSeq hg3 /projects/cc/hg/gs.3/oo.15/nib *.fa

o - Store o+o info in database.
     - cd ~/cc/gs.3/oo.15
     - jkStuff/liftAgp.sh
     - jkStuff/liftGl.sh ooGreedy.74.gl
     - hgGoldGapGl hg3 ~/cc/gs.3/oo.15 
     - cd ~/cc/gs.3
     - hgClonePos hg3 oo.15 ffa/sequence.inf .
     - hgContigPos hg3 oo.15

REPEAT MASKING 

o - Repeat mask the assembled FPC contigs as follows:
      - rmskOoJobs /projects/cc/hg/gs.3/oo.15 /projects/cc/hg/gs.3/oo.15/jkStuff/rmsk2
      - cd /projects/cc/hg/gs.3/oo.15/jkStuff/rmsk2
      - edit all.con to split it into two files, first.con and second.con.  Replicate
        the header in second.con.  (This step is necessary because of a hopefully
	temporary limitation in the number of jobs you can submit at once.)
      - condor_submit first.con
      - condor_submit second.con
      - Wait for 12 hours.
      - Rescue the condor jobs that are still going: ssh to each machine
        still running, do a 'cd /var/tmp/hg/jk/rm.gs1' and then a
	'ls -lt | more' to see the contig it is working on.  Then
	copy the .fa.masked file to the contig, and run 'fakeOut'
	to generate a .out file from it.

o - Load RepeatMasker output into database:
      - ssh cc
      - cd /projects/cc/hg/gs.3/oo.15
      - jkStuff/liftOut.sh
      - hgLoadOut ?/*.out ??/*.out
        (Ignore the "Strange perc. field" warnings.  Maybe mention them
	 to Arian someday.)

MAKING AND STORING mRNA AND EST ALIGNMENTS

o - Generate mRNA and EST alignments as follows:
      - cdnaOnOoJobs /projects/cc/hg/gs.3/oo.15 /projects/cc/hg/gs.3/oo.15/jkStuff/postPsl
      - cd /projects/cc/hg/gs.3/oo.15/jkStuff/postPsl
      - edit all.con to split into five files.
      - run 'slowly.sh' to run condor_submit on each file, with a ten-minute delay between submissions.
        (Edit out initial 7 hour delay.)
      - Wait 12 hours or so.

o - Process mRNA alignments into near best in genome.
      - ssh cc
      - cd /projects/cc/hg/gs.3/oo.15
      - cat */lift/*.lft > jkStuff/liftAll.lft
      - cd /projects/cc/hg/gs.3/oo.15/jkStuff/postPsl/psl
      - mkdir /projects/cc/hg/gs.3/oo.15/psl
      - mkdir /projects/cc/hg/gs.3/oo.15/psl/mrnaRaw
      - ln */*.mrna.psl *.mrna.psl !$
      - cd /projects/cc/hg/gs.3/oo.15/psl
      - rm /scratch/jk/*
      - pslSort dirs mrnaGoodRaw.psl /scratch/jk mrnaRaw
      - pslReps mrnaGoodRaw.psl mrnaBestRaw.psl mrnaBestRaw.psr
      - liftUp all_mrna.psl ../jkStuff/liftAll.lft mrnaBestRaw.psl
        (Ignore warnings about chrXX not being in liftSpec file.)
      - pslSortAcc nohead mrna /scratch/jk all_mrna.psl
      - check mrna dir looks good.
      - rm -r mrnaRaw mrnaGoodRaw.psl mrnaBestRaw.psl mrnaBestRaw.psr

o - Load mRNA alignments into database.
      - ssh cc94
      - cd /projects/cc/hg/gs.3/oo.15/psl/mrna
      - foreach i (*.psl)
            mv $i $i:r_mrna.psl
	end
      - hgLoadPsl hg3 *.psl
      - cd ..
      - remove header lines from all_mrna.psl
      - hgLoadPsl hg3 all_mrna.psl

o - Process EST alignments into chromosome-oriented near best in genome.
    (In general this follows the same pattern as the mRNA alignments.)
      - ssh cc
      - cd /projects/cc/hg/gs.3/oo.15/jkStuff/postPsl/psl
      - mkdir /projects/cc/hg/gs.3/oo.15/psl/estRaw
      - ln */*.est.psl *.est.psl !$
      - cd /projects/cc/hg/gs.3/oo.15/psl
      - rm /scratch/jk/*
      - pslSort dirs estGoodRaw.psl /scratch/jk estRaw
      - pslReps estGoodRaw.psl estBestRaw.psl estBestRaw.psr
      - liftUp all_est.psl ../jkStuff/liftAll.lft estBestRaw.psl
        (Ignore warnings about chrXX not being in liftSpec file.)
      - pslSortAcc nohead est /scratch/jk all_est.psl
      - check est dir looks good.
      - rm -r estRaw estGoodRaw.psl estBestRaw.psl estBestRaw.psr

o - Load EST alignments into database.
      - ssh cc94
      - cd /projects/cc/hg/gs.3/oo.15/psl/est
      - foreach i (*.psl)
            mv $i $i:r_est.psl
	end
      - hgLoadPsl hg3 *.psl
      - cd ..
      - remove header lines from all_est.psl
      - hgLoadPsl hg3 all_est.psl

LOADING IN EXTERNAL FILES

o - Load rnaGene table
      - cd /projects/cc/hg/gs.3/oo.15/bed
      - mkdir rnaGene
      - cd rnaGene
      - download data from ftp.genetics.wustl.edu/pub/eddy/pickup/ncrna-oo15.gff.gz
      - unzip
      - liftUp chrom.gff ../../jkStuff/liftAll.lft ncrna-oo15.gff
      - grep chr21 ncrna-oo15.gff >> chrom.gff
      - grep chr22 ncrna-oo15.gff >> chrom.gff
      - hgRnaGenes hg3 chrom.gff

