#!/bin/csh

# Create the ortho2/lifted working directory and dump the hg16
# encodeRegions table, ordered by name, into hg16.bed.  The per-species
# liftOver runs later in this file read that file as ../hg16.bed.
ssh hgwdev
cd /cluster/data/encode
mkdir -p ortho2/lifted
cd ortho2/lifted
hgsql hg16 -s -e "SELECT * FROM encodeRegions ORDER BY NAME" > hg16.bed


# CHIMP
# Lift the hg16 ENCODE regions to panTro1, merge nearby pieces, and load
# the result into the encodeRegions2 table of the panTro1 database.
ssh hgwdev
cd /cluster/data/encode/ortho2
cd lifted
mkdir -p panTro1
cd panTro1
# used chains from reciprocal best net -- these were more
# chopped up, but regions in these don't span unbridged gaps
# minMatch .7 leaves ENr324 and ENm011 unmapped
# Lowered match threshold until region coverage looked good (to .4)
# (except m11, for some reason),  with minimum of fragmentation
# Same at .3 and .2 -- at the point where regions look reasonable,
# lowering the threshold in this way doesn't change them.
# NOTE: reducing minSizeQ to 10000, due to fragmented panTro1 assembly
# NOTE(review): the notes above describe settling on a .4 threshold, but
# the command below passes -minMatch=.01 -- confirm which value is current.
# Write the two-step job (liftOver, then liftOverMerge) to panTro1.csh.
cat > panTro1.csh << 'EOF'
/bin/nice ~kate/bin/i386/liftOver -minMatch=.01 -minSizeT=4000 \
    -minSizeQ=10000 -multiple -chainTable=hg16.rBestChainPanTro1 \
    ../hg16.bed /cluster/data/hg16/bed/liftOver/hg16TopanTro1.chain \
    panTro1.unmerged.bed5 panTro1.unmapped.bed -verbose=2
~kate/bin/i386/liftOverMerge -mergeGap=20000 -verbose=2 \
        panTro1.unmerged.bed5 panTro1.bed5
'EOF'
    #/cluster/data/hg16/bed/liftOver/hg16ToPanTro1.newId.over.chain \
# << for emacs
# Run the job; >&! sends both stdout and stderr to the log and
# overwrites an existing log file.
csh panTro1.csh >&! panTro1.log
# Line counts of the output files; recorded results follow.
wc -l panTro1.*bed*
    # 71 panTro1.bed5
    # 79 panTro1.unmerged.bed5

    # OLD 77 panTro1.bed5
    #  OLD 0 panTro1.unmapped.bed
# Reduce the 5-column file to 4 columns by joining columns 4 and 5 with
# an underscore (presumably region name and piece number -- confirm).
awk '{printf "%s\t%s\t%s\t%s_%s\n", $1, $2, $3, $4, $5}' \
        panTro1.bed5 > panTro1.bed
cd ..
# Expose the final bed file in lifted/ alongside the other species.
ln -s panTro1/panTro1.bed .

# Load into the panTro1 database as table encodeRegions2.
ssh hgwdev
cd /cluster/data/encode/ortho2/lifted
hgLoadBed -noBin panTro1 encodeRegions2 panTro1.bed


# RAT
# Lift the hg16 ENCODE regions to rn3, merge nearby pieces, and load the
# result into the encodeRegions2 table of the rn3 database.
ssh kksilo
cd /cluster/data/encode/ortho2/lifted
mkdir -p rn3
cd rn3
# Write the two-step job (liftOver, then liftOverMerge) to rn3.csh.
cat > rn3.csh << 'EOF'
/bin/nice ~kate/bin/i386/liftOver -minMatch=.01 -minSizeT=4000 \
    -minSizeQ=20000 -multiple -chainTable=hg16.chainRn3 \
    ../hg16.bed /cluster/data/hg16/bed/liftOver/hg16Torn3.chain \
    rn3.unmerged.bed5 rn3.unmapped.bed -verbose=2
~kate/bin/i386/liftOverMerge -mergeGap=20000 -verbose=2 \
        rn3.unmerged.bed5 rn3.bed5
'EOF'
# << for emacs
# Run the job; >&! sends both stdout and stderr to the log and
# overwrites an existing log file.
csh rn3.csh >&! rn3.log
# Line counts of the output files; recorded results follow.
wc -l rn3.*bed*
    #  58 rn3.bed5
    #  0 rn3.unmapped.bed
# Reduce the 5-column file to 4 columns by joining columns 4 and 5 with
# an underscore (presumably region name and piece number -- confirm).
awk '{printf "%s\t%s\t%s\t%s_%s\n", $1, $2, $3, $4, $5}' \
        rn3.bed5 > rn3.bed
cd ..
# Expose the final bed file in lifted/ alongside the other species.
ln -s rn3/rn3.bed .

# Load into the rn3 database as table encodeRegions2.
ssh hgwdev
cd /cluster/data/encode/ortho2/lifted
hgLoadBed -noBin rn3 encodeRegions2 rn3.bed

# CHICKEN
# Lift the hg16 ENCODE regions to galGal2, merge nearby pieces, and load
# the result into the encodeRegions2 table of the galGal2 database.
ssh kksilo
cd /cluster/data/encode/ortho2/lifted
mkdir -p galGal2
cd galGal2
# Write the two-step job (liftOver, then liftOverMerge) to galGal2.csh.
# All options are given in the dashed -name=value form, matching the
# liftOver invocations for the other species in this file.
cat > galGal2.csh << 'EOF'
# allow smaller chain sizes in query (only 1K, vs. 20K in close species)
/bin/nice ~kate/bin/i386/liftOver -minMatch=.01 -minSizeT=4000 \
        -multiple -minSizeQ=1000 -chainTable=hg16.chainGalGal2 \
    ../hg16.bed /cluster/data/hg16/bed/liftOver/hg16TogalGal2.chain \
    galGal2.unmerged.bed5 galGal2.unmapped.bed -verbose=2
~kate/bin/i386/liftOverMerge -mergeGap=20000 -verbose=2 \
        galGal2.unmerged.bed5 galGal2.bed5
'EOF'
# << for emacs
# Run the job; >&! sends both stdout and stderr to the log and
# overwrites an existing log file.
csh galGal2.csh >&! galGal2.log
# Line counts of the output files; recorded results follow.
wc -l galGal2.*bed*
    # 75 galGal2.bed5
    #  6 galGal2.unmapped.bed
    # maps all but r111 and r212, r222
# Reduce the 5-column file to 4 columns by joining columns 4 and 5 with
# an underscore (presumably region name and piece number -- confirm).
awk '{printf "%s\t%s\t%s\t%s_%s\n", $1, $2, $3, $4, $5}' \
        galGal2.bed5 > galGal2.bed
cd ..
# Expose the final bed file in lifted/ alongside the other species.
ln -s galGal2/galGal2.bed .

# Load into the galGal2 database as table encodeRegions2.
ssh hgwdev
cd /cluster/data/encode/ortho2/lifted
hgLoadBed -noBin galGal2 encodeRegions2 galGal2.bed

# MOUSE MM5
# Lift the hg16 ENCODE regions to mm5, merge nearby pieces, and load the
# result into the encodeRegions2 table of the mm5 database.
ssh kksilo
cd /cluster/data/encode/ortho2/lifted
mkdir -p mm5
cd mm5
# Write the two-step job (liftOver, then liftOverMerge) to mm5.csh.
# NOTE(review): unlike the chimp, rat, chicken and dog runs, no
# -chainTable option is passed here -- confirm that is intentional.
cat > mm5.csh << 'EOF'
/bin/nice ~kate/bin/i386/liftOver -minMatch=.01 -minSizeT=4000 \
    -minSizeQ=20000 -multiple \
    ../hg16.bed /cluster/data/hg16/bed/liftOver/hg16ToMm5.chain \
    mm5.unmerged.bed5 mm5.unmapped.bed -verbose=2
~kate/bin/i386/liftOverMerge -mergeGap=20000 -verbose=2 \
        mm5.unmerged.bed5 mm5.bed5
'EOF'
# << for emacs
# Run the job; >&! sends both stdout and stderr to the log and
# overwrites an existing log file.
csh mm5.csh >&! mm5.log
# Line counts of the output files (no results were recorded here).
wc -l mm5.*bed*
# Reduce the 5-column file to 4 columns by joining columns 4 and 5 with
# an underscore (presumably region name and piece number -- confirm).
awk '{printf "%s\t%s\t%s\t%s_%s\n", $1, $2, $3, $4, $5}' \
        mm5.bed5 > mm5.bed
cd ..
# Expose the final bed file in lifted/ alongside the other species.
ln -s mm5/mm5.bed .

# load into database
ssh hgwdev
cd /cluster/data/encode/ortho2/lifted
hgLoadBed -noBin mm5 encodeRegions2 mm5.bed


# DOG
# Lift the hg16 ENCODE regions to canFam1, merge nearby pieces, and load
# the result into the encodeRegions2 table of the canFam1 database.
ssh kksilo
cd /cluster/data/encode/ortho2/lifted
mkdir -p canFam1
cd canFam1
# Write the two-step job (liftOver, then liftOverMerge) to canFam1.csh.
cat > canFam1.csh << 'EOF'
/bin/nice ~kate/bin/i386/liftOver -minMatch=.01 -minSizeT=4000 \
    -minSizeQ=20000 -multiple -chainTable=hg16.chainCanFam1 \
    ../hg16.bed /cluster/data/hg16/bed/liftOver/hg16ToCanFam1.chain \
    canFam1.unmerged.bed5 canFam1.unmapped.bed -verbose=2
~kate/bin/i386/liftOverMerge -mergeGap=20000 -verbose=2 \
        canFam1.unmerged.bed5 canFam1.bed5
'EOF'
# << for emacs
# Run the job; >&! sends both stdout and stderr to the log and
# overwrites an existing log file.
csh canFam1.csh >&! canFam1.log
# Line counts of the output files; recorded results follow.
wc -l canFam1.*bed*
    #   51 canFam1.bed5
    #    0 canFam1.unmapped.bed
# Reduce the 5-column file to 4 columns by joining columns 4 and 5 with
# an underscore (presumably region name and piece number -- confirm).
awk '{printf "%s\t%s\t%s\t%s_%s\n", $1, $2, $3, $4, $5}' \
        canFam1.bed5 > canFam1.bed
cd ..
# Expose the final bed file in lifted/ alongside the other species.
ln -s canFam1/canFam1.bed .

# load into database
ssh hgwdev
cd /cluster/data/encode/ortho2/lifted
hgLoadBed -noBin canFam1 encodeRegions2 canFam1.bed


# MOUSE MM3 -- for testing only
# Lift the hg16 ENCODE regions to mm3 in a tests/ subdirectory.
# NOTE(review): this section has no ssh/cd of its own, so tests/ is
# created relative to wherever the preceding commands left the shell.
mkdir -p tests
cd tests
# ????? Not sure if .01 used or not
# liftOver is run directly here (no generated .csh job, no log), and its
# output goes straight to mm3.bed5 with no liftOverMerge step.
~kate/bin/i386/liftOver -minMatch=.01 -minSizeT=4000 \
        -minSizeQ=20000 -multiple \
        /cluster/data/encode/ortho2/lifted/hg16.bed \
    /cluster/data/hg16/bed/liftOver/hg16Tomm3.chain \
    mm3.bed5 mm3.unmapped.bed
# Reduce the 5-column file to 4 columns by joining columns 4 and 5 with
# an underscore (presumably region name and piece number -- confirm).
awk '{printf "%s\t%s\t%s\t%s_%s\n", $1, $2, $3, $4, $5}' mm3.bed5 \
        > mm3.bed
# For now, load on top of freeze one, for Daryl's comparison page maker
# (note the target table is encodeRegions, not encodeRegions2).
hgLoadBed mm3 encodeRegions mm3.bed -noBin


# FUGU (IN PROGRESS - problem with net ?)
# Lift the hg16 ENCODE regions to fr1, merge nearby pieces, and load the
# result into the encodeRegions2 table of the fr1 database.
ssh kksilo
# Work in the same lifted/ directory as the other species: that is where
# hg16.bed (read below as ../hg16.bed) is written, and where the load
# step at the end of this section looks for fr1.bed.
cd /cluster/data/encode/ortho2/lifted
mkdir -p fr1
cd fr1
# Write the two-step job (liftOver, then liftOverMerge) to fr1.csh.
# NOTE(review): no -chainTable option is passed here -- confirm that is
# intentional.
cat > fr1.csh << 'EOF'
/bin/nice ~kate/bin/i386/liftOver -minMatch=.01 -minSizeT=4000 \
    -minSizeQ=1000 -multiple \
    ../hg16.bed /cluster/data/hg16/bed/liftOver/hg16ToFr1.chain \
    fr1.unmerged.bed5 fr1.unmapped.bed -verbose=2
~kate/bin/i386/liftOverMerge -mergeGap=20000 -verbose=2 \
        fr1.unmerged.bed5 fr1.bed5
'EOF'
# << for emacs
# Run the job; >&! sends both stdout and stderr to the log and
# overwrites an existing log file.
csh fr1.csh >&! fr1.log
# Line counts of the output files; recorded results follow.
# NOTE(review): these counts are identical to the ones recorded in the
# DOG section -- possibly copied rather than measured; re-check.
wc -l fr1.*bed*
    #   51 fr1.bed5
    #    0 fr1.unmapped.bed
# Reduce the 5-column file to 4 columns by joining columns 4 and 5 with
# an underscore (presumably region name and piece number -- confirm).
awk '{printf "%s\t%s\t%s\t%s_%s\n", $1, $2, $3, $4, $5}' \
        fr1.bed5 > fr1.bed
cd ..
# Expose the final bed file in lifted/ alongside the other species.
ln -s fr1/fr1.bed .

# load into database
ssh hgwdev
cd /cluster/data/encode/ortho2/lifted
hgLoadBed -noBin fr1 encodeRegions2 fr1.bed



# GET SEQUENCES FOR SPLIT REGIONS (FOR VALIDATION BY ELLIOT MARGULIES)

# Build lists of regions whose lifted name carries a _2 (split2.txt) or
# _3 (split3.txt) suffix in any of the per-species bed files.  Each output
# line has the form <bed file basename>.<region name>, e.g. panTro1.ENm001.
# The suffix is anchored to end of line (the name is the last column) so
# that _20, _30, ... or an "_2"/"_3" elsewhere on the line cannot match,
# and the dot in ".bed:" is escaped so it only matches a literal dot.
ssh kksilo
cd /cluster/data/encode/ortho2/lifted
grep '_2$' *.bed | sed -e 's/_2$//' -e 's/\.bed:.*EN/.EN/' > split2.txt
wc -l split2.txt
#  50
grep '_3$' *.bed | sed -e 's/_3$//' -e 's/\.bed:.*EN/.EN/' > split3.txt
wc -l split3.txt
# 18

# Extract sequence for the split3 list into fa.3+ and publish a tarball.
# regionSeq is a script expected in this directory; it is not shown in
# this file, so its arguments are taken as-is.
ssh hgwdev
cd /cluster/data/encode/ortho2/lifted
# -p so that re-running this section does not fail on an existing directory
mkdir -p fa.3+
csh regionSeq split3.txt encodeRegions2 fa.3+
cd fa.3+
tar cvfz /usr/local/apache/htdocs/kate/encode/orthologs.split3.fa.gz *fa

