#!/bin/csh -f
# Guard: leave immediately if this file is ever executed directly, so that
# none of the commands recorded below run as a batch.
exit

#############################################################################
# This is the make doc for hg18 ENCODE

#############################################################################
# Changes to ENCODE groups (2007-07-31 kate)

# Change the display labels of the grp rows named encodeTxLevels and
# encodeChrom in the hg18 grp table.
hgsql hg18 -e "UPDATE grp SET label='ENCODE Transcription' where name='encodeTxLevels'"
hgsql hg18 -e "UPDATE grp SET label='ENCODE Chromatin Structure' where name='encodeChrom'"

# Merge CompGeno and Var groups (few tracks): relabel encodeCompGeno so it
# covers both, then delete the encodeVariation row.
hgsql hg18 -e "UPDATE grp SET label='ENCODE Comparative Genomics and Variation' where name='encodeCompGeno'"
hgsql hg18 -e "DELETE FROM grp where name='encodeVariation'"

# Retire obsolete group
hgsql hg18 -e "DELETE FROM grp where name='encode'"

#############################################################################
# Create encodeRegions table

    # Lift the hg17 encodeRegions table to hg18 coordinates and load it.
    ssh hgwdev
    cd /cluster/data/encode
    mkdir convertHg18
    # hg18 and hg17 become short aliases for the convertHg* directories
    ln -s convertHg18 hg18
    ln -s convertHg17 hg17
    cd hg18
    # the hg17 rows are piped straight into liftOver (stdin); regions that
    # do not lift end up in encodeRegions.unmapped
    hgsql hg17 -N -e "SELECT * FROM encodeRegions ORDER BY name" | \
        liftOver stdin /gbdb/hg17/liftOver/hg17ToHg18.over.chain.gz \
                encodeRegions.bed encodeRegions.unmapped
    hgLoadBed hg18 encodeRegions encodeRegions.bed
    # NOTE(review): the copy is named build35_regions.bed although it holds
    # the lifted hg18 coordinates -- confirm the file name is intended.
    cp encodeRegions.bed ~/browser/ENCODE/build35_regions.bed
    # cvs add, install in /usr/local/apache/htdocs/ENCODE

##########################################################################
# DOWNLOADS (2007-09-21 kate)

    # Set up the hg18 ENCODE download directory under the web server tree.
    ssh hgwdev
    cd /usr/local/apache/htdocs/goldenPath/hg18
    mkdir -p encode
    cd encode
    # release terms
    cp ../../hg17/encode/README.txt .
    # annotation database
    # request admin set up automated database dump
    mkdir database
    # auxiliary data files
    mkdir datafiles 
    # sequences
    # regions.txt (name and hg18 position of every region) is the input list
    # handed to encodeSequences.pl below
    cd /cluster/data/encode/convertHg18
    hgsql hg18 -N -e \
      "SELECT name, chrom, chromStart, chromEnd FROM encodeRegions ORDER BY name">regions.txt 

    # Build the region sequence files on kolossus: one run with -upper
    # (hg18.fa) and one with -masked (hg18.msk.fa), both from the same
    # region list and nib directory.
    ssh kolossus
    cd /cluster/data/encode/convertHg18
    mkdir regions
    cd regions
    /cluster/data/encode/bin/scripts/encodeSequences.pl -upper \
        ../regions.txt /iscratch/i/hg18/nib  > hg18.fa
    /cluster/data/encode/bin/scripts/encodeSequences.pl -masked \
        ../regions.txt /iscratch/i/hg18/nib  > hg18.msk.fa
    # per-sequence sizes are taken from the -upper file only
    faSize -detailed hg18.fa > hg18_count.txt
    # compress both FASTA files, then checksum the compressed files
    gzip *.fa
    md5sum *.fa.gz > md5sum.txt
    # copy regions/README.txt from hg17 and edit

    # Publish: the download area gets a symlink to the regions directory,
    # and the hg17 README is copied into it as a starting point.
    ssh hgwdev
    cd /usr/local/apache/htdocs/goldenPath/hg18/encode
    ln -s /cluster/data/encode/convertHg18/regions .
    cp ../../hg17/encode/regions/README.txt regions
    # edit README

##############################################################################
# Lifting rampage (Andy)

# Inventory the hg17 ENCODE tables, then lift the plain bed ones to hg18.
ssh hgwdev
bash
cd /cluster/data/encode/convertHg18
# hg17.tables is the master list; the greps below select rows by the type
# text they contain, and "cut -f1" takes the table name from column 1
/cluster/data/encode/bin/scripts/listEncodeTables.csh hg17 > hg17.tables
wc -l hg17.tables
#554 hg17.tables (dang)

# start with easy beds i.e. the ones like "bed <num> ."

# split the list: rows matching "bed.*\." go to easyBeds.tables, all other
# rows to remaining.tables
grep "bed.*\." hg17.tables > easyBeds.tables
grep -v "bed.*\." hg17.tables > remaining.tables
wc -l easyBeds.tables
#127 easyBeds.tables
mkdir easyBeds
# one pass per bed field count; the count is passed on to convertBedTable.csh
for fields in 3 4 5 6 9 12; do 
    for table in `grep "bed $fields" easyBeds.tables | cut -f1`; do 
       # remove any existing hg18 copy first (only this loop does this)
       hgsql hg18 -e "drop table $table"
       /cluster/data/encode/bin/scripts/convertBedTable.csh hg17 hg18 \
         $table $fields >> easyBeds.script.log 
       # tidy the per-table files out of the working directory
       mv $table.* easyBeds/
    done
done
# still got 4 like "bed ." 
# (rows of easyBeds.tables that carry none of the field counts used above)
egrep -v "bed (3|4|5|6|9|12)" easyBeds.tables
#encodeYaleChIPSTAT1HeLaBingRenSites     encodeChip      bed .
#encodeYaleChIPSTAT1HeLaMaskLess36mer36bpSite    encodeChip      bed .
#encodeYaleChIPSTAT1HeLaMaskLess50mer38bpSite    encodeChip      bed .
#encodeYaleChIPSTAT1HeLaMaskLess50mer50bpSite    encodeChip      bed .
# these are all bed 3
# same conversion as above with the field count fixed at 3
for table in `egrep -v "bed (3|4|5|6|9|12)" easyBeds.tables | cut -f1`; do
       /cluster/data/encode/bin/scripts/convertBedTable.csh hg17 hg18 \
         $table 3 >> easyBeds.script.log
       mv $table.* easyBeds/
done

# ok now there's ones like "bed <num> +"
wc -l remaining.tables
#427 remaining.tables
# rows selected by the '\+' pattern move to plusBed.tables; remaining.tables
# is rewritten (through tmp) to hold only the rows that were not selected
grep '\+' remaining.tables > plusBed.tables
grep -v '\+' remaining.tables > tmp; mv tmp remaining.tables 
wc -l remaining.tables plusBed.tables 
#  383 remaining.tables
#   44 plusBed.tables
#  427 total
mkdir plusBeds
# convert per field count, as for the easy beds (no "bed 3" pass here)
for fields in 4 5 6 9 12; do
    for table in `grep "bed $fields" plusBed.tables | cut -f1`; do
       /cluster/data/encode/bin/scripts/convertBedTable.csh hg17 hg18 \
         $table $fields >> plusBeds.script.log
       mv $table.* plusBeds/
    done
done

# how about bedGraph ones?
# same split-off pattern: bedGraph rows leave remaining.tables
grep bedGraph remaining.tables > bedGraph.tables
grep -v bedGraph remaining.tables > tmp; mv tmp remaining.tables 
wc -l bedGraph.tables remaining.tables 
#  186 bedGraph.tables
#  197 remaining.tables
#  383 total
mkdir bedGraph
# every bedGraph table is converted with a field count of 4
for table in `cut -f1 bedGraph.tables`; do
       /cluster/data/encode/bin/scripts/convertBedTable.csh hg17 hg18 \
         $table 4 >> bedGraph.script.log
       mv $table.* bedGraph/
done

######################################
# Continue lifting rampage (ting, 06-07-2007)
#

# Examining remaining.tables
# All GIS tables are bed 12, so lift these
		# rows containing "Gis" move from remaining.tables to Gis.tables
		grep Gis remaining.tables > Gis.tables
		grep -v Gis remaining.tables > tmp; mv tmp remaining.tables
		wc -l Gis.tables remaining.tables
		#    7 Gis.tables
		#  190 remaining.tables
		#  197 total
		mkdir bedGis
		# NOTE(review): doGis.csh is not shown in this doc; presumably it
		# converts the tables listed in Gis.tables -- confirm.
		doGis.csh
		# 7 tables lifted.

# 190 remaining.

####################################################
# More lifting (Andy)

# Lift the genePred tables still listed in remaining.tables.
ssh hgwdev
bash
cd /cluster/data/encode/convertHg18

# genePred tables
# split genePred rows off remaining.tables
grep genePred remaining.tables > genePred.tables
grep -v genePred remaining.tables > tmp; mv tmp remaining.tables
wc -l genePred.tables remaining.tables 
#  68 genePred.tables
# 122 remaining.tables
# 190 total
mkdir genePred
# first pass over every genePred table; output is appended to one log
for table in `cut -f1 genePred.tables`; do
    /cluster/data/encode/bin/scripts/convertGenePredTable.csh hg17 hg18 $table >> genePred.scripts.log;
    mv $table.* genePred/
done
# ERRORS, uh oh
# fixed /cluster/data/encode/bin/scripts/convertGenePredTable.csh
# binned hg17 tables weren't working right.
# recover the failed table names from the log: on each line containing
# "error", keep the text between "converting " and ".txt"
grep error genePred.scripts.log | sed 's/^.*converting\ \(.*\)\.txt.*$/\1/' > genePredBins.tables
# second pass, failed tables only, with the fixed script and a separate log
for table in `cat genePredBins.tables`; do
    /cluster/data/encode/bin/scripts/convertGenePredTable.csh hg17 hg18 $table >> genePredBins.scripts.log;
    mv $table.* genePred/
done

# missed bed tables.  There's a few like "bed5FloatScore" and "bed 3", etc.
# these can be treated as normal beds
# keep columns 1 and 3 of the bed rows and rewrite the bed5FloatScore type
# as "bed 5" so the field-count grep below matches it
grep bed remaining.tables | cut -f1,3 | sed 's/bed5FloatScore/bed 5/' > \
  bedOther.tables
# NOTE: tmp is only counted here and never moved back over remaining.tables,
# so these bed rows are still in remaining.tables afterwards (see the
# "OOPS" remark in the wiggle section).
grep -v bed remaining.tables > tmp
wc -l bedOther.tables tmp
#  14 bedOther.tables
# 108 tmp
# 122 total
mkdir bedOther
for fields in 3 4 5; do 
    for table in `grep "bed $fields" bedOther.tables | cut -f1`; do 
       /cluster/data/encode/bin/scripts/convertBedTable.csh hg17 hg18 \
         $table $fields >> bedOther.script.log 
       mv $table.* bedOther/
    done
done

# wiggle
# first tally up which ones are in which DBs.  The older ones can go hg16->hg18 instead
# of hg17->hg18.  Make three sets of tables and do set operations:
hgsql hg16 -e 'show tables' > hg16.all.tables
hgsql hg17 -e 'show tables' > hg17.all.tables
# every row of remaining.tables that is not wigMaf is treated as a wiggle
# table (names only, column 1); the wigMaf rows stay in remaining.tables
grep -v wigMaf remaining.tables | cut -f1 > wig.tables
grep wigMaf remaining.tables > tmp; mv tmp remaining.tables
wc -l wig.tables remaining.tables 
# 119 wig.tables
#   3 remaining.tables
# 122 total
#  OOPS I forgot to subtract the 14 tables from last one.

# How many of the wiggle tables are in hg17?  I hope all 119
# (-F fixed strings, -w whole words, -f patterns read from wig.tables)
grep -Fw -f wig.tables hg17.all.tables | wc -l
# 105
# good.  Ok how about hg16?
grep -Fw -f wig.tables hg16.all.tables | wc -l
# 61
# I guess then hg17 should have 44 newer ones.
# hg16.wig.tables: wiggle tables that exist in hg16;
# hg17.wig.tables: the entries of wig.tables not in that hg16 set
grep -Fw -f wig.tables hg16.all.tables > hg16.wig.tables
grep -Fwv -f hg16.wig.tables wig.tables > hg17.wig.tables
wc -l *wig.tables
#  61 hg16.wig.tables
#  44 hg17.wig.tables
# 105 wig.tables
# Awesome.  These two sets shouldn't intersect at all:
grep -Fw -f hg16.wig.tables hg17.wig.tables | wc -l
# 0
# Great.  Now let's move on.  Let's use hgWiggle on each of these tables to
# fetch the old data.  Then we'll convert that to bed 4, lift that, then
# run wigEncode on the lifted data.

# ../hg18.wib stores the .wib files; /gbdb/hg18/encode/wib receives the
# symlinks that the loaded tables refer to
mkdir ../hg18.wib
mkdir -p /gbdb/hg18/encode/wib

# Lift the wiggle tables that exist in hg16 directly from hg16 to hg18.
# Per table: dump with hgWiggle, drop "#" lines, convert to bed rows with
# varStepToBed.awk, liftOver, sort by chrom then numeric start, wigEncode,
# store the .wib, symlink it under /gbdb, load the table, file the leftovers.
mkdir fromHg16.wig
for table in `cat hg16.wig.tables`; do 
    hgWiggle -db=hg16 $table \
       | grep -v "^#" | awk -f varStepToBed.awk > $table.old.wig
    liftOver -bedPlus=3 -tab $table.old.wig /gbdb/hg16/liftOver/hg16ToHg18.over.chain.gz \
      $table.new.wig $table.unmapped
    sort -k1,1 -k2,2n $table.new.wig > tmp.wig; mv tmp.wig $table.new.wig
    # wigEncode's stderr is appended to one shared log
    wigEncode $table.new.wig $table.wig $table.wib 2>> wigFromHg16.log
    mv $table.wib ../hg18.wib/
    ln -s /cluster/data/encode/hg18.wib/${table}.wib /gbdb/hg18/encode/wib/${table}.wib
    hgLoadWiggle -pathPrefix=/gbdb/hg18/encode/wib hg18 $table $table.wig
    mv $table.*wig $table.unmapped fromHg16.wig/
done

# Same procedure for the remaining tables, using hg17 as the source
# database and the hg17ToHg18 chain.
mkdir fromHg17.wig
for table in `cat hg17.wig.tables`; do
    hgWiggle -db=hg17 $table \
       | grep -v "^#" | awk -f varStepToBed.awk > $table.old.wig
    liftOver -bedPlus=3 -tab $table.old.wig /gbdb/hg17/liftOver/hg17ToHg18.over.chain.gz \
      $table.new.wig $table.unmapped
    sort -k1,1 -k2,2n $table.new.wig > tmp.wig; mv tmp.wig $table.new.wig
    wigEncode $table.new.wig $table.wig $table.wib 2>> wigFromHg17.log
    mv $table.wib ../hg18.wib/
    ln -s /cluster/data/encode/hg18.wib/${table}.wib /gbdb/hg18/encode/wib/${table}.wib
    hgLoadWiggle -pathPrefix=/gbdb/hg18/encode/wib hg18 $table $table.wig
    mv $table.*wig $table.unmapped fromHg17.wig/
done

##########################################################
# Wig lifting 
# we need to find all the old wiggle data and lift that.  
# Start with hg16

ssh hgwdev
bash 
cd /cluster/data/encode/convertHg18

# find those hg16 tables

# Write the table list as a here-document; the quoted "EOF" delimiter keeps
# the lines literal.  Each line is: <table name> <source file>, the file
# being given relative to /cluster/data/encode.
cat > affyChipChip.hg16.wig.tables << "EOF"
encodeAffyChIpHl60PvalBrg1Hr00	Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_Brg1_00hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
encodeAffyChIpHl60PvalBrg1Hr02	Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_Brg1_02hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
encodeAffyChIpHl60PvalBrg1Hr08	Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_Brg1_08hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
encodeAffyChIpHl60PvalBrg1Hr32	Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_Brg1_32hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
encodeAffyChIpHl60PvalCebpeHr00	Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_CEBPe_00hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
encodeAffyChIpHl60PvalCebpeHr02	Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_CEBPe_02hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
encodeAffyChIpHl60PvalCebpeHr08	Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_CEBPe_08hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
encodeAffyChIpHl60PvalCebpeHr32	Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_CEBPe_32hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
encodeAffyChIpHl60PvalCtcfHr00	Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_CTCF_00hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
encodeAffyChIpHl60PvalCtcfHr02	Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_CTCF_02hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
encodeAffyChIpHl60PvalCtcfHr08	Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_CTCF_08hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
encodeAffyChIpHl60PvalCtcfHr32	Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_CTCF_32hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
encodeAffyChIpHl60PvalH3K27me3Hr00	Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_H3K27T_00hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
encodeAffyChIpHl60PvalH3K27me3Hr02	Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_H3K27T_02hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
encodeAffyChIpHl60PvalH3K27me3Hr08	Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_H3K27T_08hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
encodeAffyChIpHl60PvalH3K27me3Hr32	Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_H3K27T_32hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
encodeAffyChIpHl60PvalH4Kac4Hr00	Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_HisH4_00hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
encodeAffyChIpHl60PvalH4Kac4Hr02	Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_HisH4_02hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
encodeAffyChIpHl60PvalH4Kac4Hr08	Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_HisH4_08hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
encodeAffyChIpHl60PvalH4Kac4Hr32	Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_HisH4_32hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
encodeAffyChIpHl60PvalP300Hr00	Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_P300_00hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
encodeAffyChIpHl60PvalP300Hr02	Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_P300_02hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
encodeAffyChIpHl60PvalP300Hr08	Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_P300_08hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
encodeAffyChIpHl60PvalP300Hr32	Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_P300_32hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
encodeAffyChIpHl60PvalPu1Hr00	Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_PU1_00hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
encodeAffyChIpHl60PvalPu1Hr02	Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_PU1_02hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
encodeAffyChIpHl60PvalPu1Hr08	Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_PU1_08hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
encodeAffyChIpHl60PvalPu1Hr32	Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_PU1_32hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
encodeAffyChIpHl60PvalRaraHr00	Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_RARecA_00hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
encodeAffyChIpHl60PvalRaraHr02	Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_RARecA_02hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
encodeAffyChIpHl60PvalRaraHr08	Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_RARecA_08hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
encodeAffyChIpHl60PvalRaraHr32	Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_RARecA_32hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
encodeAffyChIpHl60PvalRnapHr00	Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_Pol2_00hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
encodeAffyChIpHl60PvalRnapHr02	Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_Pol2_02hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
encodeAffyChIpHl60PvalRnapHr08	Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_Pol2_08hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
encodeAffyChIpHl60PvalRnapHr32	Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_Pol2_32hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
encodeAffyChIpHl60PvalSirt1Hr00	Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_SIRT1_00hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
encodeAffyChIpHl60PvalSirt1Hr02	Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_SIRT1_02hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
encodeAffyChIpHl60PvalSirt1Hr08	Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_SIRT1_08hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
encodeAffyChIpHl60PvalSirt1Hr32	Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_SIRT1_32hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
encodeAffyChIpHl60PvalTfiibHr32	Affy/2005-06-01/chipchip/wig/EC_AS_HL60_DN_RA_TFIIB-R_32hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B2_B3.pval.median.wig.bz2
EOF

# Lift every wiggle listed in affyChipChip.hg16.wig.tables from hg16 to hg18.
# Each line of the list is "<table> <file>", where <file> is a bzip2'd
# wiggle given relative to /cluster/data/encode.
mkdir -p wigs/hg16
cd wigs/hg16
cat ../../affyChipChip.hg16.wig.tables | while read -a line; do 
   chain=/gbdb/hg16/liftOver/hg16ToHg18.over.chain.gz
   table=${line[0]};
   oldWig=$table.hg16.wig
   newWig=$table.hg18.wig
   bad=$table.hg18.unmapped
   wib=$table.wib
   wigTable=$table.tab
   file=/cluster/data/encode/${line[1]}; 

   echo $table
   # Skip the first line of the source, convert it to bed rows with
   # varStepToBed.awk, then add 1 to the start column.
   # "tail -n +2" replaces the obsolete "tail +2" spelling, which current
   # tail implementations interpret as a file named "+2".
   bzcat $file | tail -n +2 | awk -f ../../varStepToBed.awk | \
     awk 'BEGIN{OFS="\t"}{print $1, $2+1, $3, $4;}' > $oldWig;
   # items that cannot be lifted are written to $bad
   liftOver -bedPlus=3 $oldWig $chain $newWig $bad
   # sort the lifted items before encoding
   bedSort $newWig tmp
   mv tmp $newWig
   wigEncode $newWig $wigTable $wib
done

# One more for BU Orchid

# same steps as the loop above, written out for a single uncompressed file:
# convert to bed rows, lift hg16 -> hg18, sort, encode
awk -f ../../varStepToBed.awk ../../../BU/orchid/2005-06-09/t0 > encodeBu_ORChID1.hg16.wig
liftOver -bedPlus=3 encodeBu_ORChID1.hg16.wig /gbdb/hg16/liftOver/hg16ToHg18.over.chain.gz encodeBu_ORChID1.hg18.wig encodeBu_ORChID1.hg18.unmapped
bedSort encodeBu_ORChID1.hg18.wig tmp; mv tmp encodeBu_ORChID1.hg18.wig
wigEncode encodeBu_ORChID1.hg18.wig encodeBu_ORChID1.tab encodeBu_ORChID1.wib
# NOTE: this track was replaced with newer data -- the lift was
# never used.

# Encode hapmap coverage

# the bedGraph files are passed to liftOver as they are (no conversion step)
for graph in ../../../sanger/coverage/encode*.bedGraph; do
     # table name = file name without the .bedGraph suffix and without the
     # leading path up to and including "coverage/"
     table=${graph%.bedGraph}
     table=${table#*coverage\/}
     liftOver -bedPlus=3 $graph /gbdb/hg16/liftOver/hg16ToHg18.over.chain.gz \
       $table.hg18.wig $table.hg18.unmapped
     bedSort $table.hg18.wig tmp; mv tmp $table.hg18.wig
     wigEncode $table.hg18.wig $table.tab $table.wib
done

# hg17 tables
# Lift the wiggles listed in the here-document below from hg17 to hg18.
# Each line is "<table> <file>", <file> relative to /cluster/data/encode.
cd ../
mkdir hg17
cd hg17
# The loop reads the here-document directly.  The former leading "cat |" is
# gone: the here-document redirection already replaces the loop's stdin, so
# that cat fed nothing to the loop and only read from the terminal.
while read -a line; do
    table=${line[0]};
    file=/cluster/data/encode/${line[1]};
    chain=/gbdb/hg17/liftOver/hg17ToHg18.over.chain.gz;
    # convert to bed rows and sort before lifting
    awk -f ../../varStepToBed.awk $file > $table.hg17.wig;
    bedSort $table.hg17.wig tmp; mv tmp $table.hg17.wig
    liftOver -bedPlus=3 $table.hg17.wig $chain $table.hg18.wig $table.hg18.unmapped;
    # sort again after lifting, then encode
    bedSort $table.hg18.wig tmp; mv tmp $table.hg18.wig;
    wigEncode $table.hg18.wig $table.tab $table.wib
done << "EOF"
encodeAffyChIpHl60PvalStrictH3K9K14DHr00	Affy/2005-10-03/lab/CHIP/wig/H3K9K14D/00/EC_AS_HL60_DN_RA_H3K9K14D_00hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B4_B5.pval.median.wig
encodeAffyChIpHl60PvalStrictH3K9K14DHr02	Affy/2005-10-03/lab/CHIP/wig/H3K9K14D/02/EC_AS_HL60_DN_RA_H3K9K14D_02hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B4_B5.pval.median.wig
encodeAffyChIpHl60PvalStrictH3K9K14DHr08	Affy/2005-10-03/lab/CHIP/wig/H3K9K14D/08/EC_AS_HL60_DN_RA_H3K9K14D_08hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B4_B5.pval.median.wig
encodeAffyChIpHl60PvalStrictH3K9K14DHr32	Affy/2005-10-03/lab/CHIP/wig/H3K9K14D/32/EC_AS_HL60_DN_RA_H3K9K14D_32hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B4_B5.pval.median.wig
encodeAffyChIpHl60PvalStrictHisH4Hr00	Affy/2005-10-03/lab/CHIP/wig/HisH4/00/EC_AS_HL60_DN_RA_HisH4_00hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B4_B5.pval.median.wig
encodeAffyChIpHl60PvalStrictHisH4Hr02	Affy/2005-10-03/lab/CHIP/wig/HisH4/02/EC_AS_HL60_DN_RA_HisH4_02hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B4_B5.pval.median.wig
encodeAffyChIpHl60PvalStrictHisH4Hr08	Affy/2005-10-03/lab/CHIP/wig/HisH4/08/EC_AS_HL60_DN_RA_HisH4_08hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B4_B5.pval.median.wig
encodeAffyChIpHl60PvalStrictHisH4Hr32	Affy/2005-10-03/lab/CHIP/wig/HisH4/32/EC_AS_HL60_DN_RA_HisH4_32hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B4_B5.pval.median.wig
encodeAffyChIpHl60PvalStrictp63_ActD	Affy/2005-10-03/lab/CHIP/wig/p63_ActD/EC_AS_ME180_ActD_p63_C01_EC_AS_ME180_CombInput_PlMinActD_B1_B2_B3.pval.median.wig
encodeAffyChIpHl60PvalStrictp63_mActD	Affy/2005-10-03/lab/CHIP/wig/p63_mActD/EC_AS_ME180_Ctrl_p63_C01_EC_AS_ME180_CombInput_PlMinActD_B1_B2_B3.pval.median.wig
encodeAffyChIpHl60PvalStrictPol2Hr00	Affy/2005-10-03/lab/CHIP/wig/Pol2/00/EC_AS_HL60_DN_RA_Pol2_00hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B4_B5.pval.median.wig
encodeAffyChIpHl60PvalStrictPol2Hr02	Affy/2005-10-03/lab/CHIP/wig/Pol2/02/EC_AS_HL60_DN_RA_Pol2_02hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B4_B5.pval.median.wig
encodeAffyChIpHl60PvalStrictPol2Hr08	Affy/2005-10-03/lab/CHIP/wig/Pol2/08/EC_AS_HL60_DN_RA_Pol2_08hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B4_B5.pval.median.wig
encodeAffyChIpHl60PvalStrictPol2Hr32	Affy/2005-10-03/lab/CHIP/wig/Pol2/32/EC_AS_HL60_DN_RA_Pol2_32hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B4_B5.pval.median.wig
encodeAffyChIpHl60SignalStrictH3K9K14DHr00	Affy/2005-10-03/lab/CHIP/wig/H3K9K14D/00/EC_AS_HL60_DN_RA_H3K9K14D_00hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B4_B5.sig.median.wig
encodeAffyChIpHl60SignalStrictH3K9K14DHr02	Affy/2005-10-03/lab/CHIP/wig/H3K9K14D/02/EC_AS_HL60_DN_RA_H3K9K14D_02hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B4_B5.sig.median.wig
encodeAffyChIpHl60SignalStrictH3K9K14DHr08	Affy/2005-10-03/lab/CHIP/wig/H3K9K14D/08/EC_AS_HL60_DN_RA_H3K9K14D_08hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B4_B5.sig.median.wig
encodeAffyChIpHl60SignalStrictH3K9K14DHr32	Affy/2005-10-03/lab/CHIP/wig/H3K9K14D/32/EC_AS_HL60_DN_RA_H3K9K14D_32hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B4_B5.sig.median.wig
encodeAffyChIpHl60SignalStrictHisH4Hr00	Affy/2005-10-03/lab/CHIP/wig/HisH4/00/EC_AS_HL60_DN_RA_HisH4_00hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B4_B5.sig.median.wig
encodeAffyChIpHl60SignalStrictHisH4Hr02	Affy/2005-10-03/lab/CHIP/wig/HisH4/02/EC_AS_HL60_DN_RA_HisH4_02hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B4_B5.sig.median.wig
encodeAffyChIpHl60SignalStrictHisH4Hr08	Affy/2005-10-03/lab/CHIP/wig/HisH4/08/EC_AS_HL60_DN_RA_HisH4_08hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B4_B5.sig.median.wig
encodeAffyChIpHl60SignalStrictHisH4Hr32	Affy/2005-10-03/lab/CHIP/wig/HisH4/32/EC_AS_HL60_DN_RA_HisH4_32hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B4_B5.sig.median.wig
encodeAffyChIpHl60SignalStrictp63_ActD	Affy/2005-10-03/lab/CHIP/wig/p63_ActD/EC_AS_ME180_ActD_p63_C01_EC_AS_ME180_CombInput_PlMinActD_B1_B2_B3.sig.median.wig
encodeAffyChIpHl60SignalStrictp63_mActD	Affy/2005-10-03/lab/CHIP/wig/p63_mActD/EC_AS_ME180_Ctrl_p63_C01_EC_AS_ME180_CombInput_PlMinActD_B1_B2_B3.sig.median.wig
encodeAffyChIpHl60SignalStrictPol2Hr00	Affy/2005-10-03/lab/CHIP/wig/Pol2/00/EC_AS_HL60_DN_RA_Pol2_00hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B4_B5.sig.median.wig
encodeAffyChIpHl60SignalStrictPol2Hr02	Affy/2005-10-03/lab/CHIP/wig/Pol2/02/EC_AS_HL60_DN_RA_Pol2_02hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B4_B5.sig.median.wig
encodeAffyChIpHl60SignalStrictPol2Hr08	Affy/2005-10-03/lab/CHIP/wig/Pol2/08/EC_AS_HL60_DN_RA_Pol2_08hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B4_B5.sig.median.wig
encodeAffyChIpHl60SignalStrictPol2Hr32	Affy/2005-10-03/lab/CHIP/wig/Pol2/32/EC_AS_HL60_DN_RA_Pol2_32hr_C01_EC_AS_HL60_DN_RA_Input_All_B1_B4_B5.sig.median.wig
encodeAffyRnaGm06990Signal	Affy/2005-10-03/lab/RNA/wig/GM06990/EC_AS_GM06990_RCyP+_C01vsNULL.sig.wig
encodeAffyRnaHeLaSignal	Affy/2005-11-22/lab/Affy_HeLa/wig/EC_AS_HeLa_RCyP+_C01vsNULL.sig.wig
encodeAffyRnaHl60SignalHr00	Affy/2005-10-03/lab/RNA/wig/HL60/00/EC_AS_HL60_RWP+_RA_00hr_C01vsNULL.sig.wig        
encodeAffyRnaHl60SignalHr02	Affy/2005-10-03/lab/RNA/wig/HL60/02/EC_AS_HL60_RWP+_RA_02hr_C01vsNULL.sig.wig
encodeAffyRnaHl60SignalHr08	Affy/2005-10-03/lab/RNA/wig/HL60/08/EC_AS_HL60_RWP+_RA_08hr_C01vsNULL.sig.wig
encodeAffyRnaHl60SignalHr32	Affy/2005-10-03/lab/RNA/wig/HL60/32/EC_AS_HL60_RWP+_RA_32hr_C01vsNULL.sig.wig
encodeUvaDnaRepTr50	UVa/2005-10-15/lab/smoothedtr50.hg17.wig
EOF

# Uppsala hg17 is already in bed format

# No conversion step here: each listed file goes straight to liftOver.
# The loop reads the here-document directly; the former leading "cat |" is
# gone because the here-document redirection already replaces the loop's
# stdin, so that cat only read from the terminal.
while read -a line; do
    table=${line[0]};
    file=/cluster/data/encode/${line[1]};
    chain=/gbdb/hg17/liftOver/hg17ToHg18.over.chain.gz;
    liftOver -bedPlus=3 $file $chain $table.hg18.wig $table.hg18.unmapped;
    bedSort $table.hg18.wig tmp; mv tmp $table.hg18.wig;
    wigEncode $table.hg18.wig $table.tab $table.wib
done << "EOF"
encodeUppsalaChipH3acBut0h      Uppsala/2006-05-29/lab/encodeUppsalaChipH3acBut0h.wig.txt
encodeUppsalaChipH3acBut12h     Uppsala/2006-05-29/lab/encodeUppsalaChipH3acBut12h.wig.txt
encodeUppsalaChipH4acBut0h      Uppsala/2006-05-29/lab/encodeUppsalaChipH4acBut0h.wig.txt
encodeUppsalaChipH4acBut12h     Uppsala/2006-05-29/lab/encodeUppsalaChipH4acBut12h.wig.txt
EOF

# Lift the files listed in the here-document below from hg17 to hg18
# (lift, sort, encode; no conversion step).
# The loop reads the here-document directly; the former leading "cat |" is
# gone because the here-document redirection already replaces the loop's
# stdin, so that cat only read from the terminal.
while read -a line; do
    table=${line[0]};
    file=/cluster/data/encode/${line[1]};
    chain=/gbdb/hg17/liftOver/hg17ToHg18.over.chain.gz;
    liftOver -bedPlus=3 $file $chain $table.hg18.wig $table.hg18.unmapped;
    bedSort $table.hg18.wig tmp; mv tmp $table.hg18.wig;
    wigEncode $table.hg18.wig $table.tab $table.wib
done << "EOF"
encodeYaleAffyNeutRNATransMap   yale/rna/2005-10-14/encodeYaleAffyNeutRNATransMap.trim
encodeYaleAffyNB4RARNATransMap  yale/rna/2005-10-14/lab/encode_Yale_Affy_NB4_RA_RNA_Transcript_Map_ncbi35.wig
encodeYaleAffyNB4TPARNATransMap yale/rna/2005-10-14/lab/encode_Yale_Affy_NB4_TPA_RNA_Transcript_Map_ncbi35.wig
encodeYaleAffyNB4UntrRNATransMap        yale/rna/2005-10-14/lab/encode_Yale_Affy_NB4_CTRL_RNA_Transcript_Map_ncbi35.wig
encodeYaleAffyPlacRNATransMap   yale/rna/2005-10-14/lab/encode_Yale_Affy_Placenta_RNA_Transcript_Map_ncbi35.wig
EOF

# ERRORS ... the first one worked, the others need trimming.

# Second attempt for the failed tables: the sorted lift output is passed
# through trimOverlap.pl before wigEncode.
# The loop reads the here-document directly; the former leading "cat |" is
# gone because the here-document redirection already replaces the loop's
# stdin, so that cat only read from the terminal.
while read -a line; do
    table=${line[0]};
    file=/cluster/data/encode/${line[1]};
    chain=/gbdb/hg17/liftOver/hg17ToHg18.over.chain.gz;
    liftOver -bedPlus=3 $file $chain $table.hg18.wig $table.hg18.unmapped;
    bedSort $table.hg18.wig stdout | /cluster/data/encode/bin/scripts/trimOverlap.pl > tmp;
    mv tmp $table.hg18.wig;
    wigEncode $table.hg18.wig $table.tab $table.wib;
done << "EOF"
encodeYaleAffyNB4RARNATransMap  yale/rna/2005-10-14/lab/encode_Yale_Affy_NB4_RA_RNA_Transcript_Map_ncbi35.wig
encodeYaleAffyNB4TPARNATransMap yale/rna/2005-10-14/lab/encode_Yale_Affy_NB4_TPA_RNA_Transcript_Map_ncbi35.wig
encodeYaleAffyNB4UntrRNATransMap        yale/rna/2005-10-14/lab/encode_Yale_Affy_NB4_CTRL_RNA_Transcript_Map_ncbi35.wig
encodeYaleAffyPlacRNATransMap   yale/rna/2005-10-14/lab/encode_Yale_Affy_Placenta_RNA_Transcript_Map_ncbi35.wig
EOF

# Forgot an hg16 one

# Single-table version of the lift: convert, lift with the hg16 chain,
# sort + trimOverlap.pl, encode.
# NOTE(review): no cd is recorded before this, so when run in sequence the
# output files land in the wigs/hg17 directory -- confirm that is intended.
table=encodeUcsdNgChipSignal
file=/cluster/data/encode/UCSD/nimblegen/2005-05-31/encodeUcsdNgChipSignal.varStep
chain=/gbdb/hg16/liftOver/hg16ToHg18.over.chain.gz
awk -f ../../varStepToBed.awk $file > $table.hg16.wig
liftOver -bedPlus=3 $table.hg16.wig $chain $table.hg18.wig $table.hg18.unmapped
bedSort $table.hg18.wig stdout | /cluster/data/encode/bin/scripts/trimOverlap.pl > tmp
mv tmp $table.hg18.wig
wigEncode $table.hg18.wig $table.tab $table.wib

##########################################################################
# Boston University ORChID track - (2007-06-29 ting)
#	data developer contact:  Steve Parker parker@bu.edu
# This is a new dataset to replace the old one, for the same track.
# On hg17 the track name is encodeBu_ORChID1, was commented as "non-standard table name"
# I took this chance to rename it as encodeBUORChID on hg18.

    # Fetch the provider's hg18 wiggle, encode it and load it as
    # encodeBUORChID.  (csh syntax: "set dir = ...")
    ssh hgwdev
    cd /cluster/data/encode/BU
    mkdir -p orchid/2007-06-29/lab
    # plain cd: "-p" is a mkdir option and is not valid for cd
    cd orchid/2007-06-29/lab
    wget --timestamping "http://dna.bu.edu/parker/.data/orchid_hg18_encode.wig.gz"
    cd ..
    mkdir wib
# The file orchid_hg18_encode.wig.gz from data provider contains 0-based coordinates,
# thus wigEncode choked on it -- specifically, at chr16, position 0 (ENm008).
# I compared this new data to the old dataset (2005-09-08) and made sure that this
# is the case. I saved the original file to 'original.wig.gz', and added 1 to all
# positions in orchid_hg18_encode.wig.gz
    wigEncode lab/orchid_hg18_encode.wig.gz encodeBUORChID.wig \
        wib/encodeBUORChID.wib
#   Converted lab/orchid_hg18_encode.wig.gz, upper limit 1.64, lower limit -0.98

# load
    # the table is loaded with $dir as path prefix, and the .wib is linked
    # into $dir/wib to match the wib/ path given to wigEncode above
    set dir = /gbdb/hg18/encode/BU/2007-06-29
    mkdir -p $dir
    hgLoadWiggle -pathPrefix=$dir hg18 encodeBUORChID encodeBUORChID.wig
    mkdir -p $dir/wib
    ln -s `pwd`/wib/encodeBUORChID.wib $dir/wib

# create encodeBUORChID.html at trackDb/human/hg18/
    

#############################################################################
# Stanford NRSF ChIP-seq (DONE, Heather, July 2007)

# Lift the enriched and control bed files from hg17 to hg18, then load each
# lifted file into its own hg18 table.
ssh hgwdev
cd /cluster/data/encode/stanford/2007-03-14
liftOver fix.bed /gbdb/hg17/liftOver/hg17ToHg18.over.chain.gz hg18.bed core.unmapped
liftOver control_fix.bed /gbdb/hg17/liftOver/hg17ToHg18.over.chain.gz hg18.control.bed control.unmapped
hgLoadBed hg18 encodeStanfordNRSFEnriched hg18.bed -tab
hgLoadBed hg18 encodeStanfordNRSFControl hg18.control.bed -tab

############################################################################
# Yale ENCODE Lifting

# Look up the hg17 trackDb type of every table named in yale.lst, normalize
# the type text to "bed <n>", then lift the bed 4 and bed 5 tables to hg18.
ssh hgwdev
cd /cluster/data/encode/convertHg18
for table in `cat yale.lst`; do 
   # The first line of each query result is dropped (presumably the column
   # header row) before appending to yale.tables.  "tail -n +2" replaces the
   # obsolete "tail +2" spelling, which current tail implementations
   # interpret as a file named "+2".
   echo select tableName,type from trackDb where tableName=\"$table\" \
     | hgsql hg17 | tail -n +2 >> yale.tables 
done
# bed5FloatScoreWithFdr is rewritten before bed5FloatScore, its prefix
sed -e 's/bed5FloatScoreWithFdr/bed 5/' \
    -e 's/bedGraph\ 4/bed 4/' -e 's/bed5FloatScore/bed 5/' \
    < yale.tables > tmp.tables
mv tmp.tables yale.tables
mkdir yale
for fields in 4 5; do
    for table in `grep "bed $fields" yale.tables | cut -f1`; do
       /cluster/data/encode/bin/scripts/convertBedTable.csh hg17 hg18 \
         $table $fields >> yale.script.log
       mv $table.* yale/
    done
done

###########################################################################
# Pseudogenes Class table copied from hg17  (2007-08-01 kate)
# This table is copied unchanged.

    # Copy encodePseudogeneClass from hg17 to hg18 without lifting.
    ssh hgwdev
    cd /cluster/data/encode/convertHg18
    mkdir pseudogene
    cd pseudogene
    # dump into the current directory; the .sql and .txt files used by the
    # next two commands come from this dump
    hgsqldump --all --tab=. hg17 encodePseudogeneClass
    # create the table in hg18 from the dumped definition, then load the rows
    hgsql hg18 < encodePseudogeneClass.sql
    echo "LOAD DATA LOCAL INFILE 'encodePseudogeneClass.txt' \
                into table encodePseudogeneClass" | hgsql hg18

###########################################################################
# Affy EC chrom21/chrom22 (Andy DONE 2007-07-20)

# Unpack the Affy chr21/chr22 submission, load its bed files as tables and
# encode/load its wiggle files; gzipped wiggles are linked for download.
ssh hgwdev
bash
cd /cluster/data/encode/Affy
mkdir -p 2007-07-12/lab
cd 2007-07-12/
mkdir -p processed/{bed,wigTable,wib,download}
cd lab/
cp /var/ftp/encode/encode_ext_RNA_hg18_chr21-22.tar.gz .
# unpack, then remove, the local copy made by the cp above; it sits in the
# current directory, not under encode/
tar xfz encode_ext_RNA_hg18_chr21-22.tar.gz
rm encode_ext_RNA_hg18_chr21-22.tar.gz
cd ../
find lab -name '*.bed' > renamesBed.txt
find lab -name '*.wig' > renamesWig.txt
# Make 2nd column for table name
# (manual edit: the loops below read column 1 as the source file and
# column 2 as the table name)
cat renamesBed.txt | while read -a line; do
    # skip the first line of each submitted file; "tail -n +2" replaces the
    # obsolete "tail +2" spelling, which current tail implementations
    # interpret as a file named "+2"
    tail -n +2 ${line[0]} > processed/bed/${line[1]}.bed
    hgLoadBed hg18 ${line[1]} processed/bed/${line[1]}.bed
done
rm bed.tab
cat renamesWig.txt | while read -a line; do
    table=${line[1]}
    origFile=${line[0]}
    # the file without its first line is also what gets published for download
    tail -n +2 $origFile > processed/download/${table}.wig
    wigEncode processed/download/${table}.wig processed/wigTable/${table}.tab \
        processed/wib/${table}.wib 2>> processed/wigEncode.log
    # create the .wib symlink inside /gbdb/hg18/encode/wib, then come back
    pushd /gbdb/hg18/encode/wib
    ln -s /cluster/data/encode/Affy/2007-07-12/processed/wib/${table}.wib
    popd
    hgLoadWiggle -pathPrefix=/gbdb/hg18/encode/wib hg18 $table processed/wigTable/${table}.tab
    gzip processed/download/${table}.wig
done
cd /usr/local/apache/htdocs/goldenPath/hg18/encode
ln -s /cluster/data/encode/Affy/2007-07-12/processed/download/*.gz .

###########################################################################
# Yale Pol II Chip (Chip-seq) (DONE Andy 11-07-2007)

# First attempt at loading the Yale Pol II sites (csh syntax: "set table").
cd /cluster/data/encode/yale
mkdir -p 2007-07-17/lab
cd 2007-07-17/lab
unzip Yale_jul17_v2.zip

set table = wgEncodeYaleChipSeqPol2HelaSites
# NOTE(review): the path below starts with lab/ although the last recorded
# cd already entered 2007-07-17/lab -- confirm the working directory.
hgLoadBed hg18 $table lab/PolII/PolII_hg18-sites.bed
#Reading PolII_hg18-sites.bed
#Loaded 87253 elements of size 4
#Sorted
#Creating table definition for encodeYalePolIISites
#Saving bed.tab
#start -142, end 1144 out of range in findBin (max is 512M)
# CONTACTED submitter to ask about negative coordinate.
# For now, leave out chrM
# delete every line starting with chrM and load the rest from stdin
sed '/^chrM/d' lab/PolII/PolII_hg18-sites.bed | hgLoadBed hg18 $table stdin

# NOTE: max score=1779, min score=7
# data distribution
# Histogram of column 4 in bins of 100.  The pasted output below is
# commented out like the other recorded outputs in this doc.
# NOTE(review): the step that created pol2.bed is not recorded here.
 awk '{print $4}' pol2.bed | sort -n | textHistogram stdin -binSize=100 ;
#   0 ************************************************************ 83113
#   100 ** 3300
#   200  545
#   300  144
#   400  58
#   500  37
#   600  20
#   700  6
#   800  6
#   900  0
#   1000  2
#   1100  2
#   1200  0
#   1300  0
#   1400  0
#   1500  0
#   1600  0
#   1700  1


# First attempt at the Pol II signal track: sort the submitted wiggle, pass
# it through trimOverlap.pl, then publish and load the encoded files.
bedSort PolII_hg18-signal.wig tmp.wig
# mv (the command was recorded as "v", which is not a command)
mv tmp.wig PolII_hg18-signal.wig
../../../bin/scripts/trimOverlap.pl < PolII_hg18-signal.wig > tmp.wig
mv tmp.wig PolII_hg18-signal.wig
# NOTE(review): the wigEncode step that produced encodeYalePolIISignal.wib
# and encodeYalePolIISignal.wig is not recorded here.
mv encodeYalePolIISignal.wib /cluster/data/encode/hg18.wib/
ln -s /cluster/data/encode/hg18.wib/encodeYalePolIISignal.wib /gbdb/hg18/encode/wib/
hgLoadWiggle -pathPrefix=/gbdb/hg18/encode/wib hg18 encodeYalePolIISignal encodeYalePolIISignal.wig
#Connected to database hg18 for track encodeYalePolIISignal
#Creating wiggle table definition in hg18.encodeYalePolIISignal
#Saving wiggle.tab
#WARNING: Exceeded chr18_random size 4406 > 4262. dropping 145 data point(s)
# hmmm... that's not a good warning.  I wonder if these guys got the genome wrong.
hgLoadBed hg18 encodeYalePolIISites PolII_hg18-sites.bed
#Reading PolII_hg18-sites.bed
#Loaded 87253 elements of size 4
#Sorted
#Creating table definition for encodeYalePolIISites
#Saving bed.tab
#start -142, end 1144 out of range in findBin (max is 512M)

# MORE ERRORS.  Clearly this submission wasn't quite meant to be just yet.
# to be continued...

# continued... made a "resub" dir and copied the resubmitted zipfile there.
# Work from the resubmitted archive.  The resub dir sits next to processed/
# under the Yale 2007-07-17 submission directory (see the relative paths used
# below).
cd /cluster/data/encode/yale/2007-07-17/resub
unzip Yale_jul17_v2.zip
cd PolII/
trimObBedLines PolII_hg18-signal.wig > ../../processed/wgEncodeYalePolIISignal.wigBed
pushd ../../processed/
wigEncode wgEncodeYalePolIISignal.wigBed wgEncodeYalePolIISignal.wig wgEncodeYalePolIISignal.wib
gzip wgEncodeYalePolIISignal.wigBed
# Symlink chain for the .wib: processed/ -> hg18.wib/ -> /gbdb
cd ../../../hg18.wib
ln -s ../yale/2007-07-17/processed/wgEncodeYalePolIISignal.wib
cd /gbdb/hg18/encode/wib
ln -s /cluster/data/encode/hg18.wib/wgEncodeYalePolIISignal.wib
# Post the gzipped wigBed for download
cd /usr/local/apache/htdocs/goldenPath/hg18/encode/wig
ln -s /cluster/data/encode/yale/2007-07-17/processed/wgEncodeYalePolIISignal.wigBed.gz
# popd returns to resub/PolII (the directory we were in at pushd time), so the
# .wig written by wigEncode has to be referenced in processed/.
popd
hgLoadWiggle -pathPrefix=/gbdb/hg18/encode/wib hg18 wgEncodeYalePolIISignal ../../processed/wgEncodeYalePolIISignal.wig

#########################################################################
# YALE STAT1 (more ChIP-seq) (DONE, Andy 2007-11-20)

# Fetch and unpack the submission into a dated directory.
cd /cluster/data/encode/yale
mkdir 2007-08-08
cd 2007-08-08/
cp /var/ftp/encode/Yale_aug8.zip .
unzip Yale_aug8.zip
cd STAT1/
# Clean both the signal and the sites files against hg18 before loading.
# NOTE(review): presumably trimObBedLines drops items that fall outside the
# chromosome bounds of the given database -- confirm against the tool's usage.
trimObBedLines hg18 STAT1_hg18-signal.wig wgEncodeYaleStat1Signal.wigBed
trimObBedLines hg18 STAT1_hg18-sites.bed wgEncodeYaleStat1Sites.bed
# Compress first; wigEncode is then run on the gzipped wigBed and writes
# wgEncodeYaleStat1Signal.wig and wgEncodeYaleStat1Signal.wib.
gzip wgEncodeYaleStat1Signal.wigBed
wigEncode wgEncodeYaleStat1Signal.wigBed.gz wgEncodeYaleStat1Signal.{wig,wib}
# Reorganize: original submission under lab/, generated files under processed/.
cd ../
mkdir lab processed
mv readme_aug8.txt STAT1 lab/
rm Yale_aug8.zip 
mv lab/STAT1/wgEncodeYaleStat1Si* processed/
# Symlink chain for the .wib: processed/ -> hg18.wib/ -> /gbdb
pushd ../../hg18.wib/
ln -s ../yale/2007-08-08/processed/wgEncodeYaleStat1Signal.wib
cd /gbdb/hg18/encode/wib
ln -s /cluster/data/encode/hg18.wib/wgEncodeYaleStat1Signal.wib 
# popd returns to the 2007-08-08 directory
popd
cd processed/
# Load the signal (wiggle) and sites (bed) tables.
hgLoadWiggle -pathPrefix=/gbdb/hg18/encode/wib hg18 wgEncodeYaleStat1Signal wgEncodeYaleStat1Signal.wig
hgLoadBed hg18 wgEncodeYaleStat1Sites wgEncodeYaleStat1Sites.bed 


##########################################################################
# Genome Institute of Singapore PET data (2007-08-30 ting)
# Submitted 8/22 by Atif Shahab and Chia-lin Wei
# Three new PET datasets on human embryonic stem cell hES3.
# One polyA-RNA dataset, and two ChIP-PET datasets of H3K4me3 and H3K27me3.
# Build them as subtracks into existing GIS tracks: GIS-RNA-PET and GIS-CHIP-PET.

    ssh hgwdev
    cd /cluster/data/encode/GIS/
    mkdir 2007-08-22
    cd 2007-08-22
    mkdir lab
    cd lab
    cp /var/ftp/encode/gis.tar.gz ./
    gunzip gis.tar.gz
    tar -xvf gis.tar
    
    # obtained 3 data files: H3K27me3.bed  H3K4me3.bed  polyA.bed
    # These are mapped on hg17, first lift.
    cd /cluster/data/encode/GIS/2007-08-22
    # Lifted files are named <set>-hg18.bed so they are not confused with the
    # hg17 originals in lab/, and so they match the scoreGisBed.pl inputs below.
    liftOver lab/polyA.bed ../../convertHg18/hg17ToHg18.over.chain.gz \
        polyA-hg18.bed polyA-unmapped.bed
    # 426301 lifted, 34 unmapped

    liftOver lab/H3K4me3.bed ../../convertHg18/hg17ToHg18.over.chain.gz \
        H3K4me3-hg18.bed H3K4me3-unmapped.bed
    # 679752 lifted, 13 unmapped

    liftOver lab/H3K27me3.bed ../../convertHg18/hg17ToHg18.over.chain.gz \
        H3K27me3-hg18.bed H3K27me3-unmapped.bed
    # 992509 lifted, 25 unmapped

    # GIS data are not scored. Based on Angie and Kate's previous work,
    # scored BED can be made from item name. Use scoreGisBed.pl to do so.
    scoreGisBed.pl polyA-hg18.bed 2 encodeGisRnaPetHes3.bed
    scoreGisBed.pl H3K4me3-hg18.bed 1 encodeGisChipPetHes3H3K4me3.bed
    scoreGisBed.pl H3K27me3-hg18.bed 1 encodeGisChipPetHes3H3K27me3.bed
    
    # load on hg18
    hgLoadBed hg18 encodeGisRnaPetHes3 encodeGisRnaPetHes3.bed
    # Loaded 426301 elements of size 12
    
    hgLoadBed hg18 encodeGisChipPetHes3H3K4me3 encodeGisChipPetHes3H3K4me3.bed
    # Loaded 679752 elements of size 12
    
    hgLoadBed hg18 encodeGisChipPetHes3H3K27me3 encodeGisChipPetHes3H3K27me3.bed 
    # Loaded 992509 elements of size 12
    
    # modified trackDb.encodeTxLevel.ra, trackDb.encodeChip.ra,
    #          encodeGisChipPetAll.html, encodeGisRnaPet.html

###########
# Promote UCSD genome-wide Chip tracks:
# UCSD TAF1 IMR90 Chip/chip to Regulation group
# (2007-09-14 kate)
# See hg18.txt

######################################################
# Add strand information for encodeGencodeRace data - ting 09-27-2007
# ENCODE 5RACE data do not contain strand information. This
# information is very important, and can be derived from 
# available GENCODE and 5RACE data.
# There are two relatively simple strategies to derive strand
# information. However, there are several exceptions to either
# strategy. Therefore I will combine these two strategies in
# this one script. 
# Strategy 1: a RACE primer should extend from 3' end of a transcript
#             towards 5' end. Therefore, if any RACE frag from
#             this primer extends towards the right of the primer
#             location, it means the gene goes from right to left,
#             i.e. on - strand. Therefore, the primer should be
#             on the + strand, and the corresponding RACEfrag should
#             be on the - strand (same as gene). By the same token,
#             if a RACEfrag extends toward left, it indicates that
#             the primer is on - strand, while the gene and RACEfrag
#             are on + strand.
#             The only case that such relationship can not be determined
#             is when the RACEfrag contains only one exon, and the
#             primer is located in that exon; it is then unclear whether
#             the RACEfrag extends to the right or to the left.
#             This strategy leaves 3 primers undetermined.
# 
# Strategy 2: RACE primers should be designed based on GENCODE
#             exons. Therefore, the orientation of the primer can be 
#             determined by its overlapping GENCODE exon. In this case,
#             the primer is on the opposite strand of the GENCODE exon,
#             and any RACEfrag from this primer should be on the opposite
#             strand of the primer. 
#             There are several exceptions, where the primer is
#             located outside of exons. In that case it is probably OK
#             to use the nearest exon instead.
#             This strategy leaves 37 primers undetermined.
# Combining 1 and 2 all primers are determined for their orientation.
# 
# Instead of working on the original gff files, I decide to work on
# data files after hg18 migration. These files are genePred formatted.
# Working folder is 
# /cluster/store6/encode/GencodeRACEfrags/2007-04-11/strand

  ssh hgwdev
  cd /cluster/data/encode/GencodeRACEfrags/latest/
  mkdir strand
  cd strand
  # Work on the hg18-converted genePred .tab files rather than the original gff.
  cp /cluster/data/encode/convertHg18/genePred/*Race*.tab ./
  cp /cluster/data/encode/convertHg18/genePred/encodeGencodeGeneKnownMar07.tab ./

  # Inputs: the RACE primer table and the GENCODE known-gene table.
  ./addRacePrimerStrand.pl encodeGencodeRaceFragsPrimer.tab encodeGencodeGeneKnownMar07.tab
  # NOTE(review): load.csh presumably reloads each encodeGencodeRaceFrags*
  # table (see the per-table output recorded below) -- confirm.
  csh load.csh > & ! load.log
  
  # encodeGencodeRaceFragsBrain
  # Reading encodeGencodeRaceFragsBrain.tab
  # 269 gene predictions
  # encodeGencodeRaceFragsColon
  # Reading encodeGencodeRaceFragsColon.tab
  # 269 gene predictions
  # encodeGencodeRaceFragsGM06990
  # Reading encodeGencodeRaceFragsGM06990.tab
  # 236 gene predictions
  # encodeGencodeRaceFragsHL60
  # Reading encodeGencodeRaceFragsHL60.tab
  # 236 gene predictions
  # encodeGencodeRaceFragsHeart
  # Reading encodeGencodeRaceFragsHeart.tab
  # 261 gene predictions
  # encodeGencodeRaceFragsHela
  # Reading encodeGencodeRaceFragsHela.tab
  # 168 gene predictions
  # encodeGencodeRaceFragsKidney
  # Reading encodeGencodeRaceFragsKidney.tab
  # 293 gene predictions
  # encodeGencodeRaceFragsLiver
  # Reading encodeGencodeRaceFragsLiver.tab
  # 243 gene predictions
  # encodeGencodeRaceFragsLung
  # Reading encodeGencodeRaceFragsLung.tab
  # 290 gene predictions
  # encodeGencodeRaceFragsMuscle
  # Reading encodeGencodeRaceFragsMuscle.tab
  # 238 gene predictions
  # encodeGencodeRaceFragsPlacenta
  # Reading encodeGencodeRaceFragsPlacenta.tab
  # 275 gene predictions
  # encodeGencodeRaceFragsPrimer
  # Reading encodeGencodeRaceFragsPrimer.tab
  # 365 gene predictions
  # encodeGencodeRaceFragsSmallIntest
  # Reading encodeGencodeRaceFragsSmallIntest.tab
  # 277 gene predictions
  # encodeGencodeRaceFragsSpleen
  # Reading encodeGencodeRaceFragsSpleen.tab
  # 275 gene predictions
  # encodeGencodeRaceFragsStomach
  # Reading encodeGencodeRaceFragsStomach.tab
  # 300 gene predictions
  # encodeGencodeRaceFragsTestis
  # Reading encodeGencodeRaceFragsTestis.tab
  # 292 gene predictions

  # Strand information is added for primers and all RACEfrags.

######################################################
# LIFT NHGRI DIPs from hg17 (2007-10-22 kate)

    ssh hgwdev
    cd /cluster/data/encode/NHGRI/mullikin/hg17
    # Create the table from the hg17 schema, then lift the hg17 items to hg18.
    hgsql hg18 < encodeIndels.sql
    # tail +2 skips the first line of the bed file before lifting.
    zcat encodeIndels.bed.gz | tail +2 | \
        liftOver -bedPlus=8 stdin /gbdb/hg17/liftOver/hg17ToHg18.over.chain.gz \
                encodeIndels.hg18.bed encodeIndels.hg18.unmapped
        # lost 670 items (of 11452 total)
        # This is high -- nearly 6%, and losses were in all regions,
        # not just chrX.
    hgLoadBed hg18 encodeIndels -tab -sqlTable=encodeIndels.sql \
                encodeIndels.hg18.bed

    # change group name to merge in variation
    hgsql hg18 -e "update grp set name='encodeCompAndVar' where name='encodeCompGeno'"


#########################################################
# 2007-11-08 (ASZ)
# These wig files were shown to not match their corresponding database table 
# Dropped them from the hgdownload server:
#/goldenPath/hg18/encode/wig/encodeUppsalaChipH3acBut0h.wigBed.gz
#/goldenPath/hg18/encode/wig/encodeUppsalaChipH3acBut12h.wigBed.gz 
#/goldenPath/hg18/encode/wig/encodeUppsalaChipH4acBut0h.wigBed.gz
#/goldenPath/hg18/encode/wig/encodeUppsalaChipH4acBut12h.wigBed.gz
#/goldenPath/hg18/encode/wig/encodeYaleAffyNB4RARNATransMap.wigBed.gz
#/goldenPath/hg18/encode/wig/encodeYaleAffyNB4TPARNATransMap.wigBed.gz
#/goldenPath/hg18/encode/wig/encodeYaleAffyNB4UntrRNATransMap.wigBed.gz
#/goldenPath/hg18/encode/wig/encodeYaleAffyNeutRNATransMap.wigBed.gz
#/goldenPath/hg18/encode/wig/encodeYaleAffyPlacRNATransMap.wigBed.gz

#########################################################
# Yale RACE (2007-11-15 galt)
#
cd /cluster/data/encode/yale/
mkdir race
cd race
mkdir 2007-11-15
ln -s 2007-11-15/ latest
cd latest
mkdir lab
cd lab

wget http://homes.gersteinlab.org/people/jiangdu/race_seq/race_desc.html
wget http://homes.gersteinlab.org/people/jiangdu/race_seq/conserved_transcripts-til-20070402.bed

tail +5 conserved_transcripts-til-20070402.bed | gawk '{print$1}' | sort -u | head
chr11
chr21
chr22

cp race_desc.html ${HOME}/kent/src/hg/makeDb/trackDb/human/hg18/encodeYaleRace.html
cvs add ${HOME}/kent/src/hg/makeDb/trackDb/human/hg18/encodeYaleRace.html

tail +5 conserved_transcripts-til-20070402.bed | hgLoadBed hg18 encodeYaleRace stdin

vi trackDb.encodeTxLevels.ra
---
track encodeYaleRace
superTrack encodeYaleRnaSuper dense
shortLabel Yale RACE
longLabel Yale RACE 420 primarily novel TARs in ENCODE regions
group encodeTxLevels
priority 32.0
chromosomes chr11,chr21,chr22
visibility hide
type bed 12 .
dataVersion ENCODE Nov 2007 
origAssembly hg18
---

vi ${HOME}/kent/src/hg/makeDb/trackDb/human/hg18/encodeYaleRnaSuper.html
#edit to add the new RACE track to the Credits section


#############################################################################
# TBA alignments from Margulies lab, NHGRI (2008-2-20 kate)
#  Submitted by Gayle McEwen (mceweng@mail.nih.gov), from their DEC-07 freeze
#  Requested doc update (README & track description) from Elliott on 2/20
# Conservation scores: (BinCons and ChaiCons provided 3/24/08)

    ssh kkstore03
    cd /cluster/data/encode/TBA
    mkdir -p DEC-07/2008-01-10/lab
    cd DEC-07/2008-01-10/lab
    wget -nd ftp://kronos.nhgri.nih.gov/pub/outgoing/elliott/encode/freeze/DEC-2007/tba-DEC-2007.tar.gz
    tar xvfz tba-DEC-2007.tar.gz
    cd ..

    mkdir maf
cat > getMafs.csh << 'EOF'
    foreach f (lab/tba/*/*.maf.gz)
        set r = $f:t:r:r:e
        echo $r
        gunzip -c $f | \
            sed -e 's/^s human\./s hg18./' \
                -e 's/^s mouse\./s mm9./' \
                -e 's/^s cow\./s bosTau3./' \
                -e 's/^s dog\./s canFam2./' \
                -e 's/^s chicken\./s galGal3./' \
                -e 's/^s monodelphis\./s monDom4./' \
                -e 's/^s chimp\./s panTro2./' \
                -e 's/^s macaque\./s rheMac2./' \
                -e 's/^s orangutan\./s ponAbe2./' \
                -e 's/^s rat\./s rn4./' \
                        > maf/$r.maf
    end
'EOF'
    csh getMafs.csh >&! getMafs.log &

    # Score too small messages -- can be ignored (the score isn't meaningful)

    # Add gap annotation
    # prepare bed files with gap info
    ssh kkstore03
    cd /cluster/data/encode/TBA
    cd DEC-07/2008-01-10
    mkdir anno
    cd anno
    mkdir maf run
    cd run

    cat > species.lst << 'EOF'
        hg18
        bosTau3
        canFam2
        galGal3
        monDom4
        panTro2
        rheMac2
        ponAbe2
        mm9
        rn4
'EOF'

cat > doNBed.csh << 'EOF'
    foreach db (`cat species.lst`)
        echo -n "$db "
        set cdir = /cluster/data/$db
        if (! -e $cdir/$db.N.bed) then
            echo "creating N.bed"
            twoBitInfo -nBed $cdir/$db.2bit $cdir/$db.N.bed
        else
            echo ""
        endif
    end
'EOF'
    csh doNBed.csh >&! doNBed.log &

    rm -f nBeds
    foreach db (`grep -v hg18 species.lst`)
        echo "$db "
        ln -s  /cluster/data/$db/$db.N.bed $db.bed
        echo $db.bed  >> nBeds
    end
    
cat > doAnno.csh << 'EOF'
    foreach f (../../maf/*.maf)
        set b = $f:t
        echo $f
        nice mafAddIRows -nBeds=nBeds $f \
                /cluster/data/hg18/hg18.2bit ../maf/$b
    end
'EOF'
#<< happy emacs
    csh doAnno.csh >&! doAnno.log &

    # Load MAF table with annotated mafs.  Also load summary table.
    ssh hgwdev
    set mdir = /cluster/data/encode/TBA/DEC-07/2008-01-10/anno/maf
    cd $mdir
    set gdir = /gbdb/hg18/encode/TBA/DEC-07/2008-01-10/maf
    rm -f $gdir/*.maf
    mkdir -p $gdir
    ln -s $mdir/*.maf $gdir
    hgLoadMaf -pathPrefix=$gdir -WARN hg18 encodeTbaAlignDec07 >&! load.log &
    cat *.maf | hgLoadMafSummary hg18 encodeTbaSummaryDec07 stdin 
    cd ..

    # Reannotate with newer mafAddIRows having distinctive rows for
    # tandem dups (by request of JK)
    # 2008-10-23 kate
    # again (another fix to mafAddIRows) 2008-10-27 kate
    ssh kolossus
    cd /cluster/data/encode/TBA
    cd DEC-07/2008-01-10
    cd anno/run
    # edit doAnno.csh to use new version
    csh doAnno.csh >&! doAnno.log &
    ssh hgwdev
    set mdir = /cluster/data/encode/TBA/DEC-07/2008-01-10/anno/maf
    cd $mdir
    set gdir = /gbdb/hg18/encode/TBA/DEC-07/2008-01-10/maf
    hgLoadMaf -pathPrefix=$gdir -WARN hg18 encodeTbaAlignDec07 >&! load.log &
    # Ignore 'score too small' errors
    cat *.maf | hgLoadMafSummary hg18 encodeTbaSummaryDec07 stdin 
    #Created 141213 summary blocks from 8144409 components and 389847 mafs from stdin
    #Loading into hg18 table encodeTbaSummaryDec07...

    # Gene frames
    ssh hgwdev
    cd /cluster/data/encode/TBA/DEC-07/2008-01-10
    mkdir frames
    cd frames

    # Pick gene tables, according to the following criteria:
    # KG if present, else refGene if >10000 entries, else ensGene (unless dog),
    # else mgcGenes, else mrnas if > 10000 else none.   In all cases 
    # except none, add in refGene.
    # NOTE: shortcut by using sources from hg18 multiz framing
    # (added braney 2008-03-01)  use GENCODE for hg18, no
    #                            genes from ponAbe2

hg18: encodeGencodeGeneKnownMar07 
bosTau3: mrna
canFam2: mrna
galGal3: mrna
monDom4: ensGene
panTro2: refGene
rheMac2: ensGene
rn4: knownGene
mm9: knownGene

    # get the genes for all genomes
    # mRNAs with CDS.  single select to get cds+psl, then split that up and
    # create genePred
    # using mrna table as genes
cat > getGenes.csh << 'EOF'
    rm -fr genes
    mkdir -p genes
    set mrnaDbs = "bosTau3 canFam2 galGal3"
    foreach queryDb ($mrnaDbs)
      set tmpExt = `mktemp temp.XXXXXX`
      set tmpMrnaCds = ${queryDb}.mrna-cds.${tmpExt}
      set tmpMrna = ${queryDb}.mrna.${tmpExt}
      set tmpCds = ${queryDb}.cds.${tmpExt}
      echo $queryDb
      hgsql -N -e 'select all_mrna.qName,cds.name,all_mrna.* \
                   from all_mrna,gbCdnaInfo,cds \
                   where (all_mrna.qName = gbCdnaInfo.acc) and \
                     (gbCdnaInfo.cds != 0) and (gbCdnaInfo.cds = cds.id)' \
       $queryDb > ${tmpMrnaCds}
      cut -f 1-2  ${tmpMrnaCds} > ${tmpCds}
      cut -f 4-100  ${tmpMrnaCds} > ${tmpMrna}
      mrnaToGene -cdsFile=${tmpCds} -smallInsertSize=8 -quiet ${tmpMrna} stdout | \
        genePredSingleCover stdin stdout | gzip -2c > /scratch/tmp/$queryDb.tmp.gz
      rm ${tmpMrnaCds} ${tmpMrna} ${tmpCds}
      mv /scratch/tmp/$queryDb.tmp.gz genes/$queryDb.gp.gz
      rm -f $tmpExt
    end
    # using encodeGencodeGeneKnownMar07 for hg18
    # using knownGene for rn4 mm9 
    # using refGene for panTro2
    # using ensGene for monDom4, rheMac2
    # genePreds; (must keep only the first 10 columns for knownGene)
    #set geneDbs = "hg18 mm9 rn4 panTro2 monDom4 rheMac2 ponAbe2"
    #  NOTE: next time include ponAbe2, using ensGene
    set geneDbs = "hg18 mm9 rn4 panTro2 monDom4 rheMac2"
    foreach queryDb ($geneDbs)
      # Pick the gene table for this assembly, per the table choices documented
      # for this run (hg18 uses the GENCODE known-gene table).
      if ($queryDb == "monDom4" || $queryDb == "rheMac2") then
        set geneTbl = ensGene
      else if ($queryDb == "panTro2") then
        set geneTbl = refGene
      else if ($queryDb == "rn4" || $queryDb == "mm9") then
        set geneTbl = knownGene
      else if ($queryDb == "hg18") then
        set geneTbl = encodeGencodeGeneKnownMar07
      endif
      # Select only the first 10 genePred columns, reduce to a single-coverage
      # gene set, and stage the result in /scratch/tmp before moving it into
      # genes/.
      hgsql -N -e "select name,chrom,strand,txStart,txEnd,cdsStart,cdsEnd,exonCount,exonStarts,exonEnds from $geneTbl" ${queryDb} \
      | genePredSingleCover stdin stdout | gzip -2c \
        > /scratch/tmp/$queryDb.tmp.gz
      mv /scratch/tmp/$queryDb.tmp.gz genes/$queryDb.gp.gz
    end
'EOF'
    csh getGenes.csh >&! getGenes.log &

    ssh kkstore03
    cd /cluster/data/encode/TBA/DEC-07/2008-01-10/frames
    (cat  ../maf/*.maf | nice genePredToMafFrames hg18 stdin stdout bosTau3 genes/bosTau3.gp.gz canFam2 genes/canFam2.gp.gz galGal3 genes/galGal3.gp.gz hg18 genes/hg18.gp.gz panTro2 genes/panTro2.gp.gz rheMac2 genes/rheMac2.gp.gz mm9 genes/mm9.gp.gz rn4 genes/rn4.gp.gz monDom4 genes/monDom4.gp.gz | nice gzip > mafFrames.gz) >& frames.log &

    ssh hgwdev
    cd /cluster/data/encode/TBA/DEC-07/2008-01-10/frames
    nice hgLoadMafFrames hg18 encodeTbaFramesDec07 mafFrames.gz >& loadFrames.log &

    # Post downloads
    ssh kkstore03
    cd /cluster/data/encode/TBA/DEC-07/2008-01-10/
    mkdir downloads
    cd anno/maf
    # redo to include re-annotated mafs (with 'T' lines for tandem dups)
    # 2008-11-06 kate
    tar cvfz ../../downloads/encodeTba.maf.tgz  *.maf

    # Obtain sequence freeze
    ssh kkstore03
    cd /cluster/data/encode/MSA
    mkdir -p DEC-07/lab
    cd DEC-07/lab
    wget -nd ftp://kronos.nhgri.nih.gov/pub/outgoing/elliott/encode/freeze/DEC-2007/DEC-2007.tar.gz

    # Received README.txt for sequence freeze
    # and encodeTbaAlign.html update from Gayle McEwen, 6/12/08
    cp encodeTbaAlign_DEC-2007.html ~/kent/src/hg/makeDb/trackDb/human/hg18/encodeTbaAlignDec07.html
    # checkin to CVS

    cd ..
    mkdir downloads
    cd downloads
    ln -s /cluster/data/encode/MSA/DEC-07/lab/DEC-2007.tar.gz  .
    ln -s /cluster/data/encode/MSA/DEC-07/lab/seq sequences

    # Received species tree from Gayle 8/08
    cp ../lab/conserved.mod tree_4d.tba.nh
    # edit to remove phastCons-specific header
    # edit tree to remove species not in this dataset: gorilla, lemur, black_lemur, sheep, 
    #   muntjak_indian, ajbat, cpbat, eehedgehog, wallaby, dunnart, tortoise, xenopus
    #   tetraodon, fugu, zebrafish, pig
    tail +2 seq/metadata.txt | awk '{print $1}' | sort | uniq > species.txt
    echo `cat species.txt|sed 's/$/,/'` | sed 's/ //g' > speciesList.txt
    /cluster/bin/phast/tree_doctor --prune-all-but `cat speciesList.txt` ../lab/tree_Dec2007.nh | sed 's/:0.000000//g' > species36.nh
    # Create tree image with phyloGif -- use 700 height, preserve underscores
    # encode_36way.gif
    cp encode_36way.gif ~/browser/images/phylo/
    # checkin to CVS
    cp ../lab/README_DEC-2007.txt README.txt
    # fix typo -- it's the Dec not Sep freeze
    # edit DIRECTORY structure section a bit to reflect this downloads organization

    # post for download 
    ssh hgwdev
    cd /usr/local/apache/htdocs/goldenPath/hg18/encode
    mkdir -p MSA/DEC-2007
    cd MSA/DEC-2007
    ln -s /cluster/data/encode/MSA/DEC-07/downloads/{README.txt,DEC-2007.tar.gz} .
    ln -s /cluster/data/encode/MSA/DEC-07/downloads/{tree_4d.tba.nh,species36.nh} .
    cp ~/browser/images/phylo/encode_36way.gif .
    mkdir -p alignments/TBA/
    cd alignments/TBA
    ln -s /cluster/data/encode/TBA/DEC-07/2008-01-10/downloads/encodeTba.maf.tgz encodeTbaDec07.maf.tgz

    # Conservation
    mkdir -p ChaiCons/2008-03-24/lab BinCons/2008-03-24/lab
    # copy files from Gayle McEwen email

    # binCons files are formatted <region> start end name score,
    # where score is always 1000
    # Lift these to hg18 coordinates, and remove score field.

    # Build a liftUp spec from the hg18 encodeRegions table: one line per ENCODE
    # region giving chromStart (offset), region name, region length and chrom,
    # with a constant 30000000 appended as the final column.
    echo "select chromStart, name, chromEnd-chromStart, chrom from encodeRegions" | hgsql -N hg18 | sed 's/$/\t30000000/' > /cluster/data/encode/MSA/encodeRegions.lft

    # NOTE(review): no "cd ChaiCons/2008-03-24" is recorded before this point,
    # although the later "cd ../../BinCons/2008-03-24" implies it -- confirm.
    # NOTE(review): liftUp writes ChaiCons.bed, but the following steps read
    # ChaiCons.bed3; the step that created ChaiCons.bed3 is not recorded here
    # (and the "size 3" in the load output suggests a 3-column file was
    # loaded) -- confirm before re-running.
    liftUp ChaiCons.bed /cluster/data/encode/MSA/encodeRegions.lft warn lab/CHAI.bed
    wc -l ChaiCons.bed3 lab/CHAI.bed
     #208916 ChaiCons.bed
     #208916 lab/CHAI.bed
    # Keep the first three columns and add an item name of the form chai.<line number>
    awk '{printf "%s\t%d\t%d\tchai.%d\n", $1, $2, $3, NR}' ChaiCons.bed3 > ChaiCons.bed4
    hgLoadBed hg18 encodeTbaChaiConsDec07 ChaiCons.bed4
    # Loaded 208916 elements of size 3

    cd ../../BinCons/2008-03-24
    liftUp -type=.bed stdout /cluster/data/encode/MSA/encodeRegions.lft warn lab/BINCONS.bed |\
        sed 's/1000$//' > BinCons.bed

    wc -l BinCons.bed lab/BINCONS.bed
    # 117793 BinCons.bed
    # 117836 lab/BINCONS.bed
    # difference due to blank lines in source file:
    grep '^$' lab/* | wc -l
    # 43
    hgLoadBed hg18 encodeTbaBinConsDec07 BinCons.bed
    
#############################################################################
# encodeGencodeGeneKnownMar07  (2010-04-07 markd)
# Was discovered to be corrupted on hgwdev and all servers
# 

    cd /cluster/data/encode/convertHg18/genePred
    genePredCheck -db=hg18 encodeGencodeGeneKnownMar07.tab 
    # checked: 2991 failed: 0
    hgLoadGenePred -genePredExt hg18 encodeGencodeGeneKnownMar07 encodeGencodeGeneKnownMar07.tab
    genePredCheck -db=hg18 encodeGencodeGeneKnownMar07
