#!/usr/bin/perl

# This script was originally written by Yontao Lu and was copied from
# /cluster/store4/ratMarker/code/ into the kent/src tree in order
# to get it under CVS control; added warnings.

# $Id: createStsInfoRat,v 1.3 2006/03/09 02:00:28 angie Exp $

use warnings;

##Des: Check how many RH markers have primer info

# Exactly six input files must be given; otherwise print the usage line to
# STDERR and exit with status 1.
unless (scalar(@ARGV) == 6) {
    print STDERR "createStsInfoRat: Unists raw alias rhmap fhhmap shrspmap\n";
    exit(1);
}


##0-id, 1-name, 2-rgdid, 3-rgdname, 4-unistsid, 5-#ofalias, 6-alias, 7-primer1, 8-primer2, 9-distance, 10-isSeq, 11-organism, 12-FHHXACI, 13-chr, 14-posi, 15-SHRSPXBN, 16-chr, 17-posi, 18-RH, 19-chr, 20-posi, 21-RHLOD, 22-Gene_name, 23-Gene_Id, 24-cloSeq


# Command-line arguments, in order: UniSTS file, RGD raw info file, alias
# file, RH map, FHH map, SHRSP map.  They are removed from @ARGV just as the
# individual shifts did.
my ($uni, $raw, $ali, $rh, $fhh, $sh) = splice(@ARGV, 0, 6);

# Open every input up front so a bad path fails before any work is done.
# The three-argument form of open is used so that the file name is taken
# literally: no mode characters are parsed out of it and surrounding
# whitespace is not stripped.  The bareword handles are read by the loops
# further down the script.
open(RH, '<', $rh) || die "Can't open rh map $rh: $!";
open(ALI, '<', $ali) || die "can't open alias $ali: $!";
open(UNI, '<', $uni) || die "Can't open unists $uni: $!";
open(RAW, '<', $raw) || die "Can't open rawinfo $raw: $!";
open(FHH, '<', $fhh) || die "Can't open fhh map $fhh: $!";
open(SH, '<', $sh) || die "Can't open shrsh map $sh: $!";

# Load the UniSTS file.  Every input line becomes one 25-column row in
# @table, numbered from 1 by $ucscId.  Input fields used: 0 (stored as the
# UniSTS id), 1 and 2 (primers), 3 (distance), 4 (marker name) and 6 (an
# additional lookup key).  Fields 4 and 6 are upper-cased.
$ucscId = 1;
while (my $line = <UNI>) {
    chomp($line);
    my @eles = split(/\t/, $line);
    my $name = uc($eles[4]);
    my $extraKey = uc($eles[6]);

    # Only rows whose first primer field contains a word character are
    # registered in the %sts lookup (by id, name and the extra key).
    if ($eles[1] =~ /\w/o) {
        $sts{$_} = $ucscId foreach ($eles[0], $name, $extraKey);
    }

    if (! $name || $name eq '-') {
        warn "WARNING: Missing name for UniSTS ID $eles[0] (UCSC ID $ucscId) will cause trouble downstream.\n";
    }

    # Columns 0-24 in output order; RGD, alias and map columns start out
    # with placeholder values and are filled in by the later passes.
    $table[$ucscId] = [
        $ucscId, $name, 0, "", $eles[0], 0, "",      # 0-6
        $eles[1], $eles[2], $eles[3] || "0",         # 7-9
        0, "Rattus norvegicus",                      # 10-11
        "", "", 0.0,                                 # 12-14
        "", "", 0.0,                                 # 15-17
        "", "", 0.0,                                 # 18-20
        0.0, "", "", "",                             # 21-24
    ];
    $ucscId++;
}
close(UNI);

# Missing fields upper-case to the empty string; drop that bogus key.
$empty = "";
delete $sts{$empty};

# First pass over the alias file.  Each line is "<id> TAB <alias;alias;...>".
# The row is located by id, or failing that by the first alias already
# present in %sts.  When a row is found, its alias count (column 5) and raw
# alias string (column 6) are stored and every upper-cased alias is
# registered as a lookup key for that row.
while (my $line = <ALI>) {
    chomp($line);
    my ($unid, $names) = split(/\t/, $line);
    my @aliases = map { uc($_) } split(/\;/, $names);

    my $indx = $sts{$unid};
    if (! $indx) {
        $indx = 0;
        foreach my $alias (@aliases) {
            if ($sts{$alias}) {
                $indx = $sts{$alias};
                last;
            }
        }
    }
    next if (! $indx);

    $table[$indx][5] = scalar(@aliases);
    $table[$indx][6] = $names;
    $sts{$_} = $indx foreach (@aliases);
}
close(ALI);


# Pass over the RGD raw-info file.  Fields used: 0 = RGD id, 1 = symbol
# name, 2 = sequence, 3 and 4 = primers, 5 = distance.
#
# A row whose upper-cased symbol, or "RGD:<id>", is already a key in %sts
# updates the existing table row (counted in $found).  Otherwise, if it has
# a primer or a sequence, it is appended as a new row (counted in $new).
#
# split() drops trailing empty fields, so rows lacking the later columns
# come back short.  Every optional field is therefore tested with defined()
# before it is matched or stored; this avoids "uninitialized value" warnings
# and keeps undef out of @table, where it would abort the output stage.
$found = 0;
$new = 0;
while (my $line = <RAW>) {
    chomp($line);
    my ($rgd, $symbol, $seq, $primer1, $primer2, $dist) = split(/\t/, $line);

    # A blank line yields no fields at all; there is nothing to record.
    next if (! defined $rgd);
    $symbol = "" if (! defined $symbol);

    # Skip the column-header line.
    next if ($rgd eq 'RGD_ID' && $symbol eq 'SYMBOL_NAME');

    $symbol = uc($symbol);
    my $hasSeq = (defined $seq && $seq =~ /[ATCG]/) ? 1 : 0;
    my $hasPrimer = (defined $primer1 && $primer1 =~ /\w/) ? 1 : 0;

    # Look up by symbol first, then by RGD id.
    my $indx = $sts{$symbol} || $sts{"RGD:$rgd"} || 0;
    if ($indx) {
        $table[$indx][2] = $rgd || 0;
        $table[$indx][3] = $symbol;
        if ($hasSeq) {
            $table[$indx][24] = $seq;
            $table[$indx][10] = 1;
        }
        $found++;
    }
    elsif ($hasPrimer || $hasSeq) {
        if (! $symbol || $symbol eq '-') {
            warn "WARNING: Missing name for RGD ID $rgd (UCSC ID $ucscId) will cause trouble downstream.\n";
        }
        # Columns 0-24 in output order.  Primers missing from a
        # sequence-only row are stored as empty strings, not undef.
        $table[$ucscId] = [
            $ucscId, $symbol, $rgd || 0, $symbol, 0, 0, "",    # 0-6
            (defined $primer1) ? $primer1 : "",                # 7
            (defined $primer2) ? $primer2 : "",                # 8
            $dist || "0",                                      # 9
            $hasSeq, "Rattus norvegicus",                      # 10-11
            "", "", 0.0,                                       # 12-14
            "", "", 0.0,                                       # 15-17
            "", "", 0.0,                                       # 18-20
            0.0, "", "",                                       # 21-23
            $hasSeq ? $seq : "",                               # 24
        ];
        $sts{$symbol} = $ucscId;
        $ucscId++;
        $new++;
    }
}
close(RAW);

# A row with no symbol registers the empty string as a key; remove it.
delete $sts{""};
print STDERR "found: $found\tnew: $new\n";

# Second pass over the alias file.  The RGD raw-info pass may have added new
# rows and new keys to %sts, so the alias file is read again with the same
# matching rules: locate the row by id, or else by the first alias already
# known, then record the alias count (column 5) and alias string (column 6)
# and register every upper-cased alias as a key for that row.
#
# Three-argument open takes the file name literally, and the failure message
# names the file, matching the other opens in this script.
open(ALI, '<', $ali) || die "can't open alias $ali: $!";

while (my $line = <ALI>) {
    chomp($line);
    my ($unid, $names) = split(/\t/, $line);
    my @aliases = map { uc($_) } split(/\;/, $names);

    my $indx = $sts{$unid};
    if (! $indx) {
        $indx = 0;
        foreach my $alias (@aliases) {
            if ($sts{$alias}) {
                $indx = $sts{$alias};
                last;
            }
        }
    }
    next if (! $indx);

    $table[$indx][5] = scalar(@aliases);
    $table[$indx][6] = $names;
    foreach my $alias (@aliases) {
        $sts{$alias} = $indx;
    }
}
close(ALI);


# Attach RH map data.  Lines starting with '#' are skipped.  Each remaining
# line is "<id> TAB <name> TAB <field> TAB <field> TAB <field>"; the three
# trailing fields go to columns 19, 20 and 21 of the row found by id or,
# failing that, by upper-cased name.  Lines matching no row are ignored.
while (my $line = <RH>) {
    next if ($line =~ /^\#/);
    chomp($line);
    my ($unid, $name, $chrom, $pos, $lod) = split(/\t/, $line);
    $name = uc($name);

    my $indx = $sts{$unid} || $sts{$name} || 0;
    next if ($indx == 0);

    $table[$indx][18] = "RH_map.2.2";
    $table[$indx][19] = $chrom;
    $table[$indx][20] = $pos;

    # Column 21 keeps its 0.0 default when the last field contains "F" or
    # is absent.  split() drops trailing empty fields, so an empty final
    # field arrives as undef; storing that would make the output stage die.
    $table[$indx][21] = $lod if (defined $lod && $lod !~ /F/);
}
close(RH);

# Attach FHH map data.  Lines starting with '#' are skipped.  The row is
# found by id or, failing that, by upper-cased name; lines matching no row
# are ignored.  Columns 12-14 receive the map label and the two fields that
# follow the name.
while (my $line = <FHH>) {
    next if ($line =~ /^\#/);
    chomp($line);
    my ($unid, $name, @rest) = split(/\t/, $line);
    my $upperName = uc($name);

    my $indx = $sts{$unid} || $sts{$upperName} || 0;
    next unless ($indx);

    @{$table[$indx]}[12, 13, 14] = ("FHH_x_ACI", $rest[0], $rest[1]);
}
close(FHH);

# Attach SHRSP map data.  Lines starting with '#' are skipped.  The row is
# found by id or, failing that, by upper-cased name; lines matching no row
# are ignored.  Columns 15-17 receive the map label and the two fields that
# follow the name.
while (my $line = <SH>) {
    next if ($line =~ /^\#/);
    chomp($line);
    my ($unid, $name, @rest) = split(/\t/, $line);
    my $upperName = uc($name);

    my $indx = $sts{$unid} || $sts{$upperName} || 0;
    next unless ($indx);

    @{$table[$indx]}[15, 16, 17] = ("SHRSP_x_BN", $rest[0], $rest[1]);
}
close(SH);


# Write the finished table to stdout, one tab-separated line per row, for
# ids 1 .. $ucscId-1.  A row with fewer than 25 columns, or with any of
# columns 0-24 undefined, is fatal.
foreach my $i (1 .. $ucscId - 1) {
    my $row = $table[$i];
    die "ucscId $i has too few elements " . scalar(@{$row}) if (scalar(@{$row}) < 25);
    foreach my $j (0 .. 24) {
        die "ucscId $i el $j is undef" if (! defined $row->[$j]);
    }
    print join("\t", @{$row}), "\n";
}

