# pick first informative hit from tabular input (qpid evalue desc qseq)
# output HTML table (qpid evalue desc SANS-link)

use strict;

# Base URL of the SANSparallel server; the query sequence is appended to it.
my $URL="http://ekhidna2.biocenter.helsinki.fi/cgi-bin/sans/sans.cgi?mode=table&seq=";
my $oldqpid='';	# identifier of the query whose hits are currently being read
my $oldqseq='';	# sequence of that query (needed for the fallback row)
my $found=0;	# set to 1 once a row has been printed for the current query

print "Click on protein identifier to view the result of SANSparallel.<TABLE><TR><TH>Protein</TH><TH>E-value</TH><TH>Predicted description</TH></TR>\n";
# Input is expected to be grouped by query identifier: all hits of one query
# on consecutive lines. A lexical line variable is used instead of $_ so that
# helper subs cannot disturb the loop.
while(my $line=<STDIN>) {
	chomp($line);
	next if($line!~/\S/);	# ignore blank lines
	my($qpid,$evalue,$desc,$qseq)=split(/\t/,$line);
	# short lines leave trailing fields undefined; treat them as empty
	foreach my $field ($qpid,$evalue,$desc,$qseq) {
		$field='' unless(defined($field));
	}
	if($qpid ne $oldqpid) {
		if($oldqpid ne '' && $found==0) {
			# backup if no informative hits
			print table_row($oldqpid,$oldqseq,'','no informative hits');
		}
		$found=0;
		$oldqpid=$qpid;
		$oldqseq=$qseq;
	}
	next if($found>0);
	next if(informative($desc)<1);
	print table_row($qpid,$qseq,$evalue,$desc);
	$found=1;
}
# The in-loop backup only fires when the next query starts, so the final
# query needs the same check once the input is exhausted.
if($oldqpid ne '' && $found==0) {
	print table_row($oldqpid,$oldqseq,'','no informative hits');
}
print "</TABLE>\n";

exit();

# Build one HTML table row: linked protein identifier, E-value, description.
# Arguments: query id, query sequence, E-value, description (plain text).
# Returns the row as a string terminated by a newline.
sub table_row {
	my($qpid,$qseq,$evalue,$desc)=@_;
	# Concatenate explicitly: inside a double-quoted string "$URL.$qseq"
	# would insert a literal dot between the URL and the sequence.
	my $link='<A HREF="'.html_escape($URL.$qseq).'" target="_blank">'.html_escape($qpid).'</A>';
	return("<TR><TD>".join("</TD><TD>",$link,html_escape($evalue),html_escape($desc))."</TD></TR>\n");
}

# Escape the characters that are special in HTML text and quoted attributes.
# Undefined input is returned as an empty string.
sub html_escape {
	my($text)=@_;
	$text='' unless(defined($text));
	$text=~s/&/&amp;/g;	# must come first so later entities are not re-escaped
	$text=~s/</&lt;/g;
	$text=~s/>/&gt;/g;
	$text=~s/"/&quot;/g;
	return($text);
}

# Decide whether a hit description carries usable functional information.
# Argument: the description text of one hit.
# Returns 0 if the description is missing/blank or matches one of the known
# uninformative phrases (case-insensitive), 1 otherwise.
sub informative {
	# lexical copy: assigning to the global $_ here would overwrite the
	# caller's current input line
	my($desc)=@_;
	# a missing or blank description says nothing about the protein
	return(0) if(!defined($desc) || $desc!~/\S/);
	# check uninformative terms
	my @uninformative=(
		qr/Uncharacterized protein/i,
		qr/hypothetical.*protein/i,
		qr/similar\w* to/i,
		qr/putative hypothetical/i,
		qr/predicted protein/i,
		qr/genom.*scaffold/i,
	);
	foreach my $pattern (@uninformative) {
		return(0) if($desc=~$pattern);
	}
	return(1);
}
