#!/usr/bin/perl -w
# lrf june 07
#
# Modules for running tmpos
#
#### ($string, $string) = &tmpos::axes($string,$string) ;
# Axes script, pdbfilename-with-helices, returns helices and cog filenames
#
#### (nothing|1) = &tmpos::tm2helix($string,$string,$string) ;
# tm2helix: pdb filename (for numbering and chain-ids), tm ends filename, output pdb filename (no .pdb extension)
# returns 1 (after printing a warning) on error, nothing on success
#
#### ($number)   = &tmpos::execute($string,$string,$string,$string,$string,$string) ;
# tmpos executable, label, sequence filename, r4s-output filename, helices filename, cog filename
# returns score, or 0 (after printing a warning) if an input is missing or tmpos reports an error
#
use strict;
 
use lib "razor/0/common/scripts/modules/parsing";
use lib "/razor/1/lucy/columbia/mp_homology/perl";
require tm_functions;
 
package   tmpos;
require   Exporter;
# Exporter looks these up as package variables (@tmpos::ISA, @tmpos::EXPORT).
# Declaring them with "my" makes them file lexicals that Exporter cannot
# see, so the package would neither inherit from Exporter nor export anything.
our @ISA    = qw (Exporter);
our @EXPORT = qw (axes tm2helix execute);
 
# Run the tmpos executable for one protein and collect its output files.
#
# Arguments:
#   $exe   - path to the tmpos executable (must pass -x)
#   $label - prefix used when renaming the output files
#   $s     - sequence file ($pdb.seq.txt),        e.g. $seq_dir/$family/$n1$seq_ext
#   $r     - rate4site output (cons.$pdb.txt),    e.g. $r4s_dir/$family/$r4s_file$n1.txt
#   $h     - helices file,                        e.g. $name.helices
#   $c     - file with centers of gravity,        e.g. $name.cog
#
# Returns the second column of the "answer" line(s) of tmpos_log.txt exactly
# as produced by grep/awk (so it still carries its trailing newline), or 0
# after a warning when the executable or an input is missing or when the
# run leaves a tmpos_err.txt behind.
#
# Works in the current directory: tmpos_log.txt, tmpos_err.txt, pdbFILE.ent
# and the tmpos_*/consurf files are read from and renamed in the cwd.
sub execute {

	my ($exe, $label, $s, $r, $h, $c) = @_;

	# refuse to start unless the program and every input file are present
	if (!-x $exe) { warn "$exe is not executable\n"; return(0); }
	foreach my $input ($s, $r, $h, $c) {
		if (!-e $input) { warn "cannot find $input\n"; return(0); }
	}

	# run calculation; a left-over error file from an earlier run would
	# otherwise be mistaken for a failure of this one
	unlink "tmpos_err.txt";
	print STDERR "Running:\n $exe $s $r $h $c\n";
	system("$exe $s $r $h $c >> /dev/null");
	if (-e "tmpos_err.txt") { warn "\nFATAL ERROR: See tmpos_err.txt\n\n"; return (0); }

	# get results
	my $score = `grep answer tmpos_log.txt | awk '{print \$2}'`;

	###############
	# tidy up output
	# (the list needs its own parentheses: "foreach my $x qw(...)" is a
	# syntax error from Perl 5.18 on)
	foreach my $file (qw(tmpos_helices_data.txt tmpos_helices_score.txt consurf.spt)) {
		system("mv $file $label.$file");
	}
	system("mv tmpos_log.txt $label.tmpos.log");
	system("mv pdbFILE.ent   $label.axes.pdb");
	unlink "tmpos_err.txt";

	# NOTE(review): when $label has no directory part this also removes the
	# just-renamed $label.consurf.spt; kept as in the original - confirm intent
	my @spt_files = glob("*.spt");
	unlink @spt_files if @spt_files;

	return($score);

}
# Run the script that generates helix axes / centers of gravity, then
# renumber the chain ids in the resulting helices file so they start at 1.
#
# Arguments:
#   $scr - path to the Python script, e.g.
#          /razor/1/lucy/cluster_software/tmpos/ExtractPrincipleAxe.py
#   $pdb - pdb file (original note: should be file without extension);
#          the script is expected to create "$pdb.cog" and "$pdb.helices"
#
# Returns ("$pdb.helices", "$pdb.cog") on success, or (1) after a warning
# when an input is missing, an output was not created or cannot be
# read/rewritten, or the helices file ends up empty.
sub axes {

	my ($scr,$pdb) = @_;

	# the script is handed to python, so it only has to exist
	if (!-e $scr) { warn "cannot find $scr\n"; return (1); }
	if (!-e $pdb) { warn "$pdb could not be found\n"; return (1); }

	# extract helix data
	print STDERR "Running $scr\n";
	system("python $scr $pdb >> /dev/null");
	if (!-e "$pdb.cog")     { warn "Problem creating $pdb.cog from $pdb, with $scr\n"; return (1); }
	if (!-e "$pdb.helices") { warn "Problem creating $pdb.helices from $pdb, with $scr\n"; return (1); }

	# quick fix for sequence ids
	my $helices_file = "$pdb.helices";
	my $in;
	if (!open($in, '<', $helices_file)) {
		warn "cannot read $helices_file: $!\n";
		return (1);
	}
	my @lines = <$in>;
	close $in;

	# don't do anything if the first chain number is already 1
	my $already_numbered = (@lines and $lines[0] =~ /^1 /);

	if (!$already_numbered) {

		# replace chain numbers with numbers starting from 1: the counter
		# goes up each time the first column is larger than on the line before
		my $new_chain   = 1;
		my $old_chain   = 1000;
		my $new_helices = "";
		foreach my $line (@lines) {
			my @cols = split(/\s+/, $line);
			if ($cols[0] > $old_chain) { $new_chain++; }
			# \Q..\E: the old id is literal text, not a pattern
			$line =~ s/^\Q$cols[0]\E\s/$new_chain /;
			$old_chain = $cols[0];
			$new_helices .= $line;
		}

		print STDERR "Doing quick fix on $pdb.helices...\n";
		my $out;
		if (!open($out, '>', $helices_file)) {
			warn "cannot rewrite $helices_file: $!\n";
			return (1);
		}
		print $out $new_helices;
		if (!close($out)) {
			warn "cannot rewrite $helices_file: $!\n";
			return (1);
		}

		print STDERR "Created $pdb.helices: $new_helices";
	}
	# end quick fix

	if (-z "$pdb.helices") { warn "Problem editing $pdb.helices\n"; return(1); }
	return("$pdb.helices", "$pdb.cog");

}

## Run conversion of HOMEP TM ends file to HELIX entries in pdb style
# requires pdb file for residue names
#
# Arguments:
#   $pdb    - existing pdb file; its CA ATOM records supply residue names
#   $t      - tm ends file ($pdb.tmfull.txt), e.g. "$tm_dir/$family/$n1$tm_ext"
#   $newpdb - output file (original note: should be a name without a .pdb
#             extension); it is opened for writing exactly as given
#
# Writes one HELIX record per TM segment followed by every line of $pdb.
# Returns 1 after a warning when an input is missing or a file cannot be
# opened/written; returns nothing (empty list) on success.
sub tm2helix {

	my ($pdb,$t,$newpdb) = @_;
	if (!-e $t)   { warn "cannot find $t\n"; return(1); }
	if (!-e $pdb) { warn "cannot find $pdb\n"; return(1); }

	# read TM list file for pdb
	# presumably readTMlist returns the protein name plus refs indexed as
	# $array[$tm_index]{$protein} - that is how they are used below; verify
	# against tm_functions
	my ($protein, $rnum_tm, $rstart, $rend, $rchn) = &tm_functions::readTMlist($t, "1");
	my %num_tm = %$rnum_tm;
	my @chn    = @$rchn;
	my @start  = @$rstart;
	my @end    = @$rend;
	my $ntm    = $num_tm{$protein};

	# read pdb file to get residue names, keyed by residue number and chain
	my (%res_name, @pdb_lines);
	my $pdb_fh;
	if (!open($pdb_fh, '<', $pdb)) { warn "cannot read $pdb: $!\n"; return(1); }
	while (my $line = <$pdb_fh>) {

		push @pdb_lines, $line;
		next unless $line =~ /^ATOM/;
		next unless (substr($line,12,4) eq " CA ");

		# fixed-column fields of the ATOM record
		my $res_num = substr($line,22,4);
		my $name    = substr($line,17,4);
		my $chain   = substr($line,21,1);

		$res_num =~ s/\s+//g;
		$name    =~ s/\s+//g;
		$res_name{$res_num}{$chain} = $name;

	}
	close $pdb_fh;

	my $out;
	if (!open($out, '>', $newpdb)) { warn "cannot write $newpdb: $!\n"; return(1); }

	# generate HELIX entries for each Tm domain
	print STDERR "Creating HELIX listing\n";
	for my $tm (1 .. $ntm) {

		my $c = $chn[$tm]{$protein};
		my $s = $start[$tm]{$protein};
		my $e = $end[$tm]{$protein};
		my $name_start = $res_name{$s}{$c};
		my $name_end   = $res_name{$e}{$c};
		# the TM index is used for both numeric fields after "HELIX"
		printf $out "HELIX %4d%4d%4s %1s%5d %4s %1s%5d\n",
			$tm, $tm, $name_start, $c, $s, $name_end, $c, $e;

	}
	print $out @pdb_lines;
	# buffered write errors only show up when the handle is closed
	if (!close($out)) { warn "cannot write $newpdb: $!\n"; return(1); }

	return();

}

# END 
