#!/bin/perl


######################################################################
#                                                                    #
# this program parses the output from structural alignment using ska #
#                                                                    #
######################################################################
# 
# Created on 4/10/2002.
# Author: ikoh & cltang @ honig lab 
# 
# Advertised functions:
#
# $version = &version ()
# (\%seqs,\%structs) = &toSeqsStructsHash ($skaText) 
#

use strict;

package   ska;
require   Exporter;
my @ISA = qw (Exporter);
my @EXPORT = qw (version toSeqsStructsHash);

#### EXPORTED ####

###########
sub version
########### 
{
    print STDERR "parse Version 0.1: Apr 10, 2002\n";
    print STDERR "ska Version as of Apr 10, 2002\n";
    return "0.1";
}

##############
sub toSeqsStructsHash
##############
# Convert ska format to seqID->seq and seqID->struct hash
{
    my ($inbuff) = @_;

    &advance (\$inbuff,"\.\.\.\+"); # move to first record

    my @lines = split ("\n", $inbuff);
    my $line;
    my $lockHash = 0;
    my %retSeqHash = ();
    my %retStructHash = ();
    my $cur_template;
    my $n_seq = 0;
    my $ind = 0;
    my $which = 0;

    foreach $line (@lines) {
	if ($line =~ /^\s+([>\.\+<]+)\s*$/) {
	    $cur_template = $1;
	}
	elsif (isSkaSeqLine($line)) {
	    my @items = &parseSkaSeqLine($line,$cur_template);
	    my $pdbID = $items[0];
	    my $seq   = $items[1];

	    if (length($retStructHash{$pdbID})==0) {
		last if ($lockHash);
		$retStructHash{$pdbID} = $seq;
		$n_seq++;
	    }
	    elsif (length($retSeqHash{$pdbID})==0) {
		last if ($lockHash);
		$retSeqHash{$pdbID} = $seq;
	    }
	    else {
		$ind++;

		if ($which eq 0) {
		    $retStructHash{$pdbID} .= $seq;
		}
		else {
		    $retSeqHash{$pdbID} .= $seq;
		}

		$which = 1 - $which
		    if ($ind % $n_seq eq 0);
	    }
	}
	else {
	    undef $cur_template;
	    $lockHash = 1; # lock after first block of seq records
	}
    }

    return (\%retSeqHash,\%retStructHash);
}

#*** PRIVATE ***#

#****************#
sub isSkaSeqLine
#****************#
# Check if line is a PrISM sequence line
{
    my ($line) = @_;
    my $bool = 0;
    $bool = 1 
	if ((length($line) eq 79) and
	    ($line =~ /^\s*\S+\s*\-?\d*\s{2}.*$/));
    #print "$line $bool\n";
    return $bool;
}

#*******************#
sub parseSkaSeqLine
#*******************#
# Parse a sequence line from PrISM
{
    my ($line,$template) = @_;
    my $len_template = length($template);
    $line =~ /^\s*(\S+)\s*(\-?\d*)\s{2}(.*)$/;
    my $pdbID = $1;
    my $ano   = $2;
    my $seq   = substr($3,0,$len_template);
    $seq =~ s/\s/-/g;

    if ($pdbID =~ /\d{4}$/ && !length($ano)) {
        # strip alignment number from name of sequence
        $pdbID =~ s/\d{4}$//;
    }

    return ($pdbID,$seq);
}

#*********#
sub advance
#*********#
# Advance the text buffer until we see the token.
# Leave the line containing the token in the buffer.
# If $delim is empty, use "\n".
# If $token is empty, advance to next line.
{
    my ($txtref, $token, $delim) = @_;
    my $retbuff = "";
    
    if (length($delim) == 0) { $delim = "\n"; }
    my @lines = split ($delim, $$txtref);

    $$txtref = "";
    
    my $buffref = \$retbuff;

    if (length($token)) {
	foreach (@lines) {
	    $buffref = $txtref if (/$token/);
	    $$buffref .= $_;
	    $$buffref .= $delim;
	}
    } else {
	$retbuff = shift @lines;
	foreach (@lines) {
	    $$txtref .= $_;
	    $$txtref .= $delim;
	}
    }
    
    return $retbuff;
}

