
=head1 NAME

PDB - Aids in parsing Protein Data Bank files

=head1 AUTHOR

        Cinque Soto
	# modified version of Cinque's module for parsing HETATM
	# as well as ATOM records, has Ingrid's version of dealing
	# with start/end indices and insertion codes

=head1 BUGS

        None found (yet...)

=head1 CAVEATS

        Use caution when applying the simple methods to PDB files
        that are composed of many NMR models

=head1 CHANGE LOG

        Last revision date is 2/7/2002

=head1 CREDITS

        None yet...

=head1 INHERITANCE

        Only standard modules

=head1 INSTALLATION

        How to install: 1) Unzip in directory where Perl Libraries are
                           installed.
                        2) Untar file
                        3) run perl makefile--> perl makefile.pl
                        4) run make (or nmake in Windows)--> make
                        
=head1 SYNOPSIS

        use PARSER::PDB;

        $object = PDB->new("PDB File");

        $object->methods("arguments");

=head1 DESCRIPTION

I am working on a complete description for this class.

=head1 SEE ALSO

Refer to other documents, such as L<perlmod>.

=cut

###############################################################################################

package PDB;

                    ############################################
#use strict 'refs'; #   ALLOW THE USE OF SYMBOLIC REFERENCES   #
                    ############################################
# strict 'refs' must stay disabled: the file-reading methods dereference
# the result of the list_* helpers with ${ ... } on every line, and for
# lines that are not ATOM/HETATM/HEADER records those helpers do not
# return a reference.

                   ####################
use strict 'subs'; #DISALLOW BAREWORDS#
                   ####################
                   
                   ###########################
use strict 'vars'; #DISALLOW GLOBAL VARIABLES#
                   ###########################

# Declared with "our" so that the version is a real package variable:
# a "my" lexical is invisible to $PDB::VERSION and PDB->VERSION.
our $VERSION = '0.01';

# File-wide debug switch ("Y" enables).
my $debug = "Y";


sub new {

    # Constructor.  The object is a blessed array reference whose slot 0
    # holds the single argument given to new(); the file-reading methods
    # later use that slot to build the path they open.
    #
    #   invocant : a class name, or an existing object whose class is reused
    #   $_[1]    : value stored verbatim in slot 0
    #
    # Returns the blessed array reference.

    my ($invocant, $source) = @_;

    my $self = [ $source ];

    return bless($self, ref($invocant) || $invocant);

}

sub filterA{

    ###########################################################
    #THE FOLLOWING FUNCTION IS MEANT TO BE USED ONLY IN THIS  #
    #MODULE.  FEEL FREE TO USE IT WHEREVER YOU WISH...ALTHOUGH#
    #RESULTS MAY NOT BE AS EXPECTED.                          #
    ###########################################################
    #
    # Normalises a list of codes: every entry that contains at least one
    # word character (\w) is kept as is, every other entry is replaced by
    # the string "NULL".
    #
    #   $_[1] : reference to the array of codes (not modified)
    #
    # Returns a reference to a new array of the same length as the input.
    #
    # Empty and undefined entries are also mapped to "NULL".  The earlier
    # implementation matched them against neither \w nor \W, which left
    # undefined holes in the result and could shorten it.

    my ($ref_dummy_array, $CODE) = @_;

    my @filtered = map { (defined($_) && /\w/) ? $_ : "NULL" }
                       @{ $CODE || [] };

    return \@filtered;

 }

sub residue_code {

   # Translates a three-letter residue name into its one-letter code.
   #
   #   $_[1] : residue name, e.g. "ALA"; surrounding whitespace is ignored,
   #           the comparison is case-sensitive
   #
   # Returns a reference to the one-letter code, or a reference to the
   # empty string when the name is not in the table.
   #
   # The name is looked up exactly.  The earlier implementation ran an
   # unanchored regex for every table entry, so an input containing more
   # than one code got whichever entry the hash iteration visited last.

   my ($ref_dummy_array, $CODE) = @_;

   my %AA_3TO1  = (ALA => "A", ARG => "R", ASN => "N", ASP => "D", CYS => "C",
                   GLN => "Q", GLU => "E", GLY => "G", HIS => "H", ILE => "I",
                   LEU => "L", LYS => "K", MET => "M", PHE => "F", PRO => "P",
                   SER => "S", THR => "T", TRP => "W", TYR => "Y", VAL => "V",
                   GLX => "Z", ASX => "B", UNK => "X");

   my $name = defined($CODE) ? $CODE : "";

   $name =~ s/^\s+//;

   $name =~ s/\s+$//;

   my $dummy = exists($AA_3TO1{$name}) ? $AA_3TO1{$name} : "";

   return \$dummy;

  }


sub atom_mass {

   ################################################################
   #HASH WAS ABSTRACTED FROM THE MOLECULAR MASS MODULE WRITTEN    #
   #BY brian d foy <comdog@computerdog.com>.  ELEMENT NAMES       #
   #WERE ALL CAPITALIZED TO BE COMPATIBLE WITH PDB'S FIELD FORMAT #
   ################################################################
   #
   # Looks up the mass of an element by its symbol.
   #
   #   $_[1] : element symbol; it is upper-cased and surrounding
   #           whitespace is removed before the lookup, so " C", "c"
   #           and "C" are equivalent
   #
   # Returns a reference to the mass, or a reference to the empty string
   # when the symbol is not in the table.
   #
   # The symbol is looked up exactly.  The earlier implementation tested
   # every table key as an unanchored regex against the symbol, so a
   # two-letter symbol such as "FE" matched both "F" and "FE" and the
   # mass returned depended on hash iteration order.

   my ($ref_dummy_array, $CODE) = @_;

   my %atom_mass  = ("H" => 1.00794,"D" => 2.014101,"T" => 3.016049,
                     "HE" => 4.002602,"LI" => 6.941,"BE" => 9.012182,
                     "B" => 10.811,"C" => 12.0107,"N" => 14.00674,
                     "O" => 15.9994,"F" => 18.9984032,"NE" => 20.1797,
                     "NA" => 22.989770,"MG" => 24.3050,"AL" => 26.981538,
                     "SI" => 28.0855,"P" => 30.973761,"S" => 32.066,
                     "CL" => 35.4527,"AR" => 39.948,"K" => 39.0983,
                     "CA" => 40.078,"SC" => 44.955910,"TI" => 47.867,
                     "V" => 50.9415,"CR" => 51.9961,"MN" => 54.938049,
                     "FE" => 55.845,"CO" => 58.933200,"NI" => 58.6934,
                     "CU" => 63.546,"ZN" => 65.39,"GA" => 69.723,
                     "GE" => 72.61,"AS" => 74.92160,"SE" => 78.96,
                     "BR" => 79.904,"KR" => 83.80,"RB" => 85.4678,
                     "SR" => 87.62,"Y" => 88.90585,"ZR" => 91.224,
                     "NB" => 92.90638,"MO" => 95.94,"TC" => 98,
                     "RU" => 101.07,"RH" => 102.90550,"PD" => 106.42,
                     "AG" => 107.8682,"CD" => 112.411,"IN" => 114.818,
                     "SN" => 118.710,"SB" => 121.760,"TE" => 127.60,
                     "I" => 126.90447,"XE" => 131.29,"CS" => 132.90545,
                     "BA" => 137.327,"LA" => 138.9055,"CE" => 140.116,
                     "PR" => 140.90765,"ND" => 144.24,"PM" => 145,
                     "SM" => 150.36,"EU" => 151.964,"GD" => 157.25,
                     "TB" => 158.92534,"DY" => 162.50,"HO" => 164.93032,
                     "ER" => 167.26,"TM" => 168.93421,"YB" => 173.04,
                     "LU" => 174.967,"HF" => 178.49,"TA" => 180.9479,
                     "W" => 183.84,"RE" => 186.207,"OS" => 190.23,
                     "IR" => 192.217,"PT" => 195.078,"AU" => 196.96655,
                     "HG" => 200.59,"TL" => 204.3833,"PB" => 207.2,
                     "BI" => 208.98038,"PO" => 209,"AT" => 210,"RN" => 222,
                     "FR" => 223,"RA" => 226,"AC" => 227,"TH" => 232.038,
                     "PA" => 231.03588,"U" => 238.0289,"NP" => 237,"PU" => 244,
                     "AM" => 243,"CM" => 247,"BK" => 247,"CF" => 251,
                     "ES" => 252,"FM" => 257,"MD" => 258,"NO" => 259,
                     "LR" => 262,"RF" => 261,"DB" => 262,"SG" => 266,
                     "BH" => 264,"HS" => 269,"MT" => 268,"UUN" => 271,
                     "UUU" => 272,);

   my $symbol = defined($CODE) ? uc($CODE) : "";

   $symbol =~ s/^\s+//;

   $symbol =~ s/\s+$//;

   my $dummy = exists($atom_mass{$symbol}) ? $atom_mass{$symbol} : "";

   return \$dummy;

  }

sub residue_mass {

   # Looks up the mass of a residue by its one-letter code.
   #
   #   $_[1] : one-letter residue code, e.g. "A"; surrounding whitespace
   #           is ignored, the comparison is case-sensitive
   #
   # Returns a reference to the mass (stored as a string in the table),
   # or a reference to the empty string when the code is not in the table.
   #
   # The code is looked up exactly.  The earlier implementation tested
   # every table key as an unanchored regex against the input, so any
   # input longer than one letter got whichever matching entry the hash
   # iteration visited last.

   my ($ref_dummy_array, $CODE) = @_;

   my %RES_MASS  = (A => "89.09",R => "174.20",N => "132.12",D => "133.10",C => "121.15",
                    Q => "146.15",E => "147.13",G => "75.07",H => "155.16",I => "131.17",
                    L => "131.17",K => "146.19",M => "149.21",F => "165.19",P => "115.13",
                    S => "105.09",T => "119.12",W => "204.23",Y => "181.19",V => "117.15",
                    Z => "146.64",B => "132.61",X => "128.16");

   my $letter = defined($CODE) ? $CODE : "";

   $letter =~ s/^\s+//;

   $letter =~ s/\s+$//;

   my $dummy = exists($RES_MASS{$letter}) ? $RES_MASS{$letter} : "";

   return \$dummy;

  }



###################################################################
#THE LIST__METHODS ARE MEANT TO BE CALLED WITHIN THIS PACKAGE.    #
#THEY CAN BE USED INSIDE OF A WHILE LOOP THAT IS CYCLING THROUGH  #
#THE CONTENTS OF A PDB FILE.                                      # 
###################################################################

sub list_header{

    # Extracts the text of a HEADER record.
    #
    #   $_[1] : one line of a PDB file
    #
    # For a line starting with "HEADER", returns a reference to up to 80
    # characters taken from offset 7, with leading and trailing whitespace
    # removed.  For any other line no reference is returned.

    my ($self, $record) = @_;

    my $text = "";

    if ($record =~ /^HEADER/) {

        $text = substr($record, 7, 80);

        $text =~ s/^\s+//;
        $text =~ s/\s+$//;

        return \$text;

    }

   }

sub list_ChainID{

    # Extracts the chain identifier of an ATOM or HETATM record.
    #
    #   $_[1] : one line of a PDB file
    #
    # For ATOM/HETATM lines, returns a reference to the single character
    # at offset 21; a blank identifier is reported as "_".  For any other
    # line no reference is returned.

    my ($self, $record) = @_;

    my $chain_id = "";

    chomp($record);

    if ($record =~ /^ATOM/ or $record =~ /^HETATM/) {

        $chain_id = substr($record, 21, 1);

        # A blank chain ID becomes "_" so that it is a visible character.
        $chain_id =~ s/ /_/;

        return \$chain_id;

    }

   }

sub list_Serial{

    # Extracts the atom serial number of an ATOM or HETATM record.
    #
    #   $_[1] : one line of a PDB file
    #
    # For ATOM/HETATM lines, returns a reference to the five characters at
    # offset 6 with surrounding whitespace removed.  For any other line no
    # reference is returned.

    my ($self, $record) = @_;

    my $serial = "";

    chomp($record);

    if ($record =~ /^ATOM/ or $record =~ /^HETATM/) {

        $serial = substr($record, 6, 5);

        $serial =~ s/^\s+//;
        $serial =~ s/\s+$//;

        return \$serial;

    }

  }

sub list_Atom{

    # Extracts the atom name of an ATOM or HETATM record.
    #
    #   $_[1] : one line of a PDB file
    #
    # For ATOM/HETATM lines, returns a reference to the four characters at
    # offset 12 with surrounding whitespace removed.  For any other line
    # no reference is returned.

    my ($self, $record) = @_;

    my $atom_name = "";

    chomp($record);

    if ($record =~ /^ATOM/ or $record =~ /^HETATM/) {

        $atom_name = substr($record, 12, 4);

        $atom_name =~ s/^\s+//;
        $atom_name =~ s/\s+$//;

        return \$atom_name;

    }

     }


sub list_ResName{

    # Extracts the residue name of an ATOM or HETATM record.
    #
    #   $_[1] : one line of a PDB file
    #
    # ATOM lines   : returns a reference to the three characters at
    #                offset 17 with surrounding whitespace removed.
    # HETATM lines : all hetero residues are represented as methionine or
    #                unknown; returns a reference to "MET" when the
    #                residue name is "MSE" and to "UNK" otherwise.
    # Other lines  : no reference is returned.
    #
    # The residue name is read at offset 17 for both record types.  The
    # HETATM branch previously read at offset 19, which straddles the end
    # of the residue name and the chain ID, so "MSE" was never recognised.

    my ($ref_dummy_array, $array_line) = @_;

    my $data = "";

    chomp($array_line);

    if($array_line =~ /^ATOM/
	or $array_line =~ /^HETATM/
      )
	{
	  my $res_name = substr($array_line, 17, 3);

	  if ($array_line =~ /^ATOM/) {
		$data = $res_name;
	  }
	  elsif ($array_line =~ /^HETATM/) {
		$data = ($res_name eq "MSE") ? "MET" : "UNK";
	  }

	  $data =~ s/^\s+//;

	  $data =~ s/\s+$//;

	  return \$data;

	}

     }


sub list_ResSeqNumber{

    # Extracts the residue sequence number, together with its insertion
    # code, from an ATOM or HETATM record.
    #
    #   $_[1] : one line of a PDB file
    #
    # For ATOM/HETATM lines, returns a reference to the five characters at
    # offset 22 (the character at offset 26 is the one list_ResInsCode
    # reports) with surrounding whitespace removed, e.g. "12" or "12B".
    # For any other line no reference is returned.

    my ($self, $record) = @_;

    my $res_seq = "";

    chomp($record);

    if ($record =~ /^ATOM/ or $record =~ /^HETATM/) {

        $res_seq = substr($record, 22, 5);

        $res_seq =~ s/^\s+//;
        $res_seq =~ s/\s+$//;

        return \$res_seq;

    }

    }


sub list_ResInsCode{

    # Extracts the residue insertion code of an ATOM or HETATM record.
    #
    #   $_[1] : one line of a PDB file
    #
    # For ATOM/HETATM lines, returns a reference to the single character
    # at offset 26, untrimmed (a blank stays a blank).  For any other line
    # no reference is returned.

    my ($self, $record) = @_;

    my $ins_code = "";

    chomp($record);

    if ($record =~ /^ATOM/ or $record =~ /^HETATM/) {

        $ins_code = substr($record, 26, 1);

        return \$ins_code;

    }

    }


sub list_XYZ{

    # Extracts the three coordinate fields of an ATOM or HETATM record.
    #
    #   $_[1] : one line of a PDB file
    #
    # For ATOM/HETATM lines, returns a reference to a three-element array
    # holding the 8-character fields at offsets 30, 38 and 46, in that
    # order and untrimmed.  For any other line no reference is returned.

    my ($self, $record) = @_;

    my @xyz = ();

    chomp($record);

    if ($record =~ /^ATOM/ or $record =~ /^HETATM/) {

        foreach my $offset (30, 38, 46) {
            push(@xyz, scalar(substr($record, $offset, 8)));
        }

        return \@xyz;

    }

     }


sub list_BFactor{

    # Extracts the B-factor field of an ATOM or HETATM record.
    #
    #   $_[1] : one line of a PDB file
    #
    # For ATOM/HETATM lines, returns a reference to the six characters at
    # offset 60, untrimmed.  For any other line no reference is returned.

    my ($self, $record) = @_;

    my $b_factor = "";

    if ($record =~ /^ATOM/ or $record =~ /^HETATM/) {

        $b_factor = substr($record, 60, 6);

        return \$b_factor;

    }

     }

sub list_Element{

    # Extracts the element symbol field of an ATOM or HETATM record.
    #
    #   $_[1] : one line of a PDB file
    #
    # For ATOM/HETATM lines, returns a reference to the two characters at
    # offset 76, untrimmed (e.g. " C").  For any other line no reference
    # is returned.

    my ($self, $record) = @_;

    my $element = "";

    if ($record =~ /^ATOM/ or $record =~ /^HETATM/) {

        $element = substr($record, 76, 2);

        return \$element;

    }

  }

####################################################################
###################END LIST FUNCTIONS###############################
####################################################################

sub get_by_chain{

    # Collects the raw lines of one chain.
    #
    #   $_[1] : chain identifier; it is interpolated into an unanchored,
    #           case-sensitive pattern that is matched against the chain
    #           ID reported by list_ChainID
    #
    # The file named by slot 0 of the object is opened as is (no directory
    # or extension is added) and the result of open() is not checked.
    # Every line whose chain ID matches is kept, chomped, in file order.
    # Reading stops at the first TER or ENDMDL line seen after at least
    # one line has been kept.
    #
    # Returns a reference to the array of kept lines.
    #
    # list_ChainID does not return a reference for lines that are not
    # ATOM/HETATM records; dereferencing that result relies on
    # strict 'refs' being disabled in this package.

    my ($ref_dummy_array, $chain) = @_;

    my @kept = ();

    my $FLAG = "OFF";

    open(DUMMY, "$ref_dummy_array->[0]");

    while(<DUMMY>){

        chomp($_);

        if ($ {PDB->list_ChainID("$_")} =~m/$chain/) {

            $FLAG = "ON";

            push(@kept, $_);

        }
        elsif ((/^TER/) && ($FLAG =~ m/ON/)) {

            last;

        }
        elsif ((/^ENDMDL/) && ($FLAG =~ m/ON/)) {

            last;

        }

    }

    close(DUMMY);

    return \@kept;

  }

sub seqnum_by_chain{

    # Lists the distinct residue sequence numbers of one chain.
    #
    #   $_[2] : chain identifier, i.e. the SECOND argument after the
    #           invocant; interpolated into an unanchored, case-sensitive
    #           pattern matched against the chain ID of each line.
    #           NOTE(review): the other single-chain methods take the
    #           chain as their first argument -- confirm against callers
    #           that the second position is intended here.
    #
    # Reads "$ENV{DATABASES}/pdb/<slot 0>.pdb".  When the file cannot be
    # opened a message is printed to STDERR and an empty list results.
    # Each value reported by list_ResSeqNumber is recorded the first time
    # it is seen, in file order.  Reading stops at the first TER or ENDMDL
    # line seen after at least one line of the chain.
    #
    # Returns a reference to the array of residue sequence numbers.

    my ($ref_dummy_array, undef, $chain) = @_;

    my %seen = ();

    my @numbers = ();

    my $FLAG = "OFF";

    open(DUMMY, "$ENV{DATABASES}/pdb/"."$ref_dummy_array->[0]"."\.pdb") or print STDERR "Cannot open file $ref_dummy_array->[0] .pdb\n";

    while(<DUMMY>){

        chomp($_);

        if ($ {PDB->list_ChainID("$_")} =~m/$chain/) {

            $FLAG = "ON";

            my $res_seq = $ {PDB->list_ResSeqNumber("$_")};

            if (!$seen{$res_seq}) {

                $seen{$res_seq} = 1;

                push(@numbers, $res_seq);

            }

        }
        elsif ((/^TER/) && ($FLAG =~ m/ON/)) {

            last;

        }
        elsif ((/^ENDMDL/) && ($FLAG =~ m/ON/)) {

            last;

        }

    }

    close(DUMMY);

    return \@numbers;

  }

sub find_all_chains{

    ######################################################
    #THIS METHOD HAS NOT BEEN TESTED EXTENSIVELY. IT MAY #
    #HANG WHEN GIVEN A VERY LARGE NMR-DERIVED STRUCTURE; #
    #THERE MAY BE AN ARRAY OVERFLOW. USE WITH CAUTION.   #
    ######################################################
    #
    # Lists the distinct chain identifiers found in the file, in order of
    # first appearance.
    #
    # Reads "$ENV{DATABASES}/pdb/<slot 0>.pdb".  When the file cannot be
    # opened a message is printed to STDERR and an empty list results.
    # Only ATOM and HETATM lines are inspected.  Reading stops at the
    # first ENDMDL line seen after at least one chain has been recorded.
    #
    # Returns what filterA returns for the collected identifiers
    # (a reference to an array).

    my ($ref_dummy_array) = @_;

    open(DUMMY, "$ENV{DATABASES}/pdb/"."$ref_dummy_array->[0]"."\.pdb") or print STDERR "Cannot open file $ref_dummy_array->[0] .pdb\n";

    my %seen = ();

    my @chains = ();

    my $FLAG = "OFF";

    while(<DUMMY>){

        chomp($_);

        if (/^ATOM/ or /^HETATM/) {

            my $chain_id = $ {PDB->list_ChainID($_)};

            if (!$seen{$chain_id}) {

                $FLAG = "ON";

                $seen{$chain_id} = 1;

                push(@chains, $chain_id);

            }

        }
        elsif ((/^ENDMDL/) && ($FLAG =~ m/ON/)) {

            last;

        }

    }

    close(DUMMY);

    return PDB->filterA(\@chains);

}

sub load_aa_from_PDB_into_array {

    # Builds the one-letter residue sequence of one chain.
    #
    #   $_[1] : chain identifier; interpolated into an unanchored,
    #           case-sensitive pattern matched against the chain ID
    #
    # The file named by slot 0 of the object is opened as is and the
    # result of open() is not checked.  For every ATOM/HETATM line of the
    # chain whose atom name contains "CA", the residue name is converted
    # with residue_code and appended.  Reading stops at the first TER or
    # ENDMDL line seen after at least one residue has been appended.
    #
    # Returns a reference to the array of one-letter codes.

    my ($ref_dummy_array, $chain) = @_;

    my @sequence = ();

    my $FLAG = "OFF";

    open(DUMMY, "$ref_dummy_array->[0]");

    while(<DUMMY>){

        chomp($_);

        if (/^ATOM/ or /^HETATM/) {

            if (($ {PDB->list_ChainID("$_")} =~m/$chain/) &&
                ($ {PDB->list_Atom("$_")} =~m/CA/)) {

                $FLAG = "ON";

                my $res_name = $ {PDB->list_ResName("$_")};

                push(@sequence, $ {PDB->residue_code($res_name)});

            }

        }
        elsif ((/^TER/) && ( $FLAG =~ m/ON/)) {

            last;

        }
        elsif ((/^ENDMDL/) && ($FLAG =~ m/ON/)) {

            last;

        }

    }

    close(DUMMY);

    return \@sequence;

 }

sub make_fasta_file {

    # Writes the sequence of one chain to "<slot 0>.<chain>.fasta" in the
    # current directory.
    #
    #   $_[1] : chain identifier; used in the output file name and
    #           interpolated into an unanchored, case-sensitive pattern
    #           matched against the chain ID
    #
    # Reads "$ENV{DATABASES}/pdb/<slot 0>.pdb"; a failure to open it is
    # only reported on STDERR.  A failure to create the output file is
    # fatal.  A HEADER line produces the "> ... CHAIN: ..." title line.
    # Every ATOM/HETATM line of the chain whose atom name contains "CA"
    # contributes one letter; a newline is written after every 80th
    # letter.  Reading stops at the first TER or ENDMDL line seen after
    # at least one letter has been written.

    my ($ref_dummy_array, $chain) = @_;

    my $letters_written = 0;

    my $FLAG = "OFF";

    open(DUMMY, "$ENV{DATABASES}/pdb/"."$ref_dummy_array->[0]"."\.pdb") or print STDERR "Cannot open file $ref_dummy_array->[0] .pdb\n";

    open(FASTA, ">$ref_dummy_array->[0]".".$chain".".fasta")
        or die("no such file\n");

    while(<DUMMY>){

        chomp($_);

        if (/^HEADER/) {

            printf(FASTA "> %s  CHAIN: %s\n",${PDB->list_header("$_")},
                   $chain);

        }
        elsif (/^ATOM/ or /^HETATM/) {

            if (($ {PDB->list_ChainID("$_")} =~m/$chain/) &&
                ($ {PDB->list_Atom("$_")} =~m/CA/)) {

                $FLAG = "ON";

                $letters_written++;

                my $res_name = $ {PDB->list_ResName("$_")};

                printf(FASTA "%s",${PDB->residue_code($res_name)});

                # Wrap the sequence at 80 letters per line.
                if ($letters_written % 80 == 0) {
                    printf(FASTA "\n");
                }

            }

        }
        elsif ((/^TER/) && ( $FLAG =~ m/ON/)) {

            last;

        }
        elsif ((/^ENDMDL/) && ( $FLAG =~ m/ON/)) {

            last;

        }

    }

    close(DUMMY);

    close(FASTA);

  }

sub get_seq
{
    # Writes the one-letter residue sequence of one chain to a filehandle,
    # optionally restricted to a start..end residue range.
    #
    #   @_[0] $fh           : output filehandle
    #   @_[1] $chain        : chain identifier; interpolated into an
    #                         unanchored, case-insensitive pattern matched
    #                         against the chain ID of each record
    #   @_[2] $start        : first residue: number with an optional
    #   @_[3] $end          : last residue:  one-character insertion code
    #   @_[4] $column_count : number of letters already written; a newline
    #                         is written after every 80th letter
    #
    # Reads "$ENV{DATABASES}/pdb/<slot 0>.pdb"; a failure to open it is
    # only reported on STDERR.
    #
    # Range mode (both $start and $end defined): output begins with the
    # first record matching $start and ends with the first record matching
    # $end; in between, a record is skipped when its residue number equals
    # that of the previous written residue.
    # NOTE(review): when $start and $end name the same residue, the second
    # record of that residue also passes the end test and a second letter
    # is written -- confirm whether that is intended.
    #
    # Whole-chain mode (otherwise): one letter is written each time the
    # residue number changes.
    #
    # In both modes reading stops at the first TER or ENDMDL line seen
    # once $FLAG is "ON".
    #
    # Returns the updated $column_count.
    my $ref_dummy_array = shift;
    my $fh = @_[0];
    my $chain = @_[1];
    my $start = @_[2];	# not necessarily numbers, sometimes e.g. 1s, 1a
    my $end   = @_[3];
    my $column_count = @_[4];
    my $start_num;
    my $start_ins;
    my $end_num;
    my $end_ins;
    my $PDB_num;
    my $PDB_ins;
    # Split each bound into an optionally negative number and an optional
    # single non-blank insertion code.
    if( $start =~ /^\s*(-?\d*)(\S?)\s*$/ )
    {
	$start_num = $1;
	$start_ins = $2;
    }
    if( $end =~ /^\s*(-?\d*)(\S?)\s*$/ )
    {
	$end_num = $1;
	$end_ins = $2;
    }
    # Residue number of the last residue written; undefined until then.
    my $prev_resnum;
    undef $prev_resnum;

    # Switched to "ON" once output has begun; only then do TER/ENDMDL
    # terminate the scan.
    my $FLAG = "OFF";

    # An unopenable file is reported but not fatal: the loop below then
    # reads nothing.
    open(DUMMY, "$ENV{DATABASES}/pdb/"."$ref_dummy_array->[0]"."\.pdb") or print STDERR "Cannot open file $ref_dummy_array->[0] .pdb\n";

    my $found_start = 0;
    my $found_end   = 0;
    while(<DUMMY>)
    {
	chomp($_);

	if(/^ATOM/
	or /^HETATM/
	)
	{ 
	    if( $ {PDB->list_ChainID("$_")} =~m/$chain/i )
	    {
		# Split this record's residue field the same way as the
		# bounds.  On failure the values of the previous record
		# are kept.
		if( $ {PDB->list_ResSeqNumber("$_")} =~ /^\s*(-?\d*)(\S?)\s*$/ )
		{
		    $PDB_num = $1;
		    $PDB_ins = $2;
		}
		else
		{
		    print STDERR "error: PDB code for $ref_dummy_array->[0]\n";
		}
		if( defined($start) && defined($end) )
		{
		    if( ! $found_start )
		    {
			# The two insertion codes are matched against each
			# other in both directions.
			# NOTE(review): when a code is empty the pattern is
			# empty, which Perl resolves to the last successful
			# pattern -- the outcome therefore depends on the
			# order of the matches above; do not reorder.
			if( ($PDB_num eq $start_num) &&
			    ($PDB_ins =~m/$start_ins/i) &&
			    ($start_ins =~m/$PDB_ins/i) )
			{
			    $found_start = 1;
			    $FLAG = "ON";
			    $column_count++;
			    printf($fh "%s",${PDB->residue_code($ {PDB->list_ResName("$_")})});
			    if($column_count % 80 == 0){printf($fh "\n");}
			    $prev_resnum = $ {PDB->list_ResSeqNumber("$_")};
			}
		    }
		    else
		    {
			# First record of the end residue: write its letter
			# and stop reading.
			if( ($PDB_num eq $end_num) &&
			    ($PDB_ins =~m/$end_ins/i) &&
			    ($end_ins =~m/$PDB_ins/i) )
			{
			    $column_count++;
			    printf($fh "%s",${PDB->residue_code($ {PDB->list_ResName("$_")})});
			    if($column_count % 80 == 0){printf($fh "\n");}
			    $found_end = 1;
			    last;
			}
			else
			{
			    # Inside the range: write one letter per change
			    # of residue number.
			    if( $prev_resnum eq ${PDB->list_ResSeqNumber("$_")} )
			    {
				next;
			    }
			    else
			    {
				$prev_resnum = $ {PDB->list_ResSeqNumber("$_")};
			    }
			    $column_count++;
			    printf($fh "%s",${PDB->residue_code($ {PDB->list_ResName("$_")})});
			    if($column_count % 80 == 0){printf($fh "\n");}
			}
		    }
		}
		else
		{
		    # Whole-chain mode: write one letter per change of
		    # residue number.
		    $FLAG = "ON";
		    if( defined($prev_resnum) )
		    {
			if( $prev_resnum eq ${PDB->list_ResSeqNumber("$_")} )
			{
			    next;
			}
			else
			{
			    $prev_resnum = $ {PDB->list_ResSeqNumber("$_")};
			}
		    }
		    else
		    {
			$prev_resnum = $ {PDB->list_ResSeqNumber("$_")};
		    }
		    $column_count++;
		    printf($fh "%s",${PDB->residue_code($ {PDB->list_ResName("$_")})});
		    if($column_count % 80 == 0){printf($fh "\n");}
		}
	    }
	}
	elsif((/^TER/) && ( $FLAG =~ m/ON/))
	{
	    last;
	}
	elsif((/^ENDMDL/) && ( $FLAG =~ m/ON/))
	{
	    last;
	}
    }
    close(DUMMY);
    # Report a range of which neither the start nor the end was found.
    if( defined($start) && defined($end) &&
	($found_start eq 0) && ($found_end eq 0) )
    {
	print STDERR "Error getting $start-$end for $ref_dummy_array->[0]\n";
    }
    return $column_count;
}

sub get_seq2
{
    # Writes the one-letter residue sequence of one chain to a filehandle.
    #
    #   $_[1] $fh           : output filehandle
    #   $_[2] $chain        : chain identifier; interpolated into an
    #                         unanchored, case-insensitive pattern matched
    #                         against the chain ID of each record
    #   $_[3] $column_count : number of letters already written; a newline
    #                         is written after every 80th letter
    #
    # Reads "$ENV{DATABASES}/pdb/<slot 0>.pdb"; a failure to open it is
    # only reported on STDERR.  One letter is written each time the
    # residue number of the chain's ATOM/HETATM records changes.
    #
    # NOTE(review): $FLAG is never switched to "ON" in this method, so
    # the TER/ENDMDL branches below never stop the scan and the whole
    # file is read -- confirm whether that is intended.
    #
    # Returns the updated $column_count.

    my ($ref_dummy_array, $fh, $chain, $column_count) = @_;

    my ($PDB_num, $PDB_ins);

    # Residue number of the last residue written; undefined until then.
    my $prev_resnum = undef;

    my $FLAG = "OFF";

    open(DUMMY, "$ENV{DATABASES}/pdb/"."$ref_dummy_array->[0]"."\.pdb") or print STDERR "Cannot open file $ref_dummy_array->[0] .pdb\n";

    while(<DUMMY>)
    {
        chomp($_);

        if (/^ATOM/ or /^HETATM/)
        {
            if ($ {PDB->list_ChainID("$_")} =~m/$chain/i)
            {
                my $res_seq = $ {PDB->list_ResSeqNumber("$_")};

                # The residue field must look like a number with an
                # optional insertion code; otherwise it is reported.
                if ($res_seq =~ /^\s*(-?\d*)(\S?)\s*$/)
                {
                    $PDB_num = $1;
                    $PDB_ins = $2;
                }
                else
                {
                    print STDERR "error: PDB code for $ref_dummy_array->[0]\n";
                }

                # Further records of the residue just written are skipped.
                if ($prev_resnum eq $res_seq)
                {
                    next;
                }
                $prev_resnum = $res_seq;

                $column_count++;

                my $res_name = $ {PDB->list_ResName("$_")};
                printf($fh "%s",${PDB->residue_code($res_name)});

                if ($column_count % 80 == 0)
                {
                    printf($fh "\n");
                }
            }
        }
        elsif ((/^TER/) && ( $FLAG =~ m/ON/))
        {
            last;
        }
        elsif ((/^ENDMDL/) && ( $FLAG =~ m/ON/))
        {
            last;
        }
    }

    close(DUMMY);

    return $column_count;
}

sub get_atom
{
    # Copies the ATOM/HETATM records of one chain to a filehandle,
    # optionally restricted to a start..end residue range.
    #
    #   @_[0] $fh    : output filehandle
    #   @_[1] $chain : chain identifier; interpolated into an unanchored,
    #                  case-insensitive pattern matched against the chain
    #                  ID of each record
    #   @_[2] $start : first residue: number with an optional
    #   @_[3] $end   : last residue:  one-character insertion code
    #
    # Reads "$ENV{DATABASES}/pdb/<slot 0>.pdb"; a failure to open it is
    # only reported on STDERR.
    #
    # Range mode (both $start and $end defined): copying begins with the
    # first record matching $start, continues through every record of the
    # end residue, and stops at the first record after it that no longer
    # matches $end.
    #
    # Whole-chain mode (otherwise): every record of the chain is copied.
    #
    # In both modes reading also stops at the first TER or ENDMDL line
    # seen once $FLAG is "ON".
    #
    # Each copied record is written followed by a newline.  There is no
    # explicit return value.
    my $ref_dummy_array = shift;
    my $fh = @_[0];
    my $chain = @_[1];
    my $start = @_[2];	# not necessarily numbers, sometimes e.g. 1s, 1a
    my $end   = @_[3];
    my $start_num;
    my $start_ins;
    my $end_num;
    my $end_ins;
    my $PDB_num;
    my $PDB_ins;
    # Split each bound into an optionally negative number and an optional
    # single non-blank insertion code.
    if( $start =~ /^\s*(-?\d*)(\S?)\s*$/ )
    {
	$start_num = $1;
	$start_ins = $2;
    }
    if( $end =~ /^\s*(-?\d*)(\S?)\s*$/ )
    {
	$end_num = $1;
	$end_ins = $2;
    }
    # Trace line, written to STDOUT on every call.
    print "\tget_atom------- \"$chain\" \"$start\" \"$end\"\n";

    # Switched to "ON" once output has begun; only then do TER/ENDMDL
    # terminate the scan.
    my $FLAG = "OFF";

    # An unopenable file is reported but not fatal: the loop below then
    # reads nothing.
    open(DUMMY, "$ENV{DATABASES}/pdb/"."$ref_dummy_array->[0]"."\.pdb") or print STDERR "Cannot open file $ref_dummy_array->[0] .pdb\n";

    my $found_start = 0;
    my $found_end   = 0;
    while(<DUMMY>)
    {
	chomp($_);

	if(/^ATOM/
	or /^HETATM/
	)
	{ 
	    # Every atom of the chain is considered, not only CA atoms.
	    if( $ {PDB->list_ChainID("$_")} =~m/$chain/i )
	    {
		# Split this record's residue field the same way as the
		# bounds.  On failure the values of the previous record
		# are kept.
		if( $ {PDB->list_ResSeqNumber("$_")} =~ /^\s*(-?\d*)(\S?)\s*$/ )
		{
		    $PDB_num = $1;
		    $PDB_ins = $2;
		}
		else
		{
		    print STDERR "error: PDB code for $ref_dummy_array->[0]\n";
		}
		if( defined($start) && defined($end) )
		{
		    # The range is located by matching the start and end
		    # residues, not by numeric comparison of residue
		    # numbers.

		    if( ! $found_start )
		    {
			# The two insertion codes are matched against each
			# other in both directions.
			# NOTE(review): when a code is empty the pattern is
			# empty, which Perl resolves to the last successful
			# pattern -- the outcome therefore depends on the
			# order of the matches above; do not reorder.
			if( ($PDB_num eq $start_num) &&
			    ($PDB_ins =~m/$start_ins/i) &&
			    ($start_ins =~m/$PDB_ins/i) )
			{
			    $found_start = 1;
			    $FLAG = "ON";
			    printf($fh "%s\n","$_");
			}
		    }
		    else
		    {
			# First record of the end residue: copy it and
			# remember that the end has been reached.
			if( (! $found_end) && ($PDB_num eq $end_num) &&
			    ($PDB_ins =~m/$end_ins/i) &&
			    ($end_ins =~m/$PDB_ins/i) )
			{
			    printf($fh "%s\n","$_");
			    $found_end = 1;
			}
			# First record past the end residue: stop.
			elsif( ($found_end) &&
				!(($PDB_num eq $end_num) &&
				($PDB_ins =~m/$end_ins/i) &&
				($end_ins =~m/$PDB_ins/i))
				)
			{
			    last;
			}
			# Records with a residue number above $end_num are
			# deliberately NOT skipped here (original author's
			# note: "PDB's weird, but it's ok").
			else
			{
			    # Inside the range, or a further record of the
			    # end residue: copy it.
			    printf($fh "%s\n","$_");
			}
		    }
		}
		else
		{
		    # Whole-chain mode: copy every record.
		    $FLAG = "ON";
		    printf($fh "%s\n","$_");
		}
	    }
	}
	elsif((/^TER/) && ( $FLAG =~ m/ON/))
	{
	    last;
	}
	elsif((/^ENDMDL/) && ( $FLAG =~ m/ON/))
	{
	    last;
	}
    }
    close(DUMMY);
    # Report a range of which neither the start nor the end was found.
    if( defined($start) && defined($end) &&
	($found_start eq 0) && ($found_end eq 0) )
    {
	print STDERR "Error getting $start-$end for $ref_dummy_array->[0]\n";
    }
}

sub get_atom2
{
    # Copies the ATOM/HETATM records of one chain to a filehandle.
    #
    #   $_[1] $fh    : output filehandle
    #   $_[2] $chain : chain identifier; interpolated into an unanchored,
    #                  case-insensitive pattern matched against the chain
    #                  ID of each record
    #
    # Reads "$ENV{DATABASES}/pdb/<slot 0>.pdb"; a failure to open it is
    # only reported on STDERR.  Each record of the chain is written
    # followed by a newline.
    #
    # NOTE(review): $FLAG is never switched to "ON" in this method, so
    # the TER/ENDMDL branches below never stop the scan and the whole
    # file is read -- confirm whether that is intended.
    #
    # There is no explicit return value.

    my ($ref_dummy_array, $fh, $chain) = @_;

    my ($PDB_num, $PDB_ins);

    my $FLAG = "OFF";

    open(DUMMY, "$ENV{DATABASES}/pdb/"."$ref_dummy_array->[0]"."\.pdb") or print STDERR "Cannot open file\n $ref_dummy_array->[0] .pdb";

    while(<DUMMY>)
    {
        chomp($_);

        if (/^ATOM/ or /^HETATM/)
        {
            if ($ {PDB->list_ChainID("$_")} =~m/$chain/i)
            {
                # The residue field must look like a number with an
                # optional insertion code; otherwise it is reported.
                # The record is copied in either case.
                if ($ {PDB->list_ResSeqNumber("$_")} =~ /^\s*(-?\d*)(\S?)\s*$/)
                {
                    $PDB_num = $1;
                    $PDB_ins = $2;
                }
                else
                {
                    print STDERR "error: PDB code for $ref_dummy_array->[0]\n";
                }

                printf($fh "%s\n","$_");
            }
        }
        elsif ((/^TER/) && ( $FLAG =~ m/ON/))
        {
            last;
        }
        elsif ((/^ENDMDL/) && ( $FLAG =~ m/ON/))
        {
            last;
        }
    }

    close(DUMMY);
}

sub chain_mass {

    # Estimates the mass of one chain from its residues.
    #
    #   $_[1] : chain identifier; interpolated into an unanchored,
    #           case-sensitive pattern matched against the chain ID
    #
    # The file named by slot 0 of the object is opened as is and the
    # result of open() is not checked.  For every ATOM/HETATM line of the
    # chain whose atom name contains "CA", the residue name is converted
    # to a one-letter code and its residue_mass is added to the total.
    # Reading stops at the first TER or ENDMDL line seen after at least
    # one residue has been counted.  Finally 18 is subtracted once per
    # counted residue.
    # NOTE(review): the subtraction is per residue, not per bond between
    # residues -- confirm that this is the intended correction.
    #
    # Returns a reference to the resulting mass.

    my ($ref_dummy_array, $chain) = @_;

    my $total = 0;

    my $residues = 0;

    my $FLAG = "OFF";

    open(DUMMY, "$ref_dummy_array->[0]");

    while(<DUMMY>){

        chomp($_);

        if (/^ATOM/ or /^HETATM/) {

            if (($ {PDB->list_ChainID("$_")} =~m/$chain/)
                && ($ {PDB->list_Atom("$_")} =~m/CA/)) {

                $FLAG = "ON";

                my $res_name = ${PDB->list_ResName("$_")};

                my $letter = ${PDB->residue_code($res_name)};

                $total += ${PDB->residue_mass($letter)};

                $residues++;

            }

        }
        elsif ((/^TER/) && ( $FLAG =~ m/ON/)) {

            last;

        }
        elsif ((/^ENDMDL/) && ( $FLAG =~ m/ON/)) {

            last;

        }

    }

    close(DUMMY);

    $total -= $residues * 18;

    return \$total;

 }

1;





