
/****************************************************************************** 
 * 
 *  file:  Alignment.cpp
 * 
 *  Copyright (c) 2003,  University of Virginia..
 *  All rights reserved.
 * 
 *  See the file COPYRIGHT in the top directory of this distribution for
 *  more information.
 *  
 *  THE SOFTWARE IS PROVIDED _AS IS_, WITHOUT WARRANTY OF ANY KIND, EXPRESS 
 *  OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
 *  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 
 *  THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 
 *  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
 *  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
 *  DEALINGS IN THE SOFTWARE.  
 *  
 *****************************************************************************/ 


#include "Alignment.h"

namespace NOPT {

//------------------------------------------------------------------------
// Default constructor: both gapped sequences are empty, the score is 0
// and all four begin/end indices are 0.
//------------------------------------------------------------------------
Alignment::Alignment()
: _seq1(),
  _seq2(),
  _score(0),
  _begin1Index(0), _begin2Index(0),
  _end1Index(0),   _end2Index(0)
{
}

//------------------------------------------------------------------------
// Copy constructor: duplicates both sequences, the score and all four
// begin/end indices of the source alignment.
//------------------------------------------------------------------------
Alignment::Alignment(const Alignment& a)
: _seq1(a._seq1), _seq2(a._seq2),
  _score(a._score),
  _begin1Index(a._begin1Index), _begin2Index(a._begin2Index),
  _end1Index(a._end1Index),     _end2Index(a._end2Index)
{
}

//------------------------------------------------------------------------
// Builds an alignment directly from its parts. The values are stored
// exactly as given; nothing is validated or recomputed here.
//------------------------------------------------------------------------
Alignment::Alignment(const string& seq1, const string& seq2, int score,
                     int begin1Index, int begin2Index,
                     int end1Index, int end2Index)
: _seq1(seq1), _seq2(seq2),
  _score(score),
  _begin1Index(begin1Index), _begin2Index(begin2Index),
  _end1Index(end1Index),     _end2Index(end2Index)
{
}

//------------------------------------------------------------------------
// Builds an alignment from two already-gapped sequences and calculates
// its score from the scoring matrix and the gap penalties by calling
// SWScore(). The begin indices are set to 0 and the end indices to the
// last position of each sequence (length - 1).
//
// Throws GenericException if the two sequences differ in length; any
// exception raised by SWScore() is propagated as well.
//------------------------------------------------------------------------
Alignment::Alignment(const string& seq1, 
				     const string& seq2, 
					 ScoringMatrix& sm,
					 int gapCreate,
					 int gapExtend )
: 
  _seq1(seq1),
  _seq2(seq2),
  _score(0),
  _begin1Index(0),
  _begin2Index(0),
  _end1Index(seq1.length()-1),
  _end2Index(seq2.length()-1)
{ 
	// Report the length of *each* sequence; the message used to print
	// the length of seq1 twice.
	if ( _seq1.length() != _seq2.length() )
		throw(GenericException("sequences of unequal length!  seq1: " + 
								tos(_seq1.length()) + "  seq2: " + 
								tos(_seq2.length()) ));

	_score = Alignment::SWScore( _seq1, _seq2, sm, gapCreate, gapExtend );

}

//------------------------------------------------------------------------
// Static method that returns the smith-waterman score for two aligned
// sequences given the gapCreate, gapExtend and scoring matrix.
//------------------------------------------------------------------------
int Alignment::SWScore( const string& seq1, 
				        const string& seq2, 
					    ScoringMatrix& sm,
					    int gapCreate,
					    int gapExtend,
						bool debug )
{
	if ( seq1.length() != seq2.length() )
		throw(GenericException("sequences of unequal length!  seq1: " + 
								tos(seq1.length()) + "  seq2: " + 
								tos(seq2.length()) ));
	int totalScore = 0;
	int seq1GapCount = 0;
	int seq2GapCount = 0;

	for ( int i = 0; (unsigned int)i < seq1.length(); i++ )
	{
		if ( debug )
			cout << seq1[i] << "  " << seq2[i] << "  ->  " ;

		if ( seq1[i] != gap && seq2[i] != gap )
		{
			totalScore += sm.score( seq1[i], seq2[i] );

			if ( debug )
				cout << sm.score( seq1[i], seq2[i] );
		
			if ( seq1GapCount > 0 )
			{
				totalScore += gapCreate + ( gapExtend * seq1GapCount );
				if ( debug )
					cout << "  s1 gap cost -> " 
				    	 << gapCreate + ( gapExtend * seq1GapCount );
				
			}
			if ( seq2GapCount > 0 )
			{
				totalScore += gapCreate + ( gapExtend * seq2GapCount );
				if ( debug )
					cout << "  s2 gap cost -> " 
				   	     << gapCreate + ( gapExtend * seq2GapCount );
			}

			if ( debug )
				cout << "    running score: "  << totalScore << endl;

			seq1GapCount = 0;
			seq2GapCount = 0;
		}
		else if ( seq1[i] != gap && seq2[i] == gap )
		{
			if ( debug )
				cout << "s2 gap" << endl;

			seq2GapCount++;

			if ( seq1GapCount > 0 )
			{
				totalScore += gapCreate + ( gapExtend * seq1GapCount );
				if ( debug )
					cout << "  s1 gap cost -> " 
				   		  << gapCreate + ( gapExtend * seq1GapCount )
				   		  << "    running score: "  << totalScore << endl;
				
				seq1GapCount = 0;
			}
		}
		else if ( seq1[i] == gap && seq2[i] != gap )
		{
			if ( debug )
				cout << "s1 gap" << endl;

			seq1GapCount++;

			if ( seq2GapCount > 0 )
			{
				totalScore += gapCreate + ( gapExtend * seq2GapCount );
				if ( debug )
					cout << "  s2 gap cost -> " 
				   		  << gapCreate + ( gapExtend * seq2GapCount )
				   		  << "    running score: "  << totalScore << endl;

				seq2GapCount = 0;
			}
		}
		else
			throw(GenericException("Invalid sequence characters: seq1[i] " +
								tos( seq1[i] ) + "   seq2[i] " +
								tos( seq2[i] ) + "\n"));
	}

	// account for any gaps on the end
	if ( seq1GapCount > 0 )
	{
		totalScore += gapCreate + ( gapExtend * seq1GapCount );
		if ( debug )
			cout << "  end s1 gap cost -> " 
		   		  << gapCreate + ( gapExtend * seq1GapCount ) << endl;
	}
	if ( seq2GapCount > 0 )
	{
		totalScore += gapCreate + ( gapExtend * seq2GapCount );
		if ( debug )
			cout << "  end s2 gap cost -> " 
		   		  << gapCreate + ( gapExtend * seq2GapCount ) << endl;
	}

	return totalScore;
}


//------------------------------------------------------------------------
// Assignment operator. Copies every member that the copy constructor
// copies: both sequences, the score and the four begin/end indices.
// (The indices used to be left untouched, so an assigned-to alignment
// kept its old index values.) Self-assignment is a no-op.
//------------------------------------------------------------------------
Alignment& Alignment::operator=(const Alignment& a)
{
	if ( &a != this )
	{
		_seq1 = a._seq1;
		_seq2 = a._seq2;
		_score = a._score;
		_begin1Index = a._begin1Index;
		_begin2Index = a._begin2Index;
		_end1Index = a._end1Index;
		_end2Index = a._end2Index;
	}
	return *this;
}

//------------------------------------------------------------------------
// Two alignments compare equal when their first sequences, scores and
// second sequences all match. The begin/end indices are not compared.
//------------------------------------------------------------------------
bool Alignment::operator==(const Alignment& a)
{
	return ( _seq1 == a._seq1 ) &&
		   ( _score == a._score ) &&
		   ( _seq2 == a._seq2 );
}

//------------------------------------------------------------------------
// Orders alignments by score only: true when this score is lower.
//------------------------------------------------------------------------
bool Alignment::operator<(const Alignment& a) const
{
	return _score < a._score;
}

//------------------------------------------------------------------------
// Writes the alignment as  score|seq1|seq2  to the stream.
//------------------------------------------------------------------------
ostream& operator<<(ostream& os, const Alignment& a)
{
	os << a._score;
	os << "|" << a._seq1;
	os << "|" << a._seq2;
	return os;
}


//------------------------------------------------------------------------
// Returns the alignment as a concatenation of XML::tag() elements, in
// this order: gappedSequence1, gappedSequence2, score, begin1Index,
// begin2Index, end1Index, end2Index.
//------------------------------------------------------------------------
string Alignment::getXml() const
{
	string xml = XML::tag("gappedSequence1",_seq1);

	xml += XML::tag("gappedSequence2",_seq2);
	xml += XML::tag("score",tos(_score));
	xml += XML::tag("begin1Index",tos(_begin1Index));
	xml += XML::tag("begin2Index",tos(_begin2Index));
	xml += XML::tag("end1Index",tos(_end1Index));
	xml += XML::tag("end2Index",tos(_end2Index));

	return xml;
}

// Returns a copy of the first gapped sequence.
string Alignment::getSeq1() const { return _seq1; }

// Returns a copy of the second gapped sequence.
string Alignment::getSeq2() const { return _seq2; }

// Returns the stored alignment score.
int Alignment::getScore() const { return _score; }

//------------------------------------------------------------------------
// Returns the alignment as one '|'-separated line:
//
//   begin1Index|begin2Index|end1Index|end2Index|score|<alignment>
//
// <alignment> is  seq1|seq2  when uncompress is true, otherwise the
// string produced by compress() for the two gapped sequences.
//------------------------------------------------------------------------
string Alignment::getAscii(bool uncompress ) const
{
	string line = tos(_begin1Index);
	line += "|";
	line += tos(_begin2Index);
	line += "|";
	line += tos(_end1Index);
	line += "|";
	line += tos(_end2Index);
	line += "|";
	line += tos(_score);
	line += "|";

	if ( !uncompress )
	{
		line += compress(_seq1,_seq2);
		return line;
	}

	line += _seq1;
	line += "|";
	line += _seq2;
	return line;
}


//------------------------------------------------------------------------
// Run-length encodes a pair of gapped sequences.
//
// Every column is first classified as match (no gap), topGap (gap in s1)
// or botGap (gap in s2); runs of identical symbols are then written as
// the symbol followed by the run length.
//
// Returns the encoded string, or an empty string when the sequences are
// empty (reading the first symbol of an empty classification used to
// put a NUL sentinel and a zero count in the output).
//
// Throws GenericException if the sequences differ in length (s2 used to
// be read out of range when shorter than s1) or if a column has a gap in
// both sequences. The "bad gap match" check existed before but could
// never be reached, so such columns were silently encoded as topGap.
//------------------------------------------------------------------------
string Alignment::compress(const string& s1, const string& s2 ) const
{
	if ( s1.length() != s2.length() )
		throw(GenericException("sequences of unequal length!  seq1: " + 
								tos(s1.length()) + "  seq2: " + 
								tos(s2.length()) ));

	// nothing to encode
	if ( s1.empty() )
		return string();

	string d = "";

	// turn strings into topGap, botGap, match
	for( string::size_type i = 0; i < s1.length(); i++ )
	{
		const bool gap1 = ( s1[i] == gap );
		const bool gap2 = ( s2[i] == gap );

		// a gap aligned with a gap has no encoding
		if ( gap1 && gap2 )
			throw(GenericException("bad gap match " + tos(s1[i]) 
								   + " " + tos(s2[i])));

		if ( gap1 )
			d += topGap;
		else if ( gap2 )
			d += botGap;
		else
			d += match;
	}

	// replace dupes; d is non-empty here, so d[0] is a real symbol
	string c = ""; 
	char curr = d[0];
	int count = 0;

	for( string::size_type i = 0; i < d.length(); i++ )
	{
		if ( curr == d[i] )
			count++;
		else
		{
			c += tos(curr) + tos(count);
			curr = d[i];
			count = 1;
		}
	}
	// flush the final run
	c += tos(curr) + tos(count);

	return c;
}

// 
// Reference implementation of uncompress. Not used for anything.
//
void Alignment::uncompress(const string& c, string& s1, string& s2,
				           const string& seq1, const string& seq2) const
{

	int s1Count = 0;
	int s2Count = 0;
	string num = "";

	int i = 0;
	while ( (unsigned int)i < c.length() )
	{
		// peel off the sentinel
		char sentinel = c[i++];

		// peel off the count
		while ( (unsigned int)i < c.length() && 
				c[i] != match  && 
				c[i] != topGap && 
				c[i] != botGap ) 
			num += c[i++];

		int count = atoi( num.c_str() );
		num.erase();

		// Now reconstruct the original alignments based on the 
		// sentinel, count, and sequences. 
		if ( sentinel == match )
		{
			while ( count > 0 )
			{
				s1 += seq1[ s1Count++ ];
				s2 += seq2[ s2Count++ ];
				count--;
			}
		}
		else if ( sentinel == topGap )
		{
			while ( count > 0 )
			{
				s1 += gap;
				s2 += seq2[ s2Count++ ];
				count--;
			}
		}
		else if ( sentinel == botGap )
		{
			while ( count > 0 )
			{
				s1 += seq1[ s1Count++ ];
				s2 += gap;
				count--;
			}
		}
	}	
}	


//------------------------------------------------------------------------
// Builds an alignment from the raw seqs and a MyersMiller script. 
//
// The script is consumed one entry at a time:
//   0   -> one aligned column (next char of s1 over next char of s2)
//   k>0 -> k columns with a gap in seq1 and the next k chars of s2
//   k<0 -> |k| columns with the next |k| chars of s1 and a gap in seq2
//
// Reading of s1 and s2 starts at position 1 because of the space
// prepended to the sequence strings. The score and the begin/end
// indices are stored as given.
//
// Throws GenericException when the script and the sequences do not fit
// together: the script runs out before both sequences are consumed, or
// an entry asks for a character beyond the end of a sequence. Both
// cases used to read out of range.
//------------------------------------------------------------------------
Alignment::Alignment(const string& s1, const string& s2,
				     const vector<int>& script, int score,
					 int begin1Index, int begin2Index,
					 int end1Index, int end2Index ) 
: _score(score),
  _begin1Index(begin1Index),
  _begin2Index(begin2Index),
  _end1Index(end1Index),
  _end2Index(end2Index)
{
	// remaining columns of the script entry currently being expanded
	int op = 0;

	// note, this is because of the prepended space to the sequence strings.
	string::size_type i1 = 1;
	string::size_type i2 = 1; 

	vector<int>::size_type index = 0; 

	while ( i1 < s1.length() || i2 < s2.length() )
	{
		// Fetch the next script entry once the current one is used up.
		if ( op == 0 )
		{
			if ( index >= script.size() )
				throw(GenericException("alignment script ended before the sequences!  script length: " + 
										tos(script.size()) ));
			op = script[index++];
		}

		const bool need1 = ( op <= 0 );	// column takes a char from s1
		const bool need2 = ( op >= 0 );	// column takes a char from s2

		if ( ( need1 && i1 >= s1.length() ) || 
			 ( need2 && i2 >= s2.length() ) )
			throw(GenericException("alignment script runs past the end of a sequence!  seq1: " + 
									tos(s1.length()) + "  seq2: " + 
									tos(s2.length()) ));

		if ( op == 0 )
		{
			// aligned pair
			_seq1 += s1[ i1 ]; 
			_seq2 += s2[ i2 ]; 
			i1++;
			i2++;
		}
		else if ( op > 0 )
		{
			// gap in seq1
			_seq1 += gap;
			_seq2 += s2[ i2 ];
			i2++;
			op--;
		}       
		else
		{
			// gap in seq2
			_seq1 += s1[ i1 ];
			_seq2 += gap;
			i1++;
			op++;
		}
	}
}             


}
