
/****************************************************************************** 
 * 
 *  file:  WatermanByersGenerator.cpp
 * 
 *  Copyright (c) 2003,  University of Virginia..
 *  All rights reserved.
 * 
 *  See the file COPYRIGHT in the top directory of this distribution for
 *  more information.
 *  
 *  THE SOFTWARE IS PROVIDED _AS IS_, WITHOUT WARRANTY OF ANY KIND, EXPRESS 
 *  OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
 *  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 
 *  THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 
 *  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
 *  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
 *  DEALINGS IN THE SOFTWARE.  
 *  
 *****************************************************************************/ 


#include "WatermanByersGenerator.h"

namespace NOPT {

//--------------------------------------------------------------------------
//
//--------------------------------------------------------------------------
WatermanByersGenerator::WatermanByersGenerator(
	int                gapCreate,
	int                gapExtend,
	ScoringMatrix*     pam,
	bool               isDNA,
	const string&      seq1,
	const string&      seq2,
	float              lowerBoundPercent,
	float              upperBoundPercent,
	int                debug,
	bool               local )
	: AlignmentGenerator( gapCreate,
	                      gapExtend,
	                      pam,
	                      isDNA,
	                      seq1,
	                      seq2,
	                      lowerBoundPercent,
	                      upperBoundPercent,
	                      debug,
	                      local,
	                      "Waterman-Byers" )
{
	//
	// Every argument is handed unchanged to the AlignmentGenerator base
	// class, together with the fixed label "Waterman-Byers".  Once the base
	// is set up, the score matrix, optimal score and bounds are computed
	// immediately so the object is ready for _generate().
	//
	_createMatrices();
}

//--------------------------------------------------------------------------
//
//--------------------------------------------------------------------------
WatermanByersGenerator::~WatermanByersGenerator()
{
	// Intentionally empty: no explicit cleanup is performed here.
}

//--------------------------------------------------------------------------
// Creates the initial score matrix which is referenced from elements as
// the stack is traversed.  Also figures out the optimal score and the
// bounds.
//--------------------------------------------------------------------------
void WatermanByersGenerator::_createMatrices()
{
	string seq1 = " " + _seq1;
	string seq2 = " " + _seq2;

	_fMatrix.resize( seq1.length() );
	for ( int i = 0; (unsigned int)i < _fMatrix.size(); i++ )
		_fMatrix[i].resize( seq2.length() );

	_fill( seq1, seq2, _fMatrix );

    _optimalScore = _fMatrix[seq1.length() - 1][seq2.length() - 1];

	_calculateBounds();
}


//--------------------------------------------------------------------------
// This method creates the initial elements on the stack and then 
// calls processElements which builds the actual sequences.
//
// NOTE that if we're about to create a gap, we check for inclusion
// against the _lowerBound plus the _gapCreate penalty.  
// The reason for this is that since we're approaching the gap from both
// sides we're effectively including the gap create penalty twice, once 
// when the stack creates the gap and once when the forward path creates
// the gap. We accommodate this by effectively lowering the _lowerBound. 
// Note that when the currentScore is calculated we don't do anything
// special.
//
//--------------------------------------------------------------------------
void WatermanByersGenerator::_generate( bool uncompress ) 
{
	//
	// Seeds the stack with up to three starting moves (across, diag, down)
	// out of the end cell ( _seq1.length(), _seq2.length() ) and then drains
	// the stack through _processElement(), which builds and emits the
	// alignments.
	//
	// uncompress is passed through untouched to _processElement().
	//
	const int begin_i = _seq1.length();
	const int begin_j = _seq2.length();

	//
	// A move is only legal if there is a character left to consume in that
	// direction.  These are the same curr_i > 0 / curr_j > 0 guards that
	// _processElement() applies before each extension; without them an
	// empty sequence would index _seq1, _seq2 and _fMatrix at -1.
	//
	const bool canAcross = ( begin_j > 0 );
	const bool canDown   = ( begin_i > 0 );
	const bool canDiag   = canAcross && canDown;

	// initial across: stay in the same row, step one column back
	const int across_i = begin_i;
	const int across_j = begin_j - 1;

	// Gap-opening moves are tested against the relaxed bound
	// ( _lowerBound + _gapCreate ).
	if ( canAcross &&
		 ( ( _gapCreate + _gapExtend + _fMatrix[ across_i ][ across_j ] ) >= 
		   ( _lowerBound + _gapCreate ) ) )
	{
		Element *across = new Element( begin_i, begin_j, across_i, across_j, 
						               _gapCreate + _gapExtend, 
						               ACROSS, 0 );
		_stack.push( across );
	}

	// initial diag: step one row and one column back
	const int diag_i = begin_i - 1;
	const int diag_j = begin_j - 1;

	// Substitution score of the last character pair; only looked up when
	// both characters exist.
	const int diagScore = canDiag 
		? _sm->score( _seq1[ diag_i ], _seq2[ diag_j ] ) 
		: 0;

	// Diagonal moves are tested against the real lower bound.
	if ( canDiag &&
		 ( ( diagScore + _fMatrix[ diag_i ][ diag_j ] ) >= _lowerBound ) )
	{
		Element *diag = new Element( begin_i, begin_j, diag_i, diag_j, 
						             diagScore,
						             DIAG, 0 );
		_stack.push( diag );
	}

	// initial down: step one row back, stay in the same column
	const int down_i = begin_i - 1;
	const int down_j = begin_j;

	if ( canDown &&
		 ( ( _gapCreate + _gapExtend + _fMatrix[ down_i ][ down_j ] ) >= 
		   ( _lowerBound + _gapCreate ) ) )
	{
		Element *down = new Element( begin_i, begin_j, down_i, down_j, 
						             _gapCreate + _gapExtend, 
						             DOWN, 0);
		_stack.push( down );
	}						

	//
	// Debug dump of the seed values.  Lines that would need an out-of-range
	// index (empty sequence) are left out; for non-empty sequences the
	// output is the full seven lines.
	//
	if ( _debug & DEBUG_MORE )
	{
		cout << "init: " << endl
		     << "  lower bound: " << _lowerBound << endl;

		if ( canAcross )
			cout << "  across: " << _fMatrix[ across_i ][ across_j ] << endl;

		if ( canDiag )
			cout << "  diag: " << _fMatrix[ diag_i ][ diag_j ] << endl;

		if ( canDown )
			cout << "  down: " << _fMatrix[ down_i ][ down_j ] << endl;

		if ( canDiag )
			cout << "  sm: " << diagScore << endl;

		cout << "  gap: " << _gapCreate + _gapExtend << endl;
	}

	// Each call pops one element and may push several more.
	while ( !_stack.empty() )
		_processElement( uncompress );

	if ( _debug & DEBUG_SOME )
		cout << "Stack is empty!" << endl;
} 

//--------------------------------------------------------------------------
// Pops the top element from the stack and evaluates possible extensions
// to the sequence.  New extensions, in the form of elements, are pushed onto
// the stack.  
//
// See note in _generate() comments about how we check for inclusion on
// the creation of gaps.
//--------------------------------------------------------------------------
void WatermanByersGenerator::_processElement(bool uncompress)
{
	Element *el = _stack.top();
	_stack.pop();

	//
	// Truncate the sequence based on the length pointer.  This really
	// only does something once we've completed an alignment and are
	// reaching back down into the stack to begin the next one.
	//
	int seqLen = el->getSeqLength();
	_currentSeq1.erase( seqLen ); 
	_currentSeq2.erase( seqLen );

	int direction = el->getDirection();
	int curr_i = el->getNextI();
	int curr_j = el->getNextJ();
	int currScore = el->getScore();

	if ( _debug & DEBUG_MORE )
		cout << "---------------------------------------" << endl;

	//
	// Build the sequences
	//
	if ( ( direction & DOWN ) || ( direction & EXTENDED_DOWN ) )
	{
		if ( _debug & DEBUG_MORE )
		{
			if  ( direction & DOWN ) 
				cout << "direction: DOWN" << endl;
			else
				cout << "direction: EXTENDED DOWN" << endl;
		}

		_currentSeq1 += _seq1[ curr_i ]; 
		_currentSeq2 += Alignment::gap; 
	}
	else if ( ( direction & ACROSS ) || ( direction & EXTENDED_ACROSS ) ) 
	{
		if ( _debug & DEBUG_MORE )
		{
			if ( direction & ACROSS  )
				cout << "direction: ACROSS" << endl;
			else
				cout << "direction: EXTENDED ACROSS" << endl;
		}

		_currentSeq1 += Alignment::gap; 
		_currentSeq2 += _seq2[ curr_j ];
	}
	else // DIAG
	{
		if ( _debug & DEBUG_MORE )
			cout << "direction: DIAG" << endl;

		_currentSeq1 += _seq1[ curr_i ]; 
		_currentSeq2 += _seq2[ curr_j ];
	}

	if ( _debug & DEBUG_MORE )
		cout << "curr_i: " << curr_i << endl
		     << "curr_j: " << curr_j << endl
		     << "len: " << seqLen << endl
		     << "seq1: " << _currentSeq1 << endl
		     << "seq2: " << _currentSeq2 << endl
		     << "current Score " << currScore << endl
		     << "lowerBound " << _lowerBound << endl
		     << "(tmp) lowerBound " << _lowerBound + _gapCreate << endl;

	//
	// We've reached the beginning of the seqs, so create the alignment.
	//
	if ( ( curr_i == 0 ) && ( curr_j == 0 ) )
	{
	 	if ( currScore >= _lowerBound ) 
		{

			string rev1 = _currentSeq1;
			string rev2 = _currentSeq2;

			reverse( rev1.begin(), rev1.end() );
			reverse( rev2.begin(), rev2.end() );

			if ( _debug & DEBUG_MORE )
				cout << "creating alignment" << endl
				     << "score " << currScore << endl
				     << "rev1 " << rev1 << endl
				     << "rev2 " << rev2 << endl
			         << "===================================" << endl << endl;

			if ( _performCount )
				_countAlignment( rev1, rev2 );
			else
			{
				Alignment a( rev1, rev2, currScore, 0,0, 
							 rev1.length(), rev2.length());

				cout << _getAscii( a, _numAlignments, uncompress ) << endl; 
			}

			_numAlignments++;
		}
		else
			if ( _debug & DEBUG_MORE )
				cout << "reached end, but currScore is too low:"
					 << " Skipping alignment" << endl
					 << "   score:      " << currScore << endl
					 << "   lowerBound: " << _lowerBound << endl << endl;
	}

	//
	// Evaluate each of the possible extensions from the curr position
	// and add a new element to the stack if necessary.
	//
	else
	{
		// 
		// Extended Down 
		// Only extend a gap down if a gap has already been started.
		//
		if ( ( curr_i > 0 ) && 
		     ( ( direction & DOWN ) || 
			   ( direction & EXTENDED_DOWN ) ) )
		{
			int xDown_i = curr_i - 1;
			int xDown_j = curr_j;

			if ( _debug & DEBUG_MORE )
				cout << "  score:  " 
					 << _gapExtend  << " + "
					 << _fMatrix[ xDown_i ][ xDown_j ]; 

			if ( ( currScore + _gapExtend + _fMatrix[ xDown_i ][ xDown_j ] ) >= 
				 ( _lowerBound + _gapCreate ) )
			{
				Element *xd = new Element( curr_i, curr_j, xDown_i, xDown_j, 
								           currScore + _gapExtend, 
								           EXTENDED_DOWN, seqLen + 1 );	
				_stack.push( xd );

				if ( _debug & DEBUG_MORE )
					cout << "  pushed extended down " << endl; 
			}
			else
				if ( _debug & DEBUG_MORE )
					cout << "  not extending down" << endl;
		}
		else
			if ( _debug & DEBUG_MORE )
				cout << "  skipping extended down" << endl;

		// 
		// Extended Across 
		// Only extend a gap across if a gap has already been started.
		//
		if ( ( curr_j > 0 ) && 
		     ( ( direction & ACROSS ) || 
			   ( direction & EXTENDED_ACROSS ) ) )
		{
			int xAcross_i = curr_i;
			int xAcross_j = curr_j - 1;

			if ( _debug & DEBUG_MORE )
				cout << "  score:  " 
					 << _gapExtend  << " + "
					 << _fMatrix[ xAcross_i ][ xAcross_j ]; 
			
			if ( ( currScore + _gapExtend + 
				   _fMatrix[ xAcross_i ][ xAcross_j ] ) >= 
                 ( _lowerBound + _gapCreate ) )
			{
				Element *xa = new Element( curr_i, curr_j, 
								           xAcross_i, xAcross_j, 
								           currScore + _gapExtend,
									       EXTENDED_ACROSS, seqLen + 1);
				_stack.push( xa );
				if ( _debug & DEBUG_MORE )
					cout << "  pushed extended across " << endl; 
			}
			else
				if ( _debug & DEBUG_MORE )
					cout << "  not extending across" << endl;
		}
		else
			if ( _debug & DEBUG_MORE )
				cout << "  skipping extended across" << endl;

		// 
		// Diagonal
		//
		// Note that for Diagonal we can use the real lowerbound to
		// check for inclusion.
		//
		if ( ( curr_i > 0 ) && ( curr_j > 0 ) )
		{
			int diag_i = curr_i - 1;
			int diag_j = curr_j - 1;
			int dscore = _sm->score( _seq1[ diag_i ], _seq2[ diag_j ] );

			if ( _debug & DEBUG_MORE )
				cout << "  score:  " << dscore << "(" 
					 << _seq1[ diag_i ] << _seq2[ diag_j ] << ")"
					 << " + " << _fMatrix[ diag_i ][ diag_j]; 

			if ( ( currScore + dscore + _fMatrix[ diag_i ][ diag_j ] ) >= 
				 _lowerBound )
			{
		
				Element *d = new Element( curr_i, curr_j, diag_i, diag_j, 
								          currScore + dscore, 
										  DIAG, seqLen +1 );
				_stack.push(d);
				if ( _debug & DEBUG_MORE )
					cout << "  pushed diag  " << endl;
			}
			else
				if ( _debug & DEBUG_MORE )
					cout << "  not diag" << endl;
		}
		else
			if ( _debug & DEBUG_MORE )
				cout << "  skipping diag" << endl;

		// 
		// Begin Gap Across
		// Only create a gap if we're coming off of a diagonal.
		//
		if ( ( curr_j > 0 ) && 
			 ( ( direction & DIAG )  ||
			   ( direction & DOWN )  ||
			   ( direction & EXTENDED_DOWN ) ) )
		{
			int across_i = curr_i;
			int across_j = curr_j - 1;

			if ( _debug & DEBUG_MORE )
				cout << "  score:  " << _gapCreate << " + " 
					 << _gapExtend << " + "
					 << _fMatrix[ across_i ][ across_j ]; 

			if ( ( currScore + _gapCreate + _gapExtend + 
				   _fMatrix[ across_i ][ across_j ] ) >= 
				 ( _lowerBound + _gapCreate ) )
			{
				Element *a = new Element( curr_i, curr_j, across_i, across_j, 
								          currScore + _gapCreate + _gapExtend,
										  ACROSS , seqLen +1);
				_stack.push(a);
				if ( _debug & DEBUG_MORE )
					cout << "  pushed across " << endl; 
			}
			else
				if ( _debug & DEBUG_MORE )
					cout << "  not across" << endl;
		}
		else
			if ( _debug & DEBUG_MORE )
				cout << "  skipping across" << endl;

		// 
		// Begin Gap Down
		// Only create a gap if we're coming off of a diagonal.
		//
		if ( ( curr_i > 0 ) && 
			 ( ( direction & DIAG )  ||
			   ( direction & ACROSS )  ||
			   ( direction & EXTENDED_ACROSS ) ) )
		{
			int down_i = curr_i - 1;
			int down_j = curr_j;

			if ( _debug & DEBUG_MORE )
				cout << "  score:  " << _gapCreate << " + " 
					 << _gapExtend << " + "
					 << _fMatrix[ down_i ][ down_j ]; 

			if ( ( currScore + _gapCreate + _gapExtend + 
				   _fMatrix[ down_i ][ down_j ] ) >= 
				 ( _lowerBound + _gapCreate ) )
			{
				Element *dd = new Element( curr_i, curr_j, down_i, down_j, 
								           currScore + _gapCreate + _gapExtend,
									       DOWN, seqLen + 1);
				_stack.push(dd);
				if ( _debug & DEBUG_MORE )
					cout << "  pushed down " << endl; 
			}
			else
				if ( _debug & DEBUG_MORE )
					cout << "  not down" << endl;
		}
		else
			if ( _debug & DEBUG_MORE )
				cout << "  skipping down" << endl;
	}


	delete el;
} 

//--------------------------------------------------------------------------
// Writes counts to stdout.
//--------------------------------------------------------------------------
void WatermanByersGenerator::_doCounts() 
{
	// Run the generator with the uncompress option switched off.
	const bool uncompress = false;
	_generate( uncompress );
}

//--------------------------------------------------------------------------
// Writes single alignment in ascii text.
//--------------------------------------------------------------------------
string WatermanByersGenerator::_getAscii(const Alignment& a,
										 int num,
				                         bool uncompress)  const
{
	//
	// Produces one output line of the form
	//     a_<num>~<_paramSum>|<alignment text>
	// where the alignment text comes from a.getAscii( uncompress ).
	//
	string line = "a_";
	line += tos(num);
	line += "~";
	line += tos(_paramSum);
	line += "|";
	line += a.getAscii(uncompress);
	return line;
}


//--------------------------------------------------------------------------
// Writes alignments in ascii text.
//--------------------------------------------------------------------------
void WatermanByersGenerator::getAscii(bool uncompress) 
{
	cout << "seq1~" << _seq1 << endl;
	cout << "seq2~" << _seq1 << endl;
	_generate( uncompress );
	cout << "totalNumAlignments~" << _numAlignments << endl;
	cout << "parameters~" << _paramSum << _paramString << endl;
}

//--------------------------------------------------------------------------
// Currently no XML support, because of the way this algorithm generates
// the alignments.
//--------------------------------------------------------------------------
bool WatermanByersGenerator::xmlable() const
{
	// This generator always reports that XML output is unavailable.
	const bool supportsXml = false;
	return supportsXml;
}

}
