
/****************************************************************************** 
 * 
 *  file:  ZukerGenerator.cpp
 * 
 *  Copyright (c) 2003,  University of Virginia.
 *  All rights reserved.
 * 
 *  See the file COPYRIGHT in the top directory of this distribution for
 *  more information.
 *  
 *  THE SOFTWARE IS PROVIDED _AS IS_, WITHOUT WARRANTY OF ANY KIND, EXPRESS 
 *  OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
 *  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 
 *  THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 
 *  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
 *  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
 *  DEALINGS IN THE SOFTWARE.  
 *  
 *****************************************************************************/ 


#include "ZukerGenerator.h"

namespace NOPT {

//--------------------------------------------------------------------------
// Constructor.
//
// Forwards the scoring parameters, both sequences, the bound percentages,
// the debug flags and the local flag to AlignmentGenerator under the name
// "Zuker", keeps reversed copies of seq1 and seq2 plus the requested output
// order and alignment limit, then immediately builds the matrices and
// generates the alignments.
//--------------------------------------------------------------------------
ZukerGenerator::ZukerGenerator(int gapCreate,
                               int gapExtend,
                               ScoringMatrix* sm,
                               bool isDNA,
                               const string& seq1,
                               const string& seq2,
                               float lowerBoundPercent,
                               float upperBoundPercent,
                               int limit,
                               int debug,
                               const string& order,
                               bool local)
: AlignmentGenerator( gapCreate, gapExtend, sm, isDNA, seq1, seq2,
                      lowerBoundPercent, upperBoundPercent, debug, local,
                      "Zuker" ),
  _possible(0),
  _rev1(seq1.rbegin(), seq1.rend()),   // seq1 read back to front
  _rev2(seq2.rbegin(), seq2.rend()),   // seq2 read back to front
  _order(order),
  _limit(limit)
{
	// Fixed seed: every run draws the same pseudo-random sequence, so the
	// randomized traceback choices are reproducible.
	srand(1);

	_createMatrices();
	_generate();
}

//--------------------------------------------------------------------------
// Destructor.  The body is empty: nothing is released explicitly here.
//--------------------------------------------------------------------------
ZukerGenerator::~ZukerGenerator()
{ }


//--------------------------------------------------------------------------
//
//--------------------------------------------------------------------------
void ZukerGenerator::_createMatrices()
{

	//
	// initialize everything
	//
	string seq1 = " " + _seq1;
	string seq2 = " " + _seq2;

	// these matrices need to hang around
	_fDirections.resize( seq1.length() ); 
	for ( int i = 0; (unsigned int)i < _fDirections.size(); i++ )
		_fDirections[i].resize( seq2.length(), 0 );

	_bDirections.resize( seq1.length() ); 
	for ( int i = 0; (unsigned int)i < _bDirections.size(); i++ )
		_bDirections[i].resize( seq2.length(), 0 );

	_zuker.resize( seq1.length()-1 ); 
	for ( int i = 0; (unsigned int)i < _zuker.size(); i++ )
		_zuker[i].resize( seq2.length()-1, 0 ); 

	// these don't
	vector< vector<int> > _fMatrix;
	_fMatrix.resize( seq1.length() ); 
	for ( int i = 0; (unsigned int)i < _fMatrix.size(); i++ )
		_fMatrix[i].resize( seq2.length(), 0 );

	vector< vector<int> > _bMatrix;
	_bMatrix.resize( seq1.length() ); 
	for ( int i = 0; (unsigned int)i < _bMatrix.size(); i++ )
		_bMatrix[i].resize( seq2.length(), 0 );

	// fill forward direction matrices 
	_fill( seq1, seq2, _fMatrix, _fDirections );

	// erase the prepended spaces
	seq1.erase(seq1.begin());
	seq2.erase(seq2.begin());

	// create reverse strings
	string rev1 = seq1;
	string rev2 = seq2;
	reverse_copy(seq1.begin(),seq1.end(),rev1.begin());
	reverse_copy(seq2.begin(),seq2.end(),rev2.begin());
	rev1 = " " + rev1;
	rev2 = " " + rev2;

	// fill reverse direction matrices 
	_fill( rev1, rev2, _bMatrix, _bDirections );

	//
	// Calculate Zuker matrix 
	//
	// note that the indices refer to different places in the _fMatrix
	// than in the seqs because of the space used to create _fMatrix that
	// is not present in the seqs now. 
	//
	int m = seq1.length()-1;
	int n = seq2.length()-1;

	vector< vector<string> > vs( _zuker.size() ); // used for debug
	for ( int i = 0; (unsigned int)i < _zuker.size(); i++ )
	{
		vs[i].resize( _zuker[i].size() );
		for ( int j = 0; (unsigned int)j < _zuker[i].size(); j++ )
		{
			_zuker[i][j] = _sm->score(seq1[i],seq2[j]) + _fMatrix[i][j] +
			               _bMatrix[m-i][n-j];

			vs[i][j] = tos( _fMatrix[i][j] ) + "+" +
					   tos( _sm->score(seq1[i],seq2[j]) ) + "+" + 
					   tos( _bMatrix[m-i][n-j] );
	
			// _local demands we calculate optimal here 
			_optimalScore = max ( _optimalScore, _zuker[i][j] );
		}
	}

	_calculateBounds();

	if ( _debug & DEBUG_MORE ) 
	{
		_showMatrix(_fMatrix,0,0,"Forward");
		_showMatrix(_fDirections,0,0,"Forward Direction");
		_showMatrix(_bMatrix,0,0,"Backward");
		_showMatrix(_bDirections,0,0,"Backward Direction");
		_showMatrix(vs,0,0,"Zuker creation");
		_showMatrix(_zuker,0,0,"Zuker");
	}	
}

//--------------------------------------------------------------------------
// Produces the alignments from the Zuker matrix.
//
// First counts in _possible every cell whose score lies within
// [_lowerBound, _upperBound].  Then, in row-major order, calls
// _createAlignment() for each such cell while fewer than _limit alignments
// are stored, bumping _numAlignments per call.  Finally orders
// _alignments according to _order:
//   "SORT"          sorted, then reversed
//   "REVERSE_SORT"  sorted
//   "RANDOM"        shuffled
//   anything else   left in creation order ("CREATE")
//--------------------------------------------------------------------------
void ZukerGenerator::_generate()
{
	const size_t rowCount = _zuker.size();

	// Pass 1: how many cells qualify at all.
	for ( size_t r = 0; r < rowCount; ++r )
	{
		const size_t colCount = _zuker[r].size();
		for ( size_t c = 0; c < colCount; ++c )
		{
			const bool inRange = _zuker[r][c] >= _lowerBound &&
			                     _zuker[r][c] <= _upperBound;
			if ( inRange )
				++_possible;
		}
	}

	// Pass 2: build an alignment for each qualifying cell until the limit
	// on stored alignments is reached.
	const unsigned int maxStored = static_cast<unsigned int>( _limit );
	for ( size_t r = 0; r < rowCount; ++r )
	{
		const size_t colCount = _zuker[r].size();
		for ( size_t c = 0; c < colCount; ++c )
		{
			const bool inRange = _zuker[r][c] >= _lowerBound &&
			                     _zuker[r][c] <= _upperBound;
			if ( !inRange )
				continue;
			if ( _alignments.size() >= maxStored )
				continue;

			_createAlignment( static_cast<int>( r ), static_cast<int>( c ) );
			++_numAlignments;
		}
	}

	// Arrange the results as requested.
	if ( _order == "RANDOM" )
	{
		random_shuffle( _alignments.begin(), _alignments.end() );
	}
	else if ( _order == "SORT" || _order == "REVERSE_SORT" )
	{
		sort( _alignments.begin(), _alignments.end() );
		if ( _order == "SORT" )
			reverse( _alignments.begin(), _alignments.end() );
	}
	// else "CREATE" which is the order they're created in
}

//--------------------------------------------------------------------------
// Prints a robustness score for each aligned edge (gaps are not
// considered).
//
// For cell (i,j) the score is _zuker[i][j] minus the largest other value
// found in column j or row i.  One line per cell is written to cout in the
// form  r~<res1>-<i>,<res2>-<j>=<score>.
//
// constrain: when true, cells whose Zuker score lies outside
//            [_lowerBound, _upperBound] are skipped.
//
// NOTE(review): with a 1x1 matrix there is no other cell, the rival stays
// at INT_MIN and the subtraction overflows for non-negative scores.
//--------------------------------------------------------------------------
void ZukerGenerator::getRobustness( bool constrain )
{
	const int rowCount = static_cast<int>( _zuker.size() );

	for ( int row = 0; row < rowCount; ++row )
	{
		const int colCount = static_cast<int>( _zuker[row].size() );

		for ( int col = 0; col < colCount; ++col )
		{
			if ( constrain )
			{
				const bool outside = _zuker[row][col] < _lowerBound ||
				                     _zuker[row][col] > _upperBound;
				if ( outside )
					continue;
			}

			int bestRival = INT_MIN;

			// best competitor in the same column
			for ( int r = 0; r < rowCount; ++r )
			{
				if ( r == row )
					continue;
				bestRival = max( bestRival, _zuker[r][col] );
			}

			// best competitor in the same row
			for ( int c = 0; c < colCount; ++c )
			{
				if ( c == col )
					continue;
				bestRival = max( bestRival, _zuker[row][c] );
			}

			const int margin = _zuker[row][col] - bestRival;

			cout << "r~" << _seq1[row] << "-" << row << ","
			     << _seq2[col] << "-" << col << "="
			     << margin << endl;
		}
	}
}



//--------------------------------------------------------------------------
// Builds one alignment in which _seq1[i] is paired with _seq2[j] and adds
// it to _alignments unless an equal alignment is already stored.
//
// i, j: row and column of the chosen cell in _zuker, i.e. 0-based
//       positions in _seq1 and _seq2.
//
// The aligned strings start as just the pinned pair.  They are extended to
// the left by walking _fDirections from (i,j) towards the start of the
// sequences, and to the right by walking _bDirections (which was filled on
// the reversed sequences, hence the use of _rev1/_rev2).
//
// At each step _randomize() picks a number k and the k-th direction flag
// that is set in the cell (tested in the fixed order DIAG, DOWN, ACROSS,
// EXTENDED_DOWN, EXTENDED_ACROSS, END_POSITION) is followed: `allowed` is
// only decremented by a test whose flag is set.  The walk therefore
// depends on the state of rand().
//
// Throws GenericException when a cell offers no usable direction, or when
// END_POSITION is met in non-local mode before both sequences are used up.
//--------------------------------------------------------------------------
void ZukerGenerator::_createAlignment(int i, int j)
{
	int score = _zuker[i][j];
	string seq1;
	string seq2;

	// Seed both strings with the pinned pair.  In DEBUG_MORE mode the pair
	// is lower-cased so it stands out in the printed alignment.
	if ( _debug & DEBUG_MORE )
	{
		cout << endl << endl << "--------------------------" << endl;
		_showMatrix(_zuker,i,j,"Zuker");
		_showMatrix(_fDirections,i,j,"Forward Direction");
		char z = tolower( _seq1[i] );
		seq1 += z;
		char zz = tolower( _seq2[j] );
		seq2 += zz;
	}
	else
	{
		seq1 += _seq1[i];
		seq2 += _seq2[j];
	}

	//
	// From the point, work backwards to the beginning of the string
	// using the _fDirections matrix.
	//
	// Note, we use the i-1 and j-1 as the starting points because seq1 and
	// seq2 are initialized with the diagonal value.  That is, the alignment
	// is constrained to align those two specific residues.  So once we've
	// aligned those, we can continue backtrack through the directional
	// matrices and generate an alignment.
	//
	// bi/bj index the sequences; the direction matrix is offset by one
	// (it was filled with a leading blank), hence the [bi+1][bj+1] lookups.
	//
	int bi = i - 1;
	int bj = j - 1;

	while ( bi >= 0 || bj >= 0 )
	{
		int dir = _fDirections[bi+1][bj+1].getDirection();

		int allowed = _randomize( dir );
		int origAllowed = allowed;   // kept for the error message below
		if ( _debug & DEBUG_MORE )
			cout << "orig allowed " << allowed << endl;

		if ( ( dir & DIAG ) && allowed-- == 0 )
		{
			seq1 = _seq1[bi] + seq1;
			seq2 = _seq2[bj] + seq2;
			if ( _debug & DEBUG_MORE )
				cout << "DIAG " << bi+1 << " " << bj+1
					 << " seq1 " << seq1 << "   seq2 " << seq2 << endl;
			bi--;
			bj--;
		}
		else if ( ( dir & DOWN ) && allowed-- == 0 )
		{
			seq1 = _seq1[bi] + seq1;
			seq2 = Alignment::gap + seq2;
			if ( _debug & DEBUG_MORE )
				cout << "DOWN " << bi+1 << " " << bj+1
					 << " seq1 " << seq1 << "   seq2 " << seq2 << endl;
			bi--;
		}
		else if ( ( dir & ACROSS ) && allowed-- == 0 )
		{
			seq1 = Alignment::gap + seq1;
			seq2 = _seq2[bj] + seq2;
			if ( _debug & DEBUG_MORE )
				cout << "ACROSS " << bi+1 << " " << bj+1
					 << " seq1 " << seq1 << "   seq2 " << seq2 << endl;
			bj--;
		}
		else if ( ( dir & EXTENDED_DOWN ) && allowed-- == 0 )
		{
			// The cell records the row at which this gap run began; emit
			// the whole run in one go.
			int beginDown = _fDirections[bi+1][bj+1].getDownIndex();
			int diff = bi+1 - beginDown;
			if ( _debug & DEBUG_MORE )
				cout << "diff   " << diff << " beginDown " << beginDown <<
					 "  bi+1 " << bi + 1 <<
					 "  dir obj " << _fDirections[bi+1][bj+1] << endl;

			for ( int x = 0; x < diff; x++ )
			{
				seq1 = _seq1[bi] + seq1;
				seq2 = Alignment::gap + seq2;
				if ( _debug & DEBUG_MORE )
					cout << "EXTENDED_DOWN " << bi+1 << " " << bj+1
						 << " seq1 " << seq1 << "   seq2 " << seq2 << endl;
				bi--;
			}
		}
		else if ( ( dir & EXTENDED_ACROSS ) && allowed-- == 0 )
		{
			// Same as above, for a gap run along the row.
			int beginAcross = _fDirections[bi+1][bj+1].getAcrossIndex();
			int diff = bj+1 - beginAcross;
			if ( _debug & DEBUG_MORE )
				cout << "diff   " << diff << " beginAcross " << beginAcross <<
					 "  bj+1 " << bj + 1 <<
					 "  dir obj " << _fDirections[bi+1][bj+1] << endl;

			for ( int x = 0; x < diff; x++ )
			{
				seq1 = Alignment::gap + seq1;
				seq2 = _seq2[bj] + seq2;
				if ( _debug & DEBUG_MORE )
					cout << "EXTENDED_ACROSS " << bi+1 << " " << bj+1
						 << " seq1 " << seq1 << "   seq2 " << seq2 << endl;
				bj--;
			}
		}
		else if ( dir & END_POSITION )
		{
			// A local alignment may stop anywhere; otherwise stopping is
			// only accepted once neither index is still positive.
			if ( _local )
				break;
			else
			{
				if ( bj > 0 || bi > 0 )
					throw(GenericException(string("Something wrong with ") +
					  string("forward direction matrix. O at bi:") +
					  tos(bi) + " bj:" + tos(bj) ));
				else
					break;
			}
		}
		else
			throw(GenericException(string("Bad direction in forward ") +
				  string("direction matrix at bi:") + tos(bi) + " bj:" +
				  tos(bj) + "  dir: " + tos(dir) + "  allowed: " +
				  tos(origAllowed) ));

	}

	int begin1Index = bi+1;
	int begin2Index = bj+1;

	//
	// From the input point moving forward, complete the strings.
	//
	// i and j are re-expressed as positions in the reversed sequences:
	// _rev1[length-2-i] is the residue just after _seq1[i].  The
	// arithmetic is done in int so that a result of -1 (nothing follows
	// the pinned residue) does not depend on unsigned wrap-around.
	//
	i = static_cast<int>( _seq1.length() ) - 2 - i;
	j = static_cast<int>( _seq2.length() ) - 2 - j;

	if ( _debug & DEBUG_MORE )
	{
		cout << "seq1 so far:  " << seq1 << endl;
		cout << "seq2 so far:  " << seq2 << endl;
		_showMatrix(_bDirections,i,j,"Backward Direction");
	}

	while ( i >= 0 || j >= 0 )
	{
		int dir = _bDirections[i+1][j+1].getDirection();

		int allowed = _randomize( dir );
		int origAllowed = allowed;   // kept for the error message below
		if ( _debug & DEBUG_MORE )
			cout << "orig allowed " << allowed << endl;

		if ( ( dir & DIAG ) && allowed-- == 0 )
		{
			seq1 += _rev1[i];
			seq2 += _rev2[j];
			if ( _debug & DEBUG_MORE )
				cout << "DIAG " << i+1 << " " << j+1
					 << " seq1 " << seq1 << "   seq2 " << seq2 << endl;
			i--;
			j--;

		}
		else if ( ( dir & DOWN ) && allowed-- == 0 )
		{
			seq1 += _rev1[i];
			seq2 += Alignment::gap;
			if ( _debug & DEBUG_MORE )
				cout << "DOWN " << i+1 << " " << j+1
					 << " seq1 " << seq1 << "   seq2 " << seq2 << endl;
			i--;
		}
		else if ( ( dir & ACROSS ) && allowed-- == 0 )
		{
			seq1 += Alignment::gap;
			seq2 += _rev2[j];
			if ( _debug & DEBUG_MORE )
				cout << "ACROSS " << i+1 << " " << j+1
					 << " seq1 " << seq1 << "   seq2 " << seq2 << endl;
			j--;
		}
		else if ( ( dir & EXTENDED_DOWN ) && allowed-- == 0 )
		{
			int beginDown = _bDirections[i+1][j+1].getDownIndex();
			int diff = i+1 - beginDown;
			if ( _debug & DEBUG_MORE )
				cout << "diff   " << diff << " beginDown " << beginDown <<
					 "  i+1 " << i + 1 <<
					 "  dir obj " << _bDirections[i+1][j+1] << endl;

			for ( int x = 0; x < diff; x++ )
			{
				seq1 += _rev1[i];
				seq2 += Alignment::gap;
				if ( _debug & DEBUG_MORE )
					cout << "EXTENDED_DOWN " << i+1 << " " << j+1
						 << " seq1 " << seq1 << "   seq2 " << seq2 << endl;
				i--;
			}
		}
		else if ( ( dir & EXTENDED_ACROSS ) && allowed-- == 0 )
		{
			int beginAcross = _bDirections[i+1][j+1].getAcrossIndex();
			int diff = j+1 - beginAcross;
			if ( _debug & DEBUG_MORE )
				cout << "diff   " << diff << " beginAcross " << beginAcross <<
					 "  j+1 " << j + 1 <<
					 "  dir obj " << _bDirections[i+1][j+1] << endl;

			for ( int x = 0; x < diff; x++ )
			{
				seq1 += Alignment::gap;
				seq2 += _rev2[j];
				if ( _debug & DEBUG_MORE )
					cout << "EXTENDED_ACROSS " << i+1 << " " << j+1
						 << " seq1 " << seq1 << "   seq2 " << seq2 << endl;
				j--;
			}
		}
		else if ( dir & END_POSITION )
		{
			if ( _local )
				break;
			else
			{
				// This walk uses _bDirections, so the message names the
				// backward matrix.
				if ( j > 0 || i > 0 )
					throw(GenericException(string("Something wrong with ") +
					  string("backward direction matrix. O at i:") + tos(i) +
					  " j:" + tos(j) ));
				else
					break;
			}
		}
		else
			throw(GenericException("Bad direction in backward direction " +
								   string("matrix at i:") + tos(i) + " j:" +
								   tos(j) + "  dir: " + tos(dir) +
								   " allowed: " + tos(origAllowed) ));


	}

	// NOTE(review): in 0-based terms the last residue consumed from _seq1
	// is at length-2-i, so these values are that position plus two, while
	// begin1Index/begin2Index are the 0-based first position.  Confirm
	// this matches the convention Alignment expects.
	int end1Index = static_cast<int>( _seq1.length() ) - i;
	int end2Index = static_cast<int>( _seq2.length() ) - j;

	// Now actually create the alignment
	Alignment a( seq1, seq2 , score,
				 begin1Index, begin2Index, end1Index, end2Index  );

	if ( _debug & DEBUG_SOME )
	{
		cout << a << endl;
		cout << "seq1: " << seq1 << endl;
		cout << "seq2: " << seq2 << endl;
	}

	// Different cells can trace out the same alignment; keep only one copy.
	if ( find(_alignments.begin(), _alignments.end(),a) == _alignments.end() )
		_alignments.push_back(a);
}


//--------------------------------------------------------------------------
// Picks at random which of the directions present in `dir` to follow.
//
// dir:     bit mask of direction flags taken from a direction-matrix cell.
// returns: a value in [0, count-1], where count is how many of DIAG, DOWN,
//          ACROSS, EXTENDED_DOWN, EXTENDED_ACROSS and END_POSITION are set
//          in dir.  If none of them is set, 0 is returned instead of
//          evaluating rand() % 0; _createAlignment() then finds no matching
//          branch and throws its "Bad direction" exception.
//
// rand() is consumed exactly once per call whenever at least one flag is
// set, so the pseudo-random sequence seen by callers is unchanged.
//--------------------------------------------------------------------------
int ZukerGenerator::_randomize(int dir)
{
	const int flags[] = { DIAG, DOWN, ACROSS,
	                      EXTENDED_DOWN, EXTENDED_ACROSS, END_POSITION };
	const int flagCount = static_cast<int>( sizeof(flags) / sizeof(flags[0]) );

	// figure out which directions are available at this point
	int count = 0;
	for ( int k = 0; k < flagCount; ++k )
		if ( dir & flags[k] )
			count++;

	// nothing recognisable in this cell: let the caller report it
	if ( count == 0 )
		return 0;

	// choose one at random
	return ( rand() % count );
}

}

