
/****************************************************************************** 
 * 
 *  file:  AlignmentGenerator.cpp
 * 
 *  Copyright (c) 2003,  University of Virginia.
 *  All rights reserved.
 * 
 *  See the file COPYRIGHT in the top directory of this distribution for
 *  more information.
 *  
 *  THE SOFTWARE IS PROVIDED _AS IS_, WITHOUT WARRANTY OF ANY KIND, EXPRESS 
 *  OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
 *  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 
 *  THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 
 *  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
 *  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
 *  DEALINGS IN THE SOFTWARE.  
 *  
 *****************************************************************************/ 


#include "AlignmentGenerator.h"

namespace NOPT {

//--------------------------------------------------------------------------
// Constructs a generator for the sequence pair seq1 / seq2.
//
//   gapCreate          - stored in _gapCreate (gap creation score)
//   gapExtend          - stored in _gapExtend (gap extension score)
//   sm                 - scoring matrix; only the pointer is stored
//   isDNA              - when true the scoring matrix must be named "dna"
//   seq1, seq2         - used to initialize _seq1 and _seq2
//   lowerBoundPercent,
//   upperBoundPercent  - multipliers later applied to the optimal score by
//                        _calculateBounds(); lower must not exceed upper
//   debug              - debug bitmask tested by _fill()
//   local              - selects the local-alignment branches of _fill()
//   algName            - name handed back by getAlgorithmName()
//
// The bounds and the optimal score start out at the placeholder -99999.
//
// Throws GenericException when lowerBoundPercent > upperBoundPercent, when
// isDNA is set but no scoring matrix was supplied, or when isDNA is set
// and the scoring matrix is not named "dna".
//--------------------------------------------------------------------------
AlignmentGenerator::AlignmentGenerator(int gapCreate,
                                       int gapExtend,
                                       ScoringMatrix* sm,
                                       bool isDNA,
                                       const string& seq1,
                                       const string& seq2,
                                       float lowerBoundPercent,
                                       float upperBoundPercent,
                                       int debug,
                                       bool local,
                                       const string& algName)

: _seq1(seq1),
  _seq2(seq2),
  _isDna(isDNA),
  _gapCreate(gapCreate),
  _gapExtend(gapExtend),
  _upperBoundPercent(upperBoundPercent),
  _lowerBoundPercent(lowerBoundPercent),
  _numAlignments(0),
  _debug(debug),
  _local(local),
  _sm(sm),
  _lowerBound(-99999),
  _upperBound(-99999),
  _optimalScore(-99999),
  _performCount(false),
  _runningCount(0),
  _beginTime(0),
  _showStatus(false),
  _algorithmName(algName)
{
	if ( _lowerBoundPercent > _upperBoundPercent )
		throw(GenericException("lower bound greater than upper bound! upper: "
								+ tos(_upperBoundPercent) + " lower: "
								+ tos(_lowerBoundPercent) ) );

	// The DNA consistency check is the only place in this constructor that
	// dereferences the matrix, so only that path is guarded.  A null
	// matrix combined with isDNA == false is still accepted as before.
	if ( _isDna && !_sm )
		throw(GenericException("Scoring matrix is required when isDNA is set!"));

	if ( _isDna && _sm->name() != "dna" )
		throw(GenericException("Scoring matrix and isDNA don't match!"));
}

//--------------------------------------------------------------------------
// Destructor.  The body is empty: nothing is released explicitly here, and
// in particular the scoring matrix pointer is not deleted.
//--------------------------------------------------------------------------
AlignmentGenerator::~AlignmentGenerator()
{
}


//--------------------------------------------------------------------------
// Derives _lowerBound and _upperBound from _optimalScore and the two
// percentage multipliers, then refreshes the parameter string.
//
// For a non-negative optimal score each bound is percent * score.  For a
// negative optimal score the multiplier becomes (2 - percent), written as
// ((1 - percent) + 1).  Results are truncated to int.
//
// _optimalScore must have been set before this is called.
//--------------------------------------------------------------------------
void AlignmentGenerator::_calculateBounds()
{
	const float optimal = (float)_optimalScore;

	if ( _optimalScore < 0 )
	{
		_lowerBound = (int)(((1-_lowerBoundPercent)+1) * optimal);
		_upperBound = (int)(((1-_upperBoundPercent)+1) * optimal);
	}
	else
	{
		_lowerBound = (int)(_lowerBoundPercent * optimal);
		_upperBound = (int)(_upperBoundPercent * optimal);
	}

	// The parameter string embeds both bounds, so rebuild it now.
	_createParamString();
}

//--------------------------------------------------------------------------
// Builds the lookup key used for the weights map in _fill().
//
// The key has the form "<i>-<j>-<s1>-<s2>", each field rendered by tos().
//--------------------------------------------------------------------------
string AlignmentGenerator::_createKey( int i, int j, char s1, char s2 )
{
	string key = tos(i);
	key += "-";
	key += tos(j);
	key += "-";
	key += tos(s1);
	key += "-";
	key += tos(s2);
	return key;
}

//--------------------------------------------------------------------------
// Uses Gotoh's method (roughly) for filling score and direction matrices.
//
// This is the "master" _fill algorithm called either directly or by other
// _fill methods.
//
// Parameters:
//   seq1       - sequence walked by the outer loop (index i, "down").
//   seq2       - sequence walked by the inner loop (index j, "across").
//   scores     - receives the best score of every cell.  It is indexed as
//                scores[i][j] but never resized here, so the caller must
//                have sized it already.
//   directions - receives a Whence record per cell, but only when
//                directions.size() == scores.size(); otherwise nothing is
//                written to it.
//   weights    - optional per-cell adjustments looked up by _createKey().
//                An empty map disables weighting.  Lookups use operator[],
//                so keys that are absent get inserted with value 0.
//
// Cell (0,0) is set to 0, row 0 and column 0 are initialized as boundaries,
// and the recurrence runs over i >= 1, j >= 1.  The recurrence reads only
// the rolling vectors C, D and beginDownGap plus the scalars s, c and e.
//
// When _local is set, cell scores are clamped at 0 and cells whose score
// is 0 are marked END_POSITION.  In that mode row 0 and column 0 of scores
// are not assigned here (they are only weight-adjusted), so whatever
// values they hold on entry are kept.
//--------------------------------------------------------------------------
void AlignmentGenerator::_fill( const string& seq1,
				                const string& seq2,
								vector< vector<int> >& scores,
								vector< vector<Whence> >& directions,
								map<string,int>& weights )
{
	// Directions are recorded only if the caller supplied a direction
	// matrix with as many rows as the score matrix.
	bool trackDirections = false;
	if ( directions.size() == scores.size() )
		trackDirections = true;

	// A non-empty weights map switches on the per-cell adjustments.
	bool addWeights = false;
	if ( weights.size() > 0 )
		addWeights = true;

	if ( _debug & DEBUG_SOME )
		cout << "trackDirections: " << trackDirections << endl
			 << "addWeights: " << addWeights << endl;

	// Rolling state, one entry per column j:
	//   C[j]            - best score of the cell above (previous row)
	//   D[j]            - best score ending in a gap going down
	//   beginDownGap[j] - row index at which the current down gap began
	vector<int> C( seq2.length(), 0 ); 
	vector<int> D( seq2.length(), 0 ); 
	vector<int> beginDownGap( seq2.length(), 0 ); 

	// Scalars for the current row:
	//   t - running boundary gap score
	//   e - best score ending in a gap going across
	//   s - score of the diagonal predecessor
	//   c - score of the cell immediately to the left
	//   beginAcrossGap - column index at which the current across gap began
	int t, e, s, c, beginAcrossGap;
	t = e = s = c = beginAcrossGap = 0;

	// 
	//  i is down, j is across
	// 
	scores[0][0] = C[0] = 0;
	t = _gapCreate;

	// Initialize row 0.
	for ( int j = 1; (unsigned int)j < C.size(); j++ )
	{
		if ( _local )
		{
			// Local: no boundary penalty; C[j] and D[j] keep their 0.
			if ( trackDirections )
			{
				Whence acrossInit(END_POSITION,0,0);
				directions[0][j] = acrossInit;
			}
		}
		else
		{
			// Global: each further boundary cell adds one gap extension on
			// top of the single gap creation already held in t.
			scores[0][j] = C[j] = t = t + _gapExtend;
			D[j] = t + _gapCreate;

			if ( trackDirections )
			{
				Whence acrossInit(EXTENDED_ACROSS,0,0);
				directions[0][j] = acrossInit;
			}
		}

		if ( addWeights )
		{
			string key = _createKey(0,j,Alignment::gap,seq2[j]);
			scores[0][j] += weights[ key ];
			C[j] += weights[ key ];
			D[j] += weights[ key ];
		}
	}

	if ( _debug & DEBUG_LOTS )
		_showMatrix(scores,0,0,"scores");
	
	// Restart the boundary accumulator for column 0.
	t = _gapCreate;

	for ( int i = 1; (unsigned int)i < seq1.length(); i++ )
	{
		if ( _local ) 
		{
			s = e = c = 0;

			if ( trackDirections )
			{
				Whence downInit(END_POSITION,0,0);
				directions[i][0] = downInit;
			}
		}
		else
		{
			// Capture the old C[0] (cell (i-1,0)) as the diagonal for j == 1
			// before C[0] is overwritten with this row's boundary score.
			s = C[0];
			scores[i][0] = C[0] = c = t = t + _gapExtend;
			e = t + _gapCreate;

			if ( trackDirections )
			{
				Whence downInit(EXTENDED_DOWN,0,0);
				directions[i][0] = downInit;
			}
		}

		if ( addWeights )
		{
			string key = _createKey(i,0,seq1[i],Alignment::gap);
			scores[i][0] += weights[ key ];
			c += weights[ key ];
			e += weights[ key ];
			// The diagonal predecessor gets the weight keyed on cell (i-1,0).
			string key1 = _createKey(i-1,0,seq1[i-1],seq2[0]);
			s += weights[ key1 ];
		}

		beginAcrossGap = 0;
		for ( int j = 1; (unsigned int)j < seq2.length(); j++ )
		{
			// Find the best gap strategy for going ACROSS.
			// If the a gap is created, note where it begins. 
			// c is the value immediately to the left
			// e is the value of extending a gap across
			if ( _debug  & DEBUG_LOTS)
				cout << "index i: " << i << "  j: " << j << endl
					 << "Across max of e: " << e + _gapExtend 
					 << " c: " << c + _gapCreate + _gapExtend <<endl;

			int tmpAcross = max( e, c + _gapCreate ) + _gapExtend;

			// A new across gap starts at j-1 only when opening strictly
			// beats extending; on a tie the existing gap start is kept.
			if ( tmpAcross != e + _gapExtend && 
				 tmpAcross == c + _gapCreate + _gapExtend )
				beginAcrossGap = j-1;

			e = tmpAcross;

			if ( addWeights )
	            e += weights[ _createKey( i, j, seq1[i], Alignment::gap ) ];

			// Find the best gap strategy for going DOWN.
			// If the a gap is created, note where it begins. 
			// C[j] is the value immediately above
			// D[j] is the value of extending a gap down 
			if ( _debug  & DEBUG_LOTS)
				cout << "Down max of D[j]: " << D[j] + _gapExtend 
					 << " C[j]: " << C[j] + _gapCreate + _gapExtend << endl;

			int tmpDown = max( D[j], C[j] + _gapCreate ) + _gapExtend;

			// Same tie rule as for the across gap.
			if ( tmpDown == C[j] + _gapCreate + _gapExtend &&
					  tmpDown != D[j] + _gapExtend )
				beginDownGap[j] = i-1;

			D[j] = tmpDown;

			if ( addWeights )
				D[j] += weights[  _createKey( i, j, Alignment::gap, seq2[j] ) ];

			// Match/mismatch move: diagonal predecessor plus the matrix score.
			int diag = s + _sm->score(seq1[i],seq2[j]);

			if ( addWeights )
				diag += weights[ _createKey( i, j, seq1[i], seq2[j] ) ]; 

			// D[j] is the best down
			// e is the best across
			// diag is diag

			c = max( D[j], max( e, diag ) );

			// Local alignment never lets a cell drop below zero.
			if ( _local )
				c = max( 0, c );

			if ( _debug  & DEBUG_LOTS) 
			{
				cout << "diag s: " << s << " score: " 
					 << _sm->score(seq1[i],seq2[j]) << "    diag: " 
					 << diag << endl
					 << "Decision: " << endl
					 << "diag: " << diag << " across(e): " 
					 << e << " down(D[j]): " << D[j] << endl; 
			}


			if ( trackDirections )
			{
				// Set the directional bitmask so we know how we got here.
				// Every move that ties for the best score contributes its
				// flag, so one cell can record several predecessors.
				int prev = 0;
				Whence whence(0,0,0);

				if ( diag == c )
					prev += DIAG;
				if ( D[j] == c )
				{
					// A gap that began in the row directly above is a plain
					// DOWN step; otherwise record where the longer gap began.
					if ( beginDownGap[j] == i-1 )
						prev += DOWN;
					else
					{
						prev += EXTENDED_DOWN;
						whence.setDownIndex( beginDownGap[j] );
					}
				}
				if ( e == c )
				{
					// Likewise for the across direction.
					if ( beginAcrossGap == j-1 )
						prev += ACROSS;
					else
					{
						prev += EXTENDED_ACROSS;
						whence.setAcrossIndex( beginAcrossGap );
					}
				}
				// In local mode a zero cell replaces any accumulated flags.
				if ( _local && 0 == c )
					prev = END_POSITION;

				whence.setDirection( prev );
				directions[i][j] = whence;
			}

			// Update final matrices and vectors.
			// s must take the old C[j] (cell (i-1,j)) before it is
			// overwritten: it is the diagonal predecessor for column j+1.
			s = C[j];
			C[j] = c;
			scores[i][j] = C[j];

			if ( _debug  & DEBUG_MORE)
			{
				// NOTE(review): directions is shown here even when
				// trackDirections is false -- confirm _showMatrix copes
				// with a direction matrix that was never filled or sized.
				_showMatrix(scores,i,j,"scores");
				_showMatrix(directions,i,j,"directions");
			}
		}
	}
}

//--------------------------------------------------------------------------
// Uses Gotoh's method for filling score and direction matrices.
//
// This method calls _fill such that weights aren't added to the score
// matrix as the algorithm progresses.
//--------------------------------------------------------------------------
void AlignmentGenerator::_fill( const string& seq1,
				                const string& seq2,
								vector< vector<int> >& scores,
								vector< vector<Whence> >& directions)
{
	map<string,int> noWeights;

	_fill( seq1, seq2, scores, directions, noWeights);
}

//--------------------------------------------------------------------------
// Uses Gotoh's method for filling score and direction matrices.
//
// This method calls _fill such that neither weights are added nor 
// directions calculated.
//--------------------------------------------------------------------------
void AlignmentGenerator::_fill( const string& seq1,
				                const string& seq2,
								vector< vector<int> >& scores )
{
	vector< vector<Whence> > noDirections;
	map<string,int> noWeights;

	_fill( seq1, seq2, scores, noDirections, noWeights);
}

//--------------------------------------------------------------------------
// Writes output in XML.
//
// Returns one <alignmentGroup> element holding, in this order: both
// sequences, gapCreate, gapExtend, the scoring matrix name, the bound
// percentages, the computed bounds, the optimal score, the isDna and local
// flags, and finally the alignments under an <alignment> tag.
//
// The gap extension value is emitted under its own "gapExtend" tag;
// reusing "gapCreate" for it would yield two indistinguishable elements.
//
// Dereferences _sm, so a scoring matrix must have been supplied.
//--------------------------------------------------------------------------
string AlignmentGenerator::getXml() const
{
	const string aln = "alignment";
	string xmlString =
		XML::tag("alignmentGroup",
			XML::tag("sequence1", tos(_seq1) ) +
			XML::tag("sequence2", tos(_seq2) ) +
			XML::tag("gapCreate", tos(_gapCreate) ) +
			XML::tag("gapExtend", tos(_gapExtend) ) +
			XML::tag("scoringMatrixName", _sm->name() ) +
			XML::tag("lowerBoundPercent", tos(_lowerBoundPercent) ) +
			XML::tag("upperBoundPercent", tos(_upperBoundPercent) ) +
			XML::tag("lowerBound", tos(_lowerBound) ) +
			XML::tag("upperBound", tos(_upperBound) ) +
			XML::tag("optimalScore", tos(_optimalScore) ) +
			XML::tag("isDna", tos(_isDna) ) +
			XML::tag("local", tos(_local) ) +
			XML::tag(aln, _alignments  ) );

	return xmlString;
}


//--------------------------------------------------------------------------
// Writes the ASCII report to cout.
//
// Output is four header lines (seq1~, seq2~, totalNumAlignments~,
// parameters~) followed by one "a_<index>~<paramSum>|<alignment>" line
// per stored alignment.  `uncompress` is forwarded unchanged to each
// alignment's getAscii().
//--------------------------------------------------------------------------
void AlignmentGenerator::getAscii(bool uncompress)
{
	cout << "seq1~" << _seq1 << endl
		 << "seq2~" << _seq2 << endl
		 << "totalNumAlignments~" << _alignments.size() << endl
		 << "parameters~" << _paramSum << _paramString << endl;

	unsigned int idx = 0;
	while ( idx < _alignments.size() )
	{
		cout << "a_" << idx << "~" << _paramSum << "|"
			 << _alignments[idx].getAscii(uncompress) << endl;
		++idx;
	}
}

//--------------------------------------------------------------------------
// Creates the parameter string.
//--------------------------------------------------------------------------
void AlignmentGenerator::_createParamString()
{
	_paramString = "|optimalScore=" + tos(_optimalScore) +
			       "|lowerBound=" + tos(_lowerBound) +
			       "|upperBound=" + tos(_upperBound) +
			       "|lowerBoundPercent=" + tos(_lowerBoundPercent) +
			       "|upperBoundPercent=" + tos(_upperBoundPercent) +
			       "|gapCreate=" + tos(_gapCreate) +
			       "|gapExtend=" + tos(_gapExtend) +
			       "|scoringMatrix=" + _sm->name() +
			       "|algorithm=" + getAlgorithmName() +
			       "|local=" + tos(_local) +
			       "|dna=" + tos(_isDna) ;

	_paramSum = _checkSum(_paramString);
}

//--------------------------------------------------------------------------
// Based on code made public by Joseph M. Newcomer which
// he adapted from an Adobe document.
// See http://www.flounder.com/checksum.htm for details.
//--------------------------------------------------------------------------
int AlignmentGenerator::_checkSum( const string& s )
{
	int sum = 0;
	int r = 55665;
	int c1 = 52845;
	int c2 = 22719;
	for ( int i = 0; (unsigned int)i < s.length(); i++ )
	{
		unsigned char cipher = (((int)s[i]) ^ (r >> 8));
		r = (cipher + r) * c1 + c2;
		sum += cipher;
	}
	return sum;
}

	
//--------------------------------------------------------------------------
// A method that checks each invididual edge in an alignment and adds
// the edge to the count map.
//--------------------------------------------------------------------------
void AlignmentGenerator::_countAlignment(const string& s1, const string& s2)
{

	if ( _showStatus )
	{
		if ( _runningCount++ % 1000 == 0 )
		{
			cout << "."; 
			cout.flush();
		}

		if ( _runningCount % 10000 == 0 )
		{
			clock_t now = clock();
			cout << "  (running count): " << _runningCount 
				 << "   (elapsed secs): " 
				 << (now - _beginTime)/CLOCKS_PER_SEC << endl;
			cout.flush();
		}
	}

	int s1i = 0;
	int s2i = 0;
	int s1i_prev = 0;
	int s2i_prev = 0;

	for ( int i = 0; (unsigned int)i < s1.length(); i++ )
	{
		if ( s1[i] != Alignment::gap )  
			s1i++; 

		if ( s2[i] != Alignment::gap )
			s2i++;

		string key = tos(s1[i]) + "." + tos(s1i_prev) + "." + tos(s1i) + "." +
			         tos(s2[i]) + "." + tos(s2i_prev) + "." + tos(s2i); 

		_edgeCounts[key]++;

		s1i_prev = s1i;
		s2i_prev = s2i;
	}
}

//--------------------------------------------------------------------------
// Writes counts to stdout.
//--------------------------------------------------------------------------
void AlignmentGenerator::getCounts( bool showStatus ) 
{
	_performCount = true;
	_beginTime = clock();
	_showStatus = showStatus;

	_doCounts();

	if ( _showStatus )
		cout << endl;

	cout << "numAlignments " << _numAlignments << endl;

	for ( MLL mll = _edgeCounts.begin(); mll != _edgeCounts.end(); mll++ )
		cout << mll->first << " " << mll->second << endl;
}

//--------------------------------------------------------------------------
// Reports whether this generator can create XML output.  This
// implementation always answers true.
//--------------------------------------------------------------------------
bool AlignmentGenerator::xmlable() const
{
	const bool canEmitXml = true;
	return canEmitXml;
}

//--------------------------------------------------------------------------
// Feeds every stored alignment through _countAlignment() and then sets
// _numAlignments to the number of stored alignments.
//--------------------------------------------------------------------------
void AlignmentGenerator::_doCounts()
{
	unsigned int idx = 0;
	while ( idx < _alignments.size() )
	{
		_countAlignment( _alignments[idx].getSeq1(), _alignments[idx].getSeq2() );
		++idx;
	}

	_numAlignments = _alignments.size();
}

//--------------------------------------------------------------------------
// Returns a copy of the algorithm name given to the constructor.
//--------------------------------------------------------------------------
string AlignmentGenerator::getAlgorithmName()
{
	string name( _algorithmName );
	return name;
}


}


