/****************************************************************************** 
 * 
 *  file:  SmootGenerator.cpp
 * 
 *  Copyright (c) 2004,  University of Virginia.
 *  All rights reserved.
 * 
 *  See the file COPYRIGHT in the top directory of this distribution for
 *  more information.
 *  
 *  THE SOFTWARE IS PROVIDED _AS IS_, WITHOUT WARRANTY OF ANY KIND, EXPRESS 
 *  OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
 *  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 
 *  THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 
 *  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
 *  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
 *  DEALINGS IN THE SOFTWARE.  
 *  
 *****************************************************************************/


#include "SmootGenerator.h"

namespace NOPT {

//---------------------------------------------------------------------------
// Constructor.
//
// All arguments are forwarded unchanged to the AlignmentGenerator base,
// together with the fixed values false and "Smoot".  The body then:
//   1. aligns seq1 against seq2 with _myersMillerAlign() and stores the
//      returned score in _optimalScore,
//   2. passes that alignment to _rememberEdges(),
//   3. calls _calculateBounds(),
//   4. prints the optimal score and both bounds when DEBUG_SOME is set.
//---------------------------------------------------------------------------
SmootGenerator::SmootGenerator(int gapCreate,
                               int gapExtend,
                               ScoringMatrix* pam,
                               bool isDNA,
                               const string& seq1,
                               const string& seq2,
                               float lowerBoundPercent,
                               float upperBoundPercent,
                               int debug)
: AlignmentGenerator( gapCreate, gapExtend, pam, isDNA, seq1, seq2,
                      lowerBoundPercent, upperBoundPercent, debug,
                      false, "Smoot" )
{
	// Alignment of the two complete sequences.
	string alignedSeq1;
	string alignedSeq2;
	_optimalScore = _myersMillerAlign( seq1, seq2, alignedSeq1, alignedSeq2 );

	// Hand the alignment's edges to the bookkeeping before computing bounds.
	_rememberEdges( alignedSeq1, alignedSeq2, _optimalScore );

	_calculateBounds();

	if ( !( _debug & DEBUG_SOME ) )
		return;

	cout << "optimal score:  " << _optimalScore << endl;
	cout << "lower bound:    " << _lowerBound << endl;
	cout << "upper bound:    " << _upperBound << endl;
}

//---------------------------------------------------------------------------
//
//---------------------------------------------------------------------------
void SmootGenerator::_generate( bool uncompress ) 
{
	for ( int i = 0; (unsigned int)i < _seq1.length()-1; i++ ) 
		for ( int j = 0; (unsigned int)j < _seq2.length()-1; j++ ) 
		{
			string key = tos(_seq1[i]) + "-" + tos(i) + "," + 
					     tos(_seq2[j]) + "-" + tos(j);

			if ( usedEdges[key] )
			{
				if ( _debug & DEBUG_SOME )
					cout << "skipping edge: " << key << endl;

				continue;
			}
			else
				if ( _debug & DEBUG_SOME )
					cout << "checking edge: " << key << endl;

			// Split the sequences into two parts.
			string firstA = _seq1.substr(0,i);
			string firstB = _seq2.substr(0,j);
			
			string secondA = _seq1.substr(i+1);
			string secondB = _seq2.substr(j+1);
		
			if ( _debug & DEBUG_SOME )
				cout << endl << endl 
				     << "------------------------------------------" << endl
				     << "Seq 1 parts:  '" << firstA << "'  '" 
				     << _seq1[i] << "'  '" << secondA << "'" << endl 
					 << "Seq 2 parts:  '" <<  firstB << "'  '" 
					 << _seq2[j] << "'  '" << secondB << "'" << endl;

			string firstResultA;
			string firstResultB;
			string secondResultA;
			string secondResultB;

			// Calculate the alignments for the two parts.
			int firstOpt = _myersMillerAlign(firstA,firstB,
							                 firstResultA,firstResultB);
			int secondOpt = _myersMillerAlign(secondA,secondB,
							                  secondResultA,secondResultB);
				
			int zukerScore = firstOpt + secondOpt + 
			                 _sm->score(_seq1[i],_seq2[j]);

			if ( _debug & DEBUG_SOME )
				cout << "firstOpt  " << firstOpt << endl
				     << "secondOpt " << secondOpt << endl
				     << "match     " << _sm->score(_seq1[i],_seq2[j]) << endl
				     << "zuker     " << zukerScore << endl << endl; 

			// merge the results to create the final alignments
			string resultA = firstResultA + _seq1[i] + secondResultA;
			string resultB = firstResultB + _seq2[j] + secondResultB;

			_rememberEdges( resultA, resultB, zukerScore );

			// If the score is good enough, add to our set.
			if ( zukerScore >= _lowerBound && zukerScore <= _upperBound )
			{

				Alignment x( resultA, resultB, zukerScore, 
				             0, 0, _seq1.length()-1, _seq2.length()-1);
			
				if ( _debug & DEBUG_SOME )
					cout << "Included" << endl
					     << "a     '" << _seq1 << "'" << endl
					     << "fa    '" << firstResultA << "'" << endl
					     << "a[i]  '" << _seq1[i]  << endl
					     << "sa    '" << secondResultA << "'" << endl << endl 
					     << "b     '" << _seq2 << endl
					     << "fb    '" << firstResultB << "'" << endl
					     << "b[j]  '" << _seq2[j]  << endl
					     << "sb    '" << secondResultB  << "'" << endl << endl
					     << "ra    '" << resultA  << "'" << endl 
					     << "rb    '" << resultB  << "'" << endl;

			    int testOpt = Alignment::SWScore( resultA, resultB,
	                                      *_sm, _gapCreate, _gapExtend );

				if ( zukerScore != testOpt )
					throw( GenericException( (string)
						"Zuker score not equal to calculated score! Zuker: " + 
					 	tos(zukerScore) + "  vs. generated: " + 
						tos( testOpt) )) ;
	
				// We want to output the alignments as we go because they
				// take forever to create.  However, we also don't want a
				// ton of dupes, so we need to keep some notion of the 
				// alignment around.   We don't want to keep the entire 
				// alignment around (too much space) so instead we 
				// calculate a checksum of it and keep track of that.
				//
				// This could, in theory, expand the memory usage beyond
				// linear space.  In practice this is very (very) unlikely 
				// because we are only sampling the near-optimal space and 
				// even then only taking those alignments within particular
				// bounds.  I'd almost go so far as to say its impossible,
				// but I don't have a proof, so I won't.
				cout << "a~" << _paramSum << "|" 
					 << x.getAscii(uncompress) << endl; 

				/*
				string as = x.getAscii(uncompress);
				int sum = _checkSum( as );

				if ( find(_sums.begin(), _sums.end(),sum) == _sums.end() )
			 	{
					// output the alignment as we go
					_sums.push_back(sum);
			 	}
				*/
			}
			else
				if ( _debug & DEBUG_SOME )
					cout << "NOT included" << endl;
		}
}

//---------------------------------------------------------------------------
// Calculates a Myers-Miller alignment of the two seqs.
//---------------------------------------------------------------------------
int SmootGenerator::_myersMillerAlign( const string& a, 
                                        const string&b, 
					 				    string& resultA, 
									    string& resultB )
{
	MyersMillerGenerator mm(_gapCreate,_gapExtend,_sm,_isDna,a,b,_debug,false);
	Alignment x = mm.getAlignment();
	resultA = x.getSeq1();
	resultB = x.getSeq2();
	return x.getScore();
}

//---------------------------------------------------------------------------
// Calculates the optimal score relatively quickly and in linear space.
//---------------------------------------------------------------------------
int SmootGenerator::_calcOptimalScore( const string& a, const string& b )
{
	
	// boundary cases

	if ( a.length() == 0 && b.length() == 0 )
	{
		if ( _debug & DEBUG_LOTS )
			cout << "both a and b have length 0... returning 0" << endl; 
		return 0;
	}

	if ( a.length() == 0 )
	{
		int ret = b.length() * _gapExtend + _gapCreate;
		if ( _debug & DEBUG_LOTS )
			cout << "a has length 0... returning " << ret << endl; 
		return ret;
	}

	if ( b.length() == 0 )
	{
		int ret = a.length() * _gapExtend + _gapCreate;
		if ( _debug & DEBUG_LOTS )
			cout << "b has length 0... returning " << ret << endl; 
		return ret;
	}

	// normal cases

	// both vectors sized to the length of the "across" sequence
	vector<int> C( b.length()+1 );  
	vector<int> D( b.length()+1 );

	int e,c,s,t; // temp variables

	e = c = s = t = 0;

	C[0] = 0;
	t = _gapCreate;
	for ( int j = 1; (unsigned int)j < b.length()+1; j++ )
	{
		C[j] = t = t + _gapExtend;
		D[j] = t + _gapCreate;
	}

	t = _gapCreate;
	for ( int i = 1; (unsigned int)i < a.length()+1; i++ )
	{
		s = C[0];
		C[0] = c = t = t + _gapExtend;
		e = t + _gapCreate;

		for ( int j = 1; (unsigned int)j < b.length()+1; j++ )
		{
			e = max( e + _gapExtend, c + _gapCreate + _gapExtend ); 
			D[j] = max( D[j] + _gapExtend, C[j] + _gapCreate + _gapExtend );
			c = max( D[j], max( e, s + _sm->score(a[i-1],b[j-1]) ) );
            s = C[j];
            C[j] = c;
		}
	}

	if ( _debug & DEBUG_LOTS )
	{
		cout << endl << "C vector: " << endl;
		for ( int x = 0; (unsigned int)x < C.size(); x++ )
			cout << C[x] << " ";
		cout << endl << "D vector: " << endl; 
		for ( int x = 0; (unsigned int)x < D.size(); x++ )
			cout << D[x] << " ";
		cout << endl; 
	}

	return C[b.length()];
}

//---------------------------------------------------------------------------
// Writes the parameter header line "p~<_paramSum><_paramString>" to cout
// and then runs _generate(), which prints the alignments themselves.
//
// uncompress - handed on to _generate().
//
// Nothing is returned; all output goes to cout.
//---------------------------------------------------------------------------
void SmootGenerator::getAscii(bool uncompress)
{
	// Header first, so it precedes every alignment line.
	cout << "p~" << _paramSum;
	cout << _paramString << endl;

	_generate( uncompress );
}

//---------------------------------------------------------------------------
//
//---------------------------------------------------------------------------
void SmootGenerator::_rememberEdges(const string& a, const string& b, int s)
{
	int aCount = 0;
	int bCount = 0;
	for ( int i = 0; (unsigned int)i < a.length(); i++ )
	{
		string key = "";
		int ok = 0;
		if ( a[i] != Alignment::gap )
		{
			key += tos(a[i]) + "-" + tos(aCount);
			aCount++;
			ok++;
		}

		if ( b[i] != Alignment::gap )
		{
			key += "," + tos(b[i]) + "-" + tos(bCount);
			bCount++;
			ok++;
		}

		if ( ok == 2 )
		{
			if ( _debug & DEBUG_LOTS )
				cout << "adding edge:     " << key << endl;

			usedEdges[ key ] = true;
		}
		else
			if ( _debug & DEBUG_LOTS )
				cout << "NOT adding edge: " << key << endl;
	}
}

}
