
/****************************************************************************** 
 * 
 *  file:  ZukerGenerator.h
 * 
 *  Copyright (c) 2003,  University of Virginia.
 *  All rights reserved.
 * 
 *  See the file COPYRIGHT in the top directory of this distribution for
 *  more information.
 *  
 *  THE SOFTWARE IS PROVIDED _AS IS_, WITHOUT WARRANTY OF ANY KIND, EXPRESS 
 *  OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
 *  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 
 *  THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 
 *  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
 *  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
 *  DEALINGS IN THE SOFTWARE.  
 *  
 *****************************************************************************/ 


#ifndef ZUKERGENERATOR_H
#define ZUKERGENERATOR_H

#include <string>
#include <cstdlib>
#include <algorithm>
#include <fstream>
#include <vector>
#include <list>
#include <ScoringMatrix.h> 
#include <GenericException.h> 
#include <XMLable.h> 
#include <XML.h> 
#include <Tostring.h> 
#include <time.h> 
#include "Alignment.h" 
#include "Whence.h" 
#include "AlignmentGenerator.h" 

using namespace std;

namespace NOPT {

/**
 * Generates a set of near optimal alignments according to the Zuker algorithm.
 * This class uses the Zuker algorithm to generate a sample of near optimal
 * sequence alignments.  Extends AlignmentGenerator.
 *
 * NOTE(review): the original comment cited the algorithm only as "(ref)";
 * the actual literature reference is not given anywhere in this file and
 * still needs to be filled in.
 */
class ZukerGenerator : public AlignmentGenerator
{
	public:
				
		/**
		 * Constructor.
		 * Constructs the score and direction matrices and pushes the
		 * alignments created onto the _alignments vector.
		 * \param int gapCreate - Gap create penalty.
		 * \param int gapExtend - Gap extend penalty.
		 * \param ScoringMatrix* sm - Pointer to a ScoringMatrix object.
		 * \param bool isDNA - Whether the sequences are DNA.
		 * \param const string& seq1 - First sequence to be aligned.
		 * \param const string& seq2 - Second sequence to be aligned.
		 * \param float lowerbound - Lower bound percentage, between 0 and 1.
		 * \param float upperbound - Upper bound percentage, between 0 and 1.
		 * \param int limit - A limit on the number of alignments to 
		 * generate.
		 * \param int debug - Debug setting.  Its meaning is not described
		 * in this header; presumably a verbosity level or on/off flag --
		 * confirm against the implementation.
		 * \param const string& order - A value that describes the order in
		 * which the alignments are to be returned.  Possible values are
		 * SORT, REVERSE_SORT, RANDOM, CREATE.
		 * \param bool local - Whether the alignments are local or global.
		 */
		ZukerGenerator(int gapCreate, int gapExtend, ScoringMatrix* sm,
						   bool isDNA, const string& seq1, 
						   const string& seq2, float lowerbound, 
						   float upperbound, int limit, int debug, 
						   const string& order, bool local );

		/**
		 * Destructor.
		 */
		~ZukerGenerator();

		/**
		 * Calculates the robustness score for each aligned edge. 
		 * Nothing is returned to the caller; the scores are output by the
		 * method itself.
		 * \param bool constrain - Whether to limit the output to the edges
		 * within the near optimal neighborhood specified.  If this option
		 * is set to false, a value for _every_ edge (seq1 length X seq2
		 * length) will be output.  Defaults to true.
		 */
		void getRobustness( bool constrain = true );

	private:

		/**
		 * Generates the Zuker matrix and the direction matrices.
		 * Called from the constructor, the method creates score and
		 * direction matrices for the sequences as given, and then a second
		 * pair of score and direction matrices for the _reversed_
		 * sequences.  The two score matrices are then combined into the
		 * Zuker matrix.
		 */
		void _createMatrices();

		/**
		 * Generates the sample of alignments. 
		 * Once the matrices have been created, we simply loop through
		 * the Zuker matrix and generate an alignment if the value at a
		 * given node is within the specified bounds.
		 */
		void _generate();

		/**
		 * Creates a single alignment.
		 * Starting at point i,j in the Zuker matrix, we backtrack through
		 * the forward direction matrix and backtrack through the reverse
		 * direction matrix from i,j to generate the alignment.
		 * \param int i - Down index.
		 * \param int j - Across index.
		 */
		void _createAlignment(int i, int j);

		/**
		 * Helps pick a random direction.
		 * When, in the course of backtracking to create an alignment, a
		 * node is reached where there is more than one direction that can
		 * be followed, this method simply picks one of the alternatives.
		 * This is used to generate a more comprehensive set of alignments.
		 * \param int dir - The direction bitmask.
		 * \return int - Presumably the single direction that was picked
		 * from dir; confirm the encoding against the implementation.
		 */
		int _randomize( int dir );

		//-------------------------------------------------------------------
		// Other values.
		//-------------------------------------------------------------------

		/**
		 * The number of possible alignments generated.
		 * This is simply the number of nodes in the Zuker matrix that
		 * fall within the specified bounds.
		 */
		int _possible;

		/**
		 * The reverse of _seq1.
		 * The reverse of the first input sequence. Used to calculate the
		 * reverse matrices.
		 */
		string _rev1;

		/**
		 * The reverse of _seq2.
		 * The reverse of the second input sequence. Used to calculate the
		 * reverse matrices.
		 */
		string _rev2;

		/**
		 * The order in which to sort the alignments.
		 * This is the way the alignments will be returned to the user.
		 * The possible values are SORT, REVERSE_SORT, RANDOM, CREATE.
		 * SORT orders the alignments highest score to lowest score.
		 * REVERSE_SORT orders the alignments lowest score to highest score.
		 * RANDOM uses the STL random sort facility to randomly sort the
		 * vector. CREATE simply returns the alignments in the order they
		 * were created, that is across row 0, across row 1, ...
		 */
		string _order;

		/**
		 * Upper limit on the number of alignments to generate.
		 * As this algorithm can generate a fairly large set of alignments
		 * we allow a limit to be specified so no machine limitations are
		 * hit.
		 */
		int _limit;

		/**
		 * The forward score matrix.
		 * This matrix is calculated using the _fill method in 
		 * AlignmentGenerator and _seq1 and _seq2.
		 * NOTE: the declaration below is commented out, so this member is
		 * not currently part of the class; the description is kept for
		 * reference only.
		 */
	//	vector< vector<int> > _fMatrix;

		/**
		 * The reverse score matrix.
		 * This matrix is calculated using the _fill method in 
		 * AlignmentGenerator and _rev1 and _rev2.  Don't let the name mislead
		 * you, this is a forward matrix, it's just that the strings are
		 * reversed.
		 * NOTE: the declaration below is commented out, so this member is
		 * not currently part of the class; the description is kept for
		 * reference only.
		 */
//		vector< vector<int> > _bMatrix;

		/**
		 * The forward direction matrix.
		 * A matrix of Whence objects that tracks how the forward score
		 * matrix (_fMatrix, whose declaration is commented out above) was
		 * created.
		 */
		vector< vector<Whence> > _fDirections;  

		/**
		 * The reverse direction matrix.
		 * A matrix of Whence objects that tracks how the reverse score
		 * matrix (_bMatrix, whose declaration is commented out above) was
		 * created.
		 */
		vector< vector<Whence> > _bDirections; 

		/**
		 * The Zuker matrix.
		 * The values in this matrix represent the optimal score that
		 * aligning two indices can create.  (The original comment pointed
		 * to an unspecified "(ref)" for details.)
		 */
		vector< vector<int> > _zuker;

		/**
		 * Integer matrix with no description in the original source.
		 * NOTE(review): judging by the name, this presumably holds the
		 * per-edge robustness values used by getRobustness() -- confirm
		 * against the implementation.
		 */
		vector< vector<int> > _robust;
};

}

#endif
