
/****************************************************************************** 
 * 
 *  file:  StringDiff.cpp
 * 
 *  Copyright (c) 2003,  University of Virginia.
 *  All rights reserved.
 * 
 *  See the file COPYRIGHT in the top directory of this distribution for
 *  more information.
 *  
 *  THE SOFTWARE IS PROVIDED _AS IS_, WITHOUT WARRANTY OF ANY KIND, EXPRESS 
 *  OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
 *  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 
 *  THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 
 *  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
 *  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
 *  DEALINGS IN THE SOFTWARE.  
 *  
 *****************************************************************************/ 


#include "StringDiff.h"

namespace NOPT {

//
// Apply an edit script to a base string and return the edited string.
//
// The script is a sequence of entries of the form
//
//     <index><op><char>,
//
// where <index> is a run of decimal digits, <op> is '-' (delete one
// character) or any other character (treated as insert, normally '+'),
// and <char> is the character concerned.  For a deletion <index> refers to
// a position in the original base string; for an insertion it refers to a
// position in the final string.  Entries are applied in order.
//
// Parameters:
//   base  - the string to edit (not modified)
//   diffs - the edit script.  It is parsed in place and is NOT modified
//           (the reference is non-const only to keep the existing
//           signature).
//
// Returns the patched string; an empty script returns a copy of base.
//
// Throws std::out_of_range if an entry is truncated or refers to a position
// outside the string being edited.
//
// The script is parsed left to right rather than split on ',' so that
// entries whose <char> is itself ',' (or '\0') are handled correctly.
//
string StringDiff::patch(const string& base, string& diffs)
{
	string result = base;

	// deletions applied so far minus insertions applied so far; used to map
	// an index into the original base onto the partially edited result
	int diffCount = 0;

	const string::size_type end = diffs.length();
	string::size_type pos = 0;

	while ( pos < end )
	{
		// tolerate stray / repeated separators between entries
		if ( diffs[pos] == ',' )
		{
			++pos;
			continue;
		}

		// <index>: leading run of decimal digits (empty run parses as 0)
		const string::size_type numStart = pos;
		while ( pos < end && diffs[pos] >= '0' && diffs[pos] <= '9' )
			++pos;
		const int loc = atoi( diffs.substr( numStart, pos - numStart ).c_str() );

		// <op> and <char>; at() throws std::out_of_range on a truncated entry
		const char sign = diffs.at( pos );
		const char ch = diffs.at( pos + 1 );
		pos += 2;

		// consume the separator that terminates this entry, if present
		if ( pos < end && diffs[pos] == ',' )
			++pos;

		// make edit
		if ( sign == '-' )
		{
			result.erase( loc - diffCount, 1 );
			diffCount++;
		}
		else
		{
			result.insert( loc, string( 1, ch ) );
			diffCount--;
		}
	}

	return result;
}



//
// Build an edit script that turns string a into string b.
//
// The script is a concatenation of entries "<index><op><char>,":
//   "<i>-<c>,"  character c at position i of a is deleted
//   "<j>+<c>,"  character c is inserted so that it ends up at position j of b
//
// Matching positions are obtained from _getLongestCommonSubsequence(), which
// fills matchMap with (position in a) -> (position in b) pairs.
//
// Returns the script; it is empty when nothing has to change.
//
string StringDiff::diff(const string& a, const string& b)
{
	map<int, int> matchMap;
	_getLongestCommonSubsequence( a, b, matchMap );

	const int lastA = a.size() - 1;
	const int lastB = b.size() - 1;

	// Largest matched position in a.  It is -1 when nothing matched, so the
	// merge loop below is skipped and the trailing loops do all the work;
	// starting at 0 would emit a bogus deletion of a[0] for an empty a.
	int maxAi = -1;
	if ( !matchMap.empty() )
		maxAi = matchMap.rbegin()->first;  // map keys are sorted ascending

	string diffString;
	int ai = 0;
	int bi = 0;

	// walk a up to its last matched position
	for ( ; ai <= maxAi; ai++ )
	{
		map<int, int>::const_iterator match = matchMap.find( ai );
		if ( match != matchMap.end() )
		{
			// everything in b before the matched position is an insertion
			const int bLine = match->second;
			for ( ; bi < bLine; bi++ )
				diffString += tos(bi) + "+" + b[bi] + ",";

			bi++;  // step over the matched character itself
		}
		else
		{
			// unmatched character of a: deletion
			diffString += tos(ai) + "-" + a[ai] + ",";
		}
	}

	// whatever is left of a after the last match is deleted ...
	for ( ; ai <= lastA; ai++ )
		diffString += tos(ai) + "-" + a[ai] + ",";

	// ... and whatever is left of b is inserted
	for ( ; bi <= lastB; bi++ )
		diffString += tos(bi) + "+" + b[bi] + ",";

	return diffString;
}


void StringDiff::_getLongestCommonSubsequence( const string& a, 
				                               const string& b, 
				                               map<int, int>& matchMap )
{
	int aStart = 0;
	int bStart = 0;
	int aEnd = a.length()-1;
	int bEnd = b.length()-1;

	// prune matches from beginning and end
	while ( aStart <= aEnd && bStart <= bEnd && a[aStart] == b[bStart] )
		matchMap[aStart++] = bStart++;

	while ( aStart <= aEnd && bStart <= bEnd && a[aEnd] == b[bEnd] )
		matchMap[aEnd--] = bEnd--;

	// get equivalence classes of positions of elements
	map<char, vector<int> > eqClass;
	for ( int i = bStart; i <= bEnd; i++ )
		eqClass[ b[i] ].push_back( i );

	vector<int> threshold;
	multimap<int,ListElement> links;

	for ( int i = aStart; i <= aEnd; i++ )
	{
		if ( eqClass[ a[i] ].size() != 0  )
		{
			vector<int> rev( eqClass[ a[i] ].size() );
			reverse_copy( eqClass[ a[i] ].begin(), eqClass[ a[i] ].end(), 
						  rev.begin() );
			
			int k = 0;
			for ( int j = 0; (unsigned int)j < rev.size(); j++ )
			{
				// optimization so we don't call _replaceNextLarger
				if ( k &&
					 k != -99999 && 
					 threshold[ k ] > rev[j] && 
					 threshold[ k - 1 ] < rev[j] )
				{
					threshold[k] = rev[j];
				}
				else
					k = _replaceNextLarger( threshold, rev[j], k );


				// build linked list
				if ( k != -99999 )
				{
					//
					// Ugly hack to emmulate perl behavior.  perl sucks.
					//
					MMLI li;

					if ( k > 0 )
					{
						pair<MMLI,MMLI> p = links.equal_range(k-1);
						for ( MMLI m = p.first; m != p.second; ++m)
							li = m;
					}
					else
						li = links.end();

					links.insert( make_pair(k, ListElement(li, i, rev[j]) ) );
				}
			}
		}
	}

	if ( threshold.size() > 0 )
	{
		MMLI vi; 
		pair<MMLI,MMLI> p = links.equal_range( threshold.size() -1 );
		for ( MMLI m = p.first; m != p.second; ++m)
			vi = m;

		// for some reason this generates a warning, but it appears
		// to be doing what it is supposed to...
		for ( vi; vi != links.end(); vi = (vi->second).li ) 
			matchMap[ (vi->second).i ] = (vi->second).j;
	}
}


//
// Insert aValue into the ascending array 'array' by overwriting the first
// element that is larger than it, or by appending it when it is larger than
// the current last element (or the array is empty).
//
// Parameters:
//   array  - ascending sequence to update
//   aValue - value to place (not modified)
//   high   - upper index bound for the binary search; 0 or -99999 means
//            "use the last index of array".  It is updated in place while
//            searching.
//
// Returns the index that now holds aValue, or -99999 if aValue was already
// present within the searched range (array is left unchanged in that case).
//
int StringDiff::_replaceNextLarger(vector<int>& array, int& aValue, int& high)
{
	const int NOT_PLACED = -99999;

	// no usable bound supplied: search the whole array
	if ( high == 0 || high == NOT_PLACED )
		high = array.size() - 1;

	// empty array, or value beyond the current last element: append
	if ( high == -1 || array.back() < aValue )
	{
		array.push_back( aValue );
		return high + 1;
	}

	// binary search for the first element larger than aValue
	int lo = 0;
	while ( lo <= high )
	{
		const int mid = ( high + lo ) / 2;
		const int probe = array[ mid ];

		if ( probe == aValue )
			return NOT_PLACED;

		if ( probe < aValue )
			lo = mid + 1;
		else
			high = mid - 1;
	}

	// lo is the insertion point
	array[ lo ] = aValue;
	return lo;
}

}
