
/****************************************************************************** 
 * 
 *  file:  SequenceReader.cpp
 * 
 *  Copyright (c) 2003,  University of Virginia.
 *  All rights reserved.
 * 
 *  See the file COPYRIGHT in the top directory of this distribution for
 *  more information.
 *  
 *  THE SOFTWARE IS PROVIDED _AS IS_, WITHOUT WARRANTY OF ANY KIND, EXPRESS 
 *  OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
 *  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 
 *  THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 
 *  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
 *  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
 *  DEALINGS IN THE SOFTWARE.  
 *  
 *****************************************************************************/ 


#include "SequenceReader.h"

namespace NOPT {

/**
 * Reads a single-record FASTA file.
 *
 * Every character up to and including the first newline is appended to
 * `header`; every non-whitespace character after that newline is appended
 * to `seq`.  Both output strings are appended to, not cleared, so callers
 * that want only this file's contents must pass in empty strings.
 *
 * @param filename  path of the file to read
 * @param seq       receives the sequence characters (whitespace removed)
 * @param header    receives the first line, including its trailing newline
 * @throws GenericException if the file could not be opened
 */
void SequenceReader::readFasta( const string& filename,
                                string& seq,
                                string& header )
{
    ifstream seqFile(filename.c_str());
    // A failed open sets failbit, not badbit, so testing bad() alone would
    // never report a missing or unreadable file.  operator! covers both.
    if ( !seqFile )
        throw(GenericException("Bad ifstream for filename: " + filename));

    // Don't initially skip whitespace: the header line may contain spaces
    // and we need to see the newline that terminates it.
    seqFile.unsetf(ios::skipws);

    char temp;
    bool gotFirstNewLine = false;
    while ( seqFile >> temp )
    {
        if ( gotFirstNewLine )
        {
            seq += temp;
            continue;
        }

        // Still inside the header line; the newline itself is kept.
        header += temp;
        if ( temp == '\n' )
        {
            // Header finished: from here on let the stream drop
            // whitespace so only sequence characters are extracted.
            gotFirstNewLine = true;
            seqFile.setf(ios::skipws);
        }
    }

    seqFile.close();
}

/**
 * Reads every record of a multi-record FASTA file.
 *
 * A record's header is its first line, stored with its trailing newline.
 * Its sequence is every following non-whitespace character up to the next
 * '>' or end of file.  The '>' that ends a sequence is consumed as a
 * delimiter and restored at the front of the next header.
 *
 * Records are appended to `seqs` and `headers` in file order; neither
 * vector is cleared first.  The final record is stored only if its
 * sequence is non-empty.
 *
 * @param filename  path of the file to read
 * @param seqs      receives one sequence string per record
 * @param headers   receives one header string per record
 * @throws GenericException if the file could not be opened
 */
void SequenceReader::readMultipleFasta( const string& filename,
                                        vector<string>& seqs,
                                        vector<string>& headers )
{
    ifstream seqFile(filename.c_str());
    // A failed open sets failbit, not badbit, so testing bad() alone would
    // never report a missing or unreadable file.  operator! covers both.
    if ( !seqFile )
        throw(GenericException("Bad ifstream for filename: " + filename));

    // Don't initially skip whitespace: header lines may contain spaces and
    // we need to see the newline that terminates them.
    seqFile.unsetf(ios::skipws);

    string tmpSeq;
    string tmpHeader;
    char temp;
    bool gotFirstNewLine = false;   // true while reading sequence data
    while ( seqFile >> temp )
    {
        if ( gotFirstNewLine )
        {
            if ( temp == '>' )
            {
                // Start of the next record: store the finished one and go
                // back to header mode, where whitespace is significant.
                gotFirstNewLine = false;
                seqFile.unsetf(ios::skipws);

                seqs.push_back(tmpSeq);
                headers.push_back(tmpHeader);

                tmpSeq = "";
                // The '>' was consumed as the delimiter; put it back.
                tmpHeader = ">";
            }
            else
                tmpSeq += temp;
        }
        else
        {
            tmpHeader += temp;
            if ( temp == '\n' )
            {
                // Header finished: skip whitespace while reading the
                // sequence so line breaks are dropped from it.
                gotFirstNewLine = true;
                seqFile.setf(ios::skipws);
            }
        }
    }

    // Add the last record, which has no following '>' to trigger the push.
    if ( tmpSeq.length() > 0 )
    {
        seqs.push_back( tmpSeq );
        if ( tmpHeader.length() > 0 )
            headers.push_back( tmpHeader );
    }

    seqFile.close();
}

/**
 * Reads every record of a multi-record PIR-style file.
 *
 * As parsed here, a record consists of:
 *   1. a first line (containing the leading '>'),
 *   2. one further line,
 *   3. sequence characters terminated by '*'.
 * The stored header is lines 1 and 2 concatenated with their newlines
 * dropped.  The stored sequence is every non-whitespace character between
 * the end of line 2 and the '*'.
 *
 * Records are appended to `seqs` and `headers` in file order; neither
 * vector is cleared first.  A final record with no terminating '*' is
 * stored only if its sequence is non-empty.
 *
 * @param filename  path of the file to read
 * @param seqs      receives one sequence string per record
 * @param headers   receives one header string per record
 * @throws GenericException if the file could not be opened
 */
void SequenceReader::readMultiplePir( const string& filename,
                                      vector<string>& seqs,
                                      vector<string>& headers )
{
    ifstream seqFile(filename.c_str());
    // A failed open sets failbit, not badbit, so testing bad() alone would
    // never report a missing or unreadable file.  operator! covers both.
    if ( !seqFile )
        throw(GenericException("Bad ifstream for filename: " + filename));

    // Don't initially skip whitespace: header lines may contain spaces and
    // we need to see the newlines that terminate them.
    seqFile.unsetf(ios::skipws);

    // Which part of a record the next character belongs to.
    enum ParseState
    {
        FIRST_HEADER_LINE,   // line containing the leading '>'
        SECOND_HEADER_LINE,  // the line immediately after it
        SEQUENCE             // sequence data, up to '*'
    };

    string tmpSeq;
    string tmpHeader;
    char temp;
    ParseState state = FIRST_HEADER_LINE;
    while ( seqFile >> temp )
    {
        switch ( state )
        {
        case SECOND_HEADER_LINE:
            if ( temp == '\n' )
            {
                // Header complete; whitespace in the sequence is noise.
                state = SEQUENCE;
                seqFile.setf(ios::skipws);
            }
            else
                tmpHeader += temp;
            break;

        case SEQUENCE:
            if ( temp == '*' )  // end of this record's sequence
            {
                state = FIRST_HEADER_LINE;
                seqs.push_back(tmpSeq);
                headers.push_back(tmpHeader);

                tmpSeq = "";
                // Reset to empty: unlike FASTA, the next record's '>' has
                // not been consumed yet and will be appended when read.
                // Seeding with ">" here produced headers starting ">>".
                tmpHeader = "";
            }
            else
                tmpSeq += temp;
            break;

        case FIRST_HEADER_LINE:
        default:
            if ( temp == '>' )
            {
                // Start of a header: whitespace matters again (skipws is
                // still on from the previous record's sequence).
                seqFile.unsetf(ios::skipws);
                tmpHeader += temp;
            }
            else if ( temp == '\n' )
                state = SECOND_HEADER_LINE;
            else
                tmpHeader += temp;
            break;
        }
    }

    // Add the last record if it was not terminated by '*'.
    if ( tmpSeq.length() > 0 )
    {
        seqs.push_back( tmpSeq );
        if ( tmpHeader.length() > 0 )
            headers.push_back( tmpHeader );
    }

    seqFile.close();
}
}
