#!/usr/bin/perl -Tw

#=============================================================================
# 
#  file:  start.pl
# 
#  Copyright (c) 2004, Michael E. Smoot and the University of Virginia
#  All rights reverved.
# 
#  See the file COPYRIGHT in the top directory of this distribution for
#  more information.
#  
#  THE SOFTWARE IS PROVIDED _AS IS_, WITHOUT WARRANTY OF ANY KIND, EXPRESS 
#  OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
#  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 
#  THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 
#  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
#  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
#  DEALINGS IN THE SOFTWARE.  
#  
#============================================================================ 


use strict;
use CGI qw(:standard);
use CGI::Carp qw(fatalsToBrowser carpout set_message carp);
use Bio::Seq;
use Bio::DB::GenPept;
use LWP::Simple qw(get);
use File::Copy;
#use Error qw(:try);
use vars qw( $webBase $webTmp $tmp %scoringMatrixHash $NOPTALIGN);

main();

#----------------------------------------------------------------------------
# main
#----------------------------------------------------------------------------
sub main
{
	#########################################################################
	##  Installation specific code.  Change as necessary.

	# The path to the noptalign executable.
	$NOPTALIGN = '/wrpx00.p0/users/mes5k/software/noptalign2/src/Generators/noptalign';

	# The base URL for the project. 
	$webBase = 'http://wrpx00.bioch.virginia.edu/noptalign';

	# The temp URL for the project (can be the same as base). 
	$webTmp = 'http://wrpx00.bioch.virginia.edu/tmp/noptalign';

	# The path to the temp directory.
	$tmp = '/var/www/html/tmp/noptalign';


	#########################################################################
	##  Common code. You shouldn't need to change anything below this point.

	$ENV{PATH} = '';
	openLog();
	my $query = new CGI;
	if ( !$query->param ) 
	{ 
		printDataEntryPage($query); 
	} 
	else 
	{ 
		printAppletPage($query); 
	}

}


#----------------------------------------------------------------------------
# Print the intial data entry page.
#----------------------------------------------------------------------------
sub printDataEntryPage
{
	my ($q) = @_;
	my @tempList = keys %scoringMatrixHash; 
	my @scoringMatrixList = sort @tempList;

	my %idDbLabels = ('protein' => 'Protein', 'nucleotide' => 'DNA' );
	my @idDbOptions = keys %idDbLabels;

	my %inputLabels = ('text' => 'FASTA format','db' => 'Accession/GI number');
	my @inputOptions = keys %inputLabels;

	my $JAVASCRIPT = 
	'function addParam( sm, gc, gx )
	{
		param_list.value = sm + "," + gc + "," + gx + "\n" + param_list.value; 
		scoring_matrix.selectedIndex = 0;
		gap_extend.value = "";
		gap_create.value = "";
	}';
	
	print $q->header;
	print $q->start_html(-title=>'Near-optimal Sequence Alignment',
	                     -script=>$JAVASCRIPT ),
   		$q->h1('Near-optimal Sequence Alignment Start Page'),
		$q->br,$q->br,$q->hr,
   		$q->start_multipart_form(),

		$q->table($q->Tr([ $q->td( [
			$q->b('Sequence type:'),
			$q->radio_group(-name=>'id_database',-values=>\@idDbOptions,
		                -default=>'protein',-labels=>\%idDbLabels) 
		] ) ] ) ), $q->br,

		$q->table($q->Tr({-valign=>'TOP'}, [
			$q->td([
				$q->b('Input format:'),
				$q->popup_menu(-name=>'input_type_seq1',
						       -values=>\@inputOptions,
           			           -default=>'db',-labels=>\%inputLabels),	
				'Subsequence from:',
				$q->textfield(-name=>'seq1_begin',-default=>'',-size=>'3'),
				'to:',
				$q->textfield(-name=>'seq1_end',-default=>'',-size=>'3') ])])),

		$q->table($q->Tr({-valign=>'TOP'}, [
			$q->td([
				$q->b('Sequence 1:'), 
				$q->textarea(-name=>'seq1Text',-default=>'',
		   	              -rows=>'4',-columns=>'40') ] )])), 

		$q->table($q->Tr({-valign=>'TOP'}, [
			$q->td([
				$q->b('Input format:'),
				$q->popup_menu(-name=>'input_type_seq2',
						       -values=>\@inputOptions,
           			           -default=>'db',-labels=>\%inputLabels), 
				'Subsequence from:',
				$q->textfield(-name=>'seq2_begin',-default=>'',-size=>'3'),
				'to:',
				$q->textfield(-name=>'seq2_end',-default=>'',-size=>'3') ])])),	

		$q->table($q->Tr({-valign=>'TOP'}, [
			$q->td( [
				$q->b('Sequence 2:'), 
				$q->textarea(-name=>'seq2Text',-default=>'',
	       	                 -rows=>'4',-columns=>'40'),
				$q->table($q->Tr({-align=>'RIGHT'}, [
					$q->td([ spaces(10), 
					         $q->submit(-name=>'webstart_button', 
					         -value=>'Begin Alignment: WEBSTART') ] ),
					$q->td([ spaces(10),
					         $q->a({target=>'new', 
							        href=>"$webBase/java_options.html"},
					                "What's the difference?")] ),
					$q->td([ spaces(10),
					         $q->submit(-name=>'applet_button', 
					         -value=>'Begin Alignment: APPLET') ] ) 
				] ) )
		] ) ] ) ), $q->br,

		$q->hr,

		$q->table($q->Tr([ $q->td( [
			$q->b('Alignment Parameters:'),
			$q->popup_menu(-name=>'scoring_matrix',-values=>['-','BLOSUM50',
				'BLOSUM62','BLOSUM80','PAM250','PAM120'],
               -default=>'BLOSUM50'),
			$q->textfield(-name=>'gap_create',-default=>'-10',-size=>'3'),
			$q->textfield(-name=>'gap_extend',-default=>'-2',-size=>'3'),
			$q->button(-name=>'add_param',-value=>'Add parameters', 
		           -onClick=>'addParam(scoring_matrix.value, gap_create.value, 
		                             gap_extend.value)' ),
			$q->textarea(-name=>'param_list', -value=>'', -rows=>'3', 
					     -columns=>'20')
		] ) ] ) ), $q->br,


		$q->table($q->Tr([ $q->td( [
			$q->b('Near-optimal Neighborhood:')," Upper bound: ",
			$q->textfield(-name=>'upperBound',-default=>'1',-size=>'5'),
			"Lower bound :",
			$q->textfield(-name=>'lowerBound',-default=>'0.98',-size=>'5')
		] ) ] ) ), $q->br,

		$q->br,$q->br,

		$q->b('Specify a '), 
		$q->b(
		 	$q->a({target=>'new', 
			       href=>"http://www.sanger.ac.uk/Software/formats/GFF/"},
		          "GFF formatted")),
		$q->b(' annotation file: '),

		$q->filefield(-name=>'gff_upload', -size=>50, -maxlength=>80),

		$q->br,$q->br,
		$q->hr,$q->hr,

		$q->br,
		$q->b('NOTE: The longer the sequences and the larger the 
		    near-optimal neighborhood, the longer it will take to generate 
			the output.  Also note that the applet takes a little while
			to initialize.  Please be Patient!'),
		$q->br,$q->br,$q->br,
		"Run an ", 	
		$q->b(
			$q->a({target=>'new', href=>"$webTmp/example.html"}, 
			      "applet example.") ),
		$q->br,
		"Run a ", 	
		$q->b(
			$q->a({target=>'new', href=>"$webTmp/example.JNLP"}, 
			      "Webstart example.") ),
		$q->br,$q->br,$q->br,
		"Please cite: ",
		$q->br,$q->br,
		"Michael E. Smoot, Stephanie A. Guerlain, William R. Pearson. ",
		$q->b("2004. "),
		$q->a({target=>'new', href=>"http://bioinformatics.oupjournals.org/cgi/content/abstract/bth013?ijkey=884aekEMdA63s&keytype=ref"}, "Visualization of near-optimal sequence alignments."), 
		$q->i(" Bioinformatics."), 
		" 20(6),953-958.",
		$q->br,$q->br,$q->br,
		"Please report any errors to ",
		$q->a({target=>'new', href=>"mailto:mes5k\@cs.virginia.edu"}, 
			      "mes5k\@cs.virginia.edu"),
		". Be sure to include the alignment parameters used in the report.",
		$q->br,$q->br,$q->br,
		"Powered by ", 	
		$q->a({target=>'new', href=>"http://bioperl.org"}, "BioPerl"),
		", ",
		$q->a({target=>'new', href=>"http://biojava.org"}, "BioJava"),
		", and ",
		$q->a({target=>'new', href=>"http://www.cs.umd.edu/hcil/piccolo/"}, 
			"Piccolo."),
		$q->endform, $q->end_html;
}

#----------------------------------------------------------------------------
# Open the Carp error log.
#----------------------------------------------------------------------------
sub openLog
{
    my $errorLog = "$tmp/near_opt.log";
    open(LOG, ">>$errorLog") || dieLogError( "Could not open $errorLog\n$!");
	#print LOG "got message\n";
    carpout(\*LOG);
    set_message("Please report any errors to " . 
	            "<a href=\"mailto:mes5k\@virginia.edu\">mes5k\@virginia.edu</a>");
}


#----------------------------------------------------------------------------
# Get submitted parameters, validate them, create command string, execute
# command and write applet HTML.
#----------------------------------------------------------------------------
sub printAppletPage
{
	my ($q) = @_;

	#
	# Get, validate and launder all parameters.
	#
	my @featureList = ();

	my ($seq1,$seq1Name,$seq1Desc,$seq1Begin,$seq1End,$finalFile1) 
										= getSequence(1,\@featureList,$q);
	my ($seq2,$seq2Name,$seq2Desc,$seq2Begin,$seq2End,$finalFile2) 
										= getSequence(2,\@featureList,$q);

	# upload any features
	readUploadedFeatures( \@featureList, $q );

	# is it DNA?
	my $isDNA = checkDNA( $seq1, $seq2 );

	my @scoringParameters = getScoringParams($q);

	my $upperBound = getBound('upperBound',$q);
	my $lowerBound = getBound('lowerBound',$q);

	if ( $lowerBound > $upperBound )
	{ dieLogError( "Bounds incorrectly specified.  lower > upper.\n"); }

	#
	# Everything should be untainted at this point so build the commands
	# and process it.
	#
	my @commandResults = generateAlignments( \@scoringParameters,
									         $isDNA,
			                                 $lowerBound,
									         $upperBound,
									         $finalFile1,
									         $finalFile2,
											 $seq1Name,
											 $seq2Name,
											 $seq1Desc,
											 $seq2Desc);

	#
	# Write the alignment set file 
	#
	my $tempID =  randID();
	my $oFile = "alignment_$tempID";
	open(DATAFILE,">$tmp/$oFile") 
		|| dieLogError( "Couldn't open $tmp/$oFile\n$!\n");
	foreach my $line (@commandResults) { print DATAFILE "$line\n"; }
	close DATAFILE;

	# 
	# Write the feature file.
	#
	my $gffFile = "features_$tempID";
	open(FEATURES,">$tmp/$gffFile") 
		|| dieLogError( "Couldn't open $tmp/$gffFile\n$!\n");
	foreach my $ln (@featureList) { print FEATURES "$ln\n"; }
	close FEATURES;

	# 
	# Write the applet/webstart file that references the alignment set and
	# redirects user to said file.
	#
	my $applet = $q->param('applet_button') || '';
	my $webstart = $q->param('webstart_button') || '';

	my $appletFile = createApplet( $tempID );
	my $jnlpFile = createJNLPfile( $tempID );

	if ( $applet ne '' && $webstart eq '' )
	{
		print redirect("$webTmp/$appletFile");
	}
	elsif ( $applet eq '' && $webstart ne '' )
	{
		print redirect("$webTmp/$jnlpFile");
	}
	else
	{
		print LOG "webstart '$webstart\'\n";	
		print LOG "applet '$applet\'\n";	
		dieLogError( "ERROR: applet or webstart not selected.\n");
	}

	unlink($finalFile1);
	unlink($finalFile2);
}


#----------------------------------------------------------------------------
# Validate the sequences.  First peal off the FASTA header information if
# it exists, then nuke anything that isn't an alphabetic char.  If at this 
# point there are any invalid characters, namely o or j, die since we're likely
# not dealing with a valid sequence.
#
# DNA Alphabet:  A C G T U R Y M W S K D H V B N X 
# Amino Acid Alphabet:  A R N D C Q E G H I L K M F P S T W Y V B Z X
# Combined alphabets: A B C D E F G H I K L M N P Q R S T U V W X Y Z
# Combined alphabets, restated:  [a-ik-pq-z]
#----------------------------------------------------------------------------
sub validateSequence
{
	my $seq = $_[0];
	my $temp = '';

#	if ( $seq =~ /^\s*>.+\n(\w+)/ )
#	{
#		$seq = $1;
#	}
	# is it in fasta format
	if ( $seq =~ /^\s*>.+/ || $seq=~ /^.*>.+\s+\w+/ )
	{
		until ( ( $temp = substr $seq, 0, 1 ) =~ />/ )
		{ $seq = substr $seq, 1; }
	
		until ( ( $temp = substr $seq, 0, 1 ) =~ /\n/ || $temp =~ /\r/ )
		{ $seq = substr $seq, 1; }
	}

	$seq =~ s/\s|\W|\d|_//g; 
	$seq = lc $seq;

	# explicitly launder seq
	if ( $seq =~ /^([a-ik-pq-z]+)$/ ) { $seq = $1; }
	else { dieLogError( "Invalid seq:  $seq\n$!\n"); }
	
	return $seq;  
}

#----------------------------------------------------------------------------
# Check sequences to see if they are DNA or Amino Acids.
#----------------------------------------------------------------------------
sub checkDNA
{
	my $seq1 = $_[0];
	my $seq2 = $_[1];

	my $threshold = 0.8;
	
	my %s1C = ();
	%s1C = ( 'a' => 0, 'c' => 0, 'g' => 0, 't' => 0 );
	my %s2C = ();
	%s2C = ( 'a' => 0, 'c' => 0, 'g' => 0, 't' => 0 );

	my $temp = '';

	my $total1 = 0;
	my $total2 = 0;

	while ( $temp = chop $seq1 ) { $s1C{$temp}++; $total1++; }
	while ( $temp = chop $seq2 ) { $s2C{$temp}++; $total2++; }

	my $acgt1 = $s1C{'a'} + $s1C{'c'} + $s1C{'g'} + $s1C{'t'};
	my $acgt2 = $s2C{'a'} + $s2C{'c'} + $s2C{'g'} + $s2C{'t'};

	if ( $total1 > 0 && $total2 > 0 &&
	     $acgt1/$total1 > $threshold && $acgt2/$total2 > $threshold ) 
		{ return 1; }
	else 
		{ return 0; }
}

#-----------------------------------------------------------------------------
# Removes taintedness from number.
#-----------------------------------------------------------------------------
sub validateNumber
{
	my $num = $_[0];

	if ( $num =~ /^([\d.-]+)$/ ) { $num = $1; }
	else { dieLogError( "Invalid number entered:  $num\n$!\n"); }

	return $num;
}

#-----------------------------------------------------------------------------
# Removes taintedness from word.
#-----------------------------------------------------------------------------
sub validateWord
{
	my $word = $_[0];
	$word =~ s/\s+//g;

	if ( $word =~ /^(\w+)$/ ) { $word = $1; }
	else { dieLogError( "Invalid word:  $word\n$!\n"); }

	return $word;
}

#-----------------------------------------------------------------------------
# Removes taintedness from word.
#-----------------------------------------------------------------------------
sub validateMultipleWords
{
	my $word = $_[0];

	$word =~ s/~/ /g;

	# Per Bill we no longer launder input names.
#	if ( $word =~ /^((\w+\s*)+)$/ ) { $word = $1; }
#	else { dieLogError( "Invalid word:  $word\n$!\n"); }

	return $word;
}

#-----------------------------------------------------------------------------
# Removes taintedness from position string.
#-----------------------------------------------------------------------------
sub validatePositions
{
	my $positions = $_[0];

	# only allow commas, dashes and numbers
	$positions =~ s/![,\-\d]//g;

	# launder
	if ( $positions =~ /^((\d*,*-*\d*)+)$/ ) { $positions = $1; }
	else { dieLogError( "Invalid positions:  $positions\n"); }

	# must begin and end in a number
	if ( $positions )
	{
		while ( $positions !~ /^\d/ and $positions != "" ) 
			{ $positions =  substr $positions, 1; }
		while ( $positions !~ /\d$/ and $positions != "" ) 
			{ chop $positions; }
	}

	# can't have mulitple ranges: 4-10-23 
	if ( $positions =~ /\d-\d+-\d/ ) 
		{ dieLogError( "Invalid multiple ranges: $positions\n"); }
	
	return $positions;
}

#----------------------------------------------------------------------------
# Validates list of scoring matrix names 
#----------------------------------------------------------------------------
sub validateMatrices
{
	my @matrices = @_;
	my %safeMatrices = ();

	foreach my $name ( @matrices )
	{
		if ( $name =~ /(\w+\,\-*\d+\,\-*\d+)/ )
		{
			 $safeMatrices{ $1 } ++;
		}
	}

	return (keys %safeMatrices);
}

#----------------------------------------------------------------------------
# Writes temp sequence file.
#----------------------------------------------------------------------------
sub writeSeqFile
{
	my $seq = $_[0];
	my $rid = randID(); 
	my $fileName = "$tmp/near_tmp.$rid";

	open(TMP,">$fileName") || dieLogError("Couldn't open $fileName!\n$!");

	print TMP "> fasta header\n$seq\n";

	close TMP;

	return $fileName;
}

#----------------------------------------------------------------------------
# Create a random ID 
#----------------------------------------------------------------------------
sub randID
{
	my $val = ( rand 100000 ) % 100000;
	return $val;
}

#----------------------------------------------------------------------------
# Not currently used, see createMungedApplet below
#----------------------------------------------------------------------------
sub createApplet
{
	my $id = shift;
	my $name = "noptalign_$id.html";

	open FILE,">$tmp/$name" or dieLogError("Could not open file: $tmp/$name\n");

	print FILE "
<html>
<head>
<title>Near Optimal Sequence Alignment</title>
</head>
<body BGCOLOR=\"#ffffff\" LINK=\"#000099\">
<h2>Near Optimal Sequence Alignment Comparison</h2>
<applet code=\"edu/virginia/bioch/nopt/SeqCompareApplet.class\"
        archive=\"$webBase/noptdisplay-applet.jar,$webBase/piccolo.jar,$webBase/reduced-biojava.jar\"
        width=\"850\"
        height=\"600\">
	<param name=\"alignmentURL\" value=\"$webTmp/alignment_$id\">
	<param name=\"featureURL\" value=\"$webTmp/features_$id\">
</applet>
<br><br>
<ul>
<li><a target=\"new\" href=\"$webBase/problems.html\">Having problems?</a></li>
<li><a href=\"$webTmp/noptalign_$id.JNLP\">Try WebStart</a></li>
<li><a href=\"$webBase/start.cgi\">Start Page (back)</a></li>
</ul>
</body>
</html>";

	close FILE;
	return $name;
}


#----------------------------------------------------------------------------
#
#----------------------------------------------------------------------------
sub createJNLPfile
{
    my $id = shift;
    my $name = "noptalign_$id.JNLP";

    open FILE,">$tmp/$name" or dieLogError("Could not open file: $tmp/$name\n");

    print FILE "
<?xml version=\"1.0\" encoding=\"utf-8\"?>

<jnlp
    spec=\"1.0+\"
    codebase=\"$webTmp\"
    href=\"$name\">

<information>
    <title>Near Optimal Alignment Display</title>
    <vendor>University of Virginia</vendor>
    <homepage href=\"$webBase/index.html\"/>
    <description>
		Software to display a set of near optimal sequence alignments.
    </description>
    <offline-allowed/>
</information>

<security>
    <all-permissions/>
</security>

<resources>
    <j2se version=\"1.4\"/>
    <jar href=\"noptdisplay.jar\"/>
    <jar href=\"piccolo.jar\"/>
    <jar href=\"reduced-biojava.jar\"/>
    <jar href=\"batik-awt-util.jar\"/>
    <jar href=\"batik-dom.jar\"/>
    <jar href=\"batik-svg-dom.jar\"/>
    <jar href=\"batik-svggen.jar\"/>
    <jar href=\"batik-util.jar\"/>
    <jar href=\"batik-xml.jar\"/>
</resources>

<application-desc main-class=\"edu.virginia.bioch.nopt.SeqCompareMain\">
    <argument>-A</argument>
    <argument>$webTmp/alignment_$id</argument>
    <argument>-F</argument>
    <argument>$webTmp/features_$id</argument>
</application-desc>

</jnlp>
    ";

    close FILE;

    return $name;
}

#----------------------------------------------------------------------------
# Fetches sequence from NCBI given locus, accession number or gid
#----------------------------------------------------------------------------
sub fetchSequence
{
 	my $id = $_[0];
	my $begin = $_[1];
 	my $db = $_[2];
	my $features = $_[3];
    my $seq = '';
    my $seqName = ''; 
    my $seqDesc = ''; 

    my $database;
	
	#try {

	if ( $db eq 'protein' )
	{
		$database = new Bio::DB::GenPept();
	}
	else
	{
		$database = new Bio::DB::GenBank();
	}
	
	dieLogError( "Couldn't connect to database\n") unless ( $database );

	my $seqObj = $database->get_Seq_by_id($id);
	if ( !$seqObj )
	{
		$seqObj = $database->get_Seq_by_acc($id);
		dieLogError( "Couldn't find anything for id: $id\n") unless ( $seqObj );
	}

	$seq = $seqObj->seq();
	$seqName = $seqObj->display_id();
	$seqDesc = $seqObj->desc();

	my @bioFeatures = $seqObj->top_SeqFeatures();
	foreach my $feat (@bioFeatures)
	{
		my $fn = "";
		print LOG "NEW feature \n";
		foreach my $tag ( $feat->all_tags() )
		{
			print LOG "feature '$tag'\n";
			$fn .= join(' ',$feat->each_tag_value($tag)) . ' ';
			$fn =~ s/\s/\~/g; # get rid of all whitespace for GFF
		}

		my $nm = $seqName;
		my $start = $feat->start - $begin;
		my $end = $feat->end - $begin;
		$nm =~ s/\s/\~/g;

		# sanity check
		if ( $fn eq "" || $nm eq "" || $start eq "" || $end eq "" ) { next; }

		push @$features, "$nm\tNCBI\t$fn\t$start\t$end\t.\t.\t.";
	}

	#} catch Bio::Root::Exception with {
	#	my $err = shift;
	#	print LOG "caught exception\n";
	#	dieLogError( "YODEL $err");
	#}

    if ( $seq eq "" )
    {
        dieLogError( "No sequence found for id \"$id\" in database \"$db\"\n");
	}

	return $seq, $seqName, $seqDesc;  
}


#----------------------------------------------------------------------------
#
#----------------------------------------------------------------------------
sub spaces
{
	my $num = shift @_;
	my $spaceString = "";
	for my $i (1..$num) { $spaceString .= "&nbsp;"; }

	return $spaceString;
}

#----------------------------------------------------------------------------
#
#----------------------------------------------------------------------------
sub getSequence
{
	my $num = shift;
	my $featList = shift;
	my $q = shift;

	my $seqText = $q->param("seq".$num."Text") || '';
	my $inputType = $q->param("input_type_seq".$num) || '';
	my $idDb = $q->param('id_database') || '';

	my $seq = '';
	my $seqName = '';
	my $seqDesc = '';
	my $finalFile = '';

	# get the beginning of the subseq
	my $seqBegin = $q->param("seq".$num."_begin") || '0';
	$seqBegin = validateNumber( $seqBegin ); 

	# now fetch actual sequences
	if ( $inputType eq 'text' and $seqText ne '' )
	{
		$seq = validateSequence( $seqText );
	}
	elsif ( $inputType eq 'db' and $seqText ne '' )
	{
		# If the seqText is shorter than 20 chars and it doesn't look like a
		# fasta sequence, then assume we have an id.  Otherwise just parse
		# it as sequence.
		if ( length( $seqText ) < 20 && $seqText !~ /^>.+\n(\w+)/ )
		{
			#print LOG "'$seqText' looks like an id\n";
			$seqText = validateWord( $seqText );
			$idDb = validateWord( $idDb );

			($seqText,$seqName,$seqDesc) = 
					fetchSequence($seqText,$seqBegin,$idDb,$featList);
		}

		$seq = validateSequence( $seqText );
	}
	else 
	{
		dieLogError( "No or conflicting sequence information " .
		             "specified for seq $num\n");
	}

	if ( $seqName eq '' ) { $seqName = "sequence" . $num; }
	if ( $seqDesc eq '' ) { $seqDesc = "($num)"; }

	# handle end subsequences
	my $seqEnd = $q->param("seq".$num."_end") || length( $seq );
	$seqEnd = validateNumber( $seqEnd ); 

	if ( $seqBegin >= $seqEnd ||
		 $seqBegin < 0 ||
		 $seqEnd > length( $seq ) )
 	{
		print LOG "Invalid subsequence $num! begin: $seqBegin end: $seqEnd   ".
		          "  ignoring!\n";
		$seqBegin = 0;
		$seqEnd = length( $seq );
	}

	# trim the seqs appropriately
	$seq = substr $seq, $seqBegin, ($seqEnd - $seqBegin);

	# now write the files
	$finalFile = writeSeqFile( $seq );

	return ($seq,$seqName,$seqDesc,$seqBegin,$seqEnd,$finalFile);
}

#----------------------------------------------------------------------------
#
#----------------------------------------------------------------------------
sub getScoringParams
{
	my $q = shift;

	my $scoringList = $q->param('param_list'); 
	my $scoringMatrix = $q->param('scoring_matrix');
	my $gapCreate = $q->param('gap_create');
	my $gapExtend = $q->param('gap_extend');

	if ( $scoringMatrix ne '-' and 
	     $gapCreate ne '' and
 		 $gapExtend ne '' )
	{
		$scoringMatrix = validateWord( $scoringMatrix );
		$gapCreate = validateNumber( $gapCreate );
		$gapExtend = validateNumber( $gapExtend );
		$scoringList .= "\n$scoringMatrix,$gapCreate,$gapExtend";
	}

	my @scoringParameters = split /\s/, $scoringList;
	@scoringParameters = validateMatrices(@scoringParameters);
	#print LOG "post list @scoringParameters\n";

	if ( $#scoringParameters < 0 )
	{
		dieLogError( "Alignment parameters not specified.\n");
	}

	return @scoringParameters;
}

#----------------------------------------------------------------------------
# Runs the noptalign command for the parameters specified
#----------------------------------------------------------------------------
sub generateAlignments
{
	my $scoringParameters = shift;
	my $isDNA = shift;
	my $lowerBound = shift;
	my $upperBound = shift;
	my $finalFile1 = shift;
	my $finalFile2 = shift;
	my $seq1Name = shift;
	my $seq2Name = shift;
	my $seq1Desc = shift;
	my $seq2Desc = shift;

	my @commandResults = ();
	my $i = 0;
	my $paramCount = 0;
	my $runningTotal = 0;
	my $mySeq1 = "";
	my $mySeq2 = "";

	foreach my $alignParam ( @$scoringParameters )
	{
		my ( $mat, $gapC, $gapE ) = split /,/, $alignParam;
		$mat = lc $mat;

		my $command = "$NOPTALIGN ";

		if ( $isDNA ) { $command .= " -n "; $mat = "dna"; }

		$command .= " -b $lowerBound ";
		$command .= " -u $upperBound ";
		$command .= " -f $gapC ";
		$command .= " -g $gapE "; 
		$command .= " -s $mat ";
		$command .= " -1 $finalFile1 ";
		$command .= " -2 $finalFile2 ";
		$command .= " -l 500 ";
		$command .= " -r ";

		#carp "command: $command" ;
		my $date = scalar localtime;
		my $host = $ENV{'REMOTE_HOST'} || 'no host';
		my $addr = $ENV{'REMOTE_ADDR'} || 'no addr';

		print LOG "[$date] $host @ $addr -  command:\n$command\n\n";

		open(COMM,"$command|") || 
			dieLogError( "Command: \n$command\nFailed!\n$!");
		while (<COMM>)
		{
			chomp;
			if ( $_ =~ /^ERROR.+/ )
			{
				dieLogError( "Got command error:\n$_\n\n");
			}

			# append seq names and descriptions to the parameter lists
			elsif ( $_ =~ /^p~.+/ )
			{
				$_ .= "|seq1Name=$seq1Name|seq2Name=$seq2Name" .
				      "|seq1Description=$seq1Desc|seq2Description=$seq2Desc";
			}

			push @commandResults, $_;
		}		
		close COMM;
	}

	return @commandResults;
}

#----------------------------------------------------------------------------
#
#----------------------------------------------------------------------------
sub getBound
{
	my $name = shift;
	my $q = shift;

	my $bound = $q->param($name) || '';	
	if ( $bound lt 0 || $bound gt 1 )
	{
		dieLogError( "Near-optimal neighborhood $name not " . 
		             "specified correctly\n");
	}
	$bound = validateNumber($bound);
}

#----------------------------------------------------------------------------
#
#----------------------------------------------------------------------------
sub readUploadedFeatures
{
	my $features = shift;
	my $q = shift;

	my $fileHandle = $q->upload('gff_upload');

	if ( defined $fileHandle )
	{
		while (<$fileHandle>)
		{
			chomp;
			if ($_ =~ /^(\w+)\s(\w+)\s(\w+)\s(\d+)\s(\d+)\s(.+)\s(.+)\s(.+).*/)
			{
				push @$features, "$1\t$2\t$3\t$4\t$5\t.\t.\t.";
			}
		}
		close $fileHandle;
	}
}

#----------------------------------------------------------------------------
# Because Carp does not work with mod_perl, create this hack instead:
#----------------------------------------------------------------------------
sub dieLogError
{
	my $error = shift;
	print LOG "ERROR: $error\n";
	print "Content-Type: text/html; charset=ISO-8859-1\n";
	print "
<html>
<head>
<title>Near Optimal Sequence Alignment ERROR</title>
</head>
<body>
<h2><font color=\"red\">Error in processing request!</font></h2>
$error
</body>
</html>
";
	exit(0);
}
