#!/usr/bin/perl -w

use strict;
use DBI;
use Bio::SearchIO;
use Getopt::Long;

# Database connection settings; each can be overridden on the command line.
my $host = "";              # empty: no host clause is put into the DSN
my $db = "seqdb_demo";
my $user = "seqdb_user";
my $pass = "seqdb_pass";

my $debug = 0;              # DBI trace level
my $tag = "";               # label identifying this search (required)
my $comment = "";           # optional free-text note stored with the search
my $verbose = 0;            # handed to Bio::SearchIO as -verbose
my $format = "fasta";       # handed to Bio::SearchIO as -format

# --tag and --comment take string values.  They used to be declared as bare
# boolean flags, so "--tag=foo" was rejected and the flag variables were
# never consulted.  Invalid options now stop the run instead of being
# reported and then ignored.
GetOptions("debug=i"    => \$debug,
           "verbose=i"  => \$verbose,
           "host=s"     => \$host,
           "db=s"       => \$db,
           "user=s"     => \$user,
           "password=s" => \$pass,
           "tag=s"      => \$tag,
           "comment=s"  => \$comment,
           "format=s"   => \$format,
          )
    or die "Usage: $0 [--host=HOST] [--db=NAME] [--user=USER] [--password=PASS]\n"
         . "       [--format=FORMAT] [--debug=N] [--verbose=N]\n"
         . "       [--tag=TAG] [--comment=TEXT] [TAG [COMMENT]] < report\n";

# The loading code reads the tag and the comment from the first two
# positional arguments.  To honour both calling styles, normalise @ARGV to
# ( tag [, comment] ): values given as options win, and leftover positional
# arguments fill in whatever is still missing.  With no tag at all @ARGV is
# left empty so the loader can report the missing tag.
{
    my @positional = @ARGV;
    $tag     = shift @positional if $tag eq ""     && @positional;
    $comment = shift @positional if $comment eq "" && @positional;

    @ARGV = $tag eq ""     ? ()
          : $comment eq "" ? ($tag)
          :                  ($tag, $comment);
}

# Connect to the MySQL database.  The DSN is written in key=value form
# ("dbname=...;host=...") rather than the previous "host:NAME;" prefix, which
# is not a documented DSN form.  The host clause is added only when a host
# was supplied; the test is against the empty string so that a host that
# happens to be false in boolean context (e.g. "0") is not dropped.
my $dsn = "dbi:mysql:dbname=$db" . ($host ne "" ? ";host=$host" : "");

my $dbh = DBI->connect($dsn, $user, $pass)
    or die $DBI::errstr;

# Set the DBI trace level requested on the command line.
$dbh->trace($debug);

# Set up one INSERT statement handle per table.
#
# %inserts initially maps each table name to the ordered list of columns its
# INSERT supplies; the order is the order in which values must later be
# passed to execute().
my %inserts = (
    search       => [ qw( tag comment algo algo_ver matrix filtered
                          queryct querysize libct libsize ) ],
    search_query => [ qw( search_id seq_id
                          name descr begin end len type ) ],
    search_lib   => [ qw( search_id seq_id
                          name descr begin end len type ) ],
    search_hit   => [ qw( search_id query_id lib_id
                          score bits expect percid alen
                          qbegin qend lbegin lend strand ) ],
);

# Replace every column list with a prepared handle for
# "INSERT INTO table ( cols ) VALUES ( ?, ?, ... )", using one placeholder
# per column.
foreach my $table (keys %inserts) {
    my $columns      = $inserts{$table};
    my $column_list  = join(", ", @$columns);
    my $placeholders = join(", ", ("?") x @$columns);

    $inserts{$table} = $dbh->prepare(
        "INSERT INTO $table ( $column_list ) VALUES ( $placeholders )"
    );
}

# One extra handle: a lookup for a library sequence that is already stored
# for this search, so a sequence hit by several queries is reused rather
# than inserted again.  Bind values, in order: search_id, seq_id, type,
# begin, len.
my $findlib_sql = q{
    SELECT lib_id
    FROM   search_lib
    WHERE  search_id = ?
      AND  seq_id = ?
      AND  type = ?
      AND  begin = ?
      AND  len = ?
};

my $findlib = $dbh->prepare($findlib_sql);

# OK, ready to parse and load.  The search report is read from STDIN in the
# requested format.  The constructor is called with the direct Class->new
# form; the indirect-object form ("new Class ARGS") is ambiguous to the Perl
# parser and discouraged.
my $parser = Bio::SearchIO->new(
    -fh      => \*STDIN,
    -format  => $format,
    -verbose => $verbose,
);

use Data::Dumper;   # only used to dump an unparseable hit in a fatal error

# Split a sequence name into ($type, $seq_id).  Three forms are recognised,
# tried in this order:
#   gi|12345...      -> ("GI", "12345")
#   aaa.bbb|ident... -> ("aaa.bbb", "ident")
#   type:ident or type-ident (shortest possible type) -> (type, ident)
# Returns an empty list when the name matches none of them.
sub _parse_seq_name {
    my ($name) = @_;

    return ("GI", $1)
        if $name =~ m/^gi\|(\d+)/;
    return ($1, $2)
        if $name =~ m/^ ( [^ \. ]+ \. [^ \| ]+ ) \| ( [^ \| \s ]+ ) /x;
    return ($1, $2)
        if $name =~ m/^(\S+?)[\:\-](\S+)/;
    return;
}

# Return the start coordinate carried in a description as "@C:<digits>", or
# 1 when the description is undefined or has no such marker.
sub _begin_offset {
    my ($descr) = @_;

    return $1 if defined $descr && $descr =~ m/ \@C: (\d+) /x;
    return 1;
}

# Convert one BioPerl strand marker to "f", "r" or "u".  A false or missing
# marker is treated as 1 (forward); "-1/1" means mixed and maps to "u".
sub _strand_code {
    my ($marker) = @_;

    $marker ||= "1";
    return "u" if $marker eq "-1/1";
    return $marker == -1 ? "r" : "f";
}

# Running totals across all queries in the report.  $search_id stays unset
# until the search row has been inserted for the first query.
my ($search_id, $queryct, $querysize);
while (my $result = $parser->next_result) { # for each query
    $queryct++;
    $querysize += $result->query_length;

    # insert the search metainformation, if we haven't already:
    unless ($search_id) {
        # The tag and optional comment are the first two positional
        # arguments left on the command line.
        my ($search_tag, $search_comment) = @ARGV;
        die "Must supply tag for search!\n" unless defined $search_tag;
        $inserts{search}->execute(
            $search_tag, $search_comment,
            $result->algorithm, $result->algorithm_version,
            $result->get_parameter('matrix'),
            $result->get_parameter('filter') ? 'y' : 'n',
            $queryct, $querysize,
            $result->database_entries, $result->database_letters
        );
        $search_id = $inserts{search}->{mysql_insertid};
    }

    # insert this query:
    my $query_name = $result->query_name;
    my ($query_type, $query_seq_id) = _parse_seq_name($query_name);
    die "Unable to determine query sequence type or ID: $query_name\n"
        unless defined $query_type;

    my $query_descr = $result->query_description;
    my $query_begin = _begin_offset($query_descr);
    my $query_len   = $result->query_length;
    my $query_end   = $query_begin + $query_len - 1;

    $inserts{search_query}->execute(
        $search_id, $query_seq_id, $query_name, $query_descr,
        $query_begin, $query_end, $query_len, $query_type
    );
    my $query_id = $inserts{search_query}->{mysql_insertid};

    while (my $hit = $result->next_hit) { # for each hit
        # now get the library sequence info:
        my $lib_name = $hit->name;
        my ($lib_type, $lib_seq_id) = _parse_seq_name($lib_name);
        die "Unable to determine library sequence type or ID: $lib_name\n" . Dumper($hit)
            unless defined $lib_type;

        my $lib_descr = $hit->description;
        my $lib_begin = _begin_offset($lib_descr);
        my $lib_len   = $hit->length;
        my $lib_end   = $lib_begin + $lib_len - 1;

        # Check whether this sequence has already been hit against in this
        # search.  The row is fetched directly instead of consulting
        # $sth->rows, which DBI does not guarantee for SELECT statements.
        $findlib->execute($search_id, $lib_seq_id, $lib_type, $lib_begin, $lib_len);
        my ($lib_id) = $findlib->fetchrow_array;
        $findlib->finish;

        unless (defined $lib_id) {
            # not stored yet; insert it now:
            $inserts{search_lib}->execute(
                $search_id, $lib_seq_id, $lib_name, $lib_descr,
                $lib_begin, $lib_end, $lib_len, $lib_type
            );
            $lib_id = $inserts{search_lib}->{mysql_insertid};
        }

        # Combine the query and library strands into a single code:
        # "u" if either side is mixed, "r" if exactly one side is reversed,
        # otherwise "f".  Once "u" is ruled out both codes are "f" or "r",
        # so "exactly one reversed" is the same as "the two codes differ".
        my ($qstrand, $lstrand) = map { _strand_code($_) } ($hit->strand)[0, 1];
        my $strand = ($qstrand eq "u" || $lstrand eq "u") ? "u"
                   : ($qstrand ne $lstrand)               ? "r"
                   :                                        "f";

        # finally ready to store the hit alignment info:
        $inserts{search_hit}->execute(
            $search_id, $query_id, $lib_id,
            $hit->raw_score, $hit->bits, $hit->significance,
            $hit->frac_identical, $hit->length_aln,
            $hit->start('query'), $hit->end('query'),
            $hit->start('hit'), $hit->end('hit'),
            $strand
        );
    }
}

# Update the size of the total search, if more than one query.  The search
# row was written while only the first query had been counted, so the final
# totals are stored here.  Values are passed as bind parameters rather than
# interpolated into the SQL, and the defined() guard keeps an empty report
# (no queries parsed, so $queryct was never set) from triggering an
# uninitialized-value warning.
$dbh->do(
    q{
        UPDATE search
        SET    queryct = ?,
               querysize = ?
        WHERE  search_id = ?
    },
    undef,
    $queryct, $querysize, $search_id
) if defined $queryct && $queryct > 1;
