/*****************************************************************
 * outline.c
 *
 * Copyright 1999, Clark Cooper
 * All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the license contained in the
 * COPYING file that comes with the expat distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Read an XML document from standard input and print an element
 * outline on standard output.
 */

/* 
* ===========================================================================
*
*                            PUBLIC DOMAIN NOTICE
*               National Center for Biotechnology Information
*
*  This software/database is a "United States Government Work" under the
*  terms of the United States Copyright Act.  It was written as part of
*  the author's official duties as a United States Government employee and
*  thus cannot be copyrighted.  This software/database is freely available
*  to the public for use. The National Library of Medicine and the U.S.
*  Government have not placed any restriction on its use or reproduction.
*
*  Although all reasonable efforts have been taken to ensure the accuracy
*  and reliability of the software and data, the NLM and the U.S.
*  Government do not and cannot warrant the performance or results that
*  may be obtained by using this software or data. The NLM and the U.S.
*  Government disclaim all warranties, express or implied, including
*  warranties of performance, merchantability or fitness for any particular
*  purpose.
*
*  Please cite the author in any work or product based on this material.
*
* ===========================================================================

	Modified by Tom Madden (National Center for Biotechnology Information) to 
	parse XML produced by BLAST (12/20/2001).  

	The input is an XML file produced by BLAST (on stdin).  Output is
	on stdout and is (for every HSP produced by BLAST):

	query-identifier database-description expect-value database-identifier

	One line per HSP.  Each of the above lines is separated by a blank line 
	to make reading easier.

	This is only an example XML parser, feel free to modify as needed to output
	other data.  Please do not blame Clark Cooper for any bugs in this
	source file.  

	To compile this application download Expat from http://sourceforge.net/projects/expat/
	uncompress, dearchive and compile the Expat library per the included README's.
	Then copy this file and Makefile.blast_parse to the "examples" directory
	and (from that directory) invoke:

	make -f Makefile.blast_parse blast_parse


	Please send questions/comments about this file to toolbox@ncbi.nlm.nih.gov.  

	Please send questions about running BLAST to blast-help@ncbi.nlm.nih.gov.
*/


#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <expat.h>

/* Capacity, in bytes, of every fixed-size buffer declared in this file. */
#define BUFFSIZE	8192

/* Raw input: main() reads stdin into Buff and hands it to the parser. */
char Buff[BUFFSIZE];
/* Not referenced by any function in this file. */
char charbuf[BUFFSIZE];

/*
 * Values captured from the document and printed by end() for each Hsp:
 * query identifier, hit description, hit (database) identifier and the
 * expect value.
 */
char query_id_buff[BUFFSIZE];
char description_buff[BUFFSIZE];
char db_id_buff[BUFFSIZE];
double expect_value;

/* Incremented by start() and decremented by end() for every element. */
int Depth;

/*
 * Capture flags.  start() sets one of these to 1 when it sees the
 * corresponding element (BlastOutput_query-ID, Hit_def, Hit_id,
 * Hsp_evalue); text_handler() consults them to decide where incoming
 * character data belongs.
 */
int query_id;
int description;
int db_id;
int expect;

static void
text_handler(void *data, const XML_Char *s, int len) {

	char mybuff[BUFFSIZE];

	if (len > BUFFSIZE-1)
		len = BUFFSIZE-1;

	if (db_id == 1)
	{
		strncpy(db_id_buff, s, len);
		db_id_buff[len] = '\0';
		db_id = 0;
	}
	else if (query_id == 1)
	{
		strncpy(query_id_buff, s, len);
		query_id_buff[len] = '\0';
		query_id = 0;
	}
	else if (description == 1)
	{
		strncpy(description_buff, s, len);
		description_buff[len] = '\0';
		description = 0;
	}
	else if (expect == 1)
	{
		strncpy(mybuff, s, len);
		mybuff[len] = '\0';
		expect_value = atof(mybuff);
		expect = 0;
	}

	return;

} /* End of character handler. */

static void
start(void *data, const char *el, const char **attr) 
{

    db_id=0;
    query_id=0;
    description=0;
    expect=0;

    if (strcmp(el, "Hit_id") == 0)
    {
	db_id = 1;
    }
    else if (strcmp(el, "BlastOutput_query-ID") == 0)
    {
	query_id = 1;
    }
    else if (strcmp(el, "Hit_def") == 0)
    {
	description = 1;
    }
    else if (strcmp(el, "Hsp_evalue") == 0)
    {
	expect = 1;
    }

    Depth++;

}  /* End of start handler */

static void
end(void *data, const char *el) {

    if (strcmp(el, "Hsp") == 0)
    {
	printf("%s\t%s\t%5.3e\t%s\n\n", 
		query_id_buff, description_buff, expect_value, db_id_buff);
    }
  Depth--;
}  /* End of end handler */

int
main(int argc, char *argv[]) {
  XML_Parser p = XML_ParserCreate(NULL);
  if (! p) {
    fprintf(stderr, "Couldn't allocate memory for parser\n");
    exit(-1);
  }

  XML_SetElementHandler(p, start, end);
  
  XML_SetCharacterDataHandler(p, text_handler);


  for (;;) {
    int done;
    int len;

    len = fread(Buff, 1, BUFFSIZE, stdin);
    if (ferror(stdin)) {
      fprintf(stderr, "Read error\n");
      exit(-1);
    }
    done = feof(stdin);

    if (! XML_Parse(p, Buff, len, done)) {
      fprintf(stderr, "Parse error at line %d:\n%s\n",
	      XML_GetCurrentLineNumber(p),
	      XML_ErrorString(XML_GetErrorCode(p)));
      exit(-1);
    }

    if (done)
      break;
  }
  return 0;
}  /* End of main */

