#!/usr/bin/perl -w
#
#  $Id: parseClstr.pl,v 1.1 2008/05/19 20:41:50 mprice Exp $
#  fastHmm/fastBlast Alignment Tools
#  http://microbesonline.org/fasthmm (fasthmm@microbesonline.org)
#
#  Convert cd-hit output to tab-delimited form with exemplar as first column
#  and all members (including exemplar) as other entries in the row
#
#  Copyright (C) 2007 The Regents of the University of California
#  All rights reserved.
#
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License along
#  with this program; if not, write to the Free Software Foundation, Inc.,
#  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
#
#  Disclaimer
#
#  NEITHER THE UNITED STATES NOR THE UNITED STATES DEPARTMENT OF ENERGY,
#  NOR ANY OF THEIR EMPLOYEES, MAKES ANY WARRANTY, EXPRESS OR IMPLIED,
#  OR ASSUMES ANY LEGAL LIABILITY OR RESPONSIBILITY FOR THE ACCURACY,
#  COMPLETENESS, OR USEFULNESS OF ANY INFORMATION, APPARATUS, PRODUCT,
#  OR PROCESS DISCLOSED, OR REPRESENTS THAT ITS USE WOULD NOT INFRINGE
#  PRIVATELY OWNED RIGHTS.
#

use strict;
# Convert cd-hit ".clstr" text into tab-delimited rows, one row per cluster:
#
#     exemplar <TAB> exemplar <TAB> other members, in input order
#
# Arguments:
#   $in  - filehandle to read the cluster listing from
#   $out - filehandle to write the tab-delimited rows to
# Returns the number of rows written.
#
# A line starting with ">" begins a new cluster.  Member lines look like
#     0       2799aa, >12345.1.2799... *
#     1       2214aa, >67890.3.2216... at 80%
# i.e. the sequence id must end in ".<digits>.<digits>" and be followed by
# dots; the member whose trailing field is "*" is the cluster's exemplar.
# Lines that match neither form are ignored.
#
# A cluster in which no exemplar was recognized cannot be written in the
# "exemplar first" format, so it is reported on STDERR and skipped instead
# of being emitted as a blank row.
sub parse_clstr
{
    my ($in, $out) = @_;

    my $inCluster = 0;      # true once a header or a member line was seen
    my $label = "input before the first cluster header";
    my $exemplar = undef;   # id of the member marked "*"
    my @others = ();        # every other member, in input order
    my $nRows = 0;

    # Write out the cluster collected so far (if any).
    my $finishCluster = sub {
        return unless $inCluster;
        if ( defined($exemplar) ) {
            print $out join( "\t", $exemplar, $exemplar, @others ), "\n";
            $nRows++;
        } else {
            warn "parseClstr.pl: no exemplar found in $label; cluster skipped\n";
        }
    };

    while ( my $line = <$in> )
    {
        # Accept both Unix and DOS line endings.
        $line =~ s/\r?\n\z//;

        if ( $line =~ /^>/ )
        {
            $finishCluster->();
            $inCluster = 1;
            $label = "cluster '$line'";
            $exemplar = undef;
            @others = ();
        } elsif ( $line =~ /^\d+\s+\d+aa,\s+>(\S+\.\d+\.\d+)\.+\s+(.+)/ ) {
            my ($seqId, $status) = ($1, $2);
            $inCluster = 1;
            # Tolerate trailing whitespace (or a stray "\r") after the star.
            # Only the first starred member becomes the exemplar; a second
            # one is kept as an ordinary member so no sequence is lost.
            if ( $status =~ /^\*\s*$/ && !defined($exemplar) ) {
                $exemplar = $seqId;
            } else {
                push( @others, $seqId );
            }
        }
    }
    $finishCluster->();

    return $nRows;
}

# Script entry point: refuse command-line arguments and filter STDIN to STDOUT.
sub main
{
    die "Run parseClstr.pl as a filter" if @ARGV != 0;
    parse_clstr( \*STDIN, \*STDOUT );
    return 0;
}

# Run only when executed as a script, not when loaded from another file.
main() unless caller;
