#!/usr/bin/perl -w
#
#  $Id: expandHitsByDomains.pl,v 1.1 2008/09/26 22:03:45 mprice Exp $
#  fastHmm/fastBlast Alignment Tools
#  http://microbesonline.org/fasthmm (fasthmm@microbesonline.org)
#
#  Script for replacing hits between masking sequences with hits to the members of those domains
#
#  Copyright (C) 2007 The Regents of the University of California
#  All rights reserved.
#
#  This program is free software; you can redistribute it and/or modify
#  it under the terms of the GNU General Public License as published by
#  the Free Software Foundation; either version 2 of the License, or
#  (at your option) any later version.
#
#  This program is distributed in the hope that it will be useful,
#  but WITHOUT ANY WARRANTY; without even the implied warranty of
#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#  GNU General Public License for more details.
#
#  You should have received a copy of the GNU General Public License along
#  with this program; if not, write to the Free Software Foundation, Inc.,
#  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
#
#  Disclaimer
#
#  NEITHER THE UNITED STATES NOR THE UNITED STATES DEPARTMENT OF ENERGY,
#  NOR ANY OF THEIR EMPLOYEES, MAKES ANY WARRANTY, EXPRESS OR IMPLIED,
#  OR ASSUMES ANY LEGAL LIABILITY OR RESPONSIBILITY FOR THE ACCURACY,
#  COMPLETENESS, OR USEFULNESS OF ANY INFORMATION, APPARATUS, PRODUCT,
#  OR PROCESS DISCLOSED, OR REPRESENTS THAT ITS USE WOULD NOT INFRINGE
#  PRIVATELY OWNED RIGHTS.


use strict;
use lib "$ENV{FASTHMM_DIR}/lib";
use FastBLAST;
use Getopt::Long;

{
    # Main driver.
    #
    # Reads a tab-delimited file of domain pairs (-in), then for every distinct
    # first-column domain looks up its alignment in the FastBLAST database (-db)
    # and writes one tab-delimited candidate row per (second-column id, member)
    # combination to STDOUT:
    #
    #     <second-column id without leading "fb.">  <member id>  <begin>  <end>
    #
    # Dies with the usage message on bad arguments, and with a diagnostic on an
    # unreadable/unparseable input file, a malformed alignment line, or a
    # first-column id that has no alignment and is not of the form
    # fb.<gene>.<begin>.<end>.
    my $usage = "expandHitsByDomains.pl -db FastBLASTDirectory -in input_pairs > expandeddomains\n"
        . "The input file should be tab-delimited with two columns:\n"
        . "The masking domain and the original domain\n"
        . "The database argument should be to a valid FastBLAST database directory\n"
        . "that contains an indexed fb.all.align file with the original domain ids\n"
        . "Writes a new candidates domains file to STDOUT\n"
        . "suitable as input to fastBlastAlignment.pl\n";
    my $db;
    my $infile;
    (GetOptions('db=s' => \$db,
                'in=s' => \$infile)
     && defined $db
     && defined $infile
     && @ARGV == 0)
        || die $usage;

    # first-column id -> second-column id -> 1.
    # Grouping by the first column means each alignment is fetched only once,
    # however many pairs mention it.
    # NOTE(review): the variable names ($masked, $masker) and the column order
    # described in the usage text do not obviously agree -- confirm which
    # column is which against the producer of this file.
    my %maskers = ();

    open(my $in, "<", $infile) || die "Cannot read $infile: $!";
    while (my $row = <$in>) {
        # Strip LF as well as CRLF endings so a stray "\r" never becomes part
        # of the id taken from the last column.
        $row =~ s/[\r\n]+\z//;
        my @F = split /\t/, $row;
        die "Cannot parse $row" unless @F >= 2;
        my ($masked, $masker) = @F;
        $maskers{$masked}{$masker} = 1;
    }
    close($in) || die "Error reading $infile: $!";

    FastBLAST::InitDomains($db);
    foreach my $masked (sort keys %maskers) {
        # Output ids for this domain: the paired ids in sorted order with any
        # leading "fb." removed.  Computed once here rather than once per
        # alignment line.
        my @outids = map { (my $id = $_) =~ s/^fb[.]//; $id }
                     sort keys %{ $maskers{$masked} };

        if (FastBLAST::HasAlignment($masked)) {
            my $lines = FastBLAST::FetchAlignment($masked); # fetch lines instead of writing to filehandle
            foreach my $line (@$lines) {
                # Alignment columns: old id, member id, aligned sequence,
                # begin, end, score, e-value.  Only member/begin/end are
                # emitted; the e-value is read solely to verify that the line
                # has all seven fields.
                my (undef, $memberid, undef, $begin, $end, undef, $eval) = split /\t/, $line;
                die "Error reading alignment for domain $masked:\n$line" unless defined $eval;

                foreach my $outid (@outids) {
                    print join("\t", $outid, $memberid, $begin, $end)."\n";
                }
            }
        } else {
            # No stored alignment: the id itself must encode the gene and its
            # range as fb.<gene>.<begin>.<end>, and that gene is the only member.
            $masked =~ m/^fb[.](.*)[.](\d+)[.](\d+)$/ || die "Cannot parse domain id $masked";
            my ($geneid, $beg, $end) = ($1, $2, $3);
            # Emit one row per paired id, keyed by that id, exactly as the
            # aligned branch does.  (Previously the loop variable was ignored
            # and the same "<gene>.<begin>.<end>" row was printed repeatedly.)
            # NOTE(review): coordinates are written as 1 .. length of the
            # range rather than <begin> .. <end>; kept as-is -- confirm this
            # is what fastBlastAlignment.pl expects.
            foreach my $outid (@outids) {
                print join("\t", $outid, $geneid, 1, $end-$beg+1)."\n";
            }
        }
    }
    FastBLAST::CleanupDomains();
}




