#!/usr/local/bin/perl -s
# Used to translate Agricola dumps into an AAtDB .ace file
# Modified Thu May 11 21:59:47 EDT 1995 JWMorris/AAtDB
# Tweaked JWMorris ACE97 conference 4.Aug.97

# Main driver.  The -s switch on the #! line turns "-u" into $u and
# "-start=xxxxx" into $start.
if (defined($u)) {
    die "Usage: $0 [-u] [-start=aaaaa] agricola_ref_file\n";
}

# $aatdb is the running suffix used in each Paper name; it is advanced
# with Perl's string increment (aaaaa, aaaab, ...) once per record.
$aatdb   = defined($start) ? $start : "aaaaa";
$current = "skip";    # name of the multi-line field being collected, or "skip"
$line0   = 'start';

open(JOURNAL, ">journal.names") or die "$0: cannot write journal.names: $!\n";
open(GENBANK, ">genbank.acc")   or die "$0: cannot write genbank.acc: $!\n";

while (<ARGV>) {
    chomp;

    # A two- or three-letter tag starts a new field, so the field that was
    # being collected is complete: run the handler sub named after it.
    # This has to happen before the AN: branch prints the finished record,
    # otherwise the last field of a record would be processed too late.
    if ($_ =~ /^[A-Z][A-Z]: |^[A-Z][A-Z][A-Z]: /) {
        if ($current ne "skip") {
            &$current;
            $current = "skip";
        }
    }

    # AN: opens a new record: write out the previous one (if any), wipe the
    # per-record state, then note the new accession number.
    if ($_ =~ /^AN: /) {
        $tmp = $';
        &prnt unless ($count < 1);
        &clean;
        @au = @key1 = @key2 = ();    # the per-record lists must start out empty
        $accessn = $tmp;
        &accessn;
        $aatdb++;
        $count++;
    }

    # Continuation line of a multi-line field: append it to the variable
    # whose name is held in $current (symbolic reference).
    if ($current ne "skip") {
# If you want your Abstracts as LongText, consider uncommenting the
# following lines, in order to keep the linebreaks.

#	if ($current eq "ab"){$$current = $$current."$_\n";}
#	else{
        $$current = "$$current $_";
#	}
    }

    # Field starts.  AU, TI, SO and AB may span several lines and are
    # finished by their handler at the next tag; DE and ID are handled
    # immediately from their first line only.
    if ($_ =~ /^AU: /) { $au   = $'; $current = "au"; }
    if ($_ =~ /^TI: /) { $ti   = $'; $current = "ti"; }
    if ($_ =~ /^SO: /) { $so   = $'; $current = "so"; }
    if ($_ =~ /^PY: /) { $year = $'; }
    if ($_ =~ /^PT: /) { $type = $'; }
    if ($_ =~ /^DE: /) { $key1 = $'; &key1; }
    if ($_ =~ /^ID: /) { $key2 = $'; &key2; }
    if ($_ =~ /^AB: /) { $ab   = $'; $current = "ab"; }
}

# End of input: finish the field still being collected, then write the
# final record.  Nothing is written when no AN: line was ever seen.
if ($current ne "skip") {
    &$current;
    $current = "skip";
}
&prnt unless ($count < 1);

close(JOURNAL) or die "$0: error closing journal.names: $!\n";
close(GENBANK) or die "$0: error closing genbank.acc: $!\n";

# subroutines that do all the work

# Reset the per-record globals: the field collector state, every scalar
# field, the parsed journal parts and the author/keyword lists.
# $accessn, $aatdb and $count are not touched.
sub clean {
    $current = "skip";
    $au = $ti = $so = $year = $type = $key1 = $key2 = $abstr = '';
    $journal = $volume = $pages = '';
    # Empty lists, not lists holding one empty string: a leftover ''
    # element in @au would be printed as an empty Author line.
    @key1 = @key2 = @au = ();
}

# Drop the first blank found in the global $accessn (only the first one;
# any later blanks are kept).
sub accessn {
    $accessn =~ s{ }{};
}

# Title handler: delete every double-quote character from the global $ti,
# so the title can be printed inside a quoted string.
sub ti {
    $ti =~ tr/"//d;
}

# Abstract handler: strip a trailing newline from the global $ab, then
# store a copy with all double quotes removed in $abstr.
sub ab {
    chomp($ab);
    ($abstr = $ab) =~ tr/"//d;
}

# DE: handler.  Normalises the descriptor string in the global $key1 and
# splits it into the list @key1:
#   - lower-case A-Z,
#   - delete the words "arabidopsis" and "thaliana",
#   - turn "-." into "." and every remaining "-" into a blank,
#   - drop a final ".",
#   - split on ". " and trim one leading blank from each item.
# Items may come out empty (e.g. where a deleted word stood alone).
sub key1 {
    $key1 =~ tr/A-Z/a-z/;
    for my $taxon ('arabidopsis', 'thaliana') {
        $key1 =~ s/$taxon//g;
    }
    $key1 =~ s/-\././g;
    $key1 =~ tr/-/ /;
    $key1 =~ s/\.$//;

    @key1 = split(/\. /, $key1);
    s/^ // for @key1;
#		 print "@key1\n\n"; 
}

# AU: handler.  Tidies the author string in the global $au and splits it
# into the list @au, one entry per author:
#   - a blank is put after every initial's "." that is directly followed
#     by another capital ("J.A.B." becomes "J. A. B."),
#   - ",-" between surname and initials becomes ", ",
#   - authors are separated by "; ".
sub au {
    # The lookahead leaves the following capital unconsumed, so a run of
    # initials of any length is spaced out in a single pass.
    $au =~ s/([A-Z])\.(?=[A-Z])/$1. /g;
    $au =~ s/,-/, /g;
    @au = split(/; /, $au);
#    print "@au\n\n";
}

# ID: handler.  Normalises the identifier string in the global $key2 and
# splits it into the list @key2:
#   - lower-case A-Z,
#   - turn "-." into "." and every remaining "-" into a blank,
#   - drop a final "." and one leading blank,
#   - split on ". " and trim one leading blank from each item.
sub key2 {
    $key2 =~ tr/A-Z/a-z/;
    $key2 =~ s/-\././g;
    $key2 =~ tr/-/ /;
    $key2 =~ s/\.$//;
    $key2 =~ s/^ //;

    @key2 = split(/\. /, $key2);
    s/^ // for @key2;
#    print "@key2\n\n"; 
}


# SO: handler.  Picks journal, volume and pages out of the source string
# in the global $so, which is expected to look like
#
#   Journal-name. <place> : <publisher/date> v. VOLUME <issue> p. PAGES.
#
# The journal name may contain only letters and hyphens.  On a match
# $journal, $volume and $pages are set; when the line does not have this
# shape all three are set to the empty string, so that values parsed
# from an earlier SO line are never reused.
sub so {
    $journal = $volume = $pages = '';

    # "v." and "p." are literal markers, hence the escaped dots.
    if ($so =~ /^([a-zA-Z\-]+)\. .+ : .+ v\. (.+) .+ p\. (.+)\.$/) {
        $journal = $1;
        $volume  = $2;
        $pages   = $3;
    }
#print "-2 debug $journal\n$volume\n$pages\n\n";
}

# Build $atname, the five-character name prefix used in the Paper line,
# from the first author: the text before the first comma of $au[0],
# lower-cased (A-Z only), padded on the right with "-" and cut to five
# characters.  The text after the comma is left in $other.
sub atnum {
    ($atname, $other) = split(/,/, $au[0], 2);
    $atname =~ tr/A-Z/a-z/;
    # Appending the padding first and then cutting gives the same result
    # for short and long names alike.
    $atname = substr($atname . '-----', 0, 5);
#    print "$atname-$year\n";
}

# Write the current record, taken from the global field variables, to
# STDOUT in .ace layout.  The Journal line is also written to the JOURNAL
# handle and every Genbank accession line to the GENBANK handle.
# Empty keywords and an empty abstract are left out.
sub prnt {
    atnum();    # sets $atname from the first author

    print "\nPaper : $atname-$year-$aatdb#_#$count\n";
    print "Agricola_ID\t $accessn\n";
    print "Title\t \"$ti\"\n";
    for my $auth (@au) {
        print "Author\t \"$auth\"\n";
    }
    print "Year\t $year\n";

    print "Journal\t \"$journal\"\n";
    print JOURNAL "Journal\t \"$journal\"\n";

    print "Volume\t \"$volume\"\n";
    print "Page\t $pages\n";
    print "Type\t \"$type\"\n";

    for my $word (@key1) {
        print "Keyword\t \"$word\"\n" if ($word ne '');
    }

    # Identifier terms: whatever follows "genbank/" is an accession
    # number, every other non-empty term is an ordinary keyword.
    for my $word (@key2) {
        if ($word =~ /genbank\//) {
            print "Genbank_Accession\t $'\n";
            print GENBANK "Genbank_Accession\t $'\n";
        }
        elsif ($word ne '') {
            print "Keyword\t \"$word\"\n";
        }
    }

    print "Abstract\t \"$abstr\"\n" if ($abstr ne '');
}







