#!/usr/bin/perl -w
############################################
# Ian M. Hayhurst 19 December 2003
#
# Make a fasta file from all the 'embl like'
# Derwent .DAT files in a directory
#
# no doubt there are prettier ways but I've
# had enough of this one...
# and it's almost Christmas ;-)
# I'm Sure I borrowed an idea from David Starks-Browning here,
# only I cant remember what
############################################
$usage=
"Usage: derwent2fasta.pl [path to Derwent DAT files]\n";
@ARGV ==
1 or die "$usage";
#print "do stuff with $ARGV[0]\n";
$local_incoming_directory =
"$ARGV[0]";
$db_name =
"AAGENESEQ";
chdir($local_incoming_directory);
opendir(DIRECTORY,
$local_incoming_directory)
or die "I Can't open the $local_incoming_directory directory.\n";
@dirdatfilelist =
(readdir (DIRECTORY
)) ;
closedir DIRECTORY;
#remove '.' and '..' from head of dirlisting to create array of datfiles @datfilelist
@datfilelisttmp =
splice(@dirdatfilelist,
2, $
#dirdatfilelist);
@datfilelist =
sort @datfilelisttmp;
open (FASTA ,
">GENESEQP.fasta") or die ("I cant open your output file");
$ID=
"";
$AC=
"";
$DE=
"";
$buffer=
"";
foreach $element (@datfilelist) {
if ($element =~
m/.DAT/
){
open (ELEMENT,
"$element");
while (<ELEMENT>){
chomp;
if (/^ID/
) {$ID=
$_;
$ID=~
s/^ID\
s+//;
$ID=~ /
(\w+
)/;
$ID=$
1;
} #Just the AC No in ID
if (/^AC/
) {$AC=
$_;
$AC=~
s/^AC\
s+//;
$AC=~
s/;//;
} #no semicolon thanks
if (/^DE/ ... /^XX/
) {
if (/^XX/
){next;
}
$DE=
$_;
$DE=~
s/^DE\
s+//;
$buffer =
$buffer .
$DE;
next;
}
if (($ID ne "") &&
($AC ne "") &&
($DE ne "")) { #When you've got all three print em
print FASTA
">$db_name:$AC $ID $buffer\n";
$ID=
"";
#Reset Vars for next record
$AC=
"";
$DE=
"";
$buffer=
"";
}
if (/^SQ/ ... /^\/\//
) { #get the lines that dont start with SQ
if (/^\/\//
){next;
} #between SQ and //
if (! /^SQ/
) {$seq=
$_;
$seq=~
s/
[^A-Za-z
]//g;
#concat the spaces and loose the numerals
print FASTA
"$seq\n";
}
}
}
close (ELEMENT
);
#unlink ("$element"); #blow away the source DAT file if you want
}
}
close (FASTA
);
Page was generated in 0.2960 seconds