WibbleWobbleWah : derwent2fasta

home :: about :: gallery :: linux :: links :: other :: recent changes :: contact
#!/usr/bin/perl -w
############################################
# Ian M. Hayhurst 19 December 2003
#
# Make a fasta file from all the 'embl like'
# Derwent .DAT files in a directory
#
#  no doubt there are prettier ways but I've
# had enough of this one...   
#        and it's almost Christmas ;-)
# I'm Sure I borrowed an idea from David Starks-Browning here,
# only I cant remember what
############################################

$usage="Usage: derwent2fasta.pl [path to Derwent DAT files]\n";
@ARGV == 1 or die "$usage";
#print "do stuff with $ARGV[0]\n";
$local_incoming_directory = "$ARGV[0]";
$db_name ="AAGENESEQ";
chdir($local_incoming_directory);
opendir(DIRECTORY, $local_incoming_directory)
        or die "I Can't open the $local_incoming_directory directory.\n";

        @dirdatfilelist = (readdir (DIRECTORY)) ;
        closedir DIRECTORY;
#remove '.' and '..' from head of dirlisting to create array of datfiles @datfilelist
        @datfilelisttmp = splice(@dirdatfilelist, 2, $#dirdatfilelist);
        @datfilelist = sort @datfilelisttmp;
open (FASTA , ">GENESEQP.fasta") or die ("I cant open your output file");
$ID=""; $AC=""; $DE=""; $buffer="";
foreach $element (@datfilelist) {

    if ($element  =~ m/.DAT/){
        open (ELEMENT, "$element");
        while (<ELEMENT>){
       
            chomp;
            if (/^ID/) {$ID=$_; $ID=~ s/^ID\s+//; $ID=~ /(\w+)/; $ID=$1;}     #Just the AC No in ID
            if (/^AC/) {$AC=$_; $AC=~ s/^AC\s+//; $AC=~ s/;//;}        #no semicolon thanks
            if (/^DE/ ... /^XX/) {
                if (/^XX/){next;}
                $DE=$_;
                $DE=~ s/^DE\s+//;
                $buffer = $buffer . $DE;
                next;
                }
            if (($ID ne "") && ($AC ne "") && ($DE ne "")) {        #When you've got all three print em
                print FASTA">$db_name:$AC $ID $buffer\n";
                $ID="";                            #Reset Vars for next record
                $AC="";
                $DE="";
                $buffer="";
                }   
           
                if (/^SQ/ ... /^\/\//) {                #get the lines that dont start with SQ                   
                    if (/^\/\//){next;}                #between SQ and //           
                    if (! /^SQ/) {$seq=$_; $seq=~ s/[^A-Za-z]//g;   #concat the spaces and loose the numerals
                            print FASTA "$seq\n";}
                               
                    }
           
           
               
            }
        close (ELEMENT);
        #unlink ("$element");                            #blow away the source DAT file if you want
        }
   
    }

close (FASTA);
Powered by 1.1.6.1
Page was generated in 0.2960 seconds