logo [Gallery] [Linux] [Links] [Stuff]


#!/usr/bin/perl
# dupget.pl
# Checks for Duplicate entries in a fasta file
# Ian M. Hayhurst Jan 08 2003
#
# Usage: dupget.pl [fasta filename]
#
# Reads the named file and counts every header line (a line that starts
# with '>').  It prints how many distinct header lines were seen, then
# lists each header line that occurs more than once together with its
# count.  Only the header text is compared; all other lines are ignored.
#
# Dies with the usage message unless exactly one argument is given, and
# dies with the system error text if the file cannot be opened.
use strict;
use warnings;

@ARGV == 1 or die "Usage: dupget.pl [fasta filename]\n";
my ($fasta_path) = @ARGV;

# Three-argument open: the file name is taken literally, so a name that
# starts with '>' or ends with '|' cannot change the open mode.
open(my $fasta_fh, '<', $fasta_path)
    or die "I can't open $fasta_path: $!\n";

# Header line (trailing newline removed) => number of times it was seen.
my %count_of;
while (my $line = <$fasta_fh>) {
    chomp $line;
    $count_of{$line}++ if $line =~ /^>/;
}
close($fasta_fh);

# This is the number of DISTINCT header lines, not the total number of
# header lines in the file.
my $record_num = scalar keys %count_of;
print "$fasta_path contains $record_num records \n";

# Sorted so the report comes out in the same order on every run.
foreach my $header (sort keys %count_of) {
    next unless $count_of{$header} > 1;
    print "$header has $count_of{$header} entries\n";
}
exit;

#!/usr/bin/perl
# dup-strip.pl
# Checks for Duplicate Description entries in a fasta file
# And strips them out
# Ian M. Hayhurst Jan 08 2003
#
# Usage: dup-strip.pl [fasta filename]
#
# Splits the named file into two new files next to it:
#   <filename>.uniq  - every record whose header line is seen for the
#                      first time (plus any lines before the first header)
#   <filename>.dup   - every record whose header line was already seen
# A record is a header line (starting with '>') and all lines that follow
# it up to the next header line.  Headers are compared as whole lines,
# including the line ending.
#
# Dies with the usage message unless exactly one argument is given, and
# dies with the system error text if a file cannot be opened or if the
# output files cannot be completely written.
use strict;
use warnings;

@ARGV == 1 or die "Usage: dup-strip.pl [fasta filename]\n";
my ($input_path) = @ARGV;
my $unique_path  = "$input_path.uniq";
my $dupes_path   = "$input_path.dup";

# Open the input first: if the name is wrong we stop here, before the
# output files have been created or truncated.
# Three-argument open keeps the file name from being read as a mode.
open(my $in_fh, '<', $input_path)
    or die "I can't open $input_path: $!\n";
open(my $unique_fh, '>', $unique_path)
    or die "I can't open $unique_path: $!\n";
open(my $dupes_fh, '>', $dupes_path)
    or die "I can't open $dupes_path: $!\n";

my %seen_count;          # header line => number of times it has been seen
my $in_duplicate = 0;    # true while the current record is a duplicate

while (my $line = <$in_fh>) {
    if ($line =~ /^>/) {
        # Post-increment returns the old count: false the first time a
        # header appears, true for every later occurrence.
        $in_duplicate = $seen_count{$line}++ ? 1 : 0;
    }

    # The header and all following lines of the record go to the same file.
    print { $in_duplicate ? $dupes_fh : $unique_fh } $line;
}
close($in_fh);

# Buffered write errors (e.g. a full disk) only show up at close time.
close($dupes_fh)
    or die "I can't finish writing $dupes_path: $!\n";
close($unique_fh)
    or die "I can't finish writing $unique_path: $!\n";

# Optional summary, left disabled as in the original script:
#my $record_num = scalar keys %seen_count;
#print "$input_path contains $record_num records \n";
#
#foreach my $header (sort keys %seen_count) {
#    if ($seen_count{$header} > 1) {print "$header has $seen_count{$header} entries\n";}
#    }
exit;
Photographs that appear on this site may not be reproduced in any form without the express permission of the author.
All photographs Copyright © 1999-2001 by Ian M. Hayhurst - All Rights Reserved.