#!/usr/bin/perl
# Reads a nucleotide file in FASTA format (from the files named on the
# command line, or from STDIN when none are given), concatenates every
# sequence line into a single string, and prints each "gene" on its own
# line.  A "gene" is the span from an ATG to the next in-frame TAA.
#
# Header lines (those starting with ">") are skipped, so all records in the
# input are joined into one sequence before the search is run.

use strict;
use warnings;

# Returns the sequence text carried by one input line: the empty string for
# a FASTA header line, otherwise the line with trailing whitespace removed.
sub fasta_line_residues {
    my ($line) = @_;

    return '' if $line =~ /^>/;

    # Strip all trailing whitespace, not just "\n": a "\r" left behind by
    # CRLF input would sit between lines and break matches that span them.
    $line =~ s/\s+\z//;

    return $line;
}

# Returns the list of genes found in $sequence, in order of appearance.
# Matching is case-sensitive and only the letters A, C, G and T are accepted
# inside a gene.  Matches do not overlap: the search for the next ATG
# resumes after the TAA that closed the previous gene.
sub extract_genes {
    my ($sequence) = @_;

    # "*?" (not "+?") so that a TAA directly after the ATG ends the gene;
    # requiring at least one codon would swallow that stop codon and run on
    # to a later TAA.  The non-greedy quantifier stops at the first in-frame
    # TAA, and the codon group is non-capturing so only the whole gene is
    # returned for each match.
    my @genes = $sequence =~ /(ATG(?:[ACGT]{3})*?TAA)/g;

    return @genes;
}

# Reads all input, builds the concatenated sequence and prints the genes.
sub main {
    my $sequence = '';

    while (my $line = <>) {
        $sequence .= fasta_line_residues($line);
    }

    print "$_\n" for extract_genes($sequence);

    return;
}

# Run only when executed as a script, so the subs can be loaded on their own.
main() unless caller;