#!/usr/bin/perl
# Stacia Wyman, 5 Jan 05, Winter Study CS011
# HW_1_1 and HW_1_2
#
# Two homework solutions that scan a directory of GenBank (.gbk) files.
#
#   HW_1_1: for every DEFINITION line, print the text that precedes
#           " mitochon", one entry per line.
#   HW_1_2: very similar, but also record coordinates whenever a line of
#           the form "gene num..num" is seen, and print the saved
#           coordinates as soon as /gene="COX1" appears.
#
# Both parts write to STDOUT; HW_1_1 runs first, then HW_1_2.

use strict;
use warnings;

# Directory holding the GenBank files.  File names are appended to this
# string directly, so it must end with a slash.
my $prefix = "/home/faculty/stacia/ws05/data/metazoa/";

hw_1_1($prefix);
hw_1_2($prefix);

# Return the full path of every entry in $dir whose name ends in ".gbk".
# readdir() returns bare names without the directory, so $dir is prepended
# here.  Entries come back in whatever order readdir() reports them.
# Dies if the directory cannot be opened.
sub genbank_files {
    my ($dir) = @_;

    opendir(my $dh, $dir) or die "Couldn't open dir $dir: $!";
    my @names = readdir($dh);
    closedir($dh);

    # No chomp is needed: readdir() names carry no trailing newline.
    return map { $dir . $_ } grep { /\.gbk$/ } @names;
}

# HW_1_1: print the captured part of each matching DEFINITION line found in
# the GenBank files under $dir, followed by a newline.
# Dies if a file cannot be opened.
sub hw_1_1 {
    my ($dir) = @_;

    foreach my $filename (genbank_files($dir)) {
        # Three-argument open with a lexical handle: characters in the file
        # name can never be mistaken for an open mode.
        open(my $gbfile, '<', $filename)
            or die "Couldn't open $filename: $!";

        # Read the file line by line; the loop ends at end of file.
        while (my $line = <$gbfile>) {
            if ($line =~ /^DEFINITION\s+(.+) mitochon/) {
                print "$1\n";
            }
        }
        close($gbfile);
    }
    return;
}

# HW_1_2: for each GenBank file under $dir print "<definition>, <coords>"
# where <coords> is the most recent "gene num..num" range seen before the
# first /gene="COX1" qualifier that follows it.
# Dies if a file cannot be opened.
sub hw_1_2 {
    my ($dir) = @_;

    foreach my $filename (genbank_files($dir)) {
        open(my $gbfile, '<', $filename)
            or die "Couldn't open $filename: $!";

        # $coordstr doubles as a flag: it is defined only while there are
        # coordinates that have not been printed yet.  Clearing it after
        # printing keeps a second /gene="COX1" line for the same range from
        # producing a duplicate.  It starts undefined for every file so
        # that coordinates never leak from one file into the next.
        my $coordstr;

        # True while "<definition>, " has been printed without its newline.
        my $line_open = 0;

        while (my $line = <$gbfile>) {
            if ($line =~ /^DEFINITION\s+(.+) mitochon/) {
                print "$1, ";
                $line_open = 1;
            }
            if ($line =~ /gene\s+(\d+\.\.\d+)/) {    # the dots must be escaped
                $coordstr = $1;
            }
            if ($line =~ /gene="COX1"/ && defined $coordstr) {
                print "$coordstr\n";
                $coordstr  = undef;
                $line_open = 0;
            }
        }
        close($gbfile);

        # A file with a definition but no COX1 coordinates would otherwise
        # leave the output line unfinished and run into the next record.
        print "\n" if $line_open;
    }
    return;
}