#!/usr/bin/perl
# This script reads genes from a file (one per line) and translates them
# to amino acid sequences.
#
# Input : gene sequences, one per line, from the files named on the command
#         line (or from STDIN when none are given).
# Output: for every input line, the nucleotide line, the translation with a
#         space after each residue, and a separator line.

use strict;
use warnings;

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# YOU CAN IGNORE THIS CODE UNTIL THE NEXT LINE OF SQUIGGLY LINES
# Builds %nt_to_aa, the codon -> amino acid lookup table for one genetic code.

my $gcode = 1;    # id of the genetic code table to load

# NOTE(review): the file name was lost when this script was mangled (the text
# between '<' and '>' in the open/while statements had been stripped).  The
# patterns below match the layout of NCBI's genetic code file, so "gc.prt" is
# assumed here -- confirm against the original script.
my $gcode_file = 'gc.prt';

my ($aa_str, $start_codons, $base_1, $base_2, $base_3);
my $in_table = 0;    # becomes 1 once the entry for $gcode has been reached

open(my $gcodes_fh, '<', $gcode_file)
    or die "Cannot open genetic code file '$gcode_file': $!\n";

while (my $line = <$gcodes_fh>) {
    # Word boundaries keep "id 1" from also matching "id 10", "id 11", ...
    $in_table = 1 if $line =~ /\bid\s+\Q$gcode\E\b/;
    next unless $in_table;

    if ($line =~ /\bncbieaa\s+"(.+)",/) {
        $aa_str = $1;          # one amino acid letter (or '*') per codon
    }
    elsif ($line =~ /\bsncbieaa\s+"(.+)"/) {
        $start_codons = $1;    # start-codon mask, one character per codon
    }
    elsif ($line =~ /Base1\s+(.+)/) {
        $base_1 = $1;          # first base of every codon
    }
    elsif ($line =~ /Base2\s+(.+)/) {
        $base_2 = $1;          # second base of every codon
    }
    elsif ($line =~ /Base3\s+(.+)/) {
        $base_3 = $1;          # third base of every codon
        last;                  # Base3 is the last line needed for this table
    }
}
close($gcodes_fh);

if (grep { !defined $_ } ($aa_str, $base_1, $base_2, $base_3)) {
    die "Genetic code $gcode not found (or incomplete) in '$gcode_file'\n";
}

my @aa     = $aa_str =~ /[\w*]/g;
# Parsed for completeness; the translation below does not use it.
my @starts = defined($start_codons) ? ($start_codons =~ /[\w-]/g) : ();
my @base1  = $base_1 =~ /\w/g;
my @base2  = $base_2 =~ /\w/g;
my @base3  = $base_3 =~ /\w/g;

if (@base1 != @aa || @base2 != @aa || @base3 != @aa) {
    die "Genetic code $gcode in '$gcode_file' is inconsistent: "
      . "amino acid and base lines differ in length\n";
}

# Position $i of the three base strings spells the codon whose amino acid
# is at position $i of the amino acid string.
my %nt_to_aa;
for my $i (0 .. $#aa) {
    my $codon = $base1[$i] . $base2[$i] . $base3[$i];
    $nt_to_aa{$codon} = $aa[$i];
}
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

while (my $nt_string = <>) {
    # Translate an upper-cased copy without the line ending, so that trailing
    # whitespace is never mistaken for part of a codon and lower-case input
    # still matches the (upper-case) table keys.
    my $sequence = uc $nt_string;
    $sequence =~ s/\s+\z//;

    my $aa_string = "";

    # $place is the place holder in the nucleotide sequence (where we start
    # translating from); $codon holds one codon.  A trailing partial codon
    # (1 or 2 bases) is ignored.
    for (my $place = 0; $place + 3 <= length($sequence); $place += 3) {
        my $codon = substr($sequence, $place, 3);

        # Codons missing from the table (e.g. containing N) become 'X' so the
        # reading frame stays visible instead of silently losing a residue.
        $aa_string .= exists $nt_to_aa{$codon} ? $nt_to_aa{$codon} : 'X';
    }

    # The input line is echoed unchanged (it still carries its own line
    # ending) followed by "\n", exactly as before.
    print $nt_string, "\n";

    # Put a space after every residue.
    $aa_string =~ s/([\w*])/$1 /g;
    print "$aa_string\n";
    print "============================================\n";
}