I finally finished the little program I'd been working on since last year. It's nothing too great or useful, but I'm proud of it. It's the first thing I've really programmed on my own (i.e. without a book by my side telling me exactly what to do and only letting me change the font size), so don't burst my bubble for a while.
Anywho, here it is (in case anyone cares): a little program which compares two DNA sequences and finds any mutations in them:
Questions / comments / suggestions / corrections / dinner invitations?
Anywho, here it is (in case anyone cares): a little program which compares two DNA sequences and finds any mutations in them:
Code:
# Interactive DNA comparison tool.
#
# Asks for a sequence length (a positive multiple of 3), reads two DNA
# sequences one nucleotide at a time, lets the user confirm or re-enter
# them, then reports, codon by codon, how many nucleotides differ and
# whether the difference changes the encoded amino acid.
use strict;
use warnings;

# Standard genetic code, written on the DNA coding strand (T instead of U).
# Keyed by the amino-acid name used in the report; all 64 codons are listed.
my %CODONS_OF = (
    'alanine'       => [qw(GCA GCC GCG GCT)],
    'cysteine'      => [qw(TGC TGT)],
    'aspartate'     => [qw(GAC GAT)],
    'glutamate'     => [qw(GAA GAG)],
    'phenylalanine' => [qw(TTC TTT)],
    'glycine'       => [qw(GGA GGC GGG GGT)],
    'histidine'     => [qw(CAC CAT)],
    'isoleucine'    => [qw(ATA ATC ATT)],
    'lysine'        => [qw(AAA AAG)],
    'leucine'       => [qw(TTA TTG CTA CTC CTG CTT)],
    'methionine'    => [qw(ATG)],
    'asparagine'    => [qw(AAC AAT)],
    'proline'       => [qw(CCA CCC CCG CCT)],
    'glutamine'     => [qw(CAA CAG)],
    'arginine'      => [qw(AGA AGG CGA CGC CGG CGT)],
    'serine'        => [qw(AGC AGT TCA TCC TCG TCT)],
    'threonine'     => [qw(ACA ACC ACG ACT)],
    'valine'        => [qw(GTA GTC GTG GTT)],
    'tryptophan'    => [qw(TGG)],
    'tyrosine'      => [qw(TAC TAT)],
    'a stop codon'  => [qw(TAA TAG TGA)],
);

# Inverted view of %CODONS_OF: three-letter codon => amino-acid name.
my %amino_acid_for;
for my $name ( keys %CODONS_OF ) {
    for my $codon ( @{ $CODONS_OF{$name} } ) {
        $amino_acid_for{$codon} = $name;
    }
}

# Words used in the report for 1, 2 or 3 differing nucleotides in a codon.
my @COUNT_WORD = ( '', 'one', 'two', 'three' );

# Reads one line from STDIN with surrounding whitespace removed.
# Returns undef at end of input so callers can stop instead of looping.
sub read_line {
    my $line = <STDIN>;
    return unless defined $line;
    $line =~ s/\A\s+|\s+\z//g;
    return $line;
}

# True when $input is a positive whole number divisible by 3.
sub is_valid_length {
    my ($input) = @_;
    return 0 unless defined $input && $input =~ /\A[0-9]+\z/;
    return ( $input > 0 && $input % 3 == 0 ) ? 1 : 0;
}

# Keeps asking until a valid sequence length is entered.
# Returns the length, or undef if input ends first.
sub prompt_length {
    while (1) {
        my $input = read_line();
        return unless defined $input;
        return $input + 0 if is_valid_length($input);
        print "That is not a positive multiple of 3. Please choose another sequence length: ";
    }
}

# Normalises one nucleotide entry.
# Returns 'A', 'C', 'G' or 'T' for a DNA base, 'U' for uracil (so the caller
# can explain why it is rejected), and undef for anything else. Only a
# single letter is accepted; whitespace and letter case are ignored.
sub classify_nucleotide {
    my ($input) = @_;
    return unless defined $input;
    ( my $base = uc $input ) =~ s/\s+//g;
    return $base if $base =~ /\A[ACGTU]\z/;
    return;
}

# Prompts for $length nucleotides, re-asking after every invalid entry.
# Returns a reference to a 0-based array of bases, or undef if input ends.
sub read_sequence {
    my ($length) = @_;
    my @bases;
    while ( @bases < $length ) {
        my $position = @bases + 1;    # prompts are 1-based
        print "Nucleotide $position: ";
        my $input = read_line();
        return unless defined $input;
        my $base = classify_nucleotide($input);
        if ( !defined $base ) {
            print "That nucleotide doesn't exist. Please write A, C, G or T.\n";
        }
        elsif ( $base eq 'U' ) {
            print "Uracil only exists in RNA. We are working with DNA.\n";
        }
        else {
            push @bases, $base;
        }
    }
    return \@bases;
}

# Asks whether the echoed sequences are correct and insists on a y/n answer.
# Returns 'y' or 'n', or undef if input ends.
sub confirm_sequences {
    print "\n\nIs this correct? (y/n) ";
    while (1) {
        my $answer = read_line();
        return unless defined $answer;
        return 'y' if $answer =~ /\A[yY]/;
        return 'n' if $answer =~ /\A[nN]/;
        print "Please state whether these sequences are correct (y/n): ";
    }
}

# Translates three bases into an amino-acid name (or 'a stop codon').
# Returns undef when the three bases do not form a known DNA codon.
sub translate_codon {
    my @bases = @_;
    my $codon = uc join '', map { defined $_ ? $_ : '' } @bases;
    return $amino_acid_for{$codon};
}

# Compares two equal-length sequences (array references of bases) codon by
# codon. Returns one report line per codon that contains at least one
# differing nucleotide; an empty list means the sequences are identical.
# Trailing bases that do not fill a whole codon are ignored.
sub mutation_report {
    my ( $seq1, $seq2 ) = @_;
    my @lines;
    my $codon_count = int( @{$seq1} / 3 );
    for my $codon ( 1 .. $codon_count ) {
        my $start  = ( $codon - 1 ) * 3;
        my @first  = @{$seq1}[ $start .. $start + 2 ];
        my @second = @{$seq2}[ $start .. $start + 2 ];
        my $count  = grep { $first[$_] ne $second[$_] } 0 .. 2;
        next if $count == 0;

        my $from = translate_codon(@first);
        my $to   = translate_codon(@second);
        $from = 'an unknown amino acid' unless defined $from;
        $to   = 'an unknown amino acid' unless defined $to;

        my $intro = $count == 1 ? 'There is one ' : "There are $COUNT_WORD[$count] ";
        my $noun  = $count == 1 ? 'mutation'      : 'mutations';
        my $verb  = $count == 1 ? 'causes'        : 'cause';
        if ( $from eq $to ) {
            # Same amino acid on both sides: the change is silent.
            push @lines, "${intro}synonymous $noun in codon $codon.\n";
        }
        else {
            push @lines, "$intro$noun in codon $codon which $verb $to to substitute $from.\n";
        }
    }
    return @lines;
}

# Tells the user that input stopped early; returns nothing.
sub end_of_input {
    print "\n\nInput ended before the comparison was complete.\n";
    return;
}

# Runs the interactive session: length, both sequences, confirmation
# (re-entering both sequences for as long as the user answers "n"), report.
sub main {
    local $| = 1;    # show prompts that lack a newline before waiting for input

    print "\n\nWelcome.\n\nWe will compare two DNA sequences and analyze any mutations.\n\nPlease choose a sequence length (in multiples of 3): ";
    my $length = prompt_length();
    return end_of_input() unless defined $length;

    my $first_heading = "\nVery well. Now write the first sequence.\n";
    while (1) {
        print $first_heading;
        my $seq1 = read_sequence($length);
        return end_of_input() unless defined $seq1;

        print "\nNow write the second sequence.\n";
        my $seq2 = read_sequence($length);
        return end_of_input() unless defined $seq2;

        print "\nVerification:\n\nSequence 1: ", join( '', @{$seq1} );
        print "\nSequence 2: ", join( '', @{$seq2} );

        my $answer = confirm_sequences();
        return end_of_input() unless defined $answer;

        if ( $answer eq 'y' ) {
            print "\nProcessing...\n\n";
            my @lines = mutation_report( $seq1, $seq2 );
            if (@lines) {
                print @lines, "\n";
            }
            else {
                print "The two sequences are identical.\n\n";
            }
            return;
        }

        # The user rejected the sequences: ask for both again.
        $first_heading = "\nWrite the first sequence again.\n";
    }
}

# Start the session only when executed as a script, so the helper subs can be
# loaded with require/do without triggering the prompts.
main() unless caller;