#!/usr/bin/env sage -python # # The Needleman-Wunsch algorithm for biological sequence alignment. # # Stephen Forrest, http://wandership.ca/ import sys from sage.all import * # similarity matrix # A G C T # A 10 -1 -3 -4 # G -1 7 -5 -3 # C -3 -5 9 0 # T -4 -3 0 8 SM = { ("A","A"): 10, ("A","G"): -1, ("A","C"): -3, ("A","T"): -4, ("G","A"): -1, ("G","G"): 7, ("G","C"): -5, ("G","T"): -3, ("C","A"): -3, ("C","G"): -5, ("C","C"): 9, ("C","T"): 0, ("T","A"): -4, ("T","G"): -3, ("T","C"): 0, ("T","T"): 8 } gap = -5 def NeedlemanWunsch (sequence, reference): rows = len(reference) cols = len(sequence) a = matrix(rows, cols, [0 for i in range(rows*cols)]) for i in range(1,rows): for j in range(1,cols): choice1 = a[i-1, j-1] + SM[reference[i], sequence[j]] choice2 = a[i-1, j] + gap choice3 = a[i, j-1] + gap a[i,j] = max(choice1, choice2, choice3) ref = [] seqq = [] i = rows-1 j = cols-1 while (i>0) and (j>0): score = a[i,j] score_diag = a[i-1, j-1] score_up = a[i, j-1] score_left = a[i-1, j] if (score == score_diag + SM[reference[i], sequence[j]]): ref.append( reference[i] ) seqq.append( sequence[j] ) i = i-1 j = j-1 elif (score == score_left + gap): ref.append( reference[i] ) seqq.append( "-" ) i = i-1 elif (score == score_up + gap): ref.append( "-" ) seqq.append( sequence[j] ) j = j-1 while (i > 0): ref.append( reference[i] ) seqq.append( "-" ) i = i-1 while (j > 0): ref.append( "-" ) seqq.append( sequence[j] ) j = j-1 seqq.reverse() ref.reverse() return( [ "".join(seqq), "".join(ref) ] ) print NeedlemanWunsch("AGCTAGCT", "AGCAGCT") s1 = "ACAACCGCTATGTATTTCGTACATTACTGCCAGCCACCATGAATATTGTACGGTACCATAAATACTTGACCACCTGTAGTACATAAAAACCCAACCCACATCAAAACCCCCTCCCCATGCTTACAAGCAAGTACAGCAATCAACCCTCAACTATCACACATCAACTGCAACTCCAAAGCCACCCCTCACCCACTAGGATACCAACAAACCTACCCACCCTTAACAGTACATAGTACATAAAGCCATTTACCGTACATAGCACATTACAGTCAAATCCCTTCTCGTCCCCATGGATGACCCCCCTCAGATAGGGGTCCC" crs = "ACAACCGCTATGTATTTCGTACATTACTGCCAGCCACCATGAATATTGTACGGTACCATAAATACTTGACCACCTGTAGTACATAAAAACCCAATCCACATCAAAACCCCCTCCCCATGCTTACAAGCAAGTACAGCAATCAACCCTCAACTATCACACATCAACTGCAACTCCAAAGCCACCCCTCACCCACTAGGATACCAACAAACCTACCCACCCTTAACAGTACATAGTACATAAAGCCATTTACCGTACATAGCACATTACAGTCAAATCCCTTCTCGTCCCCATGGATGACCCCCCTCAGATAGGGGTCCC" print NeedlemanWunsch( s1, crs )