# # # The Needleman-Wunsch algorithm for DNA sequence comparison. # # Stephen Forrest, http://wandership.ca/ NeedlemanWunsch := module() local ModuleApply, SM, gap; # similarity matrix # A G C T # A 10 -1 -3 -4 # G -1 7 -5 -3 # C -3 -5 9 0 # T -4 -3 0 8 SM := table([ ("A","A") = 10, ("A","G") = -1, ("A","C") = -3, ("A","T") = -4, ("G","A") = -1, ("G","G") = 7, ("G","C") = -5, ("G","T") = -3, ("C","A") = -3, ("C","G") = -5, ("C","C") = 9, ("C","T") = 0, ("T","A") = -4, ("T","G") = -3, ("T","C") = 0, ("T","T") = 8 ]): gap := -5; ModuleApply := proc(sequence::string, reference::string) local rows, cols, a, i, j, choice1, choice2, choice3, ref, seqq, score, score_diag, score_up, score_left; rows := length(reference); cols := length(sequence); a := Array(0..rows, 0..cols, datatype=integer); # for i from 0 to rows do a[i,0] := 0; end do; # for j from 0 to cols do a[0,j] := 0; end do; for i from 1 to rows do for j from 1 to cols do choice1 := a[i-1, j-1] + SM[reference[i], sequence[j]]; choice2 := a[i-1, j] + gap; choice3 := a[i, j-1] + gap; a[i,j] := max(choice1, choice2, choice3); end do; end do; ref := StringTools:-StringBuffer(); seqq := StringTools:-StringBuffer(); i := length(reference); j := length(sequence); while (i>0) and (j>0) do score := a[i,j]; score_diag := a[i-1, j-1]; score_up := a[i, j-1]; score_left := a[i-1, j]; if (score = score_diag + SM[reference[i], sequence[j]]) then ref:-append( reference[i] ); seqq:-append( sequence[j] ); i := i-1; j := j-1; elif (score = score_left + gap) then ref:-append( reference[i] ); seqq:-append( "-" ); i := i-1; elif (score = score_up + gap) then ref:-append( "-" ); seqq:-append( sequence[j] ); j := j-1; end if; end do; while (i > 0) do ref:-append( reference[i] ); seqq:-append( "-" ); i := i-1; end do; while (j > 0) do ref:-append( "-" ); seqq:-append( sequence[j] ); j := j-1; end do; map( StringTools:-Reverse, [seqq:-value(), ref:-value()]) end proc: end module: NeedlemanWunsch("AGCTAGCT", "AGCAGCT"); s1 := "ACAACCGCTATGTATTTCGTACATTACTGCCAGCCACCATGAATATTGTACGGTACCATAAATACTTGACCACCTGTAGTACATAAAAACCCAACCCACATCAAAACCCCCTCCCCATGCTTACAAGCAAGTACAGCAATCAACCCTCAACTATCACACATCAACTGCAACTCCAAAGCCACCCCTCACCCACTAGGATACCAACAAACCTACCCACCCTTAACAGTACATAGTACATAAAGCCATTTACCGTACATAGCACATTACAGTCAAATCCCTTCTCGTCCCCATGGATGACCCCCCTCAGATAGGGGTCCC": crs := "ACAACCGCTATGTATTTCGTACATTACTGCCAGCCACCATGAATATTGTACGGTACCATAAATACTTGACCACCTGTAGTACATAAAAACCCAATCCACATCAAAACCCCCTCCCCATGCTTACAAGCAAGTACAGCAATCAACCCTCAACTATCACACATCAACTGCAACTCCAAAGCCACCCCTCACCCACTAGGATACCAACAAACCTACCCACCCTTAACAGTACATAGTACATAAAGCCATTTACCGTACATAGCACATTACAGTCAAATCCCTTCTCGTCCCCATGGATGACCCCCCTCAGATAGGGGTCCC"; NeedlemanWunsch( s1, crs );