Advanced Bioinformatic and Computational Biology
Genetic Sequence Alignment
Human Genom Analysis
Hidden Markov Models
Biological Process Analysis
Jucas Cantor
Evolutionary Distance
Sequence Distance
Functional Enrichment Analysis
Algorithm Codes:
(Smith Waterman and Needleman-Wunch)
#Author : Kursat Cakal #Date : 04/02/2022 import sys def getBLOSUM62andPAMScoreTables(fileName): with open(fileName) as scoreTableFile: scoreTable = scoreTableFile.read() rowContent = scoreTable.strip().split('\n') tableProteins = rowContent.pop(0) proteinS,scoreTable = tableProteins.split(), {} for row in rowContent: scoreValues = row.split() proteinElement, scoreTable[proteinElement] = scoreValues.pop(0), {} for protein in proteinS: scoreTable[proteinElement][protein] = scoreValues.pop(0) return scoreTable def getComparisionScore(fileName, rowElement, colElement): row, col = rowElement.upper(), colElement.upper() return int(getBLOSUM62andPAMScoreTables(fileName)[row][col]) def getProteinSequences(fileName): with open(fileName) as file: sequences = file.readlines() return sequences[0].rstrip(), sequences[1].rstrip() def allocatePMatrix(leftSide, topSide, algorithm): processMatrix = [[0] * topSide for row in range(leftSide)] complementaryGapM = [[[0, set()]] * topSide for row in range(leftSide)] complementaryGapM[1][0],complementaryGapM[0][1] = [0, set()],[0, set()] if algorithm == 'SWA': for i in range(1, leftSide): processMatrix[i][0] = 0 complementaryGapM[i][0] = [1, {'left'}] for j in range(1, topSide): processMatrix[0][j] = 0 complementaryGapM[0][j] = [1, {'top'}] elif algorithm == 'NWA': for i in range(1, leftSide): processMatrix[i][0] = processMatrix[i - 1][0] + getPenalization(complementaryGapM, i - 1, 0, 'left') complementaryGapM[i][0] = [1, {'left'}] for j in range(1, topSide): processMatrix[0][j] = processMatrix[0][j - 1] + getPenalization(complementaryGapM, 0, j - 1, 'top') complementaryGapM[0][j] = [1, {'top'}] else: print("Select SWA or NWA Algorithm") return processMatrix, complementaryGapM def getPenalization(complementaryGap, row, col, setGap): if complementaryGap[row][col][0] == 1: if setGap == 'left': if 'left' in complementaryGap[row][col][1]: return scoreValueGapExtension #penalize horizontal extention elif setGap == 'top': if 'top' in complementaryGap[row][col][1]: return scoreValueGapExtension #penalize vertical extention return scoreValueGapOpening def getDecrement(incomingParam): return incomingParam-1 def NWAlignment(setA, setB, setRow, setCol, prmProcessMatrix, prmComplementaryGap): for r in range(1, setRow): sys.stdout.write("#") sys.stdout.flush() for c in range(1, setCol): diagA=prmProcessMatrix[r - 1][c - 1] diagB=getComparisionScore(scoreTableFileName, setA[c - 1], setB[r - 1]) diagSetA=setA[c - 1] diagSetB=setB[r - 1] scoreDiagonalArrow = prmProcessMatrix[r - 1][c - 1] + getComparisionScore(scoreTableFileName, setA[c - 1], setB[r - 1])#BLOSUM62 or PAM Score Table is Applied for Different Penalization Combinations or Match Possibilities... scoreLeftArrow = prmProcessMatrix[r - 1][c] + getPenalization(prmComplementaryGap, r - 1, c, 'left') scoreTopArrow = prmProcessMatrix[r][c - 1] + getPenalization(prmComplementaryGap, r, c - 1, 'top') directionSet = [scoreDiagonalArrow, scoreLeftArrow, scoreTopArrow] directionIndex = directionSet.index(max(directionSet)) if directionSet[directionIndex] == scoreTopArrow: prmComplementaryGap[r][c] = [1, {'top'}] if directionSet[directionIndex] == scoreLeftArrow: prmComplementaryGap[r][c] = [1, {'left'}] prmProcessMatrix[r][c] = directionSet[directionIndex] sys.stdout.write("\n") print("# Results #") print("Please do not use script in the Linux Shell or Mac Terminal. Use scrollable IDE Run Window to better view.") sys.stdout.write("\n") r, c = setRow - 1, setCol - 1 alignmentSetA, alignmentSetB, alignmentCentralLine = (' ') * 3 while r > 0 and c > 0: scoreDiagonalArrow = prmProcessMatrix[r][c] - getComparisionScore(scoreTableFileName, setA[c - 1], setB[r - 1]) scoreLeftArrow = prmProcessMatrix[r][c] - getPenalization(prmComplementaryGap, r - 1, c, 'left') scoreTopArrow = prmProcessMatrix[r][c] - getPenalization(prmComplementaryGap, r, c - 1, 'top') if prmProcessMatrix[r - 1][c - 1] == scoreDiagonalArrow: alignmentSetA += setA[c - 1] alignmentSetB += setB[r - 1] if getComparisionScore(scoreTableFileName, setA[c - 1], setB[r - 1]) == getComparisionScore(scoreTableFileName, setA[c - 1], setA[c - 1]) or getComparisionScore(scoreTableFileName, setA[c - 1], setB[r - 1]) == getComparisionScore(scoreTableFileName, setB[r - 1], setB[r - 1]): alignmentCentralLine += '|' else: alignmentCentralLine += ' ' r, c = getDecrement(r), getDecrement(c) elif prmProcessMatrix[r - 1][c] == scoreLeftArrow: alignmentSetA += '-' alignmentSetB += setB[r - 1] alignmentCentralLine += ' ' r = getDecrement(r) elif prmProcessMatrix[r][c - 1] == scoreTopArrow: alignmentSetA += setA[c - 1] alignmentSetB += '-' alignmentCentralLine += ' ' c = getDecrement(c) while c > 0: alignmentSetA += setA[c - 1] alignmentSetB += '-' alignmentCentralLine += ' ' c = getDecrement(c) while r > 0: alignmentSetA += '-' alignmentSetB += setB[r - 1] alignmentCentralLine += ' ' r = getDecrement(r) print(alignmentSetA[::-1] + '\n' + alignmentCentralLine[::-1] + '\n' + alignmentSetB[::-1], "\n") sequenceHit = alignmentCentralLine.count('|') pID = (sequenceHit * 100) / (len(alignmentCentralLine) - 1) return pID, prmProcessMatrix, prmProcessMatrix[setRow - 1][setCol - 1] def SWAlignment(setA, setB, setRow, setCol, prmProcessMatrix, prmComplementaryGap): optimalAlignmentScore = 0 scoreIndexes = (0, 0) for r in range(1, setRow): sys.stdout.write("#") sys.stdout.flush() for c in range(1, setCol): scoreDiagonalArrow = prmProcessMatrix[r - 1][c - 1] + getComparisionScore(scoreTableFileName, setA[c - 1], setB[r - 1]) scoreLeftArrow = prmProcessMatrix[r - 1][c] + getPenalization(prmComplementaryGap, r - 1, c, 'left') scoreTopArrow = prmProcessMatrix[r][c - 1] + getPenalization(prmComplementaryGap, r, c - 1, 'top') directionSet = [0, scoreDiagonalArrow, scoreLeftArrow, scoreTopArrow] directionIndex = directionSet.index(max(directionSet)) if directionSet[directionIndex] == scoreTopArrow: prmComplementaryGap[r][c] = [1, {'top'}] if directionSet[directionIndex] == scoreLeftArrow: prmComplementaryGap[r][c] = [1, {'left'}] prmProcessMatrix[r][c] = directionSet[directionIndex] if prmProcessMatrix[r][c] > optimalAlignmentScore: optimalAlignmentScore = prmProcessMatrix[r][c] scoreIndexes = (r, c) sys.stdout.write("\n") print("# Results #") print("Please do not use script in the Linux Shell or Mac Terminal. Use scrollable IDE Run Window to better view.") sys.stdout.write("\n") r, c = scoreIndexes[0], scoreIndexes[1] alignmentSetA, alignmentSetB, alignmentCentralLine = (' ') * 3 while r > 0 and c > 0: scoreDiagonalArrow = prmProcessMatrix[r][c] - getComparisionScore(scoreTableFileName, setA[c - 1], setB[r - 1]) scoreLeftArrow = prmProcessMatrix[r][c] - getPenalization(prmComplementaryGap, r - 1, c, 'left') scoreTopArrow = prmProcessMatrix[r][c] - getPenalization(prmComplementaryGap, r, c - 1, 'top') if prmProcessMatrix[r - 1][c - 1] == scoreDiagonalArrow: alignmentSetA += setA[c - 1] alignmentSetB += setB[r - 1] if getComparisionScore(scoreTableFileName, setA[c - 1], setB[r - 1]) == getComparisionScore(scoreTableFileName, setA[c - 1], setA[c - 1]) or getComparisionScore(scoreTableFileName, setA[c - 1], setB[r - 1]) == getComparisionScore(scoreTableFileName, setB[r - 1], setB[r - 1]): alignmentCentralLine += '|' else: alignmentCentralLine += ' ' r, c = getDecrement(r), getDecrement(c) elif prmProcessMatrix[r - 1][c] == scoreLeftArrow: alignmentSetA += '-' alignmentSetB += setB[r - 1] alignmentCentralLine += ' ' r = getDecrement(r) elif prmProcessMatrix[r][c - 1] == scoreTopArrow: alignmentSetA += setA[c - 1] alignmentSetB += '-' alignmentCentralLine += ' ' c = getDecrement(c) elif prmProcessMatrix[r][c] == 0: break totalTerminalGap = 0 while c > 0: alignmentSetA += setA[c - 1] alignmentSetB += '-' totalTerminalGap += 1 alignmentCentralLine += ' ' c = getDecrement(c) while r > 0: alignmentSetA += '-' alignmentSetB += setB[r - 1] totalTerminalGap += 1 alignmentCentralLine += ' ' r = getDecrement(r) print(alignmentSetA[::-1] + '\n' + alignmentCentralLine[::-1] + '\n' + alignmentSetB[::-1], "\n") sequenceHit = alignmentCentralLine.count('|') pID = (sequenceHit * 100) / ((len(alignmentCentralLine) - 1) - totalTerminalGap) return pID, prmProcessMatrix, optimalAlignmentScore def startApp(sequenceFile,scoreTableFileName): proteinSequenceA, proteinSequenceB = getProteinSequences(sequenceFile) leftSide, topSide = len(proteinSequenceB) + 1, len(proteinSequenceA) + 1 BLOSUM62 = getBLOSUM62andPAMScoreTables(scoreTableFileName) try: if selectedAlgorithm == 'SWA': print("Please Wait. Smith-Waterman Algorithm is Processing...") processMatrix, complementaryGap = allocatePMatrix(leftSide, topSide, selectedAlgorithm) pID, processMatrix, alignmentScore = SWAlignment(proteinSequenceA, proteinSequenceB, leftSide, topSide, processMatrix, complementaryGap) print("Alignment Score:", alignmentScore) print("Percentage of Identity:", pID) elif selectedAlgorithm == 'NWA': print("Please Wait. Needleman-Wunch Algorithm Processing...") processMatrix, complementaryGap = allocatePMatrix(leftSide, topSide, selectedAlgorithm) pID, processMatrix, alignmentScore = NWAlignment(proteinSequenceA, proteinSequenceB, leftSide, topSide, processMatrix, complementaryGap) print("Alignment Score:", alignmentScore) print("Percentage of Identity:", pID) except: print("Please Select Algorithm Smith-Waterman or Needleman-Wunch") def printArguments(): print(sequenceFile+"\t\t"+scoreTableFileName+"\t\t\t"+selectedAlgorithm+"\t\t\t\t"+str(scoreValueGapOpening)+"\t\t\t\t"+str(scoreValueGapExtension)) if __name__ == '__main__': try: if len(sys.argv) == 1: print("#########################################################################################################################################") print("You can set parameters below order from terminal python args. It is initialied as default. If you wish please put arguments sequentially.") print("#########################################################################################################################################") print("<SequenceFile.txt> <ScoreTableFile.txt> <algorithm> <GapOpeningScore> <GapExtensionScore>") sequenceFile = "proteinseqMain.txt" # OR "proteinseqE1.txt" OR etc... scoreTableFileName = "BLOSUM62.txt" # OR "PAM250.txt" selectedAlgorithm = "SWA" # OR "SWA" scoreValueGapOpening = -10 # OR -1 OR -2 OR etc... scoreValueGapExtension = -5 # OR -1 OR -2 OR etc... printArguments() startApp(sequenceFile, scoreTableFileName) elif len(sys.argv) == 6: print("Successful Argument Inputs:") print("<SequenceFile.txt>\t\t<ScoreTableFile.txt>\t\t<algorithm>\t\t<GapOpeningScore>\t\t<GapExtensionScore>") sequenceFile = sys.argv[1] scoreTableFileName = sys.argv[2] selectedAlgorithm = sys.argv[3] scoreValueGapOpening = int(sys.argv[4]) scoreValueGapExtension = int(sys.argv[5]) printArguments() startApp(sequenceFile, scoreTableFileName) else: sys.exit("Arguments error.") except: print("\nException:Please set required terminal arguments to be able to run code..") print("\nException:Probably you put unavailable file name or wrong parameter format..")