Advanced Bioinformatic and Computational Biology
Genetic Sequence Alignment
Human Genom Analysis
Hidden Markov Models
Biological Process Analysis
Jucas Cantor
Evolutionary Distance
Sequence Distance
Functional Enrichment Analysis
Algorithm Codes:
(Smith Waterman and Needleman-Wunch)
#Author : Kursat Cakal
#Date : 04/02/2022
import sys
def getBLOSUM62andPAMScoreTables(fileName):
with open(fileName) as scoreTableFile:
scoreTable = scoreTableFile.read()
rowContent = scoreTable.strip().split('\n')
tableProteins = rowContent.pop(0)
proteinS,scoreTable = tableProteins.split(), {}
for row in rowContent:
scoreValues = row.split()
proteinElement, scoreTable[proteinElement] = scoreValues.pop(0), {}
for protein in proteinS:
scoreTable[proteinElement][protein] = scoreValues.pop(0)
return scoreTable
def getComparisionScore(fileName, rowElement, colElement):
row, col = rowElement.upper(), colElement.upper()
return int(getBLOSUM62andPAMScoreTables(fileName)[row][col])
def getProteinSequences(fileName):
with open(fileName) as file:
sequences = file.readlines()
return sequences[0].rstrip(), sequences[1].rstrip()
def allocatePMatrix(leftSide, topSide, algorithm):
processMatrix = [[0] * topSide for row in range(leftSide)]
complementaryGapM = [[[0, set()]] * topSide for row in range(leftSide)]
complementaryGapM[1][0],complementaryGapM[0][1] = [0, set()],[0, set()]
if algorithm == 'SWA':
for i in range(1, leftSide):
processMatrix[i][0] = 0
complementaryGapM[i][0] = [1, {'left'}]
for j in range(1, topSide):
processMatrix[0][j] = 0
complementaryGapM[0][j] = [1, {'top'}]
elif algorithm == 'NWA':
for i in range(1, leftSide):
processMatrix[i][0] = processMatrix[i - 1][0] + getPenalization(complementaryGapM, i - 1, 0, 'left')
complementaryGapM[i][0] = [1, {'left'}]
for j in range(1, topSide):
processMatrix[0][j] = processMatrix[0][j - 1] + getPenalization(complementaryGapM, 0, j - 1, 'top')
complementaryGapM[0][j] = [1, {'top'}]
else:
print("Select SWA or NWA Algorithm")
return processMatrix, complementaryGapM
def getPenalization(complementaryGap, row, col, setGap):
if complementaryGap[row][col][0] == 1:
if setGap == 'left':
if 'left' in complementaryGap[row][col][1]:
return scoreValueGapExtension #penalize horizontal extention
elif setGap == 'top':
if 'top' in complementaryGap[row][col][1]:
return scoreValueGapExtension #penalize vertical extention
return scoreValueGapOpening
def getDecrement(incomingParam):
return incomingParam-1
def NWAlignment(setA, setB, setRow, setCol, prmProcessMatrix, prmComplementaryGap):
for r in range(1, setRow):
sys.stdout.write("#")
sys.stdout.flush()
for c in range(1, setCol):
diagA=prmProcessMatrix[r - 1][c - 1]
diagB=getComparisionScore(scoreTableFileName, setA[c - 1], setB[r - 1])
diagSetA=setA[c - 1]
diagSetB=setB[r - 1]
scoreDiagonalArrow = prmProcessMatrix[r - 1][c - 1] + getComparisionScore(scoreTableFileName, setA[c - 1], setB[r - 1])#BLOSUM62 or PAM Score Table is Applied for Different Penalization Combinations or Match Possibilities...
scoreLeftArrow = prmProcessMatrix[r - 1][c] + getPenalization(prmComplementaryGap, r - 1, c, 'left')
scoreTopArrow = prmProcessMatrix[r][c - 1] + getPenalization(prmComplementaryGap, r, c - 1, 'top')
directionSet = [scoreDiagonalArrow, scoreLeftArrow, scoreTopArrow]
directionIndex = directionSet.index(max(directionSet))
if directionSet[directionIndex] == scoreTopArrow:
prmComplementaryGap[r][c] = [1, {'top'}]
if directionSet[directionIndex] == scoreLeftArrow:
prmComplementaryGap[r][c] = [1, {'left'}]
prmProcessMatrix[r][c] = directionSet[directionIndex]
sys.stdout.write("\n")
print("# Results #")
print("Please do not use script in the Linux Shell or Mac Terminal. Use scrollable IDE Run Window to better view.")
sys.stdout.write("\n")
r, c = setRow - 1, setCol - 1
alignmentSetA, alignmentSetB, alignmentCentralLine = (' ') * 3
while r > 0 and c > 0:
scoreDiagonalArrow = prmProcessMatrix[r][c] - getComparisionScore(scoreTableFileName, setA[c - 1], setB[r - 1])
scoreLeftArrow = prmProcessMatrix[r][c] - getPenalization(prmComplementaryGap, r - 1, c, 'left')
scoreTopArrow = prmProcessMatrix[r][c] - getPenalization(prmComplementaryGap, r, c - 1, 'top')
if prmProcessMatrix[r - 1][c - 1] == scoreDiagonalArrow:
alignmentSetA += setA[c - 1]
alignmentSetB += setB[r - 1]
if getComparisionScore(scoreTableFileName, setA[c - 1], setB[r - 1]) == getComparisionScore(scoreTableFileName, setA[c - 1], setA[c - 1]) or getComparisionScore(scoreTableFileName, setA[c - 1], setB[r - 1]) == getComparisionScore(scoreTableFileName, setB[r - 1], setB[r - 1]):
alignmentCentralLine += '|'
else:
alignmentCentralLine += ' '
r, c = getDecrement(r), getDecrement(c)
elif prmProcessMatrix[r - 1][c] == scoreLeftArrow:
alignmentSetA += '-'
alignmentSetB += setB[r - 1]
alignmentCentralLine += ' '
r = getDecrement(r)
elif prmProcessMatrix[r][c - 1] == scoreTopArrow:
alignmentSetA += setA[c - 1]
alignmentSetB += '-'
alignmentCentralLine += ' '
c = getDecrement(c)
while c > 0:
alignmentSetA += setA[c - 1]
alignmentSetB += '-'
alignmentCentralLine += ' '
c = getDecrement(c)
while r > 0:
alignmentSetA += '-'
alignmentSetB += setB[r - 1]
alignmentCentralLine += ' '
r = getDecrement(r)
print(alignmentSetA[::-1] + '\n' + alignmentCentralLine[::-1] + '\n' + alignmentSetB[::-1], "\n")
sequenceHit = alignmentCentralLine.count('|')
pID = (sequenceHit * 100) / (len(alignmentCentralLine) - 1)
return pID, prmProcessMatrix, prmProcessMatrix[setRow - 1][setCol - 1]
def SWAlignment(setA, setB, setRow, setCol, prmProcessMatrix, prmComplementaryGap):
optimalAlignmentScore = 0
scoreIndexes = (0, 0)
for r in range(1, setRow):
sys.stdout.write("#")
sys.stdout.flush()
for c in range(1, setCol):
scoreDiagonalArrow = prmProcessMatrix[r - 1][c - 1] + getComparisionScore(scoreTableFileName, setA[c - 1], setB[r - 1])
scoreLeftArrow = prmProcessMatrix[r - 1][c] + getPenalization(prmComplementaryGap, r - 1, c, 'left')
scoreTopArrow = prmProcessMatrix[r][c - 1] + getPenalization(prmComplementaryGap, r, c - 1, 'top')
directionSet = [0, scoreDiagonalArrow, scoreLeftArrow, scoreTopArrow]
directionIndex = directionSet.index(max(directionSet))
if directionSet[directionIndex] == scoreTopArrow:
prmComplementaryGap[r][c] = [1, {'top'}]
if directionSet[directionIndex] == scoreLeftArrow:
prmComplementaryGap[r][c] = [1, {'left'}]
prmProcessMatrix[r][c] = directionSet[directionIndex]
if prmProcessMatrix[r][c] > optimalAlignmentScore:
optimalAlignmentScore = prmProcessMatrix[r][c]
scoreIndexes = (r, c)
sys.stdout.write("\n")
print("# Results #")
print("Please do not use script in the Linux Shell or Mac Terminal. Use scrollable IDE Run Window to better view.")
sys.stdout.write("\n")
r, c = scoreIndexes[0], scoreIndexes[1]
alignmentSetA, alignmentSetB, alignmentCentralLine = (' ') * 3
while r > 0 and c > 0:
scoreDiagonalArrow = prmProcessMatrix[r][c] - getComparisionScore(scoreTableFileName, setA[c - 1], setB[r - 1])
scoreLeftArrow = prmProcessMatrix[r][c] - getPenalization(prmComplementaryGap, r - 1, c, 'left')
scoreTopArrow = prmProcessMatrix[r][c] - getPenalization(prmComplementaryGap, r, c - 1, 'top')
if prmProcessMatrix[r - 1][c - 1] == scoreDiagonalArrow:
alignmentSetA += setA[c - 1]
alignmentSetB += setB[r - 1]
if getComparisionScore(scoreTableFileName, setA[c - 1], setB[r - 1]) == getComparisionScore(scoreTableFileName, setA[c - 1], setA[c - 1]) or getComparisionScore(scoreTableFileName, setA[c - 1], setB[r - 1]) == getComparisionScore(scoreTableFileName, setB[r - 1], setB[r - 1]):
alignmentCentralLine += '|'
else:
alignmentCentralLine += ' '
r, c = getDecrement(r), getDecrement(c)
elif prmProcessMatrix[r - 1][c] == scoreLeftArrow:
alignmentSetA += '-'
alignmentSetB += setB[r - 1]
alignmentCentralLine += ' '
r = getDecrement(r)
elif prmProcessMatrix[r][c - 1] == scoreTopArrow:
alignmentSetA += setA[c - 1]
alignmentSetB += '-'
alignmentCentralLine += ' '
c = getDecrement(c)
elif prmProcessMatrix[r][c] == 0:
break
totalTerminalGap = 0
while c > 0:
alignmentSetA += setA[c - 1]
alignmentSetB += '-'
totalTerminalGap += 1
alignmentCentralLine += ' '
c = getDecrement(c)
while r > 0:
alignmentSetA += '-'
alignmentSetB += setB[r - 1]
totalTerminalGap += 1
alignmentCentralLine += ' '
r = getDecrement(r)
print(alignmentSetA[::-1] + '\n' + alignmentCentralLine[::-1] + '\n' + alignmentSetB[::-1], "\n")
sequenceHit = alignmentCentralLine.count('|')
pID = (sequenceHit * 100) / ((len(alignmentCentralLine) - 1) - totalTerminalGap)
return pID, prmProcessMatrix, optimalAlignmentScore
def startApp(sequenceFile,scoreTableFileName):
proteinSequenceA, proteinSequenceB = getProteinSequences(sequenceFile)
leftSide, topSide = len(proteinSequenceB) + 1, len(proteinSequenceA) + 1
BLOSUM62 = getBLOSUM62andPAMScoreTables(scoreTableFileName)
try:
if selectedAlgorithm == 'SWA':
print("Please Wait. Smith-Waterman Algorithm is Processing...")
processMatrix, complementaryGap = allocatePMatrix(leftSide, topSide, selectedAlgorithm)
pID, processMatrix, alignmentScore = SWAlignment(proteinSequenceA, proteinSequenceB, leftSide, topSide, processMatrix, complementaryGap)
print("Alignment Score:", alignmentScore)
print("Percentage of Identity:", pID)
elif selectedAlgorithm == 'NWA':
print("Please Wait. Needleman-Wunch Algorithm Processing...")
processMatrix, complementaryGap = allocatePMatrix(leftSide, topSide, selectedAlgorithm)
pID, processMatrix, alignmentScore = NWAlignment(proteinSequenceA, proteinSequenceB, leftSide, topSide, processMatrix, complementaryGap)
print("Alignment Score:", alignmentScore)
print("Percentage of Identity:", pID)
except:
print("Please Select Algorithm Smith-Waterman or Needleman-Wunch")
def printArguments():
print(sequenceFile+"\t\t"+scoreTableFileName+"\t\t\t"+selectedAlgorithm+"\t\t\t\t"+str(scoreValueGapOpening)+"\t\t\t\t"+str(scoreValueGapExtension))
if __name__ == '__main__':
try:
if len(sys.argv) == 1:
print("#########################################################################################################################################")
print("You can set parameters below order from terminal python args. It is initialied as default. If you wish please put arguments sequentially.")
print("#########################################################################################################################################")
print("<SequenceFile.txt> <ScoreTableFile.txt> <algorithm> <GapOpeningScore> <GapExtensionScore>")
sequenceFile = "proteinseqMain.txt" # OR "proteinseqE1.txt" OR etc...
scoreTableFileName = "BLOSUM62.txt" # OR "PAM250.txt"
selectedAlgorithm = "SWA" # OR "SWA"
scoreValueGapOpening = -10 # OR -1 OR -2 OR etc...
scoreValueGapExtension = -5 # OR -1 OR -2 OR etc...
printArguments()
startApp(sequenceFile, scoreTableFileName)
elif len(sys.argv) == 6:
print("Successful Argument Inputs:")
print("<SequenceFile.txt>\t\t<ScoreTableFile.txt>\t\t<algorithm>\t\t<GapOpeningScore>\t\t<GapExtensionScore>")
sequenceFile = sys.argv[1]
scoreTableFileName = sys.argv[2]
selectedAlgorithm = sys.argv[3]
scoreValueGapOpening = int(sys.argv[4])
scoreValueGapExtension = int(sys.argv[5])
printArguments()
startApp(sequenceFile, scoreTableFileName)
else:
sys.exit("Arguments error.")
except:
print("\nException:Please set required terminal arguments to be able to run code..")
print("\nException:Probably you put unavailable file name or wrong parameter format..")