Advanced Bioinformatic and Computational Biology


Genetic Sequence Alignment




Human Genom Analysis
Hidden Markov Models
Biological Process Analysis
Jucas Cantor
Evolutionary Distance
Sequence Distance
Functional Enrichment Analysis



Algorithm Codes:


(Smith Waterman and Needleman-Wunch)

    #Author : Kursat Cakal
    #Date : 04/02/2022

    import sys
    
    def getBLOSUM62andPAMScoreTables(fileName):
        with open(fileName) as scoreTableFile:
          scoreTable = scoreTableFile.read()
        rowContent = scoreTable.strip().split('\n')
        tableProteins = rowContent.pop(0)
        proteinS,scoreTable = tableProteins.split(), {}
        for row in rowContent:
          scoreValues = row.split()
          proteinElement, scoreTable[proteinElement] = scoreValues.pop(0), {}
          for protein in proteinS:
            scoreTable[proteinElement][protein] = scoreValues.pop(0)
        return scoreTable
    def getComparisionScore(fileName, rowElement, colElement):
        row, col = rowElement.upper(), colElement.upper()
        return int(getBLOSUM62andPAMScoreTables(fileName)[row][col])
    def getProteinSequences(fileName):
        with open(fileName) as file:
            sequences = file.readlines()
        return sequences[0].rstrip(), sequences[1].rstrip()
    def allocatePMatrix(leftSide, topSide, algorithm):
        processMatrix = [[0] * topSide for row in range(leftSide)]
        complementaryGapM = [[[0, set()]] * topSide for row in range(leftSide)]
        complementaryGapM[1][0],complementaryGapM[0][1] = [0, set()],[0, set()]
        if algorithm == 'SWA':
            for i in range(1, leftSide):
                processMatrix[i][0] = 0
                complementaryGapM[i][0] = [1, {'left'}]
            for j in range(1, topSide):
                processMatrix[0][j] = 0
                complementaryGapM[0][j] = [1, {'top'}]
        elif algorithm == 'NWA':
            for i in range(1, leftSide):
                processMatrix[i][0] = processMatrix[i - 1][0] + getPenalization(complementaryGapM, i - 1, 0, 'left')
                complementaryGapM[i][0] = [1, {'left'}]
            for j in range(1, topSide):
                processMatrix[0][j] = processMatrix[0][j - 1] + getPenalization(complementaryGapM, 0, j - 1, 'top')
                complementaryGapM[0][j] = [1, {'top'}]
        else:
            print("Select SWA or NWA Algorithm")
        return processMatrix, complementaryGapM
    def getPenalization(complementaryGap, row, col, setGap):
        if complementaryGap[row][col][0] == 1:
            if setGap == 'left':
                if 'left' in complementaryGap[row][col][1]:
                    return scoreValueGapExtension #penalize horizontal extention
            elif setGap == 'top':
                if 'top' in complementaryGap[row][col][1]:
                    return scoreValueGapExtension #penalize vertical extention
    
        return scoreValueGapOpening
    def getDecrement(incomingParam):
        return incomingParam-1
    def NWAlignment(setA, setB, setRow, setCol, prmProcessMatrix, prmComplementaryGap):
        for r in range(1, setRow):
            sys.stdout.write("#")
            sys.stdout.flush()
            for c in range(1, setCol):
                diagA=prmProcessMatrix[r - 1][c - 1]
                diagB=getComparisionScore(scoreTableFileName, setA[c - 1], setB[r - 1])
                diagSetA=setA[c - 1]
                diagSetB=setB[r - 1]
                scoreDiagonalArrow = prmProcessMatrix[r - 1][c - 1] + getComparisionScore(scoreTableFileName, setA[c - 1], setB[r - 1])#BLOSUM62 or PAM Score Table is Applied for Different Penalization Combinations or Match Possibilities...
                scoreLeftArrow = prmProcessMatrix[r - 1][c] + getPenalization(prmComplementaryGap, r - 1, c, 'left')
                scoreTopArrow = prmProcessMatrix[r][c - 1] + getPenalization(prmComplementaryGap, r, c - 1, 'top')
                directionSet = [scoreDiagonalArrow, scoreLeftArrow, scoreTopArrow]
                directionIndex = directionSet.index(max(directionSet))
                if directionSet[directionIndex] == scoreTopArrow:
                    prmComplementaryGap[r][c] = [1, {'top'}]
                if directionSet[directionIndex] == scoreLeftArrow:
                    prmComplementaryGap[r][c] = [1, {'left'}]
                prmProcessMatrix[r][c] = directionSet[directionIndex]
        sys.stdout.write("\n")
        print("#      Results      #")
        print("Please do not use script in the Linux Shell or Mac Terminal. Use scrollable IDE Run Window to better view.")
        sys.stdout.write("\n")
        r, c = setRow - 1, setCol - 1
        alignmentSetA, alignmentSetB, alignmentCentralLine = (' ') * 3
        while r > 0 and c > 0:
            scoreDiagonalArrow = prmProcessMatrix[r][c] - getComparisionScore(scoreTableFileName, setA[c - 1], setB[r - 1])
            scoreLeftArrow = prmProcessMatrix[r][c] - getPenalization(prmComplementaryGap, r - 1, c, 'left')
            scoreTopArrow = prmProcessMatrix[r][c] - getPenalization(prmComplementaryGap, r, c - 1, 'top')
            if prmProcessMatrix[r - 1][c - 1] == scoreDiagonalArrow:
                alignmentSetA += setA[c - 1]
                alignmentSetB += setB[r - 1]
                if getComparisionScore(scoreTableFileName, setA[c - 1], setB[r - 1]) == getComparisionScore(scoreTableFileName, setA[c - 1], setA[c - 1]) or getComparisionScore(scoreTableFileName, setA[c - 1], setB[r - 1]) == getComparisionScore(scoreTableFileName, setB[r - 1], setB[r - 1]):
                    alignmentCentralLine += '|'
                else:
                    alignmentCentralLine += ' '
                r, c = getDecrement(r), getDecrement(c)
            elif prmProcessMatrix[r - 1][c] == scoreLeftArrow:
                alignmentSetA += '-'
                alignmentSetB += setB[r - 1]
                alignmentCentralLine += ' '
                r = getDecrement(r)
            elif prmProcessMatrix[r][c - 1] == scoreTopArrow:
                alignmentSetA += setA[c - 1]
                alignmentSetB += '-'
                alignmentCentralLine += ' '
                c = getDecrement(c)
        while c > 0:
            alignmentSetA += setA[c - 1]
            alignmentSetB += '-'
            alignmentCentralLine += ' '
            c = getDecrement(c)
        while r > 0:
            alignmentSetA += '-'
            alignmentSetB += setB[r - 1]
            alignmentCentralLine += ' '
            r = getDecrement(r)
        print(alignmentSetA[::-1] + '\n' + alignmentCentralLine[::-1] + '\n' + alignmentSetB[::-1], "\n")
        sequenceHit = alignmentCentralLine.count('|')
        pID = (sequenceHit * 100) / (len(alignmentCentralLine) - 1)
        return pID, prmProcessMatrix, prmProcessMatrix[setRow - 1][setCol - 1]
    def SWAlignment(setA, setB, setRow, setCol, prmProcessMatrix, prmComplementaryGap):
        optimalAlignmentScore = 0
        scoreIndexes = (0, 0)
        for r in range(1, setRow):
            sys.stdout.write("#")
            sys.stdout.flush()
            for c in range(1, setCol):
                scoreDiagonalArrow = prmProcessMatrix[r - 1][c - 1] + getComparisionScore(scoreTableFileName, setA[c - 1], setB[r - 1])
                scoreLeftArrow = prmProcessMatrix[r - 1][c] + getPenalization(prmComplementaryGap, r - 1, c, 'left')
                scoreTopArrow = prmProcessMatrix[r][c - 1] + getPenalization(prmComplementaryGap, r, c - 1, 'top')
                directionSet = [0, scoreDiagonalArrow, scoreLeftArrow, scoreTopArrow]
                directionIndex = directionSet.index(max(directionSet))
                if directionSet[directionIndex] == scoreTopArrow:
                    prmComplementaryGap[r][c] = [1, {'top'}]
                if directionSet[directionIndex] == scoreLeftArrow:
                    prmComplementaryGap[r][c] = [1, {'left'}]
                prmProcessMatrix[r][c] = directionSet[directionIndex]
                if prmProcessMatrix[r][c] > optimalAlignmentScore:
                    optimalAlignmentScore = prmProcessMatrix[r][c]
                    scoreIndexes = (r, c)
        sys.stdout.write("\n")
        print("#      Results      #")
        print("Please do not use script in the Linux Shell or Mac Terminal. Use scrollable IDE Run Window to better view.")
        sys.stdout.write("\n")
        r, c = scoreIndexes[0], scoreIndexes[1]
        alignmentSetA, alignmentSetB, alignmentCentralLine = (' ') * 3
        while r > 0 and c > 0:
            scoreDiagonalArrow = prmProcessMatrix[r][c] - getComparisionScore(scoreTableFileName, setA[c - 1], setB[r - 1])
            scoreLeftArrow = prmProcessMatrix[r][c] - getPenalization(prmComplementaryGap, r - 1, c, 'left')
            scoreTopArrow = prmProcessMatrix[r][c] - getPenalization(prmComplementaryGap, r, c - 1, 'top')
            if prmProcessMatrix[r - 1][c - 1] == scoreDiagonalArrow:
                alignmentSetA += setA[c - 1]
                alignmentSetB += setB[r - 1]
                if getComparisionScore(scoreTableFileName, setA[c - 1], setB[r - 1]) == getComparisionScore(scoreTableFileName, setA[c - 1], setA[c - 1]) or getComparisionScore(scoreTableFileName, setA[c - 1], setB[r - 1]) == getComparisionScore(scoreTableFileName, setB[r - 1], setB[r - 1]):
                    alignmentCentralLine += '|'
                else:
                    alignmentCentralLine += ' '
                r, c = getDecrement(r), getDecrement(c)
            elif prmProcessMatrix[r - 1][c] == scoreLeftArrow:
                alignmentSetA += '-'
                alignmentSetB += setB[r - 1]
                alignmentCentralLine += ' '
                r = getDecrement(r)
            elif prmProcessMatrix[r][c - 1] == scoreTopArrow:
                alignmentSetA += setA[c - 1]
                alignmentSetB += '-'
                alignmentCentralLine += ' '
                c = getDecrement(c)
            elif prmProcessMatrix[r][c] == 0:
                break
        totalTerminalGap = 0
        while c > 0:
            alignmentSetA += setA[c - 1]
            alignmentSetB += '-'
            totalTerminalGap += 1
            alignmentCentralLine += ' '
            c = getDecrement(c)
        while r > 0:
            alignmentSetA += '-'
            alignmentSetB += setB[r - 1]
            totalTerminalGap += 1
            alignmentCentralLine += ' '
            r = getDecrement(r)
        print(alignmentSetA[::-1] + '\n' + alignmentCentralLine[::-1] + '\n' + alignmentSetB[::-1], "\n")
        sequenceHit = alignmentCentralLine.count('|')
        pID = (sequenceHit * 100) / ((len(alignmentCentralLine) - 1) - totalTerminalGap)
        return pID, prmProcessMatrix, optimalAlignmentScore
    def startApp(sequenceFile,scoreTableFileName):
        proteinSequenceA, proteinSequenceB = getProteinSequences(sequenceFile)
        leftSide, topSide = len(proteinSequenceB) + 1, len(proteinSequenceA) + 1
        BLOSUM62 = getBLOSUM62andPAMScoreTables(scoreTableFileName)
        try:
            if selectedAlgorithm == 'SWA':
                print("Please Wait. Smith-Waterman Algorithm is Processing...")
                processMatrix, complementaryGap = allocatePMatrix(leftSide, topSide, selectedAlgorithm)
                pID, processMatrix, alignmentScore = SWAlignment(proteinSequenceA, proteinSequenceB, leftSide, topSide, processMatrix, complementaryGap)
                print("Alignment Score:", alignmentScore)
                print("Percentage of Identity:", pID)
            elif selectedAlgorithm == 'NWA':
                print("Please Wait. Needleman-Wunch Algorithm Processing...")
                processMatrix, complementaryGap = allocatePMatrix(leftSide, topSide, selectedAlgorithm)
                pID, processMatrix, alignmentScore = NWAlignment(proteinSequenceA, proteinSequenceB, leftSide, topSide, processMatrix, complementaryGap)
                print("Alignment Score:", alignmentScore)
                print("Percentage of Identity:", pID)
        except:
            print("Please Select Algorithm Smith-Waterman or Needleman-Wunch")
    def printArguments():
        print(sequenceFile+"\t\t"+scoreTableFileName+"\t\t\t"+selectedAlgorithm+"\t\t\t\t"+str(scoreValueGapOpening)+"\t\t\t\t"+str(scoreValueGapExtension))
    if __name__ == '__main__':
        try:
            if len(sys.argv) == 1:
                print("#########################################################################################################################################")
                print("You can set parameters below order from terminal python args. It is initialied as default. If you wish please put arguments sequentially.")
                print("#########################################################################################################################################")
                print("<SequenceFile.txt> <ScoreTableFile.txt> <algorithm> <GapOpeningScore> <GapExtensionScore>")
                sequenceFile = "proteinseqMain.txt" # OR "proteinseqE1.txt" OR etc...
                scoreTableFileName = "BLOSUM62.txt" # OR "PAM250.txt"
                selectedAlgorithm = "SWA"           # OR "SWA"
                scoreValueGapOpening = -10          # OR -1 OR -2 OR etc...
                scoreValueGapExtension = -5         # OR -1 OR -2 OR etc...
                printArguments()
                startApp(sequenceFile, scoreTableFileName)
            elif len(sys.argv) == 6:
                print("Successful Argument Inputs:")
                print("<SequenceFile.txt>\t\t<ScoreTableFile.txt>\t\t<algorithm>\t\t<GapOpeningScore>\t\t<GapExtensionScore>")
                sequenceFile = sys.argv[1]
                scoreTableFileName = sys.argv[2]
                selectedAlgorithm = sys.argv[3]
                scoreValueGapOpening = int(sys.argv[4])
                scoreValueGapExtension = int(sys.argv[5])
                printArguments()
                startApp(sequenceFile, scoreTableFileName)
            else:
                sys.exit("Arguments error.")
        except:
            print("\nException:Please set required terminal arguments to be able to run code..")
            print("\nException:Probably you put unavailable file name or wrong parameter format..")