!********************************************************** ! BLASTN.TRU v.1.0 6/23/03 ! nucleotide blasting in TrueBasic ! Modified 6/26/03 ! ! Input: query and target sequence files, in FastA format ! Output: alignments displayed on screen ! Service routines: ! ! PrintSegment (title$, sequence$, start, extent) ! Prints a segment within the variable sequence$ ! Starts at the specified position and prints the ! number of elements specified by extent ! Segment preceded by title$ ! ! Print2DArray (array, width) ! Prints numbers within a 2-dimensional numeric array in a tabular format ! The width of each column must be specified ! The width includes blanks between columns !********************************************************** !********************************************************** ! !function declarations ! !********************************************************** DECLARE DEF doGapRight DECLARE DEF checkDiagonal DECLARE DEF checkScore DECLARE DEF doGapDown DECLARE DEF doGapRight DECLARE DEF reverse$ !********************************************************** ! !file declarations ! !********************************************************** LET queryFile$ = "C:\1\cftsobc\bbsi\webpage\webfinal\inst\courses\RS-P2\P2-primer-1R.txt" LET queryFile$ = "D:\BBSI-Scenarios\PhageP2\P2-primer-1R.txt" LET targetFile$ = "C:\1\genome\ecoli\ecolik12.nt" !********************************************************** ! ! parameters used to control behavior of blast ! !********************************************************** LET wordLength = 11 LET matchReward = 1 LET mismatchPenalty = 2 LET gapBeginPenalty = 5 LET gapExtendPenalty = 2 LET maxScore = wordLength * matchReward !********************************************************** ! ! constants used for formatting output ! !********************************************************** LET gap$ = "-" LET bar$ = "|" !********************************************************** ! !global variables ! !********************************************************** LET target$ = "" LET query$ = "" LET t = 1 LET q = 1 LET direction = 1 LET targetFlank$ = "" LET queryFlank$ = "" LET TRUE = 1 LET FALSE = 0 LET maxI = 0 LET maxJ = 0 LET targetMax = 0 let targetMin = 0 let queryMax = 0 let queryMin = 0 let forwardTarget$ = "" let forwardQuery$ = "" let backwardTarget$ = "" let backwardQuery$ = "" DIM score(1,1) DIM lastI(1,1) DIM lastJ(1,1) DIM seen$(1) !********************************************************** ! ! main program ! !********************************************************** Call getFileInfo(queryFile$, queryName$, query$) CALL getFileInfo(targetFile$, targetName$, target$) !LET query$ = "GAATGCCATTGAAGATACCTACAGCCCAGTT" !LET target$ = "GAATGCCATTGTTAGATAAGCCTACAGCCCACCC" print "Query: " & queryName$ print "Target: " & targetName$ LET query$ = UCASE$(query$) LET target$ = UCASE$(target$) LET position = 1 FOR queryWordStart = 1 TO len(query$) - wordLength LET queryWord$ = query$[queryWordStart:queryWordStart+wordLength-1] LET position = pos(target$, queryWord$, position) IF position > 0 Then LET targetWordStart = position CALL oneMatch(targetWordStart, queryWordStart) LET position = pos(target$, queryWord$, position + 1) END IF NEXT queryWordStart !********************************************************** ! ! performs file I/O operations ! !********************************************************** SUB getFileInfo (path$, name$, seq$) LET seq$ = "" OPEN #1: name path$ LINE INPUT #1: name$ DO LINE INPUT #1: temp$ let seq$ = seq$ & temp$ LOOP UNTIL END#1 CLOSE #1 END SUB !******************************************************************************************************************** ! ! After the initial word match, this subroutine attempts to extend it in both directions (forward and backward) ! by calling on halfMatch. It then prints the result on screen using printHit ! !******************************************************************************************************************** SUB oneMatch(tStart, qStart) LOCAL targetWordStart, targetWordEnd, queryWordStart, queryWordEnd LET targetWordStart = tStart LET targetWordEnd = targetWordStart + wordLength - 1 LET queryWordStart = qStart LET queryWordEnd = queryWordStart + wordLength - 1 LET direction = 1 ! Extend match in the positive (rightward) direction CALL halfMatch(targetWordEnd, queryWordEND) LET targetMax = targetWordEnd + maxI - 1 LET queryMax = queryWordEnd + maxJ - 1 LET forwardTarget$ = targetFlank$ LET forwardQuery$ = queryFlank$ LET direction = -1 ! Extend match in the negative (leftward) direction Call halfMatch (targetWordStart, queryWordStart) LET targetMin = targetWordStart - maxI + 1 LET queryMin = queryWordStart - maxJ + 1 LET backwardTarget$ = targetFlank$ LET backwardQuery$ = queryFlank$ LET targetWord$ = target$[targetWordStart:targetWordStart + wordLength-1] LET backwardTarget$ = reverse$(backwardTarget$) LET targetHit$ = backwardTarget$ & targetWord$ & forwardTarget$ LET queryWord$ = query$[queryWordStart:queryWordStart + wordLength-1] LET backwardQuery$ = reverse$(backwardQuery$) LET queryHit$ = backwardQuery$ & queryWord$ & forwardQuery$ IF LEN(targetHit$) > 0 AND LEN(queryHit$) > 0 THEN CALL printHit(targetMin, targetHit$, queryMin, queryHit$) END IF END SUB !******************************************************************************************************************** ! ! This subroutine extends the word match in a direction (whatever the direction is, the logic is the same. ! direction is set in the global variable direction, +1 means left to right (extending the end of the hit), -1 means ! right to left (extending from beginning of the hit. This routine uses fillRight, fillDown to build up the scoring ! matrix (score()) and extensions to figure out the sequence from the score matrix ! ! Sets the following variables: ! ! targetFlank$ matching sequence to left or right of word match in target ! queryFlank$ matching sequence to left or right of word match in query ! maxI Position in target most distant from word match to be retained in full match ! maxJ Position in query most distant from word match to be retained in full match ! !******************************************************************************************************************** SUB halfMatch(target,query) LET t = target LET q = query MAT REDIM lastI(1,1) LET lastI(1,1) = -1 MAT REDIM LastJ(1,1) LET lastJ(1,1) = -1 MAT REDIM score(1,1) LET maxScore = matchReward * wordLength LET score(1,1) = maxScore LET maxI = 1 LET maxJ = 1 CALL fillRight CALL fillDown CALL extensions END SUB !******************************************************************************************************************** ! ! This subroutine fills the scoring matrix vertically ! !******************************************************************************************************************** SUB fillDown LET previousChanged = doGapDown(1,1) LET hIndex = 2 DO WHILE hIndex < previousChanged + 1 LET thisChanged = -1 FOR vIndex = hIndex to previousChanged + 1 LET changedScore = checkDiagonal(vIndex, hIndex) IF changedScore = TRUE THEN LET thisChanged = MAX(thisChanged, doGapDown(vIndex, hIndex)) LET temp = doGapRight(vIndex, hIndex) END IF IF vIndex < previousChanged THEN IF hIndex > 1 THEN LET changedScore = checkScore(vIndex, hIndex - 1, vIndex, hIndex, -gapBeginPenalty) END IF IF changedScore = TRUE THEN IF hIndex > 1 THEN LET temp = doGapRight(vIndex, hIndex -1) LET thisChanged = MAX(thisChanged, doGapDown(vIndex, hIndex)) END IF END IF END IF Next vIndex LET hIndex = hIndex + 1 LET previousChanged = thisChanged LOOP END SUB !******************************************************************************************************************** ! ! This subroutine fills the scoring matrix horizontally ! !******************************************************************************************************************** SUB fillRight LET previousChanged = doGapRight(1,1) LET vIndex = 2 DO WHILE vIndex <= previousChanged + 1 LET thisChanged = -1 FOR hIndex = vIndex to previousChanged + 1 LET changedScore = checkDiagonal(vIndex, hIndex) IF changedScore = TRUE THEN LET thisChanged = MAX(thisChanged, doGapRight(vIndex, hIndex)) LET temp = doGapDown(vIndex, hIndex) END IF IF hIndex < previousChanged THEN IF vIndex > 1 THEN LET changedScore = checkScore(vIndex - 1, hIndex, vIndex, hIndex, -gapBeginPenalty) END IF IF changedScore = TRUE THEN IF vIndex > 1 THEN LET temp = doGapDown(vIndex - 1, hIndex) LET thisChanged = MAX(thisChanged, doGapRight(vIndex, hIndex)) END IF END IF END IF NEXT hIndex LET vIndex = vIndex + 1 LET previousChanged = thisChanged LOOP END SUB !******************************************************************************************************************** ! ! This subroutine collects the extension (from word match) information in terms of positions in the sequence ! and convert it to actual sequences ! !******************************************************************************************************************** SUB extensions LOCAL i, j, i0, j0 LET i = maxI LET j = maxJ LET queryFlank$,targetFlank$ = "" DO WHILE i > 1 LET i0 = lastI(i,j) LET j0 = lastJ(i,J) DO WHILE (i > 1) AND ((i > i0) OR (j > j0)) IF (i = i0) THEN LET targetFlank$ = gap$ & targetFlank$ ELSE LET targetFlank$ = target$[t + (i-1) * direction:t + (i-1) * direction] & targetFlank$ LET i = i-1 END IF IF (j = j0) THEN LET queryFlank$ = gap$ & queryFlank$ ELSE LET queryFlank$ = query$[q + (j-1) * direction:q + (j-1) * direction] & queryFlank$ LET j = j-1 END IF LOOP LOOP END SUB !******************************************************************************************************************** ! ! Given a location in the score matrix, this subroutine figures out cost of a vertical gap (gap in whatever ! sequence is at the top of the matrix ! !******************************************************************************************************************** DEF doGapDown(i,j) LOCAL hIndex, vIndex, newScore, oldScore, penalty, changed LET vIndex = i LET hIndex = j CALL checkScoreDim(hIndex, vIndex) IF score(vIndex, hIndex) > 0 THEN LET newScore = score(vIndex, hIndex) ELSE LET newScore = 0 END IF LET penalty = gapBeginPenalty LET changed = vIndex DO LET newScore = newScore - penalty LET penalty = gapExtendPenalty LET vIndex = vindex + 1 CALL checkScoreDim(hIndex, vIndex) IF score(vIndex, hindex) > 0 THEN LET oldScore = score(vIndex, hIndex) ELSE LET oldScore = 0 END IF IF newScore > oldScore THEN CALL recordScoreChange(vIndex - 1, hIndex, vIndex, hIndex, newScore) LET changed = vIndex ELSE IF newScore - gapExtendPenalty <= oldScore - gapBeginPenalty THEN LET doGapDown = changed EXIT DEF END IF END IF LOOP END DEF !******************************************************************************************************************** ! ! Given a location in the score matrix, this subroutine figures out cost of a horizontal gap (gap in whatever ! sequence is on the side of the matrix ! !******************************************************************************************************************** DEF doGapRight(i,j) LOCAL hIndex, vIndex, newScore, oldScore, penalty, changed LET vIndex = i LET hIndex = j CALL checkScoreDim(hIndex, vIndex) IF score(vIndex, hIndex) > 0 THEN LET newScore = score(vIndex, hIndex) ELSE LET newScore = 0 END IF LET penalty = gapBeginPenalty LET changed = hIndex DO LET newScore = newScore - penalty LET penalty = gapExtendPenalty LET hIndex = hindex + 1 CALL checkScoreDim(hIndex, vIndex) IF score(vIndex, hindex) > 0 THEN LET oldScore = score(vIndex, hIndex) ELSE LET oldScore = 0 END IF IF newScore > oldScore THEN CALL recordScoreChange(vIndex, hIndex - 1, vIndex, hIndex, newScore) LET changed = hIndex ELSE IF newScore - gapExtendPenalty <= oldScore - gapBeginPenalty THEN LET doGapRight = changed EXIT DEF END IF END IF LOOP END DEF DEF checkDiagonal(i,j) LOCAL changeAmount, targetPosition, queryPosition, hIndex, vIndex LET vIndex = i LET hIndex = j LET targetPosition = t + (i-1) * direction LET queryPosition = q + (j-1) * direction IF targetPosition < 1 OR targetPosition > len(target$) THEN LET checkDiagonal = FALSE EXIT DEF END IF IF queryPosition < 0 OR queryPosition > len(query$) THEN LET checkDiagonal = FALSE EXIT DEF END IF IF target$[targetPosition:targetPosition] = query$[queryPosition:queryPosition] THEN LET changeAmount = matchReward ELSE LET changeAmount = - mismatchPenalty END IF LET checkDiagonal = checkScore(i-1, j-1, i, j, changeAmount) END DEF DEF checkScore(fromI, fromJ, toI, toJ, changeAmount) CALL checkScoreDim(toI, toJ) IF score (toI, toJ) > 0 THEN LET oldScore = score(toI, toJ) ELSE LET oldScore = 0 END IF IF score(fromI, fromJ) > 0 THEN LET newScore = score (fromI, fromJ) ELSE LET checkScore = FALSE EXIT DEF END IF LET newScore = newScore + changeAmount IF newScore > oldScore THEN CALL recordScoreChange(fromI, fromJ, toI, toJ, newScore) LET checkScore = TRUE ELSE LET checkscore = FALSE END IF END DEF !******************************************************************************************************************** ! ! This subroutine is used to update the score, lastI and lastJ matrices. It also updates the max scores. It's ! the only routine that updates these values directly ! !******************************************************************************************************************** SUB recordScoreChange(fromI, fromJ, toI, toJ, newScore) LOCAL fromVIndex, fromHIndex, toVIndex, toHindex LET fromVIndex = fromI LET fromHIndex = fromJ LET toVIndex = toI LET toHIndex = toJ CALL checkLastIDim(toHindex, toVindex) CALL checkLastJDim(toHindex, toVindex) CALL checkScoreDim(toHindex, toVindex) LET score(toVIndex, toHIndex) = newScore LET lastI(toVIndex, toHIndex) = fromVIndex LET lastJ(toVindex, toHindex) = fromHIndex IF newScore > maxScore THEN LET maxScore = newScore LET maxI = toVIndex LET maxJ = toHIndex END IF END SUB !******************************************************************************************************************** ! ! This function reverses a given string i.e. turns "ABC" into "CBA" ! !******************************************************************************************************************** DEF reverse$(sentence$) LOCAL i, SentenceLength, result$ LET result$ = "" LET sentenceLength = len(sentence$) IF sentenceLength < 1 Then EXIT DEF ELSE For i = sentenceLength TO 1 STEP -1 LET result$ = result$ & sentence$[i:i] NEXT i LET reverse$ = result$ END IF END DEF !******************************************************************************************************************** ! ! Since True Basic arrays do not expand automatically, the following functions are used to check the dimensions ! of arrays before any attempt to update them are made. Avoids those pesky subscript out of bonds errors ! !******************************************************************************************************************** SUB checkLastIDim(hIndex, vIndex) LOCAL i,j ! increase number of rows, will not impact existing data IF (vIndex > SIZE(lastI, 1)) THEN MAT REDIM lastI(vIndex, SIZE(lastI, 2)) END IF ! number of column changed. default mat redim operation will reorganize data (=bad!) IF (hIndex > SIZE(lastI, 2)) THEN LOCAL tempArray(1,1) MAT REDIM tempArray (SIZE(lastI, 1), hIndex) FOR i = 1 to SIZE(lastI,1) FOR j = 1 TO SIZE(lastI, 2) LET tempArray(i,j) = lastI(i,j) NEXT j NEXT i MAT REDIM lastI(SIZE(lastI, 1), hIndex) FOR i = 1 TO SIZE(tempArray, 1) FOR j = 1 TO SIZE(tempArray, 2) LET lastI(i,j) = tempArray(i,j) NEXT j NEXT i END IF END SUB SUB checkLastJDim(hIndex, vIndex) LOCAL i,j ! increase number of rows, will not impact existing data IF (vIndex > SIZE(lastJ, 1)) THEN MAT REDIM lastJ(vIndex, SIZE(lastJ, 2)) END IF ! number of column changed. default mat redim operation will reorganize data (=bad!) IF (hIndex > SIZE(lastJ, 2)) THEN LOCAL tempArray(1,1) MAT REDIM tempArray (SIZE(lastJ, 1), hIndex) FOR i = 1 to SIZE(lastJ,1) FOR j = 1 TO SIZE(lastJ, 2) LET tempArray(i,j) = lastJ(i,j) NEXT j NEXT i MAT REDIM lastJ(SIZE(lastJ, 1), hIndex) FOR i = 1 TO SIZE(tempArray, 1) FOR j = 1 TO SIZE(tempArray, 2) LET lastJ(i,j) = tempArray(i,j) NEXT j NEXT i END IF END SUB SUB checkScoreDim(hIndex, vIndex) LOCAL i,j ! increase number of rows, will not impact existing data IF (vIndex > SIZE(score, 1)) THEN MAT REDIM score(vIndex, SIZE(score, 2)) END IF ! number of column changed. default mat redim operation will reorganize data (=bad!) IF (hIndex > SIZE(score, 2)) THEN LOCAL tempArray(1,1) MAT REDIM tempArray (SIZE(score, 1), hIndex) FOR i = 1 to SIZE(score,1) FOR j = 1 TO SIZE(score, 2) LET tempArray(i,j) = score(i,j) NEXT j NEXT i MAT REDIM score(SIZE(score, 1), hIndex) FOR i = 1 TO SIZE(tempArray, 1) FOR j = 1 TO SIZE(tempArray, 2) LET score(i,j) = tempArray(i,j) NEXT j NEXT i END IF END SUB !******************************************************************************************************************** ! ! The following routines are used to print out the results on screen ! !******************************************************************************************************************** !******************************************************************************************************************** ! ! This function counts how many bases are in a given segment, not counting the gap symbol ! !******************************************************************************************************************** DEF countBases(segment$) ! don't count gap$ character local position, skip LET position = 0 DO LET position = POS(segment$, gap$, position+1) IF position > 0 THEN LET skip = skip + 1 Else EXIT DO END IF LOOP LET countBases = len(segment$) - skip END DEF !******************************************************************************************************************** ! ! Somewhat formatted output of a segment ! !******************************************************************************************************************** SUB printSegment(name$, segment$, start, length) PRINT name$, start, segment$, start+length-1 END SUB !******************************************************************************************************************** ! ! This routine prints out the little bars that connects the matching bases for aligned sequences in the output ! !******************************************************************************************************************** SUB printAlignment(sString$, tString$) LOCAL position DIM sArray$(1) DIM tArray$(1) DIM bars$(1) LET outputBar$ = "" LET sStringLen = len(sString$) LET tStringLen = len(tString$) MAT REDIM sArray$(sStringLen) FOR position = 1 to sStringLen LET sArray$(position) = sString$[position:position] NEXT position IF tStringLen < sStringLEN THEN MAT REDIM tArray$(sStringLen) ELSE MAT REDIM tArray$(tStringLen) END IF FOR position = 1 TO tStringLen LET tArray$(position) = tString$[position:position] NEXT position MAT REDIM bars$(sStringLen) FOR k = 1 TO sStringLen IF (sArray$(k) = tArray$(k)) AND (sArray$(k) <> gap$) THEN LET outputBar$ = outputBar$ & bar$ ELSE LET outputBar$ = outputBar$ & " " END IF NEXT k PRINT " ", " ", outputBar$ END SUB !******************************************************************************************************************** ! ! This routine prints the hit from oneMatch ! !******************************************************************************************************************** SUB printHit(targetMin, targetHit$, queryMin, queryHit$) LOCAL key$, i !check to see if we've already seen this or not LET Key$ = STR$(targetMin) &":"& targetHit$ &":"& STR$(queryMin) &":"& queryHit$ FOR i = 1 TO SIZE(seen$) IF seen$(i) = key$ THEN EXIT SUB END IF NEXT i LET seen$(SIZE(seen$)) = key$ MAT REDIM seen$(SIZE(seen$) + 1) LET start = 1 LET maxSegLen = 50 DO WHILE start < len(targetHit$) LET segLen = MIN(maxSegLen, len(targetHit$) - start) LET querySeg$ = queryHit$[start:start+segLen] LET targetSeg$ = targetHit$[start:start+segLen] LET queryLen = countBases(querySeg$) LET targetLen = countBases(targetSeg$) CALL printSegment("query", querySeg$, queryMin, queryLen) CALL printAlignment(targetSeg$, querySeg$) CALL printSegment("Target", targetSeg$, targetMin, targetLen) PRINT LET targetMin = targetMin + targetLen LET queryMin = queryMin + queryLen LET start = start + segLen LOOP END SUB END !******************************************************************************************************************** ! ! This routine prints a two dimensional array in a possibly nice format ! The second parameter (width) specifies the number of characters for each number ! Demanding a width allows the columns to line up properly ! !******************************************************************************************************************** SUB Print2DArray(array(,),width) PRINT LET format$ = Repeat$("#",width) FOR i=LBound(array,1) TO UBound(array,1) FOR j=LBound(array,2) TO UBound(array,2) PRINT Using$(format$,array(i,j)); NEXT j PRINT NEXT i PRINT END SUB