!*************************************************************************************************************
! Parse-Blast: Parses output from Blast implemented by TIGR CMR.
!
!   Saves all hits of a query (one sequence) on a database (subjects)
!
!   Outputs (comma delimited), one line per hit:
!      QueryName     Name of the query
!      QueryLen      Length of the query
!      QueryFrom     Position in query where the hit starts
!      QueryTo       Position in query where the hit ends
!      SubjctName    Name of the subject
!      SubjctLen     Length of the subject sequence
!      Orientation   Orientation of the hit (c if it's plus/minus, d if it's plus/plus)
!      SubjectFrom   Position in subject where the hit starts
!      SubjectTo     Position in subject where the hit ends
!      EValue1       E value of hit
!      % identity    Percent identity between query and subject of the hit
!
!   If hit not reported, then all values = 0
!   NOTE(review): in this file a line is only written from Close_hit when a hit
!   was found, so a query without hits appears to produce no line at all - confirm.
!*************************************************************************************************************

!*************************************************************************************************************
!
! Libraries and Declarations
!
!*************************************************************************************************************

LIBRARY "utilities.trc"                 ! Contains Explode
DECLARE FUNCTION DelChar$               ! String function used to strip commas from lengths
                                        ! (presumably also supplied by utilities.trc - confirm)

!*************************************************************************************************************
!
! File Definitions
!
!*************************************************************************************************************

! Channel numbers and file names are fixed here; the rest of the program refers
! to the files only through #blastFile and #output.
LET blastFile = 1
LET output = 2
LET blastFile$ = "alr2168vs7120CMR.txt"
LET outfile$ = "test.txt"

! Blast report: must already exist, read only.
OPEN #blastFile: NAME blastFile$, ORG text, CREATE old, ACCESS input

! Result file: created if missing, emptied if present.
OPEN #output: NAME outfile$, ORG text, CREATE NewOld, ACCESS OutIn
ERASE #output
SET #output: MARGIN Maxnum              ! Allows printing lines of any length

!*************************************************************************************************************
!
! Initializes global variables
!
!*************************************************************************************************************

LET true = 1
LET false = 0
LET comma$ = ","
DIM field$(1)                           ! Used by Explode

!*************************************************************************************************************
!
! Main program
!
! The subroutines below are internal, so they share the main program's variables.
! They report what they found through three flags (query_found, target_found,
! hit_found) and leave the line that triggered the flag in line$.
!
!*************************************************************************************************************

PRINT "Beginning to read file"

CALL Find_next_query
DO WHILE query_found = true
   CALL Evaluate_query
   CALL Find_next_target
   DO WHILE target_found = true
      CALL Evaluate_target
      CALL Find_next_hit
      DO WHILE hit_found = true
         CALL Evaluate_score
         CALL Find_seq                  ! Reads the alignment; sets the flags for whatever ended it
      LOOP
      ! Find_seq clears both flags when it reaches the "Parameters" line;
      ! in that case keep scanning for a further target.
      IF target_found = false AND query_found = false THEN CALL Find_next_target
   LOOP
   ! Only scan for a new query if the current line is not already a "Query=" line.
   IF query_found = false THEN CALL Find_next_query
LOOP

! Release both channels explicitly so the output is flushed before the program ends.
CLOSE #BlastFile
CLOSE #output

!*************************************************************************************************************
!
! SUBROUTINES and FUNCTIONS
!
!*************************************************************************************************************

SUB InputBlast
    ! Reads the next line of the blast report into the shared variable line$.
    ! Callers are responsible for checking MORE #BlastFile first.
    LINE INPUT #BlastFile: line$
    ! print line$
END SUB

!*************************************************************************************************************
!
! Deals with the query section of the blast output file. Get the name and length of the query out
!
!*************************************************************************************************************

SUB Find_next_query
    ! Scans forward until a line starting with "Query=" has been read.
    ! On success query_found = true and that line is left in line$;
    ! at end of file query_found = false.
    LET query_found = false
    DO WHILE MORE #BlastFile
       CALL InputBlast
       IF line$[1:6] = "Query=" THEN
          LET query_found = true
          EXIT DO
       END IF
    LOOP
END SUB

SUB Evaluate_query
    ! Expects line$ to hold the "Query=" line. Stores the query name in
    ! query_name$, then reads the following line and stores its first field
    ! (split on blanks and parentheses) in query_length$.
    CALL Explode(line$,field$,"= ")
    IF Size(field$) < 1 THEN
       CAUSE ERROR 11, "Bad line in Evaluate_query"
    ELSE IF Size(field$) < 2 THEN
       LET query_name$ = "(unnamed)"
    ELSE
       LET query_name$ = field$(2)
    END IF

    CALL InputBlast
    CALL Explode(line$,field$," ()")
    ! Report an empty length line explicitly instead of failing on field$(1).
    IF Size(field$) < 1 THEN CAUSE ERROR 11, "Bad length line in Evaluate_query"
    LET query_length$ = field$(1)
    PRINT "QUERY: "; query_name$
END SUB

!*************************************************************************************************************
!
! Deals with the database section of the blast output file. get the name and length of the subject out
!
!*************************************************************************************************************

SUB Find_next_target
    ! Scans forward for the next target line (starts with ">").
    !   ">"       target_found = true, line left in line$
    !   " *****"  no target for this query: target_found = false, query_found = false
    !   "Query="  the next query starts before any target: target_found = false,
    !             query_found = true, line left in line$ for Evaluate_query
    !   end of file  target_found = false, query_found = false
    LET target_found = false
    DO
       IF END #BlastFile THEN
          ! Nothing left to read, so there is no current query any more either.
          LET query_found = false
          EXIT DO
       END IF
       CALL InputBlast
       IF line$[1:1] = ">" THEN
          LET target_found = true
          EXIT DO
       ELSE IF line$[1:6] = " *****" THEN
          LET target_found = false
          LET query_found = false
          EXIT DO
       ELSE IF line$[1:6] = "Query=" THEN
          ! Do not read past the next query header, otherwise its targets
          ! would be attributed to the previous query.
          LET target_found = false
          LET query_found = true
          EXIT DO
       END IF
    LOOP
END SUB

SUB Evaluate_target
    ! Expects line$ to hold the ">" line. Stores the first field in target_name$,
    ! then reads on until a line whose first field is "Length" and stores its
    ! value (commas removed) in target_length.
    CALL Explode(line$, field$, "> ")
    IF Size(field$) < 1 THEN CAUSE ERROR 31, "Bad line in Evaluate_target"
    LET target_name$ = field$(1)
    DO                                  ! Read length (may require 1 or 2 lines)
       CALL InputBlast
       CALL Explode(line$,field$," =")
       ! Size check first: lines with fewer than two fields cannot be the length line.
       IF Size(field$) >= 2 THEN
          IF UCASE$(field$(1)) = "LENGTH" THEN
             LET target_length = Val(DelChar$(field$(2),","))   ! Remove commas from length
             EXIT DO
          END IF
       END IF
    LOOP
END SUB

!*************************************************************************************************************
!
! Deals with matches between query and target
!
!*************************************************************************************************************

SUB Find_next_hit
    ! If a target was found, then it's guaranteed that there's at least one hit
    ! Hits begin with " Score"
    LET hit_found = false
    DO WHILE MORE #BlastFile
       CALL InputBlast
       IF line$[1:6] = " Score" THEN
          LET hit_found = true
          EXIT DO
       END IF
    LOOP
END SUB

SUB Find_seq
    ! If a hit was found, then it's guaranteed that there's at least one line of sequence
    ! In finding the end of the sequence, necessarily either the next hit, target, query,
    ! or end of file will be encountered.
    !
    ! Each pass examines the current line$ and then reads the next one. Whatever
    ! ends the alignment closes the current hit (which writes it out) and sets
    ! hit_found / target_found / query_found for the main loop.
    DO
       IF line$[1:6] = "Query:" THEN             ! Next line of sequence alignment
          CALL Evaluate_extent
       ELSE IF line$[1:8] = " Score =" THEN      ! Ran into next hit
          CALL Close_hit
          LET hit_found = true
          EXIT DO
       ELSE IF line$[1:1] = ">" THEN             ! Ran into next target
          CALL Close_hit
          LET hit_found = false
          LET target_found = true
          EXIT DO
       ELSE IF line$[1:6] = "Query=" THEN        ! Ran into next query
          CALL Close_hit
          LET hit_found = false
          LET target_found = false
          LET query_found = true
          EXIT DO
       ELSE IF line$[1:10] = "Parameters" THEN   ! Ran into end of file
          CALL Close_hit
          LET hit_found = false
          LET target_found = false
          LET query_found = false
          EXIT DO
       END IF

       IF END #BlastFile THEN
          ! The report ended without a "Parameters" line. Treat it the same way,
          ! so the last hit is still written and hit_found does not stay true
          ! (which would make the main loop evaluate a stale line).
          CALL Close_hit
          LET hit_found = false
          LET target_found = false
          LET query_found = false
          EXIT DO
       END IF
       CALL InputBlast
    LOOP
END SUB

!*************************************************************************************************************
!
! deals with the alignment of the blast output file. Get the start and end of both query and subject out,
! as well as the sequence of the hit from the subject.
!
!*************************************************************************************************************

SUB Evaluate_extent
    ! Expects line$ to hold a "Query:" alignment line. Fields 2 and 4 (split on
    ! blanks) are taken as the start and end positions of that line. The start of
    ! the hit is recorded only once (while it is still 0); the end is overwritten
    ! on every alignment line, so it finishes at the last line's value.
    CALL Explode(line$,field$," ")
    LET BeginningOfLine = Val(field$(2))
    LET EndOfLine = Val(field$(4))
    IF begin_query = 0 THEN LET begin_query = BeginningOfLine
    LET end_query = EndOfLine

    CALL InputBlast                     ! Skip alignment marks
    CALL InputBlast                     ! Get Hit sequence
    CALL Explode(line$,field$," ")
    LET BeginningOfLine = Val(field$(2))
    LET EndOfLine = Val(field$(4))
    IF begin_target = 0 THEN LET begin_target = BeginningOfLine
    LET end_target = EndOfLine
END SUB

!*************************************************************************************************************
!
! Deals with the score section of the blast output file. Get the e value, percent identity and
! orientation of the hit out
!
!*************************************************************************************************************

SUB Evaluate_score
    ! Expects line$ to hold the " Score" line. Field 6 of that line is kept, as
    ! text, in e_val$. The following line supplies the percent identity (field 3,
    ! in id) and the orientation (field 9: "Plus" gives "d", anything else "c").
    CALL Explode(line$,field$," ,=")
    IF Size(field$) < 8 THEN CAUSE ERROR 21,"Wrong line in Evaluate_score, line 1"
    ! LET e_val = Val(field$(6))
    LET e_val$ = field$(6)

    CALL InputBlast
    CALL Explode(line$,field$," ()=%")
    IF Size(field$) < 9 THEN CAUSE ERROR 21, "Wrong line in Evaluate_score, line 2"
    LET id = Val(field$(3))
    IF field$(9) = "Plus" THEN
       LET orientation$ = "d"
    ELSE
       LET orientation$ = "c"
    END IF
END SUB

!*************************************************************************************************************
!
! All information regarding a hit have been read. Output the information and reset variables so we can
! move on
!
!*************************************************************************************************************

SUB Close_hit
    ! Writes the current hit (only if one is open) and resets the per-hit state.
    ! Callers set hit_found again afterwards when another hit follows.
    IF hit_found = true THEN CALL Output_hit
    LET hit_found = false
    LET begin_query, begin_target = 0
    LET end_query, end_target = 0
    LET hit_seq$ = ""
END SUB

!*************************************************************************************************************
!
! Prints out all the values we have for the hit in one comma delimited line.
!
!*************************************************************************************************************

SUB Output_hit
    ! Writes one record to #output. Every PRINT but the last ends with ";" so
    ! that all three statements build a single output line.

    ! Query: name, length, start, end
    PRINT #output: query_name$; comma$; query_length$; comma$; begin_query; comma$; end_query; comma$;

    ! Subject: name, length, orientation, start, end
    PRINT #output: target_name$; comma$; target_length; comma$; orientation$; comma$; begin_target; comma$; end_target; comma$;

    ! Score: E value (as read from the report) and percent identity; ends the line
    PRINT #output: e_val$; comma$; id
END SUB

END