!************************************************************************************************************* ! Protein compare ! ! Threads the amino acid sequence of one protein through the three-dimensional structure of another ! ! Input files: ! ! Structure file: PDB format. Must have only one chain ! ! Alignment file: PIR format (as output by Clustal) ! Leading and lagging blanks are OK ! ! > [header line for sequence with known structure] ! [optional blank line] ! [amino acid sequence, with gaps (-) for alignment] ! * [optional] ! > [header line for sequence to be threaded] ! [optional blank line] ! [amino acid sequence, with gaps (-) for alignment] ! * [optional] ! ! Output file: ! ! New structure file: PDB format ! ! NO CHANGE: Amino acids common between the two sequences go in chain A ! INSERTIONS: Amino acids in threaded sequence but not original go in chain B ! Amino acid called "INS" ! Inserted atom given meaningless number ! DELETIONS: Amino acids in original but not in threaded sequence go in chain C ! Amino acid called "DEL" in first atom of residue (others unchanged) ! REPLACEMENTS: Amino acids changed in threaded sequence go in chain D ! Amino acid called new name in first atom of residue (others unchanged) ! ! Informational lines (e.g. Title, source, etc) copied unchanged ! therefore information may no longer be appropriate ! !************************************************************************************************************* ! ******************* LIBRARIES and DECLARATIONS ******************* LIBRARY "Aminoacid_module.TRU" ! Contains routines to access info about amino acids LIBRARY "Utilities.trc" ! Contains Explode DECLARE FUNCTION GetAlignInfo$ ! Reads alignment file, extracts a sequence from it DECLARE FUNCTION UpdateChainID$ DECLARE FUNCTION updateResidueName$ DECLARE FUNCTION UpdateResidueSeqNum$ DECLARE FUNCTION Alignment_comparison$ ! Creates summary of alignment between two sequences DECLARE FUNCTION RepStr$ DECLARE FUNCTION GetSize DECLARE FUNCTION AA_translation$ ! Takes 3-letter code to 1-letter code (or reverse) ! ************************ FILE DEFINITIONS ************************ LET alignmentFile = 1 LET alignmentFile$ = "seq.pir" LET structureFile = 2 LET structureFile$ = "UDPGDH Streptococcus pyogenes.pdb" LET outputFile = 3 LET outputFile$ = "MesorhizobiumUDPglcDH.pdb" ! *************************** CONSTANTS **************************** LET gap$ = "-" LET true = 1 LET false = 0 ! **************************** VARIABLES *************************** ! knownSeq$ Amino acid sequence of protein with known structure ! threadedSeq$ Amino acid sequence of protein to be threaded through structure ! sequence_comparison$ Analysis of alignment: M:match, I:insertion, D:deletion, R:replacement DIM words$(1) ! Used by Explode to contain words within line ! ************************** MAIN PROGRAM ************************** LET knownSeq$ = GetAlignInfo$(alignmentFile$, 1) LET knownSeqHeader$ = header$ LET threadedSeq$ = GetAlignInfo$(alignmentFile$, 2) LET threadedSeqHeader$ = header$ CALL PrintSequences LET sequence_comparison$ = Alignment_comparison$(knownSeq$, threadedSeq$) CALL CreateNewStructure PRINT "Threading completed" ! ******************* SUBROUTINES and FUNCTIONS ******************** !************************************************************************************************************* ! ! Opens a file containing multiple sequences in FASTA format, return the sequence requested by position ! !************************************************************************************************************* SUB PrintSequences PRINT "Sequence of protein with known structure: "; PRINT knownSeqHeader$ PRINT knownSeq$ PRINT PRINT "Sequence of protein to be threaded: "; PRINT threadedSeqHeader$ PRINT threadedSeq$ PRINT END SUB !************************************************************************************************************* ! GetAlignInfo$ (path to file, number of sequence to extract) ! Opens a file containing multiple sequences in FASTA format, return the sequence requested by position ! !************************************************************************************************************* FUNCTION GetAlignInfo$ (path$, seqNum) LET seq$ = "" OPEN #alignmentFile: name path$ LET currentSeq = 0 DO WHILE MORE #alignmentFile LINE INPUT #alignmentFile: line$ LET firstLetter$ = line$[1:1] SELECT CASE firstLetter$ CASE ">" ! Marks beginning of sequence LET currentSeq = currentSeq + 1 IF currentSeq = seqNum THEN CALL Explode(line$, words$, " >") ! Splits line into words separated by spaces (put in array words$) LET header$ = words$(1) END IF CASE "*" CASE ELSE IF currentSeq = seqNum THEN LET seq$ = seq$ & Trim$(line$) END SELECT LOOP LET GetAlignInfo$ = seq$ CLOSE #alignmentFile END FUNCTION !************************************************************************************************************* ! Alignment_comparison (known sequence, threaded sequence) ! Analyze alignment at each position ! Put I (insertion) where gap in sequence with known structure ! Put D (delection) where gap in sequence to be threaded ! Put M (match) where both sequences match ! Put R (replacement) where sequences differ ! !************************************************************************************************************* FUNCTION Alignment_comparison$(knownSequence$, threadedSequence$) LOCAL comparison$ LET comparison$ = "" LET seqLen = LEN(knownSequence$) ! Both sequences must be same length FOR i = 1 to seqLen IF knownSequence$[i:i] = gap$ THEN LET comparison$ = comparison$ & "I" ELSE IF threadedSequence$[i:i] = gap$ THEN LET comparison$ = comparison$ & "D" ELSE IF knownSequence$[i:i] <> threadedSequence$[i:i] THEN LET comparison$ = comparison$ & "R" ELSE LET comparison$ = comparison$ & "M" END IF NEXT i LET Alignment_comparison$ = comparison$ END FUNCTION !************************************************************************************************************* ! CreateNewStructure ! ! Copy pdb file and modify the ATOM records according to the edit transcript while copying ! Changes only the first atom of a residue ! !************************************************************************************************************* SUB CreateNewStructure LOCAL line$, transcriptPosition, atomPosition, seqPosition, previousResidue$, residue, residueCount, newLine$ LET residueCount = GetSize(threadedSeq$, 1, len(threadedSeq$)) OPEN #structureFile: NAME structureFile$ OPEN #outputFile: NAME outputFile$, CREATE NEWOLD ERASE #outputFile ! LET residue = 0 DO LINE INPUT #structureFile: line$ ! the only lines that matter are ATOM lines. More specifically, the first ATOM line ! for a residue. position [23:26] of an ATOM line is the residue sequence number LET linetype$ = line$[1:4] IF linetype$ = "ATOM" THEN LET residue = Val(line$[23:26]) LET choice_for_residue$ = sequence_comparison$[residue:residue] SELECT CASE choice_for_residue$ CASE "M" ! If the residues match, do nothing (copy line) CASE "R" ! If a residue is replaced, we update residue's name, chain ID in the record LET line$ = updateResidueName$(threadedSeq$[residue:residue], line$) LET line$ = UpdateChainID$("D", line$) CASE "D" ! If a residue is deleted (gap in the sequence it's aligned with) ! then insert a new record with the name of "DEL" on chain C LET newLine$ = line$ LET residueCount = residueCount + 1 LET newLine$ = updateResidueName$("DEL", newLine$) LET newLine$ = UpdateChainID$("C", newLine$) LET newLine$ = UpdateResidueSeqNum$(USING$("####",residueCount), newLine$) PRINT #outputFile: newLine$ CASE "I" ! If a residue is inserted in the sequence it's aligned with (gap in current sequence) LET newLine$ = line$ LET residueCount = residueCount + 1 LET newLine$ = updateResidueName$("INS", newLine$) LET newLine$ = UpdateChainID$("B", newLine$) LET newLine$ = UpdateResidueSeqNum$(USING$("####",residueCount), newLine$) PRINT #outputFile: newLine$ CASE ELSE PRINT "ERROR: *"; sequence_comparison$[residue:residue];"*" END SELECT ELSE ! not ATOM line ! don't care about it if it's not an ATOM record END IF LET previous_linetype$ = linetype$ PRINT #outputFile: line$ LOOP UNTIL END #structureFile CLOSE #structureFile CLOSE #outputFile END SUB !************************************************************************************************************* ! GetSize(sequence, start, stop) ! ! Gives the length of a sequence, excluding gap characters ! ! Input: ! seq$ : sequence to be counted ! start: position in sequence to start counting ! stop : position in sequence to stop counting ! !************************************************************************************************************* FUNCTION GetSize(seq$, start, stop) LOCAL position, skip LET position = start LET skip = 0 DO ! Count gaps in sequence LET position = POS(seq$, gap$, position) IF position > 0 AND position < stop THEN LET skip = skip + 1 LET position = position + 1 ELSE EXIT DO END IF LOOP LET GetSize = (stop - start + 1) - skip END FUNCTION !************************************************************************************************************* ! UpdateChainID$ (chainID$, record$) ! ! Updates the chain ID for the given ATOM record. ! ATOM record is fixed width column, chain ID is the 22nd character ! ! Input: ! chainID$: new chainID to be written to the record (e.g. A, B, C, D) ! record$ : ATOM line in pdb format ! !************************************************************************************************************* FUNCTION UpdateChainID$(chainID$, record$) IF len(chainID$) <> 1 THEN LET UpdateChainID$ = "" ELSE LET UpdateChainID$ = record$[1:21] & chainID$ & record$[23:len(record$)] END IF END FUNCTION !************************************************************************************************************* ! UpdateResidueName$ (residueName$, record$) ! ! Updates the residue name for the given ATOM record. ! ATOM record is fixed width column, residue name takes up positions 18th to 20th ! ! Input: ! residueName$: residue name to be written to the record ! record$ : ATOM line in pdb format ! !************************************************************************************************************* FUNCTION updateResidueName$(residueName$, record$) LET length = len(residueName$) IF length = 1 THEN LET name$ = UCase$(AA_translation$(residueName$)) ELSE LET name$ = residueName$ END IF IF len(name$) <> 3 THEN PRINT "aa not found: "; residueName$ LET updateResidueName$ = "" ELSE LET updateResidueName$ = record$[1:17] & name$ & record$[21:len(record$)] END IF END FUNCTION !************************************************************************************************************* ! UpdateResidueSeqNum$( residueSeqNum$, record$ ) ! update the residue sequence number for the given ATOM record. ATOM record is fixed width column, residue ! sequence number takes up positions 23rd to 26th ! ! Input: ! residueSeqNum$: residue sequence number to be written to the record ! record$ : ATOM record in pdb format ! !************************************************************************************************************* FUNCTION UpdateResidueSeqNum$(residueSeqNum$, record$) LET length = len(residueSeqNum$) LET UpdateResidueSeqNum$ = record$[1:22] & residueSeqNum$ & record$[27:len(record$)] END FUNCTION END