/* SeqCalc.h                                         David Wagner */
/* This program attempts to calculate a sequence of amino acids    */
/* that may form a protein given information about its main chain. */
/* This is similar to the problem addressed by Mayo in his paper.  */
/* He uses an energy function, but here we will use statistical    */
/* data to derive a Hidden Markov Model for the sequence.          */
/* At the start of this experiment, I do not know if this model    */
/* is applicable.  Thus the primary objective is to determine if   */
/* this model is useful for solving this problem.                  */

#ifndef SEQCALC_H
#define SEQCALC_H

/* The prototypes below use FILE, so the header pulls in its own dependency. */
#include <stdio.h>

/* Integer codes for the twenty residue types (three-letter abbreviations). */
#define ALA 0
#define CYS 1
#define ASP 2
#define GLU 3
#define PHE 4
#define GLY 5
#define HIS 6
#define ILE 7
#define LYS 8
#define LEU 9
#define MET 10
#define ASN 11
#define PRO 12
#define GLN 13
#define ARG 14
#define SER 15
#define THR 16
#define VAL 17
#define TRP 18
#define TYR 19

/* Table dimensions: number of residue codes above, and the sizes of the */
/* psi/phi dimensions of the per-node tables in struct node.             */
#define NUMRESTYPES 20
#define NUMPHIANGLES 20
#define NUMPSIANGLES 20

/* NOTE(review): presumably the values accepted by the `item` argument of */
/* printChain()/printAAInfo() -- confirm against the implementation.      */
#define PR_NAME 0
#define PR_PHI 1
#define PR_PSI 2
#define PR_BETA 3
#define PR_PRED 4

/* NOTE(review): presumably program run modes (report / train / predict) -- */
/* confirm against the implementation.                                      */
#define CM_REPORT 0
#define CM_TRAIN 1
#define CM_PREDICT 2

#define TRAINING_FILE "trainHMM.txt"

/* NOTE(review): equal to NUMRESTYPES, i.e. the one extra slot in the       */
/* NUMRESTYPES+1 sized arrays of struct hmm; presumably the index of a      */
/* dedicated start node -- confirm against the implementation.              */
#define START_NODE_NUM 20

typedef char boolean;

/* Cartesian coordinates of a single atom. */
typedef struct atomLoc {
    double x;
    double y;
    double z;
} AtomLoc;

/* One residue: its type, backbone/C-beta atom positions, derived angles, */
/* and links to the neighbouring residues (doubly linked list).           */
typedef struct aaInfo {
    int aaType;
    char aaCode[4];
    int predictedType;

    AtomLoc N;
    AtomLoc CA;
    AtomLoc C;
    AtomLoc O;
    AtomLoc CB;

    /* One flag per atom above.  NOTE(review): presumably set once the */
    /* matching coordinates have been filled in -- confirm.            */
    boolean Nbool;
    boolean CAbool;
    boolean Cbool;
    boolean Obool;
    boolean CBbool;

    double psi;   // The angle between the two surrounding N's
    double phi;   // The angle between the two surrounding C's
    double beta;  // An angle I invented to represent whether the
                  // C-beta points toward the center, hopefully
                  // this is a measure of hydrophobicity

    struct aaInfo *next;
    struct aaInfo *prev;
} AAInfo;

/* One chain: head and tail of its residue list, plus a link to the next */
/* chain (singly linked list of chains).                                 */
typedef struct aaChain {
    AAInfo *aaChainHead;
    AAInfo *aaChainTail;
    struct aaChain *next;
} AAChain;

/* HMM node: two tables indexed [psi][phi].                               */
/* NOTE(review): presumably `total` holds raw counts and `prob` the       */
/* values derived from them -- confirm against the implementation.        */
typedef struct node {
    double prob[NUMPSIANGLES][NUMPHIANGLES];
    int total[NUMPSIANGLES][NUMPHIANGLES];
} Node;

/* HMM edge between two node indices, with a probability and a total. */
typedef struct edge {
    int start;
    int end;
    double prob;
    int total;
} Edge;

/* The model: NUMRESTYPES+1 nodes, and for each of them NUMRESTYPES edges. */
typedef struct hmm {
    Node nodeList[NUMRESTYPES+1];
    Edge edgeList[NUMRESTYPES+1][NUMRESTYPES];
} HMM;

/*
 * Function declarations.  The definitions live in the implementation file;
 * only the signatures are established here.
 */

/* Input file handling and parsing. */
FILE *openfile(char *filename);
int countAtoms(FILE *fname);
void parseLines(FILE *fname);
void parseLine(char *theLine, int residueNum);

/* Building the chain / residue / atom structures. */
void addAtom(char *atomName, int residueNum, char chainID, int resSeqNum,
             double x, double y, double z);
void addChain(AAChain **currChain);
void addResidue(AAInfo **currAA, AAChain *currChain);
void addAtomInfo(AAInfo *currAA, char *atomName, int residueNum,
                 double x, double y, double z);
void addPrevAtomAngles(AAInfo *currAA);

/* Training over chains and residues. */
void trainChains(FILE *fname);
void trainChain(FILE *fname, AAChain *theChain);
void trainAAInfo(FILE *fname, AAInfo *theInfo);

/* Printing.  These take no arguments where declared (void). */
void printChains(void);
void printChain(AAChain *theChain, int item);
void printAAInfo(AAInfo *theInfo, int item);
void printAACode(AAInfo *theInfo);

/* Geometry.  NOTE(review): presumably the torsion angle defined by the */
/* four points; units are not visible from this header -- confirm.      */
double findAngle(AtomLoc *p1, AtomLoc *p2, AtomLoc *p3, AtomLoc *p4);

/* Hidden Markov Model lifecycle and decoding. */
void initHMM(void);
void trainHMM(FILE *fname);
void adjustHMM(void);
void printHMM(void);
void runViterbi(AAChain *theChain);

#endif /* SEQCALC_H */