codonW/ 777 0 0 0 10240476240 5261 5codonW/codon_us.c 777 0 0 263133 10237501757 7401 0/**************************************************************************/ /* CodonW codon usage analysis package */ /* Copyright (C) 2005 John F. Peden */ /* This program is free software; you can redistribute */ /* it and/or modify it under the terms of the GNU General Public License */ /* as published by the Free Software Foundation; version 2 of the */ /* License, */ /* */ /* This program is distributed in the hope that it will be useful, but */ /* WITHOUT ANY WARRANTY; without even the implied warranty of */ /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ /* GNU General Public License for more details. */ /* You should have received a copy of the GNU General Public License along*/ /* with this program; if not, write to the Free Software Foundation, Inc.,*/ /* 675 Mass Ave, Cambridge, MA 02139, USA. */ /* */ /* */ /* The author can be contacted by email (jfp#hanson-codonw@yahoo.com Anti-*/ /* Spam please change the # in my email to an _) */ /* */ /* For the latest version and information see */ /* http://codonw.sourceforge.net */ /**************************************************************************/ /* */ /* ----------------------- codon_us.C ------------------------ */ /* This file contains most of the codon usage analysis subroutines */ /* except for the COA analysis */ /* Internal subroutines and functions */ /* initilize_point assigns genetic code dependent parameters to structs*/ /* initilize_coa decides which cod/AA to include in a COA by default */ /* codon_usage_tot Counts codon and amino acid usage */ /* ident_codon Converts codon into a numerical value in range 1-64 */ /* codon_usage_out Write out Codon Usage to file */ /* codon_error Called after all codons read, checks data was OK */ /* rscu_usage_out Write out RSCU */ /* raau_usage_out Write out normalised amino acid usage */ /* aa_usage_out Write out amino acid usage */ /* how_synon 
Calculates how synonymous each codon is */ /* how_synon_aa Calculates how synonymous each AA is */ /* clean_up Re-zeros various internal counters and arrays */ /* base_sil_us_out Write out base composition at silent sites */ /* cai_out Write out CAI usage */ /* cbi_out Write out codon bias index */ /* fop_out Write out Frequency of Optimal codons */ /* enc_out Write out Effective Number of codons */ /* gc_out Writes various analyses of base usage */ /* dot(,X) prints a period every X times it is called */ /* get_aa converts a three base codon into a 1 or 3 letter AA */ /* cutab_out Write a nice tabulation of the RSCU+CU+AA */ /* dinuc_count Count the dinucleotide usage */ /* dinuc_out Write out dinucleotide usage */ /* coa_raw_out Write out raw codon usage for use by COA analysis */ /* sorted_by_axis1 Sorts genes according to their axis one position */ /* gen_cusort_fop COA specific, write out cu of genes by axis1 posit. */ /* highlow Used sorted cu to calculate high_low chi sq. contin */ /* hydro_out Write out Protein hydropathicity */ /* aromo_out Write out Protein aromaticity */ /* */ /* */ /* External subroutines to codon_us.c */ /* my_exit Controls exit from CodonW closes any open files */ /* tidy reads the input data */ /* output called from tidy to decide what to do with the data */ /* toutput handles the reformatting and translation of seqs */ /* output_long if sequence is very long then process what we know */ /* and write sequence to disk in fragments */ /* open_file Open files, checks for existing files */ /* fileclose Closes files and returns a NULL pointer or exits */ /* */ /**************************************************************************/ #include #include #include #include #include #include #include "codonW.h" /********************* Initilize Pointers**********************************/ /* Various pointers to structures are assigned here dependent on the */ /* genetic code chosen. 
*/ /* paa points to a struct containing Amino Acid names */ /* pap points to amino acid properties */ /* pcai points to Adaptation values used to calc CAI */ /* pfop points to a struct describing optimal codons */ /* pcbi points to the same structure as pfop */ /* pcu points to data which has the translation of codons */ /* ds is a struct describing how synonymous a codon is */ /* da is a struct describing the size of each AA family */ /* pcoa points to a struct that describes columns to be */ /* included/excluded from any COA analysis */ /**************************************************************************/ int initilize_point(char code, char fop_species, char cai_species) { paa = &amino_acids; pap = &amino_prop; pcai = &cai[cai_species]; pfop = &fop[fop_species]; pcbi = &fop[fop_species]; pcu = &cu[code]; ds = how_synon(); da = how_synon_aa(); pcoa = &coa; printf ("\n"); if (pm->codonW) printf ("Genetic code is currently set to %s %s\n\n",pcu->des,pcu->typ); return 1; } /*******************How Synonymous is this codon *************************/ /* This function discovers at run time how synonymous a codon is by check-*/ /* ing all other codons to see if they encode the same AA */ /* This saves a lot of time when new genetic codes are added */ /**************************************************************************/ int *how_synon(void) { static int dds[65]; int x,i; for (x = 0; x < 65; x++) dds[x] = 0; for (x = 1; x < 65; x++) for (i = 1; i < 65; i++) if (pcu->ca[x] == pcu->ca[i]) dds[x]++; return dds; /* return a structure */ } /*******************How Synonymous is this AA *************************/ /* This function discovers at run time how synonymous an amino acid is by */ /* checking all codons to see if they encode this same AA */ /* This saves a lot of time when new genetic codes are added */ /**************************************************************************/ int *how_synon_aa(void) { static int dda[22]; int x; for (x = 0; x < 22; x++) 
dda[x] = 0; for (x = 1; x < 65; x++) dda[pcu->ca[x]]++; return dda; /* return a structure */ } /********************* Initialise COA *********************************/ /* Decides which codons or amino acids are to be included in a COA if only*/ /* the default choice is used. For an amino acid COA, only stops are excl */ /* but for a codon usage COA stop codons and non-synonymous codons are */ /* excluded */ /* pcoa points to a struct that describes columns to be */ /* included/excluded from any COA analysis */ /* structure contains AA and Codon information */ /**************************************************************************/ int initilize_coa(char code) { static char initilized; static char oldcode; int i; /* if called a second time return unless the genetic code has changed */ if (initilized && (oldcode == code)) return 1; for (i = 0; i < 22; i++) /* for each amino acid */ if (i == 11 || i == 0) /* stop codons have the value 11 */ pcoa->amino[i] = FALSE; /* see RECODING file for more details */ else pcoa->amino[i] = TRUE; for (i = 0; i < 65; i++) /* for each codon */ if (*(ds + i) == 1 || pcu->ca[i] == 11 || i == 0) pcoa->codons[i] = FALSE; else pcoa->codons[i] = TRUE; initilized = TRUE; /* we have been called ... 
*/ return 1; } /****************** Codon Usage Counting *****************************/ /* Counts the frequency of usage of each codon and amino acid this data */ /* is used throughout CodonW */ /* pcu->ca contains codon to amino acid translations for the current code */ /* and is assigned in initialise point */ /**************************************************************************/ int codon_usage_tot(char *seq, long int how_many) { char codon[4]; int icode; int i; for (i = 0; i < how_many - 2; i += 3) { strncpy(codon, (seq + i), 3); icode = ident_codon(codon); ncod[icode]++; /*increment the codon count */ naa[pcu->ca[icode]]++; /*increment the AA count */ codon_tot++; /*increment the codon total */ } if (how_many % 3) { /*if last codon was partial */ icode = 0; /*set icode to zero and */ ncod[0]++; /*increment untranslated */ } /*codons */ return icode; /*return the last codon */ } /****************** Ident codon *****************************/ /* Converts each codon into a numerical array (codon) and converts this */ /* array into a numerical value in the range 0-64, zero is reserved for */ /* codons that contain at least one unrecognised base */ /* */ /**************************************************************************/ int ident_codon(char *codon) { int icode = 0; int x; for (x = 0; x < 3; x++) { switch (codon[x]) { case 'T': case 't': case 'U': case 'u': codon[x] = (char) 1; continue; case 'C': case 'c': codon[x] = (char) 2; continue; case 'A': case 'a': codon[x] = (char) 3; continue; case 'G': case 'g': codon[x] = (char) 4; continue; case '\0': return 0; default: codon[x] = (char) 0; break; } } if (codon[0] * codon[1] * codon[2] != 0) icode = (codon[0] - 1) * 16 + codon[1] + (codon[2] - 1) * 4; else icode = 0; return icode; } /****************** Codon error *****************************/ /* Does some basic error checking for the input data, it can be called */ /* using different error levels, thus generating different types of */ /* messages. 
Basically checks for start, stop codons and internal stop */ /* codons. As well as non-translatable and partial codons */ /**************************************************************************/ long int codon_error(int x, int y, char *ttitle, char error_level) { long int ns = 0; /* number of stops */ long int loc_cod_tot = 0; static int error_lines = 0; int i; for (i = 1, ns = 0; i < 65; i++) { loc_cod_tot += ncod[i]; if (pcu->ca[i] == 11) ns += ncod[i]; /*count stop codons */ } switch (error_level) { case 1: /*internal stop codons */ ns = ns - valid_stops; /* a stop was a valid_stop if it was the last codon of a sequence */ if ( ! valid_start && pm->warn ) { dot(0,10); fprintf(pm->my_err, "\nWarning: Sequence %3li \"%-20.20s\" does " "not begin with a recognised start codon\n" ,num_sequence,ttitle); error_lines++; } if (ns && pm->warn ) { dot(0,10); if (pm->totals && pm->warn) fprintf(pm->my_err,"\nWarning: some sequences had internal stop" " codons (found %li such codons)\n", ns); else fprintf(pm->my_err, "\nWarning: Sequence %3li \"%-20.20s\" has " "%li internal stop codon(s)\n", num_sequence, ttitle, ns); num_seq_int_stop++; error_lines++; } break; case 2: dot(0,10); if (ncod[0] == 1 && pcu->ca[x] != 11 && pm->warn){ /* last codon was partial */ fprintf(pm->my_err, "\nWarning: Sequence %3li \"%-20.20s\" last codon was partial\n" ,num_sequence, ttitle); error_lines++; }else { if (ncod[0] && pm->warn){ /* non translatable codons */ if (pm->totals) fprintf(pm->my_err, "\nWarning: some sequences had non translatable" " codons (found %li such codons)\n", ncod[0]); else fprintf(pm->my_err, "\nWarning: sequence %3li \"%-20.20s\" has %li non translatable" " codon(s)\n", num_sequence, ttitle, ncod[0]); error_lines++; } if (pcu->ca[x] != 11 && pm->warn ) { if (!pm->totals){ fprintf(pm->my_err, "\nWarning: Sequence %3li \"%-20.20s\" is not terminated by" " a stop codon\n", num_sequence, ttitle); error_lines++; } } } break; case 3: /* Nc error routines see codon_us */ 
dot(0,10); /* dot resetting internal counter */ if (x==3) x=4; /* if x=3 there are no 3 or 4 fold AA */ fprintf(pm->my_err, "\nSequence %li \"%-20.20s\" contains ",num_sequence, ttitle); (y) ? fprintf(pm->my_err, "only %i ", (int) y) : fprintf(pm->my_err, "no "); fprintf(pm->my_err, "amino acids with %i synonymous codons\n", x); fprintf(pm->my_err, "\t--Nc was not calculated \n"); error_lines+=2; break; case 4: /* run silent */ break; default: my_exit(99,"Programme error in codon_error\n"); } if ((((error_lines + 2) * 2) > pm->term_length) && pm->verbose && pm->my_err == stderr ) { error_lines = 0; /* count lines of errors */ dot(0,10); pause; } return loc_cod_tot; /* Number of codons counted */ } /****************** Codon Usage Out *****************************/ /* Writes codon usage output to file. Note this subroutine is only called */ /* when machine readable output is selected, otherwise cutab_out is used */ /**************************************************************************/ int codon_usage_out(FILE * fblkout, long int *nncod, int last_aa, int vvalid_stops, char *ttitle) { long int ccodon_tot = 0; int x; char sp=pm->seperator; ccodon_tot = codon_error(last_aa, vvalid_stops, "" , (char) 4); /*dummy*/ /*example of output */ /*0,0,0,0,3,2,2,0,0,0,0,0,0,3,0,0, */ /*0,0,0,4,3,4,1,7,0,0,0,0,3,1,3,1,Codons=100 */ /*0,0,0,0,10,6,3,0,0,0,0,0,1,1,12,0,Universal Genetic code */ /*0,0,0,3,7,5,7,9,0,1,1,1,8,4,5,0,MLSPCOPER.PE1 */ for (x = 1; x < 65; x++) { fprintf(fblkout, "%i%c",nncod[x],sp); switch (x) { case 16: fprintf(fblkout, "\n"); break; case 32: fprintf(fblkout, "Codons=%ld\n",ccodon_tot); break; case 48: fprintf(fblkout, "%.30s\n", pcu->des); break; case 64: fprintf(fblkout, "%.20s\n",ttitle); break; default: break; } } return 1; } /****************** RSCU Usage out *****************************/ /* Writes Relative synonymous codon usage output to file. 
Note this subrou*/ /* tine is only called if machine readable output is selected */ /* If human readable format was selected then what the user really wanted */ /* was cutab so this is automatically selected in codons.c */ /* RSCU values are genetic codon dependent */ /**************************************************************************/ int rscu_usage_out(FILE * fblkout, long *nncod, long *nnaa) { int x; char sp=pm->seperator; /* ds points to an array[64] of synonym values i.e. how synon its AA is */ for (x = 1; x < 65; x++) { if (nnaa[pcu->ca[x]] != 0) fprintf(fblkout, "%5.3f%c", ( (float) nncod[x] / (float) nnaa[pcu->ca[x]]) * ((float) *(ds + x)), sp ); else fprintf(fblkout, "0.000%c",sp); if (x == 64) fprintf(fblkout, "%-20.20s", title); if (!(x % 16)) fprintf(fblkout, "\n"); } return 1; } /****************** RAAU output *****************************/ /* Writes Relative amino acid usage output to file. Amino Acid usage is */ /* normalised for gene length */ /**************************************************************************/ int raau_usage_out(FILE * fblkout, long *nnaa) { long int aa_tot = 0; static char first_line = TRUE; int i,x; char sp; if (pm->seq_format=='M') /* if machine readable */ sp = pm->seperator; else sp = '\t'; if (first_line) { /* if true write a header*/ if ( pm->seq_format=='M') fprintf(fblkout, "%s", "Gene_name"); else fprintf(fblkout, "%-20.20s", "Gene name"); for (i = 0; i < 22; i++) if ( pm->seq_format=='M') fprintf(fblkout, "%c%s", sp,paa->aa3[i]);/* three letter AA names*/ else fprintf(fblkout, "%c %-6.6s", sp,paa->aa3[i]); fprintf(fblkout, "\n"); first_line = FALSE; } for (i = 1; i < 22; i++) if (i != 11) aa_tot += nnaa[i]; /* total No. 
of AAs */ if ( pm->seq_format=='M') fprintf(fblkout, "%.30s", title); else fprintf(fblkout, "%-20.20s", title); /* don't waste spaces */ for (x = 0; x < 22; x++) if (x == 11) fprintf(fblkout, "%c0.0000",sp); /* report 0 for stops */ else if (aa_tot) if ( pm->seq_format=='M') fprintf(fblkout, "%c%.4f",sp, (double) nnaa[x] / (double) aa_tot); else fprintf(fblkout, "%c%7.4f",sp, (double) nnaa[x] / (double) aa_tot); else /*What no AminoAcids!!!! */ if ( pm->seq_format=='M') fprintf(fblkout, "%c%c",sp,sp); else fprintf(fblkout, "%c ***** ",sp); fprintf(fblkout, "\n",sp); return 1; } /****************** AA usage output *****************************/ /* Writes amino acid usage output to file. */ /**************************************************************************/ int aa_usage_out(FILE * fblkout, long *nnaa) { static char first_line = TRUE; int i; char sp=pm->seperator; if (first_line) { (pm->seq_format=='M')? fprintf(fblkout, "%s", "Gene_name"): fprintf(fblkout, "%-20.20s ", "Gene name"); for (i = 0; i < 22; i++) (pm->seq_format=='M')? fprintf(fblkout, "%c%s", sp,paa->aa3[i]): /* 3 letter AA code */ fprintf(fblkout, "%-5.5s", paa->aa3[i]); fprintf(fblkout, "\n"); first_line = FALSE; } (pm->seq_format=='M')? fprintf(fblkout, "%.20s", title): fprintf(fblkout, "%-20.20s ", title); for (i = 0; i < 22; i++){ (pm->seq_format=='M')? fprintf(fblkout, "%c%li", sp,nnaa[i]): fprintf(fblkout, "%-5li",nnaa[i]); } fprintf(fblkout, "\n"); return 1; } /****************** Base Silent output *******************************/ /* Calculates and write the base composition at silent sites */ /* normalised as a function of the possible usage at that silent site with*/ /* changing the amino acid composition of the protein. 
It is inspired by */ /* GC3s but is much more complicated to calculate as not every AA has the */ /* option to use any base at the third position */ /* All synonymous AA can select between a G or C though */ /**************************************************************************/ void base_sil_us_out(FILE * foutput, long *nncod, long *nnaa) { int id,i,x,y,z; long bases_s[4]; /* synonymous GCAT bases */ long cb[4]; /* codons that could have been GCAT */ int done[4]; char sp= (char) (pm->seq_format=='H')? (char) '\t': (char) pm->seperator; for (x = 0; x < 4; x++) { cb[x] = 0; bases_s[x] = 0; } /* blank the arrays */ for (x = 1; x < 5; x++) for (y = 1; y < 5; y++) for (z = 1; z < 5; z++) { /* look at all 64 codons */ id = (x - 1) * 16 + y + (z - 1) * 4; if (*(ds + id) == 1 || pcu->ca[id] == 11) continue; /* if no synon skip to next codon */ bases_s[z - 1] += nncod[id]; /* count No. codon ending in base X */ } for (i = 1; i < 22; i++) { for (x = 0; x < 4; x++) /* don't want to count bases in 6 fold */ done[x] = FALSE; /* sites twice do we so we remember */ if (i == 11 || *(da + i) == 1) continue; /* if stop codon skip, or AA not synony */ for (x = 1; x < 5; x++) /* else add aa to could have ended count */ for (y = 1; y < 5; y++) for (z = 1; z < 5; z++) { id = (x - 1) * 16 + y + (z - 1) * 4; /* assign codon values in range 1-64 */ if (pcu->ca[id] == i && done[z - 1] == FALSE) { /* encode AA i which we know to be synon so add could_be_x ending*/ /* by the Number of that amino acid */ cb[z - 1] += nnaa[i]; done[z - 1] = TRUE; /* don't look for any more or we might */ /* process leu+arg+ser twice */ } } } /* Now the easy bit ... 
just output the results to file */ for (i = 0; i < 4; i++) { if (cb[i] > 0) fprintf(foutput, "%6.4f%c", (double) bases_s[i]/(double)cb[i], sp); else fprintf(foutput, "0.0000%c",sp); } return; } /****************** Clean up *******************************/ /* Called after each sequence has been completely read from disk */ /* It re-zeros all the main counters, but is not called when concatenating*/ /* sequences together */ /**************************************************************************/ int clean_up(long int *nncod, long int *nnaa) { int x; int i; for (x = 0; x < 65; x++) nncod[x] = 0; for (x = 0; x < 23; x++) nnaa[x] = 0; /* dinucleotide count remembers the */ dinuc_count(" ", 1); /* last_base from the last fragment */ /* this causes the last base to be "" */ for (x = 0; x < 3; x++) for (i = 0; i < 16; i++) din[x][i] = 0; dinuc_count(" ", 1); master_ic = tot = non_std_char = AT_TOT = GC_TOT = AA_TOT = GAP_TOT = IUBC_TOT = 0; long_seq = FALSE; valid_stops = valid_start = codon_tot = tot = fram = 0; return 1; } /*****************Codon Adaptation Index output *************************/ /* Codon Adaptation Index (CAI) (Sharp and Li 1987). CAI is a measurement */ /* of the relative adaptiveness of the codon usage of a gene towards the */ /* codon usage of highly expressed genes. The relative adaptiveness (w) of*/ /* each codon is the ratio of the usage of each codon, to that of the most*/ /* abundant codon for the same amino acid. The relative adaptiveness of */ /* codons for albeit a limited choice of species, can be selected from the*/ /* Menu. The user can also input a personal choice of values. The CAI */ /* index is defined as the geometric mean of these relative adaptiveness */ /* values. Non-synonymous codons and termination codons (genetic code */ /* dependent) are excluded. 
To aid computation, the CAI is calculated as */ /* using a natural log summation, To prevent a codon having a relative */ /* adaptiveness value of zero, which could result in a CAI of zero; */ /* these codons have fitness of zero (<.0001) are adjusted to 0.01 */ /**************************************************************************/ int cai_out(FILE * foutput, long int *nncod) { long int totaa = 0; double sigma; float ftemp; int x; char sp= (char) (pm->seq_format=='H')? (char) '\t': (char) pm->seperator; static char cai_ttt = FALSE; static char description[61]; static char reference[61]; static CAI_STRUCT user_cai; if (!cai_ttt ) { /* have we been called already */ user_cai.des = description; /* assign an array to a pointer */ user_cai.ref = reference; /* as above */ if ( pm->caifile==NULL && pm->verbose==TRUE && pm->menu==TRUE && (pcai == cai )){ /* this is false */ /* if personal caifile is on commandline or */ /* in non-interactive mode or -silent option */ /* or cai values are not the default values */ printf("\nDo you wish to input a personal choice of CAI" " values (y/n) [n] "); gets(pm->junk); /* This allows a user defined choice of CAI values to be selected */ if ('Y' == (char) toupper( (int) pm->junk[0])) { /* tell the user a little about what we are looking for */ printf("\nInput file must contain 64 CAI values\n" "ranging from 0.00 to 1.00\n" "values must be separated by spaces\n"); /* open the CAI adaptiveness values file */ if (!(pm->caifile = open_file("file with CAI values" ,"cai.coa", "r", 0))) my_exit(6,"cai_out"); } } /* matched if pm->caifile=*/ if (pm->caifile){ rewind (pm->caifile); /* unlikely unless fopfile = caifile */ x = 0; strcpy(user_cai.des,"User supplied CAI adaptation values "); strcpy(user_cai.ref,"No reference"); user_cai.cai_val[x++] = (float) 0.0; while ((fscanf(pm->caifile, "%f ", &ftemp)) != EOF) { /* if any bad CAI values are read EXIT*/ if (ftemp < 0 || ftemp > 1.0) { printf("\nError CAI %f value out of 
range\nEXITING",ftemp); my_exit(99,"cai_out"); } user_cai.cai_val[x++] = ftemp; /* assign value */ } /* end of while */ if (x != 65) { /* wrong number of codons */ fprintf(pm->my_err, "\nError in CAI file, found %i values" " expected 64 values EXITING\n", x - 1); my_exit(99,"cai_out"); } pcai = &user_cai; /* assigns pointer to user CAI values */ } /* matches if( pm->caifile... */ printf ("Using %s (%s) w values to calculate " "CAI \n",pcai->des,pcai->ref); cai_ttt = TRUE; /*stops this "if" from being entered */ } /* matches if (!cai_ttt ) */ for (x = 1, sigma = 0; x < 65; x++) { if (pcu->ca[x] == 11 || *(ds + x) == 1) continue; if (pcai->cai_val[x] < 0.0001)/* if value is effectively zero */ pcai->cai_val[x] = (float) 0.01; /* make it .01 */ sigma += (double) *(nncod + x) * log((double) pcai->cai_val[x]); totaa += *(nncod + x); } if (totaa) { /* catch floating point overflow error*/ sigma = sigma / (double) totaa; sigma = exp(sigma); } else sigma = 0; fprintf(foutput, "%5.3f%c", sigma,sp); return 1; } /*****************Codon Bias Index output **************************/ /* Codon bias index is a measure of directional codon bias, it measures */ /* the extent to which a gene uses a subset of optimal codons. */ /* CBI = ( Nopt-Nran)/(Nopt-Nran) Where Nopt = number of optimal codons; */ /* Ntot = number of synonymous codons; Nran = expected number of optimal */ /* codons if codons were assigned randomly. CBI is similar to Fop as used */ /* by Ikemura, with Nran used as a scaling factor. In a gene with extreme */ /* codon bias, CBI will equal 1.0, in a gene with random codon usage CBI */ /* will equal 0.0. Note that it is possible for Nopt to be less than Nran.*/ /* This results in a negative value for CBI. 
*/ /* ( Bennetzen and Hall 1982 ) */ /**************************************************************************/ int cbi_out(FILE * foutput, long int *nncod, long int *nnaa ) { long int tot_cod = 0; long int opt = 0; float exp_cod = (float) 0.0; float fcbi; int c,x; char str[2]; char sp= (pm->seq_format=='H')? (char) '\t': (char) pm->seperator; static char description[61]; static char reference[61]; static char first_call_cbi = TRUE; static char has_opt_info[22]; static FOP_STRUCT user_cbi; if (first_call_cbi) { /* have we been called already */ user_cbi.des = description; /* assign a pointer to array */ user_cbi.ref = reference; if ( pm->cbifile == NULL && pm->verbose==TRUE && pm->menu==TRUE && ( pcbi == fop )){ /* this is false */ /* if personal fopfile is on commandline or */ /* in non-interactive mode or -silent option */ /* or fop values are not the default values */ printf("\nDo you wish to input a personal choice of CBI" " values (y/n) [n] "); gets(pm->junk); if ('Y' == (char) toupper( (int) pm->junk[0])) { printf("\nInput file must contain 64 CBI values\n" " 1= rare codon\n 2= common codon\n 3= optimal codon\n"); if (!(pm->cbifile = open_file("file with CBI values" ,"cbi.coa", "r", 0))) my_exit(6,"cai_out"); } /* matches if Y== */ } /* matches if pm->cbifile==NULL */ if ( pm->cbifile ){ rewind (pm->cbifile); /* fopfile can be the same as cbifile */ strcpy(user_cbi.des,"User supplied choice"); strcpy(user_cbi.ref,"No reference"); x = 0; user_cbi.fop_cod[x++] = 0; while ((c = fgetc(pm->cbifile)) != EOF && x <=66) { sprintf (str,"%c",c); if (isdigit(c) && atoi(str) >= 0 && atoi(str) <= 3) { user_cbi.fop_cod[x++] = (char) atoi(str); } /* isdigit */ } /* end of while */ if (x != 65) { /* wrong number of codons */ sprintf(pm->messages, "\nError in CBI file %i digits found, " "expected 64 EXITING\n", x - 1); my_exit(99,pm->messages); } pcbi = (&user_cbi); } /* matches if(pm->cbifile) */ printf ("Using %s (%s) \noptimal codons to calculate " 
"CBI\n",pcbi->des,pcbi->ref); /* initilise has_opt_info */ for (x = 1; x < 22; x++) has_opt_info[x]=0; for (x = 1; x < 65; x++) { if (pcu->ca[x] == 11 || *(ds + x) == 1) continue; if (pcbi->fop_cod[x] == 3 ) has_opt_info[pcu->ca[x]]++; } first_call_cbi = FALSE; /* this won't be called again */ } /* matches if (first_call_cbi) */ for (x = 1; x < 65; x++) { if (! has_opt_info[pcu->ca[x]]) continue; switch ((int) pcbi->fop_cod[x]) { case 3: opt += nncod[x]; tot_cod += nncod[x]; exp_cod += (float) nnaa[pcu->ca[x]]/ (float) da[pcu->ca[x]]; break; case 2: case 1: tot_cod += *(nncod + x); break; default: sprintf(pm->messages, " Serious error in CBI information found" " an illegal CBI value of %f for codon %i" " permissible values are \n 1 for non-optimal" " codons\n 2 for common codons\n" " 3 for optimal codons\n" " EXITING ", pcbi->fop_cod[x], x); my_exit(99,pm->messages); break; } /* end of switch */ } /* for ( ) */ if( tot_cod - exp_cod) fcbi= (opt - exp_cod) / (tot_cod - exp_cod); else fcbi= (float) 0.0; fprintf(foutput, "%5.3f%c", fcbi,sp); /* CBI QED */ return 1; } /****************** Frequency of OPtimal codons output ********************/ /* Frequency of Optimal codons (Fop) (Ikemura 1981). This index, is ratio */ /* of optimal codons to synonymous codons (genetic code dependent). Optimal*/ /* codons for several species are in-built and can be selected using Menu 3*/ /* By default, the optimal codons of E. coli are assumed. The user may also*/ /* enter a personal choice of optimal codons. If rare synonymous codons */ /* have been identified, there is a choice of calculating the original Fop */ /* index or a modified index. Fop values for the original index are always */ /* between 0 (where no optimal codons are used) and 1 (where only optimal */ /* codons are used). When calculating the modified Fop index, any negative */ /* values are adjusted to zero. 
*/ /***************************************************************************/ int fop_out(FILE * foutput, long int *nncod) { long int nonopt = 0; long int std = 0; long int opt = 0; float ffop; int c,x; char nonopt_codons = FALSE; char str[2]; char sp= (pm->seq_format=='H')? (char) '\t': (char) pm->seperator; static char first_call = TRUE; static char description[61]; static char reference[61]; static char asked_about_fop = FALSE; static char factor_in_rare = FALSE; static char has_opt_info[22]; static FOP_STRUCT user_fop; if (first_call) { /* have I been called previously */ user_fop.des = description; user_fop.ref = reference; if ( pm->fopfile == NULL && pm->verbose==TRUE && pm->menu == TRUE && (pfop == fop )) { /* this is false */ /* if personal fopfile is on commandline or */ /* in non-interactive mode or -silent option */ /* or fop values are not the default values */ printf("\nDo you wish to input a personal choice of Fop" " values (y/n) [n] "); gets(pm->junk); if ('Y' == (char) toupper( (int) pm->junk[0])) { printf("\nInput file must contain 64 Fop values\n" " 1= rare codon\n 2= common codon\n 3= optimal codon\n"); if (!(pm->fopfile = open_file("file with Fop values" ,"fop.coa", "r", 0))) my_exit(6,"fop_out"); } /* if 'Y' == */ } /* if (pm->fopfile == NULL........ 
) */ if ( pm->fopfile ) { rewind (pm->fopfile); /* possible for fopfile = cbifile */ strcpy(user_fop.des,"User supplied choice"); strcpy(user_fop.ref,"No reference"); x = 0; user_fop.fop_cod[x++] = 0; while ((c = fgetc(pm->fopfile)) != EOF && x <=66) { sprintf (str,"%c",c); if (isdigit(c) && atoi(str) >= 0 && atoi(str) <= 3) { user_fop.fop_cod[x++] = (char) atoi(str); } /* test isdigit */ } /* end of while */ if (x != 65) { /* wrong number of codons */ sprintf(pm->messages, "\nError in Fop file %i values found, " "expected 64 EXITING\n", x - 1); my_exit(99,pm->messages); } pfop = &user_fop; /* assigns pointer to user fop values*/ } printf ("Using %s (%s)\noptimal codons to calculate " "Fop\n",pfop->des,pfop->ref); /* initilise has_opt_info */ for (x = 1; x < 22; x++) has_opt_info[x]=0; for (x = 1; x < 65; x++) { if (pcu->ca[x] == 11 || *(ds + x) == 1) continue; if (pfop->fop_cod[x] == 3 ) has_opt_info[pcu->ca[x]]++; if (pfop->fop_cod[x] == 1 ){ if (!asked_about_fop && pm->verbose) { printf("\nIn the set of optimal codons you have selected,\n" "non-optimal codons have been identified\nThey can be " "used in the calculation of a modified Fop, " "(Fop=(opt-rare)/total)\n else the original formulae " "will be used (Fop=opt/total)\n\n\t\tDo you wish " "calculate a modified fop (y/n) [n] "); gets(pm->junk); if ( 'Y' == (char) toupper( (int)pm->junk[0])) factor_in_rare = TRUE; asked_about_fop = TRUE; } if ( factor_in_rare == TRUE ) has_opt_info[pcu->ca[x]]++; } } /* matches for (x=1 */ first_call = FALSE; } /* matches if ( !first_call ) */ for (x = 1; x < 65; x++) { if (!has_opt_info[pcu->ca[x]] ) continue; switch ((int) pfop->fop_cod[x]) { case 3: opt += *(nncod + x); break; case 2: std += *(nncod + x); break; case 1: nonopt_codons = TRUE; nonopt += *(nncod + x); break; default: sprintf(pm->messages, " Serious error in fop information found" " an illegal fop value of %f for codon %l" " permissible values are \n 1 for non-optimal" " codons\n 2 for common codons\n" " 3 for 
optimal codons\n" " EXITING ", pfop->fop_cod[x], x); printf ("opt %l, std %l, nonopt %l\n",opt,std,nonopt); my_exit(99,pm->messages); break; } } /* only ask this once ... */ if (factor_in_rare && (opt + nonopt + std) ) ffop = (float) (opt - nonopt) / (float) (opt + nonopt + std); else if ((opt + nonopt + std)) ffop = (float) opt / (float) (opt + nonopt + std); else ffop=0.0; fprintf(foutput, "%5.3f%c", ffop,sp); return 1; } /*************** Effective Number of Codons output *********************/ /* The effective number of codons (NC) (Wright 1990). This index is a */ /* simple measure of overall codon bias and is analogous to the effective */ /* number of alleles measure used in population genetics. Knowledge of the*/ /* optimal codons or a reference set of highly expressed genes is not */ /* needed when calculating this index. Initially the homozygosity for each*/ /* amino acid is estimated from the squared codon frequencies. */ /**************************************************************************/ float enc_out(FILE * foutput, long int *nncod, long int *nnaa) { int numaa[9]; int fold[9]; int error_t = FALSE; int i,z,x; double totb[9]; double averb = 0, bb = 0, k2 = 0, s2 = 0; float enc_tot = 0.0F; char sp= (pm->seq_format=='H')? 
(char) '\t': (char) pm->seperator; /* don't assume that 6 is the largest possible amino acid family assume 9*/ for (i = 0; i < 9; i++) { fold[i] = 0; /* initialise arrays to zero */ totb[i] = 0.0; numaa[i] = 0; } for (i = 1; i < 22; i++) { /* for each amino acid */ if (i == 11) continue; /* but not for stop codons */ if (*(nnaa + i) <= 1) /* if this aa occurs once then skip */ bb = 0; else { for (x = 1, s2 = 0; x < 65; x++) { /* Try all codons but we are only looking for those that encode*/ /* amino amid i, saves having to hard wire in any assumptions */ if (pcu->ca[x] != i) continue; /* skip is not i */ if (*(nncod + x) == 0) /* if codons not used then */ k2 = 0.0; /* k2 = 0 */ else k2 = pow(((double) *(nncod + x) / (double) *(nnaa + i)), (double) 2); s2 += k2; /* sum of all k2's for aa i */ } bb = (((double) *(nnaa + i) * s2) - 1.0) / /* homozygosity */ (double) (*(nnaa + i) - 1.0); } if (bb > 0.0000001) { totb[*(da + i)] += bb; /* sum of all bb's for amino acids */ /* which have z alternative codons */ numaa[*(da + i)]++; /* where z = *(da+i) */ } /* numaa is no of aa that were z */ fold[*(da + i)]++; /* fold z=4 can have 9 in univ code */ } /* but some aa may be absent from */ /* gene therefore numaa[z] may be 0 */ enc_tot = (float) fold[1]; for (z = 2, averb = 0, error_t = FALSE; z <= 8; z++) { /* look at all values of z if there */ if (fold[z]) { /* are amino acids that are z fold */ if (numaa[z] && totb[z] > 0) averb = totb[z] / numaa[z]; else if (z==3 && numaa[2] && numaa[4] && fold[z]==1 ) /* special case */ averb = (totb[2] / numaa[2] + totb[4] / numaa[4]) * 0.5; else { /* write error to stderr */ codon_error( z, numaa[z], title, 3 ); error_t = TRUE; /* error catch for strange genes */ break; } enc_tot += (float) fold[z] / (float) averb; /* the calculation */ } } if (error_t) fprintf(foutput, "*****%c",sp); else if (enc_tot <= 61) fprintf(foutput, "%5.2f%c", enc_tot,sp); else fprintf(foutput, "61.00%c",sp); return enc_tot; } /******************* G+C 
output *******************************/ /* This function is a real work horse, initially it counts base composit */ /* ion in all frames, length of gene, num synonymous codons, number of */ /* non synonymous codons. Then dependent on the value for which used in */ /* switch statement. We return various analyses of this data */ /* if which ==1 then the output is very detailed, base by base etc. */ /* if which ==2 then the output is for GC content only */ /* if which ==3 then the output is for GC3s (GC at synonymous 3rd posit) */ /* if which ==4 then the output is for L_sym */ /* if which ==5 then the output is for L_aa */ /* The output from this subroutine is in a tabular format if human read- */ /* able output is selected, and in columns if machine readable. Also the */ /* number of values reported changes as it is assumed the user has access*/ /* to a spreadsheet type programme if they are requesting tabular output */ /*************************************************************************/ void gc_out(FILE * foutput, FILE * fblkout, int which){ long int id; long int bases[5]; /* base that are synonymous GCAT */ long int base_tot[5]; long int base_1[5]; long int base_2[5]; long int base_3[5]; long int tot_s = 0; long int totalaa = 0; static char header = FALSE; int x,y,z; char sp= (pm->seq_format=='H')? (char) '\t': (char) pm->seperator; typedef double lf; for (x = 0; x < 5; x++) { bases[x] = 0; /* initialise array values to zero */ base_tot[x] = 0; base_1[x] = 0; base_2[x] = 0; base_3[x] = 0; } for (x = 1; x < 5; x++) for (y = 1; y < 5; y++) for (z = 1; z < 5; z++) { /* look at all 64 codons */ id = (x - 1) * 16 + y + (z - 1) * 4; if (pcu->ca[id] == 11) continue; /* skip if a stop codon */ base_tot[x] += ncod[id]; /* we have a codon xyz therefore the */ base_1[x] += ncod[id]; /* frequency of each position for base*/ base_tot[y] += ncod[id]; /* x,y,z are equal to the number of */ base_2[y] += ncod[id]; /* xyz codons .... 
easy */ base_tot[z] += ncod[id]; /* will be fooled a little if there */ base_3[z] += ncod[id]; /* non translatable codons, but these */ /* are ignored when the avg is calc */ totalaa += ncod[id]; if (*(ds + id) == 1) continue; /* if not synon skip codon */ bases[z] += ncod[id]; /* count no of codons ending in Z */ tot_s += ncod[id]; /* count tot no of silent codons */ } if (!tot_s || !totalaa) { fprintf(pm->my_err, "Warning %.20s appear to be too short\n", title); fprintf(pm->my_err, "No output was written to file \n"); return; } switch ((int) which) { case 1: /* exhaustive output for analysis */ if (pm->seq_format == 'M') { /* machine readable format */ if (!header) { /* print a first line */ fprintf(fblkout, "Gene_description%cLen_aa%cLen_sym%cGC%cGC3s%cGCn3s%cGC1%cGC2" "%cGC3%cT1%cT2%cT3%cC1%cC2%cC3%cA1%cA2%cA3%cG1%cG2%cG3\n" ,sp,sp,sp,sp,sp,sp,sp,sp,sp,sp,sp,sp,sp,sp,sp,sp,sp,sp,sp,sp,sp); header = TRUE; } /* now print the information */ fprintf(fblkout, "%-.20s%c", title,sp); fprintf(fblkout, "%ld%c%ld%c%5.3f%c%5.3f%c%5.3f%c%5.3f%c%5.3f%c%5.3f%c" "%5.3f%c%5.3f%c%5.3f%c%5.3f%c%5.3f%c%5.3f%c%5.3f%c" "%5.3f%c%5.3f%c%5.3f%c%5.3f%c%5.3f\n", totalaa,sp, tot_s,sp, (lf) (base_tot[2] + base_tot[4]) / (lf) (totalaa * 3),sp, (lf) (bases[2] + bases[4]) / (lf) tot_s,sp, (lf) (base_tot[2] + base_tot[4] - bases[2] - bases[4]) / (lf) (totalaa * 3 - tot_s),sp, (lf) (base_1[2] + base_1[4]) / (lf) (totalaa),sp, (lf) (base_2[2] + base_2[4]) / (lf) (totalaa),sp, (lf) (base_3[2] + base_3[4]) / (lf) (totalaa),sp, (lf) base_1[1] / (lf) totalaa,sp, (lf) base_2[1] / (lf) totalaa,sp, (lf) base_3[1] / (lf) totalaa,sp, (lf) base_1[2] / (lf) totalaa,sp, (lf) base_2[2] / (lf) totalaa,sp, (lf) base_3[2] / (lf) totalaa,sp, (lf) base_1[3] / (lf) totalaa,sp, (lf) base_2[3] / (lf) totalaa,sp, (lf) base_3[3] / (lf) totalaa,sp, (lf) base_1[4] / (lf) totalaa,sp, (lf) base_2[4] / (lf) totalaa,sp, (lf) base_3[4] / (lf) totalaa); } else { /* must be human formatted output then*/ fprintf(fblkout, /* 
tabulated output */ "Gene Name: %-69.69s\nLength : %-ld aa" " \tNon_synonymous/synonymous codons (%3ld/%5ld)\n" " GC=%5.3f\tGC3s=%5.3f\tGC_not_GC3s=%5.3f\n" "base\t1\t2\t3\ttotal\t\t1\t2\t3 \ttotal\n" " T\t%5.3f\t%5.3f\t%5.3f\t%5.3f\t" "W\t%5.3f\t%5.3f\t%5.3f\t%5.3f\n" " C\t%5.3f\t%5.3f\t%5.3f\t%5.3f\t" "S\t%5.3f\t%5.3f\t%5.3f\t%5.3f\n" " A\t%5.3f\t%5.3f\t%5.3f\t%5.3f\t" "R\t%5.3f\t%5.3f\t%5.3f\t%5.3f\n" " G\t%5.3f\t%5.3f\t%5.3f\t%5.3f\t" "Y\t%5.3f\t%5.3f\t%5.3f\t%5.3f\n\n", title, totalaa, totalaa - tot_s, tot_s, (lf) (base_tot[2] + base_tot[4]) / (lf) (totalaa * 3), (lf) (bases[2] + bases[4]) / (lf) tot_s, (lf) (base_tot[2] + base_tot[4] - bases[2] - bases[4]) / (lf) (totalaa * 3 - tot_s), (lf) base_1[1] / (lf) totalaa, (lf) base_2[1] / (lf) totalaa, (lf) base_3[1] / (lf) totalaa, (lf) base_tot[1] / (lf) (totalaa * 3), (lf) (base_1[1] + base_1[3]) / (lf) totalaa, (lf) (base_2[1] + base_2[3]) / (lf) totalaa, (lf) (base_3[1] + base_3[3]) / (lf) totalaa, (lf) (base_tot[1] + base_tot[3]) / (lf) (totalaa * 3), (lf) base_1[2] / (lf) totalaa, (lf) base_2[2] / (lf) totalaa, (lf) base_3[2] / (lf) totalaa, (lf) base_tot[2] / (lf) (totalaa * 3), (lf) (base_1[2] + base_1[4]) / (lf) totalaa, (lf) (base_2[2] + base_2[4]) / (lf) totalaa, (lf) (base_3[2] + base_3[4]) / (lf) totalaa, (lf) (base_tot[2] + base_tot[4]) / (lf) (totalaa * 3), (lf) base_1[3] / (lf) totalaa, (lf) base_2[3] / (lf) totalaa, (lf) base_3[3] / (lf) totalaa, (lf) base_tot[3] / (lf) (totalaa * 3), (lf) (base_1[3] + base_1[4]) / (lf) totalaa, (lf) (base_2[3] + base_2[4]) / (lf) totalaa, (lf) (base_3[3] + base_3[4]) / (lf) totalaa, (lf) (base_tot[3] + base_tot[4]) / (lf) (totalaa * 3), (lf) base_1[4] / (lf) totalaa, (lf) base_2[4] / (lf) totalaa, (lf) base_3[4] / (lf) totalaa, (lf) base_tot[4] / (lf) (totalaa * 3), (lf) (base_1[1] + base_1[2]) / (lf) totalaa, (lf) (base_2[1] + base_2[2]) / (lf) totalaa, (lf) (base_3[1] + base_3[2]) / (lf) totalaa, (lf) (base_tot[1] + base_tot[2]) / (lf) (totalaa * 3)); /* What 
hit me, did anyone see a bus */ } break; case 2: /* a bit more simple ... GC content */ fprintf(foutput, "%5.3f%c", (lf) ((base_tot[2] + base_tot[4]) / (lf) (totalaa * 3)),sp); break; case 3: /* GC3s */ fprintf(foutput, "%5.3f%c", (lf) (bases[2] + bases[4]) / (lf) tot_s,sp); break; case 4: /* Number of synonymous codons */ fprintf(foutput, "%3li%c", tot_s,sp); break; case 5: /* Total length in translatable AA */ fprintf(foutput, "%3li%c", totalaa,sp); break; #ifdef DEBUG default: fprintf(stderr, " Programming error in GC_out which (%i) is out of " "valid range\n" ,(int) which); my_exit(99, "gc out"); break; #endif } return; } /******************** DOT ******************************************/ /* Indicates the progress of a search */ /**************************************************************************/ void dot(int y, long int period) { static long int xx; static char dott=0; if (!y) dott = 0; /* re-zero the width counter */ if (++xx % period == 0){ /* every period calls print a . 
*/ fprintf(stderr,"."); dott++; } if ( dott == 50) { /* every 50 dots wrap the line */ fprintf(stderr,"\n"); dott=0; } return; } /********************** get_aa *****************************************/ /* get_aa converts a numeric codon value (range 0-64 ) into a amino acid */ /* and returns that amino acid number */ /* pcu->ca converts the codon number into amino acid number */ /* paa->aa1 converts amino acid code into letters */ /***************************************************************************/ char *get_aa(int which, char *codon) { char *amino=NULL; if (strlen(codon) == 3) { if (which == 1) amino = paa->aa1[pcu->ca[ident_codon(codon)]]; else amino = paa->aa3[pcu->ca[ident_codon(codon)]]; } else { amino = amino; amino = paa->aa1[0]; } return amino; } /********************** cutab_out ***********************************/ /* Generates a formatted table of codon, RSCU and amino acid usage */ /* ds points to an array[64] of synonymous values */ /* it reveals how many synonyms there are for each aa */ /**************************************************************************/ int cutab_out(FILE * fblkout, long *nncod, long *nnaa) { int last_row[4]; int x; char sp; if (pm->seq_format=='M') sp = pm->seperator; else sp = '\t'; for (x = 0; x < 4; x++) last_row[x] = 0; codon_tot = codon_error(1, 1, "", (char) 4); /* dummy*/ for (x = 1; x < 65; x++) { if (last_row[x % 4] != pcu->ca[x]){ (pm->seq_format=='M')? fprintf(fblkout, "%s%c%s%c", paa->aa3[pcu->ca[x]], sp, paa->cod[x], sp): fprintf(fblkout, "%s %s" , paa->aa3[pcu->ca[x]], paa->cod[x]); } else{ (pm->seq_format=='M')? 
fprintf(fblkout, "%c%s%c", sp, paa->cod[x], sp): fprintf(fblkout, " %s", paa->cod[x]); } /* Sample of output *******************************************************/ /*Phe UUU 0 0.00 Ser UCU 1 0.24 Tyr UAU 1 0.11 Cys UGU 1 0.67 */ /* UUC 22 2.00 UCC 10 2.40 UAC 17 1.89 UGC 2 1.33 */ /*Leu UUA 0 0.00 UCA 1 0.24 TER UAA 0 0.00 TER UGA 1 3.00 */ /* UUG 1 0.12 UCG 6 1.44 UAG 0 0.00 Trp UGG 4 1.00 */ /**************************************************************************/ (pm->seq_format=='M')? fprintf(fblkout, "%i%c%.2f%c", (int) nncod[x], sp, (nncod[x]) ? ((float) nncod[x] / (float) nnaa[pcu->ca[x]]) * (float) (*(ds + x)):0,sp): /* end of fprintf */ fprintf(fblkout, "%5i%5.2f ", (int) nncod[x], (nncod[x]) ? ((float) nncod[x] / (float) nnaa[pcu->ca[x]]) * (float) (*(ds + x)):0); /* end of fprintf */ last_row[x % 4] = pcu->ca[x]; if (!(x % 4)) fprintf(fblkout, "\n"); if (!(x % 16)) fprintf(fblkout, "\n"); } fprintf(fblkout, "%li codons in %16.16s (used %22.22s)\n\n", (long int) codon_tot, title, pcu->des); return 1; } /******************** Dinuc_count *************************************/ /* Count the frequency of all 16 dinucleotides in all three possible */ /* reading frames. 
This one of the few functions that does not use the */
/* codon and amino acid usage arrays ncod and naa to measure the parameter*/
/* rather they use the raw sequence data */
/*                                                                        */
/* seq    raw sequence; upper or lower case T/U, C, A, G are recognised   */
/* ttot   number of characters of seq to examine                          */
/* Each recognised base is coded T/U=1, C=2, A=3, G=4, anything else 0.   */
/* A dinucleotide is counted in din[fram][] only when both the current    */
/* and the previous base are recognised.  The previous base is kept in a  */
/* static variable, so it carries over from one call to the next.         */
/* NOTE(review): fram advances only when a dinucleotide is counted, so an */
/* unrecognised character shifts the frame numbering of what follows -    */
/* confirm this is intended.                                              */
/* Always returns 1.                                                      */
/**************************************************************************/
int dinuc_count(char *seq, long int ttot)
{
    static char a = 0;          /* code of the most recent base           */
    long int i;                 /* long, to match the type of ttot        */

    for (i = 0; i < ttot; i++) {
        last_base = a;

        switch (seq[i]) {
        case 't':
        case 'T':
        case 'u':
        case 'U':
            a = 1;
            break;
        case 'c':
        case 'C':
            a = 2;
            break;
        case 'a':
        case 'A':
            a = 3;
            break;
        case 'g':
        case 'G':
            a = 4;
            break;
        default:
            a = 0;
            break;
        }

        /* skip if either base is not a standard UTCG, which also covers */
        /* the very first base seen                                      */
        if (!a || !last_base)
            continue;

        din[fram][((last_base - 1) * 4 + a) - 1]++;

        if (++fram == 3)
            fram = 0;           /* wrap the frame counter                 */
    }
    return 1;
}

/***************** Dinuc_out ************************************/
/* Outputs the frequency of dinucleotides, either in fout rows per seq */
/* if the output is meant to be in a human readable form, each row repre- */
/* senting a reading frame. The fourth row is the total of the all the */
/* reading frames.
Machine readable format writes all the data into a */
/* single row */
/*                                                                        */
/* fblkout  output stream                                                 */
/* ttitle   sequence name, at most 15 characters of it are written        */
/* Frequencies are taken from the global din[3][16] table; each value is  */
/* the count divided by the total for that frame (or for all frames in    */
/* the "all" row), 0.000 when the total is zero.                          */
/* A header line is written the first time the function is called.        */
/* Always returns 1.                                                      */
/**************************************************************************/
int dinuc_out(FILE * fblkout, char *ttitle)
{
    static char called = FALSE;         /* header already written         */
    char bases[5] = {'T', 'C', 'A', 'G'};
    char sp = pm->seperator;
    long dinuc_tot[4];                  /* [0..2] per frame, [3] overall  */
    int  human = (pm->seq_format == 'H');
    int  i, x, y;

    for (x = 0; x < 4; x++)
        dinuc_tot[x] = 0;

    for (x = 0; x < 3; x++)
        for (i = 0; i < 16; i++) {
            dinuc_tot[x] += din[x][i];
            dinuc_tot[3] += din[x][i];
        }

    if (human)
        sp = ' ';

    if (!called) {                      /* first row is a header          */
        called = TRUE;

        if (human) {
            fprintf(fblkout, "%-13.13s%cframe%c", "title", sp, sp);
            for (x = 0; x < 4; x++)
                for (i = 0; i < 4; i++)
                    /* was "%4.4c": a precision is undefined for %c */
                    fprintf(fblkout, "%c%c%4c", bases[x], bases[i], sp);
        } else {
            fprintf(fblkout, "%s", "title");
            for (y = 0; y < 4; y++) {
                fprintf(fblkout, "%c%s", sp, "frame");
                for (x = 0; x < 4; x++)
                    for (i = 0; i < 4; i++)
                        fprintf(fblkout, "%c%c%c", sp, bases[x], bases[i]);
            }
        }
        fprintf(fblkout, "\n");
    }

/*Sample output truncated **********************************************/
/*title frame TT TC TA TG CT CC CA CG AT */
/*MLSPCOPER.PE1__ 1:2 0.024 0.041 0.016 0.008 0.049 0.041 0.033 0.098 ... */
/*MLSPCOPER.PE1__ 2:3 0.000 0.195 0.000 0.098 0.000 0.138 0.008 0.073 ... */
/*MLSPCOPER.PE1__ 3:1 0.008 0.016 0.000 0.033 0.033 0.107 0.172 0.262 ... */
/*MLSPCOPER.PE1__ all 0.011 0.084 0.005 0.046 0.027 0.095 0.071 0.144 ... */
/*MLSPCOPER.PE2__ 1:2 0.026 0.026 0.009 0.009 0.053 0.035 0.053 0.061 ... */
/**************************************************************************/

    for (x = 0; x < 4; x++) {
        /* human output repeats the title on every row, machine output  */
        /* writes it once at the start of the single row                */
        if (human || x == 0)
            fprintf(fblkout, human ? "%-15.15s%c" : "%-.15s%c", ttitle, sp);

        switch (x) {
        case 0:
            fprintf(fblkout, "1:2%c", sp);
            break;
        case 1:
            fprintf(fblkout, "2:3%c", sp);
            break;
        case 2:
            fprintf(fblkout, "3:1%c", sp);
            break;
        case 3:
            fprintf(fblkout, "all%c", sp);
            break;
        }

        if (x == 3) {                   /* all three frames combined      */
            for (i = 0; i < 16; i++)
                if (dinuc_tot[x])
                    fprintf(fblkout, "%5.3f%c",
                            (float) (din[0][i] + din[1][i] + din[2][i]) /
                            (float) dinuc_tot[x], sp);
                else
                    fprintf(fblkout, "%5.3f%c", 0.00, sp);
        } else {
            for (i = 0; i < 16; i++)
                if (dinuc_tot[x])
                    fprintf(fblkout, "%5.3f%c",
                            (float) din[x][i] / (float) dinuc_tot[x], sp);
                else
                    fprintf(fblkout, "%5.3f%c", 0.00, sp);
        }

        if (human || x == 3)
            fprintf(fblkout, "\n");
    }
    return 1;
}

/************* Coa_raw_out *************************************/
/* Write out codon usage in a format compatible with the format required */
/* by text2bin, i.e. part of the COA analysis suite of subroutines */
/* rather than storing this data in memory, we first write raw codon usage*/
/* to disk, and then read it in as necessary, the file handle for this */
/* data is passed via the fcoaout pointer.
By default it writes to the */
/* files coa_raw and coa1_raw */
/*                                                                        */
/* fcoaout  stream the record is appended to                              */
/* nncod    codon counts, indexed 1..64                                   */
/* nnaa     amino acid counts, indexed 1..21                              */
/* ttitle   sequence name; MODIFIED IN PLACE, every white space character */
/*          is replaced by '_'                                            */
/* Each record is "<running number>_<first 20 chars of name> " followed   */
/* by tab terminated counts and a newline: the 64 codon counts when       */
/* pm->coa is 'c' or 'r', the 21 amino acid counts when it is 'a'.        */
/* The running number is kept in a static counter.                        */
/* Always returns 1.                                                      */
/**************************************************************************/
char coa_raw_out(FILE * fcoaout, long *nncod, long *nnaa, char *ttitle)
{
    static int count = 0;
    int i;
    int len = (int) strlen(ttitle);     /* hoisted out of the loop test   */

    for (i = 0; i < len; i++)           /* don't take any chances         */
        /* <ctype.h> needs an unsigned char value, plain char may be < 0 */
        if (isspace((unsigned char) ttitle[i]))
            ttitle[i] = '_';

    /* strncpy() does not terminate pm->junk when the name has 20 or     */
    /* more characters, so the print below is bounded with %.20s         */
    strncpy(pm->junk, ttitle, 20);
    fprintf(fcoaout, "%i_%.20s ", ++count, pm->junk);

    switch (pm->coa) {
    case 'c':
    case 'r':                           /* rscu or codon usage            */
        for (i = 1; i < 65; i++)
            fprintf(fcoaout, "%li\t", nncod[i]);
        fprintf(fcoaout, "\n");
        break;
    case 'a':                           /* amino acid usage               */
        for (i = 1; i < 22; i++)
            fprintf(fcoaout, "%li\t", nnaa[i]);
        fprintf(fcoaout, "\n");
        break;
#ifdef DEBUG                            /* Debugging code                 */
    default:
        fprintf(pm->my_err, " Error in coa_out_raw\n");
        break;
#endif
    }
    return 1;
}

/********** sorted_by_axis1 *******************************************/
/* COA specific routine, after the position of the genes on the first axis*/
/* has been computed the genes are sorted according to there ordination */
/* this allows us to identify gene positioned at either end of the first */
/* trend. Then the codon usage of these genes is used to determine the CU */
/* of these two groups.
This information is used to identify optimal codon*/
/* calculated putative CAI adaptive values and for the Chi squared con- */
/* tingency test, used to identify the optimal and non-optimal codons */
/* The position of each gene on axis 1 is passed via the ax1 pointer */
/* The integer rank of each sequence is stored in sortax1 */
/* The number of genes is passed by the integer value lig */
/*                                                                        */
/* Both arrays are used from index 1 to lig.  On return sortax1[r] holds  */
/* the number of the gene with the r-th smallest ax1 value; genes with    */
/* equal values keep their original relative order.                       */
/* Exits through my_exit() if the work array cannot be allocated.         */
/**************************************************************************/
void sorted_by_axis1(double *ax1, int *sortax1, int lig)
{
    int    *placed;             /* TRUE once a gene has been ranked       */
    int     rank, gene, pick;
    double  lowest;

    placed = (int *) calloc(lig + 1, sizeof(int));
    if (placed == NULL)
        my_exit(3, "sorted by axis 1");

    /* calloc has already zeroed the array, kept as belt and braces */
    for (gene = 1; gene <= lig; gene++)
        placed[gene] = FALSE;

    for (rank = 1; rank <= lig; rank++) {
        /* start from the first gene that has not been ranked yet */
        pick = 1;
        while (placed[pick])
            pick++;
        lowest = ax1[pick];

        /* a later unranked gene wins only with a strictly lower value */
        for (gene = pick + 1; gene <= lig; gene++) {
            if (!placed[gene] && ax1[gene] < lowest) {
                lowest = ax1[gene];
                pick = gene;
            }
        }

        sortax1[rank] = pick;
        placed[pick] = TRUE;
    }

    free(placed);
}

/*********** gen_cusort_fop ******************************/
/* COA specific routine, takes the sorted array of axis 1 positions from */
/* sort_by_axis1 and passed via the sortax1 pointer. The array contains */
/* the genes in order of occurrence in the original input file, but the */
/* ranked order of each gene is recorded as the array value */
/* This allows us to identify genes position at either end of the main */
/* trend.
Then the codon usage of these genes is used to write out a file */ /* with the genes in a axis1 position order */ /* the codon usage of the two groups at either end of the principle axis */ /* are also counted. This information is then passed to highlow() */ /* The position of each gene on axis 1 is passed via the ax1 pointer */ /* The integer rank of each sequence is stored in sortax1 */ /* The number of genes is passed by the interger value lig */ /**************************************************************************/ void gen_cusort_fop(int *sortax1, int lig, FILE * fnam, FILE *ssummary) { int stops; long int *low, *high; int min, max, i ; float v2; FILE *fcusort = NULL; int j; /* first open the original raw codon usage file */ if ((fcusort = open_file("", "cusort.coa", "w", FALSE)) == NULL) my_exit(1, "gen_cusort_fop"); /* calloc enough memory for the codon usage of the low group of genes */ if ((low = (long int *) calloc(65, sizeof(long int))) == NULL) my_exit(3, "low gen_cusort_fop"); /* calloc enought memory for the codon usage of the high group of genes*/ if ((high = (long int *) calloc(65, sizeof(long int))) == NULL) my_exit(3, "high gen_cusort_fop"); /*pcoa->fop_gene is set in the advanced correspondence menu and is used*/ /*to set the No of genes at either end of the principle axis that are */ /*to be used to create the low and high codon bias subsets of genes */ if (pcoa->fop_gene < 0) { /* the number represent a percentage */ min = (int) ((float) lig * ((float) pcoa->fop_gene * -0.01)); max = lig - (int) ((float) lig * ((float) pcoa->fop_gene * -0.01)); } else { /* the value is an absolute number */ min = pcoa->fop_gene; max = lig - pcoa->fop_gene; } if (min <= 0) { /* error catch in case % is too low */ min = 1; /* or fop_gene is set too high */ fprintf(pm->my_err, "Problems with the number genes used for" " fop adjusting to 1 gene\n"); } if (max <= 0) { /* ditto */ max = 1; fprintf(pm->my_err, "Problems with the number genes used for" " fop 
adjusting to one gene\n"); } for (j = 1; j < 65; j++) { /* initialise the blank array */ low[j] = 0; high[j] = 0; } /* write explanation about what we are doing to summary.coa */ fprintf(ssummary, "\ncusort.coa (not shown here) contains CU of " "genes sorted by their\n" "ordination on the principle axis or factor\n" "Genes used to calculate fop were 1 to %i and %i to %i\n" "these gene numbers REFER ONLY to the file cusort.coa\n" ,min, max + 1, pcoa->rows); for (i = 1; i <= lig; i++) { /* foreach gene */ rewind(fnam); /* go to start of codon_raw */ clean_up(ncod, naa); /* blank the codon usage array */ j = 1; while (j++ != sortax1[i]) /* find the rank of gene i */ fgets(pm->junk, BUFSIZ,fnam);/* by scanning for lines of CU in */ fscanf(fnam, "%s", pm->junk); /* now we know the name of seq i */ for (j = 1; j < 64; j++) { /* now read in the cu of each codon */ fscanf(fnam, "%f", &v2); /* assign it initially to v2 */ ncod[j] = (long int) v2; /* then place this value in ncod */ if (min >= i) /* remember the codon usage of the */ low[j] += (long int) v2; /* two groups of genes at either end */ if (max < i) /* of the axis, containing min and */ high[j] += (long int) v2; /* max genes */ } fscanf(fnam, "%f\n", &v2); /* now read the last codon in */ ncod[64] = (long int) v2; if (min >= i) low[64] += (long int) v2; if (max < i) high[64] += (long int) v2; /* as above */ /* we want to use codon_us_out to write out the sorted list of CU */ /* to cusort.coa. 
But if we have any internal stops etc, it will */ /* generate error messages, but we have already seen this messages */ /* on the first pass, so we fool it by saying all the stops are */ /* valid stops and not to complain again */ for (j = 1, stops = 0; j < 65; j++) if (pcu->ca[j] == 11) stops += (int) ncod[j]; dot( 1 , 10 ); codon_usage_out(fcusort, ncod, 11, stops, pm->junk); } fileclose(&fcusort); highlow(low, high, ssummary); /* now we call highlow */ /* to use the sorted cu output */ free(low); /* release the memory to the OS */ free(high); } /************ highlow ********************************************/ /* The codon usage of the two groups on either end of the axis is assigned*/ /* to low and high ... perhaps these would be better called left and right*/ /* as when they are passed to this function it is not know which group is */ /* lowly or highly biased. This is decided within highlow, by calculating */ /* the enc (a measure of bias) for each group and assigning the group with*/ /* the lowest enc as the higher biased genes. 
This works if the trend */ /* represented by axis1 is truly selection for optimal translation */ /* IT'S THE USERS RESPONSIBILITY TO ASSERTAIN IF THIS IS VALID */ /* This information is used to identify optimal codons, as well as */ /* calculate putative CAI adaptive values and for the Chi squared con- */ /* tingency test, used to identify the optimal and non-optimal codons */ /**************************************************************************/ void highlow(long int *low, long int *high, FILE * ssummary) { int *last_row, icode, outer,i,j,x ; long int *aa_low, *aa_high, *left, *right, *left_aa, *right_aa; long int *highest_x; long int right_tot = 0, left_tot = 0; float enc_low, enc_high; float a, b, c, d, e, f, g, h, total, hr, br, *x2; float w; char *flag, sp; FILE *fcai=NULL,*fhilo = NULL, *ffop = NULL; FILE *fcbi=NULL; /*calloc to the pointers the required storage */ if ((fhilo = open_file("", "hilo.coa", "w", FALSE)) == NULL) my_exit(1, "hilo.coa"); if ((ffop = open_file("", "fop.coa", "w", FALSE)) == NULL) my_exit(1, "fop.coa"); if ((aa_low = (long int *) calloc(22, sizeof(long int))) == NULL) my_exit(3, "aa_low"); if ((aa_high = (long int *) calloc(22, sizeof(long int))) == NULL) my_exit(3, "aa_high"); if ((highest_x = (long int *) calloc(22, sizeof(long int))) == NULL) my_exit(3, "last_row"); if ((x2 = (float *) calloc(65, sizeof(float))) == NULL) my_exit(3, "x2"); if ((flag = (char *) calloc(65, sizeof(char))) == NULL) my_exit(3, "flag"); if ((last_row = (int *) calloc(65, sizeof(int))) == NULL) my_exit(3, "last_row"); if (pm->seq_format=='M') sp = pm->seperator; else sp = '\t'; /* initialize the various arrays */ for (x = 0; x < 4; x++) last_row[x] = 0; for (x = 0; x < 22; x++){ highest_x[x]=0; aa_low [x]=0; aa_high [x]=0; } for (x = 0; x <65 ; x++) { x2 [x]= (float) 0.0; flag [x]=0; last_row[x]=0; } /*count the amino acid usage for the two datasets, initially we only */ /*have the codon usage of the two groups */ for (i = 1; i < 65; i++) { 
aa_low[pcu->ca[i]] += low[i]; aa_high[pcu->ca[i]] += high[i]; flag[i] = ' '; /*flag is used to identify opt codons */ } enc_low = enc_out(fhilo, low, aa_low); /*calc enc for each of */ enc_high = enc_out(fhilo, high, aa_high); /*datasets */ fprintf(fhilo, "\n"); fprintf(ssummary, "\nenc_left %f enc_right %f\n", enc_low, enc_high); for (i = 1; i < 65; i++) { if (*(ds + i) == 1 || pcu->ca[i] == 11) /*skip stop and nonsynon*/ continue; if (enc_low < enc_high) { /*decide which is more */ left = low; /*biased */ right = high; /*left and right refer */ left_aa = aa_low; /*the columns of outputed*/ right_aa = aa_high; /*hilow table */ a = (float) low[i]; b = (float) high[i]; g = (float) aa_low[pcu->ca[i]]; h = (float) aa_high[pcu->ca[i]]; } else { left = high; right = low; left_aa = aa_high; right_aa = aa_low; a = (float) high[i]; b = (float) low[i]; g = (float) aa_high[pcu->ca[i]]; h = (float) aa_low[pcu->ca[i]]; } /* calculate the chi squared contingency value */ c = g - a; d = h - b; e = a + b; f = c + d; total = a + b + c + d; if (e * f * h * g) x2[i] = ((a * d - c * b) * (a * d - c * b)) * total / (e * f * g * h); else x2[i] = (float) -99.0; /*if 0 assign nonsense value*/ if (g * h) { hr = a / g; br = b / h; if (hr > br && x2[i] > 6.635) /* if significant at p<.99 */ flag[i] = '*'; else if (hr > br && x2[i] > 3.841) /* if significant at p<0.05 */ flag[i] = '@'; } } fprintf(ssummary, "Chi squared contingency test of genes from both\n" "extremes of axis 1\n"); /* this created the hi-low codon usage table */ /* Sample output truncated (***********************************************/ /*Asp GAU 0.10 ( 10) 1.68 ( 53) Gly GGU 0.21 ( 12) 0.85 ( 11) */ /* GAC* 1.90 (184) 0.32 ( 10) GGC* 3.13 (176) 2.00 ( 26) */ /*Glu GAA 0.00 ( 0) 1.34 ( 55) GGA 0.05 ( 3) 0.69 ( 9) */ /* GAG* 2.00 (255) 0.66 ( 27) GGG 0.60 ( 34) 0.46 ( 6) */ /* */ /* */ /* Number of codons in high bias dataset 2825 */ /* Number of codons in low bias dataset 1194 */ /*Note: high bias was assigned to the 
dataset with the lower average Nc */ /*NO Chi could be calculated for UGU */ /*Codon UUC (Phe) chi value was 70.175 */ /*Codon UCC (Ser) chi value was 48.030 */ /*Codon UAC (Tyr) chi value was 86.069 */ /**************************************************************************/ for (outer = 1; outer <= 3; outer += 2) { for (x = 1; x < 5; x++) { for (j = 1; j < 5; j++) { icode = ((x - 1) * 16) + ((j - 1) * 4) + outer; for (i = icode; i <= icode + 1; i++) { /*loop twice */ /* if the previous entry in this column codes for the same AA */ if (last_row[i % 2] != pcu->ca[i]) { fprintf(fhilo, "%s%c%s%c%c", paa->aa3[pcu->ca[i]], sp, paa->cod[i], flag[i], sp); fprintf(ssummary, "%s%c%s%c%c", paa->aa3[pcu->ca[i]], sp, paa->cod[i], flag[i], sp); } else { fprintf(fhilo, "%c%s%c%c", sp, paa->cod[i], flag[i], sp); fprintf(ssummary, " %c%s%c%c",sp,paa->cod[i],flag[i],sp); } /* write out Codon usage, RSCU and significance for both data */ fprintf(fhilo, "%4.2f (%3i) %4.2f (%3i)%c", (left[i]) ? ((float) left[i] / (float) left_aa[pcu->ca[i]]) * (float) (*(ds + i)) : 0.0, (int) left[i], (right[i]) ? ((float) right[i] / (float) right_aa[pcu->ca[i]]) * (float) (*(ds + i)) : 0.0, (int) right[i],sp); /* end of fprintf */ fprintf(ssummary, "%4.2f (%3i) %4.2f (%3i)%c", (left[i]) ? ((float) left[i] / (float) left_aa[pcu->ca[i]]) * (float) (*(ds + i)) : 0.0, (int) left[i], (right[i]) ? 
((float) right[i] / (float) right_aa[pcu->ca[i]]) * (float) (*(ds + i)) : 0.0, (int) right[i],sp); /* end of fprintf */ last_row[i % 2] = pcu->ca[i]; /* remember the last row */ } fprintf(fhilo, "\n"); fprintf(ssummary, "\n"); } fprintf(ssummary, "\n"); fprintf(fhilo, "\n"); } fprintf(ssummary, "\n"); fprintf(fhilo, "\n"); } for (i = 1; i < 65; i++) { /* count both datasets */ right_tot += right[i]; left_tot += left[i]; } fprintf(fhilo, "\tNumber of codons in high bias dataset %li\n", left_tot); fprintf(fhilo, "\tNumber of codons in low bias dataset %li\n", right_tot); fprintf(fhilo, "Note: high bias was assigned to the dataset with the lower" " average Nc\n"); fprintf(ssummary, "\tNumber of codons in high bias dataset %li\n", left_tot); fprintf(ssummary, "\tNumber of codons in low bias dataset %li\n", right_tot); fprintf(ssummary, "Note high bias was assigned to the genes with the lower" " overall Nc\n"); /* now printout the Chi Squared values for each significant comparison */ for (i = 1; i < 65; i++) { if (flag[i] == '*' || flag[i] == '@') { fprintf(fhilo, "Codon %s (%s) chi value was %.3f\n", paa->cod[i], paa->aa3[pcu->ca[i]], x2[i]); fprintf(ssummary, "Codon %s (%s) chi value was %.3f\n", paa->cod[i], paa->aa3[pcu->ca[i]], x2[i]); } if (x2[i] == -99) /* there were no codons in one of the groups*/ fprintf(fhilo, "NO Chi could be calculated for %s\n", paa->cod[i]); } fprintf(fhilo, "\n"); fprintf(ssummary, "\n"); /* now write out the optimal codons as PUTATIVELY identified by codonW */ fprintf(ssummary, "These are the PUTATIVE optimal codons\n" "This is the format required for Menu 4 option 2 (Fop) " "and option 3 (CBI)\n" "This data is also duplicated in the files \"fop.coa\" " "and \"cbi.coa\"\n" "The format of these files is that required for input " "as a personal choice\n" "of optimal codons for these indexes\n"); for (i = 1; i < 65; i++) { if( left[i] > highest_x[pcu->ca[i]]) /* used for calculating CAI */ highest_x[pcu->ca[i]]=left[i]; if (*(ds + i) == 1 
|| pcu->ca[i] == 11) { fprintf(ffop, "2"); fprintf(ssummary, "2"); } else if (flag[i] == '*') { fprintf(ffop, "3"); fprintf(ssummary, "3"); } else if (((left[i]) ? ((float) left[i] / (float) left_aa[pcu->ca[i]]) * (float) (*(ds + i)) : 0.0) < 0.1) { /* if RSCU <0.1 its rare */ fprintf(ffop, "1"); fprintf(ssummary, "1"); } else { fprintf(ffop, "2"); fprintf(ssummary, "2"); } if (!(i % 16)) { /* handle line wrapping */ fprintf(ffop, "\n"); fprintf(ssummary, "\n"); } else { fprintf(ffop, ","); fprintf(ssummary, ","); } } fileclose(&ffop); /* close the Fop file */ if ((fcbi = open_file("", "cbi.coa", "w", FALSE)) == NULL) my_exit(1, "cbi.coa"); /* open cbi.coa */ for (i = 1; i < 65; i++) { /* write values 2 cbi.coa*/ if (flag[i] == '*') /* Only report optimal codons */ fprintf(fcbi, "3"); else fprintf(fcbi, "2"); /* ignore non optimal codons */ if (!(i % 16)) fprintf(fcbi, "\n"); else fprintf(fcbi, ","); } fileclose(&fcbi); fprintf(ssummary, "\n\n"); /* now calculate and write out CAI adaptiveness values */ fprintf(ssummary, "These are PUTATIVE CAI adaptiveness values " "identified by this programme\n" "This data is also duplicated in the file \"cai.coa\"\n" "The format of this file is compatible with the format\n" "of the file used to input a personal selection of CAI values\n" "That is, the format required for Menu 4 option 1\n" "cai.coa\tinput file to be used for CAI calculations\n" "\n\nCod AA Xi\tWi\t\tCod AA Xi\tWi\n"); if ((fcai = open_file("", "cai.coa", "w", FALSE)) == NULL) my_exit(1, "cai.coa"); for (i = 1, x = TRUE ; i < 65 && x ; i++) { /* if a stop or a non-synonymous codon w = 1 */ if (*(ds + i) == 1 || pcu->ca[i] == 11) { fprintf(fcai, "1.0000000 \n"); fprintf(ssummary,"%s %s %6.1f %9.7f\t", paa->cod[i], paa->aa3[pcu->ca[i]], (float) left[i], 1.0000000); } else if ( highest_x[pcu->ca[i]] ) { /* if a codon is absent then adjust its frequecy to 0.5 */ if ( left[i] ) w= (float) left[i]/ (float) highest_x[pcu->ca[i]]; else w= (float) 0.5 / (float) 
highest_x[pcu->ca[i]]; fprintf(fcai, "%9.7f \n", w); /* output CAI W */ fprintf(ssummary,"%s %s %6.1f %9.7f\t", paa->cod[i], paa->aa3[pcu->ca[i]], (left[i]) ? (float) left[i]:0.5 , w); /* either strange amino acid composition or data sets where too small */ } else { fprintf(pm->my_err, "WARNING An attempt to calculate CAI relative " "adaptivnesss FAILED\n no %s amino acids found" " in the high bias dataset \n",paa->aa3[pcu->ca[i]]); fprintf(ssummary, "\nWARNING An attempt to calculate CAI relative adaptiveness " "FAILED\n no %s amino acids found in the high bias dataset \n", paa->aa3[pcu->ca[i]]); x=FALSE; } if( !(i%2)) fprintf (ssummary , "\n"); } /* matches for (i = 1, x = TRUE ; i < 65 && x ; i++) */ fileclose(&fcai); /* close files */ fileclose(&fhilo); free(aa_low); /* free memory */ free(aa_high); free(highest_x); free(x2); free(flag); free(last_row); return; } /********************* hydro_out **********************************/ /* The general average hydropathicity or (GRAVY) score, for the hypothet- */ /* ical translated gene product. It is calculated as the arithmetic mean */ /* of the sum of the hydropathic indices of each amino acid. This index */ /* was used to quantify the major COA trends in the amino acid usage of */ /* E. coli genes (Lobry, 1994). */ /* Calculates and outputs total protein hydropathicity based on the Kyte */ /* and Dolittle Index of hydropathicity (1982) */ /* nnaa Array with frequency of amino acids */ /* paa points to a struct containing Amino Acid values */ /* pap->hydro Pointer to hydropathicity values for each AA */ /**************************************************************************/ int hydro_out(FILE * foutput, long int *nnaa) { long int a2_tot = 0; float hydro = (float) 0.0; int i; char sp= (pm->seq_format=='H')? (char) '\t': (char) pm->seperator; for (i = 1; i < 22; i++) if (i != 11) a2_tot += nnaa[i]; if (!a2_tot) { /* whow .. 
no amino acids what happened */ fprintf(pm->my_err, "Warning %.20s appear to be too short\n", title); fprintf(pm->my_err, "No output was written to file \n", title); return 1; } for (i = 1; i < 22; i++) if (i != 11) hydro += ((float) nnaa[i] / (float) a2_tot) * (float) pap->hydro[i]; fprintf(foutput, "%8.6f%c", hydro,sp ); return 1; } /**************** Aromo_out ***********************************************/ /* Aromaticity score of protein. This is the frequency of aromatic amino */ /* acids (Phe, Tyr, Trp) in the hypothetical translated gene product */ /* nnaa Array with frequency of amino acids */ /* paa points to a struct containing Amino Acid values */ /* pap->aromo Pointer to aromaticity values for each AA */ /**************************************************************************/ int aromo_out(FILE * foutput, long int *nnaa) { long int a1_tot = 0; float aromo = (float) 0.0; int i; char sp= (pm->seq_format=='H')? (char) '\t': (char) pm->seperator; for (i = 1; i < 22; i++) if (i != 11) a1_tot += nnaa[i]; if (!a1_tot) { fprintf(pm->my_err, "Warning %.20s appear to be too short\n", title); fprintf(pm->my_err, "No output was written to file \n", title); return 1; } for (i = 1; i < 22; i++) if (i != 11) aromo += ((float) nnaa[i] / (float) a1_tot) * (float) pap->aromo[i]; fprintf(foutput, "%8.6f%c", aromo,sp); return 1; } codonW/codons.c 777 0 0 137567 10237502004 7052 0/**************************************************************************/ /* CodonW codon usage analysis package */ /* Copyright (C) 2005 John F. Peden */ /* This program is free software; you can redistribute */ /* it and/or modify it under the terms of the GNU General Public License */ /* as published by the Free Software Foundation; version 2 of the */ /* License, */ /* */ /* This program is distributed in the hope that it will be useful, but */ /* WITHOUT ANY WARRANTY; without even the implied warranty of */ /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the */ /* GNU General Public License for more details. */ /* You should have received a copy of the GNU General Public License along*/ /* with this program; if not, write to the Free Software Foundation, Inc.,*/ /* 675 Mass Ave, Cambridge, MA 02139, USA. */ /* */ /* */ /* The author can be contacted by email (jfp#hanson-codonw@yahoo.com Anti-*/ /* Spam please change the # in my email to an _) */ /* */ /* For the latest version and information see */ /* http://codonw.sourceforge.net */ /**************************************************************************/ /* */ /* ----------------------- Codons.C ------------------------ */ /* This file contains main() function and drives CodonW. */ /* */ /* External subroutines and functions */ /* clearscr screen clearing Macro defined in CodonW.h */ /* proc_comm_line process command line arguments */ /* initilize_point assigns genetic code dependent parameters to structs*/ /* initilize_coa selects the default codons to exclude from the */ /* Correspondence Analysis */ /* main_menu The interactive menu system */ /* clean_up Re-zeros various internal counters and arrays */ /* open_file Open files, checks for existing files */ /* fileclose Closes files and returns a NULL pointer or exits */ /* textbin Converts codon usage to binary data file */ /* dot(,X) prints a period every X times it is called */ /* PrepAFC Prepare for the COA */ /* DiagoRC This routine generates the COA */ /* colmout write the output from COA to file */ /* rowout save as above except records the gene information */ /* inertialig analyse row inertia and records the results to file */ /* inertiacol analyse column inertia and record the results */ /* suprow add supplementary genes into COA */ /* get_aa converts a three base codon into a 1 or 3 letter AA */ /* codon_error Called after all codons read, checks data was OK */ /* rscu_usage_out Write out RSCU */ /* codon_usage_out Write out Codon Usage */ /* raau_usage_out Write out normalised amino acid usage 
*/ /* dinuc_count Count the dinucleotide usage */ /* dinuc_out Write out dinucleotide usage */ /* aa_usage_out Write out amino acid usage */ /* gc_out Writes various analyses of base usage */ /* cutab_out Write a nice tabulation of the RSCU+CU+AA */ /* base_sil_us_out Write out base composition at silent sites */ /* cai_out Write out CAI usage */ /* cbi_out Write out codon bias index */ /* fop_out Write out Frequency of Optimal codons */ /* enc_out Write out Effective Number of codons */ /* hydro_out Write out Protein hydropathicity */ /* aromo_out Write out Protein aromaticity */ /* coa_raw_out Write out raw codon usage for use by COA analysis */ /* */ /* */ /* Internal subroutines to Codon.c */ /* my_exit Controls exit from CodonW closes any open files */ /* tidy reads the input data */ /* output called from tidy to decide what to do with the data */ /* toutput handles the reformatting and translation of seqs */ /* output_long if sequence is very long then process what we know */ /* and write sequence to disk in fragments */ /* file_close Closes open files */ /* c_help Generates help informatio */ /* WasHelpCalled Checks strings to see if help was requested */ /* */ /**************************************************************************/ #include #include #include #include #include #include #define ORIG_DEFS /* used to decide whether declarations are external or not */ /* Master Header file */ #include "codonW.h" #undef ORIG_DEFS #if defined(__MWERKS__) #include #endif /************************** MAIN **************************************/ /* The main function processes commandline arguments to decide whether */ /* CodonW is running in an interactive mode, if so then the menu is called*/ /* CodonW also had the less documented feature of imitating other useful */ /* codon usage and sequence manipulation program. 
If the program is */ /* called by a recognised name (see proc_comm_line for a list) such as */ /* rscu then pm->codons is false and it only performs the required tasks */ /* bypassing the menu system. */ /* Main then calls tidy() to read in the data files, and count codon usage*/ /* depending on the requested output options toutput calls various subrou */ /* tines. If COA has been requested it also calls these subroutines and */ /* recording useful information to summary.coa. */ /**************************************************************************/ int main(int argc, char *argv[]) { FILE *finput = NULL, *foutput = NULL, *fblkout = NULL; FILE *fcoaout = NULL; FILE *fsummary= NULL; int num_seq = 0; num_sequence = 0; num_seq_int_stop = 0; valid_stops = 0; last_aa = 0; #if defined(__MWERKS__) /* Macintosh code-warrior */ argc=ccommand(&argv); #endif pm = &Z_menu; pm->totals = FALSE; pm->my_err = stderr; initilize_point(pm->code, pm->f_type, pm->c_type); initilize_coa(pm->code); proc_comm_line(&argc, &argv); /******************** main loop ****************************/ do { if (pm->codonW) { /* If the program chosen is codons */ printf(" Welcome to CodonW %.*s for Help type h\n\n", (int) strlen(Revision) - 11, Revision +10 ); /* Now Run the main menu interface */ if (pm->menu) main_menu(0); } /* if users select human readable output they want nice tables */ if (pm->bulk == 'C' && pm->seq_format == 'H') pm->bulk = 'O'; if (pm->bulk == 'S' && pm->seq_format == 'H') pm->bulk = 'O'; pm->analysis_run = TRUE; /* codons has started an analysis this*/ /* parameter is checked by my_exit */ if (pm->inputfile != NULL) /* rewind various input files in case */ rewind(pm->inputfile); /* this is a second analysis run */ if (pm->fopfile != NULL) rewind(pm->fopfile); if (pm->cbifile != NULL) rewind(pm->cbifile); if (pm->caifile != NULL) rewind(pm->caifile); /* num_sequence number of sequences read */ /* num_seq_int_stop number with internal stop codons */ /* valid_stops 
No.terminated with a stop codon */ /* tot total number of codons read */ num_sequence = num_seq_int_stop = valid_stops = tot = 0; clean_up(ncod, naa); /*re-zero count of amino and codons */ finput = pm->inputfile; foutput = pm->outputfile; fblkout = pm->tidyoutfile; fileclose(&pm->fcoa_out); if (pm->coa) if ((pm->fcoa_out = open_file("", "coa_raw", "w", FALSE)) == NULL) my_exit(1, "coa_raw"); /*controlled exit from CodonW */ fcoaout = pm->fcoa_out; /* Tidy */ /* reads input data, returns the number of sequences read in */ /* num_sequence is global so I don't really have to assign it here */ num_sequence = tidy(finput, foutput, fblkout, fcoaout); fprintf(pm->my_err,"\n\n\t\tNumber of sequences: %i\n", num_sequence); /* num_seq_int_stop value is calculated in codon_usage_out */ if (num_seq_int_stop > 0 && pm->warn ) { if (pm->totals && (num_seq_int_stop >= valid_stops )) fprintf(pm->my_err, "\tWARNING\t At least one sequence in your" " input file has\ninternal stop codons (found %i" " internal stops) \tWARNING\n",num_seq_int_stop); else fprintf(pm->my_err, "\tWARNING\t %i sequences had internal " "stop codons \tWARNING\n",num_seq_int_stop); } /* don't wait for a pause if no_menu has been set */ if ( pm->codonW && pm->menu ) pause; if ( pm->coa && pm->totals) /* idiots error catch */ my_exit(99,"A COA analysis of concatenated sequences is nonsensical\n" "I have completed any other requests but not the COA"); /* if COA has been requested then open summary.coa and start the analysis */ if (pm->coa) { if (fsummary == NULL) if ((fsummary = open_file("", "summary.coa", "w", FALSE)) == NULL) my_exit(1, "summary.coa"); /* set the number of genes in the analysis to the number read in by tidy */ pcoa->rows = num_sequence; fileclose(&fcoaout); /* if COA has been selected then during the reading in phase raw codon usag*/ /* will have been written to the file coa_raw */ /* text bin converts this to binary data for the COA analysis program */ textbin("coa_raw", "cbrawin"); 
printf("Generating correspondence analysis\n"); dot(0,10); fprintf(fsummary, "\t\tSummary of Correspondence Analysis \n\n" "The input file was %s it contained %i genes\n" "The number of axes generated was %i\n" "A COA was requested of %s%s usage\n\n\n" "Most of the output presented in this file " "has also been written to separate files\n" "genes.coa\tThe position of the genes on the " "first %i axis\n" "%s.coa\tThe position of the %i %s on the %i " "principle axes\n\n\n", pm->curr_infilename, pcoa->rows, ((pcoa->rowscolm)?pcoa->rows:pcoa->colm)-1, (pm->coa == 'r') ?"relative synonymous ":"", (pm->coa == 'a') ?"amino acid" : "codon", pcoa->axis, (pm->coa == 'a') ?"amino" : "codon", pcoa->colm, (pm->coa == 'a') ?"amino acids":"codons", pcoa->axis); /* allocate memory for the rows and columns, scale both, and write out the*/ /* resulting matrix to the file cbrawin */ PrepAFC("cbrawin"); /* Now do the analysis, calculate the data inertia and all the vectors */ DiagoRC(fsummary); /* colmout records the position of the columns on each of the factors/axes*/ if (pm->coa == 'a') colmout("cbfcco", "amino.coa", paa, fsummary); else colmout("cbfcco", "codon.coa", paa, fsummary); /* rowout records the position of the genes on each of the axis */ rowout("cbfcli", "genes.coa", "coa_raw", fsummary); /* pcoa->level == e for exhaustive analysis of inertia */ if (pcoa->level == 'e') { fprintf(fsummary, "\n\n\nYou requested detailed output from the COA" "\n\nThe absolute and relative inertia " "of each gene and %s (see also inertia.coa)\n", (pm->coa == 'a') ? 
"amino acids" : "codons"); /* inertialig must preceed inertiacol, records inertia of genes to file */ /* it opens the raw codon usage file and loads the raw data to memory */ inertialig("inertia.coa", "coa_raw" ,fsummary); /* uses the preloaded raw codon usage, to calculate inertia and other data*/ /* such as contribution of each column to each factor and to the extent */ /* each column is explained by each factor and what the residual variation*/ /* is */ inertiacol("inertia.coa", fsummary); } /* if pcoa->add_row is real string, then it will be the name of the file */ /* containing additional sequence data, that will be excluded from the COA*/ /* but factored in, using the original COA vectors and then all other */ /* calculation can proceed as with the original data */ if (strlen(pcoa->add_row)) { if ((finput = open_file("", pcoa->add_row, "r", FALSE)) == NULL) my_exit(6, "add_row"); if ((foutput = tmpfile()) == NULL) my_exit(1, "temp file foutput"); if ((fblkout = tmpfile()) == NULL) my_exit(1, "temp file fblkout"); if ((fcoaout = open_file("", "coa1_raw", "w", FALSE)) == NULL) my_exit(1, "coa1_raw"); clean_up(ncod, naa); num_sequence =num_seq_int_stop=valid_stops=tot = 0; /* load the additional data file and process as normal */ /* but don't calculate any indices or write the data to the normal output */ /* files, rather write them to tmp files which will be deleted at end of */ /* program execution */ num_seq = tidy(finput, foutput, fblkout, fcoaout); /* close the files now we are finished */ fileclose(&fcoaout); fileclose(&foutput); fileclose(&fblkout); fileclose(&finput); /* covert to binary, use additional raw data file, note not coa_raw this */ textbin("coa1_raw", "cb1raw"); /* now call the routine suprow and add these additional genes, we will */ /* process this data for inertia and append the gene and col. 
coordinates */ /* to the original gene.coa and codon.coa (or amino.coa) */ suprow(num_seq, "cbfcvp", "cb1raw", "genes.coa", "coa1_raw", fsummary); /* close these files now that we have finished with them and the COA */ fileclose(&foutput); fileclose(&fblkout); fileclose(&fcoaout); } } printf("\n"); } while (pm->codonW && pm->menu ); /* OK now we loop back to main_menu */ /* though only if we are in interactive mode and running as CodonW */ my_exit(0,""); /* last call to my_exit */ return 0; /* dummy return to keep pedantic but */ /* brain dead compilers happy */ } /********************** END of MAIN() **********************************/ /********************** Subroutines **********************************/ /* Tidy */ /* reads input data from a sequence file containing fasta like formatted */ /* sequence discards numbers, but keeps other characters */ /* Each sequence must begin with title line must start with > or ; */ /* any following descriptive lines must begin with ; or >.Sequence start */ /* is the first alphabetic character on the line following the headers */ /* There is no limit to sequence length or number of sequences but */ /* input lines should be less than 200 char in width */ /**************************************************************************/ int tidy(FILE * finput, FILE * foutput, FILE * fblkout, FILE * fcoaout) { char seq[MAX_GENE + LINE_LENGTH + 1]; char in[LINE_LENGTH + 1]; int first_line = TRUE, ic = 0; int ii = 0; int i,x; long ic_orig = 0; /* while still able to read data from the input file keep reading */ while ((fgets(in, LINE_LENGTH, finput) != NULL)) { /* idiot error check to see if the file looks like fasta or PIR format */ if (!num_sequence && in[0] != ';' && in[0] != '>') { fprintf(stderr, "\n Error input file not in a recognised format \n" " you must convert it into FASTA/Pearson format" " EXITING\n"); my_exit(99, "input file not in a recognised format:tidy"); } if (in[0] == ';' || in[0] == '>') { /* if true them this is a 
header */ if (first_line) { /* if true this is the first header*/ first_line = FALSE; /* will only be reset when reread */ /* the next sequence */ if (num_sequence) { /* wait till we have read the first*/ /* before writing to disk */ /* now if we are concatenating sequence data we need will handle it thus */ if (pm->totals) { /* first if translating or reformatting the input file flush the read */ /* data to the disk */ if (strchr("RNT",(int)pm->bulk)!=NULL) output_long(fblkout, seq); if (tot) { /* if something we have sequence read in, then we need to process this */ /* check whether the last codon of the sequence was was a stop */ last_aa = codon_usage_tot(seq, tot); if (pcu->ca[last_aa] == 11) valid_stops++; } /* rather re-setting everything to zero, we will just blank the array seq */ tot = 0; } else { /* else matches if tot; if sequences are not being concatenated we call */ /* output to decide what to do with all the read data */ /* then we blank all the data from memory and start again */ output(seq, foutput, fblkout, fcoaout); clean_up(ncod, naa); } } /* matches if(num_sequence) */ /* If we get here we have read a header line, this then needs to be proc'ed*/ /* first the header is tested to see does it contain spaces the string is */ /* converted from the first non space character to the title array */ for (ii = 1; isspace( (int) in[ii]) && ii < (int) strlen(in); ii++) ; strncpy(title, in + ii, 99); /* Titles are cleaned up by removing newline characters and the delimiting */ /* character p,->seperater and also null terminating the title string */ for (i = 0; i < (int) strlen(title); i++) { if (title[i] == '\n') title[i] = '\0'; /* chops new line off */ else if (title[i] == pm->seperator ) title[i] = '_'; /* removes the separator if present */ else if (i == (int) (strlen(title) - 1)) title[i] = '\0'; /* if we have reached end of title */ } /* if we are reformatting the data, we print a friendly dot just in-case */ if (strchr("RNT", (int)pm->bulk) ==NULL || 
pm->totals) dot((int) num_sequence, 5); /* we have now finished processing our first header line and are reading */ /* our sequence data */ num_sequence++; } /* matches if first line */ continue; /* read another line ie. jump to while()*/ } /* if (in[0] == ';' || in[0] == '>') */ else{ /* this must be a line containing seq */ first_line = TRUE; /* so reset the first_line variable */ } /* at this point we have read in the header lines and have been or about to*/ /* process the input data, now we test how much we have read into the array*/ /* seq, tot is equivalent to the last element in the array */ /* if tot is greater than or equal to MAX_GENE then the array is quite full*/ /* luckily we made the array seq to be MAX_GENE plus LINE_LENGTH +1 */ if (tot >= MAX_GENE) { /* sequence is larger than seq */ master_ic += MAX_GENE; /* now remember how many bases we are */ ic_orig = tot; /* going to write to disk */ /* and what size the array was to start */ if (strchr("RNT", (int) pm->bulk) != NULL) output_long(fblkout, seq);/* flush to disk and then continue */ else if (pm->bulk == 'D') dinuc_count(seq, tot); /* then we had better count the dinucs */ /* Debugging code in-case we are asking for something that we can't handle */ #ifdef DEBUG else if (strchr("OCASLDBX", (int) pm->bulk) != NULL) ; /* dummy */ else if (pm->bulk) fprintf(stderr, "ERROR-22 %c pm->bulk undefined\n", pm->bulk); if (pm->cai || pm->fop || pm->cbi || pm->enc || pm->gc || pm->gc3s || pm->sil_base || pm->bulk || pm->coa); else fprintf(stderr, "Programming error"); #endif /* Now count first MAX_GENE bases, luckily MAX_GENE is always a multiple of*/ /* 3, we count the bases and amino acids in codon_usage_tot */ last_aa = codon_usage_tot(seq, MAX_GENE); /* now we move all unprocessed/written/counted bases to the front of seq */ for (i = MAX_GENE, x = 0; i < ic_orig; i++, x++) seq[x] = seq[i]; /* i is pointing near the end of array */ tot = x; /* x the front of the array */ } /* Matches if (tot >= MAX_GENE) 
*/ ic = 0; /* first base of the input file */ while (in[ic] != '\0') { /* scan input line till we see a Null */ if (isalpha((int)in[ic])) ; /* do nothing if a alpha */ else if (pm->bulk == 'R' && in[ic] == '-'); /* do nothing */ else if (in[ic] == '*' || in[ic] == '.') ; /* do nothing */ else { ic++; /* is not one above skip to next letter */ continue; } /* while( in[ic] != '\0') */ in[ic] = (char)toupper((int)in[ic]);/* converts2capitals */ if (strrchr("CG", (int) in[ic]) != NULL) GC_TOT++; /* is it a G or C */ else if (strrchr("ATU", (int) in[ic]) != NULL) AT_TOT++; /* is it an A or T */ else if ( in[ic] == '-' ) GAP_TOT++; /* is it a gap character */ else non_std_char++; /* then it isn't a standard base */ if (strrchr("ABCDEFGHIKLMNPQRSTVWYZX" ,(int) in[ic]) != NULL) AA_TOT++; /* it might be an amino acid */ if (strrchr("MRWSYKVHDBXN" , (int) in[ic]) != NULL) IUBC_TOT++; /* it might be a IUBC code */ seq[tot] = in[ic]; /* move base into seq array */ seq[tot + 1] = '\0'; /* make sure array is null term'ed */ /* now we test that the first codon is a valid start codon */ if ( tot == 0 && master_ic == 0 ) { in[1] = (char)toupper((int)in[1]); /* Uppercase the first codon */ in[2] = (char)toupper((int)in[2]); if ( in[1] == 'T' && (in[0] == 'A' || in[2] == 'G' )) valid_start=TRUE; /* Yeup it could be a start codon */ else valid_start=FALSE; /* Nope it doesn't seem to be one */ } ic++; /* total No. 
of sequence bases read */ tot++; /* total currently stored in memory */ } } /* reached end of input file */ /* Idiot error catch, this file is empty, at least it looks empty to codonW*/ if ( !num_sequence ) my_exit(99,"The input file was empty"); /* better make sure to write anything left in seq to disk before returning */ output(seq, foutput, fblkout, fcoaout); return (int) num_sequence; } /************************ TOUTPUT **********************************/ /* toutput */ /* */ /* This subroutine is very similar to output_long, basically it reformats */ /* or translates sequences less than MAX_GENE in length as a single read */ /* It writes in reader format "ACG ATT ATC" i.e writes the sequence in */ /* codons. Because it works with output_long it needs to know whether */ /* the sequence being written to disk is a fragment or a complete gene */ /**************************************************************************/ int toutput(FILE * fblkout, char *seq) { long int ic = 0; int space = 3; char codon[4]; int i,x; if (long_seq == FALSE) { /* then this must be a complete genes */ switch (pm->bulk) { case 'T': /* tidy or fasta formatted header */ fprintf(fblkout, ">%-20.20s%6li\n", title, (long int) tot + master_ic); break; case 'R': /* reader header .. 
don't ask */ fprintf(fblkout, ">%6li %-70.70s\n", (long int) tot + master_ic, title); break; case 'N': /* Conceptually translated DNA header */ fprintf(fblkout, ">%-20.20s%6li\n", title, (long int) ((tot + master_ic) / 3)); break; default: /* whoops */ printf("\nProgramming error type A2 check code \n"); my_exit(99, "toutput"); break; } } else { /* then long_seq must be true, this means we are about to finish writing a*/ /* sequence that has already been written in MAX_GENE chunks to disk) */ /* when we wrote the original header line, we didn't know the size of the */ /* sequence, but now we do so we are going to update that bit of info */ /* luckily remembered to record where the header line is in the file */ /* its at fl_pos_start */ fl_pos_curr = ftell(fblkout); /* record where we are at present */ fseek(fblkout, fl_pos_start, 0);/* find the header line for this seq */ switch (pm->bulk) { case 'T': /* Now update the info */ fprintf(fblkout, ">%-20.20s%6li", title, (long int) tot + master_ic); break; case 'R': fprintf(fblkout, ">%6li %-70.70s", (long int) tot + master_ic, title); break; case 'N': fprintf(fblkout, ">%-20.20s%6li", title, (long int) ((tot + master_ic) / 3)); break; default: printf("\nProgramming error type A3 check code \n"); my_exit(99, "output"); } fseek(fblkout, fl_pos_curr, 0);/* now we move back to where we were */ } while (ic < tot) { /* keep writing till the array is empty*/ switch (pm->bulk) { case 'T': fprintf(fblkout, "%c", seq[ic++]); reg++; break; case 'R': if (space == 3) { /* Its reader format so print a space */ fprintf(fblkout, " "); /* every third base */ space = 0; } else { /* not the 3rd base yet so just print */ fprintf(fblkout, "%c", seq[ic++]); space++; reg++; } break; case 'N': for (i = (int) ic, x = 0; i < (int) ic + 3 && i < tot; i++, x++) codon[x] = *(seq + i); /* get the next three bases if there */ codon[x] = '\0'; /* null terminate the codon array */ ic += 3; /* remember that we have read 3 bases */ /* use the function 
get_aa to return the amino acid for the codon */ /* 1 = is for the one letter code of the codon */ fprintf(fblkout, "%c", *get_aa(1, codon)); reg++; break; } if (!(reg % 61)) { /* every 60 bases print a new line char */ reg = 1; fprintf(fblkout, "\n"); } } if (reg != 1) { /* reached the end of sequence so we */ fprintf(fblkout, "\n"); /* print a \n char unless we just did */ reg = 1; /* reset number of bases printed */ } /* Now that we have finished writing this sequence to disk lets have a */ /* closer look at it, and do a few diagnostics about the bases used */ if (AT_TOT + GC_TOT > AA_TOT*0.5) {/* Assume its DNA then */ fprintf(pm->my_err, "%3li>\t%6li %-40.40s\tDNA\tGC%" " =%5.3f\n" /* with G+C content and length of gene */ ,num_sequence ,(long int) tot + master_ic, title ,(float) GC_TOT / (GC_TOT + AT_TOT)); if (non_std_char - IUBC_TOT && pm->warn ) /* any non IUBC characters */ fprintf(pm->my_err, "\t\t WARNING %d non IUBC standard characters " "in sequence %i\n" ,non_std_char - IUBC_TOT ,num_sequence); } else { /* if not DNA then it must be a protein */ fprintf(pm->my_err, "\t%3i>\t%6li %-40.40s\tPROTEIN\n" ,num_sequence ,(long int) tot + master_ic ,title); if ( (tot+master_ic)-AA_TOT && pm->warn) /* non IUBC AA chars */ fprintf(pm->my_err, "\t\t WARNING %d non " "standard AA characters " "in sequence %i\n" ,non_std_char ,num_sequence); } return 1; /* return to calling function */ } /************************* output_long **********************************/ /* called to write a block of a sequence that has exceeded the MAX_GENE */ /* limit. If this is the first time it has been called for this sequence */ /* (ie. 
long_seq is false) it write a dummy header line which is updated */ /* by toutput when the last fragment of the sequence is written to disk */ /**************************************************************************/ int output_long(FILE * fblkout, char *seq) { long int ic = 0; char space = 3; char codon[4]; int i,x; if (long_seq == FALSE) { /* First call to output_long for seq. So record where the header line is */ /* and then write the dummy header line. */ fl_pos_start = ftell(fblkout); if (pm->bulk == 'R') fprintf(fblkout, ">%6s %-72.72s\n", " ", title); else fprintf(fblkout, ">%-20.20s%9s\n", title, " "); long_seq = TRUE; } /* see toutput for explanation of the switch statement */ while (ic < MAX_GENE && ic < tot) { switch (pm->bulk) { case 'T': fprintf(fblkout, "%c", seq[ic++]); reg++; break; case 'R': if (space == 3) { fprintf(fblkout, " "); space = 0; } else { fprintf(fblkout, "%c", seq[ic++]); space++; reg++; } break; case 'N': for (i = (int) ic, x = 0; i < (int) ic + 3 && i < tot; i++, x++) codon[x] = *(seq + i); codon[x] = '\0'; fprintf(fblkout, "%c", *get_aa(1, codon)); ic += 3; reg++; break; default: printf("\nProgramming error type A1 check code \n"); my_exit(99, "output_long"); } if (!(reg % 61)) { reg = 1; fprintf(fblkout, "\n"); } } return 1; /* return to tidy */ } /************************* output **********************************/ /* Called from after subroutine tidy has read the sequence into memory */ /* or more accurately counted the codon and amino acid usage. This sub- */ /* routine, via a switch checks which parameters and indices have been */ /* requested and write these to file, it handles all output except for COA*/ /**************************************************************************/ void output(char *seq, FILE * foutput, FILE * fblkout, FILE * fcoaout) { char sp; /* set the column delimiter to something shorter than pm->seperator */ sp = (char) (pm->seq_format=='H')? 
(char) '\t': (char) pm->seperator; if (tot) { /* still data in array seq.. */ last_aa = codon_usage_tot(seq, tot); if (pcu->ca[last_aa] == 11) valid_stops++; /* check the last codon was a stop */ } /* codon_error, if 4th parameter is 1, then checks for valid start and */ /* internal stop codon, if 4th parmater is 2, checks that the last codon*/ /* is a stop or was partial, and for non-translatable codons */ codon_error(last_aa, valid_stops, title, (char) 1); codon_error(last_aa, valid_stops, title, (char) 2); /* if we are concatenating sequences then change the title to avger_of */ if(pm->totals) (pm->seq_format=='M')? strcpy(title, "Average_of_genes"): strcpy(title, "Average of genes"); if (strchr("RNT", (int) pm->bulk) != NULL) { /* better write the remaing sequence in seq to disk */ toutput(fblkout, seq); } else if (strchr("OCASDLDBX", (int) pm->bulk) != NULL) { /* These subroutines are self explanatory (see the top of this file) */ /* are called such that only one can be called for each sequence read */ /* all these calls are written to the bulk output file */ switch ((int) pm->bulk) { case 'S': rscu_usage_out(fblkout, ncod, naa); break; case 'C': codon_usage_out(fblkout, ncod, last_aa, valid_stops, title); break; case 'L': raau_usage_out(fblkout, naa); break; case 'D': dinuc_count(seq, tot); dinuc_out(fblkout, title); break; case 'A': aa_usage_out(fblkout, naa); break; case 'B': gc_out(foutput, fblkout, 1); break; case 'O': cutab_out(fblkout, ncod, naa); break; case 'X': /* X is no bulk output written to file */ break; default: fprintf(stderr, "ERROR-23 %s bulk undefined\n", pm->prog); my_exit(99, "output"); break; } } else if (pm->bulk) { /* just a programming error catch */ fprintf(stderr, "ERROR-24 %s -prog undefined\n", pm->prog); my_exit(99, "output"); } /* if an index has been requested then this is true */ if (pm->sil_base || pm->cai || pm->fop || pm->enc || pm->gc3s || pm->gc || pm->cbi || pm->L_sym || pm->L_aa || pm->coa || pm->hyd|| pm->aro) { /* if 
this is the first sequence then write a header line */ if (num_sequence == 1 || pm->totals) { fprintf(foutput, (pm->seq_format == 'H')? "%-25.25s%c":"%-.25s%c" ,"title",sp); if (pm->sil_base) fprintf(foutput, "%s%c%s%c%s%c%s%c", "T3s",sp,"C3s",sp,"A3s",sp, "G3s",sp); if (pm->cai) fprintf(foutput, "%s%c", "CAI",sp); if (pm->cbi) fprintf(foutput, "%s%c", "CBI",sp); if (pm->fop) fprintf(foutput, "%s%c", "Fop",sp); if (pm->enc) fprintf(foutput, "%s%c", "Nc",sp); if (pm->gc3s) fprintf(foutput, "%s%c", "GC3s" ,sp); if (pm->gc) fprintf(foutput, "%s%c", "GC" ,sp); if (pm->L_sym) fprintf(foutput, "%s%c", "L_sym",sp); if (pm->L_aa) fprintf(foutput, "%s%c", "L_aa" ,sp); if (pm->hyd) fprintf(foutput, "%s%c", "Gravy",sp); if (pm->aro) fprintf(foutput, "%s%c", "Aromo",sp); fprintf(foutput, "\n"); } /* if output format is human readable print the fixed width sequence */ /* name, else print only the name of the sequence */ fprintf(foutput, (pm->seq_format == 'H')? "%-25.25s%c":"%-.25s%c" ,title,sp); /*Need to use if statements as we allow more than one index to be calc*/ /* per sequence read in */ if (pm->sil_base) base_sil_us_out(foutput, ncod, naa); if (pm->cai) cai_out(foutput, ncod); if (pm->cbi) cbi_out(foutput, ncod, naa); if (pm->fop) fop_out(foutput, ncod); if (pm->enc) enc_out(foutput, ncod, naa); if (pm->gc3s) gc_out(foutput, fblkout, 3); if (pm->gc) gc_out(foutput, fblkout, 2); if (pm->L_sym) gc_out(foutput, fblkout, 4); if (pm->L_aa) gc_out(foutput, fblkout, 5); if (pm->hyd) hydro_out(foutput, naa); if (pm->aro) aromo_out(foutput, naa); if (pm->coa) coa_raw_out(fcoaout, ncod, naa, title); fprintf(foutput, "\n"); } return; } /************************* my_exit **********************************/ /* Called to clean up open files and generate an intelligent exit message */ /* Also warns if no analysis has been run, the user did not select R from */ /* the main menu. 
If COA was selected then it reminds the user to look */ /* at the file summary.coa, and deletes any stray binary files */ /**************************************************************************/ int my_exit(int error_num, char *message) { fileclose(&pm->inputfile); /* if we are masuquarading as another program we assign both outputfile */ /* and tidyout the same filehandle (we don't want to close this twice */ if ( pm->outputfile == pm->tidyoutfile ){ fileclose(&pm->outputfile); }else{ fileclose(&pm->outputfile); fileclose(&pm->tidyoutfile); } fileclose(&pm->cuout); fileclose(&pm->fopfile); fileclose(&pm->cbifile); fileclose(&pm->caifile); fileclose(&pm->logfile); fileclose(&pm->fcoa_in); fileclose(&pm->fcoa_out); if (pm->inputfile = fopen("cbrawin", "r")) { fclose(pm->inputfile); deletefile("cbrawin"); } if (pm->inputfile = fopen("cbfcco", "r")) { fclose(pm->inputfile); deletefile("cbfcco"); } if (pm->inputfile = fopen("cbfcli", "r")) { fclose(pm->inputfile); deletefile("cbfcli"); } if (pm->inputfile = fopen("cbfcpc", "r")) { fclose(pm->inputfile); deletefile("cbfcpc"); } if (pm->inputfile = fopen("cbfcpl", "r")) { fclose(pm->inputfile); deletefile("cbfcpl"); } if (pm->inputfile = fopen("cbfcta", "r")) { fclose(pm->inputfile); deletefile("cbfcta"); } if (pm->inputfile = fopen("cbfcvp", "r")) { fclose(pm->inputfile); deletefile("cbfcvp"); } if (pm->inputfile = fopen("cb1rawin", "r")) { fclose(pm->inputfile); deletefile("cb1rawin"); } if (error_num == 2 || error_num == 0 ) { if (pm->analysis_run) { fprintf(stderr, "Files used:\n"); if (strlen(pm->curr_infilename)) fprintf(pm->my_err, " Input file was\t %s \n", pm->curr_infilename); if (strlen(pm->curr_outfilename)){ fprintf(pm->my_err, " Output file was\t %s %s", pm->curr_outfilename, (pm->codonW) ? " (codon usage indices, e.g. gc3s)\n":"\n"); } if (strlen(pm->curr_tidyoutname)){ fprintf(pm->my_err, " Output file was\t %s %s", pm->curr_tidyoutname, (pm->codonW) ? " (bulk output e.g. 
raw codon usage)\n":"\n"); } if (pm->coa) fprintf(pm->my_err, " For more information about the COrrespondence " "Analysis see summary.coa\n"); } else if ( pm->codonW ) fprintf(stderr, " \n\n WARNING You are exiting before codonW has generated any results\n" " Select 'r' from the main menu to run\n"); } if ( pm->codonW ) printf("\n CodonW has finished\n"); switch ((int) error_num) { case 0: /* silent exit */ exit(0); break; case 1: printf("failed to open file for output <%s>\n", message); exit(1); break; case 2: printf("user requested exit <%s>\n", message); exit(0); break; case 3: printf("failed to allocate memory <%s>\n", message); exit(1); break; case 4: printf("Write to disk failed ! <%s>\n", message); exit(1); break; case 5: printf("Read from disk failed! <%s>\n", message ); exit(1); break; case 6: printf("failed to open file for reading <%s>\n", message); exit(1); break; case 7: printf("failed to close file <%s>\n", message); exit(1); case 99: printf(" Controlled exit <%s>\n",message); exit(0); break; default: printf("for unknown reason\n"); exit(1); break; } return 0; } /************************** file_close **********************************/ /* Fileclose function checks whether the filepointer is open, if so it */ /* attempts to close the open file handle and assigns a null pointer */ /* to that handle */ /**************************************************************************/ int fileclose(FILE ** file_pointer) { if (*file_pointer != NULL ) { if (fclose(*file_pointer) == EOF ) { fprintf(stderr,"Failed to close file %i \n",errno); perror ("Unexpected condition in fileclose"); exit(7); } *file_pointer = NULL; /* make sure file_pointer is null*/ } return 1; } /************************** Chelp **************************************/ /* Chelp scans opens the help file and returns text associated with that */ /* help keyword. 
Help keywords are surrounded by hashs, starting in the */ /* first column of the ASCII help file and are terminated by // */ /**************************************************************************/ int chelp ( char *help_keyword ) { char helplib [MAX_FILENAME_LEN]=""; char *p=NULL, inhelp=FALSE; char QueryString[120]; /* limit for help phrase is 120 chars */ char HelpMessage[121]; int line_counter=2; /* assume 2 blank lines to start with */ FILE *hfp=NULL; /* Inital steps is to locate help file */ /* First check if CODONW_H has been set as an environment variable */ /* If not then assume that the help file is in the current directory */ p=getenv( "CODONW_H" ); if ( p != NULL ) strcpy ( helplib , p ); else { strcpy ( helplib , "codonW.hlp"); } hfp=open_file("",helplib, "r", FALSE); /* if we can't open the help file then explain what we where trying to do */ if ( hfp == NULL ) { fprintf ( stderr , "Could not open help file codonw.hlp\n" "Expected to find this file in %s\n" "This can be overridden by setting the" "environmental variable\n" "CODONW_H to the help file location\n", helplib); pause; /* make sure they Ack. 
the error mesg */ return 0; /* abort */ } /* Now that we have opened the help file, assemble the help keyword string */ strcpy (QueryString , "#"); strcat (QueryString , help_keyword ); strcat (QueryString , "#"); fprintf(stderr,"\n\n"); /* now scan the help file looking for this keyword */ while ( fgets ( HelpMessage, 120, hfp ) ) { if ( strstr (HelpMessage,QueryString) != NULL ) inhelp=TRUE; /* we found it */ else if ( inhelp && strstr ( HelpMessage , "//") ) { /* found the end*/ fileclose(&hfp ); if ( line_counter )pause; return 1; } /* if inhelp is true we have found the help keyword but not reached EOF */ else if ( inhelp ) { if ( strchr(HelpMessage,'\n') ) fprintf ( stderr, "%s",HelpMessage ); /*stderr,it must be interactive */ else fprintf ( stderr, "%s\n",HelpMessage ); /*make sure there are line feeds*/ /* count how many lines I have printed to the terminal and compare it */ /* with the length of the terminal screen as defined by pm->term_length */ if (line_counter++ >= pm->term_length-3 && line_counter ) { line_counter=0; pause; fprintf(stderr, "%s",HelpMessage); } } } /* Error catches for problems with help file */ if ( HelpMessage == NULL && inhelp == FALSE ){ fprintf ( stderr ," Error in help file, %s not found ", QueryString); pause; } else { fprintf (stderr , "Premature end of help file ... 
\n"); pause; } return 0; /* failed for some reason */ } /******************** WasHelpCalled ***********************************/ /* Checks the string input to see if the user asked for help */ /**************************************************************************/ char WasHelpCalled ( char * input ) { char ans = FALSE; if ( strlen ( input) == 1 && (char)toupper((int)input[0]) == 'H') ans = TRUE; else if ( !strcmp ( input , "help") ) ans = TRUE; else if ( !strcmp ( input , "HELP") ) ans = TRUE; return ans; } codonW/codonW.h 777 0 0 72732 10240476225 7004 0/**************************************************************************/ /* CodonW codon usage analysis package */ /* Copyright (C) 2005 John F. Peden */ /* This program is free software; you can redistribute */ /* it and/or modify it under the terms of the GNU General Public License */ /* as published by the Free Software Foundation; version 2 of the */ /* License, */ /* */ /* This program is distributed in the hope that it will be useful, but */ /* WITHOUT ANY WARRANTY; without even the implied warranty of */ /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ /* GNU General Public License for more details. */ /* You should have received a copy of the GNU General Public License along*/ /* with this program; if not, write to the Free Software Foundation, Inc.,*/ /* 675 Mass Ave, Cambridge, MA 02139, USA. 
*/ /* */ /* */ /* The author can be contacted by email (jfp#hanson-codonw@yahoo.com Anti-*/ /* Spam please change the # in my email to an _) */ /* */ /* For the latest version and information see */ /* http://codonw.sourceforge.net */ /**************************************************************************/ #define ARB_UNIT 100 /* used to define the array*/ #define MAX_GENE (ARB_UNIT*3) /* seq, which holds readin */ #define LINE_LENGTH (ARB_UNIT+100) /* sequence data */ #define GARG_EXACT 0x800 /* used in function gargs */ #define GARG_NEXT 0x1000 /* used in function gargs */ #define GARG_THERE 0x2000 /* used in function gargs */ #define GARG_SUBSQ 0x4000 /* used in function gargs */ #define MAX_ARGS 100 /* used in function gargs */ /* debugging code */ #define debug_ printf("Got to %i\n",debugger++); #define debug(x) printf( #x " = %d", x); /* defile the macro pause */ #define pause {fprintf(stderr,"\nPress return or enter to continue -> ");gets(pm->junk);} #define MAX_FILENAME_LEN 90 /* max filename */ /* define the structures used within codonW */ typedef struct { char *des; char *typ; int ca[65]; } GENETIC_CODE_STRUCT; /* genetic code information */ typedef struct { char *aa1[22]; /* 1 letter AA code */ char *aa3[22]; /* 3 letter AA code */ char *cod[65]; /* 3 letter name of codons */ } AMINO_STRUCT; typedef struct { float hydro[22]; /* hydropathicity values */ int aromo[22]; /* aromaticity values */ } AMINO_PROP_STRUCT; typedef struct { char *des; /* store a description */ char *ref; /* a reference */ char fop_cod[65]; /* the optimal codons */ } FOP_STRUCT; typedef struct { char *des; /* store a description */ char *ref; /* a reference */ float cai_val[65]; /* the CAI w values */ } CAI_STRUCT; typedef struct { char level; /* either expert or standard*/ int axis; /* how many axis to generate*/ int rows; /* how many genes in dataset*/ int colm; /* how many columns in data */ int fop_gene; /* No of genes to use to ident opt codon*/ char 
add_row[MAX_FILENAME_LEN]; /* file with supp sequences */ float inertia; /* total data inertia */ char codons[65]; /* codon to be analysed */ char amino [22]; /* amino acids to be COA'ed */ } COA_STRUCT; typedef struct { char prog; /* used to ident which prog */ char bulk; /* used to ident blk output */ char verbose; /* don't overwrite files */ char totals; /* concatenate genes ? */ char menu; /* show a menu ? */ char warn; /* show sequence warning */ char codonW; /* am I codonW */ char fop; /* calc index fop */ char cai; /* calc index CAI */ char cbi; /* calc index CBI */ char bases; /* calc base composition */ char gc3s; /* calc gc at sil.3rd base */ char gc; /* calc gc */ char enc; /* calc enc */ char sil_base; /* calc silent base compo */ char L_sym; /* No of synonymous codons */ char L_aa; /* No of amino acids */ char hyd; /* calc hydropathicity */ char aro; /* calc aromaticity */ char seperator; /* column separator */ char coa; /* calculate a COA or not ? */ char code; /* which genetic code */ char f_type; /* which predefined fop val */ char c_type; /* which predefined CAI val */ char seq_type; /* DNA or Protein or CU */ char seq_format; /* Human or machine readable*/ char curr_infilename [MAX_FILENAME_LEN]; /* input filename */ char curr_outfilename[MAX_FILENAME_LEN]; /* .out filename */ char curr_tidyoutname[MAX_FILENAME_LEN]; /* .blk filename */ char fop_filen[MAX_FILENAME_LEN]; /* user fop filename */ char cai_filen[MAX_FILENAME_LEN]; /* user CAI filename */ char cbi_filen[MAX_FILENAME_LEN]; /* user CBI filename */ char curr_logfilename[MAX_FILENAME_LEN]; /* used for logging errors */ char junk [BUFSIZ+1]; /* used to store char info */ char messages [300]; /* used to constuct messgs */ char analysis_run; /* has CodonW actually run */ int term_length; /* how many lines are there */ /* file pointers */ FILE *inputfile; /* input file */ FILE *outputfile; /* .out file */ FILE *tidyoutfile; /* .blk file */ FILE *cuout; /* codon usage output */ FILE *fopfile; 
/* fop input values */ FILE *caifile; /* cai input values */ FILE *cbifile; /* cbi input values */ FILE *logfile; /* log file name */ FILE *my_err; /* pointer for err stream */ FILE *fcoa_in; FILE *fcoa_out; } MENU_STRUCT ; #ifndef DECOSF #define DEBUG /* include debug code */ #endif #ifndef TRUE #define TRUE 1 /* for dumb compilers */ #endif #ifndef FALSE #define FALSE 0 /* for dumb compilers */ #endif /* these handle how to delete files, and blank the screen */ #if defined _WINDOWS || defined _WIN32 # define deletefile(x) _unlink(x) # define clearscr(x) {int n; for(n=0; n". If you use GCG, the output from the program tofasta is acceptable. If prompted for either the "bulk" or "output" file names, these filenames will be used to record the results of the analysis. These files will be opened for writing which may destroy the content of the files, should the files already exist. So if a file already exists with the name you have chosen, you will be asked whether you wish to overwrite the file, append the results to the file, or choose a new filename (that is, unless you have chosen the option to overwrite files silently). // #File_not_found# File not found The name of the input file that you have chosen does not exist in the current working directory. Either choose a new filename or give the fully qualified filename (e.g. e:\codon\cu\input.dat). Depending on the system that you are using, the names of all files in the current working directory may or may not be displayed when a file cannot be located. // #file_exists# File exists If the filename that you have chosen as the output file exists, it will be deleted if opened for writing. You now have the choice of whether or not to overwrite this file (thus deleting the original). If you choose not to overwrite you have the further choice of either appending the results to the file you originally choose or selecting a new filename. 
(Note: If you select overwrite silently from the defaults menu you will not be prompted if a file of the same name already exists; it will be overwritten.) // #file_append# File Append You decided not to overwrite the file. You can either append the results to this file or choose a new filename. // #menu_2# Menu 2 Purifying sequences menu This menu was originally used to eliminate sequences from data that had high sequence identity to other sequences in the dataset and thus might bias the output results. This functionality is not currently portable and is not being made available at present. Try using the NCBI program nrdb or the EGCG9 program clean_up to remove identical or almost identical sequences. // #menu_3# Menu 3 Defaults menu To improve flexibility, many of the default values used internally by CodonW (defined in the header file codonW.h) can be altered at runtime using this menu. Ten options can be customised. Option (1) Change ASCII delimiter in output. The default ASCII delimiter used to separate information in machine readable output files is a comma. The delimiter can be changed via this option to either the tab or space character. Option (2) Run silently. This option can be used when running from a script file or as a batch job. If TRUE, it suppresses warnings about overwriting files, the prompting for a personal choice of Fop, CBI or CAI values (although these can still be given via command line arguments) and the pause after each page of error or warning messages has been displayed. Option (3) Log warnings/information to a file. The default value for this option is set as FALSE, in which case all warning or error messages generated by CodonW are written to the screen via the standard error stream. When TRUE, the errors are redirected to a log file:- you will be prompted for the filename for this log file. This option is useful if there are a large number of sequences in the input file or there are many warning messages. 
Option (4) Number of lines on screen. This is used to set the screen length, which is used during screen refreshing and the pagination of error messages. Option (5) Change the genetic code. By default, CodonW assumes the universal genetic code when translating and processing codons. This option allows alternative genetic codes to be selected. Option (6) Change the Fop/CBI values. To calculate either the CBI or Fop indices, a set of optimal codons is required; by default the optimal codons of E. coli are assumed. This option displays a submenu which lists eight species where optimal codons have been identified. When calculating the Fop/CBI of genes from these species the appropriate set of codons should be selected. Personal selections of optimal codons can be input at runtime. Option (7) Change the CAI values. To calculate the codon adaptation index it is necessary to assign fitness values to each codon; by default the fitness values of E. coli codons are assumed. However, these values are very species-specific and so using E. coli fitness values to calculate CAI values for other species is nonsensical. Before assigning fitness values to a codon a set of genes which have been experimentally verified to be highly expressed must be identified. Such sets have been created for relatively few species. This menu lists the species where a reference set of highly expressed genes is known, and fitness values assigned. Personal selections of fitness values can be input at runtime if calculating CAI. Option (8) Toggle human or machine-readable output. The default format for most CodonW output files is human readable. Machine-readable output is fixed width numerical data separated by an ASCII delimiter. This format is readily imported into a wide range of statistical and graphical analysis programs but not easily read by eye. Human readable output is more verbose but easier to read. 
The output formats for codon usage, tabulation of codon usage, relative synonymous codon usage and base compositions are the most radically affected by this option. Option (9) Toggle output for each or all genes. By default, CodonW processes each gene individually. When the option "all genes" is selected, sequences are concatenated and processed as a single sequence. This option can be used to calculate total codon or amino acid usage, the average G+C content, Fop, etc. Option (10) Correspondence analysis defaults. This option allows access to the "advanced correspondence analysis" menu. This menu is normally accessed as a submenu of "Correspondence analysis" (Menu 5), but is included here so that all runtime options are accessible via the "Change default values" menu. // #menu_4# Menu 4 Codon Usage Indices This menu is used to choose the indices calculated by CodonW; by default only the G+C content of the sequence is selected. The calculation of these indices (except G+C content) is dependent on the genetic code selected under Menu 3. More than one index may be calculated at once. Option (1) Codon Adaptation Index (CAI). CAI measures the relative adaptation of a gene to the codon usage of highly expressed genes. The relative adaptiveness (w) of a codon is the ratio of the usage of that codon to that of the most abundant codon for the same amino acid. The relative adaptiveness of codons (for albeit a limited choice of species) can be selected from Menu 3. Option (2) Frequency of Optimal codons (Fop). This index is the ratio of optimal codons to synonymous codons (genetic code dependent). Optimal codons for several species are in-built and can be selected using Menu 3. By default, the optimal codons of E. coli are assumed. The user may also enter a personal choice of optimal codons. If rare synonymous codons have been identified, there is a choice of calculating the original Fop index or a modified Fop index. 
Fop values for the original index are always between 0 (where no optimal codons are used) and 1 (where only optimal codons are used). When calculating the modified Fop index, any negative values are adjusted to zero. Option (3) Codon Bias Index (CBI). The codon bias index is a measure of directional codon bias. It measures the extent to which a gene uses a subset of optimal codons. Option (4) The effective number of codons (NC). This index is a simple measure of overall codon bias and is analogous to the effective number of alleles measure used in population genetics. Knowledge of the optimal codons or a reference set of highly expressed genes is unnecessary when calculating this index. Option (5) G+C content of the gene. This is calculated as the frequency of nucleotides that are guanine or cytosine. Option (6) G+C content 3rd position of synonymous codons (GC3s). This is the fraction of codons, synonymous at the third codon position, which have either a guanine or cytosine at that third codon position. Option (7) Silent base composition. Selection of this option calculates four separate indices, i.e. G3s, C3s, A3s & T3s. Although correlated with GC3s, this index is not directly comparable with it. It quantifies the usage of each base at synonymous third codon positions. Option (8) Length silent sites (Lsil). This is the frequency of synonymous codons within each gene. Option (9) Length amino acids (Laa). This is the number of translatable codons. Option (10) Hydropathicity of protein. This is the general average hydropathicity or (GRAVY) score for the hypothetical translated gene product. It is the arithmetic mean of the sum of the hydropathic indices of each amino acid. Option (11) Aromaticity score of protein. This is the frequency of aromatic amino acids (Phe, Tyr, Trp) in the hypothetical translated gene product. The hydropathicity and aromaticity protein scores are indices of amino acid usage.
The strongest trend in the variation in the amino acid composition of E. coli genes is correlated with protein hydropathicity, the second strongest trend is correlated with gene expression, while the third is correlated with aromaticity. // #menu_5_coa# Menu 5 Correspondence analysis In many unicellular organisms, protein coding genes have non-random usage of synonymous codons (see Andersson and Kurland (1990) and Sharp et al. (1993) for reviews). Correspondence analysis uses contingency tables (counts of the joint occurrences of rows and columns of a table). Therefore, the sequence data must be transformed into a contingency table. The frequency of each codon (or amino acid) is tabulated for each gene. This is then converted into an Euclidean distance measurement of distance between the rows or columns. CodonW calculates a scaled distance measurement as recommended by Grantham and co-workers (Grantham et al 1981). Analysis of a large number of distances would ordinarily be very time consuming. Correspondence analysis provides a simple visualisation of these distances by projecting the points from their original multidimensional space onto lower dimensions, with genes with similar distances plotted as neighbours. In addition to calculating the coordinates for the projection of these points, correspondence analysis (as implemented in CodonW) also calculates the total inertia of the data, together with the eigenvalue and relative variation explained by each axis. CodonW can also quantify the absolute and relative contribution of each gene, codon or amino acid on each identified trend. To limit variation due to stochastic noise, it is recommended that short genes (less than 50 codons) be excluded from a correspondence analysis. The correspondence analysis menu (Menu 5) has four options, the default option being not to generate a correspondence analysis, i.e. Do not perform a COA. Option (1) Correspondence analysis of codon usage. 
This generates a correspondence analysis on the total codon usage. By default, this is on synonymous codons, although the advanced menu may be used to adjust which codons are included/excluded. If analysing synonymous codon usage, the analysis has 58 degrees of freedom. Option (2) Correspondence analysis of RSCU. This generates a correspondence analysis of relative synonymous codon usage (RSCU). RSCU is calculated as the ratio of the observed frequency of a codon to the frequency expected under unbiased codon usage within a synonymous codon group. Correspondence analysis of RSCU is useful because variation caused by unequal usage of amino acids is removed; however the number of degrees of freedom is reduced to 40. Option (3) Correspondence analysis of Amino Acid usage. This generates a correspondence analysis of amino acid composition, with 19 degrees of freedom. Option (4) Do not perform a correspondence analysis. This is the default option. // #menu_6# Menu 6 Basic Stats This menu was originally designed to calculate some basic statistics on the output from the various codon usage indices. This functionality is not currently portable and is not being made available at present. // #menu_7# Menu 7 Relaxation (almost) This menu was designed to help teach the genetic code(s). It asks various random questions about codon translation and codon usage. The genetic code used as the basis for the correct answers can be changed under the default menu (Menu 3). // #fun# Teach yourself the genetic codes and codon usage. To exit type "quit" or "exit" (without the quotation marks). If you don't know the answer to the question, you can type "?" (without the quotation marks) . You will then be prompted with the correct answer. Beware:- you will be penalised for incorrect answers :). The questions are: What is the three-letter name? (You must convert the one-letter code given to the three-letter code.) How synonymous is Amino Acid? 
(How many synonyms are there for this amino acid?) Name the Amino Acid? (Which amino acid is coded by this codon?) // #menu_8_blk# Menu 8 Bulk output options in CodonW Non-correspondence analysis output from CodonW which cannot easily be summarised as a single index is bulk output. Under this menu there are 10 options. Multiple options cannot be selected simultaneously. Each time this menu is selected you will be prompted for an alternative output filename. Option (1) Fasta format output of DNA sequence. The input sequences are reformatted and written to a file in a Fasta /Pearson-like format. Option (2) Reader format output of DNA sequence. This format is derived from the fasta format, except that the sequence is written as codons with three bases separated by a space, and the size of the sequence is recorded at column 70. Option (3) Translate input file to amino acids. This translates DNA to amino acids using the selected genetic code. The amino acids are written in a Fasta/Pearson compatible format. Option (4) Codon Usage. This is the default option. The frequency of each codon is written to a file in four rows with 16 columns per row. The codons are written in sequential numerical order, left to right. Option (5) Amino acid usage. The frequency of each amino acid, untranslatable codons and stop codons are recorded, one row per gene and 23 columns per row. The first column contains a unique gene description, the second column records number of untranslatable codons, the third and subsequent columns summarize the amino acid and termination codon usage. Option (6) Relative Synonymous Codon Usage (RSCU). Relative synonymous codon usage is calculated as the ratio of the observed frequency of a codon to the frequency expected if codon usage were random. Option (7) Relative Amino acid usage (RAAU). Relative Amino acid usage is the frequency of the amino acid relative to the total amino acid usage. Option (8) Dinucleotide frequencies. 
The frequency of the 16 dinucleotides is calculated in each of the three possible codon positions. The data are recorded with one row per position and 16 columns per row. Option (9) Base composition analysis. This option records the frequency of nucleotides in each codon position. It also reports GC, GC3s and GCns (GC content excluding synonymous third position codons). Option (10) No output written to file. This option is useful when working with large datasets and disk storage or disk access is a limiting factor. This option suppresses all the output to the bulk output file. // #menu_coa# Advanced Correspondence Analysis menu. This menu allows much greater control over the correspondence analysis. Option (1) Unselect or select. This menu changes slightly depending on whether correspondence analysis is of amino acid or codon usage. It simplifies the selection of the codons/amino acids that are to be included in the COA. This allows the user to override the default selections, which if the COA is of codon usage, is the exclusion of non-synonymous codons and termination codons. Option (2) Change the number of axes. The number of axes generated by a correspondence analysis is N-1, where N is either the number of genes or columns (whichever is the lesser in value). However, the default is to generate information about the first four axes (or trends). This option allows the user to record coordinates on any number of axes, up to the maximum generated by the analysis. Each axis generated by correspondence analysis is represented by a multidimensional vector. The position of a gene on any axis is the product of that gene's codon usage and the axis vector. As the vector is itself a product of the codon usage, the vectors can be affected by unusual codon usage. An analysis of nuclear and plasmid genes would be difficult, as the codon usage of each would perturb the other.
Each dataset could be analysed individually but as the vectors for the axes would be different, it would be difficult to make direct comparisons between the analyses. To overcome this problem it is necessary to generate the COA vectors using one dataset and then to apply the same vectors to another. Thus direct comparison between the ordination of genes is possible. In CodonW, this is possible by using the following option (Option 3). Option (3) Add additional genes after correspondence analysis. The user is prompted for the file containing the additional sequences, to which the vectors are to be applied. The vectors are calculated, as normal, using the genes contained in the standard input file (Menu 1). The co-ordinates and any additional information about these original genes are recorded as normal. Next the additional genes are read in and the original vectors applied to them. The ordinations of these additional genes are then appended to the COA output files (for an explanation about the COA output files see below). Option (4) Toggle level of correspondence analysis output. By default this option is set to "normal" but can be toggled to "exhaustive". If the exhaustive output option is selected, then in addition to the standard information about gene and codon/amino acid ordination, additional information about inertia of the rows and columns is generated. This additional information includes the absolute contribution of the inertia of each row or column to each of the recorded axes, and the fraction of the variation within each row or column explained by each axis. Option (5) Change number of genes used to identify optimal codons. Correspondence analysis of either RSCU or codon usage where the major trend correlates with gene expression can be used to identify optimal codons. This is achieved by comparing the codon usage of the genes that lie at the extremes of the principal trend (axis 1).
By default this is the top and bottom 10% of genes (as defined by axis 1 ordination). Using this option this can be set to a percentage between 1% and 50%, or to an absolute number of genes. // #select# Codon or Amino acid selection The codons or amino acids that will NOT be analysed in this correspondence analysis are surrounded by curly brackets. The choices of which codons/amino acids that are to be excluded can be changed. Simply give the number associated with each codon/amino acid for which you want to change the status. // codonW/codonWinstall 777 0 0 13664 6363004653 10127 0#!/bin/sh
# Adapted from the SRS5 srsinstall script
#
# codonWinstall - interactive build helper for CodonW.
#   1. works out which option was requested on the command line,
#   2. if no Makefile exists, asks for the make/cc/ln commands and writes one,
#   3. runs the make target that corresponds to the requested option.
#
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# analyse command line and print usage if wrong
#
ERR='codonWinstall: Stopping due to Error'

# Default make command. It is only prompted for when a Makefile has to be
# generated, but it is used by every "make <target>" call further down.
makeCom='make'

if [ "$#" = 0 ]; then
    option="all"
else
    case "$1" in
        all|clean|codonw|links|cleanall)
            option="$1" ;;
        realclean)
            option="cleanall" ;;
        *)
            option="usage"
            echo "unknown option '$1'" ;;
    esac
fi

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# print "usage"
#
if [ "$option" = 'usage' ]; then
    cat << END
Usage: ./codonWinstall option

Options:
    all        does a complete installation
    codonw     compile codonw only .. no linked programmes
    links      generate links to pseudo programmes
    clean      removes all object files
    cleanall   removes all the object files, codonW, linked files and Makefile
    realclean  removes all the object files, codonW, linked files and Makefile
END
    exit 1
fi

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
if [ "$option" = 'all' ]; then
    echo "... starting installation of codonW"
fi

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# put correct "make" and "cc" commands
#
OS=`uname`
if [ "$OS" = "SunOS" ]; then
    case "`uname -r`" in
        [56]*) OS='Solaris' ;;
    esac
fi

# some echo commands do not support -n
# generally /usr/bin/echo doesn't but /usr/ucb/echo does, so one can hardwire
# to be independent of users path.
if [ -r /usr/ucb/echo ]; then
    ECHON="/usr/ucb/echo -n"
else
    ECHON="echo -n"
fi

# OSF/1 v4.0 /usr/ucb is symlink to /usr/bin, but /bin/sh has builtin -n
if [ "$OS" = "OSF1" ]; then
    case "`uname -r`" in
        V[4]*) ECHON="echo -n"
               # or can
               #CMD_ENV=bsd ; export CMD_ENV
               ;;
    esac
fi

# we did our best, but now let's test
# ($ECHONT is deliberately left unquoted: some wc implementations pad the
#  count with blanks and word splitting strips them before the comparison)
ECHONT="`$ECHON | wc -c`"
if [ $ECHONT != 0 ]; then
    ECHONEND="\c"
    ECHON=echo
else
    ECHONEND=""
fi

if [ ! -f "Makefile" ]; then
    $ECHON "enter the make command [make]: $ECHONEND"
    read makeCom
    if [ "$makeCom" = "" ]; then makeCom='make' ; fi

    # for OSF1 need to know if it is osf1 make or gnu make
    # (the answer is recorded but nothing below currently reads it)
    if [ "$OS" = "OSF1" ]; then
        $ECHON "is this OSF1 make [y]: $ECHONEND"
        read OSFmake
        if [ "$OSFmake" = "" ]; then OSFmake='y' ; fi
    fi

    if [ "$OS" = "SunOS" -o "$OS" = "Solaris" ]; then
        ccComDef='gcc'
    else
        ccComDef='cc'
    fi
    $ECHON "enter the cc command [${ccComDef}]: $ECHONEND"
    read ccCom
    if [ "$ccCom" = "" ]; then ccCom="$ccComDef" ; fi

    # This question used to be asked a second time after the link questions;
    # it is now asked once only.
    echo 'choose between optimised code, or code for debugging'
    $ECHON "optimised code [y]: $ECHONEND"
    read optimCom
    if [ "$optimCom" = "" ]; then optimCom="y" ; fi

    $ECHON "enter the link command [ln]: $ECHONEND"
    read ccLn
    if [ "$ccLn" = "" ]; then ccLn="ln -f" ; fi

    $ECHON "Do you want hard or soft links [hard]: $ECHONEND"
    read ccLnflag
    if [ "$ccLnflag" = "" ]; then ccLnflag="hard" ; fi

    echo "...creating makefile for '$OS'"

    #better rename the old makefiles
    if [ -f "Makefile" ]; then \mv Makefile Makefile.pre ; fi
    if [ -f "makefile" ]; then \mv makefile makefile.pre ; fi
    touch Makefile

    # Add the logical parts of the make file
    # Single quotes: $(CFLAGS) is a make variable and must reach the Makefile
    # literally instead of being run by the shell as a command substitution.
    if [ "$OS" = "OSF1" ]; then
        echo 'override cflags = $(CFLAGS) -g' >> Makefile
    fi

    if [ "$optimCom" = "y" ]; then cflags="-O"; else cflags="-g -DDEBUG" ; fi
    if [ "$ccLnflag" = "hard" ]; then lncmd=$ccLn; else lncmd="$ccLn -s"; fi

    link_prog="rscu cu aau raau tidy reader cutab cutot transl bases base3s dinuc cai fop gc3s gc cbi enc"

    # make requires recipe lines to start with a real TAB character
    TAB=`printf '\t'`

    # Unquoted here-document: shell variables ($link_prog, $ccCom, ...) are
    # expanded, make variables are protected with a backslash.
    cat <<EOF >> Makefile

objects = codon_us.o codons.o open_fil.o commline.o menu.o tester.o coresp.o
linked = $link_prog

CC=$ccCom
CFLAGS= $cflags -DBSD
LN=$lncmd

all: codonw links

codonw: \$(objects)
${TAB}\$(CC) \$(CFLAGS) \$(objects) -o codonw -lm

clean:
${TAB}\rm -f \$(objects)

cleanall:
${TAB}\rm -f \$(objects) codonw Makefile \$(linked)

realclean:
${TAB}\rm -f \$(objects) codonw Makefile \$(linked)

codon_us.o: codon_us.c codonW.h
${TAB}\$(CC) -c \$(CFLAGS) codon_us.c

menu.o: menu.c codonW.h
${TAB}\$(CC) -c \$(CFLAGS) menu.c

codons.o: codons.c codonW.h
${TAB}\$(CC) -c \$(CFLAGS) codons.c

coresp.o: coresp.c codonW.h
${TAB}\$(CC) -c \$(CFLAGS) coresp.c

open_fil.o: open_fil.c codonW.h
${TAB}\$(CC) -c \$(CFLAGS) open_fil.c

commline.o: commline.c codonW.h
${TAB}\$(CC) -c \$(CFLAGS) commline.c

tester.o: tester.c codonW.h
${TAB}\$(CC) -c \$(CFLAGS) tester.c

links: codonw
EOF

    # one recipe line per pseudo programme; printf gives a dependable TAB and
    # the single-quoted format keeps $(LN) literal for make
    for file in $link_prog
    do
        printf '\t\t$(LN) codonw %s\n' "$file" >> Makefile
    done
    echo >> Makefile
fi

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# cleans the object
#
if [ "$option" = 'clean' ]; then
    echo '...cleaning the old object files '
    echo '...make clean'
    $makeCom clean
fi

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
if [ "$option" = 'cleanall' -o "$option" = 'realclean' ]; then
    echo '...cleaning the old object files, linked files and executables'
    echo '...make realclean '
    $makeCom realclean
fi

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
if [ "$option" = 'all' ]; then
    echo '...Starting to make codonW, with auxillary programs '
    echo '...make all'
    $makeCom all
fi

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
if [ "$option" = 'codonw' ]; then
    echo '...checking codonW is up to date'
    echo '...make codonw'
    $makeCom codonw
fi

#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
if [ "$option" = 'links' ]; then
    echo '... Linking auxilliary programs to '
    echo '...make links'
    $makeCom links
fi
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 codonW/commline.c 777 0 0 77023 10237502030 7335 0/**************************************************************************/ /* CodonW codon usage analysis package */ /* Copyright (C) 2005 John F. Peden */ /* This program is free software; you can redistribute */ /* it and/or modify it under the terms of the GNU General Public License */ /* as published by the Free Software Foundation; version 2 of the */ /* License, */ /* */ /* This program is distributed in the hope that it will be useful, but */ /* WITHOUT ANY WARRANTY; without even the implied warranty of */ /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ /* GNU General Public License for more details. */ /* You should have received a copy of the GNU General Public License along*/ /* with this program; if not, write to the Free Software Foundation, Inc.,*/ /* 675 Mass Ave, Cambridge, MA 02139, USA. */ /* */ /* */ /* The author can be contacted by email (jfp#hanson-codonw@yahoo.com Anti-*/ /* Spam please change the # in my email to an _) */ /* */ /* For the latest version and information see */ /* http://codonw.sourceforge.net */ /**************************************************************************/ #include #include #include #include #include "codonW.h" /************** process_command_line *************************************/ /* The command line is passed to this function for processing.
The name of*/ /* the programme is read, and based on this, CodonW will emulate several */ /* useful codon usage analysis programmes routinely used in our laboratory*/ /* all other command line arguments are read. Unrecognised arguments are */ /* reported to the user, arguments not preceded by a dash are assumed to */ /* be filenames. The input, output and bulk output files to be precise */ /**************************************************************************/ int proc_comm_line( int *pargc , char ***pargv) { char *p; char c; int n; char prog_name[64]; char root[MAX_FILENAME_LEN]; /* decide how to process argc[0] which will be the name of the programme */ #if defined (_WINDOWS) || defined (_DOS) || defined ( WIN32 ) if ( (p = strrchr(**pargv, '\\')) != NULL ) strncpy(prog_name, p+1 , 63); else strncpy(prog_name, **pargv, 10); #elif defined (BSD) || defined(SYSV) || defined (UNIX) || defined (LINUX) /* Must be unix */ if ( (p = strrchr(**pargv, '/')) != NULL ) strncpy(prog_name, p+1, 63); else strncpy(prog_name, **pargv, 10); #elif defined(VMS) || defined (OPENVMS) /* maybe VMS or OPENVMS */ if ( (p = strrchr(**pargv, ']')) != NULL ) strncpy(prog_name, p+1, 63); else strncpy(prog_name, **pargv, 10); #else printf("UNRECOGNISED SYSTEM type won't be able to impersonate other programmes\n"); strcpy(prog_name, "codon"); /* OK I give up */ #endif if ( (p = strrchr(prog_name, '.')) != NULL ) /* remove file extension */ *p = '\0'; /* tidy.exe -> tidy */ /* first call to garg initialises the function with the command line*/ /* parameters and the number of arguments, subsequent calls strip */ /* these off one by one */ /* has the user asked for help ???????????? 
*/ if ((p = garg(*pargc, *pargv, "-h", GARG_EXACT)) || (p = garg(0, NULL, "-help", GARG_EXACT))){ printf( "codonW [inputfile] [outputfile] [bulkoutfile] [options]\n" "General options and defaults:\n" " -h(elp)\tThis help message\n" " -nomenu\tPrevent the menu interface being displayed\n" " -nowarn\tPrevent warnings about sequences being displayed\n" " -silent\tOverwrite files silently\n" " -totals\tConcatenate all genes in inputfile\n" " -machine\tMachine readable output\n" " -human\t\tHuman readable output\n" " -code N\tGenetic code as defined under menu 3 option 5\n" " -f_type N\tFop/CBI codons as defined by menu 3 option 6\n" " -c_type N\tCai fitness values as defined by menu 3 option 7\n" " -t (char)\tColumn separator to be used in output files " "(comma,tab,space)\n" "\nCodon usage indices and Amino acid indices \n" " -cai\t\tcalculate Codon Adaptation Index (CAI)\n" " -fop\t\tcalculate Frequency of OPtimal codons index (FOP)\n" " -cbi\t\tcalculate Codon Bias Index (CBI)\n" " -enc\t\tEffective Number of Codons (ENc)\n" " -gc\t\tG+C content of gene (all 3 codon positions)\n" " -gcs3\t\tGC of synonymous codons 3rd positions\n" " -sil_base\tBase composition at synonymous third codon " "positions\n" ); pause; printf( " -L_sym\t\tNumber of synonymous codons\n" " -L_aa\t\tTotal number of synonymous and non-synonymous codons\n" " -all_indices\t\tAll the above indices\n" " -aro\t\tCalculate aromaticity of protein\n" " -hyd\t\tCalculate hydropathicity of protein\n" " -cai_file {file}\tUser input file of CAI values\n" " -cbi_file {file}\tUser input file of CBI values\n" " -fop_file {file}\tUser input file of Fop values\n" "\nCorrespondence analysis (COA) options \n" " -coa_cu \tCOA of codon usage frequencies\n" " -coa_rscu\tCOA of Relative Synonymous Codon Usage\n" " -coa_aa\tCOA of amino acid usage frequencies\n" " -coa_expert\tGenerate detailed(expert) statistics on COA\n" " -coa_axes N\tSelect number of axis to record\n" " -coa_num N\tSelect number of genes to use to 
identify " "optimal codons\n" "\t\tvalues can be whole numbers or a percentage (5 or 10%%)\n" "\nBulk output options | only one can be selected per analysis\n" " -aau\t\tAmino Acid Usage (AAU)\n" " -raau\t\tRelative Amino Acid Usage (RAAU)\n" " -cu\t\tCodon Usage (CU) (default)\n" ); pause; printf( " -cutab\t\tTabulation of codon usage\n" " -cutot\t\tTabulation of dataset's codon usage\n" " -rscu\t\tRelative Synonymous Codon Usage (RSCU)\n" " -fasta\t\tfasta format\n" " -tidy\t\tfasta format\n" " -reader\tReader format (codons are separated by spaces)\n" " -transl\tConceptual translation of DNA to amino acid\n" " -base\t\tDetailed report of codon G+C composition\n" " -dinuc\t\tDinucleotide usage of the three codon pos.\n" " -noblk\t\tNo bulk output to be written to file\n" "\nWhere {file} represents an input filename, and N an integer" " value" ); pause; my_exit(99,""); /* after writing out help quit */ } /* These parameters are normally set in menu3 ie. the defaults menu */ /* for a explanation of the various GARG_FLAGS see gargs */ /* -silent stops warnings about file about to be overwritten */ if (garg(0, NULL, "-silent", GARG_THERE)) pm->verbose = FALSE; /* -total causes sequences to be concatenated and treated as one sequence */ if ( garg(0, NULL, "-total" , GARG_THERE)) pm->totals = TRUE; /* -machine or -human determines for whom the output should be formatted */ if (p = garg(0, NULL, "-human", GARG_THERE)) pm->seq_format = 'H'; if (p = garg(0, NULL, "-mach", GARG_THERE)) pm->seq_format = 'M'; /* -code determines the genetic code */ if (p = garg(0, NULL, "-code", GARG_NEXT | GARG_EXACT)) { strcpy(pm->junk, p); n=0; while ( isdigit( (int) pm->junk[n]) && pm->junk[n] != '\0') n++; if ( n != (int)strlen(pm->junk) || atoi(pm->junk) < 0 || atoi(pm->junk) > NumGeneticCodes ) { printf( "FATAL: The value for genetic code %s is invalid\n", pm->junk); my_exit(99,"Fatal error in genetic code value"); } else { pm->code = (char) atoi(p); /* define genetic code */ 
initilize_point(pm->code, pm->f_type, pm->c_type); } } /* -f_type selects which of the predefined fop values to use */ /* NB. The fop is selected with the integer value corresponding to the menu*/ /* choice under the defaults menu. It must be in the range 1-NumFopSpecies */ if (p = garg(0, NULL, "-f_type", GARG_NEXT | GARG_EXACT)) { strcpy(pm->junk, p); n = 0; while ( isdigit( (int) pm->junk[n]) && pm->junk[n] != '\0') n++; if ( n != (int)strlen(pm->junk) || atoi(pm->junk) < 0 || atoi(pm->junk) >= NumFopSpecies ) { printf("FATAL: The value for fop_type %s is not valid\n", pm->junk); my_exit(99,"Fatal error in Fop value"); } else { pm->f_type = (char) atoi(p); /* define organism type for Fop */ initilize_point(pm->code, pm->f_type, pm->c_type); } } /* -d_type selects which of the predefined CAI values to use */ /* NB. The CAI is selected with the integer value corresponding to the menu*/ /* choice under the defaults menu. It must be in the range 1-NumCAISpecies */ if (p = garg(0, NULL, "-c_type", GARG_NEXT | GARG_EXACT)) { strcpy(pm->junk,p); n = 0; while ( isdigit( (int) pm->junk[n]) && pm->junk[n] != '\0') n++; if ( n != (int)strlen(pm->junk) || atoi(pm->junk) < 0 || atoi(pm->junk) >= NumCaiSpecies) { printf("FATAL: The value for cai_type %s is not valid\n", pm->junk); my_exit(99,"Fatal error in CAI type value"); } else { pm->c_type = (char) atoi(p); /* define organism type for CAI */ initilize_point(pm->code, pm->f_type, pm->c_type); } } /* Command line arguments for the indices menu (4) */ /* The presence of any of these flags, cause the relevant indices to be */ /* calculated */ /* Indices are CAI, FOP, CBI, Nc, GC, GC3s, Lsyn, Laa, silent_base */ /* composition, hydropathicity, aromaticity */ if (p = garg(0, NULL, "-cai" , GARG_EXACT)) pm->cai = TRUE; if (p = garg(0, NULL, "-fop" , GARG_EXACT)) pm->fop = TRUE; if (p = garg(0, NULL, "-cbi" , GARG_EXACT)) pm->cbi = TRUE; if (p = garg(0, NULL, "-enc" , GARG_EXACT)) pm->enc = TRUE; if (p = garg(0, NULL, "-gc" , 
GARG_EXACT)) pm->gc = TRUE; if (p = garg(0, NULL, "-gc3s" , GARG_EXACT)) pm->gc3s = TRUE; if (p = garg(0, NULL, "-sil_base" , GARG_EXACT)) pm->sil_base = TRUE; if (p = garg(0, NULL, "-L_sym" , GARG_EXACT)) pm->L_sym = TRUE; if (p = garg(0, NULL, "-L_aa" , GARG_EXACT)) pm->L_aa = TRUE; if (p = garg(0, NULL, "-hyd" , GARG_EXACT)) pm->hyd = TRUE; if (p = garg(0, NULL, "-aro" , GARG_EXACT)) pm->aro = TRUE; /* Turns on all the above indices */ if (p = garg(0, NULL, "-all_indices" , GARG_EXACT)){ pm->cai = TRUE; pm->fop = TRUE; pm->cbi = TRUE; pm->enc = TRUE; pm->gc = TRUE; pm->gc3s = TRUE; pm->sil_base = TRUE; pm->L_sym = TRUE; pm->L_aa = TRUE; pm->hyd = TRUE; pm->aro = TRUE; } /* This section in used to input the filenames for personal choices of Fop */ /* CBI or CAI values. The name is tested to make sure the file is readable */ /* the pointer to the file is then assign to the relevant pointer in the */ /* struct Z_menu and then processed properly in codon_us.c */ /* Fop */ if (p = garg(0, NULL, "-fop_file", GARG_NEXT | GARG_EXACT)) { if ( (pm->fopfile = open_file( "", p, "r", FALSE)) == NULL ) { printf("Could not open Fop file - %s\n", p); my_exit(1,"commline open fop file"); } else strncpy(pm->fop_filen, pm->junk, MAX_FILENAME_LEN - 1); /* idiot catch, if you load personal fop values you want to calculate fop */ pm->fop=TRUE; } /* CAI */ if (p = garg(0, NULL, "-cai_file", GARG_NEXT | GARG_EXACT)) { if ( (pm->caifile = open_file( "", p, "r", FALSE)) == NULL ) { printf("Could not open CAI file - %s\n", p); my_exit(1,"commline failed error"); } else strncpy(pm->cai_filen, pm->junk, MAX_FILENAME_LEN - 1); pm->cai=TRUE; /* idiot catch */ } /* CBI */ if (p = garg(0, NULL, "-cbi_file", GARG_NEXT | GARG_EXACT)) { if ( (pm->cbifile = open_file( "", p, "r", FALSE)) == NULL ) { printf("Could not open CBI file - %s\n", p); my_exit(1,"Commline failed to open file"); } else strncpy(pm->cbi_filen, pm->junk, MAX_FILENAME_LEN - 1); pm->cbi = TRUE; /* idiot catch */ } /* This section 
changes the default correspondence menu choices normally */ /* set in menu menu 5. */ /* Note only one of -coa_cu -coa_rscu -coa_aa can be chosen */ if (p = garg(0, NULL, "-coa_cu" , GARG_EXACT)) pm->coa = 'c'; if (p = garg(0, NULL, "-coa_rscu" , GARG_EXACT)) pm->coa = 'r'; if (p = garg(0, NULL, "-coa_aa" , GARG_EXACT)) pm->coa = 'a'; if (p = garg(0, NULL, "-coa_expert", GARG_EXACT)) /* detailed inertia */ (coa.level='e'); /* analysis */ /* These are options selectable under the advanced COA menu */ /* This first option -coa_axes changes the number of axis recorded to file */ if (p = garg(0, NULL, "-coa_axes", GARG_NEXT | GARG_EXACT)){ if ( isdigit( (int) *p) ){ n = (char)atoi(p); /* just check that correspondence analysis has been selected */ if ( pm->coa == 'a' && (n > 20 || n<0) || ( n<0 || n>59 )) { fprintf(pm->my_err,"Value %d is out of range for Number COA Axis " "adjusting to max value\n",n); if ( pm->coa == 'a' ) pcoa->axis = 20; else pcoa->axis = 59; }else{ pcoa->axis = (char) n; } } } /* Select the size of dataset to use to identify optimal codons */ if (p = garg(0, NULL, "-coa_num", GARG_NEXT|GARG_EXACT)) { strcpy (pm->junk,p) ; if( (p=strchr ( pm->junk,'%')) != NULL) { p='\0'; pcoa->fop_gene=atoi(pm->junk)*-1; }else { pcoa->fop_gene=atoi(pm->junk); } } /* These option are mutually exclusive and are normally selected using the */ /* the bulk output menu (menu 8) */ if ( p = garg(0, NULL, "-raau", GARG_EXACT)) pm->bulk = 'L'; if ( p = garg(0, NULL, "-cu" , GARG_EXACT)) pm->bulk = 'C'; if ( p = garg(0, NULL, "-cutab", GARG_THERE)) pm->bulk = 'O'; if ( p = garg(0, NULL, "-cutot", GARG_THERE)){ pm->bulk = 'C'; pm->totals =TRUE; } if ( p = garg(0, NULL, "-reader", GARG_EXACT)) pm->bulk = 'R'; if ( p = garg(0, NULL, "-rscu", GARG_EXACT)) pm->bulk = 'S'; if ( p = garg(0, NULL, "-tidy", GARG_EXACT)) pm->bulk = 'T'; if ( p = garg(0, NULL, "-fasta", GARG_EXACT)) pm->bulk = 'T'; if ( p = garg(0, NULL, "-aau", GARG_EXACT)) pm->bulk = 'A'; if ( p = garg(0, NULL, 
"-transl", GARG_THERE)) pm->bulk = 'N'; if ( p = garg(0, NULL, "-base", GARG_THERE)) pm->bulk = 'B'; if (p = garg(0, NULL, "-dinuc", GARG_THERE)) pm->bulk = 'D'; if (p = garg(0, NULL, "-noblk", GARG_EXACT)) pm->bulk = 'X'; /* -t is used to change the column separator used in the output files */ /* at present it must be a space, tab or comma */ /* Must occur after -transl or it misreads transl as a seperator */ if (p = garg(0, NULL, "-t" , GARG_NEXT | GARG_SUBSQ)) { strcpy(pm->junk, p); n = 0; do { c = pm->junk[n++]; } while ( strchr("'\"\0", (int) c) != NULL ); if ( strchr ("\t, ", (int) c) == NULL ) { printf( "WARNING: The chosen separator %s is unsuitable use" "comma, tab or space\n", pm->junk); } else { pm->seperator = c; } } /* These options are commandline specific, ie. they do not have an */ /* menu option */ /* prevents the menu system from being displayed, everything is */ /* assumed to have been given on the command line */ if (p = garg(0, NULL, "-nomenu", GARG_EXACT)) pm->menu = FALSE; /* prevents warnings about possible problems with the sequence data */ /* being displayed, i.e. partial codons, stop codons, start codons */ if (p = garg(0, NULL, "-nowarn", GARG_THERE)) pm->warn = FALSE; /* This section tries to identify the name used to call CodonW and it that*/ /* name concurs with one of those tested for, certain commandline options */ /* are assumed and the programme becomes much less interactive */ /* First step is to convert programme name to lower case */ for ( n=0; *(prog_name + n) != '\0'; n++) *(prog_name + n) = (char) tolower( (int) *(prog_name + n)); /* special options designed to unify code used by several auxiliary */ /* programmes. 
In essence CodonW will impersonate other commonly used */ /* codon usage analysis programmes if called using a special name */ if ( !strcmp(prog_name, "raau" ) ) pm->bulk = 'L'; else if ( !strcmp(prog_name, "cu" ) ) pm->bulk = 'C'; else if ( !strcmp(prog_name, "cutab" ) ) pm->bulk = 'O'; else if ( !strcmp(prog_name, "reader") ) pm->bulk = 'R'; else if ( !strcmp(prog_name, "rscu" ) ) pm->bulk = 'S'; else if ( !strcmp(prog_name, "tidy" ) ) pm->bulk = 'T'; else if ( !strcmp(prog_name, "aau" ) ) pm->bulk = 'A'; else if ( !strcmp(prog_name, "dinuc" ) ) pm->bulk = 'D'; else if ( !strcmp(prog_name, "transl") ) pm->bulk = 'N'; else if ( !strcmp(prog_name, "bases" ) ) pm->bulk = 'B'; else if ( !strcmp(prog_name, "base3s") ) { pm->prog = 's' ; pm->menu = FALSE; pm->sil_base = TRUE; } else if ( !strcmp(prog_name, "cai" ) ) { pm->prog = 'c'; pm->menu = FALSE; pm->cai = TRUE; } else if ( !strcmp(prog_name, "fop" ) ) { pm->prog = 'f'; pm->menu = FALSE; pm->fop = TRUE; } else if ( !strcmp(prog_name, "gc3s" ) ) { pm->prog = '3'; pm->menu = FALSE; pm->gc3s = TRUE; } else if ( !strcmp(prog_name, "gc" ) ) { pm->prog = 'g'; pm->menu = FALSE; pm->gc = TRUE; } else if ( !strcmp(prog_name, "enc" ) ) { pm->prog = 'e'; pm->menu = FALSE; pm->enc = TRUE; } else if ( !strcmp(prog_name, "cbi" ) ) { pm->prog = 'i'; pm->menu = FALSE; pm->cbi = TRUE; } else if ( !strcmp(prog_name, "cutot" ) ) { pm->bulk = 'C'; pm->menu = FALSE; pm->totals =TRUE; } else { pm->codonW=TRUE; /* if argc[0] is not recognised assume codons*/ /* if blk output is still X then assume cu */ if (pm->bulk=='X') pm->bulk='C'; } if (!pm->codonW ) { /* we appear to be impersonating another prog*/ /* now we switch to the correct greeting */ if (pm->bulk && pm->bulk!='X'){ pm->seperator='\000'; /* stop chars being converted by tidy*/ switch (pm->bulk) { case 'R': printf("\t\t\tREADER Formatting Program\n"); break; case 'T': printf("\t\t\tTIDY Formatting Program\n"); break; case 'S': printf("\tRelative Synonymous Codon Usage\n"); 
break; case 'B': printf("\t\t\tBase composition calculation\n"); break; case 'C': (pm->totals)? printf("\t\t\tTotal Codon Usage Tabulation\n"): printf("\t\t\tCodon Usage\n"); break; case 'L': printf("\tRelative Amino Acid Usage Calculating \n"); break; case 'D': printf("\t\t\tDi-Nucleotide frequencies Program\n"); break; case 'A': printf("\t\t\tAmino Acid Usage Calculating Program\n"); break; case 'N': printf("\t\tDNA 2 AA translating Program\n"); break; case 'O': printf("\tCodon usage tabulation Program\n"); break; case 'G': printf("\tTotal Codon usage tabulation\n"); break; default: { fprintf(stderr, "Sorry:- could not recognise BULK option" " -%c (Use -h for help)", pm->bulk); my_exit(99,"bad option commandline"); } } } else if (pm->prog) switch (pm->prog) { case 's': printf("\t\t\tSilent base G+C+A+T Calculating Program\n"); break; case 'e': printf("\t\tNc Calculating Program\n"); break; case 'f': printf("\t\tFop Calculating Program\n"); break; case 'c': printf("\t\t\tCAI Calculating Program\n"); break; case '3': printf("\t\t\tGC3s Calculating Program\n"); break; case 'g': printf("\t\t\tG+C Calculating Program\n"); break; case 'i': printf("\t\t\tCodon Bias Index Calculating Program\n"); break; default: { fprintf(stderr, "Sorry:- could not recognise "); fprintf(stderr, "argument -%c (Use -h for help)", pm->prog); my_exit(99,"commline"); } /* matches default */ } else fprintf( stderr, "unknown error type in commline.c" ); } /* matched if (!pm->codonW) */ /* By this point we should have processed all the command line arguments */ /* so now we test for any remaining, these are unrecognised */ while (p = garg(0, NULL, "-", GARG_THERE)) if ( pm->menu ) printf("Unrecognised argument %s\n", p); else { /* if we are running without a menu then abort this run */ sprintf ( pm->junk,"Unrecognised argument %s", p); my_exit ( 99 , pm->junk); } /* Anything remaining should be file names */ /* The first name should be the input file name */ if ( p = garg(0, NULL, "", 
GARG_THERE)) { if ( (pm->inputfile = open_file( "", p, "r", FALSE)) == NULL ) { printf("Could not open input file - %s\n", p ); my_exit(1,"failed to open file in proc_commline"); } else strncpy(pm->curr_infilename, pm->junk, MAX_FILENAME_LEN - 1); } /* The second should be the output filename */ if ( p = garg(0, NULL, "", GARG_THERE)) { if ( (pm->outputfile = open_file( "", p, "w", (int) pm->verbose)) == NULL ) { printf("Could not open output file - %s\n", p ); my_exit(1,"commline out file"); } else strncpy(pm->curr_outfilename, pm->junk, MAX_FILENAME_LEN - 1); } /* The third which only occurs if the programme is running as CodonW */ if ( pm->codonW && (p = garg(0, NULL, "", GARG_THERE)) ) { if ( (pm->tidyoutfile = open_file( "", p, "w", (int) pm->verbose)) == NULL ) { printf("Could not open blkoutput file - %s\n", p ); my_exit(1,"commline blk outfile"); } else strncpy(pm->curr_tidyoutname, pm->junk, MAX_FILENAME_LEN - 1); } /* Now check the command line is empty ... it should be at this point */ while (p = garg(0, NULL, "", GARG_THERE)) printf("This command line parameter was not recognised %s\n", p); /* IF no file name was found on the command line and the programme is */ /* impersonating another programme or we decided not to use the menu */ /* we need to load an input file name */ if ( (!pm->codonW || !pm->menu) && !pm->inputfile ){ if ( (pm->inputfile = open_file( "input filename", "input.dat", "r", FALSE)) == NULL ) { printf("Could not open input file - %s\n", p ); my_exit(1,"commline inputfile"); } strncpy(pm->curr_infilename, pm->junk, MAX_FILENAME_LEN - 1); } /* If we have an input filename but no output then we must prompt for the */ /* output filename */ if ( pm->inputfile && !pm->outputfile ) { /* If we are trying to impersonate another programme use this method*/ /* but make sure that we know what this other programme is called */ if ( !pm->codonW && strlen (prog_name) ){ strcpy(pm->curr_outfilename, prog_name); strcat(pm->curr_outfilename, ".def"); } 
else { /* Use the input filename as a root filename */ strncpy(root, pm->curr_infilename, MAX_FILENAME_LEN - 5); for (n = (int) strlen(root); n && root[n]!='.' ; --n); if ( n ) root[n] = '\0'; /* find root of filename */ strcpy(pm->curr_outfilename, root); strcat(pm->curr_outfilename, ".out"); } /* matchs else */ /* now we know the suggested name for the output file lets open it */ if ( pm->verbose ) { if ( (pm->outputfile = open_file( "indices output filename", pm->curr_outfilename, "w",(int) pm->verbose)) == NULL ) my_exit(1,"commline"); strncpy(pm->curr_outfilename, pm->junk, MAX_FILENAME_LEN - 1); }else{ if ( (pm->outputfile = open_file( "", pm->curr_outfilename, "w",(int) pm->verbose)) == NULL ) my_exit(1,"commline"); strncpy(pm->curr_outfilename, pm->junk, MAX_FILENAME_LEN - 1); } } /* match if ( pm->inputfile */ /* we had a commandline inputfile name and output filename but none */ /* for bulkoutput .. we prompt to save having to use menu 1 */ if ( pm->inputfile && ! pm->tidyoutfile ){ if ( pm->codonW ) { /* Use the input filename as a root filename */ strncpy(root, pm->curr_infilename , MAX_FILENAME_LEN - 5); for (n = (int) strlen(root); n && root[n]!='.' 
; --n); if ( n ) root[n] = '\0'; /* find root of filename */ strcpy(pm->curr_tidyoutname, root); strcat(pm->curr_tidyoutname, ".blk"); /* now we know the suggested name for the output file lets open it */ if( pm->verbose) { if ( (pm->tidyoutfile = open_file( "bulk output filename", pm->curr_tidyoutname, "w",(int) pm->verbose)) == NULL ) my_exit(1,"commline"); strncpy(pm->curr_tidyoutname, pm->junk, MAX_FILENAME_LEN - 1); }else{ if ( (pm->tidyoutfile = open_file( "", pm->curr_tidyoutname, "w",(int) pm->verbose)) == NULL ) my_exit(1,"commline"); strncpy(pm->curr_tidyoutname, pm->junk, MAX_FILENAME_LEN - 1); } }else{ /* only use one output file when impersonating other programmes */ /* just in case we make blkout and output the same file */ pm->tidyoutfile = pm->outputfile; } } return 1; } /****************** Garg ***********************************************/ /* This subroutine strips of the commandline arguments and passes them back*/ /* to the calling function. Each time it is called with argc and argv non */ /* null the commandline is refreshed. If called with these are null args */ /* a commandline pre-stored is used, this commandline is striped arg by arg*/ /* as they are identified */ /* This subroutine was developed as a collaboration with Colin McFarlane */ /* GARG_EXACT The argument must match targ exactly */ /* GARG_THERE The targ may be sub-string of the argument */ /* GARG_SUBSQ The string immediate after targ is returned */ /* GARG_NEXT The next argument after targ is returned */ /* else return NULL */ /***************************************************************************/ char *garg(int argc, char *argv[], const char *targ, int mode) { static char *argw[MAX_ARGS]; static int done[MAX_ARGS]; static int argn; int arg = 1, nc; if (argv) { if (--argc < 1) return NULL; for (argn = 0; argn < argc; argn++) { argw[argn] = argv[argn + 1]; done[argn] = 0; } } nc = mode & GARG_EXACT ? 
BUFSIZ : strlen(targ); for (arg = 0; arg < argn; arg++) if ((0 == strncmp(targ, argw[arg], nc)) && !done[arg]) { done[arg] = 1; if (mode & GARG_THERE) return argw[arg]; if (mode & GARG_SUBSQ) return &argw[arg][nc]; if (mode & GARG_NEXT) { done[++arg < argn ? arg : --arg] = 1; return argw[arg]; } return argw[arg]; } return NULL; } codonW/coresp.c 777 0 0 150720 10237502060 7044 0/**************************************************************************/ /* CodonW codon usage analysis package */ /* Copyright (C) 2005 John F. Peden */ /* This program is free software; you can redistribute */ /* it and/or modify it under the terms of the GNU General Public License */ /* as published by the Free Software Foundation; version 2 of the */ /* License, */ /* */ /* This program is distributed in the hope that it will be useful, but */ /* WITHOUT ANY WARRANTY; without even the implied warranty of */ /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ /* GNU General Public License for more details. */ /* You should have received a copy of the GNU General Public License along*/ /* with this program; if not, write to the Free Software Foundation, Inc.,*/ /* 675 Mass Ave, Cambridge, MA 02139, USA. */ /* */ /* */ /* The author can be contacted by email (jfp#hanson-codonw@yahoo.com Anti-*/ /* Spam please change the # in my email to an _) */ /* */ /* For the latest version and information see */ /* http://codonw.sourceforge.net */ /**************************************************************************/ /* This file contains source code for */ /* the core functions involved in correspondence */ /* analysis, this code was originally written */ /* by Jean Thioulouse */ /* ADE software: multivariate analysis and graphical */ /* display of environmental data */ /* IN Guariso,G and Rizzoli, A (eds), */ /* Software per l'Ambiente. Patron editor, Bolonia, pp.57-62. 
*/ /* */ /* and is used with kind permission */ /* */ /* It has however been extensively modified to integrate it */ /* as seamlessly as practical into CodonW and as such can no */ /* longer be considered as a stand alone package */ /* */ /* Originally written as a general Multivariate analysis (MVA) */ /* package, it is now hardwired specifically for codon or amino */ /* acid usage analysis */ /* */ /* All unnecessary functions have been removed */ /* Originally each data file had an associated resource file */ /* which described required parameters */ /* The need for these files has been removed */ /* */ /**************************************************************************/ /* Functions */ /* textbin converts codon usage to binary data file */ /* */ /**************************************************************************/ #include #include #include #include #include #include "codonW.h" /*************** textbin *****************************************/ /* examines the struct pcoa to see which codons/amino acids are to be inc */ /* in the analysis. 
It then writes this data to a binary file */
/* it also counts the amino acid and codon usage of each gene             */
/*                                                                        */
/* fileread  name of the text file to read: for each of the pcoa->rows    */
/*           genes one label followed by 21 (pm->coa == 'a') or 64        */
/*           integer counts                                               */
/* fileout   name of the binary file that receives one vector per gene    */
/*           (written with writevec)                                      */
/* pcoa->colm is set to the number of amino acids/codons included         */
/**************************************************************************/
void textbin(char *fileread, char *fileout)
{
  double *vlec;
  int     v2;
  int     i, j, x;

  /* count the columns taking part in the analysis                        */
  pcoa->colm = 0;
  if ( pm->coa == 'a' ) {
    for ( i = 1; i < 22; i++)
      if ( pcoa->amino[i] ) pcoa->colm++;   /* number of colms in analysis */
  } else {
    for ( i = 1; i < 65; i++)
      if ( pcoa->codons[i] ) pcoa->colm++;  /* number of colms in analysis */
  }

  vecalloc(&vlec, pcoa->colm);              /* allocate an array           */

  /* open the input and output files                                      */
  if ( (pm->fcoa_in = open_file( "", fileread, "r", FALSE)) == NULL ) {
    fprintf(pm->my_err,"(txt2bin)");
    my_exit(1,"txt2bin");
  }
  if ( (pm->fcoa_out = open_file( "", fileout, "wb", FALSE)) == NULL ) {
    fprintf(pm->my_err,"(txt2bin)");
    my_exit(6,"fileout");
  }

  for (i = 1; i <= pcoa->rows; i++) {       /* pcoa-rows is the No of genes */
    /* every row starts with a label which is read and discarded          */
    /* NOTE(review): %s is unbounded, pm->junk must be large enough       */
    if ( fscanf(pm->fcoa_in, "%s", pm->junk) != 1 )
      my_exit (99,"Fatal Error in txt2bin");

    /* read the data from coa_raw into the array vlec (1 based)           */
    switch (pm->coa) {
    case 'a':
      /* x counts the columns stored, so it must equal colm afterwards    */
      /* whether or not the last amino acid takes part in the analysis    */
      for (j = 1, x = 0; j <= 21; j++) {
        if ( fscanf(pm->fcoa_in, "%i", &v2) != 1 )
          my_exit (99,"Fatal Error in txt2bin");
        if ( pcoa->amino[j] ) vlec[++x] = (double) v2;
      }
      if ( x != pcoa->colm ) my_exit (99,"Fatal Error in txt2bin");
      break;
    case 'c':
      for (j = 1, x = 0; j <= 64; j++) {
        if ( fscanf(pm->fcoa_in, "%i", &v2) != 1 )
          my_exit (99,"Fatal Error in txt2bin");
        if ( pcoa->codons[j] ) vlec[++x] = (double) v2;
      }
      if ( x != pcoa->colm ) my_exit (99,"Fatal Error in txt2bin");
      break;
    case 'r':
      clean_up ( ncod , naa );
      for (j = 1; j <= 64; j++) {
        if ( fscanf(pm->fcoa_in, "%i", &v2) != 1 )
          my_exit (99,"Fatal Error in txt2bin");
        naa[pcu->ca[j]] += v2;              /* count amino acids           */
        ncod[j]          = v2;              /* count codons                */
      }
      /* store codon count / amino acid count * ds[j] for each included   */
      /* codon, or zero when the amino acid does not occur in the gene    */
      for (j = 1, x = 0; j <= 64; j++) {
        if ( pcoa->codons[j] ) {
          ++x;
          vlec[x] = (double) ((naa[pcu->ca[j]])?
                    (float) ncod[j]/naa[pcu->ca[j]]*(float)( *(ds+j) ):
                    0.00);
        }
      }
      break;
#ifdef DEBUG
    default:
      fprintf(pm->my_err,"error in textbin %c unknown \n",pm->coa );
      break;
#endif
    }                                       /* end of switch               */
    writevec(vlec, pm->fcoa_out);
  }

  /* close files and release memory and return                            */
  fileclose(&pm->fcoa_in);
  fileclose(&pm->fcoa_out);
  free (vlec);
}
 /*************** colmout *****************************************/ /* The user has already decided how many axis to be recorded to file */ /* this value is stored in pcoa->axis. After the analysis is complete the */ /* output data is stored in several binary formatted file. In this case */ /* nfice and nfics points at the file names. */ /* For each axis that has been requested to be recorded, the position */ /* of each column (either amino or codon ) is read from the binary file */ /* and converted into an easily read text file, which is pointed */ /* at by nfics and the summary file pointed at by summary. */ /**************************************************************************/ void colmout(char *nfice, char *nfics,AMINO_STRUCT *ppaa, FILE *summary) { double *vlec; int col, lig=0; FILE *fice=NULL, *fics=NULL; float v2; int x,i,j; char sp=pm->seperator; lig=pcoa->colm; col=pcoa->axis; /* number of axis */ vecalloc(&vlec, col); if( (fice=open_file("",nfice,"rb",FALSE))==NULL) my_exit(6,"nfice2"); if( (fics=open_file("",nfics, "w",FALSE))==NULL) my_exit(1,"nfics2"); fprintf(summary,"\n\nThe position of each %s by axis \n" "also see %s for seperate output\n", (pm->coa=='a')?
"amino acid":"codon",nfics); fprintf(fics , "%s","label"); fprintf(summary, "%-20.20s","label"); for (j=1;j<=col;j++) { fprintf(fics , "%c%s%d",sp,"Axis",j); fprintf(summary, "%c%9s%d",sp, "Axis",j); } fprintf(fics , "\n"); fprintf(summary, "\n"); i=0; x=1; while( x<=lig ) { /* only write out for the columns analysed */ if( pm->coa == 'a' ) { while ( !pcoa->amino[++i] ); /* skip amino if false */ fprintf(fics , "%s%c",ppaa->aa3[i],sp ); fprintf(summary, "%-20.20s%c",ppaa->aa3[i],sp ); x++; }else{ while ( !pcoa->codons[++i] ); /* skip codon if false */ fprintf(fics , "%s%c",ppaa->cod[i],sp); fprintf(summary , "%-20.20s%c",ppaa->cod[i],sp); x++; } readvec(vlec, fice); for (j=1;jaxis is converted from a binary text file to an ASCII file as */ /* well as the summary file */ /**************************************************************************/ void rowout(char *nfice, char *nfics, char *ncout, FILE *summary) { double *vlec, *ax1; int col, lig,*sortax1; FILE *fice=NULL, *fics=NULL, *fnam=NULL; float v2; int i,j; char sp=pm->seperator; lig=pcoa->rows; col=pcoa->axis; vecalloc(&vlec, col); vecalloc(&ax1 , lig); if( (sortax1= (int *) calloc(lig+1,sizeof(int)))==NULL) my_exit(3,"sortax1"); if( (fice=open_file("",nfice,"rb",FALSE))==NULL) my_exit(6,"nfice3"); if( (fics=open_file("",nfics, "w",FALSE))==NULL) my_exit(1,"nfics3"); if( (fnam=open_file("",ncout, "r",FALSE))==NULL) my_exit(6,"ncout3"); fprintf(summary,"\n\nThe position of each gene by axis \n" "(see also %s)\n",nfics); fprintf(fics , "%s%c","label",sp); fprintf(summary, "%-20.20s%c","label",sp); for (j=1;j<=col;j++) { fprintf(fics , "%s%d%c","Axis",j,sp); fprintf(summary, "%9s%d%c", "Axis",j,sp); } fprintf(fics , "\n"); fprintf(summary, "\n"); for (i=1;i<=lig;i++) { fgets(pm->junk,BUFSIZ,fnam); pm->junk[35]='\0'; for ( j=35 ; j>=0; j--) if ( isspace( (int) pm->junk[j]) ) pm->junk[j]='\0'; fprintf(fics , "%s%c",pm->junk,sp); fprintf(summary, "%-20.20s%c",pm->junk,sp); readvec(vlec, fice); for (j=1;jcoa != 'a' 
) { sorted_by_axis1 ( ax1, sortax1, lig); gen_cusort_fop ( sortax1, lig, fnam, summary ); } fileclose(&fics); fileclose(&fice); fileclose(&fnam); free(ax1); free(sortax1); free(vlec); } /************** vecalloc *****************************************/ /* Allocate memory for a vector of size n and assign that memory to the */ /* pointer to a pointer vac */ /**************************************************************************/ void vecalloc (double **vec, int n) { if ( (*vec = (double *) calloc(n+1, sizeof(double))) != NULL) { **vec = n; return; } else my_exit(3,"vecalloc"); } /************** writevec *****************************************/ /* Write out the value of the vector v1 to a binary file fic */ /**************************************************************************/ void writevec(double *v1, FILE *fic) { float v2; int i, c1; c1 = (int) v1[0]; /* Num of vectors */ for (i=1;i<=c1;i++) { v2 = (float) v1[i]; if ( fwrite((const char *)&v2, 4, 1, fic) != 1) my_exit(4,"writevec"); } } /************** PrepAFC *****************************************/ /* Calculated Distance matrix for values in contingency table */ /* Values are first scaled by n (where n is the total usage of a row or */ /* column */ /**************************************************************************/ void PrepAFC(char *nfic) { char bid[17]; int i, j; double **w; double *poili, *poico; double a1, a2, x1, n; /*-------------------------------------------------------------------------*/ vecalloc(&poili, pcoa->rows); vecalloc(&poico, pcoa->colm); taballoc(&w, pcoa->rows, pcoa->colm); lecmat(w, nfic); n = 0; for (i=1;i<=pcoa->rows;i++) { a1 = 0.0; a2 = 0.0; for (j=1;j<=pcoa->colm;j++) { x1 = w[i][j]; a1 = a1 + x1; poico[j] = poico[j] + x1; } n = n + a1; poili[i] = a1; } /* scale the vectors, and matrix */ scalvec(poili, 1.0/n); scalvec(poico, 1.0/n); scalmat(w, 1.0/n); strcpy(bid,"cbfcpl"); ecrvec(poili, bid); strcpy(bid,"cbfcpc"); ecrvec(poico, bid); 
/*-------------------------------------------------------------------------*/ for (i=1;i<=pcoa->rows;i++) { a1 = poili[i]; if (a1 != 0.0) { for (j=1;j<=pcoa->colm;j++) { a2 = poico[j]; if (a2 != 0) w[i][j] = w[i][j] / a1 / a2 - 1; } } } strcpy(bid,"cbfcta"); ecrmat(w, bid); /*-------------------------------------------------------------------------*/ freetab(w); freevec(poili); freevec(poico); pcoa->inertia = (float) inertot (); } /************** inertot ********************************************/ /* Calculate total data inertia */ /***************************************************************************/ double inertot ( void ) { int i, j; double **tab; double *pl, *pc; double a1, s1, inertia; taballoc (&tab, pcoa->rows, pcoa->colm); vecalloc (&pc, pcoa->colm); vecalloc (&pl, pcoa->rows); lecmat (tab,"cbfcta"); lecvec(pl, "cbfcpl"); lecvec(pc, "cbfcpc"); inertia = 0; for (i=1;i<=pcoa->rows;i++) { a1 = pl[i]; for (j=1;j<=pcoa->colm;j++) { s1 = tab[i][j]; inertia = inertia + s1 * s1 * a1 * pc[j]; } } freetab(tab); freevec(pl); freevec(pc); return inertia; } /************** lecmat *****************************************/ /* Opens binary file nfic, reads the values it contains and records them */ /* in the matrix pointed to by tab */ /**************************************************************************/ void lecmat (double **tab, char *nfic) { int i, j, l1, c1; float v2; FILE *fic=NULL; l1 = (int) tab[0][0]; c1 = (int) tab[1][0]; if( (fic=open_file("",nfic,"rb",FALSE))==NULL) my_exit(1,"lecmat"); for (i=1;i<=l1;i++) { for (j=1;j<=c1;j++) { if ( fread((char *)&v2, 4, 1, fic) != 1) { fprintf(pm->my_err,"Error: can't read matrix (lecmat)"); my_exit(5,"lecmat"); } tab[i][j] = v2; } } fileclose(&fic); } /************** freetab *****************************************/ /* Releases memory dynamically allocated to a table tab(x,y) */ /**************************************************************************/ void freetab (double **tab) { int i, n; n = (int) 
*(*(tab)); /* number of rows in table */ for (i=0;i<=n;i++) { free((char *) *(tab+i) ); } free((char *) tab); } /************** freevec *****************************************/ /* Releases memory dynamically allocated to a vector */ /**************************************************************************/ void freevec (double *vec) { free((char *) vec); } /************** taballoc *****************************************/ /* Dynamically allocates memory to the table tab(l1,c1) */ /**************************************************************************/ void taballoc (double ***tab, int l1, int c1) { int i; if ( (*tab = (double **) calloc(l1+1, sizeof(double *))) != NULL) { for (i=0;i<=l1;i++) { if ( (*(*tab+i)=(double *) calloc(c1+1, sizeof(double))) == NULL ) { fprintf(pm->my_err,"(taballoc)"); my_exit(3,"taballoc"); } } } else {fprintf(pm->my_err,"(taballoc)"); my_exit(3,"taballoc2"); } **(*tab) = l1; **(*tab+1) = c1; } /************** lecvec *****************************************/ /* Reads vectors from filename *nfic and assigns them to a vector */ /**************************************************************************/ void lecvec (double *v1, char *nfic) { float v2; int i, c1; FILE *fic=NULL; if( (fic=open_file("",nfic,"rb",FALSE))==NULL) my_exit(6,"lecvec"); c1 = (int) v1[0]; for (i=1;i<=c1;i++) { if ( fread((char *)&v2, 4, 1, fic) != 1){ fprintf(pm->my_err,"(lecvec)"); my_exit(5,"lecvec"); } v1[i] = v2; } fileclose(&fic); } /************** ecrmat ******************************************/ /* Writes the table pointed to by **tab to the binary filename *nfic */ /**************************************************************************/ void ecrmat (double **tab, char *nfic) { int i, j, l1, c1; float v2; FILE *fic=NULL; l1 = (int)tab[0][0]; c1 = (int)tab[1][0]; if( (fic=open_file("",nfic,"wb",FALSE))==NULL) my_exit(1,"ecrmat"); for (i=1;i<=l1;i++) { for (j=1;j<=c1;j++) { v2 = (float)tab[i][j]; if ( fwrite((const char *)&v2, 4, 1, fic) != 1) { 
fprintf(pm->my_err,"(ecrmat)"); my_exit(4,"ecrmat"); } } } fileclose(&fic); } /************** ecrvec ******************************************/ /* Writes the pointer pointed to by *v1 to the binary file *nfic */ /**************************************************************************/ void ecrvec (double *v1, char *nfic) { float v2; int i, c1; FILE *fic=NULL; c1 = (int)v1[0]; if( (fic=open_file("",nfic,"wb",FALSE))==NULL) my_exit(1,"ecrvec"); for (i=1;i<=c1;i++) { v2 = (float)v1[i]; if ( fwrite((const char *)&v2, 4, 1, fic) != 1){ fprintf(pm->my_err,"(ecrvec)"); my_exit(4,"ecrvec"); } } fileclose(&fic); } /************** scalmat ******************************************/ /* Scale the matrix pointed to by **tab by r */ /**************************************************************************/ void scalmat (double **tab, double r) { int l1, c1, i, j; l1 = (int) tab[0][0]; c1 = (int) tab[1][0]; for (i=1;i<=l1;i++) { for (j=1;j<=c1;j++) { tab[i][j] = tab[i][j] * r; } } } /************** scalvec ******************************************/ /* Scale the vector pointed to by *v1 by r */ /**************************************************************************/ void scalvec (double *v1, double r) { int i, c1; c1 = (int) v1[0]; for (i=1;i<=c1;i++) { v1[i] = v1[i] * r; } } /************** DiagoRC ******************************************/ /* This function generates/calculates the correspondence analysis factors */ /**************************************************************************/ void DiagoRC ( FILE *summary) { int lcmin, rang, f1, i, j, k; double **w, **ctab, **auxi, **vp1, **vp2; double *poili, *poico, *l; double s, s1, a1, inertotal; lcmin = pcoa->colm; if (pcoa->rows < pcoa->colm) lcmin = pcoa->rows; taballoc(&w, pcoa->rows, pcoa->colm); taballoc(&ctab, lcmin, lcmin); taballoc(&auxi, lcmin, 2); vecalloc(&poili, pcoa->rows); vecalloc(&poico, pcoa->colm); vecalloc(&l, lcmin); lecvec(poili, "cbfcpl"); sqrvec(poili); lecvec(poico, "cbfcpc"); sqrvec(poico); 
lecmat(w, "cbfcta"); inertotal=0; for (i=1;i<=pcoa->rows;i++) { a1 = poili[i]; for (j=1;j<=pcoa->colm;j++) { s1 = w[i][j] * a1 * poico[j]; w[i][j] = s1; s1 = s1 * s1; inertotal = inertotal + s1; } } fprintf(summary,"The total inertia of the data was %f\n",inertotal); fprintf(summary, "\nExplanation of the variation by axis " "(see also eigen.coa)\n"); /* prodmatAAtB and prodmatAtAB calc product of the scaled distance matrix */ /* DiagoComp diagnolises the product matrix ctab */ /* editvalpro output the eigen values */ dot(1,10); if (pcoa->rows < pcoa->colm) { prodmatAAtB(w, ctab); DiagoComp(pcoa->rows, ctab, l, &rang); f1=pcoa->axis; editvalpro(summary, l, pcoa->rows, inertotal); for (j=1;j<=pcoa->rows;j++) { auxi[j][1] = l[j]; auxi[j][2] = l[j]/inertotal; } sqrvec(l); } else { prodmatAtAB(w, ctab); DiagoComp(pcoa->colm, ctab, l, &rang); f1=pcoa->axis; editvalpro(summary, l, pcoa->colm, inertotal); for (j=1;j<=pcoa->colm;j++) { auxi[j][1] = l[j]; auxi[j][2] = l[j]/inertotal; } sqrvec(l); } if (f1==0) { if (lcmin == 1) f1 = 1; else f1 = 2; } /* output the relative inertia values */ ecrmat(auxi, "cbfcvp"); /* Calculate the factorial coordinates */ if (pcoa->rows < pcoa->colm) { taballoc(&vp2, pcoa->colm, f1); for (j=1;j<=pcoa->colm;j++) { for (k=1;k<=f1;k++) { s = 0; for (i=1;i<=pcoa->rows;i++) { s = s + w[i][j] * ctab[i][k]; } vp2[j][k] = s; } } for (i=1;i<=pcoa->colm;i++) { if (poico[i] != 0) { for (j=1;j<=f1;j++) { vp2[i][j] = vp2[i][j] / poico[i]; } } } for (i=1;i<=pcoa->rows;i++) { if (poili[i] != 0) { for (j=1;j<=pcoa->rows;j++) { ctab[i][j] = ctab[i][j] * l[j] / poili[i]; } } } ecrmatred(ctab, f1, "cbfcli"); ecrmatred(vp2, f1, "cbfcco"); freetab(vp2); } else { taballoc(&vp1, pcoa->colm, f1); taballoc(&vp2, pcoa->rows, f1); for (i=1;i<=pcoa->colm;i++) { for (j=1;j<=f1;j++) { vp1[i][j] = ctab[i][j]; } } prodmatABC(w, vp1, vp2); for (i=1;i<=pcoa->rows;i++) { if (poili[i] != 0.0) { for (j=1;j<=f1;j++) { vp2[i][j] = vp2[i][j] / poili[i]; } } } for 
(i=1;i<=pcoa->colm;i++) { if (poico[i] != 0) { for (j=1;j<=rang;j++) { ctab[i][j] = ctab[i][j] * l[j] / poico[i]; } } } ecrmat(vp2, "cbfcli"); ecrmatred(ctab, f1, "cbfcco"); freetab(vp1); freetab(vp2); } goto fin; /* free memory */ fin: freetab(w); freetab(ctab); freetab(auxi); freevec(poili); freevec(poico); freevec(l); } /* End of DiagoRC */ /************** sqrvec ******************************************/ /* This function calculates the square root of a vector */ /**************************************************************************/ void sqrvec (double *v1) { int i, c1; double v2; c1 = (int) v1[0]; for (i=1;i<=c1;i++) { v2 = v1[i]; if (v2 < 0.0) { fprintf(pm->my_err,"Error: Square root of negative number (sqrvec)"); my_exit(99,"sqrvec"); } v2 = sqrt(v2); v1[i] = v2; } } /************** prodmatAAtB ***************************************/ /* Calculate the product of matrix a*a and return it as matrix b */ /**************************************************************************/ void prodmatAAtB (double **a, double **b) { int j, k, i, lig, col; double s; lig = (int) a[0][0]; col = (int) a[1][0]; for (j=1;j<=lig;j++) { dot ( 1 , 10 ); for (k=j;k<=lig;k++) { s = 0; for (i=1;i<=col;i++) { s = s + a[j][i] * a[k][i]; } b[j][k] = s; b[k][j] = s; } } } /************** prodmatABC ***************************************/ /* Calculate the product of matrix a*b and return it as matrix c */ /**************************************************************************/ void prodmatABC (double **a, double **b, double **c) { int j, k, i, lig, col, col2; double s; lig = (int) a[0][0]; col = (int) a[1][0]; col2 = (int) b[1][0]; for (i=1;i<=lig;i++) { dot(1,10); for (k=1;k<=col2;k++) { s = 0; for (j=1;j<=col;j++) { s = s + a[i][j] * b[j][k]; } c[i][k] = s; } } } /************** prodmatAtAB ***************************************/ /* Calculate the product of matrix a*A and return it as matrix b */ /**************************************************************************/ 
void prodmatAtAB (double **a, double **b) { int j, k, i, lig, col; double s; lig = (int) a[0][0]; col = (int) a[1][0]; for (j=1;j<=col;j++) { dot(1,100); for (k=j;k<=col;k++) { s = 0; for (i=1;i<=lig;i++) { s = s + a[i][k] * a[i][j]; } b[j][k] = s; b[k][j] = s; } } } /************** editvalpro ***************************************/ /* Calculate eigenvalues, relative inertia and Sum of inertia for each */ /* factor and record this to eigen.coa and summary.coa */ /**************************************************************************/ void editvalpro (FILE *ficlist, double *vp, int n, double s) { double sc1, sc2; int i, n1; float v2, v3, v4; FILE *eigen=NULL; char sp; sp=pm->seperator; if ( (eigen=open_file("","eigen.coa","w",FALSE))==NULL ) my_exit(1,"editvalpro"); sc1 = 0.0; for (i=1;i<=n;i++) { if (vp[i] < 0.0) { v2 = (float) vp[i]; fprintf(ficlist, "Eigenvalue number %d is negative : %+.4E\n", i, v2); vp[i] = 0.0; } } n1 = (n > 40) ? 40 : n; fprintf(ficlist, "Num. Eigenval. R.Iner. R.Sum " "|Num. Eigenval. R.Iner. 
R.Sum |"); fprintf(ficlist, "\n"); for (i=1;i<=n1;i=i+2) { sc1 = sc1 + vp[i]; if (i < n1) { sc2 = sc1 + vp[i+1]; v2 = (float) vp[i]; v3 = (float)vp[i]/(float)s; v4 = (float)sc1/(float)s; fprintf(ficlist, "%.2d %+.4E %+.4f %+.4f ", i, v2, v3, v4); fprintf(eigen ,"%.2d%c%.4E%c%.4f%c%.4f\n",i,sp,v2,sp,v3,sp,v4); v2 = (float)vp[i+1]; v3 = (float)vp[i+1]/(float)s; v4 = (float)sc2/(float)s; fprintf(ficlist, " |%.2d %+.4E %+.4f %+.4f |", i+1, v2, v3, v4); fprintf(eigen ,"%.2d%c%.4E%c%.4f%c%.4f\n",i+1,sp,v2,sp,v3,sp,v4); } else { v2 = (float)vp[i]; v3 = (float)vp[i]/(float)s; v4 = (float)sc1/(float)s; fprintf(ficlist, "%.2d %+.4E %+.4f %+.4f ", i, v2, v3, v4); fprintf(eigen ,"%.2d%c%.4E%c%.4f%c%.4f\n",i,sp,v2,sp,v3,sp,v4); } sc1 = sc2; fprintf(ficlist, "\n"); } fprintf(ficlist, "\n"); fileclose(&eigen); } /************** ecrmatred *****************************************/ /* Output c1 columns of matrix tab to filename *nfic */ /**************************************************************************/ void ecrmatred (double **tab, int c1, char *nfic) { int i, j, l1; float v2; FILE *fic=NULL; l1 = (int) tab[0][0]; if( (fic=open_file("",nfic,"wb",FALSE))==NULL) my_exit(1,"ecrmatred"); for (i=1;i<=l1;i++) { for (j=1;j<=c1;j++) { v2 = (float) tab[i][j]; if ( fwrite((const char *)&v2, 4, 1, fic) != 1){ fprintf(pm->my_err,"(ecrmatred)"); my_exit(4,"ecrmatred"); } } } fileclose(&fic); } /************** readvec ***************************************/ /* read vector v1 from filehandle fic */ /**************************************************************************/ void readvec (double *v1, FILE *fic) { float v2; int i, c1; c1 = (int) v1[0]; for (i=1;i<=c1;i++) { if ( fread((char *)&v2, 4, 1, fic) != 1) { fprintf(pm->my_err,"(readvec)"); my_exit(5,"readvec"); } v1[i] = v2; } } /************** DiagoComp ***************************************/ /* Diagnolisation of matrix w */ /* T. FOUCART Analyse factorielle de tableaux multiples, */ /* Masson, Paris 1984,185p., p. 62. 
D'aprhs VPROP et TRIDI, */ /* de LEBART et coll. */ /* Lots of nasty goto jumps ... ported from Fortran */ /*************************************************************************/ void DiagoComp (int n0, double **w, double *d, int *rang) { double *s; double a, b, c, x, xp, q, bp, ab, ep, h, t, u , v; double dble; int ni, i, i2, j, k, jk, ijk, ij, l, ix, m, m1, isnou; vecalloc(&s, n0); a = 0.000000001; ni = 100; if (n0 == 1) { d[1] = w[1][1]; w[1][1] = 1.0; *rang = 1; freevec (s); return; } for (i2=2;i2<=n0;i2++) { b=0.0; c=0.0; i=n0-i2+2; k=i-1; if (k < 2) goto Et1; for (l=1;l<=k;l++) { c = c + fabs((double) w[i][l]); } if (c != 0.0) goto Et2; Et1: s[i] = w[i][k]; goto Etc; Et2: for (l=1;l<=k;l++) { x = w[i][l] / c; w[i][l] = x; b = b + x * x; } xp = w[i][k]; ix = 1; if (xp < 0.0) ix = -1; /* q = -sqrt(b) * ix; */ dble = b; dble = -sqrt(dble); q = dble * ix; s[i] = c * q; b = b - xp * q; w[i][k] = xp - q; xp = 0; for (m=1;m<=k;m++) { w[m][i] = w[i][m] / b / c; q = 0; for (l=1;l<=m;l++) { q = q + w[m][l] * w[i][l]; } m1 = m + 1; if (k < m1) goto Et3; for (l=m1;l<=k;l++) { q = q + w[l][m] * w[i][l]; } Et3: s[m] = q / b; xp = xp + s[m] * w[i][m]; } bp = xp * 0.5 / b; for (m=1;m<=k;m++) { xp = w[i][m]; q = s[m] - bp * xp; s[m] = q; for (l=1;l<=m;l++) { w[m][l] = w[m][l] - xp * s[l] - q * w[i][l]; } } for (l=1;l<=k;l++) { w[i][l] = c * w[i][l]; } Etc: d[i] = b; } /* for (i2=2;i2my_err,"Error: can't compute matrix eigenvalues"); my_exit(99,"corresp"); Etd: m = m + 1; q = (d[k+1]-h) * 0.5 / s[k]; /* t = sqrt(q * q + 1.0); */ dble = q * q + 1.0; dble = sqrt(dble); t = dble; if (q < 0.0) isnou = -1; q = d[j] - h + s[k] / (q + t * isnou); u = 1.0; v = 1.0; h = 0.0; jk = j-k; for (ijk=1;ijk<=jk;ijk++) { dot(1,100); i = j - ijk; xp = u * s[i]; b = v * s[i]; if (fabs((double) xp) < fabs((double) q)) goto Et8; u = xp / q; /* t = sqrt(u * u + 1); */ dble = u * u + 1.0; dble = sqrt(dble); t = dble; s[i+1] = q * t; v = 1 / t; u = u * v; goto Et9; Et8: v = q / xp; /* t = sqrt(1 + 
v * v); */ dble = 1.0 + v * v; dble = sqrt(dble); t = dble; s[i+1] = t * xp; u = 1 / t; v = v * u; Et9: q = d[i+1] - h; t = (d[i] - q) * u + 2.0 * v * b; h = u * t; d[i+1] = q + h; q = v * t - b; for (l=1;l<=n0;l++) { xp = w[l][i+1]; w[l][i+1] = u * w[l][i] + v * xp; w[l][i] = v * w[l][i] - u * xp; } } d[k] = d[k] - h; s[k] = q; s[j] = 0.0; goto Et6; Eta:; } /* for (k=1;k<=n0;k++) */ for (ij=2;ij<=n0;ij++) { dot(1,300); i = ij - 1; l = i; h = d[i]; for (m=ij;m<=n0;m++) { if (d[m] >= h) { l = m; h = d[m]; } } if (l == i) { goto Etb; } else { d[l] = d[i]; d[i] = h; } for (m=1;m<=n0;m++) { h = w[m][i]; w[m][i] = w[m][l]; w[m][l] = h; } Etb:; } /* for (ij=2;ij<=n0;ij++) */ *rang = 0; for (i=1;i<=n0;i++) { if (d[i] / d[1] < 0.00001) d[i] = 0.0; if (d[i] != 0.0) *rang = *rang + 1; } freevec(s); } /* DiagoComp */ /************** inertialig ***************************************/ /* Called when advanced correspondence analysis option has been selected */ /* This analyses and reports the absolute and relative contributions of */ /* each gene to the inertia of the principal factors (by default the */ /* first 4 axis) */ /*************************************************************************/ void inertialig( char *inertia_out, char *ncout, FILE *summary) { int i, j, k, f1, l1,c1,lcmin; double **cooli, **w; double *vtab, *conli, *poili, *poico; double l0, inertotal, a1, a2, m2, m3, s1; double temp1=0,temp2=0; FILE *inert_out=NULL,*fnam=NULL; l1 =pcoa->rows; c1 =pcoa->colm; f1 =pcoa->axis; inertotal =pcoa->inertia; if( (inert_out=open_file( "",inertia_out,"w",FALSE))==NULL) my_exit(1,"inertia out"); lcmin = c1; if (l1junk,BUFSIZ,fnam); pm->junk[35]='\0'; for ( j=35 ; j>=0; j--) if ( isspace((int)pm->junk[j]) ) pm->junk[j]='\0'; fprintf(inert_out ,"%-.15s%c",pm->junk,pm->seperator); fprintf(summary, "%-15.15s",pm->junk); fprintf(summary ,"|%5d|", i); fprintf(inert_out,"%d%c", i,pm->seperator); l0 = poili[i]*poili[i]/inertotal; for (j=1;j<=f1;j++) { temp1=(cooli[i][j] * 
cooli[i][j]); /* bug fix for Think C */ temp2=(l0 / vtab[j]); /* need to split calculation*/ a1 = temp1 * temp2; fprintf(summary, "%5d|", (int) (a1 * 10000)); fprintf(inert_out,"%d%c",(int) (a1 * 10000),pm->seperator); } fprintf(summary, "\n"); fprintf(inert_out,"\n"); } fprintf(summary, "\n\nRelative contributions\nThis is the variation \n" "in the %s usage of each gene that is \n" "explained by each axis/factor\n" "see also %s \n", (pm->coa=='a')?"amino acid":"codon",inertia_out); fclose(fnam); if( (fnam=open_file("",ncout, "r",FALSE))==NULL) my_exit(6,"inertialgn"); fprintf(summary, "----------Relative contributions----------\n"); fprintf(summary, "Short_gene_name|Num |"); for (k=1;k<=f1;k++) { fprintf(summary, "Fac%2d|", k); } fprintf(summary, "|Remains| Weight | Cont.|"); fprintf(summary, "\n"); fprintf(inert_out,"\n"); for (i=1;i<=l1;i++) { fgets(pm->junk,BUFSIZ,fnam); pm->junk[35]='\0'; for ( j=35 ; j>=0; j--) if ( isspace( (int) pm->junk[j]) ) pm->junk[j]='\0'; fprintf(inert_out , "%-.15s%c",pm->junk,pm->seperator); fprintf(summary, "%-15.15s",pm->junk); fprintf(summary, "|%5d|", i); fprintf(inert_out,"%d%c", i,pm->seperator); a2 = 0.; m3 = poili[i]*poili[i]/inertotal; m2 = conli[i]; if (m2 == 0.) 
m2 = 1.; for (j=1;j<=f1;j++) { a1 = cooli[i][j] * cooli[i][j] * m3 / m2; a2 = a2 + a1; fprintf(summary, "%5d|", (int) (a1 * 10000)); fprintf(inert_out,"%d%c",(int) (a1 * 10000),pm->seperator); } fprintf(summary, "|%5d ", (int) ((1-a2) * 10000)); fprintf(summary, "|%5d |%5d |\n", (int) (inertotal * m3 * 10000), (int) (m2 * 10000)); fprintf(inert_out, "\n"); } fprintf(summary , "\n"); fprintf(inert_out, "\n"); /* free memory */ freetab(w); freevec(poili); freevec(poico); freetab(cooli); freevec(conli); freevec(vtab); fileclose(&inert_out); fileclose(&fnam); } /* End of Inertia */ /************** inertiacol ****************************************/ /* Called when advanced correspondence analysis option has been selected */ /* This analyses and reports the absolute and relative contributions of */ /* each codon or amino acid to the inertia of the principal factors (by */ /* default the first 4 axis) */ /**************************************************************************/ void inertiacol(char *inertia_out, FILE *summary ) { int x,i, j, k, f1, l1,c1, lcmin; double **cooco, **w; double *vtab, *conco, *poili, *poico; double l0, inertotal, a1, a2, m2, m3, s1; FILE *inert_out=NULL; if( (inert_out=open_file( "",inertia_out,"a",FALSE))==NULL) my_exit(1,"inertia out2"); l1 =pcoa->rows; c1 =pcoa->colm; f1 =pcoa->axis; inertotal =pcoa->inertia; lcmin = c1; if (l1coa=='a')? 
"amino acids":"codons"); for (i=1;i<=l1;i++) { a1 = poili[i]; for (j=1;j<=c1;j++) { s1 = w[i][j] * a1 * poico[j]; s1 = s1 * s1; conco[j] = conco[j] + s1; } } /* scale the vectors by 1/inertia total */ scalvec(conco, 1.0/inertotal); fprintf(summary, "\n\nColumn inertia\n"); fprintf(summary, "All contributions are in 1/10000\n\n"); fprintf(summary, "----------Absolute contributions----------\n"); fprintf(summary, "Key|Num |"); for (k=1;k<=f1;k++) { fprintf(summary, "Fac%2d|", k); } fprintf(summary, "\n"); for (x=0,i=1;i<=c1;i++) { if (pm->coa == 'a' ){ while(pcoa->amino[++x] == FALSE); fprintf(summary, "%s", paa->aa3[x]); fprintf(inert_out,"%s%c",paa->aa3[x],pm->seperator); }else{ while(pcoa->codons[++x] == FALSE); fprintf(summary, "%s", paa->cod[x]); fprintf(inert_out,"%s%c",paa->cod[x],pm->seperator); } fprintf(summary, "|%5d|", i); fprintf(inert_out,"%d%c",i,pm->seperator); l0 = poico[i]*poico[i]/inertotal; for (j=1;j<=f1;j++) { a1 = cooco[i][j] * cooco[i][j] * l0 / vtab[j]; fprintf(summary, "%5d|", (int) (a1 * 10000)); fprintf(inert_out,"%i%c", (int) (a1 * 10000),pm->seperator ); } fprintf(summary, "\n"); fprintf(inert_out,"\n"); } fprintf(summary, "\n"); fprintf(inert_out,"\n"); fprintf(summary, "----------Relative contributions----------\n"); fprintf(summary, "Key|Num |"); for (k=1;k<=f1;k++) { fprintf(summary, "Fac%2d|", k); } fprintf(summary, "|Remains| Weight | Cont.|"); fprintf(summary, "\n"); for (x=0,i=1;i<=c1;i++) { if (pm->coa == 'a' ){ while(pcoa->amino[++x] == FALSE); fprintf(summary, "%s", paa->aa3[x]); fprintf(inert_out,"%s%c",paa->aa3[x],pm->seperator); }else{ while(pcoa->codons[++x] == FALSE); fprintf(summary, "%s", paa->cod[x]); fprintf(inert_out,"%s%c",paa->cod[x],pm->seperator); } fprintf(summary, "|%5d|", i); fprintf(inert_out,"%d%c",i,pm->seperator); a2 = 0.; m3 = poico[i]*poico[i]/inertotal; m2 = conco[i]; if (m2 == 0.) 
m2 = 1.; for (j=1;j<=f1;j++) { a1 = cooco[i][j] * cooco[i][j] * m3 / m2; a2 = a2 + a1; fprintf(summary, "%5d|", (int) (a1 * 10000)); fprintf(inert_out,"%d%c",(int) (a1 * 10000),pm->seperator); } fprintf(summary, "|%5d ", (int) ((1-a2) * 10000)); fprintf(summary, "|%5d |%5d |\n", (int) (inertotal * m3 * 10000), (int) (m2 * 10000)); fprintf(inert_out,"\n"); } fprintf(summary, "\n"); freetab(w); freetab(cooco); freevec(poili); freevec(poico); freevec(conco); freevec(vtab); } /* End of Inertia */ /************** selectcol ***************************************/ /* extract a column from the file *nfic, column has the dimension of the */ /* number of genes. If these disagree it will about. Col is the number of*/ /* the column to extract. */ /*************************************************************************/ void selectcol (char *nfic , double *col, int numcol) { FILE *fic=NULL; int i, c1,l1; double *vlec; c1=2; l1=( pcoa->rows < pcoa->colm)? pcoa->rows:pcoa->colm; vecalloc(&vlec, c1); if (numcol>c1) { fprintf (pm->my_err,"fatal input-output error numcol>c1 (selectcol"); my_exit(99,"corresp"); } if( (fic=open_file( "",nfic,"rb",FALSE))==NULL) my_exit(6,"nfic4"); for (i=1;i<=l1;i++) { readvec(vlec, fic); col[i] = vlec[numcol]; } fileclose(&fic); freevec(vlec); } /************** suprow ***************************************/ /* This sub adds supplementary genes after the correspondence analysis */ /* has completed for an initial set of genes. 
The supplementary genes are*/ /* read in and processed up to the point of the generation of factors */ /* at which point the factors for the initial analysis are used to calc */ /* the position of the supplementary genes on the originally identified */ /* axis */ /*************************************************************************/ void suprow (int num_seq, char *nficvp, char *nfictasup, char *nficlisup, char*option , FILE *summary) { int l1,c1,l2,c2,i,j,k; double **compos, **tabsup; double *vp, *poico; double *moy, *var; double a1, a2; FILE *ficlisup=NULL; FILE *fnam=NULL; l2=num_seq; c2=pcoa->colm; l1=pcoa->rows; c1=pcoa->colm; if( (fnam=open_file("",option, "r",FALSE))==NULL) my_exit(6,"sup row corresp"); taballoc(&tabsup, l2, c2); lecmat(tabsup, nfictasup); taballoc(&compos, c1, pcoa->axis); lecmat(compos, "cbfcco"); vecalloc(&moy, c1); vecalloc(&var, c1); vecalloc(&vp, pcoa->axis); lecvalpro(vp, nficvp); vecalloc(&poico, c1); lecvec(poico, "cbfcpc"); for (j=1;j<=pcoa->axis;j++) { vp[j] = sqrt((double)vp[j]); a1 = vp[j]; for (i=1;i<=c1;i++) { compos[i][j] = compos[i][j] / a1; } } for (i=1;i<=c1;i++) { a1 = poico[i]; for (j=1;j<=pcoa->axis;j++) { compos[i][j] = compos[i][j] * a1; } } /* Transform genes with the initial factor */ for (i=1;i<=l2;i++) { a1 = 0.0; for (j=1;j<=c1;j++) { a1 = a1 + tabsup[i][j]; } if (a1 != 0.) { for (j=1;j<=c1;j++) { a2 = tabsup[i][j] / a1; if (poico[j]!=0) {tabsup[i][j] = a2 / poico[j];} } } } /* Position the suppli. 
genes on the original factors */ if( (ficlisup = open_file("",nficlisup,"a",FALSE))==NULL ) my_exit(1,"nficlisup"); fprintf(summary,"\n\nThe position of each additional gene by axis " "(see also %s )\n",option); fprintf(summary, "Additional genes added after COA: \n"); fprintf(summary, "Number of genes: %d, columns: %d\n\n", l1, c1); for (i=1;i<=l2;i++) { fgets(pm->junk,BUFSIZ,fnam); pm->junk[35]='\0'; for ( j=35 ; j>=0; j--) if ( isspace((int)pm->junk[j]) ) pm->junk[j]='\0'; fprintf(ficlisup, "%s%c",pm->junk,pm->seperator); fprintf(summary , "%s%c",pm->junk,pm->seperator); for (k=1;k<=pcoa->axis;k++) { a1 = 0.; for (j=1;j<=c1;j++) { a1 = a1 + tabsup[i][j] * compos[j][k]; } fprintf(ficlisup,"%f%c",(float)a1,pm->seperator); fprintf(summary ,"%10.5f%c",(float)a1,pm->seperator); } fprintf(ficlisup,"\n"); fprintf(summary ,"\n"); } fclose(ficlisup); freetab (tabsup); freetab (compos); freevec (vp); freevec(poico); freevec(moy); freevec(var); fileclose(&fnam); } /************** lecvalpro ***************************************/ /* Read a vector from a binary formatted file */ /*************************************************************************/ void lecvalpro (double *v1, char *nfic) { float v2; int i, c1; FILE *fic=NULL; if ( (fic=open_file("",nfic,"rb",FALSE))==NULL) my_exit(6,"lecvalpro"); c1 = (int) v1[0]; for (i=1;i<=c1;i++) { if ( fread((char *)&v2, 4, 1, fic) != 1) { fprintf(pm->my_err,"(lecvalpro)"); my_exit(5,"lecvalpro"); } v1[i] = v2; if ( fread((char *)&v2, 4, 1, fic) != 1) { fprintf(pm->my_err,"(lecvalpro)"); my_exit(5,"lecvalpro2"); } } fileclose(&fic); } codonW/indices.txt 777 0 0 17145 10237502116 7551 0Codon usage indices This document describes the indices calculated by CodonW, by default only the G+C content of the sequence is reported. The others being dependent on the genetic code selected. More than one index may be calculated at the same time. Codon Adaptation Index (CAI) (Sharp and Li 1987). 
CAI is a measurement of the relative adaptiveness of the codon usage of a gene towards the codon usage of highly expressed genes. The relative adaptiveness (w) of each codon is the ratio of the usage of each codon to that of the most abundant codon for the same amino acid. The relative adaptiveness values of codons, albeit for a limited choice of species, can be selected from Menu 3. The user can also input a personal choice of values. The CAI index is defined as the geometric mean of these relative adaptiveness values. Non-synonymous codons and termination codons (dependent on genetic code) are excluded. To prevent a codon absent from the reference set but present in other genes from having a relative adaptiveness value of zero, which would cause CAI to evaluate to zero for any genes which used that codon, it was suggested that absent codons should be assigned a frequency of 0.5 when estimating w (Sharp and Li 1987). An alternative suggestion was that w should be adjusted to 0.01 where otherwise it would be less than this value (Bulmer 1988). CodonW does not adjust the w value if a non-zero input value is found; zero values are assigned a value of 0.01. Frequency of Optimal codons (Fop) (Ikemura 1981). This index is the ratio of optimal codons to synonymous codons (genetic code dependent). Optimal codons for several species are in-built and can be selected using Menu 3. By default, the optimal codons of E. coli are assumed. The user may also enter a personal choice of optimal codons. If rare synonymous codons have been identified, there is a choice of calculating the original Fop index or a modified Fop index. Fop values for the original index are always between 0 (where no optimal codons are used) and 1 (where only optimal codons are used). When calculating the modified Fop index, negative values are adjusted to zero. Codon Bias Index (CBI) (Bennetzen and Hall 1982). 
Codon bias index is another measure of directional codon bias; it measures the extent to which a gene uses a subset of optimal codons. CBI is similar to Fop as used by Ikemura, with expected usage used as a scaling factor. In a gene with extreme codon bias, CBI will equal 1.0; in a gene with random codon usage, CBI will equal 0.0. Note that it is possible for the number of optimal codons to be less than expected by random chance. This results in a negative value for CBI. The effective number of codons (Nc) (Wright 1990). This index is a simple measure of overall codon bias and is analogous to the effective number of alleles measure used in population genetics. Knowledge of the optimal codons or a reference set of highly expressed genes is unnecessary. Initially the homozygosity for each amino acid is estimated from the squared codon frequencies (see Equation 5). If amino acids are rare or missing, adjustments must be made. When there are no amino acids in a synonymous family, Nc is not calculated as the gene is either too short or has extremely skewed amino acid usage (Wright 1990). An exception to this is made for genetic codes where isoleucine is the only 3-fold synonymous amino acid, and is not used in the protein gene. The reported value of Nc is always between 20 (when only one codon is effectively used for each amino acid) and 61 (when codons are used randomly). If the calculated Nc is greater than 61 (because codon usage is more evenly distributed than expected), it is adjusted to 61. G+C content of the gene. The frequency of nucleotides that are guanine or cytosine. G+C content 3rd position of synonymous codons (GC3s). This is the fraction of codons that are synonymous at the third codon position, which have either a guanine or cytosine at that third codon position. Silent base compositions. Selection of this option calculates four separate indices, i.e. G3s, C3s, A3s & T3s. Although correlated with GC3s, this index is not directly comparable. 
It quantifies the usage of each base at synonymous third codon positions. When calculating GC3s, each synonymous amino acid has at least one synonym with G or C in the third position. Two- or three-fold synonymous amino acids do not have an equal choice between bases in the synonymous third position. The index A3s is the frequency that codons have an A at their synonymous third position, relative to the amino acids that could have a synonym with A in the synonymous third codon position. The codon usage analysis of Caenorhabditis elegans identified a trend correlated with the frequency of G3s, though it was not clear whether it reflected variation in base composition (or mutational biases) among regions of the C. elegans genome, or another factor (Stenico et al. 1994). Length silent sites (Lsil). Frequency of synonymous codons. Length amino acids (Laa). Equivalent to the number of translatable codons. Hydropathicity of protein. The general average hydropathicity (GRAVY) score for the hypothetical translated gene product. It is calculated as the arithmetic mean of the sum of the hydropathic indices of each amino acid (Kyte and Doolittle 1982). This index has been used to quantify the major COA trends in the amino acid usage of E. coli genes (Lobry and Gautier 1994). Aromaticity score. The frequency of aromatic amino acids (Phe, Tyr, Trp) in the hypothetical translated gene product. The hydropathicity and aromaticity protein scores are indices of amino acid usage. The strongest trend in the variation in the amino acid composition of E. coli genes is correlated with protein hydropathicity, the second trend is correlated with gene expression, while the third is correlated with aromaticity (Lobry and Gautier 1994). The variation in amino acid composition can have applications for the analysis of codon usage. If total codon usage is analysed, a component of the variation will be due to differences in the amino acid composition of genes. Bennetzen, J. L., and B. D. 
Hall, (1982). Codon selection in yeast. Journal of Biological Chemistry 257: 3026-3031. Bulmer, M., (1988). Are codon usage patterns in unicellular organisms determined by selection-mutation balance? Journal of Evolutionary Biology 1: 15-26. Ikemura, T., (1981). Correlation between the abundance of Escherichia coli transfer RNAs and the occurrence of the respective codons in its protein genes: a proposal for a synonymous codon choice that is optimal for the E. coli system. Journal of Molecular Biology 151: 389-409. Kyte, J., and R. Doolittle, (1982). A simple method for displaying the hydropathic character of a protein. Journal of Molecular Biology 157: 105-132. Lobry, J. R., and C. Gautier, (1994). Hydrophobicity, expressivity and aromaticity are the major trends of amino acid usage in 999 Escherichia coli chromosome-encoded genes. Nucleic Acids Research 22: 3174-3180. Sharp, P. M., and W. H. Li, (1987). The codon adaptation index - a measure of directional synonymous codon usage bias, and its potential applications. Nucleic Acids Research 15: 1281-1295. Stenico, M., A. T. Lloyd and P. M. Sharp, (1994). Codon usage in Caenorhabditis elegans: delineation of translational selection and mutational biases. Nucleic Acids Research 22: 2437-2446. Wright, F., (1990). The effective number of codons used in a gene. Gene 87: 23-29. 
codonW/input.dat 777 0 0 514465 6363001576 7243 0>YCG9 Probable 1377 residues Pha 0 Code 0 ATGAATATGCTCATTGTCGGTAGAGTTGTTGCTAGTGTTGGGGGAAGCGGACTTCAAACG CTTTGCTTTGTTATTGGTTGTACGATGGTTGGTGAAAGGTCACGTCCATTGGTGATTTCC ATCCTAAGTTGTGCATTTGCTGTAGCTGCTATCGTTGGTCCTATAATCGGAGGTGCCTTT ACAACCCATGTTACCTGGAGGTGGTGCTTCTATATCAATCTTCCTATCGGTGGTCTTGCC ATTATTATGTTTTTACTCACATATAAGGCCGAGAATAAGGGTATACTTCAACAAATTAAA GATGCTATAGGAACAATCTCGAGCTTTACTTTTAGTAAGTTCAGACACCAAGTTAATTTT AAAAGACTTATGAATGGCATAATCTTCAAGTTTGACTTCTTTGGTTTTGCCCTCTGCTCT GCAGGGCTGGTCCTTTTCCTACTGGGGCTAACCTTTGGTGGTAATAAATATAGTTGGAAC TCTGGCCAAGTCATCGCATATTTGGTTTTGGGTGTCTTACTTTTTATTTTTTCATTGGTG TACGATTTCTTCTTATTCGATAAATTCAACCCGGAACCTGATAATATATCCTACAGGCCT CTCCTTCTAAGAAGATTGGTAGCAAAACCAGCCATAATAATAATAAACATGGTAACATTT CTATTATGTACCGGTTACAATGGGCAAATGATATACTCTGTCCAGTTTTTCCAACTTATA TTTGCGTCGAGTGCATGGAAAGCCGGTCTTCACTTGATACCAATCGTTATTACCAACGTT ATTGCGGCCATTGCAAGTGGTGTGATTACCAAAAAGCTCGGTTTAGTTAAACCACTCTTA ATATTTGGAGGCGTTCTTGGGGTAATTGGAGCAGGGCTTATGACACTTATGACAAATACG TCCACGAAGTCAACTCAAATTGGTGTTTTGCTATTACCGGGGTTTTCCCTTGGATTTGCT CTACAAGCATCGCTCATGAGTGCACAGCTTCAAATTACCAAAGATCGTCCAGAAGCTGCT ATGGACTTTATTGAAGTAACAGCTTTCAATACATTCATGAAGTCATTAGGTACAACTCTT GGTGGTGTGCTTTCAACCACTGTTTTTTCCGCCTCCTTTCACAACAAAGTATCACGAGCT CATCTAGAGCCTTACGAAGGAAAAACGGTTGATGACATGATTTTGTATCGTCTTCAAAAC TACGACGGTTCTCATTCGACTATTGGAAACATTTTAAGCGACTCCATTAAGAACGTATTT TGGATGGATCTAGGGTTTTATGCCTTAGGATTTTTGTTTTGTAGTTTTTCATCCAATAAG AAATTAATCATACCAAAAAAGGACGAGACACCAGAAGATAATTTAGAAGACAAGTAG >YCG8 573 residues Pha 0 Code 0 ATGAGAACGGCCGTACCGCAGTTGCTGGAAGCAACTGCCTGTGTCTCTAGAGAATGCCCC CTCGTCAAAAGAAGTCAGGACATAAAAAGAGCAAGAAAACGTCTACTCAGTGACTGGTAT AGGCTCGGCGCTGATGCAAACATGGATGCCGTATTACTAGTTGTTAACTCCGCCTGGAGG TTTCTGGCCGTCTGGCGACCCTTCGTAAACTCAATCCAACATGCAACTCAGGAATTGTAT CAAAATATCGCCCATTACCTTCTTCATGGCAACGTAAATATACAGAGGGTCACAGCACTA CTACAGCTCGTAATGGGACAGGACGATTTACTTTTTAGTATGGATGATGTTCTACAAGAG GTCTTCAGAATACAGCTCTATTTGAATAAGATGCTGCCGCACAACTCTCACAAATGGCAA 
AAGCCATCCCCCTTTGACTCCGCAAACTTACTACTTAACTTCAGAGACTGGACAACTGAC AATGCTCTCCTCCAAGAGTTGCTACTATCCTATCCCACAATTAATAAAAACAAACACAAA AATCACTCCGTCCCTCGTCTAATACAAGTTTGA >ALPHA2 633 residues Pha 0 Code 0 ATGAATAAAATACCCATTAAAGACCTTTTAAATCCACAAATCACAGATGAGTTTAAATCC AGCATACTAGACATAAATAAAAAGCTCTTTTCTATTTGCTGTAATTTACCTAAGTTACCA GAGAGTGTAACAACAGAAGAAGAAGTTGAATTAAGGGATATATTAGGATTCTTATCTAGG GCCAACAAAAACCGTAAGATTAGTGATGAGGAGAAGAAGTTGTTGCAAACAACATCTCAA CTCACTACTACCATTACTGTATTACTCAAAGAAATGCGCAGCATAGAAAACGATAGAAGT AATTATCAACTTACACAGAAAAATAAATCGGCGGATGGGTTGGTATTTAATGTGGTAACT CAAGATATGATAAACAAAAGTACTAAACCTTACAGAGGACACCGGTTTACAAAAGAAAAT GTCCGAATACTAGAAAGTTGGTTTGCAAAGAACATCGAGAACCCATATCTAGATACCAAG GGCCTAGAGAATCTAATGAAGAATACCAGTTTATCTCGCATTCAAATCAAAAACTGGGTT TCGAATAGAAGAAGAAAAGAAAAAACAATAACAATCGCTCCAGAATTAGCGGACCTCTTG AGCGGTGAGCCTCTGGCAAAGAAGAAAGAATGA >ALPHA1 528 residues Pha 0 Code 0 ATGTTTACTTCGAAGCCTGCTTTCAAAATTAAGAACAAAGCATCCAAATCATACAGAAAC ACAGCGGTTTCAAAAAAGCTGAAAGAAAAACGTCTAGCTGAGCATGTGAGGCCAAGCTGC TTCAATATTATTCGACCACTCAAGAAAGATATCCAGATTCCTGTTCCTTCCTCTCGATTT TTAAATAAAATCCAAATTCACAGGATAGCGTCTGGAAGTCAAAATACTCAGTTTCGACAG TTCAATAAGACATCTATAAAATCTTCAAAGAAATATTTAAACTCATTTATGGCTTTTAGA GCATATTACTCACAGTTTGGCTCCGGTGTAAAACAAAATGTCTTGTCTTCTCTGCTCGCT GAAGAATGGCACGCGGACAAAATGCAGCACGGAATATGGGACTACTTCGCGCAACAGTAT AATTTTATAAACCCTGGTTTTGGTTTTGTAGAGTGGTTGACGAATAATTATGCTGAAGTA CGTGGTGACGGATATTGGGAAGATGTGTTTGTACATTTGGCCTTATAG >CHA1 1083 residues Pha 0 Code 0 ATGTCGATAGTCTACAATAAAACACCATTATTACGTCAATTCTTCCCCGGAAAGGCTTCT GCACAATTTTTCTTGAAATATGAATGCCTTCAACCAAGTGGCTCCTTCAAAAGTAGAGGA ATCGGTAATCTCATCATGAAAAGTGCCATTCGAATTCAAAAGGACGGTAAAAGATCTCCT CAGGTTTTCGCTAGTTCTGGCGGTAATGCCGGTTTTGCTGCTGCAACAGCATGTCAAAGA CTGTCTCTACCATGTACAGTCGTGGTTCCTACAGCGACAAAGAAGAGAATGGTAGATAAA ATCAGGAACACCGGTGCCCAGGTTATCGTGAGTGGTGCCTACTGGAAAGAAGCAGATACT TTTTTAAAAACAAATGTCATGAATAAAATAGACTCTCAGGTCATTGAGCCCATTTATGTT CATCCCTTCGATAATCCGGATATTTGGGAAGGACATTCATCTATGATAGATGAAATAGTA CAAGATTTGAAATCGCAACATATTTCCGTGAATAAGGTTAAAGGCATAGTATGCAGCGTT 
GGTGGAGGTGGTTTATACAATGGTATTATTCAAGGTTTGGAAAGGTATGGTTTAGCTGAT AGGATCCCTATTGTGGGGGTGGAAACGAATGGATGTCATGTTTTCAATACTTCTTTGAAA ATAGGCCAACCAGTTCAATTCAAGAAGATAACAAGTATTGCTACTTCTCTAGGAACGGCC GTGATCTCTAATCAAACTTTCGAATACGCTCGCAAATACAACACCAGATCCGTTGTAATA GAGGACAAAGATGTTATTGAACCCTGTCTTAAATATACACATCAATTCAATATGGTGATT GAACCGGCATGTGGCGCCGCATTGCATTTGGGTTACAACACTAAGATCCTAGAAAATGCA CTGGGCTCAAAATTAGCTGCGGATGACATTGTGATAATTATTGCTTGTGCGAGCTCCTCT AATACTATAAAGGACTTGGAAGAAGCGTTGGATAGCATGAGAAAAAAAGACACTCCTGTA ATAGAAGTCGCTGACAATTTCATATTTCCAGAAAAAAATATTGTGAATTTAAAAAGTGCT TGA >KRR1 951 residues Pha 0 Code 0 ATGGTGTCTACACATAACAGAGATAAACCTTGGGATACGGATGATATTGATAAATGGAAG ATAGAGGAGTTTAAGGAAGAGGATAACGCATCCGGTCAACCTTTTGCTGAAGAGTCCAGT TTTATGACTTTGTTTCCTAAATACAGAGAAAGTTACTTGAAGACGATTTGGAATGATGTA ACAAGGGCTCTAGACAAACACAACATAGCGTGTGTTCTAGATTTAGTCGAAGGTTCTATG ACAGTAAAAACAACTAGAAAAACATACGATCCCGCTATCATTTTGAAAGCCAGAGATTTG ATCAAATTATTGGCGAGATCCGTTCCTTTCCCGCAAGCCGTTAAGATCCTACAAGATGAC ATGGCATGCGACGTTATTAAAATTGGTAATTTCGTTACTAACAAAGAAAGGTTTGTCAAG AGAAGACAACGTCTTGTAGGCCCTAACGGTAATACTTTAAAGGCTTTGGAACTTCTAACT AAATGTTACATTCTAGTACAAGGTAACACAGTAAGTGCCATGGGTCCCTTCAAGGGCTTG AAGGAGGTCCGTCGAGTAGTAGAAGATTGTATGAAAAATATTCACCCTATCTATCATATC AAGGAATTAATGATAAAAAGAGAATTGGCAAAAAGGCCAGAGTTAGCCAATGAAGATTGG TCAAGATTCTTGCCCATGTTTAAGAAGAGGAATGTGGCCAGAAAGAAACCCAAGAAGATC AGAAACGTCGAAAAGAAGGTCTATACTCCATTTCCTCCTGCCCAATTGCCTAGAAAGGTT GATTTGGAAATTGAAAGTGGTGAGTATTTCTTAAGCAAGAGAGAAAAGCAAATGAAGAAA TTAAATGAGCAAAAGGAAAAGCAAATGGAAAGAGAAATCGAAAGGCAGGAAGAGAGAGCA AAAGATTTCATAGCTCCGGAAGAAGAAGCATACAAGCCAAACCAAAATTAG >PRD1 2139 residues Pha 0 Code 0 ATGCGATTGTTGCTGTGCAAGAATTGGTTTGCGTCACCTGTAATCTCACCACTACTGTAT ACCCGCTCCTTATATTCAATGGCTAACACTACTAGTTTCCCTATTGCTCCCCAGGCCCCG CCTAATTGGTCGTTCACTCCCAGCGATATTAGTGGGAAAACCAACGAAATCATCAACAAC AGCAACAATTTCTATGATTCTATGAGTAAGGTAGAGAGCCCTTCCGTGAGTAATTTTGTG GAGCCTTTCATGAAGTTTGAAAATGAATTGGGCCCAATAATTAACCAATTAACTTTCTTA CAGCATGTGTCGTCTGATAAAGAAATTAGGGACGCATCTGTGAACTCCTCAATGAAACTG 
GATGAGTTGAACATCGATCTATCTCTGCGTCACGACATCTTTTTGCAATTCGCCCGCGTC TGGCAGGATGTTCAATCGAAGGCAGATTCTGTGGAAAGAGAAACTTTCAAATACGTTGAG AAGTCTTACAAGGACTACATTCATTCTGGTTTGGAACTTGACGAGGGAAACCGATTGAAA ATCAAAGAGATCAAAAAGAAGATCTCCGTTAACTCTATTAATTTTTCGAAGAATCTGGGA GAACAAAAGGAATACATCACTTTCACCAAAGAACAATTGGAAGGTGTGCCGGATTCTATT TTGACGCAGTTCGAGACAATAAAATCTGACAAAGATAGCAATGAAACCTTGTATAAAGTC ACCTTCAAATATCCGGACATTTTTCCCGTGATGAAATTGGCATCCTCAGCTCAGACTAGA AAGCAGGCCTTTTTGGCCGACCAAAATAAGGTCCCTGAAAATGAAGCTATACTGTTGGAT ACATTGAAGCTGCGTGACGAATTGGCCTCGTTATTGGGCTATGACACGTATGCGAACTAC AACCTGTATGATAAAATGGCTGAAGATAGCACTACGGTAATGAACTTTTTGAATGATTTG AAGGACAAGCTAATTCCGCTGGGCAGAAAGGAACTACAGGTCTTGCAAGATATGAAAGCC GAAGATGTTAAGAAACTTAACCAGGGTGCAGATCCAAACTACTACATTTGGGACCACCGT TACTACGATAACAAATATTTGTTAGAAAACTTCAATGTGGACCTAGAAAAGATTTCTGAA TATTTTCCACTAGAGGCTACGATTACTGGTATGCTGGAAATATACGAAACATTGTTTAAT TTGAAGTTTATCGAGACGAAAGATTCTCAAAACAAATCTGTTTGGCATGACGACGTCAAA CAAATCGCCGTTTGGAATATGGATGATCCAAAGTCTCCAAACTTTGTTGGTTGGATTTAT TTCGATTTACATCCTCGTGATGGTAAATATGGCCACGCTGCCAATTTTGGTTTATCGTCA TCATTCATGATTGATGACACCACAAGATCGTATCCGGTTACTGCGTTGGTTTGCAATTTC TCCAAATCTACGAAGGATAAACCTTCTCTACTGAAGCATAACGAAATAGTGACCTTTTTC CATGAATTGGGCCATGGTATCCATGACCTGGTGGGACAAAACAAGGAATCGAGGTTTAAT GGCCCCGGATCTGTTCCATGGGATTTTGTGGAGGCACCTTCCCAAATGTTAGAATTTTGG ACTTGGAATAAGAATGAATTAATCAACCTCTCATCACATTACAAAACGGGCGAAAAAATT CCAGAATCTTTGATCAATTCATTGATCAAAACTAAACACGTAAATGGTGCTTTATTCACT CTAAGACAATTACATTTTGGGTTATTTGATATGAAAGTACATACTTGTAAAGACTTGCAA AACCTGTCAATTTGCGATACCTGGAACCAATTGAGACAGGATATTTCTTTGATTTCTAAT GGTGGTACGTTATCCAAGGGTTATGATTCATTTGGCCATATAATGTCAGACTCTTACTCT GCCGGTTATTACGGTTATCTATGGGCGGAAGTCTTTGCAACTGATATGTATCACACCAAA TTCGCTAAGGATCCGTTAAATGCCAAGAATGGGATACAATACCGTGATATTGTGTTGGCT CGTGGTGGCCTTTATGATATTAATGATAATCTGAAAGAATTTTTGGGTAGGGAACCTTCT AAGGATGCTTTCTTGAAGGAGCTGGGCTTACAGAACTAA >KAR4 1008 residues Pha 0 Code 0 ATGGCATTCCAAGATCCAACTTACGACCAGAATAAAAGCAGACACATCAACAACAGTCAC TTGCAAGGGCCAAACCAGGAAACAATAGAAATGAAATCTAAACACGTATCATTCAAACCC 
TCTAGAGACTTCCATACAAACGATTACTCGAATAACTACATTCATGGGAAGTCGCTACCG CAACAGCATGTTACTAATATTGAGAATAGGGTTGATGGCTATCCAAAACTTCAGAAATTA TTTCAGGCGAAAGCTAAACAAATAAATCAATTTGCCACTACGCCATTTGGGTGTAAAATC GGAATAGATTCCATTGTTCCAACGTTGAATCACTGGATACAGAACGAAAATTTGACTTTC GACGTGGTGATGATTGGCTGCTTAACAGAAAATCAGTTTATTTACCCAATTTTAACCCAA TTGCCATTGGATAGATTGATCTCCAAACCAGGTTTCCTGTTCATCTGGGCCAATTCTCAA AAAATCAATGAACTTACTAAACTTTTGAATAATGAAATATGGGCTAAAAAGTTTAGAAGA AGTGAAGAATTGGTTTTTGTTCCTATTGACAAGAAATCACCGTTTTATCCAGGTTTAGAT CAGGACGATGAAACGTTGATGGAAAAAATGCAATGGCACTGTTGGATGTGTATCACAGGT ACAGTAAGGAGGTCTACAGATGGACATCTTATTCATTGTAACGTAGACACTGACTTGAGT ATCGAAACGAAGGACACCACTAATGGTGCTGTACCATCCCATTTGTATCGTATTGCAGAA AACTTCTCTACCGCGACTAGACGATTACATATTATTCCTGCAAGGACTGGTTACGAGACA CCCGTCAAAGTAAGACCTGGCTGGGTTATAGTGAGCCCAGATGTTATGTTGGATAACTTC TCACCCAAGAGATATAAAGAAGAGATAGCTAATTTAGGTTCGAATATCCCATTAAAAAAT GAGATTGAGCTGTTAAGACCAAGAAGTCCAGTACAAAAAGCACAATAA >PBN1 1251 residues Pha 0 Code 0 ATGGTGACAAGACATAGAGTGACTGTACTCTACAATGCCCCTGAGGATATCGGTAATCAT ATGCGCCAAAATGACACTCATTTGACTGTTCGTGGAGGTTCTGGTGTGGTTTTACAACAA AGGTGGCTATTAGAGAGGACTGGAAGCTTGGATAAATCCTTTACGAGAATCACTTGGAGG CCCAGAGCGGACTTGGCTAGAAGTTTAAGCGTTATAGAAAATGAACTGAGTGCTGGCTTT TCAGTTTACTCAAATTCTTCGGATGTGCCGGAAAGGTTTATTACTAACCCAGTCTACAAT TCATTTCACAGTGAGAAGTTTGACATAGAGCAGTACTTGCCTCCCGAAGTAGATTTGAAT CTGTCATGGAATCCAGAAGATTTTACATATGATATATCAGTGGAGCCCACACAAATCCAA ATTGTTGAATATCGTCTGTTGAAACAGGGTGAAGAATTTACAATTGCAAGAGTGAAAGAT GAGAAACTCGAAGTAGGTGTATTCTTTGTGGATGCAAGTGATGAAAGTGATGTCGATATT GGTGGAATACGTTGTAATTGGAGGATGGACGATGGTAAAATGGAAAGATGTCAGAAAACA TCCTTATTGTATAAACAGGGCCATATCGCATACAATCACTCGACGACTACGACATCACTA TATCTGAATGAACCTATCGGTTTGCATCCAAAAATCATGATTGATCTCACAGATTTCGAA GAACGCCCTAAATGCATGTATCTAATGCACCTGCAATTGCCGTTAGAATTATTTATCGAT AAATTCCAATCCTCTCCCTTACTACTTTTTGGAGAAGACGACTTAGAATTACCAGAATAC TCTCTTCGAGATAAGGCATGGGGTTCTGAAAGTATCTTTGAATTGAAAGCCGGCACAATG AATGAAGTGACATTGCATACTAGATATATTGAGCCTTCTAATAATAAAGGGGATAAATTA GAAGTTTCATTTGATCCAGAAGTTATATTAGCCTGCGACACAGGTGACAATAAAGTTTCC 
CGTAATCCATTTTATAAAAAAGGTCTAGGATATGAATCTCTCTTTACAGACGATACTACA TTCCGCCATTTGAACTCGACAACTCTTCTAGTACCAATTCCAAGGCCTGACACAAAGGAT TATTCCAAGATCAAAAATGGTACGTTACTATGCTTACTCATCTCCATCATATACATTTTC TCCAAGGTATTTGGTAACAACAAGAAGAAAAGATCAGTAAAACGGGAATAA >LRE1 1761 residues Pha 0 Code 0 ATGCCCAATACGCATACTCAACATGTGCAAATATCAGAGCCAAATCCTGTAAATACTTTG TCTACACCATCCAAAAGAGGTCACCGCCATCGCAGATCGCTAGCAATATCAGGAGATTTT GATTTTTTGAAACAGCCTGCAGCAATTGTGAATTTACCACCTCCACAGGCGGCTGAAAAT TGTCCTTCAACTGCCCCAACTGCTGTATCAAGTACATTATCGCCAATACGCTACAATAGA TTTCCTTGCAAAACCAATGAAGACGCTGGAACGTTAGATTTGCCTGAACCAAGATTTTAT CCGTTATCACCAAAGAACAATCTGCAAACACCAAGTCCACGATTTTTCATTAGTGAAGAG CCAAGTTTTTCATCGCCAGTTAAAGGCGTCCCAGATGCCATTATTAACCTTGACGATGCG TTGAAGACAAGGCCTAGGTCATTTAAATCACATAGAAGATCTGAATCCGCTCCTCCTGAT TTGGAGGTTATGGTAGATAAGGGCAATTGTGCAGCCGGTTCTAACTCTATGATTAAAGAA GAAGAGGACTCCTTAATTGAACCAGAATCGAAAAATGAATATTATGAGCAAAAGCTTCCA ACAGCACTATTATCCCCACTGCGGCCTTCCCTTTGTGTATCTGAACAGGCCATTGATGTA GATGATTCAGCTCTCAATGGGTCACCGACCCATCACAACCATGGGATGCAAAACGCCAAT GCACGGAATTCCAACACATTCAATTCGTTGAAGATCAAAGGCCAAAAGCAAAGATATTAT CATTATACGAAGCAGCTACCTTTGACCGTAGGCTGTGACTCGCAATCTCCAAAAGAACAA AGGTCGGCTGCTTCAATGACAATCAATCAGGCAATGACACCTTCTTCCCTGGCCTATACC CCTTCTAAACTAGCATCTACTCCCGCAACACCAGTATCCTTTTATGACAGCAATGCGGAC ATTAACTTAGAAAGTGATAATTTTCCACTAAAAGATAACCCTAGATATGCCAAGGATGGT TATCCTAAAAAGTGCGGCAATTCACAGCTTAATCGTGTGCTGGATAGCGATAAAAGACAG GATTTTAGTGGAGAATCGAGAAGAAGAAGATCGGGCAGTCCTATCTCCCACATGCAACAC CGCAACCTGATTGATAATATGAAAGGTAGACGAAACAGTAACACGATAAACTCAATCTTC AACTACAAGAGTCAACATTATGAAATGCCATATGATGATATGATGAAAAATGAAAACATT AATGCACAGTCCATGCCCTTTTCAGTCAACGGTGTCAACAATGAAAATAGTATCGGAGGG GTTATTACGAGAGCGGACGATGCACCCCTTCAACACTCTGTGGTCAAATCCTGTACGCCT GATGGCAAGGAAGAAATGAATAGGCTTAAAAGTAATGACAGTAATGAATATTCCAAGTCT GAAGGGCAGATCAGAACCAATTCGCAACTAAGTAAGGACATTCTCATGGGTGAACCAGGT GATATGGTTGATCTGTCCTCTTTTGTCAACACGCAGAGAAAAGCCTCAAATGAAACTGGT GACTTAGTCTTTAGTTTATCCCAGGATGATGACGCACTGAAAACGTTCCATGCGAGCAAT AGCGCAGCAACAAGCAATGAAAGCTGGTGTATTAGCGATGGTGCGTTAGGAAAGCAGGCG 
CAGGACAGTGAAGTTAGGAGGAAAGAAATCAAATTAGGACTCTTTAGACATATTTTCAAG GAAGTAATACAACAATATTAA >APA1 966 residues Pha 0 Code 0 ATGAGTATCCCCGCTGACATTGCATCTTTAATTAGTGACAAGTACAAAAGTGCCTTCGAT AATGGTAACTTAAAATTTATCCAGACTGAAACAACGAAAACAAAGGACCCAAAAACCAGC ATGCCATACTTGATTAGCCACATGCCAAGTCTGATCGAAAAGCCAGAGCGTGGCCAAACT CCAGAAGGAGAGGATCCACTAGGCAAACCTGAGGAAGAATTAACGGTTATCCCAGAATTT GGTGGTGCCGATAACAAAGCGTATAAATTGCTATTAAACAAATTCCCTGTAATCCCTGGA CACACTTTATTGGTAACTAACGAATACCAACATCAAACTGATGCCTTGACCCCAACCGAT TTATTGACTGCTTATAAGTTGCTGTGTGCCTTGGACAATGAAGAATCCGACAAGAGACAC ATGGTCTTTTACAATTCTGGTCCAGCCAGTGGTTCTTCATTGGACCACAAACATTTGCAA ATTTTGCAAATGCCTGAAAAGTTCGTCACTTTCCAAGATAGACTATGTAATGGTAAAGAA CATTTCCTACCAACTTTCAATACTGAACCTTTGCAAGATGCTAAAGTCTCGTTCGCTCAT TTTGTCTTGCCAATGCCGGAGTCCGAAGAAACTGTTGATGAAGACCTATTAGCTATGTGT TACATCTCCATATTGCAAAGAGCTTTGACCTTTTTCCAGGACTGGTTGAACGAAAATCCA GAACTAAAGAAATCCTACAATCTTATGTTAACCAAGGAATGGATCTGTGTCGTTCCACGT TCGAAGGCCTTTTCTGATGAAATGAAGATAGGTTTCAACTCCACAGGTTATTGTGGTATG ATCTTAACCAAAAATGATGAAGTTTTCTCCAAGATTACTGAAAAACCTGAATTGATTAAC GATATCTTATTGGAATGTGGTTTCCCAAACACTTCTGGTCAAAAACCAAACGAATACAAC TATTGA >YCE9 939 residues Pha 0 Code 0 ATGTTTAGTAAATACCTCGTAACTGCATCTTCCCTCTTTGTGGCTTTGACCTCTGCAGCA TCTACCGTTGATCTAGATGCTCTGCTTCTTCTACCAGGGGTCGAGTCCCACGACGGCGTT GATACTGTATTTTCGACCAAAGACTTTTATCAAGTGTCATTCGTCAAATCCATTGCTCCT GCTATCGTAAACAGCTCCGTAATCTTCCACGATGTTTCTCGTGGTGTGGCTATGGGCAAT GTCAAGAGCAGAGCAAGTATCTTCAACCCAGAGGAAACGTATTACGATTGGGAACAGTAC CAAGTAGTAAATAACGGAGACTGGCGAACCGAATGGGCACCTGCCTCTGACTGCATTTGG AGGGAGGAGAAGGATAACAGCGACGAAACACCGGACAGATTCCCCATCTCGGTGCCATAT AATTGGACGTCACAGTACTCAATTGTAGATTATGACACAGACGCTAACGAAGACAATTTA GATTTCAGGTTTATTAAATCATTGCTAGATAAGAAAAATTGGTTGAATAAAATTAACCAG ACTGTTTCCCAATCCAGTATTATGGTAGCACCAATGATTAAGCCATACAATGTGGTCCAG CTTTGGTATTCAAAATATATGGTTTGGGCAAACGTTCAAAGACAATATTGTAGCGGTGTT TATCCAGGAGGGACTCAATGTAGCGCTTGGTCCAGGTACTACCATGTTGATGCACCTACC TGCGATGAGCCTGTCGCCTCTTACATGACCAAAATGTCGGAAAATGAGGTTCAGTGTCCC AATGAGAGAAACGCAACTACCCTAGAGCCTCTCCGCCTGAATAAGCAGGGAGACTCTGAT 
TTTTCTTTGACTTTCGAGGAAGAGGAAGAGGAAGAGACAGGATCTAAATCTCTTTGGAGT ACATTGAAAAAAATTTTCTCTAAAAGAAGTATAAGTTGA >YCE8 1392 residues Pha 0 Code 0 ATGAACCGTATTACTAGGAAAAGTTGTTTATTCGCGATTATATTTGCATCATTATTTGTG ACACATGCATTGGGTGCCGCTATTGATCCGCCAAGGCGACCACATAATGTGAAGCCTTTT CATAACGGTAATCTCGAACTTCAAAGAAGAGCAAATGAACCGTTTTTTGAAATAGATGTC AAGAGTCTGAACACAAACTCACCGATATCAGAGTTGTGTAAAAAAGATTTGCACGTCATT GAATCGTCTCATGATCTTTTTCATTTACAAAACCAATGTGAATTCATCTTGGGGTCATTA AAAGTCACAAACTATGATTCTAACATTTTGGATTTGAACAGCTTGAGGGCCATTGGTGGT GACCTGATTATTCAGGATTCACCTGAACTGATCAGAATCCAAGCCGGGAACTTGAATAAA ATCGAAGGGCTCTTCCAATTACAGGGACTAACCTCTTTGGTTTCTGTTGAAATTCCAACT TTGAAATTTTGTCAGTCACTGGAGTGGAAAGTTGTTCCCATCTTGAACTACGTCTCCATG GATTCTCAGAATATTGAGATTATAAAGGATATTGTCATATCGGATACTTCATTAGCAAAC ATCGAGAATTTCAACAAGGTTCAGGAAATTGATACTTTCAATATCAATAATAACAGATTT TTAGAAACTATTCATTCGAACGTTAAAACCATTAGGGGACAATTCAGTGTACATGCGAAC GCTAAGGAGCTAGAACTTGAAATGCCACACTTGAGAGAAGTGGAAAACATAACGATTAGG GACACATCATTGGTCTACCTTCCACAATTAACAAAAGTGAAAAGCTCTTTAGAGTTCATC GAAAATTACTTTTACGAATTGAACCTGAACAATTTGCAGAAGATTGGTGGAACATTAGGA ATTATCAACAATGTAAATTTAATAAAAGTTAATTTGGAGAACTTAACAGACATTCAAGGT GGCTTGATGATCGCCGATAACGAATCCCTCGAGGATATTACTTTCCTGCCAAACTTGAAG CAGATTGGAGGTGCTATTTTCTTTGAAGGTTCGTTCAAAGATATCATGTTCGATAGCTTG AAACTGGTGAAAGGTAGCGCTTTTATTAAGAGTTCATCAAACGTGTTGGATTGCAATAAA TGGACAAACCCATCAAATGGAAGATCAATCATCAGGGGTGGGAAATTCACTTGTATTTCT GGTAAGAAGGAAAATACGCTGAATGTTAAACAGGATGGTACAATCATAGAAAAAGGGTAC AAAGATTTAACGCAAGAAGGTGAAGACTCCAAGAAAAGAGTGATTTCAAAATACGCGAAC TCAGCAAATCCAAGCATGCAATTGGACCCCCTTCTTTTTGGTACATGCCTTGTTGCTATG TTATTGTTTTAA >YCE7 777 residues Pha 0 Code 0 ATGAAGAAGACGTTCGAGCAGTTTCGAAAAAGCAATTTACTATTTCAGGTTCTCAAAGGA CCCCAGCATCTAGAATGTCAGAAGTTATTTGTCCTTGATTCTTCATTCAATCCACCACAT CTGGCCCATTTTCAACTACTATCGCAGACTATTAAAAACTTCAAATTGAAGGACACCCGT TCGCATGTTTTATTACTGTTAGCGGTGAATAATGCAGATAAGTTGCCTAAGCCGGCATCT TTTCCAACTCGTCTGGAAATGATGTGCTTATTCGCTGACTACCTTCAGGAGAAGCTCCCC CAATCTGTAGTATCTGTCGGGTTGACTGTTTTCTCGAAATTCATCGACAAGGACAAAATA 
TTACATGAGCAATTTGTTAAAGGATGCAGTGCAGATATAGGCTACTTAGTTGGTTTTGAT ACAATTGCTAGGATCTTTGATGAAAAATATTATCATCCTTTAAAAATCAGTGATGTAATG GAGAGCTTCATGTCGGGATCTCAATTATATTGCTTGGCGAGAGGCGATTGCCATCTCAGT GCTGAATCGCAACTAAGATACGCCAGTGACATCCTTGAGGGAAAATTCGAACCGGTAATA CCAAGAGAATGGGGCGCTAGGATTCATGTTATGCAAAATGATTATCCAGCATTAAGAAAT GTTTCATCATCCGAGATTAGGAACAAACTGAAGAATGGGCAAGTGGAGAGTTTGAAAGAC GAGTTGCCATTGTGCATATACGATTATTTGATCAATAATAAGACAATATTTGATTGA >YCE5 2283 residues Pha 0 Code 0 ATGAAGATAACGTGTACAGACTTGGTGTACGTCTTCATTTTACTCTTCCTAAACACGAGT TGTGTCCAAGCCGTTTTTTCAGATGATGCATTTATCACTGATTGGCAACTGGCTAACTTA GGTCCTTGGGAGAAAGTCATCCCTGATTCTCGAGACCGCAACAGGGTTCTCATCTTATCG AACCCTACCGAAACTTCCTGCTTAGTTTCTTCGTTTAACGTTTCTTCCGGACAGATTCTT TTCAGAAACGTTTTACCCTTTACCATTGATGAGATTCAACTGGATAGTAATGACCATAAC GCAATGGTTTGTGTGAACTCTTCAAGCAACCATTGGCAGAAATATGATTTACACGATTGG TTTTTACTAGAGGAAGGCGTAGATAATGCCCCTTCTACGACCATTTTACCTCAATCCTCA TATTTAAACGATCAAGTATCTATTAAGAACAATGAACTACATATTCTCGATGAGCAGTCA AAACTGGCAGAATGGAAATTGGAGTTACCTCAAGGGTTCAATAAAGTGGAATATTTTCAT CGTGAAGATCCCCTGGCGTTAGTGTTGAACGTTAATGATACCCAATATATGGGATTCTCT GCCAATGGCACAGAATTGATCCCCGTTTGGCAAAGAGATGAATGGTTGACTAACGTGGTA GACTATGCTGTATTGGACGTCTTCGATTCTAGGGATGTGGAGTTGAACAAAGATATGAAA GCGGAACTTGATTCAAATTCGCTTTGGAATGCTTACTGGCTTAGATTGACAACTAATTGG AATCGCCTTATCAACTTATTGAAAGAAAACCAATTCTCACCAGGACGTGTCTTCACTAAA CTCCTAGCTCTAGACGCTAAGGATACCACGGTATCAGATTTGAAGTTCGGATTCGCCAAA ATCTTAATTGTTTTGACGCATGATGGCTTTATCGGCGGCCTTGATATGGTCAATAAGGGC CAACTTATCTGGAAACTCGATTTAGAAATTGATCAGGGCGTCAAAATGTTCTGGACGGAT AAAAACCATGACGAACTTGTTGTTTTTTCGCATGATGGGCATTATTTGACAATTGAAGTT ACTAAAGATCAACCGATTATCAAATCAAGATCCCCCCTATCTGAAAGGAAAACTGTTGAT TCCGTTATTAGGCTGAATGAACATGATCACCAGTATCTGATTAAGTTTGAGGATAAGGAT CATTTACTGTTCAAATTGAATCCCGGCAAGAATACGGATGTACCAATAGTTGCCAACAAC CATTCTAGTTCCCACATATTCGTCACAGAGCATGACACGAATGGCATTTATGGCTACATA ATCGAAAACGATACGGTAAAACAAACTTGGAAAAAAGCCGTAAATTCGAAAGAGAAAATG GTGGCATATAGCAAGAGGGAAACAACAAACCTAAACACTCTTGGTATTACACTAGGTGAC AAATCGGTTCTTTATAAATATTTGTACCCCAACCTAGCGGCTTATCTGATCGCTAATGAA 
GAACATCATACAATCACTTTTAACTTAATTGATACCATTACAGGAGAAATCCTCATTACC CAAGAGCACAAGGATTCTCCGGATTTTAGGTTTCCAATGGATATTGTTTTCGGTGAATAT TGGGTCGTTTATTCCTATTTCAGTTCTGAACCTGTTCCAGAACAAAAGTTAGTAGTGGTG GAATTATATGAGTCACTAACCCCAGATGAGCGTTTGTCTAACTCAAGCGACAATTTTTCT TATGATCCATTGACTGGACACATTAACAAACCTCAATTTCAAACTAAACAATTCATTTTT CCCGAGATTATCAAAACAATGTCCATTTCCAAGACAACGGATGATATTACCACAAAGGCA ATCGTTATGGAATTAGAAAATGGACAAATCACCTACATACCAAAGCTTTTATTGAATGCA AGAGGTAAACCAGCAGAAGAAATGGCCAAGGATAAGAAAAAAGAGTTTATGGCTACCCCA TACACGCCAGTTATCCCAATTAATGATAATTTCATTATCACTCATTTCAGAAATCTATTG CCAGGATCCGATTCGCAGTTGATCTCCATCCCAACCAATCTGGAATCCACAAGCATTATA TGTGATCTAGGCCTTGATGTATTTTGTACAAGGATCACACCTTCGGGCCAATTTGATTTA ATGAGTCCTACTTTCGAAAAGGGTAAATTGCTTATTACTATATTCGTCTTGTTGGTGATC ACGTATTTTATCCGTCCTTCTGTTTCAAACAAGAAGTTGAAATCCCAATGGCTAATTAAA TAG >YCE6 324 residues Pha 0 Code 0 ATGGTAAAGGGTAAAACGTTTCTGAAAAGAATCTGTCCGGAAGAAACGTTAAACGAAGAA ACTAAGCAGGAAGTTTCGGTAGGGTTCGATAAGATGAGAACCCTGTTGCGGTCTCGAGAA TCAGGGATGACTTTCTCCCAAGGACCTAAGTTAGCCAGTTGCCAATCAGTGATAAATGCA TCATCTGAAAAAACGGCTTGGACACAACTCGTGTTTAGGAAGAGTAAAATGAAGACGTAC ACCAAGTCTGTACACGTTATCTTCATTGCTATGGGGGAAGGGGAGGATGAAAGTGTTGAT ATGAATGTAGGTATTAGTTATTAA >YCE4 1254 residues Pha 0 Code 0 ATGGCTGTATTTACTCCTCCATCAGGTAATAGCAATTCCACCGACCATACTCACACACAA GATGACCACGACAAAGATGATAATGATATCAAGAAATTCTACATAAGGCCAAGTTTAGGC TTAAAACTGTGGGGTCCGCTCGTACCCGCTCCTGATAACCTACCGGGACTATACACTCTA ATCACTATCCAATCTGCAGTGGGTTTCTTTGCCCTTTGGAGACTGAGAAGGCTCTACAAA CTACCGCCACCGCGCCGCATTGCCACTGGCACTCACTCGGATTTATCCTTTGGCGAACTA CCCAGTGAAATGATTGTCAATGGCAAGACTAAAATCAAAAAGGATATTGCTGACTTTCCA ACTTTGAACCGCTTCTCCACCACCCATGGTGACATTGTGCTCGCCCCTCCTCCCATCATA CCTCGCCAATCTCGATTCGTCAGCGTCAGAAAGCTCTTATGGGGGTTGTTTGGCTCTTTG CTACTTTCTCAGTCACTGTTGGAGCTTACTCGCCTGAACTTTCTTAAATACGACCCCTGG TGCGACGAAATGAAATCCGTACGTGACAAGAAGTTTTTCAACAATATTGTCAAATATTAT CACGAGGGCATAGACCCCACCAAAATAAAAGTCAAGGATGCTATGAACGGTACTCCTCTC TCGACAAATATCCCTGAGGTCAAACAAAGCGTCGCTCTCGCTAGAGCGCAAGTTGAGGCG CAGAATCCCATTATTAAATGGTTCGGACCCTTGGAATACAAGCCCATGTCTTTCAACGAG 
TACCTCAATCGCATGGAATTTCACTTGGACATGTTCGAGTTTTTTCAAAATAAAAGAAAC ATTAGAGAAAATTCCATTGAACTCATCAATTCCATATCCCACAATCCGCAGTCTTCTTCT ACTGGCCTTGAAGGTCTTTCCGAGTCCAAAAAACTCCATCTACAAAATGTGGAAAAAAGA CTGCATTTCTTAGCATCTTCGGGAGATTCCATTTCCGCACCAGTAAAGAAGAGATCCAGC ACCACACTCTCCCGAGGTGTCATTTTGCCCCATGACACGAAAGGCCCGCAAGATATTGAT CTCGATACAATAAGATCGCTTTATGATCCATGGATGACTTTGGCCTTAGAAACTTCGCTA AGCATCAAATTCATACCAACTACCATGCCCTCCCATACCAAGACACCCACTAGCACGGAC CAGCCGTTACCAGGGCCTACCCCCAAGGCTCTCACTAATGAAAAGACACATTAG >PDI1 1569 residues Pha 0 Code 0 ATGAAGTTTTCTGCTGGTGCCGTCCTGTCATGGTCCTCCCTGCTGCTCGCCTCCTCTGTT TTCGCCCAACAAGAGGCTGTGGCCCCTGAAGACTCCGCTGTCGTTAAGTTGGCCACCGAC TCCTTCAATGAGTACATTCAGTCGCACGACTTGGTGCTTGCGGAGTTTTTTGCTCCATGG TGTGGCCACTGTAAGAACATGGCTCCTGAATACGTTAAAGCCGCCGAGACTTTAGTTGAG AAAAACATTACCTTGGCCCAGATCGACTGTACTGAAAACCAGGATCTGTGTATGGAACAC AACATTCCAGGGTTCCCAAGCTTGAAGATTTTCAAAAACAGCGATGTTAACAACTCGATC GATTACGAGGGACCTAGAACTGCCGAGGCCATTGTCCAATTCATGATCAAGCAAAGCCAA CCGGCTGTCGCCGTTGTTGCTGATCTACCAGCTTACCTTGCTAACGAGACTTTTGTCACT CCAGTTATCGTCCAATCCGGTAAGATTGACGCCGACTTCAACGCCACCTTTTACTCCATG GCCAACAAACACTTCAACGACTACGACTTTGTCTCCGCTGAAAACGCAGACGATGATTTC AAGCTTTCTATTTACTTGCCCTCCGCCATGGACGAGCCTGTAGTATACAACGGTAAGAAA GCCGATATCGCTGACGCTGATGTTTTTGAAAAATGGTTGCAAGTGGAAGCCTTGCCCTAC TTTGGTGAAATCGACGGTTCCGTTTTCGCCCAATACGTCGAAAGCGGTTTGCCTTTGGGT TACTTATTCTACAATGACGAGGAAGAATTGGAAGAATACAAGCCTCTCTTTACCGAGTTG GCCAAAAAGAACAGAGGTCTAATGAACTTTGTTAGCATCGATGCCAGAAAATTCGGCAGA CACGCCGGCAACTTGAACATGAAGGAACAATTCCCTCTATTTGCCATCCACGACATGACT GAAGACTTGAAGTACGGTTTGCCTCAACTCTCTGAAGAGGCGTTTGACGAATTGAGCGAC AAGATCGTGTTGGAGTCTAAGGCTATTGAATCTTTGGTTAAGGACTTCTTGAAAGGTGAT GCCTCCCCAATCGTGAAGTCCCAAGAGATCTTCGAGAACCAAGATTCCTCTGTCTTCCAA TTGGTCGGTAAGAACCATGACGAAATCGTCAACGACCCAAAGAAGGACGTTCTTGTTTTG TACTATGCCCCATGGTGTGGTCACTGTAAGAGATTGGCCCCAACTTACCAAGAACTAGCT GATACCTACGCCAACGCCACATCCGACGTTTTGATTGCTAAACTAGACCACACTGAAAAC GATGTCAGAGGCGTCGTAATTGAAGGTTACCCAACAATCGTCTTATACCCAGGTGGTAAG AAGTCCGAATCTGTTGTGTACCAAGGTTCAAGATCCTTGGACTCTTTATTCGACTTCATC 
AAGGAAAACGGTCACTTCGACGTCGACGGTAAGGCCTTGTACGAAGAAGCCCAGGAAAAA GCTGCTGAGGAAGCCGATGCTGACGCTGAATTGGCTGACGAAGAAGATGCCATTCACGAT GAATTGTAA >GLK1 1503 residues Pha 0 Code 0 ATGTCATTCGACGACTTACACAAAGCCACTGAGAGAGCGGTCATCCAGGCCGTGGACCAG ATCTGCGACGATTTCGAGGTTACCCCCGAGAAGCTGGACGAATTAACTGCTTACTTCATC GAACAAATGGAAAAAGGTCTAGCTCCACCAAAGGAAGGCCACACATTGGCCTCGGACAAA GGTCTTCCTATGATTCCGGCGTTCGTCACCGGGTCACCCAACGGGACGGAGCGCGGTGTT TTACTAGCCGCCGACCTGGGTGGTACCAATTTCCGTATATGTTCTGTTAACTTGCATGGA GATCATACTTTCTCCATGGAGCAAATGAAGTCCAAGATTCCCGATGATTTGCTAGACGAT GAGAACGTCACATCTGACGACCTGTTTGGGTTTCTAGCACGTCGTACACTGGCCTTTATG AAGAAGTATCACCCGGACGAGTTGGCCAAGGGTAAAGACGCCAAGCCCATGAAACTGGGG TTCACTTTCTCATACCCTGTAGACCAGACCTCTCTAAACTCCGGGACATTGATCCGTTGG ACCAAGGGTTTCCGCATCGCGGACACCGTCGGAAAGGATGTCGTGCAATTGTACCAGGAG CAATTAAGCGCTCAGGGTATGCCTATGATCAAGGTTGTTGCATTAACCAACGACACCGTC GGAACGTACCTATCGCATTGCTACACGTCCGATAACACGGACTCAATGACGTCCGGAGAA ATCTCGGAGCCGGTCATCGGATGTATTTTCGGTACCGGTACCAATGGGTGCTATATGGAG GAGATCAACAAGATCACGAAGTTGCCACAGGAGTTGCGTGACAAGTTGATAAAGGAGGGT AAGACACACATGATCATCAATGTCGAATGGGGGTCCTTCGATAATGAGCTCAAGCACTTG CCTACTACTAAGTATGACGTCGTAATTGACCAGAAACTGTCAACGAACCCGGGATTTCAC TTGTTTGAAAAACGTGTCTCAGGGATGTTCTTGGGTGAGGTGTTGCGTAACATTTTAGTG GACTTGCACTCGCAAGGCTTGCTTTTGCAACAGTACAGGTCCAAGGAACAACTTCCTCGC CACTTGACTACACCTTTCCAGTTGTCATCCGAAGTGCTGTCGCATATTGAAATTGACGAC TCGACAGGTCTACGTGAAACAGAGTTGTCATTATTACAGAGTCTCAGACTGCCCACCACT CCAACAGAGCGTGTTCAAATTCAAAAATTGGTGCGCGCGATTTCTAGGAGATCTGCGTAT TTAGCCGCCGTGCCGCTTGCCGCGATATTGATCAAGACAAATGCTTTGAACAAGAGATAT CATGGTGAAGTCGAGATCGGTTGTGATGGTTCCGTTGTGGAATACTACCCCGGTTTCAGA TCTATGCTGAGACACGCCTTAGCCTTGTCACCCTTGGGTGCCGAGGGTGAGAGGAAGGTG CACTTGAAGATTGCCAAGGATGGTTCCGGAGTGGGTGCCGCCTTGTGTGCGCTTGTAGCA TGA >YCD8 1587 residues Pha 0 Code 0 ATGAGCTATGGAACTATAAATGATATGAATGAATCGGTAACGAACTATCGAATAAAAAAA GCCCAAAACAATATCAAGGGATGGTACGCTTACTCATTTTCTAGCGAACCATTTGTCGTT TCTGCGGTTTCAACGTATATTCCCTTACTACTGCAGCAATTTGCGAGTATAAATGGTGTA AAAGTTCACGATCACTCCATACCCTGCCTGTCAGAAACGGGTAGTGATTCAGATAAGTGT 
GTTCTTGGTTTGTTCAACAATCGGATCTTCGTAGATACTTCAAGTTTTGCATTATATGTC TTTTCCCTTAGCGTTTTATTCCAAACTATAATAGTCATTTCCGTTTCAGGGATAGTAGAT CTCTGGGGGAGCGTTAAATTCAAAGGCAGAATTCTGGTTTGGTTTGGTATTGTGGGCGCA TTGTCGACTGTTGCGATTTCAAAATTGAATGATACCCAGATTTATTCTCTGGCTGGGCTT TATATAGTGGCCAATGGTTGTTTTGGCGTTATCAATGTTGTTGGGAATTCTCTTCTGCCC ATTTTTGTCAAGGATTCTTTGAAATGTCAAAGTCAAGGAGCTTATGAACCTGATAAGGTA GACTCGTTAACTACTGTTATTAGCGGTAGAGGTGCATCTTTAGGTTATTCAAGTGCCCTC ATTGTTCAGATTGTATCTATGTTCTTAGTCGCATCTAAAAAGGGCAGTAAGCAGGATGTT CAAGTGGCTGTTCTTTTCGTTGGGATTTGGTGGTTTGTGTGGCAACTGCCCATGATCTGG TTGATTGACGATGTGACAATACCGATAAGAGTTGACGATTCTACATTAGCATCCGCCCGC AGTCCGTATCCCGGTGAGCAAGACGCCTTGGGTCAACTAAACTGGAAGAATTACCTTTCA TATGGTTGGGTTTCGCTTTTCGAATCGTTTAAACATGCCAGACTATTGAAAGATGTGATG ATTTTTCTTATTGCGTGGTTTATTATTAGTGATTCCATTACAACTATAAATTCTACAGCG GTTTTGTTCTCCAAGGCAGAACTGCACATGAGTACCCTCAATTTAATCATGATAAGTGTT TTGACCGTTGTAAATGCAATGCTGGGTGCCTTTATGATTCCACAATTTCTTGCCACAAAG TTTCGGTGGACTTCTAGTCAAACTTTGATGTACATTATCATTTGGGCAAGTTTCATACCA TTTTATGGTATTCTTGGATTTTTCTTCAATGCGTTCGGTTTAAAGCATAAGTTTGAAATG TTCTTATTGGCCATTTGGTATGGATTATCACTAGGTGGCCTGTCCGCGGTTTCAAGATCA GTTTTCAGTTTGATTGTACCTCCAGGAAAAGAATCCACGTTTTTTAGTATGTTCAGTATC ACAGATAAGGGGTCGTCCATCCTGGGACCCTTCCTTGTTGGACTGCTTACCGATAAAACG CATAATATTCGCTATTCGTTTTATTTCTTCTTTTTGCTTTTGATGCTATCATTGCCTGTG CTAAACTGTTTGGATGTCAAGAGAGGTAGAAGAGAGGCTGAAGAACTCAGTCAAGTTTTA CCTGAAAGTGAAAGAAGGTTGGATTAG >SRO9 1401 residues Pha 0 Code 0 ATGAAGATCTTTTGGGATCCTAGATCGGTAATAGAACATCAGGATTACTCTGGACCTGCT AACGTGTTTCATCTTCTTTTCACTTCTCTGCCCACGATGTCTGCTGAAACCGCCGCCGCA AACACTGCTACTGCCCCAGTCCCAGAAGTGCAAGAACAAGAGAGCTCCAAGAGCAAGCAA GTCAACTTGACGCCGGCACCATTGCCCACATCTTCCCCATGGAAACTTGCTCCTACTGAG ATCCCTGTTTCTACTATCTCAATAGAAGACTTGGATGCCACAAGAAAGAAGAAGAACAGA ACACCCACTCCGAAATCATCGACTGCTACCAAGTGGGTTCCCATCAAGGCCTCCATTACC GTCTCTGGCACCAAAAGATCCGGTTCCAAGAATGGTGCAAGTAATGGCAACAGCAACAAG AGCAAAAACAACAAAACTGCAGCATCGTCGACATCGTCGAGTAATGCTAACAGGAAAAAG AAGCATCACCAACATAATGCTAAGAAGCAACAACAAATGAAGAAAGATGGCTTTGAATCG 
GCAGTAGGTGAGGAAGATTCAAAAGACGCTACCTCTCAAGAAAATGGTCAATCTACACAA CAGCAACAACCACCTCACCACCGTAATCATCACCACAGTCATCACCATAACAGCAATGGT CCTCAAAGGAGAAAGTTCCACAACAGTAATAACGCCGGTATGCCTCAGAACCAAGGCTTC CCACCACAGTTTAAACCTTACCAAGGACGCAACGCTCGTAATAACAACAACAACCGCTCT AAATACCACAACCACTTCCATCACAACCAACAACATCCTCAACAACCTATGGTCAAATTA CAGCAACAGTTTTATCCAGTCCAACCAGTGTTAATGGCCATCAACAACATTGCTAGACAA ATTGAATACTATTTCAGCGAAGAAAACTTGACCGTCGACAATTACTTAAGGTCCAAACTC TCCAAGGATGGTTTTGCTCCATTGTCTTTAATCTCTAAGTTTTACAGAGTTGTTAACATG TCCTTCGGAGGTGACACTAACCTGATTTTAGCCGCATTGAGAGAAATTGTCGCTAACGAA GCCGCTACCGTCAATGTTGCAGAAGGTACTTTGGCCGCCAAGGAAGGTGATAACGTTACC GGTGAAGCCAAAGAACCATCTCCATTGGATAAGTACTTCGTTCGTTCCAAGAGCTGGTCA AACTGGTTACCAGAAACTTTTGAAACTGAAATTAATATTGAAAAAGAACTGGTCGGCGAT GCATTGGACCAATTCATGATATCCCTACCACCTGTTCCTCAACAAGAAGAGGAATCATCC ACTGAACTCGCTTCTCAAGAACAAGAAACCAAAGAAGACTCTGCGCCGGTTGCTGCCGGT GAATCCGAGTCTTCCTTATAA >YCD6 1701 residues Pha 0 Code 0 ATGCAGGTTCAAAAAATGGTGAGAGATAACAGTAATAACGGTAGCGATAAAAGCGTCCAT TGGGAGAGGAGGAATAATAACGGCGCAGGCCCCCGTTATCGTTCCAGAAGCGGTAATACC GGTGCTTTGGCAACAAAACTAAGTAATGGGACGCTCTCTGTCAGAGGATTAGTGAAGGAC CGAACAGGAAGCGGCAAGATCGCGGGCTGTGTGGAGGCGTTTCTGGATGCCAGGACCCAA TTGAATACGCCCTGGGACCGTGCTAAGTGCAATTGGCTGGACCAGATAGATTACTATGTA CAGTTGAGAAAGACCGCGTTTTCTAAGGAATTGGACCAACTAAGGAAGCCCATGATCGAT GCATATGTGGCGGAGATGAGGCAGAAGTTTGATGCCTCCTATGGACAATCCAGGGCGCAA TTGGAAGCCAAACTGGCGCAGGTGGACAGTGAATGGCATATGGTACATGGTGATGTGCAT GCAAAACTGGAAAAACTCGTGGAAGAACGCCGGTTTTTGAAAAGATTAAGCGACACGATC GTACCACCCAGGTCCAAAAGATCACAGCGGCTGTCTCCATTGACCAAAGAGGACCGAGCC AACTGTATCTGTCCGCAGCCCAAAGGAATGAGCGACACCGCTTGGTTCGAAGCCATTCAG AAGAAAATGTTAGGAATGAATGGTACCATCAAGCTCCTAGAGACAGAACAGAAACTACTG GCTGACGAGAAAAACAGCGTGAGGAAGACGTTCTGGCCCATGGTGGAAGCACATTCACGC TCGAATGAATTTGCTTATCTGGAGAAATGCATCAGGCTGATGGCCTCTCAGAGAGCAATA TGCTTTTGTCTTGATATAGAGGCTTTCGAAACAAACCAGAACGTAATCACCGAAATTGGG ATTTCAATTTATGACCCCAGGGAAAATATGGTGCCGTCAATGGTTCCAATTACAAAGAAT TACCACCTAATTATCGAGGAGTCCCTGGAACTTAGAAACCAAAAATGGGTCTGTGACTAC 
AAGGATTGCTACTTATTGGGAGAAAGCTATGTTTTGAGCTTGAAAGAGTGCGTGCATTTC ATTCAATCACTAATAAACTATTACTTGGTCCCGGTGACCGAAGAAGACAAGACATGGTCA AGGGCATTTGTTGGTCATCACGTGAGCGGGGATCTTAAGTGGCTGGAGACTATTGGTGTC AAATTCCCTGGCAGAGGGTATGAAGGCCATCTGGACCATACGCTGCTTTTGGCTGAAACT CCCGGTGATCTAGACGTGTTCATCTTGGACACTGAGCAGTTTTACAGGAAATCGTATGGC GAAAAGGGCAGCAGTCTGGGCAAGATTCTGCGGTTGTTCGAGATACCGCATGCGTTTCTA CACAATGCCGGTAACGATGCCTACTATACCCTGCATTTGTTCATGAAGTTTTGCGATGTT AATTTCAGGAAAATAAGCGGCATGGACGATGTTCTTAAAGTAATGGGCCAAGTAAAAGTT TGGGGAGAACGAGACGTACGAGAGCCTAAAGTGGTGCCCATGTCGTATGCCATCTCCATC GAGGAGGCAGTCAAAAATCGGACGTACCGCAAGGGCGTCAAGAGCAGTAGGAAGGAAAGA GTCTGCCAAACGGAATTCGGTGGGTTAACGTATTTCGGAACTGCTAAAGACGCCTTCACA AGCACTCTTCCGACACACTAA >YCD5 333 residues Pha 0 Code 0 ATGGTATCTCAAGAAACTATCAAGCACGTCAAGGACCTTATTGCAGAAAACGAGATCTTC GTCGCATCCAAAACGTACTGTCCATACTGCCATGCAGCCCTAAACACGCTTTTTGAAAAG TTAAAGGTTCCCAGGTCCAAAGTTCTGGTTTTGCAATTGAATGACATGAAGGAAGGCGCA GACATTCAGGCTGCGTTATATGAGATTAATGGCCAAAGAACCGTGCCAAACATCTATATT AATGGTAAACATATTGGAGGCAACGACGACTTGCAGGAATTGAGGGAGACTGGTGAATTG GAGGAATTGTTAGAACCTATTCTTGCAAATTAA >YCD3 507 residues Pha 0 Code 0 ATGAATAAGTGGAGCAGGCTGTACGTTATAACTGTACGCAGGACTTTTCCAGGGAGAAGA AACATTGTACTGACGCAGTACTGGAATAAGAGCAAGAAAATGAGTGACGAATCGAATGAC GTGAAGTGGAACGATGCCCTGACACCATTGCAGCTGATGGTGCTGAGAGATAAGGCCACT GAAAGGCCCAACACCGGTGCGTATTTACACACCAACGAGTCCGGTGTCTACCATTGTGCC AACTGCGACAGACCGTTGTATTCGAGCAAGGCCAAGTTCGACGCTCGTTGTGGATGGCCC GCATTCTACGAAGAGGTATCCCCTGGAGCCATCACATATCATCGTGACAATTCTTTAATG CCTGCGAGGGTGGAGATATGTTGTGCAAGGTGTGGTGGACACTTGGGACATGTGTTTGAA GGTGAAGGCTGGAAACAGTTGCTAAACTTGCCCAAGGACACCAGACACTGTGTGAACAGT GCGTCTTTAAACCTCAAGAAGGATTAA >STE50 1041 residues Pha 0 Code 0 ATGGAGGACGGTAAACAGGCCATCAATGAGGGATCAAACGATGCTTCGCCGGATCTGGAC GTGAATGGCACAATATTGATGAATAATGAAGACTTTTCCCAGTGGTCGGTTGATGATGTG ATAACTTGGTGTATATCCACGCTGGAGGTGGAAGAAACCGATCCATTATGTCAGAGACTG CGAGAAAATGATATTGTAGGAGATCTTTTGCCGGAATTGTGCTTGCAAGATTGCCAGGAC TTGTGTGACGGTGATTTGAATAAGGCCATAAAATTCAAGATACTGATCAATAAGATGAGA 
GACAGCAAGTTGGAGTGGAAGGACGACAAGACTCAAGAGGACATGATAACGGTACTGAAA AACTTGTACACTACTACATCTGCGAAATTGCAAGAATTTCAATCGCAGTACACAAGGCTG AGGATGGATGTCTTGGACGTAATGAAGACCAGCTCAAGCTCTTCTCCGATTAACACACAT GGAGTGTCCACTACGGTACCTTCTTCAAACAACACAATTATACCCAGTAGTGACGGTGTG TCTCTTTCACAAACAGACTATTTCGACACAGTTCATAACCGACAATCACCGTCAAGGAGA GAATCCCCGGTAACGGTATTTAGGCAACCCAGTCTTTCCCACTCAAAATCTTTGCACAAG GATAGCAAAAACAAAGTACCCCAAATATCTACAAACCAATCTCACCCATCTGCCGTTTCA ACAGCGAACACACCGGGGCCATCACCTAACGAGGCGTTAAAACAGTTGCGTGCATCTAAA GAAGACTCCTGCGAACGGATCTTGAAAAACGCAATGAAAAGACATAACTTAGCAGATCAG GATTGGAGACAATATGTCTTGGTCATTTGCTATGGGGATCAAGAGAGGCTGTTAGAATTG AACGAAAAGCCTGTGATCATATTCAAGAACTTAAAGCAACAGGGTTTGCACCCCGCCATT ATGTTAAGAAGAAGAGGTGATTTCGAAGAAGTAGCAATGATGAACGGAAGTGACAATGTC ACCCCCGGTGGAAGACTCTAA >HIS4 2400 residues Pha 0 Code 0 ATGGTTTTGCCGATTCTACCGTTAATTGATGATCTGGCCTCATGGAATAGTAAGAAGGAA TACGTTTCACTTGTTGGTCAGGTACTTTTGGATGGCTCGAGCCTGAGTAATGAAGAGATT CTCCAGTTCTCCAAAGAGGAAGAAGTTCCATTGGTGGCTTTGTCCTTGCCAAGTGGTAAA TTCAGCGATGATGAAATCATTGCCTTCTTGAACAACGGAGTTTCTTCTCTGTTCATTGCT AGCCAAGATGCTAAAACAGCCGAACACTTGGTTGAACAATTGAATGTACCAAAGGAGCGT GTTGTTGTGGAAGAGAACGGTGTTTTCTCCAATCAATTCATGGTAAAACAAAAATTCTCG CAAGATAAAATTGTGTCCATAAAGAAATTAAGCAAGGATATGTTGACCAAAGAAGTGCTT GGTGAAGTACGTACAGACCGTCCTGACGGTTTATATACCACCCTAGTTGTCGACCAATAT GAGCGTTGTCTAGGGTTGGTGTATTCTTCGAAGAAATCTATAGCAAAGGCCATCGATTTG GGTCGTGGCGTTTATTATTCTCGTTCTAGGAATGAAATCTGGATCAAGGGTGAAACTTCT GGCAATGGCCAAAAGCTTTTACAAATCTCTACTGACTGTGATTCGGATGCCTTAAAGTTT ATCGTTGAACAAGAAAACGTTGGATTTTGCCACTTGGAGACCATGTCTTGCTTTGGTGAA TTCAAGCATGGTTTGGTGGGGCTAGAATCTTTACTAAAACAAAGGCTACAGGACGCTCCA GAGGAATCTTATACTAGAAGACTATTCAACGACTCTGCATTGTTAGATGCCAAGATCAAG GAAGAAGCTGAAGAACTGACTGAGGCAAAGGGTAAGAAGGAGCTTTCTTGGGAGGCTGCC GATTTGTTCTACTTTGCACTGGCCAAATTAGTGGCCAACGATGTTTCATTGAAGGACGTC GAGAATAATCTGAATATGAAGCATCTGAAGGTTACAAGACGGAAAGGTGATGCTAAGCCA AAGTTTGTTGGACAACCAAAGGCTGAAGAAGAAAAACTGACCGGTCCAATTCACTTGGAC GTGGTGAAGGCTTCCGACAAAGTTGGTGTGCAGAAGGCTTTGAGGAGACCAATCCAAAAG 
ACTTCTGAAATTATGCATTTAGTCAATCCGATCATCGAAAATGTTAGAGACAAAGGTAAC TCTGCCCTTTTGGAGTACACAGAAAAGTTTGATGGTGTAAAATTATCCAATCCTGTTCTT AATGCTCCATTCCCAGAAGAATACTTTGAAGGTTTAACCGAGGAAATGAAGGAAGCTTTG GACCTTTCAATTGAAAACGTCCGCAAATTCCATGCTGCTCAATTGCCAACAGAGACTCTT GAAGTTGAAACCCAACCTGGTGTCTTGTGTTCCAGATTCCCTCGTCCTATTGAAAAAGTT GGTTTGTATATCCCTGGTGGCACTGCCATTTTACCAAGTACTGCATTAATGCTTGGTGTT CCAGCACAAGTTGCCCAATGTAAGGAGATTGTGTTTGCATCTCCACCAAGAAAATCTGAT GGTAAAGTTTCACCCGAAGTTGTTTATGTCGCAGAAAAAGTTGGCGCTTCCAAGATTGTT CTAGCTGGTGGTGCCCAAGCCGTTGCTGCTATGGCTTACGGGACAGAAACTATTCCTAAA GTGGATAAGATCTTGGGTCCAGGTAATCAATTTGTGACTGCCGCCAAAATGTATGTTCAA AATGACACTCAAGCTCTATGTTCCATTGATATGCCAGCTGGCCCAAGTGAAGTTTTGGTT ATTGCCGATGAAGATGCCGATGTGGATTTTGTTGCAAGTGATTTGCTATCGCAAGCTGAA CACGGTATTGACTCCCAAGTTATCCTTGTTGGTGTTAACTTGAGCGAAAAGAAAATTCAA GAGATTCAAGATGCTGTCCACAATCAAGCTTTACAACTGCCACGTGTGGATATTGTTCGT AAATGTATTGCTCACAGTACGATCGTTCTTTGTGACGGTTACGAAGAAGCCCTTGAAATG TCCAACCAATATGCACCAGAACATTTGATTCTACAAATCGCCAATGCTAACGATTATGTT AAATTGGTTGACAATGCAGGGTCCGTATTTGTGGGTGCTTACACTCCAGAATCGTGCGGT GACTATTCAAGTGGTACTAACCATACATTACCAACCTATGGTTACGCTAGGCAGTACAGT GGTGCCAACACTGCAACCTTCCAAAAGTTTATCACTGCCCAAAACATTACCCCTGAAGGT TTAGAAAACATCGGTAGAGCTGTTATGTGCGTTGCCAAGAAGGAGGGTCTAGACGGTCAC AGAAACGCTGTGAAAATCAGAATGAGTAAGCTTGGGTTGATCCCAAAGGATTTCCAGTAG >BIK1 1323 residues Pha 0 Code 0 ATGGATAGATATCAAAGAAAGATAGGATGTTTCATACAAATCCCAAATTTGGGGCGCGGA CAACTGAAATACGTGGGTCCAGTGGACACGAAAGCTGGAATGTTTGCTGGTGTAGACTTA CTTGCCAACATTGGTAAGAACGATGGATCATTCATGGGGAAGAAGTATTTTCAAACAGAG TATCCTCAAAGTGGACTATTTATCCAGTTGCAAAAAGTCGCATCATTGATCGAGAAGGCA TCGATATCGCAAACCTCGAGAAGAACGACGATGGAACCGCTATCAATACCCAAAAACAGA TCTATTGTGAGGCTCACTAACCAGTTCTCTCCCATGGATGATCCTAAATCCCCCACACCC ATGAGAAGTTTCCGGATCACCAGTCGGCACAGCGGTAATCAACAGTCGATGGACCAGGAG GCATCGGATCACCATCAACAGCAAGAATTTGGTTACGATAACAGAGAAGACAGAATGGAG GTCGACTCTATCCTGTCATCAGACAGAAAGGCTAATCACAACACCACCAGCGATTGGAAA CCGGACAATGGCCACATGAATGACCTCAATAGCAGCGAAGTTACAATTGAATTACGAGAA GCCCAATTGACCATCGAAAAGCTACAAAGGAAACAACTACACTACAAAAGGCTACTCGAT 
GACCAAAGAATGGTCCTCGAAGAAGTGCAACCGACTTTTGATAGGTATGAAGCCACAATA CAAGAAAGAGAGAAAGAGATAGACCATCTCAAGCAACAATTGGAGCTCGAACGCAGACAG CAAGCCAAACAAAAGCAGTTTTTTGACGCTGAGAATGAACAGCTACTTGCTGTCGTAAGC CAACTACACGAAGAGATCAAAGAAAACGAAGAGAGAAATCTTTCTCATAATCAACCCACT GGTGCCAACGAAGATGTCGAACTCCTGAAAAAACAGCTGGAACAATTACGCAACATAGAA GACCAATTTGAGTTACACAAGACAAAGTGGGCTAAAGAACGCGAACAATTGAAAATGCAT AACGATTCGCTCAGTAAAGAATACCAAAATTTGAGCAAGGAACTATTTTTGACAAAACCA CAAGATTCCTCATCGGAAGAGGTGGCATCCTTAACGAAAAAACTTGAAGAGGCTAATGAA AAAATCAAACAGTTGGAACAGGCTCAAGCACAAACAGCCGTGGAATCGTTGCCAATTTTC GACCCCCCTGCACCAGTCGATACCACGGCAGGAAGACAACAGTGGTGTGAGCATTGCGAT ACGATGGGTCATAATACAGCAGAATGCCCCCATCACAATCCTGACAACCAGCAGTTCTTC TAG >FUS1 1539 residues Pha 0 Code 0 ATGGTAGCAACAATAATGCAGACGACAACAACTGTGCTGACGACAGTCGCCGCAATGTCT ACTACCTTAGCATCAAATTACATATCTTCGCAAGCTAGTTCCTCGACGAGTGTAACAACA GTAACGACAATAGCGACATCAATACGCTCTACACCGTCTAATCTACTCTTTTCTAATGTG GCGGCTCAGCCAAAATCATCTTCAGCAAGCACAATTGGGCTTTCAATCGGACTTCCCATC GGAATATTCTGTTTCGGATTACTTATCCTTTTGTGTTATTTCTACCTTAAAAGGAATTCG GTGTCCATTTCAAATCCACCCATGTCAGCTACGATTCCAAGGGAAGAGGAATATTGTCGC CGCACTAATTGGTTCTCACGGTTATTTCGGCAGAGTAAGTGTGAGGATCAGAATTCATAT TCTAATCGTGATATTGAGAAGTATAACGACACCCAGTGGACCTCGGGTGATAACATGTCT TCAAAAATACAGTACAAAATTTCCAAACCCATAATACCGCAGCATATACTGACACCTAAG AAAACGGTGAAGAACCCATATGCTTGGTCTGGTAAAAACATTTCGTTAGACCCCAAAGTG AACGAAATGGAGGAAGAGAAAGTTGTGGATGCATTCCTGTATACTAAACCACCGAATATT GTCCATATTGAATCCAGCATGCCCTCGTATAATGATTTACCTTCTCAAAAAACGGTGTCC TCAAAGAAAACTGCGTTAAAAACGAGTGAGAAATGGAGTTACGAATCTCCACTATCTCGA TGGTTCTTGAGGGGTTCTACATACTTTAAGGATTATGGCTTATCAAAGACCTCTTTAAAG ACCCCAACTGGGGCTCCACAACTGAAGCAAATGAAAATGCTCTCCCGGATAAGTAAGGGT TACTTCAATGAGTCAGATATAATGCCTGACGAACGATCGCCCATCTTGGAGTATAATAAC ACGCCTCTGGATGCAAATGACAGCGTGAATAACTTGGGTAATACCACGCCAGATTCACAA ATCACATCTTATCGCAACAATAACATCGATCTAATCACGGCAAGACCCCATTCAGTGATA TACGGTACTACTGCACAACAAACTTTGGAAACCAACTTCAATGATCATCATGACTGCAAT AAAAGCACTGAGAAACACGAGTTGATAATACCCACCCCATCAAAACCACTAAAGAAAAGG AAAAAAAGAAGACAAAGTAAAATGTATCAGCATTTACAACATTTGTCACGTTCTAAACCA 
TTGCCGCTTACTCCAAACTCCAAATATAATGGAGAGGCTAGCGTCCAATTAGGGAAGACA TATACAGTTATTCAGGATTACGAGCCTAGATTGACAGACGAAATAAGAATCTCGCTGGGT GAAAAAGTTAAAATTCTGGCCACTCATACCGATGGATGGTGTCTGGTAGAGAAGTGTAAT ACACGAAAGGGTACTATTCACGTCAGTGTTGACGATAAAAGATACCTCAATGAAGATAGA GGCATTGTGCCTGGTGACTGTCTCCAAGAATACGACTGA >YC08 579 residues Pha 0 Code 0 ATGTCCCCAACTGGAAACTACTTAAACGCTATTACAAACCGTCGTACCATCTACAATTTG AAGCCCGAATTACCACAAGGTGTCGGTTTGGATGATGTAAAGAGAACTGTACACGTTATT CTCAAGAATACGCCAACAGCTTTTAACTCACAAGTGAATCGCGCTGTCATTATCGTTGGT GATACACACAAAAGGATATGGGATGCTGTTGCGAGCGCAATGCCAACTGCTGAAGCCAAG AAGAGACCAGAGTCTTGCAGAGATGAGGCTTACGGTTCAGTCATTTTCTTCACTGATGAA GGACCAACTGAAAACTGCAAGAGATTTTCCAGCCTTGGCACCGCTTTCCCAACATGCGCC GCTCATACGACCGGTGCTGTGCAAATTCAGTCTTGGACTGCCCTCGAACTATTGGGATTG GGGGCTAATTTGCAACACTATAATGACTACGTCAAATCTGCTTTGCCTCAAGATGTTCCT ATTGCGTGGACTGTACAATCTCAATTGGTCTTTGGTGTTCCAACTGCCTTGCCAGAAGAA AAGACTTTTATCAATAACGTAATCAACGTTTATCACTGA >AGP1 1902 residues Pha 0 Code 0 ATGTCGTCGTCGAAGTCTCTATACGAACTGAAAGACTTGAAAAATAGCTCCACAGAAATA CATGCCACGGGGCAGGATAATGAAATTGAATATTTCGAAACAGGCTCCAATGACCGTCCA TCCTCACAACCTCATTTAGGTTACGAACAGCATAACACTTCTGCCGTGCGTAGGTTTTTC GACTCCTTTAAAAGAGCGGATCAGGGTCCACAGGATGAAGTAGAAGCAACACAAATGAAC GATCTTACGTCGGCTATCTCACCTTCTTCTAGACAGGCTCAAGAACTAGAAAAAAATGAA AGTTCGGACAACATAGGCGCTAATACAGGTCATAAGTCGGACTCGCTGAAGAAAACCATT CAGCCTAGACATGTTCTGATGATTGCGTTGGGTACGGGTATCGGTACTGGGTTACTGGTC GGTAACGGTACCGCGTTGGTTCATGCGGGTCCAGCTGGACTACTTATTGGTTACGCTATT ATGGGTTCTATCTTGTACTGTATTATTCAAGCATGTGGTGAAATGGCGCTAGTGTATAGT AACTTGACTGGTGGCTACAATGCATACCCAGTTTCCTTGTGGATGATGGTTTTTGGGTTT GCAGTCGCTTGGGTTTATTGTTTGCAATGGCTGTGTGTGTGTCCTCTGGAATTGGTGACC GCATCCATGACTATCAAATATTGGACGACATCTGTGAACCCGGATGTGTTCGTCATTATT TTCTATGTTTTGGTGATTACTATTAATATTTTCGGTGCTCGTGGTTATGCAGAAGCTGAG TTCTTCTTCAACTGTTGCAAAATTTTGATGATGACTGGGTTCTTCATTCTTGGTATTATC ATCGATGTTGGTGGCGCTGGTAATGATGGTTTTATTGGTGGTAAATACTGGCACGATCCG GGCGCTTTCAATGGTAAACATGCCATTGACAGATTTAAAGGTGTTGTTGCAACATTAGTG ACTGCTGCTTTTGCCTTTGGTGGTTCAGAGTTTATTGCCATCACCACTGCAGAACAATCT 
AATCCAAGAAAGGCCATTCCAGGTGCGGCCAAACAAATGATCTACAGAATCTTATTCCTA TTCTTGGCTACCATTATTCTACTGGGTTTCTTGGTGCCATACAATTCCGATCAATTATTG GGTTCTACCGGTGGTGGTACTAAAGCCTCGCCATATGTCATTGCTGTTGCATCCCACGGT GTCCGTGTCGTCCCACACTTCATTAACGCCGTTATTCTACTTTCCGTGCTGTCCATGGCT AACTCCTCCTTCTACTCCAGTGCTCGTTTATTTTTAACTCTATCCGAGCAAGGTTACGCT CCTAAGGTTTTCTCCTACATCGACAGAGCCGGTAGACCATTGATTGCCATGGGTGTTTCT GCATTGTTTGCCGTTATTGCCTTCTGTGCTGCATCTCCCAAGGAAGAACAAGTTTTCACT TGGTTATTGGCCATTTCTGGTTTGTCTCAGCTTTTCACATGGACTGCCATTTGTTTATCC CATCTTAGATTTAGAAGAGCCATGAAAGTCCAAGGGAGATCTCTTGGAGAATTGGGTTTC AAATCTCAAACTGGTGTTTGGGGATCTGCCTACGCTTGCATTATGATGATTTTAATTCTT ATTGCCCAATTTTGGGTCGCTATCGCCCCCATTGGTGAAGGTAAGCTGGATGCACAAGCC TTTTTCGAAAACTACTTGGCTATGCCAATCTTGATTGCACTATATGTCGGCTACAAGGTC TGGCACAAGGATTGGAAACTGTTCATCAGGGCCGACAAGATCGACCTAGATTCTCATAGA CAAATCTTTGATGAAGAATTAATCAAGCAAGAAGACGAAGAATATAGGGAACGTTTGAGG AACGGACCTTATTGGAAAAGGGTCGTTGCCTTCTGGTGTTAA >LEU2 1095 residues Pha 0 Code 0 ATGTCTGCCCCTAAGAAGATCGTCGTTTTGCCAGGTGACCACGTTGGTCAAGAAATCACA GCCGAAGCCATTAAGGTTCTTAAAGCTATTTCTGATGTTCGTTCCAATGTCAAGTTCGAT TTCGAAAATCATTTAATTGGTGGTGCTGCTATCGATGCTACAGGTGTCCCACTTCCAGAT GAGGCGCTGGAAGCCTCCAAGAAGGTTGATGCCGTTTTGTTAGGTGCTGTGGGTGGTCCT AAATGGGGTACCGGTAGTGTTAGACCTGAACAAGGTTTACTAAAAATCCGTAAAGAACTT CAATTGTACGCCAACTTAAGACCATGTAACTTTGCATCCGACTCTCTTTTAGACTTATCT CCAATCAAGCCACAATTTGCTAAAGGTACTGACTTCGTTGTTGTCAGAGAATTAGTGGGA GGTATTTACTTTGGTAAGAGAAAGGAAGACGATGGTGATGGTGTCGCTTGGGATAGTGAA CAATACACCGTTCCAGAAGTGCAAAGAATCACAAGAATGGCCGCTTTCATGGCCCTACAA CATGAGCCACCATTGCCTATTTGGTCCTTGGATAAAGCTAATGTTTTGGCCTCTTCAAGA TTATGGAGAAAAACTGTGGAGGAAACCATCAAGAACGAATTCCCTACATTGAAGGTTCAA CATCAATTGATTGATTCTGCCGCCATGATCCTAGTTAAGAACCCAACCCACCTAAATGGT ATTATAATCACCAGCAACATGTTTGGTGATATCATCTCCGATGAAGCCTCCGTTATCCCA GGTTCCTTGGGTTTGTTGCCATCTGCGTCCTTGGCCTCTTTGCCAGACAAGAACACCGCA TTTGGTTTGTACGAACCATGCCACGGTTCTGCTCCAGATTTGCCAAAGAATAAGGTCAAC CCTATCGCCACTATCTTGTCTGCTGCAATGATGTTGAAATTGTCATTGAACTTGCCTGAA GAAGGTAAGGCCATTGAAGATGCAGTTAAAAAGGTTTTGGATGCAGGTATCAGAACTGGT 
GATTTAGGTGGTTCCAACAGTACCACGGAAGTCGGTGATGCTGTCGCCGAAGAAGTTAAG AAAATCCTTGCTTAA >NFS1 1494 residues Pha 0 Code 0 ATGTTGAAATCAACTGCTACAAGATCGATAACAAGATTATCTCAAGTTTACAACGTTCCA GCGGCCACATATAGGGCTTGTTTGGTAAGCAGGAGATTCTATTCCCCTCCTGCAGCAGGC GTGAAGTTAGACGACAACTTCTCTCTGGAAACGCATACCGATATTCAGGCTGCTGCAAAG GCACAGGCTAGTGCCCGTGCGAGTGCATCCGGTACCACCCCAGATGCTGTAGTAGCTTCT GGTAGCACTGCAATGAGCCATGCTTATCAAGAAAACACAGGTTTTGGTACTCGTCCCATA TATCTTGACATGCAAGCCACTACACCAACAGACCCTAGGGTTTTGGATACGATGTTGAAG TTTTATACGGGACTTTATGGTAATCCTCATTCCAACACTCACTCTTACGGTTGGGAAACA AATACTGCTGTGGAAAATGCTAGAGCTTACGTAGCAAAGATGATCAATGCCGACCCCAAG GAAATAATATTCACTTCGGGAGCGACCGAATCTAATAATATGGTTCTTAAGGGTGTCCCA AGATTTTATAAGAAGACTAAGAAACACATCATCACCACTAGAACGGAACACAAGTGTGTC TTGGAAGCCGCACGGGCCATGATGAAGGAGGGATTTGAAGTCACTTTCCTAAATGTGGAC GATCAAGGTCTTATCGATTTGAAGGAATTGGAAGATGCCATTAGACCAGATACCTGTCTC GTCTCTGTGATGGCTGTCAATAATGAAATCGGTGTCATTCAACCTATTAAAGAAATTGGT GCAATTTGTAGAAAGAATAAGATCTACTTTCATACTGACGCCGCACAAGCCTATGGTAAG ATTCACATTGATGTCAATGAAATGAACATTGATTTACTATCAATTTCTTCTCACAAGATT TACGGTCCAAAGGGAATAGGTGCCATCTATGTAAGAAGGAGACCAAGAGTTAGATTAGAA CCTTTACTATCCGGTGGTGGCCAAGAGAGAGGATTGAGATCTGGTACTTTGGCCCCCCCA TTGGTAGCGGGATTTGGTGAAGCTGCGAGATTGATGAAGAAAGAATTTGACAACGACCAA GCTCACATCAAAAGACTATCCGATAAATTAGTCAAAGGTCTATTATCCGCTGAACATACC ACGTTGAACGGATCTCCAGATCATCGTTATCCAGGGTGTGTTAACGTTTCTTTCGCCTAC GTGGAAGGAGAATCTTTATTGATGGCACTAAGGGATATCGCATTATCCTCGGGTTCAGCC TGTACATCTGCTTCCCTAGAACCTTCTTATGTTTTACATGCGCTGGGTAAGGATGATGCA TTAGCCCATTCTTCCATCAGATTTGGTATTGGTAGATTTAGTACTGAAGAGGAGGTCGAC TACGTCGTTAAGGCCGTTTCTGACAGAGTAAAATTCTTGAGGGAACTTTCACCATTATGG GAAATGGTTCAAGAAGGTATTGACTTAAACTCCATCAAATGGTCAGGTCATTGA >BUD3 4104 residues Pha 0 Code 0 ATGGAGAAAGACCTGTCGTCTCTTTACTCTGAAAAGAAAGACAAAGAGAACGATGAAACC TTATTTAACATCAAACTATCCAAATCTGTTGTCGAGACCACACCGCTAAATGGTCATTCA TTGTTTGATGATGATAAATCACTTTCAGACTGGACGGATAATGTGTTCACTCAATCAGTA TTCTATCACGGGTCAGATGACTTGATATGGGGGAAGTTCTTTGTCTGCGTGTACAAGTCC CCCAACAGCAATAAGTTGAACGCTATAATATTCGACAAATTAGGAACATCATGCTTCGAA 
TCCGTCGATATATCTTCCAACTCGCAATACTATCCGGCCATTGAGAATTTGAGTCCAAGT GATCAGGAAAGCAATGTTAAGAAATGCATTGCTGTCATTCTGTTACAGCGCTATCCATTA CTTTCACCATCAGACTTATCACAAATATTGTCCAATAAATCGGAAAATTGCGACTATGAC CCCCCTTATGCTGGAGATTTGGCTAGTAGTTGCCAGTTGATAACAGCAGTTCCTCCAGAA GATCTGGGGAAGCGCTTCTTTACATCAGGACTTCTGCAAAATAGATTTGTCAGCTCTACC CTGTTAGATGTTATTTATGAAAACAATGAATCCACCATCGAACTAAATAATAGGTTGGTA TTCCATCTGGGTGAACAACTTGAACAACTTTTTAACCCAGTCACAGAATACTCACCGGAA CAGACAGAATATGGTTATAAGGCGCCAGAGGACGAATTACCCACAGAATCGGATGATGAT CTTGTCAAGGCCATTTGCAACGAGTTATTACAACTACAAACAAATTTTACTTTCAATTTG GTAGAATTTTTGCCAAAATTCCTGATCGCCTTGAGAGTCAGAGTACTCAATGAAGAAATT AATGGGTTATCCACAACCAAATTAAATCGACTCTTCCCACCTACAATAGATGAAGTCACA AGAATCAATTGTATTTTTCTAGACTCGCTAAAGACAGCAATCCCTTACGGTTCCCTCGAA GTACTGAAGGCATGCAGCATTACTATTCCTTATTTCTACAAAGCATATACAAGACACGAG GCGGCCACAAAGAACTTCAGCAAAGATATTAAATTGTTTATTAGGCATTTCAGCAATGTA ATTCCAGAAAGAGAGGTCTACACGGAAATGAAAATCGAGAGTATAATTAAGGGACCTCAG GAAAAACTACTGAAGCTAAACTTAATTATAGAGAGATTGTGGAAGTCGAAAAAATGGAGA CCGAAAAATCAAGAAATGGCAAAAAAATGCTACAACAATATCATTGATGTCATTGATTCG TTTGGAAAATTAGATTCCCCACTTCATTCTTATAGTACCAGAGTATTTACTCCATCGGGA AAAATCCTTACAGAATTAGCCAAATGCTGGCCCGTAGAACTGCAATACAAATGGCTGAAG AGAAGGGTAGTCGGTGTGTATGATGTAGTGGATTTGAATGATGAAAATAAGAGAAATTTA TTAGTCATATTCAGTGATTATGTGGTTTTCATCAATATACTGGAGGCAGAAAGTTACTAC ACTTCAGATGGATCAAACAGGCCCTTAATCTCAGATATTTTAATGAACTCATTGATCAAC GAAGTTCCGTTGCCCTCCAAGATCCCTAAGTTGAAAGTGGAGCGTCATTGCTATATAGAT GAGGTTCTAGTTTCTATATTAGACAAAAGCACTCTACGTTTTGATCGATTGAAGGGAAAA GATTCTTTCTCAATGGTATGTAAATTATCCTCTGCATTTATCTCTTCTTCGTCAGTTGCT GACTTGATTACGAAGGCTAGAATTTTGGAAAAAGACACTGCATTTCATTTATTTAAAGCT AGTAGAAGCCATTTTACATTATATTCTACTGCTCACGAGCTTTGCGCTTATGATTCCGAA AAAATAAAATCAAAATTTGCCTTATTCCTGAACATACCACCATCCAAGGAGATATTGGAG GTCAACAACCTTCATTTGGCTTTTTTTGCAAGATTTTGCAGTAACGATGGTAGAGATAAC ATCGTAATCTTAGACGTCTTAACCAAACATGACGATAAACATATAGAAGTTACATCCGAT AACATTGTTTTCACCATAATTAATCAATTGGCCATTGAAATACCGATATGCTTTTCTTCC TTAAACTCATCGATGGCCAAAGATTTACTCTGTGTAAATGAGAATTTGATAAAAAACTTA 
GAACATCAATTGGAAGAGGTCAAGCACCCTTCAACAGACGAACATAGGGCTGTTAATAGC AAACTTTCCGGTGCATCCGATTTCGATGCTACTCACGAGAAGAAAAGATCATACGGTACC ATAACAACATTTAGAAGCTATACAAGCGACTTGAAGGACAGTCCATCAGGCGATAATAGT AATGTCACCAAGGAAACTAAGGAAATTTTACCAGTGAAACCTACGAAAAAGTCTTCAAAA AAACCAAGAGAAATTCAAAAGAAGACCAAGACAAACGCCTCTAAAGCAGAGCACATAGAA AAGAAGAAGCCTAACAAAGGCAAAGGGTTTTTTGGCGTGTTAAAAAATGTTTTTGGAAGT AAAAGCAAGAGCAAGCCTTCACCAGTTCAAAGAGTGCCTAAAAAAATATCGCAGAGGCAT CCTAAGTCTCCAGTGAAGAAGCCAATGACCTCAGAAAAGAAATCCTCCCCTAAAAGGGCA GTCGTTTCATCTCCCAAAATTAAAAAGAAAAGTACTTCTTTTTCCACAAAAGAATCACAA ACTGCTAAATCTTCTCTTCGAGCAGTTGAATTCAAATCTGATGACTTGATCGGAAAACCA CCTGATGTTGGAAATGGCGCACATCCTCAAGAAAATACCAGAATATCTTCAGTAGTAAGG GATACAAAATATGTCTCCTACAATCCCTCTCAGCCTGTGACAGAAAATACCAGTAACGAA AAAAATGTCGAACCAAAAGCGGATCAATCCACAAAGCAGGATAACATTTCCAATTTTGCA GATGTAGAGGTATCTGCGTCTTCTTATCCTGAAAAACTTGATGCAGAAACAGATGATCAA ATAATTGGGAAGGCGACGAATTCGTCATCAGTTCATGGAAATAAAGAGCTGCCAGACCTT GCTGAGGTGACTACAGCAAATAGGGTTTCTACAACATCGGCTGGGGACCAACGTATTGAT ACCCAAAGCGAATTTTTACGTGCAGCTGATGTTGAAAACTTAAGTGATGACGATGAACAC AGACAGAATGAAAGTAGAGTTTTTAACGATGACCTCTTTGGTGATTTTATTCCTAAGCAT TACCGTAATAAACAGGAGAACATTAACAGCTCGAGTAATTTGTTTCCAGAGGGAAAGGTG CCCCAAGAAAAGGGCGTATCAAATGAAAACACTAACATATCTCTCAAAACTAATGAAGAT GCATCTACATTGACGCAGAAACTCTCTCCACAAGCGAGTAAAGTGCTGACAGAAAATTCT AATGAATTAAAAGATACCAACAATGAAGGGAAGGACGCAAAGGACATAAAATTAGGAGAT GATTACAGTGATAAAGAAACAGCGAAAGAAATAACTAAACCAAAAAATTTTGTTGAAGGA ATAACTGAACGGAAAGAAATATTCCCCACTATTCCTAGGTTAGCGCCGCCAGCTTCAAAA ATTAACTTTCAAAGGTCACCATCCTATATTGAGCTCTTTCAAGGAATGAGGGTGGTTTTA GATAAGCATGATGCCCATTATAACTGGAAACGCTTGGCTAGTCAAGTCTCCTTAAGTGAG GGACTAAAAGTCAATACTGAGGAAGATGCGGCAATTATAAATAAAAGTCAGGATGATGCC AAGGCGGAAAGAATGACTCAAATTTCTGAAGTGATTGAGTATGAAATGCAGCAACCTATC CCAACTTATTTGCCTAAGGCGCATCTAGATGACTCGGGTATTGAAAAAAGTGATGACAAA TTCTTCGAAATTGAAGAAGAACTTAAGGAAGAATTGAAGGGCAGCAAAACGGTAATGAAG ATGTCGGTAATAATAATCCATCCAATTCTATTCCAAAAATCGAGAAGCCCCCAGCATTCA AAGTTATTAGAACATCGCCTGTGA >GBP2 1284 residues Pha 0 Code 0 
ATGGAGAGAGAGCTAGGGATGTATGGAAATGATAGGAGTAGATCAAGATCACCTGTACGT CGTCGTTTGAGCGACGACAGAGACAGGTACGATGATTATAACGATAGTAGCAGTAATAAT GGTAATGGCAGTCGTCGTCAGAGACGCGACCGAGGCTCCCGTTTCAATGATCGGTACGAT CAGAGTTATGGTGGCAGCCGCTACCACGATGATAGGAACTGGCCCCCTCGCCGAGGAGGC CGTGGCAGAGGAGGAAGCAGATCATTCAGAGGGGGACGCGGTGGCGGTAGGGGTCGTACT TTAGGTCCAATTGTTGAAAGAGACTTAGAAAGGCAATTTGACGCGACCAAGAGAAATTTT GAAAATAGTATCTTCGTGAGAAACTTGACTTTTGATTGTACCCCTGAAGACCTTAAGGAA TTGTTTGGTACAGTGGGCGAAGTTGTGGAGGCTGACATTATCACATCAAAGGGCCATCAC CGTGGTATGGGGACTGTGGAATTTACCAAAAACGAATCTGTCCAAGATGCCATATCGAAG TTTGATGGTGCCCTCTTTATGGACCGGAAACTAATGGTAAGACAGGATAATCCTCCTCCT GAAGCTGCCAAGGAATTTTCTAAGAAAGCTACTAGGGAAGAAATAGATAATGGGTTTGAA GTGTTCATCATCAATTTACCGTACTCTATGAATTGGCAATCCTTAAAAGATATGTTTAAA GAATGTGGTCATGTCTTGCGTGCCGATGTAGAATTGGATTTCAACGGATTTTCAAGAGGA TTCGGTTCTGTCATTTATCCTACTGAGGATGAAATGATTAGAGCTATCGATACATTCAAC GGCATGGAAGTAGAAGGTAGAGTTTTGGAAGTTAGAGAAGGGCGTTTCAACAAGAGAAAG AACAATGATCGTTATAATCAAAGGCGTGAGGACCTTGAAGATACCAGAGGTACTGAACCA GGTCTTGCGCAGGATGCCGCTGTCCACATTGATGAAACTGCAGCAAAATTTACTGAAGGT GTCAATCCAGGAGGGGATAGAAACTGTTTCATTTATTGTAGTAATTTACCATTCTCAACA GCAAGAAGCGATTTATTCGACTTGTTTGGGCCTATCGGCAAAATCAATAACGCGGAATTG AAACCACAGGAAAATGGTCAACCAACTGGTGTTGCTGTTGTAGAATATGAAAATTTAGTA GATGCAGATTTTTGTATTCAAAAATTAAATAATTATAATTATGGTGGTTGTAGTTTACAG ATCTCTTATGCTAGACGTGATTAA >ILV6 930 residues Pha 0 Code 0 ATGCTGAGATCGTTATTGCAAAGCGGCCACCGCAGGGTGGTTGCTTCTTCATGTGCTACC ATGGTGCGTTGCAGTTCCTCGTCGACCTCCGCGTTGGCGTACAAGCAGATGCACAGACAC GCAACAAGACCTCCCTTGCCCACACTAGACACTCCTTCCTGGAATGCCAACAGTGCCGTT TCATCCATCATTTACGAAACACCAGCGCCTTCTCGTCAACCAAGAAAACAGCATGTCTTG AACTGTTTGGTGCAAAACGAACCCGGTGTCTTGTCCAGAGTCTCGGGTACGTTAGCTGCC AGAGGCTTTAACATCGATTCGTTGGTCGTGTGCAACACCGAGGTCAAAGACCTAAGTAGA ATGACCATTGTTTTGCAAGGGCAAGATGGCGTAGTCGAACAAGCACGCAGACAAATCGAA GACTTGGTCCCCGTCTACGCCGTCCTAGACTATACCAATTCTGAGATCATCAAAAGAGAG CTAGTGATGGCCAGAATCTCTCTATTGGGTACTGAATACTTCGAAGACCTACTATTGCAC CACCACACTTCCACCAATGCTGGCGCCGCTGACTCCCAAGAATTGGTCGCCGAAATCAGA 
GAAAAGCAATTCCACCCTGCCAACTTGCCCGCCAGTGAGGTATTAAGGTTGAAGCACGAG CATTTGAACGATATCACCAACTTGACCAACAACTTTGGAGGTCGTGTCGTCGACATCAGC GAAACAAGCTGTATTGTGGAATTGTCTGCAAAACCCACACGTATCTCTGCCTTCTTGAAG TTGGTCGAGCCATTCGGTGTCCTAGAGTGTGCAAGAAGCGGTATGATGGCATTGCCAAGA ACTCCTTTGAAGACAAGCACCGAGGAAGCTGCCGACGAAGACGAAAAGATCAGCGAAATC GTCGACATTTCCCAACTACCACCTGGTTAG >CWH36 393 residues Pha 0 Code 0 ATGGAGCTGGCAAAGGAACGTAATGGCCCACATCAAAAACATCATGGCCAATGTCAAAAT CACTGTACTTCTCCAAACACTGTACGACAAAACAAAACAAACAAACTCTTGTTAGTAAAA AAGAAAGGGAAACTAGTAATATGGAGACACATCGTAAAAAAAATGTTGCACATACGCTTG GTTGTTCTTTGGAGCCATTATCCAGAACAGCACGGACATGGCACTAACCACTATGAATAC ACCAACAACAGTATAGCTAAATTGGACGCGCAGAGAGTTAGTAGAAGAAGAAGGAAGAAA AGGGAAGCGGAGAGAAGAGATTATGACACATACAAACTACTCATTACTCTTTGTTCTTTA TTATTCGTTGGACCTTTGTTTCTTAAAGTATAG >PEL1 1251 residues Pha 0 Code 0 ATGACGACTCGTTTGCTCCAACTCACTCGTCCTCATTACAGATTATTATCCCTACCTCTC CAGAAACCCTTCAATATAAAAAGGCAGATGTCCGCTGCGAACCCTTCTCCATTTGGCAAT TATTTGAACACGATCACTAAGTCCCTACAACAGAATTTACAAACATGCTTTCATTTCCAA GCAAAAGAAATCGATATAATCGAATCTCCATCTCAGTTTTACGATCTCTTGAAGACAAAA ATACTTAATTCACAAAATAGAATATTCATTGCGTCTCTGTATTTAGGCAAAAGCGAGACT GAGTTGGTGGACTGCATATCCCAGGCATTGACCAAGAACCCCAAGTTGAAAGTTTCTTTT CTACTTGATGGCCTTCGAGGAACAAGAGAATTGCCTTCCGCCTGTTCCGCCACTTTATTA TCGTCTTTAGTAGCCAAATATGGGTCAGAGAGAGTGGATTGCCGATTGTACAAGACGCCT GCTTATCATGGTTGGAAAAAAGTCTTGGTTCCCAAGAGATTTAATGAAGGTTTAGGCTTA CAACATATGAAAATATATGGGTTTGATAACGAGGTCATTCTTTCGGGAGCCAACCTTTCG AACGACTATTTCACCAACAGACAAGATAGATACTATCTCTTTAAATCTCGAAACTTCTCC AACTATTATTTTAAATTACATCAACTCATAAGTTCCTTCAGTTATCAGATTATAAAGCCA ATGGTGGATGGTAGCATCAACATCATTTGGCCAGATTCGAATCCTACTGTTGAACCGACG AAAAATAAAAGGCTGTTTTTAAGGGAAGCATCTCAATTACTAGATGGCTTTTTAAAGAGT TCTAAACAAAGCCTCCCGATTACTGCCGTGGGTCAATTCTCCACATTAGTTTACCCAATT TCTCAATTCACTCCACTTTTTCCCAAATATAATGACAAATCGACCGAAAAAAGAACAATA TTGTCATTGCTTTCCACTATAACAAGCAATGCCATTTCTTGGACGTTCACTGCAGGATAC TTCAATATTTTGCCAGACATCAAAGCAAAACTGCTGGCAACGCCGGTTGCTGAGGCAAAT GTAATAACAGCTTCCCCCTTTGCAAACGGCTTTTACCAATCAAAGGGCGTCTCATCAAAT 
TTACCTGGTGCTTACTTGTACCTGTCAAAAAAATTTCTACAAGATGTATGTAGGTACAGA CAAGATCATGCTATTACCATTAAGAGAATGGCAAAGAGGCGTAGTAAATAA >RER1 567 residues Pha 0 Code 0 ATGGATTACGATAGCTCTGATACAATGAACGGTGGTTCAAGTAACCCCTTAATCACTAAG ATGAATACAATGAAATTATTATATCAACACTATTTGGATAAAGTCACTCCTCACGCTAAG GAGAGGTGGGCTGTATTGGGTGGTTTGTTATGTTTGTTTATGGTTCGTATTACAATGGCC GAAGGCTGGTATGTGATTTGTTATGGTCTAGGTCTATTTTTATTGAATCAATTTTTAGCC TTTTTGACCCCAAAATTCGATATGTCCTTACAGCAAGATGAAGAAAACAACGAATTGGAA GCTGGAGAAAAATCAGAAGAATTCCGTCCATTCATCAGAAGATTACCAGAGTTCAAATTC TGGTATAACAGCATTAGAGCCACTGTCATTTCCCTCTTGTTGTCGCTATTTTCAATCTTC GATATTCCAGTATTTTGGCCCATCTTATTGATGTATTTCATATTATTGTTTTTTTTAACT ATGAGAAGGCAGATTCAACATATGATAAAATATAGATATATACCCTTAGATATCGGTAAG AAGAAATATTCTCATTCTTCTAACTGA >CDC10 969 residues Pha 0 Code 0 ATGGATCCTCTCAGCTCAGTACAGCCTGCTTCTTATGTTGGTTTTGATACCATCACGAAT CAGATCGAACATCGTCTGTTGAAGAAAGGTTTTCAATTTAATATAATGGTTGTTGGCCAA TCCGGATTGGGTAAAAGTACTCTAATAAATACGTTATTTGCCTCACATTTGATTGATTCT GCTACTGGTGATGATATTTCTGCCCTGCCTGTTACAAAAACAACTGAAATGAAAATTTCT ACTCATACTCTTGTGGAGGACCGCGTTCGCTTGAATATTAATGTTATAGATACACCTGGA TTTGGTGACTTTATTGACAATTCTAAAGCTTGGGAGCCTATTGTGAAGTACATTAAGGAA CAACATTCTCAATACTTACGTAAAGAATTGACAGCCCAACGTGAAAGGTTTATTACTGAT ACAAGAGTTCATGCAATTCTTTATTTCCTGCAACCAAATGGAAAGGAGTTGAGCCGCCTT GACGTTGAAGCCTTGAAAAGATTGACAGAAATAGCAAATGTTATACCAGTTATTGGCAAG TCGGATACATTGACTTTAGATGAAAGAACGGAGTTTAGGGAGCTTATTCAAAATGAATTC GAAAAATACAATTTCAAGATTTATCCTTATGATTCGGAAGAACTAACTGACGAGGAATTA GAACTAAACAGAAGTGTTAGATCTATCATTCCGTTTGCAGTGGTTGGTTCTGAGAATGAG ATTGAAATAAACGGTGAAACCTTCAGGGGAAGAAAAACTCGTTGGAGCGCTATTAATGTT GAGGATATCAACCAGTGTGATTTTGTATATTTAAGGGAATTTTTGATTCGAACTCATCTC CAAGACTTAATCGAAACAACTTCCTACATTCATTATGAAGGGTTCAGAGCAAGACAATTA ATTGCCTTGAAAGAAAATGCGAATAGTCGTTCCTCAGCTCATATGTCTAGCAACGCCATT CAACGTTGA >MRPL32 552 residues Pha 0 Code 0 ATGAATTCTTTGATTTTTGGTAAACAATTAGCATTTCACAAAATTGTGCCTACCACTGCA ATTGGGTGGTTGGTACCGCTAGGAAATCCTTCACTGCAGATTCCAGGCCAAAAACAACTG GGATCTATCCACCGTTGGTTGAGAGAAAAGCTACAACAAGATCATAAGGACACTGAAGAT 
AAAGATTTTTTCTCTAATAATGGTATTCTACTAGCAGTTCCTAAAAAAAAAGTATCACAC CAAAAAAAAAGGCAAAAACTTTACGGTCCAGGTAAGAAGCAATTGAAGATGATTCACCAT TTGAATAAGTGCCCATCATGCGGCCATTATAAGAGAGCCAATACACTGTGTATGTATTGT GTTGGACAAATAAGTCATATATGGAAAACGCATACCGCTAAAGAAGAAATTAAGCCGAGA CAAGAGGAGGAACTTTCCGAACTAGACCAAAGAGTCCTATATCCTGGTAGAAGAGATACC AAATATACCAAGGATTTGAAAGATAAAGATAACTATTTGGAACGTCGCGTTCGGACTTTA AAAAAGGACTAG >YCP4 744 residues Pha 0 Code 0 ATGGTAAAGATTGCGATAATTACTTACTCTACCTACGGGCACATAGACGTTTTAGCCCAA GCTGTTAAGAAAGGTGTGGAGGCAGCTGGTGGTAAAGCTGATATATACAGGGTCGAGGAA ACTTTACCTGATGAAGTCCTCACCAAGATGAACGCTCCTCAGAAACCTGAAGATATTCCT GTTGCCACTGAGAAAACGTTGCTCGAATATGACGCCTTTTTGTTCGGTGTTCCAACTAGG TTTGGTAATTTGCCGGCTCAATGGTCCGCCTTTTGGGATAAAACCGGTGGATTATGGGCC AAGGGCTCTTTGAACGGCAAAGCTGCGGGGATATTCGTTAGTACTTCCAGTTACGGAGGT GGTCAAGAAAGTACCGTTAAAGCCTGTTTGTCTTATTTAGCTCATCACGGAATTATCTTT TTACCACTGGGTTATAAGAATTCATTTGCTGAGTTAGCCAGTATAGAAGAGGTACACGGT GGCTCTCCATGGGGTGCTGGTACCCTTGCAGGACCTGACGGCTCAAGAACTGCGTCTCCA CTTGAATTGAGAATTGCTGAAATTCAAGGTAAAACATTCTACGAAACCGCCAAAAAACTT TTCCCTGCAAAAGAAGCCAAGCCCTCCACTGAAAAGAAGACCACTACTTCTGATGCGGCT AAGAGACAAACTAAACCTGCAGCAGCTACAACTGCAGAAAAGAAGGAGGACAAAGGATTA TTATCCTGCTGTACTGTCATGTAA >CIT2 1383 residues Pha 0 Code 0 ATGACAGTTCCTTATCTAAATTCAAACAGAAATGTTGCATCATATTTACAATCAAATTCA AGCCAAGAAAAGACTCTAAAAGAGAGATTTAGCGAAATCTACCCCATCCATGCTCAAGAT GTAAGGCAATTCGTTAAAGAGCATGGCAAAACTAAAATTAGCGATGTTCTATTAGAACAG GTATATGGTGGTATGAGAGGTATTCCAGGGAGCGTATGGGAAGGTTCCGTTTTGGACCCA GAAGACGGTATTCGTTTCAGAGGTCGTACGATCGCCGACATTCAAAAGGACCTGCCCAAG GCAAAAGGAAGCTCACAACCACTACCAGAAGCTCTCTTTTGGTTATTGCTAACTGGCGAG GTTCCAACTCAAGCGCAAGTTGAAAACTTATCAGCTGATCTAATGTCAAGATCGGAACTA CCTAGTCATGTCGTTCAACTTTTGGATAATTTACCAAAGGACTTACACCCAATGGCTCAA TTCTCTATTGCTGTAACTGCCTTGGAAAGCGAGTCAAAGTTTGCTAAGGCTTATGCTCAA GGAATTTCCAAGCAAGATTATTGGAGTTATACTTTTGAAGATTCACTAGACTTGCTGGGT AAATTGCCAGTTATTGCAGCTAAAATTTATCGTAATGTATTCAAAGATGGCAAAATGGGT GAAGTGGACCCAAATGCCGATTATGCTAAAAATCTGGTCAACTTGATTGGTTCTAAGGAT GAAGATTTCGTGGACTTGATGAGACTTTATTTAACCATTCATTCGGATCACGAAGGTGGT 
AATGTATCTGCACATACATCCCATCTTGTGGGCTCAGCACTATCATCACCTTATCTGTCC CTTGCATCAGGTTTGAACGGGTTGGCTGGCCCACTTCATGGGCGTGCTAATCAAGAAGTA CTAGAATGGTTATTTGCACTTAAAGAAGAGGTAAATGATGACTACTCTAAAGATACGATC GAAAAATATTTATGGGATACTCTAAACTCAGGAAGAGTCATTCCCGGTTATGGTCATGCT GTGCTAAGGAAAACTGATCCTCGTTATATGGCTCAGCGTAAGTTTGCCATGGACCATTTT CCAGATTATGAATTATTCAAGTTAGTTTCATCAATATACGAGGTAGCACCTGGCGTATTG ACTGAACATGGTAAAACTAAAAATCCATGGCCAAATGTAGATGCTCACTCTGGTGTCTTA TTACAATATTATGGACTAAAAGAATCTTCTTTCTATACCGTTTTATTTGGCGTTTCAAGG GCATTTGGTATTCTTGCTCAATTGATCACTGATAGGGCCATCGGTGCTTCCATTGAAAGG CCAAAGTCCTATTCTACTGAGAAATACAAGGAATTGGTCAAAAACATTGAAAGCAAACTA TAG >YCP7 720 residues Pha 0 Code 0 ATGCAGCCTCATTTAGACAACAACAGTAATAATGACGATGTCAAATTGGATACATTAGGG GAACAAAATGTGTTATCATCCGCAGAAAATATCACTTTACCTGAAGACACCTTTAAATCA TATATGACCTACTTGCTGTACGAGATGGCTCATTACAAACCGATGATATTTTCCTTCTTG GCACTTTCAGTTTCAATTTTAATAGTTGTGATCTTTCATAATGTTAAAGCTTGTGATGTC GTTTTTGGTTTTTCAATTTTCGTCACTTCTATTTTGTTTTTGTCTACGTTGATTCCGTTT AATGTGTATATCTCGGATGAGGGTTTCAGAATTAAGCTTTTGCTGGAAGTTATCACCCAC AGGCCAGCGGTAAAGGGAAAAGAATGGAGAGCAATCACAGACAATATGAATCAATATTTA CTTGATAATGGTTTATGGAGTACTCGCTATTACTTTTATAGTAGTGAAAGATGCTACAAA TTCTTCAGATTTCTTGTGAAAGAAAAACCCCCAGGTGTGAATGTAAATTCATCGGTAAAG GACGCCACAAGTACGCAGATAGATGCACCAGCAAATGAGGCTTCAAATGAGGTAATAAAA TGCTTTAGTTTCAGTTCTGACCCAATATTCGAAGCATACTTTGTTAAAGCAGTAGAAGTT GAGAAACAAGCACAACAGGAATATTGGAGAAAGCAATATCCTGACGCCGATATACCATGA >SAT4 1812 residues Pha 0 Code 0 ATGACTGGTATGAATGATAATAATGCCGCTATTCCTCAGCAAACTCCAAGGAAACATGCG CTATCTTCTAAAGTTATGCAACTTTTTAGAAGCGGTTCAAGATCATCTAGGCAGGGAAAG GCCTCATCGAATATCCAGCCACCTTCTAATATAAACACAAACGTTCCATCGGCGTCTAAA TCAGCCAAATTTGGTTTACATACCCCAACCACTGCTACTCCTAGGGTAGTTTCTAATCCT TCTAATACTGCAGGTGTGAGTAAACCGGGCATGTATATGCCCGAATATTACCAGTCGGCA TCACCATCGCACTCTAGTTCATCCGCATCATTAAACAACCATATTGATATTAACACCTCT AAGTCATCATCAGCTGCTTCTTTAACTTCGTCAGTATCAGCTTTATCCTTATCACCCACA TCAGCCATAAATATTAGCTCCAAAAGTTTGAGCCCAAAGTTCTCTCATCATAGTAACAGC AATACTGCTATTACACCCGCGCCTACTCCCACTGCTTCAAATATTAATAATGTAAATAAG 
ATAACCAATACAAGTGCACCTATTTGTGGGAGGTTTCTTGTGCATAAAGATGGTACCCAT GAACATCACTTAAAAAATGCTAAGAGACAAGAAAAGCTAAGCACAATGATTAAAAACATG GTTGGTGCGAGCAAATTACGTGGTGAGGCAAAATCTGCTGTCCCTGATATAATAATGGAT CCAAAGACGACTTTAAAATCCAACAAGAATCCTCCTACTCTTTTTGCAGGCTTCATGAAG CAGGTCGTGGATATGGATGATAAATATCCAGAAGGCGCTCCCACAAGTGGCGCTTTAAAT TGTCCTGAAAGGGATATATACAGGTCAGATCAAAAAGATTCCAAAAATAATACGCATAAT ATCACTACTACTAAAAAAGATAGGCAATGTTTTGCCGAAAAGTATGGTCGCTGTCAAGAA GTCCTTGGTAAAGGTGCTTTTGGTGTAGTAAGAATATGTCAAAAGAAAAATGTTTCTTCT CAAGATGGTAATAAAAGTGAAAAGCTTTATGCAGTGAAAGAGTTCAAGCGTAGAACATCC GAATCAGCAGAAAAGTATTCTAAGAGGTTGACTTCTGAATTTTGCATTTCTTCTTCATTA CACCATACAAATATTGTTACTACACTAGATCTTTTCCAAGATGCCAAAGGCGAGTACTGT GAAGTAATGGAATATTGTGCAGGTGGCGATCTATTCACTTTGGTCGTTGCCGCCGGAAAA TTAGAATATATGGAAGCAGATTGTTTCTTCAAGCAGCTTATTAGAGGTGTTGTTTATATG CATGAAATGGGTGTTTGTCATAGAGATTTGAAGCCTGAGAACTTACTGCTTACGCACGAT GGTGTGCTAAAAATTACAGACTTTGGTAACAGCGAATGTTTCAAGATGGCATGGGAAAAA AATATTCACCTTAGTGGAGGCGTTTGCGGTTCATCGCCGTACATCGCCCCAGAGGAATAT ATCAAAGAAGAGTTTGATCCAAGACCCGTAGATATATGGGCATGTGGTGTCATTTATATG GCAATGAGAACTGGTAGACAATTGTGGAGTTCTGCTGAAAAAGACGATCCATTTTATATG AATTATTTAAAAGGACGTAAGGAAAAGGGAGGCTATGAGCCAATCGAAAGTTTAAAAAGA GCCAGGTGTAGGAATGTTATATATTCGATGTTAGATCCCGTTCCGTACAGAAGAATTAAC GGGAAACAAATTTTGAACAGTGAATGGGGAAGGGAGATAAAATGCTGCCATAATGGGCGC GCATTGAAATAA >RVS161 798 residues Pha 0 Code 0 ATGAGTTGGGAAGGTTTTAAGAAAGCTATCAACAGAGCTGGTCACAGTGTGATAATTAAG AATGTCGACAAGACCATTGATAAAGAGTATGACATGGAAGAACGTCGTTATAAAGTTCTT CAAAGAGCAGGTGAGGCATTACAAAAGGAAGCCAAAGGTTTCTTGGACTCATTGAGAGCT GTGACAGCATCACAGACTACCATTGCCGAGGTCATCTCTAACCTCTATGACGATTCAAAA TATGTTGCTGGTGGTGGTTACAACGTTGGTAACTATTATTTGCAATGTGTTCAAGATTTT GATAGCGAAACTGTTAAGCAATTAGACGGGCCCTTAAGAGAAACCGTACTAGATCCAATA ACAAAGTTTTCGACGTATTTCAAAGAAATTGAGGAGGCCATAAAAAAGAGAGACCATAAG AAACAAGACTTCGATGCTGCGAAGGCAAAAGTTCGTAGATTAGTGGACAAACCTGCTAAA GATGCCTCTAAACTGCCAAGGGCTGAAAAAGAATTGAGCTTAGCTAAAGATATTTTCGAA AATCTTAATAACCAATTGAAAACTGAACTACCACAGTTAGTTTCATTAAGAGTACCTTAC TTTGACCCAAGTTTTGAAGCTTTAATCAAGATTCAGCTAAGGTTCTGTACTGATGGTTAC 
ACTCGTTTAGCGCAGATTCAACAATATTTGGACCAACAATCAAGAGACGACTATGCCAAT GGGTTATTAGACACTAAAATCGAAGAACTATTAGGACAAATGACAAGCCTAGATATTTGT GCGCTCGGGATAAAATAA >YCQ0 852 residues Pha 0 Code 0 ATGTCTGACAAGGAACAAACGAGCGGAAACACAGATTTGGAGAATGCACCAGCAGGATAC TATAGTTCCCATGATAACGACGTTAATGGCGTTGCAGAAGATGAACGTCCATCTCATGAT TCGTTGGGCAAGATTTACACTGGAGGTGATAACAATGAATATATCTATATTGGGCGTCAA AAGTTTTTGAAGAGCGACTTATACCAAGCCTTTGGTGGTACCTTGAATCCAGGGTTAGCT CCTGCTCCAGTGCACAAATTTGCTAATCCTGCGCCCTTAGGTCTTTCAGCCTTCGCGTTG ACGACATTTGTGCTGTCCATGTTCAATGCGAGAGCGCAAGGGATCACTGTTCCTAATGTT GTCGTCGGTTGTGCTATGTTTTATGGTGGTTTGGTGCAATTGATTGCTGGTATTTGGGAG ATAGCTTTGGAAAATACTTTTGGTGGTACCGCATTATGTTCTTACGGTGGGTTTTGGTTG AGTTTCGCTGCAATTTACATTCCTTGGTTTGGTATCTTGGAAGCTTACGAAGACAATGAA TCTGATTTGAATAATGCTTTAGGATTTTATTTGTTGGGGTGGGCCATCTTTACGTTTGGT TTAACCGTTTGTACCATGAAATCCACTGTTATGTTCTTTTTGTTGTTCTTCTTACTAGCA TTAACTTTCCTACTGTTGTCTATTGGTCACTTTGCTAATAGACTTGGTGTCACAAGAGCT GGTGGTGTCCTGGGAGTTGTTGTTGCTTTCATTGCTTGGTACAACGCATATGCAGGTGTT GCTACAAAGCAGAATTCATATGTACTGGCTCGTCCATTCCCATTACCATCTACTGAAAGG GTAATCTTTTAA >ADP1 3150 residues Pha 0 Code 0 ATGGGAAGTCATCGACGTTATCTCTACTATAGTATATTATCATTTCTATTATTATCCTGC TCAGTGGTACTTGCAAAACAAGATGAGACCCCATTCTTTGAAGGTACTTCTTCGAAAAAT TCGCGTCTAACTGCACAAGATAAGGGCAATGATACGTGCCCGCCATGTTTTAATTGTATG CTACCTATTTTTGAATGCAAACAGTTTTCTGAATGCAATTCGTACACTGGTAGATGTGAG TGTATAGAAGGGTTTGCAGGTGATGATTGCTCTCTGCCCCTCTGTGGCGGTCTATCACCG GATGAAAGCGGTAATAAGGATCGTCCCATAAGAGCACAAAATGACACCTGTCATTGTGAT AACGGATGGGGAGGGATCAATTGTGACGTTTGTCAAGAAGATTTTGTCTGTGATGCGTTC ATGCCTGATCCTAGTATTAAGGGGACATGTTATAAGAATGGTATGATTGTAGATAAAGTA TTTTCAGGTTGTAATGTGACCAATGAGAAAATTCTACAGATTTTGAACGGCAAAATACCA CAAATTACATTTGCCTGTGATAAACCTAATCAAGAATGTAATTTTCAGTTTTGGATAGAT CAGTTAGAAAGCTTCTATTGTGGCTTAAGTGATTGTGCCTTTGAATACGACTTGGAACAG AATACCTCCCATTATAAGTGTAATGACGTTCAATGCAAATGCGTTCCCGACACTGTGTTG TGTGGTGCTAAGGGGTCTATAGATATCTCGGATTTCCTGACAGAGACAATAAAAGGGCCA GGAGATTTCAGCTGTGATTTAGAAACAAGGCAATGTAAATTCAGTGAGCCTTCTATGAAT GATTTGATATTGACCGTGTTTGGTGACCCTTATATTACTTTGAAGTGTGAATCCGGTGAA 
TGTGTTCATTATAGTGAGATTCCAGGTTACAAATCTCCTTCAAAAGATCCAACAGTGTCA TGGCAAGGGAAATTGGTGTTGGCATTGACTGCTGTGATGGTCCTGGCACTTTTTACATTT GCTACCTTTTACATTTCTAAATCTCCGTTATTCAGAAATGGATTGGGTTCCTCAAAGTCT CCCATTCGTTTGCCAGATGAAGATGCGGTGAATAATTTCTTACAAAATGAAGATGACACA CTGGCGACATTAAGTTTTGAAAATATCACTTATAGTGTCCCCTCGATAAATTCAGATGGT GTTGAAGAAACTGTGCTGAATGAAATAAGTGGTATCGTGAAGCCCGGCCAAATATTAGCT ATCATGGGTGGATCTGGTGCGGGTAAAACTACTTTATTAGATATCCTAGCAATGAAACGG AAAACAGGTCACGTTTCGGGTTCCATAAAAGTTAACGGTATTAGTATGGACCGTAAATCT TTCTCGAAAATAATCGGGTTCGTCGATCAAGATGACTTTTTGCTGCCCACTTTGACTGTT TTTGAAACCGTATTAAATAGTGCGCTGTTAAGATTGCCAAAAGCATTGTCATTCGAGGCC AAGAAGGCAAGAGTTTATAAGGTGTTGGAAGAACTAAGAATTATTGATATCAAAGATCGT ATTATTGGTAATGAATTTGATCGTGGTATTAGTGGAGGTGAAAAACGCCGAGTTTCCATT GCATGTGAATTAGTGACATCTCCATTGGTTTTATTTTTGGATGAACCTACATCTGGTTTA GATGCTAGTAATGCCAATAATGTTATTGAATGTTTGGTAAGGTTATCCAGCGACTATAAC AGGACATTGGTGCTATCTATTCATCAGCCAAGATCAAATATATTTTATTTATTCGATAAA TTGGTCCTGTTAAGTAAAGGTGAGATGGTCTATTCCGGAAATGCCAAAAAAGTGTCAGAA TTTTTGAGAAATGAGGGATATATCTGTCCGGACAACTATAATATTGCTGATTATTTGATT GATATTACTTTTGAAGCCGGTCCTCAGGGGAAAAGGAGAAGAATCAGAAACATTTCCGAT TTAGAAGCTGGTACGGATACTAACGATATTGATAATACGATACACCAAACAACATTTACT AGCAGTGATGGTACAACACAGAGAGAGTGGGCTCATCTTGCAGCTCATAGAGATGAGATC AGATCTTTACTCAGAGATGAAGAAGATGTAGAGGGAACAGATGGAAGGCGAGGTGCTACT GAGATTGACTTAAATACCAAACTACTACACGATAAATATAAAGATAGCGTCTATTATGCA GAGCTTTCACAGGAGATCGAGGAAGTTTTAAGCGAAGGTGATGAGGAAAGTAACGTTTTG AATGGAGATTTACCCACAGGTCAACAATCTGCTGGTTTTCTGCAACAGTTATCGATATTG AATTCAAGAAGTTTTAAAAACATGTACAGAAACCCTAAACTATTATTGGGTAATTATTTA CTGACGATCCTATTGAGTTTATTCTTGGGAACACTATATTACAACGTCTCCAATGATATC AGCGGTTTTCAGAACAGAATGGGGCTGTTCTTCTTTATACTAACGTACTTCGGTTTTGTT ACATTCACAGGTCTCAGCTCGTTCGCTCTGGAAAGGATCATTTTCATAAAAGAAAGATCC AATAACTATTACTCGCCACTTGCATACTACATTAGTAAGATAATGAGCGAAGTGGTCCCG CTACGTGTTGTACCACCTATACTCTTGTCATTGATTGTTTACCCAATGACTGGTTTAAAC ATGAAAGACAATGCTTTTTTTAAATGTATTGGAATCCTTATACTGTTTAACCTTGGGATA TCGTTGGAAATCCTAACCATCGGCATAATTTTTGAAGACTTGAATAACTCCATAATATTA 
AGCGTGCTGGTGCTTTTGGGCTCACTACTGTTTAGCGGACTATTTATCAATACTAAGAAT ATTACAAACGTGGCCTTCAAGTACCTGAAAAACTTCTCTGTGTTTTACTACGCCTACGAA TCTTTATTGATCAATGAGGTCAAAACATTGATGCTGAAAGAGAGAAAGTACGGCTTAAAT ATTGAAGTTCCAGGCGCTACTATCTTGAGCACATTTGGATTTGTTGTCCAAAACCTTGTA TTTGACATCAAGATCCTGGCTCTGTTTAATGTGGTGTTTTTAATAATGGGGTATCTAGCC CTTAAGTGGATAGTTGTGGAACAAAAGTAG >PGK1 1251 residues Pha 0 Code 0 ATGTCTTTATCTTCAAAGTTGTCTGTCCAAGATTTGGACTTGAAGGACAAGCGTGTCTTC ATCAGAGTTGACTTCAACGTCCCATTGGACGGTAAGAAGATCACTTCTAACCAAAGAATT GTTGCTGCTTTGCCAACCATCAAGTACGTTTTGGAACACCACCCAAGATACGTTGTCTTG GCTTCTCACTTGGGTAGACCAAACGGTGAAAGAAACGAAAAATACTCTTTGGCTCCAGTT GCTAAGGAATTGCAATCATTGTTGGGTAAGGATGTCACCTTCTTGAACGACTGTGTGCGT CCAGAAGTTGAAGCCGCTGTCAAGGCTTCTGCCCCAGGTTCCGTTATTTTGTTGGAAAAC TTGCGTTACCACATCGAAGAAGAAGGTTCCAGAAAGGTCGATGGTCAAAAGGTCAAGGCT TCCAAGGAAGATGTTCAAAAGTTCAGACACGAATTGAGCTCTTTGGCTGATGTTTACATC AACGATGCCTTCGGTACCGCTCACAGAGCTCACTCTTCTATGGTCGGTTTCGACTTGCCA CAACGTGCTGCCGGTTTCTTGTTGGAAAAGGAATTGAAGTACTTCGGTAAGGCTTTGGAG AACCCAACCAGACCATTCTTGGCCATCTTAGGTGGTGCCAAGGTTGCTGACAAGATTCAA TTGATTGACAACTTGTTGGACAAGGTCGACTCTATCATCATTGGTGGTGGTATGGCTTTC ACCTTCAAGAAGGTTTTGGAAAACACTGAAATCGGTGACTCCATCTTCGACAAGGCTGGT GCTGAAATCGTTCCAAAGTTGATGGAAAAGGCCAAGGCCAAGGGTGTCGAAGTCGTCTTG CCAGTCGACTTCATCATTGCTGATGCTTTCTCTGCTGATGCCAACACCAAGACTGTCACT GACAAGGAAGGTATTCCAGCTGGCTGGCAAGGGTTGGACAATGGTCCAGAATCTAGAAAG TTGTTTGCTGCTACTGTTGCAAAGGCTAAGACCATTGTCTGGAACGGTCCACCAGGTGTT TTCGAATTCGAAAAGTTCGCTGCTGGTACTAAGGCTTTGTTAGACGAAGTTGTCAAGAGC TCTGCTGCTGGTAACACCGTCATCATTGGTGGTGGTGACACTGCCACTGTCGCTAAGAAG TACGGTGTCACTGACAAGATCTCCCATGTCTCTACTGGTGGTGGTGCTTCTTTGGAATTA TTGGAAGGTAAGGAATTGCCAGGTGTTGCTTTCTTATCCGAAAAGAAATAA >POL4 1749 residues Pha 0 Code 0 ATGTCTCTAAAGGGTAAATTTTTCGCCTTTTTACCTAATCCTAACACATCTTCCAATAAG TTCTTTAAGAGTATATTGGAGAAAAAGGGCGCCACAATTGTGTCAAGTATTCAAAATTGT CTTCAATCTAGCCGTAAGGAAGTTATCATTTTGATTGAGGACTCCTTTGTTGATTCTGAT ATGCATTTGACTCAGAAAGATATTTTCCAAAGGGAAGCAGGCTTAAATGATGTCGATGAA TTTCTTGGTAAGATTGAACAGTCAGGCATTCAATGTGTGAAAACCAGTTGCATCACAAAG 
TGGGTCCAGAATGATAAATTTGCGTTTCAAAAAGATGATTTGATTAAATTTCAACCATCC ATTATCGTTATATCAGATAACGCTGATGACGGACAAAGTTCTACTGATAAAGAGAGTGAG ATTTCAACTGACGTAGAAAGTGAAAGGAATGATGACAGCAACAATAAGGATATGATACAA GCTTCAAAACCTCTTAAGCGACTTTTACAGGAGGATAAAGGAAGAGCTTCCCTTGTTACT GACAAAACGAAGTACAAAAACAATGAATTGATTATCGGAGCGTTGAAAAGGTTAACAAAA AAATATGAGATCGAAGGTGAGAAATTTCGTGCAAGAAGTTATAGACTGGCTAAACAGTCG ATGGAAAATTGCGATTTCAATGTTCGTTCCGGTGAAGAAGCACATACTAAATTAAGGAAT ATCGGGCCTAGTATTGCCAAAAAAATACAAGTTATATTAGATACGGGAGTTTTACCAGGT TTAAATGATTCAGTGGGATTAGAAGACAAGTTAAAATACTTCAAAAATTGTTACGGCATT GGGTCGGAAATTGCTAAACGCTGGAATCTTCTAAATTTTGAAAGCTTTTGTGTTGCAGCT AAGAAGGATCCAGAGGAGTTTGTATCAGATTGGACAATTTTATTTGGTTGGTCATATTAC GACGATTGGTTATGCAAGATGTCTCGGAATGAATGTTTCACACATTTAAAGAAGGTTCAA AAAGCGCTGCGTGGCATTGATCCTGAATGCCAAGTCGAATTACAGGGAAGTTATAATAGG GGCTATTCCAAGTGTGGTGACATTGATCTTTTATTTTTCAAGCCGTTTTGTAATGACACG ACCGAGTTGGCAAAAATCATGGAAACGCTTTGTATTAAGTTGTACAAGGATGGCTATATC CATTGTTTTTTACAGCTAACGCCAAACTTGGAAAAGCTATTCTTAAAAAGAATAGTGGAG AGATTTCGTACAGCGAAGATTGTTGGGTATGGAGAAAGAAAGAGGTGGTATTCTTCTGAG ATAATCAAGAAATTTTTCATGGGAGTCAAATTCTCTCCAAGAGAATTAGAAGAACTGAAA GAAATGAAAAATGATGAAGGCACATTGTTAATTGAAGAAGAAGAAGAAGAAGAAACAAAA TTAAACCCGATTGACCAATATATGTCTCTGAATGCCAAGGATGGAAATTATTGCAGAAGA TTAGACTTTTTTTGTTGCAAGTGGGATGAGCTTGGAGCAGGAAGAATACACTATACTGGA TCTAAAGAGTACAATAGATGGATAAGAATATTGGCAGCGCAAAAAGGCTTCAAGCTTACA CAACACGGTTTATTTCGAAATAATATCCTTCTCGAAAGCTTTAACGAACGCAGAATTTTC GAGTTATTAAACTTAAAATACGCTGAACCCGAACATAGAAATATCGAATGGGAAAAAAAA ACTGCATAA >YCQ7 2862 residues Pha 0 Code 0 ATGCTGATCATCAATGGGAAGATCATCCCTATAGCTCATACTATTTGCGCATTCTCCGCC TTCTTTGCAGCTTTGGTCACTGGTTATTCATTACATTTTCATAAAATTGTAACCAATGCA CATTATACGTATCCAGATGAGTGGTTTCCTAGTGTATCAGCCACTATCGGGGACCGCTAT CCGGAACGTTCTATTTTCCAAATCTTAATAGCTCTAACTGCTTTTCCAAGATTTTTACTG CTACTAGGTCACTACTACTTGAACCAATCTAAGGTATGCTTCCTTGTCGGTGTACTCCGG ACAGTCTCTTGCGGTGGTTGGGTATACATTACAAGTACAGATGACCACGATATTCATGAT ATATTTATGATCACATACATTGTTTTAACGTTACCATGGGATATAATGATTACCCGCTAT TCTAGTCCTTTAACTTCGAAGAACAAAGGGTTGACTGCTACAATTTTTTTTGGAACATTG 
TTCCCGATGATTTACTGGTACATTCAGCACTCCGTCCAACAGAGAGCTGGGGCATATTCT ATATATGCTTATTTCGAATGGTCTCTGATTCTTTTAGATATTGCATTTGATGCATTTGCT TACGCTGATTTCAAAAAGATAGATATTGTTCTCGCTTTTAATGAGAAACCCGGTAATACC AGTTTTTTCCAAATTAGAGACTCTAATCCCATAAATTATGGAGAAGAAAAAAGTTCAGAA TTGCAGAAAAGTGGTGAAAAGAAGGTTGAAAAGGAAAAACCCGTTGCTAGAAGCGCAACT GGTTCATATTTCAGGTTTGACTCTTTTTTTTACTTACTAACAAATATTTTTAACGGTTTT CTTTTCTGGTCGAACGTTACGTCCCTTTTATGTAGTATTTGGCATTTCCCGCTATGGTAT ATGGGAATCTCAGGTTATGAAGCTGCAATATTGGGTTATTTGGGACCCATTTTCTTATAT CTGCCGTTCGTTTCTGAAGCCTTCATGCAATATGGTGTACTTTTAGGAGGTATTATTGCC ATTGGTGCCTATATTGTTCAGATGCCAGAATTAAGGTTGATTTCTGTAGCTGTGGGAACT TCCATTACCGTTGCAACGTTTGTACAAAATCTAAGATATATCACAAATGCGGAGACTAGT TTCTCTTTTGCTCTAACTTGGCTGCTAGGTCTTGTTGCATCTGTGATCTTGAAAATGGGG TTCTATACCAACAACCCAACTTGGGTCATTTTAGATGAACGTAATGGTGGGTATAATAAG ACAGCTCTCGTGCTTACTGTTTTATTCGGCATGCTGTCGCCTTATGTTAATTCAATTAAT TTCGAAGGGAAAAGGAATGCTCAAGCAAAATCTGCTTCGTTGATCGGCAAATTATTTTTG GCTGTTGGTTTTGGCTCGTTGTTATTCGGAATTCATCAGTTATTGACGGATTCTTCTACT ACTATTTATTGGGCATGGGAAGGTTACAATGAATCACACGGTCCCTTGCCATGGCCTTGG GGCGCCTTAACTTGTACGGTCATGTTATTTGCTTCTTTGAGTTCTGTGAAGTTTATGGGC AAGCCATTAGTTCCATGTTTGTTGCTTCTCATATCCACTGCTGTACTTTCAGCTAGAAGC ATTACACAATGGCCTAAATATATTTTTGGTGGTTTATTGTACGCTATCGCTATGCTTTGG TTAGTTCCTTCGTATTTTTCTGCATTAGGCCAAGTTCAAAACATATGGGTTTATGTCCTA TCATTCTCCGTTTATATTATCTTTGTCCTTGCCCATGTTTGGGTCGTTGCATACGCATTT GTTCCAATGGGCTGGGTACTGAGGGAGAAGATTGAGACGGTTCTTGCCTTTTCTTCCACA TTTATCATTATTGGTGCTTTAACATGCAAAAACCTTAACGTTCAACTGGTGACTATGGGC AAAAAATTCTTCATTTATGTTTTCTTCTTTGCCGTGGCCCTACTATCACTAACAGCTAGG TTCGTGTATGATATTAGACCTACAGGAATTCCTCAGCCTTATCATCCAGATTCTCAGTTG ATTACAGCTGGTATTTGGACTATCCACTTTGGTCTCGATAATGATATGTGGGCATCTGAA GACAGAATGATCAACCTTATTAAAGATATGGAACTAGATGTGGTAGGTCTACTAGAAACA GATACACAAAGAATTACCATGGGGAACAGGGATCTAACTAGCAAACTAGCTCATGATTTG AATATGTATGCAGATTTCGGACCAGGTCCAAATAAACATACCTGGGGCTGTGTTCTTCTT TCTAAATTCCCTATCGTAAATTCTACGCATCATTTATTGCCCTCTCCAGTTGGGGAACTT GCGCCAGCCATTCATGCCACACTTCAAACGTACAATGACACTCTCGTTGACGTCTTTGTA 
TTCCATAGTGGACAAGAAGAGGATGAAGAGGATAGAAGACTGCAAAGTAACTACATGGCT AAGCTCATGGGCAATACGACTCGCCCAGCTATTTTATTAAGTTACTTAGTTGTTGATCCA GGTGAAGGCAACTACAATACGTACGTTAGTGAAACATCCGGAATGCACGACATTGATCCC TCTGACGATGATAGATGGTGTGAGTATATCTTGTATAAGGGCTTGAGAAGAACAGGATAT GCTAGAGTTGCAAGAGGAACGATAACCGATACGGAGCTACAAGTTGGTAAGTTCCAAGTT TTGAGTGAGCAAGCGTTAGTAGAGCACTCGGATTCTATGTATGAATACGGTCATATGAGT GAACCGGAATATGAGGACATGAAATTTCCAGATAAGTTTTTAGGCGAAGGTGAGAGGGGT CACTTCTACCATGTTTTTGATGAGCCACGTTATTACTTATAA >SRD1 678 residues Pha 0 Code 0 ATGCGATATAATAATTATGACAACTCTGGAAGTTCCTTCTTAACTAGAGTAGTTAAAAAG TCAGATATGGAGAAAACGTTATTATTAAATAGAGAAATTGATGACTGGAAGTCAAACGAT AAAAAGAAGGCATATAAGGAACGCGGAAGAGTTTATGCAAGTTGCTCATTTATTGAAGTA TCCTTTTCTCAAATAAGGGCTGTTGATGTTGAAAAAAAAATTGAGAATGCCGAACAACTA AGAGATCTTACAAGAAATATTGTTAAGAACAAAACCAGCTCTTTGAACGAAATTACACCC TCAAAGAATCGTGTTATTAGTGCATGCAATTCCGAGAGACGTACGACTAGCCAAGAAGCA AACAATCTTGAAGGCTACCATAGTTGTGCACAAGGAACTAGTCGGTCTGCCAGTATTACG AAGAAATACAGCAAAAAGACTACTAGTCGTCCTAAAAGAGAAAAGAGACAAACAATCCTC CCAAATGGTGAGATAAAGGAATGCTCTAAATGTAAAGACACTTGGACAATTCAATGGCGT AGTGGACCCGACCAAAACAGGGAACTTTGTAGTCCCTGTGGACTCGCCTATGGAAAAAGA CTGAAGAAGGAGAATGAAAAAAAAAGGCAAGCGGCAGATAAAAGGATAGATTCGAAACAA TCCATAGTATCTATTTAA >MAK32 1092 residues Pha 0 Code 0 ATGATGAATGAAGAGGATTCTACAGAAACGAAAAGCCTAGTCATAACTAATGGCATGTTT ATCATAGACGACATCGAGCGTAGTAAATATAATATTCACTATAAGAATGTCCCAGGAGGC GGAGGGACTTTTGCCATTTTGGGTGCATGCATAATATCTTCCGGCAATGTCACATCCAAA GGTTTGAAGTGGATAGTGGACAGAGGCTCTGACTTTCCAAAGGAAGTTATAAGGGAAATA GACTCATGGGGTACTGATGTGAGGTTTCGAGATGACTTTAGCAGATTAACTACCAAAGGG TTGAATTATTACGAGGGAAGTGATGATTTGAGAAAGTTCAAGTTTTTGACGCCGAAGAAG CAGATTAACGTCGATGACTGGATTTCCACATTTGGGCAGAAGATAATTGATGAAATGCAT GCGTTTCATTTGCTATGTTCTGGGTCTAGATGCTTAGACATAATAAACGATCTGCTACGG GTGAAAAGTTCAAAGGGCACAAAACCAATCGTGATTTGGGAGCCATTCCCAGATCTTTGC GACTTTGATCATCAAAATGACATTAAAAGTGTAATGCAGAGGAACGATGTTACGGTAATA TTATCTCCAAATGCCGAAGAATCAAGTCGCTTATTTGGTTTAAGTAGCAAGGAACCGACT AGTTTGGAAGAATGTCTAGCATTAGCGCATCGTTTCGATGATTTCATGGATGAAAACAAT 
ATGTGTATTCTACGATGCGGTGCCCTCGGAAGCATATCGGTAAGTGAGAAGTTTAAGAAC GGACGAACCTATGACCATTTCCCCGCCTACCATTTCAAAACTCAGTCTAAAGTACTAGAT CCTACTGGCGGGGGAAACTCGTTCCTTGGCGGCTTTGCAGTTTCTTATGCCCTAACGAAA AGCTTAGATATTGCTAGTATATGTGGGAACATCGCTGCAGGCGCAATAATTGAACAATTC GGAATACCGAGGTACGATCCAATTGCTAAAACCTGGAACGGAATCACATTCTTGGATAGA CTGAAATTTTACCTTTCACAGTCCGGTCTTCAATATAATATAAACGATCTTTACAAAAGT CTAACACGATGA >PET18 648 residues Pha 0 Code 0 ATGAGCTGTACCACTGATAAGTTAATACAAAAGTACGACGCCCTTGTTAGGAAAACCACA GAACATAAATTCGCTAAGGAACTATGTGCCGGAACATTGAAGGACCGTAGTTTGTACATC TATTTATCACAAGATCTGCAATTTTTTGAAACTAGCTTAAGGTTGATATGTAAGACGACT TCTTTAGCACCAACTACTCACGCTTTAATAACCTTAGCCAAAAAGATTGGATTTTTTTCT AATGATGAAAACTCATACTTTCATGACTGCTTAGAATTATTGGCACCATCCCTCACCAAG GAAGAAAGAGATAATTTTGACAATAAAGCGATCCCCGGCGTTGATGCGTATATTAATTTC TTAGATGAGCTGAGAAAGGACGCCTCAATTACATGGCCATCCTTAGTAACCAGCTTATGG GTTGCTGAGGAACTCTATTGGAGATGGGCTCGTGATACTCCTAGAGCCCCAGGGTTGCAT TGGAAATATCAAAAATGGATTGATTTACATGATGGTGAGCATTTTCAAACTTGGTGTGAA TTTCTAAAGGCTGAAGTTGACAAGTTTCCCGTCGAAGAAGTGGAAAGCATATTTGTGAAG GTTTCACAGTTCGAGTTCGAATTTTTTGAATCTTGTTACAACGCCTAA >MAK31 267 residues Pha 0 Code 0 ATGGACATCTTGAAACTGTCAGATTTTATTGGAAATACTTTAATAGTTTCCCTTACAGAA GATCGTATTTTAGTTGGAAGCTTGGTTGCTGTAGATGCCCAAATGAATTTGCTATTAGAT CATGTTGAGGAACGTATGGGCTCCAGTAGTAGAATGATGGGCCTAGTCAGCGTCCCTAGG CGTTCCGTTAAGACCATAATGATTGATAAGCCTGTTCTGCAGGAGCTTACTGCGAATAAA GTTGAATTGATGGCTAATATTGTTTAG >HSP30 999 residues Pha 0 Code 0 ATGAACGATACGCTATCAAGCTTTTTAAATCGTAACGAGGCTTTAGGGCTTAATCCACCA CATGGCCTGGATATGCACATTACCAAGAGAGGTTCGGATTGGTTATGGGCAGTGTTTGCA GTCTTTGGCTTTATATTGCTATGCTATGTTGTGATGTTCTTCATTGCGGAGAACAAGGGC TCCAGATTGACTAGATATGCCTTAGCTCCTGCATTTTTGATCACTTTCTTTGAATTTTTT GCTTTCTTCACTTATGCTTCTGATTTAGGTTGGACTGGTGTTCAAGCTGAATTTAACCAC GTCAAGGTTAGCAAGTCTATCACAGGTGAAGTTCCCGGTATTAGACAAATCTTTTACTCG AAATATATTGCCTGGTTCTTGTCCTGGCCATGCCTTTTATTTTTAATCGAGTTAGCCGCT AGTACTACTGGTGAGAATGACGACATTTCCGCCTTGGATATGGTACATTCGCTGTTAATT CAAATCGTGGGTACCTTATTCTGGGTTGTTTCGCTATTAGTTGGTTCATTGATCAAGTCC 
ACCTACAAGTGGGGTTATTACACCATTGGTGCTGTCGCTATGTTGGTTACCCAAGGTGTG ATATGCCAACGTCAATTCTTCAATTTGAAAACTAGAGGGTTCAATGCACTTATGCTGTGT ACCTGCATGGTAATCGTTTGGTTGTACTTTATCTGTTGGGGTCTAAGTGATGGTGGTAAC CGTATTCAACCAGACGGTGAGGCTATCTTTTATGGTGTTTTGGATTTATGTGTATTTGCC ATTTATCCATGTTACTTGCTAATTGCAGTCAGCCGTGATGGCAAATTGCCAAGGCTATCT TTGACAGGAGGATTCTCTCATCACCATGCTACGGACGATGTGGAAGATGCGGCTCCTGAA ACAAAAGAAGCTGTTCCAGAGAGCCCAAGAGCATCTGGAGAGACTGCAATCCACGAACCC GAACCTGAAGCAGAGCAAGCTGTCGAAGATACTGCTTAG >YCR3 1836 residues Pha 0 Code 0 ATGGCGCGTCAAAAGCTTACTTTCAAAGAACAAATGGATGGTTTCCCCTGGGTCCAACTT GTTGTTGTGTCCTTAGTTAGGTTCAGCGAACCAATTGCGTTTTCGTCACTATTTCCTTAT GTTTATTTCATGGTTAGAGATTTTAATATTGCTCCCAATGATGCTCAAGTGTCCAAATAT TCAGGTTATTTATCTTCATCATTTGCGTTATGCCAAGTCATATCTGCGTACCACTGGGGT AGATTCTCTGAAAAACATGGCAGAAAAATAACATTGACTTGCGGGCTTATAGGAACATCT GTATCATTGTTAATACTGGGATTTTCACACAATTTCTATCAGGCTTTGGTGGCAAGAAGT TTAATGGGATTGCTAAATGGTAACGTCGGCGTTATTAGAACCATTATTGGTGAAATAGCA ACTGAAAGAAAACATCAGGCTTTAGCTTTCAGTACTATGCCTTTATTATTTCAATTTGGT GCCGTTGTTGGGCCTATGATCGGTGGGTTTCTTGTATTTAGAGATGGAACAATGAATGAA GTGCCACTATGGTTTCCACATTTTGCAAAAAGAATAATTAGGTCATATCCGTACGCCTTG CCAAACGTGGTAGTGTGCATGTTTTTGATGTTTGGTTTAACTAATGCAACATTGTTTTTG GAAGAAACACATCCTGCTTTTAAAAATAGAAGAGATTACGGTTTAGAGGTCGGTGATTTT ATTAAGAAGAATATATTTGGTATACAGCCGAAAAGAAGACCCTGGCAAAAGCGCATTCAG GATGATTCGGAAAACATTCACCACCGTAATGAGAATGTGAACAGCAATCGAGGACAAGAT AGTGAAGAGGATGAAAATAGTCCCCTAGTGAATACTACCAATGACGATGATACTGAAAGC ATACAATCGATTGATCCTATTTTAACAAGAAGACAGTCTGTAGGCCTGATTAGGACATAT TCTCTGCATGAACCAACAGACGCTGTGCATGCCAATATAGATACAGCTCCAGACGGTTGT AAAGAAAGTAGTATATTTCATCACGTTTTTCATACAAAAGTATTTTACCCTATATCGGTG AATTTTATTATGGCTTTACATTTGATTGTATACAACGAATTTTTGCCTGTTTTTTTAGCT TATGATTTAGCCGTAGATCCAGAAAATCCAAAGAAGCTGGCTTCAAAATTTCCGTGGAAA ATATCTGGCGGTATAGGTTATGAACCAGAACAAACCGGTACTCTTTTGTCGACAACAGGT ATCTTTGGTTGTTTTGTGGTTATTTTCATTTTTCCCATAGTTGATCGAAATTTCGATTGT TTAACAATTTTCAGAACTTTAGTCAAGCTGTACCCTATTATGTACGTTATGGTTCCTTAC GTTGTTTTTCTACAGAATGAACGGATTCCTAGCTGGTATACTGTCGTCTACTTGTACATA 
ATCACAGGGATAAAAACATTTTGTGGCGCTTTAACGTCACCACAAATTATGTTATTAATT CATAATTCGAGTCCCTTGAGTTGTAGATCAGTCATCAATGGCGCCACCATTAGTATTTCT GCCTCTGCTCGTTTCATAGGTCCCTTAGTATGGGGCTATATTATGTCTTGGTCCCAGCAA AATGACGTCGCCTGGGTCAGTTGGTGGTCGTTAAGTCTTTTTTGTATGGTAGCTCTTTAT CAAAGTTATAAGATAGCACCAATTGATGATAACGAAAATGAGCTTCATGGACAGGGTAGT GAAGATGCCTACAATTCGCAGTCACAGTCTTCTGATTTAAGAATGGCTCATCGATCTAGT TTAAGCAGCTTAAGTAACCAACGCTGTACCACATGA >SYN 1479 residues Pha 0 Code 0 ATGTTTCATGCTTTCACCTTCCTTAAAGGTGGTAGATTTTACTCTTCACTAACAGTTAAA TCATTGTACGAGCAGGTACACCATACTAGCCATGATCCCATTTCAATTAATGGATGGATC AAATCCATAAGACTATTAAAACGTATAGCGTTTTTGGATTTACAAGATGGGACTTCTGTG AACCCATTAAGAATAGTTATTCCACTCACAAATACTGATGAAGTACAGTTCCTAAAAATT CTGAAAACTGGTCAAACTTTATCTATATCTAATGCTACCTGGCAAAGCACCCCTAATAGA AAACAACCTTTTGAATTGCAAATCAAAAATCCTGTCAAGTCAATTAAACTTGTGGGTCCC GTTTCAGAAAACTATCCATTACAAAAGAAATATCAAACCTTACGTTATTTAAGGTCCTTA CCTACACTAAAATACAGAACCGCTTACTTAAGTGCAATTTTACGGTTAAGATCATTTGTA GAATTCCAGTTCATGCTATATTTCCAGAAAAACCACTTCACCAAAGTTTCACCACCAATA TTAACTTCAAACGATTGTGAAGGTGCCGGCGAGTTGTTTCAAGTCTCCACCAATACGTCG CCAACTGCATCCTCGTACTTTGGGAAGCCGACTTATTTGACTGTGTCCACTCAATTGCAC TTGGAAATTTTAGCGTTATCACTGTCAAGGTGTTGGACGTTATCTCCTTGCTTTAGAGCC GAAAAGAGTGATACTCCAAGACACCTTTCGGAGTTTTGGATGCTTGAAGTGGAAATGTGC TTTGTTAATAGCGTCAACGAGCTAACATCGTTTGTTGAGACTACAATAAAACACATAATT AAAGCTTGTATAGATAACCAACAAGAACTCTTGCCGAAGCAATTTATCTCTTCACAAGAA AATAATGCATCGTCAGAGCTATCAATAAATCAAGAGACACAACAAATTAAAACACGATGG GAAGATTTAATAAATGAAAAATGGCACAATATAACGTATACCAATGCAATAGAAATTCTC AAGAAACGCCACAATGAAGTTTCACACTTTAAGTATGAACCTAAATGGGGACAGCCTTTG CAAACTGAACATGAAAAATTTTTAGCCGGAGAGTATTTTAAGTCCCCAGTTTTCGTTACC GACTATCCACGTCTTTGTAAACCATTCTACATGAAACAAAATTCCACTCCTGACGATACT GTTGGATGCTTTGATCTACTGGTTCCTGGAATGGGTGAAATAATTGGTGGGAGTTTAAGG GAAGATGACTATGACAAGTTATGTAGAGAAATGAAAGCACGCGGGATGAATAGATCTGGA GAATTGGACTGGTATGTTTCTCTGAGAAAAGAAGGAAGTGCACCACACGGAGGCTTTGGT CTAGGGTTTGAGAGATTTATCTCATACTTATATGGCAACCATAATATAAAGGATGCCATA CCCTTTTATAGAACATCTGCAGAATCCATCGATTTTTGA >YCR6 2232 residues Pha 0 Code 0 
ATGGAACTTCAGAATGATTTAGAGTCGCTCGATAACGAGCTGAATGATTTTAGTGAAGAT CCATTTCGTGATGATTTCATAACGGATGAAGACGCTGTAAGATCGGGGTGGCGATCTGCG TGGACCAGGATGAAATATTGGTTTTATAAGAATAGACTGAAGTGGACAAACAATCCCATA GTGATTGGCGACGCGAAAGATAGTAGGGATGGTTCTAACTTTAGAAGGGGTATACCGCTA TATGAATTAGACGCGAATGGTCAACCCATTGATACTGAACTTGTTGATGAGAATGAACTT TCTTTTGGAACGGGATTTCGTTCCAAAGTGCCTTTTAAAATAATATTTCGCACATTGCTT GGCTCGCTGGTGTTTGCCATTTTTTTAATTCTGATGATTAACATAGCAAAACCCCATCAC TCCACGAGAGTGCTATCGCACTTTGGCAGTCCTGAATTTGACCCTTACGTGAAGTATTTT AACGGTACGCATGAATTTTTCCCCTTAACGATAGTAATTTCACTAGACGGTTTCCATCCT TCACTCATATCTAAGAGGAACACACCGTTTTTACATGACTTATATGAATTGAAATATGAT GGAGGTATGAATATCACGTCCACACCTTTTATGATACCCAGCTTCCCTACGGAGACCTTT CCCAACCATTGGACGTTGGTTACTGGACAATACCCAATACACCACGGTATAGTCTCTAAC GTATTTTGGGATCCTGATCTTAATGAAGAATTCCATCCAGGTGTATTGGACCCTCGAATA TGGAACAATAATGATACAGAACCAATATGGCAAACTGTTCAGTCTGCATTTGACGGTGAT ATACCATTCAAAGCTGCTACCCATATGTGGCCAGGTAGCGATGTGAATTATACCAAGTAT AAGACTGAAGAGAAACTACAACCTGAACATAAAAAGCCTATTGCTAGAGAGAGAACTCCA TTTTACTTCGACGAATTCAATGCTAAAGAACCACTTTCGCAAAAATTATCCAAGATTATT GAATATGTGGATATGAGTACACTGAACGAAAGACCACAGTTAATTCTCGGTTATGTACCG AACGTAGATGCCTTTGGACATAAGCATGGATATCCGTCAGAGTCGGAATACTATTATGAA GACTTCACTGAAACACTGGGGGAAGTAGATACATTTCTGAAGCAACTAGTGGAATCGCTG CAAGAAAGAAATTTAACCAGCTTTACTAATTTGGTCATTGTTAGCGATCATGGTATGAGC GATATCGTAGTTCCCTCAAATGTTATTATATGGGAAGACTTACTGGACGAAAAATTGAGG AAGGATTATGTATCGCACGCATATCTAGAGGGTCCGATGATGGCTATATCGTTGAAAGAT TCCGGAAACATCAATGAGGTTTACCACAATTTAAAGACTTCTATAGATGAAGACAAGTAT ACGGTTTACGTTAATGGAAATTTCCCCAAAGAATGGAACTTTAATGATGGAAAAAATCAT CACATGGCGTCAATCTGGATTGTGCCCGAGCCTGGGTATGCAGTGATGAAGAAAGAACAA TTGAAGAAGGTGGCAAAAGGTGATCATAAGGACAAAAACGAAGACAATGTGTTCACGATT GGATCACATGGATACGACAATAACGCGATCGATATGAGATCTGTATTTATTGGTATGGGG CCATATTTTCCACAGGGATACATTGAGCCGTTCCAAAATACCGAAATTTACAACCTTTTG TGCGATATTTGCGGTGTGGCAGAAAAGGACAGAAATTCCAATGATGGGACTGGGATGCTT ATGAACCAACTCCGCGAACCCCAGAGCAGCGAAGAAGTAGAGATTGAAGATGACTTTGAT TATTTGGTCAGTAAGTTTGGTGAATTCAGCACTTATAATATAATTTGGGGCGGGTACCCC 
GAAGAGACAGAACAAGACAATGTTGACAATGATAATGATGACAACGACGATGGAAACACT GATGAAATAGCCGCTATGCCATCTTCGTCATTAACGATAAAACTAGAAATGACAACTTCA ATACCATCAGCAACTGAGACTCTACCGGGCGAAACATCACCATCATCAAGAAGAAGCAGC AGCAGCAGCATACAAGCTAGCGCTACTGCTAGCACAGTGGGGGATTGGCTTCAAGACATA ATCAACGACGCAAAAGATCTCATTGACGACATAATTGACAGCATCGACGATTTAGTCGAT TCTGATACCTAA >GNS1 630 residues Pha 0 Code 0 ATGGAATACGCCACTATGTCTTCTTCGAACTCCACACATAACTTTCAGAGAAAGATTGCT CTTATAGGAGCTAGAAATGTCGGCAAAACCACATTAACGGTTCGCTTCGTAGAATCGCGG TTCGTTGAATCCTATTATCCCACTATTGAAAATGAATTTACCAGGATAATTCCTTATAAA AGTCATGACTGTACTCTGGAAATTCTAGATACTGCAGGCCAAGATGAAGTTTCTCTATTA AACATTAAATCGTTGACGGGCGTACGAGGCATAATGCTGTGCTATAGTATAATAAATCGT GCTAGCTTTGATCTTATTCCCATTCTCTGGGACAAGCTGGTAGATCAGCTGGGTAAGGAT AACCTCCCGGTAATACTTGTGGGTACCAAAGCTGATTTGGGAAGGAGTACAAAAGGTGTA AAAAGGTGTGTCACGAAAGCTGAAGGAGAGAAACTAGCTTCGACAATTGGCAGTCAAGAT AAGAGGAACCAGGCAGCATTTATAGAATGCAGTGCCGAGTTAGATTATAATGTTGAAGAA ACTTTTATGCTCCTTTTGAAACAAATGGAACGTGTCGAAGGAACTCTGGGGCTTGATGCC GAAAATAATAATAAATGTTCTATAATGTGA >FEN2 1539 residues Pha 0 Code 0 ATGATGAAGGAATCGAAATCTATCACTCAACATGAGGTTGAGAGAGAATCTGTTTCTTCC AAACGTGCCATTAAAAAGAGATTACTTCTGTTTAAAATAGACTTGTTTGTGCTATCATTT GTTTGCTTGCAATACTGGATTAATTATGTCGACCGTGTCGGTTTCACCAATGCATATATA TCGGGTATGAAGGAAGATCTTAAGATGGTCGGAAACGATTTGACCGTGTCTAACACAGTT TTCATGATTGGTTACATTGTAGGTATGGTCCCCAATAATTTAATGTTATTGTGTGTTCCA CCTAGGATATGGCTAAGTTTTTGTACGTTTGCCTGGGGTTTATTGACCTTGGGAATGTAC AAAGTTACATCGTTCAAACATATTTGCGCAATTAGATTCTTTCAAGCCTTATTTGAGAGT TGCACATTTTCAGGAACACATTTTGTTTTGGGTTCGTGGTATAAAGAAGACGAATTGCCC ATTAGAAGTGCTATTTTTACAGGTAGCGGTTTGGTGGGATCTATGTTCAGTGGATTTATG CAAACAAGTATCTTTACTCATTTGAATGGGCGGAATGGCTTGGCGGGTTGGAGATGGTTA TTCATTATTGATTTTTGTATCACATTACCCATTGCAATTTATGGGTTTATTTTCTTCCCC GGCCTTCCTGATCAAACAAGTGCTGTTAGCAAATTTTCTATGACGAGATACATTTTTAAT GAACAAGAGCTACATTATGCTAGGAGAAGGCTCCCCGCTAGGGACGAAAGCACCCGGTTA GACTGGTCGACTATTCCTAGAGTCCTAAAAAGGTGGCACTGGTGGATGTTCTCTCTTGTT TGGGTTCTGGGAGGTGAGAATTTGGGTTTCGCATCTAATTCTACATTTGCATTATGGTTA CAAAACCAAAAATATACGTTGGCGCAAAGAAATAATTATCCTTCGGGGATATTTGCCGTA 
GGTATAGTTTCTACGCTTTGTTCTGCTGTATATATGAGTAAGATCCCAAGAGCTAGGCAT TGGCATGTTTCTGTTTTCATATCATTGGTAATGGTTATTGTTGCGGTACTAATACGTGCA GACCCACTAAATCCAAAAGTCGTCTTTTCTGCACAGTATCTTGGAGGCGTAGCATACGCT GGACAAGCGGTTTTTTTTTCGTGGGCAAACATTATTTGTCATGCAGATCTTCAAGAACGT GCTATCGTTCTTGCTTCAATGAATATGTTTTCAGGGGCCGTTAACGCATGGTGGTCTATA TTATTCTTTGCTTCAGATATGGTGCCCAAGTTTGAGAGAGGTTGCTACGCCCTCTTGGCT ACGGCAATATCAAGCGGAATTGTCTCGGTCGTCATACGCTCACTACAGATAAAAGAGAAT TTGTCTAAGAAACAGGTTCCTTATATAGATGCTAATGACATGCCCGGGGAAGATGACGAT GACGACAACCAGGATAATGAAAATGATGGCGACGACGAGAGTATGGAAGTTGAACTTCAT AATGAGGAAATGGCCGAAATTTCAAATCCTTTCCGATAA >RIM1 444 residues Pha 0 Code 0 ATGTTTTTACGTACTCAAGCTCGTTTCTTCCATGCTACTACCAAGAAGATGGACTTCTCG AAAATGTCCATCGTCGGCCGCATTGGCTCTGAATTCACTGAACATACTTCTGCTAATAAC AATCGTTATTTGAAATATAGTATCGCTTCGCAACCAAGAAGAGATGGCCAAACCAATTGG TATAATATCACCGTTTTCAATGAACCTCAAATCAATTTTTTGACAGAATATGTTAGAAAA GGCGCTTTGGTATATGTTGAAGCAGATGCTGCTAACTATGTCTTCGAGAGAGACGACGGT TCTAAGGGTACTACTTTGAGCTTAGTTCAAAAGGACATTAATTTATTGAAGAATGGGAAG AAATTAGAAGATGCTGAGGGCCAAGAAAATGCTGAGGGCCAAGAAAATGCTGAGGGCCAA GAAAATGCTGCTTCTTCAGAATAA >CRY1 414 residues Pha 0 Code 0 ATGTCTAACGTTGTTCAAGCTCGTGACAATTCCCAAGTTTTTGGTGTTGCTAGAATTTAC GCTTCTTTCAACGATACTTTCGTTCATGTTACCGATTTATCTGGTAAGGAAACCATCGCC AGAGTTACTGGTGGTATGAAGGTTAAGGCTGACAGAGATGAATCTTCTCCATACGCTGCT ATGTTAGCTGCCCAAGATGTTGCCGCTAAGTGTAGGGAAGTCGGTATCACTGCCGTTCAC GTTAAGATCAGAGCTACCGGTGGTACTAGAACCAAGACTCCAGGTCCAGGTGGTCAAGCT GCTTTGAGAGCTTTGGCCAGATCTGGTTTGAGAATTGGCCGTATCGAAGATGTTACCCCA GTTCCATCTGACTCCACCAGAAAGAAGGGTGGTAGAAGAGGTAGAAGATTATGA >YCS2 6504 residues Pha 0 Code 0 ATGAATTCAATTATTAATGCTGCTTCGAAAGTCTTAAGACTCCAAGACGATGTGAAGAAG GCTACTATAATATTAGGAGATATACTGATATTACAACCAATTAATCACGAAGTTGAACCA GATGTAGAAAACTTGGTACAGCATGAACTAACCAAGATAATACAAGGTTATCCCATACAG GATAATATGATTATTAATAGCAAAAAAGGCACAGTTGAAGATGACTTATGCGAACTCAAT AACTATACCTGTTTTGCACTTTCGAAAAGCTTTGATTTATGCCATGATAGCAGAAATTTC AACATAGCGCAGCCGAAACGATGGATACAATTATTAGAGACATTAACTGACTCAGTTAGT TTCGCAGTTATTGTTCAAATTATTCTCACTTTATCTAACATTTCGCTAATAAATAAACAA 
ACCTTGGGGAAGTTAAAAAAACTGAGGATTCGAATTTTCGAAATACTATCAAATAAAAAC GATAGTTGGAAATCTACATTACTACAGAAAAACCTTATAGAATGGTACATTTTTATGCTT TCCGTGGATTGCACACCTTTAGAATTGCAAAACTTATATCTCCATAAGGAGTTGAAATTC TGTAACGATATCTTGAATTCATTAACACTCCAAGTTTCTGATCCTCGCTCACAAAATTAC CTGCAATTTGAGAACACGTATAAGCTTTTTCAAATACAAAAGTCATCTAGAATTAACAAC TCGTTCCTTTTTTACATAGAATTCAATTCCGTTACCTCAAATAGGATAATGACCATAGAA AAACACATTTATTTGGAAATTAAGGAAGGCCAGTTTTGTATTTCAAATGATAACTACATA ATCGGTTTATTTGAAAACTTCGAATTCGAAGCGGGCACTTTGTACTTTATTGGAGTTTTA ATTGATCACAATAATCGAATAACTCTTTATGTTGATGGAAGTATGATCAATCAGCTCACG TTATTTGAAAACTCTATATGCCAATTAAGCACTTGTGAACTGGGATCCATGATTTGTTCA ATTAAAGTATATAGATTTTATTTGTGGGATGGATTATTAACAGAATTTGCGATAAATATA CTTCAAGCTATCGGCACCAATTACCAATATACATTTAGCAAGAAAAAAGAAGGGCCTGAA GTTTTATCGCTCTGCCAAGACTTTTTGATCGCTAAGGCTCATTTAATGGCCAGGCCTGCA ACAGAAATATCTTCCACAAAATACATCGATGAGATTGAACTTCTTGAAATGGAAAATATC ATTATTGATGTTAACCCAAATGATATTCTTCAAGATTTCACCGAATCGTCTAATTTTACG GTAAAATTTGAGGAAAGCACAAACTCGAAAAATATTCCGGAAGTGGGTAAGTGCTATTTC TATAGGAGTTCAAACTTGGTTTCAAAATTTGTGTCCATTGATTCTATACGGCTTGCGTTT TTAAACATGACAGAATCCGGTAGTATAGACGATCTGTTTCATCATGTATCACATCTGATG AATCTTTTACGAAATATTGATATTCTTAATTGGTTTAAAAAAGACTTTGGCTTCCCTTTA TTTGCTTATACTTTAAAACAAAAAATAACACAAGATTTATCTCAGCCTCTGAATATCCAA TTTTTCAATTTATTCTTAGAATTTTGCGGGTGGGATTTCAACGATATTTCCAAATCCATA ATTCTAGATACTGATGCCTACGAAAACATAGTCCTTAACTTGGATTTATGGTATATGAAT GAGGATCAAAGTTCTCTGGCGTCAGGCGGATTAGAAATTATCAGATTTCTTTTCTTCCAA ATTTCAAGTTTGATGGAAGCCTCTATTTATTCTAAGTTCAATTCCAATAAATTCAATGAT ATGAATATCCTAGAAAAACTATGTTTAAGCTATCAGGCTGTCACAAAAAGAGAAAATCAG AACAGTAAATTTAATGAGCTATCAAATGATTTAATTTCTGTATTTGTTACTTTATTGAAA AGCAATACTGATAAACGACACCTGCAGTGGTTTTTACATCTCTCATATTACTTTATTAAG AGAAAAGATGTACGTTCTACAGAAATTATACTTCAAGCGGTAGATCAACTTTTTTCGTTT TACTTAGATCAAGGTAGCGACGAAAATGCGAAGATACTTTCAGAGATTATACCACTTAAG CTAATGCTGATGATTATGGATCAAATAGTGGAAAATAATGAATCAAACCCTATTACGTGC TTGAATATCTTATTTAAGGTAGTTCTGACCAATAAACCGCTTTTCAAACAATTTTACAAA AATGATGGTTTGAAACTCATATTGACTATGCTTTGTAAGGTAGGGAAAAGCTATCGAGAG 
GAGATTATTTCTTTGCTTCTCACATATTCTATTGGCAATTATACCACAGCTAACGAAATA TTTTCAGGTGCTGAAGACATGATTGGAGGAATTTCAAACGACAAGATAACTGCAAAAGAA ATTATTTATTTGGCTGTCAACTTCATTGAGTGGCATGTGATTAATTCTAATGCCAGTGAT TCTTCTTCTGTATTGGACCTGAACAACCATATATTAAGATTCGTCGAAGATCTGAAATCG CTGAGCGCTGTTCCGATTAATGAATCTGTATTTGATCCTAAAAAAAGTTATGTGATGGTT TCATTATTAGATCTCTCGATAGCTTTGAATGAATCGGAGGACATCTCAAAGTTCAAGAGC TCTTCAAAAGTGATTTCAGAGCTCATTAAAGGTAATATAATGTGTGCTCTTACGAAATAT GCCGCTTATGATTTCGAAGTCTATATGAGCACATTTTTTTGTCACAGTACAGAATACAAA CTGGTTTATCCAAAAACTGTAATGAACAATTCCAGTTACTTAGAGCTATCATTTATAGTG ACACTCCTACCCGAAATACTTAATGACCTGATAGATAGCAATAACAATTTGAACCTGATG ATGTTGAAGCATCCATACACGATGTCAAATCTCCTTTATTTTCTTCGCAAATTTCGACCT GATACGTCACAGATAGTTATGCCTAAAGATTTTTATTTCTCAAGTTATACATGTCTCTTG CATTGTGTTATTCAGATTGATAAATCATCATTTTACCATTTCAAAAACGTTTCTAAGTCG CAACTGTTACAGGAATTCAAAATCTGCATAATGAACTTAATATATTCCAATACTCTAAAG CAGATAATCTGGGAGAAAGAAGAATACGAGATGTTTTCTGAGTCACTGATGGCGCATCAG GAAGTTTTATTTGCACATGGAGCATGTGATAATGAGACCGTTGGCTTATTGTTAATATTT TTTGCCAACAGATTACGTGATTGTGGATACAACAAAGCAGTCTTCAATTGTATGAAAGTG ATCATTAAGAACAAGGAAAGGAAACTAAAGGAGGTGGCGTGTTTTTTTGACGCAGCGAAT AAAAGTGAAGTACTCGAAGGTTTAAGTAATATCCTCTCATGCAATAACTCTGAAACAATG AACCTCATAACTGAACAATACCCATTTTTTTTCAACAATACACAACAGGTACGGTTCATA AACATTGTCACCAATATCTTGTTTAAGAACAACAATTTTTCTCCAATAAGCGTTAGACAG ATCAAAAACCAAGTTTACGAATGGAAAAATGCAAGATCAGAATACGTCACCCAAAACAAT AAAAAGTGCCTTATTTTATTTAGAAAAGACAACACATCCTTAGATTTTAAAATCAAAAAG TCCATATCAAGATACACTTACAACCTCAAAACGGATAGAGAAGAAAATGCAGTTTTCTAT CGAAATAATTTAAATCTTTTGATTTTTCATCTGAAACATACACTGGAGATACAATCAAAT CCAAATTCGTCCTGCAAGTGGTCATTGGACTTTGCAGAAGATTTTGATGGGATGAAACGG AGGCTTTTGCCTGCTTGGGAACCAAAATATGAACCACTCATTAACGAGGAAGATGCTAAT CAAGATACTATAACAGGTGGTAACAGACAAAGGAGAGAAAGTGGAAGCATTTTATCCTAC GAATTTATCGAACATATGGAGACTCTTGAGTCGGAGCCAGTTGGAGATTTGAATGAGAAT AGAAAAATTCTTAGACTTTTGAAGGATAACGATTCTATTGCAACTATTTGGAATTGCAGT TTGATTATTGGATTAGAAATTAAGGAGGGGATTTTAATTCATGGCAGTAATTACCTTTAC TTTGTAAGTGATTACTATTTTAGTTTAGAGGATAAAAAGATTCTAAAATTATCAGAAGTA 
TCGCAAGAATCACGGGATATGACGGTTAGCTTAATTAACGGCCCTGATGTTAAAAGGGTA TCAACTTTCCTAAAGCACGAAGTCTTTGTTTGGAAACTTCTCGATATCACTTTCGTTACC AAACGACCCTTTCTACTTCGGGATGTCGCCATCGAATTATTGTTCAAAGAGAGAGTTAGC GCTTTTTTTAGTTTTTACAACAAAAGAGTGAGAGATGACGTTTTACGGGTACTGAATAAG ATCCCGAAGCACCTTCCAGCAGATCCAATTTTTTCAAGCGTTTTACAAGAAATAAACGAC CGAGGAAATAGTATAGTGGCAAGAAATGGAATAGGAAAGGCAAGCATTGCTTCCAAATTC ACTAGCGTCTTCTCAGCGAACAACAGCCTAATAGATGGATTTGAGATCAGCAAAAAATGG GTTAGGGGAGAGATTTCTAATTTTTATTACCTGTTGAGTATCAACATCCTAGCGGGAAGG TCATTCAACGATTTGACCCAATATCCAGTGTTTCCGTGGGTTATTGCAGATTACGAAAGT AACGTACTCGATTTAGAGAATCCTAAAACTTACCGGGACCTATCGAAACCTATGGGCGCT CAAAGTGAGAAAAGGAAATTACAGTTTATAGAGCGTTATGAAGCTTTGGCTTCCCTGGAA AATGCTGATTCCGCACCATTTCATTATGGCACGCATTATTCCTCAGCTATGATAGTATCT TCATATCTGATAAGGCTGAAGCCCTTTGTCGAATCCTTTTTGTTATTGCAAGGCGGAAGT TTTGGCCCTGCAGATCGTTTATTTAGTTCGCTTGAAAGGGCCTGGAGCTCTGCTTCTTCT GAAAATACAACGGATGTCAGGGAATTGACACCTGAATTTTTTTTTCTACCTGAATTTTTG ATCAACGTTAATAGTTATGACTTTGGTACAGACCAAAGCGGTAAAAAAGTTGACGACGTC GTACTTCCACCCTGGGCAAATGGTGACCCAAAGGTTTTCATTCAAAAGAATAGAGAAGCT TTAGAAAGTCCTTATGTATCAGCACATTTACATGAATGGATTGATTTGATATTTGGTTAC AAACAAAAGGGGGAAATTGCTGTGAAATCTGTTAACGTATTCAACAGATTGAGTTACCCA GGCGCTGTAAATCTAGATAATATTGACGATGAAAATGAGCGCAGAGCTATCACAGGCATT ATTCACAACTTTGGTCAAACGCCTTTACAAATATTTCAGGAACCTCATCCGGAAAAAATA GCCTGCAATGTTCAACAGCTAACAACAGAGGTATGGCGTAAGGTTCCAATGAAGCCAATA TTTGAGAAGACAATCTTTAATTTGAATGAAAAGAACAGGTCTGTCGATTATGTTATACAC GATCCTAGTTACTTCGATTCATTATACTGGAGGGGCTTCGCTTTCCCAAACTTGTTTTTC AGAACGGAAGAATCGTTAGTGTCATTGAGAATTGTGCATAAAAATTGGTTAAAAATTGGA CTAGATATTTTTAAAAAGACGCATATGGCTCAGATTACATCGTTTGCGTACTGGAAGTTG GGCGAATTCATAACTGGTGATAAAAATGGGCTGATAAAAGTTTGGAAATATCGTAAAGAT AAGCATTCGGTTTCAGGTAACCTTGAGAACAAAAAAACAATGTTTGGGCACCTATGCGAG CTAAAGGAAATGCGCTGTTATCACGACTACAATACGCTTTTAACCTTAGACATCAGCGGC TTAGTATATGTCTGGGACATGATTAATTTCGAACTAGTGAGACAAATAACAAATGATGCG CAAAAGGTCGCAATATCTCAACATGCAGGGAGCATTATGGTATTGACTAAGAATAACGCC ATTTCGATCTTCAATCTAAATGGACAAATATATACATCAAAGAAATTCGAACCAGCTAAA 
ATTGTAAGCTCAATTGATTTTTTTGACTTCACTAAGTTAGACGCAGGTTACAGAAAGCAT ATCTATTGGAAAGAGATGGAAATACTACTAGTGGGCTTTGAAGATGGAACTATAGAAATT TACGAGCTCTTTTTGACTTTTCATAATGAATGGGCGATAAAGCTACTGAAACAGCTCTGT ACCGAAAGAGGGAAAGCCATAACTAGCATTAAGGGACAGGGGAAGACATACCTGTCCCAG AAAAGACGCAAGGATACAGCAGAGCCTCATGAGATAGAAGTGATTGCGGGAACATTAGAT GGCAGATTAGCTATTTGGTACTAG >YCS3 3681 residues Pha 0 Code 0 ATGGGGTATCCGCCACCTACACGAAGGCTTGGAGATAAGAAAAGGTACCATTATTCCAAT AATCCTAACCGAAGGCATCCTTCCGCTGTTTATTCCAAGAATAGCTTTCCAAAATCAAGC AATAATGGATTTGTATCTTCTCCTACTGCCGATAATTCAACAAATCCGTCTGTAACTCCC AGTACTGCATCTGTACCTCTTCCTACAGCGGCACCTGGAAGCACGTTTGGTATCGAAGCA CCCAGGCCATCTCGATATGATCCGAGCTCAGTCAGTAGGCCTTCGTCATCATCTTATTCG TCAACAAGAAAAATTGGAAGCCGTTATAACCCAGATGTGGAAAGATCCTCTTCAACCACT AGTTCAACTCCGGAAAGTATGAATACGAGCACCATAACACACACCAATACGGATATCGGA AACTCACGCTATTCTCGAAAAACCATGAGCAGATATAATCCTCAATCTACTAGTTCTACA AACGTTACCCACTTTCCCTCGGCATTATCAAACGCTCCACCGTTTTATGTTGCCAACGGG AGTTCTCGGAGACCTCGATCAATGGATGATTATAGTCCTGATGTAACGAACAAGCTCGAA ACAAATAATGTTTCATCTGTTAATAATAACAGCCCTCATTCTTATTACTCTAGGAGCAAC AAATGGAGATCCATTGGAACGCCTTCCAGACCACCATTTGATAATCATGTCGGCAATATG ACGACCACCAGCAATACTAACTCGATCCATCAAAGGGAACCTTTTTGGAAAGCAAATAGT ACTACTATTTTAAAATCAACTCATTCACAGTCATCGCCTTCCCTTCATACTAAAAAATTT CACGATGCGAATAAATTGGACAAACCAGAGGCTTCAGTTAAAGTTGAAACACCCAGTAAA GATGAGACAAAAACCATATCGTACCATGATAACAATTTTCCACCAAGAAAATCAGTTTCT AAACCTAATGCACCTTTAGAACCCGATAATATCAAGGTTGGCGAAGAAGATGCATTGGGG AAAAAAGAAGTACATAAAAGTGGGCGTGAGATAGCAAAGGAACATCCTACTCCTGTAAAA ATGAAAGAGCATGATGAACTAGAAGCTCGCGCTAAAAAAGTAAATAAAATCAATATTGAT GGAAAGCAGGACGAAATTTGGACGACAGCAAAAACAGTGGCCAGTGCAGTCGAAGTTTCC AAAGAAAGTCATAAGGAACTAACACGCTCTGTTGAAAGGAAGGAAAGTCCAGAAATTAGA GATTATGAAAGAGCATACGATCCGAAAGCCCTGAAAACAGACGCAACAAAGTTGACAGTA GACGATGATAATAAAAGTTACGAAGAACCTCTTGAAAAAGTGGAAGGGTGTATTTTCCCA TTACCAAAAGCAGAAACGAGATTATGGGAATTGAAAAACCAGAAAAGAAACAAAATAATA AGTAAACAAAAGTACTTACTGAAAAAGGCAATTAGGAATTTCTCAGAGTATCCTTTTTAC GCACAGAACAAACTTATACATCAGCAGGCTACCGGACTTATCTTGACGAAAATTATATCA 
AAGATAAAAAAGGAGGAACATTTGAAAAAAATAAATTTAAAACATGATTATTTCGATCTC CAGAAGAAGTATGAAAAAGAATGCGAAATTTTGACTAAACTGAGTGAAAATTTAAGGAAG GAAGAAATCGAAAATAAACGTAAAGAGCACGAATTAATGGAGCAGAAAAGACGTGAAGAA GGTATCGAAACAGAAAAAGAAAAAAGCTTACGGCATCCATCCTCGTCTTCCTCATCTCGT CGCAGAAATAGGGCTGACTTCGTTGATGATGCGGAAATGGAAAATGTATTGCTACAAATC GACCCAAATTATAAACATTATCAGGCTGCTGCAACAATTCCTCCGCTAATTTTAGATCCA ATCCGCAAATACTCTTACAAATTCTGTGATGTAAATAACTTGGTTACAGACAAAAAGCTT TGGGCGTCTAGAATATTGAAAGACGCCTCTGACAACTTTACTGACCATGAGCACTCTTTA TTTTTGGAGGGTTATTTAATTCATCCTAAAAAATTCGGTAAAATTTCTCACTACATGGGC GGCTTAAGAAGTCCTGAAGAGTGTGTCCTACATTATTATAGAACAAAGAAAACTGTGAAT TATAAACAACTTCTTATCGATAAGAACAAGAAAAGAAAAATGTCAGCCGCTGCGAAGCGC CGCAAGAGGAAGGAAAGAAGTAATGACGAGGAAGTCGAAGTTGATGAGAGTAAAGAAGAG TCAACGAACACGATAGATAAGGAAGAAAAAAGTGAGAACAATGCCGAGGAAAATGTTCAG CCGGTTCTAGTTCAAGGTTCTGAAGTGAAAGGTGATCCATTAGGTACACCGGAAAAAGTT GAAAATATGATTGAAAAGAGAGGCGAAGAGTTTGCAGGTGAATTGGAAAATGCTGAGAGG GTAAATGACTTAAAAAGGGCGCATGATGAAATTGGAGAAGAGAGCAATAAGTCCAGTGTA ATAGAAACCAACAATGAGGTACAAATAATGGCTCCAAAAGGAGGTGTTCGGAATGGTTAT TATCCAGAGGAGACCAAAGAACTTGACTTCAGTTTAGAGAATGCGTTACAGAGAAAGAAA CACAAATCTGCACCAGAGCATAAAACAAGTTATTGGAGTGTTCGTGAATCTCAACTCTTT CCAGAATTGTTGAAGGAGTTTGGCTCTCAATGGTCTCTCATATCAGAAAAACTGGGTACC AAATCTACTACAATGGTAAGGAATTACTACCAAAGAAATGCAGCTCGCAATGGATGGAAA TTACTGGTTGATGAAACCGACTTAAAGCGAGATGGGACTAGTTCAGAATCTGTACAACAA TCTCAAATTTTGATACAACCAGAACGACCAAACATCAATGCCTATAGTAATATTCCTCCT CAACAAAGACCGGCTTTGGGTTATTTTGTTGGACAACCAACTCATGGGCATAATACATCT ATTTCATCTATCGATGGCTCTATAAGACCATTTGGGCCTGATTTTCATCGTGATACCTTT TCTAAAATTAGTGCTCCTTTAACCACTTTACCACCACCAAGACTACCATCTATTCAGTTT CCTCGTTCAGAAATGGCAGAACCTACAGTGACAGATTTGCGTAACAGGCCCTTAGACCAT ATTGACACGTTGGCTGATGCAGCTTCGTCAGTAACAAATAATCAAAACTTCAGTAATGAA AGGAATGCAATTGACATTGGCCGTAAATCGACGACAATCAGCAATCTATTGAATAATTCG GATCGAAGCATGAAATCTTCTTTCCAAAGCGCTTCAAGACACGAAGCACAGCTCGAAGAC ACTCCCAGCATGAACAATATTGTAGTACAAGAAATAAAACCGAATATTACTACGCCAAGA TCGAGTTCTATTTCTGCATTACTAAATCCTGTAAATGGGAATGGGCAATCAAACCCAGAT 
GGAAGGCCGTTGCTGCCATTTCAGCATGCTATTTCTCAAGGCACTCCTACTTTCCCTTTA CCGGCCCCTCGCACTAGTCCAATAAGTCGTGCGCCTCCAAAGTTCAATTTTTCGAATGAT CCGTTGGCAGCTTTGGCTGCGGTTGCCTCCGCGCCAGATGCAATGAGCAGTTTTTTATCT AAAAAGGAAAATAATAATTGA >GNS1 1044 residues Pha 0 Code 0 ATGAATTCACTCGTTACTCAATATGCTGCTCCGTTGTTCGAGCGTTATCCCCAACTTCAT GACTATTTACCAACTTTGGAGCGACCATTTTTTAATATTTCGTTGTGGGAACATTTCGAT GATGTCGTCACTCGTGTAACTAACGGTAGATTTGTTCCAAGCGAATTCCAATTCATTGCA GGTGAATTACCATTAAGCACTTTGCCCCCTGTGCTATACGCCATCACTGCCTATTACGTT ATTATTTTTGGTGGCAGGTTTTTGTTAAGTAAGTCGAAACCATTTAAATTAAATGGCCTT TTCCAATTGCATAATTTGGTTTTAACTTCACTTTCATTGACGCTTTTATTGCTTATGGTT GAACAATTAGTGCCAATTATTGTTCAGCACGGGTTATACTTCGCTATCTGTAATATTGGT GCTTGGACTCAACCGCTCGTTACATTATATTACATGAATTACATTGTCAAGTTTATTGAA TTTATAGACACCTTTTTCTTGGTGCTAAAACATAAAAAATTGACATTTTTGCATACTTAT CACCATGGCGCTACTGCCTTATTATGTTACACCCAATTGATGGGCACCACATCTATTTCT TGGGTCCCTATTTCATTGAACCTTGGTGTTCACGTGGTTATGTATTGGTACTATTTCTTG GCTGCCAGAGGCATCAGGGTCTGGTGGAAGGAATGGGTTACCAGATTTCAAATTATCCAA TTTGTTTTGGATATCGGTTTCATATATTTTGCTGTCTACCAAAAAGCAGTTCACTTGTAT TTCCCAATTTTGCCACATTGTGGTGACTGTGTGGGTTCAACAACTGCCACCTTTGCAGGT TGTGCCATTATTTCTTCATATTTGGTACTATTTATTTCATTTTACATTAACGTTTATAAA CGTAAAGGCACCAAAACCAGTAGAGTGGTAAAGCGTGCCCACGGCGGTGTTGCCGCAAAG GTTAATGAGTATGTTAACGTTGACTTGAAAAACGTTCCTACTCCATCTCCATCACCAAAA CCTCAACACAGAAGAAAAAGGTAA >RBK1 1002 residues Pha 0 Code 0 ATGGGTATTACAGTAATAGGTTCTCTAAACTATGATTTGGACACATTTACGGATAGATTA CCTAACGCTGGAGAAACTTTCAGGGCTAACCACTTCGAAACACATGCTGGTGGTAAGGGA TTGAACCAAGCTGCGGCCATTGGTAAATTAAAAAACCCCAGCAGCAGATATAGTGTTCGA ATGATTGGTAATGTTGGAAATGATACATTTGGTAAACAATTGAAGGACACTTTATCCGAT TGCGGAGTCGATATCACTCACGTCGGTACTTACGAAGGCATTAATACGGGTACCGCTACC ATATTAATTGAAGAGAAAGCTGGTGGCCAAAATAGGATATTGATTGTAGAAGGTGCTAAC AGCAAGACTATTTATGACCCGAAACAGTTGTGTGAAATTTTTCCAGAGGGCAAGGAGGAA GAAGAGTATGTTGTTTTTCAACACGAAATTCCTGATCCTCTTTCCATTATTAAATGGATA CATGCGAACAGGCCGAATTTTCAGATCGTATATAACCCCTCACCTTTCAAGACCATGCCT AAGAAAGATTGGGAGTTGGTAGACCTTTTGGTCGTTAATGAAATTGAGGGTCTTCAAATC 
GTGGAAAGTGTATTTGATAATGAACTTGTTGAAGAAATAAGGGAGAAGATAAAGGACGAC TTTTTAGGAGAATATCGTAAAATTTGTGAGCTTTTGTATGAAAAACTCATGAATCGAAAG AAAAGAGGAATTGTGGTTATGACTTTGGGTTCGAGAGGGGTGCTTTTCTGTTCGCACGAA AGCCCTGAAGTACAATTCCTTCCGGCTATTCAAAATGTTTCGGTTGTTGATACTACAGGA GCTGGAGATACTTTCCTGGGCGGTTTGGTTACTCAATTGTATCAAGGAGAGACCTTGTCT ATGGCTATAAAGTTCTCTACATTAGCTAGTTCATTGACCATTCAAAGAAAAGGTGCTGCT GAAAGCATGCCACTGTATAAAGATGTTCAGAAAGATGCATAA >PHO87 2772 residues Pha 0 Code 0 ATGAGATTCTCACACTTTCTCAAATACAACGCTGTCCCTGAATGGCAGAATCATTACCTA GATTATAACGAATTGAAAAATTTGATCTACACATTACAGACAGATGAATTGAAACAAGAA ACGCCAACCGGTGACTTAAACGATGACGCTGACTCTCAGACTCCAGGTCCAATCGCTGAT ATAGAAAGCAACATAGCTGCAGGAGAACCATCTCCATCGAAAAGAAGATTTACACATAAA CTCAAGCGTAAGCTCTTTGGTTCTAAAACACCTTCAGGAAGCAAAAGGGGAGACTCCGAC GAAAAGGCCATAGATGGGAACAATATTAACGAGGAAACAATTGAGTTAGACGAGTTATCT CCTCAAGGGAAAACCACCTCTTTCAATAAGAATTTTATACGTAAGAAATTCTTTGAATCA CGCAGCTCATCTGTGAGTAGCGAGGGAAAGACGCTCTTCAGTTCTTATGATACATTCGTA ACTAACCTGAGCGACGAGAAATTGAAAGTAGATGATTTCTACAAAAGAATGGAAGCTAAG TTCTATGAAAGATTTGACCACTTGATTAATGATTTGGAGAAGGAAGGCATTGTAACAAGA TTGAATGAAACTTTCAATCCTGAAATTCAAGCATTGCCTCCTTTAAGAGAAATTATTTCT GGTACATCAGAGACACATTCATCTAATAACCCATTTGAAATACACTCTTCAAACATCGAC AGTGAATTGAGAAATAGGTTTGATTACAGCGAAGAAGAAATGGATGAAGATGATGACGTT GACGTGTTTGCTGACACTACCGACAATACCGCCCTCTTGAATTATTCGCAATTTAACATT AAATCTCAGAAAAAATCATTATTAAAACAGACAATAATAAATCTTTACATAGACCTTTGC CAGTTGAAATCTTTTATCGAATTGAACAGAATGGGTTTCAGTAAAATTACTAAGAAGTCT GATAAAGTATTGCACATGAACACTAGGCAAGAATTAATAGAAAGTGAAGAATTTTTCAAA GACACCTACATCTTCCAGCATGAAACTTTAAGCAGTTTAAACAGTAAAATTGCACAACTT ATTGAATTTTATGCTGTTCTCATGGGTCAGCCTGGGAACGTAGATTCATGCAAGCAAGAG TTAAAGTCGTACCTGCACGACCACATTGTTTGGGAAAGAAGCAACACATGGAAAGACATG TTGGGCCTCTCTTCGCAAAATAACGATATAATAACTATTGAAGATGAAGCTGAGAAACTT ATGCAAGAAAAGCTTCAAATTGAATATTTCAAGTATCCATTGCCTAAGCCAATTAATTTG AAGTTTACTAAAATTGAAAATTTGGCAGTTCCTAAGCTATTTTTTGGGAAAAGAGCAATG AAAATAGGCTTCATTATCATTGTCACAGGTGTTTTGTTGGGTGTTAAAACTTTCAATGAC CCTGTCGAACACCGGTGTATGGCATTGGTAGAATGCTGTGCTTTCTTATGGGCTAGTGAA 
GCCATTCCATTACACATCACAGGTTTATTGGTTCCCCTTCTAACTGTCCTTTTTAGGGTA CTAAAAGACGATGACGGTAAGGTAATGGGAGCAGCAGCTGCCTCTACAGAAATCTTAGGT ACAATGTGGTCGTCAACAATTATGATTTTATTAGCAGGTTTCACATTGGGTGAAGCCTTG TCGCAATATAACGTTGCGAAAGTTTTGGCATCGTGGTTATTGGCCCTTGCAGGTACCAAG CCAAGAAATGTCCTTTTAATGGCAATGAGTGTTGTATTCTTTCTTTCGATGTGGATTTCC AACGTTGCCTCCCCAGTATTGACATATTCTCTATTAACACCCTTACTAGATCCGCTGGAC TACACTTCACCGTTTGCTAAGGCATTAGTCATGGGTGTTGCACTTTCGGCAGATATTGGT GGTATGGCTTCACCTATTTCTTCGCCACAGAATATCATCTCCATGCAGTACTTAAAACCT TATGGAATCGGCTGGGGGCAATTTTTTGCTGTCGCTCTGCCTACAGGTATTCTATCGATG CTGTGCTCCTGGGCCTTGATGATACTCACCTTTAAAATAGGCAAAACTAAACTGGAAAAA TTTAAACCAATAAGGACCAGATTTACTATAAAGCAATATTTTATCATCATTGTAACTATT GCTACTATTCTTCTATGGTGTGTAGAGTCACAAATAGAAAGTGCTTTTGGATCGTCCGGT GAAATTGCAGTAATACCGATAGTCCTGTTTTTTGGTACAGGTCTACTATCAACAAAGGAT TTCAACACATTCCCTTGGTCAATTGTTGTTCTTGCTATGGGTGGTATAGCCCTTGGTAAG GCAGTTTCATCTTCAGGCTTGTTGGTAACTATTGCAAGAGCATTACAAAAGAAAATTCAG AACGATGGTGTTTTTGCTATCTTATGTATTTTCGGTATTTTAATGTTAGTTGTGGGCACT TTTGTCTCACATACTGTGTCAGCAATCATCATTATTCCCTTGGTGCAAGAAGTTGGTGAC AAATTATCCGATCCAAAGGCAGCTCCAATTCTTGTGTTCGGTTGCGCCTTGTTAGCCTCA TGCGGTATGGGGTTGGCTTCATCTGGATTTCCAAACGTTACTGCTATTTCTATGACCGAT AAAAAGGGTAATAGATGGCTAACTGTAGGCGCTTTTATCTCCAGAGGTGTTCCTGCTTCG TTGTTAGCGTTTGTCTGCGTAATTACTCTCGGTTATGGTATTAGTTCTTCCGTCTTAAAA GGTAGCACTTAA >BUD5 1617 residues Pha 0 Code 0 ATGAGAACGGCCGTACCGCAGTTGCTGGAAGCAACTGCCTGTGTCTCTAGAGAATGCCCC CTCGTCAAAAGAAGTCAGGACATAAAAAGAGCAAGAAAACGTCTACTCAGTGACTGGTAT AGGCTCGGCGCTGATGCAAACATGGATGCCGTATTATTAGTTGTTAACTCCGCCTGGAGG TTTCTGGCCGTCTGGCGACCCTTCGTAAACTCAATCCAACATGCAACTCAGGAATTGTAT CAAAATATCGCCCATTACCTTCTTCATGGCAACGTAAATATACAGAGGGTCACAGCACTA ATACAGCTCGTAATGGGACAGGACGATTTACTTTTTAGTATGGATGATGTTCTACAAGAG GTCTTCAGAATACAGCTCTATTTGAATAAGATGCTGCCGCACAACTCTCACAAATGGCAA AAGCCATCCCCCTTTGACTCCGCAAACTTACTACTTAACTTCAGAGACTGGACAACTGAC AATGCTCTCCTCCAAGAGTTGCTACTATCCTATCCCACAATTAATAAAAACAAACACAAA AATCACTCCGTCCCTCGTCTAATACAAATCTGGGTAGAGTCTTATTGGCAAGATAGTGAG ACAACATTAAAAGATATCCTCAATTTTTGGTACAGTCACTTGGCTGAATATTATGAATAC 
CAAGAACTGTTTGCAGACATAGTTCAGCTGTTTATAAACAAAAAAAGAACGAGGCAATTG AAGATTCATTACATTGGTCTAACTGATAAGGAAATCGAAGAAAATAAACCGCCCCTGGAC TACGAAAACTTATTTCTCCAATACGAGATAGACAAAACGAACGCAAATGATGAATTGTGC GGTGCAACTGACCTCAGTGATTTACTTTTCCAATGGAAACAGGGTGAACCTCTAGAAGTC GAAGCCTTCGCTCTAAACGTATCTCCATGGTCACTTGCAAAGACATTGACTCTCTTAGAA TCTTCTCTTTACTTGGATATTGAAACAATAGAATTCACAAGACATTTCAAACACAACGAT ACAACAATTGACTCCGTGTTTACGCTTTCCAACCAGTTATCGTCCTACGTTCTTGAGACA ACTTTGCAGCAAACGCACACCATTTCCTACTGGTTACAAGTTGCACTTGCTTGTCTATAC TTACGAAACTTAAACTCACTTGCTTCAATCATTACATCATTGCAAAATCATTCAATAGAA AGACTATCTCTCCCGATAGATGTTAAATCAGACCACCTTTTTCAGCGCCTAAAAGTCGTC GTACATCCAAACAACAACTACAACGTTTATAGAAGAACAATTAAACATATTTTCCACAGT CAGCTTCCTTGTGTACCTTTTACATCACTGCTTATCAGGGACATTACCTTCATAAGAGAC GGAAACGATACATTCACTAAAGATGGTAATAACGTGAATATGCAAAAGTTCAACCAAATC ACAAAGATAGTCGCTTTTGCGCAATATTTACAACAAAAGCAATATGAAGATATACACTGT TCAAATACTACTGCAAGAAGCTTATTAGGGGCTATGATAAAGGTGCACACTTTATATAAC GACAACAAAGACAGGGCGTATCAAGTCAGTATAGCTAAGGTTCCAAGGCTTACCTAA >MATALPHA2 633 residues Pha 0 Code 0 ATGAATAAAATACCCATTAAAGACCTTTTAAATCCACAAATCACAGATGAGTTTAAATCC AGCATACTAGACATAAATAAAAAGCTCTTTTCTATTTGCTGTAATTTACCTAAGTTACCA GAGAGTGTAACAACAGAAGAAGAAGTTGAATTAAGGGATATATTAGGATTCTTATCTAGG GCCAACAAAAACCGTAAGATTAGTGATGAGGAGAAGAAGTTGTTGCAAACAACATCTCAA CTCACTACTACCATTACTGTATTACTCAAAGAAATGCGCAGCATAGAAAACGATAGAAGT AATTATCAACTTACACAGAAAAATAAATCGGCGGATGGGTTGGTATTTAATGTGGTAACT CAAGATATGATAAACAAAAGTACTAAACCTTACAGAGGACACCGGTTTACAAAAGAAAAT GTCCGAATACTAGAAAGTTGGTTTGCAAAGAACATCGAGAACCCATATCTAGATACCAAG GGCCTAGAGAATCTAATGAAGAATACCAGTTTATCTCGCATTCAAATCAAAAACTGGGTT TCGAATAGAAGAAGAAAAGAAAAAACAATAACAATCGCTCCAGAATTAGCGGACCTCTTG AGCGGTGAGCCTCTGGCAAAGAAGAAAGAATGA >MATALPHA1 528 residues Pha 0 Code 0 ATGTTTACTTCGAAGCCTGCTTTCAAAATTAAGAACAAAGCATCCAAATCATACAGAAAC ACAGCGGTTTCAAAAAAGCTGAAAGAAAAACGTCTAGCTGAGCATGTGAGGCCAAGCTGC TTCAATATTATTCGACCACTCAAGAAAGATATCCAGATTCCTGTTCCTTCCTCTCGATTT TTAAATAAAATCCAAATTCACAGGATAGCGTCTGGAAGTCAAAATACTCAGTTTCGACAG TTCAATAAGACATCTATAAAATCTTCAAAGAAATATTTAAACTCATTTATGGCTTTTAGA 
GCATATTACTCACAGTTTGGCTCCGGTGTAAAACAAAATGTCTTGTCTTCTCTGCTCGCT GAAGAATGGCACGCGGACAAAATGCAGCACGGAATATGGGACTACTTCGCGCAACAGTAT AATTTTATAAACCCTGGTTTTGGTTTTGTAGAGTGGTTGACGAATAATTATGCTGAAGTA CGTGGTGACGGATATTGGGAAGATGTGTTTGTACATTTGGCCTTATAG >TSM1 4224 residues Pha 0 Code 0 ATGATGTCCTTTTCCAAAAACGCCACTCCTAGAGCCATTGTTAGTGAATCTAGCACTTTG CATGAGATGAAGTTTAGAAATTTTAGAGTTGCCCATGAAAAAATCTCGTTGGATATAGAT CTAGCTACTCACTGCATTACCGGTAGCGCTACTATAATAATCATTCCGTTGATCCAAAAC CTAGAATATGTAACTTTTGATTGCAAGGAAATGACTATTAAAGATGTTCTGGTCGAAAAT CGTCGATGTGATCAATTTATTCATGACGACCCACTTCAAACAAATTTGAATGGATTGACT TCACAAAATGTATTATACAGCGACAATTCCATTGAACAGTCACATTTTTTGAGATCTAAG TTTGCTAGCTTGAATGAATACCCAGAAACGGACTCTAAATCCCAGTTAACTATAAAAATA CCATCTTCCATCAAAATATCTTTGGAGGACGCCAATGCATTAAGTAATTACACTCCGATT ACTCCTTCAATTAAGACTACCCCTGGGTTTCAAGAATCTGTTTTCACTCCAATTACATTA CAAATTGAATATGAAATCAGAAACCCAAAGTCGGGTATTAAATTCGATACTGTGTATGCT GACAAGCCCTGGTTATGGAACGTTTACACTTCAAATGGTGAGATTTGCAGTTCTGCATCA TATTGGGTCCCATGTGTCGATTTGCTTGATGAAAAATCTACATGGGAGTTAGAATTCAGC GTACCGAGATTGGTTAAAAATATAGGTACTTCGAAATTAATCGGACAAAATGGAGAAGAG AGTGAAAAAGAGAAGGAGGATACGCCTGAGCACGATGAAGAGGAAGAGGGGAAGCCGGCA AGAGTTATCAAAGACGAAGATAAGGATTCTAACTTGAAAAATGACGAAGAAGGCAAAAAT AGTAAAAGCAAAGATGCACAAGATAATGATGAAGAAGAAGAGGAAGGCGAAAGTGACGAA GAGGAAGAGGAAGGGGAAGAGGAAAGGCGGAATATTGAGGAAAGCAACAATCCGAGTTTG AGGGATGTGATTGTGTGTTGTTCAGAATATTCAAATATTAAAGAACTTCCGCACCCGATT GATTTGACGAAAAAAAAATGCATATTTCAGATAATTAATCCTGTGGCTCCACATCACATT GGTTGGGCTATAGGCGCCTTTAATTCATGGTCTTTACCTTTGATATCACCTCCAAGTGTT GATGCCGAGGACGAAGTAGAGGAAGACAAGTTGAGAGAGAATGTTGTGGACAATGTTAAC GATACTATGGATGACGACATTGGTTCGGATATTATACCCATTCAAATTTTCACACTTCCG ACGCAGGAAACAGATGAGTTAACAGTTATAAATTCGACAGTTGTCTGCCAAAAAATTATA GATTTCTACTCGAAAGAATTTGGGTCTTATCCTTTCACTTGTTACTCTATGGTGTTTTTA CCTACCGCACCTTCTAAGCATATGGATTTTGCAGCATTAGGCATTTGTAATACCAGATTA TTGTACCCTCTAGAAGTTATTGATAAAGCATTCAGTACTACGAATGAGTTAGCATGGGCA CTTGCTAACCAATGGTCTTGTGTGAATATAACTCCTTTAGATATGAACGACTACTGGTGC TGTCTTGGTATTGCTGGTTATATGGTGTTTCAGGTAACCAAAAAATTAATGGGTAATAAC 
ACGTATAAATATCAATTAAAGCGTAATAGTGAGGCGATTGTGGAACAAGACTTCGAGAAA CCGCCTATTGGGAGCACTTTTACCGGCAGTTCTAGGCCAATATCTTGGTCTTCTAAAGAT TTGTCCTTTATACAATTGAAGGCACCGATGATACTACACATACTTGACAGAAGGATGACT AAAACAGAACGATCTTTCGGTATGTCTCGAGTATTACCTAAAATTTTCCTTCAAGCTATG TCTGGTGATTTACCGAATAATTCGTTGACTTCATCGCATTTTCAACATGTTTGCGAAAGA GTTAATAAAAGTAAATTAGAGAATTTTTTCAACGAATGGGTATATGGGTCTGGGGTACCC ATATTACGTGTCACCCAAAGATTTAATAGGAAGAGGATGGTTATAGAACTGGGTATAAGG CAAGTTCAAGATGAAGAACTTGGCCACGAAAAAGTGGTAGGGGAGGAAGGATTTTTCAAA AGTGCACTAGACCACTTAGAACATCCAGATTTGAACCGAACCGAATGCTTCACGGGCTCG ATGACTATAAGGATCCATGAACACGATGGTACTCCGTATGAGCATATTGTGGAAATCAAA GATACATTCACAAAAATAGATATTCAGTACAATACAAAGTACAGAAGATTAAGGAAAAGA GGTGGTGGTGCAAATGATGAAAATGGTGTTGAAAACAATAATGAGGAGAAGCCTATTGTT GTGGATGTGAATTGTCTAGGAAATGTATACATGTCGCCCGAAGAGTGTTCCCGATTCAGT TTGACGGAATTTAATCGTACGTCTGAGAGTAATGAATTGCTTAAGCAAAACGAAGCATTT GAGTGGATACGCATAGACTCTGATCTGGAATGGATTTGCCAAATGCACATTAATCAGCCG GATTACATGTTTTCTTCTCAGTTGAGACAAGATGGGGACATAGAGGCCCAACTAGAAGCC ATACGATATTATGAGGACGTCGTTGTTAATGGTGGTGTGAAATCACTTGTTTATTCAAGT ATTTTGTTTAGAACGGCGATCGACGAGCGTTACTTTTTTGGCATAAGACTCGCGGCGTGC GAAGCGCTTAGTAAATACGTATATGATCCGGATTTTACTGGCGGTGTTAAGCATTTAATT CAGATTTTTCAGATTTTGTTTTGCCTAGAAGACTCTAATATTCCAAAGAGTAATAACTTT GAGAATCCTAAGTTGTATTTCTTACAGTGTAATATTCCCAAATATTTGGCTAAAGTGAAA AATGAAAATGGTAAATGTCCAAAATTGGTGAAGCAATTTTTACTGGATATTCTTGTTTAT AATGAGAATGGTGAAAATAAATACAGTGATGATGCGTACGTCCGCAGCTTGATTGAAAAT GTTGTTAAAGTTGCTTTAAATGAGTATAAAGATAAAGCATATATGGAAAAAGTTAAGACT CAGTTATTGAGGTACGAAAATTTGGTGAATTGGCTTTCATCATACGAGTCTTTGATTAAG ACTACTATCATGTATGCTAAGTACAAATTGCATAAAGTGGGTGCTTATGACTTTACGGAA TTGACAGGAATGATAATGCATACATTAACATTAGGTATAAATAACGGAGATATTTCCAGG GAAAGCTTTCAGAATGAGTTTTTAATGGTTTTGAAAATCATGCTTTTAGAAGGTGGTTTA AAAAACAAGGATGCCCTTGTTTTGTTTACTGAAATACTTTGCTTCCATGAGGATTCTTAT ATTAGGGATAAAAGTGTTGATGTGCTTTCTGAATGTGTAAATCTAGTTGTTATGGATGGT AGTTTGGATACCATAAGTGACGATATTAAGTCCTCCGTCCAATCTGTGCACAATGAAGTT AAAAATATAAAAAGTGAGGATGATATTGAGTTGTTTTTAAGTGGTCATTACGTCGATGAT 
ATGAAAATAAAAATAGAAAAGATTGGCCGTCAAAATATTAGTGGGTTAATACAAATATGC CGAGATATGTTTAAAGGGTATAGCCCTTTGAAGATATTACTCTGGGATGTTTTGAATTTA CCTGTTCTTAGCTTGTACCAGAGGAAGCAAATACATGATCTTGTTAGGGTGATGTACACC CTAATCAACAGTTTTGTAGTTAGATTGGAAACACCAAGGGAGAGAAGACTTGTGGCGAAG ATGAATAGTAATGAAGAAGGTAAACTTGATATTGTTATAAAGCGTGAAAGTATCCTAAAA GTACATATTAAAAAGGAAGTAACCTCTACTGTGGAGGCACCCAAGAAGGCGAATAAGATA AAGATAAGTTTGAAAGGTGATAAACCTGTTAGAAAAGTGGAAAAACAAATTGTGAAGCCG AAGGTAACTAGCAAACAAAGGAAAGTCAAAAGTCATGTGAACCGCATGGGCAGTTTACCT TTACGGTTTGTTAAGATCCAACAACAACCTAGAGTAATGGTGCATTTGTCATCCGTCCCG TATAGCCAATTCGTTCAAATTACAAAAGTCACATCAAGATCGTTTATGGTTAAGATAAGA ACAAAGAATGATGCTAAGAATTGA >YCT5 1476 residues Pha 0 Code 0 ATGAAGCCACAGTGCATACTCATCTCTTTGCTGGTCAACCTCGCATACGCAGAGGAGTAT TTGGTGAGGTTCAAAAATCCCACAGCATTCCAACAATTCACTTCGAATTCCAACAGGTCA TGGAGACAGTTCATCGACAACAAAATTGAGAAGAAATTCTCCATCGGATCCTTCCGCGGC GTGACCATGAACCTGTCCAAGAACTTAGTGAACAAGCTGAAGAAAAGCCCACTGGTGGCT GATATTGTGCCCAACTTCAGGTTCGAAGCTTTTGAAGGCGACAGTGTAAATAGCGCCGAG TCGAGTTATACGTTTAACGCTACCGCCAAATACTCGTACGAAGACGTCGAGGAAGAGCAA AATATAACGTATCAACCAGACGCACCCCGTCACTTGGCCCGGATTTCCCGCCACTACCAA CTCCCATTCGACGTTGGGGACAAGGACCGCTACAAAAGCTGGTTCAATTACTACTATGAA CACGACTATCAAGGTCAAGACGTCAACGCCTATATCATGGATACGGGTATCTTCGCGGAC CATCCGGAATTCGAAGACAGAGTCATCCAGGGGATTGACTTGACCAAAGAAGGGTTTGGC GACCAGAATGGCCACGGAACGCACGTGGCGGGACTCGTAGGTTCCAAAACGTATGGAGCG GCAAAGAGGGTCAATCTTGTGGAGGTCAAAGTCTTGGGCAAAGACGGGTCTGGCGAGGCC AGTAACGTTCTTAGTGGTCTGGAGTTCATCGTGGAACATTGCACAAAGGTCAGTCGCCCA CAGGGTAAAAAATGCGTGGCCAATCTAAGTCTAGGGAGTTTCAGGAGCCCCATAATCAAC ATGGCAGTGGAGGGGGCCATTGAAGAAGGTATTGTATTTGTTGCCGCGGCGGGGAACTTC AATTTAGACGCCTACTGGGCCTCACCTGCGTCTGCAGAAAACGTTATCACCGTAGGGGCC TTTGATGACCACATTGACACGATTGCCAAGTTCAGCAATTGGGGGCCCTGTGTAAACATC TTTGCCCCAGGCGTGGAAATTGAGTCGCTATCTCATCTGAACTACAACGACACTTTAATT TTGTCAGGTACATCTATGTCGACGCCCATTGTCACCGGAGTTGCAGCGATCCTACTCTCG AAGGGAATTGAGCCTGAAATGATAGCACAGGAGATTGAGTATTTGTCCACGCGTAATGTT TTCCATAGAAGAACGTTGTTTTTCAAGCCTTCTACGCCAAACCAGATTCTTTACAACGGC 
GTCGATAAACTGGACGATCCATATGACGACGAAACGTTCCCTCGATTGAACATAGAGGCA ATTGCTAAGGAACTGGAGGAGTACAATGCCACTTTACAAACTCCTATGTCTGAGAATCTT CAATCTGGTTCAAAACTGTGGGGTTGGAATAACGATGTCACACTACCTCTTGGTGAGATT CGATTGAAGAGGCGTGATTTTATGAAAAATTTGTAG >PETCR46 510 residues Pha 0 Code 0 ATGTGGAGCAGGAACGTCAGATTGCTTGGATCATGGACAAGGTCCTACATGGTCCCCGCC ACCAAGAGAAAAACCATCCCCGTGTACCCACCTGTGCAGCGCATAGCTTCGTCGCAGATT ATGAAGCAGGTGGCCCTCTCAGAAATAGAGTCTCTGGATCCCGGGGCCGTTAAGAGGAAG CTCATCAGTAAAAAGAACAAGGACCGCTTGAAGGCAGGCGACGTGGTCCGGATTGTGTAC GACTCGTCCAAGTGCTCGTACGACACCTTTGTTGGCTACATCCTTTCCATAGACCGCAAA CAACTGGTGCAAGACGCCTCGTTGCTGTTGCGGAACCAGATAGCCAAGACGGCCGTCGAG ATTAGAGTGCCATTGTTTTCGCCGCTGATCGAGAGAATCGACTTGCTAACCCCCCACGTC TCGAGCAGACAAAGAAACAAACACTACTACATCAGAGGTACAAGGTTGGATGTCGGCGAC CTCGAGGCAGGTCTAAGAAGAAAGAAATAG >YCT7 828 residues Pha 0 Code 0 ATGTCACGTCCTGAGGAGTTGGCACCACCGGAGATTTTCTATAATGATAGCGAAGCACAC AAGTACACGGGTTCGACCAGAGTGCAGCATATCCAGGCGAAGATGACGCTGAGGGCGTTG GAGCTTTTGAATCTGCAGCCGTGCAGTTTCATTCTGGATATCGGGTGCGGGTCCGGACTG TCTGGGGAGATTTTGACGCAGGAGGGAGACCATGTGTGGTGTGGTTTGGATATATCGCCC AGCATGCTTGCGACCGGTCTTAGTAGAGAGCTGGAGGGCGACTTGATGTTGCAGGATATG GGCACCGGGATACCGTTCCGGGCGGGCTCGTTTGACGCGGCTATTAGTATCAGTGCGATC CAATGGCTGTGCAATGCGGACACTTCATACAACGATCCTAAACAGCGGTTGATGAGGTTT TTCAACACATTGTATGCTGCACTGAAGAAGGGAGGGAAATTTGTGGCCCAGTTCTACCCG AAAAACGACGACCAGGTGGACGACATACTGCAGTCTGCCAAGGTGGCAGGGTTCAGTGGC GGGCTTGTGGTGGACGACCCAGAGTCTAAAAAGAATAAGAAGTACTACCTTGTGTTGAGC AGTGGGGCCCCACCGCAGGGGGAGGAGCAGGTGAATTTGGACGGTGTGACCATGGACGAG GAGAACGTCAACTTGAAGAAACAACTGCGCCAGCGCTTGAAGGGAGGCAAAGACAAGGAG TCTGCCAAGAGTTTCATTCTAAGAAAGAAGGAGCTCATGAAAAGACGTGGGAGGAAAGTT GCGAAGGACTCCAAGTTCACCGGGAGGAAAAGAAGACACAGGTTCTAG >YCT9 447 residues Pha 0 Code 0 ATGGCGCTGTCCAGGAGCGTGGGGCGAGGATCAAAACTCACGTCCCCAAAAAACGACACA TACTTGCTAGCATCCTTTCGGTGGAACCTCGACCGAGACTTGCTCTTCAGGTGTGAAAGG TACTTTTGCATGTGGGCGTCCACAGGGTACTCCTCCTCCTGCTCCTGCTTCCCTGCCACA CGTTCCGCCTCAGTCGACTCCACTCCTTCAGTCGACTCCACTGGCTCCACCAGCGACGTG GTAGACGACCGTGGCGAAACCTCCATGGACTCCTGTGGCAGGATCACGTTATCGTACGTG 
ACCGAATGCCGTTTGTTGGCTTCTGCGGAATTGAGTCTGCGGATCTTAAGAAACTCTTCG TCTTGCAACAAATCCTTAGTCTCCGTCATTCTTGCAATCTGTTTTGGCGCTCTTGCTGCA AGCCGTGCTGAACAACCACCTGCGTGA >ARE1 1833 residues Pha 0 Code 0 ATGACGGAGACTAAGGATTTGTTGCAAGACGAAGAGTTTCTTAAGATCCGCAGACTCAAT TCCGCAGAAGCCAACAAACGGCATTCGGTCACGTACGATAACGTGATCCTGCCACAGGAG TCCATGGAGGTTTCGCCACGGTCGTCTACCACGTCGCTGGTGGAGCCAGTGGAGTCGACT GAAGGAGTGGAGTCGACTGAGGCGGAACGTGTGGCAGGGAAGCAGGAGCAGGAGGAGGAG TACCCTGTGGACGCCCACATGCAAAAGTACCTTTCACACCTGAAGAGCAAGTCTCGGTCG AGGTTCCACCGAAAGGATGCTAGCAAGTATGTGTCGTTTTTTGGGGACGTGAGTTTTGAT CCTCGCCCCACGCTCCTGGACAGCGCCATCAACGTGCCCTTCCAGACGACTTTCAAAGGT CCGGTGCTGGAGAAACAGCTCAAAAATTTACAGTTGACAAAGACCAAGACCAAGGCCACG GTGAAGACTACGGTGAAGACTACGGAGAAAACGGACAAGGCAGATGCCCCCCCAGGAGAA AAACTGGAGTCGAACTTTTCAGGGATCTACGTGTTCGCATGGATGTTCTTGGGCTGGATA GCCATCAGGTGCTGCACAGATTACTATGCGTCGTACGGCAGTGCATGGAATAAGCTGGAA ATCGTGCAGTACATGACAACGGACTTGTTCACGATCGCAATGTTGGACTTGGCAATGTTC CTGTGCACTTTCTTCGTGGTTTTCGTGCACTGGCTGGTGAAAAAGCGGATCATCAACTGG AAGTGGACTGGGTTCGTTGCAGTGAGCATCTTCGAGTTGGCTTTCATCCCCGTGACGTTC CCCATTTACGTCTACTACTTTGATTTCAACTGGGTCACGAGAATCTTCCTGTTCCTGCAC TCCGTGGTGTTTGTTATGAAGAGCCACTCGTTTGCCTTTTACAACGGGTATCTTTGGGAC ATAAAGCAGGAACTCGAGTACTCTTCCAAACAGTTGCAAAAATACAAGGAATCTTTGTCC CCAGAGACCCGCGAGATTCTGCAAAAAAGTTGCGACTTTTGCCTTTTCGAATTGAACTAC CAGACCAAGGATAACGACTTCCCCAACAACATCAGTTGCAGCAATTTCTTCATGTTCTGT TTGTTCCCCGTCCTCGTGTACCAGATCAACTACCCAAGAACGTCGCGCATCAGATGGAGG TATGTGTTGGAGAAGGTGTGCGCCATCATTGGCACCATCTTCCTCATGATGGTCACGGCA CAGTTCTTCATGCACCCGGTGGCCATGCGCTGTATCCAGTTCCACAACACGCCCACCTTC GGCGGCTGGATCCCCGCCACGCAAGAGTGGTTCCACCTGCTCTTCGACATGATTCCGGGC TTCACTGTTCTGTACATGCTCACGTTTTACATGATATGGGACGCTTTATTGAATTGCGTG GCGGAGTTGACCAGGTTTGCGGACAGATATTTCTACGGCGACTGGTGGAATTGCGTTTCG TTTGAAGAGTTTAGCAGAATCTGGAACGTCCCCGTTCACAAATTTTTACTAAGACACGTG TACCACAGCTCCATGGGCGCATTGCATTTGAGCAAGAGCCAAGCTACATTATTTACTTTT TTCTTGAGTGCCGTGTTCCACGAAATGGCCATGTTCGCCATTTTCAGAAGGGTTAGAGGA TATCTGTTCATGTTCCAACTGTCGCAGTTTGTGTGGACTGCTTTGAGCAACACCAAGTTT 
CTACGGGCAAGACCGCAGTTGTCCAACGTTGTCTTTTCGTTTGGTGTCTGTTCAGGGCCC AGTATCATTATGACGTTGTACCTGACCTTATGA >RSC6 1452 residues Pha 0 Code 0 ATGGTAACACAGACCAATCCGGTCCCTGTTACATATCCAACGGATGCTTATATCCCCACG TATCTGCCCGATGATAAGGTCTCCAATCTGGCAGATTTGAAAAAATTGATAGAAATGGAT TCCAGACTAGATTTGTATCTGACAAGAAGGAGGCTGGATACGTCCATCAATTTACCTACA AACACCAAGACCAAGGACCATCCCCCCAATAAAGAGATGCTGAGGATTTACGTCTACAAC ACTACGGAAAGCAGCCCTCGCAGCGATTCTGGCACCCCAGCGGACTCAGGCAAGACTACA TGGACACTGAGAATAGAAGGTAAGCTTCTGCACGAGTCCGCAAACGGAAAGCACCCATTT AGTGAGTTTTTGGAAGGTGTCGCGGTCGACTTTAAAAGACTGAAACCGCTGGGCATGGGC AAGAAGAGGAAACGCGATTCGTCATTGAGCCTTCCTTTGAATCTGCAACAACCCGAATAC AATGATCAAGATAGCACCATGGGCGATAACGACAACGGCGAGGATGAGGACAGTGCAGAG GCAGAATCCAGGGAGGAAATTGTAGACGCACTGGAATGGAACTACGATGAAAACAACGTT GTGGAGTTTGATGGTATCGACATCAAGAGGCAAGGCAAGGATAATTTGCGATGCAGTATA ACCATCCAGTTGAGGGGTGTCGACGGTGGAAAAGTACAGTACTCGCCCAACTTAGCTACC TTGATAGGTATGCAAACGGGCTCCGTTAATGACGCGGTTTATTCGATCTACAAGTACATT TTGATCAACAATCTGTTTGTTACGGAACAAACAGAGGCTCAAGATGGTTCCAACGATGCC GAAGACAGCAGTAACGAGAATAACAATAAAAACGGTGCTGGTGACGATGATGGCGTCGAG GGAAGTACTCCAAAGGATAAGCCCGAATTGGGTGAAGTGAAGCTAGATTCACTCTTACAA AAGGTATTGGATACAAACGCCGCGCACCTCCCCTTGATGAATGTTGTGCAAACCGTGAAC AAACTGGTATCACCCCTACCGCCCATCATCCTAGATTATACAATTGATCTTTCCAAAGAT ACCACCTATGGTGCTACCACCTTGGATGTAGATGTGTCGCACATTCTCCACCAGCCTCAA CCCCAGCCAAATTTACAAAAAGAGGAAGAAACAGATGCTGAAGACACAGCAAAACTACGT GAAATCACAAAGCTTGCCTTGCAGTTGAACTCTAGTGCTCAAAAATACCAGTTTTTCCAC GAACTGTCTTTGCATCCAAGAGAAACGCTGACTCACTACTTATGGTCTTCCAAGCAAAAC GAGCTTGTGCTGCAGGGCGACCAATACTTCAATGAAGATGCTGCAAGAACGAGTGACATA TACAGTAACAACAACAATGACAGGTCACTAATGGGCAATATCTCACTACTGTACTCCCAA GGAAGACTATAA >THR4 1545 residues Pha 0 Code 0 ATGCCTAACGCTTCCCAAGTTTACAGATCTACCAGATCCAGCTCTCCAAAGACAATCTCT TTTGAAGAGGCTATCATTCAAGGTCTGGCCACTGACGGTGGTCTTTTCATTCCACCAACT ATTCCACAAGTGGACCAAGCCACTCTTTTCAATGATTGGTCAAAGCTCTCCTTCCAAGAC TTAGCCTTTGCTATCATGAGACTATACATTGCCCAAGAAGAGATTCCAGATGCTGATCTA AAGGACTTGATCAAGAGATCTTATTCTACTTTCCGTTCTGATGAAGTCACCCCCTTGGTG 
CAAAACGTCACTGGTGACAAGGAGAATTTGCACATTTTAGAATTATTCCACGGTCCTACC TACGCTTTCAAAGACGTTGCTTTACAATTTGTCGGTAATCTTTTTGAATACTTCTTACAA AGAACCAACGCCAATTTACCTGAAGGCGAGAAAAAGCAAATCACTGTGGTCGGTGCTACT TCCGGTGACACTGGTTCTGCAGCCATCTACGGTTTAAGAGGCAAAAAGGACGTTTCCGTT TTCATCTTATATCCAACCGGTAGAATTTCCCCAATTCAAGAAGAACAAATGACCACCGTT CCAGATGAAAACGTCCAGACTTTGTCTGTTACCGGTACTTTCGACAACTGTCAAGATATC GTCAAAGCTATTTTCGGTGACAAAGAATTCAACTCTAAACACAACGTCGGTGCTGTTAAC TCCATCAACTGGGCAAGAATCTTGGCCCAAATGACCTATTACTTTTATTCATTCTTCCAA GCCACCAACGGTAAGGACTCCAAGAAGGTCAAGTTCGTTGTGCCAAGTGGGAACTTCGGT GATATATTGGCCGGTTATTTTGCCAAGAAAATGGGTTTGCCTATTGAAAAACTGGCCATC GCTACCAATGAAAACGACATTTTGGACAGATTTTTGAAATCTGGTCTATACGAAAGATCA GACAAGGTTGCTGCTACTTTATCCCCAGCAATGGATATCTTAATCTCTTCTAACTTTGAA AGACTACTATGGTACCTAGCTCGTGAATACCTAGCTAATGGTGATGATTTGAAAGCCGGT GAAATCGTCAACAATTGGTTCCAGGAATTGAAGACCAACGGTAAGTTCCAAGTTGACAAA TCCATCATTGAAGGCGCATCAAAGGACTTTACATCAGAAAGAGTTTCCAATGAAGAAACA TCTGAAACAATCAAGAAGATCTACGAATCATCTGTAAATCCAAAACATTACATCTTAGAT CCTCACACAGCTGTCGGTGTTTGCGCCACAGAAAGATTGATTGCAAAAGATAATGACAAG TCCATCCAATACATTTCTCTATCTACCGCTCACCCAGCTAAATTTGCCGATGCTGTAAAC AATGCATTGTCTGGATTTTCCAATTATTCATTTGAAAAGGATGTTTTGCCTGAGGAATTG AAGAAACTATCCACATTAAAGAAGAAATTAAAATTCATCGAAAGAGCTGACGTTGAATTG GTCAAAAACGCTATTGAAGAAGAACTTGCTAAAATGAAATTATAA >CTR86 1692 residues Pha 0 Code 0 ATGCCTATGAACAATTTTCTAGATGAATTCAATTTATTTGATTCAATCATTACCATGATG AAGAACGACCCATGTTGCGTCGAGGATTATGAGCCAATCGTCGAAAACCTGAACCGTATA TTTCAAAGGACGTTTAATGATGAAGAACATAGGAAATCAATGGCTAACTCCCAGCTTTTT TGGGAACGATTAAGAGACACCTTGGAAGCAATGCTGTTGCCAGCGTCGTTAAATGAGAAT AGCTCAATACCGTATACAAGAACAGTGAGGGGCCTTATCTTAATGATGAGAAACCTTGCC GCTGAAAACCAGGAAATACCCCAAAAGCTTTTACTACAAAACCTCGTAATTCGTGGTTTT CTGCATGCAACTAGTGAGTATGTCGTTGACACTCCGCTAATCAAACATCTATACATCGCA TGTTTAACGTGCCTTTTCAATATACAGCAGAACTACTCTACAGTGGATATGACTACTTTT CCAGCTCTTTTACAATTTCTTCAATACCCTTATGGGATCAAATTGGAAGACGGTGAAGAA GAAGAGCATTTCTGGCTACCATATTTATTTCTTTTCAAGACGTATCTCAACAATGATGAA TTTTCCAACGAATTTTTCAGGGATAATGATACACCCCAGAAAGACTATTATTGTGTTAGG 
GATAGAATATTTTTCGATATAGTGACAGCCAAATTCATCCAGGATCAAGAGAATTCCTTT TTAATTGAGAAGGGCAGAAACTATCTGGATGATTCAAAATTGGAAATAACTTCTATTGAC CTATCTGTCTTAGAATGTATTAGCAAAAGTCTTACAACTGCTTCTTTTGGTAAATACCTC AATGGGTTAGAAGAAAGACAGCCAGGAAAATTCACCACTTTGTTGCAGATATTGCAATTG GTTGTAACGAGTAAAGAAGATTGGAATACCTATGAGTTGACTGCAATTATGTCATGGTGC TACCCCATTCTGCAACGTCTTGCATGCAAGGATATTCCTGCCTTTTTCAATAAAAGTTGT AACGATTATGCTCCTTCAGTTGCCATCCAATTACACTCCACTTTACTTTCTTGCCTGGAC ATAATTTCTGACTTGTGCAAATTCAATCATGTTAGAAAATTCTTAATTTCGTATGACTCT GTGAAAATATTGGTATCTCTCTTGGATACTTTCCAAAAGAATTTGTTGAGGATTAATTTT TTGAAAGGAAACGGTGATACGGTGAATGAAATTAAAATCACAGATCATGAAGGTAACAAA ATCGAGGACCGGTTATTAATTTTCAACCGTGTTAATACCAACGAATCCTTTATTAGGGCT GATAATTTTCCCCATTGTAAATTAGTAATAATCGAAATATTGGCATCGTTAGTGTATGCA CATCCTGAAATCCAAGATCAAATAAGAGAATTAGGTGGTCTTGCATTAATTCTTTCCAAT TGTGTCATCGATGATAATGATCCGTTTATCAAGGAAAGATCTATTGTTTGCTTGAAGTTT TTGTTAAAGAATAATGCCAAGAATCAGGAATATGTCAAAAAAATGGAAGCTCAAGACGTT GTTCAAGACGATGCATTGAGCAAAGCTGGGTTTGAAATATCAGTTGAAAAGGGCGGGAAA GTTAGATTAGTATCTAAAGAAGAAGACCCTGGGAACGAGAATTCTGAGATTATTAGCATA GATGAAGATTAA >PWP2 2772 residues Pha 0 Code 0 ATGAAATCCGATTTCAAGTTCTCTAACCTTTTAGGTACGGTCTACAGGCAAGGTAACATC ACCTTTTCCGATGATGGCAAGCAACTACTCTCACCGGTGGGGAATAGGGTCAGCGTGTTT GACTTAATCAACAACAAATCGTTCACGTTTGAATACGAGCATCGCAAAAATATTGCTGCC ATTGATCTGAACAAACAAGGCACATTGCTGATTTCTATTGACGAGGACGGTCGCGCCATC CTTGTCAATTTCAAAGCCCGTAACGTGCTTCACCATTTCAACTTCAAAGAAAAATGCTCC GCTGTGAAGTTCAGCCCTGATGGGAGACTCTTTGCATTAGCCTCAGGCAGGTTTTTACAG ATTTGGAAGACTCCAGATGTTAATAAAGACAGACAGTTTGCTCCCTTCGTCCGCCATAGG GTGCATGCGGGACACTTTCAAGACATAACGTCTTTGACGTGGTCACAAGATTCCAGATTT ATCCTTACGACTTCCAAAGACTTAAGCGCAAAAATATGGTCCGTAGATTCAGAGGAAAAG AACCTTGCGGCGACAACATTTAATGGGCACAGAGACTACGTTATGGGTGCGTTCTTCAGT CATGATCAGGAAAAAATCTACACTGTAAGCAAAGACGGTGCTGTCTTTGTCTGGGAATTT ACCAAGAGGCCATCCGATGACGACGACAATGAAAGTGAAGACGACGACAAGCAAGAAGAA GTAGATATTTCGAAATACAGCTGGAGAATCACAAAGAAACATTTTTTTTACGCAAACCAA GCCAAAGTAAAGTGTGTCACCTTCCATCCAGCAACAAGGCTTTTAGCTGTCGGATTTACT AGTGGGGAATTCCGTCTTTACGATTTGCCTGATTTCACTTTGATTCAACAGCTTTCTATG 
GGGCAAAACCCAGTCAACACCGTTAGCGTCAACCAAACCGGCGAATGGCTGGCGTTTGGT TCCAGCAAACTGGGCCAATTACTAGTTTACGAATGGCAATCGGAATCGTATATCTTGAAG CAGCAGGGCCATTTCGATTCCACAAATAGTCTTGCATACTCTCCGGATGGTTCACGTGTA GTGACAGCATCCGAAGATGGGAAAATCAAAGTTTGGGACATTACATCAGGGTTTTGTTTG GCCACTTTTGAAGAACACACCTCTTCAGTTACTGCTGTACAGTTTGCGAAAAGGGGTCAG GTCATGTTCTCATCATCGTTAGATGGTACGGTGAGAGCGTGGGACTTAATCAGGTATCGT AATTTTAGAACATTCACTGGTACTGAAAGAATCCAATTCAATTGTTTAGCGGTGGATCCA TCAGGTGAAGTGGTTTGTGCCGGGTCCCTGGACAATTTTGACATTCATGTTTGGTCCGTG CAAACTGGTCAATTATTAGATGCTTTGTCCGGACATGAAGGCCCTGTTTCGTGTCTTTCA TTTAGTCAAGAGAACAGTGTCTTAGCTTCTGCATCATGGGATAAAACAATTAGAATCTGG TCCATATTTGGTAGAAGCCAACAAGTAGAACCTATAGAAGTTTATTCCGATGTTTTAGCC TTATCAATGAGACCAGATGGTAAAGAAGTTGCAGTATCTACCTTAAAGGGTCAAATATCC ATTTTCAACATAGAAGATGCCAAGCAGGTGGGCAACATTGACTGTAGAAAGGATATAATA TCTGGTAGGTTTAATCAAGATAGGTTCACTGCCAAAAATTCTGAACGATCCAAATTTTTT ACTACAATACATTACAGTTTTGATGGTATGGCTATTGTGGCTGGTGGTAATAATAACTCC ATTTGTCTATATGATGTTCCAAATGAAGTCTTGTTAAAAAGATTCATTGTGTCCAGAAAC ATGGCTTTGAATGGTACTCTCGAATTTTTAAACAGTAAGAAAATGACTGAAGCAGGTTCA TTAGATTTGATTGACGATGCAGGCGAAAATTCAGATTTGGAGGATCGTATTGATAATTCT TTACCAGGGTCTCAAAGAGGTGGCGACCTGTCCACAAGAAAAATGAGACCAGAGGTTAGA GTTACTTCGGTGCAATTCTCCCCAACGGCGAATGCATTTGCCGCTGCTTCAACGGAAGGT TTATTGATATATTCCACCAATGACACGATATTATTTGATCCCTTTGATCTGGATGTGGAC GTCACCCCCCATTCTACTGTAGAGGCGCTACGAGAAAAGCAGTTTTTAAATGCATTAGTA ATGGCGTTCAGGTTAAATGAAGAATATTTGATCAATAAAGTCTATGAAGCCATACCTATT AAGGAAATCCCCTTGGTTGCAAGTAATATTCCTGCAATATATTTACCGAGGATTCTGAAG TTCATCGGTGATTTTGCCATTGAATCCCAACACATTGAGTTTAACCTAATTTGGATCAAA GCTCTATTATCTGCGAGCGGTGGTTACATAAATGAACACAAATATCTCTTCTCGACGGCT ATGAGGTCGATACAAAGATTTATTGTTAGAGTGGCTAAGGAAGTAGTCAATACCACTACT GATAACAAATACACCTATAGATTTTTGGTATCAACTGATGGGTCCATGGAAGATGGCGCG GCTGATGATGACGAGGTTCTATTAAAAGATGACGCAGATGAAGATAACGAAGAGAACGAA GAGAACGATGTAGTCATGGAATCTGACGACGAGGAAGGATGGATTGGTTTCAATGGGAAG GATAACAAATTACCCTTGTCTAATGAAAATGATTCCAGTGATGAAGAAGAAAATGAGAAA GAGCTTCCTTGA >YCU9 777 residues Pha 0 Code 0 ATGGATGACGATCACGAACAGTTGGTCGAAGAACTGGAGGCCGTCGAGGCCATCTATCCG 
GATCTTCTCTCCAAGAAGCAGGAAGACGGAAGCATCATCGTTGTGAAAGTGCCGCAGCAT GAATACATGACACTGCAGATCTCCTTCCCGACACACTACCCCTCCGAGGAGGCTCCTAAT GTCATCGAAGTTGGTGTCTGCACTTCTTTGGCTAAGCGCGATCTCTACGATACCAAGTAC CTTCAGCATTTGTTCCAGGAAGTGATGGACTCTGTTTTCCACCGCGGATCTGTCTGTCTA TTTGACTTCCTCACAGAACTCGACGGTGTCTTGTACGTTGAACCAGAGGAGGAGACAGAA CCGGTCCAGCAGAGTGACATTCCCACAGACCCCTTCGAGGGCTGGACCGCGTCGGACCCC ATTACTGATAGAGGCTCGACTTTCATGGCCTTTGCAGCACATGTTACCTCCGAGGAACAA GCGTTTGCCATGCTAGACCTACTGAAGACCGACTCCAAGATGCGTAAGGCAAACCATGTC ATGAGTGCATGGCGAATCAAGCAGGATGGCTCTGCGGCAACATATCAAGATTCCGATGAT GACGGTGAAACGGCCGCCGGCTCCAGAATGCTGCACCTCATCACCATCATGGATGTGTGG AACGTCATCGTTGTGGTGGCCCGTTGGTTCGGCGGTGCCCACATAGGTCCCGACCGGTTT AAACACATCAATTCTACGGCAAGAGAAGCTGTTGTCAGGGCCGGCTTCGACTCGTAA >YCV1 1752 residues Pha 0 Code 0 ATGGTGCGTTTTGTTTCAATTTTAAGTTTATTCGGCTGCGCGGCGACGCTTGTCACGGCC CATGATGACATGGACATGGACATGGATATGGACATGGATATGGACATGAATATCGATACG ACAACGTCTCAATCCATAGATGTCTCATCCACGGCTTCAATCGTCCCCGTGCCACATGAA CCAAAACATTTGCATGGCCTTCCTATACTGCAATCGCCCTCGCTTACCCCTGCGGAGAGA TTGTACTGGGAAAACTACAACACCACAACCTACTTTACTACACAGGCTGGGAATAGGTCT GCCCTTCGCTACCACATTATTACGCTGCTCTTGGTTGCATTTGTGCTCTACCCTGTGTCC CTGGCGCTAAGCGCCGCCCGTTCTAGGTGGTACTTACCCCTGCTGTTTGTTAATCTATGC ATTTGTATTTCGTCCGTAATGGCATTGTCCGTGTTCAAAAATACTTTCCCGGAAGAAGAC TGGTATGCGCATAATATCTATGGCACCACTTCTGTGCTACTTCTCGTTTTTATGCTTGTT CACTTCTTCGCTGCGGTGCTTTCTGTCCCCGTCTCATTAGCATCGAAAAAGGAGTACCGT CCGGTTGACACCATCCCTCTGAATGATCTTGAATCTACGCCCGTCATGGTGAATAGTGCA CGTGGCTCTCCAAGTCCTTCTTCCAACAGAGACACGTTGTTCTCGCTCTCTTCAGACACC ACGACCGCCACGGCCACCAATAATAATAAACGGAGACGCGCTGAAGGCGAAGACGAGGGT GATAACACCTCCAACCACGACACTTTGCGCGACGAAGACTACGATAATGATGACGACGAA ATTGCTTCCATTGAAGCGCCACCTCTGCTTCCTCAAGACATACCCGTTTTCCGAATCTTG TTTACCAACACGAAGTACCAGATGCTTGCCGCGCACCTCTCGTGCGTCGCCAACGTGGTC TTTCACATGCTTACCTACCCGCTATTCATGTACATCTTTGTAGACCTAATCATCGGCTTC GCTGTAGGTAACTTGCTCGGCAAGGGCATCCGCATCTTTAATCTCTTGGCCCACTGGATT AAGGGCGGCGTATTTTTTACTCTGGGCGTTGTCTCTTTAGCAAGATACTGCGGTTTCGCA GCTAAGTACGGCTGGGCATGGAACAACATCAGCTTCACCTCTCAACTCACACAAACGCGT 
TCCTCCAATCTTCTTTTCCGGTTTGCTCCTGCGGGGACTTTCACCATGGAATTCGTTGAA TCCTTCCTCATTTTCTTTTACGGGTCCACCAACATCTTCTTGGAGCACCTGGCAGGAAAC GGCGGCGCATGGACTGCCAAGGATTTACAGCATGTGTCGATAAATTCTCACCGGCCCCAA GGTGTGTGGGCTACTCACGGAGTACAAGCTCAACCATTGGCGATTCGAGCATGCCCGCAA ACGGCCACAGACCGATGTAGTTGCTGCCACACCGGGGTACTCTCCAAACCCGTTCCCCGC TTTCACCATATTTTGGACTGGGATTCTGATGTCCCAGCACGCACAGTCCTCGCAATTTTC TACTACCATTCACACGCAATGGGGATACTTGTTGTCCTATGGGTCCTTCTTCCGTCTGCT AACATTTTTGATTCTGTTTTTGGTGCCCAACACCAACAGTGCCGCATCCAAGCCTTTCAC GGAGTTGATCACCTCGTTCTGTCTCCTCTGTGGTGGTCTGGTATTTATGGAGTCCACGGA TCAGTCCATTGA >G10 474 residues Pha 0 Code 0 ATGCCGCGCATAAAGACCAGAAGATCCAAGCCTGCACCTGACGGGTTCGAAAAAATCAAG CCAACCCTCACAGATTTCGAAATCCAACTCAGAGATGCCCAAAAGGACAAGTCGTCTAAG CTCGCAGCAAAGTCCAATGAGCAGCTCTGGGAGATAATGCAACTCCACCACCAGCGCTCT AGATACATATATACTCTGTACTACAAGAGAAAGGCCATCTCCAAAGACCTTTACGATTGG TTGATAAAGGAAAAGTATGCTGATAAATTGCTAATTGCCAAATGGCGCAAAACCGGGTAT GAAAAACTGTGCTGTCTGCGCTGCATTCAAAAGAACGAAACTAACAACGGTAGCACTTGC ATCTGCAGGGTGCCTCGTGCACAGTTAGAGGAAGAAGCACGCAAAAAGGGCACACAGGTG TCCTTCCATCAGTGCGTCCACTGCGGCTGCCGTGGATGTGCAAGCACAGACTAA >HCM1 1599 residues Pha 0 Code 0 ATGATGAATGAAGACATATCCATCATTGATGGCCATAATAGTTTTTTAACGGAAAAAAGC ACCGTGCTATTAACCCAAGCCAAGAGAACACTAGAAGACGAAAAGGAAATGATTACTCCC CCGAGCTCAACTGTGAGAAAAACAATGAAGGAAGTAAATAAGAGGCCGTCGCATCCCCTC TCACCGGATCACTCGTCCCCAATTGCTCCATCTAAGGCCAAGCGCCAAAGATCGGACACA TGCGCTCGGTCCAATGGTAACCTAACCTTGGAAGAAATTCTTCAATCTTTGGAAAGAAGA AGAATAAATGGTGAACTCGCCAAGAAACCTCCATATTCGTATGCAACTTTGATTTGCTTG GCCATTTTGCAATCTCAGGAGGGAAAGCTAACGCTATCCCAGATATATCATTGGATCCAC GTTCACTTCCCTTATTACAAGCAGAAAGATGCTAGTTGGCAAAATTCAATAAGACATAAC TTGTCTTTAAATGATGCGTTCATCAAGACTGAAAAGTCCTGCGATGGTAAGGGTCATTTC TGGGAGGTCAGACCGGGTGCCGAAACAAAATTTTTCAAAGGTGAAAATCGTGGTTATGAA TTTGTAAAGGACTCCTTACAAGACATTGGGAAGTATTTTGAAATAGATTCTACACTTGAT GAATTAGAACAAGTTGAGAGTGGAGAAGGCAATGATGATCTTCCTGACGAGGAAGAAAGA GAGGAAGCAGGGAAATTCCCTTCCATTGAAATTCAATTGAACTCCTCCCCTATACTGAGA GTTTCCCAGTTACATCACATACCGCAATTGAAAACAGACAACAGTGTACTGAACCCTCAC 
GAAAACCTAGAATCGATGCGGAACATGATAGAAAACGATGTCAACAATATAGATTCCTTG GAACCTCCTTATGTCATGAAGAAATATCATACTTCTTTAGGCTTACCGTCGCTGGTGAAT GCCAAAGATCATTTCCAGGCGGGTGTGAAAAACAATAATATCACCCAGGCAAATAGATTT AATACACTCCCTATAACTAGCGCAAAGTCTCCTCAGAATTTCAGAAAATATTTCACCTCA TTCAATTCAAATTTTGAAGATTTATCTCCACTTCGAAGTAATGTAGGGGCTGGTTCTCTA CTCGACCCACTTCCGTATTCCCCATTGAAGCTGTACGATCAGAAAAATCTTGCGCTCATG TCGAAACCACAATCTCAGCAATCATATTCCAATTCTCAACTTCCACCTCCACCTTCCTCT CATGGTTCGGACTTACTTAAAACACCCAAGATGAGGCATTCCGATGGCTTAGAGAAAACC CCATCGCGGTTGATAAGCACACCTAAGGACGGTAACTCGATTTTGAGGAAATGGCAGACT CCTTCACACCTTTTTGAAGATTTGTACTGTTCTCCGCTATTTAGAGCTATAGAGACTCCA ATCAGGTATATCACGACGCCGGGGGGCAACTTTGGAAACCCAAATTTCACCAAGAAAGTC CTCTGCACCCGATGTCCTCACAAGCGCAACGAATTCCAAATTTGCTTCAAGCGGGCTGTT TGGCGTGGATGTTTATTCTGTTTGGAAGCGCGCAACTGA >RAD18 1464 residues Pha 0 Code 0 ATGGACCACCAAATAACCACTGCAAGCGACTTCACGACTACTTCAATACCGAGCCTGTAC CAATTGGATACACTTTTGAGATGTCACATTTGTAAAGATTTTCTAAAAGTCCCCGTCTTA ACACCTTGTGGCCATACATTTTGTTCCCTTTGTATTAGAACACATTTGAATAACCAACCA AATTGTCCTCTCTGCCTTTTCGAGTTCAGAGAGTCCTTGCTGAGAAGTGAGTTCCTGGTC AGTGAAATAATTCAAAGTTATACATCCCTACGATCTTCCTTACTAGATGCACTAAGGATA CCGAAGCCTACCCCTGTCCCTGAGAATGAGGAAGTACCAGGTCCTGAAAATTCTTCATGG ATAGAACTCATATCAGAGTCTGAAAGTGACAGTGTAAATGCCGCTGATGATGACTTGCAA ATTGTTGCAACAAGTGAAAGAAAACTTGCCAAAAGATCCATGACTGATATATTACCACTG AGTTCCAAACCATCCAAAAGGAATTTTGCAATGTTCAGAAGTGAACGTATCAAGAAAAAA TCAAAGCCAAATGAACAAATGGCCCAGTGCCCCATATGTCAACAATTTTATCCTCTTAAA GCCCTTGAAAAAACACATTTGGATGAATGCCTAACTTTACAATCACTAGGCAAAAAACCA AAAATTTCTACCACTTTCCCTACAGAGTCAAATCCACATAACAAAAGTTCATCCAGATTC AAGGTACGAACTCCAGAAGTCGACAAAAGCTCATGTGGTGAGACCTCACATGTGGATAAG TATTTAAACTCAATGATGAGTGCAGAACACCAAAGATTGCCGAAGATCAATTTTACGTCT ATGACTCAATCCCAAATAAAACAAAAACTGTCATCGTTGGGACTGTCAACTAATGGTACT AGGCAAAACATGATTAAAAGATACAATCACTACGAAATGCTTTGGAATTCTAATTTTTGT GATTCTCTAGAACCTGTTGATGAAGCTGAACTAAAAAGACAGTTGTTAAGCTGGGATGTT TCACACAATAAAACCCCCCAAAATAGTAGCAACAAGGGTGGAATTTCTAAATTAATGATA ATGAAGAGTAATGGGAAATCTTCTTCATATAGGAAATTACTTGAAAATTTCAAAAACGAT 
AAATTTAATAGGAAAGGATGGATGGTTATGTTTCGGAAGGATTTTGCTAGGCTTATCAGG GAAGCAAAAATGAAAATAAAAACAGGTTCATCGGACAGTTCAGGTTCAGTGGGACATTCT AATGATGGAGATGGTGTTGAAAAAGTTCAAAGTGACCAGGGAACCGAGGATCAGCAAATG GAGAAGGATCAGGACACTGTTATCAACGAAGATAGAGTTGCTGGTGAAAGAAATTTGCCT AACGAAGATTCAACTGATGCTGACTTATCAAGAGAATTAATGGACTTGAATGAATATAGT AAAGACCCACCCGGTAACAATTAA >CYPR 957 residues Pha 0 Code 0 ATGTGGTTGAAATCCTTGCTGCTCTGCCTGTACTCCTTAGTACTCTGCCAAGTCCACGCT GCACCTTCATCAGGGAAGCAGATTACCTCCAAGGATGTTGATCTTCAGAAAAAATATGAG CCCAGTCCCCCCGCCACACATCGTGGAATAATCACTATCGAATACTTTGATCCCGTTTCG AAGTCGATGAAAGAGGCGGATCTGACTTTTGAGTTGTACGGTACTGTCGTGCCCAAAACT GTGAACAACTTTGCTATGCTGGCCCATGGTGTTAAGGCAGTTATCGAAGGGAAAGATCCC AATGATATACATACTTACTCGTACCGTAAGACCAAAATCAACAAGGTTTACCCTAACAAG TATATCCAGGGTGGTGTGGTTGCCCCAGATGTGGGTCCTTTCACCGTCTATGGGCCCAAA TTTGATGACGAAAACTTTTACTTAAAACATGACAGGCCTGAAAGACTCGCAATGGCCTAT TTTGGACCTGATTCTAACACCTCGGAATTCATCATCACCACTAAAGCCGATGGAAATGAG GAATTGGATGGCAAAAGTGTCGTGTTTGGTCAAATAACTTCTGGTCTAGATCAACTAATG GATGCTATTCAATACACAGAAACAGACGAATATGGAAAGCCTCAGCATGAATTACGGTTC CTGTATTTCGTTCTAGAAATCTTAAAAATTAGTAACATCTTAGATTTGCACGCTGCGTAC ACAGAAAAAGTCGAGAAGTTTAGAAATGGCGATGTGTCTGTTGGCTCCACTTTGGAAAAC ATCTTCCGTAACGATAAAGCCTACACACCTTTAACCACCTCCACTGGAACCACCGCCTAT GATTTAAACCACCCAATTTCCAGAGCCTTGATGTGTTTAACTGTTCTTGGCCTTTGTTTC ATTGCCTACAAGGGCATGCACGAAAAGCCTCATACGGTTTCATTAAGACACAAGTAA >YCW1 366 residues Pha 0 Code 0 ATGATCAGTTCGTGTGTTACTAGATGTTTTGGTAGGGGTAAATGCCTTCCAGGGCCTGCC ACTGCCTCGATATACCAAACGATAAGATGTATATCCACTAATTCAAATAAAGCTGCTGAG GCGCCAATATTTCCAAAGCTGGAAGACGTGAAGATGCATGAGCTCATAGGAAACAACAAT TTTGGTAAAAAGACCTACTACGTGGAGAGAAGCAGGACCGGAAATCTACCGGTGTATTCC GCTTATAAAAATGGAGGTAACAAGATTATCACGGAGATCAGAAAGATTGAAGGAGATGTA ATTCAACTAAGAAATGACTTGCAGGAGCAACTGCCTTTCATACCCAAAAAATCATGGCTG TGGTGA >YCW2 1548 residues Pha 0 Code 0 ATGTCCACCCTGATTCCTCCACCTTCTAAGAAACAAAAGAAAGAGGCTCAACTTCCCAGA GAAGTAGCTATTATTCCGAAAGATTTACCCAATGTTTCAATCAAGTTCCAAGCTTTAGAT ACTGGTGACAATGTAGGTGGCGCCCTGAGAGTTCCCGGTGCTATCTCCGAGAAACAGTTA 
GAAGAACTTTTAAATCAATTGAACGGTACTTCAGACGATCCAGTGCCATATACCTTCAGC TGTACAATTCAAGGTAAGAAGGCCAGTGACCCTGTGAAGACGATTGATATAACAGATAAC CTATATTCTTCATTAATAAAACCAGGCTATAACAGTACAGAAGATCAGATCACGCTACTG TATACGCCAAGAGCAGTTTTCAAAGTCAAGCCGGTAACTAGAAGTTCATCAGCCATTGCA GGTCACGGTTCCACAATTTTGTGTTCTGCCTTCGCACCACATACGAGTTCTAGGATGGTA ACCGGTGCAGGTGATAATACTGCAAGGATTTGGGACTGTGACACCCAAACGCCAATGCAT ACTCTAAAGGGTCACTACAATTGGGTTCTCTGCGTTTCCTGGTCCCCCGATGGAGAAGTA ATTGCTACGGGATCCATGGACAATACCATAAGATTATGGGACCCAAAAAGCGGTCAGTGT CTAGGTGATGCTCTCAGAGGTCATTCCAAGTGGATCACTTCTTTAAGTTGGGAACCTATA CATCTTGTGAAGCCGGGCTCCAAACCAAGATTAGCTTCATCTTCTAAGGATGGTACTATT AAGATTTGGGACACTGTGAGCAGAGTTTGCCAGTATACGATGAGTGGTCACACAAATTCA GTGTCTTGTGTCAAATGGGGCGGCCAAGGTCTATTGTATAGTGGCTCTCACGATAGAACC GTACGTGTATGGGACATCAATTCGCAGGGCAGATGTATCAACATTTTGAAGTCGCATGCG CACTGGGTTAATCACTTATCTTTATCTACAGATTACGCATTGCGCATTGGTGCATTCGAT CATACAGGTAAGAAGCCTTCTACACCAGAAGAAGCCCAGAAAAAGGCATTGGAAAATTAT GAAAAAATCTGTAAAAAGAATGGAAATTCAGAAGAAATGATGGTTACTGCAAGCGATGAT TATACCATGTTTTTATGGAACCCACTAAAATCTACCAAGCCTATAGCAAGAATGACCGGT CACCAAAAATTAGTCAATCATGTGGCGTTCAGCCCTGATGGTAGGTATATTGTCTCAGCG TCTTTTGATAACTCTATCAAACTTTGGGACGGTAGAGATGGTAAGTTTATCTCCACATTT AGAGGGCATATAGCCAGCGTATACCAGGTTGCGTGGTCATCGGACTGCCGACTACTGGTG TCATGTTCCAAAGATACCACGTTGAAAGTGTGGGATGTAAGAACTAGAAAACTTTCTGTT GACCTCCCTGGTCATAAAGACGAAGTTTATACCGTCGACTGGAGTGTCGACGGTAAAAGA GTGTGTAGTGGTGGGAAAGACAAGATGGTAAGATTGTGGACGCATTGA >SSK22 3945 residues Pha 0 Code 0 ATGATGATGGATATACTGAATACACAGCAACAAAAAGCGGCTGAAGGCGGGAGAGTTCTG GCTCCTCATACCATCTCAAGTAAGCTCGTGAAGAGATTATCAAGTCATTCCAGCCATAAA CTATCAAGATCTGATTTGAAAGCATTGGGTGGCTCGGAAACAATAAGCGACGGCCCCAGT CAGCTGACTTTTAAGGACCGATACGTTTTCAATGAATCGCTATACTTGAAAAAGCTAAAA AAGACCGCTTTAGATGACTACTACACGAGGGGCATAAAACTCACTAACCGCTACGAGGAA GACGACGGTGATGACGAAATTATTCGGTTGTCTAATGGCGACAGAATTGATGAAGACCTG CACTCAGGTGTCAAGTTTTTCTCCACTACACCTTATTGCAGGAAAATGAGGTCAGACAGT GATGAACTAGCTTGGAATGAAATTGCGACCGAACGGTTCAAATGGCAGTCAATGCTGGCC AGAGTGCTGAAGGGAGATATTGTTAAAGGTGAAAAGACGAGGATTGCTAACCAAGTCAAG 
AAACCAGGGTTAAATAAGGAGCTCTCAGATGAGATATGGCTCGAATTGAAGGCATGGCTG AATGGGAGGACCATGCAAGAGATGGAACAGTCGCTTACATATTTAAGAGATAGTTCAGAT TCCGTTTTTGAAGAGATAATGAAGTTTCAAATTCCACAGGGCAAGATATTGAGCCTGGAT GCACTGGAGGCCATCTTACAAGACCTCATGAACAGATATCACAGCGTTGTCTCTTATTGG CCTAACTTGAAAAAAATGTATAAGGATAAACCAATCACCAATACTGCAGAATTTACCGCT AGAATAGACGTAATGAATTCTTGGCTGAACTTTAAAACGAACTTAACGTTGAGGAGGCAA GAGTTGGACGACTGGATAAACCGTTTCTCACCGATAAGTAGTTCGGATAATTGCCAAGAG GATTTTGATGGTGTGCCCCAATGGAACTGCAAAATGAAGATTCTTGCAGAACAATTGATG AAGGAAAAGAACATCGAGTCTATATTCCAAAAAAAAATTTTCTATCCGCTATCACCTTGG ATGTTCAAACTGAAACTACATTTTATAGTCTACAGAGAAACTTTGACAAAGATGAACATA AAATATCCTTATGAAAGGTTAAGATCACTACTGGCGTTCCCCGTCTATTTAATCAAAGAA GTTATTTTGACTAGATTGTCATATGCACGAAAGCTTAAAAATCCAACAATGATGATGATC GATCAAATGATCGATGATTTTAACGCTTTTATTCGACTTTCTGTGCAATTGAAGTACACA CTGACAAAATATTGCTCCAATTTGCCGTTCGATGTGGATTTTGACCCGACGTTCGAAAAT ACTGTAATAGAAGCCATTCGTTATTTATTTTTTCTGTTGAATTTAAAGTTGATTGATTCC AGTAAACAAAATTTCAAAGCACCCGATCTACTCTTGAAATACTGGGATCACCTAAAAAAC ACCGGTCACTATATTAACGGTGCAGAAACCGTGATTCCAAATGAATTTCTCAAGTTAACT TTGAGACTCGTACATAAATTGCAATTCTATCTTTTGAAACAACAAAACTTCCCACCAACA TTTGCTAACGCTTCAGAAGCAGAAAAATGGCTAAGTTCCATTTTCGAAAATTTGGGTGCC ATGAAAAGAAAGCTGAACAGGTTCAGCAATATTCTAGTCAAGGCGTTCCAAAATTCTGCT GTTTATCAGATTAATCATAATGCACAACTTGTTAAAAAGTTAAAAGATGCTCACTATTTT TTGGTATACTCCGGTAACACTTTTGAGTCTAGTGGTGTATATATGTTTGCTGCTCCTGAA TTATTAGGTTGTGACAATGATACCATCTTAAGAATTTTGCGAAATAAATCCATTGGCTGT GATTTGGTCCCAAAGCTTGACATTGGAAATAATTTGAATGTGTATGATATAACAACAAAA GAAACAGATTTGAACATTCTAGTATCGAAAGGGGAGGATTCCAAAGGAATTCCTTACTAC CGAGTAGTAGCAAATTCGTCAAGTGATTTGGACAGGCATGCTCATCAGTCCAAAAAGAAG AATTTTTCAACAGACCCTTTTGATCAGCACCTTGATGAAAAGAACAATGAAGTTTTTGAA TTGGAAGTTGCTTTGAGCTCATTGGGTGCACTAGTTGTACTATATCCTGGAGAGCCAGTA GTTTGGGATGGACCAGTATATAAGCTTCCAGGTAACAACCTTTTTGCATCCAACGAAATG GATTTAGGGAAAATTGGTAACCCAAATACGTTGATTTTACTCAATCAAGGTTCTAATTAT GCACTGACTTATCAAATCGACAAGTTTAATCAAACGGTAGGTGATTCTGTTTCATTCATA GAGAAACGTTGTTCACTCAATTCAATTGAATCCTCCCTACAAAAAATCAATAAGGCATAT 
TACAAACTTACTTATACAGTATTGAACAACTACAAAGGAATTCTAGGTAGCTTTATGAAG CAATGTCCGGGAAATGAGTTGTTAAATTCGATATTCATGTTTGGAAGGGATTTTGGAAGA AGTTTCCTTAAATATAACGCCTTTAGCTCAAAGAGGAAGTACGTTATCATCTTTCTGATG GTTAAATTAGGAATGAACTGGTTGAAATTCCTTGTTGAAGAGTGTGATCCTACCGATCAG CGAACTTTCCGATGGTGCGTTCTTGCAATGGATTTTGCGATGCAGATGACTAGTGGTTAT AATATCCTGGCGCTGAATGTAAAGCAATTTCAAGAACTGAAGGAGAGGGTATCAGTATGT ATGTCATTATTAATTTCACATTTCGACGTTATGGGTGCACGAGCCACTGAAGCTGAAAAT GGCATGCAACAGGCAAGATTGAATATTGATACTGAAGAGAATATTGATGAAGAGGCCACC CTAGAAATAAACAGCAGGTTGAGACTGGAAGCTATAAAGACGTTGGAAAAGACTATGAAG AGGAATCCCAGGCAAATGGGTAAGGTATTGGATGCTACAGATCAGGGAAACAAATACCTA CTATCGCTAGCATCCTCATTATCGAATGTATCAATGAGGTGGCAAAAAAGAAGCTTCATT GGCGGTGGAACATTTGGACAGGTATACTCTGCAATTAATCTGGAAAACGGTGAAATCTTA GCTGTTAAGGAAATAAAGATACACGATACCACAACAATGAAGAAGATTTTTCCCCTGATT AAAGAAGAGATGACCGTATTGGAAATGTTAAACCATCCTAATATTGTCCAGTACTATGGT GTCGAAGTACATCGCGATAAAGTTAACATCTTCATGGAATACTGTGAGGGTGGTTCTTTA GCCTCGTTATTGGATCATGGAAGAATTGAAGATGAAATGGTAACACAAGTGTACACATTC GAACTATTAGAAGGTTTGGCATATTTGCACCAATCTGGCGTGGTGCATCGCGACATTAAA CCGGAGAATATCTTGCTGGATTTCAATGGAATCATAAAATATGTGGATTTTGGTACGGCA CGTACCGTTGTAGGATCTAGGACTAGAACTGTGCGGAACGCAGCCGTTCAAGATTTTGGA GTAGAAACAAAGTCCCTCAATGAAATGATGGGGACACCGATGTATATGGCTCCAGAGACT ATTTCAGGCTCGGCAGTTAAGGGAAAACTTGGAGCGGACGATGTATGGGCATTAGGATGT GTTGTGCTAGAAATGGCCACAGGTAGACGACCTTGGTCTAACTTGGATAATGAATGGGCC ATCATGTACCACGTTGCTGCAGGTCGAATACCGCAACTACCCAATAGAGACGAAATGACT GCAGCGGGAAGAGCCCTTCTTGGAAAGGTGTTTGGTTCAAGACCCCACTATGAGGGCTAC TGCTGTGGAACTACTGATAGACCCTTGGATGATACAAATCCGTGA >SOL2 948 residues Pha 0 Code 0 ATGACTACGACGGTACCCAAGATATTCGCGTTTCACGAGTTTTCAGACGTGGCAGAGGCC GTAGCTGACCATGTAGTCCACGCGCAAGACGGTGCATTGGCTCCAAAGAACGAGAGGAAA CACTCTGTTCCCAACATCAGCATGAATGCACTGGATATGACGAGAGAGGCCTCTTGCAAA AGCACAGCATCTGCCGCGGAAGGGAAAAGTGGTAGCAGTGGTAGTGGCAGTGGTAGCAGT AAGCCCAAAAAGGAGAAACGGTTCAAGATTGCTCTCTCCGGTGGGTCATTGATCGAAGTG CTACACGAAGGTCTGCTAAAACGAGACGATGTACGGTGGGGAGACTGGGACATTTACTTT GCAGACGAGAGACTTGTACCCTTCAGCTCGAATGAAAGCAATTATGGATGCGCCAAAAGG 
AAGATTTTGGACCTGATAGACACGGCGAAGTATGGAACTCCGAAGGTGTACCACATTGAC GAGTCATTGATTGACGACCCGCAAGAATGCGTTGATAACTATGAAAAGGTGCTAATCCGC GGGTTTGCCGGTAGAGATTCCGTCAAACTTCCGATGTTCGACTTGTTCCTGCTTGGTTGT GCCCCCGATGGTCATATCGCATCACTCTTCCCTAACTTCCAGGACAATCTACGTGAGAAA CTTGCATGGGTGGTGCCCGTGGAGAACGCTCCTAGTGGGCCCTCGACCAGAATTTCGCTG ACTATACCTGTAATCTGCCATTCTCACAGGGTTACTTTCGTTGTCGAAGGTGCAACCAAG GCGCCCATCATCAAGACCATTATGGAAAGGCCTGAAAAGGGCCTACCTAGCAGTATTGTC AACGAAGGTGCTGCTGGTCGTGTATCATGGTTTGTTGACGACGATGCTCTTACGGACGTC CTCGTCACCAAAAAAAAGTATAAATTCCACCAAGGTTTGTCTATTTAA >ERS1 783 residues Pha 0 Code 0 ATGGTGTCGTTAGACGATATACTAGGTATCGTGTATGTTACGTCATGGTCGATATCGATG TATCCACCGATAATCACCAATTGGCGCCATAAGTCAGCGAGCGCGATATCGATGGATTTT GTCATGTTAAATACGGCAGGTTACTCTTACCTGGTCATATCCATATTTTTGCAATTGTAC TGCTGGAAAATGACGGGTGATGAGTCTGACTTGGGCAGGCCCAAGTTGACGCAATTTGAT TTCTGGTATTGCCTGCATGGGTGCTTGATGAATGTTGTCTTATTGACCCAGGTGGTAGCT GGAGCGAGAATCTGGCGATTTCCAGGTAAAGGTCACCGCAAGATGAATCCATGGTACCTA AGGATTTTACTCGCATCACTGGCCATTTTTTCACTGCTAACCGTACAATTTATGTACTCC AACTACTGGTACGATTGGCATAACTCAAGAACTCTGGCGTATTGCAACAATTTGTTTTTA CTCAAAATATCGATGTCACTAATCAAGTACATCCCACAAGTGACGCATAACTCGACAAGA AAATCTATGGATTGTTTCCCCATTCAGGGTGTGTTTCTAGATGTCACTGGCGGTATCGCC TCGCTGCTCCAATTGATTTGGCAGTTGTCTAACGATCAAGGTTTCAGTCTGGATACGTTC GTGACAAATTTTGGAAAAGTGGGACTGTCAATGGTAACTTTAATATTCAACTTCATCTTT ATCATGCAGTGGTTTGTATATCGATCTCGAGGCCATGATCTGGCGTCAGAGTACCCGCTG TAG >PAT1 2394 residues Pha 0 Code 0 ATGTCCTTCTTTGGGTTAGAAAATAGCGGTAATGCGCGGGATGGTCCTCTGGACTTTGAA GAGAGTTACAAGGGCTATGGCGAGCACGAACTTGAGGAGAACGACTATTTGAACGACGAA ACATTTGGTGATAATGTTCAGGTTGGTACCGACTTTGATTTTGGAAATCCTCACAGCAGC GGCAGCAGCGGCAACGCAATTGGTGGTAATGGCGTCGGTGCCACGGCTAGATCATATGTT GCAGCTACTGCAGAAGGAATTAGCGGCCCTAGGACCGATGGAACGGCAGCAGCAGGACCT CTAGACCTGAAGCCAATGGAATCTTTGTGGTCTACTGCACCACCTCCAGCAATGGCGCCT TCACCCCAAAGTACAATGGCTCCGGCTCCTGCTCCGCAGCAAATGGCCCCCCTACAGCCA ATCTTGTCGATGCAAGACTTGGAAAGACAACAACGTCAAATGCAGCAACAGTTTATGAAT TTCCACGCCATGGGTCATCCACAGGGTCTCCCACAGGGTCCGCCTCAGCAGCAATTTCCA 
ATGCAGCCTGCGTCGGGTCAACCAGGTCCCTCACAATTTGCGCCTCCACCTCCACCTCCT GGCGTTAATGTGAATATGAATCAAATGCCAATGGGTCCTGTACAAGTTCCAGTTCAAGCT TCGCCTTCACCCATCGGTATGTCCAACACTCCTTCTCCAGGCCCTGTGGTTGGCGCAACT AAAATGCCTCTGCAAAGTGGACGCAGATCGAAGAGAGATTTGTCGCCTGAAGAGCAAAGA CGTTTGCAGATTCGTCATGCCAAAGTGGAGAAAATCTTGAAATACTCAGGTTTAATGACT CCTCGTGATAAGGACTTCATCACCAGATATCAGTTGTCTCAAATTGTCACTGAGGACCCT TACAATGAGGATTTCTACTTCCAGGTCTACAAGATTATCCAAAGAGGCGGTATCACGTCC GAATCCAACAAAGGTTTGATTGCTAGGGCGTATTTGGAACATTCTGGACACAGACTCGGT GGTCGCTATAAGAGAACCGATATTGCCCTACAGAGAATGCAAAGTCAAGTAGAAAAGGCT GTCACTGTGGCTAAGGAAAGACCTTCTAAGTTGAAGGATCAACAAGCGGCTGCTGGTAAC TCTAGCCAGGATAATAAGCAAGCAAACACGGTTCTGGGCAAAATCTCTTCCACTTTGAAC AGCAAGAATCCAAGAAGACAACTGCAGATCCCCAGACAACAGCCTTCTTCTGACCCCGAT GCGCTAAAAGACGTCACTGACTCTCTGACCAACGTGGACTTGGCCTCTTCAGGGTCCTCC TCTACGGGCTCTTCTGCCGCTGCTGTTGCTTCTAAGCAAAGAAGAAGATCTTCATACGCG TTCAACAACGGTAATGGTGCCACAAATTTGAACAAATCTGGGGGCAAAAAATTCATTCTT GAGTTAATTGAAACAGTTTATGAAGAGATTTTAGACTTGGAAGCTAACTTGAGGAATGGC CAGCAAACTGACAGCACTGCAATGTGGGAGGCCCTTCACATCGACGACAGTTCATATGAC GTAAACCCTTTCATTTCGATGCTATCATTTGATAAAGGTATCAAGATTATGCCTAGAATT TTTAATTTCTTGGATAAGCAGCAAAAATTGAAAATCCTGCAAAAAATCTTCAATGAATTA TCACACTTGCAAATCATCATATTGAGTTCCTACAAGACTACACCAAAACCAACTTTGACA CAATTGAAGAAAGTCGATCTGTTCCAAATGATCATATTAAAGATCATTGTCTCGTTTTTG TCTAATAACTCCAATTTTATCGAAATTATGGGTCTGTTGCTACAGTTAATCAGAAACAAC AACGTTTCGTTCTTGACCACCTCCAAAATTGGTCTAAATTTGATCACCATTTTGATTTCT CGTGCCGCATTAATCAAGCAAGATTCATCAAGATCTAATATTCTTTCCTCTCCTGAAATC TCCACATGGAATGAGATTTATGATAAATTATTCACTTCATTGGAAAGTAAGATTCAGCTG ATTTTCCCTCCAAGGGAATATAACGTCCACATCATGCGTTTACAAAATGACAAGTTTATG GATGAAGCATACTTTGGCCAGTTCCTAGCTAGTTTAGCACTAAGTGGAAAGCTAAACCAC CAGAGAATCATTATTGATGAAGTACGTGATGAAATCTTTGCCACTATTAACGAGGCGGAG ACCTTACAAAAGAAAGAGAAAGAATTGAGTGTATTACCTCAGAGGTCTCAAGAATTAGAC ACAGAGTTAAAATCTATTATTTATAATAAAGAGAAACTATACCAAGATTTGAATTTGTTC CTAAACGTTATGGGGTTGGTGTATCGCGATGGTGAAATATCAGAACTAAAGTAA >SRB8 4284 residues Pha 0 Code 0 ATGAATAACGGTTCTGGTCGATACTTGCTGACTCCCCCAGATGATCTTCACCCCTATGTG 
CCAAGCTCGAAACCTCAGGAACAAGTATACCCTGATTTCAAGCCTTGGGAGCACACTGCA GCAGAAGATCAAATCCTAGCAAACTTTGTGGCTAAGGGCTTTTACCATACACCAATGGTA AATTTCGAGTCCATATCTGCGAGATCATCTGTTCATGAATCATTAGTCACTCAATCCAAC ATTCTTTCCCAGCAATTCGACAAAATTATCAAGATTAGAGAAGACCACATTAATAAGATC CCCTCAAATTCCACGACGACATTACACGGGCCTGGTTTTCAGTTGCCTAATAGAATAACC CTTACTGATCATAGAAAGGAAACGTGGTTGCATGAATTGAGTTCGTCTCACACTTCGCTG GTCAAAATTGGCAAGTTTATACCTCACGGCTTGAAAAGAAGGCAAGTCATCGAGCAGTGC TATTTAAAATTTATACCATTGAAAAGGGCGATTTGGTTGATAAAGTGCTGCTATTTTATC GAATGGAAATCGAACCACAAAAAGAAGAGGTCAAATGCTGCTGGGGCAGATGATGCCATT TCCATGCACCTGCTAAAGGACTGGACGGATACCTTTGTATACATCCTGGAAAAGCTCATC TTTGATATGACAAATCACTATAACGATTCTCAACAACTGCGTACGTGGAAGAGGCAGATT TCTTATTTTTTAAAACTTTTGGGGAATTGCTACTCACTAAGATTGATCAATAAGGAAATC TTTCATCATTGGCTTGTAGAGTTTATAAATAAGATGGAAAACTTCGAATTTTTGCCATTA TCTTTACATATTTTGATGATTTTTTGGAACGACATCTGCCAAATTGATACAAATGCTCCT GTTGCGGCTACAATAACATCAAGTCAAAAAGAGCCCTTCTTTCTGGTAACAAAAATCACT GATATGCTATTGCACAAATATTATATTGTTTCCAGCAGCAAATCAATGATAAATGACGAG AACTACATCATCAATGATATAAAGAAAAACAACAAGATAAAGTTGAATATTCTCAAAATA TTATCCAGTTTAATTTTGAAAATTTTTCAAGAACAATCTTTAGAGGTGTTTATATTTCCC ACATCTAACTGGGAAATTTACAAGCCCTTACTTTTTGAAATAGTCTCAAACGCCGACACT AATCAAAATTCTGATATGAAGAAAAAATTAGAGTTAATTAGTTACAGAAACGAGTCATTG AAGAATAATTCTTCTATACGAAACGTAATAATGTCTGCCAGCAACGCAAATGACTTTCAA TTAACTATCGTCACCTGTAAACAATTTCCAAAACTATCATGCATTCAATTAAATTGTATA GATACTCAGTTCACCAAGCTACTGGACGATAACCCTACAGAATTCGATTGGCCCACTTAC GTTGACCAAAATCCCCTTACAATGCATAAAATTATTCAATTAATTCTCTGGTCCATACAT CCATCAAGGCAATTTGATCACTATGAATCTAATCAACTGGTAGCGAAATTATTACTATTG CGAATAAATTCAACAGATGAGGATTTGCACGAATTCCAGATAGAAGATGCCATTTGGTCA TTGGTTTTCCAATTAGCCAAAAATTTTTCGGCCCAAAAGAGGGTGGTATCATATATGATG CCTTCTTTGTATCGCCTGCTTAATATACTAATTACTTATGGCATCATTAAGGTCCCTACG TATATCAGAAAGCTAATCAGTTCCGGCCTACTTTATCTCCAAGATTCCAATGATAAGTTT GTGCATGTCCAGCTGTTAATTAACTTGAAAATTTCACCGTTGATGAAAAGTCAATACAAT ATGGTATTGAGGAACGTTATGGAATATGACGTTAAATTTTATGAAATTTTTAATTTCGAC CAACTCGTGGAAATCACAGAACAAATCAAAATGCGAATACTCTCCAATGATATAACTAAT 
TTGCAACTGTCGAAAACTCCTCTGAGCATTAAAATCATGGTTGCAGAATGGTACTTATCA CATTTATGTTCCGGTATTTTATCTAGTGTTAACCGCACAGTGTTGCTAAAAATATTCAAG ATTTTTTGTATCGATCTGGAGGTTTTCCACCACTTTTTTAAGTGGATCGAGTTTATTGTC TACCATCAATTGCTAAGTGATATAGAATCTCTGGAGGCATTGATGGACATCTTGCTATGC TACCAAAAATTGTTCTCACAATTCATTAATGACCATATTCTTTTTACGAAGACGTTCATA TTCATTTACAAGAAAGTTTTGAAAGAAAAAGACGTGCCTGCTTATAATGTGACTTCATTT ATGCCATTCTGGAAATTTTTTATGAAAAACTTCCCTTTTGTTTTAAAGGTGGATAACGAT TTAAGGATTGAGTTACAATCTGTTTACAATGATGAGAAATTGAAAACTGAGAAGCTGAAG AATGATAAATCAGAAGTCTTGAAGGTGTATTCCATGATCAATAATTCAAACCAAGCTGTT GGACAGACTTGGAATTTTCCCGAGGTGTTTCAAGTAAACATCAGGTTTCTACTACACAAC TCCGAGATCATTGATACAAATACAAGCAAACAGTTCCAGAAAGCACGAAACAATGTCATG CTTTTGATTGCCACTAACTTGAAGGAGTACAATAAATTTATGTCCATTTTCTTGAAAAGG AAAGACTTTACTAACAAAAATTTAATTCAATTGATCTCTCTAAAACTTCTAACTTTTGAA GTGACGCAGAATGTGTTGGGGCTCGAGTATATTATTCGATTATTACCAATAAACTTGGAA AATAATGACGGCTCATATGGTCTGTTTTTGAAGTATCATAAAGAACAATTCATAAAGTCA AATTTTGAGAAAATTTTACTTACATGTTATGAATTAGAAAAAAAATATCATGGCAACGAA TGTGAAATAAATTATTATGAGATCCTATTGAAAATTTTAATAACTTATGGGTCATCTCCC AAATTACTTGCAACATCTACAAAAATCATTATGTTGTTATTGAATGATAGCGTGGAAAAC TCATCTAATATTTTGGAGGATATTTTGTACTACTCAACTTGTCCGTCGGAAACCGATCTT AACGATATTCCATTGGGTAGTGGACAACCAGACAATGACACTGTTGTAACCAACGATGAT AAAAGTGACGATGATGATCACACAGTCGACGAAATTGATCATGTAGAATATTACGTTATG ATGGACTTTGCCAATCTTTGGGTTTTCCAAGCGTTTACCTGTTTCTGCATCAAAAAAATC ATGGAGAATAATGAGCCAGCAATGGCAATGGAAGACTTGAAGAACTTCATATTCCAAATT ATCGAAATAACTAATTCTAATGATTTATGTTCACAAATATTTGACCAACTGAAGGATATG CAGACCATTGAGATGATAACCCAAATAGTGGAGAAAGATTTCTGCACTTCTTGTTTGCAA AACAACAACCAAAAGATAGATGATAATTACATCGTTGTGGTGATCGAGATTATAACGTCA TTATCGATGAGGTTTCAAAGAGAAACTTCTGGTATGATAGTTATTTCCATGGAGAACTAT CATTTACTAATAAAGATCATAAGACAATTAAGTGAACTGAACGAAGGAAATTTATCTAAG AGAGAAATCCAAATAGATGCCGTCTTGAAAATTTTTAGCTTTCATCAGGATTCCATTTTC CAACGCATCATCGCTGATTTATCAGCTGATAAACCCACAAGTCCATTCATTGATAGCATA TGCAAGCTGTTTGATAAAATATCATTTAATTTAAGATTGAAGCTGTTCTTGTACGAAATT TTGTCTTCATTGAAATCATTCGCCATCTATTCATCCACAATTGATGCCCCAGCATTCCAC 
ACAAGCGGTAAGGTCGAACTACCGAAGAAATTGCTGAACTTACCACCATTCCAAGTGTCC TCTTTCGTTAAGGAAACAAAACTTCATAGTGGCGACTACGGGGAAGAAGAAGATGCAGAC CAAGAAGAATCGTTTAGTTTAAATTTAGGAATCGGCATAGTTGAAATAGCGCACGAAAAC GAACAGAAATGGCTCATTTATGACAAGAAAGATCATAAATATGTCTGCACATTTTCCATG GAGCCGTACCACTTCATCTCCAACTATAATACCAAGTACACAGATGACATGGCTACAGGC AGTAATGATACGACTGCGTTTAACGATTCCTGTGTAAACCTGAGTCTTTTTGATGCTCGG TTTGAGAGGAAAAATCCACATTGA >YCX3 384 residues Pha 0 Code 0 ATGTTGTTCTATAAGCCTGTGATGAGGATGGCGGTGAGACCGCTAAAAAGCATAAGATTC CAGTCCTCATACACCAGTATTACTAAATTGACGAACCTAACAGAATTTAGGAATTTGATC AAGCAAAATGATAAACTAGTCATCGATTTTTATGCTACTTGGTGTGGCCCCTGTAAGATG ATGCAACCACACTTAACGAAATTAATTCAGGCTTATCCAGATGTAAGATTTGTCAAGTGC GACGTGGACGAATCACCAGATATTGCCAAAGAGTGTGAAGTGACGGCTATGCCCACCTTT GTTCTTGGCAAGGATGGCCAACTCATCGGCAAGATCATTGGAGCTAACCCTACTGCTTTA GAGAAGGGAATCAAAGATCTATAA >TUP1 2142 residues Pha 0 Code 0 ATGACTGCCAGCGTTTCGAATACGCAGAATAAGCTGAATGAGCTTCTCGATGCCATCAGA CAGGAGTTTCTCCAAGTCTCACAAGAGGCAAATACCTACCGTCTTCAAAACCAAAAGGAT TACGATTTCAAAATGAACCAGCAGCTGGCTGAGATGCAGCAGATAAGAAACACCGTCTAC GAACTGGAACTAACTCACAGGAAAATGAAGGACGCGTACGAAGAAGAGATCAAGCACTTG AAACTAGGGCTGGAGCAAAGAGACCATCAAATTGCATCTTTGACCGTCCAGCAACAGCGG CAACAGCAACAGCAGCAACAGGTCCAGCAGCATTTACAACAGCAACAGCAGCAGCTAGCC GCTGCATCTGCATCTGTTCCAGTTGCGCAACAACCACCGGCTACTACTTCGGCCACCGCC ACTCCAGCAGCAAACACAACTACTGGTTCGCCATCGGCCTTCCCAGTACAAGCTAGCCGT CCTAATCTGGTTGGCTCACAGTTGCCTACCACCACTTTGCCTGTGGTGTCCTCAAACGCC CAACAACAACTACCACAACAGCAACTGCAACAGCAGCAACTTCAACAACAGCAACCACCT CCCCAGGTTTCCGTGGCACCATTGAGTAACACAGCCATCAACGGATCTCCTACTTCTAAA GAGACCACTACTTTACCCTCTGTCAAGGCACCTGAATCTACGTTGAAAGAAACTGAACCG GAAAATAATAATACCTCGAAGATAAATGACACCGGATCCGCCACCACGGCCACCACTACC ACCGCAACTGAAACTGAAATCAAACCTAAGGAGGAAGACGCCACCCCGGCTAGTTTGCAC CAGGATCACTACTTAGTCCCTTATAATCAAAGAGCAAACCACTCTAAACCTATCCCACCT TTCCTTTTGGATCTAGATTCCCAGTCTGTTCCCGATGCTCTGAAGAAGCAAACAAATGAT TATTATATTTTATACAACCCGGCACTACCAAGAGAAATTGACGTTGAGTTACACAAATCT TTGGATCATACTTCAGTTGTTTGTTGCGTGAAGTTCAGTAACGATGGTGAATACTTAGCC 
ACAGGCTGCAACAAAACTACTCAAGTGTATCGCGTTTCAGATGGTTCTCTGGTGGCCCGT CTATCTGACGATTCTGCTGCCAATAACCATCGAAATTCGATCACTGAAAATAACACCACC ACGTCCACGGATAACAATACAATGACAACCACTACTACCACCACAATTACTACCACAGCG ATGACTTCGGCAGCAGAATTGGCAAAAGATGTGGAAAACCTGAACACTTCGTCTTCCCCA TCATCCGACTTGTATATCCGTTCAGTGTGTTTTTCTCCAGATGGGAAATTTTTGGCAACA GGTGCTGAAGACAGACTGATTAGAATTTGGGATATTGAAAATAGAAAGATTGTTATGATT CTTCAAGGCCACGAACAAGATATTTATTCATTGGACTACTTTCCCTCAGGTGACAAATTA GTCTCCGGTTCTGGTGACCGTACCGTTCGTATTTGGGACTTACGTACAGGCCAGTGTTCA TTGACTTTATCCATTGAAGATGGTGTTACCACCGTCGCTGTATCACCAGGTGATGGTAAA TACATCGCTGCTGGTTCTCTAGATCGTGCTGTGAGAGTTTGGGATTCCGAGACCGGATTC TTGGTGGAAAGACTAGATTCGGAAAACGAATCCGGTACAGGCCACAAGGACTCTGTTTAT AGCGTTGTCTTCACTAGAGATGGACAAAGCGTTGTATCCGGCTCATTAGATAGATCTGTT AAGCTCTGGAATTTGCAGAATGCAAACAACAAGAGCGATTCGAAAACTCCAAATTCCGGC ACTTGTGAAGTTACGTATATCGGGCATAAAGACTTTGTATTGTCCGTGGCCACCACACAA AATGATGAGTACATCTTGTCCGGTTCCAAAGATCGTGGTGTCCTGTTTTGGGATAAGAAA TCCGGCAATCCGTTATTGATGTTGCAAGGTCATAGGAATTCAGTTATATCTGTGGCTGTG GCAAACGGGTCTCCGCTGGGTCCAGAATATAACGTTTTTGCTACTGGTAGCGGTGATTGT AAAGCAAGGATTTGGAAGTATAAAAAAATAGCGCCAAATTAA >YC16 462 residues Pha 0 Code 0 ATGGTTACGTTCAACTGTGAGGTGTGTAATGATACTGTGCCCAAGAAGAATACCGAAAAG CATTATTATAGATGTCCTAACGCGTACTATACATGCATAGATTGCTCCAAGACGTTTGAA GATGGCGTGAGTTACAAGAATCACACGTCTTGCATCAGCGAGGACGAGAAGTACCAGAAA GCGTTGTACAAGGGCAACAAGAAGCAGAAGCAGAAGCAGCAGCAGAAGCAGCAGCAGAAG CAGCACCAGCACCAGCCAGTGGCAACTCCTGCAAAGAAAGTGGAGAAGCCTGTGATCAAG AAGGCAGAGAAAGTGGAAAAGACCTCGAACGGTATCGAGCTTCACAAGGGCAAGTCGTTG TACAAAATTTTGAAAACCATGAAGGATAAAGGGGCAAAAAAGACCTTCTTGAAAAGTCTG GTTGTGGATTCTGAGGGGCAAATCAGGTATGCAAAGGAATAA >ABP1 1779 residues Pha 0 Code 0 ATGGCTTTGGAACCTATTGATTATACTACTCACTCGAGAGAGATCGACGCAGAGTACCTG AAGATTGTCAGAGGCTCCGATCCTGACACCACCTGGTTGATTATTTCACCCAATGCGAAA AAAGAATACGAACCTGAGTCTACCGGTTCCTCCTTTCACGATTTCTTGCAATTGTTTGAT GAAACCAAGGTCCAGTACGGACTGGCACGTGTGTCCCCACCAGGGTCAGACGTTGAGAAG ATTATTATCATTGGTTGGTGTCCTGATTCTGCGCCATTGAAGACAAGGGCCTCTTTCGCC GCCAATTTTGCTGCAGTTGCTAATAATCTGTTCAAGGGTTACCACGTTCAAGTTACCGCC 
AGAGACGAGGACGATCTTGACGAAAATGAACTGTTGATGAAAATCAGTAACGCGGCCGGT GCCCGTTATTCTATTCAGACTTCCTCCAAGCAACAGGGGAAGGCTTCCACTCCTCCCGTG AAGAAATCCTTCACACCTTCCAAGAGCCCTGCTCCAGTTTCTAAGAAGGAACCAGTCAAG ACTCCTTCCCCAGCACCTGCTGCTAAGATTTCTTCCCGTGTTAACGACAACAATGACGAC GACGATTGGAATGAGCCTGAATTAAAGGAACGCGACTTCGATCAGGCTCCCCTGAAACCA AATCAATCATCTTACAAACCAATTGGCAAAATCGACTTGCAAAAAGTGATTGCTGAAGAA AAGGCTAAGGAGGACCCACGTCTTGTTCAAAAGCCAACCGCTGCTGGTTCCAAGATTGAT CCTAGTTCTGATATCGCTAATTTAAAGAACGAATCAAAATTAAAGAGGGACTCCGAGTTT AACTCCTTTTTGGGCACCACTAAACCCCCCTCCATGACGGAATCTTCATTAAAGAATGAT GATGATAAAGTCATTAAGGGTTTTAGAAACGAGAAATCACCTGCTCAATTATGGGCCGAA AGAAAGGCAAAGCAAAACAGCGGCAACGCCGAAACTAAGGCTGAGGCACCAAAACCTGAA GTTCCAGAAGATGAGCCTGAAGGTGAACCTGACGTCAAAGATTTGAAATCAAAATTTGAA GGATTGGCCGCTTCAGAAAAAGAGGAGGAAGAAATGGAAAACAAATTTGCTCCTCCTCCA AAGAAATCAGAACCAACTATTATCTCACCAAAACCCTTCTCCAAGCCACAAGAACCTGTG AAAGCTGAAGAAGCCGAGCAGCCTAAGACTGATTACAAGAAGATCGGCAACCCATTACCC GGTATGCACATTGAAGCGGATAATGAGGAAGAACCAGAAGAGAATGATGATGACTGGGAT GATGATGAAGACGAGGCTGCTCAACCTCCTTTGCCTTCGAGGAATGTTGCGTCAGGAGCA CCAGTGCAAAAAGAAGAGCCTGAACAAGAAGAGATCGCCCCAAGCTTACCTTCTAGAAAC TCGATCCCAGCTCCAAAACAAGAAGAAGCACCTGAACAAGCACCTGAAGAAGAAATTGAA GAAGAAGCTGAGGAAGCCGCTCCACAGCTGCCATCAAGAAGCTCTGCAGCTCCTCCTCCG CCTCCAAGACGAGCAACTCCAGAGAAAAAGCCAAAGGAAAATCCTTGGGCCACAGCAGAA TATGATTACGATGCTGCAGAAGATAACGAACTGACCTTTGTGGAAAATGACAAGATTATC AATATTGAATTTGTCGACGATGACTGGTGGCTAGGGGAACTAGAGAAAGACGGCTCAAAA GGTCTCTTCCCCAGCAATTATGTGTCTTTGGGCAACTAG >KIN82 2181 residues Pha 0 Code 0 ATGACTCAGCAAGAATACCGTTCCCCCTCACAACGCTTATCCAAGGGGAGGAGCATGTCG CTACCCAAAATATTTGCTCGTAATTTGAGATCTCTGCAAAACAATGCACCTCCTGGCAAA AACATCAATGTCAATTGTTTGAACGTCAATTCTTGTTCGTTGTCCGCAAGCCCAAGCTCA CAAATTAATATGGCTTGTAATGGAAACAAGCAAGATCTTCCCATACCGTTTCCCCTGCAT GTAGAATGCAACGATAGCTGGTCAAGCTCCAAACTTAACAAGTTCAAATCAATGTTTAAT CATAACAGATCAAAGAGCAGTGGTACTACAGATGCGTCAACTTCAGAAAAAGGTACGCAT AAGCGTGAACCCCGGTCGACGATACATACAGAGCTGTTACAAAGTTCCATTATCGGTGAG CCAAATGTCCATAGTACTACAAGTAGCACACTTATACCCAATGAGGCGATATGCTCCACA 
CCTAATGAGATCTCAGGTAGCTCTTCTCCGGACGCGGAGTTATTTACCTTTGACATGCCC ACAGACCCGTCATCCTTCCACACTCCTAGCTCCCCAAGTTATATAGCAAAGGACAGTAGA AACCTGAGTAATGGATCTTTGAATGATATTAACGAAAATGAAGAGCTCCAAAATTTCCAT AGAAAAATCAGCGAAAATGGCAGTGCCTCCCCCCTGGCTAACTTGTCATTATCCAATTCA CCAATTGATTCCCCAAGGAAAAATAGCGAAACCAGAAAGGATCAAATACCTATGAACATA ACACCACGTTTAAGGAGGGCCGCTTCCGAACCGTTCAATACGGCAAAGGATGGGTTAATG CGGGAAGATTACATTGCCTTGAAACAACCTCCAAGCTTGGGAGATATTGTAGAACCGAGG AGATCTCGTCGTTTAAGAACCAAGTCATTCGGTAACAAGTTCCAAGACATTACTGTCGAA CCTCAATCCTTCGAAAAAATTAGACTACTTGGCCAAGGTGACGTAGGTAAAGTGTATTTA GTGAGGGAACGCGATACCAACCAGATATTCGCCCTGAAAGTTTTGAATAAACATGAGATG ATCAAGAGGAAGAAAATTAAACGAGTACTCACTGAACAGGAAATTCTCGCGACAAGTGAT CATCCATTTATTGTGACACTGTATCATTCCTTTCAAACCAAAGACTATTTGTATCTCTGT ATGGAATACTGCATGGGAGGGGAATTCTTTAGAGCCTTACAAACAAGAAAAAGTAAATGC ATTGCAGAAGAAGATGCGAAGTTTTACGCCAGTGAAGTAGTAGCAGCTTTGGAATATTTA CACCTACTGGGCTTCATATACAGAGATTTGAAACCCGAAAACATATTACTGCATCAATCT GGTCATGTCATGCTTTCTGACTTTGATTTATCCATCCAAGCAACGGGATCAAAAAAACCC ACCATGAAAGACTCTACGTATTTAGATACAAAAATTTGTTCAGATGGATTCAGAACTAAT TCCTTTGTTGGTACTGAAGAGTATTTAGCTCCAGAAGTAATCAGAGGGAATGGCCACACT GCAGCAGTAGACTGGTGGACTTTAGGAATATTGATTTACGAGATGCTATTTGGCTGTACT CCATTTAAAGGAGATAATTCAAATGAAACATTCTCTAACATTTTAACCAAGGACGTCAAA TTTCCACATGATAAGGAAGTTTCGAAGAATTGTAAAGACCTGATAAAGAAACTACTAAAC AAAAACGAGGCAAAAAGGCTTGGTTCCAAATCAGGAGCTGCAGACATAAAGAGACATCCC TTCTTCAAAAAAGTTCAGTGGTCGTTCTTAAGAAACCAAGACCCCCCTCTAATACCTGCA TTAAATGATAACGGCTGCGAACTTCCTTTTATATTGTCTTGCAATAAACACCCGAAAAGG AACTCAGTGAGTGAACAGGAAACCAAAATGTTCTGTGAGAAAGTTGCAAACGATGATGAA ATTGATGAGGCTGATCCATTCCATGATTTTAATTCTATGAGTTTAACGAAGAAAGATCAC AATATCTTAACCTACTCTGAAAATTATACTACGGAAAAATTCTATACAAAGCAACTTGTA CAAGGCCAAGGCATAACAGCTCACATAGAAGTTTCTTTAAAGACATCATACCTGAACTAT AACATGTTTACAGAAAGATAA >MSH3 3144 residues Pha 0 Code 0 ATGGTGATAGGTAATGAACCTAAACTGGTACTTTTGAGAGCCAAAAGCAGTGCAAATAGA TTTATTTTGTTGAATCTATTAACAATAATGGCGGGACAACCCACAATAAGCAGGTTTTTC AAGAAGGCGGTAAAATCAGAGCTGACGCATAAGCAAGAACAAGAAGTTGCGGTTGGAAAT 
GGCGCTGGTAGCGAATCCATCTGCCTTGACACTGATGAAGAGGACAATTTATCTTCTGTT GCAAGCACAACAGTAACTAATGATAGCTTTCCACTCAAAGGCAGTGTTTCTTCCAAGAAT TCGAAAAATTCAGAAAAGACTAGTGGTACTTCGACAACATTTAATGATATTGACTTTGCT AAGAAATTGGATAGGATTATGAAAAGACGAAGTGATGAAAATGTTGAGGCTGAAGATGAT GAGGAAGAGGGTGAGGAAGATTTCGTAAAAAAAAAAGCCAGAAAGTCCCCTACAGCGAAA CTTACTCCCTTGGACAAACAGGTGAAGGACCTGAAAATGCATCATAGAGATAAAGTGCTT GTTATTAGAGTAGGCTACAAGTACAAATGTTTTGCAGAGGATGCAGTAACGGTTAGCAGA ATACTTCACATCAAACTTGTGCCTGGAAAATTGACTATCGATGAGTCTAATCCTCAAGAT TGCAATCATAGGCAGTTTGCGTACTGTTCTTTCCCGGATGTCAGATTAAACGTTCACCTA GAGAGACTTGTGCATCATAATTTAAAGGTTGCCGTGGTAGAGCAAGCAGAAACAAGCGCT ATTAAGAAGCATGATCCAGGTGCCAGCAAATCAAGCGTTTTTGAAAGAAAGATTTCAAAT GTCTTTACCAAAGCTACATTTGGTGTTAATTCCACCTTTGTCCTTAGGGGGAAACGTATT CTCGGTGATACAAACAGTATATGGGCTTTGTCCCGTGACGTACATCAGGGAAAGGTGGCT AAATATTCCTTAATTTCTGTCAATTTAAATAACGGGGAAGTCGTGTATGATGAATTTGAA GAGCCTAATCTTGCTGATGAGAAACTACAGATACGAATCAAATATTTACAGCCCATAGAA GTACTGGTAAATACAGATGATCTTCCATTACATGTAGCGAAATTTTTCAAAGATATTTCA TGTCCTTTAATACACAAGCAGGAGTATGATTTGGAAGATCATGTAGTTCAGGCAATAAAA GTAATGAATGAGAAAATTCAACTCTCGCCGTCTCTCATACGCTTAGTTTCTAAGTTATAT TCGCATATGGTTGAGTACAATAATGAGCAGGTGATGTTGATTCCTTCTATCTATTCGCCC TTCGCATCAAAAATACATATGTTACTTGATCCTAACTCCCTGCAAAGTTTGGACATTTTT ACCCATGATGGTGGTAAAGGTTCTTTGTTTTGGTTATTGGACCATACAAGGACATCGTTT GGATTAAGAATGTTGAGAGAATGGATTCTCAAACCTTTGGTTGATGTACACCAAATTGAA GAGCGGCTTGATGCCATTGAGTGCATTACATCCGAAATCAACAACAGTATATTTTTTGAA TCGTTGAATCAAATGTTGAATCATACCCCTGACTTATTAAGAACTTTAAATCGCATAATG TATGGTACAACTTCTAGAAAAGAAGTCTATTTCTATTTAAAGCAAATAACTTCTTTCGTT GATCACTTCAAGATGCATCAATCTTACCTGTCAGAACATTTCAAGTCATCAGATGGAAGG ATAGGCAAACAATCTCCTTTACTTTTTAGACTATTTAGTGAATTGAATGAACTACTTTCT ACCACTCAGTTGCCTCATTTTTTGACCATGATCAACGTTTCTGCGGTAATGGAAAAAAAT TCAGATAAGCAAGTAATGGATTTTTTTAATTTAAATAACTATGATTGTTCAGAGGGTATA ATAAAAATTCAAAGGGAAAGCGAATCAGTACGGTCACAGTTAAAGGAAGAATTGGCAGAA ATACGAAAATATCTCAAACGTCCATATCTAAATTTTAGAGATGAAGTTGATTACTTAATC GAAGTGAAAAACTCGCAAATTAAGGACTTGCCAGATGATTGGATAAAAGTTAACAATACG 
AAGATGGTCAGTAGATTTACCACTCCCAGAACCCAGAAACTGACTCAAAAGCTAGAATAT TACAAGGACTTATTAATTCGGGAATCTGAACTACAGTATAAAGAATTCTTGAACAAAATT ACGGCAGAATATACAGAGCTCCGTAAAATTACACTCAATTTGGCGCAGTATGACTGTATT TTGTCGTTAGCAGCCACATCATGCAACGTAAATTATGTTAGACCAACTTTTGTGAATGGT CAACAAGCCATAATCGCAAAAAATGCAAGAAATCCAATTATCGAGTCGCTGGATGTTCAT TATGTACCAAATGATATCATGATGTCCCCAGAAAACGGTAAAATCAATATTATAACGGGG CCGAATATGGGTGGGAAATCATCTTATATTAGACAAGTGGCACTGCTTACTATAATGGCA CAGATCGGCTCATTTGTCCCCGCAGAAGAGATCAGATTAAGCATATTTGAAAACGTACTC ACTCGAATCGGTGCGCACGATGATATTATAAACGGTGATTCTACTTTTAAAGTGGAAATG CTTGATATCCTACACATCTTGAAAAATTGCAATAAACGGTCTTTACTATTATTAGACGAA GTGGGAAGAGGTACTGGCACGCACGATGGTATAGCAATTTCTTATGCTTTAATAAAGTAT TTTTCTGAGTTAAGTGACTGCCCCTTGATATTATTTACTACCCATTTTCCCATGCTGGGA GAAATCAAATCTCCGTTAATAAGGAATTATCATATGGATTACGTGGAAGAACAAAAAACT GGCGAGGACTGGATGAGTGTAATTTTTCTATATAAGTTAAAAAAGGGATTGACTTATAAT AGTTATGGGATGAATGTGGCGAAATTGGCACGCCTGGACAAAGATATTATAAATCGGGCA TTCAGTATTTCAGAAGAATTGCGGAAGGAATCCATTAACGAAGACGCGTTGAAATTATTC AGCTCTTTGAAAAGAATATTAAAAAGTGATAATATAACAGCAACGGATAAACTCGCGAAA TTACTATCATTGGATATCCACTGA >CDC39 6327 residues Pha 0 Code 0 ATGCTATCGGCCACATACCGTGATTTGAACACAGCATCTAATTTAGAAACATCAAAGGAA AAACAGGCCGCTCAAATCGTCATTGCACAAATTAGTTTATTATTCACGACTCTTAACAAC GACAATTTTGAATCCGTGGAAAGAGAAATTAGACATATTTTAGACAGGTCGTCCGTAGAT ATTTACATAAAAGTTTGGGAACGATTATTAACCTTAAGTTCTCGGGATATTTTACAAGCG GGAAAATTTTTACTTCAAGAAAATCTACTACACAGACTACTATTAGAATTTGCGAAGGAT TTACCGAAGAAAAGCACAGACCTTATTGAGCTTTTGAAAGAACGAACCTTCAATAACCAG GAGTTTCAAAAACAAACAGGAATTACATTATCACTTTTCATTGATCTATTTGATAAATCT GCAAACAAGGACATTATAGAGTCACTTGACCGCTCCTCTCAGATTAACGATTTCAAGACA ATTAAGATGAATCATACAAATTATTTAAGGAATTTTTTTCTTCAAACCACACCAGAAACA CTAGAGTCCAATCTACGCGACTTATTGCATTCCTTGGAAGGTGAAAGTCTAAATGACTTA TTAGCTCTTTTACTGTCCGAAATACTTTCACCTGGGTCTCAGAATTTACAAAATGATCCC ACACGGAGTTGGTTGACACCTCCGATGGTTTTAGACGCAACGAACCGTGGGAACGTTATA GCAAGATCTATAAGTTCTCTGCAAGCCAACCAGATAAATTGGAATCGTGTGTTTAATTTA ATGTCAACAAAGTATTTCTTGAGCGCACCATTGATGCCTACTACAGCATCTTTGAGTTGC 
TTATTTGCAGCATTGCACGATGGTCCAGTTATTGATGAATTTTTCAGTTGCGACTGGAAA GTTATTTTCAAACTAGATTTGGCCATTCAACTTCATAAGTGGTCGGTACAGAATGGTTGC TTTGACTTATTAAATGCAGAAGGTACCAGGAAAGTTTCTGAAACCATCCCAAACACAAAG CAATCTTTACTCTACTTATTATCCATTGCATCATTGAATTTAGAATTGTTCCTACAAAGG GAGGAATTGTCTGATGGTCCTATGCTAGCTTATTTTCAAGAGTGCTTCTTTGAAGATTTC AACTACGCCCCTGAATATCTTATTTTAGCATTAGTCAAAGAAATGAAGCGGTTCGTTTTA TTGATAGAAAACAGGACAGTCATAGACGAAATACTTATTACCTTATTGATTCAAGTGCAT AATAAATCACCGTCATCGTTCAAGGACGTTATTTCTACAATAACCGATGATTCTAAAATC GTAGATGCAGCAAAAATCATAATCAACTCGGATGACGCACCTATTGCCAACTTTTTAAAA TCGTTGTTAGATACGGGAAGATTAGATACGGTCATTAATAAACTTCCTTTCAATGAAGCT TTTAAAATTTTGCCATGCGCAAGACAAATTGGTTGGGAGGGGTTCGATACTTTCTTAAAA ACAAAAGTTTCTCCATCTAATGTCGATGTAGTGCTGGAATCACTAGAGGTTCAAACGAAA ATGACTGATACAAACACTCCATTTAGGTCATTAAAGACATTTGACTTATTCGCTTTTCAT TCATTAATTGAAGTACTGAACAAATGCCCACTAGATGTTCTCCAATTACAAAGGTTTGAA TCCTTGGAATTTTCCTTATTAATTGCATTTCCTAGATTGATCAATTTTGGTTTTGGACAC GATGAAGCTATTTTAGCCAATGGTGACATCGCAGGGATTAATAATGATATTGAAAAGGAG ATGCAGAACTATTTACAGAAAATGTATAGTGGTGAGTTAGCCATTAAAGATGTAATCGAA CTTCTGAGAAGGTTAAGAGATAGCGACTTGCCAAGGGACCAGGAAGTCTTCACATGTATT ACCCATGCCGTTATAGCAGAATCGACATTCTTCCAAGATTATCCATTGGATGCATTGGCT ACTACATCTGTTCTTTTTGGATCCATGATTCTCTTTCAACTGTTACGTGGATTCGTATTA GACGTCGCATTTAGGATAATCATGAGGTTTGCCAAGGAGCCTCCAGAGTCCAAGATGTTT AAGTTTGCTGTACAAGCTATTTATGCATTTAGGATACGTTTGGCCGAATATCCACAGTAT TGTAAGGACCTCTTGAGAGATGTTCCGGCTTTGAAGTCTCAGGCTCAAGTTTACCAATCT ATCGTCGAAGCTGCTACCCTAGCAAATGCTCCAAAGGAAAGGTCAAGACCCGTCCAGGAA ATGATCCCATTAAAATTTTTTGCTGTAGATGAAGTTTCATGTCAGATCAATCAAGAAGGT GCTCCTAAAGATGTCGTAGAAAAAGTTCTTTTTGTTCTCAACAACGTTACTCTGGCTAAC TTGAATAATAAGGTTGATGAATTGAAAAAAAGTTTGACACCAAATTATTTTTCTTGGTTT TCCACATATTTAGTTACGCAAAGGGCTAAAACAGAACCTAACTATCATGATCTTTATAGC AAGGTTATAGTTGCTATGGGGTCAGGGTTGCTACATCAGTTCATGGTCAACGTTACTTTG AGACAATTATTTGTCCTACTATCTACAAAAGACGAGCAAGCCATCGATAAAAAGCACCTA AAGAATTTGGCTTCATGGTTAGGATGTATCACATTAGCTTTGAATAAACCAATTAAACAC AAGAATATCGCATTCAGGGAAATGTTAATCGAAGCTTATAAGGAAAATAGACTTGAAATA 
GTTGTGCCTTTTGTAACAAAGATTTTACAAAGGGCTTCTGAATCAAAAATTTTCAAGCCT CCAAATCCCTGGACTGTTGGCATATTAAAGCTGTTGATTGAGTTGAACGAAAAAGCAAAC TGGAAATTAAGTTTGACTTTCGAAGTTGAGGTTTTATTAAAATCTTTTAATTTGACCACC AAATCTCTCAAGCCCTCGAATTTCATCAATACTCCGGAAGTTATAGAAACTTTATCCGGT GCTTTGGGATCAATCACTCTGGAGCAACAACAAACAGAGCAACAAAGGCAAATTATACTA ATGCAACAACACCAGCAACAGATGCTAATATATCAACAGAGACAACAACAACAACAACAA AGGCAACAACAACAACAACATCATATTAGTGCAAATACAATCGCAGACCAACAAGCGGCA TTTGGCGGCGAGGGTTCAATTTCACACGACAATCCTTTTAACAACTTACTTGGTTCTACT ATTTTTGTAACCCACCCTGACTTGAAGAGGGTATTTCAAATGGCTTTAGCCAAGTCAGTT CGCGAAATTTTGTTGGAAGTAGTCGAAAAGTCATCAGGAATTGCTGTTGTTACGACGACA AAAATAATACTTAAAGACTTTGCCACTGAAGTTGATGAGTCTAAGTTGAAGACGGCTGCA ATCATTATGGTAAGGCATTTGGCACAAAGTTTAGCTCGAGCTACTTCAATTGAACCATTG AAAGAAGGCATACGTTCTACTATGCAATCACTAGCACCGAATTTAATGTCTCTTTCTTCT TCACCTGCAGAGGAGCTTGACACGGCAATAAATGAAAATATTGGCATTGCTCTAGTTTTG ATTGAGAAAGCATCTATGGACAAGTCTACTCAAGATTTAGCAGACCAATTGATGCAAGCG ATTGCTATTCGTCGTTATCACAAGGAAAGAAGGGCAGACCAACCATTTATTACGCAAAAT ACCAATCCATATTCACTGTCTTTACCAGAACCTCTTGGTTTGAAAAACACTGGTGTTACT CCTCAACAATTCAGGGTATACGAAGAATTTGGTAAGAATATTCCAAACTTGGATGTTATT CCGTTTGCAGGATTGCCCGCTCACGCTCCACCGATGACTCAAAATGTGGGTTCAACTCAG CCTCAGCAACAACAAGCGCAAATGCCTACCCAAATCCTAACCTCCGAACAAATAAGAGCT CAACAACAACAGCAGCAATTACAGAAAAGCCGTTTGAATCAGCCATCCCAGTCGGCTCAA CCTCCAGGAGTGAATGTCCCAAATCCTCAAGGTGGGATTGCTGCAGTTCAATCAGATTTG GAACAGAATCAACGTGTTCTCGTTCACCTCATGGACATTTTAGTTTCTCAAATTAAAGAA AATGCTACGAAGAATAACTTAGCTGAATTAGGCGATCAAAACCAAATTAAAACCATCATT TTTCAAATTTTGACATTCATTGCAAAAAGCGCACAAAAGGATCAATTAGCTTTAAAGGTA TCCCAAGCTGTCGTTAATAGCCTTTTTGCCACTAGTGAGAGTCCTCTCTGCAGAGAAGTT TTGTCCCTACTTTTGGAAAAGTTATGTTCTTTATCCCTCGTTGCTAGAAAAGACGTTGTC TGGTGGTTAGTTTATGCCTTGGACAGTAGGAAATTCAATGTTCCCGTTATCAGATCCCTT CTAGAAGTTAATTTAATTGATGCTACAGAATTAGATAACGTTTTAGTTACTGCAATGAAA AATAAAATGGAGAACTCAACTGAATTTGCTATGAAATTAATTCAGAATACTGTCTTGTCT GATGATCCAATTTTGATGAGAATGGACTTCATTAAAACCTTAGAACACTTGGCCTCTTCG GAAGATGAAAATGTAAAGAAATTCATCAAAGAGTTCGAAGATACTAAGATAATGCCAGTG 
AGGAAAGGTACCAAAACCACAAGAACAGAAAAGCTTTACTTAGTATTTACGGAATGGGTA AAATTACTTCAAAGAGTTGAGAATAACGACGTAATCACAACTGTTTTTATCAAGCAATTA GTCGAAAAGGGTGTTATCAGCGATACTGATAATTTACTTACATTTGTCAAAAGTTCTCTT GAGCTATCAGTTTCTTCATTCAAAGAAAGTGACCCGACTGATGAGGTTTTCATCGCTATT GATGCTCTAGGATCGCTAATTATAAAATTGTTGATTTTACAGGGTTTCAAAGATGATACA AGAAGAGATTACATAAATGCAATATTTTCTGTGATCGTTTTAGTGTTTGCTAAGGATCAT AGCCAAGAGGGTACCACATTCAATGAACGACCATATTTCAGACTATTTTCTAACATCTTA TACGAATGGGCTACCATCAGGACGCACAATTTTGTTAGAATATCTGATTCCAGCACTAGG CAGGAGCTGATCGAATTTGATTCTGTATTTTACAACACTTTCTCAGGATATTTGCACGCT CTGCAACCATTTGCCTTCCCTGGATTCTCATTTGCATGGGTGACACTATTATCACACAGA ATGTTATTACCAATTATGCTAAGATTACCCAATAAAATAGGTTGGGAAAAGTTAATGCTT TTGATTATCGATTTGTTTAAATTTTTGGACCAATACACAAGTAAACATGCAGTCTCTGAC GCTGTTTCGGTTGTTTATAAGGGAACACTGCGTGTTATTTTAGGCATTTCGAATGATATG CCATCCTTTTTGATTGAAAATCACTATGAATTAATGAACAATCTACCTCCAACATATTTC CAACTAAAGAATGTTATTTTATCTGCTATTCCTAAGAATATGACCGTTCCCAACCCATAT GACGTGGATCTTAATATGGAGGATATTCCAGCATGTAAAGAACTACCTGAAGTCTTCTTT GATCCTGTAATTGATTTACACTCATTGAAAAAGCCAGTTGACAACTACCTACGTATTCCC TCAAATTCATTATTAAGAACAATACTAAGCGCTATTTACAAGGATACCTATGACATAAAA AAGGGCGTAGGCTACGACTTTTTATCTGTTGATAGTAAATTAATTCGCGCTATTGTATTA CATGTGGGCATTGAAGCTGGAATAGAGTATAAGAGAACTTCTTCAAATGCGGTATTTAAT ACGAAGTCTTCTTATTATACTTTATTGTTCAATCTGATTCAAAATGGTAGCATCGAAATG AAATATCAAATTATTCTGTCTATTGTGGAACAATTGCGGTATCCAAACATCCACACCTAT TGGTTCAGCTTTGTGTTAATGAATATGTTCAAAAGTGACGAATGGAATGATCAAAAACTT GAAGTCCAAGAAATTATTTTAAGAAACTTTTTAAAAAGAATTATTGTTAACAAACCACAT ACCTGGGGTGTTTCAGTTTTCTTTACTCAGTTGATAAACAATAACGATATTAATCTTTTA GACCTGCCCTTTGTACAAAGTGTTCCCGAAATTAAACTAATTTTACAACAATTAGTAAAA TATTCCAAAAAATACACAACCAGTGAACAAGATGACCAATCCGCCACCATCAATAGAAGG CAAACCCCTCTACAATCCAACGCATAA >YCY4 1176 residues Pha 0 Code 0 ATGGTTTCATTGTTCAAAAGAGGTAAGGCTCCACCGCTCACGAAAGAAGGCCCCACTTCT AAAAAGCCTCCTAACACAGCGTTTAGACAACAAAGGCTTAAGGCATGGCAACCAATACTG TCTCCTCAAAGTGTGCTTCCGTTGTTAATATTCGTTGCATGTATATTTACTCCTATTGGT ATTGGACTCATTGTAAGCGCTACTAAGGTACAAGATCTAACAATTGATTATAGTCATTGT 
GATACAAAAGCATCTACAACTGCTTTTGAAGATATACCAAAGAAGTACATTAAATATCAC TTTAAAAGTAAAGTTGAAAATAAACCACAATGGAGGCTAACCGAAAATGAAAATGGCGAA CAATCATGCGAACTGCAGTTCGAAATCCCAAACGATATCAAGAAATCCATTTTTATATAT TATAAAATAACCAATTTTTATCAAAATCATCGCAGATATGTCCAATCGTTTGACACAAAG CAAATATTAGGGGAGCCTATCAAAAAAGATGATCTGGATACAAGCTGTAGTCCAATAAGA AGTAGGGAAGACAAAATAATATATCCCTGTGGGTTGATCGCTAATTCCATGTTTAATGAT ACATTTTCTCAGGTGTTGAGTGGTATAGATGACACAGAAGACTATAATTTAACTAACAAG CATATATCATGGAGTATTGATCGTCACAGATTTAAAACCACCAAGTATAATGCTAGCGAT ATTGTTCCACCGCCAAACTGGATGAAGAAGTATCCCGATGGGTATACAGATGAAAATCTT CCTGATATCCATACTTGGGAAGAGTTCCAGGTATGGATGAGGACTGCAGCCTTTCCCAAG TTTTACAAGTTGACGTTGAAAAATGAATCTGCTTCTTTACCGAAGGGTAAATATCAAATG AACATTGAGTTGAATTATCCGATTTCACTCTTTGGTGGCACAAAATCATTTGTACTGACT ACAAATGGAGCTATTGGTGGTAGAAATATGTCACTAGGCGTACTGTACCTCATCGTTGCA GGGCTTTGCGCCTTATTTGGCATCATTTTTTTGGTTAAATTAATCTTCCAACCAAGAGCG ATGGGTGATCACACTTATTTGAATTTTGATGATGAAGAAAACGAGGATTATGAGGATGTA CACGCAGAGAATACAACATTGAGGGAAATTTTATAG >A2 360 residues Pha 0 Code 0 ATGCGCAGCATAGAAAACGATAGAAGTAATTATCAACTTACACAGAAAAATAAATCGGCG GATGGGTTGGTATTTAATGTGGTAACTCAAGATATGATAAACAAAAGTACTAAACCTTAC AGAGGACACCGGTTTACAAAAGAAAATGTCCGAATACTAGAAAGTTGGTTTGCAAAGAAC ATCGAGAACCCATATCTAGATACCAAGGGCCTAGAGAATCTAATGAAGAATACCAGTTTA TCTCGCATTCAAATCAAAAACTGGGTTTCGAATAGAAGAAGAAAAGAAAAAACAATAACA ATCGCTCCAGAATTAGCGGACCTCTTGAGCGGTGAGCCTCTGGCAAAGAAGAAAGAATGA >GIT1 1557 residues Pha 0 Code 0 ATGGAAGACAAAGATATCACATCGGTAAATGAGAAGGAAGTGAACGAGAACACTAATCCT AGAATAATAAAATATGATGCCGAGAGGCGTGCAACCCGTACTGAAACCTCAAAGAAAGAT AAATGGAAAAACATAGTTACAATCATTGCGTCCGGTTTTGCTCTGATAAGTGATGGTTAC GTAAATGGTTCAATGAGTATGCTAAACAAGGTTTTTGTTATGGAGTACGGTAAGAAAAAC TATAGCTCAAAAGTGTCGACTAGAGTTTCCAACGCAGCCCTAGTTGGTATTATTTTTGGC CAATTCTTTATGGGTATCGCTGCTGATTATTATAGTAGAAAATCTTGTATCCTTGTGGCC ACTGCTATCTTGGTTATTGGTAGTGCTCTGTGTGCTGCCTCTCACGGTACTACTGTACCT GGCATGTTTTGGATGTTAACAGTTATGAGAGGTTTGGTAGGTATTGGTGTTGGTGCAGAA TATCCTACCAGTACATTAAGTGCTAATGAGTCTGCTAATGAATATACCACTACCAAAAGA GGTGGTATCCTGGTTATGGTGACAAATTTGCCACTAGCCTTCGGTGGTCCATTTGCTACG 
ATCATCTTTTTAATCGTCTACAAAATCTGTTCAGGAACAAAACATTTAGAGGCGATCTGG AGGACTGTTTTTGCAATAGGGTGCTTCTGGCCATTGAGTGTGTTCTATTTTAGATGGAAG ACTGCTACTACAGAAGTCTATGAAAAAGGTAGAATCAAGAGAAATATACCATATTTCCTA GCATTGAAATTTTATTGGAAAAGGTTACTTGGTACATGTGGTACATGGTTTATGTATGAT TTTGTTACCTTCCCAAATGGTATTTTCAGTTCAACAATTATCAGTTCCGTTATCAAGGAC CAAAATGATTTAGTAAAAGTGGCAGAGTGGAACTTACTGTTGGGAGTTTTAGCTGTACTG GGTGTACCAATTGGTGCTTATCTGTCCGATCGTATTGGTCGTAAATATACGTTGATGTTT GGTTTCTCTGGGTACATCATCTTTGGTCTAATCATTGGATGTGCGTACGACCAATTGAAA AAAATCACCCCCTTGTTTATTATCTTCTACGCATTCATGAATATGTTAGGTAATGCTGGA CCAGGTGATATGCTTGGTGTTATTAGTAGTGAAGCGTCAGCAACCGCTGTTAGAGGTGTT TTCTATGGTTTATCTGCTGTGACTGGTAAAATCGGTTCTGTAGTAGGCGTCGAATGTTTC CAACCCATTAGGGATAATTTGGGTGCAAGATGGACTTTTATTATTGCTGCAATTTGTGGT CTTATTGGTATCATTATTACATATTTCTTTGTTCCACATTCTCTTGAAAGCGATTTAATG AAGCAAGACGTTGAATTTCACAACTATTTGGTATCCAATGGCTGGACTGGTAAGATGGGA TTTGATGAGACAGATGAAGAATCAATGGTTAGAACTATTGAAGTTGAAGAGAATGGTACT AATTGTAGTAAGAAAAACGCAGAAATAATTTCAGTCAGACAGGTCGATCAAAGTTGA >YCZ0 951 residues Pha 0 Code 0 ATGTCATCTACGGACATCTGGATATCCAATGATGCATCTACTTTTCAAAAGGCACAGCTG CCTACTCAATTACGGCACGTCAAAGTGATTAAAATTCGTGAAGATTCTATCGGAAGGATC ATCCTTCTTATATCGACAGAAATCACAAATGAGGAAAATGCTGATCCAGATCTCTCAGAG ATTTTCATATCAGATTCGCAAGGGTTGAAATTCTCACCTGTTGAATGGACACCAAACCAT CAGTTTGGAAATTTTAGGCTCACTTTTCCTGATTTCTTGAAAGGGACAATATTTGGATCG TTTCATCCTTCCATTGACTATTCTAATCACCAAGTAAACTATACTGAAAATATAGCCGGA GGAGAAACCAAAATATCCGTTGATAACGGCCTCACATGGTCAAATTTGAAAGTTGTTGAT GAAGAAAATGCCGATTCGTTCGGCTGTGATATCACTAGGCCTGAGAGATGTTCACTTCAG GGTTATTTTTACAATCTAAAACTTTCAAATCCTTCTGCTGGGATCATATTAATGACAGGT TCTGTTGGCGATGACAATGAATTCGATCGGAAGGACCGAAAAACTTTCATTTCTAGAGAC GGTGGTCTAACATGGAGGGTGGCCCATAATTCTTCTGGATTATATGCTACTGGTGATCTG GGAAATATTATTGTATATATCCCGTCTCCTTCATATAAAGATGGTGATGTACAATCCAAA CTTTATTTTTCCTTGGACCAAGGTAGAACATGGAATCAATATGAGCTTGTTGACGCTTTA TTTTATATCCATCCATTAGAGTTGATTAATACAACGCCAGATGGATCAGGCTCAAAATTT ATTTTAAGCGGACATCTCATTACTACGGCTAGTCAAGAAGGAAACAACACCAACATCTCA TATATTGCAAGAAGTGTCCTGTATGCGATCGATTTTTCTGCTGCATTTTGA >YCZ1 549 residues Pha 0 
Code 0 ATGATATTACTTCATGCCATATATACTCTTTGGGTAATTATACTACTTCCGCTACTCAAT GCAGAGAAATTTGTCCCAAAAGTAACGGAGGCTCCTATAGAAACATCATTTAATCTAGTG AGTTTTGATGATTCCAACACTTCTATCAGATTAGATGGTTGGGGGGTTGTATGGATAAGT TTCGACGCTGGAGAAAATTGGGAAACGGTCAAAGAAATTGAAGAGCGCATTTTCAGATTT ACTGTTGATCCTTTCCATGGACAGGAAAGAGGTTTCGCTTTTATATGTGAATCACCCAAA TTCTACATTACCGACGACCGTGGGGAGTCATGGAGGGCTTTAACTATACCCTCATCAGAA GAATATTTAGATGGCGACTGTTTTATAACTACTCATCCTAGAAACAAAGAACTTCTTATT GCGAATTGCTATAGCTATATGATAGACGCAGACGTTTTATATGACCCAAGTGAAATTTAC TTGAGCAATGATGGGAATCCTTTTTTAAAATTAAACCTTCCTTGGAAAAGAAAAAAGACG ACGATATAA >YCZ2 1107 residues Pha 0 Code 0 ATGAAGGCTGTCGTCATTGAAGACGGTAAAGCGGTTGTCAAAGAGGGCGTTCCCATTCCT GAATTGGAAGAAGGATTCGTATTGATTAAGACACTCGCTGTTGCTGGTAACCCGACTGAT TGGGCACACATTGACTACAAGGTCGGGCCTCAAGGATCTATTCTGGGATGTGACGCTGCC GGCCAAATTGTCAAATTGGGCCCAGCCGTCGATCCTAAAGACTTTTCTATTGGTGATTAT ATTTATGGGTTCATTCACGGATCTTCCGTAAGGTTTCCTTCCAATGGTGCTTTTGCTGAA TATTCTGCTATTTCAACTGTGGTTGCCTACAAATCACCCAATGAACTCAAATTTTTGGGT GAAGATGTTCTACCTGCCGGCCCTGTCAGGTCTTTGGAAGGGGCAGCCACTATCCCAGTG TCACTGACCACAGCTGGCTTGGTGTTGACCTATAACTTGGGCTTGAACCTGAAGTGGGAG CCATCAACCCCACAAAGAAACGGCCCCATCTTATTATGGGGCGGTGCAACTGCAGTAGGT CAGTCGCTCATCCAATTAGCCAATAAATTGAATGGCTTCACCAAGATCATTGTTGTGGCT TCTCGGAAACACGAAAAACTGTTGAAAGAATATGGTGCTGATCAACTATTTGATTACCAT GATATTGACGTGGTAGAACAAATTAAACACAAGTACAACAATATCTCGTATTTAGTCGAC TGTGTCGCGAATCAAAATACGCTTCAACAAGTGTACAAATGTGCGGCCGATAAACAGGAT GCTACCGTTGTCGAATTAACTAATTTGACAGAAGAAAACGTCAAAAAGGAGAATAGGAGG CAAAATGTCACTATTGACAGAACAAGACTGTATTCAATAGGCGGCCATGAAGTACCATTT GGTGGCATTACTTTCCCTGCTGACCCAGAAGCCAGGAGAGCTGCCACCGAATTCGTCAAG TTCATCAATCCAAAGATTAGTGATGGGCAAATTCACCATATTCCAGCAAGGGTCTATAAG AACGGGCTTTACGATGTTCCTCGTATCCTGGAAGACATTAAAATCGGTAAGAACTCTGGT GAAAAACTAGTTGCCGTATTAAACTAG >YCZ3 336 residues Pha 0 Code 0 ATGGAGATGCTCTTGTTTCTGAACGAATCATACATCTTTCATAGGTTTCGTATGTGGAGT ATTGTTTTATGGCACTCATGTGTATTCGTATGCGCAGAATGTGGGAATGCCAATTATAGG GGTGCCGGGGTGCCTTGCAAAACCCTTTTACGCGCGCCTGTGAAGTTTCCGCTTTCGGTC 
AAAAAGAATATCCGAATTTTAGATTTGGACCCTCGTTCAGAAGCTTATTGTCTAAGCCTA AATTCAGTCTGCTTTAAACGGCTTCCGCGGAAGAAATATTTCCATCTCTTGAATTCGTAC AACATTAAACGTGTGTTGGGAGTCGTATACTGTTAG >PAU3 375 residues Pha 0 Code 0 ATGGTCAAATTAACTTCAATCGCTGCTGGTGTTGCCGCCATCGCTGCCGGTATTGCCGCT GCCCCAGCCACTACCACTCTATCTCCATCTGACGAAAGGGTCAACTTGGTCGAATTGGGT GTTTACGTCTCCGATATCAGAGCTCATTTGGCTCAATACTACTTGTTTCAAGCAGCTCAT CCAACTGAGACCTACCCAGTTGAGATTGCTGAAGCTGTTTTCAACTATGGTGACTTCACC ACTATGTTGACTGGTATTCCAGCTGAACAAGTCACCAGAGTCATCACTGGTGTCCCATGG TACTCCACTAGATTGAGACCAGCCATCTCCAGTGCTCTATCTAAGGACGGTATCTACACT GCTATTCCAAAATAG >YCZ5 1086 residues Pha 0 Code 0 ATGCTTTACCCAGAAAAATTTCAGGGCATCGGTATTTCCAACGCAAAGGATTGGAAGCAT CCTAAATTAGTGAGTTTTGACCCAAAACCCTTTGGCGATCATGACGTTGATGTTGAAATT GAAGCCTGTGGTATCTGCGGATCTGATTTTCATATAGCCGTTGGTAATTGGGGTCCAGTC CCAGAAAATCAAATCCTTGGACATGAAATAATTGGCCGCGTGGTGAAGGTTGGATCCAAG TGCCACACTGGGGTAAAAATCGGTGACCGTGTTGGTGTTGGTGCCCAAGCCTTGGCGTGT TTTGAGTGTGAACGTTGCAAAAGTGACAACGAGCAATACTGTACCAATGACCACGTTTTG ACTATGTGGACTCCTTACAAGGACGGCTACATTTCACAAGGAGGCTTTGCCTCCCACGTG AGGCTTCATGAACACTTTGCTATTCAAATACCAGAAAATATTCCAAGTCCGCTAGCCGCT CCATTATTGTGTGGTGGTATTACAGTTTTCTCTCCACTACTAAGAAATGGCTGTGGTCCA GGTAAGAGGGTAGGTATTGTTGGCATCGGTGGTATTGGGCATATGGGGATTCTGTTGGCT AAAGCTATGGGAGCCGAGGTTTATGCGTTTTCGCGAGGCCACTCCAAGCGGGAGGATTCT ATGAAACTCGGTGCTGATCACTATATTGCTATGTTGGAGGATAAAGGCTGGACAGAACAA TACTCTAACGCTTTGGACCTTCTTGTCGTTTGCTCATCATCTTTGTCGAAAGTTAATTTT GACAGTATCGTTAAGATTATGAAGATTGGAGGCTCCATCGTTTCAATTGCTGCTCCTGAA GTTAATGAAAAGCTTGTTTTAAAACCGTTGGGCCTAATGGGAGTATCAATCTCAAGCAGT GCTATCGGATCTAGGAAGGAAATCGAACAACTATTGAAATTAGTTTCCGAAAAGAATGTC AAAATATGGGTGGAAAAACTTCCGATCAGCGAAGAAGGCGTCAGCCATGCCTTTACAAGG ATGGAAAGCGGAGACGTCAAATACAGATTTACTTTGGTCGATTATGATAAGAAATTCCAT AAATAG >YCZ6 2499 residues Pha 0 Code 0 ATGGATTCGATTACAGTAAAAAAACCTCGGTTAAGATTGGTTTGCCTGCAATGCAAAAAG ATCAAACGGAAATGTGATAAACTGCGGCCTGCTTGCTCGCGATGCCAACAAAATTCATTA CAGTGTGAATATGAAGAGAGAACAGATTTATCTGCCAATGTTGCAGCAAACGACTCTGAT GGATTCAATTCCTCTCATAAGCTCAATTTCGAACAGCAACCTGTACTTGAAAGGACTGGG 
CTTAGATATTCCTTACAAGTGCCTGAAGGTGTCGTTAATGCTACGCTGTCGATATGGAAC GCCGAAGATATGCTAGTTATAGTAGGATTAGTTACATTTCTGGATTATCCTTTTGCTGCG CATAGTCTGGCGCAACATGACCAGTATATCAGGGCACTTTGTGCTTCGTTGTACGGCATG GCGCTTGTTGACTTTAGCAATTATGCTAATGGTATTCCTTGTGAAGACACATCAAGAAGT ATACTAGGACCATTGTCATTCATAGAAAAGGCCATTTTTAGACGGATAGAACATAGTAAG CAATTTCGAGTTCAGTCTGCCGCCTTAGGGTTATTATACAATGCATTTTCAATGGAAGAA GAAAACTTCTCGACTCTTCTACCGTCACTCATCGCTGAAGTGGAAGACGTGTTGATGCAA AAAAAAGACTGTGAAATACTTTTGAGGTGTTTCTATCAAAATATTTATCCCTTCTATCCT TTTATGGACATTTCACTCTTTGAGAGCGATCTCACTAGTTTGCTTTTACAAGACGACAAT AATCGTTGGAAAATTAGTACTGAAGTTAAAAATGTGCGCAAAAAAATAGAAACTTTGTCA TTACTTACAATAGTAATGGCCATGGCCTTGATGCATTCAAAATTGGATGCAAATCTTCTT TCAATGGTAAAAGAAAATGCCTCCGAAAGTGCCAGGAAACTTTCTCTTTTATGTCATAAA CTATTATGCCTCCTGGATGTATTTCGCTATCCAAATGAGAACACTTTTACTTGCCTTTTA TATTTCTACGTTTCAGAGCATTTAGATCCCGAGAGTCCCGATTGTGTACTGAGCCCCACT AACTTGCTTACTCTGCACCATCTTTTAAATTTGTCCATGACCTTAGGTCTTCAATATGAG CCTTCGAAGTACAAACGTTTCAAAGATCCAGAAGTGATAAGGCAGAGACGGATATTATGG TTAGGAGTTCAGTCATTACTTTTTCAAATTTCTCTTGCTGAAGGTGATGCTGGTAAATCA AATAGTGAATATATGGAGGCATATTTAACAGACTTCGAAGAATATATTGAAGCTTCCTCA GAGTATGAAAAAAGTTCTGCGAGTGAATCGAACGTGCAAATGAATGATATTGTTTGGAAT AAGTACAAATTTCACGTCATTTTGAGTAAACTAATGTCTGATTGCACTTCAGTTATACAA CATCCGCAGCTTTTCCACATTTTAGGAAATATTAAAAGATCTGAAGATTTTATGGCTGAG AACTTTCCTACAAGTTCGATTTACCAACCCCTTCATGAAAAGGAACCAAATGCGATCAAA GTTGGCAAAAGTACGGTTCTCGATGTCATGGATATTCAAAAAACTGAAATATTTCTTACA AATATTGTGGGAAGTATGTGTTTTTTAAACATTTTTGATGTCCTATCGTTACATTTTGAA AAAAAATGTGTTATGCACTGGGAAGAATATGAAAAGAACTATCATTTCCTTACTTTGAAA AGTTTCAATGCATACTTAAAGCTAGCAGGGTTGATATCTGATTATCTCGAGAATAAGTTT CAAGGGAACATTTTAGAGAGTCGCGGTTATATCATAGATAAACAAATATGTTTTATGCTT GTAAGGATCTGGATGTTCCAATGTCGTATTTTGTTAAGGTTTTCATACAAGCAAGAAAGT CAGAAAAAATTGGCCTCTTCCAGTATATCCACTAACGATAATGAAAAAGAAGATGAAATG ATTGTCATTTTAGAAAGACTTATTAAACACATTCGTAACCAAATGGCACATTTAGTGGAT CTAGCAAAGGGAAAACTTCAAGATAGTTACTTTGGTGCTTACCAAACTGTTCCCATGTTT AGATACGTTGTGTATTTGATCGATGTTGGCGGCTTAGTATCTGTGACAAATGGGTTTTGG 
GATAAGATTTCCAGTGATGGTGAAATACCGCCAAAAGTACAACAAGCCGTGAGATTGAAA TGGGGATTGGACTGCAATAATTCGAGAAGAATCAAACAAAAGTTAATAAGCAGCCAGAGT TTGCAGAGTTTCAATCAAGTTCTGTTGTGCCAGATGGAGGATGCAGTTCTCTCCAGTTCC TTCGCAATAAAAGCCAATACCGCTATGTCCCAAAACACGGCTGAAGAATTTTTCAATATC AGCGAAGAAGAGGCTTTAAATCAACTATTGGAAAACAACAATTTTGATGCCTTCTGGGAT TTATTAGGTGAAAATCTGAGCGATATGCCTTCTTTGTGA >YCZ7 1092 residues Pha 0 Code 0 ATGATTGGGTCCGCGTCCGACTCATCTAGCAAGTTAGGACGCCTCCGATTTCTTTCTGAA ACTGCCGCTATTAAAGTATCCCCGTTAATCCTAGGAGAAGTCTCATACGATGGAGCTCGT TCGGATTTTCTCAAATCAATGAACAAGAATCGAGCTTTTGAATTGCTTGATACTTTTTAC GAGGCAGGTGGAAATTTCATTGATGCCGCAAACAACTGCCAAAACGAGCAATCAGAAGAA TGGATTGGTGAATGGATACAGTCCAGAAGGTTACGTGATCAAATTGTCATTGCAACCAAG TTTATAAAAAGCGATAAAAAGTATAAAGCAGGTGAAAGTAACACTGCCAACTACTGTGGT AATCACAAGCGTAGTTTACATGTGAGTGTGAGGGATTCTCTCCGCAAATTGCAAACTGAT TGGATTGATATACTTTACGTTCACTGGTGGGATTATATGAGTTCAATCGAAGAATTTATG GATAGTTTGCATATTCTGGTCCAGCAGGGCAAGGTCCTCTATTTGGGTGTATCTGATACA CCTGCTTGGGTTGTTTCTGCGGCAAACTACTACGCTACATCTTATGGTAAAACTCCCTTT AGTATCTACCAAGGTAAATGGAACGTGTTGAACAGAGATTTTGAGCGTGATATTATTCCA ATGGCTAGGCATTTCGGTATGGCCCTCGCCCCATGGGATGTCATGGGAGGTGGAAGATTT CAGAGTAAAAAAGCAATGGAGGAACGGAGGAAGAATGGAGAGGGTATTCGTTCTTTCGTT GGCGCCTCCGAACAAACAGATGCAGAAATCAAGATTAGTGAAGCATTGGCCAAGATTGCT GAGGAACATGGCACTGAGTCTGTTACTGCTATTGCTATTGCCTATGTTCGCTCTAAGGCG AAAAATTTTTTTCCGTCGGTTGAAGGAGGAAAAATTGAGGATCTCAAAGAGAACATTAAG GCTCTCAGTATCGATCTAACGCCAGACAATATAAAATACTTAGAAAGTATAGTTCCTTTT GACATCGGATTTCCTAATAATTTTATCGTGTTAAATTCCTTGACTCAAAAATATGGTACG AATAATGTTTAG codonW/Makefile 777 0 0 2405 6671472705 7004 0override cflags = $(CFLAGS) -g objects = codon_us.o codons.o open_fil.o commline.o menu.o tester.o coresp.o linked = rscu cu aau raau tidy reader cutab cutot transl bases base3s dinuc cai fop gc3s gc cbi enc CC=cc CFLAGS= -O -DBSD LN=ln -f all: codonw links codonw: $(objects) $(CC) $(CFLAGS) $(objects) -o codonw -lm clean: \rm -f $(objects) cleanall: \rm -f $(objects) codonw Makefile $(linked) realclean: \rm -f $(objects) codonw Makefile $(linked) codon_us.o: 
codon_us.c codonW.h $(CC) -c $(CFLAGS) codon_us.c menu.o: menu.c codonW.h $(CC) -c $(CFLAGS) menu.c codons.o: codons.c codonW.h $(CC) -c $(CFLAGS) codons.c coresp.o: coresp.c codonW.h $(CC) -c $(CFLAGS) coresp.c open_fil.o: open_fil.c codonW.h $(CC) -c $(CFLAGS) open_fil.c commline.o: commline.c codonW.h $(CC) -c $(CFLAGS) commline.c tester.o: tester.c codonW.h $(CC) -c $(CFLAGS) tester.c links: codonw $(LN) codonw rscu $(LN) codonw cu $(LN) codonw aau $(LN) codonw raau $(LN) codonw tidy $(LN) codonw reader $(LN) codonw cutab $(LN) codonw cutot $(LN) codonw transl $(LN) codonw bases $(LN) codonw base3s $(LN) codonw dinuc $(LN) codonw cai $(LN) codonw fop $(LN) codonw gc3s $(LN) codonw gc $(LN) codonw cbi $(LN) codonw enc codonW/Makefile.orig 777 0 0 2405 6671472705 7743 0override cflags = $(CFLAGS) -g objects = codon_us.o codons.o open_fil.o commline.o menu.o tester.o coresp.o linked = rscu cu aau raau tidy reader cutab cutot transl bases base3s dinuc cai fop gc3s gc cbi enc CC=cc CFLAGS= -O -DBSD LN=ln -f all: codonw links codonw: $(objects) $(CC) $(CFLAGS) $(objects) -o codonw -lm clean: \rm -f $(objects) cleanall: \rm -f $(objects) codonw Makefile $(linked) realclean: \rm -f $(objects) codonw Makefile $(linked) codon_us.o: codon_us.c codonW.h $(CC) -c $(CFLAGS) codon_us.c menu.o: menu.c codonW.h $(CC) -c $(CFLAGS) menu.c codons.o: codons.c codonW.h $(CC) -c $(CFLAGS) codons.c coresp.o: coresp.c codonW.h $(CC) -c $(CFLAGS) coresp.c open_fil.o: open_fil.c codonW.h $(CC) -c $(CFLAGS) open_fil.c commline.o: commline.c codonW.h $(CC) -c $(CFLAGS) commline.c tester.o: tester.c codonW.h $(CC) -c $(CFLAGS) tester.c links: codonw $(LN) codonw rscu $(LN) codonw cu $(LN) codonw aau $(LN) codonw raau $(LN) codonw tidy $(LN) codonw reader $(LN) codonw cutab $(LN) codonw cutot $(LN) codonw transl $(LN) codonw bases $(LN) codonw base3s $(LN) codonw dinuc $(LN) codonw cai $(LN) codonw fop $(LN) codonw gc3s $(LN) codonw gc $(LN) codonw cbi $(LN) codonw enc codonW/menu.c 777 0 0 
144246 10240475344 6533 0/**************************************************************************/ /* CodonW codon usage analysis package */ /* Copyright (C) 2005 John F. Peden */ /* This program is free software; you can redistribute */ /* it and/or modify it under the terms of the GNU General Public License */ /* as published by the Free Software Foundation; version 2 of the */ /* License, */ /* */ /* This program is distributed in the hope that it will be useful, but */ /* WITHOUT ANY WARRANTY; without even the implied warranty of */ /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ /* GNU General Public License for more details. */ /* You should have received a copy of the GNU General Public License along*/ /* with this program; if not, write to the Free Software Foundation, Inc.,*/ /* 675 Mass Ave, Cambridge, MA 02139, USA. */ /* */ /* */ /* The author can be contacted by email (jfp#hanson-codonw@yahoo.com Anti-*/ /* Spam please change the # in my email to an _) */ /* */ /* For the latest version and information see */ /* http://codonw.sourceforge.net */ /**************************************************************************/ #include #ifdef _WINDOWS #include #endif #include #include #include #include "codonW.h"

/************** Main menu **********************************************/
/* Dispatcher for the menu system: 0 shows the initial menu, 1-8 open  */
/* the matching menu_N() screen and 9 shows the program information.   */
/* Any other value is reported on stderr and otherwise ignored.        */
/***********************************************************************/
void main_menu ( int menu )
{
  if      ( menu == 0 ) menu_initial();
  else if ( menu == 1 ) menu_1();
  else if ( menu == 2 ) menu_2();
  else if ( menu == 3 ) menu_3();
  else if ( menu == 4 ) menu_4();
  else if ( menu == 5 ) menu_5();
  else if ( menu == 6 ) menu_6();
  else if ( menu == 7 ) menu_7();
  else if ( menu == 8 ) menu_8();
  else if ( menu == 9 ) {
    /* "About" screen: print the info, wait for the user, wipe screen   */
    printinfo();
    welcome();
    pause;
    clearscr(pm->term_length);
  } else {
    fprintf ( stderr,"ERROR: Unrecognised menu in main_menu\n");
  }
}

/* First menu shown when CodonW starts.  It is redisplayed after every  */
/* answer until the user chooses (R)un; (Q)uit goes through my_exit().  */
/* Digits 1-9 are handed to main_menu(); the answer is read into        */
/* pm->junk.                                                            */
void menu_initial (void)
{
  int running = TRUE;
  int choice;

  while ( running ) {
    /* entries (2) and (6) are deliberately shown blank                 */
    printf (" Initial Menu \n"
            " Option\n\t (1) Load sequence file\n"
            "\t ( )\n"
            "\t (3) Change defaults\n"
            "\t (4) Codon usage indices\n"
            "\t (5) Correspondence analysis\n"
            "\t ( ) \n"
            "\t (7) Teach yourself codon usage\n"
            "\t (8) Change the output written to file\n"
            "\t (9) About C-codons\n"
            "\t (R) Run C-codons \n"
            "\t (Q) Quit \n"
            " Select a menu choice, (Q)uit or (H)elp -> ");
    gets(pm->junk);

    if ( isalpha((int) pm->junk[0]) ) {
      choice = toupper( (int) pm->junk[0]);

      if ( choice == 'Q' ) {
        my_exit(2,"main menu");
      } else if ( choice == 'R' ) {
        /* the three files must be open before a run; if any is missing */
        /* send the user through menu 1 first, then leave this menu     */
        if ( !(pm->inputfile && pm->outputfile && pm->tidyoutfile) ) {
          printf("Not all required files are open\n");
          printf("About to open input and output files\n");
          pause;
          main_menu(1);
        }
        running = FALSE;
      } else if ( choice == 'H' ) {
        chelp ( "main_menu" );
      } else {
        fprintf( stderr, "The answer %s is not valid\n", pm->junk);
        pause;
      }
    } else if ( isdigit((int) pm->junk[0]) ) {
      choice = atoi( pm->junk);
      if ( choice < 1 || choice > 9 )
        fprintf( stderr, "The answer %s is not valid\n", pm->junk);
      else
        main_menu( choice );
    }

    clearscr(pm->term_length);
  }
  return;
}
/************************* menu_1 ******************************************/ /* Opens input and output files */ /* It tests if a sequence file is already in memory */ /* if so you have the option to reopen the same file when loaded the */ /* pm->file_loaded is set to true and the 20 characters of the new filename*/ /* are stored */ /***************************************************************************/ void menu_1 (void) { char root[MAX_FILENAME_LEN]; int n; clearscr(pm->term_length); printf (" Loading sequence menu (type h for help)\n"); if (
strlen(pm->curr_infilename) ) { printf ( "The current active file is \"%s\"\n",pm->curr_infilename); fileclose(&pm->inputfile); if (!(pm->inputfile = open_file("input sequence file", pm->curr_infilename, "r", FALSE))) my_exit(1,"menu 1"); } else { printf( " No sequence file is currently loaded\n"); if (!(pm->inputfile = open_file("input sequence file\t", "input.dat", "r", FALSE))) my_exit(1,"menu 1"); } /* copies the filename into pm->curr_infilename */ /* next finds the root of this filename */ /* which is used to construct other filenames */ strncpy(pm->curr_infilename, pm->junk, MAX_FILENAME_LEN - 1); strncpy(root, pm->curr_infilename , MAX_FILENAME_LEN - 5); /* open the .out filename */ for (n = (int) strlen(root); n && root[n] != '.' ; --n); if (n) root[n] = '\0'; /* define root of the filename */ if ( strlen(pm->curr_outfilename)) { printf( "\nThe previous output file was \"%s\"\n", pm->curr_outfilename ); fclose( pm->outputfile); } if (!(pm->outputfile = open_file("output sequence file\t", strcat(root, ".out"), "w", (int)pm->verbose))) my_exit(1,"output menu1"); /* open the .blk filename */ strncpy(pm->curr_outfilename, pm->junk, MAX_FILENAME_LEN - 1); strncpy(root, pm->curr_infilename , MAX_FILENAME_LEN - 5); for (n = (int) strlen(root); n && root[n]!='.' 
; --n); if ( n ) root[n] = '\0'; /* find root of filename */ if ( strlen(pm->curr_tidyoutname)) { printf( "\nThe previous bulk output file was \"%s\"\n", pm->curr_tidyoutname ); fclose( pm->tidyoutfile); } if (!(pm->tidyoutfile = open_file("bulk output file\t", strcat(root, ".blk"), "w", (int) pm->verbose))) my_exit(1,"tidyout menu1"); strncpy(pm->curr_tidyoutname, pm->junk, MAX_FILENAME_LEN - 1); clearscr(pm->term_length); return; } /************************* menu_2 ******************************************/ /* Not currently implemented */ /***************************************************************************/ void menu_2 (void) { int loop = TRUE; int c; clearscr(pm->term_length); while ( loop ) { printf (" Menu 2 \n"); printf (" Purifying sequences menu\n"); printf ("\t ( ) Sorry currently unimplemented \n"); printf ("\t (X) Exit this menu\n"); printf (" Select a menu choice, (Q)uit or (H)elp -> "); gets(pm->junk); clearscr(pm->term_length); if (isalpha((int)pm->junk[0]) || pm->junk[0]=='\0' ) { c = toupper( (int) pm->junk[0]); switch ( c ) { case 'Q': my_exit(2,"menu 2"); break; case 'X': case '\0': return; case 'H': chelp("menu_2"); break; default: fprintf( stderr, "The answer %s is not a valid\n", pm->junk); pause; break; } } } return; } /************************* menu_3 ******************************************/ /* To improve flexibility, many of the default values used internally by */ /* CodonW (defined in the header file codonW.h) can be altered at runtime */ /* using this menu. Ten default values can be customised. */ /***************************************************************************/ void menu_3 (void) { int loop = TRUE; int i; int c; clearscr(pm->term_length); while (loop) { printf (" Changing defaults\n"); printf (" Options\n"); printf (" %-40.40s", "(1) Change the ASCII delimiter in output"); printf ("{%s}\n", (pm->seperator == ' ' ) ? "space" : (pm->seperator == '\t') ? "tab" : (pm->seperator == ',' ) ? 
"," : "ERROR" ); printf (" %-40.40s", "(2) Run silently, No Warnings"); printf ("{%s}\n", (pm->verbose) ? "FALSE" : "TRUE"); printf (" %-40.40s", "(3) Log warnings/information to a file"); printf ("{%s}\n", (strlen(pm->curr_logfilename) > 1) ? "TRUE" : "FALSE"); printf (" %-40.40s", "(4) Number of lines on screen"); printf ("{%d}\n", pm->term_length); printf (" %-40.40s", "(5) Change the genetic code"); printf ("{%s}\n", cu[pm->code].des); printf (" %-40.40s", "(6) Change the Fop/CBI values"); printf ("{%s}\n", fop[pm->f_type].des); printf (" %-40.40s", "(7) Change the CAI values"); printf ("{%s}\n", cai[pm->c_type].des); printf (" %-40.40s", "(8) Output Human or Computer readable"); printf ("{%s readable}\n", (pm->seq_format == 'M') ? "Computer" : "Human"); printf (" %-40.40s", "(9) Concatenate or individual genes"); printf ("{%s genes}\n", (pm->totals == TRUE ? "concatenate": "individual")); printf (" %s", "(10) Correspondence analysis defaults\n"); printf (" (X) Return to previous menu\n"); printf ("Choices enclosed with curly brackets are the current " "defaults\n"); printf (" Select a menu choice, (Q)uit or (H)elp -> "); gets(pm->junk); clearscr(pm->term_length); if (isalpha((int) pm->junk[0])|| pm->junk[0]=='\0') { switch (c = toupper((int) pm->junk[0])){ case 'Q': my_exit(2,"menu 3"); /* decided to quit program */ break; case 'H': chelp("menu_3"); break; case 'X': case '\0': return; /* way out of loop is X or blank line */ break; default: fprintf(stderr,"The answer %s is not a valid\n", pm->junk); pause; continue; break; } } c=0; if (isdigit((int)pm->junk[0])) c = atoi(pm->junk); if ( c <= 0 && c > 10 ) { fprintf( stderr, "The answer %s is not valid\n", pm->junk); continue; } switch ((int) c) { case 1: clearscr(pm->term_length); printf (" The current separator is \"%s\"\n", (pm->seperator == ' ' ) ? "space" : (pm->seperator == '\t') ? "tab" : (pm->seperator == ',' ) ? 
"," : "ERROR" ); printf (" Please select a new separator \t:"); gets(pm->junk); c = pm->junk[0]; /* take first character of string */ if ( strchr ("\t, ", (int)c) == NULL || c == '\0' ) { /* remember the \0 is in every string */ printf( "WARNING: The chosen separator %s is unsuitable\n", pm->junk); printf( "\tSeparator is unchanged try comma,tab " "or space\n\n"); } else pm->seperator = (char) c; /* specify the column separator */ break; case 2: /* warn about overwriting files?*/ clearscr(pm->term_length); pm->verbose = (char) ((pm->verbose) ? FALSE : TRUE); pm->warn = (char) ((pm->warn ) ? FALSE : TRUE); break; case 3: /* redirect errors to a file */ if ( strlen(pm->curr_logfilename) > 1 ) { strcpy(pm->curr_logfilename , "" ); /* blank logfilename */ pm->my_err = stderr; /* redirects errors */ /* to stderr */ fclose(pm->logfile); /* close logfile */ } else { /* open logfile and redirect stderr */ if (!(pm->logfile = open_file("log filename \t", "warning.log", "w", (int) pm->verbose))) my_exit(1," open log file menu 3"); pm->my_err = pm->logfile; strncpy(pm->curr_logfilename, pm->junk, MAX_FILENAME_LEN-1); } /* end of if */ break; case 4: /* No of line on term*/ printf("Please give the new height of the screen [%i] ", pm->term_length); gets(pm->junk); if ( isdigit( (int) pm->junk[0])) pm->term_length = atoi(pm->junk) ; break; case 5: /*Change genetic code */ clearscr(pm->term_length); printf(" Genetic codes currently supported are\n"); /* NumGeneticCodes is given in codonW.h */ for ( i = 0 ; i < NumGeneticCodes ; i++) { (pm->code == i) ? 
printf ( " (%i) {%-45.45s %-17.17s}", i, cu[i].des, cu[i].typ) : printf ( " (%i) %-45.45s %-17.17s ", i, cu[i].des, cu[i].typ) ; printf("\n"); } printf("Choice enclosed with curly brackets is " "the current code\n"); printf("Please select a new code [no change]\n"); gets(pm->junk); if ( isdigit( (int) pm->junk[0]) ) { c = (char)atoi(pm->junk); if ( c > 0 && c < NumGeneticCodes && pm->code!= (char) c ){ pm->code = (char) c; initilize_point(pm->code,pm->f_type, pm->c_type); } } break; case 6: /*Change optimal codons*/ clearscr(pm->term_length); printf(" Fop values pre-loaded are\n"); /* NumFopSpecies defined with the Fop_struct in codonW.h */ for ( i = 0 ; i < NumFopSpecies ; i++) { (pm->f_type == i) ? printf (" (%i) {%-25.25s %-40.40s}", i, fop[i].des, fop[i].ref) : printf (" (%i) %-25.25s %-40.40s ", i, fop[i].des, fop[i].ref) ; printf("\n"); } printf ("Choice enclosed with curly brackets is the current " "selection\n"); printf ("Please select a type [no change]\n"); gets(pm->junk); if ( isdigit( (int) pm->junk[0]) ) { c = (char)atoi(pm->junk); if ( c > 0 && c < NumFopSpecies && pm->f_type!=(char) c) { pm->f_type = (char) c; initilize_point(pm->code,pm->f_type, pm->c_type); } } break; case 7: /*Change CAI w values */ clearscr(pm->term_length); printf(" CAI types currently supported are\n"); /* NumCaiSpecies currently defined in codonW.h */ for ( i = 0 ; i < NumCaiSpecies ; i++) { (pm->c_type == i) ? 
printf (" (%i) {%-25.25s %-40.40s}", i, cai[i].des, cai[i].ref) : printf (" (%i) %-25.25s %-40.40s ", i, cai[i].des, cai[i].ref) ; printf("\n"); } printf ("Choice enclosed with curly brackets is the current " "selection\n"); printf ("Please chose a new CAI [no change]\n"); gets(pm->junk); if ( isdigit( (int) pm->junk[0]) ) { c = (char)atoi( pm->junk); /* if valid value and different from the current choice */ if ( c > 0 && c < NumCaiSpecies && pm->c_type!=(char) c){ pm->c_type = (char) c; initilize_point(pm->code,pm->f_type, pm->c_type); } } break; case 8: /* machine or human readable format */ clearscr(pm->term_length); pm->seq_format = (char) ( pm->seq_format == 'M' ? 'H' : 'M'); /*toggle */ break; case 9: /* concatenate genes? */ clearscr(pm->term_length); pm->totals = (char) (pm->totals == TRUE ? FALSE : TRUE); break; case 10: /* change COA default then go to menu5*/ clearscr(pm->term_length); if( !pm->coa ) menu_5(); else menu_coa(); break; default: fprintf( stderr, "The answer %s is not a valid\n", pm->junk); break; } } return; } /************************* menu_4 ******************************************/ /* Select which indices to calculate */ /***************************************************************************/ void menu_4 (void) { char loop = TRUE; char *choices[] = { " ", "Codon Adaptation Index (CAI)", "Frequency of OPtimal codons (Fop)", "Codon bias index (CBI)", "Effective Number of Codons (ENc)", "GC content of gene (G+C)", "GC of silent 3rd codon posit.(GC3s)", "Silent base composition", "Number of synonymous codons (L_sym)", "Total number of amino acids (L_aa )", "Hydrophobicity of protein (Hydro)", "Aromaticity of protein (Aromo)", "Select all" }; int i,NumChoices; int c; NumChoices = (char) 12; /* size of choices array */ clearscr(pm->term_length); while (loop) { printf (" Codon usage indices\n"); printf (" Options\n"); for (i = 1; i <= NumChoices; i++) { printf(" (%2i) ", i); switch ((int) i) { case 1: (pm->cai) ? 
printf ("{%-45.45s}", choices[i]) : printf (" %s ", choices[i]); break; case 2: (pm->fop) ? printf ("{%-45.45s}", choices[i]) : printf (" %s ", choices[i]); break; case 3: (pm->cbi) ? printf ("{%-45.45s}", choices[i]) : printf (" %s ", choices[i]); break; case 4: (pm->enc) ? printf ("{%-45.45s}", choices[i]) : printf (" %s ", choices[i]); break; case 5: (pm->gc) ? printf ("{%-45.45s}", choices[i]) : printf (" %s ", choices[i]); break; case 6: (pm->gc3s)? printf ("{%-45.45s}", choices[i]) : printf (" %s ", choices[i]); break; case 7: (pm->sil_base) ? printf ("{%-45.45s}", choices[i]) : printf (" %s ", choices[i]); break; case 8: (pm->L_sym) ? printf ("{%-45.45s}", choices[i]) : printf (" %s ", choices[i]); break; case 9: (pm->L_aa)? printf ("{%-45.45s}", choices[i]) : printf (" %s ", choices[i]); break; case 10: (pm->hyd ) ? printf ("{%-45.45s}", choices[i]) : printf (" %s ", choices[i]); break; case 11: (pm->aro ) ? printf ("{%-45.45s}", choices[i]): printf (" %s ", choices[i]); break; case 12: printf (" %s ", choices[i]); break; default: fprintf(stderr, "programming error \n"); my_exit(99, "menu 4"); break; } printf("\n"); } printf (" (X) Return to previous menu\n"); printf ("Choices enclosed with curly brackets are the current" " selections\n"); printf (" Select a menu choice, (Q)uit or (H)elp -> "); gets(pm->junk); if (isalpha( (int) pm->junk[0]) || pm->junk[0]=='\0') { switch (c = toupper( (int) pm->junk[0])){ case 'Q': my_exit(2,"menu 4"); /* User decides to quit programme*/ break; case 'X': case '\0': return; /* <-back to previous menu-> */ break; case 'H': chelp("menu_4"); continue; break; default: fprintf( stderr, "The answer %s is not a valid choice\n", pm->junk); continue; break; } } else if (isdigit ( (int) pm->junk[0] ) ) { c = atoi(pm->junk); switch ((int) c) { /* User wants to calculate CAI then we explain that it is */ /* dependent on the choice of CAI adaptiveness values */ case 1: pm->cai = (char) ((pm->cai) ? 
FALSE : TRUE); if( pm->cai){ clearscr(pm->term_length); printf("\nTo calculate CAI a reference set of highly "); printf("expressed genes \nmust be selected\n\n"); printf("The reference set currently selected is that of " "%s\n\n",cai[pm->c_type].des); printf("See the menu 'Change defaults' to change this " "selection\n\n"); printf("If you wish to use a personal choice of CAI " "vaules\n"); printf("\tplease continue and you will be prompted for" " input\n\n"); pause; } break ; case 2: /* User wants to calculate Fop then we explain that it is */ /* dependent on the choice of optimal codons */ pm->fop = (char) ((pm->fop) ? FALSE : TRUE); if(pm->fop){ clearscr(pm->term_length); printf("\n\nYou have chosen to calculate Fop\n\n"); printf("To calculate Fop a set of optimal " "codons must be selected\n"); printf("The optimal codons of %s are the current selection" "\n\n",fop[pm->f_type].des); printf("See the menu 'Change defaults' to change Fop " "selection\n\n"); printf("If you wish to use a personal choice of Fop " "vaules\n"); printf("\tplease continue and you will be prompted for " "input\n\n"); pause; } break ; case 3: /* User wants to calculate CBI then we remind then that it is */ /* dependent on the choice of optimal codons */ pm->cbi = (char) ((pm->cbi) ? FALSE : TRUE); if(pm->cbi){ clearscr(pm->term_length); printf("\n\nYou have chosen to calculate CBI\n\n"); printf("To calculate CBI a set of optimal " "codons must be selected\n"); printf("The optimal codons of %s are the current selection" "\n\n",fop[pm->f_type].des); printf("See the menu 'Change defaults' to change CBI " "selection\n\n"); printf("If you wish to use a personal choice of CBI " "vaules\n"); printf("\tplease continue and you will be prompted for " "input\n\n"); pause; } break ; case 4: /* calc Nc */ pm->enc = (char) ( (pm->enc) ? FALSE : TRUE); break ; case 5: /* calc GC */ pm->gc = (char) ((pm->gc ) ? FALSE : TRUE); break ; case 6: /* calc GC3s */ pm->gc3s =(char) ( (pm->gc3s) ? 
FALSE : TRUE); break ; case 7: /* calc sil base */ pm->sil_base = (char) ((pm->sil_base) ? FALSE : TRUE); break ; case 8: /* No. synonyms */ pm->L_sym = (char) ((pm->L_sym) ? FALSE : TRUE); break ; case 9: /* No. AminoAcids*/ pm->L_aa = (char) ((pm->L_aa) ? FALSE : TRUE); break ; case 10: /* hydropathicity*/ pm->hyd =(char) ( (pm->hyd ) ? FALSE : TRUE); break; case 11: /* aromatic */ pm->aro = (char) ((pm->aro ) ? FALSE : TRUE); break; case 12: /* all the above */ pm->cai = (char) TRUE; pm->fop = (char) TRUE; pm->cbi = (char) TRUE; pm->enc = (char) TRUE; pm->gc = (char) TRUE; pm->gc3s = (char) TRUE; pm->sil_base = (char) TRUE; pm->L_sym = (char) TRUE; pm->L_aa = (char) TRUE; pm->hyd = (char) TRUE; pm->aro = (char) TRUE; break ; default: fprintf( stderr, "The answer %s is not a valid\n", pm->junk); break; } } else fprintf( stderr, "The answer %s is not a valid choice\n", pm->junk); } return; }

/************************* menu_5 ******************************************/
/* Select what type of COA                                                 */
/*                                                                         */
/* Stores the choice in pm->coa: 'c' (codon usage), 'r' (RSCU),           */
/* 'a' (amino acid usage) or FALSE (no COA).  The current choice is shown */
/* in curly brackets.  After a valid numeric choice, or (Q), if a COA is  */
/* selected the default codons/AAs are set with initilize_coa() and the   */
/* user is offered the advanced menu (menu_coa).                          */
/* An unrecognised answer is reported on stderr and the menu is simply    */
/* shown again; it neither leaves the menu nor reaches the advanced       */
/* prompt.  (X) or a blank line returns to the caller.                    */
/* NOTE(review): gets() cannot bound its write into pm->junk; the size of */
/* junk is not visible from this file, so the calls are left unchanged.   */
/***************************************************************************/
void menu_5 (void)
{
  char *choices[] = {
    "",
    "COA on codon usage",
    "COA on RSCU",
    "COA on Amino Acid usage",
    "Do not perform a COA"
  };
  /* value of pm->coa that marks each entry as current; slot 0 unused   */
  const char coa_code[] = { 0, 'c', 'r', 'a', 0 };
  int i,c,NumChoices;

  NumChoices = 4;
  clearscr(pm->term_length);

  for (;;) {
    printf (" Menu 5 Correspondence analysis\n");
    printf (" Correspondence analysis (COA) \n");
    for (i = 1; i <= NumChoices; i++) {
      printf(" (%i) ", i);
      if ( pm->coa == coa_code[i] )
        printf ("{%-45.45s}", choices[i]);
      else
        printf (" %s ", choices[i]);
      printf("\n");
    }
    printf (" (X) Exit this menu\n");
    printf (" Select a menu choice, (Q)uit or (H)elp -> ");
    gets(pm->junk);
    clearscr(pm->term_length);

    /* <ctype.h> functions need a value representable as unsigned char  */
    if (isalpha( (unsigned char) pm->junk[0]) || pm->junk[0]=='\0') {
      c = toupper( (unsigned char) pm->junk[0]);
      switch ( c ) {
      case 'Q':
        my_exit(2,"menu 5");
        break;
      case 'X':
      case '\0':
        return;
      case 'H':
        chelp("menu_5_coa");
        continue;
      default:
        fprintf( stderr, "The answer %s is not a valid\n", pm->junk);
        continue;                        /* ask again, skip COA prompt  */
      }
    } else {
      c = atoi(pm->junk);
      if ( c <= 0 || c > NumChoices ) {
        fprintf(stderr,"The answer %s is not a valid\n", pm->junk);
        continue;                        /* ask again, stay in the menu */
      }
      switch ( c ) {
      case 1:
        pm->coa = 'c';                   /* COA of CU                   */
        break ;
      case 2:
        pm->coa = 'r';                   /* COA of RSCU                 */
        break ;
      case 3:
        pm->coa = 'a';                   /* COA of AA                   */
        break ;
      case 4:
        pm->coa = FALSE;
        break;
#ifdef DEBUG
      default:
        fprintf(pm->my_err,"Error in switch in coa_raw_out\n");
#endif
      }
    }

    if ( pm->coa ) {
      printf( " Do you wish to see the advanced COA menu (Y/N) [N] ");
      gets( pm->junk );
      /* Select the default codon/AAs to analyse, based on genetic code */
      initilize_coa (pm->code);
      if ( (char) toupper( (unsigned char) pm->junk[0]) == 'Y' )
        menu_coa();
    }
  }
}
/************************* menu_6 ******************************************/ /* Originally designed for the calculation of correlations and */ /* other simple stats.
This code is currently implemented as a perl module */
/* and is waiting to be ported to C hence the menu is unimplemented       */
/*                                                                         */
/* The screen only offers (X)/blank line to return, (Q)uit and (H)elp.    */
/* A number from 1 to 9 is forwarded to main_menu(); any other answer is  */
/* reported on stderr and the menu is displayed again.                    */
/***************************************************************************/
void menu_6 (void)
{
  int answer;

  clearscr(pm->term_length);

  for (;;) {
    printf (" Menu 6-Basic Stats\n"
            "\n"
            "\t ( ) Sorry currently unimplemented \n"
            "\t (X) Exit this menu\n"
            " Select a menu choice, (Q)uit or (H)elp -> ");
    gets(pm->junk);
    clearscr(pm->term_length);

    /* anything that is neither a letter nor an empty line is read as a */
    /* number                                                           */
    if ( !isalpha( (int) pm->junk[0]) && pm->junk[0] != '\0' ) {
      answer = atoi(pm->junk);
      if ( answer > 0 && answer <= 9 )
        main_menu(answer);
      else
        fprintf( stderr, "The answer %s is not a valid\n", pm->junk);
      continue;
    }

    answer = toupper( (int) pm->junk[0]);
    if ( answer == 'Q' ) {
      my_exit(2,"menu 6");
    } else if ( answer == 'X' || answer == '\0' ) {
      return;
    } else if ( answer == 'H' ) {
      chelp("menu_6");
    } else {
      fprintf( stderr, "The answer %s is not a valid\n", pm->junk);
      pause;
    }
  }
}
/************************* menu_7 ******************************************/ /* This selection generates random questions about the genetic code that */ /* has been selected.
For more information see tester.c */
/*                                                                         */
/* Choice 1 calls tester(); (X) or a blank line returns to the caller,    */
/* (Q) quits through my_exit() and (H) shows the help for this menu.      */
/* Any other answer is reported on stderr and the menu is shown again.    */
/***************************************************************************/
void menu_7 (void)
{
  int answer;

  clearscr(pm->term_length);

  for (;;) {
    printf (" Menu 7 A Bit of fun \n"
            "\n"
            " (1) Test your knowledge of the genetic code \n"
            " (X) Exit this menu\n"
            " Select a menu choice, (Q)uit or (H)elp -> ");
    gets(pm->junk);
    clearscr(pm->term_length);

    /* anything that is neither a letter nor an empty line is read as a */
    /* number                                                           */
    if ( !isalpha( (int) pm->junk[0]) && pm->junk[0] != '\0' ) {
      answer = atoi(pm->junk);
      if ( answer == 1 )
        tester();                        /* run the genetic code quiz   */
      else
        fprintf( stderr, "The answer %s is not a valid\n", pm->junk);
      continue;
    }

    answer = toupper( (int) pm->junk[0]);
    if ( answer == 'Q' ) {
      my_exit(2,"menu 7");
    } else if ( answer == 'X' || answer == '\0' ) {
      return;
    } else if ( answer == 'H' ) {
      chelp("menu_7");
    } else {
      fprintf( stderr, "The answer %s is not a valid\n", pm->junk);
      pause;
    }
  }
}
/************************* menu_8 ******************************************/ /* This menu allows the selection of the output to be written to the file */ /* .blk. Only one selection can be made at a time. However CodonW can be */ /* rerun with the same input file but with different output options.
To */ /* make this easier each time this menu is selected the user is given the */ /* choice of changing the output file */ /***************************************************************************/ void menu_8 (void) { struct multi { /* struct of menu items */ char *string; /* description string */ char prog; /* programme name */ }; char loop = TRUE; int c; int ans1,NumChoices; struct multi aii[] = { " ", ' ', /* Initialise a single value of choices in menu */ "Fasta format output of DNA sequence", 'T', "Reader format output of DNA sequence",'R', "Translate input file to AA sequence", 'N', "Codon Usage" , 'C', "Amino acid usage" , 'A', "RSCU values" , 'S', "Relative Amino Acid usage" , 'L', "Dinucleotide frequencies" , 'D', "Exhaustive base compostion analysis", 'B', "No output written to file" , 'X' }; NumChoices = 10; /* Number of choices in Menu */ /* if there is already an output file available the user may */ /* select to change it */ clearscr(pm->term_length); /* because only one type of bulk option is permitted each time codonw runs, it may be necessary to rerun with the same data file but changing the blk output options, if so the user is prompted with the choice of changing the blk filename */ if ( pm->analysis_run ) { printf (" The current bulk output file is %s do you " "wish to change this (y/n) [n] ", pm->curr_tidyoutname); gets(pm->junk); if ( toupper( (int) pm->junk[0]) == 'Y') { fileclose(&pm->tidyoutfile); if (!(pm->tidyoutfile = open_file("codon usage output file", pm->curr_tidyoutname, "w",(int)pm->verbose))) my_exit(1, "menu 8"); strncpy(pm->curr_tidyoutname, pm->junk, MAX_FILENAME_LEN - 1); } /* matches if ( !strlen (pm->junk) || toupper= ............. 
*/ } else { /* matches if( strlen( pm->curr_cufilename) ) */ printf("Note: No output file has been selected !\n"); } while ( loop ) { printf (" Menu 8\n"); printf (" This output will be saved to %s\n\n", pm->curr_tidyoutname); for ( ans1 = 1; ans1 <= NumChoices; ans1++) { if (aii[ans1].prog != (char) pm->bulk) printf("\n\t (%2d) %s", ans1, aii[ans1].string); else printf("\n\t{(%2d) %-45.45s\t\t}", ans1, aii[ans1].string); } printf ("\n\t ( X) To return to previous menu\n"); printf ("Values enclosed with curly{} brackets are the current " "selection\n"); printf (" Select a menu choice, (Q)uit or (H)elp -> "); gets(pm->junk); clearscr(pm->term_length); if (isalpha( (int) pm->junk[0]) || pm->junk[0]=='\0') { switch (c = toupper( (int) pm->junk[0])){ case 'Q': my_exit(2,"menu 8"); /* User decides to quit */ break; case 'X': case '\0': return; /* <-back to previous menu-> */ case 'H': chelp("menu_8_blk"); continue; break; default: fprintf( stderr, "The answer %s is not a valid\n", pm->junk); pause; break; } } else { c = atoi(pm->junk); if ( c > 0 && c <= NumChoices ) pm->bulk = aii[c].prog; else fprintf( stderr, "The answer %s is not a valid\n", pm->junk); } } /* match while */ return; } /*********************** menu_coa ***************************************/ /* This is the advanced correspondence menu, this menu is optional, when a*/ /* a correspondence analysis is chosen, then the user is given a choice of*/ /* entering this menu */ /**************************************************************************/ void menu_coa (void) { int loop = TRUE; char *p; int c; int i; clearscr(pm->term_length); while ( loop ) { printf ("Advanced Correspondence Analysis\n"); printf (" (1) (Un)Select %s\n", (pm->coa=='a')? "amino acids": "codons"); printf (" (2) Change the number of axis (factors) recorded to file\n"); printf (" (3) Add additional genes after COA\n"); printf (" (4) Toggle level of COA output [%s]\n", (pcoa->level=='e')? 
"Exhaustive":"Normal"); if(pm->coa != 'a' ) printf (" (5) No. genes used to identify optimal codons [%i%s]\n", (pcoa->fop_gene <0)? (pcoa->fop_gene*-1): pcoa->fop_gene, (pcoa->fop_gene <0)? "%" : " genes"); printf (" (X) Exit this menu\n"); printf (" Select a menu choice, (Q)uit or (H)elp -> "); gets(pm->junk); clearscr(pm->term_length); if (isalpha( (int) pm->junk[0]) || pm->junk[0]=='\0' ) { c = toupper( (int) pm->junk[0]); switch ( c ) { case 'Q': my_exit(2, "menu coa"); break; case 'X' : case '\0': return; case 'H': chelp("menu_coa"); continue; break; default: fprintf( stderr, "The answer %s is not a valid\n", pm->junk); pause; break; } }else{ c = atoi(pm->junk); switch ( (int) c ) { case 1: select_coa( pm->coa ); /* select what to analysis */ break; case 2: /* Num of axis to record */ printf ( "Changing the number of axis generated from %i " "Please input new value [%i]", (int)pcoa->axis,(int)pcoa->axis); gets(pm->junk); if ( !strlen(pm->junk) ) break; if ( isalpha( (int) pm->junk[0])) break; i = (char)atoi(pm->junk); if ( pm->coa == 'a' && (i > 20 || i<0) || ( i<0 || i>59 )) { fprintf(pm->my_err,"Value is out of range adjusting to max value\n"); if ( pm->coa == 'a' ) pcoa->axis = 20; else pcoa->axis = 59; } else { pcoa->axis = i; } break; case 3: /* Add additional genes */ printf("You have elected to add genes after the initial COA is complete\n" "these will not affect the generation of axis (factors) but can\n" "identify were these additional genes fall based on the trends \n" "identified among the original genes\n" "You must have a separate file containing sequence(s) that are\n" "to be added (these genes must be DNA in fasta format)\n" "Please input filename [cancel this option]: "); gets(pm->junk); if ( !strlen(pm->junk) ) break; strncpy(pcoa->add_row,pm->junk,MAX_FILENAME_LEN-1); break; case 4: /* report analysis of inertia */ pcoa->level = (char) ( (pcoa->level=='n')? 
'e':'n'); break; case 5: /* how to identify optimal codons */ printf ("You have elected to alter the number of genes used \n" "to identify the optimal codons\n" "You can input either an absolute number of genes or a\n" "percentage (example 10%%)\n " "\tPlease input your choice []"); gets ( pm->junk); if( !strlen(pm->junk) ) continue; if( (p=strchr ( pm->junk,'%')) != NULL) { *p='\0'; pcoa->fop_gene=atoi(pm->junk)*-1; if ( pcoa->fop_gene == 0 || pcoa->fop_gene < 50 ) { /* err_catch */ printf ( " Limits are >0%% and less than 50%%\n"); pcoa->fop_gene= (-10); /* assume default */ } }else { pcoa->fop_gene=atoi(pm->junk); /* set No. genes */ } break; default : fprintf(pm->my_err,"Answer out of range\n"); break; } } } return; } /*********************** select_coa ****************************************/ /* This menu is called if the user wants to change the default codons/AA */ /* to be analysised in the COA. It is called from menu_coa */ /***************************************************************************/ void select_coa ( char choice ) { int loop = TRUE; int last_row[4]; int toggle; int x; char *startpoint, *endpoint; clearscr(pm->term_length); while ( loop ) { if ( choice == 'a' ) { /* if AA analysis then */ for ( x = 1 ; x < 22 ; x++ ) { if (!pcoa->amino[x] ) printf("[(%2i)_%s_%s] ", x, paa->aa3[x],paa->aa1[x] ); else printf(" (%2i)_%s_%s ", x, paa->aa3[x],paa->aa1[x] ); if ( !(x % 4) ) printf( "\n"); } printf( "\n"); /*************** Sample of aa choice output ****************************/ /* ( 1)_Phe_F ( 2)_Leu_L ( 3)_Ile_I ( 4)_Met_M */ /* ( 5)_Val_V ( 6)_Ser_S ( 7)_Pro_P ( 8)_Thr_T */ /* ( 9)_Ala_A (10)_Tyr_Y [(11)_TER_*] (12)_His_H */ /* (13)_Gln_Q (14)_Asn_N (15)_Lys_K (16)_Asp_D */ /* (17)_Glu_E (18)_Cys_C (19)_Trp_W (20)_Arg_R */ /* (21)_Gly_G */ }else { printf ( "Using %s \n", pcu->des ); for ( x = 1 ; x < 65 ; x++ ) { if ( !pcoa->codons[x] ) printf("["); else printf(" "); if (last_row[x%4] != pcu->ca[x] ) printf( "(%2i) %s\t%s", 
x,paa->aa3[pcu->ca[x]],paa->cod[x]); else printf( "(%2i) \t%s", x,paa->cod[x]); if ( !pcoa->codons[x] ) printf("]"); else printf(" "); last_row[x%4] = pcu->ca[x]; if ( !(x % 4) ) printf( "\n"); if ( !(x % 16)) printf( "\n"); } } /*************** Sample of codon choice output ***********************/ /* Using Universal Genetic code */ /* ( 1) Phe UUU ( 2) Ser UCU ( 3) Tyr UAU ( 4) Cys UGU */ /* ( 5) UUC ( 6) UCC ( 7) UAC ( 8) UGC */ /* ( 9) Leu UUA (10) UCA [(11) TER UAA][(12) TER UGA] */ /* (13) UUG (14) UCG [(15) UAG][(16) Trp UGG] */ printf("%s bracketed will be excluded from the COA. ", (pm->coa == 'a')? "Amino Acids": "Codons" ); printf("Select number(s) that\nidentify the %s you wish to toggle " "(X to exit, H for help) [X] ", (pm->coa == 'a')? "Amino Acids": "Codons" ); gets(pm->junk); if ( !strlen(pm->junk) || toupper( (int) pm->junk[0]) == 'X' ) { loop=FALSE; continue; } if ( toupper( (int) pm->junk[0]) == 'H' ) { chelp("select"); continue; } endpoint = pm->junk; startpoint = pm->junk; /* now toggle the codons and amino acids to be analysed */ while ( toggle = (int) strtol(startpoint,&endpoint,10) ) { if(endpoint == startpoint ) break; startpoint = endpoint; if (pm->coa == 'a' ) { if ( toggle>21 || toggle<1 ) continue; /* check value is valid */ pcoa->amino [toggle]= (char)((pcoa->amino [toggle])?FALSE:TRUE); }else{ if ( toggle>64 || toggle<1 ) continue; /* check value is valid */ pcoa->codons[toggle]= (char)((pcoa->codons[toggle])?FALSE:TRUE); } } } return; } /************************* Welcome *****************************************/ /* Prints a Banner */ /* the \'s are a problem as they must be escaped */ /***************************************************************************/ void welcome ( void ) { printf ("\n\n"); printf (" // \\ // \\ |I \\ // \\ |I\\ I / \n"); printf (" |I |I I |I I |I I |I\\\\ I \\___ \n"); printf (" |I |I I |I I |I I |I \\\\ I \\ \n"); printf (" |I |I I |I I |I I |I \\\\ I |\n"); printf (" \\\\___/ \\\\____/ |I____/ 
\\\\____/ |I \\\\I \\___/\n"); } /********************** printinfo *****************************************/ /* Prints a summary about this programme, date, version and author of code */ /* whether a debug version */ /***************************************************************************/ int printinfo(void) { # if defined (__FILE__ ) printf("\n\tSource : %s", __FILE__); # endif # if defined (DEBUG) printf("(Debug version)"); # endif printf("\n\tAuthor : John Peden\n"); printf("\tVersion : %.*s\n", strlen(Revision) , Revision ); printf("\tRevised :%.*s %s %.*s\n",(int) strlen(Update) - 7, Update + 6, (*(Update + 7) ? "\n\t by :" : ""), (int) strlen(Author) - 10, Author + 9); #if defined(__DATE__ ) && defined(__TIME__) printf("\n\tCompiled : %s %s\n", __DATE__, __TIME__); #endif printf("\n\t-------------------------------\n\n"); printf(" All sequences must be in a single file separated by title " " lines whose\n first character is either ; or > \n\t any number" " or length of genes is acceptable\n\n"); return 1; } codonW/open_fil.c 777 0 0 26566 10237502143 7340 0/**************************************************************************/ /* CodonW codon usage analysis package */ /* Copyright (C) 2005 John F. Peden */ /* This program is free software; you can redistribute */ /* it and/or modify it under the terms of the GNU General Public License */ /* as published by the Free Software Foundation; version 2 of the */ /* License, */ /* */ /* This program is distributed in the hope that it will be useful, but */ /* WITHOUT ANY WARRANTY; without even the implied warranty of */ /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ /* GNU General Public License for more details. */ /* You should have received a copy of the GNU General Public License along*/ /* with this program; if not, write to the Free Software Foundation, Inc.,*/ /* 675 Mass Ave, Cambridge, MA 02139, USA. 
*/ /* */ /* */ /* The author can be contacted by email (jfp#hanson-codonw@yahoo.com Anti-*/ /* Spam please change the # in my email to an _) */ /* */ /* For the latest version and information see */ /* http://codonw.sourceforge.net */ /**************************************************************************/ /* This is a general subroutine, so we might as well redefine TRUE & FALSE*/ #ifndef TRUE #define TRUE 1 #endif #ifndef FALSE #define FALSE 0 #endif /* What to do if we can't locate the file we where asked to open */ /* On most systems we will try and be nice and show a choice of filenames */ #ifdef _DOS #define no_file_found() system("dir/w"); #elif BSD || SYSV #define no_file_found() system("ls -F"); #elif defined (WIN32) || defined (_WIN) #define no_file_found() system("dir/w"); #else #define no_file_found() printf("This would have presented a list of files\n\tbut I do not know howto your operating system\n"); #endif /* Include header files */ #include #include #include #include #include #include "codonW.h" /************** open_file **************************************************/ /* This subroutine is a front end to fopen open. It takes four parameters */ /* the parameters are used to generate a user prompt for the */ /* filename, and to give a suggested filename, to give the write perms */ /* for the file, and whether or not to overwrite existing files. */ /* File_needed is just a description of the file being opened. It is */ /* assumed that if this descriptor is missing the file is to be opened */ /* without further user input. 
If default_filename is blank then there is */ /* no default_filename */ /* write_perm sets up the type of file being opened */ /* verbose tells this function whether to check if there is a */ /* previous version of any file being opened for writing */ /***************************************************************************/ FILE *open_file(char *file_needed, char *default_filename, char *write_perm, int verbose ) { char infile_name[MAX_FILENAME_LEN]=""; FILE *input=NULL; char temp[4]; char *answer = pm->junk; /**********************************************************************/ /* If a string has been given for file_needed it is assumed */ /* that the user will have a choice of file_names to choose */ /* therefore (s)he will be prompted for a name */ /* if a default filename was supplied by the calling function this */ /* will be suggested as well, otherwise there is no default */ /**********************************************************************/ if ( strlen(file_needed)) { while (!strlen(infile_name) ) { printf("\nName of %s (h for help) [%s] ", file_needed,default_filename); gets(infile_name); /* get filename */ if ( WasHelpCalled ( infile_name ) ) { chelp("open_file_query"); /* Help .... 
*/ infile_name[0]='\0'; continue; } if ( !strlen(infile_name) && default_filename ) strcpy(infile_name, default_filename); } /* end of get filename */ } else if ( strlen(default_filename) ) /* use default filename */ strcpy(infile_name, default_filename); else { /* not enough info */ fprintf(stderr, "Programming error: no filename supplied\n"); my_exit (0,"open file"); } /**********************************************************************/ /* At this point infile_name contains a possible filename */ /* Depending on the mode (write_perm) string this is tested 3 ways */ /* */ /* (r or r+) Test if the file exists if not, all the files in the */ /* current directory are listed and the the user is prompted for */ /* an alternative name or they may quit the programme */ /* */ /* (a, a+) Not tested, just open the file */ /* */ /* (w, w+) If the variable verbose = FALSE then no test */ /* If verbose == TRUE then the file is checked to see if */ /* it already exsists, if it does then the user is prompted for */ /* either for permission to overwrite this file or to */ /* suggest an alternative file_name which is then tested as well */ /* the user can type q to quit at any stage of this prompting process */ /**********************************************************************/ if ( !strcmp(write_perm, "r") || !strcmp(write_perm, "r+") ||!strcmp(write_perm, "rb") ){ while ( !(input = fopen (infile_name , write_perm ))) { fprintf(stderr,"\nThese are the files in the current directory " "I cannot find %.*s \n\n",strlen(infile_name),infile_name); no_file_found(); fprintf(stderr, "\n\nPlease enter another filename, " " (Q)uit, (H)elp [%s] ",infile_name); gets(answer); if (strlen (answer)==1 && ((char)toupper((int)answer[0])=='Q')) my_exit(2,"open_file"); else if (WasHelpCalled ( infile_name )){ chelp ("File_not_found"); } else if (strlen (answer)) strcpy (infile_name, answer); } /* end of while loop */ strcpy ( answer,infile_name); /* allow transfer */ return input; } 
/************************* Append ***********************************/ else if ( !strcmp(write_perm, "a") || !strcmp(write_perm, "a+") || !strcmp(write_perm, "ab") ) { input = fopen (infile_name, write_perm); strcpy ( answer,infile_name); return input; } /************************* Write **********************************/ else if ( !strcmp( write_perm, "w") || !strcmp(write_perm, "w+") ||!strcmp( write_perm, "wb") ) { while ( verbose == TRUE ) { if ( (input = fopen (infile_name , "r")) ) { fclose(input); /* close the filehandle */ fprintf(stderr, "\nWarning :File %.*s " "exists already \n\tDo you wish to" " overwrite ? (y/n/h/q)\t [y] ", strlen(infile_name), infile_name); fgets(temp, 3, stdin); switch (toupper( (int) temp[0])) { case 'Y': case '\0': case '\n': verbose = FALSE; continue; case 'Q': my_exit(2,"open_file2"); break; case 'H': chelp("file_exists"); continue; break; default: fprintf(stderr, "\nYou decided not to overwrite, please enter\n" " another filename, (q)uit, (a)ppend, (h)elp \n" " (a/q/h/filename)\t[a] "); gets(answer); } /* if the answer is 'a' then the default file is opened */ /* as appendable else if 'q' then the programme exits */ /* anything else is taken as a file name */ if ( strlen(answer) <= 1 ) { switch (toupper( (int) answer[0])) { case 'Q': return (NULL); case 'A': case '\0': case'\n': verbose = FALSE; /* leave the while loop */ strcpy(write_perm, "a+"); break; case 'H': chelp("file_append"); continue; break; default: continue; }; /* end of switch */ } } else /* filename is unique */ verbose = FALSE; /* exit the while loop */ } /* match while preserve */ input = fopen (infile_name,write_perm); /* opens filehandle */ strcpy ( answer,infile_name); return input; } /* matchs if w or w+ */ return (NULL); } /************** Main just for testing purposes ***************************/ /* uncomment to test function as a standalone subroutine */ /* will also need to replace my_exit with exit calls */ 
/*************************************************************************/ /* main () { FILE *test=NULL; if( test = open_file( "test file","","r",NULL)) printf( "Success\n"); else printf( "Failed\n"); } */ /*************************************************************************/ codonW/READ_coa.txt 777 0 0 17333 10237502210 7462 0 ======================================== CodonW was written by John Peden in the laboratory of Paul Sharp at the University of Nottingham. It is distributed under the terms of the GNU public license, see the file License included with the distribution. ======================================== README.coa The permanent result files from a COA created by CodonW have the extension “.coa” for a description of their and contents see Table 1. Short description of output files created by correspondence analysis in CodonW. summary.coa This file contains a summary of all the information generated by correspondence analysis, including all the data written to files listed below, except for the output written to cusort.coa. eigen.coa Each axis generated in the correspondence analysis is represented by a row of information. Each row consists of four columns, (1) the number of the axis, (2) the axis eigenvalue, (3) the relative inertia of the axis, (4) the sum of the relative inertia. amino.coa† or codon.coa Each codon or amino acid included in the correspondence analysis is represented by a row. The first column is description of the variable, the subsequent columns contain the coordinate of the codon or amino acid on the axes, the number of axes is user definable. genes.coa Each row represents one gene, the first column contains a unique description for each gene, and subsequent columns contain the coordinates for each of the recorded axis. If additional genes are added to the correspondence analysis (advanced correspondence analysis option), the coordinates of these genes are appended to this file. 
cusort.coa† Contains the codon usage of each gene, sorted by the gene’s coordinate on the principal axis, this information is used to generate the table in hilo.coa. This file records a 2 way Chi squared contingency test between two subsets (as defined by the “advanced correspondence analysis options”) of genes positioned at the extremes of axis 1 (cusort.coa). cai.coa† Contains the relative usage of each codon within each synonym family, the most frequent codon assigned the value one and all other codons are expressed relative to this. This file can be used to calculate species specific CAI values. fop.coa †and cbi.coa† Contains a list of the optimal codons and non-optimal codons as identified in the file “hilo.coa”. The format of this file can be utilised by CodonW to calculate Fop and CBI using a specific choice of optimal codons. inertia.coa This file is only generated if the exhaustive output option is selected under the advanced correspondence analysis menu. It contains four tables of information, the first two report the absolute contribution of each gene and codon (or amino acid) to the inertia explained by each axis. The second two tables report the fraction of variation in each gene and codon (or amino acid) explained by each axis. codon.coa and hilo.coa are not generated during the correspondence analysis of amino acids. Detailed explanation of file contents summary.coa ======================================== Correspondence analysis generates a large volume of data; CodonW writes the essential data necessary to interpret the correspondence analysis to the file “summary.coa”. genes.coa codons.coa amino.coa ======================================== The most complex analysis that CodonW performs is correspondence analysis (COA). COA creates a series of orthogonal axes to identify trends that explain the data variation, with each subsequent axis explaining a decreasing amount of the variation. COA positions each gene and codon (or amino acid) on these axes.
An important property is that the ordination of the rows (genes) and columns (codons or amino acids) are superimposable. eigen.coa ======================================== The eigenvalues of the principal trends, as well as the more accessible fraction (with the cumulative total) of the total data inertia that each axis explains, are recorded to summary.coa and eigen.coa. cusort.coa ======================================== To simplify the analysis of codon usage CodonW assumes that the principal trend is correlated with gene expression. It uses this assumption to identify putative optimal codons. Though the adage GIGO “garbage in, garbage out” must be stressed, it is the researcher's responsibility to establish that the principal trend is correlated with gene expression (see tutorial for some examples of how to do this). To identify the putative optimal codons, the genes are sorted according to their position on the principal axis; the sorted codon usage of these genes is written to the file “cusort.coa”. Then a number of genes, decided by the advanced correspondence analysis menu option “number of genes used to identify optimal codons”, are read from the start and end of this file (i.e. equivalent to the extremes of the principal axis), and the codon usage of each set of genes is totalled. The set of genes with the lower Nc (more highly biased) is putatively identified as the more highly expressed. hilo.coa ======================================== Optimal codons are defined as those codons that occur significantly more often in highly expressed genes relative to their frequency in lowly expressed genes. Significance is assessed by a two-way chi square contingency test with the criterion of p < 0.01. The advantage of using a test of significance to identify optimal codons is that variation in codon usage between highly and lowly expressed genes that is due to random noise is suppressed, but a disadvantage is that the test is dependent on sample size.
After CodonW does a two way chi squared test on the genes taken from the extremes of axis 1, their codon usage and RSCU are output as a table to “summary.coa” and “hilo.coa”. Those codons which have been putatively identified as optimal (p < 0.01) are indicated with an asterisk (*). Though not considered optimal by CodonW, codons that occur more frequently in the highly expressed dataset at 0.01 < p < 0.05 are indicated with an at sign (@). fop.coa cbi.coa cai.coa ======================================== CodonW measures the degree to which the codon usage of a gene has adapted towards the usage of optimal codons. It does this by calculating these indices, the frequency of optimal codons (Fop), codon bias index, and codon adaptation index (CAI). To calculate these indexes, information about codon usage in the species being analysed is needed. The indices Fop and CBI use the optimal codons for the species. The index CAI uses codon adaptation values. For some species this information is known, and for these the optimal codons and codon adaptiveness values are in-built into codonW (see the “Change Defaults” menu). For other species these indexes cannot be calculated unless the additional information is known. During calculation of these indices the user is prompted for input files. During a COA CodonW generates the output files “cai.coa”, “fop.coa” and “cbi.coa”. These files can be used as input files for their respective indices (they are already in the correct format). Again it must be stressed that CodonW must make a number of assumptions to generate these files. These are: that the major trend in the codon usage is correlated with expression level; that the dataset contains highly expressed genes; that the genes used to identify optimal codons were highly expressed. If these assumptions are valid then the files “cbi.coa”, “cai.coa” and “fop.coa” can be used to calculate the indexes CBI, CAI and Fop respectively.
For the most up to date version see http://codonw.sourceforge.net codonW/Readme.txt 777 0 0 10501 10237502234 7316 0 CodonW is a package for codon usage analysis. It was designed to simplify Multivariate Analysis (MVA) of codon usage. The MVA method employed in CodonW is correspondence analysis (COA) (the most popular MVA method for codon usage analysis). CodonW can gen erate a COA for codon usage, relative synonymous codon usage or amino acid usage. Additional analyses of codon usage include investigation of optimal codons, codon and dinucleotide bias, and/or base composition. CodonW also has the capacity to analysis sequences encoded by genetic codes other than the universal code. Why call it codonW? Well first you must realise that "clustal" (a very popular multiple alignment program by Des Higgins) was originally written in Paul's lab in Trinity College Dublin. Clustal has since been rewritten from FORTRAN into C and undergone several name changes c lustal-> clustalv-> clustalw -> clustalx. There was also a program called "codons" written in FORTRAN by Andrew Lloyd (a post-doc in Paul's lab), this was the original inspiration for codonW. An early version of codonW, written in C, was called codonv. Wh en the code was enhanced to include multivariate analysis, what better name than codonW. CodonW version 1.3 June 1997 ================= The source code for CodonW can be obtained from ftp://molbiol.ox.ac.uk/cu/codonW.tar.Z. Binaries for a number of platforms are also available at this site see ftp://molbiol.ox.ac.uk/cu. To Install and Build on UNIX Platforms ================= Get the source code from ftp://molbiol.ox.ac.uk/cu/codonW.tar.Z Change directory to the directory where you intend to install CodonW. uncompress codonW.tar.Z tar -xvf codonW.tar cd codonw ./codonWinstall all (this writes a makefiel and then builds codonw) This will ask a few questions regarding 'make' and 'cc' and then configure the installation and compile the programs. 
If you don't understand the questions, just accept the default by pressing the return key and the installation should be OK using the defaults. The install script also creates a number of links to the compiled executable codonW. These links allow codonW to emulate other useful codon usage analysis and sequence manipulation software, bypassing the menu interface (for more information see README.links). Alternatively you can just elect to only build the main program, and not install the linked programs. ./codonWinstall codonw (compile only the executable codonw) Once you have successfully built codonw, try these commands to get you started. ./codonw -help (for commandline summary) ./codonw (menu interface) There is also a short tutorial. For the most recent documentation on codonW see http://www.molbiol.ox.ac.uk/cu/ To Set the Codonw Help Environment: ================= CodonW has an in-built help system, the help file is called codonW.hlp and should be located in the same directory as the executable codonw. Alternatively the help file can be pointed to by the environment variable CODONW_H, if you are using a C shell you can add something similar to this to your .login script. setenv CODONW_H file_path Where file_path is the fully defined path name for codonW.hlp. Additional Files: ================= README.indices - explanation about the various codon usage indices that codonW calculates. README.coa- explanation about the output files from the correspondence analysis. README.links- explanation about the auxiliary programmes created during the making of codonw. Tutorial- A quick tutorial on the analysis of codon usage of the open reading frames from Saccharomyces cerevisiae chromosome III. input.dat- An input file containing 167 open reading frames from Saccharomyces cerevisiae chromosome III. (see Tutorial). Recoding - A quick explanation about how amino acids and codons are represented internally within codonW.
Bugs This is a beta version of codonW, therefore there may be bugs within the code. If you do find or notice anything strange please e-mail bug reports/complaints/suggestions to johnp@molbiol.ox.ac.uk. Remember to include an example of the input file (and outp ut files) and the options selected that generated the error, don't forget to tell me the make of computer and operating system it was running under. codonW/README_coa.txt 777 0 0 16543 10237502250 7712 0 README.coa The permanent result files from a COA created by CodonW have the extension “.coa” for a description of their and contents see Table 1. Short description of output files created by correspondence analysis in CodonW. summary.coa This file contains a summary of all the information generated by correspondence analysis, including all the data written to files listed below, except for the output written to cusort.coa. eigen.coa Each axis generated in the correspondence analysis is represented by a row of information. Each row consists of four columns, (1) the number of the axis, (2) the axis eigenvalue, (3) the relative inertia of the axis, (4) the sum of the relative inertia. amino.coa† or codon.coa Each codon or amino acid included in the correspondence analysis is represented by a row. The first column is description of the variable, the subsequent columns contain the coordinate of the codon or amino acid on the axes, the number of axes is user definable. genes.coa Each row represents one gene, the first column contains a unique description for each gene, and subsequent columns contain the coordinates for each of the recorded axis. If additional genes are added to the correspondence analysis (advanced correspondence analysis option), the coordinates of these genes are appended to this file. 
cusort.coa† Contains the codon usage of each gene, sorted by the gene’s coordinate on the principal axis, this information is used to generate the table in hilo.coa. This file records a 2 way Chi squared contingency test between two subsets (as defined by the “advanced correspondence analysis options”) of genes positioned at the extremes of axis 1 (cusort.coa). cai.coa† Contains the relative usage of each codon within each synonym family, the most frequent codon assigned the value one and all other codons are expressed relative to this. This file can be used to calculate species specific CAI values. fop.coa †and cbi.coa† Contains a list of the optimal codons and non-optimal codons as identified in the file “hilo.coa”. The format of this file can be utilised by CodonW to calculate Fop and CBI using a specific choice of optimal codons. inertia.coa This file is only generated if the exhaustive output option is selected under the advanced correspondence analysis menu. It contains four tables of information, the first two report the absolute contribution of each gene and codon (or amino acid) to the inertia explained by each axis. The second two tables report the fraction of variation in each gene and codon (or amino acid) explained by each axis. codon.coa and hilo.coa are not generated during the correspondence analysis of amino acids. Detailed explanation of file contents summary.coa ======================================== Correspondence analysis generates a large volume of data; CodonW writes the essential data necessary to interpret the correspondence analysis to the file “summary.coa”. genes.coa codons.coa amino.coa ======================================== The most complex analysis that CodonW performs is correspondence analysis (COA). COA creates a series of orthogonal axes to identify trends that explain the data variation, with each subsequent axis explaining a decreasing amount of the variation. COA positions each gene and codon (or amino acid) on these axes.
An important property is that the ordination of the rows (genes) and columns (codons or amino acids) are superimposable. eigen.coa ======================================== The Eigen values of the principle trends, as well as the more accessible fraction (with the cumulative total) of the total data inertia, that each axes is explaining, is recorded to summary.coa and eigen.coa. cusort.coa ======================================== To simplify analyse of codon usage CodonW assumes that the principle trend is correlated with gene expression. It uses this assumption to identify putative optimal codons. Though the adage GIGO “garbage in, garbage out” must be stressed, it is the researchers responsibility to establish that the principle trend is correlated with gene expression (see tutorial for some example of how to do this). To identify the putative optimal codons, the genes are sorted according to their position on the principle, the sorted codon usage of these genes is written to the file “cusort.coa”. Then a number of genes, decided by the advanced correspondence analysis menu option “number of genes used to identify optimal codons”, are read from the start and end of this file (i.e. equivalent the extremes of the principle axis), the codon usage of each set of genes is totalled. The set of genes with the lower Nc (more highly biased) is putatively identified as the more highly expressed. hilo.coa ======================================== Optimal codons are defined as those codons that occur significantly more often in highly expressed genes relative to their frequency in lowly expressed genes. Significance is assessed by a two-way chi square contingency test with the criterion of p < 0.01. The advantage of using a test of significance to identify optimal codons is that variation in codon usage between highly and lowly expressed genes, that is due to random noise is suppressed, but a disadvantage is that the test is dependent on sample size. 
After CodonW does a two way chi squared test on the genes taken from the extremes of axis 1, their codon usage and RSCU are output as a table to “summary.coa” and “hilo.coa”. Those codons which have been putatively identified as optimal (p < 0.01) are indicated with an asterisk (*). Though not considered optimal by CodonW, codons that occur more frequently in the highly expressed dataset at 0.01 < p < 0.05 are indicated with an at sign (@). fop.coa cbi.coa cai.coa ======================================== CodonW measures the degree to which the codon usage of a gene has adapted towards the usage of optimal codons. It does this by calculating these indices: the frequency of optimal codons (Fop), codon bias index (CBI), and codon adaptation index (CAI). To calculate these indexes, information about codon usage in the species being analysed is needed. The indices Fop and CBI use the optimal codons for the species. The index CAI uses codon adaptation values. For some species this information is known, and for these the optimal codons and codon adaptiveness values are in-built into codonW (see the “Change Defaults” menu). For other species these indexes cannot be calculated unless the additional information is known. During calculation of these indices the user is prompted for input files. During a COA CodonW generates the output files “cai.coa”, “fop.coa” and “cbi.coa”. These files can be used as input files for their respective indices (they are already in the correct format). Again it must be stressed that CodonW must make a number of assumptions to generate these files. These are: that the major trend in the codon usage is correlated with expression level; that the dataset contains highly expressed genes; that the genes used to identify optimal codons were highly expressed. If these assumptions are valid then the files “cbi.coa”, “cai.coa” and “fop.coa” can be used to calculate the indexes CBI, CAI and Fop respectively. 
codonW/README_indices.txt 777 0 0 17151 10237502264 10567 0Codon usage indices This document describes the indices calculated by CodonW, by default only the G+C content of the sequence is reported. The others being dependent on the genetic code selected. More than one index may be calculated at the same time. Codon Adaptation Index (CAI) (Sharp and Li 1987). CAI is a measurement of the relative adaptiveness of the codon usage of a gene towards the codon usage of highly expressed genes. The relative adaptiveness (w) of each codon is the ratio of the usage of each codon, to that of the most abundant codon for the same amino acid. The relative adaptiveness of codons for albeit a limited choice of species, can be selected from Menu 3. The user can also input a personal choice of values. The CAI index is defined as the geometric mean of these relative adaptiveness values. Non-synonymous codons and termination codons (dependent on genetic code) are excluded. To prevent a codon absent from the reference set but present in other genes from having a relative adaptiveness value of zero, which would cause CAI to evaluate to zero for any genes which used that codon; it was suggested that absent codons should be assigned a frequency of 0.5 when estimating ? (Sharp and Li 1987). An alternative suggestion was that ? should be adjusted to 0.01 where otherwise it would be less than this value (Bulmer 1988). CodonW does not adjust the ? value if a non-zero-input value is found; zero values are assigned a value of 0.01. Frequency of Optimal codons (Fop) (Ikemura 1981). This index, is the ratio of optimal codons to synonymous codons (genetic code dependent). Optimal codons for several species are in-built and can be selected using Menu 3. By default, the optimal codons of E. coli are assumed. The user may also enter a personal choice of optimal codons. If rare synonymous codons have been identified, there is a choice of calculating the original Fop index or a modified Fop index. 
Fop values for the original index are always between 0 (where no optimal codons are used) and 1 (where only optimal codons are used). When calculating the modified Fop index, negative values are adjusted to zero. Codon Bias Index (CBI) (Bennetzen and Hall 1982). Codon bias index is another measure of directional codon bias, it measures the extent to which a gene uses a subset of optimal codons. CBI is similar to Fop as used by Ikemura, with expected usage used as a scaling factor. In a gene with extreme codon bias, CBI will equal 1.0, in a gene with random codon usage CBI will equal 0.0. Note that it is possible for the number of optimal codons to be less than expected by random change. This results in a negative value for CBI. The effective number of codons (NC) (Wright 1990). This index is a simple measure of overall codon bias and is analogous to the effective number of alleles measure used in population genetics. Knowledge of the optimal codons or a reference set of highly expressed genes is unnecessary. Initially the homozygosity for each amino acid is estimated from the squared codon frequencies (see Equation 5). If amino acids are rare or missing, adjustments must be made. When there are no amino acids in a synonymous family, Nc is not calculated as the gene is either too short or has extremely skewed amino acid usage (Wright 1990). An exception to this is made for genetic codes where isoleucine is the only 3-fold synonymous amino acid, and is not used in the protein gene. The reported value of Nc is always between 20 (when only one codon is effectively used for each amino acid) and 61 (when codons are used randomly). If the calculated Nc is greater than 61 (because codon usage is more evenly distributed than expected), it is adjusted to 61. G+C content of the gene. The frequency of nucleotides that are guanine or cytosine. G+C content 3rd position of synonymous codons (GC3s). 
This the fraction of codons, that are synonymous at the third codon position, which have either a guanine of cytosine at that third codon position. Silent base compositions. Selection of this option calculates four separate indices, i.e. G3s, C3s, A3s & T3s. Although correlated with GC3s, this index is not directly comparable. It quantifies the usage of each base at synonymous third codon positions. When calculating GC3s each synonymous amino acid has at least one synonym with G or C in the third position. Two or three fold synonymous amino acids do not have an equal choice between bases in the synonymous third position. The index A3s is the frequency that codons have an A at their synonymous third position, relative to the amino acids that could have a synonym with A in the synonymous third codon position. The codon usage analysis of Caenorhabditis elegans identified a trend correlated with the frequency of G3s. Though it was not clear whether it reflected variation in base composition (or mutational biases) among regions of the C. elegans genome, or another factor (Stenico et al. 1994). Length silent sites (Lsil). Frequency of synonymous codons. Length amino acids (Laa). Equivalent to the number of translatable codons. Hydropathicity of protein. The general average hydropathicity or (GRAVY) score, for the hypothetical translated gene product. It is calculated as the arithmetic mean of the sum of the hydropathic indices of each amino acid (Kyte and Doolittle 1982). This index has been used to quantify the major COA trends in the amino acid usage of E. coli genes (Lobry and Gautier 1994). Aromaticity score The frequency of aromatic amino acids (Phe, Tyr, Trp) in the hypothetical translated gene product. The hydropathicity and aromaticity protein scores are indices of amino acid usage. The strongest trend in the variation in the amino acid composition of E. 
coli genes is correlated with protein hydropathicity, the second trend is correlated with gene expression, while the third is correlated with aromaticity (Lobry and Gautier 1994). The variation in amino acid composition can have applications for the analysis of codon usage. If total codon usage is analysed, a component of the variation will be due to differences in the amino acid composition of genes. Bennetzen, J. L., and B. D. Hall, (1982). Codon selection in yeast. Journal of Biological Chemistry 257: 3026-3031. Bulmer, M., (1988). Are codon usage patterns in unicellular organisms determined by selection-mutation balance. Journal of Evolutionary Biology 1: 15-26. Ikemura, T., (1981). Correlation between the abundance of Escherichia coli transfer RNAs and the occurrence of the respective codons in its protein genes: a proposal for a synonymous codon choice that is optimal for the E. coli system. Journal of Molecular Biology 151: 389- 409. Kyte, J., and R. Doolittle, (1982). A simple method for displaying the hydropathic character of a protein. Journal of Molecular Biology 157: 105-132. Lobry, J. R., and C. Gautier, (1994). Hydrophobicity, expressivity and aromaticity are the major trends of amino acid usage in 999 Escherichia coli chromosome encoded genes. Nucleic Acids Research 22: 3174-3180. Sharp, P. M., and W. H. Li, (1987). The codon adaptation index a measure of directional synonymous codon usage bias, and its potential applications. Nucleic Acids Research 15: 1281-1295. Stenico, M., A. T. Lloyd and P. M. Sharp, (1994). Codon usage in Caenorhabditis elegans delineation of translational selection and mutational biases. Nucleic Acids Research 22: 2437-2446. Wright, F., (1990). The effective number of codons used in a gene. Gene 87 : 23-29. codonW/Recoding.txt 777 0 0 5620 10237502327 7644 0Data Recoding To add computation codonW converts sequence information automatically from it original text format into a numerical format. 
This is normally transparent to the user. To add additional genetic codes or a personal choice of codon values for calculating the Fop, CAI or CBI indices, some understanding of the schema used to convert the sequences to numerical strings is advisable. When calculating the indices Fop, CBI, or CAI, which are measures of codon bias in relation to the codon usage of a set of optimal genes, there is an option of using a personal choice of these values. These are read from a file; there must be one value for each codon (64 in total) and they must be found in the file in a set sequence (i.e. the numerical order of the codons, TTT, TCT ... GAG, GGG). This is also the order in which codon and amino acid results are recorded to file. Internally CodonW recodes all nucleotides, codons and amino acids. Nucleotides are recoded as T/U=1, C=2, A=3, G=4. The 20 standard amino acids and the termination codons are recoded as integer values in the range 1 to 21; note that the stop codon is assigned the amino acid value 11 (see Table 2). The decision about whether a codon is synonymous, or how many members are in a particular amino acid synonymous family, is taken at run time and is dependent on the genetic code chosen. Each codon is recoded into an integer value in the range 1 to 64, see Table 1. The formula used to recode the codons is: Equation 1 code=((P1-1)*16)+P2+((P3-1)*4) 1 <= code <= 64 Where each of the three codon positions is represented by P1, P2 and P3. Using this recoding convention, the codon ATG has the value 45. code=((3-1)*16)+1+((4-1)*4)=45 Unrecognised or non-translatable bases, codons or amino acids are all assigned the value zero. 
Table 1 Numerical values used for recoding codons Code Codon AA Code Codon AA Code Codon AA Code Codon AA 1 UUU Phe 2 UCU Ser 3 UAU Tyr 4 UGU Cys 5 UUC 6 UCC 7 UAC 8 UGC 9 UUA Leu 10 UCA 11 UAA STOP 12 UGA STOP 13 UUG 14 UCG 15 UAG 16 UGG Trp 17 CUU 18 CCU Pro 19 CAU His 20 CGU Arg 21 CUC 22 CCC 23 CAC 24 CGC 25 CUA 26 CCA 27 CAA Gln 28 CGA 29 CUG 30 CCG 31 CAG 32 CGG 33 AUU Ile 34 ACU Thr 35 AAU Asn 36 AGU Ser 37 AUC 38 ACC 39 AAC 40 AGC 41 AUA 42 ACA 43 AAA Lys 44 AGA Arg 45 AUG Met 46 ACG 47 AAG 48 AGG 49 GUU Val 50 GCU Ala 51 GAU Asp 52 GGU Gly 53 GUC 54 GCC 55 GAC 56 GGC 57 GUA 58 GCA 59 GAA Glu 60 GGA 61 GUG 62 GCG 63 GAG 64 GGG Table 2 Numerical values used to recode amino acids. Code AA One letter code Code AA One letter code 1 Phe F 2 Leu L 3 Ile I 4 Met M 5 Val V 6 Ser S 7 Pro P 8 Thr T 9 Ala A 10 Tyr Y 11 Stop * 12 His H 13 Gln Q 14 Asn N 15 Lys K 16 Asp D 17 Glu E 18 Cys C 19 Trp W 20 Arg R 21 Gly G codonW/tester.c 777 0 0 23606 10240473533 7047 0/**************************************************************************/ /* CodonW codon usage analysis package */ /* Copyright (C) 2005 John F. Peden */ /* This program is free software; you can redistribute */ /* it and/or modify it under the terms of the GNU General Public License */ /* as published by the Free Software Foundation; version 2 of the */ /* License, */ /* */ /* This program is distributed in the hope that it will be useful, but */ /* WITHOUT ANY WARRANTY; without even the implied warranty of */ /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ /* GNU General Public License for more details. */ /* You should have received a copy of the GNU General Public License along*/ /* with this program; if not, write to the Free Software Foundation, Inc.,*/ /* 675 Mass Ave, Cambridge, MA 02139, USA. 
*/ /* */ /* */ /* The author can be contacted by email (jfp#hanson-codonw@yahoo.com Anti-*/ /* Spam please change the # in my email to an _) */ /* */ /* For the latest version and information see */ /* http://codonw.sourceforge.net */ /******** Tester *****************************************************/ /* This function is used to teach the genetic code, it generates a random */ /* series of questions about the selected genetic code. */ /* The questions include */ /* 1 and 3 letter amino acid names */ /* The translation of each codon */ /* The size of each amino acid family */ /**************************************************************************/ #define rand_num(z) (int)((((float)rand()/((long)RAND_MAX))*(float)z)+1) #ifdef _WINDOWS #define beeep Beep(150,150) #include #include #else #define beeep printf("\007") #endif #include #include #include #include #include #include #include "codonW.h" /* The accuracy of the answers are recorded using these three variable */ int num_questions = 0; int num_cheats = 0; int num_wrong = 0; void tester ( void ) { char loop; char main_loop=TRUE; char tmp_AA [4]; char tmp_AA2[4]; srand( (unsigned)time( NULL ) ); /* initialise random num gen */ printf(" Welcome to TESTER \n(which just tests your " "knowledge of the Genetic code)\n" " The genetic code used is dependant on\n what" " code is selected in menu 3\n" " The current code is %s %s\n" "\n If you get stuck try typing ? 
for a hint\n" " To leave type exit or quit\n", pcu->des, pcu->typ); /******************* main loop ****************************/ while ( main_loop ) { int i,x; i = rand_num(10); /* random number to between 1 and 10 */ printf("Type Help for help:"); /* the switch biases the questions so their freq is not equal */ switch (i) { case 1: case 2: /* amino acid question */ i = rand_num(21); loop = TRUE; while ( loop ) { printf("\nWhat is the three letter equivalent for the AA" " %s ", paa->aa1[i]); gets( pm->junk ) ; strcpy ( tmp_AA, paa->aa3[i] ); for ( x = 0 ; x < (int)strlen(tmp_AA); x++) tmp_AA[x] = (char) toupper( (int) tmp_AA[x]); for ( x = 0 ; x < (int)strlen(pm->junk ); x++) pm->junk [x] = (char) toupper( (int) pm->junk[x]); if ( !strcmp ( pm->junk, "QUIT" ) || !strcmp ( pm->junk, "EXIT" )) { asummary(); main_loop = FALSE; break; } if ( !strcmp ( pm->junk,"HELP")) { chelp("fun"); continue; } if ( !strcmp (pm->junk, "?" ) ) { printf( "Cheat %s", paa->aa3[i]); num_cheats++; /* The user cheated */ continue; } if ( !strcmp (pm->junk , tmp_AA )) { loop = FALSE; } else { num_wrong++; /* Wrong answer */ printf("Wrong answer (try ?)\n"); } } break; case 3: /* How big is this AA family*/ i = rand_num(21); loop = TRUE; while ( loop ) { printf("\nHow many codons encode the Amino Acid %s ", paa->aa1[i]); gets( pm->junk ) ; for ( x = 0 ; x < (int)strlen(pm->junk); x++) pm->junk[x] = (char) toupper( (int) pm->junk[x]); if ( !strcmp ( pm->junk, "QUIT" ) || !strcmp ( pm->junk, "EXIT" )) { asummary(); main_loop = FALSE; break; } if ( !strcmp ( pm->junk,"HELP")) { chelp("fun"); continue; } if ( !strcmp (pm->junk, "?" 
) ) { printf( "Cheat %i\n", *(da + i) ); num_cheats++; continue; } if ( atoi(pm->junk) == *(da + i) ) loop = FALSE; else { num_wrong++; printf("Wrong answer (try ?)\n"); } } break; case 4: /* 60% of the time ask */ case 5: /* ask questions about */ case 6: /* codon to aa translation*/ case 7: case 8: case 9: case 10: i = rand_num(64); loop = TRUE; while ( loop ) { printf("\nName the Amino Acid encoded by the codon %s ", paa->cod[i]); gets( pm->junk ); for ( x = 0 ; x < (int)strlen(pm->junk ); x++) pm->junk[x] = (char) toupper( (int) pm->junk[x]); if ( !strcmp ( pm->junk, "QUIT" ) || !strcmp ( pm->junk, "EXIT" )) { asummary(); main_loop = FALSE; break; } if ( !strcmp ( pm->junk,"HELP")) { chelp("fun"); continue; } if ( !strcmp (pm->junk, "?" ) ) { printf( "Cheat %s (%s)", paa->aa1[pcu->ca[i]] , paa->aa3[pcu->ca[i]]); num_cheats++; /* tell me the answer */ continue; } /* allow 1 or 3 letter amino acid code as the ans */ strcpy ( tmp_AA, paa->aa1[pcu->ca[i]] ); strcpy ( tmp_AA2, paa->aa3[pcu->ca[i]] ); /* uppercase everything, the AA names and the answer */ for ( x = 0 ; x < (int)strlen(tmp_AA); x++) tmp_AA[x] = (char)toupper( (int) tmp_AA[x]); for ( x = 0 ; x < (int)strlen(tmp_AA2); x++) tmp_AA2[x] = (char)toupper((int) tmp_AA2[x]); for ( x = 0 ; x < (int)strlen(pm->junk ); x++) pm->junk [x] = (char)toupper((int) pm->junk[x]); if ( !strcmp(tmp_AA, pm->junk) || !strcmp(tmp_AA2,pm->junk) ) { loop = FALSE; } else { printf("Wrong answer (try ?)\n"); num_wrong++; } } break; default: printf("mistake == %i \n", i); exit(0); /* error catch */ break; } /* end of switch */ num_questions++; } /* end of while */ return; } /* end of main */ /*********** Asummary ******************************************************/ /* Write out a summary of the users results */ /***************************************************************************/ void asummary (void) { printf ( " You answered\n \t %5i questions\n", num_questions); printf ( " \t %5i answers were wrong\n", num_wrong); 
printf ( " \t %5i times you had to ask for a hint\n", num_cheats); printf ( " \t %3.0f%c accuracy \n", (float) ( (num_questions) ? (float)100 * (num_questions - num_wrong) / (float)num_questions : 0 ),'%'); pause; return; } codonW/Tutorial.txt 777 0 0 46225 10237502375 7746 0Tutorial Codon usage analysis Included with this distribution of codonW should be a test dataset of sequences (input.dat). We will use this set of sequence as a typical example of a codon usage analysis. This test dataset is derived from the open reading frames (ORFs) of Saccharomyces cerevisiae chromosome III as annotated in the EMBL feature table for the sequence entry SCCHRIII (accession number X59720). In the current EMBL (Release 51 June 1997) the number of annotated ORFs was 172. The file input.dat contains 111 of these ORFs. The rational and why some ORFs where removed is explained below. The commandline syntax of codonW will be used in this tutorial, all options selected from the commandline are also selectable using the menu system. For more information please read the command line help (codonw -help) or just type "codonw" and use the menu specific online. Build your dataset of genes carefully. Always remember that as in any analysis, but particularly with codon usage, GIGO (garbage in, garbage out). Examine as many sources of information about the data as possible, particularly the original publication and sequence annotations. It is important that the sequences are a representative sample. Five ORFs where removed from the dataset because they where annotated (and had sequence identity) with genes within the previously identified transposable elements Ty2 and Ty5. These ORFs where annotated at positions 1537-2127, 2118-2558, 2816-3742, 84714-86030, 84714-90384. The codon usage of transposable element genes differs from that of chromosomal genes. 
Further checks of sequence annotation was carried out, those sequences which had not been assigned gene names or SwissProt accession numbers where removed. The SwissProt annotation was also checked, genes described as hypothetical but which did not have any sequence identity with other proteins where removed. Check basic sequence integrity Sequences should be checked to confirm that they match some basic gene characteristics. Each sequence might reasonably be expected to have an initiation codon and a translation termination codon, and no internal stop codons. Those sequences that do not match these characteristics, or sequences that have partial codons or untranslatable codons are flagged by codonw with warning messages. To make a first pass of the input data to check for simple sequence problems: codonw input.dat -nomenu By default codonw will report the codon usage of each gene to the file input.blk. As there are no problems with this dataset there should be no warning messages. However analysis of a previous version of this dataset based on EMBL Release 50 where SCCHRIII had 230 annotated ORFs, generated these typical warning messages. Warning: Sequence 178 "SCCHRIII.PE178______" does not begin with a recognised start codon Warning: Sequence 178 "SCCHRIII.PE178______" is not terminated by a stop codon Warning: Sequence 202 "SCCHRIII.PE202______" does not begin with a recognised start codon Warning: Sequence 202 "SCCHRIII.PE202______" has 1 internal stop codon(s) Warning: Sequence 202 "SCCHRIII.PE202______" is not terminated by a stop codon Each sequence is labelled by its numerical occurrence in the input file (i.e. these are the 178th and 202nd sequences in the input file) and its sequence header line. Sequences that generate warning messages should be examined closely to ascertain why. Some sequences may be annotated as partial sequences and therefore the absence of a start or stop codon or the presence of a 3' partial codon is to be expected. 
Note the presence of a 5' partial codon would cause a frame shift, it is ESSENTIAL that 5' partial codons are removed. Unless the frame shift that they produce, results in a (incorrect) reading frame that contains internal stop codons, codonw cannot detect this problem. The codon usage of a frame shifted gene sequence could adversely affect the correspondence analysis (COA) (though such genes are often recognisable as being outliers on the COA plots). If a sequence warning is due to incorrect annotation this should be corrected manually. Sequences that produce warnings that cannot be explained or justified (e.g. a gene with internal stop codon) should be excluded. These warning are informational only and do not exclude sequences from the analysis. Codon usage indices Once the initial quality checks have been made for the data we can then proceed with the codon usage analysis (strictly speaking we can generate COA and codon usage indices tasks at the same time). Some of the indices of codon usage bias that CodonW calculates (i.e. Fop, CAI and CBI) use information about a preferred set of codons for highly expressed genes. This information is species specific and does not apply to all species (most eukaryotes and many prokaryotes appear to display no codon preference in highly expressed genes). Therefore care must be taken that the appropriate set of optimal codons are used. For most species the optimal codons are not know and therefore the indices should not be calculated at this stage. However this information is known for Saccharomyces cerevisiae, so we can immediately calculate these indices of codon usage. Later we will see how codonW identifies optimal codons and can generate this information for your species. The default optimal codons and codon adaptation values are those of E. coli. To select an alternative choice we use the c_type (for CAI values ) and f_type (for FOP/CBI) commandline arguments. 
These switches requires an integer values, this value is the same as the option number if we where using the menu system to change the codon information. Example "-c_type 2" is equivalent to Choose "Main Menu" Choose "Changes Defaults Menu" Choose "Change the CAI values" Choose "(2) Saccharomyces cerevisiae" Example "-f_type 4" is equivalent to Choose "Main Menu" Choose "Changes Defaults Menu" Choose "Change the Fop/CBI values" Choose "(4) Saccharomyces cerevisiae" Therefore to select all the codon usage indices calculated by codonw and to use the optimal codons of Saccharomyces cerevisiae type: codonw input.dat -all_indices -c_type 2 -f_type 4 -nomenu See below for the output of this command The commandline flag -nomenu by passes the menu system, the -all_indices indicates to codonw that you wish to calculate all the codon and amino acid usage indices. These indices areT3s, C3s, A3s, G3s, CAI, CBI, Fop, Nc, GC3s, GC, L_sym, L_aa, Gravy and Aromaticity. For a fuller explanation of what these indices are see Readme.indices. These indices can also be used to check whether there are any identical or almost identical sequences in the input file. If we sort the result file "input.out" we it is much easier to identify the sequences which are similar. sort -k 2n input.out (unix for "sort using the second numerical field") The sorted output reveals the presence of two pairs of identical sequences (Mating type proteins) ALPHA2____________63 0.3636 0.2273 0.4939 0.2177 0.109 MATALPHA2_________63 0.3636 0.2273 0.4939 0.2177 0.109 and ALPHA1____________52 0.4361 0.2180 0.4228 0.2589 0.112 MATALPHA1_________52 0.4361 0.2180 0.4228 0.2589 0.112 Sequences which appear to be multiple copies of the same gene are normally removed from our codon usage datasets, even if the sequences are not identical but where the differences c codon usage bias as observed, lower values indicate stronger bias. A useful feature of ENc is that the affect of GC biases have on the index can be estimated. 
This allows the comparison of GC3s and ENc against the theoretical values if codon bias was simply caused due to GC mutational bias. A plot of ENc vs. GC3s can be seen at http://www.molbiol.ox.ac.uk/cu/EncVsGC3s.gif. Although the majority of genes in this plot have a degree of codon bias that can be explained in terms of GC mutation, the cluster of genes (six genes with ENc <40) which have much stronger codon bias than be simply explained in terms of mutational biases. These genes are good candidates as genes whose codon usage has been determined by natural selection, probably selection for translational efficiency. Correspondence Analysis (COA) We are now ready to generate a correspondence analysis of the codon usage of SCCHRIII genes. We have a choice about how much information is generated. In this example we will use the default values. codonw input.dat -coa_cu -nomenu -silent (-silent stops all prompting) This generates a COA of codon usage. The summary file is "summary.coa" and contains most of the data generated by the COA. One of the first sections is the "Explanation of the variation by axis" also stored in eigen.coa. The total inertia of the data was 0.263176 Num. Eigenval. R.Iner. R.Sum |Num. Eigenval. R.Iner. R.Sum | 01 +4.5755E-02 +0.1739 +0.1739 |02 +3.2372E-02 +0.1230 +0.2969 | 03 +1.8405E-02 +0.0699 +0.3668 |04 +1.2499E-02 +0.0475 +0.4143 | The relative inertia explained by the first axis is 17.4%, the 2nd axis explains 12.3%, the 3rd 7.0%, etc. (17.45% is not remarkably high for relative inertia explained by the first axis, but as there are ORFs included which are described as hypothetical there may be random noise present in the data if they are not real). The next two sections report position of each gene and codon on the trends. 
label Axis1 Axis2 Axis3 Axis4 1_YCG9_Probable_____ 0.00904 0.13153 0.34028 -0.05372 2_YCG8________573_re 0.07429 -0.24652 -0.05502 -0.39837 3_ALPHA2________633_ 0.30675 0.04259 -0.22864 -0.03878 4_ALPHA1________528_ 0.16444 0.00399 -0.02000 0.00937 5_CHA1_________1083_ -0.00322 0.10387 0.07137 0.11896 this information is best viewed graphically, an example of the location of the genes on the two principal axes can be seen here http://www.molbiol.ox.ac.uk/cu/axes.gif. Automatic Identification of Putative Optimal Codons Codonw automatically tries to identify the optimal codons in your data, or more precisely identify the codons which contribute to the major trend (if the main trend is selection for translational optimality these should be the optimal codons). It does this by comparing the codon usage of groups of genes taken from each extreme of the principle trend (axis 1). It identifies the set of genes with the highest bias (using the effective number of codons index) and tests for significant differences in the codon usage of between the higher bias set with a two way Chi-squared contingency test. The putative optimal codons are listed in summary.coa and hilo.coa. It is the responsibility of the user to confirm that the major codon usage trend is selection for translational optimality, and not due to some other mutational pressure (see GC variation). The number of genes included in the two groups can be selected using the command line switch ( -coa_num ) as an absolute number of genes, of a percentage of the total genes in the dataset (by default 5%). The analysis of this dataset identified 19 codons that appeared to be optimal. 18 of these agree with optimal codon identified previously using a larger dataset set of 575 genes [Sharp, 1991 #46]. The codon identified in this analysis as being optimal but not in the previous analysis, was GCC; this codon has been previously suggested as being an optimal codon in S. cerevisiae [Bennetzen, 1982 #92]. 
The U ending codons, AUU, GUU and UGU, which have been previously identified as optimal [Sharp, 1991 #46], were not identified here at p<0.01; although UGU was identified as potentially optimal with a p<0.02. The main reason that the U ending codons were not identified from this dataset was their much higher usage in the lower biased dataset. Caveats 1) The codons identified by codonw as being optimal will be dependent on the strength of the trend and the size of the datasets. 2) The composition of the genes from chromosome III is quite different from the 575-gene dataset used by Sharp and Cowe. Only one of the 30 genes they considered to be highly expressed, and none of the genes they considered lowly expressed are present in this dataset. The reader is reminded that there are approximately 15,000 yeast genes, so just a little over 1% are located on chromosome III. Codonw generated personal choice of codons On the assumption that the principal trend identified by codonw is selection for translational optimality, and that the genes assigned to the highly biased codon usage group are highly expressed, codonw outputs files with the "optimal codons" and "CAI adaptation fitness values". These files are fop.coa, cbi.coa and cai.coa, their filenames are related to the index they have been formatted for. These files can be used to calculate the indices in species where the preferred codon usage has not been hardwired into codonW. codonw input.dat -fop_file fop.coa codonw input.dat -cai_file cai.coa -cbi_file cbi.coa Caveats 1) The original CAI paper calculated fitness values from experimentally determined highly expressed genes. The fitness values that are internal to codonW were derived from these criteria. CAI indices calculated using fitness values derived from genes identified solely by COA, as being highly expressed should not be regarded as true CAI values. 
2) The optimal codons stored in the files cbi.coa and fop.coa were identified by codonw using a statistical test of significance; this test is dependent on sample size. 3) The size of the sample taken from the extremes of the axis will affect the identified optimal codons. 4) The principal trend in the variation of codon usage may not be translation optimality. When we calculate the indexes CAI, CBI and Fop using the "codonw" generated optimal codons and fitness values based on this small dataset, they differ, as we would expect, from the values obtained when these indices are calculated using the codonw internal codon usage information for S. cerevisiae. The internal values are more accurate because the datasets used to generate them were larger, and contained experimentally verified gene sequences. Although the two sets of indices differ, they remain highly correlated; all three indices have correlation coefficients greater than 0.96. Therefore if comparisons between the index values are internally consistent (i.e. they were both calculated using the same optimal codon information) relative comparisons of codon usage and bias can be made. Based on a dataset of 111 genes we have been able to identify optimal codons, which give us some insight into the codon usage of S. cerevisiae. Axis2 is highly correlated with GC3s content Alternative datasets could have been chosen that would present a much simpler analysis of codon usage (i.e. where the optimal codons identified better matched those previously published). This dataset was specifically chosen as the codon usage variation for genes from this chromosome is known to have a second trend: GC3s varies with chromosomal location in a systematic fashion [Sharp, 1993 #39]. When we examine correlation coefficients between the first 4 axes the correlation coefficient between axis2 and GC3s is highly significant (r=0.89).
Interestingly, the bias is strongest among the U ending codons; it is possible that the presence of this trend contributed to why the three U ending codons were not identified here as optimal codons. This trend is quite strong, accounting for 12.3% of the relative inertia of the data; the principal trend (apparently selection for translation optimality) accounted for 17.4%. We therefore see how it is possible that the strongest influence on the choice of codon usage might not be translation optimality but mutation biases. Typical output from codonw -all_indices -nomenu ======================= Output ====================================== Genetic code is currently set to Universal Genetic code TGA=* TAA=* TAG=* Welcome to CodonW 1.3 for Help type h Using Saccharomyces cerevisiae (Sharp and Cowe (1991) Yeast 7:657-678) w values to calculate CAI Using Saccharomyces cerevisiae (Sharp and Cowe (1991) Yeast 7:657-678) optimal codons to calculate CBI Using Saccharomyces cerevisiae (Sharp and Cowe (1991) Yeast 7:657-678) optimal codons to calculate Fop .................................................................. Number of sequences: 111 Files used: Input file was input.dat Output file was input.out (codon usage indices, e.g. gc3s) Output file was input.blk (bulk output e.g.
raw codon usage) CodonW has finished ====================================================== Tabulation of total codon usage Phe UUU 1483 1.14 Ser UCU 1094 1.47 Tyr UAU 1000 1.12 Cys UGU 434 1.18 UUC 1117 0.86 UCC 773 1.04 UAC 789 0.88 UGC 303 0.82 Leu UUA 1349 1.55 UCA 882 1.19 TER UAA 47 1.27 TER UGA 36 0.97 UUG 1549 1.78 UCG 487 0.66 UAG 28 0.76 Trp UGG 665 1.00 CUU 698 0.80 Pro CCU 747 1.27 His CAU 677 1.15 Arg CGU 328 0.86 CUC 364 0.42 CCC 415 0.71 CAC 499 0.85 CGC 171 0.45 CUA 671 0.77 CCA 911 1.55 Gln CAA 1388 1.35 CGA 151 0.39 CUG 604 0.69 CCG 281 0.48 CAG 668 0.65 CGG 103 0.27 Ile AUU 1612 1.35 Thr ACU 1052 1.38 Asn AAU 1778 1.17 Ser AGU 717 0.97 AUC 1018 0.85 ACC 660 0.87 AAC 1262 0.83 AGC 500 0.67 AUA 943 0.79 ACA 883 1.16 Lys AAA 2118 1.13 Arg AGA 1038 2.71 Met AUG 1156 1.00 ACG 444 0.58 AAG 1645 0.87 AGG 504 1.32 Val GUU 1184 1.49 Ala GCU 1055 1.40 Asp GAU 1905 1.25 Gly GGU 1284 1.87 GUC 674 0.85 GCC 765 1.01 GAC 1145 0.75 GGC 552 0.80 GUA 622 0.78 GCA 836 1.11 Glu GAA 2371 1.41 GGA 557 0.81 GUG 690 0.87 GCG 368 0.49 GAG 995 0.59 GGG 355 0.52 53400 codons (used Universal Genetic code) ======================================================