clustalw-mpi-0.15/0000755000411000001440000000000011015472140012367 5ustar liusersclustalw-mpi-0.15/alnscore.c0000644000411000001440000000436307644152540014363 0ustar liusers#include #include #include #include "clustalw.h" #define MAX(a,b) ((a)>(b)?(a):(b)) #define MIN(a,b) ((a)<(b)?(a):(b)) /* * Prototypes */ static sint count_gaps(sint s1, sint s2, sint l); /* * Global Variables */ extern float gap_open; extern sint nseqs; extern sint *seqlen_array; extern short blosum45mt[]; extern short def_aa_xref[]; extern sint debug; extern sint max_aa; extern char **seq_array; void aln_score(void) { static short *mat_xref, *matptr; static sint maxres; static sint s1,s2,c1,c2; static sint ngaps; static sint i,l1,l2; static lint score; static sint matrix[NUMRES][NUMRES]; /* calculate an overall score for the alignment by summing the scores for each pairwise alignment */ matptr = blosum45mt; mat_xref = def_aa_xref; maxres = get_matrix(matptr, mat_xref, matrix, TRUE, 100); if (maxres == 0) { fprintf(stdout,"Error: matrix blosum30 not found\n"); return; } score=0; for (s1=1;s1<=nseqs;s1++) { for (s2=1;s2=0) && (c1<=max_aa) && (c2>=0) && (c2<=max_aa)) score += matrix[c1][c2]; } ngaps = count_gaps(s1, s2, l1); score -= 100 * gap_open * ngaps; } } score /= 100; info("Alignment Score %d", (pint)score); } static sint count_gaps(sint s1, sint s2, sint l) { sint i, g; sint q, r, *Q, *R; Q = (sint *)ckalloc((l+2) * sizeof(sint)); R = (sint *)ckalloc((l+2) * sizeof(sint)); Q[0] = R[0] = g = 0; for (i=1;i max_aa) q = 1; else q = 0; if (seq_array[s2][i] > max_aa) r = 1; else r = 0; if (((Q[i-1] <= R[i-1]) && (q != 0) && (1-r != 0)) || ((Q[i-1] >= R[i-1]) && (1-q != 0) && (r != 0))) g += 1; if (q != 0) Q[i] = Q[i-1]+1; else Q[i] = 0; if (r != 0) R[i] = R[i-1]+1; else R[i] = 0; } Q=ckfree((void *)Q); R=ckfree((void *)R); return(g); } clustalw-mpi-0.15/calcgapcoeff.c0000644000411000001440000003351207644152540015150 0ustar liusers#include #include #include #include #include "clustalw.h" /* * 
Prototypes */ void calc_p_penalties(char **aln, sint n, sint fs, sint ls, sint *weight); void calc_h_penalties(char **aln, sint n, sint fs, sint ls, sint *weight); void calc_v_penalties(char **aln, sint n, sint fs, sint ls, sint *weight); sint local_penalty(sint penalty, sint n, sint *pweight, sint *hweight, sint *vweight); float percentid(char *s1, char *s2,sint length); /* * Global variables */ extern sint gap_dist; extern sint max_aa; extern sint debug; extern Boolean dnaflag; extern Boolean use_endgaps; extern Boolean endgappenalties; extern Boolean no_var_penalties, no_hyd_penalties, no_pref_penalties; extern char hyd_residues[]; extern char *amino_acid_codes; /* vwindow is the number of residues used for a window for the variable zone penalties */ /* vll is the lower limit for the variable zone penalties (vll < pen < 1.0) */ int vll=50; int vwindow=5; sint vlut[26][26] = { /* A B C D E F G H I J K L M N O P Q R S T U V W X Y Z */ /*A*/ 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*B*/ 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*C*/ 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*D*/ 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*E*/ 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*F*/ 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*G*/ 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*H*/ 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*I*/ 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*J*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*K*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*L*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*M*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*N*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*O*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*P*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*Q*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*R*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, /*S*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, /*T*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, /*U*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, /*V*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, /*W*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, /*X*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, /*Y*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, /*Z*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 }; /* pascarella probabilities for opening a gap at specific residues */ char pr[] = {'A' , 'C', 'D', 'E', 'F', 'G', 'H', 'K', 'I', 'L', 'M' , 'N', 'P', 'Q', 'R', 'S', 'T', 'V', 'Y', 'W'}; sint pas_op[] = { 87, 87,104, 69, 80,139,100,104, 68, 79, 71,137,126, 93,128,124,111, 75,100, 77}; sint pas_op2[] ={ 88, 57,111, 98, 75,126, 95, 97, 70, 90, 60,122,110,107, 91,125,124, 81,106, 88}; sint pal_op[] = { 84, 69,128, 78, 88,176, 53, 95, 55, 49, 52,148,147,100, 91,129,105, 51,128, 88}; float reduced_gap = 1.0; Boolean nvar_pen,nhyd_pen,npref_pen; /* local copies of ho_hyd_penalties, no_pref_penalties */ sint gdist; /* local copy of gap_dist */ void calc_gap_coeff(char **alignment, sint *gaps, sint **profile, Boolean struct_penalties, char *gap_penalty_mask, sint first_seq, sint last_seq, sint prf_length, sint gapcoef, sint lencoef) { char c; sint i, j; sint 
is, ie; static sint numseq,val,pcid; static sint *gap_pos; static sint *v_weight, *p_weight, *h_weight; static float scale; numseq = last_seq - first_seq; if(numseq == 2) { pcid=percentid(alignment[first_seq],alignment[first_seq+1],prf_length); } else pcid=0; for (j=0; j 60) { if(debug>0) fprintf(stderr,"Using variable zones to set gap penalties (pcid = %d)\n",pcid); nhyd_pen = npref_pen = TRUE; nvar_pen = FALSE; } else { nvar_pen = TRUE; nhyd_pen = no_hyd_penalties; npref_pen = no_pref_penalties; gdist = gap_dist; } for (i=first_seq; i max_aa)) is++; else break; } for (j=prf_length-1; j>=0; j--) { c = alignment[i][j]; if ((c < 0) || (c > max_aa)) ie--; else break; } } for (j=is; j max_aa)) gaps[j]++; } } if ((!dnaflag) && (nvar_pen == FALSE)) { v_weight = (sint *) ckalloc( (prf_length+2) * sizeof (sint) ); calc_v_penalties(alignment, prf_length, first_seq, last_seq, v_weight); } if ((!dnaflag) && (npref_pen == FALSE)) { p_weight = (sint *) ckalloc( (prf_length+2) * sizeof (sint) ); calc_p_penalties(alignment, prf_length, first_seq, last_seq, p_weight); } if ((!dnaflag) && (nhyd_pen == FALSE)) { h_weight = (sint *) ckalloc( (prf_length+2) * sizeof (sint) ); calc_h_penalties(alignment, prf_length, first_seq, last_seq, h_weight); } gap_pos = (sint *) ckalloc( (prf_length+2) * sizeof (sint) ); /* mark the residues close to an existing gap (set gaps[i] = -ve) */ if (dnaflag || (gdist <= 0)) { for (i=0;i=0) && (i+j 0) { if (i>=prf_length) break; gap_pos[i] = gaps[i]; i++; } for (j = 0; j 0) break; if ((i+j>=0) && (i+j1) { fprintf(stdout,"gap open %d gap ext %d\n",(pint)gapcoef,(pint)lencoef); fprintf(stdout,"gaps:\n"); for(i=0;i 0 && val < 10) { profile[j+1][GAPCOL] *= val; profile[j+1][LENCOL] *= val; } } /* make sure no penalty is zero - even for all-gap positions */ if (profile[j+1][GAPCOL] <= 0) profile[j+1][GAPCOL] = 1; if (profile[j+1][LENCOL] <= 0) profile[j+1][LENCOL] = 1; } /* set the penalties at the beginning and end of the profile */ 
if(endgappenalties==TRUE) { profile[0][GAPCOL] = gapcoef; profile[0][LENCOL] = lencoef; } else { profile[0][GAPCOL] = 0; profile[0][LENCOL] = 0; profile[prf_length][GAPCOL] = 0; profile[prf_length][LENCOL] = 0; } if (debug>0) { fprintf(stdout,"Opening penalties:\n"); for(i=0;i<=prf_length;i++) fprintf(stdout," %d:%d ",i, (pint)profile[i][GAPCOL]); fprintf(stdout,"\n"); } if (debug>0) { fprintf(stdout,"Extension penalties:\n"); for(i=0;i<=prf_length;i++) fprintf(stdout,"%d:%d ",i, (pint)profile[i][LENCOL]); fprintf(stdout,"\n"); } if ((!dnaflag) && (nvar_pen == FALSE)) v_weight=ckfree((void *)v_weight); if ((!dnaflag) && (npref_pen == FALSE)) p_weight=ckfree((void *)p_weight); if ((!dnaflag) && (nhyd_pen == FALSE)) h_weight=ckfree((void *)h_weight); gap_pos=ckfree((void *)gap_pos); } void calc_v_penalties(char **aln, sint n, sint fs, sint ls, sint *weight) { char ix1,ix2; sint i,j,k,t; for (i=0;i=0 && j max_aa) || (ix2< 0) || (ix2> max_aa)) continue; weight[i] += vlut[amino_acid_codes[ix1]-'A'][amino_acid_codes[ix2]-'A']; t++; } } /* now we have a weight -t < w < t */ weight[i] +=t; if(t>0) weight[i] = (weight[i]*100)/(2*t); else weight[i] = 100; /* now we have a weight vll < w < 100 */ if (weight[i] max_aa)) continue; if (amino_acid_codes[ix] == pr[j]) { weight[i] += (180-pas_op[j]); break; } } } weight[i] /= numseq; } } void calc_h_penalties(char **aln, sint n, sint fs, sint ls, sint *weight) { /* weight[] is the length of the hydrophilic run of residues. 
*/ char ix; sint nh,j,k; sint i,e,s; sint *hyd; float scale; hyd = (sint *)ckalloc((n+2) * sizeof(sint)); nh = (sint)strlen(hyd_residues); for (i=0;i max_aa)) continue; if (amino_acid_codes[ix] == hyd_residues[j]) { hyd[i] = 1; break; } } } i = 0; while (i < n) { if (hyd[i] == 0) i++; else { s = i; while ((hyd[i] != 0) && (i 3) for (j=s; j1) { for(i=0;i 0) { gw *= 0.5; h = TRUE; } } if ((npref_pen == FALSE) && (h==FALSE)) { gw *= ((float)pweight[n]/100.0); } gw *= penalty; return((sint)gw); } float percentid(char *s1, char *s2,sint length) { sint i; sint count,total; float score; count = total = 0; for (i=0;i=0) && (s1[i] #include #include #include #include "clustalw.h" /* * Prototypes */ /* * Global variables */ extern sint max_aa,gap_pos1,gap_pos2; void calc_prf1(sint **profile, char **alignment, sint *gaps, sint matrix[NUMRES][NUMRES], sint *seq_weight, sint prf_length, sint first_seq, sint last_seq) { sint **weighting, sum2, d, i, res; sint numseq; sint r, pos; int f; float scale; weighting = (sint **) ckalloc( (NUMRES+2) * sizeof (sint *) ); for (i=0;i #include #include #include #include "clustalw.h" /* * Prototypes */ /* * Global variables */ extern sint max_aa,gap_pos1,gap_pos2; void calc_prf2(sint **profile, char **alignment, sint *seq_weight,sint prf_length, sint first_seq, sint last_seq) { sint sum1, sum2; sint i, d; sint r; for (r=0; r #include #include #include #include #include #include "clustalw.h" #define MAXERRS 10 /* * Prototypes */ static void create_tree(treeptr ptree, treeptr parent); static void create_node(treeptr pptr, treeptr parent); static treeptr insert_node(treeptr pptr); static void skip_space(FILE *fd); static treeptr avail(void); static void set_info(treeptr p, treeptr parent, sint pleaf, char *pname, float pdist); static treeptr reroot(treeptr ptree, sint nseqs); static treeptr insert_root(treeptr p, float diff); static float calc_root_mean(treeptr root, float *maxdist); static float calc_mean(treeptr nptr, float *maxdist, sint 
nseqs); static void order_nodes(void); static sint calc_weight(sint leaf); static void group_seqs(treeptr p, sint *next_groups, sint nseqs); static void mark_group1(treeptr p, sint *groups, sint n); static void mark_group2(treeptr p, sint *groups, sint n); static void save_set(sint n, sint *groups); static void clear_tree_nodes(treeptr p); /* * Global variables */ extern Boolean interactive; extern Boolean distance_tree; extern Boolean usemenu; extern sint debug; extern double **tmat; extern sint **sets; extern sint nsets; extern char **names; extern sint *seq_weight; extern Boolean no_weights; char ch; FILE *fd; treeptr *lptr; treeptr *olptr; treeptr *nptr; treeptr *ptrs; sint nnodes = 0; sint ntotal = 0; Boolean rooted_tree = TRUE; static treeptr seq_tree,root; static sint *groups, numseq; void calc_seq_weights(sint first_seq, sint last_seq, sint *sweight) { sint i, nseqs; sint temp, sum, *weight; /* If there are more than three sequences.... */ nseqs = last_seq-first_seq; if ((nseqs >= 2) && (distance_tree == TRUE) && (no_weights == FALSE)) { /* Calculate sequence weights based on Phylip tree. */ weight = (sint *)ckalloc((last_seq+1) * sizeof(sint)); for (i=first_seq; i= 2) { /* If there are more than three sequences.... */ groups = (sint *)ckalloc((nseqs+1) * sizeof(sint)); group_seqs(root, groups, nseqs); groups=ckfree((void *)groups); } else { groups = (sint *)ckalloc((nseqs+1) * sizeof(sint)); for (i=0;ileaf distances for the left and right branches of the tree. */ if (distance_tree == FALSE) { if (rooted_tree == FALSE) { error("input tree is unrooted and has no distances.\nCannot align sequences"); return((sint)0); } } if (rooted_tree == FALSE) { root = reroot(seq_tree, last_seq-first_seq+1); } else { root = seq_tree; } /* calculate the 'order' of each node. */ order_nodes(); if (numseq >= 2) { /* If there are more than three sequences.... 
*/ /* assign the sequence nodes (in the same order as in the alignment file) */ for (i=first_seq; i MAXNAMES) warning("name %s is too long for PHYLIP tree format (max %d chars)", names[i+1],MAXNAMES); for (k=0; k< strlen(names[i+1]) && k0x40) && (c<0x5b)) c=c | 0x20; if (c == ' ') c = '_'; name2[k] = c; } name2[k]='\0'; found = FALSE; for (j=0; jname) && kname[k]; if ((c>0x40) && (c<0x5b)) c=c | 0x20; name1[k] = c; } name1[k]='\0'; if (strcmp(name1, name2) == 0) { olptr[i] = lptr[j]; found = TRUE; } } if (found == FALSE) { error("tree not compatible with alignment:\n%s not found", name2); return((sint)0); } } } return((sint)1); } static void create_tree(treeptr ptree, treeptr parent) { treeptr p; sint i, type; float dist; char name[MAXNAMES+1]; /* is this a node or a leaf ? */ skip_space(fd); ch = (char)getc(fd); if (ch == '(') { /* this must be a node.... */ type = NODE; name[0] = '\0'; ptrs[ntotal] = nptr[nnodes] = ptree; nnodes++; ntotal++; create_node(ptree, parent); p = ptree->left; create_tree(p, ptree); if ( ch == ',') { p = ptree->right; create_tree(p, ptree); if ( ch == ',') { ptree = insert_node(ptree); ptrs[ntotal] = nptr[nnodes] = ptree; nnodes++; ntotal++; p = ptree->right; create_tree(p, ptree); rooted_tree = FALSE; } } skip_space(fd); ch = (char)getc(fd); } /* ...otherwise, this is a leaf */ else { type = LEAF; ptrs[ntotal++] = lptr[numseq++] = ptree; /* get the sequence name */ name[0] = ch; ch = (char)getc(fd); i = 1; while ((ch != ':') && (ch != ',') && (ch != ')')) { if (i < MAXNAMES) name[i++] = ch; ch = (char)getc(fd); } name[i] = '\0'; if (ch != ':') { distance_tree = FALSE; dist = 0.0; } } /* get the distance information */ dist = 0.0; if (ch == ':') { skip_space(fd); fscanf(fd,"%f",&dist); skip_space(fd); ch = (char)getc(fd); } set_info(ptree, parent, type, name, dist); } static void create_node(treeptr pptr, treeptr parent) { treeptr t; pptr->parent = parent; t = avail(); pptr->left = t; t = avail(); pptr->right = t; } static treeptr 
insert_node(treeptr pptr) { treeptr newnode; newnode = avail(); create_node(newnode, pptr->parent); newnode->left = pptr; pptr->parent = newnode; set_info(newnode, pptr->parent, NODE, "", 0.0); return(newnode); } static void skip_space(FILE *fd) { int c; do c = getc(fd); while(isspace(c)); ungetc(c, fd); } static treeptr avail(void) { treeptr p; p = ckalloc(sizeof(stree)); p->left = NULL; p->right = NULL; p->parent = NULL; p->dist = 0.0; p->leaf = 0; p->order = 0; p->name[0] = '\0'; return(p); } void clear_tree(treeptr p) { clear_tree_nodes(p); nptr=ckfree((void *)nptr); ptrs=ckfree((void *)ptrs); lptr=ckfree((void *)lptr); olptr=ckfree((void *)olptr); } static void clear_tree_nodes(treeptr p) { if (p==NULL) p = root; if (p->left != NULL) { clear_tree_nodes(p->left); } if (p->right != NULL) { clear_tree_nodes(p->right); } p->left = NULL; p->right = NULL; p=ckfree((void *)p); } static void set_info(treeptr p, treeptr parent, sint pleaf, char *pname, float pdist) { p->parent = parent; p->leaf = pleaf; p->dist = pdist; p->order = 0; strcpy(p->name, pname); if (p->leaf == TRUE) { p->left = NULL; p->right = NULL; } } static treeptr reroot(treeptr ptree, sint nseqs) { treeptr p, rootnode, rootptr; float diff, mindiff = 0.0, mindepth = 1.0, maxdist; sint i; Boolean first = TRUE; /* find the difference between the means of leaf->node distances on the left and on the right of each node */ rootptr = ptree; for (i=0; iparent == NULL) diff = calc_root_mean(p, &maxdist); else diff = calc_mean(p, &maxdist, nseqs); if ((diff == 0) || ((diff > 0) && (diff < 2 * p->dist))) { if ((maxdist < mindepth) || (first == TRUE)) { first = FALSE; rootptr = p; mindepth = maxdist; mindiff = diff; } } } /* insert a new node as the ancestor of the node which produces the shallowest tree. 
*/ if (rootptr == ptree) { mindiff = rootptr->left->dist + rootptr->right->dist; rootptr = rootptr->right; } rootnode = insert_root(rootptr, mindiff); diff = calc_root_mean(rootnode, &maxdist); return(rootnode); } static treeptr insert_root(treeptr p, float diff) { treeptr newp, prev, q, t; float dist, prevdist,td; newp = avail(); t = p->parent; prevdist = t->dist; p->parent = newp; dist = p->dist; p->dist = diff / 2; if (p->dist < 0.0) p->dist = 0.0; if (p->dist > dist) p->dist = dist; t->dist = dist - p->dist; newp->left = t; newp->right = p; newp->parent = NULL; newp->dist = 0.0; newp->leaf = NODE; if (t->left == p) t->left = t->parent; else t->right = t->parent; prev = t; q = t->parent; t->parent = newp; while (q != NULL) { if (q->left == prev) { q->left = q->parent; q->parent = prev; td = q->dist; q->dist = prevdist; prevdist = td; prev = q; q = q->left; } else { q->right = q->parent; q->parent = prev; td = q->dist; q->dist = prevdist; prevdist = td; prev = q; q = q->right; } } /* remove the old root node */ q = prev; if (q->left == NULL) { dist = q->dist; q = q->right; q->dist += dist; q->parent = prev->parent; if (prev->parent->left == prev) prev->parent->left = q; else prev->parent->right = q; prev->right = NULL; } else { dist = q->dist; q = q->left; q->dist += dist; q->parent = prev->parent; if (prev->parent->left == prev) prev->parent->left = q; else prev->parent->right = q; prev->left = NULL; } return(newp); } static float calc_root_mean(treeptr root, float *maxdist) { float dist , lsum = 0.0, rsum = 0.0, lmean,rmean,diff; treeptr p; sint i; sint nl, nr; sint direction; /* for each leaf, determine whether the leaf is left or right of the root. 
*/ dist = (*maxdist) = 0; nl = nr = 0; for (i=0; i< numseq; i++) { p = lptr[i]; dist = 0.0; while (p->parent != root) { dist += p->dist; p = p->parent; } if (p == root->left) direction = LEFT; else direction = RIGHT; dist += p->dist; if (direction == LEFT) { lsum += dist; nl++; } else { rsum += dist; nr++; } if (dist > (*maxdist)) *maxdist = dist; } lmean = lsum / nl; rmean = rsum / nr; diff = lmean - rmean; return(diff); } static float calc_mean(treeptr nptr, float *maxdist, sint nseqs) { float dist , lsum = 0.0, rsum = 0.0, lmean,rmean,diff; treeptr p, *path2root; float *dist2node; sint depth = 0, i,j , n = 0; sint nl , nr; sint direction, found; path2root = (treeptr *)ckalloc(nseqs * sizeof(treeptr)); dist2node = (float *)ckalloc(nseqs * sizeof(float)); /* determine all nodes between the selected node and the root; */ depth = (*maxdist) = dist = 0; nl = nr = 0; p = nptr; while (p != NULL) { path2root[depth] = p; dist += p->dist; dist2node[depth] = dist; p = p->parent; depth++; } /* *nl = *nr = 0; for each leaf, determine whether the leaf is left or right of the node. (RIGHT = descendant, LEFT = not descendant) */ for (i=0; i< numseq; i++) { p = lptr[i]; if (p == nptr) { direction = RIGHT; dist = 0.0; } else { direction = LEFT; dist = 0.0; /* find the common ancestor. 
*/ found = FALSE; n = 0; while ((found == FALSE) && (p->parent != NULL)) { for (j=0; j< depth; j++) if (p->parent == path2root[j]) { found = TRUE; n = j; } dist += p->dist; p = p->parent; } if (p == nptr) direction = RIGHT; } if (direction == LEFT) { lsum += dist; lsum += dist2node[n-1]; nl++; } else { rsum += dist; nr++; } if (dist > (*maxdist)) *maxdist = dist; } dist2node=ckfree((void *)dist2node); path2root=ckfree((void *)path2root); lmean = lsum / nl; rmean = rsum / nr; diff = lmean - rmean; return(diff); } static void order_nodes(void) { sint i; treeptr p; for (i=0; iorder++; p = p->parent; } } } static sint calc_weight(sint leaf) { treeptr p; float weight = 0.0; p = olptr[leaf]; while (p->parent != NULL) { weight += p->dist / p->order; p = p->parent; } weight *= 100.0; return((sint)weight); } static void group_seqs(treeptr p, sint *next_groups, sint nseqs) { sint i; sint *tmp_groups; tmp_groups = (sint *)ckalloc((nseqs+1) * sizeof(sint)); for (i=0;ileft != NULL) { if (p->left->leaf == NODE) { group_seqs(p->left, next_groups, nseqs); for (i=0;ileft, tmp_groups, nseqs); } } if (p->right != NULL) { if (p->right->leaf == NODE) { group_seqs(p->right, next_groups, nseqs); for (i=0;iright, tmp_groups, nseqs); } save_set(nseqs, tmp_groups); } for (i=0;i= 2) { /* for each leaf, determine all nodes between the leaf and the root; */ for (i = 0;idist; dist2node[depth] = dist; p = p->parent; depth++; } /* for each pair.... */ for (j=0; j < i; j++) { p = olptr[j]; dist = 0.0; /* find the common ancestor. 
*/ found = FALSE; n = 0; while ((found == FALSE) && (p->parent != NULL)) { for (k=0; k< depth; k++) if (p->parent == path2root[k]) { found = TRUE; n = k; } dist += p->dist; p = p->parent; } dmat[i][j] = dist + dist2node[n-1]; } } nerrs = 0; for (i=0;i 1.0) { if (dmat[i][j] > 1.1 && nerrs0) { strcpy(err_mess,"The following sequences are too divergent to be aligned:\n"); for (i=0;i #include #include #include #include #include #include #include "clustalw.h" #include "mpi.h" extern void *ckalloc(size_t); extern void init_matrix(void); extern void init_interface(void); extern void fill_chartab(void); /* * Global variables */ double **tmat; char revision_level[] = "W (1.82)"; /* JULIE feb 2001 */ Boolean interactive = FALSE; char *help_file_name = "clustalw_help"; sint max_names; /* maximum length of names in current alignment file */ float gap_open, gap_extend; float pw_go_penalty, pw_ge_penalty; FILE *tree; FILE *clustal_outfile, *gcg_outfile, *nbrf_outfile, *phylip_outfile, *gde_outfile, *nexus_outfile; sint *seqlen_array; sint max_aln_length; short usermat[NUMRES][NUMRES], pw_usermat[NUMRES][NUMRES]; short def_aa_xref[NUMRES + 1], aa_xref[NUMRES + 1], pw_aa_xref[NUMRES + 1]; short userdnamat[NUMRES][NUMRES], pw_userdnamat[NUMRES][NUMRES]; short def_dna_xref[NUMRES + 1], dna_xref[NUMRES + 1], pw_dna_xref[NUMRES + 1]; sint nseqs; sint nsets; sint *output_index; sint **sets; sint *seq_weight; sint max_aa; sint gap_pos1; sint gap_pos2; sint mat_avscore; sint profile_no; Boolean usemenu; Boolean dnaflag; Boolean distance_tree; char **seq_array; char **names, **titles; char **args; char seqname[FILENAMELEN + 1]; char *gap_penalty_mask1 = NULL, *gap_penalty_mask2 = NULL; char *sec_struct_mask1 = NULL, *sec_struct_mask2 = NULL; sint struct_penalties; char *ss_name1 = NULL, *ss_name2 = NULL; Boolean user_series = FALSE; UserMatSeries matseries; short usermatseries[MAXMAT][NUMRES][NUMRES]; short aa_xrefseries[MAXMAT][NUMRES + 1]; int main(int argc, char **argv) { int i; int 
my_rank, np; MPI_Init(&argc, &argv); MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); MPI_Comm_size(MPI_COMM_WORLD, &np); if (my_rank == 0) { /* init_amenu(); */ init_interface(); init_matrix(); fill_chartab(); args = (char **) ckalloc(argc * sizeof(char *)); for (i = 1; i < argc; ++i) { args[i - 1] = (char *) ckalloc((strlen(argv[i]) + 1) * sizeof(char)); strcpy(args[i - 1], argv[i]); } usemenu = FALSE; parse_params(FALSE); } else { /* the parallel pairwise alignment */ parallel_compare(); } return 0; } /* * fatal() * * Prints error msg to stdout and exits. * Variadic parameter list can be passed. * * Return values: * none */ void fatal(char *msg, ...) { va_list ap; va_start(ap, msg); fprintf(stdout, "\n\nFATAL ERROR: "); vfprintf(stdout, msg, ap); fprintf(stdout, "\n\n"); va_end(ap); exit(1); } /* * error() * * Prints error msg to stdout. * Variadic parameter list can be passed. * * Return values: * none */ void error(char *msg, ...) { va_list ap; va_start(ap, msg); fprintf(stdout, "\n\nERROR: "); vfprintf(stdout, msg, ap); fprintf(stdout, "\n\n"); va_end(ap); } /* * warning() * * Prints warning msg to stdout. * Variadic parameter list can be passed. * * Return values: * none */ void warning(char *msg, ...) { va_list ap; va_start(ap, msg); fprintf(stdout, "\n\nWARNING: "); vfprintf(stdout, msg, ap); fprintf(stdout, "\n\n"); va_end(ap); } /* * info() * * Prints info msg to stdout. * Variadic parameter list can be passed. * * Return values: * none */ void info(char *msg, ...) { va_list ap; va_start(ap, msg); fprintf(stdout, "\n"); vfprintf(stdout, msg, ap); va_end(ap); } char prompt_for_yes_no(char *title, char *prompt) { char line[80]; char lin2[80]; fprintf(stdout, "\n%s\n", title); strcpy(line, prompt); strcat(line, "(y/n) ? 
[y]"); getstr(line, lin2); if ((*lin2 != 'n') && (*lin2 != 'N')) return ('y'); else return ('n'); } clustalw-mpi-0.15/gcgcheck.c0000644000411000001440000000046107644152540014306 0ustar liusers#include /* because of toupper() */ int SeqGCGCheckSum(char *seq, int len); int SeqGCGCheckSum(char *seq, int len) { int i; long check; for( i=0, check=0; i< len; i++,seq++) check += ((i % 57)+1) * toupper(*seq); return(check % 10000); } clustalw-mpi-0.15/interface.c0000644000411000001440000032517707647420276014535 0ustar liusers/* command line interface for Clustal W */ /* DES was here MARCH. 1994 */ /* DES was here SEPT. 1994 */ #include #include #include #include #include #include #include "clustalw.h" #include "param.h" #include "mpi.h" /* * Prototypes */ #ifdef UNIX FILE *open_path(char *); #endif static sint check_param(char **args,char *params[], char *param_arg[]); static void set_optional_param(void); static sint find_match(char *probe, char *list[], sint n); static void show_aln(void); static void create_parameter_output(void); static void reset_align(void); static void reset_prf1(void); static void reset_prf2(void); static void calc_gap_penalty_mask(int prf_length,char *struct_mask,char *gap_mask); void print_sec_struct_mask(int prf_length,char *mask,char *struct_mask); static int myexit(int rval); /* * Global variables */ extern sint max_names; extern Boolean interactive; extern double **tmat; extern float gap_open, gap_extend; extern float dna_gap_open, dna_gap_extend; extern float prot_gap_open, prot_gap_extend; extern float pw_go_penalty, pw_ge_penalty; extern float dna_pw_go_penalty, dna_pw_ge_penalty; extern float prot_pw_go_penalty, prot_pw_ge_penalty; extern char revision_level[]; extern sint wind_gap,ktup,window,signif; extern sint dna_wind_gap, dna_ktup, dna_window, dna_signif; extern sint prot_wind_gap,prot_ktup,prot_window,prot_signif; extern sint boot_ntrials; /* number of bootstrap trials */ extern sint nseqs; extern sint new_seq; extern sint 
*seqlen_array; extern sint divergence_cutoff; extern sint debug; extern Boolean no_weights; extern Boolean neg_matrix; extern Boolean quick_pairalign; extern Boolean reset_alignments_new; /* DES */ extern Boolean reset_alignments_all; /* DES */ extern sint gap_dist; extern Boolean no_hyd_penalties, no_pref_penalties; extern sint max_aa; extern sint gap_pos1, gap_pos2; extern sint max_aln_length; extern sint *output_index, output_order; extern sint profile_no; extern short usermat[], pw_usermat[]; extern short aa_xref[], pw_aa_xref[]; extern short userdnamat[], pw_userdnamat[]; extern short dna_xref[], pw_dna_xref[]; extern sint *seq_weight; extern Boolean lowercase; /* Flag for GDE output - set on comm. line*/ extern Boolean cl_seq_numbers; extern Boolean output_clustal, output_nbrf, output_phylip, output_gcg, output_gde, output_nexus; extern Boolean output_tree_clustal, output_tree_phylip, output_tree_distances, output_tree_nexus; extern sint bootstrap_format; extern Boolean tossgaps, kimura; extern Boolean percent; extern Boolean explicit_dnaflag; /* Explicit setting of sequence type on comm.line*/ extern Boolean usemenu; extern Boolean showaln, save_parameters; extern Boolean dnaflag; extern float transition_weight; extern unsigned sint boot_ran_seed; extern FILE *tree; extern FILE *clustal_outfile, *gcg_outfile, *nbrf_outfile, *phylip_outfile, *nexus_outfile; extern FILE *gde_outfile; extern char hyd_residues[]; extern char *amino_acid_codes; extern char **args; extern char seqname[]; extern char **seq_array; extern char **names, **titles; extern char *gap_penalty_mask1,*gap_penalty_mask2; extern char *sec_struct_mask1,*sec_struct_mask2; extern sint struct_penalties,struct_penalties1,struct_penalties2; extern sint output_struct_penalties; extern Boolean use_ss1, use_ss2; extern char *ss_name1,*ss_name2; char *ss_name = NULL; char *sec_struct_mask = NULL; char *gap_penalty_mask = NULL; char profile1_name[FILENAMELEN+1]; char profile2_name[FILENAMELEN+1]; Boolean 
empty; Boolean profile1_empty, profile2_empty; /* whether or not profiles */ char outfile_name[FILENAMELEN+1]=""; static char clustal_outname[FILENAMELEN+1], gcg_outname[FILENAMELEN+1]; static char phylip_outname[FILENAMELEN+1],nbrf_outname[FILENAMELEN+1]; static char gde_outname[FILENAMELEN+1],nexus_outname[FILENAMELEN+1]; char clustal_tree_name[FILENAMELEN+1]=""; char dist_tree_name[FILENAMELEN+1]=""; char phylip_tree_name[FILENAMELEN+1]=""; char nexus_tree_name[FILENAMELEN+1]=""; char p1_tree_name[FILENAMELEN+1]=""; char p2_tree_name[FILENAMELEN+1]=""; static char *params[MAXARGS]; static char *param_arg[MAXARGS]; static char *cmd_line_type[] = { " ", "=n ", "=f ", "=string ", "=filename ", ""}; static sint numparams; static Boolean check_tree = TRUE; sint profile1_nseqs; /* have been filled; the no. of seqs in prof 1*/ Boolean use_tree_file = FALSE,new_tree_file = FALSE; Boolean use_tree1_file = FALSE, use_tree2_file = FALSE; Boolean new_tree1_file = FALSE, new_tree2_file = FALSE; static char *lin2; MatMenu dnamatrix_menu = {3, "IUB","iub", "CLUSTALW(1.6)","clustalw", "User defined","" }; MatMenu matrix_menu = {5, "BLOSUM series","blosum", "PAM series","pam", "Gonnet series","gonnet", "Identity matrix","id", "User defined","" }; MatMenu pw_matrix_menu = {5, "BLOSUM 30","blosum", "PAM 350","pam", "Gonnet 250","gonnet", "Identity matrix","id", "User defined","" }; void init_interface(void) { empty=TRUE; profile1_empty = TRUE; /* */ profile2_empty = TRUE; /* */ lin2 = (char *)ckalloc( (MAXLINE+1) * sizeof (char) ); } static sint check_param(char **args,char *params[], char *param_arg[]) { /* #ifndef MAC char *strtok(char *s1, const char *s2); #endif */ sint len,i,j,k,s,n,match[MAXARGS]; Boolean name1 = FALSE; if(args[0]==NULL) return; params[0]=(char *)ckalloc(strlen(args[0])*sizeof(char)); if (args[0][0]!=COMMANDSEP) { name1 = TRUE; strcpy(params[0],args[0]); } else strcpy(params[0],&args[0][1]); for (i=1;i 0) { temp = find_match(param_arg[setscore],score_arg,2); 
if(temp == 0) percent = TRUE; else if(temp == 1) percent = FALSE; else fprintf(stdout,"\nUnknown SCORE type: %s\n", param_arg[setscore]); } /*** ? /seed=n */ if(setseed != -1) { temp = 0; if(strlen(param_arg[setseed]) > 0) if (sscanf(param_arg[setseed],"%d",&temp)!=1) { fprintf(stdout,"Bad option for /seed (must be integer)\n"); temp = 0; } if(temp > 0) boot_ran_seed = temp; fprintf(stdout,"\ntemp = %d; seed = %u;\n",(pint)temp,boot_ran_seed); } /*** ? /output=PIR, GCG, GDE or PHYLIP */ if(setoutput != -1) if(strlen(param_arg[setoutput]) > 0) { temp = find_match(param_arg[setoutput],output_arg,5); if (temp >= 0 && temp <= 3) { output_clustal = FALSE; output_gcg = FALSE; output_phylip = FALSE; output_nbrf = FALSE; output_gde = FALSE; output_nexus = FALSE; } switch (temp) { case 0: /* GCG */ output_gcg = TRUE; break; case 1: /* GDE */ output_gde = TRUE; break; case 2: /* PIR */ output_nbrf = TRUE; break; case 3: /* PHYLIP */ output_phylip = TRUE; break; case 4: /* PHYLIP */ output_nexus = TRUE; break; default: fprintf(stdout,"\nUnknown OUTPUT type: %s\n", param_arg[setoutput]); } } /*** ? /outputtree=NJ or PHYLIP or DIST or NEXUS */ if(setoutputtree != -1) if(strlen(param_arg[setoutputtree]) > 0) { temp = find_match(param_arg[setoutputtree],outputtree_arg,4); switch (temp) { case 0: /* NJ */ output_tree_clustal = TRUE; break; case 1: /* PHYLIP */ output_tree_phylip = TRUE; break; case 2: /* DIST */ output_tree_distances = TRUE; break; case 3: /* NEXUS */ output_tree_nexus = TRUE; break; default: fprintf(stdout,"\nUnknown OUTPUT TREE type: %s\n", param_arg[setoutputtree]); } } /*** ? /profile (sets type of second input file to profile) */ if(setprofile != -1) profile_type = PROFILE; /*** ? /sequences (sets type of second input file to list of sequences) */ if(setsequences != -1) profile_type = SEQUENCE; /*** ? 
/ktuple=n */ if(setktuple != -1) { temp = 0; if(strlen(param_arg[setktuple]) > 0) if (sscanf(param_arg[setktuple],"%d",&temp)!=1) { fprintf(stdout,"Bad option for /ktuple (must be integer)\n"); temp = 0; } if(temp > 0) { if(dnaflag) { if(temp <= 4) { ktup = temp; dna_ktup = ktup; wind_gap = ktup + 4; dna_wind_gap = wind_gap; } } else { if(temp <= 2) { ktup = temp; prot_ktup = ktup; wind_gap = ktup + 3; prot_wind_gap = wind_gap; } } } } /*** ? /pairgap=n */ if(setpairgap != -1) { temp = 0; if(strlen(param_arg[setpairgap]) > 0) if (sscanf(param_arg[setpairgap],"%d",&temp)!=1) { fprintf(stdout,"Bad option for /pairgap (must be integer)\n"); temp = 0; } if(temp > 0) if(dnaflag) { if(temp > ktup) { wind_gap = temp; dna_wind_gap = wind_gap; } } else { if(temp > ktup) { wind_gap = temp; prot_wind_gap = wind_gap; } } } /*** ? /topdiags=n */ if(settopdiags != -1) { temp = 0; if(strlen(param_arg[settopdiags]) > 0) if (sscanf(param_arg[settopdiags],"%d",&temp)!=1) { fprintf(stdout,"Bad option for /topdiags (must be integer)\n"); temp = 0; } if(temp > 0) if(dnaflag) { if(temp > ktup) { signif = temp; dna_signif = signif; } } else { if(temp > ktup) { signif = temp; prot_signif = signif; } } } /*** ? /window=n */ if(setwindow != -1) { temp = 0; if(strlen(param_arg[setwindow]) > 0) if (sscanf(param_arg[setwindow],"%d",&temp)!=1) { fprintf(stdout,"Bad option for /window (must be integer)\n"); temp = 0; } if(temp > 0) if(dnaflag) { if(temp > ktup) { window = temp; dna_window = window; } } else { if(temp > ktup) { window = temp; prot_window = window; } } } /*** ? /kimura */ if(setkimura != -1) kimura = TRUE; /*** ? /tossgaps */ if(settossgaps != -1) tossgaps = TRUE; /*** ? /negative */ if(setnegative != -1) neg_matrix = TRUE; /*** ? /noweights */ if(setnoweights!= -1) no_weights = TRUE; /*** ? 
/pwmatrix=ID (user's file) */ if(setpwmatrix != -1) { temp=strlen(param_arg[setpwmatrix]); if(temp > 0) { for(i=0;i 0) { for(i=0;i 0) { for(i=0;i 0) { for(i=0;i 0) if (sscanf(param_arg[setmaxdiv],"%d",&temp)!=1) { fprintf(stdout,"Bad option for /maxdiv (must be integer)\n"); temp = 0; } if (temp >= 0) divergence_cutoff = temp; } /*** ? /gapdist= n */ if(setgapdist != -1) { temp = 0; if(strlen(param_arg[setgapdist]) > 0) if (sscanf(param_arg[setgapdist],"%d",&temp)!=1) { fprintf(stdout,"Bad option for /gapdist (must be integer)\n"); temp = 0; } if (temp >= 0) gap_dist = temp; } /*** ? /debug= n */ if(setdebug != -1) { temp = 0; if(strlen(param_arg[setdebug]) > 0) if (sscanf(param_arg[setdebug],"%d",&temp)!=1) { fprintf(stdout,"Bad option for /debug (must be integer)\n"); temp = 0; } if (temp >= 0) debug = temp; } /*** ? /outfile= (user's file) */ if(setoutfile != -1) if(strlen(param_arg[setoutfile]) > 0) { strcpy(outfile_name, param_arg[setoutfile]); } /*** ? /case= lower/upper */ if(setcase != -1) if(strlen(param_arg[setcase]) > 0) { temp = find_match(param_arg[setcase],case_arg,2); if(temp == 0) { lowercase = TRUE; } else if(temp == 1) { lowercase = FALSE; } else fprintf(stdout,"\nUnknown case %s\n", param_arg[setcase]); } /*** ? /seqnos=off/on */ if(setseqno != -1) if(strlen(param_arg[setseqno]) > 0) { temp = find_match(param_arg[setseqno],seqno_arg,2); if(temp == 0) { cl_seq_numbers = FALSE; } else if(temp == 1) { cl_seq_numbers = TRUE; } else fprintf(stdout,"\nUnknown SEQNO option %s\n", param_arg[setseqno]); } /*** ? /gapopen=n */ if(setgapopen != -1) { ftemp = 0.0; if(strlen(param_arg[setgapopen]) > 0) if (sscanf(param_arg[setgapopen],"%f",&ftemp)!=1) { fprintf(stdout,"Bad option for /gapopen (must be real number)\n"); ftemp = 0.0; } if(ftemp >= 0.0) if(dnaflag) { gap_open = ftemp; dna_gap_open = gap_open; } else { gap_open = ftemp; prot_gap_open = gap_open; } } /*** ? 
/gapext=n */ if(setgapext != -1) { ftemp = 0.0; if(strlen(param_arg[setgapext]) > 0) if (sscanf(param_arg[setgapext],"%f",&ftemp)!=1) { fprintf(stdout,"Bad option for /gapext (must be real number)\n"); ftemp = 0.0; } if(ftemp >= 0) if(dnaflag) { gap_extend = ftemp; dna_gap_extend = gap_extend; } else { gap_extend = ftemp; prot_gap_extend = gap_extend; } } /*** ? /transweight=n*/ if(settransweight != -1) { ftemp = 0.0; if(strlen(param_arg[settransweight]) > 0) if (sscanf(param_arg[settransweight],"%f",&ftemp)!=1) { fprintf(stdout,"Bad option for /transweight (must be real number)\n"); ftemp = 0.0; } transition_weight=ftemp; } /*** ? /pwgapopen=n */ if(setpwgapopen != -1) { ftemp = 0.0; if(strlen(param_arg[setpwgapopen]) > 0) if (sscanf(param_arg[setpwgapopen],"%f",&ftemp)!=1) { fprintf(stdout,"Bad option for /pwgapopen (must be real number)\n"); ftemp = 0.0; } if(ftemp >= 0.0) if(dnaflag) { pw_go_penalty = ftemp; dna_pw_go_penalty = pw_go_penalty; } else { pw_go_penalty = ftemp; prot_pw_go_penalty = pw_go_penalty; } }
/*** ? /pwgapext=n */
if(setpwgapext != -1) { ftemp = 0.0; if(strlen(param_arg[setpwgapext]) > 0) if (sscanf(param_arg[setpwgapext],"%f",&ftemp)!=1) { fprintf(stdout,"Bad option for /pwgapext (must be real number)\n"); ftemp = 0.0; } if(ftemp >= 0) if(dnaflag) { pw_ge_penalty = ftemp; dna_pw_ge_penalty = pw_ge_penalty; } else { pw_ge_penalty = ftemp; prot_pw_ge_penalty = pw_ge_penalty; } }
/*** ? /outorder=n */
	if(setoutorder != -1)
		if(strlen(param_arg[setoutorder]) > 0) {
			/* interpret the match only when an argument was given, so a
			   value left in temp by an earlier option is never acted on */
			temp = find_match(param_arg[setoutorder],outorder_arg,2);
			if(temp == 0) {
				output_order = INPUT;
			}
			else if(temp == 1) {
				output_order = ALIGNED;
			}
			else
				fprintf(stdout,"\nUnknown OUTPUT ORDER type %s\n",
					param_arg[setoutorder]);
		}
/*** ? /bootlabels=n */
	if(setbootlabels != -1)
		if(strlen(param_arg[setbootlabels]) > 0) {
			/* same guard as /outorder: no argument, no action */
			temp = find_match(param_arg[setbootlabels],bootlabels_arg,2);
			if(temp == 0) {
				bootstrap_format = BS_NODE_LABELS;
			}
			else if(temp == 1) {
				bootstrap_format = BS_BRANCH_LABELS;
			}
			else
				/* report the /bootlabels argument itself; the old code
				   printed param_arg[setoutorder], which is param_arg[-1]
				   whenever /outorder was not given */
				fprintf(stdout,"\nUnknown bootlabels type %s\n",
					param_arg[setbootlabels]);
		}
/*** ? /endgaps */ if(setuseendgaps != -1) use_endgaps = FALSE; /*** ? /nopgap */ if(setnopgap != -1) no_pref_penalties = TRUE; /*** ? /nohgap */ if(setnohgap != -1) no_hyd_penalties = TRUE; /*** ? /novgap */ if(setnovgap != -1) no_var_penalties = FALSE; /*** ? /hgapresidues="string" */ if(sethgapres != -1) if(strlen(param_arg[sethgapres]) > 0) { for (i=0;i 0) { temp = find_match(param_arg[setsecstroutput],outputsecstr_arg,4); if(temp >= 0 && temp <= 3) output_struct_penalties = temp; else fprintf(stdout,"\nUnknown case %s\n", param_arg[setsecstroutput]); } /*** ? /helixgap= n */ if(sethelixgap != -1) { temp = 0; if(strlen(param_arg[sethelixgap]) > 0) if (sscanf(param_arg[sethelixgap],"%d",&temp)!=1) { fprintf(stdout,"Bad option for /helixgap (must be integer)\n"); temp = 0; } if (temp >= 1 && temp <= 9) helix_penalty = temp; } /*** ? /strandgap= n */ if(setstrandgap != -1) { temp = 0; if(strlen(param_arg[setstrandgap]) > 0) if (sscanf(param_arg[setstrandgap],"%d",&temp)!=1) { fprintf(stdout,"Bad option for /strandgap (must be integer)\n"); temp = 0; } if (temp >= 1 && temp <= 9) strand_penalty = temp; } /*** ? /loopgap= n */ if(setloopgap != -1) { temp = 0; if(strlen(param_arg[setloopgap]) > 0) if (sscanf(param_arg[setloopgap],"%d",&temp)!=1) { fprintf(stdout,"Bad option for /loopgap (must be integer)\n"); temp = 0; } if (temp >= 1 && temp <= 9) loop_penalty = temp; } /*** ? 
/terminalgap= n */ if(setterminalgap != -1) { temp = 0; if(strlen(param_arg[setterminalgap]) > 0) if (sscanf(param_arg[setterminalgap],"%d",&temp)!=1) { fprintf(stdout,"Bad option for /terminalgap (must be integer)\n"); temp = 0; } if (temp >= 1 && temp <= 9) { helix_end_penalty = temp; strand_end_penalty = temp; } } /*** ? /helixendin= n */ if(sethelixendin != -1) { temp = 0; if(strlen(param_arg[sethelixendin]) > 0) if (sscanf(param_arg[sethelixendin],"%d",&temp)!=1) { fprintf(stdout,"Bad option for /helixendin (must be integer)\n"); temp = 0; } if (temp >= 0 && temp <= 3) helix_end_minus = temp; } /*** ? /helixendout= n */ if(sethelixendout != -1) { temp = 0; if(strlen(param_arg[sethelixendout]) > 0) if (sscanf(param_arg[sethelixendout],"%d",&temp)!=1) { fprintf(stdout,"Bad option for /helixendout (must be integer)\n"); temp = 0; } if (temp >= 0 && temp <= 3) helix_end_plus = temp; } /*** ? /strandendin= n */ if(setstrandendin != -1) { temp = 0; if(strlen(param_arg[setstrandendin]) > 0) if (sscanf(param_arg[setstrandendin],"%d",&temp)!=1) { fprintf(stdout,"Bad option for /strandendin (must be integer)\n"); temp = 0; } if (temp >= 0 && temp <= 3) strand_end_minus = temp; } /*** ? 
/strandendout= n */ if(setstrandendout != -1) { temp = 0; if(strlen(param_arg[setstrandendout]) > 0) if (sscanf(param_arg[setstrandendout],"%d",&temp)!=1) { fprintf(stdout,"Bad option for /strandendout (must be integer)\n"); temp = 0; } if (temp >= 0 && temp <= 3) strand_end_plus = temp; } } #ifdef UNIX FILE *open_path(char *fname) /* to open in read-only file fname searching for it through all path directories */ { #define Mxdir 70 char dir[Mxdir+1], *path, *deb, *fin; FILE *fich; sint lf, ltot; char *path1; path=getenv("PATH"); /* get the list of path directories, separated by : */ /* added for File System Standards - Francois */ path1=(char *)ckalloc((strlen(path)+64)*sizeof(char)); strcpy(path1,path); strcat(path1,"/usr/share/clustalx:/usr/local/share/clustalx"); lf=(sint)strlen(fname); deb=path1; do { fin=strchr(deb,':'); if(fin!=NULL) { strncpy(dir,deb,fin-deb); ltot=fin-deb; } else { strcpy(dir,deb); ltot=(sint)strlen(dir); } /* now one directory is in string dir */ if( ltot + lf + 1 <= Mxdir) { dir[ltot]='/'; strcpy(dir+ltot+1,fname); /* now dir is appended with fi lename */ if( (fich = fopen(dir,"r") ) != NULL) break; } else fich = NULL; deb=fin+1; } while (fin != NULL); return fich; } #endif void get_help(char help_pointer) /* Help procedure */ { FILE *help_file; sint i, number, nlines; Boolean found_help; char temp[MAXLINE+1]; char token = '\0'; char *digits = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"; char *help_marker = ">>HELP"; extern char *help_file_name; #ifdef VMS if((help_file=fopen(help_file_name,"r","rat=cr","rfm=var"))==NULL) { error("Cannot open help file [%s]",help_file_name); return; } #else #ifdef UNIX if((help_file=open_path(help_file_name))==NULL) { if((help_file=fopen(help_file_name,"r"))==NULL) { error("Cannot open help file [%s]",help_file_name); return; } } #else if((help_file=fopen(help_file_name,"r"))==NULL) { error("Cannot open help file [%s]",help_file_name); return; } #endif #endif /* error("Cannot open help file 
[%s]",help_file_name); return; } */ nlines = 0; number = -1; found_help = FALSE; while(TRUE) { if(fgets(temp,MAXLINE+1,help_file) == NULL) { if(!found_help) error("No help found in help file"); fclose(help_file); return; } if(strstr(temp,help_marker)) { token = ' '; for(i=strlen(help_marker); i<8; i++) if(strchr(digits, temp[i])) { token = temp[i]; break; } } if(token == help_pointer) { found_help = TRUE; while(fgets(temp,MAXLINE+1,help_file)) { if(strstr(temp, help_marker)){ if(usemenu) { fprintf(stdout,"\n"); getstr("Press [RETURN] to continue",lin2); } fclose(help_file); return; } if(temp[0]!='<') { fputs(temp,stdout); ++nlines; } if(usemenu) { if(nlines >= PAGE_LEN) { fprintf(stdout,"\n"); getstr("Press [RETURN] to continue or X to stop",lin2); if(toupper(*lin2) == 'X') { fclose(help_file); return; } else nlines = 0; } } } if(usemenu) { fprintf(stdout,"\n"); getstr("Press [RETURN] to continue",lin2); } fclose(help_file); } } } static void show_aln(void) /* Alignment screen display procedure */ { FILE *file; sint nlines; char temp[MAXLINE+1]; char file_name[FILENAMELEN+1]; if(output_clustal) strcpy(file_name,clustal_outname); else if(output_nbrf) strcpy(file_name,nbrf_outname); else if(output_gcg) strcpy(file_name,gcg_outname); else if(output_phylip) strcpy(file_name,phylip_outname); else if(output_gde) strcpy(file_name,gde_outname); else if(output_nexus) strcpy(file_name,nexus_outname); #ifdef VMS if((file=fopen(file_name,"r","rat=cr","rfm=var"))==NULL) { #else if((file=fopen(file_name,"r"))==NULL) { #endif error("Cannot open file [%s]",file_name); return; } fprintf(stdout,"\n\n"); nlines = 0; while(fgets(temp,MAXLINE+1,file)) { fputs(temp,stdout); ++nlines; if(nlines >= PAGE_LEN) { fprintf(stdout,"\n"); getstr("Press [RETURN] to continue or X to stop",lin2); if(toupper(*lin2) == 'X') { fclose(file); return; } else nlines = 0; } } fclose(file); fprintf(stdout,"\n"); getstr("Press [RETURN] to continue",lin2); } void parse_params(Boolean xmenus) { sint 
i,j,len,temp; static sint cl_error_code=0; char path[FILENAMELEN]; Boolean do_align, do_convert, do_align_only, do_tree_only, do_tree, do_boot, do_profile, do_something; if (!xmenus) { fprintf(stdout,"\n\n\n"); fprintf(stdout," CLUSTAL %s Multiple Sequence Alignments\n\n\n",revision_level); } do_align = do_convert = do_align_only = do_tree_only = do_tree = do_boot = do_profile = do_something = FALSE; *seqname=EOS; /* JULIE len=(sint)strlen(paramstr); Stop converting command line to lower case - unix, mac, pc are case sensitive for(i=0;i0) { temp = find_match(param_arg[settype],type_arg,2); if(temp == 0) { dnaflag = FALSE; explicit_dnaflag = TRUE; info("Sequence type explicitly set to Protein"); } else if(temp == 1) { info("Sequence type explicitly set to DNA"); dnaflag = TRUE; explicit_dnaflag = TRUE; } else fprintf(stdout,"\nUnknown sequence type %s\n", param_arg[settype]); } /*************************************************************************** * check to see if 1st parameter does not start with '/' i.e. look for an * * input file as first parameter. The input file can also be specified * * by /infile=fname. * ****************************************************************************/ /* JULIE - moved to check_param() if(paramstr[0] != '/') { strcpy(seqname, params[0]); } */ /**************************************************/ /* Look for /infile=file.ext on the command line */ /**************************************************/ if(setinfile != -1) { if(strlen(param_arg[setinfile]) <= 0) { error("Bad sequence file name"); myexit(1); /* exit(1); */ } strcpy(seqname, param_arg[setinfile]); } if(*seqname != EOS) { profile_no = 0; nseqs = readseqs((sint)1); if(nseqs < 2) { if(nseqs < 0) cl_error_code = 2; else if(nseqs == 0) cl_error_code = 3; else cl_error_code = 4; fprintf(stdout, "\nNo. of seqs. read = %d. 
No alignment!\n",(pint)nseqs); /* exit(cl_error_code); */ myexit(cl_error_code); } for(i = 1; i<=nseqs; i++) info("Sequence %d: %-*s %6.d %s", (pint)i,max_names,names[i],(pint)seqlen_array[i],dnaflag?"bp":"aa"); empty = FALSE; do_something = TRUE; } set_optional_param(); /*********************************************************/ /* Look for /profile1=file.ext AND /profile2=file2.ext */ /* You must give both file names OR neither. */ /*********************************************************/ if(setprofile1 != -1) { if(strlen(param_arg[setprofile1]) <= 0) { error("Bad profile 1 file name"); /* exit(1); */ myexit(1); } strcpy(seqname, param_arg[setprofile1]); profile_no = 1; profile_input(); if(nseqs <= 0) { if(nseqs<0) cl_error_code=2; else if(nseqs==0) cl_error_code=3; /* exit(cl_error_code); */ myexit(cl_error_code); } strcpy(profile1_name,seqname); } if(setprofile2 != -1) { if(strlen(param_arg[setprofile2]) <= 0) { error("Bad profile 2 file name"); myexit(1); /* exit(1); */ } if(profile1_empty) { error("Only 1 profile file (profile 2) specified."); myexit(1); /* exit(1); */ } strcpy(seqname, param_arg[setprofile2]); profile_no = 2; profile_input(); if(nseqs > profile1_nseqs) do_something = do_profile = TRUE; else { if(nseqs<0) cl_error_code=2; else if(nseqs==0) cl_error_code=3; error("No sequences read from profile 2"); /* exit(cl_error_code); */ myexit(cl_error_code); } strcpy(profile2_name,seqname); } /*************************************************************************/ /* Look for /tree or /bootstrap or /align or /usetree ******************/ /*************************************************************************/ if (setbatch != -1) interactive=FALSE; if (setinteractive != -1) interactive=TRUE; if (interactive) { settree = -1; setbootstrap = -1; setalign = -1; setusetree = -1; setusetree1 = -1; setusetree2 = -1; setnewtree = -1; setconvert = -1; } if(settree != -1 ) if(empty) { error("Cannot draw tree. 
No input alignment file"); /* exit(1); */ myexit(1); } else do_tree = TRUE; if(setbootstrap != -1) if(empty) { error("Cannot bootstrap tree. No input alignment file"); myexit(1); /* exit(1); */ } else { temp = 0; if(param_arg[setbootstrap] != NULL) if (sscanf(param_arg[setbootstrap],"%d",&temp)!=1) { fprintf(stdout,"Bad option for /bootstrap (must be integer)\n"); temp = 0; }; if(temp > 0) boot_ntrials = temp; do_boot = TRUE; } if(setalign != -1) if(empty) { error("Cannot align sequences. No input file"); myexit(1); /* exit(1) */ } else do_align = TRUE; if(setconvert != -1) if(empty) { error("Cannot convert sequences. No input file"); /* exit(1); */ myexit(1); } else do_convert = TRUE; if(setusetree != -1) if(empty) { error("Cannot align sequences. No input file"); /* exit(1); */ myexit(1); } else { if(strlen(param_arg[setusetree]) == 0) { error("Cannot align sequences. No tree file specified"); /* exit(1); */ myexit(1); } else { strcpy(phylip_tree_name, param_arg[setusetree]); } use_tree_file = TRUE; do_align_only = TRUE; } if(setnewtree != -1) if(empty) { error("Cannot align sequences. No input file"); /* exit(1); */ myexit(1); } else { if(strlen(param_arg[setnewtree]) == 0) { error("Cannot align sequences. No tree file specified"); /* exit(1); */ myexit(1); } else { strcpy(phylip_tree_name, param_arg[setnewtree]); } new_tree_file = TRUE; do_tree_only = TRUE; } if(setusetree1 != -1) if(profile1_empty) { error("Cannot align profiles. No input file"); /* exit(1); */ myexit(1); } else if(profile_type == SEQUENCE) { error("Invalid option /usetree1."); /* exit(1); */ myexit(1); } else { if(strlen(param_arg[setusetree1]) == 0) { error("Cannot align profiles. No tree file specified"); /* exit(1); */ myexit(1); } else { strcpy(p1_tree_name, param_arg[setusetree1]); } use_tree1_file = TRUE; do_align_only = TRUE; } if(setnewtree1 != -1) if(profile1_empty) { error("Cannot align profiles. 
No input file"); /* exit(1); */ myexit(1); } else if(profile_type == SEQUENCE) { error("Invalid option /newtree1."); /* exit(1); */ myexit(1); } else { if(strlen(param_arg[setnewtree1]) == 0) { error("Cannot align profiles. No tree file specified"); /* exit(1); */ myexit(1); } else { strcpy(p1_tree_name, param_arg[setnewtree1]); } new_tree1_file = TRUE; } if(setusetree2 != -1) if(profile2_empty) { error("Cannot align profiles. No input file"); /* exit(1); */ myexit(1); } else if(profile_type == SEQUENCE) { error("Invalid option /usetree2."); /* exit(1); */ myexit(1); } else { if(strlen(param_arg[setusetree2]) == 0) { error("Cannot align profiles. No tree file specified"); /* exit(1); */ myexit(1); } else { strcpy(p2_tree_name, param_arg[setusetree2]); } use_tree2_file = TRUE; do_align_only = TRUE; } if(setnewtree2 != -1) if(profile2_empty) { error("Cannot align profiles. No input file"); /* exit(1); */ myexit(1); } else if(profile_type == SEQUENCE) { error("Invalid option /newtree2."); /* exit(1); */ myexit(1); } else { if(strlen(param_arg[setnewtree2]) == 0) { error("Cannot align profiles. No tree file specified"); /* exit(1); */ myexit(1); } else { strcpy(p2_tree_name, param_arg[setnewtree2]); } new_tree2_file = TRUE; } if( (!do_tree) && (!do_boot) && (!empty) && (!do_profile) && (!do_align_only) && (!do_tree_only) && (!do_convert)) do_align = TRUE; /*** ? 
/quicktree */ if(setquicktree != -1) quick_pairalign = TRUE; if(dnaflag) { gap_open = dna_gap_open; gap_extend = dna_gap_extend; pw_go_penalty = dna_pw_go_penalty; pw_ge_penalty = dna_pw_ge_penalty; ktup = dna_ktup; window = dna_window; signif = dna_signif; wind_gap = dna_wind_gap; } else { gap_open = prot_gap_open; gap_extend = prot_gap_extend; pw_go_penalty = prot_pw_go_penalty; pw_ge_penalty = prot_pw_ge_penalty; ktup = prot_ktup; window = prot_window; signif = prot_signif; wind_gap = prot_wind_gap; } if(interactive) { if (!xmenus) usemenu = TRUE; return; } if(!do_something) { error("No input file(s) specified"); /* exit(1); */ myexit(1); } /****************************************************************************/ /* Now do whatever has been requested ***************************************/ /****************************************************************************/ if(do_profile) { if (profile_type == PROFILE) profile_align(p1_tree_name,p2_tree_name); else new_sequence_align(phylip_tree_name); } else if(do_align) align(phylip_tree_name); else if(do_convert) { get_path(seqname,path); if(!open_alignment_output(path)) /* exit(1); */ myexit(1); create_alignment_output(1,nseqs); } else if (do_align_only) get_tree(phylip_tree_name); else if(do_tree_only) make_tree(phylip_tree_name); else if(do_tree) phylogenetic_tree(phylip_tree_name,clustal_tree_name,dist_tree_name,nexus_tree_name); else if(do_boot) bootstrap_tree(phylip_tree_name,clustal_tree_name,nexus_tree_name); fprintf(stdout,"\n"); /* exit(0); */ myexit(0); /*******whew!***now*go*home****/ } Boolean user_mat(char *str, short *mat, short *xref) { sint maxres; FILE *infile; if(usemenu) getstr("Enter name of the matrix file",lin2); else strcpy(lin2,str); if(*lin2 == EOS) return FALSE; if((infile=fopen(lin2,"r"))==NULL) { error("Cannot find matrix file [%s]",lin2); return FALSE; } strcpy(str, lin2); maxres = read_user_matrix(str, mat, xref); if (maxres <= 0) return FALSE; return TRUE; } Boolean 
user_mat_series(char *str, short *mat, short *xref) { sint maxres; FILE *infile; if(usemenu) getstr("Enter name of the matrix file",lin2); else strcpy(lin2,str); if(*lin2 == EOS) return FALSE; if((infile=fopen(lin2,"r"))==NULL) { error("Cannot find matrix file [%s]",lin2); return FALSE; } strcpy(str, lin2); maxres = read_matrix_series(str, mat, xref); if (maxres <= 0) return FALSE; return TRUE; } sint seq_input(Boolean append) { sint i; sint local_nseqs; if(usemenu) { fprintf(stdout,"\n\nSequences should all be in 1 file.\n"); fprintf(stdout,"\n7 formats accepted: \n"); fprintf(stdout, "NBRF/PIR, EMBL/SwissProt, Pearson (Fasta), GDE, Clustal, GCG/MSF, RSF.\n\n\n"); /*fprintf(stdout, "\nGCG users should use TOPIR to convert their sequence files before use.\n\n\n");*/ } if (append) local_nseqs = readseqs(nseqs+(sint)1); else local_nseqs = readseqs((sint)1); /* 1 is the first seq to be read */ if(local_nseqs < 0) /* file could not be opened */ { return local_nseqs; } else if(local_nseqs == 0) /* no sequences */ { error("No sequences in file! 
Bad format?"); return local_nseqs; } else { struct_penalties1 = struct_penalties2 = NONE; if (sec_struct_mask1 != NULL) sec_struct_mask1=ckfree(sec_struct_mask1); if (sec_struct_mask2 != NULL) sec_struct_mask2=ckfree(sec_struct_mask2); if (gap_penalty_mask1 != NULL) gap_penalty_mask1=ckfree(gap_penalty_mask1); if (gap_penalty_mask2 != NULL) gap_penalty_mask2=ckfree(gap_penalty_mask2); if (ss_name1 != NULL) ss_name1=ckfree(ss_name1); if (ss_name2 != NULL) ss_name2=ckfree(ss_name2); if(append) nseqs+=local_nseqs; else nseqs=local_nseqs; info("Sequences assumed to be %s", dnaflag?"DNA":"PROTEIN"); if (usemenu) { fprintf(stdout,"\n\n"); for(i=1; i<=nseqs; i++) { /* DES fprintf(stdout,"%s: = ",names[i]); */ info("Sequence %d: %-*s %6.d %s", (pint)i,max_names,names[i],(pint)seqlen_array[i],dnaflag?"bp":"aa"); } } if(dnaflag) { gap_open = dna_gap_open; gap_extend = dna_gap_extend; } else { gap_open = prot_gap_open; gap_extend = prot_gap_extend; } empty=FALSE; } return local_nseqs; } sint profile_input(void) /* read a profile */ { /* profile_no is 1 or 2 */ sint local_nseqs, i; if(profile_no == 2 && profile1_empty) { error("You must read in profile number 1 first"); return 0; } if(profile_no == 1) /* for the 1st profile */ { local_nseqs = readseqs((sint)1); /* (1) means 1st seq to be read = no. 1 */ if(local_nseqs < 0) /* file could not be opened */ { return local_nseqs; } else if(local_nseqs == 0) /* no sequences */ { error("No sequences in file! Bad format?"); return local_nseqs; } else if (local_nseqs > 0) { /* success; found some seqs. 
*/ struct_penalties1 = NONE; if (sec_struct_mask1 != NULL) sec_struct_mask1=ckfree(sec_struct_mask1); if (gap_penalty_mask1 != NULL) gap_penalty_mask1=ckfree(gap_penalty_mask1); if (ss_name1 != NULL) ss_name1=ckfree(ss_name1); if (struct_penalties != NONE) /* feature table / mask in alignment */ { struct_penalties1 = struct_penalties; if (struct_penalties == SECST) { sec_struct_mask1 = (char *)ckalloc((max_aln_length) * sizeof (char)); for (i=0;i0) { for (i=0;i 0) { struct_penalties2 = NONE; if (sec_struct_mask2 != NULL) sec_struct_mask2=ckfree(sec_struct_mask2); if (gap_penalty_mask2 != NULL) gap_penalty_mask2=ckfree(gap_penalty_mask2); if (ss_name2 != NULL) ss_name2=ckfree(ss_name2); if (struct_penalties != NONE) /* feature table / mask in alignment */ { struct_penalties2 = struct_penalties; if (struct_penalties == SECST) { sec_struct_mask2 = (char *)ckalloc((max_aln_length) * sizeof (char)); for (i=0;i0) { for (i=0;i=0) && (tolower(struct_mask[i+j]) != 'a') && (tolower(struct_mask[i+j]) != 'b')) struct_mask[i+j] = 'a'; } for (j = 0; j=prf_length || (tolower(mask[i+j]) != 'a' && mask[i+j] != '$')) break; struct_mask[i+j] = 'a'; } i += j; while (tolower(mask[i]) == 'a' || mask[i] == '$') { if (i>=prf_length) break; if (mask[i] == '$') { struct_mask[i] = 'A'; i++; break; } else struct_mask[i] = mask[i]; i++; } for (j = 0; j=0) && (tolower(mask[i-j-1]) == 'a' || mask[i-j-1] == '$')) struct_mask[i-j-1] = 'a'; } for (j = 0; j=prf_length) break; struct_mask[i+j] = 'a'; } } else if (tolower(mask[i]) == 'b' || mask[i] == '%') { for (j = -strand_end_plus; j<0; j++) { if ((i+j>=0) && (tolower(struct_mask[i+j]) != 'a') && (tolower(struct_mask[i+j]) != 'b')) struct_mask[i+j] = 'b'; } for (j = 0; j=prf_length || (tolower(mask[i+j]) != 'b' && mask[i+j] != '%')) break; struct_mask[i+j] = 'b'; } i += j; while (tolower(mask[i]) == 'b' || mask[i] == '%') { if (i>=prf_length) break; if (mask[i] == '%') { struct_mask[i] = 'B'; i++; break; } else struct_mask[i] = mask[i]; i++; } for 
(j = 0; j=0) && (tolower(mask[i-j-1]) == 'b' || mask[i-j-1] == '%')) struct_mask[i-j-1] = 'b'; } for (j = 0; j=prf_length) break; struct_mask[i+j] = 'b'; } } else i++; } for(i=0;i=prf_length || (tolower(mask[i+j]) != 'a' && mask[i+j] != '$')) break; struct_mask[i+j] = 'a'; } i += j; while (tolower(mask[i]) == 'a' || mask[i] == '$') { if (i>=prf_length) break; if (mask[i] == '$') { struct_mask[i] = 'A'; i++; break; } else struct_mask[i] = mask[i]; i++; } for (j = 0; j=0) && (tolower(mask[i-j-1]) == 'a' || mask[i-j-1] == '$')) struct_mask[i-j-1] = 'a'; } } else if (tolower(mask[i]) == 'b' || mask[i] == '%') { for (j = 0; j=prf_length || (tolower(mask[i+j]) != 'b' && mask[i+j] != '%')) break; struct_mask[i+j] = 'b'; } i += j; while (tolower(mask[i]) == 'b' || mask[i] == '%') { if (i>=prf_length) break; if (mask[i] == '%') { struct_mask[i] = 'B'; i++; break; } else struct_mask[i] = mask[i]; i++; } for (j = 0; j=0) && (tolower(mask[i-j-1]) == 'b' || mask[i-j-1] == '%')) struct_mask[i-j-1] = 'b'; } } else i++; } } FILE * open_output_file(char *prompt, char *path, char *file_name, char *file_extension) { static char temp[FILENAMELEN+1]; static char local_prompt[MAXLINE]; FILE * file_handle; /* if (*file_name == EOS) { */ strcpy(file_name,path); strcat(file_name,file_extension); /* } */ if(strcmp(file_name,seqname)==0) { warning("Output file name is the same as input file."); if (usemenu) { strcpy(local_prompt,"\n\nEnter new name to avoid overwriting "); strcat(local_prompt," [%s]: "); fprintf(stdout,local_prompt,file_name); /*gets(temp);*/ fgets(temp, FILENAMELEN, stdin); if(*temp != EOS) strcpy(file_name,temp); } } else if (usemenu) { strcpy(local_prompt,prompt); strcat(local_prompt," [%s]: "); fprintf(stdout,local_prompt,file_name); /* gets(temp); */ fgets(temp, FILENAMELEN, stdin); if(*temp != EOS) strcpy(file_name,temp); } #ifdef VMS if((file_handle=fopen(file_name,"w","rat=cr","rfm=var"))==NULL) { #else if((file_handle=fopen(file_name,"w"))==NULL) { #endif 
error("Cannot open output file [%s]",file_name); return NULL; } return file_handle; } FILE * open_explicit_file(char *file_name) { FILE * file_handle; if (*file_name == EOS) { error("Bad output file [%s]",file_name); return NULL; } #ifdef VMS if((file_handle=fopen(file_name,"w","rat=cr","rfm=var"))==NULL) { #else if((file_handle=fopen(file_name,"w"))==NULL) { #endif error("Cannot open output file [%s]",file_name); return NULL; } return file_handle; } void align(char *phylip_name) { char path[FILENAMELEN+1]; FILE *tree; sint count; int np; if(empty && usemenu) { error("No sequences in memory. Load sequences first."); return; } struct_penalties1 = struct_penalties2 = NONE; if (sec_struct_mask1 != NULL) sec_struct_mask1=ckfree(sec_struct_mask1); if (sec_struct_mask2 != NULL) sec_struct_mask2=ckfree(sec_struct_mask2); if (gap_penalty_mask1 != NULL) gap_penalty_mask1=ckfree(gap_penalty_mask1); if (gap_penalty_mask2 != NULL) gap_penalty_mask2=ckfree(gap_penalty_mask2); if (ss_name1 != NULL) ss_name1=ckfree(ss_name1); if (ss_name2 != NULL) ss_name2=ckfree(ss_name2); get_path(seqname,path); /* DES DEBUG fprintf(stdout,"\n\n Seqname = %s \n Path = %s \n\n",seqname,path); */ if(usemenu || !interactive) { if(!open_alignment_output(path)) return; } if (nseqs >= 2) { get_path(seqname,path); if (phylip_name[0]!=EOS) { if((tree = open_explicit_file( phylip_name))==NULL) return; } else { if((tree = open_output_file( "\nEnter name for new GUIDE TREE file ",path, phylip_name,"dnd")) == NULL) return; } } if (save_parameters) create_parameter_output(); if(reset_alignments_new || reset_alignments_all) reset_align(); info("Start of Pairwise alignments"); info("Aligning..."); if(dnaflag) { gap_open = dna_gap_open; gap_extend = dna_gap_extend; pw_go_penalty = dna_pw_go_penalty; pw_ge_penalty = dna_pw_ge_penalty; ktup = dna_ktup; window = dna_window; signif = dna_signif; wind_gap = dna_wind_gap; } else { gap_open = prot_gap_open; gap_extend = prot_gap_extend; pw_go_penalty = 
prot_pw_go_penalty; pw_ge_penalty = prot_pw_ge_penalty; ktup = prot_ktup; window = prot_window; signif = prot_signif; wind_gap = prot_wind_gap; } if (quick_pairalign) show_pair((sint)0,nseqs,(sint)0,nseqs); else pairalign((sint)0,nseqs,(sint)0,nseqs); if (nseqs >= 2) { guide_tree(tree,1,nseqs); info("Guide tree file created: [%s]", phylip_name); } /*************************************************** * When "-usetree=xxxx.dnd" is not used, the following * malign() will be called. ***************************************************/ /* count = malign((sint)0,phylip_name); */ /* * if "mpirun -np 1" or "mpirun -np 2", we'll call the original * sequential version of malign(). * */ MPI_Comm_size(MPI_COMM_WORLD, &np); if (np==1 || np==2){ count = malign((sint)0,phylip_name); } else /* * If the environment variable CLUSTALG_PARALLEL_PDIFF is * set to (non-zero), we will call malign_mpi_pdiff(); otherwise * malign_mpi_progressive() will be called. */ { char *p; char environ[]="CLUSTALG_PARALLEL_PDIFF"; p=getenv(environ); if (p) count = malign_mpi_pdiff((sint)0,phylip_name); else count = malign_mpi_progressive((sint)0,phylip_name); } if (count <= 0) return; if (usemenu) fprintf(stdout,"\n\n\n"); create_alignment_output(1,nseqs); if (showaln && usemenu) show_aln(); phylip_name[0]=EOS; }

/*
 * new_sequence_align: align the sequences held as "profile 2" onto the
 * existing profile 1 alignment.
 *
 * What the function does, in order:
 *   - refuses to run (menu mode only) when either profile is empty;
 *   - opens the alignment output (menu mode or non-interactive mode);
 *   - temporarily clears use_ss2 when more than one new sequence would
 *     otherwise carry a secondary-structure / gap-penalty mask;
 *   - fills tmat[][] for sequences 1..new_seq with the distance
 *     (100 - countid(i,j)) / 100;
 *   - in menu mode offers to reuse "<path>dnd" when that file exists; in
 *     non-menu mode reuses a tree when use_tree_file is set;
 *   - otherwise runs the pairwise alignments and writes a new guide tree;
 *   - unless new_tree_file is set, calls seqalign() and writes the alignment.
 *
 * phylip_name: in/out guide tree file name.  It may be overwritten with the
 *              name of an existing tree file, and it is reset to the empty
 *              string once the alignment has been written.
 *
 * use_ss2 is restored on every exit path taken after it may have been
 * cleared, so a failed run no longer leaves it switched off.
 */
void new_sequence_align(char *phylip_name)
{
	char path[FILENAMELEN+1];
	char tree_name[FILENAMELEN+1],temp[MAXLINE+1];
	Boolean use_tree;
	FILE *tree;
	sint i,j,count;
	float dscore;
	Boolean save_ss2;

	if(profile1_empty && usemenu) {
		error("No profile in memory. Input 1st profile first.");
		return;
	}
	if(profile2_empty && usemenu) {
		error("No sequences in memory. Input sequences first.");
		return;
	}

	get_path(profile2_name,path);

	if(usemenu || !interactive) {
		if(!open_alignment_output(path)) return;
	}

	new_seq = profile1_nseqs+1;

	/* check for secondary structure information for list of sequences */
	save_ss2 = use_ss2;
	if (struct_penalties2 != NONE && use_ss2 == TRUE && (nseqs - profile1_nseqs > 1)) {
		if (struct_penalties2 == SECST)
			warning("Warning: ignoring secondary structure for a list of sequences");
		else if (struct_penalties2 == GMASK)
			warning("Warning: ignoring gap penalty mask for a list of sequences");
		use_ss2 = FALSE;
	}

	/* symmetric distance matrix for sequences 1..new_seq */
	for (i=1;i<=new_seq;i++) {
		for (j=i+1;j<=new_seq;j++) {
			dscore = countid(i,j);
			tmat[i][j] = ((double)100.0 - (double)dscore)/(double)100.0;
			tmat[j][i] = tmat[i][j];
		}
	}

	tree_name[0] = EOS;
	use_tree = FALSE;
	if (nseqs >= 2) {
		if (check_tree && usemenu) {
			/* NOTE(review): assumes path leaves room for the 3-character
			   extension in tree_name - confirm against get_path(). */
			strcpy(tree_name,path);
			strcat(tree_name,"dnd");
#ifdef VMS
			if((tree=fopen(tree_name,"r","rat=cr","rfm=var"))!=NULL) {
#else
			if((tree=fopen(tree_name,"r"))!=NULL) {
#endif
				if (usemenu)
					fprintf(stdout,"\nUse the existing GUIDE TREE file,  %s  (y/n) ? [y]: ",
						tree_name);
				/* On EOF or a read error fgets() leaves temp unusable:
				   treat that as an empty reply, i.e. the default "y". */
				if (fgets(temp, MAXLINE, stdin) == NULL)
					temp[0] = EOS;
				if(*temp != 'n' && *temp != 'N') {
					strcpy(phylip_name,tree_name);
					use_tree = TRUE;
				}
				fclose(tree);
			}
		}
		else if (!usemenu && use_tree_file) {
			use_tree = TRUE;
		}
	}

	if (save_parameters) create_parameter_output();

	if(reset_alignments_new || reset_alignments_all) {
		/* reset_prf1(); */
		reset_prf2();
	}
	else fix_gaps();

	if (struct_penalties1 == SECST)
		calc_gap_penalty_mask(seqlen_array[1],sec_struct_mask1,gap_penalty_mask1);

	if (struct_penalties2 == SECST)
		calc_gap_penalty_mask(seqlen_array[profile1_nseqs+1],sec_struct_mask2,gap_penalty_mask2);

	/* create the new tree file, if necessary */
	if (use_tree == FALSE) {
		if (nseqs >= 2) {
			get_path(profile2_name,path);
			if (phylip_name[0]!=EOS)
				tree = open_explicit_file(phylip_name);
			else
				tree = open_output_file(
					"\nEnter name for new GUIDE TREE file ",path,
					phylip_name,"dnd");
			if (tree == NULL) {
				use_ss2 = save_ss2;	/* do not leave the flag cleared */
				return;
			}
		}

		info("Start of Pairwise alignments");
		info("Aligning...");

		/* select the DNA or protein parameter set for the pairwise stage */
		if(dnaflag) {
			gap_open = dna_gap_open;
			gap_extend = dna_gap_extend;
			pw_go_penalty = dna_pw_go_penalty;
			pw_ge_penalty = dna_pw_ge_penalty;
			ktup = dna_ktup;
			window = dna_window;
			signif = dna_signif;
			wind_gap = dna_wind_gap;
		}
		else {
			gap_open = prot_gap_open;
			gap_extend = prot_gap_extend;
			pw_go_penalty = prot_pw_go_penalty;
			pw_ge_penalty = prot_pw_ge_penalty;
			ktup = prot_ktup;
			window = prot_window;
			signif = prot_signif;
			wind_gap = prot_wind_gap;
		}

		if (quick_pairalign)
			show_pair((sint)0,nseqs,new_seq-2,nseqs);
		else
			pairalign((sint)0,nseqs,new_seq-2,nseqs);

		if (nseqs >= 2) {
			guide_tree(tree,1,nseqs);
			info("Guide tree file created:   [%s]", phylip_name);
		}
	}

	if (new_tree_file) {
		use_ss2 = save_ss2;	/* only the tree was wanted; restore the flag */
		return;
	}

	count = seqalign(new_seq-2,phylip_name);

	/* restored before the output stage, exactly where the original did it */
	use_ss2 = save_ss2;

	if (count <= 0) return;

	if (usemenu) fprintf(stdout,"\n\n\n");

	create_alignment_output(1,nseqs);
	if (showaln && usemenu) show_aln();

	phylip_name[0]=EOS;
}

/*
 * make_tree: run the pairwise alignments for the sequences in memory and
 * write a guide tree file; no multiple alignment is performed here.
 * phylip_name is the in/out tree file name and is reset to "" at the end.
 */
void make_tree(char *phylip_name) { char path[FILENAMELEN+1]; FILE *tree; if(empty) { error("No 
sequences in memory. Load sequences first."); return; } struct_penalties1 = struct_penalties2 = NONE; if (sec_struct_mask1 != NULL) sec_struct_mask1=ckfree(sec_struct_mask1); if (sec_struct_mask2 != NULL) sec_struct_mask2=ckfree(sec_struct_mask2); if (gap_penalty_mask1 != NULL) gap_penalty_mask1=ckfree(gap_penalty_mask1); if (gap_penalty_mask2 != NULL) gap_penalty_mask2=ckfree(gap_penalty_mask2); if (ss_name1 != NULL) ss_name1=ckfree(ss_name1); if (ss_name2 != NULL) ss_name2=ckfree(ss_name2); if(reset_alignments_new || reset_alignments_all) reset_align(); get_path(seqname,path); if (nseqs < 2) { error("Less than 2 sequences in memory. Phylogenetic tree cannot be built."); return; } if (save_parameters) create_parameter_output(); info("Start of Pairwise alignments"); info("Aligning..."); if(dnaflag) { gap_open = dna_gap_open; gap_extend = dna_gap_extend; pw_go_penalty = dna_pw_go_penalty; pw_ge_penalty = dna_pw_ge_penalty; ktup = dna_ktup; window = dna_window; signif = dna_signif; wind_gap = dna_wind_gap; } else { gap_open = prot_gap_open; gap_extend = prot_gap_extend; pw_go_penalty = prot_pw_go_penalty; pw_ge_penalty = prot_pw_ge_penalty; ktup = prot_ktup; window = prot_window; signif = prot_signif; wind_gap = prot_wind_gap; } if (quick_pairalign) show_pair((sint)0,nseqs,(sint)0,nseqs); else pairalign((sint)0,nseqs,(sint)0,nseqs); if (nseqs >= 2) { get_path(seqname,path); if (phylip_name[0]!=EOS) { if((tree = open_explicit_file( phylip_name))==NULL) return; } else { if((tree = open_output_file( "\nEnter name for new GUIDE TREE file ",path, phylip_name,"dnd")) == NULL) return; } guide_tree(tree,1,nseqs); info("Guide tree file created: [%s]", phylip_name); } if(reset_alignments_new || reset_alignments_all) reset_align(); phylip_name[0]=EOS; } void get_tree(char *phylip_name) { char path[FILENAMELEN+1],temp[MAXLINE+1]; sint count; int np; if(empty) { error("No sequences in memory. 
Load sequences first."); return; } struct_penalties1 = struct_penalties2 = NONE; if (sec_struct_mask1 != NULL) sec_struct_mask1=ckfree(sec_struct_mask1); if (sec_struct_mask2 != NULL) sec_struct_mask2=ckfree(sec_struct_mask2); if (gap_penalty_mask1 != NULL) gap_penalty_mask1=ckfree(gap_penalty_mask1); if (gap_penalty_mask2 != NULL) gap_penalty_mask2=ckfree(gap_penalty_mask2); if (ss_name1 != NULL) ss_name1=ckfree(ss_name1); if (ss_name2 != NULL) ss_name2=ckfree(ss_name2); get_path(seqname,path); if(usemenu || !interactive) { if(!open_alignment_output(path)) return; } if(reset_alignments_new || reset_alignments_all) reset_align(); get_path(seqname,path); if (nseqs >= 2) { if(usemenu) { strcpy(phylip_name,path); strcat(phylip_name,"dnd"); fprintf(stdout,"\nEnter a name for the guide tree file [%s]: ", phylip_name); /* gets(temp); */ fgets(temp, MAXLINE, stdin); if(*temp != EOS) strcpy(phylip_name,temp); } if(usemenu || !interactive) { #ifdef VMS if((tree=fopen(phylip_name,"r","rat=cr","rfm=var"))==NULL) { #else if((tree=fopen(phylip_name,"r"))==NULL) { #endif error("Cannot open tree file [%s]",phylip_name); return; } } } else { info("Start of Pairwise alignments"); info("Aligning..."); if(dnaflag) { gap_open = dna_gap_open; gap_extend = dna_gap_extend; pw_go_penalty = dna_pw_go_penalty; pw_ge_penalty = dna_pw_ge_penalty; ktup = dna_ktup; window = dna_window; signif = dna_signif; wind_gap = dna_wind_gap; } else { gap_open = prot_gap_open; gap_extend = prot_gap_extend; pw_go_penalty = prot_pw_go_penalty; pw_ge_penalty = prot_pw_ge_penalty; ktup = prot_ktup; window = prot_window; signif = prot_signif; wind_gap = prot_wind_gap; } if (quick_pairalign) show_pair((sint)0,nseqs,(sint)0,nseqs); else pairalign((sint)0,nseqs,(sint)0,nseqs); } if (save_parameters) create_parameter_output(); /*************************************************** * When we use "-usetree=xxxx.dnd", the following * malign() will be called. 
***************************************************/ /* * if "mpirun -np 1" or "mpirun -np 2", we'll call the original * sequential version of malign(). * */ MPI_Comm_size(MPI_COMM_WORLD, &np); if (np==1 || np==2){ count = malign((sint)0,phylip_name); } else /* * If the environment variable CLUSTALG_PARALLEL_PDIFF is * set (non-zero), we will call malign_mpi_pdiff(); otherwise * malign_mpi_progressive() will be called. */ { char *p; char environ[]="CLUSTALG_PARALLEL_PROGRESSIVE"; p=getenv(environ); if (p) count = malign_mpi_pdiff(0,phylip_name); else count = malign_mpi_progressive(0,phylip_name); } if (count <= 0) return; if (usemenu) fprintf(stdout,"\n\n\n"); create_alignment_output(1,nseqs); if (showaln && usemenu) show_aln(); phylip_name[0]=EOS; } void profile_align(char *p1_tree_name,char *p2_tree_name) { char path[FILENAMELEN+1]; char tree_name[FILENAMELEN+1]; char temp[MAXLINE+1]; Boolean use_tree1,use_tree2; FILE *tree; sint count,i,j,dscore; if(profile1_empty || profile2_empty) { error("No sequences in memory. Load sequences first."); return; } get_path(profile1_name,path); if(usemenu || !interactive) { if(!open_alignment_output(path)) return; } if(reset_alignments_new || reset_alignments_all) { reset_prf1(); reset_prf2(); } else fix_gaps(); tree_name[0] = EOS; use_tree1 = FALSE; if (profile1_nseqs >= 2) { if (check_tree && usemenu) { strcpy(tree_name,path); strcat(tree_name,"dnd"); #ifdef VMS if((tree=fopen(tree_name,"r","rat=cr","rfm=var"))!=NULL) { #else if((tree=fopen(tree_name,"r"))!=NULL) { #endif fprintf(stdout,"\nUse the existing GUIDE TREE file for Profile 1, %s (y/n) ? 
[y]: ", tree_name); /* gets(temp); */ fgets(temp, MAXLINE, stdin); if(*temp != 'n' && *temp != 'N') { strcpy(p1_tree_name,tree_name); use_tree1 = TRUE; } fclose(tree); } } else if (!usemenu && use_tree1_file) { use_tree1 = TRUE; } } tree_name[0] = EOS; use_tree2 = FALSE; get_path(profile2_name,path); if (nseqs-profile1_nseqs >= 2) { if (check_tree && usemenu) { strcpy(tree_name,path); strcat(tree_name,"dnd"); #ifdef VMS if((tree=fopen(tree_name,"r","rat=cr","rfm=var"))!=NULL) { #else if((tree=fopen(tree_name,"r"))!=NULL) { #endif fprintf(stdout,"\nUse the existing GUIDE TREE file for Profile 2, %s (y/n) ? [y]: ", tree_name); /* gets(temp); */ fgets(temp, MAXLINE, stdin); if(*temp != 'n' && *temp != 'N') { strcpy(p2_tree_name,tree_name); use_tree2 = TRUE; } fclose(tree); } } else if (!usemenu && use_tree2_file) { use_tree2 = TRUE; } } if (save_parameters) create_parameter_output(); if (struct_penalties1 == SECST) calc_gap_penalty_mask(seqlen_array[1],sec_struct_mask1,gap_penalty_mask1); if (struct_penalties2 == SECST) calc_gap_penalty_mask(seqlen_array[profile1_nseqs+1],sec_struct_mask2,gap_penalty_mask2); if (use_tree1 == FALSE) if (profile1_nseqs >= 2) { for (i=1;i<=profile1_nseqs;i++) { for (j=i+1;j<=profile1_nseqs;j++) { dscore = countid(i,j); tmat[i][j] = (100.0 - dscore)/100.0; tmat[j][i] = tmat[i][j]; } } get_path(profile1_name,path); if (p1_tree_name[0]!=EOS) { if((tree = open_explicit_file(p1_tree_name))==NULL) return; } else { if((tree = open_output_file( "\nEnter name for new GUIDE TREE file for profile 1 ",path, p1_tree_name,"dnd")) == NULL) return; } guide_tree(tree,1,profile1_nseqs); info("Guide tree file created: [%s]", p1_tree_name); } if (use_tree2 == FALSE) if(nseqs-profile1_nseqs >= 2) { for (i=1+profile1_nseqs;i<=nseqs;i++) { for (j=i+1;j<=nseqs;j++) { dscore = countid(i,j); tmat[i][j] = (100.0 - dscore)/100.0; tmat[j][i] = tmat[i][j]; } } if (p2_tree_name[0]!=EOS) { if((tree = open_explicit_file(p2_tree_name))==NULL) return; } else { 
get_path(profile2_name,path); if((tree = open_output_file( "\nEnter name for new GUIDE TREE file for profile 2 ",path, p2_tree_name,"dnd")) == NULL) return; } guide_tree(tree,profile1_nseqs+1,nseqs-profile1_nseqs); info("Guide tree file created: [%s]", p2_tree_name); } if (new_tree1_file || new_tree2_file) return; /* do an initial alignment to get the pairwise identities between the two profiles - used to set parameters for the final alignment */ count = palign1(); if (count == 0) return; reset_prf1(); reset_prf2(); count = palign2(p1_tree_name,p2_tree_name); if (count == 0) return; if(usemenu) fprintf(stdout,"\n\n\n"); create_alignment_output(1,nseqs); if (showaln && usemenu) show_aln(); p1_tree_name[0]=EOS; p2_tree_name[0]=EOS; } void clustal_out(FILE *clusout, sint fres, sint len, sint fseq, sint lseq) { static char *seq1; static sint *seq_no; static sint *print_seq_no; char *ss_mask1, *ss_mask2; char temp[MAXLINE]; char c; sint val; sint ii,lv1,catident1[NUMRES],catident2[NUMRES],ident,chunks; sint i,j,k,l; sint pos,ptr; sint line_length; /* stop doing this ...... 
opens duplicate files in VMS DES fclose(clusout); if ((clusout=fopen(clustal_outname,"w")) == NULL) { fprintf(stdout,"Error opening %s\n",clustal_outfile); return; } */ seq_no = (sint *)ckalloc((nseqs+1) * sizeof(sint)); print_seq_no = (sint *)ckalloc((nseqs+1) * sizeof(sint)); for (i=fseq;i<=lseq;i++) { print_seq_no[i] = seq_no[i] = 0; for(j=1;j=0) || (val <=max_aa)) seq_no[i]++; } } seq1 = (char *)ckalloc((max_aln_length+1) * sizeof(char)); if (struct_penalties1 == SECST && use_ss1 == TRUE) { ss_mask1 = (char *)ckalloc((seqlen_array[1]+10) * sizeof(char)); for (i=0;i LINELENGTH) line_length=LINELENGTH; chunks = len/line_length; if(len % line_length != 0) ++chunks; for(lv1=1;lv1<=chunks;++lv1) { pos = ((lv1-1)*line_length)+1; ptr = (len max_aa)) seq1[j]='-'; else { seq1[j]=amino_acid_codes[val]; seq_no[i]++; print_seq_no[i]=1; } } for(;j<=ptr;++j) seq1[j]='-'; strncpy(temp,&seq1[pos],ptr-pos+1); temp[ptr-pos+1]=EOS; fprintf(clusout,"%-*s %s",max_names+5,names[i],temp); if (cl_seq_numbers && print_seq_no[i]) fprintf(clusout," %d",seq_no[i]); fprintf(clusout,"\n"); } for(i=pos;i<=ptr;++i) { seq1[i]=' '; ident=0; for(j=1;res_cat1[j-1]!=NULL;j++) catident1[j-1] = 0; for(j=1;res_cat2[j-1]!=NULL;j++) catident2[j-1] = 0; for(j=fseq;j<=lseq;++j) { if((seq_array[fseq][i+fres-1] >=0) && (seq_array[fseq][i+fres-1] <= max_aa)) { if(seq_array[fseq][i+fres-1] == seq_array[j][i+fres-1]) ++ident; for(k=1;res_cat1[k-1]!=NULL;k++) { for(l=0;(c=res_cat1[k-1][l]);l++) { if (amino_acid_codes[seq_array[j][i+fres-1]]==c) { catident1[k-1]++; break; } } } for(k=1;res_cat2[k-1]!=NULL;k++) { for(l=0;(c=res_cat2[k-1][l]);l++) { if (amino_acid_codes[seq_array[j][i+fres-1]]==c) { catident2[k-1]++; break; } } } } } if(ident==lseq-fseq+1) seq1[i]='*'; else if (!dnaflag) { for(k=1;res_cat1[k-1]!=NULL;k++) { if (catident1[k-1]==lseq-fseq+1) { seq1[i]=':'; break; } } if(seq1[i]==' ') for(k=1;res_cat2[k-1]!=NULL;k++) { if (catident2[k-1]==lseq-fseq+1) { seq1[i]='.'; break; } } } } 
strncpy(temp,&seq1[pos],ptr-pos+1); temp[ptr-pos+1]=EOS; for(k=0;k max_aa)) residue = '.'; else { residue = amino_acid_codes[val]; } seq[j-fres+1] = residue; } /* pad any short sequences with gaps, to make all sequences the same length */ for(; j<=fres+len-1; j++) seq[j-fres+1] = '.'; all_checks[i] = SeqGCGCheckSum(seq+1, (int)len); } grand_checksum = 0; for(i=1; i<=nseqs; i++) grand_checksum += all_checks[output_index[i]]; grand_checksum = grand_checksum % 10000; fprintf(gcgout,"PileUp\n\n"); fprintf(gcgout,"\n\n MSF:%5d Type: ",(pint)len); if(dnaflag) fprintf(gcgout,"N"); else fprintf(gcgout,"P"); fprintf(gcgout," Check:%6ld .. \n\n", (long)grand_checksum); for(ii=fseq; ii<=lseq; ii++) { i = output_index[ii]; /* for(j=0; j max_aa)) residue = '.'; else { residue = amino_acid_codes[val]; } fprintf(gcgout,"%c",residue); if(j % 10 == 0) fprintf(gcgout," "); } } } /* DES ckfree(output_index); */ seq=ckfree((void *)seq); all_checks=ckfree((void *)all_checks); fprintf(gcgout,"\n\n"); } void nexus_out(FILE *nxsout, sint fres, sint len, sint fseq, sint lseq) { /* static char *aacids = "XCSTPAGNDEQHRKMILVFYW";*/ /* static char *nbases = "XACGT"; */ char residue; sint val; sint i,ii,chunks,block; sint j,k,pos1,pos2; chunks = len/GCG_LINELENGTH; if(len % GCG_LINELENGTH != 0) ++chunks; fprintf(nxsout,"#NEXUS\n"); fprintf(nxsout,"BEGIN DATA;\n"); fprintf(nxsout,"dimensions ntax=%d nchar=%d;\n",(pint)nseqs,(pint)len); fprintf(nxsout,"format missing=?\n"); fprintf(nxsout,"symbols=\""); for(i=0;i<=max_aa;i++) fprintf(nxsout,"%c",amino_acid_codes[i]); fprintf(nxsout,"\"\n"); fprintf(nxsout,"interleave datatype="); fprintf(nxsout, dnaflag ? 
"DNA " : "PROTEIN "); fprintf(nxsout,"gap= -;\n"); fprintf(nxsout,"\nmatrix"); for(block=1; block<=chunks; block++) { pos1 = ((block-1) * GCG_LINELENGTH)+1; pos2 = (len max_aa)) residue = '-'; else { residue = amino_acid_codes[val]; } fprintf(nxsout,"%c",residue); } } fprintf(nxsout,"\n"); } fprintf(nxsout,";\nend;\n"); /* DES ckfree(output_index); */ } void phylip_out(FILE *phyout, sint fres, sint len, sint fseq, sint lseq) { /* static char *aacids = "XCSTPAGNDEQHRKMILVFYW";*/ /* static char *nbases = "XACGT"; */ char residue; sint val; sint i,ii,chunks,block; sint j,k,pos1,pos2; sint name_len; Boolean warn; char **snames; snames=(char **)ckalloc((lseq-fseq+2)*sizeof(char *)); name_len=0; for(i=fseq; i<=lseq; i++) { snames[i]=(char *)ckalloc((11)*sizeof(char)); ii=strlen(names[i]); strncpy(snames[i],names[i],10); if(name_len10) { warn=FALSE; for(i=fseq; i<=lseq; i++) { for(j=i+1;j<=lseq;j++) { if (strcmp(snames[i],snames[j]) == 0) warn=TRUE; } } if(warn) warning("Truncating sequence names to 10 characters for PHYLIP output.\n" "Names in the PHYLIP format file are NOT unambiguous."); else warning("Truncating sequence names to 10 characters for PHYLIP output."); } chunks = len/GCG_LINELENGTH; if(len % GCG_LINELENGTH != 0) ++chunks; fprintf(phyout,"%6d %6d",(pint)nseqs,(pint)len); for(block=1; block<=chunks; block++) { pos1 = ((block-1) * GCG_LINELENGTH)+1; pos2 = (len max_aa)) residue = '-'; else { residue = amino_acid_codes[val]; } fprintf(phyout,"%c",residue); if(j % 10 == 0) fprintf(phyout," "); } } fprintf(phyout,"\n"); } /* DES ckfree(output_index); */ for(i=fseq;i<=lseq;i++) ckfree(snames[i]); ckfree(snames); } void nbrf_out(FILE *nbout, sint fres, sint len, sint fseq, sint lseq) { /* static char *aacids = "XCSTPAGNDEQHRKMILVFYW";*/ /* static char *nbases = "XACGT"; */ char *seq, residue; sint val; sint i,ii; sint j,slen; sint line_length; seq = (char *)ckalloc((max_aln_length+1) * sizeof(char)); /* decide the line length for this alignment - maximum is 
LINELENGTH */ line_length=PAGEWIDTH-max_names; line_length=line_length-line_length % 10; /* round to a multiple of 10*/ if (line_length > LINELENGTH) line_length=LINELENGTH; for(ii=fseq; ii<=lseq; ii++) { i = output_index[ii]; fprintf(nbout, dnaflag ? ">DL;" : ">P1;"); fprintf(nbout, "%s\n%s\n", names[i], titles[i]); slen = 0; for(j=fres; j max_aa)) residue = '-'; else { residue = amino_acid_codes[val]; } seq[j-fres] = residue; slen++; } for(j=1; j<=slen; j++) { fprintf(nbout,"%c",seq[j-1]); if((j % line_length == 0) || (j == slen)) fprintf(nbout,"\n"); } fprintf(nbout,"*\n"); } /* DES ckfree(output_index); */ seq=ckfree((void *)seq); } void gde_out(FILE *gdeout, sint fres, sint len, sint fseq, sint lseq) { /* static char *aacids = "XCSTPAGNDEQHRKMILVFYW";*/ /* static char *nbases = "XACGT"; */ char *seq, residue; sint val; char *ss_mask1, *ss_mask2; sint i,ii; sint j,slen; sint line_length; seq = (char *)ckalloc((max_aln_length+1) * sizeof(char)); /* decide the line length for this alignment - maximum is LINELENGTH */ line_length=PAGEWIDTH-max_names; line_length=line_length-line_length % 10; /* round to a multiple of 10*/ if (line_length > LINELENGTH) line_length=LINELENGTH; if (struct_penalties1 == SECST && use_ss1 == TRUE) { ss_mask1 = (char *)ckalloc((seqlen_array[1]+10) * sizeof(char)); for (i=0;i max_aa)) residue = '-'; else { residue = amino_acid_codes[val]; } if (lowercase) seq[j-fres] = (char)tolower((int)residue); else seq[j-fres] = residue; slen++; } for(j=1; j<=slen; j++) { fprintf(gdeout,"%c",seq[j-1]); if((j % line_length == 0) || (j == slen)) fprintf(gdeout,"\n"); } } /* DES ckfree(output_index); */ if (output_struct_penalties == 0 || output_struct_penalties == 2) { if (struct_penalties1 == SECST && use_ss1 == TRUE) { fprintf(gdeout,"\"SS_%-*s\n",max_names,ss_name1); for(i=fres; i #include #include #include #include "clustalw.h" /* * Prototypes */ /* * Global Variables */ extern double **tmat; extern Boolean no_weights; extern sint debug; extern sint 
max_aa; extern sint nseqs; extern sint profile1_nseqs; extern sint nsets; extern sint **sets; extern sint divergence_cutoff; extern sint *seq_weight; extern sint output_order, *output_index; extern Boolean distance_tree; extern char seqname[]; extern sint *seqlen_array; extern char **seq_array; sint malign(sint istart,char *phylip_name) /* full progressive alignment*/ { static sint *aligned; static sint *group; static sint ix; sint *maxid, max, sum; sint *tree_weight; sint i,j,set,iseq=0; sint status,entries; lint score = 0; info("Start of Multiple Alignment"); /* get the phylogenetic tree from *.ph */ if (nseqs >= 2) { status = read_tree(phylip_name, (sint)0, nseqs); if (status == 0) return((sint)0); } /* calculate sequence weights according to branch lengths of the tree - weights in global variable seq_weight normalised to sum to 100 */ calc_seq_weights((sint)0, nseqs, seq_weight); /* recalculate tmat matrix as percent similarity matrix */ status = calc_similarities(nseqs); if (status == 0) return((sint)0); /* for each sequence, find the most closely related sequence */ maxid = (sint *)ckalloc( (nseqs+1) * sizeof (sint)); for (i=1;i<=nseqs;i++) { maxid[i] = -1; for (j=1;j<=nseqs;j++) if (j!=i && maxid[i] < tmat[i][j]) maxid[i] = tmat[i][j]; } /* group the sequences according to their relative divergence */ if (istart == 0) { sets = (sint **) ckalloc( (nseqs+1) * sizeof (sint *) ); for(i=0;i<=nseqs;i++) sets[i] = (sint *)ckalloc( (nseqs+1) * sizeof (sint) ); create_sets((sint)0,nseqs); info("There are %d groups",(pint)nsets); /* clear the memory used for the phylogenetic tree */ if (nseqs >= 2) clear_tree(NULL); /* start the multiple alignments......... */ info("Aligning..."); /* first pass, align closely related sequences first.... 
*/ ix = 0; aligned = (sint *)ckalloc( (nseqs+1) * sizeof (sint) ); for (i=0;i<=nseqs;i++) aligned[i] = 0; for(set=1;set<=nsets;++set) { entries=0; for (i=1;i<=nseqs;i++) { if ((sets[set][i] != 0) && (maxid[i] > divergence_cutoff)) { entries++; if (aligned[i] == 0) { if (output_order==INPUT) { ++ix; output_index[i] = i; } else output_index[++ix] = i; aligned[i] = 1; } } } if(entries > 0) score = prfalign(sets[set], aligned); else score=0.0; /* negative score means fatal error... exit now! */ if (score < 0) { return(-1); } if ((entries > 0) && (score > 0)) info("Group %d: Sequences:%4d Score:%d", (pint)set,(pint)entries,(pint)score); else info("Group %d: Delayed", (pint)set); } for (i=0;i<=nseqs;i++) sets[i]=ckfree((void *)sets[i]); sets=ckfree(sets); } else { /* clear the memory used for the phylogenetic tree */ if (nseqs >= 2) clear_tree(NULL); aligned = (sint *)ckalloc( (nseqs+1) * sizeof (sint) ); ix = 0; for (i=1;i<=istart+1;i++) { aligned[i] = 1; ++ix; output_index[i] = i; } for (i=istart+2;i<=nseqs;i++) aligned[i] = 0; } /* second pass - align remaining, more divergent sequences..... */ /* if not all sequences were aligned, for each unaligned sequence, find it's closest pair amongst the aligned sequences. 
*/ group = (sint *)ckalloc( (nseqs+1) * sizeof (sint)); tree_weight = (sint *) ckalloc( (nseqs) * sizeof(sint) ); for (i=0;i max)) { max = maxid[i]; iseq = i; } } /* align this sequence to the existing alignment */ /* weight sequences with percent identity with profile*/ /* OR...., multiply sequence weights from tree by percent identity with new sequence */ if(no_weights==FALSE) { for (j=0;j= 2) { status = read_tree(phylip_name, (sint)0, nseqs); if (status == 0) return(0); } /* calculate sequence weights according to branch lengths of the tree - weights in global variable seq_weight normalised to sum to 100 */ calc_seq_weights((sint)0, nseqs, seq_weight); tree_weight = (sint *) ckalloc( (nseqs) * sizeof(sint) ); for (i=0;i= 2) clear_tree(NULL); aligned = (sint *)ckalloc( (nseqs+1) * sizeof (sint) ); ix = 0; for (i=1;i<=istart+1;i++) { aligned[i] = 1; ++ix; output_index[i] = i; } for (i=istart+2;i<=nseqs;i++) aligned[i] = 0; /* for each unaligned sequence, find it's closest pair amongst the aligned sequences. 
*/ group = (sint *)ckalloc( (nseqs+1) * sizeof (sint)); while (ix < nseqs) { if (ix > 0) { for (i=1;i<=nseqs;i++) { if (aligned[i] == 0) { maxid[i] = -1; for (j=1;j<=nseqs;j++) if ((maxid[i] < tmat[i][j]) && (aligned[j] != 0)) maxid[i] = tmat[i][j]; } } } /* find the most closely related sequence to those already aligned */ max = -1; for (i=1;i<=nseqs;i++) { if ((aligned[i] == 0) && (maxid[i] > max)) { max = maxid[i]; iseq = i; } } /* align this sequence to the existing alignment */ entries = 0; for (j=1;j<=nseqs;j++) if (aligned[j] != 0) { group[j] = 1; entries++; } else if (iseq==j) { group[j] = 2; entries++; } aligned[iseq] = 1; /* EITHER....., set sequence weights equal to percent identity with new sequence */ /* for (j=0;j1) for (j=0;j 1) { fprintf(stdout,"new weights\n"); for (j=0;j=0) && (c1= 2) { status = read_tree(p1_tree_name, (sint)0, profile1_nseqs); if (status == 0) return(0); } /* calculate sequence weights according to branch lengths of the tree - weights in global variable seq_weight normalised to sum to 100 */ p1_weight = (sint *) ckalloc( (profile1_nseqs) * sizeof(sint) ); calc_seq_weights((sint)0, profile1_nseqs, p1_weight); /* clear the memory for the phylogenetic tree */ if (profile1_nseqs >= 2) clear_tree(NULL); if (nseqs-profile1_nseqs >= 2) { status = read_tree(p2_tree_name, profile1_nseqs, nseqs); if (status == 0) return(0); } p2_weight = (sint *) ckalloc( (nseqs) * sizeof(sint) ); calc_seq_weights(profile1_nseqs,nseqs, p2_weight); /* clear the memory for the phylogenetic tree */ if (nseqs-profile1_nseqs >= 2) clear_tree(NULL); /* convert tmat distances to similarities */ for (i=1;i 1) { fprintf(stdout,"new weights\n"); for (j=0;j #include #include #include #include "clustalw.h" #include "mpi.h" /* * Prototypes */ /* * Global Variables */ extern double **tmat; extern Boolean no_weights; extern sint debug; extern sint max_aa; extern sint nseqs; extern sint profile1_nseqs; extern sint nsets; extern sint **sets; extern sint divergence_cutoff; 
extern sint *seq_weight; extern sint output_order, *output_index; extern Boolean distance_tree; extern char seqname[]; extern sint *seqlen_array; extern char **seq_array; sint malign_mpi_pdiff(sint istart,char *phylip_name) /* full progressive alignment*/ { static sint *aligned; static sint *group; static sint ix; sint *maxid, max, sum; sint *tree_weight; sint i,j,set,iseq=0; sint status,entries; lint score = 0; double wtime1, wtime2; info("Start of Multiple Alignment"); /* get the phylogenetic tree from *.ph */ if (nseqs >= 2) { status = read_tree(phylip_name, (sint)0, nseqs); if (status == 0) return((sint)0); } /* calculate sequence weights according to branch lengths of the tree - weights in global variable seq_weight normalised to sum to 100 */ calc_seq_weights((sint)0, nseqs, seq_weight); /* recalculate tmat matrix as percent similarity matrix */ status = calc_similarities(nseqs); if (status == 0) return((sint)0); /* for each sequence, find the most closely related sequence */ maxid = (sint *)ckalloc( (nseqs+1) * sizeof (sint)); for (i=1;i<=nseqs;i++) { maxid[i] = -1; for (j=1;j<=nseqs;j++) if (j!=i && maxid[i] < tmat[i][j]) maxid[i] = tmat[i][j]; } /* group the sequences according to their relative divergence */ if (istart == 0) { sets = (sint **) ckalloc( (nseqs+1) * sizeof (sint *) ); for(i=0;i<=nseqs;i++) sets[i] = (sint *)ckalloc( (nseqs+1) * sizeof (sint) ); create_sets((sint)0,nseqs); info("There are %d groups",(pint)nsets); /* clear the memory used for the phylogenetic tree */ if (nseqs >= 2) clear_tree(NULL); /* start the multiple alignments......... */ info("Aligning..."); /** Measuring wall time by MPI_Wtime() **/ wtime1 = MPI_Wtime(); /* first pass, align closely related sequences first.... 
*/ ix = 0; aligned = (sint *)ckalloc( (nseqs+1) * sizeof (sint) ); for (i=0;i<=nseqs;i++) aligned[i] = 0; for(set=1;set<=nsets;++set) { entries=0; for (i=1;i<=nseqs;i++) { if ((sets[set][i] != 0) && (maxid[i] > divergence_cutoff)) { entries++; if (aligned[i] == 0) { if (output_order==INPUT) { ++ix; output_index[i] = i; } else output_index[++ix] = i; aligned[i] = 1; } } } if(entries > 0) score = prfalign_mpi_pdiff(sets[set], aligned); else score=0.0; /* negative score means fatal error... exit now! */ if (score < 0) { return(-1); } if ((entries > 0) && (score > 0)) info("Group %d: Sequences:%4d Score:%d", (pint)set,(pint)entries,(pint)score); else info("Group %d: Delayed", (pint)set); } for (i=0;i<=nseqs;i++) sets[i]=ckfree((void *)sets[i]); sets=ckfree(sets); } else { /* clear the memory used for the phylogenetic tree */ if (nseqs >= 2) clear_tree(NULL); aligned = (sint *)ckalloc( (nseqs+1) * sizeof (sint) ); ix = 0; for (i=1;i<=istart+1;i++) { aligned[i] = 1; ++ix; output_index[i] = i; } for (i=istart+2;i<=nseqs;i++) aligned[i] = 0; } /* second pass - align remaining, more divergent sequences..... */ /* if not all sequences were aligned, for each unaligned sequence, find it's closest pair amongst the aligned sequences. 
*/ group = (sint *)ckalloc( (nseqs+1) * sizeof (sint)); tree_weight = (sint *) ckalloc( (nseqs) * sizeof(sint) ); for (i=0;i max)) { max = maxid[i]; iseq = i; } } /* align this sequence to the existing alignment */ /* weight sequences with percent identity with profile*/ /* OR...., multiply sequence weights from tree by percent identity with new sequence */ if(no_weights==FALSE) { for (j=0;j= 2) { status = read_tree(phylip_name, (sint)0, nseqs); if (status == 0) return(0); } /* calculate sequence weights according to branch lengths of the tree - weights in global variable seq_weight normalised to sum to 100 */ calc_seq_weights((sint)0, nseqs, seq_weight); tree_weight = (sint *) ckalloc( (nseqs) * sizeof(sint) ); for (i=0;i= 2) clear_tree(NULL); aligned = (sint *)ckalloc( (nseqs+1) * sizeof (sint) ); ix = 0; for (i=1;i<=istart+1;i++) { aligned[i] = 1; ++ix; output_index[i] = i; } for (i=istart+2;i<=nseqs;i++) aligned[i] = 0; /* for each unaligned sequence, find it's closest pair amongst the aligned sequences. 
*/ group = (sint *)ckalloc( (nseqs+1) * sizeof (sint)); while (ix < nseqs) { if (ix > 0) { for (i=1;i<=nseqs;i++) { if (aligned[i] == 0) { maxid[i] = -1; for (j=1;j<=nseqs;j++) if ((maxid[i] < tmat[i][j]) && (aligned[j] != 0)) maxid[i] = tmat[i][j]; } } } /* find the most closely related sequence to those already aligned */ max = -1; for (i=1;i<=nseqs;i++) { if ((aligned[i] == 0) && (maxid[i] > max)) { max = maxid[i]; iseq = i; } } /* align this sequence to the existing alignment */ entries = 0; for (j=1;j<=nseqs;j++) if (aligned[j] != 0) { group[j] = 1; entries++; } else if (iseq==j) { group[j] = 2; entries++; } aligned[iseq] = 1; /* EITHER....., set sequence weights equal to percent identity with new sequence */ /* for (j=0;j1) for (j=0;j 1) { fprintf(stdout,"new weights\n"); for (j=0;j=0) && (c1= 2) { status = read_tree(p1_tree_name, (sint)0, profile1_nseqs); if (status == 0) return(0); } /* calculate sequence weights according to branch lengths of the tree - weights in global variable seq_weight normalised to sum to 100 */ p1_weight = (sint *) ckalloc( (profile1_nseqs) * sizeof(sint) ); calc_seq_weights((sint)0, profile1_nseqs, p1_weight); /* clear the memory for the phylogenetic tree */ if (profile1_nseqs >= 2) clear_tree(NULL); if (nseqs-profile1_nseqs >= 2) { status = read_tree(p2_tree_name, profile1_nseqs, nseqs); if (status == 0) return(0); } p2_weight = (sint *) ckalloc( (nseqs) * sizeof(sint) ); calc_seq_weights(profile1_nseqs,nseqs, p2_weight); /* clear the memory for the phylogenetic tree */ if (nseqs-profile1_nseqs >= 2) clear_tree(NULL); /* convert tmat distances to similarities */ for (i=1;i 1) { fprintf(stdout,"new weights\n"); for (j=0;j #include #include #include #include #include "clustalw.h" #include "mpi.h" /* * Prototypes */ /* * Global Variables */ extern double **tmat; extern Boolean no_weights; extern sint debug; extern sint max_aa; extern sint nseqs; extern sint profile1_nseqs; extern sint nsets; extern sint **sets; extern sint 
divergence_cutoff; extern sint *seq_weight; extern sint output_order, *output_index; extern Boolean distance_tree; extern char seqname[]; extern sint *seqlen_array; extern char **seq_array; #define MIN_N_SEQ_PDIFF 4 /* The followings are for prfalign_mpi_progressive() */ extern float gap_open, gap_extend; extern float transition_weight; extern sint gap_pos1, gap_pos2; extern Boolean neg_matrix; extern sint mat_avscore; extern short usermatseries[MAXMAT][NUMRES][NUMRES]; extern Boolean user_series; extern UserMatSeries matseries; static void determine_parallelizable_sets(sint *** sets, int nseqs, int ***set1, int ***set2); static void kbl_debug_printsets(int nseqs, int which_set, int **set1, int **set2); static int key_compare(const void *e1, const void *e2); static int verify_dependency(int set, int **set1, int **set2, sint ** sets, int nseqs, short *finished, int *new2old); static void myqsort(int **a, int lo, int hi, int *b);
/*
 * malign_mpi_progressive: progressive multiple alignment driven from the
 * calling MPI process.
 *
 * istart      - 0 runs the first (grouped) pass; any other value skips it
 *               and marks sequences 1..istart+1 as already aligned.
 * phylip_name - name handed to read_tree() to load the guide tree.
 *
 * Steps visible in this function:
 *   1. read_tree(), calc_seq_weights() and calc_similarities() prepare
 *      seq_weight[] and tmat[][]; maxid[i] becomes the largest tmat[i][j]
 *      over j != i.
 *   2. First pass (istart == 0): create_sets() fills sets[1..nsets]; for
 *      every set, set1[set]/set2[set] receive the sequence numbers marked
 *      1 and 2 (count in element 0, members from element 1) and
 *      sets[set][0] receives the number of non-zero entries.  The sets are
 *      then sorted on sets[i][0] with myqsort(); new2old[] maps a sorted
 *      position back to the pre-sort index, which is why set1/set2 are
 *      always indexed through new2old[].  Work is started with prf_init()
 *      on ranks taken from dest[] and collected with prf_update().
 *   3. Second pass: while ix < nseqs, the unaligned sequence with the
 *      largest maxid[] against the aligned ones is added with
 *      prfalign_mpi_pdiff().
 *
 * Returns nseqs, or 0 when read_tree() or calc_similarities() returns 0.
 *
 * NOTE(review): wtime1, myentries and new2old are assigned only inside the
 * istart == 0 branch, but myentries/new2old are passed to ckfree() and
 * wtime1 is printed at the end on every path.
 * NOTE(review): dset1 and dset2 are calloc()ed in the second pass and no
 * free() for them is visible in this function.
 */
sint malign_mpi_progressive(sint istart, char *phylip_name) { /* full progressive alignment */ static sint *aligned; static sint *group; static sint ix; sint *maxid, max, sum; sint *tree_weight; sint i, j, set, iseq = 0; sint status, entries; lint score = 0; double wtime1, wtime2; /**** Kuobin's debugging codes start ********/ int **set1, **set2; int *dset1 = NULL, *dset2 = NULL; int myidx, myidx2; int mybsize; /* the size of the MPI send/recv buffer */ char *mpi_buffer; int position, np, work, from_where, which_set; int *dest; /* array storing the MPI ranks of available processes */ /* finished[1..nseqs]: finished[i] indicates that whether sets[i] has * been "prfalign-ed" or not. */ short *finished; int count; int ret; int *myentries; /* to replace "entries" */ int *new2old; /* for the use of myqsort(): new2old[0..nsets] */ int **dsets; int reverse_rank; /**** Kuobin's debugging codes end ********/ info("Start of Multiple Alignment"); /* get the phylogenetic tree from *.ph */ if (nseqs >= 2) { status = read_tree(phylip_name, (sint) 0, nseqs); if (status == 0) return ((sint) 0); } /* calculate sequence weights according to branch lengths of the tree - weights in global variable seq_weight normalised to sum to 100 */ calc_seq_weights((sint) 0, nseqs, seq_weight); /* recalculate tmat matrix as percent similarity matrix */ status = calc_similarities(nseqs); if (status == 0) return ((sint) 0); /* for each sequence, find the most closely related sequence */ maxid = (sint *) ckalloc((nseqs + 1) * sizeof(sint)); for (i = 1; i <= nseqs; i++) { maxid[i] = -1; for (j = 1; j <= nseqs; j++) if (j != i && maxid[i] < tmat[i][j]) maxid[i] = tmat[i][j]; } /* group the sequences according to their relative divergence */ if (istart == 0) { sets = (sint **) ckalloc((nseqs + 1) * sizeof(sint *)); for (i = 0; i <= nseqs; i++) sets[i] = (sint *) ckalloc((nseqs + 1) * sizeof(sint)); finished = (short *) ckalloc((nseqs + 1) * sizeof(short)); set1 = (sint **) ckalloc((nseqs + 1) * sizeof(sint *)); for (i = 0; i <= nseqs; i++) set1[i] = (sint *) ckalloc((nseqs + 1) * sizeof(sint)); set2 = (sint **) ckalloc((nseqs + 1) * sizeof(sint *)); for (i = 0; i <= nseqs; i++) set2[i] = (sint *) ckalloc((nseqs + 1) * sizeof(sint)); create_sets((sint) 0, nseqs); info("There are %d groups", (pint) nsets); /* clear the memory used for the phylogenetic tree */ if (nseqs >= 2) clear_tree(NULL); /* start the multiple alignments......... */ info("Aligning..."); /** Measuring wall time by MPI_Wtime() **/ wtime1 = MPI_Wtime(); /* first pass, align closely related sequences first.... */ ix = 0; aligned = (sint *) ckalloc((nseqs + 1) * sizeof(sint)); for (i = 0; i <= nseqs; i++) aligned[i] = 0; myentries = (int *) ckalloc((nseqs + 1) * sizeof(int)); for (i = 0; i <= nseqs; i++) myentries[i] = 0; new2old = (int *) ckalloc((nsets + 1) * sizeof(int)); for (i = 0; i <= nsets ; i++) new2old[i] = i; /* * Determine set1[] and set2[]: * * for example: if sets[3]={3,0,1,0,2,2,0} then * set1 = {1,2} * set2 = {2,4,5} */ for (set = 1; set <= nsets; ++set) { int idx1 = 0; int idx2 = 0; myentries[set] = 0; /* number of non-zero elements in sets[set] */ count = 0; for (i = 1; i <= nseqs; i++) { int tmpv = 0; /* a simple temp variable */ if ((tmpv = sets[set][i]) != 0) { count++; if (tmpv == 1) set1[set][++idx1] = i; else if (tmpv == 2) set2[set][++idx2] = i; else fprintf(stderr, "Error: something wrong with sets[%d]\n", set); if (maxid[i] > divergence_cutoff) { myentries[set]++; if (aligned[i] == 0) { if (output_order == INPUT) { ++ix; output_index[i] = i; } else { output_index[++ix] = i; } aligned[i] = 1; } } } } sets[set][0] = count; set1[set][0] = idx1; set2[set][0] = idx2; } /* * Now I am going to sort sets[i] (i=1..nseqs) according to sets[i][0]. */ /* Initially I was using the stdlib's qsort(). qsort((sets + 1), nsets, sizeof(sint **), key_compare); */ myqsort(sets, 1, nsets, new2old); /* **************************************************************** * TEMPORARY TEMPORARY * * Since prf_init() sometimes overwrites the contents * of sets[i][....] (in the case of "Delayed...."), * this would cause trouble for verify_dependency(). * * Temporarily here we make a duplicate copy of sets[][]. * ****************************************************************/ dsets = (int **)calloc((nseqs+1), sizeof(int *)); assert(dsets); for (i=0;i<(nseqs+1);i++) { dsets[i]=(int *)calloc((nseqs+1),sizeof(int)); assert(dsets[i]); } for (i=1;i<=nsets;i++) for (j=0;j<(nseqs+1);j++) dsets[i][j]=sets[i][j]; /* * Main working loop ..... */ MPI_Comm_size(MPI_COMM_WORLD, &np); work = 1; /* * Note: dest[0] holds the number of available processes so far. * dest[1], dest[2] are the ranks of those available processes. */
/*
 * NOTE(review): in this copy of the file the text between "for (i=1;i" and
 * "=2) {" below is missing (the rest of the dest[] initialisation loop and
 * the head of the per-set loop are not visible).  Restore it from a
 * pristine source before compiling or modifying this loop.
 */
dest = (int *)malloc((np)*sizeof(int)); assert(dest); dest[0]=(np-1); for (i=1;i=2) { /* * * Note: Temporarily assign the next available MPI process * to prf_init as the "reverse_rank". This process * will be doing preverse_pass(). */ if (sets[set][0]>= MIN_N_SEQ_PDIFF) ret = prf_init(sets[set], aligned, set, dest[1], dest[2]); else ret = prf_init(sets[set], aligned, set, dest[1], 0); if (ret == 1) { if (sets[set][0]>= MIN_N_SEQ_PDIFF) { dest[0] -= 2; memmove((dest+1),(dest+3),dest[0]*sizeof(int)); } else{ dest[0] -= 1; memmove((dest+1),(dest+2),dest[0]*sizeof(int)); } } else if (ret==0) { finished[set] = 1; work--; printf ("\nGroup %2d: Delayed", new2old[set]); kbl_debug_printsets(nseqs, new2old[set], set1, set2); } } else { /* need to wait for a MPI process to return its result */ do { score = prf_update(&from_where, &reverse_rank, &which_set); finished[which_set] = 1; work--; printf ("\nGroup %2d: Sequences:%4d Score: %d (from rank %d)", new2old[which_set], sets[which_set][0], (int) score, from_where); kbl_debug_printsets(nseqs, new2old[which_set], set1, set2); dest[dest[0]+1] = from_where; if (reverse_rank){ dest[dest[0]+2] = reverse_rank; dest[0] +=1; } dest[0] +=1; } while (dest[0]<2); if (sets[set][0]>= MIN_N_SEQ_PDIFF) ret = prf_init(sets[set], aligned, set, dest[1], dest[2]); else ret = prf_init(sets[set], aligned, set, dest[1], 0); if (ret == 1) { if (sets[set][0]>= MIN_N_SEQ_PDIFF) { dest[0] -= 2; memmove((dest+1),(dest+3),dest[0]*sizeof(int)); } else{ dest[0] -= 1; memmove((dest+1),(dest+2),dest[0]*sizeof(int)); } } else if (ret==0) { finished[set] = 1; work--; printf ("\nGroup %2d: Delayed", new2old[set]); kbl_debug_printsets(nseqs, new2old[set], set1, set2); } } } } /* * Waiting for the remaining MPI processes to return. */ for (i = 0; i < work; i++) { score = prf_update(&from_where, &reverse_rank, &which_set); /* info("From rank %d, set %d, Score:%d",from_where, which_set, (int)score); */ finished[which_set] = 1; printf ("\nGroup %2d: Sequences:%4d Score: %d (from rank %d)", new2old[which_set], sets[which_set][0], (int) score, from_where); kbl_debug_printsets(nseqs, new2old[which_set], set1, set2); }
/*
 * Release the first-pass work areas.
 * NOTE(review): set1/set2 and their rows were obtained from ckalloc() above
 * but are released with free(), while sets/finished go through ckfree();
 * confirm that ckalloc() memory may be passed to free().
 */
for (i = 0; i <= nseqs; i++) sets[i] = ckfree((void *) sets[i]); sets = ckfree(sets); for (i = 0; i <= nseqs; i++) free(dsets[i]); free(dsets); for (i = 0; i <= nseqs; i++) { free(set1[i]); free(set2[i]); } free(set1); free(set2); free(dest); finished = ckfree((void *) finished); } else { /* clear the memory used for the phylogenetic tree */ if (nseqs >= 2) clear_tree(NULL); aligned = (sint *) ckalloc((nseqs + 1) * sizeof(sint)); ix = 0; for (i = 1; i <= istart + 1; i++) { aligned[i] = 1; ++ix; output_index[i] = i; } for (i = istart + 2; i <= nseqs; i++) aligned[i] = 0; } /* second pass - align remaining, more divergent sequences..... */ /* if not all sequences were aligned, for each unaligned sequence, find it's closest pair amongst the aligned sequences. */ group = (sint *) ckalloc((nseqs + 1) * sizeof(sint)); tree_weight = (sint *) ckalloc((nseqs) * sizeof(sint)); for (i = 0; i < nseqs; i++) tree_weight[i] = seq_weight[i]; /* if we haven't aligned any sequences, in the first pass - align the two most closely related sequences now */ if (ix == 0) { max = -1; iseq = 0; for (i = 1; i <= nseqs; i++) { for (j = i + 1; j <= nseqs; j++) { if (max < tmat[i][j]) { max = tmat[i][j]; iseq = i; } } } aligned[iseq] = 1; if (output_order == INPUT) { ++ix; output_index[iseq] = iseq; } else output_index[++ix] = iseq; } while (ix < nseqs) { for (i = 1; i <= nseqs; i++) { if (aligned[i] == 0) { maxid[i] = -1; for (j = 1; j <= nseqs; j++) if ((maxid[i] < tmat[i][j]) && (aligned[j] != 0)) maxid[i] = tmat[i][j]; } } /* find the most closely related sequence to those already aligned */ max = -1; iseq = 0; for (i = 1; i <= nseqs; i++) { if ((aligned[i] == 0) && (maxid[i] > max)) { max = maxid[i]; iseq = i; } } /* align this sequence to the existing alignment */ /* weight sequences with percent identity with profile*/ /* OR...., multiply sequence weights from tree by percent identity with new sequence */ if (no_weights == FALSE) { for (j = 0; j < nseqs; j++) if (aligned[j + 1] != 0) seq_weight[j] = tree_weight[j] * tmat[j + 1][iseq]; /* Normalise the weights, such that the sum of the weights = INT_SCALE_FACTOR */ sum = 0; for (j = 0; j < nseqs; j++) if (aligned[j + 1] != 0) sum += seq_weight[j]; if (sum == 0) { for (j = 0; j < nseqs; j++) seq_weight[j] = 1; sum = j; } for (j = 0; j < nseqs; j++) if (aligned[j + 1] != 0) { seq_weight[j] = (seq_weight[j] * INT_SCALE_FACTOR) / sum; if (seq_weight[j] < 1) seq_weight[j] = 1; } } entries = 0; for (j = 1; j <= nseqs; j++) if (aligned[j] != 0) { group[j] = 1; entries++; } else if (iseq == j) { group[j] = 2; entries++; } aligned[iseq] = 1; /* score = prfalign(group, aligned); */ /* Trying to use pdiff() version of prfalign() */ /* fprintf(stderr,"DEBUG: calling prfalign_mpi_pdiff()...\n"); fflush(stderr); */ score = prfalign_mpi_pdiff(group, aligned);
/*
 * dset1/dset2 are zero-terminated lists (filled from element 0) of the
 * sequences in group 1 and group 2, used only for the printout below.
 * NOTE(review): the memset() length is (nseqs + 1) BYTES, not
 * (nseqs + 1) * sizeof(int), so a reused buffer is only partly cleared.
 */
/**** Kuobin's debugging codes start ********/ if (dset1 != NULL) { memset(dset1, 0, (nseqs + 1)); } else { dset1 = (int *) calloc((nseqs + 1), sizeof(int)); } if (dset2 != NULL) { memset(dset2, 0, (nseqs + 1)); } else { dset2 = (int *) calloc((nseqs + 1), sizeof(int)); } myidx = 0; myidx2 = 0; for (i = 1; i <= nseqs; i++) { if (group[i] == 1) { dset1[myidx] = i; myidx++; } else if (group[i] == 2) { dset2[myidx2] = i; myidx2++; } } /**** Kuobin's debugging codes end ********/ info("Sequence:%d Score:%d", (pint) iseq, (pint) score); /**** Kuobin's debugging codes start ********/ printf(" was aligning: ("); myidx = 0; myidx2 = 0; while (dset1[myidx]) { printf("%d ", dset1[myidx]); myidx++; } printf(") and ("); while (dset2[myidx2]) { printf("%d ", dset2[myidx2]); myidx2++; } printf(")"); /**** Kuobin's debugging codes end ********/ if (output_order == INPUT) { ++ix; output_index[iseq] = iseq; } else output_index[++ix] = iseq; }
/*
 * NOTE(review): myentries and new2old are released here unconditionally,
 * although they are only allocated when istart == 0.
 */
group = ckfree((void *) group); aligned = ckfree((void *) aligned); myentries = ckfree((void *) myentries); new2old = ckfree((void *) new2old); maxid = ckfree((void *) maxid); tree_weight = ckfree((void *) tree_weight); aln_score(); /* make the rest (output stuff) into routine clustal_out in file amenu.c */ /** Measuring wall time by MPI_Wtime() **/ wtime2 = MPI_Wtime(); fprintf(stderr, "\nDEBUG: malign time = %5.3f sec\n", wtime2 - wtime1); fflush(stderr); return (nseqs); }
/* * This function will save the number of non-zero elements of each * sets[i] array in * sets[i][0], i = 0 to nseqs. * * In addition, set1[i] (i = 1 to nseqs) is a one dimensional array * storing the sequence number that has '1' in sets[i]; * similarly, set2[i] (i = 1 to nseqs) is a one dimensional array * storing the sequence number that has '2' in sets[i]. 
*/ static void determine_parallelizable_sets(sint *** sets, int nseqs, int ***set1, int ***set2) { sint **tmp, **psets; int i, j; int myidx, myidx2; *set1 = (int **) calloc((nseqs + 1), sizeof(int *)); assert(*set1); for (i = 0; i <= nseqs; i++) { (*set1)[i] = (int *) calloc((nseqs + 1), sizeof(int)); assert((*set1)[i]); } *set2 = (int **) calloc((nseqs + 1), sizeof(int *)); assert(*set2); for (i = 0; i <= nseqs; i++) { (*set2)[i] = (int *) calloc((nseqs + 1), sizeof(int)); assert((*set2)[i]); } tmp = (sint **) calloc((nseqs + 1), sizeof(sint *)); assert(tmp); for (i = 0; i <= (nseqs); i++) { tmp[i] = (sint *) calloc((nseqs + 1), sizeof(sint)); assert(tmp[i]); } psets = *sets; /* To store the number of non-zero elements of each psets[i] into * psets[i][0]. */ for (i = 1; i <= (nseqs); i++) { myidx = 0; myidx2 = 0; for (j = 1; j <= (nseqs); j++) if (psets[i][j]) { tmp[i][0]++; tmp[i][j] = psets[i][j]; if (psets[i][j] == 1) { (*set1)[i][myidx] = j; myidx++; } else if (psets[i][j] == 2) { (*set2)[i][myidx2] = j; myidx2++; } } } for (i = 0; i <= (nseqs); i++) free(psets[i]); free(psets); *sets = tmp; return; } /* * Print the sequences involved in the current sets[which_set][]. */ static void kbl_debug_printsets(int nseqs, int which_set, int **set1, int **set2) { int i; printf(" was aligning: ("); for (i = 1; i <= set1[which_set][0]; i++) printf("%d ", set1[which_set][i]); printf(") and ("); for (i = 1; i <= set2[which_set][0]; i++) printf("%d ", set2[which_set][i]); printf(")"); return; } static int key_compare(const void *e1, const void *e2) { int v1, v2; v1 = *(int *) (*((int *) e1)); v2 = *(int *) (*((int *) e2)); return (v1 < v2) ? -1 : (v1 > v2) ? 1 : 0; } /* * Return 1 if sets[set] is safe to be executed by slave * MPI process; else return 0. 
*/ static int verify_dependency(int set, int **set1, int **set2, sint ** sets, int nseqs, short *finished, int *new2old) { int i, j; int length1, length2; /* number of sequences in the first and the second set */ int flag1, flag2; flag1 = 0; flag2 = 0; length1 = set1[new2old[set]][0]; length2 = set2[new2old[set]][0]; if (length1 > 1) { for (i = 1; i <= nseqs; i++) { if (sets[i][0] == length1) { for (j = 1; j <= length1; j++) { if (sets[i][set1[new2old[set]][j]] == 0) break; } if (j == (length1 + 1)) if (finished[i]) { flag1 = 1; break; } } } } else flag1 = 1; if (length2 > 1) { for (i = 1; i <= nseqs; i++) { if (sets[i][0] == length2) { for (j = 1; j <= length2; j++) { if (sets[i][set2[new2old[set]][j]] == 0) break; } if (j == (length2 + 1)) if (finished[i]) { flag2 = 1; break; } } } } else flag2 = 1; if (flag1 * flag2) return 1; else return 0; } /* * Sort sets[1], sets[2], ... sets[nsets] according * to sets[1][0], sets[2][0], ... sets[nsets][0]. * * Note: b[i]=j means, in the sorted array, the ith * element corresponds to the jth element in * the un-sorted array. */ static void myqsort(int **a, int lo, int hi, int *b) { int i, j; int x; int *tmp; int t; i = lo; j = hi; /* the middle element */ x= a[(lo+hi)/2][0]; do { while (a[i][0] < x) i++; while (a[j][0] > x) j--; if (i <= j) { /* * remember the exchange */ t=b[i]; b[i]=b[j]; b[j]=t; /* exchange a[i] and a[j] */ tmp = a[i]; a[i] = a[j]; a[j] = tmp; i++; j--; } } while (i <= j); if (lo < j) myqsort(a, lo, j, b); if (i < hi) myqsort(a, i, hi, b); return; } clustalw-mpi-0.15/pairalign_new.c0000644000411000001440000005263510351446520015372 0ustar liusers/* Change int h to int gh everywhere DES June 1994 */ #include #include #include #include #include #include "clustalw.h" #include "mpi.h" #define MIN(a,b) ((a)<(b)?(a):(b)) #define MAX(a,b) ((a)>(b)?(a):(b)) #define gap(k) ((k) <= 0 ? 0 : g + gh * (k)) #define tbgap(k) ((k) <= 0 ? 0 : tb + gh * (k)) #define tegap(k) ((k) <= 0 ? 
0 : te + gh * (k)) /* * Global variables */ #ifdef MAC #define pwint short #else #define pwint int #endif static sint int_scale; extern double **tmat; extern float pw_go_penalty; extern float pw_ge_penalty; extern float transition_weight; extern sint nseqs; extern sint max_aa; extern sint gap_pos1, gap_pos2; extern sint max_aln_length; extern sint *seqlen_array; extern sint debug; extern sint mat_avscore; extern short blosum30mt[], pam350mt[], idmat[], pw_usermat[], pw_userdnamat[]; extern short clustalvdnamt[], swgapdnamt[]; extern short gon250mt[]; extern short def_dna_xref[], def_aa_xref[], pw_dna_xref[], pw_aa_xref[]; extern Boolean dnaflag; extern char **seq_array; extern char *amino_acid_codes; extern char pw_mtrxname[]; extern char pw_dnamtrxname[]; static float mm_score; static sint print_ptr, last_print; static sint *displ; static pwint *HH, *DD, *RR, *SS; static sint g, gh; static sint seq1, seq2,rseq1, rseq2; static sint matrix[NUMRES][NUMRES]; static pwint maxscore; static sint sb1, sb2, se1, se2; /* * Prototypes */ static void add(sint v); static sint calc_score(sint iat, sint jat, sint v1, sint v2); static float tracepath(sint tsb1, sint tsb2); static void forward_pass(char *ia, char *ib, sint n, sint m); static void reverse_pass(char *ia, char *ib); static sint diff(sint A, sint B, sint M, sint N, sint tb, sint te); static void del(sint k); sint pairalign(sint istart, sint iend, sint jstart, sint jend) { short *mat_xref; static sint si, sj, i,j; static sint n, m, len1, len2, rlen1, rlen2; static sint maxres; static short *matptr; static char c; static float gscale, ghscale; int pidx,pdest; short *xarray,*yarray; double *tarray; int slen; int num_siarray; int ave_load; MPI_Status status; int np,work,position,idx1,idx2; char *mpi_buffer; /* My Buffer SIZE */ char *mystr; int mystrlen,mybsize,myres_size; double wtime1, wtime2; double wtime99, wtime100; displ = (sint *) ckalloc((2 * max_aln_length + 1) * sizeof(sint)); HH = (pwint *) 
ckalloc((max_aln_length) * sizeof(pwint)); DD = (pwint *) ckalloc((max_aln_length) * sizeof(pwint)); RR = (pwint *) ckalloc((max_aln_length) * sizeof(pwint)); SS = (pwint *) ckalloc((max_aln_length) * sizeof(pwint)); #ifdef MAC int_scale = 10; #else int_scale = 100; #endif gscale = ghscale = 1.0; if (dnaflag) { if (debug > 1) fprintf(stdout, "matrix %s\n", pw_dnamtrxname); if (strcmp(pw_dnamtrxname, "iub") == 0) { matptr = swgapdnamt; mat_xref = def_dna_xref; } else if (strcmp(pw_dnamtrxname, "clustalw") == 0) { matptr = clustalvdnamt; mat_xref = def_dna_xref; gscale = 0.6667; ghscale = 0.751; } else { matptr = pw_userdnamat; mat_xref = pw_dna_xref; } maxres = get_matrix(matptr, mat_xref, matrix, TRUE, int_scale); if (maxres == 0) return ((sint) - 1); matrix[0][4] = transition_weight * matrix[0][0]; matrix[4][0] = transition_weight * matrix[0][0]; matrix[2][11] = transition_weight * matrix[0][0]; matrix[11][2] = transition_weight * matrix[0][0]; matrix[2][12] = transition_weight * matrix[0][0]; matrix[12][2] = transition_weight * matrix[0][0]; } else { if (debug > 1) fprintf(stdout, "matrix %s\n", pw_mtrxname); if (strcmp(pw_mtrxname, "blosum") == 0) { matptr = blosum30mt; mat_xref = def_aa_xref; } else if (strcmp(pw_mtrxname, "pam") == 0) { matptr = pam350mt; mat_xref = def_aa_xref; } else if (strcmp(pw_mtrxname, "gonnet") == 0) { matptr = gon250mt; int_scale /= 10; mat_xref = def_aa_xref; } else if (strcmp(pw_mtrxname, "id") == 0) { matptr = idmat; mat_xref = def_aa_xref; } else { matptr = pw_usermat; mat_xref = pw_aa_xref; } maxres = get_matrix(matptr, mat_xref, matrix, TRUE, int_scale); if (maxres == 0) return ((sint) - 1); } /* * Determine the number of available MPI processes */ MPI_Comm_size(MPI_COMM_WORLD, &np); wtime1 = MPI_Wtime(); if (np < 2) { /* do pairwise alignment sequentially ... 
*/ for (si = MAX(0, istart); si < nseqs && si < iend; si++) { n = seqlen_array[si + 1]; len1 = 0; for (i = 1; i <= n; i++) { c = seq_array[si + 1][i]; if ((c != gap_pos1) && (c != gap_pos2)) len1++; } for (sj = MAX(si + 1, jstart + 1); sj < nseqs && sj < jend; sj++) { m = seqlen_array[sj + 1]; if (n == 0 || m == 0) { tmat[si + 1][sj + 1] = 1.0; tmat[sj + 1][si + 1] = 1.0; continue; } len2 = 0; for (i = 1; i <= m; i++) { c = seq_array[sj + 1][i]; if ((c != gap_pos1) && (c != gap_pos2)) len2++; } if (dnaflag) { g = 2 * (float) pw_go_penalty *int_scale * gscale; gh = pw_ge_penalty * int_scale * ghscale; } else { if (mat_avscore <= 0) g = 2 * (float) (pw_go_penalty + log((double) (MIN(n, m)))) * int_scale; else g = 2 * mat_avscore * (float) (pw_go_penalty + log((double) (MIN(n, m)))) * gscale; gh = pw_ge_penalty * int_scale; } if (debug > 1) fprintf(stdout, "go %d ge %d\n", (pint) g, (pint) gh); /* align the sequences */ seq1 = si + 1; seq2 = sj + 1; forward_pass(&seq_array[seq1][0], &seq_array[seq2][0], n, m); reverse_pass(&seq_array[seq1][0], &seq_array[seq2][0]); last_print = 0; print_ptr = 1; /* sb1 = sb2 = 1; se1 = n-1; se2 = m-1; */ /* use Myers and Miller to align two sequences */ maxscore = diff(sb1 - 1, sb2 - 1, se1 - sb1 + 1, se2 - sb2 + 1, (sint) 0, (sint) 0); /* calculate percentage residue identity */ mm_score = tracepath(sb1, sb2); if (len1 == 0 || len2 == 0) mm_score = 0; else mm_score /= (float) MIN(len1, len2); tmat[si + 1][sj + 1] = ((float) 100.0 - mm_score) / (float) 100.0; tmat[sj + 1][si + 1] = ((float) 100.0 - mm_score) / (float) 100.0; if (debug > 1) { fprintf(stdout, "Sequences (%d:%d) Aligned1. Score: %d CompScore: %d\n", (pint) si + 1, (pint) sj + 1, (pint) mm_score, (pint) maxscore / (MIN(len1, len2) * 100)); } else { info("Sequences (%d:%d) Aligned. Score: %d", (pint) si + 1, (pint) sj + 1, (pint) mm_score); } } } } else { /* do pairwise alignment parallelly ... */ /* Frist, sending all sequences to slaves ... 
*/ /******* Note: Dec 18, 2005 ******************** * If the number of slaves is greater than N*(N-1)/2, * we will MPI_Send the sequence data to N*(N-1) slaves only. * This is to prevent the idled slaves not being able to get out of * their while(1) loop, see "parallel_compare.c". ***********************************************/ mybsize = 0; mybsize += (9+(nseqs+1)+(NUMRES)*(NUMRES))*sizeof(int) + 26*sizeof(char); mybsize += sizeof(char)+2*sizeof(int)+4*sizeof(float); for (i=1;i<=nseqs;i++) mybsize += (seqlen_array[i]+1); if ((nseqs*(nseqs-1)/2)>=(np-1)) { for (pdest = 1; pdest < np ; pdest++ ) MPI_Send(&mybsize, 1, MPI_INT, pdest, PAIRWISE_TAG, MPI_COMM_WORLD); } else { for (pdest = 1; pdest <= (nseqs*(nseqs-1)/2) ; pdest++ ) MPI_Send(&mybsize, 1, MPI_INT, pdest, PAIRWISE_TAG, MPI_COMM_WORLD); } mpi_buffer = (char *)malloc(mybsize*sizeof(char)); assert(mpi_buffer); assert(mpi_buffer); position = 0; MPI_Pack(&nseqs, 1, MPI_INT, mpi_buffer, mybsize, &position, MPI_COMM_WORLD); MPI_Pack(&istart, 1, MPI_INT, mpi_buffer, mybsize, &position, MPI_COMM_WORLD); MPI_Pack(&iend, 1, MPI_INT, mpi_buffer, mybsize, &position, MPI_COMM_WORLD); MPI_Pack(&jstart, 1, MPI_INT, mpi_buffer, mybsize, &position, MPI_COMM_WORLD); MPI_Pack(&jend, 1, MPI_INT, mpi_buffer, mybsize, &position, MPI_COMM_WORLD); MPI_Pack(&dnaflag, 1, MPI_CHAR, mpi_buffer, mybsize, &position, MPI_COMM_WORLD); MPI_Pack(&pw_go_penalty, 1, MPI_FLOAT, mpi_buffer, mybsize, &position, MPI_COMM_WORLD); MPI_Pack(&pw_ge_penalty, 1, MPI_FLOAT, mpi_buffer, mybsize, &position, MPI_COMM_WORLD); MPI_Pack(&int_scale, 1, MPI_INT, mpi_buffer, mybsize, &position, MPI_COMM_WORLD); MPI_Pack(&gscale, 1, MPI_FLOAT, mpi_buffer, mybsize, &position, MPI_COMM_WORLD); MPI_Pack(&ghscale, 1, MPI_FLOAT, mpi_buffer, mybsize, &position, MPI_COMM_WORLD); MPI_Pack(&mat_avscore, 1, MPI_INT, mpi_buffer, mybsize, &position, MPI_COMM_WORLD); MPI_Pack(seqlen_array, (nseqs+1), MPI_INT, mpi_buffer, mybsize, &position, MPI_COMM_WORLD); for 
(i=1;i<=nseqs;i++) MPI_Pack(seq_array[i], seqlen_array[i]+1, MPI_CHAR, mpi_buffer, mybsize, &position, MPI_COMM_WORLD); MPI_Pack(amino_acid_codes, 26, MPI_CHAR, mpi_buffer, mybsize, &position, MPI_COMM_WORLD); MPI_Pack(&max_aa, 1, MPI_INT, mpi_buffer, mybsize, &position, MPI_COMM_WORLD); MPI_Pack(&gap_pos1, 1, MPI_INT, mpi_buffer, mybsize, &position, MPI_COMM_WORLD); MPI_Pack(&gap_pos2, 1, MPI_INT, mpi_buffer, mybsize, &position, MPI_COMM_WORLD); MPI_Pack(&max_aln_length, 1, MPI_INT, mpi_buffer, mybsize, &position, MPI_COMM_WORLD); for (i=0;i=(np-1)) { for (pdest = 1; pdest < np ; pdest++ ) MPI_Send(mpi_buffer, mybsize, MPI_PACKED, pdest, MY_DATA_TAG, MPI_COMM_WORLD); } else { for (pdest = 1; pdest <= (nseqs*(nseqs-1)/2); pdest++ ) MPI_Send(mpi_buffer, mybsize, MPI_PACKED, pdest, MY_DATA_TAG, MPI_COMM_WORLD); } free(mpi_buffer); wtime100 = MPI_Wtime(); fprintf(stderr,"DEBUG: it takes %8.6f sec to send data to all slaves.\n", wtime100-wtime99); fflush(stderr); /* OK, now wait until all slaves have finished ... 
*/ #ifndef STATIC_SCHEDULING_PAIRALIGN /* the default is to use dynamic scheduling */ num_siarray=0; for (si = MAX(0, istart); si < nseqs && si < iend; si++) { for (sj = MAX(si + 1, jstart + 1); sj < nseqs && sj < jend; sj++) { num_siarray++; } } MPI_Comm_size(MPI_COMM_WORLD, &np); ave_load = (num_siarray/(np-1))/PAIRALIGN_NCHUNK; if (ave_load==0) ave_load = 1; pdest = 0; work = 1; while(pdest*ave_load < num_siarray) { if (work1){ MPI_Recv(&myres_size, 1, MPI_INT, MPI_ANY_SOURCE, MY_BSIZE_TAG, MPI_COMM_WORLD, &status); mpi_buffer=(char *)malloc(myres_size*sizeof(char)); assert(mpi_buffer); MPI_Recv(mpi_buffer, myres_size, MPI_PACKED, status.MPI_SOURCE, MY_RESULT_TAG, MPI_COMM_WORLD, &status); position=0; MPI_Unpack(mpi_buffer, myres_size, &position, &slen, 1, MPI_INT, MPI_COMM_WORLD); xarray = (short *)calloc(slen,sizeof(short)); assert(xarray); yarray = (short *)calloc(slen,sizeof(short)); assert(yarray); tarray = (double *)calloc(slen,sizeof(double)); assert(tarray); MPI_Unpack(mpi_buffer, myres_size, &position, xarray, slen, MPI_SHORT, MPI_COMM_WORLD); MPI_Unpack(mpi_buffer, myres_size, &position, yarray, slen, MPI_SHORT, MPI_COMM_WORLD); MPI_Unpack(mpi_buffer, myres_size, &position, tarray, slen, MPI_DOUBLE, MPI_COMM_WORLD); for (i=0;i 1) fprintf(stdout, "%d ", (pint) displ[i]); if (displ[i] == 0) { c1 = seq_array[seq1][i1]; c2 = seq_array[seq2][i2]; if (debug > 0) { if (c1 > max_aa) s1[pos] = '-'; else s1[pos] = amino_acid_codes[c1]; if (c2 > max_aa) s2[pos] = '-'; else s2[pos] = amino_acid_codes[c2]; } if ((c1 != gap_pos1) && (c1 != gap_pos2) && (c1 == c2)) count++; ++i1; ++i2; ++pos; } else { if ((k = displ[i]) > 0) { if (debug > 0) for (r = 0; r < k; r++) { s1[pos + r] = '-'; if (seq_array[seq2][i2 + r] > max_aa) s2[pos + r] = '-'; else s2[pos + r] = amino_acid_codes[seq_array[seq2][i2 + r]]; } i2 += k; pos += k; } else { if (debug > 0) for (r = 0; r < (-k); r++) { s2[pos + r] = '-'; if (seq_array[seq1][i1 + r] > max_aa) s1[pos + r] = '-'; else s1[pos + r] 
= amino_acid_codes[seq_array[seq1][i1 + r]]; } i1 -= k; pos -= k; } } } if (debug > 0) fprintf(stdout, "\n"); if (debug > 0) { for (i = 0; i < pos; i++) fprintf(stdout, "%c", s1[i]); fprintf(stdout, "\n"); for (i = 0; i < pos; i++) fprintf(stdout, "%c", s2[i]); fprintf(stdout, "\n"); } /* if (count <= 0) count = 1; */ score = 100.0 * (float) count; return (score); }
/*
 * forward_pass: scan ia[1..n] against ib[1..m] row by row and locate the
 * highest-scoring cell.
 *
 * Uses the file-scope work rows HH[] (best score of the previous row) and
 * DD[] (best score ending in a gap in column j); f is the running gap
 * score along the current row.  g is charged when a gap is opened and gh
 * for every gap position.  Cell scores never drop below 0.
 *
 * Output: maxscore receives the largest cell value and se1/se2 the row and
 * column where it was first reached (all three are file-scope statics).
 */
static void forward_pass(char *ia, char *ib, sint n, sint m) { sint i, j; pwint f, hh, p, t; maxscore = 0; se1 = se2 = 0; for (i = 0; i <= m; i++) { HH[i] = 0; DD[i] = -g; } for (i = 1; i <= n; i++) { hh = p = 0; f = -g; for (j = 1; j <= m; j++) { f -= gh; t = hh - g - gh; if (f < t) f = t; DD[j] -= gh; t = HH[j] - g - gh; if (DD[j] < t) DD[j] = t; hh = p + matrix[(int) ia[i]][(int) ib[j]]; if (hh < f) hh = f; if (hh < DD[j]) hh = DD[j]; if (hh < 0) hh = 0; p = HH[j]; HH[j] = hh; if (hh > maxscore) { maxscore = hh; se1 = i; se2 = j; } } } }
/*
 * reverse_pass: walk backwards from the cell (se1, se2) found by
 * forward_pass() to find where the best-scoring region begins.
 *
 * The same recurrence as forward_pass() is run with rows se1..1 and
 * columns se2..1, starting from -1 everywhere except the diagonal
 * predecessor of the first row (p = 0 when i == se1).  Each time the
 * running maximum "cost" increases, sb1/sb2 are set to the current cell;
 * both loops stop as soon as cost reaches maxscore.
 */
static void reverse_pass(char *ia, char *ib) { sint i, j; pwint f, hh, p, t; pwint cost; cost = 0; sb1 = sb2 = 1; for (i = se2; i > 0; i--) { HH[i] = -1; DD[i] = -1; } for (i = se1; i > 0; i--) { hh = f = -1; if (i == se1) p = 0; else p = -1; for (j = se2; j > 0; j--) { f -= gh; t = hh - g - gh; if (f < t) f = t; DD[j] -= gh; t = HH[j] - g - gh; if (DD[j] < t) DD[j] = t; hh = p + matrix[(int) ia[i]][(int) ib[j]]; if (hh < f) hh = f; if (hh < DD[j]) hh = DD[j]; p = HH[j]; HH[j] = hh; if (hh > cost) { cost = hh; sb1 = i; sb2 = j; if (cost >= maxscore) break; } } if (cost >= maxscore) break; } }
/*
 * diff: recursive divide-and-conquer alignment (the caller refers to it as
 * the Myers and Miller method).
 *
 * A, B   - offsets into the two sequences; they are passed unchanged to
 *          calc_score(), presumably so that positions 1..M and 1..N are
 *          taken relative to them (calc_score is not visible here).
 * M, N   - number of positions still to align in each sequence.
 * tb, te - gap-open costs applied at the beginning and at the end of this
 *          sub-problem (see the tbgap()/tegap() macros).
 *
 * The alignment is emitted into displ[] through add(), del() and, for a
 * matched pair, a literal 0 entry.  Returns the score of the sub-problem.
 *
 * The working scalars f, hh, e, s, t are static although the function is
 * recursive; the recursive calls are made only after both scans have
 * finished, and the value returned afterwards (midh) is an ordinary local.
 *
 * NOTE(review): the prototype earlier in the file declares "static sint
 * diff" while this definition says "static int"; that is consistent only
 * if sint is a typedef for int.
 */
static int diff(sint A, sint B, sint M, sint N, sint tb, sint te) { sint type; sint midi, midj, i, j; int midh; static pwint f, hh, e, s, t;
/* Boundary cases: one of the two sub-sequences is empty. */
if (N <= 0) { if (M > 0) { del(M); } return (-(int) tbgap(M)); } if (M <= 1) { if (M <= 0) { add(N); return (-(int) tbgap(N)); }
/*
 * M == 1: start from the cost of deleting the single position and adding
 * all N, then try pairing it with every column j; midj == 0 afterwards
 * means that no pairing was better.
 */
midh = -(tb + gh) - tegap(N); hh = -(te + gh) - tbgap(N); if (hh > midh) midh = hh; midj = 0; for (j = 1; j <= N; j++) { hh = calc_score(1, j, A, B) - tegap(N - j) - tbgap(j - 1); if (hh > midh) { midh = hh; midj = j; } } if (midj == 0) { del(1); add(N); } else { if (midj > 1) add(midj - 1); displ[print_ptr++] = last_print = 0; if (midj < N) add(N - midj); } return midh; }
/* Divide: Find optimum midpoint (midi,midj) of cost midh */
/* Forward half: rows 1..midi, results left in HH[] and DD[]. */
midi = M / 2; HH[0] = 0.0; t = -tb; for (j = 1; j <= N; j++) { HH[j] = t = t - gh; DD[j] = t - g; } t = -tb; for (i = 1; i <= midi; i++) { s = HH[0]; HH[0] = hh = t = t - gh; f = t - g; for (j = 1; j <= N; j++) { if ((hh = hh - g - gh) > (f = f - gh)) f = hh; if ((hh = HH[j] - g - gh) > (e = DD[j] - gh)) e = hh; hh = s + calc_score(i, j, A, B); if (f > hh) hh = f; if (e > hh) hh = e; s = HH[j]; HH[j] = hh; DD[j] = e; } } DD[0] = HH[0];
/* Reverse half: rows M-1..midi, results left in RR[] and SS[]. */
RR[N] = 0; t = -te; for (j = N - 1; j >= 0; j--) { RR[j] = t = t - gh; SS[j] = t - g; } t = -te; for (i = M - 1; i >= midi; i--) { s = RR[N]; RR[N] = hh = t = t - gh; f = t - g; for (j = N - 1; j >= 0; j--) { if ((hh = hh - g - gh) > (f = f - gh)) f = hh; if ((hh = RR[j] - g - gh) > (e = SS[j] - gh)) e = hh; hh = s + calc_score(i + 1, j + 1, A, B); if (f > hh) hh = f; if (e > hh) hh = e; s = RR[j]; RR[j] = hh; SS[j] = e; } }
/*
 * Pick the column midj that maximises the joined score: type 1 joins
 * HH[j] + RR[j]; type 2 joins the two gap states DD[j] + SS[j] and adds g
 * back because the gap open would otherwise be charged twice.
 */
SS[N] = RR[N]; midh = HH[0] + RR[0]; midj = 0; type = 1; for (j = 0; j <= N; j++) { hh = HH[j] + RR[j]; if (hh >= midh) if (hh > midh || (HH[j] != DD[j] && RR[j] == SS[j])) { midh = hh; midj = j; } } for (j = N; j >= 0; j--) { hh = DD[j] + SS[j] + g; if (hh > midh) { midh = hh; midj = j; type = 2; } }
/* Conquer recursively around midpoint */
/* In the type 2 case rows midi and midi + 1 are emitted directly as del(2). */
if (type == 1) { /* Type 1 gaps */ diff(A, B, midi, midj, tb, g); diff(A + midi, B + midj, M - midi, N - midj, g, te); } else { diff(A, B, midi - 1, midj, tb, 0.0); del(2); diff(A + midi + 1, B + midj, M - midi - 1, N - midj, 0.0, te); } return midh; /* Return the score of the best alignment */ }
/*
 * del: record a run of k deleted positions in displ[].
 *
 * If the previous entry is already a deletion (last_print < 0) it is
 * extended by k; otherwise a new entry -k is appended.  last_print is left
 * holding the entry that was written.
 */
static void del(sint k) { if (last_print < 0) last_print = displ[print_ptr - 1] -= k; else last_print = displ[print_ptr++] = -(k); }
clustalw-mpi-0.15/parallel_compare.c0000644000411000001440000021706607673730674016101 0ustar liusers#include #include #include 
/* NOTE(review): the header names of the bare #include directives below were
   lost before this text reached review - restore them from the upstream
   archive. */
#include
#include
#include
#include "mpi.h"
#include "clustalw.h"

/*
 *   Prototypes
 */
static void add(sint v);
static sint calc_score(sint iat, sint jat, sint v1, sint v2);
static float tracepath(sint tsb1, sint tsb2);
static void forward_pass(char *ia, char *ib, sint n, sint m);
static void reverse_pass(char *ia, char *ib);
static sint diff(sint A, sint B, sint M, sint N, sint tb, sint te);
static void del(sint k);

static lint pdiff(sint A, sint B, sint i, sint j, sint go1, sint go2);
static lint prfscore(sint n, sint m);
static sint gap_penalty1(sint i, sint j, sint k);
static sint open_penalty1(sint i, sint j);
static sint ext_penalty1(sint i, sint j);
static sint gap_penalty2(sint i, sint j, sint k);
static sint open_penalty2(sint i, sint j);
static sint ext_penalty2(sint i, sint j);
static void padd(sint k);
static void pdel(sint k);
static void palign(void);

/* Variants of padd()/pdel()/palign() that log the call into an int array
   instead of writing displ[] (see the comment on pa below). */
static void my_padd(int *pa, int arg, int *pidx);
static void my_pdel(int *pa, int arg, int *pidx);
static void my_palign(int *pa, int *pidx);

static void mypairwise(int si, int sj, Boolean dnaflag,
                       float pw_go_penalty, float pw_ge_penalty,
                       int int_scale, float gscale,float ghscale,
                       int mat_avscore, short *xarrary, short *yarray, double *tarray);

static void mpi_njtree_slave(int, int);

/* Both macros evaluate their arguments more than once - do not pass
   expressions with side effects. */
#define MIN(a,b) ((a)<(b)?(a):(b))
#define MAX(a,b) ((a)>(b)?(a):(b))

/*
 * Cost of a gap of k positions: 0 when k <= 0, otherwise a fixed part
 * plus gh per position.  gap() uses the file-level g as the fixed part;
 * tbgap()/tegap() use a variable named tb / te that must be in scope at
 * the point of use.
 */
#define gap(k)  ((k) <= 0 ? 0 : g + gh * (k))
#define tbgap(k)  ((k) <= 0 ? 0 : tb + gh * (k))
#define tegap(k)  ((k) <= 0 ? 0 : te + gh * (k))

/*
 *   Global variables
 */
#define pwint   int     /* element type of the HH/DD/RR/SS score rows */

static sint int_scale;

extern sint nseqs;
extern sint max_aa;
extern sint max_aln_length;
extern sint gap_pos1, gap_pos2;
extern sint debug;
extern sint *seqlen_array;
extern char **seq_array;
extern char *amino_acid_codes;

/* for show_pair() */
extern sint dna_ktup, dna_window, dna_wind_gap, dna_signif;
extern sint prot_ktup,prot_window,prot_wind_gap,prot_signif;
extern Boolean dnaflag;
static sint next;
static sint curr_frag,maxsf,vatend;
static sint **accum;
static sint *diag_index;
static char *slopes;
sint ktup,window,wind_gap,signif;
sint *zza, *zzb, *zzc, *zzd;
extern Boolean percent;
static void make_p_ptrs(sint *tptr, sint *pl, sint naseq, sint l);
static void make_n_ptrs(sint *tptr, sint *pl, sint naseq, sint len);
static void put_frag(sint fs, sint v1, sint v2, sint flen);
static sint frag_rel_pos(sint a1, sint b1, sint a2, sint b2);
static void des_quick_sort(sint *array1, sint *array2, sint array_size);
static void pair_align(sint seq_no, sint l1, sint l2);
static void mpi_show_pair_slave(int length, int from);

static float mm_score;
static sint print_ptr, last_print;  /* next free slot in displ[] / last value written */
static sint *displ;                 /* edit script: 0, positive or negative entries */
static pwint *HH, *DD, *RR, *SS;    /* score rows shared by the forward/reverse passes */
static sint g, gh;                  /* fixed and per-position gap costs, see gap() */
static int seq1, seq2;              /* indices into seq_array[] read by calc_score() */
static sint matrix[NUMRES][NUMRES]; /* residue-pair scores */
static pwint maxscore;
static sint sb1, sb2, se1, se2;     /* begin / end coordinates found by the two passes */

static char **alignment;
static sint **profile1, **profile2;
static lint *gS;
static sint prf_length1, prf_length2;
static Boolean endgappenalties;

static int pidx = 0;                /* index of the next free slot in pa[] */
static int *pa = NULL;  /* array holding the calling sequence of
                           palign(), padd(x) and pdel(y) */
static int *avail_p;
static int len_of_xytarray=0;
static MPI_Request send_req;
static MPI_Request send_req2;

void pforward_pass(int midi, int t, int tl, int A, int B, int N,
                   pwint * HH, pwint * DD);
void preverse_pass(int midi, int A, int B, int M, int N, int go2,
                   pwint * RR, pwint * SS, lint * gS);
void stupid(int a, int b, int c, int d, sint go1, sint go2, lint *pscore, sint
*pprint_ptr, sint *plast_print, lint **pdispl, int pmax_aln_length, sint pmax_aa, sint pgap_pos1, sint pgap_pos2, sint **pprofile1, sint **pprofile2, Boolean pendgappenalties, sint pprf_length1, sint pprf_length2, int preverse_rank); void parallel_compare() { char *mpi_buffer; int i, position, mystrlen, idx1, idx2, len1, len2; int istart,iend,jstart,jend; int si,sj,ave_load,npair; short *xarray,*yarray; double *tarray; int slen; int ntimes; int num_siarray; int mybsize; char *mystr; MPI_Status status; static lint score; int arg1, arg2, arg3, arg4, arg5, arg6, origin; int A, B, N, M, go2, midi; int my_rank, np; int navailp; /* number of avaialble processes */ int go1; int *group; int which_set; int preverse_rank; float pw_go_penalty,pw_ge_penalty,gscale,ghscale; int int_scale,mat_avscore; double wtime1, wtime2; long long totallength, totalsquare, total_2_term, bload; long long length1, length2; long long tmp1; int len_siarray, which_interval; int *siarray,*sjarray; mainloop: MPI_Recv(&mybsize, 1, MPI_INT, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &status); switch (status.MPI_TAG) { case PAIRWISE_TAG: goto pairwise_task; break; case MALIGN_TAG: goto malign_task; break; case CALLING_STUPID: goto calling_stupid; break; case DOING_NJTREE: goto doing_njtree; break; case DOING_SHOW_PAIR: goto doing_show_pair; break; case PREVERSE_TAG: goto preverse_task; break; case MY_ENDING_TAG: MPI_Finalize(); return; break; default: fprintf(stderr, "Wrong task submitting.\n"); exit(1); break; } pairwise_task: /* receive from the process whose rank is equal to '0' */ mpi_buffer = (char *) malloc(mybsize * sizeof(char)); assert(mpi_buffer); MPI_Recv(mpi_buffer, mybsize, MPI_PACKED, 0, MY_DATA_TAG, MPI_COMM_WORLD, &status); position = 0; MPI_Unpack(mpi_buffer, mybsize, &position, &nseqs, 1, MPI_INT, MPI_COMM_WORLD); MPI_Unpack(mpi_buffer, mybsize, &position, &istart, 1, MPI_INT, MPI_COMM_WORLD); MPI_Unpack(mpi_buffer, mybsize, &position, &iend, 1, MPI_INT, MPI_COMM_WORLD); 
MPI_Unpack(mpi_buffer, mybsize, &position, &jstart, 1, MPI_INT, MPI_COMM_WORLD); MPI_Unpack(mpi_buffer, mybsize, &position, &jend, 1, MPI_INT, MPI_COMM_WORLD); MPI_Unpack(mpi_buffer, mybsize, &position, &dnaflag, 1, MPI_CHAR, MPI_COMM_WORLD); MPI_Unpack(mpi_buffer, mybsize, &position, &pw_go_penalty, 1, MPI_FLOAT, MPI_COMM_WORLD); MPI_Unpack(mpi_buffer, mybsize, &position, &pw_ge_penalty, 1, MPI_FLOAT, MPI_COMM_WORLD); MPI_Unpack(mpi_buffer, mybsize, &position, &int_scale, 1, MPI_INT, MPI_COMM_WORLD); MPI_Unpack(mpi_buffer, mybsize, &position, &gscale, 1, MPI_FLOAT, MPI_COMM_WORLD); MPI_Unpack(mpi_buffer, mybsize, &position, &ghscale, 1, MPI_FLOAT, MPI_COMM_WORLD); MPI_Unpack(mpi_buffer, mybsize, &position, &mat_avscore, 1, MPI_INT, MPI_COMM_WORLD); seqlen_array = (sint *) malloc((nseqs + 1) * sizeof(sint)); assert(seqlen_array); MPI_Unpack(mpi_buffer, mybsize, &position, seqlen_array, (nseqs + 1), MPI_INT, MPI_COMM_WORLD); seq_array = (char **) malloc((nseqs + 1) * sizeof(char *)); assert(seq_array); for (i = 1; i <= nseqs; i++) { *(seq_array + i) = malloc((seqlen_array[i] + 1) * sizeof(char)); assert(*(seq_array + i)); MPI_Unpack(mpi_buffer, mybsize, &position, seq_array[i], (seqlen_array[i] + 1), MPI_CHAR, MPI_COMM_WORLD); } amino_acid_codes = (char *) malloc(26 * sizeof(char)); assert(amino_acid_codes); MPI_Unpack(mpi_buffer, mybsize, &position, amino_acid_codes, 26, MPI_CHAR, MPI_COMM_WORLD); MPI_Unpack(mpi_buffer, mybsize, &position, &max_aa, 1, MPI_INT, MPI_COMM_WORLD); MPI_Unpack(mpi_buffer, mybsize, &position, &gap_pos1, 1, MPI_INT, MPI_COMM_WORLD); MPI_Unpack(mpi_buffer, mybsize, &position, &gap_pos2, 1, MPI_INT, MPI_COMM_WORLD); MPI_Unpack(mpi_buffer, mybsize, &position, &max_aln_length, 1, MPI_INT, MPI_COMM_WORLD); for (i = 0; i < NUMRES; i++) MPI_Unpack(mpi_buffer, mybsize, &position, *(matrix + i), NUMRES, MPI_INT, MPI_COMM_WORLD); free(mpi_buffer); /************** do computation here *******************/ displ = (sint *) ckalloc((2 * max_aln_length + 
1) * sizeof(sint)); HH = (pwint *) ckalloc((max_aln_length) * sizeof(pwint)); DD = (pwint *) ckalloc((max_aln_length) * sizeof(pwint)); RR = (pwint *) ckalloc((max_aln_length) * sizeof(pwint)); SS = (pwint *) ckalloc((max_aln_length) * sizeof(pwint)); #ifndef STATIC_SCHEDULING_PAIRALIGN /* the default is to use DYNAMIC_SCHEDULING_PARIALIGN */ len_siarray = nseqs*(nseqs-1)/2+1; siarray = (int *)calloc(len_siarray, sizeof(int)); assert(siarray); sjarray = (int *)calloc(len_siarray, sizeof(int)); assert(sjarray); num_siarray=0; for (si = MAX(0, istart); si < nseqs && si < iend; si++) { for (sj = MAX(si + 1, jstart + 1); sj < nseqs && sj < jend; sj++) { siarray[num_siarray]=si; sjarray[num_siarray]=sj; num_siarray++; } } MPI_Comm_size(MPI_COMM_WORLD, &np); MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); ave_load = (num_siarray/(np-1))/PAIRALIGN_NCHUNK; if (ave_load==0) ave_load = 1; xarray = (short *)calloc(ave_load,sizeof(short)); assert(xarray); yarray = (short *)calloc(ave_load,sizeof(short)); assert(yarray); tarray = (double *)calloc(ave_load,sizeof(double)); assert(tarray); ntimes = 0; while (1) { MPI_Recv(&which_interval, 1, MPI_INT, 0, WHICH_INTERVAL, MPI_COMM_WORLD, &status); if (which_interval<0) { /* we have finished our task */ break; } else { /* do real work here */ for (i=which_interval*ave_load ; i<(which_interval+1)*ave_load && (i=((long long)my_rank-1LL)*bload) && (length1<((long long)my_rank*bload))) { /* do pairwise alignment (si:sj) */ mypairwise(si,sj,dnaflag,pw_go_penalty,pw_ge_penalty,int_scale, gscale,ghscale,mat_avscore,xarray,yarray,tarray); } } else { /* this is the last MPI process */ if (length1>=((long long)my_rank-1LL)*bload) { /* do pairwise alignment (si:sj) */ mypairwise(si,sj,dnaflag,pw_go_penalty,pw_ge_penalty,int_scale, gscale,ghscale,mat_avscore,xarray,yarray,tarray); } } } } mybsize = (ave_load)*(sizeof(short)*2+sizeof(double)) + sizeof(int); MPI_Send(&mybsize, 1, MPI_INT, 0, MY_BSIZE_TAG, MPI_COMM_WORLD); mpi_buffer = (char *) 
malloc(mybsize * sizeof(char)); assert(mpi_buffer); position = 0; slen = ave_load; MPI_Pack(&slen, 1, MPI_INT, mpi_buffer, mybsize, &position, MPI_COMM_WORLD); MPI_Pack(xarray, ave_load, MPI_SHORT, mpi_buffer, mybsize, &position, MPI_COMM_WORLD); MPI_Pack(yarray, ave_load, MPI_SHORT, mpi_buffer, mybsize, &position, MPI_COMM_WORLD); MPI_Pack(tarray, ave_load, MPI_DOUBLE, mpi_buffer, mybsize, &position, MPI_COMM_WORLD); MPI_Send(mpi_buffer, mybsize, MPI_PACKED, 0, MY_RESULT_TAG, MPI_COMM_WORLD); free(mpi_buffer); #endif /**************** house cleaning **************/ free(seqlen_array); for (i = 1; i <= nseqs; i++) free(*(seq_array + i)); free(seq_array); free(amino_acid_codes); free(xarray); free(yarray); free(tarray); displ = ckfree((void *) displ); HH = ckfree((void *) HH); DD = ckfree((void *) DD); RR = ckfree((void *) RR); SS = ckfree((void *) SS); goto mainloop; /************************************************************* * Start of parallel job No.2 ..... ************************************************************/ calling_stupid: /* * Doing MPI_Recv stuff here, pass the received data * to stupid() as regular function arguments. 
*/ /* unpack data */ mpi_buffer = (char *) malloc(mybsize * sizeof(char)); assert(mpi_buffer); MPI_Recv(mpi_buffer, mybsize, MPI_PACKED, 0, MY_DATA_TAG, MPI_COMM_WORLD, &status); position = 0; MPI_Unpack(mpi_buffer, mybsize, &position, &nseqs, 1, MPI_INT, MPI_COMM_WORLD); MPI_Unpack(mpi_buffer, mybsize, &position, &which_set, 1, MPI_INT, MPI_COMM_WORLD); MPI_Unpack(mpi_buffer, mybsize, &position, &sb1, 1, MPI_INT, MPI_COMM_WORLD); MPI_Unpack(mpi_buffer, mybsize, &position, &sb2, 1, MPI_INT, MPI_COMM_WORLD); MPI_Unpack(mpi_buffer, mybsize, &position, &se1, 1, MPI_INT, MPI_COMM_WORLD); MPI_Unpack(mpi_buffer, mybsize, &position, &se2, 1, MPI_INT, MPI_COMM_WORLD); MPI_Unpack(mpi_buffer, mybsize, &position, &go1, 1, MPI_INT, MPI_COMM_WORLD); MPI_Unpack(mpi_buffer, mybsize, &position, &go2, 1, MPI_INT, MPI_COMM_WORLD); MPI_Unpack(mpi_buffer, mybsize, &position, &last_print, 1, MPI_INT, MPI_COMM_WORLD); MPI_Unpack(mpi_buffer, mybsize, &position, &print_ptr, 1, MPI_INT, MPI_COMM_WORLD); MPI_Unpack(mpi_buffer, mybsize, &position, &max_aln_length, 1, MPI_INT, MPI_COMM_WORLD); MPI_Unpack(mpi_buffer, mybsize, &position, &max_aa, 1, MPI_INT, MPI_COMM_WORLD); MPI_Unpack(mpi_buffer, mybsize, &position, &gap_pos1, 1, MPI_INT, MPI_COMM_WORLD); MPI_Unpack(mpi_buffer, mybsize, &position, &gap_pos2, 1, MPI_INT, MPI_COMM_WORLD); MPI_Unpack(mpi_buffer, mybsize, &position, &endgappenalties, 1, MPI_CHAR, MPI_COMM_WORLD); MPI_Unpack(mpi_buffer, mybsize, &position, &prf_length1, 1, MPI_INT, MPI_COMM_WORLD); MPI_Unpack(mpi_buffer, mybsize, &position, &prf_length2, 1, MPI_INT, MPI_COMM_WORLD); MPI_Unpack(mpi_buffer, mybsize, &position, &preverse_rank, 1, MPI_INT, MPI_COMM_WORLD); profile1 = (sint **) ckalloc((prf_length1 + 2) * sizeof(sint *)); for (i = 0; i < prf_length1 + 2; i++) profile1[i] = (sint *) ckalloc((LENCOL + 2) * sizeof(sint)); profile2 = (sint **) ckalloc((prf_length2 + 2) * sizeof(sint *)); for (i = 0; i < prf_length2 + 2; i++) profile2[i] = (sint *) ckalloc((LENCOL + 2) * 
sizeof(sint)); for (i = 0; i < prf_length1 + 2; i++) MPI_Unpack(mpi_buffer, mybsize, &position, profile1[i], (LENCOL+2), MPI_INT, MPI_COMM_WORLD); for (i = 0; i < prf_length2 + 2; i++) MPI_Unpack(mpi_buffer, mybsize, &position, profile2[i], (LENCOL+2), MPI_INT, MPI_COMM_WORLD); group = (int *) ckalloc((nseqs+1)*sizeof(int)); MPI_Unpack(mpi_buffer, mybsize, &position, group, (nseqs+1), MPI_INT, MPI_COMM_WORLD); free(mpi_buffer); /* * Main work is here .............. */ stupid(sb1, sb2, se1 - sb1, se2 - sb2, go1, go2, &score, &print_ptr, &last_print, &displ, max_aln_length,max_aa, gap_pos1,gap_pos2, profile1, profile2,endgappenalties, prf_length1,prf_length2, preverse_rank); for (i = 0; i < prf_length1 + 2; i++) profile1[i] = ckfree((void *) profile1[i]); profile1 = ckfree((void *) profile1); for (i = 0; i < prf_length2 + 2; i++) profile2[i] = ckfree((void *) profile2[i]); profile2 = ckfree((void *) profile2); /* * Doing MPI_Send stuff here. * Data comes from stupid() as regular function arguments. 
*/ mybsize = 0; mybsize += 8*sizeof(int) + (max_aln_length + 1)*sizeof(int); mybsize += (nseqs+1)*sizeof(int); MPI_Send(&mybsize, 1, MPI_INT, 0, MY_BSIZE_TAG, MPI_COMM_WORLD); mpi_buffer = (char *) malloc(mybsize * sizeof(char)); assert(mpi_buffer); position = 0; MPI_Pack(&score, 1, MPI_INT, mpi_buffer, mybsize, &position, MPI_COMM_WORLD); MPI_Pack(&print_ptr, 1, MPI_INT, mpi_buffer, mybsize, &position, MPI_COMM_WORLD); MPI_Pack(&last_print, 1, MPI_INT, mpi_buffer, mybsize, &position, MPI_COMM_WORLD); MPI_Pack(&prf_length1, 1, MPI_INT, mpi_buffer, mybsize, &position, MPI_COMM_WORLD); MPI_Pack(&prf_length2, 1, MPI_INT, mpi_buffer, mybsize, &position, MPI_COMM_WORLD); MPI_Pack(&max_aln_length, 1, MPI_INT, mpi_buffer, mybsize, &position, MPI_COMM_WORLD); MPI_Pack(&which_set, 1, MPI_INT, mpi_buffer, mybsize, &position, MPI_COMM_WORLD); MPI_Pack(&preverse_rank, 1, MPI_INT, mpi_buffer, mybsize, &position, MPI_COMM_WORLD); MPI_Pack(displ, (max_aln_length+1), MPI_INT, mpi_buffer, mybsize, &position, MPI_COMM_WORLD); MPI_Pack(group, (nseqs+1), MPI_INT, mpi_buffer, mybsize, &position, MPI_COMM_WORLD); MPI_Send(mpi_buffer, mybsize, MPI_PACKED, 0, MY_RESULT_TAG, MPI_COMM_WORLD); free(mpi_buffer); free(group); free(displ); goto mainloop; doing_njtree: mpi_njtree_slave(mybsize, status.MPI_SOURCE); goto mainloop; doing_show_pair: mpi_show_pair_slave(mybsize, status.MPI_SOURCE); goto mainloop; malign_task: /* receive from the upstream process */ MPI_Comm_size(MPI_COMM_WORLD, &np); /* MPI_Recv(&mybsize, 1, MPI_INT, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &status); */ /* check the tag */ /* if (status.MPI_TAG == MY_ENDING_TAG) { MPI_Finalize(); return; } */ /* unpack data */ mpi_buffer = (char *) malloc(mybsize * sizeof(char)); assert(mpi_buffer); origin = status.MPI_SOURCE; MPI_Recv(mpi_buffer, mybsize, MPI_PACKED, origin, MY_DATA_TAG, MPI_COMM_WORLD, &status); position = 0; MPI_Unpack(mpi_buffer, mybsize, &position, &prf_length1, 1, MPI_INT, MPI_COMM_WORLD); 
MPI_Unpack(mpi_buffer, mybsize, &position, &prf_length2, 1, MPI_INT, MPI_COMM_WORLD); MPI_Unpack(mpi_buffer, mybsize, &position, &gap_pos1, 1, MPI_INT, MPI_COMM_WORLD); MPI_Unpack(mpi_buffer, mybsize, &position, &gap_pos2, 1, MPI_INT, MPI_COMM_WORLD); MPI_Unpack(mpi_buffer, mybsize, &position, &arg1, 1, MPI_INT, MPI_COMM_WORLD); MPI_Unpack(mpi_buffer, mybsize, &position, &arg2, 1, MPI_INT, MPI_COMM_WORLD); MPI_Unpack(mpi_buffer, mybsize, &position, &arg3, 1, MPI_INT, MPI_COMM_WORLD); MPI_Unpack(mpi_buffer, mybsize, &position, &arg4, 1, MPI_INT, MPI_COMM_WORLD); MPI_Unpack(mpi_buffer, mybsize, &position, &arg5, 1, MPI_INT, MPI_COMM_WORLD); MPI_Unpack(mpi_buffer, mybsize, &position, &arg6, 1, MPI_INT, MPI_COMM_WORLD); MPI_Unpack(mpi_buffer, mybsize, &position, &endgappenalties, 1, MPI_CHAR, MPI_COMM_WORLD); MPI_Unpack(mpi_buffer, mybsize, &position, &max_aa, 1, MPI_INT, MPI_COMM_WORLD); max_aln_length = prf_length1 + prf_length2 + 2; HH = (lint *) ckalloc((max_aln_length + 1) * sizeof(lint)); DD = (lint *) ckalloc((max_aln_length + 1) * sizeof(lint)); RR = (lint *) ckalloc((max_aln_length + 1) * sizeof(lint)); SS = (lint *) ckalloc((max_aln_length + 1) * sizeof(lint)); gS = (lint *) ckalloc((max_aln_length + 1) * sizeof(lint)); profile1 = (sint **) ckalloc((prf_length1 + 2) * sizeof(sint *)); for (i = 0; i < prf_length1 + 2; i++) profile1[i] = (sint *) ckalloc((LENCOL + 2) * sizeof(sint)); profile2 = (sint **) ckalloc((prf_length2 + 2) * sizeof(sint *)); for (i = 0; i < prf_length2 + 2; i++) profile2[i] = (sint *) ckalloc((LENCOL + 2) * sizeof(sint)); for (i = arg1; i < arg1 + arg3 + 2; i++) MPI_Unpack(mpi_buffer, mybsize, &position, profile1[i], (LENCOL + 2), MPI_INT, MPI_COMM_WORLD); for (i = arg2; i < arg2 + arg4 + 2; i++) MPI_Unpack(mpi_buffer, mybsize, &position, profile2[i], (LENCOL + 2), MPI_INT, MPI_COMM_WORLD); MPI_Unpack(mpi_buffer, mybsize, &position, &navailp, 1, MPI_INT, MPI_COMM_WORLD); avail_p = (int *) malloc((navailp + 1) * sizeof(int)); 
assert(avail_p); MPI_Unpack(mpi_buffer, mybsize, &position, avail_p, (navailp + 1), MPI_INT, MPI_COMM_WORLD); free(mpi_buffer); /************************************************************** * Allocate pa[] only if the subsequent pdiff() calls will be * executed on the SAME MPI process. * * We assume the argument to padd() and pdel() are always * positive integer!!! * pa[i] == 1: it means a call to palign() has been made * pa[i] == 2: it means a call to padd(x) has been made, * where x == pa[i+1] * pa[i] == 3: it means a call to pdel(x) has been made, * where x == pa[i+1] * pa[i] == 0: the end of this pa[] array. ***************************************************************/ MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); /* DEBUG: temporarily set the length to three times of max_aln_length */ pa = (int *) calloc(3 * max_aln_length, sizeof(int)); assert(pa); /* * Initialize pa[] by setting pidx to 0. This has to be * done for every instance of parallel_compare(). */ pidx = 0; score = pdiff(arg1, arg2, arg3, arg4, arg5, arg6); /* sending result back to "origin" */ mybsize = 0; mybsize += 1 * sizeof(int); mybsize += 3 * max_aln_length * sizeof(int); /* for pa[] */ MPI_Send(&mybsize, 1, MPI_INT, origin, MY_RESULT_TAG, MPI_COMM_WORLD); mpi_buffer = (char *) malloc(mybsize * sizeof(char)); assert(mpi_buffer); position = 0; MPI_Pack(pa, 3 * max_aln_length, MPI_INT, mpi_buffer, mybsize, &position, MPI_COMM_WORLD); MPI_Pack(&score, 1, MPI_INT, mpi_buffer, mybsize, &position, MPI_COMM_WORLD); MPI_Send(mpi_buffer, mybsize, MPI_PACKED, origin, MY_RESULT_TAG, MPI_COMM_WORLD); free(mpi_buffer); HH = ckfree((void *) HH); DD = ckfree((void *) DD); RR = ckfree((void *) RR); SS = ckfree((void *) SS); gS = ckfree((void *) gS); for (i = 0; i < prf_length1 + 2; i++) profile1[i] = ckfree((void *) profile1[i]); for (i = 0; i < prf_length2 + 2; i++) profile2[i] = ckfree((void *) profile2[i]); profile1 = ckfree((void *) profile1); profile2 = ckfree((void *) profile2); free(avail_p); free(pa); 
goto mainloop; preverse_task: /* unpack data */ mpi_buffer = (char *) malloc(mybsize * sizeof(char)); assert(mpi_buffer); origin = status.MPI_SOURCE; MPI_Recv(mpi_buffer, mybsize, MPI_PACKED, origin, MY_DATA_TAG, MPI_COMM_WORLD, &status); position = 0; MPI_Unpack(mpi_buffer, mybsize, &position, &prf_length1, 1, MPI_INT, MPI_COMM_WORLD); MPI_Unpack(mpi_buffer, mybsize, &position, &prf_length2, 1, MPI_INT, MPI_COMM_WORLD); MPI_Unpack(mpi_buffer, mybsize, &position, &endgappenalties, 1, MPI_CHAR, MPI_COMM_WORLD); MPI_Unpack(mpi_buffer, mybsize, &position, &max_aa, 1, MPI_INT, MPI_COMM_WORLD); MPI_Unpack(mpi_buffer, mybsize, &position, &midi, 1, MPI_INT, MPI_COMM_WORLD); MPI_Unpack(mpi_buffer, mybsize, &position, &go2, 1, MPI_INT, MPI_COMM_WORLD); MPI_Unpack(mpi_buffer, mybsize, &position, &A, 1, MPI_INT, MPI_COMM_WORLD); MPI_Unpack(mpi_buffer, mybsize, &position, &B, 1, MPI_INT, MPI_COMM_WORLD); MPI_Unpack(mpi_buffer, mybsize, &position, &M, 1, MPI_INT, MPI_COMM_WORLD); MPI_Unpack(mpi_buffer, mybsize, &position, &N, 1, MPI_INT, MPI_COMM_WORLD); max_aln_length = prf_length1 + prf_length2 + 2; RR = (lint *) ckalloc((max_aln_length + 1) * sizeof(lint)); SS = (lint *) ckalloc((max_aln_length + 1) * sizeof(lint)); gS = (lint *) ckalloc((max_aln_length + 1) * sizeof(lint)); profile1 = (sint **) ckalloc((prf_length1 + 2) * sizeof(sint *)); for (i = 0; i < prf_length1 + 2; i++) profile1[i] = (sint *) ckalloc((LENCOL + 2) * sizeof(sint)); profile2 = (sint **) ckalloc((prf_length2 + 2) * sizeof(sint *)); for (i = 0; i < prf_length2 + 2; i++) profile2[i] = (sint *) ckalloc((LENCOL + 2) * sizeof(sint)); for (i = A; i < A + M + 2; i++) MPI_Unpack(mpi_buffer, mybsize, &position, profile1[i], (LENCOL + 2), MPI_INT, MPI_COMM_WORLD); for (i = B; i < B + N + 2; i++) MPI_Unpack(mpi_buffer, mybsize, &position, profile2[i], (LENCOL + 2), MPI_INT, MPI_COMM_WORLD); free(mpi_buffer); preverse_pass(midi, A, B, M, N, go2, RR, SS, gS); /* sending back RR[], SS[] and gS[] */ mybsize = 0; mybsize 
+= 3 * (max_aln_length + 1) * sizeof(int); MPI_Send(&mybsize, 1, MPI_INT, origin, MY_RESULT_TAG, MPI_COMM_WORLD); mpi_buffer = (char *) malloc(mybsize * sizeof(char)); assert(mpi_buffer); position = 0; MPI_Pack(RR, (max_aln_length + 1), MPI_INT, mpi_buffer, mybsize, &position, MPI_COMM_WORLD); MPI_Pack(SS, (max_aln_length + 1), MPI_INT, mpi_buffer, mybsize, &position, MPI_COMM_WORLD); MPI_Pack(gS, (max_aln_length + 1), MPI_INT, mpi_buffer, mybsize, &position, MPI_COMM_WORLD); MPI_Send(mpi_buffer, mybsize, MPI_PACKED, origin, MY_RESULT_TAG, MPI_COMM_WORLD); free(mpi_buffer); RR = ckfree((void *) RR); SS = ckfree((void *) SS); gS = ckfree((void *) gS); for (i = 0; i < prf_length1 + 2; i++) profile1[i] = ckfree((void *) profile1[i]); for (i = 0; i < prf_length2 + 2; i++) profile2[i] = ckfree((void *) profile2[i]); profile1 = ckfree((void *) profile1); profile2 = ckfree((void *) profile2); goto mainloop; return; } static void forward_pass(char *ia, char *ib, sint n, sint m) { sint i, j; pwint f, hh, p, t; maxscore = 0; se1 = se2 = 0; for (i = 0; i <= m; i++) { HH[i] = 0; DD[i] = -g; } for (i = 1; i <= n; i++) { hh = p = 0; f = -g; for (j = 1; j <= m; j++) { f -= gh; t = hh - g - gh; if (f < t) f = t; DD[j] -= gh; t = HH[j] - g - gh; if (DD[j] < t) DD[j] = t; hh = p + matrix[(int) ia[i]][(int) ib[j]]; if (hh < f) hh = f; if (hh < DD[j]) hh = DD[j]; if (hh < 0) hh = 0; p = HH[j]; HH[j] = hh; if (hh > maxscore) { maxscore = hh; se1 = i; se2 = j; } } } return; } static void reverse_pass(char *ia, char *ib) { sint i, j; pwint f, hh, p, t; pwint cost; cost = 0; sb1 = sb2 = 1; for (i = se2; i > 0; i--) { HH[i] = -1; DD[i] = -1; } for (i = se1; i > 0; i--) { hh = f = -1; if (i == se1) p = 0; else p = -1; for (j = se2; j > 0; j--) { f -= gh; t = hh - g - gh; if (f < t) f = t; DD[j] -= gh; t = HH[j] - g - gh; if (DD[j] < t) DD[j] = t; hh = p + matrix[(int) ia[i]][(int) ib[j]]; if (hh < f) hh = f; if (hh < DD[j]) hh = DD[j]; p = HH[j]; HH[j] = hh; if (hh > cost) { cost = hh; sb1 
= i; sb2 = j; if (cost >= maxscore) break; } } if (cost >= maxscore) break; } return; } static int diff(sint A, sint B, sint M, sint N, sint tb, sint te) { sint type; sint midi, midj, i, j; int midh; static pwint f, hh, e, s, t; if (N <= 0) { if (M > 0) { del(M); } return (-(int) tbgap(M)); } if (M <= 1) { if (M <= 0) { add(N); return (-(int) tbgap(N)); } midh = -(tb + gh) - tegap(N); hh = -(te + gh) - tbgap(N); if (hh > midh) midh = hh; midj = 0; for (j = 1; j <= N; j++) { hh = calc_score(1, j, A, B) - tegap(N - j) - tbgap(j - 1); if (hh > midh) { midh = hh; midj = j; } } if (midj == 0) { del(1); add(N); } else { if (midj > 1) add(midj - 1); displ[print_ptr++] = last_print = 0; if (midj < N) add(N - midj); } return midh; } /* Divide: Find optimum midpoint (midi,midj) of cost midh */ midi = M / 2; HH[0] = 0.0; t = -tb; for (j = 1; j <= N; j++) { HH[j] = t = t - gh; DD[j] = t - g; } t = -tb; for (i = 1; i <= midi; i++) { s = HH[0]; HH[0] = hh = t = t - gh; f = t - g; for (j = 1; j <= N; j++) { if ((hh = hh - g - gh) > (f = f - gh)) f = hh; if ((hh = HH[j] - g - gh) > (e = DD[j] - gh)) e = hh; hh = s + calc_score(i, j, A, B); if (f > hh) hh = f; if (e > hh) hh = e; s = HH[j]; HH[j] = hh; DD[j] = e; } } DD[0] = HH[0]; RR[N] = 0; t = -te; for (j = N - 1; j >= 0; j--) { RR[j] = t = t - gh; SS[j] = t - g; } t = -te; for (i = M - 1; i >= midi; i--) { s = RR[N]; RR[N] = hh = t = t - gh; f = t - g; for (j = N - 1; j >= 0; j--) { if ((hh = hh - g - gh) > (f = f - gh)) f = hh; if ((hh = RR[j] - g - gh) > (e = SS[j] - gh)) e = hh; hh = s + calc_score(i + 1, j + 1, A, B); if (f > hh) hh = f; if (e > hh) hh = e; s = RR[j]; RR[j] = hh; SS[j] = e; } } SS[N] = RR[N]; midh = HH[0] + RR[0]; midj = 0; type = 1; for (j = 0; j <= N; j++) { hh = HH[j] + RR[j]; if (hh >= midh) if (hh > midh || (HH[j] != DD[j] && RR[j] == SS[j])) { midh = hh; midj = j; } } for (j = N; j >= 0; j--) { hh = DD[j] + SS[j] + g; if (hh > midh) { midh = hh; midj = j; type = 2; } } /* Conquer recursively around 
midpoint */ if (type == 1) { /* Type 1 gaps */ diff(A, B, midi, midj, tb, g); diff(A + midi, B + midj, M - midi, N - midj, g, te); } else { diff(A, B, midi - 1, midj, tb, 0.0); del(2); diff(A + midi + 1, B + midj, M - midi - 1, N - midj, 0.0, te); } return midh; /* Return the score of the best alignment */ } static void add(sint v) { if (last_print < 0) { displ[print_ptr - 1] = v; displ[print_ptr++] = last_print; } else last_print = displ[print_ptr++] = v; } static sint calc_score(sint iat, sint jat, sint v1, sint v2) { sint ipos, jpos; sint ret; ipos = v1 + iat; jpos = v2 + jat; ret = matrix[(int) seq_array[seq1][ipos]][(int) seq_array[seq2][jpos]]; return (ret); } static float tracepath(sint tsb1, sint tsb2) { char c1, c2; sint i1, i2, r; sint i, k, pos, to_do; sint count; float score; char s1[100], s2[100]; to_do = print_ptr - 1; i1 = tsb1; i2 = tsb2; pos = 0; count = 0; for (i = 1; i <= to_do; ++i) { if (debug > 1) fprintf(stdout, "%d ", (pint) displ[i]); if (displ[i] == 0) { c1 = seq_array[seq1][i1]; c2 = seq_array[seq2][i2]; if (debug > 0) { if (c1 > max_aa) s1[pos] = '-'; else s1[pos] = amino_acid_codes[c1]; if (c2 > max_aa) s2[pos] = '-'; else s2[pos] = amino_acid_codes[c2]; } if ((c1 != gap_pos1) && (c1 != gap_pos2) && (c1 == c2)) count++; ++i1; ++i2; ++pos; } else { if ((k = displ[i]) > 0) { if (debug > 0) for (r = 0; r < k; r++) { s1[pos + r] = '-'; if (seq_array[seq2][i2 + r] > max_aa) s2[pos + r] = '-'; else s2[pos + r] = amino_acid_codes[seq_array[seq2][i2 + r]]; } i2 += k; pos += k; } else { if (debug > 0) for (r = 0; r < (-k); r++) { s2[pos + r] = '-'; if (seq_array[seq1][i1 + r] > max_aa) s1[pos + r] = '-'; else s1[pos + r] = amino_acid_codes[seq_array[seq1][i1 + r]]; } i1 -= k; pos -= k; } } } if (debug > 0) fprintf(stdout, "\n"); if (debug > 0) { for (i = 0; i < pos; i++) fprintf(stdout, "%c", s1[i]); fprintf(stdout, "\n"); for (i = 0; i < pos; i++) fprintf(stdout, "%c", s2[i]); fprintf(stdout, "\n"); } /* if (count <= 0) count = 1; */ score = 
100.0 * (float) count; return (score); } static void del(sint k) { if (last_print < 0) last_print = displ[print_ptr - 1] -= k; else last_print = displ[print_ptr++] = -(k); } static lint prfscore(sint n, sint m) { sint ix; lint score; score = 0.0; for (ix = 0; ix <= max_aa; ix++) { score += (profile1[n][ix] * profile2[m][ix]); } score += (profile1[n][gap_pos1] * profile2[m][gap_pos1]); score += (profile1[n][gap_pos2] * profile2[m][gap_pos2]); return (score / 10); } static void pdel(sint k) { if (last_print < 0) last_print = displ[print_ptr - 1] -= k; else last_print = displ[print_ptr++] = -(k); } static void padd(sint k) { if (last_print < 0) { displ[print_ptr - 1] = k; displ[print_ptr++] = last_print; } else last_print = displ[print_ptr++] = k; } static void palign(void) { displ[print_ptr++] = last_print = 0; } /* * pidx: index to pa[] */ static void my_palign(int *pa, int *pidx) { pa[(*pidx)++] = 1; return; } /* * pidx: index to pa[] */ static void my_padd(int *pa, int arg, int *pidx) { pa[(*pidx)++] = 2; pa[(*pidx)++] = arg; return; } /* * pidx: index to pa[] */ static void my_pdel(int *pa, int arg, int *pidx) { pa[(*pidx)++] = 3; pa[(*pidx)++] = arg; return; } lint pdiff(sint A, sint B, sint M, sint N, sint go1, sint go2) { sint midi, midj, type; lint midh; static lint t, tl, g, h; int my_rank, np; int dest1, dest2, lastp; int mybsize, position; MPI_Status status; char *mpi_buffer; MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); { static sint i, j; static lint hh, f, e, s; /* Boundary cases: M <= 1 or N == 0 */ if (debug > 2) fprintf(stdout, "A %d B %d M %d N %d midi %d go1 %d go2 %d\n", (pint) A, (pint) B, (pint) M, (pint) N, (pint) M / 2, (pint) go1, (pint) go2); /* if sequence B is empty */ if (N <= 0) { /* if sequence A is not empty */ if (M > 0) { /* delete residues A[1] to A[M] */ /* pdel(M); */ my_pdel(pa, M, &pidx); } return (-gap_penalty1(A, B, M)); } /* if sequence A is empty */ if (M <= 1) { if (M <= 0) { /* insert residues B[1] to B[N] */ /* padd(N); */ 
my_padd(pa, N, &pidx); return (-gap_penalty2(A, B, N)); } /* if sequence A has just one residue */ if (go1 == 0) midh = -gap_penalty1(A + 1, B + 1, N); else midh = -gap_penalty2(A + 1, B, 1) - gap_penalty1(A + 1, B + 1, N); midj = 0; for (j = 1; j <= N; j++) { hh = -gap_penalty1(A, B + 1, j - 1) + prfscore(A + 1, B + j) - gap_penalty1(A + 1, B + j + 1, N - j); if (hh > midh) { midh = hh; midj = j; } } if (midj == 0) { /* padd(N); */ my_padd(pa, N, &pidx); /* pdel(1); */ my_pdel(pa, 1, &pidx); } else { if (midj > 1) { /* padd(midj - 1); */ my_padd(pa, midj - 1, &pidx); } /* palign() */ my_palign(pa, &pidx); if (midj < N) { /* padd(N - midj); */ my_padd(pa, N - midj, &pidx); } } return midh; } /* Divide sequence A in half: midi */ midi = M / 2; /* In a forward phase, calculate all HH[j] and HH[j] */ HH[0] = 0.0; t = -open_penalty1(A, B + 1); tl = -ext_penalty1(A, B + 1); for (j = 1; j <= N; j++) { HH[j] = t = t + tl; DD[j] = t - open_penalty2(A + 1, B + j); } if (go1 == 0) t = 0; else t = -open_penalty2(A + 1, B); tl = -ext_penalty2(A + 1, B); if (avail_p[0] > 1) { /* * Let "dest2" to compute preverse_pass() **/ if ((avail_p[0] % 2) == 0) { dest1 = avail_p[1]; dest2 = avail_p[1] + avail_p[0] / 2; } else { dest1 = avail_p[1]; dest2 = avail_p[1] + (avail_p[0] + 1) / 2; } /* determine buffer size */ mybsize = 0; mybsize += 9 * sizeof(int) + sizeof(Boolean); for (i = A; i < A + M + 2; i++) mybsize += (LENCOL + 2) * sizeof(sint); for (i = B; i < B + N + 2; i++) mybsize += (LENCOL + 2) * sizeof(sint); MPI_Send(&mybsize, 1, MPI_INT, dest2, PREVERSE_TAG, MPI_COMM_WORLD); mpi_buffer = (char *) malloc(mybsize * sizeof(char)); assert(mpi_buffer); position = 0; MPI_Pack(&prf_length1, 1, MPI_INT, mpi_buffer, mybsize, &position, MPI_COMM_WORLD); MPI_Pack(&prf_length2, 1, MPI_INT, mpi_buffer, mybsize, &position, MPI_COMM_WORLD); MPI_Pack(&endgappenalties, 1, MPI_CHAR, mpi_buffer, mybsize, &position, MPI_COMM_WORLD); MPI_Pack(&max_aa, 1, MPI_INT, mpi_buffer, mybsize, &position, 
MPI_COMM_WORLD); MPI_Pack(&midi, 1, MPI_INT, mpi_buffer, mybsize, &position, MPI_COMM_WORLD); MPI_Pack(&go2, 1, MPI_INT, mpi_buffer, mybsize, &position, MPI_COMM_WORLD); MPI_Pack(&A, 1, MPI_INT, mpi_buffer, mybsize, &position, MPI_COMM_WORLD); MPI_Pack(&B, 1, MPI_INT, mpi_buffer, mybsize, &position, MPI_COMM_WORLD); MPI_Pack(&M, 1, MPI_INT, mpi_buffer, mybsize, &position, MPI_COMM_WORLD); MPI_Pack(&N, 1, MPI_INT, mpi_buffer, mybsize, &position, MPI_COMM_WORLD); for (i = A; i < A + M + 2; i++) MPI_Pack(profile1[i], (LENCOL + 2), MPI_INT, mpi_buffer, mybsize, &position, MPI_COMM_WORLD); for (i = B; i < B + N + 2; i++) MPI_Pack(profile2[i], (LENCOL + 2), MPI_INT, mpi_buffer, mybsize, &position, MPI_COMM_WORLD); MPI_Send(mpi_buffer, mybsize, MPI_PACKED, dest2, MY_DATA_TAG, MPI_COMM_WORLD); free(mpi_buffer); /* in the meantime, we will be computing pforward_pass() */ pforward_pass(midi, t, tl, A, B, N, HH, DD); DD[0] = HH[0]; /* preverse_pass(midi, A, B, M, N, go2, RR, SS, gS); */ /* receiving RR[], SS[], gS[] */ MPI_Recv(&mybsize, 1, MPI_INT, dest2, MY_RESULT_TAG, MPI_COMM_WORLD, &status); mpi_buffer = (char *) malloc(mybsize * sizeof(char)); assert(mpi_buffer); position = 0; MPI_Recv(mpi_buffer, mybsize, MPI_PACKED, dest2, MY_RESULT_TAG, MPI_COMM_WORLD, &status); MPI_Unpack(mpi_buffer, mybsize, &position, RR, (max_aln_length + 1), MPI_INT, MPI_COMM_WORLD); MPI_Unpack(mpi_buffer, mybsize, &position, SS, (max_aln_length + 1), MPI_INT, MPI_COMM_WORLD); MPI_Unpack(mpi_buffer, mybsize, &position, gS, (max_aln_length + 1), MPI_INT, MPI_COMM_WORLD); free(mpi_buffer); } else { /* single process */ pforward_pass(midi, t, tl, A, B, N, HH, DD); DD[0] = HH[0]; preverse_pass(midi, A, B, M, N, go2, RR, SS, gS); } SS[N] = RR[N]; gS[N] = open_penalty2(A + midi + 1, B + N); /* find midj, such that HH[j]+RR[j] or DD[j]+SS[j]+gap is the maximum */ midh = HH[0] + RR[0]; midj = 0; type = 1; for (j = 0; j <= N; j++) { hh = HH[j] + RR[j]; if (hh >= midh) if (hh > midh || (HH[j] != DD[j] && 
RR[j] == SS[j])) { midh = hh; midj = j; } } for (j = N; j >= 0; j--) { hh = DD[j] + SS[j] + gS[j]; if (hh > midh) { midh = hh; midj = j; type = 2; } } } /* Conquer recursively around midpoint */ /* * avail_p[0] = navailp: the number of available computing processes, * excluding the process whose rank is 0. * avail_p[1] = 1: the MPI rank of the first available process is 1. * avail_p[1] = 2: the MPI rank of the second available process is 2. */ MPI_Comm_rank(MPI_COMM_WORLD, &my_rank); /* * If only one available process ... */ if (avail_p[0] == 1) { if (type == 1) { pdiff(A, B, midi, midj, go1, 1); pdiff(A + midi, B + midj, M - midi, N - midj, 1, go2); } else { pdiff(A, B, midi - 1, midj, go1, 0); my_pdel(pa, 2, &pidx); /* pdel(2); */ pdiff(A + midi + 1, B + midj, M - midi - 1, N - midj, 0, go2); } return midh; } /* * We have more than two, including two, available processes. * * dest1: the MPI rank of the first pdiff() task * dest2: the MPI rank of the second pdiff() task */ if ((avail_p[0] % 2) == 0) { dest1 = avail_p[1]; dest2 = avail_p[1] + avail_p[0] / 2; } else { dest1 = avail_p[1]; dest2 = avail_p[1] + (avail_p[0] + 1) / 2; } lastp = avail_p[0] + avail_p[1] - 1; if (type == 1) { /* Type 1 gaps */ int mybsize; char *mpi_buffer, *mpi_buffer2; int i, j, arg1, arg2, arg3, arg4, arg5, arg6; int *HH2, *RR2, *SS2, *DD2, *displ2, *gS2; int print_ptr2, last_print2; int score, position; int navailp; /* number of available processes */ int *original_pa, *patmp; MPI_Status status; MPI_Request dest2_req; /***************************************************************** MPI_Send(): prf_length1, prf_length2, profile1[][], profile2[][], endgappenalties, max_aa, arg1, arg2, arg3, arg4, arg5, arg6, *****************************************************************/ /********************************************** * DEBUG: send to dest2 **********************************************/ arg1 = A + midi; arg2 = B + midj; arg3 = M - midi; arg4 = N - midj; arg5 = 1; arg6 = go2; mybsize 
= 0; mybsize += 9 * sizeof(sint) + sizeof(Boolean); for (i = arg1; i < arg1 + arg3 + 2; i++) mybsize += (LENCOL + 2) * sizeof(sint); for (i = arg2; i < arg2 + arg4 + 2; i++) mybsize += (LENCOL + 2) * sizeof(sint); mybsize += sizeof(int); navailp = lastp - dest2 + 1; mybsize += (navailp + 1) * sizeof(int); /* for gap_pos1 and gap_pos2 */ mybsize += 2*sizeof(sint); MPI_Send(&mybsize, 1, MPI_INT, dest2, MALIGN_TAG, MPI_COMM_WORLD); mpi_buffer2 = (char *) malloc(mybsize * sizeof(char)); assert(mpi_buffer2); position = 0; MPI_Pack(&prf_length1, 1, MPI_INT, mpi_buffer2, mybsize, &position, MPI_COMM_WORLD); MPI_Pack(&prf_length2, 1, MPI_INT, mpi_buffer2, mybsize, &position, MPI_COMM_WORLD); MPI_Pack(&gap_pos1, 1, MPI_INT, mpi_buffer2, mybsize, &position, MPI_COMM_WORLD); MPI_Pack(&gap_pos2, 1, MPI_INT, mpi_buffer2, mybsize, &position, MPI_COMM_WORLD); MPI_Pack(&arg1, 1, MPI_INT, mpi_buffer2, mybsize, &position, MPI_COMM_WORLD); MPI_Pack(&arg2, 1, MPI_INT, mpi_buffer2, mybsize, &position, MPI_COMM_WORLD); MPI_Pack(&arg3, 1, MPI_INT, mpi_buffer2, mybsize, &position, MPI_COMM_WORLD); MPI_Pack(&arg4, 1, MPI_INT, mpi_buffer2, mybsize, &position, MPI_COMM_WORLD); MPI_Pack(&arg5, 1, MPI_INT, mpi_buffer2, mybsize, &position, MPI_COMM_WORLD); MPI_Pack(&arg6, 1, MPI_INT, mpi_buffer2, mybsize, &position, MPI_COMM_WORLD); MPI_Pack(&endgappenalties, 1, MPI_CHAR, mpi_buffer2, mybsize, &position, MPI_COMM_WORLD); MPI_Pack(&max_aa, 1, MPI_INT, mpi_buffer2, mybsize, &position, MPI_COMM_WORLD); for (i = arg1; i < arg1 + arg3 + 2; i++) MPI_Pack(profile1[i], (LENCOL + 2), MPI_INT, mpi_buffer2, mybsize, &position, MPI_COMM_WORLD); for (i = arg2; i < arg2 + arg4 + 2; i++) MPI_Pack(profile2[i], (LENCOL + 2), MPI_INT, mpi_buffer2, mybsize, &position, MPI_COMM_WORLD); /* * We need to adjust the list of available processes. 
*/ avail_p[0] = navailp; for (i = 1; i <= navailp; i++) avail_p[i] = dest2 + i - 1; MPI_Pack(&navailp, 1, MPI_INT, mpi_buffer2, mybsize, &position, MPI_COMM_WORLD); MPI_Pack(avail_p, (navailp + 1), MPI_INT, mpi_buffer2, mybsize, &position, MPI_COMM_WORLD); #ifdef NON_BLOCKING MPI_Isend(mpi_buffer2, mybsize, MPI_PACKED, dest2, MY_DATA_TAG, MPI_COMM_WORLD, &dest2_req); /* remember to put MPI_Wait() to a proper place */ MPI_Wait(&dest2_req, &status); free(mpi_buffer2); #else MPI_Send(mpi_buffer2, mybsize, MPI_PACKED, dest2, MY_DATA_TAG, MPI_COMM_WORLD); free(mpi_buffer2); #endif /********************************************* * DEBUG: recursively call pdiff() on dest1 *********************************************/ arg1 = A; arg2 = B; arg3 = midi; arg4 = midj; arg5 = go1; arg6 = 1; original_pa = pa; /* DEBUG: temporarily set the length to three times of max_aln_length */ pa = (int *) calloc(3 * max_aln_length, sizeof(int)); assert(pa); navailp = dest2 - dest1; /* * We need to adjust the list of available processes. */ avail_p[0] = navailp; for (i = 1; i <= navailp; i++) avail_p[i] = dest1 + i - 1; score = pdiff(arg1, arg2, arg3, arg4, arg5, arg6); /***************************************************************** MPI_Recv(): score, pa *****************************************************************/ /************************************************************ * DEBUG: merge the results of the previous pdiff() call ************************************************************/ /* * Integrate pa[] into the global array pa[] ... 
*/ patmp = pa; pa = original_pa; i = 0; /* find the end of pa[] */ while (pa[i]) i++; j = 0; while (patmp[j]) pa[i++] = patmp[j++]; free(patmp); /********************************************* * DEBUG: receive from dest2 *********************************************/ MPI_Recv(&mybsize, 1, MPI_INT, dest2, MY_RESULT_TAG, MPI_COMM_WORLD, &status); mpi_buffer = (char *) malloc(mybsize * sizeof(char)); assert(mpi_buffer); position = 0; MPI_Recv(mpi_buffer, mybsize, MPI_PACKED, dest2, MY_RESULT_TAG, MPI_COMM_WORLD, &status); patmp = (int *) calloc(3 * max_aln_length, sizeof(int)); assert(patmp); MPI_Unpack(mpi_buffer, mybsize, &position, patmp, 3 * max_aln_length, MPI_INT, MPI_COMM_WORLD); MPI_Unpack(mpi_buffer, mybsize, &position, &score, 1, MPI_INT, MPI_COMM_WORLD); free(mpi_buffer); /* * Integrate patmp[] into the global array pa[] ... */ i = 0; /* find the end of pa[] */ while (pa[i]) i++; j = 0; while (patmp[j]) pa[i++] = patmp[j++]; free(patmp); } else { /* Type 2 gaps */ int mybsize; char *mpi_buffer, *mpi_buffer2; int i, j, arg1, arg2, arg3, arg4, arg5, arg6; int *HH2, *RR2, *SS2, *DD2, *displ2, *gS2; int print_ptr2, last_print2; int score, position; int navailp; /* number of available processes */ int *original_pa, *patmp; MPI_Status status; MPI_Request dest2_req; /***************************************************************** MPI_Send(): prf_length1, prf_length2, profile1[][], profile2[][], endgappenalties, max_aa, arg1, arg2, arg3, arg4, arg5, arg6, *****************************************************************/ /********************************************** * DEBUG: send to dest2 **********************************************/ arg1 = A + midi + 1; arg2 = B + midj; arg3 = M - midi - 1; arg4 = N - midj; arg5 = 0; arg6 = go2; mybsize = 0; mybsize += 9 * sizeof(sint) + sizeof(Boolean); for (i = arg1; i < arg1 + arg3 + 2; i++) mybsize += (LENCOL + 2) * sizeof(sint); for (i = arg2; i < arg2 + arg4 + 2; i++) mybsize += (LENCOL + 2) * sizeof(sint); mybsize += 
sizeof(int); navailp = lastp - dest2 + 1; mybsize += (navailp + 1) * sizeof(int); /* for gap_pos1 and gap_pos2 */ mybsize += 2*sizeof(sint); MPI_Send(&mybsize, 1, MPI_INT, dest2, MALIGN_TAG, MPI_COMM_WORLD); mpi_buffer2 = (char *) malloc(mybsize * sizeof(char)); assert(mpi_buffer2); position = 0; MPI_Pack(&prf_length1, 1, MPI_INT, mpi_buffer2, mybsize, &position, MPI_COMM_WORLD); MPI_Pack(&prf_length2, 1, MPI_INT, mpi_buffer2, mybsize, &position, MPI_COMM_WORLD); MPI_Pack(&gap_pos1, 1, MPI_INT, mpi_buffer2, mybsize, &position, MPI_COMM_WORLD); MPI_Pack(&gap_pos2, 1, MPI_INT, mpi_buffer2, mybsize, &position, MPI_COMM_WORLD); MPI_Pack(&arg1, 1, MPI_INT, mpi_buffer2, mybsize, &position, MPI_COMM_WORLD); MPI_Pack(&arg2, 1, MPI_INT, mpi_buffer2, mybsize, &position, MPI_COMM_WORLD); MPI_Pack(&arg3, 1, MPI_INT, mpi_buffer2, mybsize, &position, MPI_COMM_WORLD); MPI_Pack(&arg4, 1, MPI_INT, mpi_buffer2, mybsize, &position, MPI_COMM_WORLD); MPI_Pack(&arg5, 1, MPI_INT, mpi_buffer2, mybsize, &position, MPI_COMM_WORLD); MPI_Pack(&arg6, 1, MPI_INT, mpi_buffer2, mybsize, &position, MPI_COMM_WORLD); MPI_Pack(&endgappenalties, 1, MPI_CHAR, mpi_buffer2, mybsize, &position, MPI_COMM_WORLD); MPI_Pack(&max_aa, 1, MPI_INT, mpi_buffer2, mybsize, &position, MPI_COMM_WORLD); for (i = arg1; i < arg1 + arg3 + 2; i++) MPI_Pack(profile1[i], (LENCOL + 2), MPI_INT, mpi_buffer2, mybsize, &position, MPI_COMM_WORLD); for (i = arg2; i < arg2 + arg4 + 2; i++) MPI_Pack(profile2[i], (LENCOL + 2), MPI_INT, mpi_buffer2, mybsize, &position, MPI_COMM_WORLD); /* * We need to adjust the list of available processes. 
*/ avail_p[0] = navailp; for (i = 1; i <= navailp; i++) avail_p[i] = dest2 + i - 1; MPI_Pack(&navailp, 1, MPI_INT, mpi_buffer2, mybsize, &position, MPI_COMM_WORLD); MPI_Pack(avail_p, (navailp + 1), MPI_INT, mpi_buffer2, mybsize, &position, MPI_COMM_WORLD); MPI_Send(mpi_buffer2, mybsize, MPI_PACKED, dest2, MY_DATA_TAG, MPI_COMM_WORLD); free(mpi_buffer2); /********************************************* * DEBUG: recursively call pdiff() on dest1 *********************************************/ arg1 = A; arg2 = B; arg3 = midi - 1; arg4 = midj; arg5 = go1; arg6 = 0; original_pa = pa; /* DEBUG: temporarily set the length to three times of max_aln_length */ pa = (int *) calloc(3 * max_aln_length, sizeof(int)); assert(pa); navailp = dest2 - dest1; /* * We need to adjust the list of available processes. */ avail_p[0] = navailp; for (i = 1; i <= navailp; i++) avail_p[i] = dest1 + i - 1; score = pdiff(arg1, arg2, arg3, arg4, arg5, arg6); /***************************************************************** MPI_Recv(): score, pa *****************************************************************/ /************************************************************ * DEBUG: merge the results of the previous pdiff() call ************************************************************/ /* * Integrate pa[] into the global array pa[] ... */ patmp = pa; pa = original_pa; /* * Integrate patmp[] into the global array pa[] ... */ i = 0; /* find the end of pa[] */ while (pa[i]) i++; j = 0; while (patmp[j]) pa[i++] = patmp[j++]; free(patmp); /* We need to correct the pidx value for the subsequence my_pdel() */ pidx = i; /********************************************* * perform the original pdel(2) in between * the two pdiff()'s. 
*********************************************/ /* pdel(2); */ my_pdel(pa, 2, &pidx); /********************************************* * DEBUG: receive from dest2 *********************************************/ /* MPE_Log_event(5,0,"recv"); */ MPI_Recv(&mybsize, 1, MPI_INT, dest2, MY_RESULT_TAG, MPI_COMM_WORLD, &status); /* MPE_Log_event(6,0,"recved"); */ mpi_buffer = (char *) malloc(mybsize * sizeof(char)); assert(mpi_buffer); position = 0; /* MPE_Log_event(5,0,"recv"); */ MPI_Recv(mpi_buffer, mybsize, MPI_PACKED, dest2, MY_RESULT_TAG, MPI_COMM_WORLD, &status); /* MPE_Log_event(6,0,"recved"); */ patmp = (int *) calloc(3 * max_aln_length, sizeof(int)); assert(patmp); /* MPE_Log_event(13,0,"unpacking"); */ MPI_Unpack(mpi_buffer, mybsize, &position, patmp, 3 * max_aln_length, MPI_INT, MPI_COMM_WORLD); MPI_Unpack(mpi_buffer, mybsize, &position, &score, 1, MPI_INT, MPI_COMM_WORLD); /* MPE_Log_event(14,0,"unpacked"); */ free(mpi_buffer); /* * Integrate patmp[] into the global array pa[] ... */ i = 0; /* find the end of pa[] */ while (pa[i]) i++; j = 0; while (patmp[j]) pa[i++] = patmp[j++]; free(patmp); } return midh; /* Return the score of the best alignment */ } /* calculate the score for opening a gap at residues A[i] and B[j] */ static sint open_penalty1(sint i, sint j) { sint g; if (!endgappenalties && (i == 0 || i == prf_length1)) return (0); g = profile2[j][GAPCOL] + profile1[i][GAPCOL]; return (g); } /* calculate the score for extending an existing gap at A[i] and B[j] */ static sint ext_penalty1(sint i, sint j) { sint h; if (!endgappenalties && (i == 0 || i == prf_length1)) return (0); h = profile2[j][LENCOL]; return (h); } /* calculate the score for a gap of length k, at residues A[i] and B[j] */ static sint gap_penalty1(sint i, sint j, sint k) { sint ix; sint gp; sint g, h = 0; if (k <= 0) return (0); if (!endgappenalties && (i == 0 || i == prf_length1)) return (0); g = profile2[j][GAPCOL] + profile1[i][GAPCOL]; for (ix = 0; ix < k && ix + j < prf_length2; ix++) 
h = profile2[ix + j][LENCOL]; gp = g + h * k; return (gp); } /* calculate the score for opening a gap at residues A[i] and B[j] */ static sint open_penalty2(sint i, sint j) { sint g; if (!endgappenalties && (j == 0 || j == prf_length2)) return (0); g = profile1[i][GAPCOL] + profile2[j][GAPCOL]; return (g); } /* calculate the score for extending an existing gap at A[i] and B[j] */ static sint ext_penalty2(sint i, sint j) { sint h; if (!endgappenalties && (j == 0 || j == prf_length2)) return (0); h = profile1[i][LENCOL]; return (h); } /* calculate the score for a gap of length k, at residues A[i] and B[j] */ static sint gap_penalty2(sint i, sint j, sint k) { sint ix; sint gp; sint g, h = 0; if (k <= 0) return (0); if (!endgappenalties && (j == 0 || j == prf_length2)) return (0); g = profile1[i][GAPCOL] + profile2[j][GAPCOL]; for (ix = 0; ix < k && ix + i < prf_length1; ix++) h = profile1[ix + i][LENCOL]; gp = g + h * k; return (gp); } void pforward_pass(int midi, int t, int tl, int A, int B, int N, pwint * HH, pwint * DD) { int i, j; int s, f, g, h, hh, e; for (i = 1; i <= midi; i++) { s = HH[0]; HH[0] = hh = t = t + tl; f = t - open_penalty1(A + i, B + 1); for (j = 1; j <= N; j++) { g = open_penalty1(A + i, B + j); h = ext_penalty1(A + i, B + j); if ((hh = hh - g - h) > (f = f - h)) f = hh; g = open_penalty2(A + i, B + j); h = ext_penalty2(A + i, B + j); if ((hh = HH[j] - g - h) > (e = DD[j] - h)) e = hh; hh = s + prfscore(A + i, B + j); if (f > hh) hh = f; if (e > hh) hh = e; s = HH[j]; HH[j] = hh; DD[j] = e; } } return; } void preverse_pass(int midi, int A, int B, int M, int N, int go2, pwint * RR, pwint * SS, lint * gS) { int i, j, h, t, tl, g, s, f, e, hh; tl = 0; RR[N] = 0; for (j = N - 1; j >= 0; j--) { g = -open_penalty1(A + M, B + j + 1); tl -= ext_penalty1(A + M, B + j + 1); RR[j] = g + tl; SS[j] = RR[j] - open_penalty2(A + M, B + j); gS[j] = open_penalty2(A + M, B + j); } tl = 0.0; for (i = M - 1; i >= midi; i--) { s = RR[N]; if (go2 == 0) g = 0; else g = 
-open_penalty2(A + i + 1, B + N); tl -= ext_penalty2(A + i + 1, B + N); RR[N] = hh = g + tl; t = open_penalty1(A + i, B + N); f = RR[N] - t; for (j = N - 1; j >= 0; j--) { g = open_penalty1(A + i, B + j + 1); h = ext_penalty1(A + i, B + j + 1); if ((hh = hh - g - h) > (f = f - h - g + t)) f = hh; t = g; g = open_penalty2(A + i + 1, B + j); h = ext_penalty2(A + i + 1, B + j); hh = RR[j] - g - h; if (i == (M - 1)) { e = SS[j] - h; } else { e = SS[j] - h - g + open_penalty2(A + i + 2, B + j); gS[j] = g; } if (hh > e) e = hh; hh = s + prfscore(A + i + 1, B + j + 1); if (f > hh) hh = f; if (e > hh) hh = e; s = RR[j]; RR[j] = hh; SS[j] = e; } } return; } /* * Compute a few arrays used in "nj_tree()/trees.c". * * length: the length of the incoming data (in bytes) * from: the MPI rank of the sending process */ static void mpi_njtree_slave(int length, int from) { MPI_Status status; int myrank; int position; double stmp; int npidx; /* idx for nonempty[] */ char *pbuffer, *pblock; int *mpirow; int *nonempty; double **tmat; int tmat_len; double sumd2; int i, j, which_row; double *rdiq; MPI_Comm_rank(MPI_COMM_WORLD, &myrank); /* We need a buffer to store the packed data */ pbuffer = (char *) malloc(length * sizeof(char)); assert(pbuffer); MPI_Recv(pbuffer, length, MPI_PACKED, from, NJTREE_DATA, MPI_COMM_WORLD, &status); position = 0; MPI_Unpack(pbuffer, length, &position, &tmat_len, 1, MPI_INT, MPI_COMM_WORLD); mpirow = (int *) malloc((tmat_len+1) * sizeof(int)); assert(mpirow); MPI_Unpack(pbuffer, length, &position, mpirow, (tmat_len+1), MPI_INT, MPI_COMM_WORLD); pblock = (char *) malloc((tmat_len + 1) * (tmat_len + 1) * sizeof(double)); assert(pblock); tmat = (double **) malloc((tmat_len + 1) * sizeof(double *)); assert(tmat); for (i = 0; i < (tmat_len + 1); i++) tmat[i] = (double *) pblock + (tmat_len * i); /* * Since not all tmat[][] rows were sending to this MPI slave, * we'd better remember those rows that are not empty. 
*/ nonempty = (int *) calloc((tmat_len+1), sizeof(int)); assert(nonempty); npidx = 0; for (i = 1; i <=tmat_len; i++) { if (mpirow[i] == myrank) { nonempty[npidx++] = i; MPI_Unpack(pbuffer, length, &position, &tmat[i][1], tmat_len, MPI_DOUBLE, MPI_COMM_WORLD); } } /* * Allocate space for rdiq[1..tmat_len] */ rdiq = (double *) calloc((tmat_len+1), sizeof(double)); assert(rdiq); /**** Doing computation here ***********/ sumd2 = 0.0; for (i = 0; i < npidx; i++) { which_row = nonempty[i]; for (j = 1; j <= tmat_len; j++) rdiq[which_row] += tmat[which_row][j]; for (j = which_row+1; j <= tmat_len; j++) sumd2 += tmat[which_row][j]; } /**** Sending information back ***********/ /* Note that I intentionally use rdiq[0] to store "sumd2" */ rdiq[0] = sumd2; MPI_Send(rdiq, (tmat_len+1), MPI_DOUBLE, from, TMAT_ROW_SUM, MPI_COMM_WORLD); free(rdiq); free(mpirow); free(pbuffer); free(pblock); free(tmat); free(nonempty); return; } static void mpi_show_pair_slave(int length, int from) { MPI_Status status; int myrank; int position,mybsize; double stmp; char *pbuffer, *pblock; int i, j, which_row,seq1,seq2; int zero; MPI_Comm_rank(MPI_COMM_WORLD, &myrank); /* We need a buffer to store the packed data */ pbuffer = (char *) malloc(length * sizeof(char)); assert(pbuffer); MPI_Recv(pbuffer, length, MPI_PACKED, from, SHOW_PAIR_DATA, MPI_COMM_WORLD, &status); position = 0; MPI_Unpack(pbuffer, length, &position, &dnaflag, 1, MPI_CHAR, MPI_COMM_WORLD); MPI_Unpack(pbuffer, length, &position, &seq1, 1, MPI_INT, MPI_COMM_WORLD); MPI_Unpack(pbuffer, length, &position, &seq2, 1, MPI_INT, MPI_COMM_WORLD); MPI_Unpack(pbuffer, length, &position, &nseqs, 1, MPI_INT, MPI_COMM_WORLD); seqlen_array = (int *) malloc((nseqs + 1) * sizeof(int)); assert(seqlen_array); MPI_Unpack(pbuffer, length, &position, seqlen_array, (nseqs+1), MPI_INT, MPI_COMM_WORLD); seq_array = (char **) malloc((nseqs + 1) * sizeof(char *)); assert(seq_array); seq_array[seq1] = (char *)malloc((seqlen_array[seq1]+1) * sizeof(char)); 
assert(seq_array[seq1]); seq_array[seq2] = (char *)malloc((seqlen_array[seq2]+1) * sizeof(char)); assert(seq_array[seq2]); MPI_Unpack(pbuffer, length, &position, seq_array[seq1], (seqlen_array[seq1]+1), MPI_CHAR, MPI_COMM_WORLD); MPI_Unpack(pbuffer, length, &position, seq_array[seq2], (seqlen_array[seq2]+1), MPI_CHAR, MPI_COMM_WORLD); MPI_Unpack(pbuffer, length, &position, &max_aa, 1, MPI_INT, MPI_COMM_WORLD); MPI_Unpack(pbuffer, length, &position, &max_aln_length, 1, MPI_INT, MPI_COMM_WORLD); MPI_Unpack(pbuffer, length, &position, &dna_ktup, 1, MPI_INT, MPI_COMM_WORLD); MPI_Unpack(pbuffer, length, &position, &dna_window, 1, MPI_INT, MPI_COMM_WORLD); MPI_Unpack(pbuffer, length, &position, &dna_signif, 1, MPI_INT, MPI_COMM_WORLD); MPI_Unpack(pbuffer, length, &position, &dna_wind_gap, 1, MPI_INT, MPI_COMM_WORLD); MPI_Unpack(pbuffer, length, &position, &prot_ktup, 1, MPI_INT, MPI_COMM_WORLD); MPI_Unpack(pbuffer, length, &position, &prot_window, 1, MPI_INT, MPI_COMM_WORLD); MPI_Unpack(pbuffer, length, &position, &prot_signif, 1, MPI_INT, MPI_COMM_WORLD); MPI_Unpack(pbuffer, length, &position, &prot_wind_gap, 1, MPI_INT, MPI_COMM_WORLD); free(pbuffer); /**** Allocating spaces ***********/ accum = (sint **)ckalloc( 5*sizeof (sint *) ); for (i=0;i<5;i++) accum[i] = (sint *) ckalloc((2*max_aln_length+1) * sizeof (sint) ); displ = (sint *) ckalloc( (2*max_aln_length +1) * sizeof (sint) ); slopes = (char *)ckalloc( (2*max_aln_length +1) * sizeof (char)); diag_index = (sint *) ckalloc( (2*max_aln_length +1) * sizeof (sint) ); zza = (sint *)ckalloc( (max_aln_length+1) * sizeof (sint) ); zzb = (sint *)ckalloc( (max_aln_length+1) * sizeof (sint) ); zzc = (sint *)ckalloc( (max_aln_length+1) * sizeof (sint) ); zzd = (sint *)ckalloc( (max_aln_length+1) * sizeof (sint) ); /**** Doing computation here ***********/ if (dnaflag) { ktup = dna_ktup; window = dna_window; signif = dna_signif; wind_gap = dna_wind_gap; make_n_ptrs(zza, zzc, seq1, seqlen_array[seq1]); make_n_ptrs(zzb, zzd, 
seq2, seqlen_array[seq2]); pair_align(seq1, seqlen_array[seq1], seqlen_array[seq2]); } else { ktup = prot_ktup; window = prot_window; signif = prot_signif; wind_gap = prot_wind_gap; make_p_ptrs(zza, zzc, seq1, seqlen_array[seq1]); make_p_ptrs(zzb, zzd, seq2, seqlen_array[seq2]); pair_align(seq1, seqlen_array[seq1], seqlen_array[seq2]); } /* Send information back: * 1. accum[0][maxsf] * 2. seq1, seq2 */ mybsize = 3*sizeof(int); MPI_Send(&mybsize, 1, MPI_INT, from, SHOW_PAIR_RESULT_SIZE, MPI_COMM_WORLD); pbuffer = (char *)malloc(mybsize*sizeof(char)); assert(pbuffer); position=0; zero=0; if (maxsf) MPI_Pack(&accum[0][maxsf], 1, MPI_INT, pbuffer, mybsize, &position, MPI_COMM_WORLD); else MPI_Pack(&zero, 1, MPI_INT, pbuffer, mybsize, &position, MPI_COMM_WORLD); MPI_Pack(&seq1, 1, MPI_INT, pbuffer, mybsize, &position, MPI_COMM_WORLD); MPI_Pack(&seq2, 1, MPI_INT, pbuffer, mybsize, &position, MPI_COMM_WORLD); MPI_Send(pbuffer, mybsize, MPI_PACKED, from, SHOW_PAIR_RESULT, MPI_COMM_WORLD); free(pbuffer); /* house cleaning */ for (i=0;i<5;i++) accum[i]=ckfree((void *)accum[i]); accum=ckfree((void *)accum); displ=ckfree((void *)displ); slopes=ckfree((void *)slopes); diag_index=ckfree((void *)diag_index); zza=ckfree((void *)zza); zzb=ckfree((void *)zzb); zzc=ckfree((void *)zzc); zzd=ckfree((void *)zzd); if (seq1!=seq2) { free(seq_array[seq1]); free(seq_array[seq2]); } else { free(seq_array[seq1]); } free(seq_array); free(seqlen_array); return; } static void make_p_ptrs(sint * tptr, sint * pl, sint naseq, sint l) { static sint a[10]; sint i, j, limit, code, flag; char residue; for (i = 1; i <= ktup; i++) a[i] = (sint) pow((double) (max_aa + 1), (double) (i - 1)); limit = (sint) pow((double) (max_aa + 1), (double) ktup); for (i = 1; i <= limit; ++i) pl[i] = 0; for (i = 1; i <= l; ++i) tptr[i] = 0; for (i = 1; i <= (l - ktup + 1); ++i) { code = 0; flag = FALSE; for (j = 1; j <= ktup; ++j) { residue = seq_array[naseq][i + j - 1]; if ((residue < 0) || (residue > max_aa)) { flag = 
TRUE; break; } code += ((residue) * a[j]); } if (flag) continue; ++code; if (pl[code] != 0) tptr[i] = pl[code]; pl[code] = i; } }

/*
 * Nucleotide k-tuple indexing for sequence naseq (length len).
 *
 * Every window of ktup residues starting at position i is turned into an
 * integer code (base-4 digits weighted by pot[], plus 1).  pl[code] ends up
 * holding the last start position at which that code occurred, and tptr[i]
 * holds the previous start position with the same code (0 when there was
 * none), so the occurrences of each code form a chain through tptr[].
 * Windows containing a residue value outside 0..4 are skipped.
 *
 * pot[] has entries for j = 1..7 only, so ktup must not exceed 7.
 *
 * NOTE(review): a residue value of 4 is accepted, so code can exceed
 * limit (4^ktup), the range of pl[] cleared here - confirm that pl[] is
 * large enough and that such entries are meant to be used.
 */
static void make_n_ptrs(sint *tptr,sint *pl,sint naseq,sint len) {
    /* pot[j] == 4^(j-1) for j >= 1; slot 0 is unused */
    static sint pot[]={ 0, 1, 4, 16, 64, 256, 1024, 4096 };
    sint i,j,limit,code,flag;
    char residue;
    limit = (sint) pow((double)4,(double)ktup);
    for(i=1;i<=limit;++i) pl[i]=0;
    for(i=1;i<=len;++i) tptr[i]=0;
    for(i=1;i<=len-ktup+1;++i) {
        code=0; flag=FALSE;
        for(j=1;j<=ktup;++j) {
            residue = seq_array[naseq][i+j-1];
            /* abandon this window on an out-of-range residue */
            if((residue<0) || (residue>4)){ flag=TRUE; break; }
            code += ((residue) * pot[j]); /* DES */
        }
        if(flag) continue;
        ++code;
        /* chain to the previous occurrence, then become the newest one */
        if(pl[code]!=0) tptr[i]=pl[code];
        pl[code]=i;
    }
}

/*
 * Record fragment number curr_frag in accum[][] and link it into the
 * score-ordered fragment list.
 *
 * Rows 0..3 of column curr_frag receive fs, v1, v2 and flen.  Row 4 is the
 * "next" link of a list whose head is maxsf; the list is kept so that
 * accum[0][] does not increase from the head onwards:
 *   - empty list (maxsf == 0): the list is started with head 1 and a 0 link
 *     (presumably curr_frag is 1 at that point - TODO confirm);
 *   - fs >= score of the head: the new fragment becomes the head;
 *   - otherwise the list is walked until an entry with score <= fs is found
 *     and the new fragment is inserted just before it.
 * The walk relies on accum[0][0] being <= fs to terminate at the 0 link;
 * the array is zero-filled if ckalloc() clears memory - TODO confirm.
 *
 * Uses the file-level variables accum, curr_frag, maxsf and next.
 */
static void put_frag(sint fs,sint v1,sint v2,sint flen) {
    sint end;
    accum[0][curr_frag]=fs;
    accum[1][curr_frag]=v1;
    accum[2][curr_frag]=v2;
    accum[3][curr_frag]=flen;
    if(!maxsf) { maxsf=1; accum[4][curr_frag]=0; return; }
    if(fs >= accum[0][maxsf]) { accum[4][curr_frag]=maxsf; maxsf=curr_frag; return; }
    else {
        next=maxsf;
        while(TRUE) {
            end=next;
            next=accum[4][next];
            if(fs>=accum[0][next]) break;
        }
        /* splice curr_frag in between end and next */
        accum[4][curr_frag]=next;
        accum[4][end]=curr_frag;
    }
}

/*
 * NOTE(review): in this copy the text that follows looks truncated - the
 * body of frag_rel_pos() runs straight into the middle of a quicksort
 * routine ("if(a2 0)").  It is left untouched here; restore it from a
 * clean copy of the source before compiling.
 */
static sint frag_rel_pos(sint a1,sint b1,sint a2,sint b2) { sint ret; ret=FALSE; if(a1-b1==a2-b2) { if(a2 0) { if(lst[p] >= ust[p]) p--; else { i = lst[p] - 1; j = ust[p]; pivlin = array1[j]; while(i < j) { for(i=i+1; array1[i] < pivlin; i++) ; for(j=j-1; j > i; j--) if(array1[j] <= pivlin) break; if(i < j) { temp1 = array1[i]; array1[i] = array1[j]; array1[j] = temp1; temp2 = array2[i]; array2[i] = array2[j]; array2[j] = temp2; } } j = ust[p]; temp1 = array1[i]; array1[i] = array1[j]; array1[j] = temp1; temp2 = array2[i]; array2[i] = array2[j]; array2[j] = temp2; if(i-lst[p] < ust[p] - i) { lst[p+1] = lst[p]; ust[p+1] = i - 1; lst[p] = i + 1; } else { lst[p+1] = i + 1; ust[p+1] = ust[p]; ust[p] = i - 1; } p = p + 1; } } return; } static void pair_align(sint seq_no,sint l1,sint l2) { sint pot[8],i,j,l,m,flag,limit,pos,tl1,vn1,vn2,flen,osptr,fs; sint
tv1,tv2,encrypt,subt1,subt2,rmndr; char residue; if(dnaflag) { for(i=1;i<=ktup;++i) pot[i] = (sint) pow((double)4,(double)(i-1)); limit = (sint) pow((double)4,(double)ktup); } else { for (i=1;i<=ktup;i++) pot[i] = (sint) pow((double)(max_aa+1),(double)(i-1)); limit = (sint) pow((double)(max_aa+1),(double)ktup); } tl1 = (l1+l2)-1; for(i=1;i<=tl1;++i) { slopes[i]=displ[i]=0; diag_index[i] = i; } /* increment diagonal score for each k_tuple match */ for(i=1;i<=limit;++i) { vn1=zzc[i]; while(TRUE) { if(!vn1) break; vn2=zzd[i]; while(vn2 != 0) { osptr=vn1-vn2+l2; ++displ[osptr]; vn2=zzb[vn2]; } vn1=zza[vn1]; } } /* choose the top SIGNIF diagonals */ des_quick_sort(displ, diag_index, tl1); j = tl1 - signif + 1; if(j < 1) j = 1; /* flag all diagonals within WINDOW of a top diagonal */ for(i=tl1; i>=j; i--) if(displ[i] > 0) { pos = diag_index[i]; l = (1 >pos-window) ? 1 : pos-window; m = (tl1max_aa)) { flag=TRUE; break; } encrypt += ((residue)*pot[j]); } if(flag) continue; ++encrypt; vn2=zzd[encrypt]; flag=FALSE; while(TRUE) { if(!vn2) { flag=TRUE; break; } osptr=i-vn2+l2; if(slopes[osptr]!=1) { vn2=zzb[vn2]; continue; } flen=0; fs=ktup; next=maxsf; /* * A-loop */ while(TRUE) { if(!next) { ++curr_frag; if(curr_frag>=2*max_aln_length) { info("(Partial alignment)"); vatend=1; return; } displ[osptr]=curr_frag; put_frag(fs,i,vn2,flen); } else { tv1=accum[1][next]; tv2=accum[2][next]; if(frag_rel_pos(i,vn2,tv1,tv2)) { if(i-vn2==accum[1][next]-accum[2][next]) { if(i>accum[1][next]+(ktup-1)) fs=accum[0][next]+ktup; else { rmndr=i-accum[1][next]; fs=accum[0][next]+rmndr; } flen=next; next=0; continue; } else { if(displ[osptr]==0) subt1=ktup; else { if(i>accum[1][displ[osptr]]+(ktup-1)) subt1=accum[0][displ[osptr]]+ktup; else { rmndr=i-accum[1][displ[osptr]]; subt1=accum[0][displ[osptr]]+rmndr; } } subt2=accum[0][next]-wind_gap+ktup; if(subt2>subt1) { flen=next; fs=subt2; } else { flen=displ[osptr]; fs=subt1; } next=0; continue; } } else { next=accum[4][next]; continue; } } break; 
} /* * End of Aloop */ vn2=zzb[vn2]; } } vatend=0; } static void mypairwise(int si, int sj, Boolean dnaflag, float pw_go_penalty, float pw_ge_penalty, int int_scale, float gscale,float ghscale, int mat_avscore, short *xarray, short *yarray, double *tarray) { int n,m,len1,len2,i,j; char c; double wtime1,wtime2; n = seqlen_array[si + 1]; len1 = 0; for (i = 1; i <= n; i++) { c = seq_array[si + 1][i]; if ((c != gap_pos1) && (c != gap_pos2)) len1++; } m = seqlen_array[sj + 1]; if (n == 0 || m == 0) { xarray[len_of_xytarray] = si+1; yarray[len_of_xytarray] = sj+1; tarray[len_of_xytarray] = 1.0; len_of_xytarray++; return; } len2 = 0; for (i = 1; i <= m; i++) { c = seq_array[sj + 1][i]; if ((c != gap_pos1) && (c != gap_pos2)) len2++; } if (dnaflag) { g = 2 * (float) pw_go_penalty *int_scale * gscale; gh = pw_ge_penalty * int_scale * ghscale; } else { if (mat_avscore <= 0) g = 2 * (float) (pw_go_penalty + log((double) (MIN(n, m)))) * int_scale; else g = 2 * mat_avscore * (float) (pw_go_penalty + log((double) (MIN(n, m)))) * gscale; gh = pw_ge_penalty * int_scale; } /* align the sequences */ seq1 = si + 1; seq2 = sj + 1; forward_pass(&seq_array[seq1][0], &seq_array[seq2][0], n, m); reverse_pass(&seq_array[seq1][0], &seq_array[seq2][0]); last_print = 0; print_ptr = 1; /* sb1 = sb2 = 1; se1 = n-1; se2 = m-1; */ /* use Myers and Miller to align two sequences */ maxscore = diff(sb1 - 1, sb2 - 1, se1 - sb1 + 1, se2 - sb2 + 1, (sint) 0, (sint) 0); /* calculate percentage residue identity */ mm_score = tracepath(sb1, sb2); if (len1 == 0 || len2 == 0) mm_score = 0; else mm_score /= (float) MIN(len1, len2); xarray[len_of_xytarray] = si+1; yarray[len_of_xytarray] = sj+1; tarray[len_of_xytarray] = ((float) 100.0 - mm_score) / (float) 100.0; len_of_xytarray++; { int rank; MPI_Comm_rank(MPI_COMM_WORLD,&rank); fprintf(stdout,"Sequences (%d:%d) Aligned. 
Score: %5.2f (by rank %d)\n", (pint) si + 1, (pint) sj + 1, mm_score, rank); } return; } clustalw-mpi-0.15/prfalign.c0000644000411000001440000007501007644152540014354 0ustar liusers#include #include #include #include #include "clustalw.h" #define ENDALN 127 #define MAX(a,b) ((a)>(b)?(a):(b)) #define MIN(a,b) ((a)<(b)?(a):(b)) /* * Prototypes */ static lint pdiff(sint A,sint B,sint i,sint j,sint go1,sint go2); static lint prfscore(sint n, sint m); static sint gap_penalty1(sint i, sint j,sint k); static sint open_penalty1(sint i, sint j); static sint ext_penalty1(sint i, sint j); static sint gap_penalty2(sint i, sint j,sint k); static sint open_penalty2(sint i, sint j); static sint ext_penalty2(sint i, sint j); static void padd(sint k); static void pdel(sint k); static void palign(void); static void ptracepath(sint *alen); static void add_ggaps(void); static char * add_ggaps_mask(char *mask, int len, char *path1, char *path2); /* * Global variables */ extern double **tmat; extern float gap_open, gap_extend; extern float transition_weight; extern sint gap_pos1, gap_pos2; extern sint max_aa; extern sint nseqs; extern sint *seqlen_array; extern sint *seq_weight; extern sint debug; extern Boolean neg_matrix; extern sint mat_avscore; extern short blosum30mt[], blosum40mt[], blosum45mt[]; extern short blosum62mt2[], blosum80mt[]; extern short pam20mt[], pam60mt[]; extern short pam120mt[], pam160mt[], pam350mt[]; extern short gon40mt[], gon80mt[]; extern short gon120mt[], gon160mt[], gon250mt[], gon350mt[]; extern short clustalvdnamt[],swgapdnamt[]; extern short idmat[]; extern short usermat[]; extern short userdnamat[]; extern Boolean user_series; extern UserMatSeries matseries; extern short def_dna_xref[],def_aa_xref[],dna_xref[],aa_xref[]; extern sint max_aln_length; extern Boolean distance_tree; extern Boolean dnaflag; extern char mtrxname[]; extern char dnamtrxname[]; extern char **seq_array; extern char *amino_acid_codes; extern char 
*gap_penalty_mask1,*gap_penalty_mask2; extern char *sec_struct_mask1,*sec_struct_mask2; extern sint struct_penalties1, struct_penalties2; extern Boolean use_ss1, use_ss2; extern Boolean endgappenalties; static sint print_ptr,last_print; static sint *displ; static char **alignment; static sint *aln_len; static sint *aln_weight; static char *aln_path1, *aln_path2; static sint alignment_len; static sint **profile1, **profile2; static lint *HH, *DD, *RR, *SS; static lint *gS; static sint matrix[NUMRES][NUMRES]; static sint nseqs1, nseqs2; static sint prf_length1, prf_length2; static sint *gaps; static sint gapcoef1,gapcoef2; static sint lencoef1,lencoef2; static Boolean switch_profiles; lint prfalign(sint *group, sint *aligned) { static Boolean found; static Boolean negative; static Boolean error_given=FALSE; static sint i, j, count = 0; static sint NumSeq; static sint len, len1, len2, is, minlen; static sint se1, se2, sb1, sb2; static sint maxres; static sint int_scale; static short *matptr; static short *mat_xref; static char c; static lint score; static float scale; static double logmin,logdiff; static double pcid; alignment = (char **) ckalloc( nseqs * sizeof (char *) ); aln_len = (sint *) ckalloc( nseqs * sizeof (sint) ); aln_weight = (sint *) ckalloc( nseqs * sizeof (sint) ); for (i=0;i nseqs1) { switch_profiles = TRUE; for (i=0;i 0) fprintf(stdout,"mean tmat %3.1f\n", pcid); /* Make the first profile. 
*/ prf_length1 = 0; for (i=0;iprf_length1) prf_length1=seqlen_array[i+1]; nseqs1 = 0; if (debug>0) fprintf(stdout,"sequences profile 1:\n"); for (i=0;i0) { extern char **names; fprintf(stdout,"%s\n",names[i+1]); } len = seqlen_array[i+1]; alignment[nseqs1] = (char *) ckalloc( (prf_length1+2) * sizeof (char) ); for (j=0;jprf_length2) prf_length2=seqlen_array[i+1]; nseqs2 = 0; if (debug>0) fprintf(stdout,"sequences profile 2:\n"); for (i=0;i0) { extern char **names; fprintf(stdout,"%s\n",names[i+1]); } len = seqlen_array[i+1]; alignment[nseqs1+nseqs2] = (char *) ckalloc( (prf_length2+2) * sizeof (char) ); for (j=0;j0) fprintf(stdout,"%d %d logmin %f logdiff %f\n", (pint)len1,(pint)len2, logmin,logdiff); scale=0.75; if (strcmp(mtrxname, "blosum") == 0) { scale=0.75; if (negative || distance_tree == FALSE) matptr = blosum40mt; else if (pcid > 80.0) { matptr = blosum80mt; } else if (pcid > 60.0) { matptr = blosum62mt2; } else if (pcid > 40.0) { matptr = blosum45mt; } else if (pcid > 30.0) { scale=0.5; matptr = blosum45mt; } else if (pcid > 20.0) { scale=0.6; matptr = blosum45mt; } else { scale=0.6; matptr = blosum30mt; } mat_xref = def_aa_xref; } else if (strcmp(mtrxname, "pam") == 0) { scale=0.75; if (negative || distance_tree == FALSE) matptr = pam120mt; else if (pcid > 80.0) matptr = pam20mt; else if (pcid > 60.0) matptr = pam60mt; else if (pcid > 40.0) matptr = pam120mt; else matptr = pam350mt; mat_xref = def_aa_xref; } else if (strcmp(mtrxname, "gonnet") == 0) { scale/=2.0; if (negative || distance_tree == FALSE) matptr = gon250mt; else if (pcid > 35.0) { matptr = gon80mt; scale/=2.0; } else if (pcid > 25.0) { if(minlen<100) matptr = gon250mt; else matptr = gon120mt; } else { if(minlen<100) matptr = gon350mt; else matptr = gon160mt; } mat_xref = def_aa_xref; int_scale /= 10; } else if (strcmp(mtrxname, "id") == 0) { matptr = idmat; mat_xref = def_aa_xref; } else if(user_series) { matptr=NULL; found=FALSE; for(i=0;i=matseries.mat[i].llimit && 
pcid<=matseries.mat[i].ulimit) { j=i; found=TRUE; break; } if(found==FALSE) { if(!error_given) warning( "\nSeries matrix not found for sequence percent identity = %d.\n" "(Using first matrix in series as a default.)\n" "This alignment may not be optimal!\n" "SUGGESTION: Check your matrix series input file and try again.",(int)pcid); error_given=TRUE; j=0; } if (debug>0) fprintf(stdout,"pcid %d matrix %d\n",(pint)pcid,(pint)j+1); matptr = matseries.mat[j].matptr; mat_xref = matseries.mat[j].aa_xref; /* this gives a scale of 0.5 for pcid=llimit and 1.0 for pcid=ulimit */ scale=0.5+(pcid-matseries.mat[j].llimit)/((matseries.mat[j].ulimit-matseries.mat[j].llimit)*2.0); } else { matptr = usermat; mat_xref = aa_xref; } if(debug>0) fprintf(stdout,"pcid %3.1f scale %3.1f\n",pcid,scale); maxres = get_matrix(matptr, mat_xref, matrix, negative, int_scale); if (maxres == 0) { fprintf(stdout,"Error: matrix %s not found\n", mtrxname); return(-1); } if (negative) { gapcoef1 = gapcoef2 = 100.0 * (float)(gap_open); lencoef1 = lencoef2 = 100.0 * gap_extend; } else { if (mat_avscore <= 0) gapcoef1 = gapcoef2 = 100.0 * (float)(gap_open + logmin); else gapcoef1 = gapcoef2 = scale * mat_avscore * (float)(gap_open/(logdiff*logmin)); lencoef1 = lencoef2 = 100.0 * gap_extend; } } if (debug>0) { fprintf(stdout,"matavscore %d\n",mat_avscore); fprintf(stdout,"Gap Open1 %d Gap Open2 %d Gap Extend1 %d Gap Extend2 %d\n", (pint)gapcoef1,(pint)gapcoef2, (pint)lencoef1,(pint)lencoef2); fprintf(stdout,"Matrix %s\n", mtrxname); } profile1 = (sint **) ckalloc( (prf_length1+2) * sizeof (sint *) ); for(i=0; i4) { extern char *amino_acid_codes; for (j=0;j<=max_aa;j++) fprintf(stdout,"%c ", amino_acid_codes[j]); fprintf(stdout,"\n"); for (i=0;i4) { extern char *amino_acid_codes; for (j=0;j<=max_aa;j++) fprintf(stdout,"%c ", amino_acid_codes[j]); fprintf(stdout,"\n"); for (i=0;i0) { char c; extern char *amino_acid_codes; for (i=0;i1) fprintf(stdout,"%d ",(pint)displ[i]); if(displ[i]==0) { aln_path1[pos]=2; 
aln_path2[pos]=2; ++pos; } else { if((k=displ[i])>0) { for(j=0;j<=k-1;++j) { aln_path2[pos+j]=2; aln_path1[pos+j]=1; } pos += k; } else { k = (displ[i]<0) ? displ[i] * -1 : displ[i]; for(j=0;j<=k-1;++j) { aln_path1[pos+j]=2; aln_path2[pos+j]=1; } pos += k; } } } if (debug>1) fprintf(stdout,"\n"); (*alen) = pos; } static void pdel(sint k) { if(last_print<0) last_print = displ[print_ptr-1] -= k; else last_print = displ[print_ptr++] = -(k); } static void padd(sint k) { if(last_print<0) { displ[print_ptr-1] = k; displ[print_ptr++] = last_print; } else last_print = displ[print_ptr++] = k; } static void palign(void) { displ[print_ptr++] = last_print = 0; } static lint pdiff(sint A,sint B,sint M,sint N,sint go1, sint go2) { sint midi,midj,type; lint midh; static lint t, tl, g, h; { static sint i,j; static lint hh, f, e, s; /* Boundary cases: M <= 1 or N == 0 */ if (debug>2) fprintf(stdout,"A %d B %d M %d N %d midi %d go1 %d go2 %d\n", (pint)A,(pint)B,(pint)M,(pint)N,(pint)M/2,(pint)go1,(pint)go2); /* if sequence B is empty.... */ if(N<=0) { /* if sequence A is not empty.... */ if(M>0) { /* delete residues A[1] to A[M] */ pdel(M); } return(-gap_penalty1(A,B,M)); } /* if sequence A is empty.... */ if(M<=1) { if(M<=0) { /* insert residues B[1] to B[N] */ padd(N); return(-gap_penalty2(A,B,N)); } /* if sequence A has just one residue.... 
*/ if (go1 == 0) midh = -gap_penalty1(A+1,B+1,N); else midh = -gap_penalty2(A+1,B,1)-gap_penalty1(A+1,B+1,N); midj = 0; for(j=1;j<=N;j++) { hh = -gap_penalty1(A,B+1,j-1) + prfscore(A+1,B+j) -gap_penalty1(A+1,B+j+1,N-j); if(hh>midh) { midh = hh; midj = j; } } if(midj==0) { padd(N); pdel(1); } else { if(midj>1) padd(midj-1); palign(); if(midj (f=f-h)) f=hh; g = open_penalty2(A+i,B+j); h = ext_penalty2(A+i,B+j); if ((hh=HH[j]-g-h) > (e=DD[j]-h)) e=hh; hh = s + prfscore(A+i, B+j); if (f>hh) hh = f; if (e>hh) hh = e; s = HH[j]; HH[j] = hh; DD[j] = e; } } DD[0]=HH[0]; /* In a reverse phase, calculate all RR[j] and SS[j] */ RR[N]=0.0; tl = 0.0; for(j=N-1;j>=0;j--) { g = -open_penalty1(A+M,B+j+1); tl -= ext_penalty1(A+M,B+j+1); RR[j] = g+tl; SS[j] = RR[j]-open_penalty2(A+M,B+j); gS[j] = open_penalty2(A+M,B+j); } tl = 0.0; for(i=M-1;i>=midi;i--) { s = RR[N]; if (go2 == 0) g = 0; else g = -open_penalty2(A+i+1,B+N); tl -= ext_penalty2(A+i+1,B+N); RR[N] = hh = g+tl; t = open_penalty1(A+i,B+N); f = RR[N]-t; for(j=N-1;j>=0;j--) { g = open_penalty1(A+i,B+j+1); h = ext_penalty1(A+i,B+j+1); if ((hh=hh-g-h) > (f=f-h-g+t)) f=hh; t = g; g = open_penalty2(A+i+1,B+j); h = ext_penalty2(A+i+1,B+j); hh=RR[j]-g-h; if (i==(M-1)) { e=SS[j]-h; } else { e=SS[j]-h-g+open_penalty2(A+i+2,B+j); gS[j] = g; } if (hh > e) e=hh; hh = s + prfscore(A+i+1, B+j+1); if (f>hh) hh = f; if (e>hh) hh = e; s = RR[j]; RR[j] = hh; SS[j] = e; } } SS[N]=RR[N]; gS[N] = open_penalty2(A+midi+1,B+N); /* find midj, such that HH[j]+RR[j] or DD[j]+SS[j]+gap is the maximum */ midh=HH[0]+RR[0]; midj=0; type=1; for(j=0;j<=N;j++) { hh = HH[j] + RR[j]; if(hh>=midh) if(hh>midh || (HH[j]!=DD[j] && RR[j]==SS[j])) { midh=hh; midj=j; } } for(j=N;j>=0;j--) { hh = DD[j] + SS[j] + gS[j]; if(hh>midh) { midh=hh; midj=j; type=2; } } } /* Conquer recursively around midpoint */ if(type==1) { /* Type 1 gaps */ if (debug>2) fprintf(stdout,"Type 1,1: midj %d\n",(pint)midj); pdiff(A,B,midi,midj,go1,1); if (debug>2) fprintf(stdout,"Type 1,2: 
midj %d\n",(pint)midj); pdiff(A+midi,B+midj,M-midi,N-midj,1,go2); } else { if (debug>2) fprintf(stdout,"Type 2,1: midj %d\n",(pint)midj); pdiff(A,B,midi-1,midj,go1, 0); pdel(2); if (debug>2) fprintf(stdout,"Type 2,2: midj %d\n",(pint)midj); pdiff(A+midi+1,B+midj,M-midi-1,N-midj,0,go2); } return midh; /* Return the score of the best alignment */ } /* calculate the score for opening a gap at residues A[i] and B[j] */ static sint open_penalty1(sint i, sint j) { sint g; if (!endgappenalties &&(i==0 || i==prf_length1)) return(0); g = profile2[j][GAPCOL] + profile1[i][GAPCOL]; return(g); } /* calculate the score for extending an existing gap at A[i] and B[j] */ static sint ext_penalty1(sint i, sint j) { sint h; if (!endgappenalties &&(i==0 || i==prf_length1)) return(0); h = profile2[j][LENCOL]; return(h); } /* calculate the score for a gap of length k, at residues A[i] and B[j] */ static sint gap_penalty1(sint i, sint j, sint k) { sint ix; sint gp; sint g, h = 0; if (k <= 0) return(0); if (!endgappenalties &&(i==0 || i==prf_length1)) return(0); g = profile2[j][GAPCOL] + profile1[i][GAPCOL]; for (ix=0;ix #include #include #include #include #include #include "clustalw.h" #include "mpi.h" #define ENDALN 127 #define MAX(a,b) ((a)>(b)?(a):(b)) #define MIN(a,b) ((a)<(b)?(a):(b)) /* * Prototypes */ static lint pdiff(sint A,sint B,sint i,sint j,sint go1,sint go2); static lint prfscore(sint n, sint m); static sint gap_penalty1(sint i, sint j,sint k); static sint open_penalty1(sint i, sint j); static sint ext_penalty1(sint i, sint j); static sint gap_penalty2(sint i, sint j,sint k); static sint open_penalty2(sint i, sint j); static sint ext_penalty2(sint i, sint j); static void padd(sint k); static void pdel(sint k); static void palign(void); static void ptracepath(sint *alen); static void add_ggaps(void); static char * add_ggaps_mask(char *mask, int len, char *path1, char *path2); /* * Global variables */ extern double **tmat; extern float gap_open, gap_extend; extern float 
transition_weight; extern sint gap_pos1, gap_pos2; extern sint max_aa; extern sint nseqs; extern sint *seqlen_array; extern sint *seq_weight; extern sint debug; extern Boolean neg_matrix; extern sint mat_avscore; extern short blosum30mt[], blosum40mt[], blosum45mt[]; extern short blosum62mt2[], blosum80mt[]; extern short pam20mt[], pam60mt[]; extern short pam120mt[], pam160mt[], pam350mt[]; extern short gon40mt[], gon80mt[]; extern short gon120mt[], gon160mt[], gon250mt[], gon350mt[]; extern short clustalvdnamt[], swgapdnamt[]; extern short idmat[]; extern short usermat[]; extern short userdnamat[]; extern Boolean user_series; extern UserMatSeries matseries; extern short def_dna_xref[], def_aa_xref[], dna_xref[], aa_xref[]; extern sint max_aln_length; extern Boolean distance_tree; extern Boolean dnaflag; extern char mtrxname[]; extern char dnamtrxname[]; extern char **seq_array; extern char *amino_acid_codes; extern char *gap_penalty_mask1, *gap_penalty_mask2; extern char *sec_struct_mask1, *sec_struct_mask2; extern sint struct_penalties1, struct_penalties2; extern Boolean use_ss1, use_ss2; extern Boolean endgappenalties; static sint print_ptr, last_print; static sint *displ; static char **alignment; static sint *aln_len; static sint *aln_weight; static char *aln_path1, *aln_path2; static sint alignment_len; static sint **profile1, **profile2; static lint *HH, *DD, *RR, *SS; static lint *gS; static sint matrix[NUMRES][NUMRES]; static sint nseqs1, nseqs2; static sint prf_length1, prf_length2; static sint *gaps; static sint gapcoef1, gapcoef2; static sint lencoef1, lencoef2; static Boolean switch_profiles; lint prfalign_mpi_pdiff(sint * group, sint * aligned) { static Boolean found; static Boolean negative; static Boolean error_given = FALSE; static sint i, j, count = 0; static sint NumSeq; static sint len, len1, len2, is, minlen; static sint se1, se2, sb1, sb2; static sint maxres; static sint int_scale; static short *matptr; static short *mat_xref; static char c; 
static lint score; static float scale; static double logmin, logdiff; static double pcid; int np,position; char *mpi_buffer; int mybsize; MPI_Status status; int arg1, arg2, arg3, arg4, arg5, arg6; int *pa; int *avail_p; /************** DEBUG DEBUG ************************ fprintf(stderr,"Calling prfalign_mpi_pdiff()...\n"); fflush(stderr); ************** DEBUG DEBUG ************************/ alignment = (char **) ckalloc(nseqs * sizeof(char *)); aln_len = (sint *) ckalloc(nseqs * sizeof(sint)); aln_weight = (sint *) ckalloc(nseqs * sizeof(sint)); for (i = 0; i < nseqs; i++) if (aligned[i + 1] == 0) group[i + 1] = 0; nseqs1 = nseqs2 = 0; for (i = 0; i < nseqs; i++) { if (group[i + 1] == 1) nseqs1++; else if (group[i + 1] == 2) nseqs2++; } if ((nseqs1 == 0) || (nseqs2 == 0)) return (0.0); if (nseqs2 > nseqs1) { switch_profiles = TRUE; for (i = 0; i < nseqs; i++) { if (group[i + 1] == 1) group[i + 1] = 2; else if (group[i + 1] == 2) group[i + 1] = 1; } } else switch_profiles = FALSE; int_scale = 100; /* calculate the mean of the sequence pc identities between the two groups */ count = 0; pcid = 0.0; negative = neg_matrix; for (i = 0; i < nseqs; i++) { if (group[i + 1] == 1) for (j = 0; j < nseqs; j++) if (group[j + 1] == 2) { count++; pcid += tmat[i + 1][j + 1]; } } pcid = pcid / (float) count; if (debug > 0) fprintf(stdout, "mean tmat %3.1f\n", pcid); /* Make the first profile. 
*/ prf_length1 = 0; for (i = 0; i < nseqs; i++) if (group[i + 1] == 1) if (seqlen_array[i + 1] > prf_length1) { prf_length1 = seqlen_array[i + 1]; } nseqs1 = 0; if (debug > 0) fprintf(stdout, "sequences profile 1:\n"); for (i = 0; i < nseqs; i++) { if (group[i + 1] == 1) { if (debug > 0) { extern char **names; fprintf(stdout, "%s\n", names[i + 1]); } len = seqlen_array[i + 1]; alignment[nseqs1] = (char *) ckalloc((prf_length1 + 2) * sizeof(char)); for (j = 0; j < len; j++) alignment[nseqs1][j] = seq_array[i + 1][j + 1]; for (j = len; j < prf_length1; j++) alignment[nseqs1][j + 1] = gap_pos1; alignment[nseqs1][prf_length1 + 1] = ENDALN; aln_len[nseqs1] = prf_length1; aln_weight[nseqs1] = seq_weight[i]; nseqs1++; } } /* Make the second profile. */ prf_length2 = 0; for (i = 0; i < nseqs; i++) if (group[i + 1] == 2) if (seqlen_array[i + 1] > prf_length2) prf_length2 = seqlen_array[i + 1]; nseqs2 = 0; if (debug > 0) fprintf(stdout, "sequences profile 2:\n"); for (i = 0; i < nseqs; i++) { if (group[i + 1] == 2) { if (debug > 0) { extern char **names; fprintf(stdout, "%s\n", names[i + 1]); } len = seqlen_array[i + 1]; alignment[nseqs1 + nseqs2] = (char *) ckalloc((prf_length2 + 2) * sizeof(char)); for (j = 0; j < len; j++) alignment[nseqs1 + nseqs2][j] = seq_array[i + 1][j + 1]; for (j = len; j < prf_length2; j++) alignment[nseqs1 + nseqs2][j + 1] = gap_pos1; alignment[nseqs1 + nseqs2][j] = ENDALN; aln_len[nseqs1 + nseqs2] = prf_length2; aln_weight[nseqs1 + nseqs2] = seq_weight[i]; nseqs2++; } } max_aln_length = prf_length1 + prf_length2 + 2; /* calculate real length of profiles - removing gaps! 
*/ len1 = 0; for (i = 0; i < nseqs1; i++) { is = 0; for (j = 0; j < MIN(aln_len[i], prf_length1); j++) { c = alignment[i][j]; if ((c != gap_pos1) && (c != gap_pos2)) is++; } len1 += is; } len1 /= (float) nseqs1; len2 = 0; for (i = nseqs1; i < nseqs2 + nseqs1; i++) { is = 0; for (j = 0; j < MIN(aln_len[i], prf_length2); j++) { c = alignment[i][j]; if ((c != gap_pos1) && (c != gap_pos2)) is++; } len2 += is; } len2 /= (float) nseqs2; if (dnaflag) { scale = 1.0; if (strcmp(dnamtrxname, "iub") == 0) { matptr = swgapdnamt; mat_xref = def_dna_xref; } else if (strcmp(dnamtrxname, "clustalw") == 0) { matptr = clustalvdnamt; mat_xref = def_dna_xref; scale = 0.66; } else { matptr = userdnamat; mat_xref = dna_xref; } maxres = get_matrix(matptr, mat_xref, matrix, neg_matrix, int_scale); if (maxres == 0) return ((sint) - 1); /* matrix[0][4]=transition_weight*matrix[0][0]; matrix[4][0]=transition_weight*matrix[0][0]; matrix[2][11]=transition_weight*matrix[0][0]; matrix[11][2]=transition_weight*matrix[0][0]; matrix[2][12]=transition_weight*matrix[0][0]; matrix[12][2]=transition_weight*matrix[0][0]; */ /* fix suggested by Chanan Rubin at Compugen */ matrix[mat_xref[0]][mat_xref[4]] = transition_weight * matrix[0][0]; matrix[mat_xref[4]][mat_xref[0]] = transition_weight * matrix[0][0]; matrix[mat_xref[2]][mat_xref[11]] = transition_weight * matrix[0][0]; matrix[mat_xref[11]][mat_xref[2]] = transition_weight * matrix[0][0]; matrix[mat_xref[2]][mat_xref[12]] = transition_weight * matrix[0][0]; matrix[mat_xref[12]][mat_xref[2]] = transition_weight * matrix[0][0]; gapcoef1 = gapcoef2 = 100.0 * gap_open * scale; lencoef1 = lencoef2 = 100.0 * gap_extend * scale; } else { if (len1 == 0 || len2 == 0) { logmin = 1.0; logdiff = 1.0; } else { minlen = MIN(len1, len2); logmin = 1.0 / log10((double) minlen); if (len2 < len1) logdiff = 1.0 + 0.5 * log10((double) ((float) len2 / (float) len1)); else if (len1 < len2) logdiff = 1.0 + 0.5 * log10((double) ((float) len1 / (float) len2)); else logdiff 
= 1.0; if (logdiff < 0.9) logdiff = 0.9; } if (debug > 0) fprintf(stdout, "%d %d logmin %f logdiff %f\n", (pint) len1, (pint) len2, logmin, logdiff); scale = 0.75; if (strcmp(mtrxname, "blosum") == 0) { scale = 0.75; if (negative || distance_tree == FALSE) matptr = blosum40mt; else if (pcid > 80.0) { matptr = blosum80mt; } else if (pcid > 60.0) { matptr = blosum62mt2; } else if (pcid > 40.0) { matptr = blosum45mt; } else if (pcid > 30.0) { scale = 0.5; matptr = blosum45mt; } else if (pcid > 20.0) { scale = 0.6; matptr = blosum45mt; } else { scale = 0.6; matptr = blosum30mt; } mat_xref = def_aa_xref; } else if (strcmp(mtrxname, "pam") == 0) { scale = 0.75; if (negative || distance_tree == FALSE) matptr = pam120mt; else if (pcid > 80.0) matptr = pam20mt; else if (pcid > 60.0) matptr = pam60mt; else if (pcid > 40.0) matptr = pam120mt; else matptr = pam350mt; mat_xref = def_aa_xref; } else if (strcmp(mtrxname, "gonnet") == 0) { scale /= 2.0; if (negative || distance_tree == FALSE) matptr = gon250mt; else if (pcid > 35.0) { matptr = gon80mt; scale /= 2.0; } else if (pcid > 25.0) { if (minlen < 100) matptr = gon250mt; else matptr = gon120mt; } else { if (minlen < 100) matptr = gon350mt; else matptr = gon160mt; } mat_xref = def_aa_xref; int_scale /= 10; } else if (strcmp(mtrxname, "id") == 0) { matptr = idmat; mat_xref = def_aa_xref; } else if (user_series) { matptr = NULL; found = FALSE; for (i = 0; i < matseries.nmat; i++) if (pcid >= matseries.mat[i].llimit && pcid <= matseries.mat[i].ulimit) { j = i; found = TRUE; break; } if (found == FALSE) { if (!error_given) warning ("\nSeries matrix not found for sequence percent identity = %d.\n" "(Using first matrix in series as a default.)\n" "This alignment may not be optimal!\n" "SUGGESTION: Check your matrix series input file and try again.", (int) pcid); error_given = TRUE; j = 0; } if (debug > 0) fprintf(stdout, "pcid %d matrix %d\n", (pint) pcid, (pint) j + 1); matptr = matseries.mat[j].matptr; mat_xref = 
matseries.mat[j].aa_xref; /* this gives a scale of 0.5 for pcid=llimit and 1.0 for pcid=ulimit */ scale = 0.5 + (pcid - matseries.mat[j].llimit) / ((matseries.mat[j].ulimit - matseries.mat[j].llimit) * 2.0); } else { matptr = usermat; mat_xref = aa_xref; } if (debug > 0) fprintf(stdout, "pcid %3.1f scale %3.1f\n", pcid, scale); maxres = get_matrix(matptr, mat_xref, matrix, negative, int_scale); if (maxres == 0) { fprintf(stdout, "Error: matrix %s not found\n", mtrxname); return (-1); } if (negative) { gapcoef1 = gapcoef2 = 100.0 * (float) (gap_open); lencoef1 = lencoef2 = 100.0 * gap_extend; } else { if (mat_avscore <= 0) gapcoef1 = gapcoef2 = 100.0 * (float) (gap_open + logmin); else gapcoef1 = gapcoef2 = scale * mat_avscore * (float) (gap_open / (logdiff * logmin)); lencoef1 = lencoef2 = 100.0 * gap_extend; } } if (debug > 0) { fprintf(stdout, "matavscore %d\n", mat_avscore); fprintf(stdout, "Gap Open1 %d Gap Open2 %d Gap Extend1 %d Gap Extend2 %d\n", (pint) gapcoef1, (pint) gapcoef2, (pint) lencoef1, (pint) lencoef2); fprintf(stdout, "Matrix %s\n", mtrxname); } profile1 = (sint **) ckalloc((prf_length1 + 2) * sizeof(sint *)); for (i = 0; i < prf_length1 + 2; i++) profile1[i] = (sint *) ckalloc((LENCOL + 2) * sizeof(sint)); profile2 = (sint **) ckalloc((prf_length2 + 2) * sizeof(sint *)); for (i = 0; i < prf_length2 + 2; i++) profile2[i] = (sint *) ckalloc((LENCOL + 2) * sizeof(sint)); /* calculate the Gap Coefficients. */ gaps = (sint *) ckalloc((max_aln_length + 1) * sizeof(sint)); if (switch_profiles == FALSE) calc_gap_coeff(alignment, gaps, profile1, (struct_penalties1 && use_ss1), gap_penalty_mask1, (sint) 0, nseqs1, prf_length1, gapcoef1, lencoef1); else calc_gap_coeff(alignment, gaps, profile1, (struct_penalties2 && use_ss2), gap_penalty_mask2, (sint) 0, nseqs1, prf_length1, gapcoef1, lencoef1); /* calculate the profile matrix. 
*/ calc_prf1(profile1, alignment, gaps, matrix, aln_weight, prf_length1, (sint) 0, nseqs1); if (debug > 4) { extern char *amino_acid_codes; for (j = 0; j <= max_aa; j++) fprintf(stdout, "%c ", amino_acid_codes[j]); fprintf(stdout, "\n"); for (i = 0; i < prf_length1; i++) { for (j = 0; j <= max_aa; j++) fprintf(stdout, "%d ", (pint) profile1[i + 1][j]); fprintf(stdout, "%d ", (pint) profile1[i + 1][gap_pos1]); fprintf(stdout, "%d ", (pint) profile1[i + 1][gap_pos2]); fprintf(stdout, "%d %d\n", (pint) profile1[i + 1][GAPCOL], (pint) profile1[i + 1][LENCOL]); } } /* calculate the Gap Coefficients. */ if (switch_profiles == FALSE) calc_gap_coeff(alignment, gaps, profile2, (struct_penalties2 && use_ss2), gap_penalty_mask2, nseqs1, nseqs1 + nseqs2, prf_length2, gapcoef2, lencoef2); else calc_gap_coeff(alignment, gaps, profile2, (struct_penalties1 && use_ss1), gap_penalty_mask1, nseqs1, nseqs1 + nseqs2, prf_length2, gapcoef2, lencoef2); /* calculate the profile matrix. */ calc_prf2(profile2, alignment, aln_weight, prf_length2, nseqs1, nseqs1 + nseqs2); aln_weight = ckfree((void *) aln_weight); if (debug > 4) { extern char *amino_acid_codes; for (j = 0; j <= max_aa; j++) fprintf(stdout, "%c ", amino_acid_codes[j]); fprintf(stdout, "\n"); for (i = 0; i < prf_length2; i++) { for (j = 0; j <= max_aa; j++) fprintf(stdout, "%d ", (pint) profile2[i + 1][j]); fprintf(stdout, "%d ", (pint) profile2[i + 1][gap_pos1]); fprintf(stdout, "%d ", (pint) profile2[i + 1][gap_pos2]); fprintf(stdout, "%d %d\n", (pint) profile2[i + 1][GAPCOL], (pint) profile2[i + 1][LENCOL]); } } aln_path1 = (char *) ckalloc((max_aln_length + 1) * sizeof(char)); aln_path2 = (char *) ckalloc((max_aln_length + 1) * sizeof(char)); /* align the profiles */ /* use Myers and Miller to align two sequences */ last_print = 0; print_ptr = 1; sb1 = sb2 = 0; se1 = prf_length1; se2 = prf_length2; displ = (sint *) ckalloc((max_aln_length + 1) * sizeof(sint)); MPI_Comm_size(MPI_COMM_WORLD, &np); if (np < 2) { /* we'll run 
pdiff() sequentially. */ HH = (lint *) ckalloc( (max_aln_length+1) * sizeof (lint) ); DD = (lint *) ckalloc( (max_aln_length+1) * sizeof (lint) ); RR = (lint *) ckalloc( (max_aln_length+1) * sizeof (lint) ); SS = (lint *) ckalloc( (max_aln_length+1) * sizeof (lint) ); gS = (lint *) ckalloc( (max_aln_length+1) * sizeof (lint) ); score = pdiff(sb1, sb2, se1-sb1, se2-sb2, profile1[0][GAPCOL], profile1[prf_length1][GAPCOL]); HH=ckfree((void *)HH); DD=ckfree((void *)DD); RR=ckfree((void *)RR); SS=ckfree((void *)SS); gS=ckfree((void *)gS); } else { /* we'll run pdiff() in parallel. */ /********** Starting MPI related stuff *****************/ /*******************************************************/ /* * We have (np-1) available processes to run pdiff(). Note * that process whose rank = 0 does not participate in * the following computation. */ avail_p = (int *)calloc(np, sizeof(int)); assert(avail_p); /* * avail_p[0] = n: there are n available processes (excluding the one * whose rank equal to 0) * avail_p[1] = 1: the MPI rank of the first available process is 1. * avail_p[2] = 2: the MPI rank of the second available process is 2. */ avail_p[0] = (np-1); for (i=1;i 1) fprintf(stdout, "%d ", (pint) displ[i]); if (displ[i] == 0) { aln_path1[pos] = 2; aln_path2[pos] = 2; ++pos; } else { if ((k = displ[i]) > 0) { for (j = 0; j <= k - 1; ++j) { aln_path2[pos + j] = 2; aln_path1[pos + j] = 1; } pos += k; } else { k = (displ[i] < 0) ? displ[i] * -1 : displ[i]; for (j = 0; j <= k - 1; ++j) { aln_path1[pos + j] = 2; aln_path2[pos + j] = 1; } pos += k; } } } if (debug > 1) fprintf(stdout, "\n"); (*alen) = pos; } static void add_ggaps(void) { sint j; sint i, ix; sint len; char *ta; ta = (char *) ckalloc((alignment_len + 1) * sizeof(char)); for (j = 0; j < nseqs1; j++) { ix = 0; for (i = 0; i < alignment_len; i++) { if (aln_path1[i] == 2) { if (ix < aln_len[j]) ta[i] = alignment[j][ix]; else ta[i] = ENDALN; ix++; } else if (aln_path1[i] == 1) { /* insertion in first alignment... 
*/ ta[i] = gap_pos1; } else { fprintf(stdout, "Error in aln_path\n"); } } ta[i] = ENDALN; len = alignment_len; alignment[j] = (char *) realloc(alignment[j], (len + 2) * sizeof(char)); for (i = 0; i < len; i++) alignment[j][i] = ta[i]; alignment[j][i] = ENDALN; aln_len[j] = len; } for (j = nseqs1; j < nseqs1 + nseqs2; j++) { ix = 0; for (i = 0; i < alignment_len; i++) { if (aln_path2[i] == 2) { if (ix < aln_len[j]) ta[i] = alignment[j][ix]; else ta[i] = ENDALN; ix++; } else if (aln_path2[i] == 1) { /* insertion in second alignment... */ ta[i] = gap_pos1; } else { fprintf(stdout, "Error in aln_path\n"); } } ta[i] = ENDALN; len = alignment_len; alignment[j] = (char *) realloc(alignment[j], (len + 2) * sizeof(char)); for (i = 0; i < len; i++) alignment[j][i] = ta[i]; alignment[j][i] = ENDALN; aln_len[j] = len; } ta = ckfree((void *) ta); if (struct_penalties1 != NONE) gap_penalty_mask1 = add_ggaps_mask(gap_penalty_mask1, alignment_len, aln_path1, aln_path2); if (struct_penalties1 == SECST) sec_struct_mask1 = add_ggaps_mask(sec_struct_mask1, alignment_len, aln_path1, aln_path2); if (struct_penalties2 != NONE) gap_penalty_mask2 = add_ggaps_mask(gap_penalty_mask2, alignment_len, aln_path2, aln_path1); if (struct_penalties2 == SECST) sec_struct_mask2 = add_ggaps_mask(sec_struct_mask2, alignment_len, aln_path2, aln_path1); if (debug > 0) { char c; extern char *amino_acid_codes; for (i = 0; i < nseqs1 + nseqs2; i++) { for (j = 0; j < alignment_len; j++) { if (alignment[i][j] == ENDALN) break; else if ((alignment[i][j] == gap_pos1) || (alignment[i][j] == gap_pos2)) c = '-'; else c = amino_acid_codes[alignment[i][j]]; fprintf(stdout, "%c", c); } fprintf(stdout, "\n\n"); } } } static lint prfscore(sint n, sint m) { sint ix; lint score; score = 0.0; for (ix=0; ix<=max_aa; ix++) { score += (profile1[n][ix] * profile2[m][ix]); } score += (profile1[n][gap_pos1] * profile2[m][gap_pos1]); score += (profile1[n][gap_pos2] * profile2[m][gap_pos2]); return(score/10); } static void 
pdel(sint k) { if(last_print<0) last_print = displ[print_ptr-1] -= k; else last_print = displ[print_ptr++] = -(k); } static void padd(sint k) { if(last_print<0) { displ[print_ptr-1] = k; displ[print_ptr++] = last_print; } else last_print = displ[print_ptr++] = k; } static void palign(void) { displ[print_ptr++] = last_print = 0; } static lint pdiff(sint A,sint B,sint M,sint N,sint go1, sint go2) { sint midi,midj,type; lint midh; static lint t, tl, g, h; { static sint i,j; static lint hh, f, e, s; /* Boundary cases: M <= 1 or N == 0 */ if (debug>2) fprintf(stdout,"A %d B %d M %d N %d midi %d go1 %d go2 %d\n", (pint)A,(pint)B,(pint)M,(pint)N,(pint)M/2,(pint)go1,(pint)go2); /* if sequence B is empty.... */ if(N<=0) { /* if sequence A is not empty.... */ if(M>0) { /* delete residues A[1] to A[M] */ pdel(M); } return(-gap_penalty1(A,B,M)); } /* if sequence A is empty.... */ if(M<=1) { if(M<=0) { /* insert residues B[1] to B[N] */ padd(N); return(-gap_penalty2(A,B,N)); } /* if sequence A has just one residue.... 
*/ if (go1 == 0) midh = -gap_penalty1(A+1,B+1,N); else midh = -gap_penalty2(A+1,B,1)-gap_penalty1(A+1,B+1,N); midj = 0; for(j=1;j<=N;j++) { hh = -gap_penalty1(A,B+1,j-1) + prfscore(A+1,B+j) -gap_penalty1(A+1,B+j+1,N-j); if(hh>midh) { midh = hh; midj = j; } } if(midj==0) { padd(N); pdel(1); } else { if(midj>1) padd(midj-1); palign(); if(midj (f=f-h)) f=hh; g = open_penalty2(A+i,B+j); h = ext_penalty2(A+i,B+j); if ((hh=HH[j]-g-h) > (e=DD[j]-h)) e=hh; hh = s + prfscore(A+i, B+j); if (f>hh) hh = f; if (e>hh) hh = e; s = HH[j]; HH[j] = hh; DD[j] = e; } } DD[0]=HH[0]; /* In a reverse phase, calculate all RR[j] and SS[j] */ RR[N]=0.0; tl = 0.0; for(j=N-1;j>=0;j--) { g = -open_penalty1(A+M,B+j+1); tl -= ext_penalty1(A+M,B+j+1); RR[j] = g+tl; SS[j] = RR[j]-open_penalty2(A+M,B+j); gS[j] = open_penalty2(A+M,B+j); } tl = 0.0; for(i=M-1;i>=midi;i--) { s = RR[N]; if (go2 == 0) g = 0; else g = -open_penalty2(A+i+1,B+N); tl -= ext_penalty2(A+i+1,B+N); RR[N] = hh = g+tl; t = open_penalty1(A+i,B+N); f = RR[N]-t; for(j=N-1;j>=0;j--) { g = open_penalty1(A+i,B+j+1); h = ext_penalty1(A+i,B+j+1); if ((hh=hh-g-h) > (f=f-h-g+t)) f=hh; t = g; g = open_penalty2(A+i+1,B+j); h = ext_penalty2(A+i+1,B+j); hh=RR[j]-g-h; if (i==(M-1)) { e=SS[j]-h; } else { e=SS[j]-h-g+open_penalty2(A+i+2,B+j); gS[j] = g; } if (hh > e) e=hh; hh = s + prfscore(A+i+1, B+j+1); if (f>hh) hh = f; if (e>hh) hh = e; s = RR[j]; RR[j] = hh; SS[j] = e; } } SS[N]=RR[N]; gS[N] = open_penalty2(A+midi+1,B+N); /* find midj, such that HH[j]+RR[j] or DD[j]+SS[j]+gap is the maximum */ midh=HH[0]+RR[0]; midj=0; type=1; for(j=0;j<=N;j++) { hh = HH[j] + RR[j]; if(hh>=midh) if(hh>midh || (HH[j]!=DD[j] && RR[j]==SS[j])) { midh=hh; midj=j; } } for(j=N;j>=0;j--) { hh = DD[j] + SS[j] + gS[j]; if(hh>midh) { midh=hh; midj=j; type=2; } } } /* Conquer recursively around midpoint */ if(type==1) { /* Type 1 gaps */ pdiff(A,B,midi,midj,go1,1); pdiff(A+midi,B+midj,M-midi,N-midj,1,go2); } else { pdiff(A,B,midi-1,midj,go1, 0); pdel(2); 
pdiff(A+midi+1,B+midj,M-midi-1,N-midj,0,go2); } return midh; /* Return the score of the best alignment */ } /* calculate the score for opening a gap at residues A[i] and B[j] */ static sint open_penalty1(sint i, sint j) { sint g; if (!endgappenalties &&(i==0 || i==prf_length1)) return(0); g = profile2[j][GAPCOL] + profile1[i][GAPCOL]; return(g); } /* calculate the score for extending an existing gap at A[i] and B[j] */ static sint ext_penalty1(sint i, sint j) { sint h; if (!endgappenalties &&(i==0 || i==prf_length1)) return(0); h = profile2[j][LENCOL]; return(h); } /* calculate the score for a gap of length k, at residues A[i] and B[j] */ static sint gap_penalty1(sint i, sint j, sint k) { sint ix; sint gp; sint g, h = 0; if (k <= 0) return(0); if (!endgappenalties &&(i==0 || i==prf_length1)) return(0); g = profile2[j][GAPCOL] + profile1[i][GAPCOL]; for (ix=0;ix #include #include #include #include #include "clustalw.h" #include "mpi.h" #define ENDALN 127 #define MAX(a,b) ((a)>(b)?(a):(b)) #define MIN(a,b) ((a)<(b)?(a):(b)) /* * Prototypes */ static lint pdiff(sint A, sint B, sint i, sint j, sint go1, sint go2); static lint prfscore(sint n, sint m); static sint gap_penalty1(sint i, sint j, sint k); static sint open_penalty1(sint i, sint j); static sint ext_penalty1(sint i, sint j); static sint gap_penalty2(sint i, sint j, sint k); static sint open_penalty2(sint i, sint j); static sint ext_penalty2(sint i, sint j); static void padd(sint k); static void pdel(sint k); static void palign(void); static void ptracepath(sint * alen); static void add_ggaps(void); static char *add_ggaps_mask(char *mask, int len, char *path1, char *path2); /* * Global variables */ extern double **tmat; extern float gap_open, gap_extend; extern float transition_weight; extern sint gap_pos1, gap_pos2; extern sint max_aa; extern sint nseqs; extern sint *seqlen_array; extern sint *seq_weight; extern sint debug; extern Boolean neg_matrix; extern sint mat_avscore; extern short blosum30mt[], 
blosum40mt[], blosum45mt[]; extern short blosum62mt2[], blosum80mt[]; extern short pam20mt[], pam60mt[]; extern short pam120mt[], pam160mt[], pam350mt[]; extern short gon40mt[], gon80mt[]; extern short gon120mt[], gon160mt[], gon250mt[], gon350mt[]; extern short clustalvdnamt[], swgapdnamt[]; extern short idmat[]; extern short usermat[]; extern short userdnamat[]; extern Boolean user_series; extern UserMatSeries matseries; extern short def_dna_xref[], def_aa_xref[], dna_xref[], aa_xref[]; extern sint max_aln_length; extern Boolean distance_tree; extern Boolean dnaflag; extern char mtrxname[]; extern char dnamtrxname[]; extern char **seq_array; extern char *amino_acid_codes; extern char *gap_penalty_mask1, *gap_penalty_mask2; extern char *sec_struct_mask1, *sec_struct_mask2; extern sint struct_penalties1, struct_penalties2; extern Boolean use_ss1, use_ss2; extern Boolean endgappenalties; static sint print_ptr, last_print; static sint *displ; static char **alignment; static sint *aln_len; static sint *aln_weight; static char *aln_path1, *aln_path2; static sint alignment_len; static sint **profile1, **profile2; static lint *HH, *DD, *RR, *SS; static lint *gS; static sint matrix[NUMRES][NUMRES]; static sint nseqs1, nseqs2; static sint prf_length1, prf_length2; static sint *gaps; static sint gapcoef1, gapcoef2; static sint lencoef1, lencoef2; static Boolean switch_profiles; /* * group[] is from the array sets[set] in malign.c, * aligned is from the array aligned[] in malign.c, * which_set is from the variable "set" in malign.c, * dest is the MPI rank of the destination process. * * Note: return 1 if ok; return 0 if the alignment is skipped * since either nseqs1 or nseqs2 is zero; return -1 on error. * * In order not to change the contents of group[], I pass the * array as group2[] and make a copy of it to group[]. * * * Note: "reverse_rank" is the rank of the MPI process which is going to * execute preverse_pass(). 
*/ lint prf_init(sint *group2, sint * aligned, int which_set, int dest, int reverse_rank) { static Boolean found; static Boolean negative; static Boolean error_given = FALSE; static sint i, j, count = 0; static sint NumSeq; static sint len, len1, len2, is, minlen; static sint se1, se2, sb1, sb2; static sint maxres; static sint int_scale; static short *matptr; static short *mat_xref; static char c; static lint score; static float scale; static double logmin, logdiff; static double pcid; int mybsize, position; char *mpi_buffer; sint *group; alignment = (char **) ckalloc(nseqs * sizeof(char *)); aln_len = (sint *) ckalloc(nseqs * sizeof(sint)); aln_weight = (sint *) ckalloc(nseqs * sizeof(sint)); group = (sint *)calloc((nseqs+1),sizeof(sint)); assert(group); for (i=0;i<(nseqs+1);i++) group[i] = group2[i]; for (i = 0; i < nseqs; i++) if (aligned[i + 1] == 0) group[i + 1] = 0; nseqs1 = nseqs2 = 0; for (i = 0; i < nseqs; i++) { if (group[i + 1] == 1) nseqs1++; else if (group[i + 1] == 2) nseqs2++; } if ((nseqs1 == 0) || (nseqs2 == 0)) return (0); if (nseqs2 > nseqs1) { switch_profiles = TRUE; for (i = 0; i < nseqs; i++) { if (group[i + 1] == 1) group[i + 1] = 2; else if (group[i + 1] == 2) group[i + 1] = 1; } } else switch_profiles = FALSE; int_scale = 100; /* calculate the mean of the sequence pc identities between the two groups */ count = 0; pcid = 0.0; negative = neg_matrix; for (i = 0; i < nseqs; i++) { if (group[i + 1] == 1) for (j = 0; j < nseqs; j++) if (group[j + 1] == 2) { count++; pcid += tmat[i + 1][j + 1]; } } pcid = pcid / (float) count; if (debug > 0) fprintf(stdout, "mean tmat %3.1f\n", pcid); /* Make the first profile. 
*/ prf_length1 = 0; for (i = 0; i < nseqs; i++) if (group[i + 1] == 1) if (seqlen_array[i + 1] > prf_length1) prf_length1 = seqlen_array[i + 1]; nseqs1 = 0; if (debug > 0) fprintf(stdout, "sequences profile 1:\n"); for (i = 0; i < nseqs; i++) { if (group[i + 1] == 1) { if (debug > 0) { extern char **names; fprintf(stdout, "%s\n", names[i + 1]); } len = seqlen_array[i + 1]; alignment[nseqs1] = (char *) ckalloc((prf_length1 + 2) * sizeof(char)); for (j = 0; j < len; j++) alignment[nseqs1][j] = seq_array[i + 1][j + 1]; for (j = len; j < prf_length1; j++) alignment[nseqs1][j + 1] = gap_pos1; alignment[nseqs1][prf_length1 + 1] = ENDALN; aln_len[nseqs1] = prf_length1; aln_weight[nseqs1] = seq_weight[i]; nseqs1++; } } /* Make the second profile. */ prf_length2 = 0; for (i = 0; i < nseqs; i++) if (group[i + 1] == 2) if (seqlen_array[i + 1] > prf_length2) prf_length2 = seqlen_array[i + 1]; nseqs2 = 0; if (debug > 0) fprintf(stdout, "sequences profile 2:\n"); for (i = 0; i < nseqs; i++) { if (group[i + 1] == 2) { if (debug > 0) { extern char **names; fprintf(stdout, "%s\n", names[i + 1]); } len = seqlen_array[i + 1]; alignment[nseqs1 + nseqs2] = (char *) ckalloc((prf_length2 + 2) * sizeof(char)); for (j = 0; j < len; j++) alignment[nseqs1 + nseqs2][j] = seq_array[i + 1][j + 1]; for (j = len; j < prf_length2; j++) alignment[nseqs1 + nseqs2][j + 1] = gap_pos1; alignment[nseqs1 + nseqs2][j] = ENDALN; aln_len[nseqs1 + nseqs2] = prf_length2; aln_weight[nseqs1 + nseqs2] = seq_weight[i]; nseqs2++; } } max_aln_length = prf_length1 + prf_length2 + 2; /* calculate real length of profiles - removing gaps! 
*/ len1 = 0; for (i = 0; i < nseqs1; i++) { is = 0; for (j = 0; j < MIN(aln_len[i], prf_length1); j++) { c = alignment[i][j]; if ((c != gap_pos1) && (c != gap_pos2)) is++; } len1 += is; } len1 /= (float) nseqs1; len2 = 0; for (i = nseqs1; i < nseqs2 + nseqs1; i++) { is = 0; for (j = 0; j < MIN(aln_len[i], prf_length2); j++) { c = alignment[i][j]; if ((c != gap_pos1) && (c != gap_pos2)) is++; } len2 += is; } len2 /= (float) nseqs2; if (dnaflag) { scale = 1.0; if (strcmp(dnamtrxname, "iub") == 0) { matptr = swgapdnamt; mat_xref = def_dna_xref; } else if (strcmp(dnamtrxname, "clustalw") == 0) { matptr = clustalvdnamt; mat_xref = def_dna_xref; scale = 0.66; } else { matptr = userdnamat; mat_xref = dna_xref; } maxres = get_matrix(matptr, mat_xref, matrix, neg_matrix, int_scale); if (maxres == 0) return ((sint) - 1); /* matrix[0][4]=transition_weight*matrix[0][0]; matrix[4][0]=transition_weight*matrix[0][0]; matrix[2][11]=transition_weight*matrix[0][0]; matrix[11][2]=transition_weight*matrix[0][0]; matrix[2][12]=transition_weight*matrix[0][0]; matrix[12][2]=transition_weight*matrix[0][0]; */ /* fix suggested by Chanan Rubin at Compugen */ matrix[mat_xref[0]][mat_xref[4]] = transition_weight * matrix[0][0]; matrix[mat_xref[4]][mat_xref[0]] = transition_weight * matrix[0][0]; matrix[mat_xref[2]][mat_xref[11]] = transition_weight * matrix[0][0]; matrix[mat_xref[11]][mat_xref[2]] = transition_weight * matrix[0][0]; matrix[mat_xref[2]][mat_xref[12]] = transition_weight * matrix[0][0]; matrix[mat_xref[12]][mat_xref[2]] = transition_weight * matrix[0][0]; gapcoef1 = gapcoef2 = 100.0 * gap_open * scale; lencoef1 = lencoef2 = 100.0 * gap_extend * scale; } else { if (len1 == 0 || len2 == 0) { logmin = 1.0; logdiff = 1.0; } else { minlen = MIN(len1, len2); logmin = 1.0 / log10((double) minlen); if (len2 < len1) logdiff = 1.0 + 0.5 * log10((double) ((float) len2 / (float) len1)); else if (len1 < len2) logdiff = 1.0 + 0.5 * log10((double) ((float) len1 / (float) len2)); else logdiff 
= 1.0; if (logdiff < 0.9) logdiff = 0.9; } if (debug > 0) fprintf(stdout, "%d %d logmin %f logdiff %f\n", (pint) len1, (pint) len2, logmin, logdiff); scale = 0.75; if (strcmp(mtrxname, "blosum") == 0) { scale = 0.75; if (negative || distance_tree == FALSE) matptr = blosum40mt; else if (pcid > 80.0) { matptr = blosum80mt; } else if (pcid > 60.0) { matptr = blosum62mt2; } else if (pcid > 40.0) { matptr = blosum45mt; } else if (pcid > 30.0) { scale = 0.5; matptr = blosum45mt; } else if (pcid > 20.0) { scale = 0.6; matptr = blosum45mt; } else { scale = 0.6; matptr = blosum30mt; } mat_xref = def_aa_xref; } else if (strcmp(mtrxname, "pam") == 0) { scale = 0.75; if (negative || distance_tree == FALSE) matptr = pam120mt; else if (pcid > 80.0) matptr = pam20mt; else if (pcid > 60.0) matptr = pam60mt; else if (pcid > 40.0) matptr = pam120mt; else matptr = pam350mt; mat_xref = def_aa_xref; } else if (strcmp(mtrxname, "gonnet") == 0) { scale /= 2.0; if (negative || distance_tree == FALSE) matptr = gon250mt; else if (pcid > 35.0) { matptr = gon80mt; scale /= 2.0; } else if (pcid > 25.0) { if (minlen < 100) matptr = gon250mt; else matptr = gon120mt; } else { if (minlen < 100) matptr = gon350mt; else matptr = gon160mt; } mat_xref = def_aa_xref; int_scale /= 10; } else if (strcmp(mtrxname, "id") == 0) { matptr = idmat; mat_xref = def_aa_xref; } else if (user_series) { matptr = NULL; found = FALSE; for (i = 0; i < matseries.nmat; i++) if (pcid >= matseries.mat[i].llimit && pcid <= matseries.mat[i].ulimit) { j = i; found = TRUE; break; } if (found == FALSE) { if (!error_given) warning ("\nSeries matrix not found for sequence percent identity = %d.\n" "(Using first matrix in series as a default.)\n" "This alignment may not be optimal!\n" "SUGGESTION: Check your matrix series input file and try again.", (int) pcid); error_given = TRUE; j = 0; } if (debug > 0) fprintf(stdout, "pcid %d matrix %d\n", (pint) pcid, (pint) j + 1); matptr = matseries.mat[j].matptr; mat_xref = 
matseries.mat[j].aa_xref; /* this gives a scale of 0.5 for pcid=llimit and 1.0 for pcid=ulimit */ scale = 0.5 + (pcid - matseries.mat[j].llimit) / ((matseries.mat[j].ulimit - matseries.mat[j].llimit) * 2.0); } else { matptr = usermat; mat_xref = aa_xref; } if (debug > 0) fprintf(stdout, "pcid %3.1f scale %3.1f\n", pcid, scale); maxres = get_matrix(matptr, mat_xref, matrix, negative, int_scale); if (maxres == 0) { fprintf(stdout, "Error: matrix %s not found\n", mtrxname); return (-1); } if (negative) { gapcoef1 = gapcoef2 = 100.0 * (float) (gap_open); lencoef1 = lencoef2 = 100.0 * gap_extend; } else { if (mat_avscore <= 0) gapcoef1 = gapcoef2 = 100.0 * (float) (gap_open + logmin); else gapcoef1 = gapcoef2 = scale * mat_avscore * (float) (gap_open / (logdiff * logmin)); lencoef1 = lencoef2 = 100.0 * gap_extend; } } if (debug > 0) { fprintf(stdout, "matavscore %d\n", mat_avscore); fprintf(stdout, "Gap Open1 %d Gap Open2 %d Gap Extend1 %d Gap Extend2 %d\n", (pint) gapcoef1, (pint) gapcoef2, (pint) lencoef1, (pint) lencoef2); fprintf(stdout, "Matrix %s\n", mtrxname); } profile1 = (sint **) ckalloc((prf_length1 + 2) * sizeof(sint *)); for (i = 0; i < prf_length1 + 2; i++) profile1[i] = (sint *) ckalloc((LENCOL + 2) * sizeof(sint)); profile2 = (sint **) ckalloc((prf_length2 + 2) * sizeof(sint *)); for (i = 0; i < prf_length2 + 2; i++) profile2[i] = (sint *) ckalloc((LENCOL + 2) * sizeof(sint)); /* calculate the Gap Coefficients. */ gaps = (sint *) ckalloc((max_aln_length + 1) * sizeof(sint)); if (switch_profiles == FALSE) calc_gap_coeff(alignment, gaps, profile1, (struct_penalties1 && use_ss1), gap_penalty_mask1, (sint) 0, nseqs1, prf_length1, gapcoef1, lencoef1); else calc_gap_coeff(alignment, gaps, profile1, (struct_penalties2 && use_ss2), gap_penalty_mask2, (sint) 0, nseqs1, prf_length1, gapcoef1, lencoef1); /* calculate the profile matrix. 
*/ calc_prf1(profile1, alignment, gaps, matrix, aln_weight, prf_length1, (sint) 0, nseqs1); if (debug > 4) { extern char *amino_acid_codes; for (j = 0; j <= max_aa; j++) fprintf(stdout, "%c ", amino_acid_codes[j]); fprintf(stdout, "\n"); for (i = 0; i < prf_length1; i++) { for (j = 0; j <= max_aa; j++) fprintf(stdout, "%d ", (pint) profile1[i + 1][j]); fprintf(stdout, "%d ", (pint) profile1[i + 1][gap_pos1]); fprintf(stdout, "%d ", (pint) profile1[i + 1][gap_pos2]); fprintf(stdout, "%d %d\n", (pint) profile1[i + 1][GAPCOL], (pint) profile1[i + 1][LENCOL]); } } /* calculate the Gap Coefficients. */ if (switch_profiles == FALSE) calc_gap_coeff(alignment, gaps, profile2, (struct_penalties2 && use_ss2), gap_penalty_mask2, nseqs1, nseqs1 + nseqs2, prf_length2, gapcoef2, lencoef2); else calc_gap_coeff(alignment, gaps, profile2, (struct_penalties1 && use_ss1), gap_penalty_mask1, nseqs1, nseqs1 + nseqs2, prf_length2, gapcoef2, lencoef2); /* calculate the profile matrix. */ calc_prf2(profile2, alignment, aln_weight, prf_length2, nseqs1, nseqs1 + nseqs2); aln_weight = ckfree((void *) aln_weight); if (debug > 4) { extern char *amino_acid_codes; for (j = 0; j <= max_aa; j++) fprintf(stdout, "%c ", amino_acid_codes[j]); fprintf(stdout, "\n"); for (i = 0; i < prf_length2; i++) { for (j = 0; j <= max_aa; j++) fprintf(stdout, "%d ", (pint) profile2[i + 1][j]); fprintf(stdout, "%d ", (pint) profile2[i + 1][gap_pos1]); fprintf(stdout, "%d ", (pint) profile2[i + 1][gap_pos2]); fprintf(stdout, "%d %d\n", (pint) profile2[i + 1][GAPCOL], (pint) profile2[i + 1][LENCOL]); } } /* align the profiles */ /* use Myers and Miller to align two sequences */ last_print = 0; print_ptr = 1; sb1 = sb2 = 0; se1 = prf_length1; se2 = prf_length2; /* Doing MPI_Send stuff here ... 
*/ mybsize = 0; mybsize += 17 * sizeof(int); mybsize += sizeof(char); mybsize += (((LENCOL + 2) * (prf_length1 + 2)) * sizeof(int)); mybsize += (((LENCOL + 2) * (prf_length2 + 2)) * sizeof(int)); mybsize += (nseqs + 1) * sizeof(int); MPI_Send(&mybsize, 1, MPI_INT, dest, CALLING_STUPID, MPI_COMM_WORLD); mpi_buffer = (char *) malloc(mybsize * sizeof(char)); assert(mpi_buffer); position = 0; MPI_Pack(&nseqs, 1, MPI_INT, mpi_buffer, mybsize, &position, MPI_COMM_WORLD); MPI_Pack(&which_set, 1, MPI_INT, mpi_buffer, mybsize, &position, MPI_COMM_WORLD); MPI_Pack(&sb1, 1, MPI_INT, mpi_buffer, mybsize, &position, MPI_COMM_WORLD); MPI_Pack(&sb2, 1, MPI_INT, mpi_buffer, mybsize, &position, MPI_COMM_WORLD); MPI_Pack(&se1, 1, MPI_INT, mpi_buffer, mybsize, &position, MPI_COMM_WORLD); MPI_Pack(&se2, 1, MPI_INT, mpi_buffer, mybsize, &position, MPI_COMM_WORLD); MPI_Pack(&profile1[0][GAPCOL], 1, MPI_INT, mpi_buffer, mybsize, &position, MPI_COMM_WORLD); MPI_Pack(&profile1[prf_length1][GAPCOL], 1, MPI_INT, mpi_buffer, mybsize, &position, MPI_COMM_WORLD); MPI_Pack(&last_print, 1, MPI_INT, mpi_buffer, mybsize, &position, MPI_COMM_WORLD); MPI_Pack(&print_ptr, 1, MPI_INT, mpi_buffer, mybsize, &position, MPI_COMM_WORLD); MPI_Pack(&max_aln_length, 1, MPI_INT, mpi_buffer, mybsize, &position, MPI_COMM_WORLD); MPI_Pack(&max_aa, 1, MPI_INT, mpi_buffer, mybsize, &position, MPI_COMM_WORLD); MPI_Pack(&gap_pos1, 1, MPI_INT, mpi_buffer, mybsize, &position, MPI_COMM_WORLD); MPI_Pack(&gap_pos2, 1, MPI_INT, mpi_buffer, mybsize, &position, MPI_COMM_WORLD); MPI_Pack(&endgappenalties, 1, MPI_CHAR, mpi_buffer, mybsize, &position, MPI_COMM_WORLD); MPI_Pack(&prf_length1, 1, MPI_INT, mpi_buffer, mybsize, &position, MPI_COMM_WORLD); MPI_Pack(&prf_length2, 1, MPI_INT, mpi_buffer, mybsize, &position, MPI_COMM_WORLD); MPI_Pack(&reverse_rank, 1, MPI_INT, mpi_buffer, mybsize, &position, MPI_COMM_WORLD); for (i = 0; i < prf_length1 + 2; i++) MPI_Pack(profile1[i], (LENCOL + 2), MPI_INT, mpi_buffer, mybsize, &position, 
MPI_COMM_WORLD); for (i = 0; i < prf_length2 + 2; i++) MPI_Pack(profile2[i], (LENCOL + 2), MPI_INT, mpi_buffer, mybsize, &position, MPI_COMM_WORLD); MPI_Pack(group, (nseqs + 1), MPI_INT, mpi_buffer, mybsize, &position, MPI_COMM_WORLD); MPI_Send(mpi_buffer, mybsize, MPI_PACKED, dest, MY_DATA_TAG, MPI_COMM_WORLD); free(mpi_buffer); /* * Seems OK to free() profile1[] and profile2[] here. */ for (i = 0; i < prf_length1 + 2; i++) profile1[i] = ckfree((void *) profile1[i]); profile1 = ckfree((void *) profile1); for (i = 0; i < prf_length2 + 2; i++) profile2[i] = ckfree((void *) profile2[i]); profile2 = ckfree((void *) profile2); for (i = 0; i < nseqs1 + nseqs2; i++) alignment[i] = ckfree((void *) alignment[i]); alignment = ckfree((void *) alignment); aln_len = ckfree((void *) aln_len); gaps = ckfree((void *) gaps); free(group); return 1; } /* * To Receive displ[] from MPI slaves and * to update seq_array[][] and seqlen_array[]. * * Note: *from_where is the rank of the sending MPI process, * *which_set is the "set" variable in malign.c. */ lint prf_update(int *from_where, int *preverse_rank, int *which_set) { int mybsize, position; char *mpi_buffer; MPI_Status status; sint *group; int score, i, j; int NumSeq, len; /* Doing MPI_Recv stuff here ... 
*/ MPI_Recv(&mybsize, 1, MPI_INT, MPI_ANY_SOURCE, MY_BSIZE_TAG, MPI_COMM_WORLD, &status); mpi_buffer = (char *) malloc(mybsize * sizeof(char)); assert(mpi_buffer); *from_where = status.MPI_SOURCE; MPI_Recv(mpi_buffer, mybsize, MPI_PACKED, status.MPI_SOURCE, MY_RESULT_TAG, MPI_COMM_WORLD, &status); position = 0; MPI_Unpack(mpi_buffer, mybsize, &position, &score, 1, MPI_INT, MPI_COMM_WORLD); MPI_Unpack(mpi_buffer, mybsize, &position, &print_ptr, 1, MPI_INT, MPI_COMM_WORLD); MPI_Unpack(mpi_buffer, mybsize, &position, &last_print, 1, MPI_INT, MPI_COMM_WORLD); MPI_Unpack(mpi_buffer, mybsize, &position, &prf_length1, 1, MPI_INT, MPI_COMM_WORLD); MPI_Unpack(mpi_buffer, mybsize, &position, &prf_length2, 1, MPI_INT, MPI_COMM_WORLD); MPI_Unpack(mpi_buffer, mybsize, &position, &max_aln_length, 1, MPI_INT, MPI_COMM_WORLD); MPI_Unpack(mpi_buffer, mybsize, &position, which_set, 1, MPI_INT, MPI_COMM_WORLD); MPI_Unpack(mpi_buffer, mybsize, &position, preverse_rank, 1, MPI_INT, MPI_COMM_WORLD); #ifdef DEBUG { int rank; MPI_Comm_rank(MPI_COMM_WORLD,&rank); fprintf(stderr,"DEBUG: line 712, rank %d, preverse_rank = %d\n", rank,*preverse_rank); fflush(stderr); } #endif displ = (sint *) ckalloc((max_aln_length + 1) * sizeof(sint)); MPI_Unpack(mpi_buffer, mybsize, &position, displ, (max_aln_length + 1), MPI_INT, MPI_COMM_WORLD); group = (sint *) ckalloc((nseqs + 1) * sizeof(sint)); MPI_Unpack(mpi_buffer, mybsize, &position, group, (nseqs + 1), MPI_INT, MPI_COMM_WORLD); free(mpi_buffer); aln_path1 = (char *) ckalloc((max_aln_length + 1) * sizeof(char)); aln_path2 = (char *) ckalloc((max_aln_length + 1) * sizeof(char)); /* * This alignment[][] array was used to be initialized in * prf_init(). It has to be re-initialized here for the * specific "group". 
*/ alignment = (char **) ckalloc(nseqs * sizeof(char *)); aln_len = (sint *) ckalloc(nseqs * sizeof(sint)); nseqs1 = 0; for (i = 0; i < nseqs; i++) { if (group[i + 1] == 1) { len = seqlen_array[i + 1]; alignment[nseqs1] = (char *) ckalloc((prf_length1 + 2) * sizeof(char)); for (j = 0; j < len; j++) alignment[nseqs1][j] = seq_array[i + 1][j + 1]; for (j = len; j < prf_length1; j++) alignment[nseqs1][j + 1] = gap_pos1; alignment[nseqs1][prf_length1 + 1] = ENDALN; aln_len[nseqs1] = prf_length1; nseqs1++; } } nseqs2 = 0; for (i = 0; i < nseqs; i++) { if (group[i + 1] == 2) { len = seqlen_array[i + 1]; alignment[nseqs1 + nseqs2] = (char *) ckalloc((prf_length2 + 2) * sizeof(char)); for (j = 0; j < len; j++) alignment[nseqs1 + nseqs2][j] = seq_array[i + 1][j + 1]; for (j = len; j < prf_length2; j++) alignment[nseqs1 + nseqs2][j + 1] = gap_pos1; alignment[nseqs1 + nseqs2][j] = ENDALN; aln_len[nseqs1 + nseqs2] = prf_length2; nseqs2++; } } ptracepath(&alignment_len); displ = ckfree((void *) displ); add_ggaps(); prf_length1 = alignment_len; aln_path1 = ckfree((void *) aln_path1); aln_path2 = ckfree((void *) aln_path2); NumSeq = 0; for (j = 0; j < nseqs; j++) { if (group[j + 1] == 1) { seqlen_array[j + 1] = prf_length1; realloc_seq(j + 1, prf_length1); for (i = 0; i < prf_length1; i++) seq_array[j + 1][i + 1] = alignment[NumSeq][i]; NumSeq++; } } for (j = 0; j < nseqs; j++) { if (group[j + 1] == 2) { seqlen_array[j + 1] = prf_length1; seq_array[j + 1] = (char *) realloc(seq_array[j + 1], (prf_length1 + 2) * sizeof(char)); realloc_seq(j + 1, prf_length1); for (i = 0; i < prf_length1; i++) seq_array[j + 1][i + 1] = alignment[NumSeq][i]; NumSeq++; } } for (i = 0; i < nseqs1 + nseqs2; i++) alignment[i] = ckfree((void *) alignment[i]); alignment = ckfree((void *) alignment); aln_len = ckfree((void *) aln_len); /* gaps = ckfree((void *) gaps); */ group = ckfree((void *) group); #ifdef DEBUG { int rank; MPI_Comm_rank(MPI_COMM_WORLD,&rank); fprintf(stderr,"DEBUG: line 822, rank 
%d\n", rank); fflush(stderr); } #endif return (score / 100); } static void add_ggaps(void) { sint j; sint i, ix; sint len; char *ta; ta = (char *) ckalloc((alignment_len + 1) * sizeof(char)); for (j = 0; j < nseqs1; j++) { ix = 0; for (i = 0; i < alignment_len; i++) { if (aln_path1[i] == 2) { if (ix < aln_len[j]) ta[i] = alignment[j][ix]; else ta[i] = ENDALN; ix++; } else if (aln_path1[i] == 1) { /* insertion in first alignment... */ ta[i] = gap_pos1; } else { fprintf(stdout, "Error in aln_path\n"); } } ta[i] = ENDALN; len = alignment_len; alignment[j] = (char *) realloc(alignment[j], (len + 2) * sizeof(char)); for (i = 0; i < len; i++) alignment[j][i] = ta[i]; alignment[j][i] = ENDALN; aln_len[j] = len; } for (j = nseqs1; j < nseqs1 + nseqs2; j++) { ix = 0; for (i = 0; i < alignment_len; i++) { if (aln_path2[i] == 2) { if (ix < aln_len[j]) ta[i] = alignment[j][ix]; else ta[i] = ENDALN; ix++; } else if (aln_path2[i] == 1) { /* insertion in second alignment... */ ta[i] = gap_pos1; } else { fprintf(stdout, "Error in aln_path\n"); } } ta[i] = ENDALN; len = alignment_len; alignment[j] = (char *) realloc(alignment[j], (len + 2) * sizeof(char)); for (i = 0; i < len; i++) alignment[j][i] = ta[i]; alignment[j][i] = ENDALN; aln_len[j] = len; } ta = ckfree((void *) ta); if (struct_penalties1 != NONE) gap_penalty_mask1 = add_ggaps_mask(gap_penalty_mask1, alignment_len, aln_path1, aln_path2); if (struct_penalties1 == SECST) sec_struct_mask1 = add_ggaps_mask(sec_struct_mask1, alignment_len, aln_path1, aln_path2); if (struct_penalties2 != NONE) gap_penalty_mask2 = add_ggaps_mask(gap_penalty_mask2, alignment_len, aln_path2, aln_path1); if (struct_penalties2 == SECST) sec_struct_mask2 = add_ggaps_mask(sec_struct_mask2, alignment_len, aln_path2, aln_path1); if (debug > 0) { char c; extern char *amino_acid_codes; for (i = 0; i < nseqs1 + nseqs2; i++) { for (j = 0; j < alignment_len; j++) { if (alignment[i][j] == ENDALN) break; else if ((alignment[i][j] == gap_pos1) || 
(alignment[i][j] == gap_pos2)) c = '-'; else c = amino_acid_codes[alignment[i][j]]; fprintf(stdout, "%c", c); } fprintf(stdout, "\n\n"); } } } static char *add_ggaps_mask(char *mask, int len, char *path1, char *path2) { int i, ix; char *ta; ta = (char *) ckalloc((len + 1) * sizeof(char)); ix = 0; if (switch_profiles == FALSE) { for (i = 0; i < len; i++) { if (path1[i] == 2) { ta[i] = mask[ix]; ix++; } else if (path1[i] == 1) ta[i] = gap_pos1; } } else { for (i = 0; i < len; i++) { if (path2[i] == 2) { ta[i] = mask[ix]; ix++; } else if (path2[i] == 1) ta[i] = gap_pos1; } } mask = (char *) realloc(mask, (len + 2) * sizeof(char)); for (i = 0; i < len; i++) mask[i] = ta[i]; mask[i] = '\0'; ta = ckfree((void *) ta); return (mask); } static lint prfscore(sint n, sint m) { sint ix; lint score; score = 0.0; for (ix = 0; ix <= max_aa; ix++) { score += (profile1[n][ix] * profile2[m][ix]); } score += (profile1[n][gap_pos1] * profile2[m][gap_pos1]); score += (profile1[n][gap_pos2] * profile2[m][gap_pos2]); return (score / 10); } static void ptracepath(sint * alen) { sint i, j, k, pos, to_do; pos = 0; to_do = print_ptr - 1; for (i = 1; i <= to_do; ++i) { if (debug > 1) fprintf(stdout, "%d ", (pint) displ[i]); if (displ[i] == 0) { aln_path1[pos] = 2; aln_path2[pos] = 2; ++pos; } else { if ((k = displ[i]) > 0) { for (j = 0; j <= k - 1; ++j) { aln_path2[pos + j] = 2; aln_path1[pos + j] = 1; } pos += k; } else { k = (displ[i] < 0) ? 
displ[i] * -1 : displ[i]; for (j = 0; j <= k - 1; ++j) { aln_path1[pos + j] = 2; aln_path2[pos + j] = 1; } pos += k; } } } if (debug > 1) fprintf(stdout, "\n"); (*alen) = pos; } static void pdel(sint k) { if (last_print < 0) last_print = displ[print_ptr - 1] -= k; else last_print = displ[print_ptr++] = -(k); } static void padd(sint k) { if (last_print < 0) { displ[print_ptr - 1] = k; displ[print_ptr++] = last_print; } else last_print = displ[print_ptr++] = k; } static void palign(void) { displ[print_ptr++] = last_print = 0; } static lint pdiff(sint A, sint B, sint M, sint N, sint go1, sint go2) { sint midi, midj, type; lint midh; lint t, tl, g, h; sint i, j; lint hh, f, e, s; /* Boundary cases: M <= 1 or N == 0 */ if (debug > 2) fprintf(stdout, "A %d B %d M %d N %d midi %d go1 %d go2 %d\n", (pint) A, (pint) B, (pint) M, (pint) N, (pint) M / 2, (pint) go1, (pint) go2); /* if sequence B is empty.... */ if (N <= 0) { /* if sequence A is not empty.... */ if (M > 0) { /* delete residues A[1] to A[M] */ pdel(M); } return (-gap_penalty1(A, B, M)); } /* if sequence A is empty.... */ if (M <= 1) { if (M <= 0) { /* insert residues B[1] to B[N] */ padd(N); return (-gap_penalty2(A, B, N)); } /* if sequence A has just one residue.... 
*/ if (go1 == 0) midh = -gap_penalty1(A + 1, B + 1, N); else midh = -gap_penalty2(A + 1, B, 1) - gap_penalty1(A + 1, B + 1, N); midj = 0; for (j = 1; j <= N; j++) { hh = -gap_penalty1(A, B + 1, j - 1) + prfscore(A + 1, B + j) - gap_penalty1(A + 1, B + j + 1, N - j); if (hh > midh) { midh = hh; midj = j; } } if (midj == 0) { padd(N); pdel(1); } else { if (midj > 1) padd(midj - 1); palign(); if (midj < N) padd(N - midj); } return midh; } /* Divide sequence A in half: midi */ midi = M / 2; /* In a forward phase, calculate all HH[j] and HH[j] */ HH[0] = 0.0; t = -open_penalty1(A, B + 1); tl = -ext_penalty1(A, B + 1); for (j = 1; j <= N; j++) { HH[j] = t = t + tl; DD[j] = t - open_penalty2(A + 1, B + j); } if (go1 == 0) t = 0; else t = -open_penalty2(A + 1, B); tl = -ext_penalty2(A + 1, B); for (i = 1; i <= midi; i++) { s = HH[0]; HH[0] = hh = t = t + tl; f = t - open_penalty1(A + i, B + 1); for (j = 1; j <= N; j++) { g = open_penalty1(A + i, B + j); h = ext_penalty1(A + i, B + j); if ((hh = hh - g - h) > (f = f - h)) f = hh; g = open_penalty2(A + i, B + j); h = ext_penalty2(A + i, B + j); if ((hh = HH[j] - g - h) > (e = DD[j] - h)) e = hh; hh = s + prfscore(A + i, B + j); if (f > hh) hh = f; if (e > hh) hh = e; s = HH[j]; HH[j] = hh; DD[j] = e; } } DD[0] = HH[0]; /* In a reverse phase, calculate all RR[j] and SS[j] */ RR[N] = 0.0; tl = 0.0; for (j = N - 1; j >= 0; j--) { g = -open_penalty1(A + M, B + j + 1); tl -= ext_penalty1(A + M, B + j + 1); RR[j] = g + tl; SS[j] = RR[j] - open_penalty2(A + M, B + j); gS[j] = open_penalty2(A + M, B + j); } tl = 0.0; for (i = M - 1; i >= midi; i--) { s = RR[N]; if (go2 == 0) g = 0; else g = -open_penalty2(A + i + 1, B + N); tl -= ext_penalty2(A + i + 1, B + N); RR[N] = hh = g + tl; t = open_penalty1(A + i, B + N); f = RR[N] - t; for (j = N - 1; j >= 0; j--) { g = open_penalty1(A + i, B + j + 1); h = ext_penalty1(A + i, B + j + 1); if ((hh = hh - g - h) > (f = f - h - g + t)) f = hh; t = g; g = open_penalty2(A + i + 1, B + j); h = 
ext_penalty2(A + i + 1, B + j); hh = RR[j] - g - h; if (i == (M - 1)) { e = SS[j] - h; } else { e = SS[j] - h - g + open_penalty2(A + i + 2, B + j); gS[j] = g; } if (hh > e) e = hh; hh = s + prfscore(A + i + 1, B + j + 1); if (f > hh) hh = f; if (e > hh) hh = e; s = RR[j]; RR[j] = hh; SS[j] = e; } } SS[N] = RR[N]; gS[N] = open_penalty2(A + midi + 1, B + N); /* find midj, such that HH[j]+RR[j] or DD[j]+SS[j]+gap is the maximum */ midh = HH[0] + RR[0]; midj = 0; type = 1; for (j = 0; j <= N; j++) { hh = HH[j] + RR[j]; if (hh >= midh) if (hh > midh || (HH[j] != DD[j] && RR[j] == SS[j])) { midh = hh; midj = j; } } for (j = N; j >= 0; j--) { hh = DD[j] + SS[j] + gS[j]; if (hh > midh) { midh = hh; midj = j; type = 2; } } /* Conquer recursively around midpoint */ if (type == 1) { /* Type 1 gaps */ if (debug > 2) fprintf(stdout, "Type 1,1: midj %d\n", (pint) midj); pdiff(A, B, midi, midj, go1, 1); if (debug > 2) fprintf(stdout, "Type 1,2: midj %d\n", (pint) midj); pdiff(A + midi, B + midj, M - midi, N - midj, 1, go2); } else { if (debug > 2) fprintf(stdout, "Type 2,1: midj %d\n", (pint) midj); pdiff(A, B, midi - 1, midj, go1, 0); pdel(2); if (debug > 2) fprintf(stdout, "Type 2,2: midj %d\n", (pint) midj); pdiff(A + midi + 1, B + midj, M - midi - 1, N - midj, 0, go2); } return midh; /* Return the score of the best alignment */ } /* calculate the score for opening a gap at residues A[i] and B[j] */ static sint open_penalty1(sint i, sint j) { sint g; if (!endgappenalties && (i == 0 || i == prf_length1)) return (0); g = profile2[j][GAPCOL] + profile1[i][GAPCOL]; return (g); } /* calculate the score for extending an existing gap at A[i] and B[j] */ static sint ext_penalty1(sint i, sint j) { sint h; if (!endgappenalties && (i == 0 || i == prf_length1)) return (0); h = profile2[j][LENCOL]; return (h); } /* calculate the score for a gap of length k, at residues A[i] and B[j] */ static sint gap_penalty1(sint i, sint j, sint k) { sint ix; sint gp; sint g, h = 0; if (k <= 0) return 
(0); if (!endgappenalties && (i == 0 || i == prf_length1)) return (0); g = profile2[j][GAPCOL] + profile1[i][GAPCOL]; for (ix = 0; ix < k && ix + j < prf_length2; ix++) h = profile2[ix + j][LENCOL]; gp = g + h * k; return (gp); } /* calculate the score for opening a gap at residues A[i] and B[j] */ static sint open_penalty2(sint i, sint j) { sint g; if (!endgappenalties && (j == 0 || j == prf_length2)) return (0); g = profile1[i][GAPCOL] + profile2[j][GAPCOL]; return (g); } /* calculate the score for extending an existing gap at A[i] and B[j] */ static sint ext_penalty2(sint i, sint j) { sint h; if (!endgappenalties && (j == 0 || j == prf_length2)) return (0); h = profile1[i][LENCOL]; return (h); } /* calculate the score for a gap of length k, at residues A[i] and B[j] */ static sint gap_penalty2(sint i, sint j, sint k) { sint ix; sint gp; sint g, h = 0; if (k <= 0) return (0); if (!endgappenalties && (j == 0 || j == prf_length2)) return (0); g = profile1[i][GAPCOL] + profile2[j][GAPCOL]; for (ix = 0; ix < k && ix + i < prf_length1; ix++) h = profile1[ix + i][LENCOL]; gp = g + h * k; return (gp); } clustalw-mpi-0.15/random.c0000644000411000001440000000315607644152540014034 0ustar liusers/* * * Rand.c * * - linear and additive congruential random number generators * (see R. Sedgewick, Algorithms, Chapter 35) * * Implementation: R. 
Fuchs, EMBL Data Library, 1991 * */ #include unsigned long linrand(unsigned long r); unsigned long addrand(unsigned long r); void addrandinit(unsigned long s); static unsigned long mult(unsigned long p,unsigned long q); #define m1 10000 #define m 100000000 static unsigned long mult(unsigned long p, unsigned long q); /* linear congruential method * * linrand() returns an unsigned long random number in the range 0 to r-1 */ unsigned long linrand(unsigned long r) { static unsigned long a=1234567; a = (mult(a,31415821)+1) % m; return( ( (a / m1) * r) / m1 ); } static unsigned long mult(unsigned long p, unsigned long q) { unsigned long p1,p0,q1,q0; p1 = p/m1; p0 = p % m1; q1 = q/m1; q0 = q % m1; return((((p0*q1 + p1*q0) % m1) * m1 + p0*q0) % m); } /* additive congruential method * * addrand() returns an unsigned long random number in the range 0 to r-1 * The random number generator is initialized by addrandinit() */ static unsigned long j; static unsigned long a[55]; unsigned long addrand(unsigned long r) { int x,y; /* fprintf(stdout,"\n j = %d",j); */ j = (j + 1) % 55; /* fprintf(stdout,"\n j = %d",j); */ x = (j+23)%55; y = (j+54)%55; a[j] = (a[x] + a[y]) % m; /* a[j] = (a[(j+23)%55] + a[(j+54)%55]) % m; */ /* fprintf(stdout,"\n a[j] = %d",a[j]); */ return( ((a[j] / m1) * r) / m1 ); } void addrandinit(unsigned long s) { a[0] = s; j = 0; do { ++j; a[j] = (mult(31,a[j-1]) + 1) % m; } while (j<54); } clustalw-mpi-0.15/readmat.c0000644000411000001440000002442107644152540014167 0ustar liusers#include #include #include #include #include #include "clustalw.h" #include "matrices.h" /* * Prototypes */ static Boolean commentline(char *line); /* * Global variables */ extern char *amino_acid_codes; extern sint gap_pos1, gap_pos2; extern sint max_aa; extern short def_dna_xref[],def_aa_xref[]; extern sint mat_avscore; extern sint debug; extern Boolean dnaflag; extern Boolean user_series; extern UserMatSeries matseries; extern short usermatseries[MAXMAT][NUMRES][NUMRES]; extern short 
aa_xrefseries[MAXMAT][NUMRES+1]; void init_matrix(void) { char c1,c2; short i, j, maxres; max_aa = strlen(amino_acid_codes)-2; gap_pos1 = NUMRES-2; /* code for gaps inserted by clustalw */ gap_pos2 = NUMRES-1; /* code for gaps already in alignment */ /* set up cross-reference for default matrices hard-coded in matrices.h */ for (i=0;i max) max = matrix[i][j]; } if (debug>1) fprintf(stdout,"maxres %d\n",(pint)max_aa); if (debug>1) fprintf(stdout,"average mismatch score %d\n",(pint)av3); if (debug>1) fprintf(stdout,"average match score %d\n",(pint)av2); if (debug>1) fprintf(stdout,"average score %d\n",(pint)av1); /* if requested, make a positive matrix - add -(lowest score) to every entry */ if (neg_flag == FALSE) { if (debug>1) fprintf(stdout,"min %d max %d\n",(pint)min,(pint)max); if (min < 0) { for (i=0;i<=max_aa;i++) { ti = xref[i]; if (ti != -1) { for (j=0;j<=max_aa;j++) { tj = xref[j]; /* if (tj != -1) matrix[ti][tj] -= (2*av3); */ if (tj != -1) matrix[ti][tj] -= min; } } } } /* gr_score = av3; gg_score = -av3; */ } for (i=0;i 100 || ulimit <0 || ulimit>100) { error("Bad format in file %s\n",filename); fclose(fd); return((sint)0); } if(ulimit<=llimit) { error("in file %s: lower limit is greater than upper (%d-%d)\n",filename,llimit,ulimit); fclose(fd); return((sint)0); } n=read_user_matrix(mat_filename,&usermatseries[nmat][0][0],&aa_xrefseries[nmat][0]); if(n<=0) { error("Bad format in matrix file %s\n",mat_filename); fclose(fd); return((sint)0); } matseries.mat[nmat].llimit=llimit; matseries.mat[nmat].ulimit=ulimit; matseries.mat[nmat].matptr=&usermatseries[nmat][0][0]; matseries.mat[nmat].aa_xref=&aa_xrefseries[nmat][0]; nmat++; } } fclose(fd); matseries.nmat=nmat; maxres=n; return(maxres); } sint read_user_matrix(char *filename, short *usermat, short *xref) { double f; FILE *fd; sint numargs,farg; sint i, j, k = 0; char codes[NUMRES]; char inline1[1024]; char *args[NUMRES+4]; char c1,c2; sint ix1, ix = 0; sint maxres = 0; float scale; if (filename[0] == 
'\0') { error("comparison matrix not specified"); return((sint)0); } if ((fd=fopen(filename,"r"))==NULL) { error("cannot open %s", filename); return((sint)0); } maxres = 0; while (fgets(inline1,1024,fd) != NULL) { if (commentline(inline1)) continue; if(linetype(inline1,"CLUSTAL_SERIES")) { error("in %s - single matrix expected.", filename); fclose(fd); return((sint)0); } /* read residue characters. */ k = 0; for (j=0;jNUMRES) { error("too many entries in matrix %s",filename); fclose(fd); return((sint)0); } } codes[k] = '\0'; break; } if (k == 0) { error("wrong format in matrix %s",filename); fclose(fd); return((sint)0); } /* cross-reference the residues */ for (i=0;i #include #include #include #include "clustalw.h" #define MIN(a,b) ((a)<(b)?(a):(b)) /* * Prototypes */ static char * get_seq(char *,sint *,char *); static char * get_clustal_seq(char *,sint *,char *,sint); static char * get_msf_seq(char *,sint *,char *,sint); static void check_infile(sint *); static void p_encode(char *, char *, sint); static void n_encode(char *, char *, sint); static sint res_index(char *,char); static Boolean check_dnaflag(char *, sint); static sint count_clustal_seqs(void); static sint count_pir_seqs(void); static sint count_msf_seqs(void); static sint count_rsf_seqs(void); static void get_swiss_feature(char *line,sint len); static void get_rsf_feature(char *line,sint len); static void get_swiss_mask(char *line,sint len); static void get_clustal_ss(sint length); static void get_embl_ss(sint length); static void get_rsf_ss(sint length); static void get_gde_ss(sint length); static Boolean cl_blankline(char *line); /* * Global variables */ extern sint max_names; FILE *fin; extern Boolean usemenu, dnaflag, explicit_dnaflag; extern Boolean interactive; extern char seqname[]; extern sint nseqs; extern sint *seqlen_array; extern sint *output_index; extern char **names,**titles; extern char **seq_array; extern Boolean profile1_empty, profile2_empty; extern sint gap_pos2; extern sint 
max_aln_length; extern char *gap_penalty_mask, *sec_struct_mask; extern sint struct_penalties; extern char *ss_name; extern sint profile_no; extern sint debug; char *amino_acid_codes = "ABCDEFGHIKLMNPQRSTUVWXYZ-"; /* DES */ static sint seqFormat; static char chartab[128]; static char *formatNames[] = {"unknown","EMBL/Swiss-Prot","PIR", "Pearson","GDE","Clustal","Pileup/MSF","RSF","USER","PHYLIP","NEXUS"}; void fill_chartab(void) /* Create translation and check table */ { register sint i; register char c; for(i=0;i<128;chartab[i++]=0); for(i=0;(c=amino_acid_codes[i]);i++) chartab[(int)c]=chartab[tolower(c)]=c; } static char * get_msf_seq(char *sname,sint *len,char *tit,sint seqno) /* read the seqno_th. sequence from a PILEUP multiple alignment file */ { static char line[MAXLINE+1]; char *seq = NULL; sint i,j,k; unsigned char c; fseek(fin,0,0); /* start at the beginning */ *len=0; /* initialise length to zero */ for(i=0;;i++) { if(fgets(line,MAXLINE+1,fin)==NULL) return NULL; /* read the title*/ if(linetype(line,"//") ) break; /* lines...ignore*/ } while (fgets(line,MAXLINE+1,fin) != NULL) { if(!blankline(line)) { for(i=1;i 0;i--) if(isspace(sname[i])) { sname[i]=EOS; } else break; blank_to_(sname); if (interactive) { strcpy(title,"Found secondary structure in alignment file: "); strcat(title,sname); (*lin2)=prompt_for_yes_no(title,"Use it to set local gap penalties "); } else (*lin2) = 'y'; if ((*lin2 != 'n') && (*lin2 != 'N')) { struct_penalties = SECST; for (i=0;i length) break; } strcpy(ss_name,sname); } } /* or is it a gap penalty mask entry? 
*/ else if (strncmp(&line[1],"GM_",3) == 0) { for (i=1;i<=MAXNAMES-3;i++) { if (line[i+3] == '(' || line[i+3] == '\n') break; sname[i-1] = line[i+3]; } i--; sname[i]=EOS; if (sname[i-1] == '(') sscanf(&line[i+3],"%d",&offset); else offset = 0; for(i--;i > 0;i--) if(isspace(sname[i])) { sname[i]=EOS; } else break; blank_to_(sname); if (interactive) { strcpy(title,"Found gap penalty mask in alignment file: "); strcat(title,sname); (*lin2)=prompt_for_yes_no(title,"Use it to set local gap penalties "); } else (*lin2) = 'y'; if ((*lin2 != 'n') && (*lin2 != 'N')) { struct_penalties = GMASK; for (i=0;i length) break; } strcpy(ss_name,sname); } } if (struct_penalties != NONE) break; } } static void get_swiss_feature(char *line, sint len) { char c, s, feature[MAXLINE+1]; int i, start_pos, end_pos; if (sscanf(line,"%s%d%d",feature,&start_pos,&end_pos) != 3) { return; } if (strcmp(feature,"HELIX") == 0) { c = 'A'; s = '$'; } else if (strcmp(feature,"STRAND") == 0) { c = 'B'; s = '%'; } else return; if(start_pos >=len || end_pos>=len) return; sec_struct_mask[start_pos-1] = s; for (i=start_pos;i=len || end_pos >= len) return; sec_struct_mask[start_pos-1] = s; for (i=start_pos;i 9) return; if(start_pos>=len || end_pos >= len) return; for (i=start_pos-1;i 2 && line[strlen(line)-2]=='.' && line[strlen(line)-3]=='.' 
) continue; if(seq==NULL) seq=(char *)ckalloc((MAXLINE+2)*sizeof(char)); else seq=(char *)ckrealloc(seq,((*len)+MAXLINE+2)*sizeof(char)); for(i=0;i<=MAXLINE;i++) { c=line[i]; if(c == '\n' || c == EOS || c == '/') break; /* EOL */ c=chartab[c]; if(c) { got_seq=TRUE; seq[++(*len)]=c; } } if(c == '/') break; } break; /************************************/ case PIR: while(*line != '>') fgets(line,MAXLINE+1,fin); for(i=4;i<=strlen(line);i++) /* DES */ if(line[i] != ' ') break; strncpy(sname,line+i,MAXNAMES); /* remember entryname */ sname[MAXNAMES]=EOS; rtrim(sname); blank_to_(sname); fgets(line,MAXLINE+1,fin); strncpy(tit,line,MAXTITLES); tit[MAXTITLES]=EOS; i=strlen(tit); if(tit[i-1]=='\n') tit[i-1]=EOS; *len=0; while(fgets(line,MAXLINE+1,fin)) { if(seq==NULL) seq=(char *)ckalloc((MAXLINE+2)*sizeof(char)); else seq=(char *)ckrealloc(seq,((*len)+MAXLINE+2)*sizeof(char)); for(i=0;i<=MAXLINE;i++) { c=line[i]; if(c == '\n' || c == EOS || c == '*') break; /* EOL */ c=chartab[c]; if(c) seq[++(*len)]=c; } if(c == '*') break; } break; /***********************************************/ case PEARSON: while(*line != '>') fgets(line,MAXLINE+1,fin); for(i=1;i<=strlen(line);i++) /* DES */ if(line[i] != ' ') break; strncpy(sname,line+i,MAXNAMES); /* remember entryname */ for(i=1;i<=strlen(sname);i++) /* DES */ if(sname[i] == ' ') break; sname[i]=EOS; rtrim(sname); blank_to_(sname); *tit=EOS; *len=0; while(fgets(line,MAXLINE+1,fin)) { if(seq==NULL) seq=(char *)ckalloc((MAXLINE+2)*sizeof(char)); else seq=(char *)ckrealloc(seq,((*len)+MAXLINE+2)*sizeof(char)); for(i=0;i<=MAXLINE;i++) { c=line[i]; if(c == '\n' || c == EOS || c == '>') break; /* EOL */ c=chartab[c]; if(c) seq[++(*len)]=c; } if(c == '>') break; } break; /**********************************************/ case GDE: if (dnaflag) { while(*line != '#') fgets(line,MAXLINE+1,fin); } else { while(*line != '%') fgets(line,MAXLINE+1,fin); } for (i=1;i<=MAXNAMES;i++) { if (line[i] == '(' || line[i] == '\n') break; sname[i-1] = line[i]; 
} i--; sname[i]=EOS; if (sname[i-1] == '(') sscanf(&line[i],"%d",&offset); else offset = 0; for(i--;i > 0;i--) if(isspace(sname[i])) { sname[i]=EOS; } else break; blank_to_(sname); *tit=EOS; *len=0; for (i=0;i file not found */ } strcpy(seqname,line); no_seqs=0; check_infile(&no_seqs); info("Sequence format is %s",formatNames[seqFormat]); if(seqFormat==NEXUS) error("Cannot read nexus format"); /* DES DEBUG fprintf(stdout,"\n\n File name = %s\n\n",seqname); */ if(no_seqs == 0) return 0; /* return the number of seqs. (zero here)*/ /* if((no_seqs + first_seq -1) > MAXN) { error("Too many sequences. Maximum is %d",(pint)MAXN); return 0; } */ /* DES */ /* if(seqFormat == CLUSTAL) { info("no of sequences = %d",(pint)no_seqs); return no_seqs; } */ max_aln_length = 0; /* if this is a multiple alignment, or profile 1 - free any memory used by previous alignments, then allocate memory for the new alignment */ if(first_seq == 1) { max_names = 0; free_aln(nseqs); alloc_aln(no_seqs); } /* otherwise, this is a profile 2, and we need to reallocate the arrays, leaving the data for profile 1 intact */ else realloc_aln(first_seq,no_seqs); for(i=1;imax_aln_length) max_aln_length=seqlen_array[i]; if(strlen(names[i])>max_names) max_names=strlen(names[i]); } for(i=first_seq;i<=first_seq+no_seqs-1;i++) { /* get the seqs now*/ output_index[i] = i; /* default output order */ if(seqFormat == CLUSTAL) seq1=get_clustal_seq(sname1,&l1,title,i-first_seq+1); else if(seqFormat == MSF) seq1=get_msf_seq(sname1,&l1,title,i-first_seq+1); else seq1=get_seq(sname1,&l1,title); if(seq1==NULL) break; /* JULIE */ /* Set max length of dynamically allocated arrays in prfalign.c */ if (l1 > max_aln_length) max_aln_length = l1; seqlen_array[i]=l1; /* store the length */ strcpy(names[i],sname1); /* " " name */ strcpy(titles[i],title); /* " " title */ if(!explicit_dnaflag) { dnaflag1 = check_dnaflag(seq1,l1); /* check DNA/Prot */ if(i == 1) dnaflag = dnaflag1; } /* type decided by first seq*/ else dnaflag1 = 
dnaflag; alloc_seq(i,l1); if(dnaflag) n_encode(seq1,seq_array[i],l1); /* encode the sequence*/ else /* as ints */ p_encode(seq1,seq_array[i],l1); if(seq1!=NULL) seq1=ckfree(seq1); } max_aln_length *= 2; /* JULIE check sequence names are all different - otherwise phylip tree is confused. */ for(i=1;i<=first_seq+no_seqs-1;i++) { for(j=i+1;j<=first_seq+no_seqs-1;j++) { if (strncmp(names[i],names[j],MAXNAMES) == 0) { error("Multiple sequences found with same name, %s (first %d chars are significant)", names[i],MAXNAMES); return 0; } } } for(i=first_seq;i<=first_seq+no_seqs-1;i++) { if(seqlen_array[i]>max_aln_length) max_aln_length=seqlen_array[i]; } /* look for a feature table / gap penalty mask (only if this is a profile) */ if (profile_no > 0) { rewind(fin); struct_penalties = NONE; gap_penalty_mask = (char *)ckalloc((max_aln_length+1) * sizeof (char)); sec_struct_mask = (char *)ckalloc((max_aln_length+1) * sizeof (char)); ss_name = (char *)ckalloc((MAXNAMES+1) * sizeof (char)); if (seqFormat == CLUSTAL) { get_clustal_ss(max_aln_length); } else if (seqFormat == GDE) { get_gde_ss(max_aln_length); } else if (seqFormat == EMBLSWISS) { get_embl_ss(max_aln_length); } else if (seqFormat == RSF) { get_rsf_ss(max_aln_length); } } for(i=first_seq;i<=first_seq+no_seqs-1;i++) { if(strlen(names[i])>max_names) max_names=strlen(names[i]); } if(max_names<10) max_names=10; fclose(fin); return no_seqs; /* return the number of seqs. read in this call */ } static Boolean check_dnaflag(char *seq, sint slen) /* check if DNA or Protein The decision is based on counting all A,C,G,T,U or N. 
If >= 85% of all characters (except -) are as above => DNA */ { sint i, c, nresidues, nbases; float ratio; char *dna_codes="ACGTUN"; nresidues = nbases = 0; for(i=1; i <= slen; i++) { if(seq[i] != '-') { nresidues++; if(seq[i] == 'N') nbases++; else { c = res_index(dna_codes, seq[i]); if(c >= 0) nbases++; } } } if( (nbases == 0) || (nresidues == 0) ) return FALSE; ratio = (float)nbases/(float)nresidues; /* DES fprintf(stdout,"\n nbases = %d, nresidues = %d, ratio = %f\n", (pint)nbases,(pint)nresidues,(pint)ratio); */ if(ratio >= 0.85) return TRUE; else return FALSE; } static void check_infile(sint *nseqs) { char line[MAXLINE+1]; sint i; *nseqs=0; while (fgets(line,MAXLINE+1,fin) != NULL) { if(!blankline(line)) break; } for(i=strlen(line)-1;i>=0;i--) if(isgraph(line[i])) break; line[i+1]=EOS; for(i=0;i<=6;i++) line[i] = toupper(line[i]); if( linetype(line,"ID") ) { /* EMBL/Swiss-Prot format ? */ seqFormat=EMBLSWISS; (*nseqs)++; } else if( linetype(line,"CLUSTAL") ) { seqFormat=CLUSTAL; } else if( linetype(line,"PILEUP") ) { seqFormat = MSF; } else if( linetype(line,"!!AA_MULTIPLE_ALIGNMENT") ) { seqFormat = MSF; dnaflag = FALSE; } else if( linetype(line,"!!NA_MULTIPLE_ALIGNMENT") ) { seqFormat = MSF; dnaflag = TRUE; } else if( strstr(line,"MSF") && line[strlen(line)-1]=='.' && line[strlen(line)-2]=='.' 
) { seqFormat = MSF; } else if( linetype(line,"!!RICH_SEQUENCE") ) { seqFormat = RSF; } else if( linetype(line,"#NEXUS") ) { seqFormat=NEXUS; return; } else if(*line == '>') { /* no */ seqFormat=(line[3] == ';')?PIR:PEARSON; /* distinguish PIR and Pearson */ (*nseqs)++; } else if((*line == '"') || (*line == '%') || (*line == '#')) { seqFormat=GDE; /* GDE format */ if (*line == '%') { (*nseqs)++; dnaflag = FALSE; } else if (*line == '#') { (*nseqs)++; dnaflag = TRUE; } } else { seqFormat=UNKNOWN; return; } while(fgets(line,MAXLINE+1,fin) != NULL) { switch(seqFormat) { case EMBLSWISS: if( linetype(line,"ID") ) (*nseqs)++; break; case PIR: *nseqs = count_pir_seqs(); fseek(fin,0,0); return; case PEARSON: if( *line == '>' ) (*nseqs)++; break; case GDE: if(( *line == '%' ) && ( dnaflag == FALSE)) (*nseqs)++; else if (( *line == '#') && ( dnaflag == TRUE)) (*nseqs)++; break; case CLUSTAL: *nseqs = count_clustal_seqs(); /* DES */ /* fprintf(stdout,"\nnseqs = %d\n",(pint)*nseqs); */ fseek(fin,0,0); return; case MSF: *nseqs = count_msf_seqs(); fseek(fin,0,0); return; case RSF: fseek(fin,0,0); *nseqs = count_rsf_seqs(); fseek(fin,0,0); return; case USER: default: break; } } fseek(fin,0,0); } static sint count_pir_seqs(void) /* count the number of sequences in a pir alignment file */ { char line[MAXLINE+1],c; sint nseqs, i; Boolean seq_ok; seq_ok = FALSE; while (fgets(line,MAXLINE+1,fin) != NULL) { /* Look for end of first seq */ if(*line == '>') break; for(i=0;seq_ok == FALSE;i++) { c=line[i]; if(c == '*') { seq_ok = TRUE; /* ok - end of sequence found */ break; } /* EOL */ if(c == '\n' || c == EOS) break; /* EOL */ } if (seq_ok == TRUE) break; } if (seq_ok == FALSE) { error("PIR format sequence end marker '*'\nmissing for one or more sequences."); return (sint)0; /* funny format*/ } nseqs = 1; while (fgets(line,MAXLINE+1,fin) != NULL) { if(*line == '>') { /* Look for start of next seq */ seq_ok = FALSE; while (fgets(line,MAXLINE+1,fin) != NULL) { /* Look for end of seq */ 
if(*line == '>') { error("PIR format sequence end marker '*' missing for one or more sequences."); return (sint)0; /* funny format*/ } for(i=0;seq_ok == FALSE;i++) { c=line[i]; if(c == '*') { seq_ok = TRUE; /* ok - sequence found */ break; } /* EOL */ if(c == '\n' || c == EOS) break; /* EOL */ } if (seq_ok == TRUE) { nseqs++; break; } } } } return (sint)nseqs; } static sint count_clustal_seqs(void) /* count the number of sequences in a clustal alignment file */ { char line[MAXLINE+1]; sint nseqs; while (fgets(line,MAXLINE+1,fin) != NULL) { if(!cl_blankline(line)) break; /* Look for next non- */ } /* blank line */ nseqs = 1; while (fgets(line,MAXLINE+1,fin) != NULL) { if(cl_blankline(line)) return nseqs; nseqs++; } return (sint)0; /* if you got to here-funny format/no seqs.*/ } static sint count_msf_seqs(void) { /* count the number of sequences in a PILEUP alignment file */ char line[MAXLINE+1]; sint nseqs; while (fgets(line,MAXLINE+1,fin) != NULL) { if(linetype(line,"//")) break; } while (fgets(line,MAXLINE+1,fin) != NULL) { if(!blankline(line)) break; /* Look for next non- */ } /* blank line */ nseqs = 1; while (fgets(line,MAXLINE+1,fin) != NULL) { if(blankline(line)) return nseqs; nseqs++; } return (sint)0; /* if you got to here-funny format/no seqs.*/ } static sint count_rsf_seqs(void) { /* count the number of sequences in a GCG RSF alignment file */ char line[MAXLINE+1]; sint nseqs; nseqs = 0; /* skip the comments */ while (fgets(line,MAXLINE+1,fin) != NULL) { if(line[strlen(line)-2]=='.' && line[strlen(line)-3]=='.') break; } while (fgets(line,MAXLINE+1,fin) != NULL) { if( *line == '{' ) nseqs++; } return (sint)nseqs; } static void p_encode(char *seq, char *naseq, sint l) { /* code seq as ints .. 
use gap_pos2 for gap */ register sint i; /* static char *aacids="CSTPAGNDEQHRKMILVFYW";*/ for(i=1;i<=l;i++) if(seq[i] == '-') naseq[i] = gap_pos2; else naseq[i] = res_index(amino_acid_codes,seq[i]); naseq[i] = -3; } static void n_encode(char *seq,char *naseq,sint l) { /* code seq as ints .. use gap_pos2 for gap */ register sint i; /* static char *nucs="ACGTU"; */ for(i=1;i<=l;i++) { if(seq[i] == '-') /* if a gap character -> code = gap_pos2 */ naseq[i] = gap_pos2; /* this is the code for a gap in */ else { /* the input files */ naseq[i]=res_index(amino_acid_codes,seq[i]); } } naseq[i] = -3; } static sint res_index(char *t,char c) { register sint i; for(i=0;t[i] && t[i] != c;i++) ; if(t[i]) return(i); else return -1; } clustalw-mpi-0.15/showpair.c0000644000411000001440000004422407644152540014411 0ustar liusers#include #include #include #include #include #include "clustalw.h" #include static void make_p_ptrs(sint * tptr, sint * pl, sint naseq, sint l); static void make_n_ptrs(sint * tptr, sint * pl, sint naseq, sint len); static void put_frag(sint fs, sint v1, sint v2, sint flen); static sint frag_rel_pos(sint a1, sint b1, sint a2, sint b2); static void des_quick_sort(sint * array1, sint * array2, sint array_size); static void pair_align(sint seq_no, sint l1, sint l2); typedef struct { int i; int j; }IANDJ; /* * Prototypes */ /* * Global variables */ extern sint *seqlen_array; extern char **seq_array; extern sint dna_ktup, dna_window, dna_wind_gap, dna_signif; /* params for DNA */ extern sint prot_ktup, prot_window, prot_wind_gap, prot_signif; /* params for prots */ extern sint nseqs; extern Boolean dnaflag; extern double **tmat; extern sint max_aa; extern sint max_aln_length; static sint next; static sint curr_frag, maxsf, vatend; static sint **accum; static sint *diag_index; static char *slopes; sint ktup, window, wind_gap, signif; /* Pairwise aln. 
params */ sint *displ; sint *zza, *zzb, *zzc, *zzd; extern Boolean percent; static void make_p_ptrs(sint * tptr, sint * pl, sint naseq, sint l) { static sint a[10]; sint i, j, limit, code, flag; char residue; for (i = 1; i <= ktup; i++) a[i] = (sint) pow((double) (max_aa + 1), (double) (i - 1)); limit = (sint) pow((double) (max_aa + 1), (double) ktup); for (i = 1; i <= limit; ++i) pl[i] = 0; for (i = 1; i <= l; ++i) tptr[i] = 0; for (i = 1; i <= (l - ktup + 1); ++i) { code = 0; flag = FALSE; for (j = 1; j <= ktup; ++j) { residue = seq_array[naseq][i + j - 1]; if ((residue < 0) || (residue > max_aa)) { flag = TRUE; break; } code += ((residue) * a[j]); } if (flag) continue; ++code; if (pl[code] != 0) tptr[i] = pl[code]; pl[code] = i; } } static void make_n_ptrs(sint * tptr, sint * pl, sint naseq, sint len) { static sint pot[] = { 0, 1, 4, 16, 64, 256, 1024, 4096 }; sint i, j, limit, code, flag; char residue; limit = (sint) pow((double) 4, (double) ktup); for (i = 1; i <= limit; ++i) pl[i] = 0; for (i = 1; i <= len; ++i) tptr[i] = 0; for (i = 1; i <= len - ktup + 1; ++i) { code = 0; flag = FALSE; for (j = 1; j <= ktup; ++j) { residue = seq_array[naseq][i + j - 1]; if ((residue < 0) || (residue > 4)) { flag = TRUE; break; } code += ((residue) * pot[j]); /* DES */ } if (flag) continue; ++code; if (pl[code] != 0) tptr[i] = pl[code]; pl[code] = i; } } static void put_frag(sint fs, sint v1, sint v2, sint flen) { sint end; accum[0][curr_frag] = fs; accum[1][curr_frag] = v1; accum[2][curr_frag] = v2; accum[3][curr_frag] = flen; if (!maxsf) { maxsf = 1; accum[4][curr_frag] = 0; return; } if (fs >= accum[0][maxsf]) { accum[4][curr_frag] = maxsf; maxsf = curr_frag; return; } else { next = maxsf; while (TRUE) { end = next; next = accum[4][next]; if (fs >= accum[0][next]) break; } accum[4][curr_frag] = next; accum[4][end] = curr_frag; } } static sint frag_rel_pos(sint a1, sint b1, sint a2, sint b2) { sint ret; ret = FALSE; if (a1 - b1 == a2 - b2) { if (a2 < a1) ret = TRUE; } else 
{
		/* different diagonals: (a2,b2) qualifies only if its k-tuple ends
		   before (a1,b1) starts in both sequences */
		if (a2 + ktup - 1 < a1 && b2 + ktup - 1 < b1)
			ret = TRUE;
	}
	return ret;
}

static void des_quick_sort(sint * array1, sint * array2, sint array_size)
/*  */
/* Quicksort routine, adapted from chapter 4, page 115 of software tools */
/* by Kernighan and Plauger, (1986) */
/* Sort the elements of array1 into ascending order and apply the same */
/* exchanges to array2.  Iterative: pending sub-ranges are kept on the */
/* explicit stacks lst[] / ust[] (lower / upper bounds). */
/* NOTE(review): the initial range is [1, array_size - 1], so the element */
/* at index array_size is never moved - confirm this is what callers want. */
/*  */
{
	sint temp1, temp2;
	sint p, pivlin;
	sint i, j;
	sint lst[50], ust[50];	/* explicit stack; the smaller partition is */
				/* always handled first, so the depth needed */
				/* grows with log2 of the element count */

	lst[1] = 1;
	ust[1] = array_size - 1;
	p = 1;

	while (p > 0) {
		if (lst[p] >= ust[p])
			p--;			/* range of 0 or 1 elements: done */
		else {
			/* partition around the last element of the range */
			i = lst[p] - 1;
			j = ust[p];
			pivlin = array1[j];
			while (i < j) {
				/* the pivot at ust[p] stops this scan */
				for (i = i + 1; array1[i] < pivlin; i++);
				for (j = j - 1; j > i; j--)
					if (array1[j] <= pivlin)
						break;
				if (i < j) {
					temp1 = array1[i];
					array1[i] = array1[j];
					array1[j] = temp1;

					temp2 = array2[i];
					array2[i] = array2[j];
					array2[j] = temp2;
				}
			}

			/* move the pivot to its final place i */
			j = ust[p];

			temp1 = array1[i];
			array1[i] = array1[j];
			array1[j] = temp1;

			temp2 = array2[i];
			array2[i] = array2[j];
			array2[j] = temp2;

			/* push the smaller side on top so it is sorted next */
			if (i - lst[p] < ust[p] - i) {
				lst[p + 1] = lst[p];
				ust[p + 1] = i - 1;
				lst[p] = i + 1;
			}
			else {
				lst[p + 1] = i + 1;
				ust[p + 1] = ust[p];
				ust[p] = i - 1;
			}
			p = p + 1;
		}
	}
	return;

}

/*
 * Quick k-tuple comparison of sequence seq_no (length l1) with a second
 * sequence of length l2.  The k-tuple indexes must already be in the
 * globals zza/zzc (first sequence) and zzb/zzd (second sequence).
 * On return the fragment list is in accum with its head in maxsf
 * (0 if no fragment was stored); vatend is 1 if the fragment limit was
 * reached ("Partial alignment") and 0 otherwise.
 */
static void pair_align(sint seq_no, sint l1, sint l2)
{
	sint pot[8], i, j, l, m, flag, limit, pos, tl1, vn1, vn2, flen, osptr, fs;
	sint tv1, tv2, encrypt, subt1, subt2, rmndr;
	char residue;

	/* place values and number of distinct k-tuples for this alphabet */
	if (dnaflag) {
		for (i = 1; i <= ktup; ++i)
			pot[i] = (sint) pow((double) 4, (double) (i - 1));
		limit = (sint) pow((double) 4, (double) ktup);
	}
	else {
		for (i = 1; i <= ktup; i++)
			pot[i] = (sint) pow((double) (max_aa + 1), (double) (i - 1));
		limit = (sint) pow((double) (max_aa + 1), (double) ktup);
	}

	/* tl1 = number of diagonals; diagonal index is pos1 - pos2 + l2 */
	tl1 = (l1 + l2) - 1;

	for (i = 1; i <= tl1; ++i) {
		slopes[i] = displ[i] = 0;
		diag_index[i] = i;
	}

/* increment diagonal score for each k_tuple match */

	for (i = 1; i <= limit; ++i) {
		vn1 = zzc[i];
		while (TRUE) {
			if (!vn1)
				break;
			vn2 = zzd[i];
			while (vn2 != 0) {
				osptr = vn1 - vn2 + l2;
				++displ[osptr];
				vn2 = zzb[vn2];
			}
			vn1 = zza[vn1];
		}
	}

/* choose the top SIGNIF diagonals */

	des_quick_sort(displ, diag_index, tl1);

	j = tl1 - signif + 1;
	if (j < 1)
		j = 1;

/* flag all diagonals within WINDOW of a top diagonal */

	for (i = tl1; i >= j; i--)
		if (displ[i] > 0) {
			pos = diag_index[i];
			l = (1 > pos - window) ? 1 : pos - window;
			m = (tl1 < pos + window) ? tl1 : pos + window;
			for (; l <= m; l++)
				slopes[l] = 1;
		}

	/* displ is reused below: last fragment number seen on each diagonal */
	for (i = 1; i <= tl1; i++)
		displ[i] = 0;

	curr_frag = maxsf = 0;

	for (i = 1; i <= (l1 - ktup + 1); ++i) {
		encrypt = flag = 0;
		for (j = 1; j <= ktup; ++j) {
			residue = seq_array[seq_no][i + j - 1];
			/* NOTE(review): make_n_ptrs rejects codes > 4, whereas this
			   test uses max_aa for DNA as well - confirm intended */
			if ((residue < 0) || (residue > max_aa)) {
				flag = TRUE;
				break;
			}
			encrypt += ((residue) * pot[j]);
		}
		if (flag)
			continue;
		++encrypt;

		/* walk every occurrence of this k-tuple in the second sequence */
		vn2 = zzd[encrypt];

		flag = FALSE;
		while (TRUE) {
			if (!vn2) {
				flag = TRUE;
				break;
			}
			osptr = i - vn2 + l2;
			if (slopes[osptr] != 1) {
				/* match lies on a diagonal that was not flagged */
				vn2 = zzb[vn2];
				continue;
			}
			flen = 0;
			fs = ktup;
			next = maxsf;

		/*
		 * A-loop: scan the fragment list for a predecessor; when the
		 * scan ends (next == 0) the new fragment is stored.
		 */

			while (TRUE) {
				if (!next) {
					++curr_frag;
					if (curr_frag >= 2 * max_aln_length) {
						info("(Partial alignment)");
						vatend = 1;
						return;
					}
					displ[osptr] = curr_frag;
					put_frag(fs, i, vn2, flen);
				}
				else {
					tv1 = accum[1][next];
					tv2 = accum[2][next];
					if (frag_rel_pos(i, vn2, tv1, tv2)) {
						if (i - vn2 == accum[1][next] - accum[2][next]) {
							/* same diagonal: extend that fragment's
							   score by ktup, or by the offset when the
							   two k-tuples overlap */
							if (i > accum[1][next] + (ktup - 1))
								fs = accum[0][next] + ktup;
							else {
								rmndr = i - accum[1][next];
								fs = accum[0][next] + rmndr;
							}
							flen = next;
							next = 0;
							continue;
						}
						else {
							/* other diagonal: compare continuing the
							   last fragment on this diagonal (subt1)
							   with jumping from `next' at a cost of
							   wind_gap (subt2) */
							if (displ[osptr] == 0)
								subt1 = ktup;
							else {
								if (i > accum[1][displ[osptr]] + (ktup - 1))
									subt1 = accum[0][displ[osptr]] + ktup;
								else {
									rmndr = i - accum[1][displ[osptr]];
									subt1 = accum[0][displ[osptr]] + rmndr;
								}
							}
							subt2 = accum[0][next] - wind_gap + ktup;
							if (subt2 > subt1) {
								flen = next;
								fs = subt2;
							}
							else {
								flen = displ[osptr];
								fs = subt1;
							}
							next = 0;
							continue;
						}
					}
					else {
						next = accum[4][next];
						continue;
					}
				}
				break;
			}
		/*
		 * End of Aloop
		 */

			vn2 = zzb[vn2];
		}
	}
	vatend = 0;
}

void show_pair(sint istart, sint iend, sint jstart, sint jend)
{ sint i, j, dsr; double calc_score; /* MPI variables */ int np, sbsize, sbsize2, position,tmp_score; char *sbuffer; int ii,jj; int work, from, myrank,seq1,seq2; IANDJ *pij; int idx, loopidx; double wtime1,wtime2; MPI_Status status; /* MPI bookeeping */ MPI_Comm_size(MPI_COMM_WORLD, &np); MPI_Comm_rank(MPI_COMM_WORLD, &myrank); /* determine the exact (i:j) based on istart and jstart */ pij = (IANDJ *)malloc(((iend-istart)*(jend-jstart))*sizeof(IANDJ)); assert(pij); idx=0; for (i=istart+1;i<=(jstart+1);i++) { for (j=(jstart+2);j<=jend;j++) { pij[idx].i = i; pij[idx].j = j; idx++; } } for (i=jstart+2;i<=iend;i++) { for (j=(i+1);j<=jend;j++) { pij[idx].i = j; pij[idx].j = i; idx++; } } wtime1 = MPI_Wtime(); /* retain the old codes for the case where (np==1) ... */ if (np > 1) goto doing_show_pair; accum = (sint **) ckalloc(5 * sizeof(sint *)); for (i = 0; i < 5; i++) accum[i] = (sint *) ckalloc((2 * max_aln_length + 1) * sizeof(sint)); displ = (sint *) ckalloc((2 * max_aln_length + 1) * sizeof(sint)); slopes = (char *) ckalloc((2 * max_aln_length + 1) * sizeof(char)); diag_index = (sint *) ckalloc((2 * max_aln_length + 1) * sizeof(sint)); zza = (sint *) ckalloc((max_aln_length + 1) * sizeof(sint)); zzb = (sint *) ckalloc((max_aln_length + 1) * sizeof(sint)); zzc = (sint *) ckalloc((max_aln_length + 1) * sizeof(sint)); zzd = (sint *) ckalloc((max_aln_length + 1) * sizeof(sint)); if (dnaflag) { ktup = dna_ktup; window = dna_window; signif = dna_signif; wind_gap = dna_wind_gap; } else { ktup = prot_ktup; window = prot_window; signif = prot_signif; wind_gap = prot_wind_gap; } fprintf(stdout, "\n\n"); for (i = istart + 1; i <= iend; ++i) { if (dnaflag) make_n_ptrs(zza, zzc, i, seqlen_array[i]); else make_p_ptrs(zza, zzc, i, seqlen_array[i]); for (j = jstart + 2; j <= jend; ++j) { if (dnaflag) make_n_ptrs(zzb, zzd, j, seqlen_array[j]); else make_p_ptrs(zzb, zzd, j, seqlen_array[j]); pair_align(i, seqlen_array[i], seqlen_array[j]); if (!maxsf) calc_score = 0.0; else { 
calc_score = (double) accum[0][maxsf]; if (percent) { dsr = (seqlen_array[i] < seqlen_array[j]) ? seqlen_array[i] : seqlen_array[j]; calc_score = (calc_score / (double) dsr) * 100.0; } } /* tmat[i][j]=calc_score; tmat[j][i]=calc_score; */ tmat[i][j] = (100.0 - calc_score) / 100.0; tmat[j][i] = (100.0 - calc_score) / 100.0; if (calc_score > 0.1) info("Sequences (%d:%d) Aligned. Score: %lg", (pint) i, (pint) j, calc_score); else info("Sequences (%d:%d) Not Aligned", (pint) i, (pint) j); } } for (i = 0; i < 5; i++) accum[i] = ckfree((void *) accum[i]); accum = ckfree((void *) accum); displ = ckfree((void *) displ); slopes = ckfree((void *) slopes); diag_index = ckfree((void *) diag_index); zza = ckfree((void *) zza); zzb = ckfree((void *) zzb); zzc = ckfree((void *) zzc); zzd = ckfree((void *) zzd); doing_show_pair: accum = (sint **) ckalloc(5 * sizeof(sint *)); for (i = 0; i < 5; i++) accum[i] = (sint *) ckalloc((2 * max_aln_length + 1) * sizeof(sint)); work = 1; for (loopidx=0;loopidx 0.1) info("Sequences (%d:%d) Aligned. 
Score: %lg\tdone by rank %d", seq1, seq2, calc_score, from); else info("Sequences (%d:%d) Not Aligned", seq1, seq2); /* determine the size of the sending buffer */ sbsize = 0; sbsize += 15 * sizeof(int) + 1 * sizeof(char) + (seqlen_array[ii] + 1 + seqlen_array[jj] + 1) * sizeof(char); sbsize += (nseqs)*sizeof(int); MPI_Send(&sbsize, 1, MPI_INT, from, DOING_SHOW_PAIR, MPI_COMM_WORLD); sbuffer = (char *) malloc(sbsize * sizeof(char)); assert(sbuffer); position = 0; /* pack data */ MPI_Pack(&dnaflag, 1, MPI_CHAR, sbuffer, sbsize, &position, MPI_COMM_WORLD); MPI_Pack(&ii, 1, MPI_INT, sbuffer, sbsize, &position, MPI_COMM_WORLD); MPI_Pack(&jj, 1, MPI_INT, sbuffer, sbsize, &position, MPI_COMM_WORLD); MPI_Pack(&nseqs, 1, MPI_INT, sbuffer, sbsize, &position, MPI_COMM_WORLD); MPI_Pack(seqlen_array, (nseqs+1), MPI_INT, sbuffer, sbsize, &position, MPI_COMM_WORLD); MPI_Pack(seq_array[ii], seqlen_array[ii] + 1, MPI_CHAR, sbuffer, sbsize, &position, MPI_COMM_WORLD); MPI_Pack(seq_array[jj], seqlen_array[jj] + 1, MPI_CHAR, sbuffer, sbsize, &position, MPI_COMM_WORLD); MPI_Pack(&max_aa, 1, MPI_INT, sbuffer, sbsize, &position, MPI_COMM_WORLD); MPI_Pack(&max_aln_length, 1, MPI_INT, sbuffer, sbsize, &position, MPI_COMM_WORLD); MPI_Pack(&dna_ktup, 1, MPI_INT, sbuffer, sbsize, &position, MPI_COMM_WORLD); MPI_Pack(&dna_window, 1, MPI_INT, sbuffer, sbsize, &position, MPI_COMM_WORLD); MPI_Pack(&dna_signif, 1, MPI_INT, sbuffer, sbsize, &position, MPI_COMM_WORLD); MPI_Pack(&dna_wind_gap, 1, MPI_INT, sbuffer, sbsize, &position, MPI_COMM_WORLD); MPI_Pack(&prot_ktup, 1, MPI_INT, sbuffer, sbsize, &position, MPI_COMM_WORLD); MPI_Pack(&prot_window, 1, MPI_INT, sbuffer, sbsize, &position, MPI_COMM_WORLD); MPI_Pack(&prot_signif, 1, MPI_INT, sbuffer, sbsize, &position, MPI_COMM_WORLD); MPI_Pack(&prot_wind_gap, 1, MPI_INT, sbuffer, sbsize, &position, MPI_COMM_WORLD); /* send data */ MPI_Send(sbuffer, sbsize, MPI_PACKED, from, SHOW_PAIR_DATA, MPI_COMM_WORLD); free(sbuffer); } } /* handle the remaining 
ones */ for (i = 0; i < (work - 1); i++) { /* receive data */ MPI_Recv(&sbsize2, 1, MPI_INT, MPI_ANY_SOURCE, SHOW_PAIR_RESULT_SIZE, MPI_COMM_WORLD, &status); from = status.MPI_SOURCE; sbuffer = (char *) malloc(sbsize2 * sizeof(char)); assert(sbuffer); MPI_Recv(sbuffer, sbsize2, MPI_PACKED, from, SHOW_PAIR_RESULT, MPI_COMM_WORLD, &status); position = 0; MPI_Unpack(sbuffer, sbsize2, &position, &tmp_score, 1, MPI_INT, MPI_COMM_WORLD); MPI_Unpack(sbuffer, sbsize2, &position, &seq1, 1, MPI_INT, MPI_COMM_WORLD); MPI_Unpack(sbuffer, sbsize2, &position, &seq2, 1, MPI_INT, MPI_COMM_WORLD); free(sbuffer); /* compute calc_score */ calc_score = (double)tmp_score; if (percent) { dsr = (seqlen_array[seq1] < seqlen_array[seq2]) ? seqlen_array[seq1] : seqlen_array[seq2]; calc_score = (calc_score / (double) dsr) * 100.0; } tmat[seq1][seq2] = (100.0 - calc_score) / 100.0; tmat[seq2][seq1] = (100.0 - calc_score) / 100.0; if (calc_score > 0.1) info("Sequences (%d:%d) Aligned. Score: %lg\tdone by rank %d", seq1, seq2, calc_score, from); else info("Sequences (%d:%d) Not Aligned", seq1, seq2); } /** Measuring wall time by MPI_Wtime() **/ wtime2 = MPI_Wtime(); fprintf(stderr,"\nDEBUG: quick align (show_pair) time = %5.3f sec\n", wtime2 - wtime1); fflush(stderr); for (i = 0; i < 5; i++) accum[i] = ckfree((void *) accum[i]); accum = ckfree((void *) accum); free(pij); return; } clustalw-mpi-0.15/stupid.c0000644000411000001440000003211107644152540014055 0ustar liusers #include #include #include #include #include "clustalw.h" #include "mpi.h" static void padd(sint k); static void pdel(sint k); static void palign(void); static lint prfscore(sint n, sint m); static sint gap_penalty1(sint i, sint j, sint k); static sint open_penalty1(sint i, sint j); static sint ext_penalty1(sint i, sint j); static sint gap_penalty2(sint i, sint j, sint k); static sint open_penalty2(sint i, sint j); static sint ext_penalty2(sint i, sint j); static lint *HH, *DD, *RR, *SS; static lint *gS; static sint print_ptr; 
static sint last_print; static lint *displ; static sint max_aa; static sint max_aln_length; static sint gap_pos1, gap_pos2; static sint **profile1, **profile2; static Boolean endgappenalties; static sint prf_length1, prf_length2; static int reverse_rank; static lint mypdiff(sint A, sint B, sint M, sint N, sint go1, sint go2); /* * Using MPI to computer the preverse_pass() upto this level * of the recursive mypdiff() calls. */ static int plevel; #define MYPDIFFLEVEL 3 /******************************************************************* * Input from prfalign(): * a,b,c,d,go1,go2,print_ptr,last_print,displ,max_aln_length,max_aa, * gap_pos1,gap_pos2,profile1,profile2,pendgappenalties, * prf_length1,prf_length2; * * Output to prfalign(): * score, print_ptr, last_print, displ; * *******************************************************************/ void stupid(int a, int b, int c, int d, sint go1, sint go2, lint *pscore, sint *pprint_ptr, sint *plast_print, lint **pdispl, int pmax_aln_length, sint pmax_aa, sint pgap_pos1, sint pgap_pos2, sint **pprofile1, sint **pprofile2, Boolean pendgappenalties, sint pprf_length1, sint pprf_length2, int preverse_rank){ lint score; print_ptr = *pprint_ptr; last_print = *plast_print; max_aa = pmax_aa; gap_pos1 = pgap_pos1; gap_pos2 = pgap_pos2; profile1 = pprofile1; profile2 = pprofile2; pendgappenalties = endgappenalties; prf_length1 = pprf_length1; prf_length2 = pprf_length2; max_aln_length = pmax_aln_length; reverse_rank = preverse_rank; plevel = 0; HH = (lint *) ckalloc((max_aln_length + 1) * sizeof(lint)); DD = (lint *) ckalloc((max_aln_length + 1) * sizeof(lint)); RR = (lint *) ckalloc((max_aln_length + 1) * sizeof(lint)); SS = (lint *) ckalloc((max_aln_length + 1) * sizeof(lint)); gS = (lint *) ckalloc((max_aln_length + 1) * sizeof(lint)); displ = (sint *) ckalloc((max_aln_length + 1) * sizeof(sint)); score = mypdiff(a, b, c, d, go1, go2); HH = ckfree((void *) HH); DD = ckfree((void *) DD); RR = ckfree((void *) RR); SS = 
ckfree((void *) SS); gS = ckfree((void *) gS); *pprint_ptr = print_ptr; *plast_print = last_print; *pscore = score; *pdispl = displ; return ; } static lint mypdiff(sint A, sint B, sint M, sint N, sint go1, sint go2) { sint midi, midj, type; lint midh; lint t, tl, g, h; sint i, j; lint hh, f, e, s; int mybsize,position; char *mpi_buffer; MPI_Status status; /* remember the number of times that mypdiff() have been called */ plevel++; /* Boundary cases: M <= 1 or N == 0 */ /* if sequence B is empty.... */ if (N <= 0) { /* if sequence A is not empty.... */ if (M > 0) { /* delete residues A[1] to A[M] */ pdel(M); } return (-gap_penalty1(A, B, M)); } /* if sequence A is empty.... */ if (M <= 1) { if (M <= 0) { /* insert residues B[1] to B[N] */ padd(N); return (-gap_penalty2(A, B, N)); } /* if sequence A has just one residue.... */ if (go1 == 0) midh = -gap_penalty1(A + 1, B + 1, N); else midh = -gap_penalty2(A + 1, B, 1) - gap_penalty1(A + 1, B + 1, N); midj = 0; for (j = 1; j <= N; j++) { hh = -gap_penalty1(A, B + 1, j - 1) + prfscore(A + 1, B + j) - gap_penalty1(A + 1, B + j + 1, N - j); if (hh > midh) { midh = hh; midj = j; } } if (midj == 0) { padd(N); pdel(1); } else { if (midj > 1) padd(midj - 1); palign(); if (midj < N) padd(N - midj); } return midh; } /* Divide sequence A in half: midi */ midi = M / 2; /* In a forward phase, calculate all HH[j] and HH[j] */ HH[0] = 0.0; t = -open_penalty1(A, B + 1); tl = -ext_penalty1(A, B + 1); for (j = 1; j <= N; j++) { HH[j] = t = t + tl; DD[j] = t - open_penalty2(A + 1, B + j); } if (go1 == 0) t = 0; else t = -open_penalty2(A + 1, B); tl = -ext_penalty2(A + 1, B); #ifdef OLD_CODES for (i = 1; i <= midi; i++) { s = HH[0]; HH[0] = hh = t = t + tl; f = t - open_penalty1(A + i, B + 1); for (j = 1; j <= N; j++) { g = open_penalty1(A + i, B + j); h = ext_penalty1(A + i, B + j); if ((hh = hh - g - h) > (f = f - h)) f = hh; g = open_penalty2(A + i, B + j); h = ext_penalty2(A + i, B + j); if ((hh = HH[j] - g - h) > (e = DD[j] - h)) e 
= hh; hh = s + prfscore(A + i, B + j); if (f > hh) hh = f; if (e > hh) hh = e; s = HH[j]; HH[j] = hh; DD[j] = e; } } DD[0] = HH[0]; /* In a reverse phase, calculate all RR[j] and SS[j] */ RR[N] = 0.0; tl = 0.0; for (j = N - 1; j >= 0; j--) { g = -open_penalty1(A + M, B + j + 1); tl -= ext_penalty1(A + M, B + j + 1); RR[j] = g + tl; SS[j] = RR[j] - open_penalty2(A + M, B + j); gS[j] = open_penalty2(A + M, B + j); } tl = 0.0; for (i = M - 1; i >= midi; i--) { s = RR[N]; if (go2 == 0) g = 0; else g = -open_penalty2(A + i + 1, B + N); tl -= ext_penalty2(A + i + 1, B + N); RR[N] = hh = g + tl; t = open_penalty1(A + i, B + N); f = RR[N] - t; for (j = N - 1; j >= 0; j--) { g = open_penalty1(A + i, B + j + 1); h = ext_penalty1(A + i, B + j + 1); if ((hh = hh - g - h) > (f = f - h - g + t)) f = hh; t = g; g = open_penalty2(A + i + 1, B + j); h = ext_penalty2(A + i + 1, B + j); hh = RR[j] - g - h; if (i == (M - 1)) { e = SS[j] - h; } else { e = SS[j] - h - g + open_penalty2(A + i + 2, B + j); gS[j] = g; } if (hh > e) e = hh; hh = s + prfscore(A + i + 1, B + j + 1); if (f > hh) hh = f; if (e > hh) hh = e; s = RR[j]; RR[j] = hh; SS[j] = e; } } #endif /* * "reverse_rank" is the rank of the MPI process that is going to * execute preverse_pass(). 
*/ if (reverse_rank) { /* determine buffer size */ mybsize = 0; mybsize += 9 * sizeof(int) + sizeof(Boolean); for (i = A; i < A + M + 2; i++) mybsize += (LENCOL + 2) * sizeof(sint); for (i = B; i < B + N + 2; i++) mybsize += (LENCOL + 2) * sizeof(sint); MPI_Send(&mybsize, 1, MPI_INT, reverse_rank, PREVERSE_TAG, MPI_COMM_WORLD); mpi_buffer = (char *) malloc(mybsize * sizeof(char)); assert(mpi_buffer); position = 0; MPI_Pack(&prf_length1, 1, MPI_INT, mpi_buffer, mybsize, &position, MPI_COMM_WORLD); MPI_Pack(&prf_length2, 1, MPI_INT, mpi_buffer, mybsize, &position, MPI_COMM_WORLD); MPI_Pack(&endgappenalties, 1, MPI_CHAR, mpi_buffer, mybsize, &position, MPI_COMM_WORLD); MPI_Pack(&max_aa, 1, MPI_INT, mpi_buffer, mybsize, &position, MPI_COMM_WORLD); MPI_Pack(&midi, 1, MPI_INT, mpi_buffer, mybsize, &position, MPI_COMM_WORLD); MPI_Pack(&go2, 1, MPI_INT, mpi_buffer, mybsize, &position, MPI_COMM_WORLD); MPI_Pack(&A, 1, MPI_INT, mpi_buffer, mybsize, &position, MPI_COMM_WORLD); MPI_Pack(&B, 1, MPI_INT, mpi_buffer, mybsize, &position, MPI_COMM_WORLD); MPI_Pack(&M, 1, MPI_INT, mpi_buffer, mybsize, &position, MPI_COMM_WORLD); MPI_Pack(&N, 1, MPI_INT, mpi_buffer, mybsize, &position, MPI_COMM_WORLD); for (i = A; i < A + M + 2; i++) MPI_Pack(profile1[i], (LENCOL + 2), MPI_INT, mpi_buffer, mybsize, &position, MPI_COMM_WORLD); for (i = B; i < B + N + 2; i++) MPI_Pack(profile2[i], (LENCOL + 2), MPI_INT, mpi_buffer, mybsize, &position, MPI_COMM_WORLD); MPI_Send(mpi_buffer, mybsize, MPI_PACKED, reverse_rank, MY_DATA_TAG, MPI_COMM_WORLD); free(mpi_buffer); /* in the meantime, we will be computing pforward_pass() */ pforward_pass(midi, t, tl, A, B, N, HH, DD); DD[0] = HH[0]; #ifdef DEBUG { int rank; MPI_Comm_rank(MPI_COMM_WORLD,&rank); fprintf(stderr,"DEBUG: my rank=%d, line 415, rank %d is doing preverse_pass()\n", rank,reverse_rank); fflush(stderr); } #endif /* preverse_pass(midi, A, B, M, N, go2, RR, SS, gS); */ /* receiving RR[], SS[], gS[] */ MPI_Recv(&mybsize, 1, MPI_INT, 
reverse_rank, MY_RESULT_TAG, MPI_COMM_WORLD, &status); mpi_buffer = (char *) malloc(mybsize * sizeof(char)); assert(mpi_buffer); position = 0; MPI_Recv(mpi_buffer, mybsize, MPI_PACKED, reverse_rank, MY_RESULT_TAG, MPI_COMM_WORLD, &status); MPI_Unpack(mpi_buffer, mybsize, &position, RR, (max_aln_length + 1), MPI_INT, MPI_COMM_WORLD); MPI_Unpack(mpi_buffer, mybsize, &position, SS, (max_aln_length + 1), MPI_INT, MPI_COMM_WORLD); MPI_Unpack(mpi_buffer, mybsize, &position, gS, (max_aln_length + 1), MPI_INT, MPI_COMM_WORLD); free(mpi_buffer); if (plevel>= (MYPDIFFLEVEL)) reverse_rank = 0; } else { /* single process */ pforward_pass(midi, t, tl, A, B, N, HH, DD); DD[0] = HH[0]; preverse_pass(midi, A, B, M, N, go2, RR, SS, gS); } SS[N] = RR[N]; gS[N] = open_penalty2(A + midi + 1, B + N); /* find midj, such that HH[j]+RR[j] or DD[j]+SS[j]+gap is the maximum */ midh = HH[0] + RR[0]; midj = 0; type = 1; for (j = 0; j <= N; j++) { hh = HH[j] + RR[j]; if (hh >= midh) if (hh > midh || (HH[j] != DD[j] && RR[j] == SS[j])) { midh = hh; midj = j; } } for (j = N; j >= 0; j--) { hh = DD[j] + SS[j] + gS[j]; if (hh > midh) { midh = hh; midj = j; type = 2; } } /* Conquer recursively around midpoint */ if (type == 1) { /* Type 1 gaps */ mypdiff(A, B, midi, midj, go1, 1); mypdiff(A + midi, B + midj, M - midi, N - midj, 1, go2); } else { mypdiff(A, B, midi - 1, midj, go1, 0); pdel(2); mypdiff(A + midi + 1, B + midj, M - midi - 1, N - midj, 0, go2); } return midh; /* Return the score of the best alignment */ } static void pdel(sint k) { if (last_print < 0) last_print = displ[print_ptr - 1] -= k; else last_print = displ[print_ptr++] = -(k); } static void padd(sint k) { if (last_print < 0) { displ[print_ptr - 1] = k; displ[print_ptr++] = last_print; } else last_print = displ[print_ptr++] = k; } static void palign(void) { displ[print_ptr++] = last_print = 0; } static lint prfscore(sint n, sint m) { sint ix; lint score; score = 0.0; for (ix = 0; ix <= max_aa; ix++) { score += (profile1[n][ix] * 
profile2[m][ix]); } score += (profile1[n][gap_pos1] * profile2[m][gap_pos1]); score += (profile1[n][gap_pos2] * profile2[m][gap_pos2]); return (score / 10); } /* calculate the score for opening a gap at residues A[i] and B[j] */ static sint open_penalty1(sint i, sint j) { sint g; if (!endgappenalties && (i == 0 || i == prf_length1)) return (0); g = profile2[j][GAPCOL] + profile1[i][GAPCOL]; return (g); } /* calculate the score for extending an existing gap at A[i] and B[j] */ static sint ext_penalty1(sint i, sint j) { sint h; if (!endgappenalties && (i == 0 || i == prf_length1)) return (0); h = profile2[j][LENCOL]; return (h); } /* calculate the score for a gap of length k, at residues A[i] and B[j] */ static sint gap_penalty1(sint i, sint j, sint k) { sint ix; sint gp; sint g, h = 0; if (k <= 0) return (0); if (!endgappenalties && (i == 0 || i == prf_length1)) return (0); g = profile2[j][GAPCOL] + profile1[i][GAPCOL]; for (ix = 0; ix < k && ix + j < prf_length2; ix++) h = profile2[ix + j][LENCOL]; gp = g + h * k; return (gp); } /* calculate the score for opening a gap at residues A[i] and B[j] */ static sint open_penalty2(sint i, sint j) { sint g; if (!endgappenalties && (j == 0 || j == prf_length2)) return (0); g = profile1[i][GAPCOL] + profile2[j][GAPCOL]; return (g); } /* calculate the score for extending an existing gap at A[i] and B[j] */ static sint ext_penalty2(sint i, sint j) { sint h; if (!endgappenalties && (j == 0 || j == prf_length2)) return (0); h = profile1[i][LENCOL]; return (h); } /* calculate the score for a gap of length k, at residues A[i] and B[j] */ static sint gap_penalty2(sint i, sint j, sint k) { sint ix; sint gp; sint g, h = 0; if (k <= 0) return (0); if (!endgappenalties && (j == 0 || j == prf_length2)) return (0); g = profile1[i][GAPCOL] + profile2[j][GAPCOL]; for (ix = 0; ix < k && ix + i < prf_length1; ix++) h = profile1[ix + i][LENCOL]; gp = g + h * k; return (gp); } 
clustalw-mpi-0.15/trees.c0000644000411000001440000014204507644152540013677 0ustar liusers/* Phyle of filogenetic tree calculating functions for CLUSTAL W */ /* DES was here FEB. 1994 */ #include #include #include #include #include #include "clustalw.h" #include "dayhoff.h" /* set correction for amino acid distances >= 75% */ #include "mpi.h" /* * Prototypes */ Boolean transition(sint base1, sint base2); void tree_gap_delete(void); void distance_matrix_output(FILE * ofile); void nj_tree(char **tree_description, FILE * tree); void compare_tree(char **tree1, char **tree2, sint * hits, sint n); void print_phylip_tree(char **tree_description, FILE * tree, sint bootstrap); void print_nexus_tree(char **tree_description, FILE * tree, sint bootstrap); sint two_way_split(char **tree_description, FILE * tree, sint start_row, sint flag, sint bootstrap); sint two_way_split_nexus(char **tree_description, FILE * tree, sint start_row, sint flag, sint bootstrap); void print_tree(char **tree_description, FILE * tree, sint * totals); static Boolean is_ambiguity(char c); static void overspill_message(sint overspill, sint total_dists); static void mpi_njtree_master(double *sumd, double *rdiq, double **tmat, int tmat_len); static double walltime(double *t0); /* * Global variables */ extern sint max_names; extern double **tmat; /* general nxn array of reals; allocated from main */ /* this is used as a distance matrix */ extern Boolean dnaflag; /* TRUE for DNA seqs; FALSE for proteins */ extern Boolean tossgaps; /* Ignore places in align. where ANY seq. 
has a gap */ extern Boolean kimura; /* Use correction for multiple substitutions */ extern Boolean output_tree_clustal; /* clustal text output for trees */ extern Boolean output_tree_phylip; /* phylip nested parentheses format */ extern Boolean output_tree_distances; /* phylip distance matrix */ extern Boolean output_tree_nexus; /* nexus format tree */ extern sint bootstrap_format; /* bootstrap file format */ extern Boolean empty; /* any sequences in memory? */ extern Boolean usemenu; /* interactive (TRUE) or command line (FALSE) */ extern sint nseqs; extern sint max_aln_length; extern sint *seqlen_array; /* the lengths of the sequences */ extern char **seq_array; /* the sequences */ extern char **names; /* the seq. names */ extern char seqname[]; /* name of input file */ extern sint gap_pos1, gap_pos2; extern Boolean use_ambiguities; extern char *amino_acid_codes; static double *av; static double *left_branch, *right_branch; static double *save_left_branch, *save_right_branch; static sint *boot_totals; static sint *tkill; /* The next line is a fossil from the days of using the cc ran() static int ran_factor; */ static sint *boot_positions; static FILE *phylip_phy_tree_file; static FILE *clustal_phy_tree_file; static FILE *distances_phy_tree_file; static FILE *nexus_phy_tree_file; static Boolean verbose; static char *tree_gaps; static sint first_seq, last_seq; /* array of weights; 1 for use this posn.; 0 don't */ extern sint boot_ntrials; /* number of bootstrap trials */ extern unsigned sint boot_ran_seed; /* random number generator seed */ void phylogenetic_tree(char *phylip_name, char *clustal_name, char *dist_name, char *nexus_name) /* Calculate a tree using the distances in the nseqs*nseqs array tmat. This is the routine for getting the REAL trees after alignment. 
*/ { char path[FILENAMELEN + 1]; sint i, j; sint overspill = 0; sint total_dists; static char **standard_tree; static char **save_tree; char lin2[10]; if (empty) { error("You must load an alignment first"); return; } if (nseqs < 2) { error("Alignment has only %d sequences", nseqs); return; } first_seq = 1; last_seq = nseqs; get_path(seqname, path); if (output_tree_clustal) { if (clustal_name[0] != EOS) { if ((clustal_phy_tree_file = open_explicit_file(clustal_name)) == NULL) return; } else { if ((clustal_phy_tree_file = open_output_file ("\nEnter name for CLUSTAL tree output file ", path, clustal_name, "nj")) == NULL) return; } } if (output_tree_phylip) { if (phylip_name[0] != EOS) { if ((phylip_phy_tree_file = open_explicit_file(phylip_name)) == NULL) return; } else { if ((phylip_phy_tree_file = open_output_file ("\nEnter name for PHYLIP tree output file ", path, phylip_name, "ph")) == NULL) return; } } if (output_tree_distances) { if (dist_name[0] != EOS) { if ((distances_phy_tree_file = open_explicit_file(dist_name)) == NULL) return; } else { if ((distances_phy_tree_file = open_output_file ("\nEnter name for distance matrix output file ", path, dist_name, "dst")) == NULL) return; } } if (output_tree_nexus) { if (nexus_name[0] != EOS) { if ((nexus_phy_tree_file = open_explicit_file(nexus_name)) == NULL) return; } else { if ((nexus_phy_tree_file = open_output_file ("\nEnter name for NEXUS tree output file ", path, nexus_name, "tre")) == NULL) return; } } boot_positions = (sint *) ckalloc((seqlen_array[first_seq] + 2) * sizeof(sint)); for (j = 1; j <= seqlen_array[first_seq]; ++j) boot_positions[j] = j; if (output_tree_clustal) { verbose = TRUE; /* Turn on file output */ if (dnaflag) overspill = dna_distance_matrix(clustal_phy_tree_file); else overspill = prot_distance_matrix(clustal_phy_tree_file); } if (output_tree_phylip) { verbose = FALSE; /* Turn off file output */ if (dnaflag) overspill = dna_distance_matrix(phylip_phy_tree_file); else overspill = 
prot_distance_matrix(phylip_phy_tree_file); } if (output_tree_nexus) { verbose = FALSE; /* Turn off file output */ if (dnaflag) overspill = dna_distance_matrix(nexus_phy_tree_file); else overspill = prot_distance_matrix(nexus_phy_tree_file); } if (output_tree_distances) { verbose = FALSE; /* Turn off file output */ if (dnaflag) overspill = dna_distance_matrix(distances_phy_tree_file); else overspill = prot_distance_matrix(distances_phy_tree_file); distance_matrix_output(distances_phy_tree_file); } /* check if any distances overflowed the distance corrections */ if (overspill > 0) { total_dists = (nseqs * (nseqs - 1)) / 2; overspill_message(overspill, total_dists); } if (output_tree_clustal) verbose = TRUE; /* Turn on file output */ standard_tree = (char **) ckalloc((nseqs + 1) * sizeof(char *)); for (i = 0; i < nseqs + 1; i++) standard_tree[i] = (char *) ckalloc((nseqs + 1) * sizeof(char)); save_tree = (char **) ckalloc((nseqs + 1) * sizeof(char *)); for (i = 0; i < nseqs + 1; i++) save_tree[i] = (char *) ckalloc((nseqs + 1) * sizeof(char)); if (output_tree_clustal || output_tree_phylip || output_tree_nexus) nj_tree(standard_tree, clustal_phy_tree_file); for (i = 1; i < nseqs + 1; i++) for (j = 1; j < nseqs + 1; j++) save_tree[i][j] = standard_tree[i][j]; if (output_tree_phylip) print_phylip_tree(standard_tree, phylip_phy_tree_file, 0); for (i = 1; i < nseqs + 1; i++) for (j = 1; j < nseqs + 1; j++) standard_tree[i][j] = save_tree[i][j]; if (output_tree_nexus) print_nexus_tree(standard_tree, nexus_phy_tree_file, 0); /* print_tree(standard_tree,phy_tree_file); */ tree_gaps = ckfree((void *) tree_gaps); boot_positions = ckfree((void *) boot_positions); if (left_branch != NULL) left_branch = ckfree((void *) left_branch); if (right_branch != NULL) right_branch = ckfree((void *) right_branch); if (tkill != NULL) tkill = ckfree((void *) tkill); if (av != NULL) av = ckfree((void *) av); for (i = 0; i < nseqs + 1; i++) standard_tree[i] = ckfree((void *) standard_tree[i]); 
standard_tree = ckfree((void *) standard_tree); for (i = 0; i < nseqs + 1; i++) save_tree[i] = ckfree((void *) save_tree[i]); save_tree = ckfree((void *) save_tree); if (output_tree_clustal) { fclose(clustal_phy_tree_file); info("Phylogenetic tree file created: [%s]", clustal_name); } if (output_tree_phylip) { fclose(phylip_phy_tree_file); info("Phylogenetic tree file created: [%s]", phylip_name); } if (output_tree_distances) { fclose(distances_phy_tree_file); info("Distance matrix file created: [%s]", dist_name); } if (output_tree_nexus) { fclose(nexus_phy_tree_file); info("Nexus tree file created: [%s]", nexus_name); } } static void overspill_message(sint overspill, sint total_dists) { char err_mess[1024] = ""; sprintf(err_mess, "%d of the distances out of a total of %d", (pint) overspill, (pint) total_dists); strcat(err_mess, "\n were out of range for the distance correction."); strcat(err_mess, "\n"); strcat(err_mess, "\n SUGGESTIONS: 1) remove the most distant sequences"); strcat(err_mess, "\n or 2) use the PHYLIP package"); strcat(err_mess, "\n or 3) turn off the correction."); strcat(err_mess, "\n Note: Use option 3 with caution! With this degree"); strcat(err_mess, "\n of divergence you will have great difficulty"); strcat(err_mess, "\n getting robust and reliable trees."); strcat(err_mess, "\n\n"); warning(err_mess); } Boolean transition(sint base1, sint base2) { /* TRUE if transition; else FALSE */ /* assumes that the bases of DNA sequences have been translated as a,A = 0; c,C = 1; g,G = 2; t,T,u,U = 3; N = 4; a,A = 0; c,C = 2; g,G = 6; t,T,u,U =17; A <--> G and T <--> C are transitions; all others are transversions. 
*/ if (((base1 == 0) && (base2 == 6)) || ((base1 == 6) && (base2 == 0))) return TRUE; /* A <--> G */ if (((base1 == 17) && (base2 == 2)) || ((base1 == 2) && (base2 == 17))) return TRUE; /* T <--> C */ return FALSE; } void tree_gap_delete(void) { /* flag all positions in alignment that have a gap *//* in ANY sequence */ sint seqn; sint posn; tree_gaps = (char *) ckalloc((max_aln_length + 1) * sizeof(char)); for (posn = 1; posn <= seqlen_array[first_seq]; ++posn) { tree_gaps[posn] = 0; for (seqn = 1; seqn <= last_seq - first_seq + 1; ++seqn) { if ((seq_array[seqn + first_seq - 1][posn] == gap_pos1) || (seq_array[seqn + first_seq - 1][posn] == gap_pos2)) { tree_gaps[posn] = 1; break; } } } } void distance_matrix_output(FILE * ofile) { sint i, j; fprintf(ofile, "%6d", (pint) last_seq - first_seq + 1); for (i = 1; i <= last_seq - first_seq + 1; i++) { fprintf(ofile, "\n%-*s ", max_names, names[i]); for (j = 1; j <= last_seq - first_seq + 1; j++) { fprintf(ofile, "%6.3f ", tmat[i][j]); if (j % 8 == 0) { if (j != last_seq - first_seq + 1) fprintf(ofile, "\n"); if (j != last_seq - first_seq + 1) fprintf(ofile, " "); } } } } void nj_tree(char **tree_description, FILE * tree) { register int i; sint l[4], nude, k; sint nc, mini, minj, j, ii, jj; double fnseqs, fnseqs2 = 0, sumd; double diq, djq, dij, d2r, dr, dio, djo, da; double tmin, total, dmin; double bi, bj, b1, b2, b3, branch[4]; sint typei, typej; /* 0 = node; 1 = OTU */ double *rdiq; fnseqs = (double) last_seq - first_seq + 1; /*********************** First initialisation ***************************/ if (verbose) { fprintf(tree, "\n\n\t\t\tNeighbor-joining Method\n"); fprintf(tree, "\n Saitou, N. and Nei, M. (1987)"); fprintf(tree, " The Neighbor-joining Method:"); fprintf(tree, "\n A New Method for Reconstructing Phylogenetic Trees."); fprintf(tree, "\n Mol. Biol. 
Evol., 4(4), 406-425\n"); fprintf(tree, "\n\n This is an UNROOTED tree\n"); fprintf(tree, "\n Numbers in parentheses are branch lengths\n\n"); } if (fnseqs == 2) { if (verbose) fprintf(tree, "Cycle 1 = SEQ: 1 (%9.5f) joins SEQ: 2 (%9.5f)", tmat[first_seq][first_seq + 1], tmat[first_seq][first_seq + 1]); return; } mini = minj = 0; left_branch = (double *) ckalloc((nseqs + 2) * sizeof(double)); right_branch = (double *) ckalloc((nseqs + 2) * sizeof(double)); tkill = (sint *) ckalloc((nseqs + 1) * sizeof(sint)); av = (double *) ckalloc((nseqs + 1) * sizeof(double)); for (i = 1; i <= last_seq - first_seq + 1; ++i) { tmat[i][i] = av[i] = 0.0; tkill[i] = 0; } /* * Since we are going to pre-compute diq[] and djq[] based on * the current tmat[][], here we will allocate space for * diq[1..(last_seq-first_seq+1)]. */ rdiq = (double *)malloc(((last_seq-first_seq+1)+1)*sizeof(double)); assert(rdiq); #ifdef SERIAL_NJTREE fprintf(stderr,"DEBUG: Using serial codes in nj_tree() ...\n"); fflush(stderr); #else fprintf(stderr,"DEBUG: Using MPI codes in nj_tree() ...\n"); fflush(stderr); #endif /*********************** Enter The Main Cycle ***************************/ for (nc = 1; nc <= (last_seq - first_seq + 1 - 3); ++nc) { sumd = 0.0; for (i = 1; i <= last_seq - first_seq + 1; ++i) { rdiq[i] = 0.0; } #ifdef SERIAL_NJTREE for (i = 1; i <= last_seq - first_seq; i++) { for (j = i+1; j <= (last_seq-first_seq+1) ; j++) { tmat[j][i] = tmat[i][j]; sumd += tmat[i][j]; } } for (i = 1; i <= last_seq - first_seq+1; i++) for (j = 1; j <= (last_seq-first_seq+1) ; j++) rdiq[i]+= tmat[i][j]; #else /* we'll use MPI. 
*/ for (i = 1; i <= last_seq - first_seq; i++) { for (j = i+1; j <= (last_seq-first_seq+1) ; j++) { tmat[j][i] = tmat[i][j]; } } mpi_njtree_master(&sumd, rdiq, tmat, (last_seq-first_seq+1)); #endif tmin = 99999.0; fnseqs2 = fnseqs - 2.0; /*.................compute SMATij values and find the smallest one ........*/ for (jj = 2; jj <= last_seq - first_seq + 1; ++jj) if (tkill[jj] != 1) for (ii = 1; ii < jj; ++ii) if (tkill[ii] != 1) { diq = djq = 0.0; diq = rdiq[ii]; djq = rdiq[jj]; dij = tmat[ii][jj]; d2r = diq + djq - (2.0 * dij); dr = sumd - dij - d2r; /* Since fnseqs2 is always (fnseqs - 2), we can * move it out of this loop structure. */ /* fnseqs2 = fnseqs - 2.0; */ total = d2r + fnseqs2 * dij + dr * 2.0; total = total / (2.0 * fnseqs2); if (total < tmin) { tmin = total; mini = ii; minj = jj; } } /*.................compute branch lengths and print the results ........*/ dio = rdiq[mini]; djo = rdiq[minj]; dmin = tmat[mini][minj]; dio = (dio - dmin) / fnseqs2; djo = (djo - dmin) / fnseqs2; bi = (dmin + dio - djo) * 0.5; bj = dmin - bi; bi = bi - av[mini]; bj = bj - av[minj]; if (av[mini] > 0.0) typei = 0; else typei = 1; if (av[minj] > 0.0) typej = 0; else typej = 1; if (verbose) fprintf(tree, "\n Cycle%4d = ", (pint) nc); /* set negative branch lengths to zero. Also set any tiny positive branch lengths to zero. 
*/ if (fabs(bi) < 0.0001) bi = 0.0; if (fabs(bj) < 0.0001) bj = 0.0; if (verbose) { if (typei == 0) fprintf(tree, "Node:%4d (%9.5f) joins ", (pint) mini, bi); else fprintf(tree, " SEQ:%4d (%9.5f) joins ", (pint) mini, bi); if (typej == 0) fprintf(tree, "Node:%4d (%9.5f)", (pint) minj, bj); else fprintf(tree, " SEQ:%4d (%9.5f)", (pint) minj, bj); fprintf(tree, "\n"); } left_branch[nc] = bi; right_branch[nc] = bj; for (i = 1; i <= last_seq - first_seq + 1; i++) tree_description[nc][i] = 0; if (typei == 0) { for (i = nc - 1; i >= 1; i--) if (tree_description[i][mini] == 1) { for (j = 1; j <= last_seq - first_seq + 1; j++) if (tree_description[i][j] == 1) tree_description[nc][j] = 1; break; } } else tree_description[nc][mini] = 1; if (typej == 0) { for (i = nc - 1; i >= 1; i--) if (tree_description[i][minj] == 1) { for (j = 1; j <= last_seq - first_seq + 1; j++) if (tree_description[i][j] == 1) tree_description[nc][j] = 1; break; } } else tree_description[nc][minj] = 1; /* Here is where the -0.00005 branch lengths come from for 3 or more identical seqs. */ /* if(dmin <= 0.0) dmin = 0.0001; */ if (dmin <= 0.0) dmin = 0.000001; av[mini] = dmin * 0.5; /*........................Re-initialisation................................*/ fnseqs = fnseqs - 1.0; tkill[minj] = 1; for (j = 1; j <= last_seq - first_seq + 1; ++j) if (tkill[j] != 1) { da = (tmat[mini][j] + tmat[minj][j]) * 0.5; if ((mini - j) < 0) tmat[mini][j] = da; if ((mini - j) > 0) tmat[j][mini] = da; } for (j = 1; j <= last_seq - first_seq + 1; ++j) tmat[minj][j] = tmat[j][minj] = 0.0; } free(rdiq); /**end main cycle**/ /******************************Last Cycle (3 Seqs. 
left)********************/ nude = 1; for (i = 1; i <= last_seq - first_seq + 1; ++i) if (tkill[i] != 1) { l[nude] = i; nude = nude + 1; } b1 = (tmat[l[1]][l[2]] + tmat[l[1]][l[3]] - tmat[l[2]][l[3]]) * 0.5; b2 = tmat[l[1]][l[2]] - b1; b3 = tmat[l[1]][l[3]] - b1; branch[1] = b1 - av[l[1]]; branch[2] = b2 - av[l[2]]; branch[3] = b3 - av[l[3]]; /* Reset tiny negative and positive branch lengths to zero */ if (fabs(branch[1]) < 0.0001) branch[1] = 0.0; if (fabs(branch[2]) < 0.0001) branch[2] = 0.0; if (fabs(branch[3]) < 0.0001) branch[3] = 0.0; left_branch[last_seq - first_seq + 1 - 2] = branch[1]; left_branch[last_seq - first_seq + 1 - 1] = branch[2]; left_branch[last_seq - first_seq + 1] = branch[3]; for (i = 1; i <= last_seq - first_seq + 1; i++) tree_description[last_seq - first_seq + 1 - 2][i] = 0; if (verbose) fprintf(tree, "\n Cycle%4d (Last cycle, trichotomy):\n", (pint) nc); for (i = 1; i <= 3; ++i) { if (av[l[i]] > 0.0) { if (verbose) fprintf(tree, "\n\t\t Node:%4d (%9.5f) ", (pint) l[i], branch[i]); for (k = last_seq - first_seq + 1 - 3; k >= 1; k--) if (tree_description[k][l[i]] == 1) { for (j = 1; j <= last_seq - first_seq + 1; j++) if (tree_description[k][j] == 1) tree_description[last_seq - first_seq + 1 - 2][j] = i; break; } } else { if (verbose) fprintf(tree, "\n\t\t SEQ:%4d (%9.5f) ", (pint) l[i], branch[i]); tree_description[last_seq - first_seq + 1 - 2][l[i]] = i; } if (i < 3) { if (verbose) fprintf(tree, "joins"); } } if (verbose) fprintf(tree, "\n"); } void bootstrap_tree(char *phylip_name, char *clustal_name, char *nexus_name) { sint i, j; int ranno; char path[MAXLINE + 1]; char dummy[10]; char err_mess[1024]; static char **sample_tree; static char **standard_tree; static char **save_tree; sint total_dists, overspill = 0, total_overspill = 0; sint nfails = 0; if (empty) { error("You must load an alignment first"); return; } if (nseqs < 4) { error("Alignment has only %d sequences", nseqs); return; } if (!output_tree_clustal && !output_tree_phylip 
&& !output_tree_nexus) { error ("You must select either clustal or phylip or nexus tree output format"); return; } get_path(seqname, path); if (output_tree_clustal) { if (clustal_name[0] != EOS) { if ((clustal_phy_tree_file = open_explicit_file(clustal_name)) == NULL) return; } else { if ((clustal_phy_tree_file = open_output_file ("\nEnter name for bootstrap output file ", path, clustal_name, "njb")) == NULL) return; } } first_seq = 1; last_seq = nseqs; if (output_tree_phylip) { if (phylip_name[0] != EOS) { if ((phylip_phy_tree_file = open_explicit_file(phylip_name)) == NULL) return; } else { if ((phylip_phy_tree_file = open_output_file ("\nEnter name for bootstrap output file ", path, phylip_name, "phb")) == NULL) return; } } if (output_tree_nexus) { if (nexus_name[0] != EOS) { if ((nexus_phy_tree_file = open_explicit_file(nexus_name)) == NULL) return; } else { if ((nexus_phy_tree_file = open_output_file ("\nEnter name for bootstrap output file ", path, nexus_name, "treb")) == NULL) return; } } boot_totals = (sint *) ckalloc((nseqs + 1) * sizeof(sint)); for (i = 0; i < nseqs + 1; i++) boot_totals[i] = 0; boot_positions = (sint *) ckalloc((seqlen_array[first_seq] + 2) * sizeof(sint)); for (j = 1; j <= seqlen_array[first_seq]; ++j) /* First select all positions for */ boot_positions[j] = j; /* the "standard" tree */ if (output_tree_clustal) { verbose = TRUE; /* Turn on file output */ if (dnaflag) overspill = dna_distance_matrix(clustal_phy_tree_file); else overspill = prot_distance_matrix(clustal_phy_tree_file); } if (output_tree_phylip) { verbose = FALSE; /* Turn off file output */ if (dnaflag) overspill = dna_distance_matrix(phylip_phy_tree_file); else overspill = prot_distance_matrix(phylip_phy_tree_file); } if (output_tree_nexus) { verbose = FALSE; /* Turn off file output */ if (dnaflag) overspill = dna_distance_matrix(nexus_phy_tree_file); else overspill = prot_distance_matrix(nexus_phy_tree_file); } /* check if any distances overflowed the distance corrections 
*/ if (overspill > 0) { total_dists = (nseqs * (nseqs - 1)) / 2; overspill_message(overspill, total_dists); } tree_gaps = ckfree((void *) tree_gaps); if (output_tree_clustal) verbose = TRUE; /* Turn on screen output */ standard_tree = (char **) ckalloc((nseqs + 1) * sizeof(char *)); for (i = 0; i < nseqs + 1; i++) standard_tree[i] = (char *) ckalloc((nseqs + 1) * sizeof(char)); /* compute the standard tree */ if (output_tree_clustal || output_tree_phylip || output_tree_nexus) nj_tree(standard_tree, clustal_phy_tree_file); if (output_tree_clustal) fprintf(clustal_phy_tree_file, "\n\n\t\t\tBootstrap Confidence Limits\n\n"); /* save the left_branch and right_branch for phylip output */ save_left_branch = (double *) ckalloc((nseqs + 2) * sizeof(double)); save_right_branch = (double *) ckalloc((nseqs + 2) * sizeof(double)); for (i = 1; i <= nseqs; i++) { save_left_branch[i] = left_branch[i]; save_right_branch[i] = right_branch[i]; } /* The next line is a fossil from the days of using the cc ran() ran_factor = RAND_MAX / seqlen_array[first_seq]; */ if (usemenu) boot_ran_seed = getint("\n\nEnter seed no. 
for random number generator ", 1, 1000, boot_ran_seed); /* do not use the native cc ran() srand(boot_ran_seed); */ addrandinit((unsigned long) boot_ran_seed); if (output_tree_clustal) fprintf(clustal_phy_tree_file, "\n Random number generator seed = %7u\n", boot_ran_seed); if (usemenu) boot_ntrials = getint("\n\nEnter number of bootstrap trials ", 1, 10000, boot_ntrials); if (output_tree_clustal) { fprintf(clustal_phy_tree_file, "\n Number of bootstrap trials = %7d\n", (pint) boot_ntrials); fprintf(clustal_phy_tree_file, "\n\n Diagrammatic representation of the above tree: \n"); fprintf(clustal_phy_tree_file, "\n Each row represents 1 tree cycle;"); fprintf(clustal_phy_tree_file, " defining 2 groups.\n"); fprintf(clustal_phy_tree_file, "\n Each column is 1 sequence; "); fprintf(clustal_phy_tree_file, "the stars in each line show 1 group; "); fprintf(clustal_phy_tree_file, "\n the dots show the other\n"); fprintf(clustal_phy_tree_file, "\n Numbers show occurences in bootstrap samples."); } /* print_tree(standard_tree, clustal_phy_tree_file, boot_totals); */ verbose = FALSE; /* Turn OFF screen output */ left_branch = ckfree((void *) left_branch); right_branch = ckfree((void *) right_branch); tkill = ckfree((void *) tkill); av = ckfree((void *) av); sample_tree = (char **) ckalloc((nseqs + 1) * sizeof(char *)); for (i = 0; i < nseqs + 1; i++) sample_tree[i] = (char *) ckalloc((nseqs + 1) * sizeof(char)); if (usemenu) fprintf(stdout, "\n\nEach dot represents 10 trials\n\n"); total_overspill = 0; nfails = 0; for (i = 1; i <= boot_ntrials; ++i) { for (j = 1; j <= seqlen_array[first_seq]; ++j) { /* select alignment */ /* positions for */ ranno = addrand((unsigned long) seqlen_array[1]) + 1; boot_positions[j] = ranno; /* bootstrap sample */ } if (output_tree_clustal) { if (dnaflag) overspill = dna_distance_matrix(clustal_phy_tree_file); else overspill = prot_distance_matrix(clustal_phy_tree_file); } if (output_tree_phylip) { if (dnaflag) overspill = 
dna_distance_matrix(phylip_phy_tree_file); else overspill = prot_distance_matrix(phylip_phy_tree_file); } if (output_tree_nexus) { if (dnaflag) overspill = dna_distance_matrix(nexus_phy_tree_file); else overspill = prot_distance_matrix(nexus_phy_tree_file); } if (overspill > 0) { total_overspill = total_overspill + overspill; nfails++; } tree_gaps = ckfree((void *) tree_gaps); if (output_tree_clustal || output_tree_phylip || output_tree_nexus) nj_tree(sample_tree, clustal_phy_tree_file); left_branch = ckfree((void *) left_branch); right_branch = ckfree((void *) right_branch); tkill = ckfree((void *) tkill); av = ckfree((void *) av); compare_tree(standard_tree, sample_tree, boot_totals, last_seq - first_seq + 1); if (usemenu) { if (i % 10 == 0) fprintf(stdout, "."); if (i % 100 == 0) fprintf(stdout, "\n"); } } /* check if any distances overflowed the distance corrections */ if (nfails > 0) { total_dists = (nseqs * (nseqs - 1)) / 2; fprintf(stdout, "\n"); fprintf(stdout, "\n WARNING: %ld of the distances out of a total of %ld times %ld", (long) total_overspill, (long) total_dists, (long) boot_ntrials); fprintf(stdout, "\n were out of range for the distance correction."); fprintf(stdout, "\n This affected %d out of %d bootstrap trials.", (pint) nfails, (pint) boot_ntrials); fprintf(stdout, "\n This may not be fatal but you have been warned!"); fprintf(stdout, "\n"); fprintf(stdout, "\n SUGGESTIONS: 1) turn off the correction"); fprintf(stdout, "\n or 2) remove the most distant sequences"); fprintf(stdout, "\n or 3) use the PHYLIP package."); fprintf(stdout, "\n\n"); if (usemenu) getstr("Press [RETURN] to continue", dummy); } boot_positions = ckfree((void *) boot_positions); for (i = 1; i < nseqs + 1; i++) sample_tree[i] = ckfree((void *) sample_tree[i]); sample_tree = ckfree((void *) sample_tree); /* fprintf(clustal_phy_tree_file,"\n\n Bootstrap totals for each group\n"); */ if (output_tree_clustal) print_tree(standard_tree, clustal_phy_tree_file, boot_totals); 
save_tree = (char **) ckalloc((nseqs + 1) * sizeof(char *)); for (i = 0; i < nseqs + 1; i++) save_tree[i] = (char *) ckalloc((nseqs + 1) * sizeof(char)); for (i = 1; i < nseqs + 1; i++) for (j = 1; j < nseqs + 1; j++) save_tree[i][j] = standard_tree[i][j]; if (output_tree_phylip) { left_branch = (double *) ckalloc((nseqs + 2) * sizeof(double)); right_branch = (double *) ckalloc((nseqs + 2) * sizeof(double)); for (i = 1; i <= nseqs; i++) { left_branch[i] = save_left_branch[i]; right_branch[i] = save_right_branch[i]; } print_phylip_tree(standard_tree, phylip_phy_tree_file, bootstrap_format); left_branch = ckfree((void *) left_branch); right_branch = ckfree((void *) right_branch); } for (i = 1; i < nseqs + 1; i++) for (j = 1; j < nseqs + 1; j++) standard_tree[i][j] = save_tree[i][j]; if (output_tree_nexus) { left_branch = (double *) ckalloc((nseqs + 2) * sizeof(double)); right_branch = (double *) ckalloc((nseqs + 2) * sizeof(double)); for (i = 1; i <= nseqs; i++) { left_branch[i] = save_left_branch[i]; right_branch[i] = save_right_branch[i]; } print_nexus_tree(standard_tree, nexus_phy_tree_file, bootstrap_format); left_branch = ckfree((void *) left_branch); right_branch = ckfree((void *) right_branch); } boot_totals = ckfree((void *) boot_totals); save_left_branch = ckfree((void *) save_left_branch); save_right_branch = ckfree((void *) save_right_branch); for (i = 1; i < nseqs + 1; i++) standard_tree[i] = ckfree((void *) standard_tree[i]); standard_tree = ckfree((void *) standard_tree); for (i = 0; i < nseqs + 1; i++) save_tree[i] = ckfree((void *) save_tree[i]); save_tree = ckfree((void *) save_tree); if (output_tree_clustal) fclose(clustal_phy_tree_file); if (output_tree_phylip) fclose(phylip_phy_tree_file); if (output_tree_nexus) fclose(nexus_phy_tree_file); if (output_tree_clustal) info("Bootstrap output file completed [%s]", clustal_name); if (output_tree_phylip) info("Bootstrap output file completed [%s]", phylip_name); if (output_tree_nexus) info("Bootstrap 
output file completed [%s]", nexus_name); } void compare_tree(char **tree1, char **tree2, sint * hits, sint n) { sint i, j, k; sint nhits1, nhits2; for (i = 1; i <= n - 3; i++) { for (j = 1; j <= n - 3; j++) { nhits1 = 0; nhits2 = 0; for (k = 1; k <= n; k++) { if (tree1[i][k] == tree2[j][k]) nhits1++; if (tree1[i][k] != tree2[j][k]) nhits2++; } if ((nhits1 == last_seq - first_seq + 1) || (nhits2 == last_seq - first_seq + 1)) hits[i]++; } } } void print_nexus_tree(char **tree_description, FILE * tree, sint bootstrap) { sint i; sint old_row; fprintf(tree, "#NEXUS\n\n"); fprintf(tree, "BEGIN TREES;\n\n"); fprintf(tree, "\tTRANSLATE\n"); for (i = 1; i < nseqs; i++) { fprintf(tree, "\t\t%d %s,\n", (pint) i, names[i]); } fprintf(tree, "\t\t%d %s\n", (pint) nseqs, names[nseqs]); fprintf(tree, "\t\t;\n"); fprintf(tree, "\tUTREE PAUP_1= "); if (last_seq - first_seq + 1 == 2) { fprintf(tree, "(%d:%7.5f,%d:%7.5f);", first_seq, tmat[first_seq][first_seq + 1], first_seq + 1, tmat[first_seq][first_seq + 1]); } else { fprintf(tree, "("); old_row = two_way_split_nexus(tree_description, tree, last_seq - first_seq + 1 - 2, 1, bootstrap); fprintf(tree, ":%7.5f", left_branch[last_seq - first_seq + 1 - 2]); if ((bootstrap == BS_BRANCH_LABELS) && (old_row > 0) && (boot_totals[old_row] > 0)) fprintf(tree, "[%d]", (pint) boot_totals[old_row]); fprintf(tree, ","); old_row = two_way_split_nexus(tree_description, tree, last_seq - first_seq + 1 - 2, 2, bootstrap); fprintf(tree, ":%7.5f", left_branch[last_seq - first_seq + 1 - 1]); if ((bootstrap == BS_BRANCH_LABELS) && (old_row > 0) && (boot_totals[old_row] > 0)) fprintf(tree, "[%d]", (pint) boot_totals[old_row]); fprintf(tree, ","); old_row = two_way_split_nexus(tree_description, tree, last_seq - first_seq + 1 - 2, 3, bootstrap); fprintf(tree, ":%7.5f", left_branch[last_seq - first_seq + 1]); if ((bootstrap == BS_BRANCH_LABELS) && (old_row > 0) && (boot_totals[old_row] > 0)) fprintf(tree, "[%d]", (pint) boot_totals[old_row]); fprintf(tree, 
")"); if (bootstrap == BS_NODE_LABELS) fprintf(tree, "TRICHOTOMY"); fprintf(tree, ";"); } fprintf(tree, "\nENDBLOCK;\n"); } sint two_way_split_nexus (char **tree_description, FILE * tree, sint start_row, sint flag, sint bootstrap) { sint row, new_row = 0, old_row, col, test_col = 0; Boolean single_seq; if (start_row != last_seq - first_seq + 1 - 2) fprintf(tree, "("); for (col = 1; col <= last_seq - first_seq + 1; col++) { if (tree_description[start_row][col] == flag) { test_col = col; break; } } single_seq = TRUE; for (row = start_row - 1; row >= 1; row--) if (tree_description[row][test_col] == 1) { single_seq = FALSE; new_row = row; break; } if (single_seq) { tree_description[start_row][test_col] = 0; fprintf(tree, "%d", test_col + first_seq - 1); if (start_row == last_seq - first_seq + 1 - 2) { return (0); } fprintf(tree, ":%7.5f,", left_branch[start_row]); } else { for (col = 1; col <= last_seq - first_seq + 1; col++) { if ((tree_description[start_row][col] == 1) && (tree_description[new_row][col] == 1)) tree_description[start_row][col] = 0; } old_row = two_way_split_nexus(tree_description, tree, new_row, (sint) 1, bootstrap); if (start_row == last_seq - first_seq + 1 - 2) { return (new_row); } fprintf(tree, ":%7.5f", left_branch[start_row]); if ((bootstrap == BS_BRANCH_LABELS) && (boot_totals[old_row] > 0)) fprintf(tree, "[%d]", (pint) boot_totals[old_row]); fprintf(tree, ","); } for (col = 1; col <= last_seq - first_seq + 1; col++) if (tree_description[start_row][col] == flag) { test_col = col; break; } single_seq = TRUE; new_row = 0; for (row = start_row - 1; row >= 1; row--) if (tree_description[row][test_col] == 1) { single_seq = FALSE; new_row = row; break; } if (single_seq) { tree_description[start_row][test_col] = 0; fprintf(tree, "%d", test_col + first_seq - 1); fprintf(tree, ":%7.5f)", right_branch[start_row]); } else { for (col = 1; col <= last_seq - first_seq + 1; col++) { if ((tree_description[start_row][col] == 1) && 
(tree_description[new_row][col] == 1)) tree_description[start_row][col] = 0; } old_row = two_way_split_nexus(tree_description, tree, new_row, (sint) 1, bootstrap); fprintf(tree, ":%7.5f", right_branch[start_row]); if ((bootstrap == BS_BRANCH_LABELS) && (boot_totals[old_row] > 0)) fprintf(tree, "[%d]", (pint) boot_totals[old_row]); fprintf(tree, ")"); } if ((bootstrap == BS_NODE_LABELS) && (boot_totals[start_row] > 0)) fprintf(tree, "%d", (pint) boot_totals[start_row]); return (start_row); } void print_phylip_tree(char **tree_description, FILE * tree, sint bootstrap) { sint old_row; if (last_seq - first_seq + 1 == 2) { fprintf(tree, "(%s:%7.5f,%s:%7.5f);", names[first_seq], tmat[first_seq][first_seq + 1], names[first_seq + 1], tmat[first_seq][first_seq + 1]); return; } fprintf(tree, "(\n"); old_row = two_way_split(tree_description, tree, last_seq - first_seq + 1 - 2, 1, bootstrap); fprintf(tree, ":%7.5f", left_branch[last_seq - first_seq + 1 - 2]); if ((bootstrap == BS_BRANCH_LABELS) && (old_row > 0) && (boot_totals[old_row] > 0)) fprintf(tree, "[%d]", (pint) boot_totals[old_row]); fprintf(tree, ",\n"); old_row = two_way_split(tree_description, tree, last_seq - first_seq + 1 - 2, 2, bootstrap); fprintf(tree, ":%7.5f", left_branch[last_seq - first_seq + 1 - 1]); if ((bootstrap == BS_BRANCH_LABELS) && (old_row > 0) && (boot_totals[old_row] > 0)) fprintf(tree, "[%d]", (pint) boot_totals[old_row]); fprintf(tree, ",\n"); old_row = two_way_split(tree_description, tree, last_seq - first_seq + 1 - 2, 3, bootstrap); fprintf(tree, ":%7.5f", left_branch[last_seq - first_seq + 1]); if ((bootstrap == BS_BRANCH_LABELS) && (old_row > 0) && (boot_totals[old_row] > 0)) fprintf(tree, "[%d]", (pint) boot_totals[old_row]); fprintf(tree, ")"); if (bootstrap == BS_NODE_LABELS) fprintf(tree, "TRICHOTOMY"); fprintf(tree, ";\n"); } sint two_way_split (char **tree_description, FILE * tree, sint start_row, sint flag, sint bootstrap) { sint row, new_row = 0, old_row, col, test_col = 0; Boolean 
single_seq; if (start_row != last_seq - first_seq + 1 - 2) fprintf(tree, "(\n"); for (col = 1; col <= last_seq - first_seq + 1; col++) { if (tree_description[start_row][col] == flag) { test_col = col; break; } } single_seq = TRUE; for (row = start_row - 1; row >= 1; row--) if (tree_description[row][test_col] == 1) { single_seq = FALSE; new_row = row; break; } if (single_seq) { tree_description[start_row][test_col] = 0; fprintf(tree, "%.*s", max_names, names[test_col + first_seq - 1]); if (start_row == last_seq - first_seq + 1 - 2) { return (0); } fprintf(tree, ":%7.5f,\n", left_branch[start_row]); } else { for (col = 1; col <= last_seq - first_seq + 1; col++) { if ((tree_description[start_row][col] == 1) && (tree_description[new_row][col] == 1)) tree_description[start_row][col] = 0; } old_row = two_way_split(tree_description, tree, new_row, (sint) 1, bootstrap); if (start_row == last_seq - first_seq + 1 - 2) { return (new_row); } fprintf(tree, ":%7.5f", left_branch[start_row]); if ((bootstrap == BS_BRANCH_LABELS) && (boot_totals[old_row] > 0)) fprintf(tree, "[%d]", (pint) boot_totals[old_row]); fprintf(tree, ",\n"); } for (col = 1; col <= last_seq - first_seq + 1; col++) if (tree_description[start_row][col] == flag) { test_col = col; break; } single_seq = TRUE; new_row = 0; for (row = start_row - 1; row >= 1; row--) if (tree_description[row][test_col] == 1) { single_seq = FALSE; new_row = row; break; } if (single_seq) { tree_description[start_row][test_col] = 0; fprintf(tree, "%.*s", max_names, names[test_col + first_seq - 1]); fprintf(tree, ":%7.5f)\n", right_branch[start_row]); } else { for (col = 1; col <= last_seq - first_seq + 1; col++) { if ((tree_description[start_row][col] == 1) && (tree_description[new_row][col] == 1)) tree_description[start_row][col] = 0; } old_row = two_way_split(tree_description, tree, new_row, (sint) 1, bootstrap); fprintf(tree, ":%7.5f", right_branch[start_row]); if ((bootstrap == BS_BRANCH_LABELS) && (boot_totals[old_row] > 0)) 
fprintf(tree, "[%d]", (pint) boot_totals[old_row]); fprintf(tree, ")\n"); } if ((bootstrap == BS_NODE_LABELS) && (boot_totals[start_row] > 0)) fprintf(tree, "%d", (pint) boot_totals[start_row]); return (start_row); } void print_tree(char **tree_description, FILE * tree, sint * totals) { sint row, col; fprintf(tree, "\n"); for (row = 1; row <= last_seq - first_seq + 1 - 3; row++) { fprintf(tree, " \n"); for (col = 1; col <= last_seq - first_seq + 1; col++) { if (tree_description[row][col] == 0) fprintf(tree, "*"); else fprintf(tree, "."); } if (totals[row] > 0) fprintf(tree, "%7d", (pint) totals[row]); } fprintf(tree, " \n"); for (col = 1; col <= last_seq - first_seq + 1; col++) fprintf(tree, "%1d", (pint) tree_description[last_seq - first_seq + 1 - 2][col]); fprintf(tree, "\n"); } sint dna_distance_matrix(FILE * tree) { sint m, n; sint j, i; sint res1, res2; sint overspill = 0; double p, q, e, a, b, k; tree_gap_delete(); /* flag positions with gaps (tree_gaps[i] = 1 ) */ if (verbose) { fprintf(tree, "\n"); fprintf(tree, "\n DIST = percentage divergence (/100)"); fprintf(tree, "\n p = rate of transition (A <-> G; C <-> T)"); fprintf(tree, "\n q = rate of transversion"); fprintf(tree, "\n Length = number of sites used in comparison"); fprintf(tree, "\n"); if (tossgaps) { fprintf(tree, "\n All sites with gaps (in any sequence) deleted!"); fprintf(tree, "\n"); } if (kimura) { fprintf(tree, "\n Distances corrected by Kimura's 2 parameter model:"); fprintf(tree, "\n\n Kimura, M. (1980)"); fprintf(tree, " A simple method for estimating evolutionary "); fprintf(tree, "rates of base"); fprintf(tree, "\n substitutions through comparative studies of "); fprintf(tree, "nucleotide sequences."); fprintf(tree, "\n J. Mol. 
Evol., 16, 111-120."); fprintf(tree, "\n\n"); } } for (m = 1; m < last_seq - first_seq + 1; ++m) /* for every pair of sequence */ for (n = m + 1; n <= last_seq - first_seq + 1; ++n) { p = q = e = 0.0; tmat[m][n] = tmat[n][m] = 0.0; for (i = 1; i <= seqlen_array[first_seq]; ++i) { j = boot_positions[i]; if (tossgaps && (tree_gaps[j] > 0)) goto skip; /* gap position */ res1 = seq_array[m + first_seq - 1][j]; res2 = seq_array[n + first_seq - 1][j]; if ((res1 == gap_pos1) || (res1 == gap_pos2) || (res2 == gap_pos1) || (res2 == gap_pos2)) goto skip; /* gap in a seq */ if (!use_ambiguities) if (is_ambiguity(res1) || is_ambiguity(res2)) goto skip; /* ambiguity code in a seq */ e = e + 1.0; if (res1 != res2) { if (transition(res1, res2)) p = p + 1.0; else q = q + 1.0; } skip:; } /* Kimura's 2 parameter correction for multiple substitutions */ if (!kimura) { if (e == 0) { fprintf(stdout, "\n WARNING: sequences %d and %d are non-overlapping\n", m, n); k = 0.0; p = 0.0; q = 0.0; } else { k = (p + q) / e; if (p > 0.0) p = p / e; else p = 0.0; if (q > 0.0) q = q / e; else q = 0.0; } tmat[m][n] = tmat[n][m] = k; if (verbose) /* if screen output */ fprintf(tree, "%4d vs.%4d: DIST = %7.4f; p = %6.4f; q = %6.4f; length = %6.0f\n", (pint) m, (pint) n, k, p, q, e); } else { if (e == 0) { fprintf(stdout, "\n WARNING: sequences %d and %d are non-overlapping\n", m, n); p = 0.0; q = 0.0; } else { if (p > 0.0) p = p / e; else p = 0.0; if (q > 0.0) q = q / e; else q = 0.0; } if (((2.0 * p) + q) == 1.0) a = 0.0; else a = 1.0 / (1.0 - (2.0 * p) - q); if (q == 0.5) b = 0.0; else b = 1.0 / (1.0 - (2.0 * q)); /* watch for values going off the scale for the correction. 
*/ if ((a <= 0.0) || (b <= 0.0)) { overspill++; k = 3.5; /* arbitrary high score */ } else k = 0.5 * log(a) + 0.25 * log(b); tmat[m][n] = tmat[n][m] = k; if (verbose) /* if screen output */ fprintf(tree, "%4d vs.%4d: DIST = %7.4f; p = %6.4f; q = %6.4f; length = %6.0f\n", (pint) m, (pint) n, k, p, q, e); } } return overspill; /* return the number of off-scale values */ } sint prot_distance_matrix(FILE * tree) { sint m, n; sint j, i; sint res1, res2; sint overspill = 0; double p, e, k, table_entry; tree_gap_delete(); /* flag positions with gaps (tree_gaps[i] = 1 ) */ if (verbose) { fprintf(tree, "\n"); fprintf(tree, "\n DIST = percentage divergence (/100)"); fprintf(tree, "\n Length = number of sites used in comparison"); fprintf(tree, "\n\n"); if (tossgaps) { fprintf(tree, "\n All sites with gaps (in any sequence) deleted"); fprintf(tree, "\n"); } if (kimura) { fprintf(tree, "\n Distances up tp 0.75 corrected by Kimura's empirical method:"); fprintf(tree, "\n\n Kimura, M. (1983)"); fprintf(tree, " The Neutral Theory of Molecular Evolution."); fprintf(tree, "\n Page 75. 
Cambridge University Press, Cambridge, England."); fprintf(tree, "\n\n"); } } for (m = 1; m < nseqs; ++m) /* for every pair of sequence */ for (n = m + 1; n <= nseqs; ++n) { p = e = 0.0; tmat[m][n] = tmat[n][m] = 0.0; for (i = 1; i <= seqlen_array[1]; ++i) { j = boot_positions[i]; if (tossgaps && (tree_gaps[j] > 0)) goto skip; /* gap position */ res1 = seq_array[m][j]; res2 = seq_array[n][j]; if ((res1 == gap_pos1) || (res1 == gap_pos2) || (res2 == gap_pos1) || (res2 == gap_pos2)) goto skip; /* gap in a seq */ e = e + 1.0; if (res1 != res2) p = p + 1.0; skip:; } if (p <= 0.0) k = 0.0; else k = p / e; /* DES debug */ /* fprintf(stdout,"Seq1=%4d Seq2=%4d k =%7.4f \n",(pint)m,(pint)n,k); */ /* DES debug */ if (kimura) { if (k < 0.75) { /* use Kimura's formula */ if (k > 0.0) k = -log(1.0 - k - (k * k / 5.0)); } else { if (k > 0.930) { overspill++; k = 10.0; /* arbitrarily set to 1000% */ } else { table_entry = (k * 1000.0) - 750.0; k = (double) dayhoff_pams[(int) table_entry]; k = k / 100.0; } } } tmat[m][n] = tmat[n][m] = k; if (verbose) /* if screen output */ fprintf(tree, "%4d vs.%4d DIST = %6.4f; length = %6.0f\n", (pint) m, (pint) n, k, e); } return overspill; } void guide_tree(FILE * tree, sint firstseq, sint numseqs) /* Routine for producing unrooted NJ trees from seperately aligned pairwise distances. This produces the GUIDE DENDROGRAMS in PHYLIP format. 
*/ { static char **standard_tree; sint i; float dist; double zero, before, after; phylip_phy_tree_file = tree; verbose = FALSE; first_seq = firstseq; last_seq = first_seq + numseqs - 1; if (numseqs == 2) { dist = tmat[firstseq][firstseq + 1] / 2.0; fprintf(tree, "(%s:%0.5f,%s:%0.5f);\n", names[firstseq], dist, names[firstseq + 1], dist); } else { standard_tree = (char **) ckalloc((last_seq - first_seq + 2) * sizeof(char *)); for (i = 0; i < last_seq - first_seq + 2; i++) standard_tree[i] = (char *) ckalloc((last_seq - first_seq + 2) * sizeof(char)); zero = 0.0; before = walltime(&zero); nj_tree(standard_tree, clustal_phy_tree_file); after = walltime(&before); fprintf(stderr,"DEBUG: nj_tree() takes %8.6f sec\n", after); print_phylip_tree(standard_tree, phylip_phy_tree_file, 0); if (left_branch != NULL) left_branch = ckfree((void *) left_branch); if (right_branch != NULL) right_branch = ckfree((void *) right_branch); if (tkill != NULL) tkill = ckfree((void *) tkill); if (av != NULL) av = ckfree((void *) av); for (i = 1; i < last_seq - first_seq + 2; i++) standard_tree[i] = ckfree((void *) standard_tree[i]); standard_tree = ckfree((void *) standard_tree); } fclose(phylip_phy_tree_file); } static Boolean is_ambiguity(char c) { int i; char codes[] = "ACGTU"; if (use_ambiguities == TRUE) { return FALSE; } for (i = 0; i < 5; i++) if (amino_acid_codes[c] == codes[i]) return FALSE; return TRUE; } #include static double walltime(double *t0) { double mic, time; double mega = 0.000001; struct timeval tp; struct timezone tzp; static long base_sec=0; static long base_usec=0; (void)gettimeofday(&tp, &tzp); if (base_sec == 0) { base_sec = tp.tv_sec; base_usec = tp.tv_usec; } time = (double) (tp.tv_sec - base_sec); mic = (double) (tp.tv_usec - base_usec); time = (time+mic*mega)- *t0; return(time); } static void mpi_njtree_master(double *sumd, double *rdiq, double **tmat, int tmat_len) { MPI_Request *mpirequest; MPI_Status *mpistatus; MPI_Status status; int i, j; int *mpirow; int 
myidx; int *buffer_len; char **sbuffer; int *position; double *rdiq_tmp; int work, dest; int np; MPI_Comm_size(MPI_COMM_WORLD, &np); /* * Prepare to send data to slaves */ *sumd = 0.0; /* * First, we need to determine which row is sending to which * slave. We have a total of (tmat_len) rows. The length of each * row is (tmat_len). */ /* * mpirow[1..tmat_len] = the rank of the MPI process * who is responsible for this row. */ mpirow = (int *) malloc((tmat_len+1) * sizeof(int)); assert(mpirow); myidx = 1; while (myidx <= tmat_len) { for (i = 1; i <= (np - 1) && myidx <= tmat_len; i++) { mpirow[myidx] = i; myidx++; } } /* * Now it's time to pack and send rows. * * We need (np-1) buffers. */ sbuffer = (char **) malloc(np * sizeof(char *)); assert(sbuffer); buffer_len = (int *) calloc(np, sizeof(int)); assert(buffer_len); position = (int *) malloc(np * sizeof(int)); /* determine the length of each of the (np-1) buffer */ for (i = 1; i <= tmat_len; i++) buffer_len[mpirow[i]] += (tmat_len); for (i = 1; i < np; i++) buffer_len[i] = sizeof(int) + buffer_len[i] * sizeof(double) + (tmat_len+1) * sizeof(int); /* * Allocate space for sbuffer[1..(np-1)]. We'll also * send the mpirow[1..(tmat_len-1)] array to slaves. */ for (i = 1; i < np; i++) { sbuffer[i] = (char *) malloc(buffer_len[i]); assert(sbuffer[i]); } /* * Now we pack mpirow[] then tmat[]. */ for (i = 1; i < np; i++) { position[i] = 0; MPI_Pack(&tmat_len, 1, MPI_INT, sbuffer[i], buffer_len[i], &position[i], MPI_COMM_WORLD); MPI_Pack(mpirow, (tmat_len+1), MPI_INT, sbuffer[i], buffer_len[i], &position[i], MPI_COMM_WORLD); } for (i = 1; i <= tmat_len; i++) { MPI_Pack(&(tmat[i][1]), tmat_len, MPI_DOUBLE, sbuffer[mpirow[i]], buffer_len[mpirow[i]], &position[mpirow[i]], MPI_COMM_WORLD); } /* * Send length then the actual data to slaves. 
*/ mpirequest = (MPI_Request *) malloc(np * sizeof(MPI_Request)); assert(mpirequest); mpistatus = (MPI_Status *) malloc(np * sizeof(MPI_Status)); assert(mpistatus); for (i = 1; i < np; i++) { MPI_Send(&buffer_len[i], 1, MPI_INT, i, DOING_NJTREE, MPI_COMM_WORLD); } for (i = 1; i < np; i++) { MPI_Isend(sbuffer[i], buffer_len[i], MPI_PACKED, i, NJTREE_DATA, MPI_COMM_WORLD, &mpirequest[i - 1]); } MPI_Waitall(np - 1, mpirequest, mpistatus); /* Wait for result to come back */ /* * Allocate space for rdiq[1..(tmat_len)] */ rdiq_tmp = (double *) calloc((tmat_len+1), sizeof(double)); assert(rdiq); work = (np - 1); *sumd = 0.0; while (work > 0) { MPI_Recv(rdiq_tmp, (tmat_len+1), MPI_DOUBLE, MPI_ANY_SOURCE, TMAT_ROW_SUM, MPI_COMM_WORLD, &status); dest = status.MPI_SOURCE; *sumd += rdiq_tmp[0]; for (i = 1; i <= (tmat_len); i++) rdiq[i] += rdiq_tmp[i]; work--; } /* house cleaning */ for (i = 1; i < np; i++) { free(sbuffer[i]); } free(sbuffer); free(buffer_len); free(position); free(rdiq_tmp); free(mpirow); free(mpirequest); free(mpistatus); /* for (i = 1; i < np; i++) { MPI_Send(0, 0, MPI_INT, i, MY_ENDING_TAG, MPI_COMM_WORLD); } */ return; } clustalw-mpi-0.15/util.c0000644000411000001440000001747407644152540013541 0ustar liusers#include #include #include #include #include #include #include "clustalw.h" extern char **seq_array; extern sint *seqlen_array; extern char **names,**titles; extern sint *output_index; extern sint *seq_weight; extern double **tmat; /* * ckalloc() * * Tries to allocate "bytes" bytes of memory. Exits program if failed. * Return value: * Generic pointer to the newly allocated memory. */ void *ckalloc(size_t bytes) { register void *ret; if( (ret = calloc(bytes, sizeof(char))) == NULL) /* if( (ret = malloc(bytes)) == NULL) */ fatal("Out of memory\n"); else return ret; return ret; } /* * ckrealloc() * * Tries to reallocate "bytes" bytes of memory. Exits program if failed. * Return value: * Generic pointer to the re-allocated memory. 
*/ void *ckrealloc(void *ptr, size_t bytes) { register void *ret=NULL; if (ptr == NULL) fatal("Bad call to ckrealloc\n"); else if( (ret = realloc(ptr, bytes)) == NULL) fatal("Out of memory\n"); else return ret; return ret; } /* * ckfree() * * Tries to free memory allocated by ckalloc. * Return value: * None. */ void *ckfree(void *ptr) { if (ptr == NULL) warning("Bad call to ckfree\n"); else { free(ptr); ptr = NULL; } return ptr; } /* * rtrim() * * Removes trailing blanks from a string * * Return values: * Pointer to the processed string */ char * rtrim(char *str) { register int p; p = strlen(str) - 1; while ( isspace(str[p]) ) p--; str[p + 1] = EOS; return str; } /* * blank_to_() * * Replace blanks in a string with underscores * * Also replaces , ; : ( or ) with _ * * Return value: * Pointer to the processed string */ char * blank_to_(char *str) { int i,p; p = strlen(str) - 1; for(i=0;i<=p;i++) if( (str[i]==' ') || (str[i]==';') || (str[i]==',') || (str[i]=='(') || (str[i]==')') || (str[i]==':') ) str[i] = '_'; return str; } /* * upstr() * * Converts string str to uppercase. * Return values: * Pointer to the converted string. */ char * upstr(char *str) { register char *s = str; while( (*s = toupper(*s)) ) s++; return str; } /* * lowstr() * * Converts string str to lower case. * Return values: * Pointer to the converted string. */ char * lowstr(char *str) { register char *s = str; while( (*s = tolower(*s)) ) s++; return str; } void getstr(char *instr,char *outstr) { fprintf(stdout,"%s: ",instr); /* gets(outstr); */ fgets(outstr, MAXLINE, stdin); } double getreal(char *instr,double minx,double maxx,double def) { int status; float ret; char line[MAXLINE]; while(TRUE) { fprintf(stdout,"%s (%.1f-%.1f) [%.1f]: ",instr,minx,maxx,def); /* gets(line); */ fgets(line, MAXLINE, stdin); status=sscanf(line,"%f",&ret); if(status == EOF) return def; if(ret>maxx) { fprintf(stdout,"ERROR: Max. value=%.1f\n\n",maxx); continue; } if(retmaxx) { fprintf(stdout,"ERROR: Max. 
value=%d\n\n",(pint)maxx); continue; } if(ret-1;--i) { if(str[i]==DIRDELIM) { i = -1; break; } if(str[i]=='.') break; } if(i<0) strcat(path,"."); else path[i+1]=EOS; } void alloc_aln(sint nseqs) { sint i,j; seqlen_array = (sint *)ckalloc( (nseqs+1) * sizeof (sint)); seq_array = (char **)ckalloc( (nseqs + 1) * sizeof (char *) ); for(i=0;i0.5 and weak score =<0.5. Strong matching columns to be assigned ':' and weak matches assigned '.' in the clustal output format. */ char *res_cat1[] = { "STA", "NEQK", "NHQK", "NDEQ", "QHRK", "MILV", "MILF", "HY", "FYW", NULL }; char *res_cat2[] = { "CSA", "ATV", "SAG", "STNK", "STPA", "SGND", "SNDEQK", "NDEQHK", "NEQHRK", "FVLIM", "HFY", NULL }; static char *type_arg[] = { "protein", "dna", ""}; static char *bootlabels_arg[] = { "node", "branch", ""}; static char *outorder_arg[] = { "input", "aligned", ""}; static char *case_arg[] = { "lower", "upper", ""}; static char *seqno_arg[] = { "off", "on", ""}; static char *score_arg[] = { "percent", "absolute", ""}; static char *output_arg[] = { "gcg", "gde", "pir", "phylip", "nexus", ""}; static char *outputtree_arg[] = { "nj", "phylip", "dist", "nexus", ""}; static char *outputsecstr_arg[] = { "structure", "mask", "both", "none", ""}; /* command line initialisation type = 0 no argument type = 1 integer argument type = 2 float argument type = 3 string argument type = 4 filename type = 5 opts */ #define NOARG 0 #define INTARG 1 #define FLTARG 2 #define STRARG 3 #define FILARG 4 #define OPTARG 5 /* command line switches for DATA **************************/ cmd_line_data cmd_line_file[] = { "infile", &setinfile, FILARG, NULL, "profile1", &setprofile1, FILARG, NULL, "profile2", &setprofile2, FILARG, NULL, "", NULL, -1}; /* command line switches for VERBS **************************/ cmd_line_data cmd_line_verb[] = { "help", &sethelp, NOARG, NULL, "check", &sethelp, NOARG, NULL, "options", &setoptions, NOARG, NULL, "align", &setalign, NOARG, NULL, "newtree", &setnewtree, FILARG, NULL, 
"usetree", &setusetree, FILARG, NULL, "newtree1", &setnewtree1, FILARG, NULL, "usetree1", &setusetree1, FILARG, NULL, "newtree2", &setnewtree2, FILARG, NULL, "usetree2", &setusetree2, FILARG, NULL, "bootstrap", &setbootstrap, NOARG, NULL, "tree", &settree, NOARG, NULL, "quicktree", &setquicktree, NOARG, NULL, "convert", &setconvert, NOARG, NULL, "interactive", &setinteractive, NOARG, NULL, "batch", &setbatch, NOARG, NULL, "", NULL, -1}; /* command line switches for PARAMETERS **************************/ cmd_line_data cmd_line_para[] = { "type", &settype, OPTARG, type_arg, "profile", &setprofile, NOARG, NULL, "sequences", &setsequences, NOARG, NULL, "matrix", &setmatrix, FILARG, NULL, "dnamatrix", &setdnamatrix, FILARG, NULL, "negative", &setnegative, NOARG, NULL, "noweights", &setnoweights, NOARG, NULL, "gapopen", &setgapopen, FLTARG, NULL, "gapext", &setgapext, FLTARG, NULL, "endgaps", &setuseendgaps, NOARG, NULL, "nopgap", &setnopgap, NOARG, NULL, "nohgap", &setnohgap, NOARG, NULL, "novgap", &setnovgap, NOARG, NULL, "hgapresidues", &sethgapres, STRARG, NULL, "maxdiv", &setmaxdiv, INTARG, NULL, "gapdist", &setgapdist, INTARG, NULL, "pwmatrix", &setpwmatrix, FILARG, NULL, "pwdnamatrix", &setpwdnamatrix, FILARG, NULL, "pwgapopen", &setpwgapopen, FLTARG, NULL, "pwgapext", &setpwgapext, FLTARG, NULL, "ktuple", &setktuple, INTARG, NULL, "window", &setwindow, INTARG, NULL, "pairgap", &setpairgap, INTARG, NULL, "topdiags", &settopdiags, INTARG, NULL, "score", &setscore, OPTARG, score_arg, "transweight", &settransweight, FLTARG, NULL, "seed", &setseed, INTARG, NULL, "kimura", &setkimura, NOARG, NULL, "tossgaps", &settossgaps, NOARG, NULL, "bootlabels", &setbootlabels, OPTARG, bootlabels_arg, "debug", &setdebug, INTARG, NULL, "output", &setoutput, OPTARG, output_arg, "outputtree", &setoutputtree, OPTARG, outputtree_arg, "outfile", &setoutfile, FILARG, NULL, "outorder", &setoutorder, OPTARG, outorder_arg, "case", &setcase, OPTARG, case_arg, "seqnos", &setseqno, OPTARG, 
seqno_arg, "nosecstr1", &setsecstr1, NOARG, NULL, "nosecstr2", &setsecstr2, NOARG, NULL, "secstrout", &setsecstroutput, OPTARG, outputsecstr_arg, "helixgap", &sethelixgap, INTARG, NULL, "strandgap", &setstrandgap, INTARG, NULL, "loopgap", &setloopgap, INTARG, NULL, "terminalgap", &setterminalgap, INTARG, NULL, "helixendin", &sethelixendin, INTARG, NULL, "helixendout", &sethelixendout, INTARG, NULL, "strandendin", &setstrandendin, INTARG, NULL, "strandendout",&setstrandendout, INTARG, NULL, "", NULL, -1}; clustalw-mpi-0.15/dele.input0000644000411000001440000001513307644152540014400 0ustar liusers>gi|1|ref|XP_002914.4| ATP-binding cassette, sub-family C (CFTR/MRP), member 5 [Homo sapiens] MKDIDIGKEYIIPSPGYRSVRERTSTSGTHRDREDSKFRRTRPLECQDALETAARAEGLSLDASMHSQLR ILDEEHPKGKYHHGLSALKPIRTTSKHQHPVDNAGLFSCMTFSWLSSLARVAHKKGELSMEDVWSLSKHE SSDVNCRRLERLWQEELNEVGPDAASLRRVVWIFCRTRLILSIVCLMITQLAGFSGPAFMVKHLLEYTQA TESNLQYSLLLVLGLLLTEIVRSWSLALTWALNYRTGVRLRGAILTMAFKKILKLKNIKEKSLGELINIC SNDGQRMFEAAAVGSLLAGGPVVAILGMIYNVIILGPTGFLGSAVFILFYPAMMFASRLTAYFRRKCVAA TDERVQKMNEVLTYIKFIKMYAWVKAFSQSVQKIREEERRILEKAGYFQSITVGVAPIVVVIASVVTFSV HMTLGFDLTAAQAFTVVTVFNSMTFALKVTPFSVKSLSEASVAVDRFKSLFLMEEVHMIKNKPASPHIKI EMKNATLAWDSSHSSIQNSPKLTPKMKKDKRASRGKKEKVRQLQRTEHQAVLAEQKGHLLLDSDERPSPE EEEGKHIHLGHLRLQRTLHSIDLEIQEGKLVGICGSVGSGKTSLISAILGQMTLLEGSIAISGTFAYVAQ QAWILNATLRDNILFGKEYDEERYNSVLNSCCLRPDLAILPSSDLTEIGERGANLSGGQRQRISLARALY SDRSIYILDDPLSALDAHVGNHIFNSAIRKHLKSKTVLFVTHQLQYLVDCDEVIFMKEGCITERGTHEEL MNLNGDYATIFNNLLLGETPPVEINSKKETSGSQKKSQDKGPKTGSVKKEKAVKPEEGQLVQLEEKGQGS VPWSVYGVYIQAAGGPLAFLVIMALFMLNVGSTAFSTWWLSYWIKQGSGNTTVTRGNETSVSDSMKDNPH MQYYASIYALSMAVMLILKAIRGVVFVKGTLRASSRLHDELFRRILRSPMKFFDTTPTGRILNRFSKDMD EVDVRLPFQAEMFIQNVILVFFCVGMIAGVFPWFLVAVGPLVILFSVLHIVSRVLIRELKRLDNITQSPF LSHITSSIQGLATIHAYNKGQEFLHRYQELLDDNQAPFFLFTCAMRWLAVRLDLISIALITTTGLMIVLM HGQIPPAYAGLAISYAVQLTGLFQFTVRLASETEARFTSVERINHYIKTLSLEAPARIKNKAPSPDWPQE GEVTFENAEMRYRENLPLVLKKVSFTIKPKEKIGIVGRTGSGKSSLGMALFRLVELSGGCIKIDGVRISD 
IGLADLRSKLSIIPQEPVLFSGTVRSNLDPFNQYTEDQIWDALERTHMKECIAQLPLKLESEVMENGDNF SVGERQLLCIARALLRHCKILILDEATAAMDTETDLLIQETIREAFADCTMLTIAHRLHTVLGSDRIMVL AQGQVVEFDTPSVLLSNDSSRFYAMFAAAENKVAVKG >gi|2|ref|XP_083829.1| ATP-binding cassette, sub-family C (CFTR/MRP), member 2 [Homo sapiens] MLEKFCNSTFWNSSFLDSPEADLPLCFEQTVLVWIPLGYLWLLAPWQLLHVYKSRTKRSSTTKLYLAKQV FVGFLLILAAIELALVLTEDSGQATVPAVRYTNPSLYLGTWLLVLLIQYSRQWCVQKNSWFLSLFWILSI LCGTFQFQTLIRTLLQGDNSNLAYSCLFFISYGFQILILIFSAFSENNESSNNPSSIASFLSSITYSWYD SIILKGYKRPLTLEDVWEVDEEMKTKTLVSKFETHMKRELQKARRALQRRQEKSSQQNSGARLPGLNKNQ SQSQDALVLEDVEKKKKKSGTKKDVPKSWLMKALFKTFYMVLLKSFLLKLVNDIFTFVSPQLLKLLISFA SDRDTYLWIGYLCAILLFTAALIQSFCLQCYFQLCFKLGVKVRTAIMASVYKKALTLSNLARKEYTVGET VNLMSVDAQKLMDVTNFMHMLWSSVLQIVLSIFFLWRELGPSVLAGVGVMVLVIPINAILSTKSKTIQVK NMKNKDKRLKIMNEILSGIKILKYFAWEPSFRDQVQNLRKKELKNLLAFSQLQCVVIFVFQLTPVLVSVV TFSVYVLVDSNNILDAQKAFTSITLFNILRFPLSMLPMMISSMLQASVSTERLEKYLGGDDLDTSAIRHD CNFDKAMQFSEASFTWEHDSEATVRE >gi|3|ref|XP_004980.4| cystic fibrosis transmembrane conductance regulator, ATP-binding cassette (sub-family C, member 7) [Homo sapiens] MQRSPLEKASVVSKLFFSWTRPILRKGYRQRLELSDIYQIPSVDSADNLSEKLEREWDRELASKKNPKLI NALRRCFFWRFMFYGIFLYLGEVTKAVQPLLLGRIIASYDPDNKEERSIAIYLGIGLCLLFIVRTLLLHP AIFGLHHIGMQMRIAMFSLIYKKTLKLSSRVLDKISIGQLVSLLSNNLNKFDEGLALAHFVWIAPLQVAL LMGLIWELLQASAFCGLGFLIVLALFQAGLGRMMMKYRDQRAGKISERLVITSEMIENIQSVKAYCWEEA MEKMIENLRQTELKLTRKAAYVRYFNSSAFFFSGFFVVFLSVLPYALIKGIILRKIFTTISFCIVLRMAV TRQFPWAVQTWYDSLGAINKIQDFLQKQEYKTLEYNLTTTEVVMENVTAFWEEGFGELFEKAKQNNNNRK TSNGDDSLFFSNFSLLGTPVLKDINFKIERGQLLAVAGSTGAGKTSLLMVIMGELEPSEGKIKHSGRISF CSQFSWIMPGTIKENIIFGVSYDEYRYRSVIKACQLEEDISKFAEKDNIVLGEGGITLSGGQRARISLAR AVYKDADLYLLDSPFGYLDVLTEKEIFESCVCKLMANKTRILVTSKMEHLKKADKILILHEGSSYFYGTF SELQNLQPDFSSKLMGCDSFDQFSAERRNSILTETLHRFSLEGDAPVSWTETKKQSFKQTGEFGEKRKNS ILNPINSIRKFSIVQKTPLQMNGIEEDSDEPLERRLSLVPDSEQGEAILPRISVISTGPTLQARRRQSVL NLMTHSVNQGQNIHRKTTASTRKVSLAPQANLTELDIYSRRLSQETGLEISEEINEEDLKECFFDDMESI PAVTTWNTYLRYITVHKSLIFVLIWCLVIFLAEVAASLVVLWLLGNTPLQDKGNSTHSRNNSYAVIITST 
SSYYVFYIYVGVADTLLAMGFFRGLPLVHTLITVSKILHHKMLHSVLQAPMSTLNTLKAGGILNRFSKDI AILDDLLPLTIFDFIQLLLIVIGAIAVVAVLQPYIFVATVPVIVAFIMLRAYFLQTSQQLKQLESEGRSP IFTHLVTSLKGLWTLRAFGRQPYFETLFHKALNLHTANWFLYLSTLRWFQMRIEMIFVIFFIAVTFISIL TTGEGEGRVGIILTLAMNIMSTLQWAVNSSIDVDSLMRSVSRVFKFIDMPTEGKPTKSTKPYKNGQLSKV MIIENSHVKKDDIWPSGGQMTVKDLTAKYTEGGNAILENISFSISPGQRVGLLGRTGSGKSTLLSAFLRL LNTEGEIQIDGVSWDSITLQQWRKAFGVIPQKVFIFSGTFRKNLDPYEQWSDQEIWKVADEVGLRSVIEQ FPGKLDFVLVDGGCVLSHGHKQLMCLARSVLSKAKILLLDEPSAHLDPVTYQIIRRTLKQAFADCTVILC EHRIEAMLECQQFLVIEENKVRQYDSIQKLLNERSLFRQAISPSDRVKLFPHRNSSKCKSKPQIAALKEE TEEEVQDTRL >gi|4|ref|NP_065132.1| PIST; fused in glioblastoma; Golgi associated PDZ and coiled-coil motif containing protein; CFTR-associated ligand [Homo sapiens] MSAGGPCPAAAGGGPGGASCSVGAPGGVSMFRWLEVLEKEFDKAFVDVDLLLGEIDPDQADITYEGRQKM TSLSSCFAQLCHKAQSVSQINHKLEAQLVDLKSELTETQAEKVVLEKEVHDQLLQLHSIQLQLHAKTGQS ADSGTIKAKLSGPSVEELERELEANKKEKMKEAQLEAEVKLLRKENEALRRHIAVLQAEVYGARLAAKYL DKELAGRVQQIQLLGRDMKGPAHDKLWNQLEAEIHLHRHKTVIRACRGRNDLKRPMQAPPGHDQDSLKKS QGVGPIRKVLLLKEDHEGLGISITGGKEHGVPILISEIHPGQPADRCGGLHVGDAILAVNGVNLRDTKHK EAVTILSQQRGEIEFEVVYVAPEVDSDDENVEYEDESGHRYRLYLDELEGGGNPGASCKDTSGEIKVLQG FNKKAVTDTHENGDLGTASETPLDDGASKLDDLHTLYHKKSY >gi|5|ref|NP_542148.1| ATP-binding cassette, sub-family C (CFTR/MRP), member 3 [Rattus norvegicus] MDRLCGSGELGSKFWDSNLTVYTNTPDLTPCFQNSLLAWVPCIYLWAALPCYLFYLRHHRLGYIVLSCLS RLKTALGVLLWCISWVDLFYSFHGLVHGSSPAPVFFITPLLVGITMLLATLLIQYERLRGVRSSGVLIIF WLLCVICAIIPFRSKILLALAEGKILDPFRFTTFYIYFALVLCAFILSCFQEKPPLFSPENLDTNPCPEA SAGFFSRLSFWWFTKLAILGYRRPLEDSDLWSLSEEDCSHKVVQRLLEAWQKQQTQASGPQTAALEPKIA GEDEVLLKARPKTKKPSFLRALVRTFTSSLLMGACFKLIQDLLSFINPQLLSILIRFISDPTAPTWWGFL LAGLMFVSSTMQTLILHQHYHCIFVMALRIRTAIIGVIYRKALTITNSVKREYTVGEMVNLMSVDAQRFM DVSPFINLLWSAPLQVILAIYFLWQILGPSALAGVAVIVLLIPLNGAVSMKMKTYQVQQMKFKDSRIKLM SEILNGIKVLKLYAWEPTFLEQVEGIRQGELQLLRKGAYLQAISTFIWVCTPFMVTLITLGVYVCVDKNN VLDAEKAFVSLSLFNILKIPLNLLPQLISGMTQTSVSLKRIQDFLNQDELDPQCVERKTISPGRAITIHN 
GTFSWSKDLPPTLHSLNIQIPKGALVAVVGPVGCGKSSLVSALLGEMEKLEGAVSVKGSVAYVPQQAWIQ NCTLQENVLFGQPMNPKRYQQALETCALLADLDVLPGGDQTEIGEKGINLSGGQRQRVSLARAVYSDANI FLLDDPLSAVDSHVAKHIFDQVIGPEGVLAGKTRVLVTHGISFLPQTDFIIVLADGQITEMGHYSELLQH DGSFANFLRNYAPDENQEANEGVLQHANEEVLLLEDTLSTHTDLTDTEPAIYEVRKQFMREMSSLSSEGE GQNRPVLKRYTSSLEKEVPATQTKETGALIKEEIAETGNVKLSVYWDYAKSVGLCTTLFICLLYAGQNAV AIGANVWLSAWTNDVEEHGQQNNTSVRLGVYATLGILQGLLVMLSAFTMVVGAIQAARLLHTALLHNQIR APQSFFDTTPSGRILNRFSKDIYVIDEVLAPTILMLFNSFYTSISTIVVIVASTPLFCVVVLPLAVFYGF VQRFYVATSRQLKRLESVSRSPIFSHFSETVTGTSVIRAYGRVQDFKVLSDAKVDSNQKTTYPYIASNRW LGVHVEFVGNCVVLFSALFAVIGRNSLNPGLVGLSVSYALQVTLSLNWMIRTLSDLESNIIAVERVKEYS KTETEAPWVLESNRAPEGWPRSGVVEFRNYSVRYRPGLELVLKNLTLHVQGGEKVGIVGRTGAGKSSMTL CLFRILEAAEGEIFIDGLNVAHIGLHDLRSQLTIIPQDPILFSGTLRMNLDPFGRYSDEDIWRTLELSHL SAFVSSQPTGLDFQCSEGGDNLSVGQRQLVCLARALLRKSRVLVLDEATAAIDLETDDLIQGTIRTQFED CTVLTIAHRLNTIMDYNRVLVLDKGVVAEFDSPVNLIAAGGIFYGMAKDAGLA >gi|6|gb|AAL47160.1|AF450008_1 CFTR-associated ligand [Homo sapiens] MSAGGPCPAAAGGGPGGASCSVGAPGGVSMFRWLEVLEKEFDKAFVDVDLLLGEIDPDQADITYEGRQKM TSLSSCFAQLCHKAQSVSQINHKLEAQLVDLKSELTETQAEKVVLEKEVHDQLLQLHSIQLQLHAKTGQS ADSGTIKAKLERELEANKKEKMKEAQLEAEVKLLRKENEALRRHIAVLQAEVYGARLAAKYLDKELAGRV QQIQLLGRDMKGPAHDKLWNQLEAEIHLHRHKTVIRACRGRNDLKRPMQAPPGHDQDSLKKSQGVGPIRK VLLLKEDHEGLGISITGGKEHGVPILISEIHPGQPADRCGGLHVGDAILAVNGVNLRDTKHKEAVTILSQ QRGEIEFEVVYVAPEVDSDDENVEYEDESGHRYRLYLDELEGGGNPGASCKDTSGEIKVLQGFNKKAVTD THENGDLGTASETPLDDGASKLDDLHTLYHKKSY clustalw-mpi-0.15/CFTR.input0000644000411000001440000012326507644152540014233 0ustar liusers>gi|18599218|ref|XP_002914.4| ATP-binding cassette, sub-family C (CFTR/MRP), member 5 [Homo sapiens] MKDIDIGKEYIIPSPGYRSVRERTSTSGTHRDREDSKFRRTRPLECQDALETAARAEGLSLDASMHSQLR ILDEEHPKGKYHHGLSALKPIRTTSKHQHPVDNAGLFSCMTFSWLSSLARVAHKKGELSMEDVWSLSKHE SSDVNCRRLERLWQEELNEVGPDAASLRRVVWIFCRTRLILSIVCLMITQLAGFSGPAFMVKHLLEYTQA TESNLQYSLLLVLGLLLTEIVRSWSLALTWALNYRTGVRLRGAILTMAFKKILKLKNIKEKSLGELINIC SNDGQRMFEAAAVGSLLAGGPVVAILGMIYNVIILGPTGFLGSAVFILFYPAMMFASRLTAYFRRKCVAA 
TDERVQKMNEVLTYIKFIKMYAWVKAFSQSVQKIREEERRILEKAGYFQSITVGVAPIVVVIASVVTFSV HMTLGFDLTAAQAFTVVTVFNSMTFALKVTPFSVKSLSEASVAVDRFKSLFLMEEVHMIKNKPASPHIKI EMKNATLAWDSSHSSIQNSPKLTPKMKKDKRASRGKKEKVRQLQRTEHQAVLAEQKGHLLLDSDERPSPE EEEGKHIHLGHLRLQRTLHSIDLEIQEGKLVGICGSVGSGKTSLISAILGQMTLLEGSIAISGTFAYVAQ QAWILNATLRDNILFGKEYDEERYNSVLNSCCLRPDLAILPSSDLTEIGERGANLSGGQRQRISLARALY SDRSIYILDDPLSALDAHVGNHIFNSAIRKHLKSKTVLFVTHQLQYLVDCDEVIFMKEGCITERGTHEEL MNLNGDYATIFNNLLLGETPPVEINSKKETSGSQKKSQDKGPKTGSVKKEKAVKPEEGQLVQLEEKGQGS VPWSVYGVYIQAAGGPLAFLVIMALFMLNVGSTAFSTWWLSYWIKQGSGNTTVTRGNETSVSDSMKDNPH MQYYASIYALSMAVMLILKAIRGVVFVKGTLRASSRLHDELFRRILRSPMKFFDTTPTGRILNRFSKDMD EVDVRLPFQAEMFIQNVILVFFCVGMIAGVFPWFLVAVGPLVILFSVLHIVSRVLIRELKRLDNITQSPF LSHITSSIQGLATIHAYNKGQEFLHRYQELLDDNQAPFFLFTCAMRWLAVRLDLISIALITTTGLMIVLM HGQIPPAYAGLAISYAVQLTGLFQFTVRLASETEARFTSVERINHYIKTLSLEAPARIKNKAPSPDWPQE GEVTFENAEMRYRENLPLVLKKVSFTIKPKEKIGIVGRTGSGKSSLGMALFRLVELSGGCIKIDGVRISD IGLADLRSKLSIIPQEPVLFSGTVRSNLDPFNQYTEDQIWDALERTHMKECIAQLPLKLESEVMENGDNF SVGERQLLCIARALLRHCKILILDEATAAMDTETDLLIQETIREAFADCTMLTIAHRLHTVLGSDRIMVL AQGQVVEFDTPSVLLSNDSSRFYAMFAAAENKVAVKG >gi|18574439|ref|XP_083829.1| ATP-binding cassette, sub-family C (CFTR/MRP), member 2 [Homo sapiens] MLEKFCNSTFWNSSFLDSPEADLPLCFEQTVLVWIPLGYLWLLAPWQLLHVYKSRTKRSSTTKLYLAKQV FVGFLLILAAIELALVLTEDSGQATVPAVRYTNPSLYLGTWLLVLLIQYSRQWCVQKNSWFLSLFWILSI LCGTFQFQTLIRTLLQGDNSNLAYSCLFFISYGFQILILIFSAFSENNESSNNPSSIASFLSSITYSWYD SIILKGYKRPLTLEDVWEVDEEMKTKTLVSKFETHMKRELQKARRALQRRQEKSSQQNSGARLPGLNKNQ SQSQDALVLEDVEKKKKKSGTKKDVPKSWLMKALFKTFYMVLLKSFLLKLVNDIFTFVSPQLLKLLISFA SDRDTYLWIGYLCAILLFTAALIQSFCLQCYFQLCFKLGVKVRTAIMASVYKKALTLSNLARKEYTVGET VNLMSVDAQKLMDVTNFMHMLWSSVLQIVLSIFFLWRELGPSVLAGVGVMVLVIPINAILSTKSKTIQVK NMKNKDKRLKIMNEILSGIKILKYFAWEPSFRDQVQNLRKKELKNLLAFSQLQCVVIFVFQLTPVLVSVV TFSVYVLVDSNNILDAQKAFTSITLFNILRFPLSMLPMMISSMLQASVSTERLEKYLGGDDLDTSAIRHD CNFDKAMQFSEASFTWEHDSEATVRE >gi|14753227|ref|XP_004980.4| cystic fibrosis transmembrane conductance regulator, ATP-binding cassette (sub-family C, member 7) 
[Homo sapiens] MQRSPLEKASVVSKLFFSWTRPILRKGYRQRLELSDIYQIPSVDSADNLSEKLEREWDRELASKKNPKLI NALRRCFFWRFMFYGIFLYLGEVTKAVQPLLLGRIIASYDPDNKEERSIAIYLGIGLCLLFIVRTLLLHP AIFGLHHIGMQMRIAMFSLIYKKTLKLSSRVLDKISIGQLVSLLSNNLNKFDEGLALAHFVWIAPLQVAL LMGLIWELLQASAFCGLGFLIVLALFQAGLGRMMMKYRDQRAGKISERLVITSEMIENIQSVKAYCWEEA MEKMIENLRQTELKLTRKAAYVRYFNSSAFFFSGFFVVFLSVLPYALIKGIILRKIFTTISFCIVLRMAV TRQFPWAVQTWYDSLGAINKIQDFLQKQEYKTLEYNLTTTEVVMENVTAFWEEGFGELFEKAKQNNNNRK TSNGDDSLFFSNFSLLGTPVLKDINFKIERGQLLAVAGSTGAGKTSLLMVIMGELEPSEGKIKHSGRISF CSQFSWIMPGTIKENIIFGVSYDEYRYRSVIKACQLEEDISKFAEKDNIVLGEGGITLSGGQRARISLAR AVYKDADLYLLDSPFGYLDVLTEKEIFESCVCKLMANKTRILVTSKMEHLKKADKILILHEGSSYFYGTF SELQNLQPDFSSKLMGCDSFDQFSAERRNSILTETLHRFSLEGDAPVSWTETKKQSFKQTGEFGEKRKNS ILNPINSIRKFSIVQKTPLQMNGIEEDSDEPLERRLSLVPDSEQGEAILPRISVISTGPTLQARRRQSVL NLMTHSVNQGQNIHRKTTASTRKVSLAPQANLTELDIYSRRLSQETGLEISEEINEEDLKECFFDDMESI PAVTTWNTYLRYITVHKSLIFVLIWCLVIFLAEVAASLVVLWLLGNTPLQDKGNSTHSRNNSYAVIITST SSYYVFYIYVGVADTLLAMGFFRGLPLVHTLITVSKILHHKMLHSVLQAPMSTLNTLKAGGILNRFSKDI AILDDLLPLTIFDFIQLLLIVIGAIAVVAVLQPYIFVATVPVIVAFIMLRAYFLQTSQQLKQLESEGRSP IFTHLVTSLKGLWTLRAFGRQPYFETLFHKALNLHTANWFLYLSTLRWFQMRIEMIFVIFFIAVTFISIL TTGEGEGRVGIILTLAMNIMSTLQWAVNSSIDVDSLMRSVSRVFKFIDMPTEGKPTKSTKPYKNGQLSKV MIIENSHVKKDDIWPSGGQMTVKDLTAKYTEGGNAILENISFSISPGQRVGLLGRTGSGKSTLLSAFLRL LNTEGEIQIDGVSWDSITLQQWRKAFGVIPQKVFIFSGTFRKNLDPYEQWSDQEIWKVADEVGLRSVIEQ FPGKLDFVLVDGGCVLSHGHKQLMCLARSVLSKAKILLLDEPSAHLDPVTYQIIRRTLKQAFADCTVILC EHRIEAMLECQQFLVIEENKVRQYDSIQKLLNERSLFRQAISPSDRVKLFPHRNSSKCKSKPQIAALKEE TEEEVQDTRL >gi|9966877|ref|NP_065132.1| PIST; fused in glioblastoma; Golgi associated PDZ and coiled-coil motif containing protein; CFTR-associated ligand [Homo sapiens] MSAGGPCPAAAGGGPGGASCSVGAPGGVSMFRWLEVLEKEFDKAFVDVDLLLGEIDPDQADITYEGRQKM TSLSSCFAQLCHKAQSVSQINHKLEAQLVDLKSELTETQAEKVVLEKEVHDQLLQLHSIQLQLHAKTGQS ADSGTIKAKLSGPSVEELERELEANKKEKMKEAQLEAEVKLLRKENEALRRHIAVLQAEVYGARLAAKYL DKELAGRVQQIQLLGRDMKGPAHDKLWNQLEAEIHLHRHKTVIRACRGRNDLKRPMQAPPGHDQDSLKKS 
QGVGPIRKVLLLKEDHEGLGISITGGKEHGVPILISEIHPGQPADRCGGLHVGDAILAVNGVNLRDTKHK EAVTILSQQRGEIEFEVVYVAPEVDSDDENVEYEDESGHRYRLYLDELEGGGNPGASCKDTSGEIKVLQG FNKKAVTDTHENGDLGTASETPLDDGASKLDDLHTLYHKKSY >gi|18034783|ref|NP_542148.1| ATP-binding cassette, sub-family C (CFTR/MRP), member 3 [Rattus norvegicus] MDRLCGSGELGSKFWDSNLTVYTNTPDLTPCFQNSLLAWVPCIYLWAALPCYLFYLRHHRLGYIVLSCLS RLKTALGVLLWCISWVDLFYSFHGLVHGSSPAPVFFITPLLVGITMLLATLLIQYERLRGVRSSGVLIIF WLLCVICAIIPFRSKILLALAEGKILDPFRFTTFYIYFALVLCAFILSCFQEKPPLFSPENLDTNPCPEA SAGFFSRLSFWWFTKLAILGYRRPLEDSDLWSLSEEDCSHKVVQRLLEAWQKQQTQASGPQTAALEPKIA GEDEVLLKARPKTKKPSFLRALVRTFTSSLLMGACFKLIQDLLSFINPQLLSILIRFISDPTAPTWWGFL LAGLMFVSSTMQTLILHQHYHCIFVMALRIRTAIIGVIYRKALTITNSVKREYTVGEMVNLMSVDAQRFM DVSPFINLLWSAPLQVILAIYFLWQILGPSALAGVAVIVLLIPLNGAVSMKMKTYQVQQMKFKDSRIKLM SEILNGIKVLKLYAWEPTFLEQVEGIRQGELQLLRKGAYLQAISTFIWVCTPFMVTLITLGVYVCVDKNN VLDAEKAFVSLSLFNILKIPLNLLPQLISGMTQTSVSLKRIQDFLNQDELDPQCVERKTISPGRAITIHN GTFSWSKDLPPTLHSLNIQIPKGALVAVVGPVGCGKSSLVSALLGEMEKLEGAVSVKGSVAYVPQQAWIQ NCTLQENVLFGQPMNPKRYQQALETCALLADLDVLPGGDQTEIGEKGINLSGGQRQRVSLARAVYSDANI FLLDDPLSAVDSHVAKHIFDQVIGPEGVLAGKTRVLVTHGISFLPQTDFIIVLADGQITEMGHYSELLQH DGSFANFLRNYAPDENQEANEGVLQHANEEVLLLEDTLSTHTDLTDTEPAIYEVRKQFMREMSSLSSEGE GQNRPVLKRYTSSLEKEVPATQTKETGALIKEEIAETGNVKLSVYWDYAKSVGLCTTLFICLLYAGQNAV AIGANVWLSAWTNDVEEHGQQNNTSVRLGVYATLGILQGLLVMLSAFTMVVGAIQAARLLHTALLHNQIR APQSFFDTTPSGRILNRFSKDIYVIDEVLAPTILMLFNSFYTSISTIVVIVASTPLFCVVVLPLAVFYGF VQRFYVATSRQLKRLESVSRSPIFSHFSETVTGTSVIRAYGRVQDFKVLSDAKVDSNQKTTYPYIASNRW LGVHVEFVGNCVVLFSALFAVIGRNSLNPGLVGLSVSYALQVTLSLNWMIRTLSDLESNIIAVERVKEYS KTETEAPWVLESNRAPEGWPRSGVVEFRNYSVRYRPGLELVLKNLTLHVQGGEKVGIVGRTGAGKSSMTL CLFRILEAAEGEIFIDGLNVAHIGLHDLRSQLTIIPQDPILFSGTLRMNLDPFGRYSDEDIWRTLELSHL SAFVSSQPTGLDFQCSEGGDNLSVGQRQLVCLARALLRKSRVLVLDEATAAIDLETDDLIQGTIRTQFED CTVLTIAHRLNTIMDYNRVLVLDKGVVAEFDSPVNLIAAGGIFYGMAKDAGLA >gi|17865154|gb|AAL47160.1|AF450008_1 CFTR-associated ligand [Homo sapiens] MSAGGPCPAAAGGGPGGASCSVGAPGGVSMFRWLEVLEKEFDKAFVDVDLLLGEIDPDQADITYEGRQKM 
TSLSSCFAQLCHKAQSVSQINHKLEAQLVDLKSELTETQAEKVVLEKEVHDQLLQLHSIQLQLHAKTGQS ADSGTIKAKLERELEANKKEKMKEAQLEAEVKLLRKENEALRRHIAVLQAEVYGARLAAKYLDKELAGRV QQIQLLGRDMKGPAHDKLWNQLEAEIHLHRHKTVIRACRGRNDLKRPMQAPPGHDQDSLKKSQGVGPIRK VLLLKEDHEGLGISITGGKEHGVPILISEIHPGQPADRCGGLHVGDAILAVNGVNLRDTKHKEAVTILSQ QRGEIEFEVVYVAPEVDSDDENVEYEDESGHRYRLYLDELEGGGNPGASCKDTSGEIKVLQGFNKKAVTD THENGDLGTASETPLDDGASKLDDLHTLYHKKSY >gi|6978669|ref|NP_036965.1| ATP-binding cassette, sub-family C (CFTR/MRP), member 2; Canalicular multispecific organic anion transporter [Rattus norvegicus] MDKFCNSTFWDLSLLESPEADLPLCFEQTVLVWIPLGFLWLLAPWQLYSVYRSRTKRSSITKFYLAKQVF VVFLLILAAIDLSLALTEDTGQATVPPVRYTNPILYLCTWLLVLAVQHSRQWCVRKNSWFLSLFWILSVL CGVFQFQTLIRALLKDSKSNMAYSYLFFVSYGFQIVLLILTAFSGPSDSTQTPSVTASFLSSITFSWYDR TVLKGYKHPLTLEDVWDIDEGFKTRSVTSKFEAAMTKDLQKARQAFQRRLQKSQRKPEATLHGLNKKQSQ SQDVLVLEEAKKKSEKTTKDYPKSWLIKSLFKTFHVVILKSFILKLIHDLLVFLNPQLLKLLIGFVKSSN SYVWFGYICAILMFAVTLIQSFCLQSYFQHCFVLGMCVRTTVMSSIYKKALTLSNLARKQYTIGETVNLM SVDSQKLMDATNYMQLVWSSVIQITLSIFFLWRELGPSILAGVGVMVLLIPVNGVLATKIRNIQVQNMKN KDKRLKIMNEILSGIKILKYFAWEPSFQEQVQGIRKKELKNLLRFGQLQSLLIFILQITPILVSVVTFSV YVLVDSANVLNAEKAFTSITLFNILRFPLSMLPMVTSSILQASVSVDRLERYLGGDDLDTSAIRRVSNFD KAVKFSEASFTWDPDLEATIQDVNLDIKPGQLVAVVGTVGSGKSSLVSAMLGEMENVHGHITIQGSTAYV PQQSWIQNGTIKDNILFGSEYNEKKYQQVLKACALLPDLEILPGGDMAEIGEKGINLSGGQKQRVSLARA AYQDADIYILDDPLSAVDAHVGKHIFNKVVGPNGLLAGKTRIFVTHGIHFLPQVDEIVVLGKGTILEKGS YRDLLDKKGVFARNWKTFMKHSGPEGEATVNNDSEAEDDDDGLIPTMEEIPEDAASLAMRRENSLRRTLS RSSRSSSRRGKSLKNSLKIKNVNVLKEKEKEVEGQKLIKKEFVETGKVKFSIYLKYLQAVGWWSILFIIL FYGLNNVAFIGSNLWLSAWTSDSDNLNGTNNSSSHRDMRIGVFGALGLAQGICLLISTLWSIYACRNASK ALHGQLLTNILRAPMRFFDTTPTGRIVNRFSGDISTVDDLLPQTLRSWMMCFFGIAGTLVMICMATPVFA IIIIPLSILYISVQVFYVATSRQLRRLDSVTKSPIYSHFSETVTGLPIIRAFEHQQRFLAWNEKQIDINQ KCVFSWITSNRWLAIRLELVGNLVVFCSALLLVIYRKTLTGDVVGFVLSNALNITQTLNWLVRMTSEAET NIVAVERISEYINVENEAPWVTDKRPPADWPRHGEIQFNNYQVRYRPELDLVLKGITCNIKSGEKVGVVG RTGAGKSSLTNCLFRILESAGGQIIIDGIDVASIGLHDLRERLTIIPQDPILFSGSLRMNLDPFNKYSDE 
EVWRALELAHLRSFVSGLQLGLLSEVTEGGDNLSIGQRQLLCLGRAVLRKSKILVLDEATAAVDLETDSL IQTTIRKEFSQCTVITIAHRLHTIMDSDKIMVLDNGKIVEYGSPEELLSNRGSFYLMAKEAGIENVNHTE L >gi|13124088|sp|Q9R0A1|CLC2_MOUSE Chloride channel protein 2 (ClC-2) MAAATAAAAAAAAAGEGMEPRALQYEQTLMYGRYTQELGAFAKEEAARIRLGGPEPWKGSPSARATPELL EYGQSRCARCRICSVRCHKFLVSRVGEDWIFLVLLGLLMALVSWAMDYAIAVCLQAQQWMSRGLNTNILL QYLAWVTYPVVLITFSAGFTQILAPQAVGSGIPEMKTILRGVVLKEYLTLKTFVAKVIGLTCALGSGMPL GKEGPFVHIASMCAALLSKFLSLFGGIYEHESRNTEMLAAACAVGVGCCFAAPIGGVLFSIEVTSTFFAV RNYWRGFFAATFSAFIFRVLAVWNRDEETITALFKTRFRLDFPFDLQELPAFAVIGIASGFGGALFVYLN RKIVQVMRKQKTINRFLMRKRLLFPALVTLLISTLTFPPGFGQFMAGQLSQKETLVTLFDNRTWVRQGLV EDLELPSTSQAWSPPRANVFLTLVIFILMKFWMSALATTIPVPCGAFMPVFVIGAAFGRLVGESMAAWFP DGIHTDSSTYRIVPGGYAVVGAAALAGAVTHTVSTAVIVFELTGQIAHILPVMIAVILANAVAQSLQPSL YDSIIRIKKLPYLPELGWGRHQQYRVRVEDIMVRDVPHVALSCTFRDLRLALHRTKGRMLALVESPESMI LLGSIERSQVVALLGAQLSPARRRQHMQKLRKAQLSSPSDQESPPSSETSIRFQVNTEDSGFSGAHGQTH KPLKPALKRGPSNSTSLQEGTTGNMESAGIALRSLFCGSPPLEATSELEKSESCDKRKLKRVRISLASDS DPEAEMSPEEILEWEEQQLDEPVNFSDCKIDPAPFQLVERTSLHKTHTIFSLLGVDHAYVTSIGRLIGIV TLKELRKAIEGSVTAQGVKVRPPLASFRDSATSSSDTETTEVHALWGPRSRHGLPREGTPSDSDDKCQ >gi|2506123|sp|P14772|BPT1_YEAST Bile pigment transporter 1 MSSLEVVDGCPYGYRPYPDSGTNALNPCFISVISAWQAVFFLLIGSYQLWKLYKNNKVPPRFKNFPTLPS KINSRHLTHLTNVCFQSTLIICELALVSQSSDRVYPFILKKALYLNLLFNLGISLPTQYLAYFKSTFSMG NQLFYYMFQILLQLFLILQRYYHGSSNERLTVISGQTAMILEVLLLFNSVAIFIYDLCIFEPINELSEYY KKNGWYPPVHVLSYITFIWMNKLIVETYRNKKIKDPNQLPLPPVDLNIKSISKEFKANWELEKWLNRNSL WRAIWKSFGRTISVAMLYETTSDLLSVVQPQFLRIFIDGLNPETSSKYPPLNGVFIALTLFVISVVSVFL TNQFYIGIFEAGLGIRGSLASLVYQKSLRLTLAERNEKSTGDILNLMSVDVLRIQRFFENAQTIIGAPIQ IIVVLTSLYWLLGKAVIGGLVTMAIMMPINAFLSRKVKKLSKTQMKYKDMRIKTITELLNAIKSIKLYAW EEPMMARLNHVRNDMELKNFRKIGIVSNLIYFAWNCVPLMVTCSTFGLFSLFSDSPLSPAIVFPSLSLFN ILNSAIYSVPSMINTIIETSVSMERLKSFLLSDEIDDSFIERIDPSADERALPAIEMNNITFLWKSKEVL TSSQSGDNLRTDEESIIGSSQIALKNIDHFEAKRGDLVCVVGRVGAGKSTFLKAILGQLPCMSGSRDSIP PKLIIRSSSVAYCSQESWIMNASVRENILFGHKFDQDYYDLTIKACQLLPDLKILPDGDETLVGEKGISL 
SGGQKARLSLARAVYSRADIYLLDDILSAVDAEVSKNIIEYVLIGKTALLKNKTIILTTNTVSILKHSQM IYALENGEIVEQGNYEDVMNRKNNTSKLKKLLEEFDSPIDNGNESDVQTEHRSESEVDEPLQLKVTESET EDEVVTESELELIKANSRRASLATLRPRPFVGAQLDSVKKTAQKAEKTEVGRVKTKIYLAYIKACGVLGV VLFFLFMILTRVFDLAENFWLKYWSESNEKNGSNERVWMFVGVYSLIGVASAAFNNLRSIMMLLYCSIRG SKKLHESMAKSVIRSPMTFFETTPVGRIINRFSSDMDAVDSNLQYIFSFFFKSILTYLVTVILVGYNMPW FLVFNMFLVVIYIYYQTFYIVLSRELKRLISISYSPIMSLMSESLNGYSIIDAYDHFERFIYLNYEKIQY NVDFVFNFRSTNRWLSVRLQTIGATIVLATAILALATMNTKRQLSSGMVGLLMSYSLEVTGSLTWIVRTT VTIETNIVSVERIVEYCELPPEAQSINPEKRPDENWPSKGGIEFKNYSTKYRENLDPVLNNINVKIEPCE KVGIVGRTGAGKSTLSLALFRILEPTEGKIIIDGIDISDIGLFDLRSHLAIIPQDAQAFEGTVKTNLDPF NRYSEDELKRAVEQAHLKPHLEKMLHSKPRGDDSNEEDGNVNDILDVKINENGSNLSVGQRQLLCLARAL LNRSKILVLDEATASVDMETDKIIQDTIRREFKDRTILTIAHRIDTVLDSDKIIVLDQGSVREFDSPSKL LSDKTSIFYSLCEKGGYLK >gi|18266164|gb|AAL67487.1|AF459789_1 GASZ [Mus musculus] MAAGTLRGLAVAGGGESSDSEDDGWDIGYLDRSSQKLKRSLPVEEKTETFKKALTTGDISLVKELLDSGI NVDSSFRYGWTPLMYAASVANAELVRFLLDRGANASFDKDKLTILISACSARGSEEQVLKCVELLLSRNA DPNTACRRLMTPIMYAARDGHTQVVALLVAHGAEVNAQDENGYTALTWAARQGHKNVILKLLELGANKML QTKDGRTPSEIAKRNKHLEIFNFLSLTLNPLEGKLQQLTKEETICKLLATDSDKEKDHIFSPYTAFGDLE IFLHGLGLEHMTDSLKEKDITLRHLLTMKKDELTKNGIASKDQQKILAALKELEVEEINFGKLPEVTKLE ISGDEFLNFLLKLNKQCGHLITAVQNIITELPVNSHKIVLEWASPRNFTSVCEELVSNVEDLNEEVCRLK ELIQKMQNERENDPTHIPLVEEVSTWKTRILKRSAVTVCGFGLLLFIGKLTLQRK >gi|1709489|sp|P54790|ORC3_YEAST Origin recognition complex subunit 3 (Origin recognition complex protein 62 kDa subunit) MSDLNQSKKMNVSEFADAQRSHYTVYPSLPQSNKNDKHIPFVKLLSGKESEVNVEKRWELYHQLHSHFHD QVDHIIDNIEADLKAEISDLLYSETTQKRRCFNTIFLLGSDSTTKIELKDESSRYNVLIELTPKESPNVR MMLRRSMYKLYSAADAEEHPTIKYEDINDEDGDFTEQNNDVSYDLSLVENFKRLFGKDLAMVFNFKDVDS INFNTLDNFIILLKSAFKYDHVKISLIFNINTNLSNIEKNLRQSTIRLLKRNYHKLDVSSNKGFKYGNQI FQSFLDTVDGKLNLSDRFVEFILSKMANNTNHNLQLLTKMLDYSLMSYFFQNAFSVFIDPVNVDFLNDDY LKILSRCPTFMFFVEGLIKQHAPADEILSLLTNKNRGLEEFFVEFLVRENPINGHAKFVARFLEEELNIT NFNLIELYHNLLIGKLDSYLDRWSACKEYKDRLHFEPIDTIFQELFTLDNRSGLLTQSIFPSYKSNIEDN 
LLSWEQVLPSLDKENYDTLSGDLDKIMAPVLGQLFKLYREANMTINIYDFYIAFRETLPKEEILNFIRKD PSNTKLLELAETPDAFDKVALILFMQAIFAFENMGLIKFQSTKSYDLVEKCVWRGI >gi|1706521|sp|P32892|DRS1_YEAST Probable ATP-dependent RNA helicase DRS1 MVVGTKKYSNLDFVPTISDSEDDVPILDSSDDEKVEAKKTTKKRKGKNNKKKVSEGDNLDEDVHEDLDAG FKFDLDADDTTSNFQGWNFLAEGESNKDDAEAFVKKDVDLDKIIRRKGGLVKMAHIDSKQEEETEKEKVE KENDSDDEELAMDGFGMGAPMNNGDENQSEEEEEEEEKEEEEEEEEEQEEMTLEKGGKDDEIDEEDDSEE AKADFYAPETEGDEAKKQMYENFNSLSLSRPVLKGLASLGYVKPSPIQSATIPIALLGKDIIAGAVTGSG KTAAFMIPIIERLLYKPAKIASTRVIVLLPTRELAIQVADVGKQIARFVSGITFGLAVGGLNLRQQEQML KSRPDIVIATPGRFIDHIRNSASFNVDSVEILVMDEADRMLEEGFQDELNEIMGLLPSNRQNLLFSATMN SKIKSLVSLSLKKPVRIMIDPPKKAATKLTQEFVRIRKRDHLKPALLFNLIRKLDPTGQKRIVVFVARKE TAHRLRIIMGLLGMSVGELHGSLTQEQRLDSVNKFKNLEVPVLICTDLASRGLDIPKIEVVINYDMPKSY EIYLHRVGRTARAGREGRSVTFVGESSQDRSIVRAAIKSVEENKSLTQGKALGRNVDWVQIEETNKLVES MNDTIEDILVEEKEEKEILRAEMQLRKGENMLKHKKEIQARPRRTWFQSESDKKNSKVLGALSRNKKVTN SKKRKREEAKADGNGARSYRKTKTDRIADQERTFKKQKSTNSNKKKGFKSRR >gi|1706485|sp|P54861|DNM1_YEAST Dynamin-related protein DNM1 MASLEDLIPTVNKLQDVMYDSGIDTLDLPILAVVGSQSSGKSSILETLVGRDFLPRGTGIVTRRPLVLQL NNISPNSPLIEEDDNSVNPHDEVTKISGFEAGTKPLEYRGKERNHADEWGEFLHIPGKRFYDFDDIKREI ENETARIAGKDKGISKIPINLKVFSPHVLNLTLVDLPGITKVPIGEQPPDIEKQIKNLILDYIATPNCLI LAVSPANVDLVNSESLKLAREVDPQGKRTIGVITKLDLMDSGTNALDILSGKMYPLKLGFVGVVNRSQQD IQLNKTVEESLDKEEDYFRKHPVYRTISTKCGTRYLAKLLNQTLLSHIRDKLPDIKTKLNTLISQTEQEL ARYGGVGATTNESRASLVLQLMNKFSTNFISSIDGTSSDINTKELCGGARIYYIYNNVFGNSLKSIDPTS NLSVLDVRTAIRNSTGPRPTLFVPELAFDLLVKPQIKLLLEPSQRCVELVYEELMKICHKCGSAELARYP KLKSMLIEVISELLRERLQPTRSYVESLIDIHRAYINTNHPNFLSATEAMDDIMKTRRKRNQELLKSKLS QQENGQTNGINGTSSISSNIDQDSAKNSDYDDDGIDAESKQTKDKFLNYFFGKDKKGQPVFDASDKKRSI AGDGNIEDFRNLQISDFSLGDIDDLENAEPPLTEREELECELIKRLIVSYFDIIREMIEDQVPKAVMCLL VNYCKDSVQNRLVTKLYKETLFEELLVEDQTLAQDRELCVKSLGVYKKAATLISNIL >gi|12860348|dbj|BAB31925.1| ATP-binding cassette, sub-family C (CFTR/MRP), member 9~data source:MGD, source key:MGI:1352630, evidence:ISS~putative [Mus musculus] 
MEISVSPCLQCLKVMACLARHSRSSNYIVMDNLSPRLSPSTISYPLFEGFCDFNPFLFLGIYCLSIAHSS GQCEFPANVRFRKRTKECKSWHTCGLTTQMTNS >gi|6981606|ref|NP_037171.1| ATP-binding cassette, sub-family C (CFTR/MRP), member 8; Sulfonylurea receptor [Rattus norvegicus] MPLAFCGTENHSAAYRVDQGVLNNGCFVDALNVVPHVFLLFITFPILFIGWGSQSSKVHIHHSTWLHFPG HNLRWILTFILLFVLVCEIAEGILSDGVTESRHLHLYMPAGMAFMAAITSVVYYHNIETSNFPKLLIALL IYWTLAFITKTIKFVKFYDHAIGFSQLRFCLTGLLVILYGMLLLVEVNVIRVRRYVFFKTPREVKPPEDL QDLGVRFLQPFVNLLSKGTYWWMNAFIKTAHKKPIDLRAIGKLPIAMRALTNYQRLCLAFDAQARKDTQS QQGARAIWRALCHAFGRRLVLSSTFRILADLLGFAGPLCIFGIVDHLGKENHVFQPKTQFLGVYFVSSQE FLGNAYVLAVLLFLALLLQRTFLQASYYVAIETGINLRGAIQTKIYNKIMHLSTSNLSMGEMTAGQICNL VAIDTNQLMWFFFLCPNLWAMPVQIIVGVILLYYILGVSALIGAAVIILLAPVQYFVATKLSQAQRTTLE YSNERLKQTNEMLRGIKLLKLYAWENIFCSRVEKTRRKEMTSLRAFAVYTSISIFMNTAIPIAAVLITFV GHVSFFKESDFSPSVAFASLSLFHILVTPLFLLSSVVRSTVKALVSVQKLSEFLSSAEIREEQCAPREPA PQGQAGKYQAVPLKVVNRKRPAREEVRDLLGPLQRLTPSTDGDADNFCVQIIGGFFTWTPDGIPTLSNIT IRIPRGQLTMIVGQVGCGKSSLLLATLGEMQKVSGAVFWNSLPDSEGEDPSNPERETAADSDARSRGPVA YASQKPWLLNATVEENITFESPFNKQRYKMVIEACSLQPDIDILPHGDQTQIGERGINLSGGQRPGISVA RALYQHTNVVFLDDPFSALDVHLSDHLMQAGILELLRDDKRTVVLVTHKLQYLPHADWIIAMKDGTIQRE GTLKDFQRSECQLFEHWKTLMNRQDQELEKETVMERKAPEPSQGLPRAMSSRDGLLLDEDEEEEEAAESE EDDNLSSVLHQRAKIPWRACTKYLSSAGILLLSLLVFSQLLKHMVLVAIDYWLAKWTDSALVLSPAARNC SLSQECALDQSVYAMVFTVLCSLGIALCLVTSVTVEWTGLKVAKRLHRSLLNRIILAPMRFFETTPLGSI LNRFSSDCNTIDQHIPSTLECLSRSTLLCVSALAVISYVTPVFLVALLPLAVVCYFIQKYFRVASRDLQQ LDDTTQLPLLSHFAETVEGLTTIRAFRYEARFQQKLLEYTDSNNIASLFLTAANRWLEVRMEYIGACVVL IAAATSISNSLHRELSAGLVGLGLTYALMVSNYLNWMVRNLADMEIQLGAVKGIHTLLKTEAESYEGLLA PSLIPKNWPDQGKIQIQNLSVRYDSSLKPVLKHVNALISPGQKIGICGRTGSGKSSFSLAFFRMVDMFEG RIIIDGIDIAKLPLHTLRSRLSIILQDPVLFSGTIRFNLDPEKKCSDSTLWEALEIAQLKLVVKALPGGL DAIITEGGENFSQGQRQLFCLARAFVRKTSIFIMDEATASIDMATENILQKVVMTAFADRTVVTIAHRVH TILSADLVMVLKRGAILEFDKPEKLLSQKDSVFASFVRADK >gi|6981604|ref|NP_037172.1| ATP-binding cassette, sub-family C (CFTR/MRP), member 9; Sulfonylurea receptor 2 [Rattus norvegicus] 
MSLSFCGNNISSYNIYHGVLQNPCFVDALNLVPHVFLLFITFPILFIGWGSQSSKVQIHHNTWLHFPGHN LRWILTFALLFVHVCEIAEGIVSDSQRASRHLHLFMPAVMGFVATTTSIVYYHNIETSNFPKLLLALFLY WVMAFITKTIKLVKYWQLGWGMSDLRFCITGVMVILNGLLMAVEINVIRVRRYVFFMNPQKVKPPEDLQD LGVRFLQPFVNLLSKATYWWMNTLIISAHRKPIDLKAIGKLPIAMRAVTNYVCLKEAYEEQKKKAADHPN RTPSIWLAMYRAFGRPILLSSTFRYLADLLGFAGPLCISGIVQRVNEPKNNTTRFSETLSSKEFLENAHV LAVLLFLALILQRTFLQASYYVTIETGINLRGALLAMIYNKILRLSTSNLSMGEMTLGQINNLVAIETNQ LMWFLFLCPNLWAMPVQIIMGVILLYNLLGSSALVGAAVIVLLAPIQYFIATKLAEAQKSTLDYSTERLK KTNEILKGIKLLKLYAWEHIFCKSVEETRMKELSSLKTFALYTSLSIFMNAAIPIAAVLATFVTHAYASG NNLKPAEAFASLSLFHILVTPLFLLSTVVRFAVKAIISVQKLNEFLLSDEIGEDSWRTGEGTLPFESCKK HTGVQSKPINRKQPGRYHLDNYEQARRLRPAETEDVAIKVTNGYFSWGSGLATLSNIDIRIPTGQLTMIV GQVGCGKSSLLLAILGEMQTLEGKVYWNNVNESEPSFEATRSRSRYSVAYAAQKPWLLNATVEENITFGS SFNRQRYKAVTDACSLQPDIDLLPFGDQTEIGERGINLSGGQRQRICVARALYQNTNIVFLDDPFSALDI HLSDHLMQEGILKFLQDDKRTVVLVTHKLQYLTHADWIIAMKDGSVLREGTLKDIQTKDVELYEHWKTLM NRQDQELEKDMEADQTTLERKTLRRAMYSREAKAQMEDEDEEEEEEEDEDDNMSTVMRLRTKMPWKTCWW YLTSGGFFLLFLMIFSKLLKHSVIVAIDYWLATWTSEYSINDPGKADQTFYVAGFSILCGAGIFLCLVTS LTVEWMGLTAAKNLHHNLLNKIILGPIRFFDTTPLGLILNRFSADTNIIDQHIPPTLESLTRSTLLCLSA IGMISYATPVFLIALAPLGVAFYFIQKYFRVASKDLQELDDSTQLPLLCHFSETAEGLTTIRAFRHETRF KQRMLELTDTNNIAYLFLSAANRWLEVRTDYLGACIVLTASIASISGSSNSGLVGLGLLYALTITNYLNW VVRNLADLEVQMGAVKKVNSFLTMESENYEGTMDPSQVPEHWPQEGEIKIHDLCVRYENNLKPVLKHVKA YIKPGQKVGICGRTGSGKSSLSLAFFRMVDIFDGKIVIDGIDISKLPLHTLRSRLSIILQDPILFSGSIR FNLDPECKCTDDRLWEALEIAQLKNMVKSLPGGLDATVTEGGENFSVGQRQLFCLARAFVRKSSILIMDE ATASIDMATENILQKVVMTAFADRTVVTIAHRVSSIMDAGLVLVFSEGILVECDTGPNLLQHKNGLFSTL VMTNK >gi|14141185|ref|NP_066388.1| cystic fibrosis transmembrane conductance regulator homolog; ATP-binding cassette, subfamily c, member 7 [Mus musculus] MQKSPLEKASFISKLFFSWTTPILRKGYRHHLELSDIYQAPSADSADHLSEKLEREWDREQASKKNPQLI HALRRCFFWRFLFYGILLYLGEVTKAVQPVLLGRIIASYDPENKVERSIAIYLGIGLCLLFIVRTLLLHP AIFGLHRIGMQMRTAMFSLIYKKTLKLSSRVLDKISIGQLVSLLSNNLNKFDEGLALAHFIWIAPLQVTL 
LMGLLWDLLQFSAFCGLGLLIILVIFQAILGKMMVKYRDQRAAKINERLVITSEIIDNIYSVKAYCWESA MEKMIENLREVELKMTRKAAYMRFFTSSAFFFSGFFVVFLSVLPYTVINGIVLRKIFTTISFCIVLRMSV TRQFPTAVQIWYDSFGMIRKIQDFLQKQEYKVLEYNLMTTGIIMENVTAFWEEGFGELLEKVQQSNGDRK HSSDENNVSFSHLCLVGNPVLKNINLNIEKGEMLAITGSTGSGKTSLLMLILGELEASEGIIKHSGRVSF CSQFSWIMPGTIKENIIFGVSYDEYRYKSVVKACQLQQDITKFAEQDNTVLGEGGVTLSGGQRARISLAR AVYKDADLYLLDSPFGYLDVFTEEQVFESCVCKLMANKTRILVTSKMEHLRKADKILILHQGSSYFYGTF SELQSLRPDFSSKLMGYDTFDQFTEERRSSILTETLRRFSVDDSSAPWSKPKQSFRQTGEVGEKRKNSIL NSFSSVRKISIVQKTPLCIDGESDDLQEKRLSLVPDSEQGEAALPRSNMIATGPTFPGRRRQSVLDLMTF TPNSGSSNLQRTRTSIRKISLVPQISLNEVDVYSRRLSQDSTLNITEEINEEDLKECFLDDVIKIPPVTT WNTYLRYFTLHKGLLLVLIWCVLVFLVEVAASLFVLWLLKNNPVNSGNNGTKISNSSYVVIITSTSFYYI FYIYVGVADTLLALSLFRGLPLVHTLITASKILHRKMLHSILHAPMSTISKLKAGGILNRFSKDIAILDD FLPLTIFDFIQLVFIVIGAIIVVSALQPYIFLATVPGLVVFILLRAYFLHTAQQLKQLESEGRSPIFTHL VTSLKGLWTLRAFRRQTYFETLFHKALNLHTANWFMYLATLRWFQMRIDMIFVLFFIVVTFISILTTGEG EGTAGIILTLAMNIMSTLQWAVNSSIDTDSLMRSVSRVFKFIDIQTEESMYTQIIKELPREGSSDVLVIK NEHVKKSDIWPSGGEMVVKDLTVKYMDDGNAVLENISFSISPGQRVGLLGRTGSGKSTLLSAFLRMLNIK GDIEIDGVSWNSVTLQEWRKAFGVITQKVFIFSGTFRQNLDPNGKWKDEEIWKVADEVGLKSVIEQFPGQ LNFTLVDGGYVLSHGHKQLMCLARSVLSKAKIILLDEPSAHLDPITYQVIRRVLKQAFAGCTVILCEHRI EAMLDCQRFLVIEESNVWQYDSLQALLSEKSIFQQAISSSEKMRFFQGRHSSKHKPRTQITALKEETEEE VQETRL >gi|6753432|ref|NP_034030.1| chloride channel 2 [Mus musculus] MAAATAAAAAAAAAGEGMEPRALQYEQTLMYGRYTQELGAFAKEEAARIRLGGPEPWKGSPSARATPELL EYGQSRCARCRICSVRCHKFLVSRVGEDWIFLVLLGLLMALVSWAMDYAIAVCLQAQQWMSRGLNTNILL QYLAWVTYPVVLITFSAGFTQILAPQAVGSGIPEMKTILRGVVLKEYLTLKTFVAKVIGLTCALGSGMPL GKEGPFVHIASMCAALLSKFLSLFGGIYEHESRNTEMLAAACAVGVGCCFAAPIGGVLFSIEVTSTFFAV RNYWRGFFAATFSAFIFRVLAVWNRDEETITALFKTRFRLDFPFDLQELPAFAVIGIASGFGGALFVYLN RKIVQVMRKQKTINRFLMRKRLLFPALVTLLISTLTFPPGFGQFMAGQLSQKETLVTLFDNRTWVRQGLV EDLELPSTSQAWSPPRANVFLTLVIFILMKFWMSALATTIPVPCGAFMPVFVIGAAFGRLVGESMAAWFP DGIHTDSSTYRIVPGGYAVVGAAALAGAVTHTVSTAVIVFELTGQIAHILPVMIAVILANAVAQSLQPSL YDSIIRIKKLPYLPELGWGRHQQYRVRVEDIMVRDVPHVALSCTFRDLRLALHRTKGRMLALVESPESMI 
LLGSIERSQVVALLGAQLSPARRRQHMQKLRKAQLSSPSDQESPPSSETSIRFQVNTEDSGFSGAHGQTH KPLKPALKRGPSNSTSLQEGTTGNMESAGIALRSLFCGSPPLEATSELEKSESCDKRKLKRVRISLASDS DPEAEMSPEEILEWEEQQLDEPVNFSDCKIDPAPFQLVERTSLHKTHTIFSLLGVDHAYVTSIGRLIGIV TLKELRKAIEGSVTAQGVKVRPPLASFRDSATSSSDTETTEVHALWGPRSRHGLPREGTPSDSDDKCQ >gi|6678848|ref|NP_032602.1| ATP-binding cassette, sub-family C, member 1a; ATP-binding cassette, sub-family C (CFTR/MRP), member 1; multiple drug resistance-associated protein [Mus musculus] MALRSFCSADGSDPLWDWNVTWHTSNPDFTKCFQNTVLTWVPCFYLWSCFPLYFFYLSRHDRGYIQMTHL NKTKTALGFFLWIICWADLFYSFWERSQGVLRAPVLLVSPTLLGITMLLATFLIQLERRKGVQSSGIMLT FWLVALLCALAILRSKIISALKKDAHVDVFRDSTFYLYFTLVLVQLVLSCFSDCSPLFSETVHDRNPCPE SSASFLSRITFWWITGMMVHGYRQPLESSDLWSLNKEDTSEEVVPVLVNNWKKECDKSRKQPVRIVYAPP KDPSKPKGSSQLDVNEEVEALIVKSPHKDREPSLFKVLYKTFGPYFLMSFLYKALHDLMMFAGPKILELI INFVNDREAPDWQGYFYTALLFVSACLQTLALHQYFHICFVSGMRIKTAVVGAVYRKALLITNAARKSST VGEIVNLMSVDAQRFMDLATYINMIWSAPLQVILALYFLWLSLGPSVLAGVAVMILMVPLNAVMAMKTKT YQVAHMKSKDNRIKLMNEILNGIKVLKLYAWELAFQDKVMSIRQEELKVLKKSAYLAAVGTFTWVCTPFL VALSTFAVFVTVDERNILDAKKAFVSLALFNILRFPLNILPMVISSIVQASVSLKRLRIFLSHEELEPDS IERRSIKSGEGNSITVKNATFTWARGEPPTLNGITFSIPEGALVAVVGQVGCGKSSLLSALLAEMDKVEG HVTLKGSVAYVPQQAWIQNDSLRENILFGHPLQENYYKAVMEACALLPDLEILPSGDRTEIGEKGVNLSG GQKQRVSLARAVYSNSDIYLFDDPLSAVDAHVGKHIFEKVVGPMGLLKNKTRILVTHGISYLPQVDVIIV MSGGKISEMGSYQELLDRDGAFAEFLRTYANAEQDLASEDDSVSGSGKESKPVENGMLVTDTVGKHLQRH LSNSSSHSGDTSQQHSSIAELQKAGAKEETWKLMEADKAQTGQVQLSVYWNYMKAIGLFITFLSIFLFLC NHVSALASNYWLSLWTDDPPVVNGTQANRNFRLSVYGALGILQGAAIFGYSMAVSIGGIFASRRLHLDLL YNVLRSPMSFFERTPSGNLVNRFSKELDTVDSMIPQVIKMFMGSLFSVIGAVIIILLATPIAAVIIPPLG LVYFFVQRFYVASSRQLKRLESVSRSPVYSHFNETLLGVSVIRAFEEQERFIHQSDLKVDENQKAYYPSI VANRWLAVRLECVGNCIVLFAALFAVISRHSLSAGLVGLSVSYSLQITAYLNWLVRMSSEMETNIVAVER LKEYSETEKEAPWQIQETAPPSTWPHSGRVEFRDYCLRYREDLDLVLKHINVTIEGGEKVGIVGRTGAGK SSLTLGLFRINESAEGEIIIDGVNIAKIGLHNLRFKITIIPQDPVLFSGSLRMNLDPFSQYSDEEVWMAL ELAHLKGFVSALPDKLNHECAEGGENLSVGQRQLVCLARALLRKTKILVLDEATAAVDLETDNLIQSTIR 
TQFEDCTVLTIAHRLNTIMDYTRVIVLDKGEVRECGAPSELLQQRGIFYSMAKDAGLV >gi|17488612|gb|AAL40378.1|AC087333_5 Cystic fibrosis transmembrane conductance regulator [Takifugu rubripes] MQKSPVEDANFLSKFFFWWTSPLLRKGFKKKLELSDVYKAPSFDLADNLSERLEREWDREIVSAKKRPKL MRALARCFLGPFLFFGILLYLGEASKTVQPQLLGRIIASFDPFHAPERSQGYFLALGLCLLFTARFLLLQ PAIFGLHHLGMQIRIALFSLIYKKTLKLSSRVLDKISTDQLVSLMSAHLNKLDESLGLAHFIWITPLQCI LCVGLIWELIEVNGFCALAALTLLGIIQAWLSQKMGPHRVKRAGMINRRLALTSEIVENIHSVKAYGWED VMETIIKNIRQDEMTLTRKIGYLRYFYSAAYFFSAILVIVSAIVPHALSKGIILRRIFTTASYCMVLRMT LTRQLPGSIQMWYDTLALVKKIEDFLLKEEYRVLEYNLTTTEVELVNVSASWDEGISELFEKIKQENKAN GHMANDPGLFFTNLYVTPVLKNISLYLEKGKMLAVAGSTGAGKSSLLMMILGELVPTEGKIKHSGRISFS PQNSWIMPGTIRDNILFGLTYDEYRYTSVIKACQLEEDFALLPDKDRTLLMEGGVTLSGGQRARLGLARA VYKDADLYLLDAPFTHLDLVTEREIFEKCVCKLMGSKTRIVVTSKLEHLKRADKILLLHNGDCYFYGTFL ELQAQRPDFSSLLLGLEAYDNINAERRSSVLTETLRRVSVDETAGFRGPESIRQSFRQPPPPVIVSGSQG NPGGDGYPEKRKQSLILSPLAAARKFSFMGNSNAGNAAQATATEDGVHEHSERRFSVVPEDDQVEEVLPR SNAYHHGLHHLSGQRRQSVLAFITTSQGQERRAQIQSSFRKKLSITPQCDLASELDIYARRLSKDSVYDI SEDVDTEDMEQCFADDRDNTFETTSWSTYLRYVSTNKSLVYVLIFIFVVFVIEVAGSVIGIFLITDTIWR DGANPSSPNYIDQQHSNTSSPPTHLAVIVTPTSAYYIIYIFVATSESVLALGFFRGLPLVHTLLTVSKRL HEQMLSAVLRAPMAVLNTMKTGRIMNRFTKDMATIDDMLPLVLFDLIQLTLIVTGAIFTVSIMRPYIFIA AIPLAIIFVILRKYFLRTGQQLKLLEAEARSPIFSHLIISLKGLWTIRAFGRQTYFETLFHKALNTHTAT WFHYLSTLRWFLFRCDIIFVLFFTAAAFIAVGTNQDKPGEIGIIVALAMLILGTFQWAVITSITVDGLMR SVDRVFKFIDLPSEELLPGKPGGKGRPDLIIDNPHAQDYWPNRGQMDVQGLAVKYTEAGRAVLSDISFSV EGGLSMGLLGRTGSGKSTLLSALLRLASTDGEISIDGISWNSVPLHKWRKAFGVVPQKVFILTGTFRMNL DPHGRYSDEELWRVADQVGLKSVIEQFPDRLDFQLENGGSVLSHGHKQLMCLARSILSKARILLLDEPSA YLDTITLQVLRKTLKHAFSDCTVILSEHKVEPLLECQSFLVIEGSSVKSYDSIQKLLNEMSHLKQAVSAA DRLRLFPTLHRLNSIKRAPPQAAKISSLPEEAEDEVHDTRL >gi|6320339|ref|NP_010419.1| Metal resistance protein with similarity to human cystic fibrosis protein CFTR and multidrug resistance proteins; Ycf1p [Saccharomyces cerevisiae] MAGNLVSWACKLCRSPEGFGPISFYGDFTQCFIDGVILNLSAIFMITFGIRDLVNLCKKKHSGIKYRRNW 
IIVSRMALVLLEIAFVSLASLNISKEEAENFTIVSQYASTMLSLFVALALHWIEYDRSVVANTVLLFYWL FETFGNFAKLINILIRHTYEGIWYSGQTGFILTLFQVITCASILLLEALPKKPLMPHQHIHQTLTRRKPN PYDSANIFSRITFSWMSGLMKTGYEKYLVEADLYKLPRNFSSEELSQKLEKNWENELKQKSNPSLSWAIC RTFGSKMLLAAFFKAIHDVLAFTQPQLLRILIKFVTDYNSERQDDHSSLQGFENNHPQKLPIVRGFLIAF AMFLVGFTQTSVLHQYFLNVFNTGMYIKSALTALIYQKSLVLSNEASGLSSTGDIVNLMSVDVQKLQDLT QWLNLIWSGPFQIIICLYSLYKLLGNSMWVGVIILVIMMPLNSFLMRIQKKLQKSQMKYKDERTRVISEI LNNIKSLKLYAWEKPYREKLEEVRNNKELKNLTKLGCYMAVTSFQFNIVPFLVSCCTFAVFVYTEDRALT TDLVFPALTLFNLLSFPLMIIPMVLNSFIEASVSIGRLFTFFTNEELQPDSVQRLPKVKNIGDVAINIGD DATFLWQRKPEYKVALKNINFQAKKGNLTCIVGKVGSGKTALLSCMLGDLFRVKGFATVHGSVAYVSQVP WIMNGTVKENILFGHRYDAEFYEKTIKACALTIDLAILMDGDKTLVGEKGISLSGGQKARLSLARAVYAR ADTYLLDDPLAAVDEHVARHLIEHVLGPNGLLHTKTKVLATNKVSALSIADSIALLDNGEITQQGTYDEI TKDADSPLWKLLNNYGKKNNGKSNEFGDSSESSVRESSIPVEGELEQLQKLNDLDFGNSDAISLRRASDA TLGSIDFGDDENIAKREHREQGKVKWNIYLEYAKACNPKSVCVFILFIVISMFLSVMGNVWLKHWSEVNS RYGSNPNAARYLAIYFALGIGSALATLIQTIVLWVFCTIHASKYLHNLMTNSVLRAPMTFFETTPIGRIL NRFSNDIYKVDALLGRTFSQFFVNAVKVTFTITVICATTWQFIFIIIPLSVFYIYYQQYYLRTSRELRRL DSITRSPIYSHFQETLGGLATVRGYSQQKRFSHINQCRIDNNMSAFYPSINANRWLAYRLELIGSIIILG AATLSVFRLKQGTLTAGMVGLSLSYALQITQTLNWIVRMTVEVETNIVSVERIKEYADLKSEAPLIVEGH RPPKEWPSQGDIKFNNYSTRYRPELDLVLKHINIHIKPNEKVGIVGRTGAGKSSLTLALFRMIEASEGNI VIDNIAINEIGLYDLRHKLSIIPQDSQVFEGTVRENIDPINQYTDEAIWRALELSHLKEHVLSMSNDGLD AQLTEGGGNLSVGQRQLLCLARAMLVPSKILVLDEATAAVDVETDKVVQETIRTAFKDRTILTIAHRLNT IMDSDRIIVLDNGKVAEFDSPGQLLSDNKSLFYSLCMEAGLVNEN >gi|2506121|sp|Q00555|CFTR_SHEEP Cystic fibrosis transmembrane conductance regulator (CFTR) (cAMP-dependent chloride channel) MQRSPLEKASVVSKLFFSWTRPILKKGYRQRLELSDIYHISSSDSADNLSEKLEREWDRELASKKNPKLI NALRRCFFWRFMFYGIILYLGEVTKAVQPLLLGRIIASYDPDNKVERSIAIYLGIGLCLLFIVRTLLLHP AIFGLHHIGMQMRIAMFSLIYKKTLKLSSRVLDKISIGQLVSLLSNNLNKFDEGLALAHFVWIAPLQVTL LMGLLWDLLQAFTFCGLAFLVVLALLQAGLGKMMMKYRDQRAGKINERLVITSEMIENIQSVKAYCWEEA MEKIIENLRQTELKLTRKAAYVRYLNSSAFFFSGFFVVFLSVLPYALLKGIILRKIFTTISFCIVLRMAV 
TRQFPWAVQTWYDSLGAINKIQDFLQKQEYKTLEYNLTTTDVVMENVTAFWEEGFSKLFEKAKENNNNRK ISNCDTSLFFSNLLLGTPVLKDISFKIERGQLLAVAGSTGAGKTSLLMMIMGELEPSEGKIKHSGRISFC SQYSWIMPGTIKDNIIFGVSYDEYRYRSVIKACQLEEDISKFSEKDNIVLGEGGITLSGGQRARISLARA VYKDADLYLLDSPFGYLDVLTEKEIFESCVCKLMANKTRILVTSKMEHLKKADKILILHEGSVYFYGTFS ELQNQRPDFSSKLMGCDTFDQFTAERRNSIITETLRRFSLEGDTSVSWNETKKPSFKQTGEFGEKRKNSI LNSINSIRKFSVVQKTSLQMNGIDGASDEPLERRLSLVPHSEPGEGILPRSNAVNSGPTFLGGRRQSVLN LMTCSSVNQGQSIHRKTATSTRKMSLAPQASLAEIDIYSRRLSQDTGLEISEEINEEDLRDCFFDDVENI PAVTTWNTYLRYITVHKSLMFVLIWCLVVFLVEVAASLVVLCLFPKILLQDKGNSTKNASNSYAVIITST SSYYIFYIYVGVADTLLALGLFRGLPLVHTLITVSKTLHHKMLQSVLQAPMSTLNTLKTGGILNRFSKDI AVLDDLLPLTIFDFIQLLLIVIGAVVVVSVLQPYIFLATVPVIAAFILLRGYFLHTSQQLKQLESEGRSP IFTHLVTSLKGLWTLRAFGRQPYFETLFHKALNLHTANWFLYLSTLRWFQMRIEMIFVIFFIAVTFISIL TTGEGEGRVGIILTLAMNIMGTLQWAVNSSIDVDSLMRSVSRVFKFIDMPTEDGKPNNSFRPSKDSQPSK VMIIENQHVKKDDIWPSGGQMTVKDLTAKYIDGGNAILENISFSISPGQRVGLLGRTGSGKSTLLLAFLR LLNTKGEIQIDGVSWDSITLQQWRKAFGVIPQKVFIFSGTFRKNLDPYEQWSDQEIWKVADEVGLRSVIE QFPGKLDFVLVDGGCVLSHGHKQLMCLARSVLSKAKILLLDEPSAHLDPITYQIIRRTLKQAFADCTVIL SEHRIEAMLECQRFLVIEENKVRQYDSIQRMLSEKSLFRQAISPADRLKLLPHRNSSRQRSRANIAALKE ETEEEVQETKL >gi|1705763|sp|Q00554|CFTR_RABIT Cystic fibrosis transmembrane conductance regulator (CFTR) (cAMP-dependent chloride channel) MQRSPLEKAGVLSKLFFSWTRPILRKGYRQRLELSDIYQIPSADSADNLSEKLEREWDRELASKKNPKLI NALRRCFFWRFMFYGIFLYLGEVTKAVQPLLLGRIIASYDPDNKEERSIAIYLGIGLCLLFVVRTLLLHP AIFGLHHIGMQMRIAMFSLIYKKGLALAHFVWISPLQVTLLMGLLWELLQASAFCGLAFLIVLALVQAGL GRMMMKYRDQRAGKINERLVITSEMIENIQSVKAYCWEEAMEKMIENLRQTELKLTRKAAYVRYFNSSAF FFSGFFVVFLSVLPYALTKGIILRKIFTTISFCIVLRMAVTRQFPWAVQTWYDSLGAINKIQDFLQKQEY KTLEYNLTTTEVVMDNVTAFWEEGFGELFEKAKQNNSDRKISNGDNNLFFSNFSLLGAPVLEDISFKIER GQLLAVAGSTGAGKTSLLMMITGELEPSEGKIKHSGRISFCSQFSWIMPGTIKENIIFGVSYDEYRYRSV IKACQLEEDISKFTEKDNTVLGEGGITLSGGQRARISLARAVYKDADLYLLDSPFGYLDVLTEKEIFESC VCKLMANKTRIMVTSKMEHLKKADKILILHEGSSYFYGTFSELQSLRPDFSSKLMGYDSFDQFSAERRNS ILTETLRRFSLEGDASVSWNDTRKQSFKQNGELGEKRKNSILNPVNSMRKFSIVLKTPLQMNGIEEDSDA 
TIERRLSLVPDSEQGEAILPRSNMINTGPMLQGCRRQSVLNLMTHSVSQGPSIYRRTTTSTRKMSLAPQT NLTEMDIYSRRLSQESGLEISEEINEEDLKECFIDDVDSIPTVTTWNTYLRYITVHRSLIFVLIWCIVIF LAEVAASLVVLWLFGNTAPQDKENSTKSGNSSYAVIITNTSSYYFFYIYVGVADTLLALGLFRGLPLVHT LITVSKILHHKMLHSVLQAPMSTLNTLKAGGILNRFSKDIAILDDLLPLTIFDFIQLLLIVVGAIAVVSV LQPYIFLATVPVIAAFILLRAYFLHTSQQLKQLESEGRSPIFTHLVTSLKGLWTLRAFGRQPYFETLFHK ALNLHTANWFLYLSTLRWFQMRIEMIFVLFFIAVAFISILTTGEGEGRVGIILTLAMNIMSTLQWAVNSS IDVDSLMQSVSRVFMFIDMPTEAKSTKSIKPSSNCQLSKVMIIENQHVKKDDVWPSGGQMTVKGLTAKYI DSGNAILENISFSISPGQRVGLLGRTGSGKSTLLSAFLRLLSTEGEIQIDGVSWDSITLQQWRKAFGVIP QKVFIFSGTFRKNLDPYEQWSDQEIWKVADEVGLRSVIEQFPGKLDFVLVDGGYVLSHGHKQLMCLARSV LSKAKILLLDEPSAHLDPITYQIIRRTLKQAFADCTVILCEHRIEAMLECQRFLVIEENTVRQYESIQKL LSEKSLFRQAISSSDRAKLFPHRNSSKHKSRPQITALKEEAEEEVQGTRL >gi|1705762|sp|P13569|CFTR_HUMAN Cystic fibrosis transmembrane conductance regulator (CFTR) (cAMP-dependent chloride channel) MQRSPLEKASVVSKLFFSWTRPILRKGYRQRLELSDIYQIPSVDSADNLSEKLEREWDRELASKKNPKLI NALRRCFFWRFMFYGIFLYLGEVTKAVQPLLLGRIIASYDPDNKEERSIAIYLGIGLCLLFIVRTLLLHP AIFGLHHIGMQMRIAMFSLIYKKTLKLSSRVLDKISIGQLVSLLSNNLNKFDEGLALAHFVWIAPLQVAL LMGLIWELLQASAFCGLGFLIVLALFQAGLGRMMMKYRDQRAGKISERLVITSEMIENIQSVKAYCWEEA MEKMIENLRQTELKLTRKAAYVRYFNSSAFFFSGFFVVFLSVLPYALIKGIILRKIFTTISFCIVLRMAV TRQFPWAVQTWYDSLGAINKIQDFLQKQEYKTLEYNLTTTEVVMENVTAFWEEGFGELFEKAKQNNNNRK TSNGDDSLFFSNFSLLGTPVLKDINFKIERGQLLAVAGSTGAGKTSLLMMIMGELEPSEGKIKHSGRISF CSQFSWIMPGTIKENIIFGVSYDEYRYRSVIKACQLEEDISKFAEKDNIVLGEGGITLSGGQRARISLAR AVYKDADLYLLDSPFGYLDVLTEKEIFESCVCKLMANKTRILVTSKMEHLKKADKILILHEGSSYFYGTF SELQNLQPDFSSKLMGCDSFDQFSAERRNSILTETLHRFSLEGDAPVSWTETKKQSFKQTGEFGEKRKNS ILNPINSIRKFSIVQKTPLQMNGIEEDSDEPLERRLSLVPDSEQGEAILPRISVISTGPTLQARRRQSVL NLMTHSVNQGQNIHRKTTASTRKVSLAPQANLTELDIYSRRLSQETGLEISEEINEEDLKECFFDDMESI PAVTTWNTYLRYITVHKSLIFVLIWCLVIFLAEVAASLVVLWLLGNTPLQDKGNSTHSRNNSYAVIITST SSYYVFYIYVGVADTLLAMGFFRGLPLVHTLITVSKILHHKMLHSVLQAPMSTLNTLKAGGILNRFSKDI AILDDLLPLTIFDFIQLLLIVIGAIAVVAVLQPYIFVATVPVIVAFIMLRAYFLQTSQQLKQLESEGRSP 
IFTHLVTSLKGLWTLRAFGRQPYFETLFHKALNLHTANWFLYLSTLRWFQMRIEMIFVIFFIAVTFISIL TTGEGEGRVGIILTLAMNIMSTLQWAVNSSIDVDSLMRSVSRVFKFIDMPTEGKPTKSTKPYKNGQLSKV MIIENSHVKKDDIWPSGGQMTVKDLTAKYTEGGNAILENISFSISPGQRVGLLGRTGSGKSTLLSAFLRL LNTEGEIQIDGVSWDSITLQQWRKAFGVIPQKVFIFSGTFRKNLDPYEQWSDQEIWKVADEVGLRSVIEQ FPGKLDFVLVDGGCVLSHGHKQLMCLARSVLSKAKILLLDEPSAHLDPVTYQIIRRTLKQAFADCTVILC EHRIEAMLECQQFLVIEENKVRQYDSIQKLLNERSLFRQAISPSDRVKLFPHRNSSKCKSKPQIAALKEE TEEEVQDTRL >gi|461723|sp|P34158|CFTR_RAT Cystic fibrosis transmembrane conductance regulator (CFTR) (cAMP-dependent chloride channel) (Fragment) IKHSGRVSFSSQISWIMPGTIKENIIFGVSYDEYRYKSVVKACQLQEDITKFAEQDNTVLGEGGVTLSGG QRARISLARAVYKDADLYLLDSPFGYLDVLTEEQIFESCVCKLMASKTRILVTSKMEQLKKADKILILHE GSSYFYGTFSELQSLRPDFSSKLMGYDTFDQFTEERRSSILTETLRRFSVDDASTTWNKAKQSFRQTGEF GEKRKNSILSSFSSVKKISIVQKTPLSIEGESDDLQERRLSLVPDSEHGEAALPRSNMITAGPTFPGRRR QSVLDLMTFTPSSVSSSLQRTRASIRKISLAPRISLKEEDIYSRRLSQDSTLNITEEINEEDLKECFFDD MVKIPTVTTWNTYLRYFTLHRGLFAVLIWCVLVFLVEVAASLFVLWLLKNNPVNGGNNGTKIANTSYVVV ITSSSFYYIFYIYVGVADTLLALSLFRGLPLVHTLITASKILHRKMLHSILHAPMSTFNKLKAGGILNRF SKDIAILDDFLPLTILT >gi|461721|sp|Q00553|CFTR_MACMU Cystic fibrosis transmembrane conductance regulator (CFTR) (cAMP-dependent chloride channel) (Fragment) TTLTSKMEHLKKADKILILHEGSSYFYGTFSELQNLRPDFSSKLMGYDSFDQFSAERRNSILTETLRRFS LEGDAGVSWTETKKQSFKQTGEFGEKRKNSILNPINSIRKFSIVQKTPLQMNGIEEDSDEPLERRLSLVP YSEQGEVILPRISVISTGPTLQARRRQSELNLMTHS >gi|461720|sp|Q00552|CFTR_CAVPO Cystic fibrosis transmembrane conductance regulator (CFTR) (cAMP-dependent chloride channel) (Fragment) TSGTTLLVTSKMEHLKKADKILILHEGSSYFYGTFSELQNLRPDFSSKLMGYDSFDQFSAERRNSILTET LRRFSLEGDPSVSFNETKKQSFKQTGEFGEKRKNSILNQFNSITKFSIVPKTPLQISGIEEDSDDPVERR LSLVPDSEQSDGLLRKHVIHTGPTFQGSRRQSVLNLITHS >gi|461719|sp|P35071|CFTR_BOVIN Cystic fibrosis transmembrane conductance regulator (CFTR) (cAMP-dependent chloride channel) MQRSPLEKASVVSKVFFSWTRPILKKGYRQRLELSDIYHISSSDSADNLSEKLEREWDRELASKKNPKLI NALRRCFFWRFMFYGIILYLGEVTKAVQPLLLGRIIASYDPDNKVERSIAIYLGIGLCLLFIVRTLLLHP 
AIFGLHHIGMQMRIAMFSLIYKKTLKLSSRVLDKISIGRLVSLLSNNLNKFDEGLALAHFVWIAPLQVTL LMGLLWELLQAFTFCGLAFLIVLALLQAGLGKMMMKYRDQRAGKINERVVITSEMIENIQSVKAYCWEEA MEKIIENLRQTELKLTRKAAYVRYLNSSAFFFSGFFVVFLSVLPYALLKGIILRKIFTTISFCIVLRMAV TRQFPWAVQTWYDSLGAINKIQDFLQKQEYKTLEYNLTTTDVVMDNVTAFWEEGFSKLFEKAKENNNNRK ISNGDNSLFFSNLLLGTPVLKDISFKIERGQLLAVAGSTGAGKTSLLLMIMGELEASEGKIKHSGRISFC SQYSWIMPGTIKDNIIFGVSYDEYRYRSVIKACQLEEDISKFAEKDNVVLGEGGITLSGGQRARISLARA VYKDADLYLLDSPFGYLDVLTEKEIFESCICKLMANKTRILVTSKMEHLKKADKILILHEGSIYFYGTFS ELQNQRPDFSSKLMGCDTFDQFTAERRNSIITETLRRFSLEGDTSVSWNETKKPSFKQTGEFGEKRKNSI LSSINSIRKFSVVQKTSLQMNGIEGAADAPLERRLSLVPHSEPGEGILPRSNAVNSGPTFLGGRRQSVLN LMTGSSVNQGQSIHRKTATSTRKMSLAPQASLAEIDIYSRRLSQDTGLEISEEINEEDLRDCFFDDVENI PAVTTWNTYLRYITVHKSLMFVLIWCLVVFLVEVAASLVVLCLFPKIFFQDKGNSTKSANNSYAVIITST SSYYIFYIYVGVADTLLALGLFRGLPLVHTLITVSKTLHHKMLQSVLQAPMSTLNTLKTGGILNRFSKDI AVLDDLLPLTIFDFVQLLLIVIGAVVVVSVLQPYIFLATVPVIAAFILLRAYFLHTSQQLKQLESEGRSP IFTHLVTSLKGLWTLRALDRQPYFETLFHKALNLHTANWFLYLSTLRWFQMRIEMIFVIFFIAVTFISIL TTGEGEGRVGIILTLAMNIMGTLQWAVNSSIDVDSLMRSVSRVFKFIDMPTEDGKPNNSFRPSKDSQPSK VMIIENQHVKKDDIWPSGGQMTVKDLTAKYTDGGNAILENISFSISSGQRVGLLGRTGSGKSTLLLAFLR LLNTKGEIQIDGVSWDSITLQQWRKAFGVIPQKVFIFSGTFRKNLDPYGQWSDQEIWKVADEVGLRAVIE QFPGKLDFVLVDGGCVLSHGHKQLMCLARSVLSKAKILLLDEPSAHLDPITYQIIRRTLKQAFANCTVIL SEHRIEAMLECQRFFVIEENKVRQYDSIQRMLSEKSLFRQAISPADRLKLLPHRNSSRQRSRSNIAALKE ETEEEVQETKL >gi|116142|sp|P26363|CFTR_XENLA Cystic fibrosis transmembrane conductance regulator (CFTR) (cAMP-dependent chloride channel) MQKTPLEKASIFSQIFFSWTKPILWKGYRQRLELSDIYQIHPGDSADNLSERLEREWDREVATSKKNPKL INALKRCFFWKFLFYGILLYLGEVTKAVQPLLLGRIIASYDRDNEHERSIAYYLAIGLCLLFVVRMLLLH PAIFGLHHIGMQMRIAMFSLIYKKTLKLSSKVLDKISTGQLVSLLSNNLNKFDEGLALAHFVWIAPLQVL LLMGLLWDLLQASAFCGLGFLIILSLFQARLGRMMMKYKDKRAGKINERLVITSQIIENIQSVKAYCWEN AMEKIIETIRETELKLTRKAAYVRYFNSSAFFFSGFFVVFLSIVPHLLLDGISLRKIFTTISFSIVLRMA VTRQFPWAVQTWYDSLGVINKIQEFLQKEEYKSLEYNLTTTEVAMENVSASWDEGIGEFFEKAKLEVNGG NISNEDPSAFFSNFSLHVAPVLRNINFKIEKGQLLAIAGSTGAGKTSLLMMIMGELEPSAGKIKHSGRIS 
FSPQVSWIMPGTIKENIVFGVSYDQYRYLSVIKACQLEEDISKFPEKDNTVLGEGGITLSGGQRARISLA RAVYKDADLYLLDSPFSYLDLFTEKEIFESCVCKLMANKTRILVTSKVEQLKKADKVLILHEGSCYFYGT FSELEDQRPEFSSHLIGFDHFNAERRNSIITETLRRCSIDSDPSAVRNEVKNKSFKQVADFTEKRKSSII NPRKSSRKFSLMQKSQPQMSGIEEEDMPAEQGERKLSLVPESEQGEASLPRSNFLNTGPTFQGRRRQSVL NLMTRTSISQGSNAFATRNASVRKMSVNSYSNSSFDLDIYNRRLSQDSILEVSEEINEEDLKECFLDDTD SQSPTTTWNTYLRFLTAHKNFIFILVFCLVIFFVEVAASSAWLWIIKRNAPAINMTSNENVSEVSDTLSV IVTHTSFYYVFYIYVGVADSLLALGIFRGLPLVHSLISVSKVLHKKMLHAILHAPMSTFNTMRAGRILNR FSKDTAILDDILPLSIFDLTQLVLIVIGAITVVSLLEPYIFLATVPVIVAFILLRSYFLHTSQQLKQLES KARSPIFAHLITSLKGLWTLRAFGRQPYFETLFHKALNLHTANWFLYLSTLRWFQMTIEMIFVIFFIAVS FISIATSGAGEEKVGIVLTLAMNIMNTLQWAVNASIDVDSLMRSVSRIFRFIDLPVEELINENKNKEEQL SEVLIYENDYVKKTQVWPSGGQMTVKNLSANYIDGGNTVLENISFSLSPGQRVGLLGRTGSGKSTLLSAF LRLLSTQGDIQIDGVSWQTIPLQKWRKAFGVIPQKVFIFSGSIRKNLDPYGKWSDEELLKVTEEVGLKLI IDQFPGQLDFVLLDGGCVLSHGHKQLVCLARSVLSKAKILLLDEPSAHLDPITFQIIRKTLKHAFADCTV ILSEHRLEAMLECQRFLVIEDNTVRQYDSIQKLVNEKSFFKQAISHSDRLKLFPLHRRNSSKRKSRPQIS ALQEETEEEVQDTRL >gi|116141|sp|P26362|CFTR_SQUAC Cystic fibrosis transmembrane conductance regulator (CFTR) (cAMP-dependent chloride channel) MQRSPIEKANAFSKLFFRWPRPILKKGYRQKLELSDIYQIPSSDSADELSEMLEREWDRELATSKKNPKL VNALRRCFFWRFLFYGILLYFVEFTKAVQPLCLGRIIASYNAKNTYEREIAYYLALGLCLLFVVRTLFLH PAVFGLQHLGMQMRIALFSLIYKKILKMSSRVLDKIDTGQLVSLLSNNLNKFDEGVAVAHFVWIAPVQVV LLMGLIWNELTEFVFCGLGFLIMLALFQAWLGKKMMQYRDKRAGKINERLAITSEIIDNIQSVKVYCWED AMEKIIDDIRQVELKLTRKVAYCRYFSSSAFFFSGFFVVFLSVVPYAFIHTIKLRRIFTTISYNIVLRMT VTRQFPSAIQTWYDSLGAIRKIQDFLHKDEHKTVEYNLTTKEVEMVNVTASWDEGIGELFEKVKQNDSER KMANGDDGLFFSNFSLHVTPVLKNISFKLEKGELLAIAGSTGSGKSSLLMMIMGELEPSDGKIKHSGRIS YSPQVPWIMPGTIKDNIIFGLSYDEYRYTSVVNACQLEEDITVFPNKDKTVLGDGGITLSGGQRARISLA RALYKDADLYLLDSPFSHLDVTTEKDIFESCLCKLMVNKTRILVTSKLEHLKKADKILLLHEGHCYFYGT FSELQGEKPDFSSQLLGSVHFDSFSAERRNSILTETFRRCSVSSGDGAGLGSYSETRKASFKQPPPEFNE KRKSSLIVNPITSNKKFSLVQTAMSYPQTNGMEDATSEPGERHFSLIPENELGEPTKPRSNIFKSELPFQ AHRRQSVLALMTHSSTSPNKIHARRSAVRKMSMLSQTNFASSEIDIYSRRLSEDGSFEISEEINEEDLKE 
CFADEEEIQNVTTTWSTYLRYVTTNRNLVFVLILCLVIFLAEVAASLAGLWIISGLAINTGSQTNDTSTD LSHLSVFSKFITNGSHYYIFYIYVGLADSFLALGVIRGLPLVHTLVTVSKDLHKQMLHSVLQGPMTAFNK MKAGRILNRFIKDTAIIDDMLPLTVFDFVQLILIVVGAICVVSVLQPYTLLAAIPVAVIFIMLRAYFLRT SQQLKQLESEARSPIFSHLITSLRGLWTVRAFGRQSYFETLFHKALNLHTANWFLYLSTLRWFQMRIDIV FVLFFIAVTFIAIATHDVGEGQVGIILTLAMNITSTLQWAVNSSIDVDGLMRSVSRVFKYIDIPPEGSET KNRHNANNPSDVLVIENKHLTKEWPSGGQMMVNNLTAKYTSDGRAVLQDLSFSVNAGQRVGLLGRTGAGK STLLSALLRLLSTEGEIQIDGISWNSVSLQKWRKAFGVIPQKVFVFSGTFRKNLDPYEQWSDEEIWKVTE EVGLKSMIEQFPDKLNFVLVDGGYILSNGHKQLMCLARSILSKAKILLLDEPTAHLDPVTFQIIRKTLKH TFSNCTVILSEHRVEALLECQQFLVIEGCSVKQFDALQKLLTEASLFKQVFGHLDRAKLFTAHRRNSSKR KTRPKISALQEEAEEDLQETRL >gi|116140|sp|P26361|CFTR_MOUSE Cystic fibrosis transmembrane conductance regulator (CFTR) (cAMP-dependent chloride channel) MQKSPLEKASFISKLFFSWTTPILRKGYRHHLELSDIYQAPSADSADHLSEKLEREWDREQASKKNPQLI HALRRCFFWRFLFYGILLYLGEVTKAVQPVLLGRIIASYDPENKVERSIAIYLGIGLCLLFIVRTLLLHP AIFGLHRIGMQMRTAMFSLIYKKTLKLSSRVLDKISIGQLVSLLSNNLNKFDEGLALAHFIWIAPLQVTL LMGLLWDLLQFSAFCGLGLLIILVIFQAILGKMMVKYRDQRAAKINERLVITSEIIDNIYSVKAYCWESA MEKMIENLREVELKMTRKAAYMRFFTSSAFFFSGFFVVFLSVLPYTVINGIVLRKIFTTISFCIVLRMSV TRQFPTAVQIWYDSFGMIRKIQDFLQKQEYKVLEYNLMTTGIIMENVTAFWEEGFGELLQKAQQSNGDRK HSSDENNVSFSHLCLVGNPVLKNINLNIEKGEMLAITGSTGLGKTSLLMLILGELEASEGIIKHSGRVSF CSQFSWIMPGTIKENIIFGVSYDEYRYKSVVKACQLQQDITKFAEQDNTVLGEGGVTLSGGQRARISLAR AVYKDADLYLLDSPFGYLDVFTEEQVFESCVCKLMANKTRILVTSKMEHLRKADKILILHQGTSYFYGTF SELQSLRPSFSSKLMGYDTFDQFTEERRSSILTETLRRFSVDDSSAPWSKPKQSFRQTGEVGEKRKNSIL NSFSSVRKISIVQKTPLCIDGESDDLQEKRLSLVPDSEQGEAALPRSNMIATGPTFPGRRRQSVLDLMTF TPNSGSSNLQRTRTSIRKISLVPQISLNEVDVYSRRLSQDSTLNITEEINEEDLKECFLDDVIKIPPVTT WNTYLRYFTLHKGLLLVLIWCVLVFLVEVAASLFVLWLLKNNPVNSGNNGTKISNSSYVVIITSTSFYYI FYIYVGVADTLLALSLFRGLPLVHTLITASKILHRKMLHSILHAPMSTISKLKAGGILNRFSKDIAILDD FLPLTIFDFIQLVFIVIGAIIVVSALQPYIFLATVPGLVVFILLRAYFLHTAQQLKQLESEGRSPIFTHL VTSLKGLWTLRAFRRQTYFETLFHKALNLHTANWFMYLATLRWFQMRIDMIFVLFFIVVTFISILTTGEG EGTAGIILTLAMNIMSTLQWAVNSSIDTDSLMRSVSRVFKFIDIQTEESMYTQIIKELPREGSSDVLVIK 
NEHVKKSDIWPSGGEMVVKDLTVKYMDDGNAVLENISFSISPGQRVGLLGRTGSGKSTLLSAFLRMLNIK GDIEIDGVSWNSVTLQEWRKAFGVITQKVFIFSGTFRQNLDPNGKWKDEEIWKVADEVGLKSVIEQFPGQ LNFTLVDGGYVLSHGHKQLMCLARSVLSKAKIILLDEPSAHLDPITYQVIRRVLKQAFAGCTVILCEHRI EAMLDCQRFLVIEESNVWQYDSLQALLSEKSIFQQAISSSEKMRFFQGRHSSKHKPRTQITALKEETEEE VQETRL >gi|17224462|gb|AAL36986.1|AF282773_1 sub-family C member 2 ATP-binding cassette protein [Mus musculus] MDEFCNSTFWNLSLLKSPEADLPLCFEQTVLVWIPLGFLWLLAPWQLYRIYRSRTKRFAITKFYLAKQVF VVCLLILAAIDLSLALTEDTGQATIPPVKYTNPILYLCTWLLVLVIQHCRQCCIQKNSWFLSMFWILSLL CGIFQFQTLIRALLQDSKSNMTYSCLFFVSYGFQIVILILSAFSESSDSTHAPSATASFLSSVTFSWYDS TVLKGYKHPLTIEDVWDIEENLKAKSLTSKFKTIMTKDLQKARQALQRRLKKSQQSPEGTSHGLTKKQSQ SQDVLVLEDSKKKKKKSEATKDFPKSWLVKALFKTFYVVILKSFILKLAHDILLFLNPQLLKFLIGFVKD PDSYPWVGYIYAILMFSVTLIQSFFLQCYFQFCFVLGMTVRTTIIASVYKKALTLSNLARRQYTIGETVN LMSVDSQKLMDVTNYIHLLWSSVLQIALSIFFLWRELGPSILAGVGLMVLLVPVNGVLATKIRKIQVQNM KNKDKRLKIMNEILSGIKILKYFAWEPSFKEQVNSIRKKELRNLLRFSQLQTILIFILHLTPTLVSVITF SVYVLVDSQNVLNAEKAFTSITLFNILRFPLAMLPMVISSVIQASVSVDRLEQYLGSDDLDLSAIRHVCH FDKAVQFSEASFTWDRDLEATIQDVNLDIKPGQLVAVVGTVGSGKSSLISAMLGEMENVHGHITIKGSIA YVPQQAWIQNGTIKDNILFGSEYDEKKYQRVIEACALLPDLEMLPGGDMAEIGEKGINLSGGQKHRVSLA RATYQDADIYILDDPLSAVDTHVGKHIFNKVVGPNGLLSGKTRILVTHGIHFLPQVDEIVVLGKGTILEK GSYSDLMDKKGVFAKNWKTFMKHSGPEGEATVDNDSEEEDGDCGLIPTVEEIPDDAASLTMRRENSLRRT LSRSSRSGSRRGKSLKSSLKIKSVNALNKKEEVVKGQKLIKKEFVETGKVKFSIYLKYLQAVGWWSLLFI VIFYVLNYVAFIGTNLWLSAWTSDSEKQNGTDNSPSQRDMRIGVFGALGIAQGIFLLSSSLWSIYACRNA SKTLHRQLLTNILRAPMSFFDTTPTGRIVNRFAGDISTVDDTLPQTLRSWLLCFFGIVSTLVMICMATPI FIIIIIPLSILYVSVQVFYVATSRQLRRLDSVTKSPIYSHFSETVSGLPVIRAFEHQQRFLANSEKQIDT NQKCVFSWITSNRWLAIRLELVGNLIVFCSALLLVIYKNSLTGDTVGFVLSNALNITQTLNWLVRMTSEV ETNIVAVERINEYINVDNEAPWVTDKKPPADWPKKGEIQFNNYQVRYRPELDLVLKGITCNIKSTEKVGV VGRTGAGKSSLTNCLFRILESAGGQIIIDGIDIASIGLHDLRGRLTIIPQDPILFSGNLRMNLDPFNKYS DEEIWRALELAHLKSFVAGLQLGLLHEVTEGGDNLSIGQRQLLCLGRAVLRKSKILVLDEATAAVDLETD SLIQTTIRNEFSQCTVITIAHRLHTIMDSDKIMVLDSGKIVEYGSPEELLSNMGPFYLMAKEAGIESVNH TEL >gi|17224460|gb|AAL36985.1|AF282772_1 
sub-family C member 2 ATP-binding cassette protein [Mus musculus] MDEFCNSTFWNLSLLKSPEADLPLCFEQTVLVWIPLGFLWLLAPWQLYRIYRSRTKRFAITKFYLAKQVF VVCLLILAAIDLSLALTEDTGQATIPPVKYTNPILYLCTWLLVLVIQHCRQCCIQKNSWFLSMFWILSLL CGIFQFQTLIRALLQDSKSNMTYSCLFFVSYGFQIVILILSAFSESSDSTHAPSATASFLSSVTFSWYDS TVLKGYKHPLTIEDVWDIEENLKAKSLTSKFKTIMTKDLQKARQALQRRLKKSQQSPEGTSHGLTKKQSQ SQDVLVLEDSKKKKKKSEATKDFPKSWLVKALFKTFYVVILKSFILKLAHDILLFLNPQLLKFLIGFVKD PDSYPWVGYIYAILMFSVTLIQSFFLQCYFQFCFVLGMTVRTTIIASVYKKALTLSNLARRQYTIGETVN LMSVDSQKLMDVTNYIHLLWSSVLQIALSIFFLWRELGPSILAGVGLMVLLVPVNGVLATKIRKIQVQNM KNKDKRLKIMNEILSGIKILKYFAWEPSFKEQVNSIRKKELRNLLRFSQLQTILIFILHLTPTLVSVITF SVYVLVDSQNVLNAEKAFTSITLFNILRFPLAMLPMVISSVIQASVSVDRLERYLGSDDLDLSAIRHVCH FDKAVQFSEASFTWDRDLEATIQDVNLDIKPGQLVAVVGTVGSGKSSLISAMLGEMENVHGHITIKGSIA YVPQQAWIQNGTIKDNILFGSEYDEKKYQRVIEACALLPDLEMLPGGDMAEIGEKGINLSGGQKHRVSLA RATYQDADIYILDDPLSAVDTHVGKHIFNKVVGPNGLLSGKTRILVTHGIHFLPQVDEIVVLGKGTILEK GSYSDLMDKKGVFAKNWKTFMKHSGPEGEATVDNDSEEEDGDCGLIPTVEEIPDDAASLTMRRENSLRRT LSRSSRSGSRRGKSLKSSLKIKSVNALNKKEEVVKGQKLIKKEFVETGKVKFSIYLKYLQAVGWWSLLFI VIFYVLNYVAFIGTNLWLSAWTSDSEKQNGTDNSPSQRDMRIGVFGALGIAQGIFLLSSSLWSIYACRNA SKTLHRQLLTNILRAPMSFFDTTPTGRIVNRFAGDISTVDDTLPQTLRSWLLCFFGIVSTLVMICMATPI FIIIIIPLSILYVSVQVFYVATSRQLRRLDSVTKSPIYSHFSETVSGLPVIRAFEHQQRFLANSEKQIDT NQKCVFSWITSNRWLAIRLELVGNLIVFCSALLLVIYKNSLTGDTVGFVLSNALNITQTLNWLVRMTSEV ETNIVAVERINEYINVDNEAPWVTDKKPPADWPKKGEIQFNNYQVRYRPELDLVLKGITCNIKSTEKVGV VGRTGAGKSSLTNCLFRILESAGGQIIIDGIDIASIGLHDLRGRLTIIPQDPILFSGNLRMNLDPFNKYS DEEIWRALELAHLKSFVAGLQLGLLHEVTEGGDNLSIGQRQLLCLGRAVLRKSKILVLDEATAAVDLETD SLIQTTIRNEFSQCTVITIAHRLHTIMDSDKIMVLDSGKIVEYGSPEELLSNMGPFYLMAKEAGIESVNH TEL >gi|2133118|pir||S64758 SCD25 protein (version 2) - yeast (Saccharomyces cerevisiae) MPITSSPDLFYLNDCDVVYWYDLTRLVCHYVNLTERDLLANEREKFLTSLDLLTAQITYVYMLFRNLRLV EDSFKKTLKKLIYTLSRFSINANIWFHSTLFEEREAIASQKDPERRSPLLQSILGTFQKFHFLLRLLHFL SNPNELTILPQLTPRFFKDSFNTISWNNPFLRKRLNQHMSHDLPRQMIKAVAGASGIVAENIDEIPASKQ 
GTSCSSETSHHSPSAPFQRRRRGTIFSNVSGSSDESDTIWSKRKKPYPLNEETLSLVRARKKQLDGKLKQ MIKSANEYLSNTANFSKMLNFEMNFKTYEEVSGTIPIIDILENLDLTIFLNLRELGDENRVFDEDVAIDD EDEEFLKHSLSSLSYILSDYFNMKQYFHDVVVKFIIVAQHLTLEDPFVFSPMQNDLPTGYYEPMKPSSLN LDNAKDKKNGSQNTDIQEEEDEYEPDPDSLILFHNLINQDSDFNDLKFFNLAHVFKKSCDDYFDVLKLAI EFVNQLILERENLLNYAARMMKNNITELLLRGEEGYGSYDGGETAEKSDTNAVYADSDTKDNDEWRDSQV KLPRYLQREYDSELIWGSNNRIKGGSKHALISYLTDNEKKDLFFDITFLITFRSIFTTTEFLSYLISQYN LDPPEDLCFEEYNEWVTKKLIPVKCRVVEIMTTFFKQYWFLGYDEPDLATLNLDYFAQVAIKENITGSVE LLKEVNQKFKHGNIQEATAPMKTLDQQICQDHYSGTLYSTTESILAVDPVLFATQLTILEHEIYCEITIF DCLQKIWKNKYTKSYGASPGLNEFISFANKLTNFISYSVVKEADKSKRAKLLSHFIFIAEYCRKFNNFSS MTAIISALYSSPIYRLEKTWQAVIPQTRDLLQSLNKLMDPKKNFINYRNELKSLHSAPCVPFFGVYLSDL TFTDSGNPDYLVLEHGLKGVHDEKKYINFNKRSRLVDILQEIIYFKKTHYDFTKDRTVIECISNSLENIP HIEKQYQLSLIIEPKPRKKVVPNSNSNNKSQEKSRDDQTDEGKTSTKKDRFPKFQLHKTKKKAPKVSK QETRL clustalw-mpi-0.15/db10.input0000644000411000001440000014673007644152540014225 0ustar liusers>gi|00000001| GCTCCGCCGTGAGATAAGTAGTGTGCCTAGGTAGAGACTGCGTAGGATATAGTGGGATGAGATGGACTCA CTGATCGTATTACGTACTATTGATCGTGCCGTACTTGGTTCGTTCGTGGAGGGGCTCCGTGAGTGGGAAC TGATCCGCTACGGTAATCGCCAGTAACGCGAAAAACCGTCTATGTATTGCCTGTTGACACTCCAGGGACC GACGTCGGAATGACCTTCAAGTGAAATGAATGAACTCTGGACAAGGTGAAGTACAACTGGTCCAGGATCG GGATGTCGCGGACGCCGTCACAGGCACCATATCAGTTACCGAGAGTCCGGGATATAAAGCTTAATGATTC CCGTCAGTCCGTGCACGCGAGCTGGTCGTAAAGGTAGGAAAGAGTCTCGCGCTCTCCCTAGAACGACCGC CGTGCTTCAGTTTGATAGAAGTATAGACTGTCCTAGGAATGTGCCTAGCCGAAGAACAGATGGAGGTCGT CCTTGATTTCCCGCAGGCTCGGTATGTTTGTATGACTTCGCGTTATGGCCATATTTCTGCCCCAACGTCT CTGCGGCCCGTCCTCCTTAGACACCATATCTCAGGTATACGATTAAATTCCTGCCACCACGAGGGCCGCG ATTAGTATGTAATTCCTTGAAAATTTACGCATCTTTGTTATTATGGAGGGTGGAGGTTTGATCCGACAAA CTAGGTATGGCCCACAGCTCTAAAGTCTACACCATAAATGTAACCGGTTCCTCGATCCTCTATACTACTA ATAACCGTCGAGCGAGGCTGTTGGTACACCAGCCGGTCGATTGCGTTACTGAGCTCCAGTGTCTAAGTCA AAAGCGGAGTACTGTGACTGACCGAGGTCTACTAACGGGCCGTCAGTATTTAACGCTAACAAGTATCTGT CTCACGTATGATTAACCTGCAGGTGTTTGCTTACACTTGTCGGCTGTAGCGTACGTACTTGGGTAGCTCA 
TTTCAGATTGCCCGTACTGAGCTCATAGTCGGCCAGTAGGTTAACACTTTAGCATCTTATCGGTAGCAGT TGTAGAAAAAAAGCAGTAGTAAGGTTACTTCGCAGACTCCTCGCGTAGTGGTTAGTTTAGGCCACAATTG AGTTAGGCACAGCTTTGGGTCCATGGTGCTCGGACTGTATCCCTCATTTGTCTTACGAAATCGGGACATG AATGTATGAGATAATCGGCTGCAATGACTATACTGCTCAATCTACTCACGTTAAAAGCCGCCGGAAATGA ACCTGTGTTAGGTGGAGCCAATCTGCAGCGGCATCGTACGAACAGTACCGGCCTCTCCCTTTCTTGGTTT CAATAGGCAATGGAGTTACCTACAACTTATGCTGAACTGGACCGGAGGATTAAACCCAAGATATGAAAAG GCAATGAGCAGAAGCCATGACGACGGGTGTCCAGCACCTGGCTTAAAAATCGGCAAATATGGATAGACTT TGCGCCTGGACAGTCTAGGACAGCATTAATAAGCTTTGCCTTCCTTATCTATTTAAGAATACTGGGGCCA TCTCTGCAGTTGGTGTTATAAACTTAGCGAGGCCTAACCTAAGGTACGCCGCCACAATGGAAAGGATTCA GCAAATCCGGTCTACACGAAAAACTTGGTTTCCAGCTTTGCGTATACAGCATGCCCCAAAATGCTCTTAT CGGCTCGAGCGTCACGCGGGTGCCACGACTTGGCAGTGTCTCAACCTTAGCTCTCCCAGGACAGTACTTA AGCCTTTGTAAATCACTCCCTCGATGCACACGGTACGAACAAGAGTCCAGGAACAATCCCGGACCCTTCT CGTATCTATTTCATGATGGAAATACACAATTGCTGACCCACCGCCAGCTAGTACAGCCGGAGAGGGGAAA CGCTAGAGTAGCTTTTGCCATCTATGTGTAGAGAAGTCAGCTACTACCCGGCAGCAACGTCCATGAGCGG ATGCCGCGTTAGAAAGACTTAACCTTCGCAACTTTAGAGGATAATCGAGGATGGAGGGATGCCGCAGTCC CCACGATCCCTGAGACAGTCGGGAAACTTTCCCCTTGGAATAGAGTCGGCTCAGCAGGTGCATTGGGCAT CATAACAGAGAATAGACCTAACCTAAGTGTGATCAAGATTTTTGACTCTTGTCATGACCTCTTACACACA TAAGTAATGATCAACTGGCACGACTCCAGGCGTGCGTCCCGAGGCAGAGTAAGCCGTGTCCCTATCGGGA AACGTTGCTCCACTCTGACCGGTGGGTCAGGATTTGTGTGAATCAACTCGCTCAGAGCCTTTAGTTCTGC CGCAAGCCGTTTGATACGGGGGGGCGCGCTTTGCAGACTGATCTACACATTGCGAGAGCGGACAGCGGTC AAAGAAAAGATCTCAATAGATACAATGTCCCGGGAGTCTCCGGCTTCTATTTACTCACACTGTCAAATCT ATGTAGGCCGCCCCGCTGTAAGGTAGTCCAAGTGCAAACGGTTACGCACAACTGAACACTCGCCCGCGGT ACGACTGTATAACATCAAGGAAACCGATATGAACCGAAGAAGCCGATTCGCCGGTTATGATCCTGTGTTA AATCGCGCAATGGCCCCCCAGAGAAAAGAATAAGGGACACCTAACCCTTGACTGGTGGCTGCATAGCCAC GCCTGTATGAGGCCAAATTGTCATACCGGCTCTCACACCTCACTCGAGGTCCAGACTGTCATGTCTCCAT AGTGGCAGATCGAGACTTTTCTTATATCCAAGTAACGCTTAACCGGGGGCCTCATCATGGATATACATGT GTAAAGGTACCGAGGAATAGCAGGTGTACGAAGAAGTTGAATGAGCCGCCCTGTCGGCGAATAGAACTTT TTGGAATGTCAAAGTCCGTAGTTGCTTGTTGTGACTCCGAGGCGACAGTTGCTCACCTAAGGTCTACAGT 
CACGTGTTATTGAGACTCAACTGAACCCGACTAGGACGTCCGAGCATCGACTTAAACGGTGTTTGGTTAT CGGGATTACCAGCAGGAAAGATGATCGGAGGGAAACTTCCCGTGGGATTCGTCCAAGGTGCCCGCGTCCC GGTGCGCCGATAAACACTGAGTGTGTGCCTTTGTGTGTCATCATCGTTGGTAGGAACGGTGCTCACAGAG TATTACGTAAGACAGCAAAGAGTTTAAATCTGGTGGAAGCCAGGACAACATGGGCGTGTGAGCGCTCGAT TGACAAGACAAATTTAGGTCCTTGGAGTTTTGCTACTTGCATCTTAACTTGAGGTCTCAGAAACTCTTCG ACCGCAGCGGCAGAGCCCTGATGCCCGACTGCCATTAGCTAGTTTGCAAATAAGCCTACCAAGCAGCCCG AGCACAGTCTACTATCCTCTACATTACAGGTGAATGCGGTCGAAGTCATTATAAGTCATTGGGTTCGATA TCTAGGTAGTTGGTTAGAAGATAGTAAGAATACGATAAATTTGCTCGCGTAGTCCTCCCGGAGTTGTATA TGTGAGATAGTACTGGGTATCGAAGTGGAGGACCGCAGCACCCGATCTTCTCTTCGTAATGGTTTGCACT ATAGGCGTAGCTTATAGTCTTACTTCCCCTCGGGAATTTACAACCTGATTTGTACTCGCTGGCAAGAACT ATAAACATGTGCCGGTGAGTTACAAGGAGAATTAAAGCTCCGTGCCGTGAGCAAGATCCGGGCCTGGGTG TAAGTCACAGGTAGATTTACCGTTAGCTACCACCGCTTTACCTGATCTTGCCCAAGTGCATGACTTTTGG TATGGTGATGGATTCAGATTCGCGACCTAACACCGTACAAAGAAGCAAGTATGCGGTTGTATTCAGCCAG CCCGCTAGAGGCGAGATCAGCCCCTGAAAATGGAATAAGCTAGCCAGAGTGTAACTTCAAGTGATTTTTC ATGGCTTTACGTCTTGGTATTGTCGGAGGAAGTGCACCACGTCAGATCATCTCACAGCGCGTAGCCTTTT CGTGGGTGATCCGTCTTACGCACGGTACGTCACAGAGGGGCTATTCTTGCCACTGTGTCCGGGTTCTGTG AACCACTAACCCGTCCTAAAGTTATGCTACCCCTTGAGGAAAGGATATACAGATTAGCTGGAATATGACC AGGTAGTCCTGCGTCCTTGTGATGATGTAAACGGATATAGTTACTCTGACCGGCCGCTGCTGGGCTCGTG AGGATCCGTGACAGGCCAATCAGAGTTACTTGCCGCGATGGATTGGAAGCAACGAACACTTACCGAGACG GAGGTCATCGACGGATTGTACTCGATTGAAAGATATACTGTAAGTACGGCTTCCGCAGACTCATACTCGT TCTTAGACCGGAGTGCCTAGATTAGACGCTGCCCACACGGACTGCCATAACATCAGCGCTCTATCCAGGT AGCTCGAGGCTTCGCGCGCTGGTGCAAGTGCTCGTTCAGTCCTTAAGCTTCGGAAACAGATAACTTAAAG AGTAGACACAATTTTTTAAGCTTCAGCCTCAATAACACGCGGCCGACCCCAGCTAGGCACGAGGAGAAAT GTGTAAAGGCAGAGCAATCCCACATCGAGTTAGCATGACAGGGTAATCACGGCTGCCCCTACAGTCGCGA CAGATGATTCACTGCCTGGCGTGTGCAATGCCTTCTGGGGTTATCGACCTGTGCTCGACGTTTGTAGCTC AGACCGTGTAGGACTTGGAACTTAGGAAATCAGGGAACGAGCTGGTGAGGCTCATTTAATACAGAGGCAC CGGCCCATCGCGCTTCGGCTGCAGTCCGGCAAATCCACACAGACCATATATCCAACCCAATAATTCCATC GTAGCGTAGGCGTGGTAACACACCACTCCTTTTTGCCCGATTTTGCTGGATTACCAGAGATCTTCGATAG 
TACGTTAGGTTGTATACGCTGCAGACAAAAGAAATAAATGAGATGAATGCGAGTTTCTTGAGTCGGGGTC CTAGTTGTGACCCTCATGAGCACAGAGATGGAGGGCGACACGGGGAGCTGAATCACCTGACCCTAACGCG TGAGAGGG >gi|00000002| GCTCCGCCGTAAGATAAGTAGTGTGCCTAGGACGAGACTTCGTAGGATATAGTGGGAGGAAATGGAATGA CTGATCGTATTACGTACTATTGATCGTGCCGTACTTGGTTCGTTTGTGGCCGTGAGTGGGAAATGATCCG CTATGCCACAACTATACAATAAATTAGGTAATCGCCAGTAACGCGAAAAACTGTCCATGTATTGCCTGTT GACACTCCAGGGACTGACGTCGGAATGACCTTCAAGTGAAAAGAATGAAGTACGGACAAGGTAAACTACA ACTGGTCCAGCATCGGGATGTCGTGGACGCCGTCCCAGGCACCATATCAGCTACCGAGAGTACGGGATAT TTAGCTAAATGATTCCCGCCTGTCCGTGCACGCGAGCTGGTGAACGACCGCCGTGCATAAGTTCTAGCCG AAGAACAGAGGGAGGTCCTAGATAGAGGACCCGTCATGGGCCTTGATTCCCCGCCGGCTCGGTCCTGCCG TAGGCCATCTAATGGGGATGTTTGCATGACTTCGCGTGATGGGAAGTCCAGAGTTCCAACTGACCATGTA AACGCCATATTTCTGCCCCCTCGTCTCTGCGGCTCGTCCTCCTTGGACACCATATCTCAGGTATACGAAA ATTTACGCATCTTGGTTATTATGGAGGGTGGAGGTTCGATCCGACAAACTAGGTATGGTCCACAGATCTA AAGTCTACACCATAAATGTAACCGGTTCCTCCATCCTCTATATTACTACTAACCGTCGAGCGAGGCTGTG GGTACACCAGCCGGTCGATTGCGTTAATGAGTTCCAGTGTCTAAGTCAAAAGCGGAGTACTGTGACTGAC CGAGGTCTACTAACGGGCCGTCAGTTTTTGAATAAGCTTCTCGGCTGTAGCGTACGTACTGGGGTAGCTC ATTTCAGATTGCCCGTATTGAGCTGATCTTATCGGTAGCAGTTGTAGAAAAAAAGCAGTAATAAGGTTAC TCCGCAGACTCCTCGCGTAGTGGTTTGTTTACGCCACAATTGAGTTAGGCTCAGCTATGGGTCCATAGTG CTCGGACTGAATCCCTCATTTGCCTTACGAAATCGGGGCATGAATGTATTAGATAGTCGGCTGCAATCAC TATACAGCTCAATCTAGTCACGTGAAAAGCCGGCGGAACTTAACCTGTGTTAGGGGGAGCCAATGGGTTT CAATAGGCAATGTAGTTACCTACAACTTATGCTGAACCGGACCGGAGGATTACACCCAAGATATGAAAAG TCAATGAGCAGAAGCCATGACGACGGGTTTCCAGCACGTGGCTTAAAAGTCGGCAAATACGTTGACTATG CTGTAGCAGTCCATGTCGAGTCATCTATGTGGAGGCCCCTGGGTGCAACGGATAGACTTAGCGTCTGGAA AGTCTAGGACAGCATTAATAAGCGTCGCCTTCCTTATCTGTTTAAGAATACTGGGGCCATCTCTGCAGTT GGTGTTATAAAGTTAGTGACCACCATGGAAAGGATTCAGCAAATCCGGTCTACACGGAAAACTTGGTTTC CAGCTTTGCGTATACAACATGCCCCAAGATGCTCTCATCGCCTCGTGCGTCACGCGGGTGCCACGACTTG GCAGTGTCTAAACCTTAGCGCTCCCAGGACAGCACTTAAGCCTTTATAAATCACTGCCTAATAGTCCAGG AACACTCCCGGAACCTACTCGTATCTATTTCATGATGGTAATACACAATTGCTGACCCTCCGCCCGCTAG 
TAAAGCCGGAGAGCGGAAACGCTAGAGTAGCTTTTGCCATATATGAGTCGAGAAGTAAGCTAGTACCGGG CAGCAACGCCCATGAGTGGATGCGAATTCTGACTAGTATCCATTCAGGTTAAGTTAACAAAACGCGTTAG AAAGACTTAACCTTCGCAACTTTAGAGGATAATCGAGGATGGAGGGATGCCGCAGTCCCCACGATCCCTG AGACAGTCGGGAAACGTTCGCCTTGGATTAGAGTCGGTTCGGCAGGTGCATTGGGCATCATAAAAGAGAA TAGACCCGACCTAAGTGTGATCAAGCTCTTTGACTCTTGTCATGACCTCTTACACCAGGCACGGGTCCAG GCGTGCGTCCCGAGGCAGAGTAAGCCATGTCCCTATCGGGATACGTTGCTCCACTCTGACAGGTTGGTAA GGATTTGAGTCAATCAACTTGCTCAGGGCCTTTAGTTCTGCCGTAAGCCGTTTGATGCGGGGGGGCGCGC TTTGCAGACTGATCTACACATTGCGAGAGCGGACAGCGGTCAAAGAAAAGATCTCAATAGATACGATGTC CCGGGAGTATCCGGCTGTTTTCCACACCTTCTACGTAGTGTAGTTATGTCTATTTAATCACACTGTCAAA TCTATGTAGGCCGCCCCGCTGTAAGGTAGTCCAAGTGCAAACAGTTACGCACAACTGATCACTCGCCCGC GGTACGACTATATAACATCTAGGAAACCGGTATGAACAGAAGAAGCCGATTCGCCGGTTATGATCCTGTG TTAAATCGCGCAATGGCCCCCTCGAGAAAAGAATAAGGGACATCTAGCCCTTTATCGCATTCCTCGACTG GTGGCTGCATAGCCACGCCTGTATGAGGCCAATTTGTCATCCCGGCTCTCACCCAGGCCCGGTTGGGTGG TATGTTGGAGTTCCACCTCACTCGGGGGATGAGCCAGACTGTCATGTCTCCATAGTGGCAGATCGACACT GTTCTTATATCCCAGTGACGTTTAACCGGGGGCCTTATACTTGGGTAAAGGTACCGAGGAATAGCAGGTG TACGAAGAAGTTGAATGAGCCGCCCCGACGGCGCCTAGAACTTTTAGGAATGTCAAAGTCCGAAGTTGTT GTGACTCCGAGGCGACAGTTGCTCACCTAAGGTCTACAGTCACGTGTTGTTGAGAATGGCTTCTTACGTC CTGTAGAGCGCCCTCAAGTGGACCAGACTATGACGTCCGAGCATCGGCTTAAACGGTGTTTGCCCGCGGC TGTATCGAGATACGACCCTAGAGTTAACGTCCGGTTGCGTAATAGACGGTAATCGGGATTGCCAGCAGGA AAGATGATCGGAGGGAAACTTCCCGTGGGATTCGTCCTAGGTGCGCCTTGGTGTGTCATCACCGTTGGTA GGAACGGTGCTCACAGAGTATTACGTAAGACGTCAAAGAGTTTAAATCTGGTGGGAGCCAGGACAACATG GGCGTGTCAGAGCCCGATTGACAAGACTAATTTAGGTCCTAGGAGATTTGCTACTTGAATCTTAACTTGA GGTCTCAGAAACTCTTCGACCGCAGCGGCAGACCCCTGATGCCCGACTGCCATTCGCTAGTTTGCAAGTA AGCCTACCAAGGAGCCCGAGCATACTATGCTCTAAAGTACCGCTGGGCGTAGTCCGTGAAGGTAATCATA ACAGGTCAATGCGGTCGAAGTCATTATCGGTCATTGGGTTCGATATCTAGGTAGTTGTTTAGAAGTTAGT AAGAATACGATAAATTTGCACGCGAAGTCCTGAGGTGTATAAGTGAGATCGTATTGGGTATCGAAATGGA GGACCGCAGCACCCGATCTTCTGTTGGTAATGGTTTCCACCATACGCGTTGCTTATATTCTTACTTCACA TCGGGAATATGCAACCTGAATTGTACTCGCTGGCAAGTAGTATATAAATGTGCCCGTGAGTTACAAGGAG 
TATTAGGTGCGCACGAAAGCTCCGTGGCGTGAGCAAGATCCGGGCCTGGGTGTAAGTCACAGGTAGATTT ACCGTTAACTACCACCGCTTTACCTGATCTTGCCCAGGTGCTTGAATTTTGGTATGGCGACCTAACACCG TACAAGTAAGCAAGTATGCGGTTATATTCAGCCAGCCCGCTAGAGGCGAGATCATCCCCTGTAAATGGAT TAAGCTAGACAGGGTGTAACTTCAAGCGATTTTTCATGGCTTTACGTCTTAGTATTGTCGGAGAAAGTAC ACCATGACAGTATTGACATATCATCTGACAGCGCGTAGCCTTTTCGTGGGTGATGCGTCTTATCCACGGT ACGTCACAGAGGGGCTATTCTTGCCACTGTGTCCGGGTTCTGTGAACCACTAACCCATGCTACCCCTTGA GGAAAGGATATACAGATTAGCTGGACTATGACCAGGGAGTCCTGGGTCATTGTGATGATGTAATCGTATT TAGTTACTCTGACCGGCTGCGTGAGTGGGGGAGCGACCGCTGCTGGGCTCGTGAGGAAATAGGCCCAGCG GAAGCTCCGTGACAGGCCAATCAGAGTCACTTGCCGCGATGGACGTTCCTTGTCGTCCAGTGGGACAAGG TCGTTCTGGCGATTGGAAGCAAACACCAGCACGTCAGATGTCATCGACGGATCTCGTACTTAGCGGAGTT CTTAGATTAGACGATGCCCACACGGACTGCCATAACGTCAGCGCTCTACCCTGGTAGGTCCAGGCTTCGC GCGCTGGTGCAAGTGCTCGTTCATTCCTTTAGCTTCGGAAACGGATAACTTAAATCGTAGACACAATGTT TTAAGCTTCACCCTCCGACCCCAGATAGGCACGAGGAGAAATGGGTAAAGGGAGAACAATTCCACATCGA GTTAGCATGACAGTGTAATCGTGGTGACGGCTGCCCCTACAGTCGCGACAGATGATTCACTGCCTGCCGT GTGCAAAGCCTTCTTGGTTTATCGACCTGTGCTCGACGTATGTAGCTGAGACCGTGTAGGACTTGGAACT TTTGAAATAAGGGAACGAGCTGGGGAGGCTCATTTAAGACAGACTATGGCTGCAGTCCGGCAAAACCACC CAGATCATATATCCAACCCAATAATTCCATTGTAGCGTAGGCGTGGTAACACAGATTACCAGAGATCTTC GACAGTACGTTAGGTTGTATACGCTGCAGACAAAAGAAATATTTGAGATGAATACGAGCCAGATGTGACC CTCATGAACACAGAGATGGAGGGCGACACGGGGAGCTGAATCAACTGACCCTAAGGCGTGAGAGGG >gi|00000003| GCTCCCCCGTAAGATAAGTAGTGTGCCTAGGACAAGACTTCGTAGGATGTAGTGGGAGAAAATGGAATGA CTGAACTTATGACGTACTATTGATCGAGCCGTACTTGGTTTGTTTGTGGCCGTGAGTGGCAAATGAACCG CTATGTCACAACCATACAATAAATTAGAAAATCGCCTTTAATGCGACTATCAGGTCTGTTGTTCTAGTTC GGCCAGTGCTAAAACTGTCCATGTATTGCCTGTTGACACTCCATCCGCTAGCCCCCTTAGCTACACTTTC GTGCACTGGGTCCGTTATGTTAACGAGGTGAGTACGTGTCGACGGGACTGACGTCCGAATGACCTTCAAG TGAAAAGAATGAAGTACGAACAAGGTAAACTACAACTGGACCAGCATCCGGATGTCGTGGACGCCGTCCC AGGCGCCATATCAGCTGCAGAGAGTACGGGATATTTAATTATCATTTGCCCTGCTAAATGATTCCCGCCT GTCCGTGCACTCGAGCTGGTGAACGAGCGCCGTGCAGAAGTTCTAGCCGAAGTACAGAGGGAGGTCCTAG AATCATGGGGCTTGCCTCCCCGCCGGCTCGGTCCTGCCGTTGGCCATCTAATGGGGATGTTTGAATGCCT 
TCGCGTGATCGGAGGTCCAGAGTTCCAACTGACCATGTACGCGCCATATTTCTGCCCCCTCGTCTCTGCG GCTCGTCCTCCTTGGACACCATATCTCAGGTATACGAAAATTTACGCATCTTCGGTATTATGGAGCGTGG AGGTTCGATCCGACAGACTCGTTTGGGGTTGGCAGGAAACGGGGCTGATTCGACGTATGGTCCACAGCTC TAAAGTCTACACCACCGTCGAGCGAGGCTGTGGGTACACCAGCCGGTCGATTGCGTTAATGACTTCCAGT GTCTAAGTCAAAAGCGGCGAACGGTGACTGACAGAGGTCTGGTAACGGGCCCTCAGTTTTTGAATAAGCT TCTCGGCTGTAGCGCACGAACTGGGGTAGCTCATTTCAGATTGCCCGTAGTGAGCTGATCTTATCGGTAG AAGCTGTAGAAAAAAAGCAGTGATAAGGTTACTCCGCAGACTCCTATACCAGTCCCCCTTCTCAGCTATG GGTCGATAGTGCACGGACTGAATCCCGTAAGTTCCTTTAAAGGGACATTCTTTCGTGATGCGGATTTGAT TTGCCTTACGATTTCGGGGCATGAATGTATTAGATAGTCGGCTGCAATCACTATACAGCTCAATCTAAAG CCGGCTGAACTTAACCTGTGTTAGGCGGATCCAACGGGTTTCAATAGTCAATGTAGTTACCTACAAGTTA TGCTGGACCGGACCGGAGGATTACACCCAAGATATGAAAAGTCAATGAGCAGATGCCATGACGACGGGTT TCCAGCACGTGGCTTGAAGAGACCGCAAATGTCGGCAAATGCGTTGGCTATGCTGTAGCAGTCCATGTCG AGTCATCTATGTGGAGCCCTGGGTGCAACGGATAGACTTAGCGTCTGGAAAGTCTAGGACAGCATTAATA AGCATGGCCTTCCTTATCTGTTCAAGAATACTGGGGTCATCTCTGCAGTTGGTGTTAAAAACATAGTGAC CACGGCGTACTCGCCTAACGTTGCACGGCATACATTCCGCTGGGACATGGAAAGGATTCCGCAAACACGG TTTTCACGGAAAACTTCGTTTCCAGCTTTGCGTATACAACATGACCCAAGATGCTCTGATCGGCTCGTGC GTCACGCGGGTGCCACGACTTGGCAGTGTCAAAACCTTAGCGCTCCCAGGACAGCACTTAAGCCTTTATA AATCACTGCCTAATAGTCCAGGATCACTCCCGGAAACTAATAGTATCTATTTCATTATGGTAGTGCACAA TTGCTTACCCTCCGCCCGCTAGTAAAGCCTGAGAGCGGAAACGCTATAGTAGCTTTTGCCATATATGAGT CGAGACGTAATGTACTACCGGGCAGCAACGCCCATGAATGGATGCGAATTCTGACTAGAATCCATTTACG ACATGGAGTTAACGAAGTCAGCATTTATCCACAAAGTTAAGCGCAGCTTAAGTTGTTAACCTTCGCAACT TTAGAGGATAATCGAAGATGGAGGGATGCCGCAGTCCCCACGATCCCTGAGACAGTCGGGGAACGTTCGC CTTGGATTAGAGGGAGAAATTCTATTACTTAGCGAGCATAAACGTTGGGCATCAAGCAAAGATGGCCTAC TAGGGCGATTTCCCTGTATAAAAGAGAATAGACCCGTCCTAACTGTGATCAAGCTCCTTGACACTTGTCA TGACCTCTTACACCAGGCACGGCTCCAGGCCTGCGTCCCGAGGCAGAGTACACCATGTCCCTATCGGGAC ACGTTGCTCCACTCTGACAGGTTGGTAAGGATCTGAGTCAATCAACTGGCTCAGGGCTTTTAGTGCTGCC TTAAGCATTATCCTTACAACAGCAGGGCTCATGCATCTTACCTCCCCGGGCTGTCCGTTTGATGCGGGGG GGTGCGATGACAGCGGTCAAAGAAAAGATCTCAATAGATACGTTGTCCCGGGAGTATCTGGCTGTTTCCC 
ACACCTTCATCGTAGTGTAGTTATCTCTATTTAATCACACTGTCCAATCTATGTAGGCAGCCCCGCTGTA AGGTAGTCCAAGTGCAAACAGGTAGGCACTACTGTTCACTCGCCCGCGGTACCACTTTATAACATCTAGG AACCCTGTAAGCCAATATCAGATAAACCGGTATGAACAGAATTTTACTTCGTCTGCACGAATATAGAAGC CGATTCGCCGGTTATGATCCTGTGTTAAATGCGGCAATGGCCCCCTCTAGAAAAGAATAAGGGACTTCTA GCCAATTATCGCATTCCTTCACTGGTGGGTGCGTAGCCACACCGTGGTATGTTAGAGTTCCACCTCACTC GGGTGATGACCCAGACTGTCATGTCTCCAGCGGTCCTGCATGCGAGGGGTGGTCAATGTGAACCTTACCT AGATCCGTAGTTGCAGATCGACACTGTTCTTATATCCCATTGACGTTTAATCGGGGGCCTTATACTTGGG CAGAGGTACCGTGGAATTGCATGTGTATCCAAATAAGCGGGGGCTTATATTCTCCGCCCGTCTTCAATGG CAAAGTTTAATGAGTCGCCCCGACGGGGCCTATACCTTTTCGAAGTGTCAAAGTCCTAAGTTGTTGTGAC TCCGAAGCGACAGTTGCTCAGCTAAGGTATACAGTCACGTCTTGTCGAGAATGGCTTCTTACGTCCTGTA GAGCGGCTTCAAGTGGACCAGACTATGACGTCCGAGCATCGGCTTAAACAGTGATTGCCCGTGGCTGTAT CGAGGTACGACCCTAGAGTTGACGTGCGCTTGCGTAATAGACTGTAATCGGGATTGCCATCAGGGAAGAT GATCGGAGGAAAACTTCTCGTGGGATTCACCCTAGGTGGGCCTTGGTGTGTCATCATCGTTGGTAGTAAC GGTGCTCAGAGAGTATTACGTAAGACGTCAAAGAATTTAAGTCTGGTTGGAGCCAGGACAACATGGGCGT GTTAGAGCCCGACTGACAAGACTGATTTAGATCGTAGGAGATTAGATACTTGAATCTTAACTTGAGGTCT CAGAAACTCTTCTACCGCAGCGGCAGACCCCGGATGGCCGTCTGCCATTCGCTAAACTGAAGGAATCCGG GAGACAGTCTGCCACGTTAGAGTACCGCTGTGCGTAGTGCGTGCAGGTAATCATATCAGGTTAATGCGGT CGAAGTCATTATCGGTCATTGTGTTCGATATCTAGTTAGTTGTTTAGAAGTGAATAAGATTACGATAAAT TTGCACGCGGGTGTATAAGTGATATCGTATTGGGTATCGAAATGGAGTACCGCCATCGTAACGACGCAGC ACCCAGTGAACTAGGCAGATGGCAATTTATCTGGAGCAAGCGACCGATGCCTTTCATAGGTTTCCAGTAT GAGTGAGGTCTACTTGATGGCGGATTCTTCTGTTGGTAATGGTTTCTACCATACTCGTTGCTTATATTCT TACTTTACATCGGGAATATGCAACCTGAATTGTACTCGCTGCAGCTTACGCTGCCACTTGCTTGCGTTGT GGCGCGCCTGTAGCGAGTTGGCCGCAAGTAGTATATAAATGTGCCCGTGCGTGAGCAAGATCCGGGCATG GGTGTAAGTCACAGGTAGATTTACCGTTAACGACCACCGCTTAACGTGCTCTTGCCCAGGTGCTTGAATT TTGGTATGGCGACCTAACACCGTAAAAGTAAGCAAGTATGCGATTACATTCAGCCAGCCCTCTAGAGGCG AGATCATCACCTTAAGTTCAAGCGATTTTTCTTCGCTTTACGTATTACACTTATTCATCTGACAGCGCGT AGCCTTTGCGTGGTTGATGCGTCTTATCCACGGTACGTGACAGAGGGGCCATTCTTGCCACTGTGTCCGG GTTCTGTGAACCACTAACCCGTGGCACCCCTTGAGGAATGGATATACAGATTAGCTGGACTATCACCAGG 
GAGTCCTGGGTCATTGGGATGATGGAATCGTGTTTAGTTCGTCTGACCGGCTACGTGAGTGGGGGAGCGA CCGCTGCTGGGCTCGTGAGGAAATAGCCCCACGTGACAGGCGAATCAGAGACACTTGCCGCGATGGACGC TCCTTGTCGTCCAGTGGGCGCAGGTCGTTCGGGCGAATGGCAGCAAACATTGGGGGGAAATGCCCAGCAC GACAGATGTGATCGACGGATCTCGGACTTAGTCGAGGTCTTAGATTACACTGTGGGAGGTCTTTTAATTG TTTGTTTCCGACCTGAGTTCTAAGAATATGTCCCACCCAGACTGGCATAACGTCAGCGCTCTACCCTGGT AGGTCCAGGCATCGCGCGCTGGTGCAAGTCCTCGTTCATTCGTTTAGCTTCGGACATCGGTTTGCTCCAT TTTTAATCTTAACGAAGTGCGCAGAGCGACCCGGATAACTTAAATCGTAGACACAATGTTTTAAGCTTCA CCTTCCGACCCCAGATAGACACGAGGAGAAATGGGTAAAGTGAGAACAATTCCACATCGAGTTAGCAGGA CTGTGTAATCGCGATGCCAGCTGCCCCGACAGTCGCGAGAGATAATTTACTGCCTGGCGTGTGCGAAGCT TTCTTAGTTTATCGACCTGTGCTCGACGTATGTAGCTGAGATCGTGCAGGACACTGTTTTGAAATTTGAG ACAGACTATGGCTGCAGTCCGGCAAAACCACCCAGAGCATTTATCCAACCGAATAACTCCATCGTTGCGT AGGCGTGGGAACACCGATTAACAGAGACATCCGACAGTACGGTAGGTTGTAAACGCTGCATACAAGAGAA ATATTTGAGATGAATACGAGCCAGATGGGTCCATCATGGACACAGAGATGGAGGCCGACACGAGGAGCTG AATCAACTGACCCTAAGGCGTGAGAGGG >gi|00000004| GCTCCCCCGTAAGATACGTAATGTGCCTATCTGGACAAGACTTCATAGGATGTAGTGCGAGAAAAGCCGT ACTTGGTTTGTTTGTGGCCGTGAGTGGCAAATGAGCTGCTATGTCACAACCATACAACAAATTAAAATAT TGCCTTTAATGCGACTATCAGGTCTGTTGTTCTAGTTCGGCCATTGCTAGAACTGTCCATCTATTGCCTG TTGACACTCCATCCGCTAGCCCCTGCATATGGTATCTGCGTTGTCTTAGCTTCACTTTCCTGCACTGGGT GCGTTATGTTAACGATGCGAATGATGAAAAATATGTTGTGGGAACAGATTCTGTTGGAGTACGTGTCGAC GGGAGTGACGTCGGAATGGCCTTCAAGTGAAACGAATGTAGTACGAACAAGGTAAACTACAATTGGACCA GTATCCAGATGACGTTGACACAAAGCCCCCTTCTTCATAGGTTCCCTAGACTAGTGTCAAGAAATTGAAC GTCCCGGTCGCCATATCAGCTGCAGAGAGTACGGGATATTTAATTTTCATTTGCCCTGCTAAATGATGCC CGCCTGTCCGTGCACTCGACCTGCTGAACGAGCGCCGTGCAAACGTTCTATCCGAATCACAGAGGGAGGT CCTAGAATCATGGTGCGTGCCTCCCCGCCGGCTCGGTCCTGCCGTTCGCCAGCTAAGGGGGATGTTTGAA TGCTTTCGCGTGATCGGAGGTCCAGAGTTCCAACTGACCATGAACGCGCCATATTTCTCCCCCCTCGTCT CTGCGGCTCGTGCTCAGTGGACACCATATCTCAGGTATACGGAAATTTACCTATCTTCGGTATTATGGAG CGTGCTGGTTCGAGCCGAGAGAGTCCTTTGGGGTTGGCAGGAAACGGGGCTAATTCGACGTATGGTCCAC AGCTCTAAAGTCTACACCACCGTCGAGCGAGGCTGTGGGTACACCGGCCGGCCGATTGCGTTAATGACTA 
GTAAAGCGGCGAACTGTAACTGGGTTTTTGAATAAGCTTCTCGGCTGTAGCGCACGAAGGGTGGGGTAGC TCATTTGAGATAGCCCGTAGTGAGCTGCTCTTATCGGTAGAAGCGGTAGAAAAAAAGCGGTGATTAGGTT ACTCCGCAGACTGAATCCCGTAAGTTCCTTTAAAGGGACATTCTTTCGTGATGCGGATTTGATTTGCCTT ACGATTTCGTTTGACAATCGCCGGACGGGCATGAATGTATTAGATAGTCGGGTGCAATCACTATACATTT CAATTTAAAGCCGGCTGAACTTTACCTGTGTTAGGCGGATCCAACGGGCTTCTATGGGTGTGGGGCACGG TGACCAATAGTCAATGTAGTTACCTACAAGTTATGCCGGACCGGACCGGAGGATTACACCCAAGATATGT AGAGTCAATGAGCAGATGCCATGACGACGGGTTTCCAGCACGTGGCTTGCAGAGTCCGCAAATGTCGGCA AATGCCTTGGCTATGCTGTAGGAGTCCATGTCTAGTAACCTATGTGGAGCCCTTGGTGCAGCGGATAGAC TTGGCGTCTGCAAAGTCTAGGACAGCATTACTCAGGATGGCCTTCCTTATCTGTTTAAAAAAACTGGGAA CATCTCTGCAGTTGGTGTTAAATACATAGTGACCACGGCGTACTCGCCTAACGTTGCACGGCATACATTC CGATGGCACATGGAAAGGATTCCGCAAACACGGTTTCCACGGATAACTGCGTTACCAGCTTTGCGTATAC AACATGACCCAAGATGATCTGATAGGCTCGTGCGTCACGCGGGGGGCACGACTTGTCAAAGCCTGAGCGT TCCCAGGACAGCACTTAAGAAGGCTCCCTCCCGGAAACTAATAGTATCTATTTCATTATGGGAGAAGACA ATTGCTTACCATCTGCCTGCTAGTAAAGCCGGACAGCGGAAACGCTATAGTAGCTTTTGCCATATATGTC CTCGTCTTGATGTCGAGACGCAACTTTTGACTAGAGTCCATTTACGCCATGGAGTTAACGAAGTCAGCAT TTATCCATAAAGTTAAGCGCAGCCAATAGCACTATGGATTTCATGGAGTCCCCACGATCCCTGAGACAGT CGGGAAACGTTCGCCTTGGATTAGAGGGAGAAATTCTATGGCCGACGAGGGCGACTTCCCTGTATAAAAG AGAATAGACCCGTCCTAACTGTGAACAAGCTCCTGGACACTTGTCATAACCTCTTACACCAAGCACGACC CCAGGCCTGCGTCCCGAGGCAGAGTACACCATGCCCCTATAGGGATACGTTCGGTGTCTCAATATGGTTA TTCGGTGCGCTAGGTGGCGCCACACAGACGGGTTGGTAAGTATCTGCGTCAATCAACTGGCTCCGGGTGC TTCTAACCCGCCATTTGACAGAAGGATGAGAACGACGTTAACCTTCTTACCTCCGCGGTCTGTCCGTTTG ATGCGGGCGGGTGCGATGACAGCACTCAAAGAAAAGATCTCAATGGATACGTTGTCCCGGGAGTATCTGG ATTTAATCACAGTGTCCAATCTATGTTGGCAGCTCCGCTGGAAGAGCACAGCAGGAAAGATATTGCATAG CACTGCCTGTAGTCCAAGTGGAGACAGGTAAGTACTACTGTTCAGTCGCACCCTGTACGGCAATTTAAGA TAAACCGGTAAGAACAGAAATTTACTTCGTCTGCACGAATATAGAAGCCCCTTCGCCGGTTATAATCCTG TGTTACATGCGGCAATGGCGAATGGTTCACTGGTGGGAGCGTAGCCACACCTTGGTATGTTACATTTCCA CCTCACTCTGGCGATGACCCAGACTGTCATGTCTCCATCGGTCCTCAGATCGACGCTGTTCTTAGATACC ATTGACGTTTAATCGGGGGCCTTATACATGGGCAGAGGTACCGTGGAATTGCCTGTGTATCCATATAAGC 
GAGGGATTGTATTCTCCGCCCGTCTTCAACGACAAAGTTTAACGAGTCTCCCCGACGGGGCCTAGTTGCT CAGCTAAGGTATACAGTCACGTCTTGTCGAGAATGGCAACCTTCTTACGTGCTGGAGAGCATGTAAGCTT GCTACTATTCGGCGGCTTCAAGGGGACCTGACTATGTCGTCCGAACATCGGCTTAAACTGTAATTGCCCG TGGCGGTATCTCCCCGTGTTGAGGTACGACCCTAGAGTTCACGTGCGCTTGCGTAATAGACTGTAATCGA GATTGCCATCAGGGAAGATGATCGAAGGAACACTTCTCGTGGGATTCAGCTTTGATGGGCCTTCGTGTGG AATCATTGTTGGTCGTAACGGTCCTCAGAGAGTATTACGTAAGGCGACCAACAATTGAAGTCTGGTTGGA GCCAGGACAACATGGGCGTGTTAGAGCCTGACTGACAAGACTGATTCAGCTCGTAGGAGATTAGATACTT CAATCTTAACTTGAGGTCTCAGAAACTCTTCTACCGCCGCGCCAGACCTGCCAATCGCTAAACTGAAGGT GCGTAGTGCGTGCAGGTAATCATATCAGGTTAATGCGGTCGAAGTCCTTATCGGCCATTGTGTTCGATAT CTAGTCAGTTGTTTAGAAGTGAATAAGCTTACGATAAAGTTGCACGCCGGATTCCGATACCTGTCCTCAC GCAGCACCTAGTGGACTAGTCAGATGGCAATTCATCTGGAGCAAGGCAGCGATGCCTTTCATGCTCATTA CGTGTAGCACCCCTACGTGCCGAATTTAGGCTCCAAGTATGAGTAAGGTCTACTTGATGGCGGATTCTTC TGTTGGTAATGGTTTCTACAAAACTCGCTGCTTATATGCTTACTTTACATAAGGAATATTTTTTGATTGC AACCTGAATTGTACTCGCTGCAGCTTACGCTGCTACTTGCTTAAAATCTGTTCAGAGGCCTTGCGGCGCG CCTGTAGCGAGCTGGCCGCAAGTATTATATAAATGTGCCCGTGCGTAAGCAAGATCCGGGCAAGGGCGGA AGTCACAGGTAGATTTACAGTTAACGACCACCGTTTAACGTGCGATTGCCCAGGTGCTTGAATTTTGGTA TGTAAACCCGGCTCGCGACCTAACACCGTAAAAGCAAGCAAGTATGCGATTACATTCAGCCAGCCCAAAC AAAGTTAGTAGTCGCAAAACTCTAGCGGCACGATCATCACATGAAGTTCAAGCGATTTGCGTGGTTGATG CGTCTTATCCACGGTACGTGACAGAGCGGCCATTCTTGCCACTGTGTCCGGGTTCTGTGAACCACTAACC CGTGGCACCCCTTGAGGAATCGATATAGAGATTAGCTGGACTATCACCAGGAAGTCCTGGGTCATTGGGC TGAGTGAATAGTGTTTAGTCCGTCTGACCAGCTACGTGAGTGGGGGAGCGACCGCTGCGGTGCTCGTGAG GAAATAGCCCTTCGTCACAGGTGATTCAGAGACACTTGCCGCGAAGGACGCTCCTTGTCGTCCAGTGGGC GGAACATTCGGGGGAAATTCCCAGCACGACAGATGAGAGCGACGGATCTCGGACTTAGTCGAGGTCTTAG ATTACACTATGGGAGGTCTTATAATAGTTTGTTTACGACCTGAGTTCTAGGAATATGTCCTTGGCATAAC GTGAGCGCTCTACCCTGGTAGGTCCAGGCATAGCGAGCTGGTGCAAGTCCTCGTCCATCCGTTTAGCTTC GGACACCGGTTTGCTCCATTTTTGATCTTTATCTACGCTTGACGGTTCTTGATGAGACAGCCGTCGCGAA GTGCGCAGAATGTTTTAAGCTTCACCTTCCGAGCCGAGATAGACACGAGGAGAAAGGGGTAAAGTGAGAA CAACTCCTCATCGAGTTAGCAGGACTGTGTAATCGCGATGCCAGCTGCCCCGACAGTCGCGAGAGAATTT 
ACTTCCTGGCGTGTGCGAAGCTTTCTTTGTTTATCGACAGGTTTTACCTAGTCTCGGTATATTTTTGTAG CTCCTCGCGGATGTGCCAGGACACTGTTTTGAAATTTGAGACAGACTATGGCTGCAGTCCGGCAAAACCA CCCGGAGCATTTTTCCAAGTGAATAACTCCGTAGTTGCGTAGGCGTGGGAACACCGATTAACAGAGACAT CCGACAGTACGGTAGGTTGTAAACGCTGCATTCGAGAGAAATATTTGAGATGAATACGAGCCAGATGGGT CCATCATTGACACAGAGATGGATGCCGACACGAGGAGCTGAAGCAACTGACCCTAAGGCGTCAGAGATGC AAAAAGTAAGG >gi|00000005| GCTCCCCCGTAAGATTCGTGATGTGCCTATCTGGACAAGACTTCATAGGATGTAGTGCGAGAGAAGCCGC TCACCTGGCTGTTTCAGCCAACTTGGTTTGTTTGTGGCCGTGAGTGGTATATGAGCTGCTATGTCACAAC CGAATCTACACTGCCATCTCGCGAAACTTCCCTCCATACAACAAATCAAAATATTGCCGTTAATGCGACT ATCAGATCTGTTGTTCTAGTTCGGCCATTGCTAGAACTGACCATACATTACCTGTTGACACTCCATCCGC TAGCCCCTGCATATGGTATCCGCGTTGTCTTAGCTTCACTTTCCTGCACTGGGTGCGTTATGTTATCGAT GCGAATGATGATAAATATGTTGTGGGAACAGATTCTGTTGGAGTACGTGTCGACGGGAGGGACGTCTGAA TGGCCGTCAAGTGAAACGAATGTAGTACGAACAAGGTAAACGACGTTGACACAAAGCCCTCTTCTTCATA GGTCCCCTAGACTAGTGTCAATAAACTGTACGTCCCGGTCGCCATATCAGCTGCTGAGAGTACGGGATAT TTAATTTTCATTTGCGCTGCTAGATGATGCCCGCCTGTCGGTGCACTCGTCCAGCTGAACGAGCGCCGAG CAAACGTTCTATCCGAATTAAAGAAAACACGAACCATCTTCACAGAGGGAGGGCTTAGAAACGTGGTGCT TGCCTTCCCGCCGGCTCGGTCCTGCCCTTCGCCAGCTAAGGGGGATGTTTCTCCATATTTCTCCCCCCTC ATCTCTGTAGGAAGCAGCCTCGTGCTCAGTGGACACCATATCTCAGGTATACGGAAATTTACCTATCTTC GGCAGTATAGAGCGTGCAGGTTGGAGCCGAGAGAGTCCTTTGGGGTTGGCAGGAAACGGGGCTAATTCGA CGTATGGTCCACAGCTCAAAAGTCTACACCACCTTCTAGCGAGGGTGTGCGTACACCGGCCGGGCGATTG CATTAATGAGTAGTTAAGCTGCGTACTGTTCGGCCGTAGCGCACGAAGTTTGGGATAGCTCATTTGAGAA AGCCCGTAGCGGCTGCGTCGTGTGAGCTGTTATCCGTACCTACGGACCTTACGGGATCTGGGCGATGCAT GATTGGAAAGAAGCGGTAGAATTCAACCGGATACTGACTCCATGTCCAGACTGGCTAAACGTAAGTTCCT CTAAAGGGTCATTCTCGGACCGGCATGAATGTATTAGATAGTCGGGTGCAATCACTATGCATTTCAATTT TAAGCCGGCTGAACTTTACCTGTGTTAGGCGTATCCAACTGGCTTCTCTGGGTGTGGGGCATGGTGACAA AATGTCAGTGTAGTTCCTTACAAGTTATGCCGGACCGGATCGGAGGATTACGCCCAAGATCTATAGAGTC AATGAGCAGATCCAATGACTAGGAGTCCATGTCTAGTAACCTATGTGGTTAGTTTCAAGCTTGGCGTCTG AAAATTCTAGGACAGCATTACTCAGGATGGCCTTCCTTATCTGCTTAAAAAAACTGGGAACATCTCTGCA 
GTTGGTGTTAAATACATAGTGACCACGGCGTACTCGCCTAACGTTGCACGGCATACATTCCGTTGGTACA TGGAAAGGATTCCGAAAACACGGTTGCCACGGACAACTGCGTTCCCAGCTTTGCTTATACAACATGACCC AAGATGATCTGACAGGCTCGTGCGTCACGCGGGGGGTACTACTTGCAAAAGCCTGAGCTTTTCCAGGGCA GGACTTTAGAAGTCTCCCTCCCGGAAGCTAATACCATCGATTCAGTACATTATGGGAGAAGACAATTGCT TAACGTCTGCCTACTAGTAAAGCCCGACTGAGGAAACGCTATAGTAGCTTTCGCCATATATGTCCGCGTC TTAATGTCGAGATGCAACTTTTGACTAGATTCCATTTACGCCCTGGAGTTAACGAAGTCAGCATTTATCC GTAAAGTTAAGCACAGCCAATAGCAATATGGATTTGATGGACTCCCCACGATCCCTGAGACAGTCGGGAA ACGTTCGCCTTGGATTAGAGCAAGAACTTCTATGGCCTACGAGGGCTTTAGGACCGACTTCCCTGTATAA GAGAGACTAGACCCGTCCTAACTGTGCACGAGCTCCTGGACACTTGTCATAACCTCTTACGCCAAGCACC GCCCCAGGCCTGCGTCCCGAGTCAGAGTATAACATGCCCCTATAGGGAGACGTTCGGTGTCTCAAGATGG TTATTCGGTGCGCTAGGTGGCGCCGCACAGACAGGTTGGTAAGTATCCGCGTCAATCAACTGGCTCCGGG TGCTTCTAACCCGCCTTTTGACAGAAGAATGAGAACGACGTTAACCTTCTTACCTCCGAGGTCTGTCCGT TTGATGCGGGCGGGTGCGATGACAGAACTCAAAGAAAAGATCTCAATGGAGACCGAGTCCAGCGAGTATC TGGATTTACTATGTCTAATCTATGTTGGCAGCTCCGCTGGAGGAGCACAGCAGGAAAGATATTGCATAGC ACTCCCTGTAGTCCAAGTGGAGACAATGATATGATGTGTAAGGACTACAGTTCAGTCGCACCCCGTACGG CAATTTAAGATAAACGGCTAAAAACAGACATTTACTTCGTCTGCAAGAATATAGAAGCCCCAATGGTTCA CTGGTGGGAGCGTAGATACACCTTAGTATGTTACATTTCCACCTCACTCTGGCCCTGACCCAGACTGTCA TGTTTCCATCGGTGCTCAGATCGACGCTGTTCTTAGATGTTTAATCGGGGGCCTTATACATGGGCAGAGG TACCGTGGAATTGCCTGTGTATCCATATAACCGCGGGATTGTATTGTCCGCCTGTCTGCAACGACAAAGT TTAACGAGTCACCCCAGATCGAACGGGGCCTAGTTGCTTCGCTAAGGTATACAGAGGGCATGTAAGCTTG CTACTATTCGGCGGCCGTATACTGCCGTCGCAACTTCGAGGGGACCTGACTATGTCGTCCGACCAACGGC TTAAAGTATAATCCCCCGTGGCGGTATCTCCCCGTGTTGACGTACGACCCTAGAGTTCACGTGCGCTTGC GTAATAGACTGTAATCGAGATTGCCGTCAGGGAAGATGATCGAAGGAACACTACTCGTGGAATTCAGCTT TCATGGGCCTTCGTGTCGAGAGGACCAACAATTGAGGTCTGGTTGGAGCCAGGACAACATGGGCGTGTTA GAGCCTGACTGACAAGACAGATTCAACTCGTAGGAGATTAGATACTTCAATCTTAACTTGACGTCTCAGA AACTCTTCTACCGCCGCGCCAGACCTGCCAATCGCTAAACTGAAGGTGCGTAGTGCGTGCAGGTAATCCT ATCTGGTTAATGCGTTCGAAGTCCTTACCATTGTGGTCGATATCTAGTCAGTTGTTTAGAAGTGAATAAG CTCACGTTAAAGCACCTAATTGACTAGTCAACTGGTAATTCATCTGGCGCAATGCAGGCTCCAAGTATGA 
GTATGGTCTGCTTGATGGCGGATTCAGCGTATCCCTAGTCCCTCTGAGTACTCAGGATTCTATGCTTACT TTTTTTGACCCTAGTCAGACAGCCCGCCAAATTGAATAGCATTAATCGCTCCAGCTTACGCTGCTCGAGC TGGCCGCAAGTATTATGTCAATGTGCCCGTGCGTAAGCAAGATCGGGGCAAGGGTCGAAGTCACAGGTAG ATCTACAGTTAACGACCACCGTTTAACGTGCGACTGCCCATGTGCTTGAAGGTTGGTTTGTAAACCCGGC TAGCGACCTAACACCCAGAAAGAAAGCAAGTATGCGATTACATTTAGCCAGTCCAAACAAAGGTAGTAGT CGCATAACTCTAGCGGCACAATCATCACATAAAGCTCAAGAGATTTGCGTGGTTGATGCGTCTTATACAC GGTACGTGACAGAGCGGCCATTCTTGCCACTGTGTCCGGGTTCTGTGAACCACTGACCGGTGACACCCCT TGAGTAATGGATACAGAGATTAGCTGGACTATCACCAGGAAGTCCTAGGTAATTGTGCTGAGTCAATAGT GTTTAGTCCGTCCCTACGCCCTAGGAGCCTAATATGTTGTTCTCTGCACTGGTGCGTATATTGACCAGCT ACGAGAGTTGAGGAAATAGCCCTTCGTCACAGGTGATACAGAGACACTTGCCGCGAAGGACGCTCCTTGT CGTCCAGTGGGGGGAACATTCGGGGGAAATTCCCAGCACGATAGATGAGAGAGAGTGATGCTAGACGGCT CTCGTGTGGATTACCTCGGACTTTGTCGAGGTCTTAGATTACACTAAGGGAGGTCTTATTGGCATATCGC GAGCACTCTACCCTAGTAGGTCCAGGCATAGCAAGCGTCGGGCGCTCCTCGTCCATCCGTTTAGCTTCGG ACCCCGGTTTGCTCCATGTTTGATCTTTATCTACGCTTGACGGTACTTGAGGCTACCCGTCGTTGGTATC CCAAACCGACCCAATTGATAGGTAGCCGTCGCGAAGTGCGCACAATGTTTTAAGCTTCACCTGCCGAGCC AAGAAAGACACGAGGAGAAAGGGGTAAAGTGAGAACAACTCCTCATCGAGTTAGCAGGACTCTGTAATCG GGATGGCAGCTGCCCCGACAGTTGCGAGAGCATTGACTTCCTGGCGTGTGCGAAGCTCTCTTTGGTTATC GACAGGTGTTACCTAGTCTCGGTAGATTTTTGTAGATCCTCGCGCATGTGCCAGGACACTATTTTGAAAT CTGTGAAAGCCTATGGCTGCAGTCCGGCAAAACCACCCGGAGAAATTTTCCAAGTGAATAACTTCGTCGG TTCGTAGGCGTGAGAGCACGGATTAATAGACCCATCCGACAGTCGAGTAGGTTGGACACGCTGCATTCGA GATAAATATTTGAGATGAATACGAGCCAGATGGGCCAATCATTGACACAGAGATGGATCCCGACACGAGG AGCTGAAGC >gi|00000006| GCGCCCCCGTAAGATTCGTGATGTGCCTATCTGGACAAGACTTCATAGGATGTAGTGCGAGAGAAGCCGC TCACCTGGCTGTTTCAGCCACCTTGGTTTGTTTGTGTCCGTGAGTGCTATATGAGCTTCTACGTGTCACA ACCGAATCAACACTGCCATCTCGCGAGACTTCCCTCCATACAACAAATCTACTTATCCACGTCTTAGATA ACGATGGTGTGGAGCCGTGGCACGACGGCAAAATAGTGCCGTTAATGCGACTATCAGATCTGTTGTTCTA GTACGGCCATTGCTAGAACTGACCATACATTTCCTGTTGACACTCAGTCCGCTAGCCACTGCATAAGGTA TCCGGATTGCCCTTAGAGTAGCACATATTGGCGGTATGAGACTACCGTTGCCTTAGCTTCACTTCCGTGC 
ACTGGGTGCGATATGTTATCGATGCGAATGAGGACAGACTCTGTTGGAGGACGTGTCGACGGGAGGGACG TCTGAATGGCCGCCAAGTGAAACGAACGTAATACGAACTAGGTAAACGACGTTGACACAAAGTCCTCTTC AATATTTTTCGACACTGTTCTGGGAAAGTTAAATTACAAAAAATACATAGTTCCCCTAGACTAGTGTCAA TAAACTGTCAGTCCCCGTCGCCATATCAGCTGCTGATAGTAGGGGATATTTAATTTTGATTTGCTCTGCT AGATGATGCCCGCCTGTCGGTTCACTCGTCCAGCTGAACGATCTCGAGCCTTCACGGAGGGAGGGGTTAG AAAAGTGGTGCTTGCCTTCCCGCCGGCTCGGTCCTGCCATTGGCCAGCTAATGCGGATGTTTGTCCATAT TTCTCCCCCCTCATCTCTGTAGGAAGCAGCGTCACGCTCAGTGGACACCATATCTCAGGTATACGTAAAT TTACCTATCTTCGGCAGTATAGATCATGCAGGTTGGAGCGGTGAGAGTCCTTTGGGGTTGGCAGGAAACG GCGTGGAATCCGTGTGTAGGAAATCGTCGTGTAATAAATTGAAACCGAGTACTGTAGAAAATTCTGTTTT ACCATGGCTAATTCGACGTATGGCCCGCCCGACAGAGGATCTGATACAGCTCAAAAGTGTACACCACCTT CTAGCGCGGGTGTGCTTACACCGGCCGGTTGATTGCATTAATGAGGAGTTAAGCCGCGTACTGTTCGGCC GTAGCGCACGAAGTTTGGATAGCTCCATTGAGAAAGTCCGTAGCGGCTTCGTCGTGTGCGCTGTTATCCG TACCTACGGACCTTACGGGATCTGGGCGATGCATGATTGGAAAGAAGCGGTAGAATTCAACCGTATACTG ACTCCATGTACAGACTGGCTAAACGTAAGTCCCTCTAAAGGGTCAATGCCGGACCGGCATGAATGTATTA GATAGTCGGGTGCAATCACTAAGCTTTCCAATTTTGAGCCGGCTGAACTTTACCTGTGTTAGGCGTATCC AACTTTCTTCTCTGGGTATGGGGCATGGTGACAGACTGACAAGGAGGGTCGGGCTTATACTCAGCCGAAC GCCAGTTAAATGTCAGTATGGTTCCATACAAGTTATGCCGGACCGGACCGGAGGATTACGCCCAAGATCT ATAGAGTCAATGAGCAGATCCAATGACTACGAGTCCATGTCTAGTAACTATACCCTCAAGCACGGACGCC TTTGCCTATGTGGTTAGTTTCAAGCTAAGAGTACTTGGCCTCCTCTGAAAATTCTAGGAAAGCATTACTC AGGATGGCCACCTTTATCTGCTTAGCAAAACTGGGAACATCTCTGCAGTTTGTGTTAAATACATAGTGAC CACGGCGTACTCGTCTAACGTTGCACGGCAAACATTCCGTTGGTACATCGAAAGGTGCACTTAGAAGGAA GAGCCACGCACGGTTGCCACGGACAACTGCGTTCCCACCTTTGCTCAAACAACATGACCCAAGATGATCT GACAGGCTCGTGCGTCACGCGGGGGGTACTACTTGCAAAAGCCTGAGCTTTTCCAGAGCAGGACTTTAGA AGTCTCCCTCTCCGAGGCTAATACCCTCTATTCAGTACGTTATGGGAGGAGACGATTGCTTAACGCCTGT CTACTATTAAATACCGACGGAGGAAACGCTATAGTAGTTGTAGCCATACATAGATGCAACTTTTGACTAG ATTCCATTTACCCCCTGGAGGTAACGAAGTCAGCAGTTATCCGTAAAGTTAAGCATAGCCAATAGATGGA TTCGATGGACTACCCTCGATCCCTGAGACAGTCGGGAAACGTTCGCCTTGGATTAGAGCAAGAAAATCTA TGGCGTACGAGGGCTTTACGACCGACTTCCCTTTATAAGAGAGACTAAACCCGTCCTAACCGTGCACGAG 
CTCCTGGATACTTGTCATAAACTCTTACGCCAAGTACCGCCCCAGGCCGGCGTCCCGAGTCAGAGTATAA CATGCCTCTATAGGGAGACGTGCGGTGTCCGAAGAGGGTTATTCGCTGCGCTAGGTGGCGCCGCACAGAC AGGTTGGTCAGTATCCTCGTCAATCAACTGGCTCCGGGTGCTTCTAACCCGCCTTTTGACAGAAGAATGA GAACGACGTTAACCTTTTTGCCTCCGAGGTCTGTCCGTTTGATGCGGGCGGGTGCGATAACAGAACTGAA AGAAAAGATCTCAATGAAGACCATGTCCAGCGAGTATCTGGATTTACTATGTCTAATCTATGTTGGCAGC TCCGGTGGAGGAGCACAGCAGGAGAGACAGTGATTTGATGTGTAAGGACTACAGTTCAGTCGCACCCCGT ACGGCAATTTAAGAAAAACGGCTAAAAACAAACATTTACGACGTCTTCAAGAATATGGATGCCCCAATGG TTCACTGGTGGGAGCGTAGATACACCTTCGTATGTGACATTTCACCCTCACTCTGGTCGTGACTCAGTCT GTCATGTTTCCATTGGTGCTCAGAACTACGCTGTTCTTATATGTTTAATCGGGGGCCTTATACATGGGCA CGGGTGCCGTGGAATTGCGTGTGTATCCATATAACCGCGGGATTGTATTGTCCTCCTGTCTGCAAGAAGC TCCCCGCAGACAAACTTTAACGAGTCACCCCAGATCGTAGAGGTAACTGGGCCTCGTTGCTATTCGGAGG CCGTATACTGCCGTCGCAACTTCGATGGGACCTGACTATGTCGTCCGACGAACGTCTAAAAGGATAATCC CCCGTGCTGCGGTTTCTCCCCGCGTTGACGTACGACCCTAGAGTTCACCTGCGCGGGCATAGATATTTCC TTAATCGTAATGACTTAGACCATGAACTTTGCGTAATCGATCGAAGGAACACTACTCGTGGAATTCAGTT GTCATGGGCCTTCGTGTCGAGAGGACCAACAATCGAGGTATGGTTGAAGCGAGGACAACATGGGCGTGTC AGGGCCTGACCTTACCTTGACGTCTCAGAAACTATTCTACCGCCGCGCCAGACCTGCCAATCGCTAAACT GTAGGTGCGTAGTGCGTGCAGGTAATCCTAACCGGTTAATACGTTCGAAGTCCTTACCATTGTGGTCGAT ATCTAATCAGTTGTTTAGAAGTGAATAAGCTCACGTTAAGGCACCTAATTGACTAGTCAACTGGTAATTC GTCTGGCGCAATGCAGGCTCCAAGTATGAGTATGGTCTGCCTGATGGAGGATTCAGCGTATCCCTAGACC CTCTGAGTGCACAGTATTCTATGCTTACTTTGACTGACAAGTGCTATTTTGACTCAGACAGCCCGTCAAA TTGAATAGCATTAATCGCTGCAGCTTACGCTGCTCGAGCTGGCCGCAGGTATTATGTCAATGTGCCCGTC GAAGTCACCGGTAGATCTACCCATGTGCTTGAAGGTTGGTTTGTAAACCCGGCTAGGGACCTAACACCCA GAAAGAAAGCAAGCTTGCGATTACATGCCAGTCCAAACAAAGGTAGGAGTCGCATAACTATAGCTGAACA ATCATCACATAAAGCTCAAGAGATTTGTCTTACCAGTGTGTACGGGTTCTGTGGACCCCGGACCGTTGAC ACCCCTTGAGTAATGGAGACCACGAAGTCCTAGGTAATTGGGATAACTGGGCCTCCTCGACTCGTGGTTA GAACTTTATGTCAAGCAGGGAGGGTCTCTGTTTTGACCGTCCCTACGCCCTAGGAACCTAATATGTTGTT CTCTGCACTGGTGCGTATATTGACCAGCTTCAGAAAGGGCCAGGGAGTTCCCATCCGCATAGTGTATCGC TAGGGACGAGAGTTGAGAAAATAGCCCTTCGTCACAGGTGTTACAGTGACACTTGCCGCGCAGGACGCTC 
CTTGTCGTCCCGTGGGGAGATCATTCGGGCGTAATTCCGAGCACGATAGAGGAGCGCACACAATTGTGTC CAGTGAATTTTACTTACACGAGAGTGATGCTAGACGGCTCCCGTGTGGATCGATTACACTAAGGGGCGAG CACTCTACCCTAGTAGGTCCAGGCATTGCAAGCGTCGTGCACTCCTCGTCCTTCCGTTTAACTTCGGACC CCTGTTTGCGCCATGTTAGCAGTTTGATCAATATTTACGCTTGAGGGTACTTGTGGCTACCCGTCGTTGC GAGGTTATCGAAAACGTCCAGGTATCCCATACCGACCCAATTGATAGGTAGCCGTCGCGAAGTGCGCACA AAGTTTTAAGCTTCACCTGCCCAGCCAAGAATGACACGAGGAGAAAGGGGTTCGACAGAATGTGGGAACA ACTCCTCGTCGAGTTAGTGCCCCGACAGTTTCGAGAGCACTGACTTCCTGGCGTGTGCGAAGCTCTCTTT GGTTATCGACATGTGTTACCTAGTCTAGGTAGATTGTTGTAGATCCTCGCGCATGTGCCAGGACACTATT TTGAACTCTGTGAAAGCACAGGCCAACGAATGGCTGAAGCCTATGGCTGCAGTCCGGCAAAACCACCCGG AGAAATTTTAGTGAATAACTTGGTCGGTTCGTTGTCGTGAGAGCACGGAGTAATAGACCCATCCCACAGT CGAATAGGTTGGTCCCGCTGCATTCGAGAGAAATATTTGAGTTGAATACGAGCCAAAAGGGCCCATAATT GACACGGAGTTGGATCCCGAAACTAGGAGCTGAAGC >gi|00000007| GCGCCGCCGTAAGGGCACATGCTAAGTCTATACATGTGCCCCCGTTAAACTTGTATCTCACTTCGTGATG TGCCTACCTGGACAATACTTCACAGGATGTAGTGCGAGAGAAGCCGCTCACCTGGCCGTTCCAGCCACCT TGGTTTGTTTGTGTCCGTGAGTGCTATATGAGCTTCCACGTGTCTCAACGGAAAGTTACAGACCCGCCAT AAAATCAACACTGCCATCTCTCAAGACTTCCCTCCTTACAATAAATCTACTTATCCAGGCAACCCGGAAG AGGCCGTGGTGAGGAGCTGTGGCACGATAGTACGGCCATTGCTAGAACTGACCATATATTTTCTGTTGAC ACTCGGTCCGCAAGCCACGGCATAAGGTATCCGGATTGTCCTGAGAGTATTAACAATTGGCGGTCTGAGA CTACCGTTGCCTTGGCATCACTTCCGTGCCCTTGGTGCGAAATGTTACCGATGGGACTGAGGACAGACTC TGTTGGAGGACGTGTCGACGGGAGGGATGTCTGAATGCCCGCCGACTGAAACGAACGTAATACGAAACAG GTAAACGACTTTGACACGAAGTCGTCTTCAATCTTTTTCGACACTGTTGTGGGCAAGTTAAATCAAAAAA ACTACATAGTTCCCCTAGACTAGTGTCTATAAACTGACAGTCCCCGTCGCCAAATCAGCTGCTGATAGTA GGGGATATTTAGTTATGTTTTGCTCTGCTAGGTCATGCCCGCCTGTCGGTTCACTCGTCCAGCTGAACGA TCTCGAGCCTTCATGCCTTCCCGTGGGCTCGGTCCCGCCATGGGCCAGCTTATGCGGATGTTTGTCCACA TTTCTCGCCCCTCATCTCCGTAGGAAGCAGCGTCACGCTCAGTGGTCACCATATCACCGGTATACGTAAA TTTACCTTAGATCATGCAGGTTGGAGCGGTGAGAGGCCTTTGGGGACGCTTGGCAGGAAACGGCGTGGAA TCCGTAAATTCTGTTTTCCCATGGCGAATTCGACGAATGGCACGCCCGACAGAGTATCTGATATAGCACA AAAGTGTACATCCCCTAGGACTTTTGCTCATGTATCCAAAAAATATAGGGGACGTCAACGCACACCTAGC 
GCGGGTGTGCTTACACCGGCCGGTTGACAACACCATTGAAAAAGTCCGTAGCGGCTTCGTCGTAATAATA TAGGATTGTGGTAGTGCGCTGCTAGCCGTACCTACGGACCTTGCGGGATCTGGGCCATGCATGATTTGAC TGAAGCGGTAGAATTCAACCGTATACTGACTCCATGTACAGACTGGCTAAACGTACGTGCCTCTAAAGGG AGAATGCAGGACCCGCATGAATGTAAGCGTTCCAAGTTTGAGACGGCTGAACTTAACAGACTGACAAGGA GGGTTGGGCTTATCCTCAGCCAAACGCCCGTTAAACGTTAGTATGGGTCCGTACAAGTTATGCCGGGCGG CACCGGAGGACTAGGCCCAAGATCTATAGAGTGCACGAGCAGATACAATGACTACGAGTCCATGTCTAGT AACTATACCCTCAAGCACGGACGCCTTTGCCTATGTGGTTAGTTTCCAACTATGAGTACTTGGCCTCTTC TGAAAATTCTAGGTTTGCAGGTGCGGGCTCCAAAACAATACGGTGCAAAGCCTTAGTCAAGATAGCCACC TTTATCTGCTTACGTCGCAAAACTGGGAACATCTCTGCAGTTTGTGTTTAATACATAGTAACCACGACGG ACTTGTCTAACGTTGCAATCGAAAGGTGCACTTCAACTTTGCAGAAACAACATGACCCAAGATGATCTGA CAGGCAGGGCTGCTGCGCTGGCTGTGTCGTGAATCACGAGGGGGGTACTAATTGCAAAATCCTGAGCTTT TCAAGTGCAGGACTTTATAAGTCTCCCTCTCCGAGGGTAATAGCTCGTGTGTACCGTCTATTCAGTACGT TATCGGAGGAGACGATTGCTTAACGCCTGTCAAATATTAAATACCAACGGAGGATACGCTTTAGCAGTTG TAGCCATACATAGATGCAACTTTTGACTAGATTCCATTTACCCCCTGGAGGTAACGAAGTCAGCAGTTAT CGGTAAAGTTAAGCATAGTCAATAGATGGATTAGATGGACTGCCCTCGCTCCCTGAGACAGTCGTGAAAC GTTCGCCTCGGATTAGAGCAAGAAAATCTATGGCGTACGAGGGCTTTACGACCGACTTCCCTTTATAGGA GAGAGTAAACCCGTCCCAACGGTGCGCGAGCGAAATCTGCGCGATGACTTTGTGGGCTCCCGTCGAAAAG GGTCTGTAATATTCCTGGATACTTGTCACAAACTCTTACGCCAAGTACCGCCCCAGGCTGGCGTCCCGAG TCAGAGTATAACATGCCTCTATAGGGAGACGTGCTGTGTCCGAAGAGGGTCATGCGCTGAGCTAGGTGTC GCCGCAGAGACAGGTTGGTCAGTATCATCGTCTATCAACTGGCTCCGGGTGCATCTAACTCGCCTTTTGA CAGAAGAATGACAACGACGTTAACCTATTTGCCTCCGAGGTCTGTCCGTTTGATGCGGCCGGGTGTGATA ACAGAACTGAAAGAAAAGATCTCAATGAAGACCATGTCCAGCGAGTATCTGGGTCTACTATGTCTAATCA ATGTTGGCAGCTCCGGTGGAGGAGCACAGCACGAGAGACAGTGATTTGATGTGTAAGGACCGCGTCTATG AGTTCAGTCGCACCCCGTACGCCTATTTAAGAAAAACGGCTAAAAACAAACATTTACGATGTCTTCAAGT ATAGCGATACCCCGATGGTTCACTGGTGGGAGCACACCTTCTTATGTGACATTTCACCCTAACTCTGGTC GTGACTCAGTATGTCATGTTGCCAATGGAGCTCACAACTACGCTGTTCTTGTATGTTTAATCGGGGACCT TAGACATGGGCCCGGGCGCCGTGGACTCGCGTTTGTATCCGTATAACCGCGGGGGAGCCTACAACTGAGT ACCGCAATATTGGACCATATTGTATGGTCCTCCTGTCTGCAAGACGCTCCCCGCAGACAAACTTTCACGA 
GTCACCCCAGATCGTAGAGGTCTACTGCCGTCGCAACTACGATGGGACCTGACTATGTCGTCCGACGTAC GTGATAAAAAATAATCCACCGTGCTGCGGTTTCTCCCCGCGTTTACGTACGACCCGAGAGTTCACCTGCG CGGGCATAGATATTTCCTTAATCGTAATGACTTAGACCATGAACTTTCCGTACTCGATGGAAGGAACACT CCTCGTGGAATTCAGTTGTCAGGGGCCGTCGTGTCGAGAGGACCATCAATCGAGGTATGGTTGAAGCGAG GACCACTCAGAAACTATTCTACCGCTGCTCCAGACCTGCCAATACCTAAACAGTAGGTGCGTAGTGCTAG ATAAGGAGTGAATAAGGTCACGTTAAGGCACCTAATTGACTAGTCAACTCGTAATTCGTCTGGCGCAGTG CAGGCTCCAAGTATGAGTATGGACTGCCTAATGGAGGACTCAGCGTATCCCTAGACACGGGTGGTGGGTG TGATGAGCGACTTGCCTGTGAGTGCACAGTATTCTATGCTTACTTTGACAGACAAGTGCTATTTTGACTC AGACAGCCCGTCAAATTGAATAGCATTAATCGCTGCAGCTTATGCTGCTCGAGCTGGCCGCCGGTATTAT GTCCATGTGCCCGTCGGGGTCAACGGTGGATCTACCCAAGTGGCTACGGACACAAAGGTTGAAAAATCCT CACATAAAGCTCAAGAGATTTGTCTTACCAGTGCATGCGGGTTCTGTGGACCCCGGACCGTTGACGACAC AATTAACCACGAATTCCTAGGTAATTGGGATAACTGGGCCTCCTCGATTCGTGGTTAGAACTTTATGTCA AGCAGGGAGGGTCTCTGTTTTGACCGTCGCTCCGCCATAGGAACCTAATATGTTGTTCCCTGCACTAGAG AGTAAAGGGCCAGGGAGTTCCCATCGGCATCGTGTATCGCTAGTGACGAGAGTTGAGAAAATAGCCCTTC GTCACAGGTGTTACAGTGACACTTGTCGCGCAGGACGCAGCTCCCATCACCTTATATTGCCTTATTGGGT AAGGGATCGACGATCGAGGAGCGCACACACCTGTGTCCAGTGAATTTTACTTACACGGGAGTGATGCTAG ATCGATTACACTAAGGGGCTAGCACTCCACCCTAGTAGGGCCAGGCATTGCAAGCGTCGTGCACTCCTCG TCATTCCGTGTAACTTCGAACCCCGGTTTGCACCATGTTAGCAGTTTGATCAATATTTACGCTTGAGGGT ACTTGTGGCTACCCGTCGTTGTGTACGAGTCTTTCTAGACGCGAGGTTATCGAAAACGTCCATGCATCCC ATATCGACCCAATTGATAGGCAGCCGTCGCGATGTGCGCATAAAGTTTTAAGCTTCACGTGCCCAGCCAA GAATGACTCGAGGAGAACGGGGTTCGATAGAATGTGGGAACAATTCCTTGTCGAGTTATTGCCCCGCCAG TTTCGAGAGCAGTGAATTTCTGGCTACTCTATAAATCTCAGATCCTCGGAGCAGCATTTCGAAGCAGGAT TTAGTGTGCGCAGCTCTCTTTTGTTATCGACATGTGTTACCTAGTCTCGGTAGATTGTTGTAGATCCTCG CGCATGTGCCAGGAGGCCAACGAATGGCTGACGCCTATGGCTGCAGTCCGGCAAAACCACCCGTAGAAAT TTTAGGGAATAACCTGATCGGTTGGTTGTATCAGCCATCCGACCGAATCGCGTCTGGTCTACCAAGCCCT GTATCGACGCGCGAGAGCACGGAGTAATAGACCCATCCCACAGTCGAATAGGTTGGTCCCGCCGCATTCG AGAGAAATATTTGAGTTGAATGCGAGCCTAAAGGGCCCATAATTGACACGGAGTTGGATCCCGAAACTAC GAGCGGAAGC >gi|00000008| 
GCGCCGCCGTCAGGGTACATGATACGTATATGCATGTGCCCCCGTTATACTTGTATCTCACTTCGTGATG TGCCTACCTGGACAGTACTTCACAGGATGTAGTGCGATGGAAGCCGCTCACATGGCCGTTGTTTACTAGA ATGTCTCACCGTGCCACCTAGGTTTGTTTGTGTCCGTGAGTGCTATATGAGCTTACACGTGACTCAACGG AAAGTTACAGACCCGCCATAAACTCAACACATCGATCTCTCAAGCCTTCCACCCTTAAAATAAATCTACT TATCGAGGCAACCTGGAAGAGGCCGTTCTGATGAGCTGTGGCATGATAGTACGGACATTGCTAGAACTGA CCATATATTTTCTGTTGAAACTCGTTTCGCACGCCACGGCAGAAAGTATCCGGAATGTCCAGACAGTATT AACAATTGGCGGTCTGAGAAGACCGTTGCCTTGGCATCACTTCCGTGCACTTGGTGCGAAATATTACCGA GGGGGTTGAGGACAGACTCTGGGGGATGACGTGTCGACGGAAGGGATGTCTGAATGCCCACCGACTGAAA CGAACGTAATACGAAACAGGTAAACGACTTTTACACGAAGTCGTCTTCAATATTTTTCGACACTGTTGTG GGCAAGTTAAATCAAAAAACCTACATAGTTCCCCTAAACTTGTGACTATGAACTGACAGTCGCCGTCGCC AAATCAGCTGCTGATAGTAGGGGATATTTAGTTATGCCCTTAGGTATGTCTGTCGAGAATTTTTGCTCTG CTAGGTCATGCGCTCCTGTCGGTTCACTCGTCCAGCTGAACAAACTTCTGCTAATGGACTACAATTTGTA TGCTTGTTATCGGGTCCCGGTCTCGAGACTTCATGCCTTCCCGTGGGTTCGCCCCCGCCATGGGCCAGCT TATGCGGATGTATGTCCACGATTATCTGTACATCCAGTACCAACGCACGGATCAACGTAATCTCTGGCCC CTCATCTCCGTAGGAAGCAGCGTCACGCTACCACCACCCACGGTGATCCCATCCAACAGTGGTCACCAGC CTTTGGGGACGCTTGGCAGGCAACGGCGTGGAATCCGTAAACTCTGTTTTTGTTCCCATGGCTAATTCGA CGAATGGCACGCCCGACCGAGAGGTGGGGTTATCTGATATAGCACAAAAGTGTACATCCCCTAGGACTTT TCCTCATGTATCCAAAAAATCTAGAGGATGTCAACGCACACCTAGCGCGGGTGTGCTTACACCGGCCGGT CGGCTTCGTCGTAATTATATAGGAATGTGGTAGTGCGCTGCTAGCCGTACGGGATCCGAATGCCCTTTAA CTCGGAACTCTGTCGTACGGACCTTGCGGGATCGGGGCCATGCATGATTTGACTGAAGCGCTAGAATTCA ACCGTATACTGACTCCATGGACAGACTGGCTAAACGAAAGTGCCGTTAAAGGGAGAATGCAGGACCCGCA TGAATGTAAGCGTTCCAAGTTTGAGTCGGCTGAACTTAACAGACTGACAAGGAGGGTTGGGCTTATCCTC AGCCAAACGCCCGTTAAAGGCGTACTGCGTTAGTATGAGTCCGTACAAGTTATGCCGGCCGGCACCGGAG GACGAGCACGAGCATACCTATAGAATGACTACGAGTCCCTGTCTAGTAACTATACACTCAGGCACGGACG CCTTTGCCTATGTGGTTAGTTGCCAACTATGAGTACGTGGCCTCTTCTGAAAACTCTAGGAAGCGTGCCA GGTGGGACATTTCGTCCGCGACTTTGGGTATTAACAGGTGCGGGCTCCAAAACAAGACGGTGCAAACCCT TAGTTATGATAGACACCTTTATTACGTCGCAAAACTGGGAACATCTCTGCAGTTTGTGTTTAAGAAATAG TAACCACGACGGGCTTGTCTTAGGTTGGCATCGAAAGGTGCACTTCAACTTTGGAGAGCCAACATGACCC 
AGGATGATCTGACAGGCAGGGCTGCTGCGCTGGCTGTGTCGGGAATCACGAGGGGGGTAATAATTGCAAA ATTTGTTCCTGAGCTTTTCAAGTGCAGGAGTTTATAAGTACCGTCTATTCAGTACGTTATCGGAGGAGAC GATTGCCGCCTGTCAAATATTAGATCCCAACGTTCGATGTAGTTTCTCTCGGCACAAAGTCCATTTACCC CCTGGAGGTAACGAAGTCAACCGTTATCGGTAAAGTTAAGCATAGACAATAGATGGATCAGAAGGACTGT CCTCTCTCCCTGAAACAGTCGTGCAACGTTCGCCTCGGATTAGAGCAAGAAAATGTATGGCGTACGAGGG CTTTACGACCGGAGAGAGTAAACCCGTCCCAACGGTGCGCGACCGAAATCTGCGCGACCTCTTTGTGAGC TTGCGTCGAAAAGGGTCTGTCAAATTGCTGGATACCTGTCACAACCCCTTACGCCAAGTACGGCCCCAGG CTGGCGTCCCGAGTCAGAGTATAACAGGGTGGCAACTCATCTATAGGGAGAAGTGCTGTGTCCGAAGAGG GTCATGCGCTGAGCTCGGTGTTGCCGGTGAGACAGGTTGGTCAATATCAACATACGGACTCGTCTATCAA CTGGCTCCGGTTGCATGTAACTCGCCTTTTGACAGAAGGATGACTACGACGTTAAGCTTCTTGTCTCCGA GGTCTGTCCGTTTGATGGGGCCGGATGTGATAACAGAACTGAAAGAAAGGATATCAATGAAGACCATGTC CAGCGAGTTTCTGGGTCTACTATCTCGACTCCAATGGGGACGATTGGGGTAGATCGAAGGTATTAGCTCA TAATCAATGTGGGCAGCCCCGGGGGAGGAGCACAGCACGATAGAGCGTGATTTGATGTGTAAGGACCGCG TCTATGAGTTCAGTCGTACCCCGTACGCCTATTTATGAAAAACGGCTAAAAACAAACATTGACGATGTGC TGAAGTTTCTAATGTGACATTTCACCCTAACTCTTGTTGTGTCTCAGTATTGCATGTTGCCGATGGATCT CACAACTACGCTGTTCCTGTATGTTTAAACGGGGACCTTAGACATGGGCCCGGGCGCCGTGGACTCGCGT TTGTATCGGTATCACCGTGGGGGAGCCTACAACCGAGTACCGCAATATTGGACCAAATTGTATGGTCCTC CTGTCTGGAAGACGCTCCCCGCAGACAAACTTTCACGAGTCAGCCCATATCGTAGGGGTCTACTCCCGTC GCAACTACGATCGGACCTGACTATGTCGTCCGACGTACGTGATAAAAAATAATCCACCGTGCGGCAGCAT AGATATTTCCTTACTCGTAATGACTTAGACTGTGAACGTTCCGTACTCGATGGAAGGAACACTCCTCGTG GAATGCCGTTGTCAGCGGCCGTCGTGTCAATGTGAGCTGAGTTTTACCTCGAGTGTTCCATCAATTCTAC CGCTGCGCCAGAAGGTCACGTTAAGGCACCTAATTGACTAGTCGACTCGTAATTCGTCTAGCTTGGTTAG ACCCGTTTAATTAAGTTGGTGCCGCGTGTATAATCTTGACCGGCGCAGTGGAAGCTCCAAGTATGAGTAT GGACTGACTAATGGAGGACTCAGCGTATCCCTACACACGGGTAGTGGGTGTGATGAGCGACTTGCCTGTA CGTTACGGCTACTATGGGACACCGGGGGCGGGCGCCAAACTCTTGGGAGTTCACCGTATTCTATGCTTAC TTTGACAGACAAGTGCTATATTGACTCAGAAACCCCGTCAAATTGAATAGCATATATCGCTGCAGCTTAT GCTGCTCGAGTTGGCACCCGGTATTATGTCCATGACCCAGTCGGGGTCAACAGTGGATCTACCCTCGTGG CCTCGGTCACAAAGGCGGGAGATAATATTGCCGTGGGTATTTGAAAAATCCTCGCATAAAGCTCAAGTGA 
TTCAGACGGCCATAATCTGTGGCGATTTTCTTACCCGCGCATGCGGGTTCTGTGTACCCCGGACCGTTGA CGACACAATTAACCACGAATTCCTAGGTCATTGGGATAAAGCAGAGAGGTTCTCTGTTTCGACCGTCGCT CCGCCATAGAAACCTAATATGTTGTTCCCTACACTATCTAGACAAGGGCCAGGGGGAACCCATCGACATC GTGTATCGCTAGTGACGAGAGATGAGAAACTAGCCCTTCGCCACAGGTGTGCCCCTTTGTTCCGCCCCTA TGGCGACGTACCGGGACGTAGACAGTGACACTTGTGGCGCAGGACGCAGCTCCCATCACCTTATAATGCC TTATTCGGTAAGGGATCGACGATCGAGGAGGGCTCAGACCTGTGTCCAGTAAATTTTACTTACACGGGAG TGATGCTTGATCGATTACACTAAGGGGCTAGCGCTCCACCTTAGTAGGGCCAGGCATTGCAAGCGTCGTG CACTCTTCGTCATTTCCTGCCGGTTTGCACCATGTTAGCAGATTGATCCATATTTATGCTTGAGGGTACT TGCTAGCCGTCGTTCTGTACGAGTCTTTATAGACGCGATGGTATCGAAAACGTCCATGCATCCCACATCG ACCCAATTGAAAGGCAGCCGTGAGCCTCTGTACATACGTCCGCGAAAAGTTCATTGCGATGTGCGCATAA AGTTGAATGACTCGAGGAGAAAGTACGCCGCAGCACCAGTATTTCGTGGTTCGATAGCATGTGGGGACAA TTCCTTGTCGAGTTATTGCCCCGCCAGATTCGCGAGCAGTGAATTTCTGGCTACTCTATGACTCTCTGAT CCTCGGAGCAGCATTTCGAAGCAGGATTTAGTCTGCGCAGCTCTCTTTTGTTATCGACAAGTCTTACTTA GTCTCGGTAAATTGTTGTAGATCCTCGCGCATGTGCCAGGGGTAGGAGGCAGGCCAACGTATGGCTGACG CCTATGGCTTCAGTCCGTCGAAACCAGCCGTCGAAATTTTATCGTGATGATATTTAATTTAGAATGATAA TAGATTTTCGAGGAATAACCTGATCGGTTGGTTGTCACAGCCATCCGACCGGATCGCGGCCGGTCTACCA AGCACTGTATCGACGCGCGAGAGCACGGAGTAATAGCCCCATCGCACAGTCGAATAGATTGCTCCCGCCG GGAGATTCTAATGCATTCGAGAGAAATATTTGAGTTGAATGCGAGCCTAAACGGCCCATAATTGACACGG AGTTGGATCCAGAAACTACGAGCGGAAGC >gi|00000009| GCGCCGCCGTCAGTGATGTGCCCACCTGGACAGTACTTCAGACGCTGTACGCCGACAAAGGTCGCCCACT GGAGTGGTGTCCCGAGAAGATATCAGTGCGGTGGAAGCCGCTCACATGGCCGTTGTTTACTAGAATGTCT CACCGTGCCACCCAGGTTTGTTTGTGTCCGTGAGTGCTATAAGAGCTTACACCTGATTCAACGGAAAGTG ACAGGCCCGCCATAAACTCAACACATCGTTCTCTCAAGCCCTCCACCCTTAAAATAAATCTACTTATCGA GGCAACCTAGAAGAGACCGTTCTGATGAGCTGTGGCATGATAGTGCGGACATTGTTAGAACTGACCCTAT TTTTTCTGTGGAACCTCGTTTCACACGCCACGGCAGAAAGTATCCGGAATGTCAAGACAGTATTAACAAT TGGCGGTATCAGAAGACCGTTGCCTTGGCAGCACTTTCGTGCACATGGTGCGACATATTACTGAGGGGGT TGAGGGGGGATGACGTGTCGACGGAAGGGACTCGGCAGAGTGGTACCACGTGGTAGGTTTAAGCGCTTTA CTGTGATGTCTGAATGCCCACCGACTTAAAGGGGCGTAATACGAAACAGGTAATATTTTTGGACAATGTT 
GTGGGCAAGTTAAATCAAAAAACCTACAAAGTTCCCCTAAACTTGTGACTATGAACTGACAAGACTTCGG GGATATTTAGTTATGCCCTTAGGTATGTCTGTCGAGAATCTTTCACTCGTCCAGCTGAACAAAATTCTGC TAATGGACTACACGTTCTATGCTTATTATCGGGTCCTCGTCTCGAGACTTTATGCTTCGCCCCTGCCATG GGCCAGCTTATGCGGCATAATCTTTGGCCCGTCATCTCCGTAGGAAGCAGCGTCCCGCTACCATCACCCA CGGTGAGCCCATCCAACAGAGGTCACCAGCCTTTGGGCACGCTTGGTAGGCAACGGCGTGGGATCCGTAA TCTCTCTTTTTGTTCCCATGACTAATTCGACGAATGGCACGCTGGACCGACAGGTGGCGTTATTATCAGC CTATAGAGCACAAAAGTGTACATCCCCTAGGACTTTTCCTCATGTATCCAAAAAATCTAGAGGATGTCAA CACATAACTAGCGCGGGTGTGCTTACACGGGCCGGTCGGCTTGGTCGTAATTATATAGGAATGTGGTAGT GCGCCGCTAGCCGTACGGGATCCGTCGGGGCCATGCATGATTAGACTGCTGTGCAAGCACTCAACCGTAT ACTGACTCCACGGACATACTGGCTGAACGAAAGTTCCATTAAAGGGAGGATGCGGTACCCACTTAAAAGA CTTACAAGGCGAGTTGGGCTGATCCTCAGCCAAACGCCCGTTAAAGGCCCGGCCGGCCCCGGAGGATGAG CTCTAGGATATCTATAGAATGACTACGAGTGCCTGTTTAGTAACTATACACACAGGCACGGACGCCTTTG GCTCTCTGGTTATTTGCCAACTATGAGTACGTGGGCTCTTCTGAAGACTCTAGGAATCGTGCCAGGTGGG ACTGGGCAGCTCCGACGTTCATAATGTGCCATGTCAAACTCGTCCGCGACTTTGGGTTTTAACAGGAGCG GGCTCCAAAAAATGACGGTGCCAACCCTTAGGTATGAAAGACAACTTTATTACGTCGGAGTAACCACTAC GGGCTTGTCTTAGGTTGGCATCAAAAGGTGCACTACAACTTTGGAGAGCCAACAAGACCCAGGATGGTCT GACAGCCAGGGCTGCTGCGCTGGCTGCGGGAATCACGAGGGGGGTAATAATTGCAAAATTTGATCCTGAG CTTTTCAAGTGCAGGAGTTTATAAGTACCGTGTATTCAGTACGTTGTCGGAGGAGACGATTGTCGCTTGT CAAATATTAGATCCCAACGTTCGATGTAGTTTCTCTCGGCACAAAGTTCATTAACGGAGTCAACCGTTAT CCGTAAGGTTCTCCCCATCAAAGCATAGACAATTGGTTGATCAGAAGGACGCTCCTCTTACCCTAAAACA GGCGTGCAACGTTCGTCTCGGATTAGAGCAAGAAAATGTATGGCGTACGAGGGCTTTACGACCGCAGAGA GTAAACCCGTCCCAACGGTGCGCGACCGAAATCTACGCGACCTCTTTGTGAGCTTGCGTCGAAAAGGGTC TGTCAAGTTGCTCGATACCTGTCACAACCCCTTACGCCAAGTACGCCCCCAGGCTGGCGTCCCGAGTCAG AGTATAAGGGGGTGGCAACTCATCTATAGGGACAACAGCGGTGTCCGAAGAGGGCCATGCGCTGAGCTCG GTGTTGCCGGTGAGATAGGTTGGACAATCTCAACATACGGACTCGTCTATCCCGAGAAGATGAGCCCTGG GTCACTCACCGTGTCTGGACTTTTGCCTCTACCTGGCTTTCATGCAAAATCTGTGGGAGTTTTTCATTCG CCGTTTCGGCTCCCCTTGACATAAACATGTAACTCGCCTTTTGACAGAAGGATTACTACGACCTTATTGA TGGGGACGGCTGTGATAACAGATCTGAAAGAAAGGATATCAATGAAGACCATGTCCAGGGAGTTTCTGGG 
TATACTATCTCGACTCCAATGGGGACGATTGTGGTAGAACGAAGGAGTTCTTAAAAGATGTTGTAAACAT GGGCAAGTTATTAGCTCATAACCAATGTGCGCTGCCCCTGGGGAGGAGCACAGCACGTTAGAGCGTGATG TGATGTGTAAGGTCCGCGTCTATGAGTTCAGTCGTACCTCATACGCCTATTTATGGAAAACGGCTAAAAG CAAACATTGACGATGTGCTGAAGCTCTTCACGGTAGGCTTGTCTCTTTCGGTTCTAATGCGACCTTTCAC CCAAAGTATACTGGCCGAAATTCTGGCGGACGTTGTGTCTCAGTATCGCATGGCAGTTGATCGCTGCCTA GAGGGGCTGCCTATGAGGATTGTGCGCGTCCTATAGCCCATGTATAAAATCGCCTTGGACTCTCGTTTGT ATCGGTATCTCCGTGGGGGAGCCTACAACCGATTAGAGCAATATTGGACCACATAGTATGGTCCTCCTGT TTATATGAGATCTATTCAGGTCTATCGTTAGTTTTAGCCCGCGCCCCTCTTTGAACGACGCTCCCCGCAG GCAAATTTTCACGAGTCAGCCCATATCGTAGGCGTCGACTCCCGTCGCATGTACCATCGGACCTGACTAT GGCGTCCGACGTACGTGATATAAAAGAATCATGACGAACACACCGTGCGGCTGCATAGATATATCCTTAC TCGTAGTGACTTGGACTGTGAACGTTCTGTACTCGATGGAAGGCACACTCCTGGTGGAATGCCGTTGTCA GCGGCCGTCGTGTCAATGTGAGCAGAGTTTCACCTTGAGTGTTCCATCAATTCTACCGCTGCGCCAGAAG GTCTGGTTAAGGCACCTAATTGACTAGTCGTCTCGACCAAAATGTAATCGGGCGCTAGAACAAAGGCATA ATTCGTCTAGCCGGGTTAGACCCGTTTAATTAATTTCGTGCCGCGTGTATAATCTTGAACGGCGCACTGT AAGCTCTAAGTATGAGTATGGACTGACTAATGGAGGACTCAGCGTAAACCATCCCTACACACGGGTAGTG GGTGTGATCCGGGGGCGGGCGCCGTACTTGATTGGGCTAGCCTGATGGACACAAACTCTTGGTAGTTATA GGGGCCACCGTATTCTATGCTTACTCTGACAGACAAGTGCTATATTGAGGCAGAAACCCCGTCAAATTGA ATAGCATATATCGATGCAGCTTATGCTACTCGAGTTGGCACCCGGTATTATCTCCATGACCAAGCTGGGG GTGCATAGGTAATTGTCGGAAAAAGCAAGTTGGGGTCAACAGTCGATCTACCCTCGAGGCCTCGGTCACA AAGGCGGGAGATAATATTGCCGTGAGTATTTGAAAAATACTCGCATAAAGCTCAAGTGATTCTGACAGCC ATAATCTATGGCGATTTTCTCACCCGCGCATGCGGGTTCTGTGTACCCCGGACCAGTGACGACACAATTA ACCACGAATTCCTAGGTCATTGGGATAAAGCAGAGAGGTTCTCTGTGTCGACCGTTGCTCCGCCATAGAA ACGTAATATGTTGTTCCCTACACTATCTAGACAAGGGCCAGGGGGAACCCATCGACATCGTGTATCGCTA GTGCCGAGACAAGATCATCTCACAATCCCTAGGTTGTGGACTAATCTGAAGCCTAGCCCTTCGCCACAGG TGTGAAGACAGTGACTCCGTAAGGGATCGATGATCGAGGAGGGTTCAGGCCTGTGTCCAGTAAATTTTAC TTACACGGGAGTGATGCTTGCTCGATTACACTAAGGGGCGAGCGCACCACCTTAGTCGGGCCAGGCATTG CAGGGGTCGTGCACTCTTCGTCGTTGCCTGCCGGTTTGCACCGATGTTAGCAGATTGATCCATATTTATG CTTGAGGGCACTTGCGAGCCGCGATGGTATCGAAAACGTCCATGCATCCCACATCGACCAAATTGTAAAG 
CAGCCTTGAGCCTGTATACATACGTCCGCGAAAAGTTCATTGCACTGTGCCCCTAAAGTTGAATGACTCG ACGAGCAAGTACGCCGCAGCAGAAGTATTTCGTGGTTCGATAGGATTTGAGGACAATTCCCTGTCGAGTT ATAGCCCCGCGAGAATCGTGAGCCGTGAATTTCTGCCTTCGCCCCATCAAAAGACCTGCTGGCTACACTA TGACTCTCTGATCCTAGGAGCAGCATTTCGAAGCAGGATTTAGTCTGCGCTGCTCTCTTGTGTTATCGAC AAGTCTTACTTAGTCTCGGTAAATTGTTGTCGATCCTCGCGCATCTGCCAGGGGTAGGAGGCAGGCCAAC GTATGCCTGACGCCTATGGCTTCAGTCCGTCGCAACCAGCCGTCGACATTTTATCGTTATGATATTTAAT TTAGAGTGATAATAGATTTTCGAGAAAAAAGCTGATCGGTTGTCTGGCACAGCCATCCGACCGGATGGCG GCCGAAGCCTAGTTCCCTTCTGATATAATGTCAACCAAGCACTTTATCGACGCGCGAGACCCATCTCACA GTCGAATAGATTTCTAGTGCATGCGAGAGAAATACTTAAGTTGAATGCGAGCCTAAACGGCCCATAATTG ACACGGAGTGTG >gi|00000010| GCGCCGCCGTCACTGATGTGCCAACGTGGACAGTACTTCAGACGCTGTACGCCGACAAACGTCGCCCATG GAGTGGTGTCCCGAGAATATATGTGTGCGGTGGAAGCCGCCCACATGGCCGTTGTTTACTAGAATGTCTC ACAGGGCCACACAGGTTTGATTGTGTCCGGGAGTGCTATAAGAGCTTACAGCGGCCGAAGAATGCCCTAA CATTAAGATATAGTTCAACGGAAAGTGGGCCGACCAGTTGATAAAATTCTTTACGACAACCCCGCCATAA ACTCAACACATCGTTCTCTCAAGCCCTCCACCCTTAAACTAAATCTATTTATCGGGGCAACCTAGAAGAG ACCGTTACCGCCTATTAAAAAATTTCAGTGATTTTAATCCCGGCTGCTTGAAGATCATGATGAGCTGTGG CATGATAGTGCGGACATTGTTAAAACTGACCCTGGATTAGACTTGTAGGACTTATTTTTTCTGGGGTACC TCGTTTCACACGCCACGGCAGAAAGTATCCGGACGGTCGAGACAGTATTAACAATTGGCGGTATCAGAAG ACCGTTGCCATGGCAGCACTTTAGTGCACATGCTGCGACATATTACTGAGGGGGTTGAGGGGCGATGACG TGTCGACGCAAGGGACTCAGCAGAGTGGTACCATGTGGTAGTTGCTTTACTGTGATGTATGACTGCCCAC CGACTTTAAGGGGCGTAATACGAAACAGGTAATATACATGTTGCCAAAGTTCCCCTGAACATGTGCCTAT GAACTGACAAGTCGAATCTTTTACTTGGCGAGATGAACAAGATGGACTGCACTTTCGATGCTTATTATCG GGTACTCGTCTCGAGACTATATGCTTCGCCCCTGGCATGTGCCAACTTAAGCGGCATAATCTTTGGCCCT TCACCTACGTCTGAAGCAGCATCCCGCTACCATCCCCCACGGTGAGCCCTTCTAACAGAGGTCAACAGCC TTTGGGCACGCTTGGTACGAAACGGCGTGGGATCCGTAATCTCACTTGTTGTTGCCATGACTAGTTCGAA GAATGGCACGCTCGACCGACCGGTGGCGTTATTATGAGCCTATAGAGCACTTTTCCTCATGTATCCAAAA AATCTAGAGTATGTCAACACATAACAAATGCGGCTGTGCTTACACGGGCCGTTCGGCTTGGTCGGAAGTA TACAGGTATGTGGTAGTGCGCCGCTAGCCGTATGGGATCCGTCGGGGCCATGCATGTATATGCCCCTTGA 
TGAACAGCAGCTGTGCAAGCACTCAACCGTATACTGACTGGGACGCGCCACTGACATACTGGCTGTACGA AAGTTCCATTAAAGGGAGGATACGGTACCCACTTAAAAGCCTTACAAGCCGAGATGGGCTGATCCGTATG CGTTGAACTATCCAGGAGGCGCCCGGTGTTAAGTAATAGTCTCATCCAAACGCCCCTTAAAGGCACGGCC GGCCCCGGAGGATGAGCTACAGGATATCTAGAGACTCTGTCACTGACGCCTTTGGCTCTCTGGTTATGTG CCAACTATGAGTACGTGGGCTCTTCTGAACACTCTAGGAATCGTGCCAGGTGGGACTGGGCAGCTCCGCG TCCGCGACTTTGGGTTTTAACAGCACCGGGCTCGAAAAAATGACGGTGCCAACCCTTAGGTAGGAAAGAC AACTTCATTACGTCAGCGTAACCACTACGCGCTTGTATTAGGTTGGCATCAAAAGATGCACTACAACTTT GGAGTGCCAACAAGACCCAGGATGGTCTGACAGCCAGTGCTGCTGGGCTCGCTGCGGGTATCATGAGGGG GGTAATAAATGCGAAATTTGATCCTGAGCTCTTCAAGTGGAGGAGTTTATCAGTACCGTGCATTCAGTAC GTTGTCGGAGGAGACGATAGTCACTTGTCAAATATTAGATCCCAAAGTTCGATGTAGTTTCTCTCGGCAC AAAGTTCATTCGCGGAGTCAACAGTTATCCGTGAGGTTCTCCCCATCAAAGCATTGACAATTGGTTGATC AGAAGGACGCTCCTCTTACCCTAAAACAGGCGTGCAACGTTCGTCTCCGAACTGATCACATAGTTGGGAG GTGGCGAGAGTCATTAGCCACAAAAATGTGGATTAGGTCCTAACGGTGCGCGACCGAAAGCTACGCAAGC TCTTTGTGAGCTTGCGTACGCCAAGTACGCCTCCAGGCTGGCGTCCAGAGTCAGAGTATAAGGGGGTGGC AACTCATCTATAGGGGCTTATTGTCCTTCTCTATGGCTGAGGGCCATGCGCTGAGCTCGCTGTTGCCGGT GAGAAAGGTTGGACATACTCAACATACGGAGTCGTCTATCCCGAGAAGATGAGCCCCATGGTCATTGGGT CACTCACCGTGTCTGGACTTTGGCTGCCGAGAAGTGACCAGGCTGTAACGGCACGTTGCCCCTACCTGGG TTTGATGTAAAATCTGCGGGGGTTGTTCATTTGCCGTTTCGGCTCCCCTTGACATAAACATGTAACTCGC CTTTTGACAGAAGGATTACTATGACCTTATTGCTCGGTACCGCTGTGATAACAGATCTGAAAGAATGTAA ATCAATCAAGACCATGTCCAGGGAGCTTCTGGGTATACTATCTCGACTCCAATGGGGGACGATTGCGGGC GAACGAAGGAGTTCGTAAACGATGTTCTAAACATGGGCAAGTTATTAGCTCATAACCAATGTTCGCTACC CCTGGGGATAAGCACACCACGTTAGAGCGTGATGTGATGTGTAAAGTCCGCGTCTATATTTATGGAAAAC GGCTAATAGCAAACATTGACGATGTGCTGAAGATCTTCACGGTAGGCTTGTCTGTTTCGGTTCTAATGCG ACCTATCACCCAAAGTATCCTGGCAGCATGGTCAAATACACTGTATCATCGCGCTAAAATTGTATACTAT TGTCCGAAATTTTGGAGGACGGCAGTCCTCGACCCAACACTCATACCACTTTGTGTCCAAGTATCGCATG GCAGTTGATCGCTGCCTAGATGGGCTGCCTTTGAGGATTGTGCGCGTCCTATAGCCCATGTATAAAATCG CCTTGGACTCTTGTGTGTATCGGTACCTCCGTGGGGAAGCCTACAGCCGATTAGAGCAATATTGGCCCAC ATAGTATGGTCCTCCTGTTTATATGAGATCTATTCAGGTCTATCACGACGCTTCCCGCAGGCAAATTTTC 
ACGAGTCAGCCCATATCGTAGGCGTCGACTCCCGTCGCGTGTACCATCGGACCTGACTATGGCGTCCTAC GTACGTGATATACAAGAATCATGACGAACACAACACCTCCAGAATTCGGATGTTCGTTAGGGGGATTGGC ATTCCGTGAGGCTTTCTGTACTCGGTGAAAGGCACACTCCTGGTGGAATGCCGTTGTCAGCGTACGTCCC GTCAACGTAAGCAGAATTTCACCTTTAGTGTTTCAGCCGATCAAAGATAAGGTCAAAGCAGTCCCGTGTG GTTTTTCCCATCAATTCTACCGCTCCGCCCGAAGTTCTGGTTAAGGCACCTAATTGACTAGTCGTCTCGA CCAAAATGTAATAACCGGGTTAGGCCCGTTTAATTAATTTCGGGCCGCGTGTATAATCTTGAAGGGCGCA CTGTAAGCTCTAAGTATGAGTAAGGACTGACTAATGGAGGACTCAGCGTAAACGATCGCTACACACGGTT AGTGGGTGTGATCCGCGGGCGGGCTCCGTACTTGATTCGGCTAGCATATACTGAATGGTGCGTGTCGGTT GGTGTAACATCACATTTCCGTCCTCTATGGTGGCTTTGGGAGCAGCGGAGGTCTGAGGGACACAAACTCT TGGTAGTGATAGGGGCCACCGGATGCTATGCTTAGTTTGACAGACGAGTGCTATATTGAGGCAGAAACCC GGGCAAATTCAATAGAATATATCGATGCAGCTTATGCTACATGTGTCGAGTTGGCATCCGTTATTATCTC CATGACCATCATGCTGGGAGTGAACGCTACTTCGTAGCCGGTCCAATGCGGTAAAAAGCAAGTTGGGGTC AACTGTTGATCTACCCTCGAGGCCTCAGTCACAAAGGCGGGAGATAATATTGCCGTGAGTATTTGAAAAA TACTCGCATAAAGCTCAAGTGATTCTGACAGCCATAATCAATGGCGATTTTCTCACCCGCACATGCGGGT TCTCTGAACCCCGGACCACTGACGACACAATTAACCACGAATTCCTTGGTCATTGGAGTAAAGCAGAGAG GTCCTCTGTGTCGACCGTTGCTCCGCCATAGAAACGTAATATGTTGTTCCCTTCGCTATGACAAGGGCCA GGGGGCACCCATCTGCATCGTGTATCGCTAGTGCTGAGACAAGATCCTCTCGCAAACCCTAGGTAGTGGA CTAATCTCAAGCCCAGCCCTTCGCCACAGATGTGTCTAAACCGACAACCATACAACGATACGTATTAGAC AGTGAATCCGTAAGGGATCGTTGATCGAGGAGGGTTGAGGCCTGTGTCTAGTAAGTTTTACTTACACGGG AGTGATGCTTGCTCGATCACACTAAGGGGCGATCGCACCACCCCAAGAATTTAAGTCGGGCCAGGCTTTG AAGGGGTCGTGCACTGTTCGTCGTTGCCTTGGTTGTCAAAGTTACTTCTATTTCACCTAAAGTTCAGTCT ATGTAAGTGGAAGGGTACTTGCGAGCCGCGATGGTGTGAAAACGTGCATGCATCCCACATCGACCAAATT GCAAAGCATCCTTGAGCCTGTATACATACGTCCACGATAAGTTCATTGCACTGTGCCCCTAAAGTTGAAT GACTCGACGAGCAAGTACGCCGTAGCAGAAGGATTTCATGGTTCGATTGGATTTGAGGACAATCCCCTGT CGGGTTATAGCCCCGCGAAAATCGTGAGCCGTAAATTTCTGCCTTCCCCCCATCAAAAGACATCCTGGTT ACACTATAACTCTCTGATCCTAGGAGCGGCATTTCGAAGCAGGACTTAGTCTGCTCTGCTCTCTTGTGTT ATCGACAAGTCTTTCTTAGTCTCGGTAAATTGTTGTCCATCCTAGCGCATCTGACAGTGGTAGGAGGCAG GCCAACGTATGCCTGACGCCTATGGCTTCGGTCCGTCGCAACCCGCCGTCGACATTTTATGGTTATGATA 
ATTAATTTACAGTGATAATAGATTTACGAGAAATACGATGATCGGTTGACTGGCACAGCCATCAGACCGG ATGCCAGCGGCCGAAGCCTAATTCCCTTCTGATATAATGTCAACCAAGCACTTTATCAGGAAAGATCTGT TCCATTGAGCGAGGGGGCCATGAAGCAGCCGTGGTCGAGAGAAATACGCGCGAGACGAATCTCACAATCG AATAGATTTCTAGTGCATGCGAGAGAAATACTTACCTGAGCACATATTAAATGCCGGAGTTGTAATTGAC ACGGAGTGTG clustalw-mpi-0.15/Makefile0000644000411000001440000000254110350465702014040 0ustar liusersinstall: clustalw-mpi clean: /bin/rm -f *.o OBJECTS = clustalw-mpi.o interface.o sequence.o showpair.o malign.o malign_mpi_progressive.o \ malign_mpi_pdiff.o util.o trees.o gcgcheck.o \ prfalign_mpi_pdiff.o prfalign_mpi_progressive.o pairalign_new.o \ calcgapcoeff.o calcprf1.o calcprf2.o calctree.o \ readmat.o alnscore.o random.o parallel_compare.o \ prfalign.o stupid.o HEADERS = general.h clustalw.h CC = mpicc TREES_FLAG = -DSERIAL_NJTREE # # Choose either dynamic or static scheduling for pairalign(): # the default is to use dynamic scheduling. You might also # want to check PAIRALIGN_NCHUNK in clustalw.h. The # larger this marco, the smaller the chunk size, default is 100. # PAIRALIGN_FLAG = -DDYNAMIC_SCHEDULING_PAIRALIGN #PAIRALIGN_FLAG = -DSTATIC_SCHEDULING_PAIRALIGN CFLAGS = -c -g #CFLAGS = -c -O3 -funroll-all-loops LFLAGS = -lm clustalw-mpi: $(OBJECTS) $(CC) -o $@ $(OBJECTS) $(LFLAGS) interface.o : interface.c $(HEADERS) param.h $(CC) $(CFLAGS) $*.c readmat.o : readmat.c $(HEADERS) matrices.h $(CC) $(CFLAGS) $*.c trees.o : trees.c $(HEADERS) dayhoff.h $(CC) $(TREES_FLAG) $(CFLAGS) $*.c pairalign_new.o : pairalign_new.c $(HEADERS) $(CC) $(PAIRALIGN_FLAG) $(CFLAGS) $*.c parallel_compare.o : parallel_compare.c $(HEADERS) dayhoff.h $(CC) $(PAIRALIGN_FLAG) $(CFLAGS) $*.c .c.o : $(CC) $(CFLAGS) $? 
clustalw-mpi-0.15/README.clustalw-mpi0000644000411000001440000001375011015472123015676 0ustar liusers****************************************************************************** CLUSTALW-MPI: ClustalW Analysis Using Grid and Parallel Computing based on ClustalW, the multiple sequence alignment program (version 1.82, Feb 2001) Please send bug reports, comments etc. to kbli@ym.edu.tw (Kuo-Bin Li) ****************************************************************************** COPYRIGHT and LICENSING POLICY CLUSTALW-MPI is freely available to the user community. You can redistribute it and/or modify it. Since CLUSTALW-MPI was derived from CLUSTAL W, the original license policy of CLUSTAL W is listed here: Clustal W is freely available to the user community. However, Clustal W is increasingly being distributed as part of commercial sequence analysis packages. To help us safeguard future maintenance and development, commercial distributors of Clustal W must take out a NON-EXCLUSIVE LICENCE. Anyone wishing to commercially distribute version 1.81 of Clustal W should contact the authors unless they have previously taken out a licence. ****************************************************************************** ClustalW is a popular tool for multiple sequence alignment. The alignment is achieved via three steps: pairwise alignment, guide-tree generation and progressive alignment. ClustalW-MPI is an MPI implementation of ClustalW. Based on version 1.82 of the original ClustalW, both the pairwise and progressive alignments are parallelized with MPI, a popular message passing programming standard. The pairwise alignments can be easily parallelized since the many alignments are time-independent of each other. However, the progressive alignments are essentially not parallelizable because of the time dependencies between each alignment. Here we applied the recursive parallelism paradigm to the linear space profile-profile alignment algorithm. 
This approach is more time efficient on computers with distributed memory architecture. The traditional approach that relies on precomputing the profile-profile score matrix has also been implemented. Results show that the latter is indeed more appropriate for shared memory multiprocessor computers. The software is available at http://kmlvli.com/kuobin/clustalw-mpi/ The original ClustalW/ClustalX can be found at ftp://ftp-igbmc.u-strasbg.fr. REFERENCE --------- Kuo-Bin Li (2003) "ClustalW-MPI: ClustalW Analysis Using Distributed and Parallel Computing", Bioinformatics, in press. INSTALLATION (for Unix/Linux) ------------ This is an extremely quick installation guide. 1. Make sure you have MPICH or LAM installed on your system. 2. Unpack the package in any working directory: tar xvfp clustalw-mpi-0.1.tar.gz 3. Take a look at the Makefile and make the modifications that you might desire, in particular: CC = mpicc CFLAGS = -c -g or CFLAGS = -c -O3 4. Build the whole thing simply by typing "make". 5. If you wanted to use serial codes to compute the neighbor-joining tree, you would have to define the macro "SERIAL_NJTREE" when compiling trees.c: CFLAGS = -c -g -DSERIAL_NJTREE This macro is defined in the default Makefile. That is, to use MPI codes in neighbor-joining tree, you have to "undefine" the macro "SERIAL_NJTREE" in your Makefile. SAMPLE USAGE (for Unix/Linux) ------------ 1. To make a full multiple sequence alignment: (using one master node and 4 computing nodes) %mpirun -np 5 ./clustalw-mpi -infile=dele.input %mpirun -np 5 ./clustalw-mpi -infile=CFTR.input 2. To make a guide tree only: %mpirun -np 5 ./clustalw-mpi -infile=dele.input -newtree=dele.mytree %mpirun -np 5 ./clustalw-mpi -infile=CFTR.input -newtree=CFTR.mytree 3. To make a multiple sequence alignment out of an existing tree: %mpirun -np 5 ./clustalw-mpi -infile=dele.input -usetree=dele.mytree %mpirun -np 5 ./clustalw-mpi -infile=CFTR.input -usetree=CFTR.mytree 4. 
The environment variable, CLUSTALG_PARALLEL_PDIFF, could be used to run the progressive alignment based on the parallelized pdiff(). By default the variable CLUSTALG_PARALLEL_PDIFF is not set, and the progressive alignment will be parallelized according to the structure of the neighbor-joining tree. However, parallelized pdiff() will still be used in the later stage when prfalign() tries to align more distant sequences to the profiles. If you don't understand this, simply leave the variable unset. KNOWN PROBLEM ------------ 1. On Intel IA32 platforms, slightly different neighbor-joining trees might be obtained with and without enabling the compiler's optimization flags. This is due to the fact that Intel processors use 80-bit FPU registers to cache "double" variables, which are supposed to be 64-bit long. With '-O1' or above optimizer flag, the compiler would not always immediately save the variables involved in a double operation back to memory. Instead, intermediate results will be saved in registers, having 80-bit of precision. This would cause problems for nj_tree() because it is sensitive to the precision of floating point numbers. Solutions: (1) Other platforms, including Intel's IA64, don't seem to have this problem. or (2) Building "trees.c" with options like the following: (potentially with high performance overhead) %gcc -c -O3 -ffloat-store trees.c // GNU gcc %icc -c -O3 -mp trees.c // Intel C compiler or (3) Declaring relevant variables as "volatile" in nj_tree(): volatile double diq, djq, dij, d2r, dr, dio, djo, da; volatile double *rdiq; rdiq = (volatile double *)malloc(((last_seq-first_seq+1)+1)* sizeof(volatile double)); ... ... free((void*)rdiq); ACKNOWLEDGEMENTS ------------ 1. In parallel_compare.c: "fprintf(stderr, ..." changed to "fprintf(stdout,....". Thanks to Ville Silventoinen . Last modified: May 23, 2008 clustalw-mpi-0.15/History0000644000411000001440000000371211015472013013755 0ustar liusersclustalw-mpi-0.15 1. 
bug fix version 0.15 released on May 23, 2008. In the previous version, a memory crash may occur if the length of a sequence's name is longer than 60 characters. This was defined in "clustalw.h" by the typedef macro of stree. /***** Original code ***************************************************/ typedef struct node { /* phylogenetic tree structure */ struct node *left; struct node *right; struct node *parent; float dist; sint leaf; int order; char name[64]; } stree, *treeptr; /***********************************************************************/ In version 0.15, we have changed the definition to "char name[MAXNAMES]". It means you must re-define macro MAXNAMES in order to process FASTA file with sequence names longer than the default length. /***** New code ***************************************************/ typedef struct node { /* phylogenetic tree structure */ struct node *left; struct node *right; struct node *parent; float dist; sint leaf; int order; char name[MAXNAMES]; } stree, *treeptr; /***********************************************************************/ clustalw-mpi-0.14 1. bug fix version 0.14 released on Dec 19, 2005. (1) in the old version, "mpirun -np 3 ./clustalw-mpi two.seq" would hang provided the input file "two.seq" contains only two sequences. This bug has been fixed. Details: in pairalign_new.c: if the number of slaves is greater than N*(N-1)/2, we will MPI_Send the sequence data to N*(N-1) slaves only. This is to prevent the idle slaves not being able to get out of their while(1) loop, see "parallel_compare.c". clustalw-mpi-0.12 1. Fixed a bug that would hang the program when using "mpirun -np 2 ./clustalw-mpi". clustalw-mpi-0.11 1. Fixed a bug that would hang the program when using "mpirun -np 1 ./clustalw-mpi".