SMILEv1.47/0000755002404200237300000000000010217767251012136 5ustar lamaaoc00000000000000SMILEv1.47/P_BLOCS/0000755002404200237300000000000010114607737013255 5ustar lamaaoc00000000000000SMILEv1.47/P_BLOCS/bin/0000755002404200237300000000000010114605452014015 5ustar lamaaoc00000000000000SMILEv1.47/P_BLOCS/obj/0000755002404200237300000000000010114605452014017 5ustar lamaaoc00000000000000SMILEv1.47/P_BLOCS/src/0000755002404200237300000000000010217767132014044 5ustar lamaaoc00000000000000SMILEv1.47/P_BLOCS/src/.deps/0000755002404200237300000000000010066542217015052 5ustar lamaaoc00000000000000SMILEv1.47/P_BLOCS/src/.deps/alea.P0000644002404200237300000000273410066542217016103 0ustar lamaaoc00000000000000alea.o: alea.c /usr/include/stdio.h /usr/include/features.h \ /usr/include/sys/cdefs.h /usr/include/gnu/stubs.h \ /usr/lib/gcc-lib/i586-mandrake-linux/2.95.3/include/stddef.h \ /usr/lib/gcc-lib/i586-mandrake-linux/2.95.3/include/stdarg.h \ /usr/include/bits/types.h /usr/include/libio.h \ /usr/include/_G_config.h /usr/include/bits/stdio_lim.h \ /usr/include/bits/stdio.h /usr/include/stdlib.h \ /usr/include/sys/types.h /usr/include/time.h /usr/include/endian.h \ /usr/include/bits/endian.h /usr/include/sys/select.h \ /usr/include/bits/select.h /usr/include/bits/sigset.h \ /usr/include/sys/sysmacros.h /usr/include/alloca.h \ /usr/include/unistd.h /usr/include/bits/posix_opt.h \ /usr/include/bits/confname.h /usr/include/getopt.h alea.c : /usr/include/stdio.h : /usr/include/features.h : /usr/include/sys/cdefs.h : /usr/include/gnu/stubs.h : /usr/lib/gcc-lib/i586-mandrake-linux/2.95.3/include/stddef.h : /usr/lib/gcc-lib/i586-mandrake-linux/2.95.3/include/stdarg.h : /usr/include/bits/types.h : /usr/include/libio.h : /usr/include/_G_config.h : /usr/include/bits/stdio_lim.h : /usr/include/bits/stdio.h : /usr/include/stdlib.h : /usr/include/sys/types.h : /usr/include/time.h : /usr/include/endian.h : /usr/include/bits/endian.h : /usr/include/sys/select.h : /usr/include/bits/select.h 
: /usr/include/bits/sigset.h : /usr/include/sys/sysmacros.h : /usr/include/alloca.h : /usr/include/unistd.h : /usr/include/bits/posix_opt.h : /usr/include/bits/confname.h : /usr/include/getopt.h : SMILEv1.47/P_BLOCS/src/global_fonctions.c0000644002404200237300000003054610066543662017544 0ustar lamaaoc00000000000000/******************************************************************************/ /* SMILE v1.47 - Extraction of structured motifs common to several sequences */ /* Copyright (C) 2004 L.Marsan (lama -AT- prism.uvsq.fr) */ /* */ /* This program is free software; you can redistribute it and/or */ /* modify it under the terms of the GNU General Public License */ /* as published by the Free Software Foundation; either version 2 */ /* of the License, or (at your option) any later version. */ /* */ /* This program is distributed in the hope that it will be useful, */ /* but WITHOUT ANY WARRANTY; without even the implied warranty of */ /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ /* GNU General Public License for more details. */ /* */ /* You should have received a copy of the GNU General Public License */ /* along with this program; if not, write to the Free Software */ /* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
*/ /******************************************************************************/ #include void Init_All(unsigned char *Alphabet,int Joker,int nb_sequence) { int i; int l; initBitTab(nb_sequence); Init_Allocateurs(); l=strlen((const char *)Alphabet); if (Joker) { for(i=0;i<255;i++) Translation_Table[i]=0; for(i=0;idebut & LEAF_BIT)) return ; N->fils[Translation_Table[Sequence[F->sequence_number][F->debut & LEAF_BIT_INV]]] = F; } Noeud *Get_Child_Start_Letter(Noeud *N,int indice) { if ((Translation_Table[Sequence[current_sequence][indice]] == 255) || (!N) || (N->debut & LEAF_BIT)) return NULL; return N->fils[Translation_Table[Sequence[current_sequence][indice]]]; } int seg_taille(Noeud *N) { if ( N->debut & LEAF_BIT) { if ( getValue(Liste_positions_fin,((Feuille *)N)->fin_deb) == -1) return (global_indice- ((((Feuille *)N)->debut) & LEAF_BIT_INV)); else return ( getValue(Liste_positions_fin,((Feuille *)N)->fin_deb) - ((((Feuille *)N)->debut) & LEAF_BIT_INV)); } else { if (N->fin == -1) return global_indice - N->debut; else return (N->fin - N->debut); } } Noeud *Add_Fast_String(Noeud *N,int deb,int fin,int *type,Noeud **pere) { Noeud * res, *tmp_n; Feuille *tmp_f; int tmp_i,res_d; if (!N) return NULL; if (N->debut & LEAF_BIT) { *type = 2; /* Extension d'un feuille. 
*/ if (seg_taille(N)<(fin -deb)) { N->debut = (deb - (getValue(Liste_positions_fin,N->fin) - (N->debut & LEAF_BIT_INV)))| LEAF_BIT; setListeValue(Liste_positions_fin,((Feuille *)N)->fin_deb,fin); } else if ((seg_taille(N)==(fin -deb)) &&((getValue(Liste_positions_fin,((Feuille *)N)->fin_deb))!=-1)) { if ((N->sequence_number!=current_sequence) || (getValue(Liste_positions_fin,((Feuille *)N)->fin_deb) != fin)) { N->debut = deb | LEAF_BIT; if (N->sequence_number==current_sequence) { Ajoute_Position_Liste(Liste_positions_fin,&(((Feuille *)N)->fin_deb),fin,0); } else { Ajoute_Position_Liste(Liste_positions_fin,&(((Feuille *)N)->fin_deb),fin,1); addBitTabValue(&(((Feuille *)N)->sequences),current_sequence); N->sequence_number = current_sequence; } } } return N; } res = Get_Child_Start_Letter(N,deb); if (res == NULL) { *type = 1; /* Creation d'une feuille. */ tmp_f = Alloc_Feuille(); tmp_f->debut = deb | LEAF_BIT; Ajoute_Position_Liste(Liste_positions_fin,&(tmp_f->fin_deb),fin,0); *pere = N; Ajoute_Fils_Au_Noeud(N,(Noeud *)tmp_f); return (Noeud *)tmp_f; } tmp_i = seg_taille(res); res_d = res->debut & LEAF_BIT_INV; if (deb + tmp_i >= fin) { if (Translation_Table[Sequence[current_sequence][fin-1]] == Translation_Table[Sequence[res->sequence_number][res_d + (fin - deb) - 1]]) { *type = deb-fin; if (res->debut & LEAF_BIT) { if ((getValue(Liste_positions_fin,((Feuille *)res)->fin_deb)!=-1) && ((fin - deb) == seg_taille(res))) { if ((res->sequence_number != current_sequence) || (getValue(Liste_positions_fin,((Feuille *)res)->fin_deb) != fin)) { ((Feuille *)res)->debut = deb | LEAF_BIT; if (res->sequence_number != current_sequence) { Ajoute_Position_Liste(Liste_positions_fin,&(((Feuille *)res)->fin_deb),fin,1); addBitTabValue(&(((Feuille *)res)->sequences),current_sequence); res->sequence_number = current_sequence; } else Ajoute_Position_Liste(Liste_positions_fin,&(((Feuille *)res)->fin_deb),fin,0); } } } return N; } else { tmp_n = Alloc_Noeud(); tmp_n->debut = res_d; tmp_n->fin 
= res_d + fin - deb - 1; tmp_n->sequence_number = res->sequence_number; tmp_n->suffixe_link = N; tmp_n->fils[Translation_Table[Sequence[res->sequence_number][tmp_n->fin]]]=res; if (res->debut & LEAF_BIT) { res->debut = tmp_n->fin | LEAF_BIT; } else res->debut = tmp_n->fin; tmp_f = Alloc_Feuille(); *pere = tmp_n; tmp_f->debut = (fin - 1) | LEAF_BIT; Ajoute_Position_Liste(Liste_positions_fin,&(tmp_f->fin_deb),fin,0); N->fils[Translation_Table[Sequence[tmp_n->sequence_number][tmp_n->debut]]]=tmp_n; tmp_n->fils[Translation_Table[Sequence[tmp_f->sequence_number][tmp_f->debut&LEAF_BIT_INV]]]=(Noeud *)tmp_f; *type = 3; return (Noeud *)tmp_f; } } *pere = N; return Add_Fast_String(res,deb+tmp_i,fin,type,pere); } int compare_string(int d1,int f1, int d2,int f2) { int i,j; for(i=d1,j=d2;(idebut & LEAF_BIT) { *nb_feuilles = *nb_feuilles + 1; if (affichage){ printf("Feuille (%p): %d->(",N,N->debut & LEAF_BIT_INV); tmp = ((Feuille *)N)->fin_deb; while(tmp != LISTE_END) { printf("%d,",(getValue(Liste_positions_fin,tmp)!=-1)?getValue(Liste_positions_fin,tmp):-global_indice); tmp = getIndiceSuivant(Liste_positions_fin,tmp); if (tmp & LISTE_CHANGE_BIT) printf("-,"); tmp = tmp & LISTE_CHANGE_BIT_INV; } printf(") ["); printBitTab(((Feuille *)N)->sequences); tmp = getValue(Liste_positions_fin,((Feuille *)N)->fin_deb); d=N->debut & LEAF_BIT_INV; f=(tmp==-1)?global_indice-1:tmp-1; printf("] {%d}= (",nbSequenceInBitTab(((Feuille *)N)->sequences)); for(j=d;j<=f;j++) putchar(Sequence[N->sequence_number][j]); printf(")\n"); } } else { *nb_noeud = *nb_noeud + 1; if (affichage) { printf("Noeud (%p): %d->%d (",N,N->debut,N->fin); d=N->debut; f=(N->fin-1)>=0?N->fin-1:0; for(j=d;j<=f;j++) putchar(Sequence[N->sequence_number][j]); printf(") ["); printBitTab(N->sequences); printf("]{%d}\n%s sl : %p\n%s fils :\n",nbSequenceInBitTab(N->sequences),indent,N->suffixe_link,indent); strcat(indent,"\t"); } for(i=0;ifils[i]) { *nb_fils=*nb_fils + 1; if (affichage) printf("%s--> %c 
:",indent,Sequence[N->fils[i]->sequence_number][N->fils[i]->debut & LEAF_BIT_INV]); Print_Tree_Indent(N->fils[i],nb_noeud,nb_feuilles,nb_fils,indent,affichage); } } indent[strlen(indent)-1]='\0'; if (affichage) printf("%s fin fils\n",indent); } } void Print_Tree(Noeud *N,int affichage,int stat) { char *indent = (char *)malloc(1000); int nb_noeud=0,nb_feuilles=0; int nb_fils=0; indent[0]='\0'; Print_Tree_Indent(N,&nb_noeud,&nb_feuilles,&nb_fils,indent, affichage); if (stat) { printf("nombre de Noeud (%d + %d) : %d \n" "nombre de Feuille (%d) : %d \n" "nombre de fils : %d \n" "nombre de fils/Noeud : %f \n" "cardinal de l'alphabet : %d \n", (int)sizeof(Noeud),(int)sizeof(Noeud *)*ALPHA_CARD,nb_noeud, (int)sizeof(Feuille),nb_feuilles, nb_fils,(double )((double )nb_fils/(double )nb_noeud), ALPHA_CARD); } free(indent); } void Print_Liste(Liste *liste) { Liste *tmp = liste; while(tmp) { printf("( %p )->",tmp->feuille); tmp = tmp->suiv; } printf("\n"); } /* Recursif!!!! */ Noeud *FindString(Noeud *N,int deb,int fin,Noeud **pere,int *restant,int *pos_in_edge) { Noeud *res; int start,end; int i,j; if (N->debut & LEAF_BIT) /* Si N est un feuille. */ { start = N->debut & LEAF_BIT_INV; end = getValue(Liste_positions_fin,((Feuille *) N)->fin_deb); for(i = start,j=deb; ( (isequence_number][i] != Sequence[current_sequence][j] ) { *restant = j - fin ; /* <0 */ *pos_in_edge = i - start ; return N; } if ((i==end) && (j==fin)) { /* Pile poil... 
C'est la */ *restant = 1; *pos_in_edge = 0; return N; } if (i==end) { *restant = 2; *pos_in_edge = 0; return N; } *restant = 3; *pos_in_edge = 0; return (N); } res = Get_Child_Start_Letter(N,deb); if (res == NULL) { *restant = deb - fin ; /* <0 */ *pos_in_edge = -1; return N; } if (res->debut & LEAF_BIT) { *pere = N; return FindString(res,deb,fin,pere,restant,pos_in_edge); } start = res->debut; end = res->fin; *pere = N; for(i = start,j=deb; ( (isequence_number][i] != Sequence[current_sequence][j] ) { *restant = j - fin; /* <0 */ *pos_in_edge = i - start; return res; } if ((i==end) && (j==fin)) { *restant = 1; *pos_in_edge = 0; return res; } if (i==end) { return FindString(res,deb + end - start,fin,pere,restant,pos_in_edge); } *restant = 3; *pos_in_edge = 0; return (res); } void UpdateBit_TabForAllTree(Noeud *N) { int i,j; if (N==NULL) return; if (N->debut & LEAF_BIT) return; for(i=0;ifils[i]); j=0; while(N->fils[j]==NULL) j++; if (N->fils[j]->debut & LEAF_BIT) CopyBitTab(&(N->sequences),((Feuille *)N->fils[j])->sequences); else CopyBitTab(&(N->sequences),N->fils[j]->sequences); for(i=j+1;ifils[i]) { if (N->fils[i]->debut & LEAF_BIT) fusionneBitTab(&(N->sequences),((Feuille *)N->fils[i])->sequences); else fusionneBitTab(&(N->sequences),N->fils[i]->sequences); } } N->nb_element_bt = nbSequenceInBitTab(N->sequences); } void Print_BTTree_Debug(Noeud *N,int *nb_noeud) { int i,j,max; if (N==NULL) return; if (N->debut & LEAF_BIT) { *nb_noeud = *nb_noeud + 1; printf("%d\t",*nb_noeud); max = getValue(Liste_positions_fin,((Feuille *)N)->fin_deb); for(j=N->debut & LEAF_BIT_INV;jsequence_number][j]); printf("\t"); printf("%d",nbSequenceInBitTab(((Feuille *)N)->sequences)); printf("\t"); printBitTab(((Feuille *)N)->sequences); printf("\n"); return; } *nb_noeud = *nb_noeud + 1; printf("%d\t",*nb_noeud); for(j=N->debut ;jfin;j++) printf("%c",Sequence[N->sequence_number][j]); printf("\t"); printf("%d",nbSequenceInBitTab(N->sequences)); printf("\t"); printBitTab(N->sequences); 
printf("\n"); for(i=0;ifils[i],nb_noeud); } SMILEv1.47/P_BLOCS/src/allocateurs.c0000644002404200237300000001357410066543642016540 0ustar lamaaoc00000000000000/******************************************************************************/ /* SMILE v1.47 - Extraction of structured motifs common to several sequences */ /* Copyright (C) 2004 L.Marsan (lama -AT- prism.uvsq.fr) */ /* */ /* This program is free software; you can redistribute it and/or */ /* modify it under the terms of the GNU General Public License */ /* as published by the Free Software Foundation; either version 2 */ /* of the License, or (at your option) any later version. */ /* */ /* This program is distributed in the hope that it will be useful, */ /* but WITHOUT ANY WARRANTY; without even the implied warranty of */ /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ /* GNU General Public License for more details. */ /* */ /* You should have received a copy of the GNU General Public License */ /* along with this program; if not, write to the Free Software */ /* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
*/ /******************************************************************************/ #include Allocateur Noeud_Alloc; Allocateur Feuille_Alloc; Liste *FREE_LISTE=NULL; Liste *ALLOC_LISTE=NULL; #ifdef DEBUG_J_ALLOC int nb_alloc_alloc_cell=0; int nb_liste_cell=0; #endif Alloc_Cell *Alloc_Alloc_Cell(int obj_size) { Alloc_Cell *cel; #ifdef DEBUG_J_ALLOC nb_alloc_alloc_cell++; #endif cel = (Alloc_Cell *) malloc(sizeof(Alloc_Cell)); if (cel == NULL) { fprintf(stderr,"No Enougth space\nProgram Abord\n"); exit(-1); } cel->suivant = NULL; cel->data = (unsigned char *)malloc(getpagesize()); if (cel->data == NULL) { fprintf(stderr,"No Enougth space\nProgram Abord\n"); exit(-1); } cel->current=0; cel->max = getpagesize()/obj_size; return cel; } Noeud *Alloc_Noeud(void ) { int i; Noeud *tmp; if (Noeud_Alloc.last->max==Noeud_Alloc.last->current) { Noeud_Alloc.last->suivant = Alloc_Alloc_Cell(sizeof(Noeud)); Noeud_Alloc.last = Noeud_Alloc.last->suivant ; tmp = (Noeud *)(Noeud_Alloc.last->data ); Noeud_Alloc.last->current+=1; } else { tmp = (Noeud *)(Noeud_Alloc.last->data + sizeof(Noeud)*Noeud_Alloc.last->current); Noeud_Alloc.last->current+=1; } tmp->debut = 0 ; tmp->fin = 0 ; tmp->suffixe_link = NULL; tmp->fils = (Noeud **)malloc(sizeof(Noeud *)*ALPHA_CARD); tmp->sequence_number = current_sequence; if (!tmp->fils) { fprintf(stderr,"No Enougth space\nProgram Abord\n"); exit(-1); } for(i=0;ifils[i]=NULL; tmp->sequences = AllocBitTab(); return tmp; } Feuille *Alloc_Feuille(void ) { Feuille *tmp; if (Feuille_Alloc.last->max==Feuille_Alloc.last->current) { Feuille_Alloc.last->suivant = Alloc_Alloc_Cell(sizeof(Feuille)); Feuille_Alloc.last = Feuille_Alloc.last->suivant ; tmp = (Feuille *)(Feuille_Alloc.last->data ); Feuille_Alloc.last->current+=1; } else { tmp = (Feuille *)(Feuille_Alloc.last->data + sizeof(Feuille)*Feuille_Alloc.last->current); Feuille_Alloc.last->current+=1; } tmp->debut = LEAF_BIT; tmp->fin_deb = -1; tmp->sequence_number = current_sequence; tmp->sequences = 
AllocBitTab(); addBitTabValue(&(tmp->sequences),current_sequence); return tmp; } Liste *Alloc_Liste(void) { Liste *l; if (FREE_LISTE==NULL) { l =(Liste *)malloc(sizeof(Liste)); #ifdef DEBUG_J_ALLOC nb_liste_cell++; #endif if (!l) { fprintf(stderr,"No Enougth space\nProgram Abord\n"); exit(-1); } l->suiv = NULL; l->feuille = NULL; } else { l = FREE_LISTE; FREE_LISTE = FREE_LISTE->suiv; l->suiv = NULL; l->feuille = NULL; } return l; } void Free_Liste(Liste *l) { l->suiv = FREE_LISTE; FREE_LISTE = l; } void Free_All_Liste_Cell(void) { Liste *tmp; #ifdef DEBUG_J_ALLOC int nb=0; #endif while(FREE_LISTE != NULL) { tmp = FREE_LISTE; FREE_LISTE = FREE_LISTE->suiv; free(tmp); #ifdef DEBUG_J_ALLOC nb++; #endif } #ifdef DEBUG_J_ALLOC printf("%d/%d Liste Cell désalouées...\n",nb,nb_liste_cell); #endif FREE_LISTE=NULL; } void Init_Allocateurs(void ) { #ifdef DEBUG_J_ALLOC printf("Init Allocateur : %d alloc cell, %d liste cell\n",nb_alloc_alloc_cell,nb_liste_cell); #endif Noeud_Alloc.first=Alloc_Alloc_Cell(sizeof(Noeud)); Noeud_Alloc.last=Noeud_Alloc.first; Feuille_Alloc.first=Alloc_Alloc_Cell(sizeof(Feuille)); Feuille_Alloc.last=Feuille_Alloc.first; FREE_LISTE=NULL; } void Free_Arbre(Noeud *Racine) { Alloc_Cell *tmp,*tmp2; Noeud *n; Feuille *f; int i; #ifdef DEBUG_J_ALLOC int nb=0; #endif tmp=Noeud_Alloc.first; while(tmp!=NULL) { #ifdef DEBUG_J_ALLOC nb++; #endif for(i=0; icurrent;i++) { n = (Noeud *) (tmp->data + i*sizeof(Noeud)); free(n->fils); free(n->sequences); } tmp2=tmp->suivant; free(tmp->data); free(tmp); tmp=tmp2; } tmp=Feuille_Alloc.first; while(tmp!=NULL) { #ifdef DEBUG_J_ALLOC nb++; #endif for(i=0; icurrent;i++) { f = (Feuille *) (tmp->data + i*sizeof(Feuille)); free(f->sequences); } tmp2=tmp->suivant; free(tmp->data); free(tmp); tmp=tmp2; } Noeud_Alloc.first=Noeud_Alloc.last=Feuille_Alloc.first=Feuille_Alloc.last=NULL; #ifdef DEBUG_J_ALLOC printf("%d/%d alloc cell OK \n",nb,nb_alloc_alloc_cell); #endif } 
SMILEv1.47/P_BLOCS/src/global_variables.c0000644002404200237300000000322310066543665017505 0ustar lamaaoc00000000000000/******************************************************************************/ /* SMILE v1.47 - Extraction of structured motifs common to several sequences */ /* Copyright (C) 2004 L.Marsan (lama -AT- prism.uvsq.fr) */ /* */ /* This program is free software; you can redistribute it and/or */ /* modify it under the terms of the GNU General Public License */ /* as published by the Free Software Foundation; either version 2 */ /* of the License, or (at your option) any later version. */ /* */ /* This program is distributed in the hope that it will be useful, */ /* but WITHOUT ANY WARRANTY; without even the implied warranty of */ /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ /* GNU General Public License for more details. */ /* */ /* You should have received a copy of the GNU General Public License */ /* along with this program; if not, write to the Free Software */ /* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ /******************************************************************************/ #ifndef GLOB_VAR #define GLOB_VAR #include unsigned char Translation_Table[255]; unsigned char **Sequence=NULL; ListePositions *Liste_positions_fin=NULL; int global_indice=0; int current_sequence=0; #endif SMILEv1.47/P_BLOCS/src/sub_suffix_tree.c0000644002404200237300000001351310066543705017407 0ustar lamaaoc00000000000000/******************************************************************************/ /* SMILE v1.47 - Extraction of structured motifs common to several sequences */ /* Copyright (C) 2004 L.Marsan (lama -AT- prism.uvsq.fr) */ /* */ /* This program is free software; you can redistribute it and/or */ /* modify it under the terms of the GNU General Public License */ /* as published by the Free Software Foundation; either version 2 */ /* of the License, or (at your option) any later version. 
*/ /* */ /* This program is distributed in the hope that it will be useful, */ /* but WITHOUT ANY WARRANTY; without even the implied warranty of */ /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ /* GNU General Public License for more details. */ /* */ /* You should have received a copy of the GNU General Public License */ /* along with this program; if not, write to the Free Software */ /* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ /******************************************************************************/ #include #include #include #include #include #include #define SIZE 45 extern char *optarg; extern int optind, opterr, optopt; static struct option programme_options[] = { {"seq", 1, NULL, 'f'}, {"size", 1, NULL, 's'}, {"print", 0, NULL, 'p'}, {"alpha", 1, NULL, 'a'}, {"help",0,NULL, 'h'}, {"joker",0,NULL, 'j'}, {"stat",0,NULL,'i'}, {0, 0, 0, 0} }; void printHelp(void) { printf("option : --seq -f \n" " --size -s \n" " --print -p : print the tree\n" " --alpha -a \n" " --joker -j : whith joker\n" " --help -h : print this help\n"); } void readParametres(int argc,char **argv,int *windows_size,int *print,int *stat,char **fileName,int *joker,char **alphabet) { int c; int index; *windows_size = DEFAULT_WINDOW_SIZE_OPTION; *print = 0; *joker = 0; *alphabet = NULL; *fileName = NULL; *stat=0; while(1) { c = getopt_long(argc,argv,"f:s:a:phji", programme_options, &index); switch(c) { case -1 : if (*fileName==NULL) { fprintf(stderr,"option -f missing\n"); fprintf(stderr,"-h for more information\n"); exit(0); } if (*alphabet==NULL) { fprintf(stderr,"option -a missing\n"); fprintf(stderr,"-h for more information\n"); exit(0); } if (*windows_size<=3) { fprintf(stderr,"window size must be more then 3\n"); fprintf(stderr,"-h for more information\n"); exit(0); } return; case '?': fprintf(stderr,"Unknow option %s \n",optarg); fprintf(stderr,"-h for more information\n"); exit(0); break; case 'a': *alphabet =(char 
*)malloc(strlen(optarg)+1); strcpy(*alphabet,optarg); break; case 's': *windows_size = atoi(optarg); break; case 'i': *stat = 1; break; case 'p': *print = 1; break; case 'f': *fileName = (char *)malloc(strlen(optarg)+1); strcpy(*fileName,optarg); break; case 'j': *joker = 1; break; case 'h' : printHelp(); exit(0); break; } }; } void printParametres(int windows_size,int print,int stat,char *fileName,int joker,char *alphabet) { printf("sequence file name : %s\n" "alphabet : %s\n" "joker : %s\n" "windows size : %d\n" "print tree : %s\n" "statistiques : %s\n", fileName,alphabet,(joker)?"yes":"no", windows_size,(print)?"yes":"no", (stat)?"yes":"no"); } int main(int argc, char **argv) { Noeud *Arbre; FILE *fichier; FastaSequence *seq; int ok; int i; int window_size = 4; int print; int stat; int joker; char *filename,*alphabet; // A virer char *TEXT[5000]; int indice_text = 0; // readParametres(argc,argv,&window_size,&print,&stat,&filename,&joker,&alphabet); printParametres(window_size,print,stat,filename,joker,alphabet); fichier = fopen(filename,"r"); if (fichier==NULL) { fprintf(stderr,"Invalide sequence file name\n"); exit(0); } do { seq = NewFastaSequence(); ok = ReadFastaSequence(fichier,seq); if (ok) { TEXT[indice_text] = (char *)malloc(sizeof(char )*seq->length+2); memcpy(TEXT[indice_text],seq->seq,seq->length); TEXT[indice_text][seq->length]='$'; TEXT[indice_text][seq->length+1]='\0'; indice_text++; } } while(ok); Init_All(alphabet,joker,indice_text); Arbre=Construction_Arbre(TEXT[0],window_size); //printf("Arbre de la premiere sequence %s :\n",TEXT[0]); //Print_Tree(Arbre,1,0); for(i=1;i extern int **code2Sauts; ListePositions *Alloc_ListePositions(int size) { ListePositions * tmp = (ListePositions *)malloc(sizeof(ListePositions)); if (!tmp) { fprintf(stderr,"No Enougth space\nProgram Abord\n"); exit(-1); } tmp->tab[0] = (int *)malloc(sizeof(int) * size); tmp->tab[1] = (int *)malloc(sizeof(int) * size); #if DEBUG_JTREE printf("Alloc Liste.... 
%d \n",sizeof(int) * size); #endif if ((!tmp->tab[0]) || (!tmp->tab[1])) { fprintf(stderr,"No Enougth space\nProgram Abord\n"); exit(-1); } memset(tmp->tab[0],0,sizeof(int) * size); memset(tmp->tab[1],0,sizeof(int) * size); tmp->last_cell = 0; tmp->tab_size = size; return tmp; } int ChercheDerniereCelluleDansListe(ListePositions *lpos,int deb_liste) { if (lpos == NULL) return -1; while(lpos->tab[1][deb_liste] != LISTE_END) deb_liste = lpos->tab[1][deb_liste] & LISTE_CHANGE_BIT_INV; return deb_liste; } int Ajoute_Position_Liste(ListePositions *lpos,int *deb_liste,int position,int change_seq) { if (!lpos) return -2; if (lpos->last_cell == lpos->tab_size) { #if DEBUG_JTREE printf("realloc .... LPOS\n"); #endif lpos->tab[0] = (int *)realloc(lpos->tab[0],sizeof(int)*(lpos->tab_size + POS_ALLOC_STEP)); lpos->tab[1] = (int *)realloc(lpos->tab[1],sizeof(int)*(lpos->tab_size + POS_ALLOC_STEP)); lpos->tab_size+=POS_ALLOC_STEP; } lpos->tab[0][lpos->last_cell] = position ; lpos->tab[1][lpos->last_cell] = LISTE_END ; if (*deb_liste!=-1) { if (change_seq) lpos->tab[1][lpos->last_cell] = *deb_liste | LISTE_CHANGE_BIT; else lpos->tab[1][lpos->last_cell] = *deb_liste; } *deb_liste = lpos->last_cell; (lpos->last_cell)++; return 1; } int getValue(ListePositions *lpos,int i) { if (lpos==NULL) return -2; if ((i<0) || (i>lpos->last_cell)) return -3; return lpos->tab[0][i] ;/*& LISTE_CHANGE_BIT_INV; */ } void setListeValue(ListePositions *lpos,int i,int value) { if ((lpos==NULL) || (i<0) || (i>lpos->last_cell)) return ; lpos->tab[0][i] = value; } int getIndiceSuivant(ListePositions *lpos,int i) { if (lpos==NULL) return -2; if ((i<0) || (i>lpos->last_cell)) return -3; return lpos->tab[1][i]; } void Free_ListePositions(ListePositions *lpos) { if (lpos == NULL) return ; free(lpos->tab[0]); free(lpos->tab[1]); free(lpos); } int Print_Positions_Dynamique(FILE *f, Feuille *n, LongSeq longway, P_Criteres cr, int code) { int indice = n->fin_deb, occurrence = 0; #if AFF_OCC int longueur, *i, 
nb_element = ((n->sequences[0] & 0x7F) << 8) | n->sequences[1]; nb_element--; longueur = getValue(Liste_positions_fin,indice)-(n->debut&LEAF_BIT_INV)+longway; #endif while(indice != LISTE_END) { #if AFF_OCC fprintf(f,"Seq %5d Pos %5d", ((unsigned short int *)(n->sequences + 2))[nb_element], getValue(Liste_positions_fin,indice)-longueur); if(cr && cr->bloc != 1) { fprintf(f, "\t"); for(i=code2Sauts[code]; i!=code2Sauts[code]+cr->bloc-1; i++) fprintf(f, "%d ",*i); } fprintf(f, "\n"); #endif indice = getIndiceSuivant(Liste_positions_fin,indice); #if AFF_OCC if (indice & LISTE_CHANGE_BIT) nb_element--; #endif indice = indice & LISTE_CHANGE_BIT_INV; occurrence++; } return occurrence; } /******************************************************************************/ /* */ /******************************************************************************/ int Print_Positions_Statique(FILE *f, Feuille *n, LongSeq longway, P_Criteres cr, int code) { unsigned char mask = 0x01; int compteur = SIZE_STATIC_BIT_TAB-1, sequence = 8 * compteur + 6, indice = n->fin_deb, occurrence = 0; #if AFF_OCC int longueur, *i; longueur = getValue(Liste_positions_fin,indice)-(n->debut&LEAF_BIT_INV)+longway; #endif while ( (n->sequences[compteur] & mask) == 0 ) { mask <<= 1; sequence--; if ( mask == 0 ) { mask = 0x01; compteur--; } } while(indice != LISTE_END) { #if AFF_OCC fprintf(f,"Seq %5d Pos %5d",sequence, getValue(Liste_positions_fin,indice)-longueur); if(cr && cr->bloc != 1) { fprintf(f, "\t"); for(i=code2Sauts[code]; i!=code2Sauts[code]+cr->bloc-1; i++) fprintf(f, "%d ",*i); } fprintf(f, "\n"); #endif indice = getIndiceSuivant(Liste_positions_fin,indice); if ( (indice & LISTE_CHANGE_BIT) && ( indice != LISTE_END ) ) { do { mask <<= 1; sequence--; if ( mask == 0 ) { mask = 0x01; compteur--; } } while ( (n->sequences[compteur] & mask) == 0 ); } occurrence++; indice = indice & LISTE_CHANGE_BIT_INV; } return occurrence; } int Print_Positions(FILE *f, Feuille *n, LongSeq longway, P_Criteres cr, 
int code) { /* #if DEBUG */ /* printf("J'arrive dans Print avec long %d\n",longway); */ /* #endif */ if (n->sequences[0] & 0x80) return (Print_Positions_Dynamique(f, n, longway, cr, code)); return (Print_Positions_Statique(f, n, longway, cr, code)); } SMILEv1.47/P_BLOCS/src/bit_tab2.c0000644002404200237300000000336610066543652015707 0ustar lamaaoc00000000000000/******************************************************************************/ /* SMILE v1.47 - Extraction of structured motifs common to several sequences */ /* Copyright (C) 2004 L.Marsan (lama -AT- prism.uvsq.fr) */ /* */ /* This program is free software; you can redistribute it and/or */ /* modify it under the terms of the GNU General Public License */ /* as published by the Free Software Foundation; either version 2 */ /* of the License, or (at your option) any later version. */ /* */ /* This program is distributed in the hope that it will be useful, */ /* but WITHOUT ANY WARRANTY; without even the implied warranty of */ /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ /* GNU General Public License for more details. */ /* */ /* You should have received a copy of the GNU General Public License */ /* along with this program; if not, write to the Free Software */ /* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
*/ /******************************************************************************/ #include #include void initBitTab(int nb_seq) {} Bit_Tab *AllocBitTab() { return NULL; } void addBitTabValue(Bit_Tab **tab,int value) {} void fusionneBitTab(Bit_Tab **tab1,Bit_Tab *tab2) {} // 1 <- 1 & 2 int nbSequenceInBitTab(Bit_Tab *tab) { return 0; } void printBitTab(Bit_Tab *tab) {} SMILEv1.47/P_BLOCS/src/liste_pos2.c0000644002404200237300000000427510066543676016312 0ustar lamaaoc00000000000000/******************************************************************************/ /* SMILE v1.47 - Extraction of structured motifs common to several sequences */ /* Copyright (C) 2004 L.Marsan (lama -AT- prism.uvsq.fr) */ /* */ /* This program is free software; you can redistribute it and/or */ /* modify it under the terms of the GNU General Public License */ /* as published by the Free Software Foundation; either version 2 */ /* of the License, or (at your option) any later version. */ /* */ /* This program is distributed in the hope that it will be useful, */ /* but WITHOUT ANY WARRANTY; without even the implied warranty of */ /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ /* GNU General Public License for more details. */ /* */ /* You should have received a copy of the GNU General Public License */ /* along with this program; if not, write to the Free Software */ /* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
*/ /******************************************************************************/ #include int *Positions = NULL; int current_pos=0; ListePositions *Alloc_ListePositions(int size) { Positions =(int *)malloc(sizeof(int)*1000000); current_pos=0; return NULL; } int Ajoute_Position_Liste(ListePositions *lpos,int *deb_liste,int position,int change_seq) { if (*deb_liste == -1) { *deb_liste = current_pos; Positions[current_pos] = position; current_pos++; } else { Positions[*deb_liste] = position; } return 1; } int getValue(ListePositions *lpos,int i) { return Positions[i]; } void setListeValue(ListePositions *lpos,int i,int value) { Positions[i]=value; } int getIndiceSuivant(ListePositions *lpos,int i) { return LISTE_END; } void Free_ListePositions(ListePositions *lpos) { } SMILEv1.47/P_BLOCS/src/construction.c0000644002404200237300000007233010066543656016754 0ustar lamaaoc00000000000000/******************************************************************************/ /* SMILE v1.47 - Extraction of structured motifs common to several sequences */ /* Copyright (C) 2004 L.Marsan (lama -AT- prism.uvsq.fr) */ /* */ /* This program is free software; you can redistribute it and/or */ /* modify it under the terms of the GNU General Public License */ /* as published by the Free Software Foundation; either version 2 */ /* of the License, or (at your option) any later version. */ /* */ /* This program is distributed in the hope that it will be useful, */ /* but WITHOUT ANY WARRANTY; without even the implied warranty of */ /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ /* GNU General Public License for more details. */ /* */ /* You should have received a copy of the GNU General Public License */ /* along with this program; if not, write to the Free Software */ /* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
*/ /******************************************************************************/ #include Noeud *Construction_Arbre(unsigned char *S,int taille_fenetre) { Liste *Debut_liste = Alloc_Liste(); Liste *Fin_liste =NULL; Noeud *Fin_liste_pere; int nb_element; int fictive; Noeud *Racine = Alloc_Noeud(); Feuille *feuille = Alloc_Feuille(); if (!S) { fprintf(stderr,"Construction_Arbre : Invalide String\nProgram Abord\n"); exit(-1); } if (taille_fenetre <=0) { fprintf(stderr,"Construction_Arbre: taille fenetre = 0!\n"); exit(-1); } Sequence[0] = S; Liste_positions_fin = Alloc_ListePositions(strlen((const char *) S)*20); feuille->debut = 0 | LEAF_BIT; Ajoute_Position_Liste(Liste_positions_fin,&(feuille->fin_deb),-1,0); Fin_liste_pere = Racine; nb_element = 1; Debut_liste->feuille = feuille; Fin_liste = Debut_liste; Ajoute_Fils_Au_Noeud(Racine,(Noeud *)feuille); if (taille_fenetre >= strlen((const char *) S)) taille_fenetre=strlen((const char *) S); Premiere_Phase(Racine,taille_fenetre,&Debut_liste,&Fin_liste,&Fin_liste_pere,&nb_element,2); /*----------------------------------------*/ fictive = Deuxieme_Phase(Racine,taille_fenetre,&Debut_liste,&Fin_liste,&Fin_liste_pere,&nb_element,taille_fenetre,0,0); /*----------------------------------------*/ Troisieme_Phase(Racine,taille_fenetre,&Debut_liste,&Fin_liste,&Fin_liste_pere,&nb_element,fictive); /*----------------------------------------*/ #if DEBUG_JTREE printf("nb_alloc_noeud = %d ; nb_alloc_feuille = %d; nb_alloc_liste = %d; nb_alloc_tab = %d\n",nb_alloc_noeud,nb_alloc_feuille,nb_alloc_liste,nb_alloc_tab); printf("TAILLE TOTALE ALLOUEE : %d\n",nb_alloc_noeud*(sizeof(Noeud)+sizeof(Noeud *)*ALPHA_CARD) + nb_alloc_feuille * (sizeof(Feuille))+ nb_alloc_liste * sizeof(Liste)+ Liste_positions_fin->tab_size * sizeof(int) * 2+ nb_alloc_tab * 2); #endif return Racine; } void Premiere_Phase(Noeud *Racine, int taille_fenetre, Liste **debut_liste, Liste **fin_liste, Noeud **fin_liste_pere, int *nb_element_liste, int start_indice) { 
int i,j; int result_type; Noeud *result; Noeud *result_pere; Noeud *last_created_node; /* Construction de l'arbre pour S[0..taille_fenetre] */ for(i=start_indice;ifeuille))); /* on insere S[*nb_element_liste ... i]. */ for(j=*nb_element_liste;jsuffixe_link)) result = Add_Fast_String(Racine,j,i,&result_type,&result_pere); else /* sinon on ajoute par lien suffixe... */ result = Add_Fast_String(result->suffixe_link,i+result_type,i,&result_type,&result_pere); /* Si il a eu création d'une feuille */ /* 1: à un noeud déjà existant */ /* 3: coupure d'un arc avec création d'un noeud. */ if ((result_type == 1) || (result_type == 3)) { /* On met l'indice de fin de la feuille crée à -1 (indice global) */ setListeValue(Liste_positions_fin,((Feuille *)result)->fin_deb,-1); /* On ajoute la feuille créee à la liste des feuilles */ (*fin_liste)->suiv = Alloc_Liste(); (*fin_liste) = (*fin_liste)->suiv; (*fin_liste)->feuille = (Feuille *)result; /* On positionne la variable fin_liste_pere au pere */ /* de la feuille créee. */ *fin_liste_pere = result_pere; /* On incremente le nombre de feuille dans la liste */ *nb_element_liste = *nb_element_liste + 1; /* Si à l'étape precedente on a crée un noeud (lien suffixe) */ if (last_created_node) last_created_node->suffixe_link = result_pere; /* Alors on positionne le lien suffixe sur le pere de la feuille. */ /* on reinitialise la variable last_created_node a NULL */ last_created_node = NULL; /* Si on a crée un Noeud */ if (result_type == 3) last_created_node = result_pere; } /* la chaine est deja dans l'arbre.... */ else if (result_type<0) { /* si il y a un lien suffixe pendent .... */ if (last_created_node) /* on le positionne */ last_created_node->suffixe_link = result; /* on le reinitialise. 
*/ last_created_node = NULL; /* on s'arrete */ break; } else /* sinon car 2 : rien */ last_created_node = NULL; if (result_type > 0) /* Si le resultat est la creation d'une feuille */ /* alors on va repositionner de maniere a reprendre sur un noeud */ /* afin de faire l'ajout suivant par lien suffixe. */ { if (result_type==3) /* Si il y a creation d'un noeud : on remonte */ /* au pere de celui-ci : il est mis dans le lien suffixe */ /* du noeud crée. et on recalcul la longeur a parcourir */ /* a partir du lien suffixe. */ { result_type = - (seg_taille(result_pere) + 1); result = result_pere->suffixe_link; result_pere->suffixe_link = NULL; } else /* sinon : il y a juste eu ajout d'une feuille */ { /* la feuille vient d'être créée : elle mesure 1. */ result_type = -1; /* on remet result sur le pere de la feuille. */ result = result_pere; } } } } } int Deuxieme_Phase(Noeud *Racine, int taille_fenetre, Liste **debut_liste, Liste **fin_liste, Noeud **fin_liste_pere, int *nb_element_liste, int start_indice, int fict, int ini_res_type) { int i,j; int result_type=ini_res_type; int taille_sequence; int fictive = fict; Liste *tmp_liste; Noeud *result; Noeud *result_pere; Noeud *last_created_node; Feuille *tmp_feuille; Noeud *tmp_noeud; taille_sequence = strlen((const char *) Sequence[current_sequence]); /* Construction de l'arbre pour S[k...m-1] */ for(i=start_indice;i0) result_type=-result_type; else result_type = -(seg_taille((Noeud *)((*fin_liste)->feuille))) ; if (fictive) /* On reprend sur debut liste qui est une cellule fictive : */ /* elle ne correspond pas à une feuille crée. mais l'extension */ /* se fait dans cette feuille... */ { /* On verifie que l'extension en cours est bien dans la cellule. */ tmp_noeud = result; result = Add_Fast_String(result,i+result_type,i,&result_type,&result_pere); if (result_type == 1) { result_type = -1; result = result_pere; } else /* On a cree un une feuille et un Noeud */ if (result_type==3) { /* Alors result est la feuille cree. 
*/ /* result_pere est le noeud cree. */ /* result_pere->suffixe_link est le pere noeud cree. */ result_type = -(seg_taille(result_pere)+1); result = result_pere->suffixe_link; result_pere->suffixe_link = NULL; last_created_node = result_pere; } else if (result_type == 2) { result_type = -(seg_taille(result)); result = tmp_noeud; } /* Sinon : On a cherché à inserer une chaine de longueur k */ /* si elle est deja dans l'arbre, elle aboutie a une feuille */ /* on ajoute alors la position i dans la feuille: ce qui est */ /* effectué par la fonction fast_string. */ /* ????? else result = result->suffixe_link ????? */ } else /* On fixe l'indice de fin de la premiere cellule de la liste(vrais feuille) à i: */ { setListeValue(Liste_positions_fin,(*debut_liste)->feuille->fin_deb,i); if (result_type>0) result_type=-result_type; else result_type = -(seg_taille((Noeud *)((*fin_liste)->feuille))) ; } fictive = 0; /* on insere S[*nb_element_liste ... i]. */ for(j=*nb_element_liste;jsuffixe_link)) result = Add_Fast_String(Racine,j,i,&result_type,&result_pere); else /* sinon on ajoute par lien suffixe... */ result = Add_Fast_String(result->suffixe_link,i+result_type,i,&result_type,&result_pere); /* Si il a eu création d'une feuille */ /* 1: à un noeud déjà existant */ /* 3: coupure d'un arc avec création d'un noeud. */ if ((result_type == 1) || (result_type == 3)) { /* On met l'indice de fin de la feuille crée à -1 (indice global) */ setListeValue(Liste_positions_fin,((Feuille *)result)->fin_deb,-1); /* On ajoute la feuille créee à la liste des feuilles */ (*fin_liste)->suiv = Alloc_Liste(); (*fin_liste) = (*fin_liste)->suiv; (*fin_liste)->feuille = (Feuille *)result; /* On positionne la variable fin_liste_pere au pere */ /* de la feuille créee. 
*/ *fin_liste_pere = result_pere; /* On incremente le nombre de feuille dans la liste */ *nb_element_liste = *nb_element_liste + 1; /* Si à l'étape precedente on a crée un noeud (lien suffixe) */ if (last_created_node) last_created_node->suffixe_link = result_pere; /* Alors on positionne le lien suffixe sur le pere de la feuille. */ /* on réinitialise la variable last_created_node a NULL */ last_created_node = NULL; /* Si on a crée un Noeud */ if (result_type == 3) last_created_node = result_pere; } /* la chaine est deja dans l'arbre.... */ else if (result_type<0) { /* si il y a un lien suffixe pendent .... */ if (last_created_node) last_created_node->suffixe_link = result; /* on le positionne */ last_created_node = NULL; /* on le reinitialise. */ /* On doit verifier que l'on n'a suffisement avancé dans la liste */ /* .a.d. que *nb_element_liste >= (i+1) - k. */ if (*nb_element_liste==(i-taille_fenetre+1)) { /* // On sait que le resultat est le pere d'une feuille */ /* // car si *nb_element=i-taille_sequence alors la taille */ /* // de la derniere chaine cherchée est k. comme l'arbre */ /* // se coupe a la hauteur k... */ /* // De plus on sait que qu'il n'y a plus d'element dans la liste */ /* // car on l'indente d'une fois au fur et à mesure que */ /* // l'on progresse.... */ tmp_feuille = (Feuille *)Get_Child_Start_Letter(result,i+result_type); if (-seg_taille((Noeud *)tmp_feuille)==result_type) result_type = 1; if (tmp_feuille->debut & LEAF_BIT) { (*fin_liste_pere) = result; (*fin_liste)->suiv = Alloc_Liste(); (*fin_liste) = (*fin_liste)->suiv; (*fin_liste)->feuille = tmp_feuille; *nb_element_liste = *nb_element_liste + 1; } else { (*fin_liste_pere) = (Noeud *)tmp_feuille; (*fin_liste)->suiv = Alloc_Liste(); (*fin_liste) = (*fin_liste)->suiv; (*fin_liste)->feuille = tmp_feuille; /* FAUX */ *nb_element_liste = *nb_element_liste + 1; } fictive = 1; /* On indique pour la prochaine reprise... 
*/ /* // Probleme : lors de la reprise au i suivant on va */ /* // initialise result a ce noeud et on va compter */ /* // la longeur N-Feuille pour la longeur du suffixe link. */ } break; /* on s'arrete */ } else last_created_node = NULL; /* sinon car 2 : rien */ if (result_type > 0) /* Si le resultat est la creation d'une feuille */ /* // alors on va repositionner de maniere a reprendre sur un noeud */ /* // afin de faire l'ajout suivant par lien suffixe. */ { if (result_type==3) /* Si il y a creation d'un noeud : on remonte */ /* // au pere de celui-ci : il est mis dans le lien suffixe */ /* // du noeud crée. et on recalcul la longeur a parcourir */ /* // a partir du lien suffixe. */ { result_type = - (seg_taille(result_pere) + 1); result = result_pere->suffixe_link; result_pere->suffixe_link = NULL; } else /* sinon : il y a juste eu ajout d'une feuille */ if (result_type==1) { result_type = -1; /* la feuille vient d'être créée : elle mesure 1*/ result = result_pere; /* on remet result sur le pere de la feuille*/ } else /* cas 2. */ { result_type = - seg_taille(result); result = result_pere; } } } /* // On avance dans la liste chainée de cellules: */ { tmp_liste = (*debut_liste); (*debut_liste) = (*debut_liste)->suiv; Free_Liste(tmp_liste); } } return fictive; } void Troisieme_Phase(Noeud *Racine, int taille_fenetre, Liste **debut_liste, Liste **fin_liste, Noeud **fin_liste_pere, int *nb_element_liste, int fictive) { int i,j;/*,lm; */ int result_type; int taille_sequence; Noeud *result; Noeud *result_pere=NULL; Noeud *last_created_node; Noeud *tmp_noeud; Noeud *tmp_noeud2; Liste * tmp_liste; taille_sequence = strlen((const char *) Sequence[current_sequence]); i=taille_sequence; /* Construction de l'arbre pour S[taille_fenetre] */ result = *fin_liste_pere; /* Reprise sur le père de la dernière feuille créee. 
*/ last_created_node = NULL; /* Initialisation pour le suffix_link */ global_indice = i; /* Valeur de l'indice globale */ /* on remet la longeur du dernier segment Noeud - feuille */ if ((*fin_liste)->feuille->debut & LEAF_BIT) result_type = -(seg_taille((Noeud *)((*fin_liste)->feuille))); else result_type = -1; if (fictive) { result_pere = NULL; result = Add_Fast_String(result,i+result_type,i,&result_type,&result_pere); if (result_type==3) /* On a cree un une feuille et un Noeud */ { last_created_node = result_pere; result_type = -(seg_taille(result_pere)+1); result = result_pere->suffixe_link; result_pere->suffixe_link = NULL; } if (result_type==1)/* on a juste cree une feuille */ { result_type = -1; result = result_pere; } } else { while((*debut_liste)) { setListeValue(Liste_positions_fin,(*debut_liste)->feuille->fin_deb,i); if ((*debut_liste)==(*fin_liste)) break; tmp_liste = *debut_liste; *debut_liste = (*debut_liste)->suiv; Free_Liste(tmp_liste); } if ((*fin_liste)->feuille->debut & LEAF_BIT) result_type = -(seg_taille((Noeud *)((*fin_liste)->feuille))); else result_type = -1; Free_Liste((*fin_liste)); } /* on insere S[*nb_element_liste ... i]. */ for(j=*nb_element_liste;jsuffixe_link)) result = Add_Fast_String(Racine,j,i,&result_type,&result_pere); else /* sinon on ajoute par lien suffixe... */ result = Add_Fast_String(result->suffixe_link,i+result_type,i,&result_type,&result_pere); /* Si il a eu création d'une feuille */ /* 1: à un noeud déjà existant */ /* 3: coupure d'un arc avec création d'un noeud. */ if ((result_type == 1) || (result_type == 3)) { /* On met l'indice de fin de la feuille crée à -1 (indice global) */ /* On ajoute la feuille créee à la liste des feuilles */ /* ATTENTION : A VERIFIER... (*fin_liste)->suiv = Alloc_Liste(); (*fin_liste) = (*fin_liste)->suiv; (*fin_liste)->feuille = (Feuille *)result; */ /* On positionne la variable fin_liste_pere au pere */ /* de la feuille créee. 
*/ *fin_liste_pere = result_pere; /* On incremente le nombre de feuille dans la liste */ *nb_element_liste = *nb_element_liste + 1; /* Si à l'étape precedente on a crée un noeud (lien suffixe) */ if (last_created_node) last_created_node->suffixe_link = result_pere; /* Alors on positionne le lien suffixe sur le pere de la feuille. */ /* on reinitialise la variable last_created_node a NULL */ last_created_node = NULL; if (result_type == 3) /* Si on a crée un Noeud */ last_created_node = result_pere; } else /* la chaine est deja dans l'arbre.... */ if (result_type<0) { /* on ne s'arrete pas : */ /* On coupe l'arc ... et on "emule" l'insertion d'une feuille... */ tmp_noeud2 = Get_Child_Start_Letter(result,i+result_type); if (seg_taille(tmp_noeud2)>-result_type) { /* si il y a un lien suffixe pendent .... */ tmp_noeud = Alloc_Noeud(); if (last_created_node) last_created_node->suffixe_link = tmp_noeud; /* on le positionne */ last_created_node = NULL;/* on le reinitialise. */ tmp_noeud->debut = i + result_type; tmp_noeud->fin = i; if (tmp_noeud2->debut & LEAF_BIT) /* result est un feuille */ tmp_noeud2->debut = (tmp_noeud2->debut - result_type) | LEAF_BIT; else tmp_noeud2->debut = (tmp_noeud2->debut - result_type); Ajoute_Fils_Au_Noeud(result,tmp_noeud); Ajoute_Fils_Au_Noeud(tmp_noeud,tmp_noeud2); last_created_node = tmp_noeud; result = result_pere; } else { /* si il y a un lien suffixe pendent .... */ if (last_created_node) last_created_node->suffixe_link = result; /* on le positionne */ last_created_node = NULL;/* on le reinitialise. 
*/ /* ATTENTION : PAS SUR if (tmp_noeud2->debut & LEAF_BIT) &&() { tmp_noeud2->debut = (i + result_type) | LEAF_BIT; if ((getValue(Liste_positions_fin,((Feuille *)tmp_noeud2)->fin_deb) != i ) || (((Feuille *)tmp_noeud2)->sequence_number != current_sequence)) Ajoute_Position_Liste(Liste_positions_fin,&(((Feuille *)tmp_noeud2)->fin_deb),i,0); } else { tmp_noeud2->debut = i + result_type; tmp_noeud2->fin = i; } */ } } else last_created_node = NULL; /* sinon car 2 : rien */ if (result_type > 0) /* Si le resultat est la creation d'une feuille */ /* alors on va repositionner de maniere a reprendre sur un noeud */ /* afin de faire l'ajout suivant par lien suffixe. */ { if (result_type==3) /* Si il y a creation d'un noeud : on remonte */ /* au pere de celui-ci : il est mis dans le lien suffixe */ /* du noeud crée. et on recalcul la longeur a parcourir */ /* a partir du lien suffixe. */ { result_type = - (seg_taille(result_pere) + 1); result = result_pere->suffixe_link; result_pere->suffixe_link = NULL; } else /* sinon : il y a juste eu ajout d'une feuille */ { result_type = -1; /* la feuille vient d'être créée : elle mesure 1. */ result = result_pere; /* on remet result sur le pere de la feuille. 
*/ } } } } Noeud *AjouteSequence(Noeud *Arbre,unsigned char *S,int taille_fenetre) { Noeud *pere = NULL; Noeud *resultat; int position_arc; int seg_lg; int res_type; int i=taille_fenetre,j; int lg_sequence; int decalage = taille_fenetre; current_sequence++; Sequence[current_sequence] = S; lg_sequence = strlen((const char *) S); if (taille_fenetre>=lg_sequence) taille_fenetre = lg_sequence; i=taille_fenetre; decalage = taille_fenetre; /* Recherché la chaine S[i..i+taille_fenetre] dans l'arbre */ /* pour i allant de 0 à lg(S)-taille_fenetre */ resultat = Arbre; position_arc = 0; while(i<=lg_sequence) { pere=NULL; resultat = FindString(resultat,i-decalage,i,&pere,&res_type,&position_arc); if (position_arc == -1) { /*resultat est un noeud et n'a pas de fils pour la chaine i+res_type ... i */ return CaseTreeAddSequence(Arbre,resultat,i+res_type,i+res_type+1,taille_fenetre); } if (res_type == 1) { seg_lg = seg_taille(resultat); if (resultat->debut & LEAF_BIT) { resultat->debut = (i-seg_lg) | LEAF_BIT; Ajoute_Position_Liste(Liste_positions_fin,&(((Feuille *)resultat)->fin_deb),i,(resultat->sequence_number==current_sequence)?0:1); resultat->sequence_number = current_sequence; addBitTabValue(&(((Feuille *)resultat)->sequences),current_sequence); } i++; decalage = seg_taille(resultat) + 1; if (pere!=Arbre) resultat = pere->suffixe_link; else { decalage = taille_fenetre; resultat = Arbre; } } else if (res_type == 2) { /* En principe impossible. */ printf("ERREUR CAS 2 for sequence %d and string %d .. 
%d \n",current_sequence,i-decalage,i); for(j=i-decalage;jdebut & LEAF_BIT) { if ((resultat->sequence_number != current_sequence) || ( (resultat->sequence_number == current_sequence) && (resultat->debut&LEAF_BIT_INV) !=(i-seg_lg) ) ) { resultat->debut = (i-seg_lg) | LEAF_BIT; Ajoute_Position_Liste(Liste_positions_fin,&(((Feuille *)resultat)->fin_deb),i,(resultat->sequence_number==current_sequence)?0:1); resultat->sequence_number = current_sequence; addBitTabValue(&(((Feuille *)resultat)->sequences),current_sequence); } } } else printf("YOUPI II I I I %d \n",res_type); } return Arbre; } /* cas creation d'une feuille à la racine de l'arbre. */ Noeud *CaseOneAddSequence(Noeud *Arbre,int taille_fenetre) { Liste *Debut_liste = Alloc_Liste(); Liste *Fin_liste; Noeud *Fin_liste_pere; int nb_el_liste=0; int fictive = 0; Feuille *tmp_f; fprintf(stderr,"CASE ONE\n"); tmp_f = Alloc_Feuille(); tmp_f->debut = 0 | LEAF_BIT; Ajoute_Position_Liste(Liste_positions_fin, &(tmp_f->fin_deb),-1,0); Fin_liste_pere = Arbre; nb_el_liste = 1; Debut_liste->feuille = tmp_f; Fin_liste = Debut_liste; Ajoute_Fils_Au_Noeud(Arbre,(Noeud *)tmp_f); Premiere_Phase(Arbre,taille_fenetre, &Debut_liste,&Fin_liste,&Fin_liste_pere, &nb_el_liste,1); fictive = Deuxieme_Phase(Arbre,taille_fenetre, &Debut_liste,&Fin_liste,&Fin_liste_pere, &nb_el_liste,taille_fenetre, 0,0); Troisieme_Phase(Arbre,taille_fenetre, &Debut_liste,&Fin_liste,&Fin_liste_pere, &nb_el_liste,fictive); return Arbre; } /* cas de reprise au debut de 2eme phase */ Noeud *CaseTwoAddSequence(Noeud *Arbre,Noeud *resultat, Noeud *pere,int taille_fenetre) { Liste *Debut_liste = Alloc_Liste(); Liste *Fin_liste; Noeud *Fin_liste_pere; int nb_el_liste=0; int fictive = 0; fprintf(stderr,"CASE TWO\n"); Debut_liste->feuille = (Feuille *)pere; Fin_liste = Debut_liste; Fin_liste_pere = resultat; nb_el_liste = 1; fictive = Deuxieme_Phase(Arbre,taille_fenetre,&Debut_liste, &Fin_liste,&Fin_liste_pere,&nb_el_liste, taille_fenetre,1,0); 
Troisieme_Phase(Arbre,taille_fenetre, &Debut_liste,&Fin_liste,&Fin_liste_pere, &nb_el_liste,fictive); return Arbre; } /* cas de reprise en milieu de 1er phase */ Noeud *CaseTreeAddSequence(Noeud *Arbre,Noeud *resultat,int deb,int fin,int taille_fenetre) { Liste *Debut_liste = Alloc_Liste(); Liste *Fin_liste=NULL; Noeud *Fin_liste_pere; Noeud *pere=NULL; int nb_el_liste=0; int fictive = 0; int j,lim,res_type; int start; Noeud *last_created = NULL; /* Dans ce cas on doit faire un tour d'algo à la main..... */ lim = fin; res_type = deb - fin; global_indice = lim; start = lim-taille_fenetre; if (start<0) start=0; for (j=start;jfin_deb),-1); if (Fin_liste) { Fin_liste->suiv = Alloc_Liste(); Fin_liste = Fin_liste->suiv; Fin_liste->feuille = (Feuille *)resultat; } else { Fin_liste = Debut_liste; Debut_liste->feuille = (Feuille *)resultat; } Fin_liste_pere = pere; nb_el_liste++; if (last_created) last_created->suffixe_link = pere; last_created = NULL; if (res_type == 3) last_created = pere; } else if (res_type<0) { if (last_created) last_created->suffixe_link = resultat; last_created = NULL; break; } else last_created = NULL; if (res_type>0) { if (res_type==3) { res_type = - seg_taille(pere) - 1; resultat = pere->suffixe_link; if (resultat!=Arbre) resultat = resultat->suffixe_link; else res_type += 1; pere->suffixe_link = NULL; } else { res_type = -1; resultat = pere; if (resultat == NULL) { resultat = Arbre; res_type = -(j+1); } else if (resultat!=Arbre) resultat = resultat->suffixe_link; else res_type += 1; } } } if (fin <= taille_fenetre) { Premiere_Phase(Arbre,taille_fenetre, &Debut_liste,&Fin_liste,&Fin_liste_pere, &nb_el_liste,lim+1); fictive = Deuxieme_Phase(Arbre,taille_fenetre, &Debut_liste,&Fin_liste,&Fin_liste_pere, &nb_el_liste,taille_fenetre,0,0); Troisieme_Phase(Arbre,taille_fenetre, &Debut_liste,&Fin_liste,&Fin_liste_pere, &nb_el_liste,fictive); } else { setListeValue(Liste_positions_fin,Debut_liste->feuille->fin_deb,lim); if (nb_el_liste == 1) fictive = 1; 
else { res_type = -1; Debut_liste = Debut_liste->suiv; nb_el_liste--; } nb_el_liste = ((fictive)?lim:lim+1)-taille_fenetre+nb_el_liste; fictive = Deuxieme_Phase(Arbre,taille_fenetre, &Debut_liste,&Fin_liste,&Fin_liste_pere, &nb_el_liste,(fictive)?lim:lim+1,fictive,0); Troisieme_Phase(Arbre,taille_fenetre, &Debut_liste,&Fin_liste,&Fin_liste_pere, &nb_el_liste,fictive); } return Arbre; } Noeud *CaseFourAddSequence(Noeud *Arbre,Noeud *resultat,Noeud *pere,int res_type,int position_arc,int i,int taille_fenetre) { Liste *Debut_liste = Alloc_Liste(); Liste *Fin_liste; Noeud *Fin_liste_pere; int nb_el_liste=0; int fictive = 0; fprintf(stderr,"\nCASE FOUR------------------------------------------------------------------\n\n"); /* On doit faire un tour d'algo avant de */ /* Rentrer dans les fonctions standard... */ global_indice = i; if (res_type != -1) { Debut_liste->feuille = (Feuille *)resultat; Fin_liste = Debut_liste; Fin_liste_pere = pere; nb_el_liste = i-taille_fenetre+1; } else { Debut_liste->feuille = (Feuille *)resultat; Fin_liste = Debut_liste; Fin_liste_pere = resultat; nb_el_liste = i-taille_fenetre+1; } printf("Arbre AVANT la Deuxieme phase\n"); printf("Arbre = %p \n taille_fenetre = %d \n" "Debut_liste = %p ->feuille %p \n" "Fin_liste = %p ->feuille %p\n" "Fin_liste_pere = %p\n" "nb_element_liste=%d\n" "i = %d\n" "Fictive = 0\n", Arbre,taille_fenetre,Debut_liste,Debut_liste->feuille,Fin_liste,Fin_liste->feuille,Fin_liste_pere,nb_el_liste,i); fictive = Deuxieme_Phase(Arbre,taille_fenetre,&Debut_liste, &Fin_liste,&Fin_liste_pere,&nb_el_liste, i,1,0); Troisieme_Phase(Arbre,taille_fenetre,&Debut_liste, &Fin_liste,&Fin_liste_pere,&nb_el_liste,fictive); return Arbre; } void CloseTheFirstPhase( Liste **debut_liste, Liste **fin_liste, Noeud **fin_liste_pere) { Liste *tmp = *debut_liste; printf("CLOSE THE FIRST CASE\n"); while(tmp!=NULL) { if ((tmp->feuille) && (tmp->feuille->debut & LEAF_BIT)) setListeValue(Liste_positions_fin,tmp->feuille->fin_deb,global_indice); 
tmp=tmp->suiv; } } SMILEv1.47/P_BLOCS/src/bit_tab.c0000644002404200237300000001511310066543646015621 0ustar lamaaoc00000000000000/******************************************************************************/ /* SMILE v1.47 - Extraction of structured motifs common to several sequences */ /* Copyright (C) 2004 L.Marsan (lama -AT- prism.uvsq.fr) */ /* */ /* This program is free software; you can redistribute it and/or */ /* modify it under the terms of the GNU General Public License */ /* as published by the Free Software Foundation; either version 2 */ /* of the License, or (at your option) any later version. */ /* */ /* This program is distributed in the hope that it will be useful, */ /* but WITHOUT ANY WARRANTY; without even the implied warranty of */ /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ /* GNU General Public License for more details. */ /* */ /* You should have received a copy of the GNU General Public License */ /* along with this program; if not, write to the Free Software */ /* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ /******************************************************************************/ #include int NB_SEQUENCE=0; int CHANGE_LIMITE=0; int SIZE_STATIC_BIT_TAB=0; void initBitTab(int nb_seq) { NB_SEQUENCE=nb_seq; SIZE_STATIC_BIT_TAB=nb_seq/8 +1; CHANGE_LIMITE=SIZE_STATIC_BIT_TAB/2 -1; if (CHANGE_LIMITE<=0) CHANGE_LIMITE=1; } Bit_Tab *AllocBitTab(void) { Bit_Tab *tmp = (Bit_Tab *)malloc(2); #if DEBUG_JTREE nb_alloc_tab++; #endif if (tmp==NULL) { fprintf(stderr,"No enougth space... \n Program aborded \n"); exit(-2); } memset(tmp,0,2); tmp[0] = 0x80; /* --> tmp est en dynamic... */ tmp[1] = 0; /* --> pas d'element dans tmp. 
*/ return tmp; } void ReinitBitTab(Bit_Tab **bt) { if ((*bt)[0]&0x80) { *bt = (Bit_Tab *)realloc(*bt,2); (*bt)[0] = 0x80; (*bt)[1] = 0; } else { *bt = (Bit_Tab *)memset(*bt,0,SIZE_STATIC_BIT_TAB); } } void CopyBitTab(Bit_Tab **dest,Bit_Tab *src) { int nb_element; if (src[0] & 0x80) { /* Dynamic : */ nb_element = ((src[0] & 0x7F) << 8) | src[1]; (*dest) = (Bit_Tab *)realloc(*dest,2 + nb_element*sizeof(unsigned short int) ); *dest = (Bit_Tab *)memcpy(*dest,src,2 + nb_element*sizeof(unsigned short int) ); } else { /* Static : */ free(*dest); *dest = AllocBitTabStatic(); /*ReinitBitTab(dest); */ *dest = (Bit_Tab *)memcpy(*dest,src,SIZE_STATIC_BIT_TAB); } } Bit_Tab *AllocBitTabStatic(void) { Bit_Tab *tmp = (Bit_Tab *)malloc(SIZE_STATIC_BIT_TAB); if (tmp==NULL) { fprintf(stderr,"No enougth space... \n Program aborded \n"); exit(-2); } tmp = (Bit_Tab *)memset(tmp,0,SIZE_STATIC_BIT_TAB); return tmp; } void addBitTabValue(Bit_Tab **tab,int value) { if ((*tab)[0] & 0x80) addBitTabValueDynamic(tab,value); else addBitTabValueStatic(tab,value); } void fusionneBitTab(Bit_Tab **tab1,Bit_Tab *tab2) /* 1 <- 1 & 2 */ { if ((*tab1)[0]&0x80) fusionneBitTabDynamic(tab1,tab2); else fusionneBitTabStatic(tab1,tab2); } int nbSequenceInBitTab(Bit_Tab *tab) { if (tab[0] & 0x80) return nbSequenceInBitTabDynamic(tab); return nbSequenceInBitTabStatic(tab); } void printBitTab(Bit_Tab *tab) { int i,value; int nb_element; unsigned char mask = 0; if (tab[0] & 0x80) { printf("d:"); nb_element = nbSequenceInBitTabDynamic(tab); for(i=0;i>=1,value++) if (tab[0] & mask) printf("%d,",value); for(i=1;i>=1,value++) if (tab[i] & mask) printf("%d,",value); } /*---------------------------------------------------------*/ void convertBitTab(Bit_Tab **tab) { Bit_Tab *tmp = AllocBitTabStatic(); int nb_elment = nbSequenceInBitTabDynamic(*tab); int i; for(i=0;i> offset; (*tab)[position] = (*tab)[position] | offset; } void addBitTabValueDynamic(Bit_Tab **tab,int value) { unsigned short int nb_element = 0,i; 
nb_element = (((*tab)[0] & 0x7F) << 8) | (*tab)[1]; if (nb_element==CHANGE_LIMITE) { convertBitTab(tab); addBitTabValueStatic(tab,value); return; } for(i=0;(i> 8); (*tab)[1] = nb_element; } int nbSequenceInBitTabStatic(Bit_Tab *tab) { int i; unsigned char k; int nb_element=0; for(k=0x40;k!=0;k>>=1) if (tab[0] & k) nb_element++; for(i=1;i>=1) if (tab[i] & k) nb_element++; return nb_element; } int nbSequenceInBitTabDynamic(Bit_Tab *tab) { return ((((tab)[0] & 0x7F) << 8) | (tab)[1]); } void fusionneBitTabStatic(Bit_Tab **tab1,Bit_Tab *tab2) { int i=0; int nb_element; if ((tab2)[0] &0x80) { nb_element = nbSequenceInBitTabDynamic(tab2); for(i=0;i>=1,value++) if (tab2[0] & mask) addBitTabValue(tab1,value); for(i=1;i>=1,value++) if (tab2[i] & mask) addBitTabValue(tab1,value); } SMILEv1.47/P_BLOCS/src/libfasta.c0000644002404200237300000002022210211354641015762 0ustar lamaaoc00000000000000/* * Copyright (c) Atelier de BioInformatique * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free * Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, * MA 02111-1307, USA * * For questions, suggestions, bug-reports, enhancement-requests etc. 
* I may be contacted at: Alain.Viari@inrialpes.fr */ /* #ifdef THINK_C */ #include /* #else */ /* #include */ /* #endif */ #include #include #include #include "Gtypes.h" #include "libsysk.h" #include "libfasta.h" #define CHECK 0 #define DEBUG_FASTA 0 #define READ_NEXT Faux #define PUSH_BACK Vrai #define SERIAL Vrai #define INDEXED Faux #ifdef THINK_C #define LINE_FEED '\r' #else #define LINE_FEED '\n' #endif /* -------------------------------------------- */ /* @static: lecture bufferisee */ /* -------------------------------------------- */ static char * sNextIOBuffer(FILE *streamin, Bool retain, Bool serial) { /* Int32 lenbuf; */ char *buf, *end; static char sBuffer[BUFSIZ]; /* in */ static Bool sRetained = Faux; buf = (((retain || sRetained) && serial) ? sBuffer : fgets(sBuffer, sizeof(sBuffer), streamin)); if (buf) { end = buf + strlen(buf) - 1; if (*end == LINE_FEED) *end = '\000'; } sRetained = retain; return buf; } /* -------------------------------------------- */ /* compte le nombre de caracteres alpha dans */ /* un buffer */ /* -------------------------------------------- */ Int32 CountAlpha(char *buf) { Int32 count; for (count = 0 ; *buf ; buf++) if (isalpha((int)*buf)) count++; return count; } /* -------------------------------------------- */ /* copy only alpha chars from s2 to s1 */ /* -------------------------------------------- */ char * StrcpyAlpha(char *s1, char *s2) { for( ; *s2 ; s2++) if (isalpha((int)*s2)) *s1++ = *s2; *s1 = '\000'; return s1; } /* -------------------------------------------- */ /* skip to next space in buffer */ /* -------------------------------------------- */ char * NextSpace(char *buffer) { for (; *buffer ; buffer++) if (isspace((int)*buffer)) return buffer; return NULL; } /* -------------------------------------------- */ /* returns sequence name (FASTA) */ /* -------------------------------------------- */ char *GetFastaName(char *buffer) { static char name[FASTA_NAMLEN]; buffer[FASTA_NAMLEN] = '\000'; if (sscanf(buffer 
+ 1, "%s", name) != 1) strcpy(name, ""); return name; } /* -------------------------------------------- */ /* returns sequence comment (FASTA) */ /* -------------------------------------------- */ char *GetFastaComment(char *buffer) { char *space; static char comment[FASTA_COMLEN]; buffer[FASTA_COMLEN] = '\000'; space = NextSpace(buffer); strcpy(comment, (space ? space + 1 : "")); return comment; } /* -------------------------------------------- */ /* liberation d'une sequence */ /* -------------------------------------------- */ FastaSequencePtr FreeFastaSequence(FastaSequencePtr seq) { if (seq) { if (seq->seq) FREE(seq->seq); FREE(seq); } return NULL; } /* -------------------------------------------- */ /* allocation d'une sequence */ /* -------------------------------------------- */ FastaSequencePtr NewFastaSequence(void) { FastaSequencePtr seq; if (! (seq = NEW(FastaSequence))) return NULL; seq->length = 0; if (! (seq->seq = NEWN(char, BUFSIZ))) return FreeFastaSequence(seq); seq->bufsize = BUFSIZ; *(seq->name) = '\000'; *(seq->comment) = '\000'; seq->ok = Vrai; return seq; } /* -------------------------------------------- */ /* lecture/redimensionnement d'une sequence au */ /* format Fasta Lecture en serie */ /* returns : Faux -> last sequence */ /* Vrai -> more to read */ /* you must check seq->ok ! */ /* -------------------------------------------- */ Bool ReadFastaSequence(FILE *streamin, FastaSequencePtr seq) { Int32 readlen, buflen; char *buffer, *tbuf; seq->ok = Faux; /* assume error */ buflen = seq->length = 0; seq->offset = ftell(streamin); buffer = sNextIOBuffer(streamin, READ_NEXT, SERIAL); if (! 
(buffer && (*buffer == '>'))) /* sync error */ return Faux; /* last sequence */ if (seq->offset) seq->offset -= (strlen(buffer) + 1); strcpy(seq->name, GetFastaName(buffer)); strcpy(seq->comment, GetFastaComment(buffer)); while ((buffer = sNextIOBuffer(streamin, READ_NEXT, SERIAL))) { if (*buffer == '>') { (void) sNextIOBuffer(streamin, PUSH_BACK, SERIAL); /* push it back */ break; } #if CHECK readlen = CountAlpha(buffer); #else readlen = strlen(buffer); #endif buflen += readlen; if (buflen >= seq->bufsize) { if (! (tbuf = REALLOC(char, seq->seq, 2 * buflen + 1))) return Vrai; /* but seq->ok is Faux */ seq->seq = tbuf; seq->bufsize = 2 * buflen + 1; } #if CHECK StrcpyAlpha(seq->seq + seq->length, buffer); #else memcpy(seq->seq + seq->length, buffer, readlen); #endif seq->length = buflen; } seq->seq[seq->length] = '\000'; return (seq->ok = Vrai); } /* -------------------------------------------- */ /* lecture/redimensionnement d'une sequence au */ /* format Fasta Lecture indexee */ /* returns : Faux -> last sequence */ /* Vrai -> more to read */ /* you must check seq->ok ! */ /* -------------------------------------------- */ Bool GetFastaSequence(FILE *streamin, FastaSequencePtr seq) { Int32 readlen, buflen; char *buffer, *tbuf; seq->ok = Faux; /* assume error */ buflen = seq->length = 0; fseek(streamin, seq->offset, SEEK_SET); buffer = sNextIOBuffer(streamin, READ_NEXT, INDEXED); if (! (buffer && (*buffer == '>'))) /* sync error */ return Faux; /* last sequence */ if (seq->offset) seq->offset -= (strlen(buffer) + 1); strcpy(seq->name, GetFastaName(buffer)); strcpy(seq->comment, GetFastaComment(buffer)); while ((buffer = sNextIOBuffer(streamin, READ_NEXT, INDEXED))) { if (*buffer == '>') break; #if CHECK readlen = CountAlpha(buffer); #else readlen = strlen(buffer); #endif buflen += readlen; if (buflen >= seq->bufsize) { if (! 
(tbuf = REALLOC(char, seq->seq, 2 * buflen + 1))) return Vrai; /* but seq->ok is Faux */ seq->seq = tbuf; seq->bufsize = 2 * buflen + 1; } #if CHECK StrcpyAlpha(seq->seq + seq->length, buffer); #else memcpy(seq->seq + seq->length, buffer, readlen); #endif seq->length = buflen; } seq->seq[seq->length] = '\000'; return (seq->ok = Vrai); } /* -------------------------------------------- */ /* ecriture d'une sequence au format Fasta */ /* -------------------------------------------- */ void WriteFastaSequence(FILE *streamou, FastaSequencePtr seq, Int32 char_per_line) { Int32 i, nlines, rest; char *buf, *end, tempo; fputc('>', streamou); fputs((*(seq->name) ? seq->name : "") , streamou); fputc(' ', streamou); fputs((*(seq->comment) ? seq->comment : ""), streamou); fputc(LINE_FEED, streamou); nlines = seq->length / char_per_line; buf = seq->seq; for (i = 0 ; i < nlines ; i++) { end = buf + char_per_line; tempo = *end; *end = '\000'; fputs(buf, streamou); fputc(LINE_FEED , streamou); *end = tempo; buf += char_per_line; } if ((rest = (seq->length % char_per_line))) { end = buf + rest; tempo = *end; *end = '\000'; fputs(buf, streamou); fputc(LINE_FEED , streamou); *end = tempo; } } SMILEv1.47/P_BLOCS/Makefile0000644002404200237300000001175710066542217014725 0ustar lamaaoc00000000000000############################################################################### # OPTIONS DU PROGRAMME ######################################################## #Affichage du nombre d'occurrences pour chaque modele trouve NB_OCCS=1 #Affichage des positions des occurrences de chaque modele trouve AFF_OCCS=0 ############################################################################### #Compilateur : CC=gcc OPT=-Wall -ansi -O3 #OPT=-g #definition des repertoires des arbres: INCL_DIR=include/ OBJ_DIR=obj/ SRC_DIR=src/ LIB_DIR=lib/ BIN_DIR=bin/ # definition de repertoires de spell : SPELL_INCL_DIR=Spell/include/ SPELL_SRC_DIR=Spell/src/ SPELL_OBJ_DIR=Spell/obj/ # Options de compilation : C_FLAG= 
$(OPT) -I$(INCL_DIR) -I$(SPELL_INCL_DIR) O_FLAG= -c $(OPT) -I$(INCL_DIR) -I$(SPELL_INCL_DIR) -DOCC=$(NB_OCCS) -DAFF_OCC=$(AFF_OCCS) LIB_FLAG= -lm # Nom du programme , de l'archive PROG_NAME=x-smile ARCHIVE_NAME=x-smile.tar.gz #Objet ... OBJ_FILE=$(OBJ_DIR)*.o $(SPELL_OBJ_DIR)*.o TREE_OBJ=$(OBJ_DIR)global_fonctions.o $(OBJ_DIR)construction.o $(OBJ_DIR)liste_pos.o $(OBJ_DIR)allocateurs.o $(OBJ_DIR)bit_tab.o $(OBJ_DIR)libfasta.o $(OBJ_DIR)global_variables.o $(SPELL_OBJ_DIR)global.o $(SPELL_OBJ_DIR)criteres.o #$(OBJ_DIR)sub_suffix_tree.o TREE_OBJ_DEB=$(OBJ_DIR)global_fonctions.o $(OBJ_DIR)construction.o $(OBJ_DIR)liste_pos2.o $(OBJ_DIR)allocateurs.o $(OBJ_DIR)bit_tab2.o $(OBJ_DIR)libfasta.o $(OBJ_DIR)global_variables.o $(SPELL_OBJ_DIR)global.o $(SPELL_OBJ_DIR)criteres.o $(OBJ_DIR)sub_suffix_tree.o SPELL_OBJ=$(SPELL_OBJ_DIR)global.o $(SPELL_OBJ_DIR)pile_occ.o $(SPELL_OBJ_DIR)spell.o $(SPELL_OBJ_DIR)occ.o $(SPELL_OBJ_DIR)criteres.o $(SPELL_OBJ_DIR)barre.o $(SPELL_OBJ_DIR)alphabet.o $(SPELL_OBJ_DIR)model.o all: $(BIN_DIR)$(PROG_NAME) echo ALL!!! 
debug: $(TREE_OBJ_DEB) $(CC) $(C_FLAG) $(OBJ_FILE) -o $(BIN_DIR)$(PROG_NAME) $(LIB_FLAG) $(BIN_DIR)$(PROG_NAME): $(TREE_OBJ) $(SPELL_OBJ) $(CC) $(C_FLAG) $(OBJ_FILE) -o $(BIN_DIR)$(PROG_NAME) $(LIB_FLAG) $(OBJ_DIR)sub_suffix_tree.o : $(SRC_DIR)sub_suffix_tree.c $(CC) $(SRC_DIR)sub_suffix_tree.c -o $(OBJ_DIR)sub_suffix_tree.o $(O_FLAG) $(OBJ_DIR)global_fonctions.o : $(INCL_DIR)global_fonctions.h $(SRC_DIR)global_fonctions.c $(CC) $(SRC_DIR)global_fonctions.c -o $(OBJ_DIR)global_fonctions.o $(O_FLAG) $(OBJ_DIR)construction.o : $(INCL_DIR)construction.h $(SRC_DIR)construction.c $(CC) $(SRC_DIR)construction.c -o $(OBJ_DIR)construction.o $(O_FLAG) $(OBJ_DIR)liste_pos.o : $(INCL_DIR)liste_pos.h $(SRC_DIR)liste_pos.c $(CC) $(SRC_DIR)liste_pos.c -o $(OBJ_DIR)liste_pos.o $(O_FLAG) $(OBJ_DIR)liste_pos2.o : $(INCL_DIR)liste_pos.h $(SRC_DIR)liste_pos2.c $(CC) $(SRC_DIR)liste_pos2.c -o $(OBJ_DIR)liste_pos2.o $(O_FLAG) $(OBJ_DIR)allocateurs.o : $(INCL_DIR)allocateurs.h $(SRC_DIR)allocateurs.c $(CC) $(SRC_DIR)allocateurs.c -o $(OBJ_DIR)allocateurs.o $(O_FLAG) $(OBJ_DIR)bit_tab.o : $(INCL_DIR)bit_tab.h $(SRC_DIR)bit_tab.c $(CC) $(SRC_DIR)bit_tab.c -o $(OBJ_DIR)bit_tab.o $(O_FLAG) $(OBJ_DIR)bit_tab2.o : $(INCL_DIR)bit_tab.h $(SRC_DIR)bit_tab2.c $(CC) $(SRC_DIR)bit_tab2.c -o $(OBJ_DIR)bit_tab2.o $(O_FLAG) $(OBJ_DIR)libfasta.o : $(INCL_DIR)libfasta.h $(SRC_DIR)libfasta.c $(CC) $(SRC_DIR)libfasta.c -o $(OBJ_DIR)libfasta.o $(O_FLAG) $(OBJ_DIR)global_variables.o : $(SRC_DIR)global_variables.c $(CC) $(SRC_DIR)global_variables.c -o $(OBJ_DIR)global_variables.o $(O_FLAG) #Dependance pour le programme spell : $(SPELL_OBJ_DIR)global.o : $(SPELL_INCL_DIR)global.h $(SPELL_SRC_DIR)global.c $(CC) $(SPELL_SRC_DIR)global.c -o $(SPELL_OBJ_DIR)global.o $(O_FLAG) $(SPELL_OBJ_DIR)criteres.o : $(SPELL_INCL_DIR)criteres.h $(SPELL_SRC_DIR)criteres.c $(SPELL_INCL_DIR)global.h $(CC) $(SPELL_SRC_DIR)criteres.c -o $(SPELL_OBJ_DIR)criteres.o $(O_FLAG) $(SPELL_OBJ_DIR)pile_occ.o : $(SPELL_INCL_DIR)pile_occ.h 
$(SPELL_SRC_DIR)pile_occ.c $(SPELL_INCL_DIR)global.h $(CC) $(SPELL_SRC_DIR)pile_occ.c -o $(SPELL_OBJ_DIR)pile_occ.o $(O_FLAG) $(SPELL_OBJ_DIR)spell.o : $(SPELL_INCL_DIR)spell.h $(SPELL_SRC_DIR)spell.c $(SPELL_INCL_DIR)global.h $(CC) $(SPELL_SRC_DIR)spell.c -o $(SPELL_OBJ_DIR)spell.o $(O_FLAG) $(SPELL_OBJ_DIR)occ.o : $(SPELL_INCL_DIR)occ.h $(SPELL_SRC_DIR)occ.c $(CC) $(SPELL_SRC_DIR)occ.c -o $(SPELL_OBJ_DIR)occ.o $(O_FLAG) $(SPELL_OBJ_DIR)barre.o : $(SPELL_INCL_DIR)barre.h $(SPELL_SRC_DIR)barre.c $(CC) $(SPELL_SRC_DIR)barre.c -o $(SPELL_OBJ_DIR)barre.o $(O_FLAG) $(SPELL_OBJ_DIR)alphabet.o : $(SPELL_INCL_DIR)alphabet.h $(SPELL_SRC_DIR)alphabet.c $(CC) $(SPELL_SRC_DIR)alphabet.c -o $(SPELL_OBJ_DIR)alphabet.o $(O_FLAG) $(SPELL_OBJ_DIR)model.o : $(SPELL_INCL_DIR)model.h $(SPELL_SRC_DIR)model.c $(CC) $(SPELL_SRC_DIR)model.c -o $(SPELL_OBJ_DIR)model.o $(O_FLAG) clean: clean_emacs clean_obj echo CLEAN OK!!! clean_emacs: find . -name "*~" -exec rm -f {} \; clean_obj: rm -f $(OBJ_DIR)*o $(SPELL_OBJ_DIR)*o rm -f $(BIN_DIR)$(PROG_NAME) clean_arch: rm ./$(ARCHIVE_NAME) tgz: clean tar -zcvf $(ARCHIVE_NAME) ./* depend: makedepend -o.o $(SPELL_INCL_DIR)*.h $(INCL_DIR)*.h $(SPELL_SRC_DIR)*.c $(SRC_DIR)*.c -I$(INCL_DIR) -I$(SPELL_INCL_DIR) # DO NOT DELETE SMILEv1.47/P_BLOCS/Spell/0000755002404200237300000000000010066542217014331 5ustar lamaaoc00000000000000SMILEv1.47/P_BLOCS/Spell/obj/0000755002404200237300000000000010114605452015076 5ustar lamaaoc00000000000000SMILEv1.47/P_BLOCS/Spell/src/0000755002404200237300000000000010066543505015121 5ustar lamaaoc00000000000000SMILEv1.47/P_BLOCS/Spell/src/occ.c0000644002404200237300000000311110066543471016027 0ustar lamaaoc00000000000000/******************************************************************************/ /* SMILE v1.47 - Extraction of structured motifs common to several sequences */ /* Copyright (C) 2004 L.Marsan (lama -AT- prism.uvsq.fr) */ /* */ /* This program is free software; you can redistribute it and/or */ /* modify it 
under the terms of the GNU General Public License */ /* as published by the Free Software Foundation; either version 2 */ /* of the License, or (at your option) any later version. */ /* */ /* This program is distributed in the hope that it will be useful, */ /* but WITHOUT ANY WARRANTY; without even the implied warranty of */ /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ /* GNU General Public License for more details. */ /* */ /* You should have received a copy of the GNU General Public License */ /* along with this program; if not, write to the Free Software */ /* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ /******************************************************************************/ #include void initOcc(P_occ o) { o->x = NULL; o->num = -1; o->lon = -1; o->xerr = -1; o->blocerr = -1; o->saut = 0; o->codesaut = 0; } SMILEv1.47/P_BLOCS/Spell/src/criteres.c0000644002404200237300000001666010066543455017122 0ustar lamaaoc00000000000000/******************************************************************************/ /* SMILE v1.47 - Extraction of structured motifs common to several sequences */ /* Copyright (C) 2004 L.Marsan (lama -AT- prism.uvsq.fr) */ /* */ /* This program is free software; you can redistribute it and/or */ /* modify it under the terms of the GNU General Public License */ /* as published by the Free Software Foundation; either version 2 */ /* of the License, or (at your option) any later version. */ /* */ /* This program is distributed in the hope that it will be useful, */ /* but WITHOUT ANY WARRANTY; without even the implied warranty of */ /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ /* GNU General Public License for more details. */ /* */ /* You should have received a copy of the GNU General Public License */ /* along with this program; if not, write to the Free Software */ /* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
*/ /******************************************************************************/ #include /* From alphabet.c */ extern int nbSymbMod; int **code2Sauts; /******************************************************************************/ /* FONCTIONS PRIVEES */ /******************************************************************************/ int recFillTab(int bloc, P_Criteres cr, int *nbcodes, int **code2Sauts); /******************************************************************************/ /* setCompoPal */ /******************************************************************************/ void setCompoPal(P_Criteres cr, char **argv, int argc) { int i,j,bloc,nbsymb; if( (cr->compo = malloc(nbSymbMod*sizeof(LongSeq))) == NULL) fatalError("initCriteres: cannot allocate 'cr->compo'\n"); for(i=0;ibloc;i++) { cr->compobloc[i] = (LongSeq *) malloc(nbSymbMod*sizeof(LongSeq)); if(!(cr->compobloc[i])) fatalError("setCompoPal: cannot allocate 'cr->compobloc[i]'\n"); } /* Initialisations */ cr->flag_compo = FAUX; for (i = 0; i != nbSymbMod; i++) cr->compo[i] = -1; for (i = 0; i != cr->bloc; i++) { cr->flag_compobloc[i] = FAUX; for (j = 0; j != nbSymbMod; j++) cr->compobloc[i][j] = -1; } while(argc>0 && (**argv!='p')) { bloc = atoi(*argv); argc--; argv++; nbsymb = atoi(*argv); argc--; argv++; for(i=0; i!=nbsymb; i++) { j = str2nummod(*argv); argc--; argv++; if( j == -1) { fprintf(stderr, "> Warning: composition in '%s' ignored, symbol is not in the models alphabet.\n", *(argv-1)); argc--; argv++; continue; } if(bloc == 0) { cr->flag_compo = VRAI; cr->compo[j] = atoi(*argv); /* printf("compo glob %s %d\n",*(argv-1),cr->compo[j]); */ } else { cr->flag_compobloc[bloc-1]= VRAI; cr->compobloc[bloc-1][j] = atoi(*argv); /* printf("compo bloc %s %d\n",*(argv-1),cr->compobloc[j]); */ } argc--; argv++; } } /* S'il n'y a pas de palindromes */ if(argc==0) return; cr->flag_palindrom = VRAI; while(argc>0) { sscanf(*argv, "p%d/%d",&i, &j); cr->palindrom[i-1] = j-1; argc--; argv++; } } 
/******************************************************************************/
/* addSaut2Code                                                               */
/******************************************************************************/
/* Folds the jump length chosen for block 'curbloc' into a running jump code. */
/*                                                                            */
/* oldcode  code accumulated for the previous blocks (ignored for block 0)    */
/* saut     jump length chosen for this block                                 */
/* curbloc  index of the block the jump follows                               */
/* cr       criteria holding the allowed [min,max] jump range of every block  */
/*                                                                            */
/* For block 0 the code is simply saut - min.  For later blocks the old code  */
/* is multiplied by the number of allowed jump lengths of the block and the   */
/* offset saut - min is added, i.e. a mixed-radix number whose last digit     */
/* belongs to the last block.                                                 */
/******************************************************************************/
int addSaut2Code(int oldcode, LongSeq saut, LongSeq curbloc, P_Criteres cr)
{
const Fourchette *range = &cr->saut[curbloc];

#if DEBUG_SAUT
printf("addSaut2Code: Je recois %d",oldcode);
#endif

if (curbloc != 0)
    {
    oldcode *= range->max - range->min + 1;
    oldcode += saut - range->min;

#if DEBUG_SAUT
    printf(" et renvoie %d\n",oldcode);
#endif
    return oldcode;
    }

#if DEBUG_SAUT
printf(" et renvoie %d\n",saut - cr->saut[0].min);
#endif
return (saut - range->min);
}


/******************************************************************************/
/* initTabSauts                                                               */
/******************************************************************************/
/* Allocates the global table code2Sauts and has recFillTab() fill it.        */
/*                                                                            */
/* The table has one row per jump code (the product of the range sizes of the */
/* first cr->bloc-1 jump ranges) and cr->bloc-1 columns, one per jump.        */
/* Allocation failures are reported through fatalError().                     */
/******************************************************************************/
void initTabSauts(P_Criteres cr)
{
int nbblocs = cr->bloc;
int nbcodes = 1;
int idx;

/* number of distinct codes = product of the sizes of the jump ranges */
idx = 0;
while (idx != nbblocs-1)
    {
    nbcodes *= cr->saut[idx].max - cr->saut[idx].min +1;
    idx++;
    }

code2Sauts = (int **) malloc(nbcodes * sizeof(int *));
if (code2Sauts == NULL)
    fatalError("criteres.c: initTabSauts: cannot allocate 'code2Sauts'\n");

for (idx = 0; idx != nbcodes; idx++)
    {
    code2Sauts[idx] = (int *) malloc((nbblocs-1) * sizeof(int));
    if (code2Sauts[idx] == NULL)
        fatalError("criteres.c: initTabSauts: cannot allocate 'code2Sauts[j]'\n");
    }

recFillTab(0, cr, &nbcodes, code2Sauts);
}


/******************************************************************************/
/* recFillTab                                                                 */
/******************************************************************************/
/* Fills column 'bloc' of code2Sauts, after recursing into the later columns. */
/*                                                                            */
/* bloc        column (jump index) handled by this call                       */
/* cr          criteria holding the jump ranges                               */
/* nbcodes     in/out: divided by the size of this block's range, so that it  */
/*             holds the number of times the column pattern is repeated       */
/* code2Sauts  table to fill                                                  */
/*                                                                            */
/* Each value of the range is written on as many consecutive rows as the      */
/* later columns have combinations, so the last column varies fastest.        */
/* Returns the number of combinations covered by this column and the later    */
/* ones.  A newline is printed on stdout by every call.                       */
/******************************************************************************/
int recFillTab(int bloc, P_Criteres cr, int *nbcodes, int **code2Sauts)
{
int rep, val, k, row = 0, inner;

/* the last jump column is cr->bloc-2; deeper columns are filled first */
inner = (bloc == cr->bloc-2) ? 1
                             : recFillTab(bloc+1, cr, nbcodes, code2Sauts);

*nbcodes /= cr->saut[bloc].max - cr->saut[bloc].min +1;

for (rep = 0; rep != *nbcodes; rep++)
    {
    for (val = cr->saut[bloc].min; val != cr->saut[bloc].max+1; val++)
        {
        for (k = 0; k != inner; k++)
            code2Sauts[row++][bloc] = val;
        }
    }

printf("\n");

return (inner * (cr->saut[bloc].max - cr->saut[bloc].min +1));
}
/******************************************************************************/ /* allocBloc */ /******************************************************************************/ void allocBloc(P_Criteres cr, int bloc) { int i; cr->maxerrblocs = (LongSeq *) malloc(bloc*sizeof(LongSeq)); cr->longbloc = (Fourchette *) malloc(bloc*sizeof(Fourchette)); cr->saut = (Fourchette *) malloc(bloc*sizeof(Fourchette)); cr->flag_compobloc = (Flag *) malloc(bloc*sizeof(Flag)); cr->compobloc = (LongSeq **) malloc(bloc*sizeof(LongSeq *)); cr->palindrom = (LongSeq *) malloc(bloc*sizeof(LongSeq)); if(!cr->maxerrblocs || !cr->longbloc || !cr->saut || !cr->flag_compobloc || !cr->compobloc || !cr->palindrom) fatalError("criteres.h: allocBloc: allocation error\n"); /* Initialisations */ for (i = 0; i != bloc; i++) { cr->saut[i].min = cr->saut[i].max = 0; cr->maxerrblocs[i] = -1; cr->longbloc[i].min = cr->longbloc[i].max = -1; cr->palindrom[i] = -1; } cr->flag_palindrom = FAUX; } SMILEv1.47/P_BLOCS/Spell/src/spell.c0000644002404200237300000013620510066543505016413 0ustar lamaaoc00000000000000/******************************************************************************/ /* SMILE v1.47 - Extraction of structured motifs common to several sequences */ /* Copyright (C) 2004 L.Marsan (lama -AT- prism.uvsq.fr) */ /* */ /* This program is free software; you can redistribute it and/or */ /* modify it under the terms of the GNU General Public License */ /* as published by the Free Software Foundation; either version 2 */ /* of the License, or (at your option) any later version. */ /* */ /* This program is distributed in the hope that it will be useful, */ /* but WITHOUT ANY WARRANTY; without even the implied warranty of */ /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ /* GNU General Public License for more details. 
*/ /* */ /* You should have received a copy of the GNU General Public License */ /* along with this program; if not, write to the Free Software */ /* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ /******************************************************************************/ #include /******************************************************************************/ /* PROTOTYPES PRIVES */ /******************************************************************************/ /* Gestion des modeles acceptes */ void keepModel(P_mod , P_PileOcc, NbSeq, NbSeq, LongSeq, LongSeq *longbloc, P_Criteres cr); #if DEBUG_TREE /* compte des feuilles suivant qu'on est sur un noeud ou non */ int compteFeuilles(P_occ); #endif /* essaie d'avancer d'une lettre dans un arc, et renvoie le noeud image */ Flag avanceBranche(P_occ, P_occ, int, int, Flag, P_Criteres, LongSeq, Flag); /* Lancement du saut */ NbSeq gestionSaut(P_mod model, P_PileOcc pocc, P_PileOcc poccnew, P_Criteres, LongSeq curbloc); /* explore les modeles */ Flag spellModels ( P_PileOcc pocc, P_PileOcc poccnew, P_PileOcc poccsaut, LongSeq longmod, LongSeq longcurbloc, LongSeq curbloc, Flag multiblocs, P_mod model, P_occ next, Bit_Tab **colors_model, NbSeq nbseq, NbSeq tmp_quorum, P_Criteres cr, LongSeq *longbloc, LongSeq *posdebbloc); /* Calcule le BT union de tous les BT occurrences */ NbSeq sommeBTOcc(P_PileOcc, Bit_Tab **); /* Compute CPU time */ static float PrintCpuTime(char); /******************************************************************************/ /* VARIABLES GLOBALES */ /******************************************************************************/ int nbmod = 0; LongSeq maxlongmod=0, *maxlongbloc=NULL; signed char ** text=NULL; FILE * f=NULL; /* EXTERNES from alphabet.c */ extern int nbSymbMod; extern int nbSymbSeq; extern char *nummod2str[127]; extern int carseq2num[127]; extern int comp[127]; extern Flag TabSymb[127][127]; extern int numJOKER; extern int numSAUT; 
/******************************************************************************/
/******************************************************************************/
/************************ BASIC FUNCTIONS *************************************/
/******************************************************************************/
/******************************************************************************/
#if DEBUG_TREE
/******************************************************************************/
/* compteFeuilles                                                             */
/******************************************************************************/
/* Counts leaves, depending on whether the occurrence sits on a node or on a  */
/* branch.  The nb_feuille field actually holds "leaf count + position": when */
/* it is negative the item is a leaf (count = 1) and its absolute value is    */
/* the position.  Otherwise it is not a leaf and there is no position (not    */
/* needed).                                                                   */
/* Only compiled when DEBUG_TREE is set.                                      */
/******************************************************************************/
int compteFeuilles(P_occ p)
{
int val;

/* lon == 0: read the count on the node itself, else on the child branch */
if (p->lon == 0)
	val = p->x->nb_feuille;
else
	val = (p->x->Trans)[p->num]->nb_feuille;

/* a non-positive value marks a leaf */
if (val <= 0)
	return(1);

return(val);
}
#endif


/******************************************************************************/
/******************************************************************************/
/*********************** HANDLING OF OCCURRENCE LISTS *************************/
/********************************* AND MODELS *********************************/
/******************************************************************************/
/******************************************************************************/
/* KeepModel                                                                  */
/******************************************************************************/
/* Writes an accepted model to the output file 'f' and updates the global     */
/* statistics.                                                                */
/*                                                                            */
/* model     model to report (symbols in model->name, length model->lon)      */
/* pocc      occurrence stack, passed on to afficheLastOcc() when OCC is set  */
/* nbseq     not used by this function                                        */
/* quorum    value printed after the model                                    */
/* l         model length, compared with maxlongmod                           */
/* longbloc  per-block lengths, compared with maxlongbloc when cr->bloc != 1  */
/* cr        criteria                                                         */
/*                                                                            */
/* Output: the model spelled with nummod2str, a space, the model again as     */
/* single characters (JOKERinterne / SAUTinterne for joker and jump symbols,  */
/* symbol code + SHIFTALPHA otherwise), then the quorum.                      */
/******************************************************************************/
void keepModel(P_mod model, P_PileOcc pocc, NbSeq nbseq, NbSeq quorum,
		LongSeq l, LongSeq *longbloc, P_Criteres cr)
{
int	i,j;
LongSeq	*lb, *mb;

nbmod++;

#if DEBUG_BASE
printf("MODELE %s valide!\n",model->name);
#endif

j = model->lon;

/* readable spelling of the model */
for(i=0; i!=j; i++)
	fprintf(f,"%s", nummod2str[model->name[i]]);
fprintf(f," ");

/* internal one-character-per-symbol spelling */
for(i=0; i!=j; i++)
	{
	if(model->name[i]==numJOKER)
		fprintf(f,"%c",JOKERinterne);
	else if(model->name[i]==numSAUT)
		fprintf(f,"%c",SAUTinterne);
	else
		fprintf(f,"%c", model->name[i]+SHIFTALPHA);
	}
fprintf(f," %d", quorum);

/* track the longest model and, for multi-block models, the longest blocks */
if(l > maxlongmod)
	maxlongmod = l;

if(cr->bloc != 1)
	for(i=0, lb=longbloc, mb=maxlongbloc; i!=cr->bloc; i++, lb++, mb++)
		if(*lb > *mb)
			*mb = *lb;

#if OCC
#if AFF_OCC
fprintf(f,"\n");
#else
fprintf(f,"\t");
#endif
afficheLastOcc(f, pocc, l, cr);
#else
fprintf(f,"\n");
#endif
}


/******************************************************************************/
/******************************************************************************/
/************************* MODEL SEARCH ***************************************/
/******************************************************************************/
/******************************************************************************/

/******************************************************************************/
/* avanceBranche                                                              */
/******************************************************************************/
/* Tries to move one letter forward along an arc.                             */
/* Returns 1 on success, 0 otherwise.                                         */
/*                                                                            */
/* next        receives the new occurrence                                    */
/* tmp         occurrence being extended                                      */
/* symbol      model symbol being added                                       */
/* trans       sequence symbol read on the arc                                */
/* flag_noeud  FAUX: the current branch of 'tmp' is not exhausted, stay on it */
/*             otherwise: 'tmp' is at the end of its branch, enter child      */
/*             'trans' of the node it leads to                                */
/* cr          criteria (maxerr, maxerrblocs)                                 */
/* curbloc     current block, index into cr->maxerrblocs                      */
/* multiblocs  VRAI when per-block errors and jump data must be maintained    */
/*                                                                            */
/* When symbol and trans are not equivalent the error count grows by one; the */
/* move fails as soon as it reaches cr->maxerr+1 (or, in multi-block mode,    */
/* when the block error count reaches cr->maxerrblocs[curbloc]+1).            */
/* On failure 'next' may have been partially written.                         */
/******************************************************************************/
Flag avanceBranche(	P_occ next, P_occ tmp, int symbol, int trans,
			Flag flag_noeud, P_Criteres cr, LongSeq curbloc,
			Flag multiblocs)
{
/* In this function the code is duplicated for the sake of speed:             */
/* as many eliminating tests as possible are done before the assignments      */

/* If the current branch is not exhausted...                                  */
if (flag_noeud == FAUX)
	{
	if ( equiv(symbol, trans) )
		{
		next->xerr	= tmp->xerr;
		next->blocerr	= tmp->blocerr;
		}
	else
		{
		next->xerr	= tmp->xerr+1;
		if (next->xerr == cr->maxerr+1)	/* global error limit exceeded */
			return 0;

		if(multiblocs == VRAI)
			{
			next->blocerr	= tmp->blocerr+1;
			/* local (per-block) error limit exceeded */
			if (next->blocerr == cr->maxerrblocs[curbloc]+1)
				return 0;
			}
		}

	/* same branch, one character further */
	next->x		= tmp->x;
	next->num	= tmp->num;
	next->lon	= tmp->lon+1;
	}
else	/* The current branch is exhausted: we move onto a new branch */
	{
	next->x	= tmp->x->fils[tmp->num];

	/* no child for this transition */
	if (next->x->fils[trans] == NULL)
		return(0);

	if ( equiv(symbol, trans) )
		{
		next->xerr	= tmp->xerr;
		next->blocerr	= tmp->blocerr;
		}
	else
		{
		next->xerr	= tmp->xerr+1;
		if (next->xerr == cr->maxerr+1)	/* global error limit exceeded */
			return 0;

		if(multiblocs == VRAI)
			{
			next->blocerr	= tmp->blocerr+1;
			/* local (per-block) error limit exceeded */
			if (next->blocerr == cr->maxerrblocs[curbloc]+1)
				return 0;
			}
		}

	/* first character of the child branch */
	next->num	= trans;
	next->lon	= 1;
	}

/* jump bookkeeping is only carried along in multi-block mode */
if(multiblocs == VRAI)
	{
	next->saut= tmp->saut;
	next->codesaut= tmp->codesaut;
	}

return(1);
}


/******************************************************************************/
/* sommeBTOcc                                                                 */
/******************************************************************************/
/* Unions the sequences of an occurrence list and returns the number          */
/* of those sequences.
*/
/******************************************************************************/
/* p   occurrence stack; only the entries above the index returned by         */
/*     getPrecDummy(p) are visited, stopping early at an entry whose x is     */
/*     NULL                                                                   */
/* bt  bit table that is reset, then receives the union                       */
/*                                                                            */
/* Calls fatalError() when the stack position is below 0.                     */
NbSeq sommeBTOcc(P_PileOcc p, Bit_Tab ** bt)
{
LongSeq	pos, precdummy;
P_occ	po;

ReinitBitTab(bt);

/* start from the top of the stack */
pos = p->pos-1;
if (pos < 0)
	fatalError("spell.c: sommeBTOcc: wrong position in stack!\n");

precdummy=getPrecDummy(p);
po = p->occ+pos;

while ((pos != precdummy) && (po->x != NULL))
	{
#if DEBUG_BT
	printf("Fusion avec : ");
#endif
	/* LEAF_BIT set in 'debut': the child is accessed as a Feuille */
	if (po->x->fils[po->num]->debut & LEAF_BIT)
		{
		fusionneBitTab(bt,((Feuille *)po->x->fils[po->num])->sequences);
#if DEBUG_BT
		printBitTab(((Feuille *)po->x->fils[po->num])->sequences);
#endif
		}
	else
		{
		fusionneBitTab(bt,po->x->fils[po->num]->sequences);
#if DEBUG_BT
		printBitTab(po->x->fils[po->num]->sequences);
#endif
		}

	/* next entry down the stack */
	po--;
	pos--;
	}

#if DEBUG_BT
printf("Somme BT : \n") ;
printBitTab(*bt);
printf(" -> %d values\n", nbSequenceInBitTab(*bt));
#endif

return nbSequenceInBitTab(*bt);
}


/******************************************************************************/
/* sauteSymbole                                                               */
/******************************************************************************/
/* Records the occurrence reached after a jump of 'longsaut' symbols, then    */
/* keeps extending the jump one symbol at a time while it does not exceed     */
/* cr->saut[curbloc].max.                                                     */
/*                                                                            */
/* curocc    occurrence to start from (passed by value and modified locally)  */
/* model     only passed on to the recursive calls                            */
/* pocc      stack receiving the occurrences (block error count reset to 0,   */
/*           jump total increased by longsaut, jump code from addSaut2Code)   */
/* cr        criteria holding the jump ranges                                 */
/* curbloc   block the jump follows                                           */
/* longsaut  jump length reached so far                                       */
/*                                                                            */
/* Returns the number of occurrences pushed on 'pocc'.                        */
/******************************************************************************/
int sauteSymbole(Occ curocc, P_mod model, P_PileOcc pocc, P_Criteres cr,
		LongSeq curbloc, LongSeq longsaut)
{
LongSeq	lmaxbr;
Noeud	*tmpnoeud;
Occ	tmpocc;
int	res = 0, trans;
char	carseq;

tmpnoeud = curocc.x->fils[curocc.num];

/* length of the branch leading to tmpnoeud (leaves store their end apart) */
if (tmpnoeud->debut & LEAF_BIT)
	lmaxbr = getValue(Liste_positions_fin,((Feuille *)tmpnoeud)->fin_deb)
		- (((Feuille *)tmpnoeud)->debut & LEAF_BIT_INV);
else
	lmaxbr = tmpnoeud->fin - tmpnoeud->debut;

#if DEBUG_SAUT
printf("SauteSymbole: j'ai gere le saut pour %d, noeud %d, etat: %d/%d branche %d\n",longsaut,curocc.x,curocc.lon,lmaxbr,curocc.num);
#endif

/* the current position is a valid landing point for this jump length */
ajouteOcc2Pile(pocc, curocc.x, curocc.num, curocc.lon, curocc.xerr, 0,
		curocc.saut+longsaut,
		addSaut2Code(curocc.codesaut, longsaut, curbloc, cr));
res++;

longsaut++;

if (curocc.lon != lmaxbr) /* we are in the middle of a branch */
	{
	curocc.lon++;
	carseq = text[tmpnoeud->sequence_number]
			[(tmpnoeud->debut & LEAF_BIT_INV)+curocc.lon-1];

	if(carseq==FINAL) /* reaching a $ ends the walk */
		return res;

	if(longsaut<=cr->saut[curbloc].max)
		res += sauteSymbole(curocc, model, pocc, cr, curbloc, longsaut);

	return res;
	}
else	/* otherwise we are at a node: several transitions are possible */
	{
	if(longsaut<=cr->saut[curbloc].max)
		{
		/* occurrence standing on the first symbol of a child branch */
		tmpocc.x	= tmpnoeud;
		tmpocc.lon	= 1;
		tmpocc.xerr	= curocc.xerr;
		tmpocc.codesaut	= curocc.codesaut;
		tmpocc.saut	= curocc.saut;

		/* a leaf has no children to follow */
		if ((tmpnoeud->debut & LEAF_BIT) == 0)
			for (trans = 0; trans != nbSymbSeq; trans++)
				{
				if (tmpnoeud->fils[trans] != NULL)
					{
					tmpocc.num = trans;
					res += sauteSymbole(tmpocc, model, pocc, cr, curbloc, longsaut);
					}
				}
		}
	return res;
	}
}


/******************************************************************************/
/* sauteBranche                                                               */
/******************************************************************************/
/* Skips ahead in the tree until the jump length reaches the minimum          */
/* cr->saut[curbloc].min, consuming whole branches when they fit, then hands  */
/* over to sauteSymbole() which records the landing points.                   */
/*                                                                            */
/* curocc    occurrence to start from (passed by value and modified locally)  */
/* model     only passed on to the recursive calls                            */
/* pocc      stack receiving the occurrences (filled by sauteSymbole)         */
/* cr        criteria holding the jump ranges                                 */
/* curbloc   block the jump follows                                           */
/* longsaut  jump length reached so far                                       */
/*                                                                            */
/* Returns the number of occurrences pushed on 'pocc'.                        */
/******************************************************************************/
int sauteBranche(Occ curocc, P_mod model, P_PileOcc pocc, P_Criteres cr,
		LongSeq curbloc, LongSeq longsaut)
{
LongSeq	lmaxbr;
Noeud	* tmpnoeud, *newtmpnoeud;
Occ	tmpocc;
int	res = 0, newlongsaut, trans;
char	carseq;

tmpnoeud = curocc.x->fils[curocc.num];

/* length of the branch leading to tmpnoeud (leaves store their end apart) */
if (tmpnoeud->debut & LEAF_BIT)
	lmaxbr = getValue(Liste_positions_fin,((Feuille *)tmpnoeud)->fin_deb)
		- (((Feuille *)tmpnoeud)->debut & LEAF_BIT_INV);
else
	lmaxbr = tmpnoeud->fin - tmpnoeud->debut;

#if DEBUG_SAUT
printf("SauteBranche: j'ai gere le saut pour %d, noeud %d, etat: %d/%d branche %d\n",longsaut,curocc.x,curocc.lon,lmaxbr,curocc.num);
#endif

if (curocc.lon != lmaxbr) /* we are in the middle of a branch */
	{
	/* the rest of the branch fits within the remaining minimum jump */
	if ( lmaxbr-curocc.lon <= cr->saut[curbloc].min-longsaut )
		{
		longsaut+=lmaxbr-curocc.lon;
		curocc.lon=lmaxbr;
#if DEBUG_SAUT
		printf("SauteBranche: milieuBr, fast, je vais au bout %d/%d br %d et lgsaut %d\n",curocc.lon,lmaxbr,curocc.num,longsaut);
#endif
		carseq = text[tmpnoeud->sequence_number]
				[(tmpnoeud->debut & LEAF_BIT_INV)+lmaxbr-1];

		/* go on only if the branch does not end on the $ terminator */
		if(carseq != FINAL)
			{
			/* NOTE(review): this debug text says the walk is over, yet */
			/* it is printed on the path that continues -- confirm      */
#if DEBUG_SAUT
			printf("SauteBranche: finBr=$, c'est fini\n");
#endif
			res += sauteBranche(curocc, model, pocc, cr, curbloc, longsaut);
			}
		}
	else
		{
		/* the minimum jump ends inside this branch: stop there */
		curocc.lon+=cr->saut[curbloc].min-longsaut;
		longsaut=cr->saut[curbloc].min;
#if DEBUG_SAUT
		printf("SauteBranche: milieuBr, minsaut ds Br, je m'arrete a %d/%d num %d et lgsaut %d\n",curocc.lon,lmaxbr,curocc.num,longsaut);
#endif
		res += sauteSymbole(curocc, model, pocc, cr, curbloc, longsaut);
		}
	}
else	/* otherwise we are at a node: several transitions are possible */
	{
	tmpocc.x	= tmpnoeud;
	tmpocc.xerr	= curocc.xerr;
	tmpocc.codesaut	= curocc.codesaut;
	tmpocc.saut	= curocc.saut;

	/* a leaf has no children to follow */
	if ((tmpnoeud->debut & LEAF_BIT) == 0)
		for (trans = 0; trans != nbSymbSeq; trans++)
			{
			tmpocc.num = trans;
			newlongsaut = longsaut;

			if (tmpnoeud->fils[trans] != NULL)
				{
				newtmpnoeud = tmpnoeud->fils[trans];

				/* length of the child branch */
				if (newtmpnoeud->debut & LEAF_BIT)
					lmaxbr = getValue(Liste_positions_fin,
						((Feuille *)newtmpnoeud)->fin_deb)
						- (newtmpnoeud->debut & LEAF_BIT_INV);
				else
					lmaxbr = newtmpnoeud->fin - newtmpnoeud->debut;

				/* the whole child branch fits within the remaining minimum jump */
				if ( lmaxbr <= cr->saut[curbloc].min-longsaut )
					{
					newlongsaut+=lmaxbr;
					tmpocc.lon=lmaxbr;
#if DEBUG_SAUT
					printf("SauteBranche: noeud, fast, %d/%d, br %d, lgsaut %d\n",tmpocc.lon,lmaxbr,tmpocc.num,newlongsaut);
#endif
					carseq = text[newtmpnoeud->sequence_number]
						[(newtmpnoeud->debut & LEAF_BIT_INV)+lmaxbr-1];

					/* go on only if the branch does not end on the $ terminator */
					if(carseq!=FINAL)
						{
#if DEBUG_SAUT
						printf("SauteBranche: finBr=$, c'est fini\n");
#endif
						res += sauteBranche(tmpocc, model, pocc, cr, curbloc, newlongsaut);
						}
					}
				else
					{
					/* the minimum jump ends inside the child branch */
					tmpocc.lon=cr->saut[curbloc].min-newlongsaut;
					newlongsaut=cr->saut[curbloc].min;
#if DEBUG_SAUT
					printf("SauteBranche2: noeud %d, minsaut ds Br, %d/%d, br %d, lgsaut %d\n",tmpocc.x, tmpocc.lon,lmaxbr,tmpocc.num,newlongsaut);
#endif
					res += sauteSymbole(tmpocc, model, pocc, cr, curbloc, newlongsaut);
					}
				}
			}
	}

return res;
}


/******************************************************************************/
/* gestionSaut                                                                */
/******************************************************************************/
/* Starts the jump for every occurrence on top of 'poccnew'.                  */
/*                                                                            */
/* A dummy entry is pushed on 'pocc', then each occurrence is expanded with   */
/* sauteSymbole() when the minimum jump of the block is 0 and with            */
/* sauteBranche() otherwise.  If nothing was produced, depileRec(pocc) is     */
/* called.  Returns the number of occurrences produced.                       */
/*                                                                            */
/* NOTE(review): the prototype at the top of the file declares 'curbloc' as   */
/* LongSeq while this definition uses NbSeq -- confirm both name the same     */
/* type.                                                                      */
/******************************************************************************/
NbSeq gestionSaut(P_mod model, P_PileOcc pocc,
P_PileOcc poccnew, P_Criteres cr, NbSeq curbloc) { LongSeq pos, precdummy; P_occ tmpocc; int res = 0; pos = poccnew->pos-1; tmpocc = poccnew->occ+pos; precdummy=getPrecDummy(poccnew); ajouteDummy(pocc); while ((pos != precdummy) && (tmpocc->x != NULL) ) { if (cr->saut[curbloc].min == 0) { res+=sauteSymbole(*tmpocc, model, pocc, cr, curbloc, 0); } else { res += sauteBranche(*tmpocc, model, pocc, cr, curbloc, 0); } pos--; tmpocc = poccnew->occ+pos; } if(res==0) depileRec(pocc); return (res); } /******************************************************************************/ /* spellModels */ /******************************************************************************/ /* Explore les modeles recursivement. */ /******************************************************************************/ Flag spellModels ( P_PileOcc pocc, P_PileOcc poccnew, P_PileOcc poccsaut, LongSeq longmod, LongSeq longcurbloc, LongSeq curbloc, Flag multiblocs, P_mod model, P_occ next, Bit_Tab **colors_model, NbSeq nbseq, NbSeq tmp_quorum, P_Criteres cr, LongSeq *longbloc, LongSeq *posdebbloc) { Flag zarb_back = 0, zarb_ext = 0; char carseq; LongSeq lmaxbr, pos, precdummy, palbloc; long int maxseq; P_occ tmpocc; int tmpint, nbnewmod = 0, nbocc, symbol, trans; NbSeq tmp_quorum2; if(longmod==3) barre(0); /* CONDITION D'EXTENSION */ if ( ( (cr->longueur.max == 0) || (longmod < cr->longueur.max) ) && ( (multiblocs == FAUX) || ( (cr->longbloc[curbloc].max == 0) || (longcurbloc < cr->longbloc[curbloc].max) ) ) && ( (cr->flag_palindrom == FAUX) || cr->palindrom[curbloc] == -1 || longcurbloc != longbloc[(int)(cr->palindrom[curbloc])] ) ) { #if DEBUG_BASE printf("J'etends...\n"); #endif /* Boucle sur les symboles de l'alphabet pourl'extension du modele ************/ for (symbol = 0; symbol != nbSymbMod; symbol++) { #if DEBUG_BASE AfficheModel(model); printf("Vers %d\n", symbol); #endif /* Pas de JOKER en premiere position */ if(longmod == 0 && symbol == numJOKER) continue; /* Gestion de la composition 
des modeles **************************************/ if (cr->flag_compo == VRAI || cr->flag_compobloc[curbloc] == VRAI) { if ( (cr->compobloc[curbloc][symbol] == 0) || (cr->compo[symbol] == 0) ) continue; } /* Gestion des palindromes */ if (cr->flag_palindrom) { if (longcurbloc == 0) posdebbloc[curbloc] = model->lon; if (cr->palindrom[curbloc] != -1) { palbloc = cr->palindrom[curbloc]; if (symbol!= comp[model->name[posdebbloc[palbloc]+longbloc[palbloc]-1-longcurbloc]]) continue; } } /* Init variables de pile d'occs */ pos = pocc->pos-1; tmpocc = pocc->occ+pos; precdummy=getPrecDummy(pocc); videPile(poccnew); maxseq = 0; nbocc = 0; #if DEBUG_BASE printf("J'ENTRE (l=%d symbol=%d model=%s quorum=%d)\n",longmod,symbol, model->name,tmp_quorum); #endif /* fprintf(stderr,"pos %d pd %d\n",pos, precdummy); */ while ((pos != precdummy) && (tmpocc->x != NULL)) { lmaxbr = ((tmpocc->x->fils)[tmpocc->num]->debut & LEAF_BIT)? getValue(Liste_positions_fin, ((Feuille *)tmpocc->x->fils[tmpocc->num])->fin_deb) - (tmpocc->x->fils[tmpocc->num]->debut & LEAF_BIT_INV) : tmpocc->x->fils[tmpocc->num]->fin - tmpocc->x->fils[tmpocc->num]->debut; #if DEBUG_BASE if(longmod!=0) { printf("Je traite l'occ:%p num %d lon %d saut %d codesaut %d (longmod=%d)\n", tmpocc->x,tmpocc->num,tmpocc->lon,tmpocc->saut, tmpocc->codesaut, longmod); afficheOcc(stdout, tmpocc, longmod,0); printf("...et je trouve:\n"); } #endif /* on est au milieu d'une branche - une transition possible */ if (tmpocc->lon != lmaxbr) { carseq = text[tmpocc->x->fils[tmpocc->num]->sequence_number] [ (tmpocc->x->fils[tmpocc->num]->debut & LEAF_BIT_INV) + tmpocc->lon]; if ( (carseq != FINAL) && (avanceBranche(next, tmpocc, symbol, carseq2num[(int) carseq], 0, cr, curbloc, multiblocs) ) ) { ajouteOcc2Pile(poccnew, next->x, next->num, next->lon, next->xerr,next->blocerr, next->saut, next->codesaut); #if DEBUG_BASE printf("occ:%p num %d lon %d saut %d codesaut %d (longmod=%d)\n", next->x,next->num,next->lon,next->saut, next->codesaut, 
longmod); afficheOcc(stdout, next, longmod+1,0); #endif nbocc++; if (next->x->fils[next->num]->debut & LEAF_BIT) { maxseq += nbSequenceInBitTab( ((Feuille *)next->x->fils[next->num])->sequences); #if DEBUG_BT printf("nb seq in bt (br): %d \n", nbSequenceInBitTab(((Feuille *) next->x->fils[next->num])->sequences)); #endif } else { maxseq += next->x->fils[next->num]->nb_element_bt; #if DEBUG_BT printf("nb seq in bt (br): %d \n", nbSequenceInBitTab( next->x->fils[next->num]->sequences)); #endif } } } /* sinon on est a un noeud, plusieurs trans sont eventuellement possibles */ else { for (trans = 0; trans != nbSymbSeq; trans++) { tmpocc=pocc->occ+pos; if (avanceBranche(next, tmpocc, symbol, trans, 1, cr, curbloc, multiblocs)) { ajouteOcc2Pile(poccnew, next->x, next->num, next->lon, next->xerr, next->blocerr, next->saut, next->codesaut); #if DEBUG_BASE printf("occ:%p num %d lon %d saut %d codesaut %d (longmod=%d)\n", next->x,next->num,next->lon,next->saut, next->codesaut, longmod); afficheOcc(stdout, next, longmod+1, 0); #endif nbocc++; if (next->x->fils[next->num]->debut & LEAF_BIT) { maxseq += nbSequenceInBitTab(((Feuille *) next->x->fils[next->num])->sequences); #if DEBUG_BT printf("nb seq in bt (nd): %d \n", nbSequenceInBitTab(((Feuille *) next->x->fils[next->num])->sequences)); #endif } else { maxseq += next->x->fils[next->num]->nb_element_bt; #if DEBUG_BT printf("nb seq in bt (nd): %d \n", nbSequenceInBitTab( next->x->fils[next->num]->sequences)); #endif } } } } /* Si on n'a plus d'occurrences dans la pile */ if(pos == 0) { #if DEBUG_BASE printf("break avec %d occ\n",nbocc); #endif break; } pos--; tmpocc=pocc->occ+pos; #if DEBUG_PILE printf("pos pile %d (adresse %p), len mod %d, nbocc %d\n",pos, tmpocc,longmod,nbocc); printf("x %p\n",tmpocc->x); #endif /* if(pos==precdummy) */ /* fprintf(stderr,"pos %d et fin\n",pos); */ /* else */ /* fprintf(stderr,"pos %d x %x\n",pos,(pocc->occ+pos)->x); */ } #if DEBUG_BASE printf("J'ai trouve %d occ\n",nbocc); 
afficheOldOcc(poccnew, longmod+1); #endif if (nbocc == 0) continue; /***************/ /* CAS DU SAUT */ /***************/ tmp_quorum2 = -1; if ( multiblocs && (curbloc != cr->bloc-1) && (longcurbloc+1 >= cr->longbloc[curbloc].min ) && (maxseq >= cr->quorum) && ( (tmp_quorum2 = sommeBTOcc(poccnew, colors_model) ) >= cr->quorum) && ( gestionSaut(model, pocc, poccnew, cr, curbloc)) != 0 ) { changeModel(model, symbol); changeModel(model, numSAUT); ajouteDummy(poccsaut); tmpint = copieLastOcc(poccsaut,poccnew); #if DEBUG_SAUT printf("J'ai copie %d occ de Pnew->Psaut\n",tmpint); afficheOldOcc(poccnew,longmod+1); #endif videPile(poccnew); zarb_ext = 1; if ( cr->flag_compo == VRAI || cr->flag_compobloc[curbloc] == VRAI ) { cr->compo[symbol]--; cr->compobloc[curbloc][symbol]--; } if(multiblocs) longbloc[curbloc] = longcurbloc+1; zarb_back += spellModels(pocc, poccnew,poccsaut, longmod+1, 0, curbloc+1, multiblocs, model, next, colors_model, nbseq, tmp_quorum2, cr, longbloc, posdebbloc); if ( cr->flag_compo == VRAI || cr->flag_compobloc[curbloc] == VRAI ) { cr->compo[symbol]++; cr->compobloc[curbloc][symbol]++; } decrModel(model); /* vire la premiere lettre du nouveau bloc */ decrModel(model); /* vire le symbole de saut */ videPile(poccnew); tmpint = copieLastOcc(poccnew,poccsaut); #if DEBUG_SAUT printf("J'ai copie %d occ de Psaut->Pnew\n",tmpint); afficheOldOcc(poccnew,longmod+1); #endif depileRec(poccsaut); depileRec(pocc); } #if DEBUG_BASE printf("nbocc = %d\n",nbocc); if (nbocc<=0) printf("Sortie nbocc\n"); else if (maxseq < cr->quorum) printf("Sortie maxseq %ld\n",maxseq); else printf("Calcul de quorum (maxseq = %ld) tmp_quorum2=%d\n", maxseq,tmp_quorum2); #endif if ( (maxseq >= cr->quorum) && ( tmp_quorum2!=-1 ? 
tmp_quorum2 >= cr->quorum: (tmp_quorum2 = sommeBTOcc(poccnew, colors_model) ) >= cr->quorum)) { #if DEBUG_BASE printf("Accepte (res quorum=%d)\n", tmp_quorum2); #endif if(symbol == numJOKER) zarb_ext = 1; else nbnewmod++; changeModel(model,symbol); ajouteDummy(pocc); transferePile2Pile(pocc, poccnew); if ( cr->flag_compo == VRAI || cr->flag_compobloc[curbloc] == VRAI ) { cr->compo[symbol]--; cr->compobloc[curbloc][symbol]--; } zarb_back += spellModels(pocc, poccnew,poccsaut, longmod+1, longcurbloc+1, curbloc, multiblocs, model, next, colors_model, nbseq, tmp_quorum2, cr, longbloc, posdebbloc); if ( cr->flag_compo == VRAI || cr->flag_compobloc[curbloc] == VRAI ) { cr->compo[symbol]++; cr->compobloc[curbloc][symbol]++; } depileRec(pocc); /* on decremente la longueur du modele de 1 */ decrModel(model); } #if DEBUG_BASE else printf("Refuse curquorum=%d quorum=%d\n",tmp_quorum2,cr->quorum); #endif } /* Si: il n'y pas eu d'extension REGULIERE, la longueur courante est valide, */ /* ET [il n'y a pas eu d'extension bizarre (joker, saut) OU ces extensions */ /* ont pose un probleme (modele se terminant par jokers)] */ /* printf("Avant test: %s\n",model->name); */ /* printf("nbnewmod: %d longmod: %d longcurbloc: %d zarb %d %d\n",nbnewmod,longmod,longcurbloc,zarb_back,zarb_ext); */ /* printf("%d %d %d %d %d %d %d\n",curbloc == cr->bloc-1,nbnewmod == 0,longmod >= cr->longueur.min,multiblocs == FAUX,longcurbloc >= cr->longbloc[curbloc].min,zarb_back!=0,zarb_ext==0); */ if ( (curbloc == cr->bloc-1) /*&& (nbnewmod == 0)*/ && (longmod >= cr->longueur.min) && ( (multiblocs == FAUX) || (longcurbloc >= cr->longbloc[curbloc].min ) ) && ( cr->flag_palindrom == FAUX || cr->palindrom[curbloc] == -1 || longcurbloc == longbloc[(int)(cr->palindrom[curbloc])] ) && ( (zarb_back!=0) || (zarb_ext==0) ) ) { /* A VIRER? ce test est il inutile? 
*/ if ( (model->name[model->lon-1] != numJOKER) && (model->name[model->lon-1] != numSAUT) ) { if(multiblocs) longbloc[curbloc] = longcurbloc; keepModel(model, pocc, nbseq, tmp_quorum, longmod, longbloc, cr); return(0); } else return(1); } } else if ( (curbloc == cr->bloc-1)) /* && ( (cr->longueur.max != 0) && (longmod <= cr->longueur.max) ) */ /* && ((multiblocs == FAUX) || ((cr->longbloc[curbloc].max != 0) */ /* && (longcurbloc <= cr->longbloc[curbloc].max) ) ) ) */ { /* A VIRER? ce test est il inutile? */ if ( (model->name[model->lon-1] != numJOKER) && (model->name[model->lon-1] != numSAUT)) { if(multiblocs) longbloc[curbloc] = longcurbloc; keepModel(model, pocc, nbseq, tmp_quorum, longmod, longbloc, cr); return(0); } else return(1); } return(0); } /******************************************************************************/ /* doSpell */ /******************************************************************************/ /* Lance la recursion sur les modeles. */ /******************************************************************************/ void doSpell(P_Criteres cr, NbSeq nbseq, Noeud *root) { /* bloc est un indicateur du bloc en cours de construction */ P_mod model; P_PileOcc pocc, poccnew, poccsaut=NULL; P_occ next; Bit_Tab *colors_model; Flag multiblocs; Noeud *root_pere; LongSeq *longbloc=NULL; LongSeq *posdebbloc=NULL; /* Creation du faux pere de la racine (pour faciliter la recursion) */ root_pere = Alloc_Noeud(); root_pere->fils[Translation_Table[FINAL]] = root; root_pere->sequence_number = 0; root->debut = 0; root->fin = 1; root->sequence_number = 0; if(cr->bloc == 1) multiblocs = FAUX; else multiblocs = VRAI; /* Allocation du modele */ model = allocModel(); /* Allocation de l'occurrence courante */ next = (P_occ) calloc (1,sizeof(Occ)); if (next == NULL) fatalError("doSpell: cannot allocate 'next'\n"); initOcc(next); /* Allocation du tableau de bits courant */ colors_model = AllocBitTab(); ReinitBitTab(&colors_model); /* Allocation des piles 
d'occurrences */ if( multiblocs == VRAI ) { poccsaut = creePileOcc(); longbloc = (LongSeq *) malloc(cr->bloc * sizeof(LongSeq)); if (longbloc == NULL) fatalError("doSpell: cannot allocate 'longbloc'\n"); if ( cr->flag_palindrom ) { posdebbloc = (LongSeq *) malloc(cr->bloc * sizeof(LongSeq)); if (posdebbloc == NULL) fatalError("doSpell: cannot allocate 'posdebbloc'\n"); } } poccnew = creePileOcc(); pocc = creePileOcc(); /* Ajout de l'occurrence nulle dans la pile d'occurrence */ ajouteInitOcc2Pile(pocc, root_pere); fprintf(stderr,"** Models extraction **\n"); barre((int)pow((double)nbSymbMod, 3.0)); /* ...et lancement de la recursion */ spellModels(pocc, poccnew, poccsaut, 0, 0, 0, multiblocs, model, next, &colors_model, nbseq, 0, cr, longbloc, posdebbloc); /* Liberation des structures */ free(next); free(model->name); free(model); free(colors_model); liberePileOcc(pocc); liberePileOcc(poccnew); if(multiblocs == VRAI) liberePileOcc(poccsaut); } /******************************************************************************/ /******************************************************************************/ /********************************** MAIN **************************************/ /******************************************************************************/ /******************************************************************************/ int main(int argc, char **argv) { FastaSequence **seq; Flag readok; char infini = 0, buf[100]; NbSeq nbtxt; float quorum = 0.0, user_time; int i, j, posarg, taille, siztxt; long int nbsymb; LongSeq maxlongsaut = 0; Noeud *arbre_suffixe; Criteres criteres; Symbole *alphaseq; posarg = 4; /* QUORUM */ quorum = atof(argv[posarg++]); /* BLOCS */ criteres.bloc = atoi(argv[posarg++]); allocBloc(&criteres, criteres.bloc); /* LONGUEUR MIN */ criteres.longueur.min = (LongSeq)atoi(argv[posarg++]); /* LONGUEUR MAX */ criteres.longueur.max = (LongSeq)atoi(argv[posarg++]); if ( criteres.longueur.max == 0 ) infini = 1; /* ERREURS GLOBALES */ 
criteres.maxerr = (LongSeq)atoi(argv[posarg++]); /* PARAMETRES BLOCS ***********************************************************/ if(criteres.bloc > 1) { for(i = 0; i != criteres.bloc; i++ ) { /* LONGUEUR MIN BLOC */ criteres.longbloc[i].min = (LongSeq)atoi(argv[posarg++]); /* LONGUEUR MAX BLOC */ criteres.longbloc[i].max = (LongSeq)atoi(argv[posarg++]); if ( criteres.longbloc[i].max == 0 ) infini = 1; maxlongmod += criteres.longbloc[i].max; /* ERREURS BLOC */ criteres.maxerrblocs[i] = atoi(argv[posarg++]); if(i != criteres.bloc-1 ) { /* SAUT MIN BLOC */ criteres.saut[i].min = (LongSeq)atoi(argv[posarg++]); /* SAUT MAX BLOC */ criteres.saut[i].max = (LongSeq)atoi(argv[posarg++]); maxlongsaut += criteres.saut[i].max; } } } if ( infini == 0 ) { if (maxlongmod < criteres.longueur.max) maxlongmod = criteres.longueur.max; maxlongmod += maxlongsaut; } else maxlongmod = INT_MAX; /******************************************************************************/ /* TRAITEMENT DES SEQUENCES */ /******************************************************************************/ /* Allocations */ seq = (FastaSequence **) malloc(GRAINSEQ * sizeof(FastaSequence *)); text = (signed char **) malloc(GRAINSEQ * sizeof(signed char *)); if(!seq || !text) fatalError("main: seq/text: cannot allocate\n"); siztxt = GRAINSEQ; /* Ouverture du fichier contenant les sequences */ f = fopen (argv[2],"r"); if(f==NULL) { fprintf(stderr,"Error: main: cannot open FASTA file '%s'\n",argv[2]); exit(1); } readok = 1; nbtxt = 0; nbsymb = 0; /* Stockage des sequences en memoire */ do { if(nbtxt == siztxt) { siztxt *= 2; seq = (FastaSequence **) realloc(seq,siztxt * sizeof(FastaSequence *)); text = (signed char **) realloc(text, siztxt * sizeof(signed char *)); if(!seq || !text) fatalError("main: seq/text: cannot reallocate\n"); } seq[nbtxt] = NewFastaSequence(); readok = ReadFastaSequence(f, seq[nbtxt]); if (readok) { nbsymb += seq[nbtxt]->length; taille = seq[nbtxt]->length+1; text[nbtxt] = (signed char *) 
malloc ((taille+2) * sizeof(signed char)); if (text[nbtxt] == NULL) fatalError("main: cannot allocate 'text'\n"); /* printf("Seq %d\n", nbtxt); */ /* printf("%d symboles lus\n",taille-1); */ /* for(i=0; i!=taille; i++) */ /* printf("%c.",seq[nbtxt]->seq[i]); */ /* printf("\n"); */ strcpy((char *) text[nbtxt],seq[nbtxt]->seq); text[nbtxt][taille-1] = FINAL; text[nbtxt][taille] = '\0'; nbtxt++; } } while (readok); fclose(f); if (nbtxt == 0) fatalError("No sequence in FASTA file!\n"); if (nbtxt == 1) fatalError("One sequence only in FASTA file!\n"); criteres.nbsymb = nbsymb; if (quorum == 0.0) criteres.quorum = (NbSeq) ceil( (double) (70*nbtxt)/100.0); else criteres.quorum = (NbSeq) ceil( (double) (quorum*nbtxt)/100.0); if(criteres.quorum==1) warning("quorum value is 1 sequence!"); else if(criteres.quorum<1) fatalError("main: quorum value is lower than 1 sequence!"); /******************************************************************************/ /* Chargement alphabet sequences et modeles */ if(!(f=fopen(argv[1],"r"))) { fprintf(stderr,"Error: main: cannot open alphabet file '%s'\n",argv[1]); exit(1); } initAlphabet(); if(!(alphaseq = chargeAlphabet(f, (Symbole **) text, nbtxt))) fatalError("main: incorrect alphabet file format\n"); fclose(f); /******************************************************************************/ /* COMPOSITION (traitee apres car besoin alphabet modeles) */ /* S'il reste des arguments, c'est la composition et/ou les palindromes */ setCompoPal(&criteres, argv+posarg, argc-posarg); /* Transformation de l'alphabet (ex: AG => R) */ /* + création des complémentarités pour palindromes */ transAlphMod(criteres.flag_palindrom); /******************************************************************************/ /* Construction de l'arbre compact generalise */ fprintf(stderr, "** Suffix tree construction **\n"); barre(nbtxt); Init_All(alphaseq, 0, nbtxt); arbre_suffixe = Construction_Arbre((unsigned char *)text[0], maxlongmod); barre(0); for (i = 1; i 
!= nbtxt; i++) { arbre_suffixe=AjouteSequence(arbre_suffixe,(unsigned char *)text[i], maxlongmod); barre(0); } fprintf(stderr,"\n"); /******************************************************************************/ /* Liberation de la structure Fasta */ for(i=0;i != nbtxt;i++) FreeFastaSequence(seq[i]); free(seq); #if DEBUG_TREE fprintf(stderr,"\nNB Feuilles: %d\n\n",calculFeuilles(arbre_suffixe)); #endif UpdateBit_TabForAllTree(arbre_suffixe); /* if (flag_tree == VRAI) */ /* Print_Tree(arbre_suffixe,1,0); */ strcpy(buf,argv[3]); /* if(!strcmp(buf+strlen(buf)-4,".out")) */ /* buf[strlen(buf)-4]='\0'; */ /************************ enumeration des resultats ***************************/ printf("Extraction is going to be made with the following parameters:\n"); printf("FASTA file: %s\n",argv[2]); printf("Alphabet file: %s\n",argv[1]); printf("Output file: %s\n",argv[3]); printf("Total min length: %d\n",criteres.longueur.min); if (criteres.longueur.max == 0) printf("Total max length: MAX\n"); else printf("Total max length: %d\n",criteres.longueur.max); printf("Boxes: %d\n",criteres.bloc); printf("Total number of subst.: %d\n",criteres.maxerr); printf("Quorum: %f%% (%d sequences in %d)\n\n", quorum,criteres.quorum,nbtxt); if (criteres.flag_compo) { for (i = 0; i != nbSymbMod; i++) { if (criteres.compo[i] != -1) printf("Total max composition in %s: %d\n",nummod2str[i], criteres.compo[i]); } } if (criteres.bloc > 1) { for (i = 0; i != criteres.bloc; i++) { printf("\nBOX %d\n",i+1); printf("Min length: %d\n",criteres.longbloc[i].min); if (criteres.longbloc[i].max == 0) printf("Max length: MAX\n"); else printf("Max length: %d\n", criteres.longbloc[i].max); printf("Max number of subst.: %d\n", criteres.maxerrblocs[i]); if (i != criteres.bloc-1) { printf("Min spacer length: %d\n",criteres.saut[i].min); printf("Max spacer length: %d\n",criteres.saut[i].max); /* sprintf(buf2,"[%d-%d]",criteres.saut[i].min,criteres.saut[i].max); */ /* strcat(buf,buf2); */ } if 
(criteres.flag_compobloc[i]) { for (j = 0; j != nbSymbMod; j++) if (criteres.compobloc[i][j] != -1) printf("Max composition in %s: %d\n", nummod2str[j],criteres.compobloc[i][j]); } if (criteres.palindrom[i]!=-1) printf("Palindrom of box: %d\n", criteres.palindrom[i]+1); } } fprintf(stderr,"\n ------ CHECK THESE PARAMETERS! ------\n"); if(criteres.bloc > 1) initTabSauts(&criteres); if (!strcmp(argv[2],"stdout")) f = stdout; else { /* strcat(buf,".out"); */ if(!(f = fopen(buf,"w"))) { fprintf(stderr,"Error: main: cannot open output file '%s'\n",buf); exit(1); } } /* Remise a zero de maxlongmod pour recalcul de vraie longueur max */ /* et allocation du tableau des longueurs max des blocs */ maxlongmod = 0; if(!(maxlongbloc = (LongSeq *) calloc(criteres.bloc, sizeof(LongSeq)))) fatalError("main: cannot allocate 'maxlongbloc'\n"); /* Insertion de l'espace necessaire en tete de fichier pour y mettre la */ /* ligne d'informations apres extraction */ /* J'ecris en tout 80 * 3 = 240 espaces et 80 '=' => 320 caracteres */ for(i=0; i!=3; i++) { fprintf(f," "); fprintf(f," \n"); } fprintf(f,"========================================"); fprintf(f,"=======================================\n"); /******************************************************************************/ /******************************************************************************/ /* Fonction principale */ PrintCpuTime(1); doSpell(&criteres,nbtxt,arbre_suffixe); user_time=PrintCpuTime(0); /******************************************************************************/ /******************************************************************************/ for(i=0; i!=nbtxt; i++) free(text[i]); free(text); /* Affichage et insertion du nb de modeles trouves en fin de fichier */ printf("\nNb models: %d\nUser time : %.2f sec.\n", nbmod, user_time); fprintf(f,"Nb models: %d\nUser time : %.2f sec.\n", nbmod, user_time); /******************************************************************************/ /* Insertion de la 
ligne de parametres en tete du fichier */ rewind(f); fprintf(f,"%%%%%% %d %d/%d %ld %d %d %d",criteres.bloc,criteres.quorum,nbtxt, criteres.nbsymb, criteres.longueur.min, maxlongmod, criteres.maxerr); /* Ecriture des dimensions des blocs trouves */ if(criteres.bloc > 1) { for(j=0; j!=criteres.bloc; j++) { fprintf(f," %d %d %d",criteres.longbloc[j].min, maxlongbloc[j], criteres.maxerrblocs[j]); if(j!=criteres.bloc-1) fprintf(f," %d %d", criteres.saut[j].min, criteres.saut[j].max); } } /* Ecriture du nom du fichier alphabet utilise et de l'alphabet des sequences */ fprintf(f," %s %s", argv[1], alphaseq); /******************************************************************************/ /* Liberation de l'arbre */ Free_Arbre(arbre_suffixe); return(0); } /******************************************************************************/ /* PrintCpuTime */ /******************************************************************************/ static float PrintCpuTime(char flag) { float ust; struct tms tms; static float dust; times(&tms); ust = (float) tms.tms_utime; if (flag) { dust = ust; return 0.0; } else { ust -= dust; return ust / sysconf(_SC_CLK_TCK); } } SMILEv1.47/P_BLOCS/Spell/src/global.c0000644002404200237300000000507110066543461016531 0ustar lamaaoc00000000000000/******************************************************************************/ /* SMILE v1.47 - Extraction of structured motifs common to several sequences */ /* Copyright (C) 2004 L.Marsan (lama -AT- prism.uvsq.fr) */ /* */ /* This program is free software; you can redistribute it and/or */ /* modify it under the terms of the GNU General Public License */ /* as published by the Free Software Foundation; either version 2 */ /* of the License, or (at your option) any later version. */ /* */ /* This program is distributed in the hope that it will be useful, */ /* but WITHOUT ANY WARRANTY; without even the implied warranty of */ /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the */ /* GNU General Public License for more details. */ /* */ /* You should have received a copy of the GNU General Public License */ /* along with this program; if not, write to the Free Software */ /* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ /******************************************************************************/ #include #include /******************************************************************************/ /* fatalError */ /******************************************************************************/ /* Gestion des erreurs FATALES! */ /******************************************************************************/ void fatalError(char *msg) { fprintf(stderr,">> Error: %s\n",msg); exit(1); } /******************************************************************************/ /* warning */ /******************************************************************************/ void warning(char *msg) { fprintf(stderr,"> Warning: %s\n",msg); } /******************************************************************************/ /* entree */ /******************************************************************************/ void entree(void) { printf("\n-- Type ENTER\n"); fflush(stdin); getchar(); } SMILEv1.47/P_BLOCS/Spell/src/model.c0000644002404200237300000000721010066543465016372 0ustar lamaaoc00000000000000/******************************************************************************/ /* SMILE v1.47 - Extraction of structured motifs common to several sequences */ /* Copyright (C) 2004 L.Marsan (lama -AT- prism.uvsq.fr) */ /* */ /* This program is free software; you can redistribute it and/or */ /* modify it under the terms of the GNU General Public License */ /* as published by the Free Software Foundation; either version 2 */ /* of the License, or (at your option) any later version. 
*/ /* */ /* This program is distributed in the hope that it will be useful, */ /* but WITHOUT ANY WARRANTY; without even the implied warranty of */ /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ /* GNU General Public License for more details. */ /* */ /* You should have received a copy of the GNU General Public License */ /* along with this program; if not, write to the Free Software */ /* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ /******************************************************************************/ #include /******************************************************************************/ /* AfficheModel */ /******************************************************************************/ void AfficheModel(P_mod m) { int i; for(i=0; ilon; i++) printf("%d",m->name[i]); printf("\n"); } /******************************************************************************/ /* allocModel */ /******************************************************************************/ /* Alloue la structure d'un modele */ /******************************************************************************/ P_mod allocModel(void) { P_mod model; if ( !(model = (P_mod) malloc (sizeof(Mod)) ) ) fatalError("allocModel: cannot allocate 'model'\n"); if ( !(model->name = (int *) calloc (GRAIN_SIZMOD, sizeof(int)) ) ) fatalError("allocModel: cannot allocate 'model->name'\n"); model->taille = GRAIN_SIZMOD; model->lon = 0; return model; } /******************************************************************************/ /* changeModel */ /******************************************************************************/ /* Ajoute un symbole au modele */ /******************************************************************************/ void changeModel(P_mod mod, int symbol) { if (mod->lon+2 >= mod->taille) { mod->taille += GRAIN_SIZMOD; #if DEBUG_BASE printf("J'etends model a %d\n",mod->taille); #endif mod->name = (int *) realloc(mod->name, 
mod->taille * sizeof(int)); if (!mod->name) fatalError("changeModel: cannot reallocate 'model->name'\n"); } (mod->name)[mod->lon] = symbol; (mod->lon)++; } /******************************************************************************/ /* decrModel */ /******************************************************************************/ void decrModel(P_mod model) { if(model->lon <= 0) return; model->lon--; } SMILEv1.47/P_BLOCS/Spell/src/alphabet.c0000644002404200237300000003566410066543445017066 0ustar lamaaoc00000000000000/******************************************************************************/ /* SMILE v1.47 - Extraction of structured motifs common to several sequences */ /* Copyright (C) 2004 L.Marsan (lama -AT- prism.uvsq.fr) */ /* */ /* This program is free software; you can redistribute it and/or */ /* modify it under the terms of the GNU General Public License */ /* as published by the Free Software Foundation; either version 2 */ /* of the License, or (at your option) any later version. */ /* */ /* This program is distributed in the hope that it will be useful, */ /* but WITHOUT ANY WARRANTY; without even the implied warranty of */ /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ /* GNU General Public License for more details. */ /* */ /* You should have received a copy of the GNU General Public License */ /* along with this program; if not, write to the Free Software */ /* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
*/ /******************************************************************************/ #include void chercheComp(char *s1, char *s2); /******************************************************************************/ /* VARIABLES GLOBALES */ /******************************************************************************/ Flag TabSymb[MAXSYMBMOD][MAXSYMBMOD]; /* Table d'equivalences */ char *alphMod[MAXSYMBMOD]; /* Alphabet original des modeles */ Symbole alphSeq[MAXSYMBMOD]={0}; /* Alphabet des sequences */ int carseq2num[MAXSYMBMOD]; /* Conversion caractere => indice ds alphabet*/ char *nummod2str[MAXSYMBMOD]; /* Conversion num modeles => symboles */ int comp[MAXSYMBMOD]; /* Conversion complément pour palindrome */ int nbSymbMod = 0, /* Nb de symboles de l'alphabet de modeles*/ nbSymbSeq = 0, /* idem sequences */ numSAUT = -1, /* code utilise pour SAUT ds nummod2str */ numJOKER = -1; /* code utilise pour JOKER ds nummod2str */ enum {DNA, PROTEINS, UNKNOWN} type; /* Type de la sequence lue */ /******************************************************************************/ /* initAlphabet: */ /* initialise les variables globales de la classe. */ /******************************************************************************/ void initAlphabet(void) { int i, j; for(i=0; i!=MAXSYMBMOD; i++) { carseq2num[i] = -1; for(j=0; j!=MAXSYMBMOD; j++) TabSymb[i][j] = 0; } alphSeq[0] = '\0'; } /******************************************************************************/ /* chargeAlphabet: */ /* lecture du fichier alphabet et construction de la matrice d'equivalence. */ /* PREND le fichier alphabet, le texte a traiter. */ /* RENVOIE un ptr sur l'alphabet des sequences a traiter. */ /******************************************************************************/ Symbole * chargeAlphabet(FILE *f, Symbole **seq, NbSeq nbseq) { int i, k; Symbole *j; char tmp[MAXSYMBMOD]={0}, s, line[512]; /* PARCOURS DU TEXTE pour recherche de l'alphabet utilise. 
*/ for(i=0; i!=nbseq; i++) { for(j=seq[i]; (*j)!=FINAL; j++) { s = *j; if(s<32 || s>=MAXSYMBMOD) { fprintf(stderr, ">> Error: Seq %d Pos %d Control character %d ('%c')\n", i, (int)(j-seq[i]), s, s); exit(1); } else if(s == JOKER || s == SAUT) { fprintf(stderr, ">> Error: Seq %d Pos %d Forbidden character %d ('%c') in the sequences\n", i, (int)(j-seq[i]), s, s); exit(1); } else if(!isalnum((int)s)) fprintf(stderr, "> Warning: Seq %d Pos %d Non alphanumeric character '%c'\n", i, (int)(j-seq[i]), s); else if(islower((int) s)) /* Mise en majuscules de la sequence */ { s = (Symbole) toupper((int) s); *j = s; } tmp[(int) s] = 1; } } /* Construction de la chaine a fournir pour construire l'arbre */ for(i=32, nbSymbSeq = 0; i!=MAXSYMBMOD; i++) { if(tmp[i]==1) { alphSeq[nbSymbSeq] = i; carseq2num[i] = nbSymbSeq; nbSymbSeq++; } } carseq2num[(int) FINAL] = nbSymbSeq; alphSeq[nbSymbSeq] = FINAL; alphSeq[nbSymbSeq+1] = '\0'; printf("\n** Text alphabet: %s (%d symbols + terminator) **\n", alphSeq, nbSymbSeq); /* LECTURE DU FICHIER ALPHABET */ fgets(line, 512, f); /* Determination du type d'alphabet */ if(strstr(line, "Nucleotide")) { type = DNA; fprintf(stderr, "** Models alphabet: Nucleotides **\n"); } else if(strstr(line, "Protein")) { type = PROTEINS; fprintf(stderr, "** Models alphabet: Amino-acids **\n"); } else { type = UNKNOWN; fprintf(stderr, "** Models alphabet: unknown **\n"); } /* Lecture des lignes de l'alphabet des modeles */ nbSymbMod = 0; while(fgets(line, 512, f)) { j = (Symbole *) line; if(*j == '\n') continue; alphMod[nbSymbMod] = (char *) malloc((strlen(line)+1)*sizeof(char)); if(alphMod[nbSymbMod]==NULL) fatalError("alphabet.c: chargeAlphabet: cannot allocate 'alphMod[i]'\n"); strcpy(alphMod[nbSymbMod], line); while(*j != '\0' && *j != '\n') { s = *j; if(s<=32 || s>=MAXSYMBMOD) { fprintf(stderr, ">> Error: Control character '%c' in the alphabet file\n", s); exit(1); } else if(s == JOKER) { if(j!=(Symbole *)line || (*(j+1)!='\0' && *(j+1)!='\n')) { 
fprintf(stderr, ">> Error: JOKER character '%c' not alone\n", JOKER); exit(1); } if(numJOKER != -1) fatalError("JOKER defined 2 times in the alphabet file\n"); numJOKER = nbSymbMod; for(k=0; k!=nbSymbSeq; k++) TabSymb[numJOKER][k] = 1; } else if(s == FINAL || s == SAUT) { fprintf(stderr, ">> Error: Forbidden character %d ('%c') in the alphabet file\n", s, s); exit(1); } else if(!isalnum((int)s)) { fprintf(stderr, "Warning: Non alphanumeric character '%c' in the alphabet file\n", s); TabSymb[nbSymbMod][carseq2num[(int)s]] = 1; } else { if(islower((int) s)) *j = s = (Symbole) toupper((int) s); TabSymb[nbSymbMod][carseq2num[(int)s]] = 1; /* printf("Je fais matcher Mod %s %d et symbole %c %d\n",line, nbSymbMod,s,s); */ } j++; } if(*j == '\n') *j = '\0'; if(!(nummod2str[nbSymbMod] = (char *) malloc((strlen(line)+4)*sizeof(char)))) fatalError("chargeAlphabet: cannot allocate 'nummod2str[i]'\n"); strcpy(nummod2str[nbSymbMod], line); nbSymbMod++; } fprintf(stderr, "Models alphabet's size: %d\n",nbSymbMod); /* Ajout des symboles speciaux dans nummod2str */ numSAUT = nbSymbMod; if(!(nummod2str[numSAUT] = (char *) malloc(2*sizeof(char)))) fatalError("chargeAlphabet: cannot allocate 'nummod2str[i]'\n"); nummod2str[nbSymbMod][0] = SAUT; nummod2str[nbSymbMod][1] = '\0'; /* Affiche l'alphabet des modeles */ /* for(i=0; i!=nbSymbMod; i++) */ /* { */ /* printf("SymbMod %d\t%s\n",i,nummod2str[i]); */ /* } */ /* Info sur l'alphabet du texte */ for(i=0; i!=nbSymbSeq; i++) { s = 0; for(k=0; k!=nbSymbMod; k++) s |= (char) TabSymb[k][i]; if(!s) fprintf(stderr,"> Warning: text symbol '%c' is not recognized by any models symbol.\n", alphSeq[i]); } return alphSeq; } /******************************************************************************/ /* estSymbMod */ /******************************************************************************/ int str2nummod(char *str) { int i; for(i=0; i!=nbSymbMod; i++) if(!strcmp(nummod2str[i], str)) return i; return -1; } 
/******************************************************************************/ /* strshfl - teste si deux chaines sont le shuffling l'une de l'autre */ /******************************************************************************/ Flag strshfl(char * a, char * b) { char * p; if(strlen(a) != strlen(b)) return FAUX; p = a; while(*p!='\0') { if(!strchr(b, *p)) return FAUX; p++; } return VRAI; } /******************************************************************************/ /* transAlphMod */ /******************************************************************************/ void transAlphMod(Flag pal) { int i,j; char tmp[512]; for(i=0; i!=nbSymbMod; i++) { if(type == DNA) { if(strshfl(nummod2str[i],"AR") || strshfl(nummod2str[i],"AN") || strshfl(nummod2str[i],"ARN")) { fprintf(stderr, "Symbol %s ->> A\n", nummod2str[i]); sprintf(nummod2str[i],"A"); } else if( strshfl(nummod2str[i],"CY") || strshfl(nummod2str[i],"CN") || strshfl(nummod2str[i],"CYN")) { fprintf(stderr, "Symbol %s ->> C\n",nummod2str[i]); sprintf(nummod2str[i],"C"); } else if( strshfl(nummod2str[i],"GR") || strshfl(nummod2str[i],"GN") || strshfl(nummod2str[i],"GRN")) { fprintf(stderr, "Symbol %s ->> G\n", nummod2str[i]); sprintf(nummod2str[i],"G"); } else if( strshfl(nummod2str[i],"TY") || strshfl(nummod2str[i],"TN") || strshfl(nummod2str[i],"TYN")) { fprintf(stderr, "Symbol %s ->> T\n", nummod2str[i]); sprintf(nummod2str[i],"T"); } else if(strshfl(nummod2str[i],"AG") || strshfl(nummod2str[i],"AGR") || strshfl(nummod2str[i],"AGN") || strshfl(nummod2str[i],"AGRN")) { fprintf(stderr, "Symbol %s ->> R\n",nummod2str[i]); sprintf(nummod2str[i],"R"); } else if (strshfl(nummod2str[i],"CT") || strshfl(nummod2str[i],"CTY") || strshfl(nummod2str[i],"CTN") || strshfl(nummod2str[i],"CTYN")) { fprintf(stderr, "Symbol %s ->> Y\n", nummod2str[i]); sprintf(nummod2str[i],"Y"); } else if(nummod2str[i][1]=='\0') { if(nummod2str[i][0]==JOKER) { fprintf(stderr, "Symbol '%c' ->> N\n", JOKER); sprintf(nummod2str[i],"N"); } } 
else { strcpy(tmp, nummod2str[i]); sprintf(nummod2str[i],"[%s]",tmp); } } else if(type == PROTEINS) { if(nummod2str[i][1]=='\0') { if(nummod2str[i][0]==JOKER) { fprintf(stderr, "Symbol '%c' ->> X\n", JOKER); sprintf(nummod2str[i],"X"); } } else { strcpy(tmp, nummod2str[i]); sprintf(nummod2str[i],"[%s]",tmp); } } else { if(nummod2str[i][1]=='\0') { if(nummod2str[i][0]==JOKER) { strcpy(tmp, nummod2str[i]); sprintf(nummod2str[i],"[%s]",tmp); } } } /* Verification de collision de symboles */ for(j=0; j!=i; j++) if(!strcmp(nummod2str[j],nummod2str[i])) { fprintf(stderr,">> Error: possible confusion between symbols %s and %s of the alphabet.\nModify the alphabet to avoid conflict.\n", alphMod[i],alphMod[j]); exit(1); } } /* Positionnement des complémentaires */ if (pal) { if(type != DNA) { fprintf(stderr,">> Error: palindroms can only be used with a nucleotide alphabet\n"); exit(1); } for(i=0; i!=nbSymbMod; i++) comp[i] = -1; chercheComp("A", "T"); chercheComp("C", "G"); chercheComp("R", "Y"); /* ...and so on? 
*/ /* Complementaire du joker */ if(numJOKER!=-1) comp[numJOKER] = numJOKER; /* Verification que tous les symboles de l'alphabet ont bien un palindrome */ for(i=0; i!=nbSymbMod; i++) { if(comp[i] == -1) { fprintf(stderr,">> Error: some symbols of the models alphabet misses their complemtentary symbol, cannot use the palindromic option\n"); exit(1); } } } } void chercheComp(char *s1, char *s2) { int i,j; /* Recherche de s1 et s2 dans alph modeles */ for(i=0; i!=nbSymbMod; i++) if (!strcmp(nummod2str[i],s1)) break; for(j=0; j!=nbSymbMod; j++) if (!strcmp(nummod2str[j],s2)) break; /* s1 et s2 sont presents dans l'alphabet => ils sont complementaires */ if(i!=nbSymbMod && j!=nbSymbMod) { comp[i] = j; comp[j] = i; } /* Si un seul d'entre eux est present => probleme */ else if((i==nbSymbMod && j!=nbSymbMod) || (i!=nbSymbMod && j==nbSymbMod)) { fprintf(stderr,">> Error: the models alphabet misses some symbols to be used with the palindromic option ('%s' and '%s' must appear together)\n", s1,s2); exit(1); } /* Et si aucun... pas probleme */ } SMILEv1.47/P_BLOCS/Spell/src/barre.c0000644002404200237300000000645410066543451016371 0ustar lamaaoc00000000000000/******************************************************************************/ /* SMILE v1.47 - Extraction of structured motifs common to several sequences */ /* Copyright (C) 2004 L.Marsan (lama -AT- prism.uvsq.fr) */ /* */ /* This program is free software; you can redistribute it and/or */ /* modify it under the terms of the GNU General Public License */ /* as published by the Free Software Foundation; either version 2 */ /* of the License, or (at your option) any later version. */ /* */ /* This program is distributed in the hope that it will be useful, */ /* but WITHOUT ANY WARRANTY; without even the implied warranty of */ /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ /* GNU General Public License for more details. 
*/ /* */ /* You should have received a copy of the GNU General Public License */ /* along with this program; if not, write to the Free Software */ /* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ /******************************************************************************/ #include "barre.h" /* Fonctions privees */ void repChar(char, int); /******************************************************************************/ /* repChar */ /******************************************************************************/ void repChar(char c, int nb) { int i; for(i=0;i!=nb;i++) fprintf(stderr,"%c",c); } /******************************************************************************/ /* barre */ /******************************************************************************/ void barre(int n) { static int pos=0; static int max=0; static int lastecrit=0; static time_t start; int nbpts; int sec, mn, hr; if(n!=0) { start = time(NULL); max = n; lastecrit = 0; pos = 1; fprintf(stderr," 0%%"); #if MAXCOL != 0 fprintf(stderr," ["); repChar(' ',MAXCOL); fprintf(stderr,"]"); #endif return; } if(pos>max) return; if(pos==max) { fprintf(stderr,"\r100%%"); #if MAXCOL != 0 fprintf(stderr," ["); repChar(CHARBARRE,MAXCOL); fprintf(stderr,"]"); #endif fprintf(stderr," 00:00:00\n"); pos++; return; } sec = (int) difftime( time(NULL), start); sec *= ((float) max-pos)/pos; hr = sec / 3600; mn = (sec - hr * 3600) / 60; sec = sec - hr * 3600 - mn *60; #if MAXCOL != 0 nbpts = pos*MAXCOL/max; if(nbpts!=lastecrit) { #endif fprintf(stderr,"\r%3d%%",pos*100/max); #if MAXCOL != 0 fprintf(stderr," ["); repChar(CHARBARRE,nbpts); repChar(' ',MAXCOL-nbpts); fprintf(stderr,"]"); #endif if(pos>3) fprintf(stderr, " %02d:%02d:%02d", hr,mn,sec); #if MAXCOL != 0 lastecrit = nbpts; } #endif pos++; } SMILEv1.47/P_BLOCS/Spell/src/pile_occ.c0000644002404200237300000003106210066543475017052 0ustar 
lamaaoc00000000000000/******************************************************************************/ /* SMILE v1.47 - Extraction of structured motifs common to several sequences */ /* Copyright (C) 2004 L.Marsan (lama -AT- prism.uvsq.fr) */ /* */ /* This program is free software; you can redistribute it and/or */ /* modify it under the terms of the GNU General Public License */ /* as published by the Free Software Foundation; either version 2 */ /* of the License, or (at your option) any later version. */ /* */ /* This program is distributed in the hope that it will be useful, */ /* but WITHOUT ANY WARRANTY; without even the implied warranty of */ /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ /* GNU General Public License for more details. */ /* */ /* You should have received a copy of the GNU General Public License */ /* along with this program; if not, write to the Free Software */ /* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ /******************************************************************************/ #include /* FONCTIONS PRIVEES */ #if OCC int recAfficheOcc(FILE *f,Noeud * n, LongSeq l, P_Criteres cr, int codesaut); #endif extern char **text; extern int nbSymbSeq; extern int carseq2num[127]; /******************************************************************************/ /* creePileOcc */ /******************************************************************************/ P_PileOcc creePileOcc(void) { P_PileOcc p; p=(P_PileOcc)malloc(sizeof(PileOcc)); if(p==NULL) fatalError("creePileOcc: cannot allocate 'p'\n"); p->occ=(P_occ)calloc(GRAIN, sizeof(Occ)); if(p->occ==NULL) fatalError("creePileOcc: cannot allocate 'p->occ'\n"); p->carte=(unsigned int *)malloc(GRAIN_SIZMOD*sizeof(unsigned int)); if(p->carte==NULL) fatalError("creePileOcc: cannot allocate 'p->carte'\n"); p->size=GRAIN; p->size_carte=GRAIN_SIZMOD; p->pos_carte=0; p->pos=0; ajouteDummy(p); return(p); } /* MODE D'EMPLOI DES DUMMYS EN MULTIBLOC 
DELTA */ /* Si le parametre principal vaut VRAI c'est un dummy de separation entre * recursions. */ /* Sinon c'est un dummy de separation entre sauts. */ /* Dans les deux cas le dernier parametre indique le code d'intervalle * concerne par les occurrences qui suivent. */ /******************************************************************************/ /* ajouteDummy */ /******************************************************************************/ void ajouteDummy(P_PileOcc p) { #if DEBUG_PILE unsigned int * t=p->carte; #endif if(p->pos_carte>=p->size_carte) { p->size_carte+=GRAIN_SIZMOD; #if DEBUG_PILE printf("J'etends carte a %d\n",p->size_carte); #endif p->carte=(unsigned int *)realloc(p->carte,p->size_carte *sizeof(unsigned int)); if(p->carte==NULL) fatalError("pile_occ.c: ajouteDummy: cannot reallocate 'p->carte'"); #if DEBUG_PILE if(t!=p->carte) printf("CHANGEMENT D'emplacement memoire de la carte\n"); #endif } p->carte[p->pos_carte]=p->pos; p->pos_carte++; ajouteOcc2Pile(p, NULL, -1, -1, -1, -1, -1, -1); } /******************************************************************************/ /* getPrecDummy */ /******************************************************************************/ LongSeq getPrecDummy(P_PileOcc p) { /* if(p->pos_carte==0) POSSIBLE DANGER*/ return(p->carte[p->pos_carte-1]); } /******************************************************************************/ /* ajouteInitOcc2Pile */ /******************************************************************************/ void ajouteInitOcc2Pile(P_PileOcc p, Noeud *x) { P_occ ptr; ptr=p->occ+p->pos; ptr->x=x; ptr->num=carseq2num[(int) FINAL]; /* Symbole quelconque, non lu */ ptr->lon=1; ptr->xerr=0; ptr->blocerr=0; ptr->saut=0; ptr->codesaut=0; p->pos++; } /******************************************************************************/ /* ajouteOcc2Pile */ /******************************************************************************/ void ajouteOcc2Pile(P_PileOcc p, Noeud *x, int num, LongSeq lon, 
LongSeq err, LongSeq blocerr, LongSeq saut, int codesaut) { P_occ ptr; #if DEBUG_PILE printf("j'ecris une nouvelle occurrence en %d\n", p->pos); #endif if(p->pos>=p->size) { #if DEBUG_PILE printf("JE RESIZE (ajoute)\n"); #endif p->size+=GRAIN; ptr = p->occ; p->occ=(P_occ)realloc(p->occ, (p->size)*sizeof(Occ)); if(p->occ==NULL) fatalError("ajouteOcc2Pile: cannot reallocate 'p->occ'\n"); #if DEBUG_PILE if(p->occ!=ptr) printf("changement d'emplacement memoire de pileocc\n"); #endif } ptr = p->occ+p->pos; ptr->x = x; ptr->num = num; ptr->lon = lon; ptr->xerr = err; ptr->blocerr = blocerr; ptr->saut = saut; ptr->codesaut = codesaut; p->pos++; } /******************************************************************************/ /* copieLastOcc */ /******************************************************************************/ int copieLastOcc(P_PileOcc dest, P_PileOcc source) { int last_dummy, nbocc; if(source->pos==0) return 0; if(source->pos_carte==0) last_dummy=-1; else last_dummy=source->carte[source->pos_carte-1]; nbocc = source->pos-last_dummy; if(nbocc==1) return 1; if(((dest->pos)+(nbocc-1))>=(dest->size)) { #if DEBUG_PILE printf("JE RESIZE (copie) posdest %d sizedest %d possource %d\n", dest->pos, dest->size, source->pos); #endif dest->size=(int)ceil(((double)(dest->pos+nbocc-1))/((double)GRAIN))*GRAIN; #if DEBUG_PILE printf("New size %d\n",dest->size); #endif dest->occ=(P_occ)realloc(dest->occ, dest->size*sizeof(Occ)); if(dest->occ==NULL) fatalError("ajouteOcc2Pile: cannot reallocate 'dest->occ'\n"); } memcpy(dest->occ+dest->pos, source->occ+last_dummy+1, (nbocc-1)*sizeof(Occ)); dest->pos+=nbocc-1; return nbocc-1; } /******************************************************************************/ /* transferePile2Pile */ /******************************************************************************/ void transferePile2Pile(P_PileOcc dest, P_PileOcc source) { #if DEBUG_PILE P_occ t=dest->occ; #endif if(source->pos==0) return; 
if(((dest->pos)+(source->pos))>=(dest->size)) { #if DEBUG_PILE printf("JE RESIZE (transfere) posdest %d sizedest %d possource %d\n", dest->pos, dest->size, source->pos); #endif dest->size=(int)ceil(((double)(dest->pos+source->pos))/((double)GRAIN))*GRAIN; #if DEBUG_PILE printf("New size %d\n",dest->size); #endif dest->occ=(P_occ)realloc(dest->occ, dest->size*sizeof(Occ)); if(dest->occ==NULL) fatalError("ajouteOcc2Pile: cannot reallocate 'dest->occ'\n"); #if DEBUG_PILE if(t!=dest->occ) printf("chgmnt d'empl memoire de pocc (transfere)\n"); #endif } memcpy(dest->occ+dest->pos, source->occ, source->pos*sizeof(Occ)); dest->pos+=source->pos; source->pos=0; } /******************************************************************************/ /* depileRec */ /******************************************************************************/ void depileRec(P_PileOcc p) { if(p->pos_carte==0) p->pos=0; else { p->pos_carte--; p->pos=p->carte[p->pos_carte]; } } /******************************************************************************/ /* libereOcc */ /******************************************************************************/ void videPile(P_PileOcc p) { p->pos = 0; p->pos_carte = 0; ajouteDummy(p); } /******************************************************************************/ /* liberePileOcc */ /******************************************************************************/ void liberePileOcc(P_PileOcc p) { free(p->occ); free(p->carte); free(p); } #if DEBUG_BASE /******************************************************************************/ /* affichePileOcc */ /******************************************************************************/ void affichePileOcc(P_PileOcc p) { int i; for(i=p->pos-1; i>=0; i--) if(p->occ[i].x==NULL) printf("====== DUMMY =======\n"); else printf("num %p branche %c lon %d err %d\n",p->occ[i].x, lettres[p->occ[i].num], p->occ[i].lon, p->occ[i].xerr); /* printf("num %d branche %c lon %d err %d\n",p->occ[i].x->numero, */ /* 
lettres[p->occ[i].num], p->occ[i].lon, p->occ[i].xerr); */ printf("--------------------------------------\n"); } #endif #if OCC /******************************************************************************/ /* afficheLastOcc */ /******************************************************************************/ /* Lance l'affichage de tous les motifs associes aux occurrences trouvees */ /******************************************************************************/ void afficheLastOcc(FILE *f, P_PileOcc pocc, LongSeq l, P_Criteres cr) { int i=pocc->pos-1,nbocc=0; P_occ ptr; ptr=pocc->occ+i; while((i>0) && (ptr->x!=NULL)) { nbocc+=recAfficheOcc(f,ptr->x->fils[ptr->num], l-ptr->lon+ptr->saut, cr, ptr->codesaut); ptr--; i--; } fprintf(f,"%d\n",nbocc); } /******************************************************************************/ /* recAfficheOcc */ /******************************************************************************/ /* Parcourt l'arbre recursivement pour atteindre les feuilles et affiche */ /******************************************************************************/ int recAfficheOcc(FILE *f,Noeud * n, LongSeq l, P_Criteres cr, int codesaut) { int i,nbocc=0; /* #if DEBUG_BASE */ /* printf("J'entre dans recAffiche avec long %d\n",l); */ /* #endif */ /* Si on a atteint une feuille */ if (n->debut & LEAF_BIT) { nbocc += Print_Positions(f, (Feuille *) n, l, cr, codesaut); } else for(i=0; i!=nbSymbSeq+1; i++) if(n->fils[i]) { /* #if DEBUG_BASE */ /* printf("recAffiche: je passe par %c\n",lettres[i]); */ /* #endif */ nbocc += recAfficheOcc(f,n->fils[i], l+n->fin-n->debut, cr, codesaut); } return nbocc; } /******************************************************************************/ /* affOcc */ /******************************************************************************/ int afficheOcc(FILE *f, P_occ o, LongSeq longmod, P_Criteres cr) { if(o->x != NULL) return(recAfficheOcc(f,o->x->fils[o->num], longmod-o->lon+o->saut, cr, o->codesaut)); return 0; } 
#endif #if DEBUG_BASE /******************************************************************************/ /* afficheOldOcc */ /******************************************************************************/ /* Affiche les occurrences d'un niveau -n dans la pile */ /******************************************************************************/ void afficheOldOcc(P_PileOcc p, LongSeq l) { int pos=p->pos-1; P_occ tmpocc=p->occ+pos; printf("=======HAUT=PILE=========\n"); while(pos >=0 && tmpocc != NULL && tmpocc->lon >= 0) { printf("*** num %d lon %d saut %d codesaut %d lmod %d\n",tmpocc->num, tmpocc->lon,tmpocc->saut,tmpocc->codesaut, l); afficheOcc(stdout,tmpocc,l,0); tmpocc--; pos--; } printf("=*=*=*=*=DUMMY=*=*=*=*=*=\n"); } #endif SMILEv1.47/P_BLOCS/Spell/include/0000755002404200237300000000000010066543431015753 5ustar lamaaoc00000000000000SMILEv1.47/P_BLOCS/Spell/include/occ.h0000600002404200237300000000415210066543404016662 0ustar lamaaoc00000000000000/******************************************************************************/ /* SMILE v1.47 - Extraction of structured motifs common to several sequences */ /* Copyright (C) 2004 L.Marsan (lama -AT- prism.uvsq.fr) */ /* */ /* This program is free software; you can redistribute it and/or */ /* modify it under the terms of the GNU General Public License */ /* as published by the Free Software Foundation; either version 2 */ /* of the License, or (at your option) any later version. */ /* */ /* This program is distributed in the hope that it will be useful, */ /* but WITHOUT ANY WARRANTY; without even the implied warranty of */ /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ /* GNU General Public License for more details. */ /* */ /* You should have received a copy of the GNU General Public License */ /* along with this program; if not, write to the Free Software */ /* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
*/ /******************************************************************************/ #ifndef _OCC_H #define _OCC_H #include /* Structure de maillon d'une liste d'occurrences */ typedef struct liste_occ { Noeud * x; /* noeud de depart de l'etat virtuel */ LongSeq lon; /* longueur sur la transition */ LongSeq xerr; /* nb total d'erreurs de l'occurrence */ LongSeq saut; /* longueur totale de saut */ LongSeq blocerr;/* erreurs du bloc courant */ int codesaut; /* code indiquant les != sauts empruntes */ int num; /* numero de la transition */ } Occ, *P_occ; void initOcc(P_occ); #endif SMILEv1.47/P_BLOCS/Spell/include/criteres.h0000644002404200237300000000553610066543363017761 0ustar lamaaoc00000000000000/******************************************************************************/ /* SMILE v1.47 - Extraction of structured motifs common to several sequences */ /* Copyright (C) 2004 L.Marsan (lama -AT- prism.uvsq.fr) */ /* */ /* This program is free software; you can redistribute it and/or */ /* modify it under the terms of the GNU General Public License */ /* as published by the Free Software Foundation; either version 2 */ /* of the License, or (at your option) any later version. */ /* */ /* This program is distributed in the hope that it will be useful, */ /* but WITHOUT ANY WARRANTY; without even the implied warranty of */ /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ /* GNU General Public License for more details. */ /* */ /* You should have received a copy of the GNU General Public License */ /* along with this program; if not, write to the Free Software */ /* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
*/ /******************************************************************************/ #ifndef _CRITERES_H #define _CRITERES_H #include #include #include #include #include /******************************************************************************/ /* STRUCTURE DE STOCKAGE DES CRITERES DE RECHERCHE */ /******************************************************************************/ typedef struct struct_fourchette { LongSeq min; LongSeq max; } Fourchette, *P_Fourchette; typedef struct struct_criteres { LongSeq **compobloc; LongSeq *maxerrblocs; LongSeq *compo; Fourchette *longbloc; Fourchette *saut; Flag *flag_compobloc; LongSeq *palindrom; long int nbsymb; Fourchette longueur; LongSeq maxerr; NbSeq quorum; char bloc; Flag flag_compo; Flag multiblocs; Flag flag_palindrom; } Criteres, *P_Criteres; /******************************************************************************/ /* FONCTIONS PUBLIQUES */ /******************************************************************************/ void setCompoPal(P_Criteres cr, char **argv, int argc); int addSaut2Code(int oldcode, LongSeq saut, LongSeq curbloc, P_Criteres cr); void initTabSauts(P_Criteres); void allocBloc(P_Criteres cr, int bloc); #endif SMILEv1.47/P_BLOCS/Spell/include/spell.h0000644002404200237300000000433210066543415017247 0ustar lamaaoc00000000000000/******************************************************************************/ /* SMILE v1.47 - Extraction of structured motifs common to several sequences */ /* Copyright (C) 2004 L.Marsan (lama -AT- prism.uvsq.fr) */ /* */ /* This program is free software; you can redistribute it and/or */ /* modify it under the terms of the GNU General Public License */ /* as published by the Free Software Foundation; either version 2 */ /* of the License, or (at your option) any later version. 
*/ /* */ /* This program is distributed in the hope that it will be useful, */ /* but WITHOUT ANY WARRANTY; without even the implied warranty of */ /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ /* GNU General Public License for more details. */ /* */ /* You should have received a copy of the GNU General Public License */ /* along with this program; if not, write to the Free Software */ /* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ /******************************************************************************/ #ifndef _SPELL_H #define _SPELL_H #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* Grain d'allocation des sequences Fasta */ #define GRAINSEQ 500 /******************************************************************************/ /* PROTOTYPES PUBLICS */ /******************************************************************************/ void /* explore les modeles */ doSpell(P_Criteres, NbSeq, Noeud *); #endif SMILEv1.47/P_BLOCS/Spell/include/global.h0000644002404200237300000001020010066543372017361 0ustar lamaaoc00000000000000/******************************************************************************/ /* SMILE v1.47 - Extraction of structured motifs common to several sequences */ /* Copyright (C) 2004 L.Marsan (lama -AT- prism.uvsq.fr) */ /* */ /* This program is free software; you can redistribute it and/or */ /* modify it under the terms of the GNU General Public License */ /* as published by the Free Software Foundation; either version 2 */ /* of the License, or (at your option) any later version. */ /* */ /* This program is distributed in the hope that it will be useful, */ /* but WITHOUT ANY WARRANTY; without even the implied warranty of */ /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ /* GNU General Public License for more details. 
*/ /* */ /* You should have received a copy of the GNU General Public License */ /* along with this program; if not, write to the Free Software */ /* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ /******************************************************************************/ #ifndef _GLOBAL_H #define _GLOBAL_H #include #include #define FAUX 0 #define VRAI 1 typedef char Flag; /******************************************************************************/ /* Caracteres speciaux */ /******************************************************************************/ /* => dans symb.h */ /******************************************************************************/ /* Flags */ /******************************************************************************/ /* DEBUGGING */ #define DEBUG_BASE 0 /* Debug base */ #define DEBUG_BT 0 /* Tableaux de bits */ #define DEBUG_SAUT 0 /* Procedures de saut */ #define DEBUG_PILE 0 /* Pile d'occurrences */ #define DEBUG_TREE 0 /* Arbre suffixe : HS bicoz Julien */ /******************************************************************************/ /* Define dependants du jeu de donnees */ /******************************************************************************/ /* Grain d'allocation de la taille du modele */ #define GRAIN_SIZMOD 100 /******************************************************************************/ /* Types */ /******************************************************************************/ /* Nombre de sequences */ #define NbSeq int /* Longueur de sequence */ #define LongSeq int /******************************************************************************/ /* L'affichage fur et a mesure est obligatoire si on veut les occurrences */ /******************************************************************************/ #if !OCC #undef AFF_OCC #define AFF_OCC 0 #endif /******************************************************************************/ /* Active DEBUG_BASE si l'un des DEBUGs est 
active */ /******************************************************************************/ #if DEBUG_BT || DEBUG_SAUT || DEBUG_PILE || DEBUG_TREE #undef DEBUG_BASE #define DEBUG_BASE 1 #endif /******************************************************************************/ /* Fonctions basiques */ /******************************************************************************/ void fatalError(char *msg); void warning(char *msg); void entree(void); #endif SMILEv1.47/P_BLOCS/Spell/include/model.h0000600002404200237300000000415410066543377017231 0ustar lamaaoc00000000000000/******************************************************************************/ /* SMILE v1.47 - Extraction of structured motifs common to several sequences */ /* Copyright (C) 2004 L.Marsan (lama -AT- prism.uvsq.fr) */ /* */ /* This program is free software; you can redistribute it and/or */ /* modify it under the terms of the GNU General Public License */ /* as published by the Free Software Foundation; either version 2 */ /* of the License, or (at your option) any later version. */ /* */ /* This program is distributed in the hope that it will be useful, */ /* but WITHOUT ANY WARRANTY; without even the implied warranty of */ /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ /* GNU General Public License for more details. */ /* */ /* You should have received a copy of the GNU General Public License */ /* along with this program; if not, write to the Free Software */ /* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
*/ /******************************************************************************/ #ifndef _MODEL_INCLUDE #define _MODEL_INCLUDE #include #include /* Structure de stockage d'un modele */ typedef struct model { int *name; /* Sequence du modele (codes alphabets) */ LongSeq lon; /* Taille du modele stocke dans name */ LongSeq taille; /* Taille de name */ } Mod, *P_mod; /* FONCTIONS */ /* allouer un modele */ P_mod allocModel(void); /* diminuer la taille d'un modele de 1 */ void decrModel(P_mod); /* ajoute un symbole au modele */ void changeModel(P_mod , int); /* Affiche le modele */ void AfficheModel(P_mod); #endif SMILEv1.47/P_BLOCS/Spell/include/symb.h0000644002404200237300000000401510066544347017105 0ustar lamaaoc00000000000000/******************************************************************************/ /* SMILE v1.47 - Extraction of structured motifs common to several sequences */ /* Copyright (C) 2004 L.Marsan (lama -AT- prism.uvsq.fr) */ /* */ /* This program is free software; you can redistribute it and/or */ /* modify it under the terms of the GNU General Public License */ /* as published by the Free Software Foundation; either version 2 */ /* of the License, or (at your option) any later version. */ /* */ /* This program is distributed in the hope that it will be useful, */ /* but WITHOUT ANY WARRANTY; without even the implied warranty of */ /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ /* GNU General Public License for more details. */ /* */ /* You should have received a copy of the GNU General Public License */ /* along with this program; if not, write to the Free Software */ /* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
*/ /******************************************************************************/ /******************************************************************************/ /* Symboles */ /******************************************************************************/ #ifndef _SYMBINTERN #define _SYMBINTERN /* SYMBOLES EXTERNES */ #define SAUT '_' #define JOKER '*' #define FINAL '$' /* SYMBOLES INTERNES (comm. entre Spell et SigStat) */ #define JOKERinterne '*' #define SAUTinterne '-' #define SHIFTALPHA '0' #endif SMILEv1.47/P_BLOCS/Spell/include/alphabet.h0000644002404200237300000000550610066543330017710 0ustar lamaaoc00000000000000/******************************************************************************/ /* SMILE v1.47 - Extraction of structured motifs common to several sequences */ /* Copyright (C) 2004 L.Marsan (lama -AT- prism.uvsq.fr) */ /* */ /* This program is free software; you can redistribute it and/or */ /* modify it under the terms of the GNU General Public License */ /* as published by the Free Software Foundation; either version 2 */ /* of the License, or (at your option) any later version. */ /* */ /* This program is distributed in the hope that it will be useful, */ /* but WITHOUT ANY WARRANTY; without even the implied warranty of */ /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ /* GNU General Public License for more details. */ /* */ /* You should have received a copy of the GNU General Public License */ /* along with this program; if not, write to the Free Software */ /* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
*/ /******************************************************************************/ /******************************************************************************/ /* ALPHABET - Gestion des alphabets de SMILEv1.4 */ /******************************************************************************/ #ifndef _ALPHABET #define _ALPHABET #include #include #include #include #include #define equiv(i,j) TabSymb[i][j] #define MAXSYMBMOD 127 /* Nb max de symboles des modeles */ /******************************************************************************/ /* TYPES ABSTRAITS */ /******************************************************************************/ typedef unsigned char Symbole; /******************************************************************************/ /* STRUCTURES */ /******************************************************************************/ /******************************************************************************/ /* PROTOTYPES */ /******************************************************************************/ void initAlphabet(void); Symbole * chargeAlphabet(FILE *f, Symbole **seq, NbSeq nbseq); int str2nummod(char *str); void transAlphMod(Flag); #endif SMILEv1.47/P_BLOCS/Spell/include/barre.h0000644002404200237300000000433310066543356017230 0ustar lamaaoc00000000000000/******************************************************************************/ /* SMILE v1.47 - Extraction of structured motifs common to several sequences */ /* Copyright (C) 2004 L.Marsan (lama -AT- prism.uvsq.fr) */ /* */ /* This program is free software; you can redistribute it and/or */ /* modify it under the terms of the GNU General Public License */ /* as published by the Free Software Foundation; either version 2 */ /* of the License, or (at your option) any later version. 
*/ /* */ /* This program is distributed in the hope that it will be useful, */ /* but WITHOUT ANY WARRANTY; without even the implied warranty of */ /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ /* GNU General Public License for more details. */ /* */ /* You should have received a copy of the GNU General Public License */ /* along with this program; if not, write to the Free Software */ /* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ /******************************************************************************/ #ifndef _BARRE_ #define _BARRE_ #include #include #include #define CHARBARRE '.' #define MAXCOL 60 /* Nombre de colonnes de la barre */ /****************************************************************************** barre permet d'afficher une barre d'etat lors d'un calcul progressif. Le premier appel se fait en donnant le nombre d'etats a franchir avant d'atteindre les 100%. La fonction est alors initialisee. Ensuite il suffit de l'appeler avec la valeur 0, le nombre de fois annoncé. Sortie sur stderr. MAXCOL permet de definir la taille de la barre. Mis a 0 la barre est desactivee pour ne laisser que le temps et le pourcentage. *******************************************************************************/ void barre(int); #endif SMILEv1.47/P_BLOCS/Spell/include/pile_occ.h0000644002404200237300000000566110066543411017707 0ustar lamaaoc00000000000000/******************************************************************************/ /* SMILE v1.47 - Extraction of structured motifs common to several sequences */ /* Copyright (C) 2004 L.Marsan (lama -AT- prism.uvsq.fr) */ /* */ /* This program is free software; you can redistribute it and/or */ /* modify it under the terms of the GNU General Public License */ /* as published by the Free Software Foundation; either version 2 */ /* of the License, or (at your option) any later version. 
*/ /* */ /* This program is distributed in the hope that it will be useful, */ /* but WITHOUT ANY WARRANTY; without even the implied warranty of */ /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ /* GNU General Public License for more details. */ /* */ /* You should have received a copy of the GNU General Public License */ /* along with this program; if not, write to the Free Software */ /* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ /******************************************************************************/ #ifndef _PILE_OCC_H #define _PILE_OCC_H #include #include #include #include #include #include #include /* Grain du tableau des occurrences */ #define GRAIN 2000 typedef struct struct_pile_occ { Occ *occ; /* Carte des positions des 'dummy' */ unsigned int *carte; /* Position courante dans carte */ unsigned int pos_carte; unsigned int size_carte; unsigned int size; int pos; } PileOcc, *P_PileOcc; /******************************************************************************/ /* FONCTIONS PUBLIQUES */ /******************************************************************************/ P_PileOcc creePileOcc(void); void ajouteDummy(P_PileOcc); LongSeq getPrecDummy(P_PileOcc); void ajouteInitOcc2Pile(P_PileOcc, Noeud *); void ajouteOcc2Pile(P_PileOcc, Noeud *, int,LongSeq,LongSeq ,LongSeq, LongSeq, int); void transferePile2Pile(P_PileOcc, P_PileOcc); int copieLastOcc(P_PileOcc, P_PileOcc); void depileRec(P_PileOcc); void videPile(P_PileOcc); void liberePileOcc(P_PileOcc); #if OCC void afficheLastOcc(FILE *f, P_PileOcc, LongSeq l, P_Criteres cr); #endif #if DEBUG_BASE int afficheOcc(FILE *f, P_occ o, LongSeq longmod, P_Criteres cr); void afficheOldOcc(P_PileOcc p, LongSeq l); void affichePileOcc(P_PileOcc); #endif #endif SMILEv1.47/P_BLOCS/include/0000755002404200237300000000000010217767123014700 5ustar 
lamaaoc00000000000000SMILEv1.47/P_BLOCS/include/global_fonctions.h0000644002404200237300000000647210066543562020405 0ustar lamaaoc00000000000000/******************************************************************************/ /* SMILE v1.47 - Extraction of structured motifs common to several sequences */ /* Copyright (C) 2004 L.Marsan (lama -AT- prism.uvsq.fr) */ /* */ /* This program is free software; you can redistribute it and/or */ /* modify it under the terms of the GNU General Public License */ /* as published by the Free Software Foundation; either version 2 */ /* of the License, or (at your option) any later version. */ /* */ /* This program is distributed in the hope that it will be useful, */ /* but WITHOUT ANY WARRANTY; without even the implied warranty of */ /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ /* GNU General Public License for more details. */ /* */ /* You should have received a copy of the GNU General Public License */ /* along with this program; if not, write to the Free Software */ /* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ /******************************************************************************/ #ifndef _GLOBAL_FONCTIONS_H #define _GLOBAL_FONCTIONS_H #include #include #include #include #include #include #include void Init_All(unsigned char *Alphabet,int Joker,int nb_sequence); void Ajoute_Fils_Au_Noeud(Noeud *N,Noeud *F); Noeud *Get_Child_Start_Letter(Noeud *N,int indice); int seg_taille(Noeud *N); Noeud *Add_Fast_String(Noeud *N,int deb,int fin,int *type,Noeud **pere); /* --> Retourne: * Si ajout d'une feuille a l'arbre : @ de la feuille. type = 1. * Si extension d'une feuille a l'arbre: @ de la feuille. type = 2. * Si decoupe d'un arc avec creation d'une feuille: @ de la feuille cree. type = 3. * Si rien (chaine deja ds l'arbre) : @ du dernier noeud en amont. type = - lg du dernier seg. 
*/ int compare_string(int d1,int f1, int d2,int f2); void Print_Tree(Noeud *N,int affichage,int stat); void Print_Liste(Liste *liste); Noeud *FindString(Noeud *N,int deb,int fin,Noeud **pere,int *restant,int *pos_in_edge); /* cherche la chaine deb fin de la sequence courante à partir de N. retourne le sommet au bout de l'arc contenant la chaine cherchée. pere = pere du sommet retouné. restant : < 0 : coupure au milieu de l'arc pos_in_edge : nb de car. commun sur l'arc 1 : la chaine est dans l'arbre et elle aboutit à un sommet. 2 : la recherche aboutit à une feuille. et elle est plus grande que l'arc. 3 : la chaine est plus courte que l'arc. mais elle est contenue dans celui-ci. */ void UpdateBit_TabForAllTree(Noeud *N); void Print_BTTree_Debug(Noeud *N,int *cpt); #endif SMILEv1.47/P_BLOCS/include/allocateurs.h0000644002404200237300000000520610066543543017372 0ustar lamaaoc00000000000000/******************************************************************************/ /* SMILE v1.47 - Extraction of structured motifs common to several sequences */ /* Copyright (C) 2004 L.Marsan (lama -AT- prism.uvsq.fr) */ /* */ /* This program is free software; you can redistribute it and/or */ /* modify it under the terms of the GNU General Public License */ /* as published by the Free Software Foundation; either version 2 */ /* of the License, or (at your option) any later version. */ /* */ /* This program is distributed in the hope that it will be useful, */ /* but WITHOUT ANY WARRANTY; without even the implied warranty of */ /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ /* GNU General Public License for more details. */ /* */ /* You should have received a copy of the GNU General Public License */ /* along with this program; if not, write to the Free Software */ /* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
*/ /******************************************************************************/ #ifndef ALLOCATEUR_H #define ALLOCATEUR_H #include #include #include #include #include #include #include #include #include #include /* SINGLE_TAB_SIZE __________|___________ / \ / ---- | | | ----> [ | | | | | | | | | ....] | ---- | | | ----> [ | | | | | | | | | ....] ALLOC_NOEUD_TAB_STEP - ---- | | | ----> [ | | | | | | | | | ....] | ---- | | | ----> [ | | | | | | | | | ....] \ ---- .... */ Noeud *Alloc_Noeud(void ); Feuille *Alloc_Feuille(void ); Liste *Alloc_Liste(void); void Free_Liste(Liste *l); void Free_All_Liste_Cell(void); void Init_Allocateurs(void ); void Free_Arbre(Noeud *Racine); typedef struct _cell { struct _cell *suivant; unsigned char *data; int current; int max; }Alloc_Cell; typedef struct _allocateur { Alloc_Cell *first; Alloc_Cell *last; }Allocateur; #endif SMILEv1.47/P_BLOCS/include/structures.h0000644002404200237300000000444410066543610017275 0ustar lamaaoc00000000000000/******************************************************************************/ /* SMILE v1.47 - Extraction of structured motifs common to several sequences */ /* Copyright (C) 2004 L.Marsan (lama -AT- prism.uvsq.fr) */ /* */ /* This program is free software; you can redistribute it and/or */ /* modify it under the terms of the GNU General Public License */ /* as published by the Free Software Foundation; either version 2 */ /* of the License, or (at your option) any later version. */ /* */ /* This program is distributed in the hope that it will be useful, */ /* but WITHOUT ANY WARRANTY; without even the implied warranty of */ /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ /* GNU General Public License for more details. */ /* */ /* You should have received a copy of the GNU General Public License */ /* along with this program; if not, write to the Free Software */ /* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
*/ /******************************************************************************/ #ifndef _STRUCTURES_H #define _STRUCTURES_H #include #include #include typedef struct _Liste_Pos { int *tab[2]; int last_cell; int tab_size; } ListePositions; typedef struct _Noeud /* Ne pas changer l'ordre des 3 premiers champs! */ { LongSeq debut; NbSeq sequence_number; LongSeq fin; struct _Noeud *suffixe_link; struct _Noeud **fils; Bit_Tab *sequences; int nb_element_bt; }Noeud; typedef struct _Feuille /* Ne pas changer l'ordre des 3 premiers champs! */ { LongSeq debut; NbSeq sequence_number; LongSeq fin_deb; Bit_Tab *sequences; }Feuille; typedef struct _Liste { struct _Liste *suiv; Feuille *feuille; } Liste; extern unsigned char Translation_Table[255]; extern unsigned char **Sequence; extern ListePositions *Liste_positions_fin; extern int global_indice; extern int current_sequence; #endif SMILEv1.47/P_BLOCS/include/libsysk.h0000644002404200237300000000337010211354657016532 0ustar lamaaoc00000000000000/* * Copyright (c) Atelier de BioInformatique * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free * Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, * MA 02111-1307, USA * * For questions, suggestions, bug-reports, enhancement-requests etc. 
* I may be contacted at: Alain.Viari@inrialpes.fr */ #ifndef _H_Gtypes #include "Gtypes.h" #endif #define _H_libsysk /* ==================================================== */ /* Constantes */ /* ==================================================== */ #define TICKS_PER_SEC 60 #define TIME_RESET Vrai #define TIME_NO_RESET Faux /* ==================================================== */ /* Prototypes (generated by mproto) */ /* ==================================================== */ /* libsysk.c */ float UserCpuTime P(( Bool reset )); float SysCpuTime P(( Bool reset )); char *StrCpuTime P(( Bool reset )); void SetUpKmrNotify P(( Bool notif )); Bool GetKmrNotify P(( void )); void NotifyKmrStep P(( Int32 wlen, Int32 wnb )); void NotifyKmrEnd P(( Int32 wlen, Int32 maxlen, float cpu )); void NotifyKmrError P(( char *msg, Int32 wlen )); SMILEv1.47/P_BLOCS/include/Gtypes.h0000644002404200237300000000661010217767112016325 0ustar lamaaoc00000000000000/* * Copyright (c) Atelier de BioInformatique * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free * Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, * MA 02111-1307, USA * * For questions, suggestions, bug-reports, enhancement-requests etc. * I may be contacted at: Alain.Viari@inrialpes.fr */ #define _H_Gtypes #ifndef NULL #include /* is the official NULL here ? 
*/ #endif /* ==================================================== */ /* constantes */ /* ==================================================== */ #ifndef PROTO #define PROTO 1 /* prototypes flag */ #endif #ifdef THINK_C #define Vrai true /* TC boolean values */ #define Faux false /* */ #else #define Vrai 0x1 /* bool values = TRUE */ #define Faux 0x0 /* = FALSE */ #endif #define Nil NULL /* nil pointer */ #define kBigInt16 0x7fff /* plus grand 16 bits signe */ #define kBigInt32 0x7fffffff /* plus grand 32 bits signe */ #define kBigUInt16 0xffff /* plus grand 16 bits ~signe */ #define kBigUInt32 0xffffffff /* plus grand 32 bits ~signe */ #define kBitsPerLong 32 /* long = 32 bits */ #define kMaxShftLong 31 /* BitsPerLong - 1 max shift */ #define kLog2BitLong 5 /* =log2(BitsPerLong) */ #ifdef THINK_C /* ==================================================== */ /* Types (for Macintosh ThinK C) */ /* ==================================================== */ typedef long Long; /* plus grand mot signe */ typedef unsigned long ULong; /* plus grand mot signe */ typedef long Int32; /* Int32 = 32 bits signe */ typedef unsigned long UInt32; /* UInt32 = 32 bits ~signe */ typedef short Int16; /* Int16 = 16 bits signe */ typedef unsigned short UInt16; /* UInt32 = 16 bits ~signe */ typedef char Int8; /* Int8 = 8 bits signe */ typedef unsigned char UInt8; /* UInt8 = 8 bits ~signe */ typedef Boolean Bool; /* booleen */ #else /* ==================================================== */ /* Types (for Sun & Iris) */ /* ==================================================== */ typedef long Long; /* plus grand mot signe */ typedef unsigned long ULong; /* plus grand mot signe */ typedef int Int32; /* Int32 = 32 bits signe */ typedef unsigned int UInt32; /* UInt32 = 32 bits ~signe */ typedef short Int16; /* Int16 = 16 bits signe */ typedef unsigned short UInt16; /* UInt32 = 16 bits ~signe */ typedef char Int8; /* Int8 = 8 bits signe */ typedef unsigned char UInt8; /* UInt8 = 8 bits ~signe */ typedef 
int Bool; /* booleen (int for ANSI) */ typedef void *Ptr; /* pointeur */ #endif /* ==================================================== */ /* special macro for prototypes */ /* ==================================================== */ #if PROTO #define P(s) s #else #define P(s) () #endif SMILEv1.47/P_BLOCS/include/liste_pos.h0000644002404200237300000000376110066543573017064 0ustar lamaaoc00000000000000/******************************************************************************/ /* SMILE v1.47 - Extraction of structured motifs common to several sequences */ /* Copyright (C) 2004 L.Marsan (lama -AT- prism.uvsq.fr) */ /* */ /* This program is free software; you can redistribute it and/or */ /* modify it under the terms of the GNU General Public License */ /* as published by the Free Software Foundation; either version 2 */ /* of the License, or (at your option) any later version. */ /* */ /* This program is distributed in the hope that it will be useful, */ /* but WITHOUT ANY WARRANTY; without even the implied warranty of */ /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ /* GNU General Public License for more details. */ /* */ /* You should have received a copy of the GNU General Public License */ /* along with this program; if not, write to the Free Software */ /* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
*/ /******************************************************************************/ #ifndef _LIST_POS_H #define _LIST_POS_H #include #include #include #include #include ListePositions *Alloc_ListePositions(int size); int Ajoute_Position_Liste(ListePositions *lpos,int *deb_liste,int position,int change_seq); int getValue(ListePositions *lpos,int i); void setListeValue(ListePositions *lpos,int i,int value); int getIndiceSuivant(ListePositions *lpos,int i); void Free_ListePositions(ListePositions *lpos); int Print_Positions(FILE *f, Feuille *n, LongSeq longway, P_Criteres cr, int code); #endif SMILEv1.47/P_BLOCS/include/define.h0000644002404200237300000000353410066543556016314 0ustar lamaaoc00000000000000/******************************************************************************/ /* SMILE v1.47 - Extraction of structured motifs common to several sequences */ /* Copyright (C) 2004 L.Marsan (lama -AT- prism.uvsq.fr) */ /* */ /* This program is free software; you can redistribute it and/or */ /* modify it under the terms of the GNU General Public License */ /* as published by the Free Software Foundation; either version 2 */ /* of the License, or (at your option) any later version. */ /* */ /* This program is distributed in the hope that it will be useful, */ /* but WITHOUT ANY WARRANTY; without even the implied warranty of */ /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ /* GNU General Public License for more details. */ /* */ /* You should have received a copy of the GNU General Public License */ /* along with this program; if not, write to the Free Software */ /* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
*/ /******************************************************************************/ #ifndef _DEFINE_H #define _DEFINE_H int ALPHA_CARD; #define LEAF_BIT 0x80000000 #define LEAF_BIT_INV 0x7FFFFFFF #define POS_ALLOC_STEP 50 #define LISTE_CHANGE_BIT 0x80000000 #define LISTE_CHANGE_BIT_INV 0x7FFFFFFF #define LISTE_END 0x0FFFFFFF #define FEUILLE_ALLOC_STEP 100 #define NOEUD_ALLOC_STEP 100 #define DEBUG_JTREE 0 #define DEFAULT_WINDOW_SIZE_OPTION 4 #endif SMILEv1.47/P_BLOCS/include/construction.h0000644002404200237300000000555510066543553017616 0ustar lamaaoc00000000000000/******************************************************************************/ /* SMILE v1.47 - Extraction of structured motifs common to several sequences */ /* Copyright (C) 2004 L.Marsan (lama -AT- prism.uvsq.fr) */ /* */ /* This program is free software; you can redistribute it and/or */ /* modify it under the terms of the GNU General Public License */ /* as published by the Free Software Foundation; either version 2 */ /* of the License, or (at your option) any later version. */ /* */ /* This program is distributed in the hope that it will be useful, */ /* but WITHOUT ANY WARRANTY; without even the implied warranty of */ /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ /* GNU General Public License for more details. */ /* */ /* You should have received a copy of the GNU General Public License */ /* along with this program; if not, write to the Free Software */ /* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
*/ /******************************************************************************/ #ifndef _CONSTRUCTION_H #define _CONSTRUCTION_H #include #include Noeud *Construction_Arbre(unsigned char *S,int taille_fenetre); void Premiere_Phase(Noeud *Racine, int taille_fenetre, Liste **debut_liste, Liste **fin_liste, Noeud **fin_liste_pere, int *nb_element_liste, int start_indice); int Deuxieme_Phase(Noeud *Racine, int taille_fenetre, Liste **debut_liste, Liste **fin_liste, Noeud **fin_liste_pere, int *nb_element_liste, int start_indice, int fict, int ini_res_type); void Troisieme_Phase(Noeud *Racine, int taille_fenetre, Liste **debut_liste, Liste **fin_liste, Noeud **fin_liste_pere, int *nb_element_liste, int fictive); Noeud *AjouteSequence(Noeud *Arbre,unsigned char *S,int taille_fenetre); void CloseTheFirstPhase( Liste **debut_liste, Liste **fin_liste, Noeud **fin_liste_pere); Noeud *CaseOneAddSequence(Noeud *Arbre,int taille_fenetre); Noeud *CaseTwoAddSequence(Noeud *Arbre,Noeud *resultat, Noeud *pere,int taille_fenetre); Noeud *CaseTreeAddSequence(Noeud *Arbre,Noeud *resultat,int deb,int fin,int taille_fenetre); Noeud *CaseFourAddSequence(Noeud *Arbre,Noeud *resultat,Noeud *pere,int res_type,int position_arc,int i,int taille_fenetre); #if DEBUG_JTREE extern int nb_alloc_noeud; extern int nb_alloc_feuille; extern int nb_alloc_liste; #endif #endif SMILEv1.47/P_BLOCS/include/struct_tab.h0000644002404200237300000000320410066543601017215 0ustar lamaaoc00000000000000/******************************************************************************/ /* SMILE v1.47 - Extraction of structured motifs common to several sequences */ /* Copyright (C) 2004 L.Marsan (lama -AT- prism.uvsq.fr) */ /* */ /* This program is free software; you can redistribute it and/or */ /* modify it under the terms of the GNU General Public License */ /* as published by the Free Software Foundation; either version 2 */ /* of the License, or (at your option) any later version. 
*/ /* */ /* This program is distributed in the hope that it will be useful, */ /* but WITHOUT ANY WARRANTY; without even the implied warranty of */ /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ /* GNU General Public License for more details. */ /* */ /* You should have received a copy of the GNU General Public License */ /* along with this program; if not, write to the Free Software */ /* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ /******************************************************************************/ #ifndef _STRUCT_TAB_H #define _STRUCT_TAB_H #include #include extern int NB_SEQUENCE; extern int CHANGE_LIMITE; extern int SIZE_STATIC_BIT_TAB; typedef unsigned char Bit_Tab; #endif SMILEv1.47/P_BLOCS/include/bit_tab.h0000644002404200237300000000457310066543547016472 0ustar lamaaoc00000000000000/******************************************************************************/ /* SMILE v1.47 - Extraction of structured motifs common to several sequences */ /* Copyright (C) 2004 L.Marsan (lama -AT- prism.uvsq.fr) */ /* */ /* This program is free software; you can redistribute it and/or */ /* modify it under the terms of the GNU General Public License */ /* as published by the Free Software Foundation; either version 2 */ /* of the License, or (at your option) any later version. */ /* */ /* This program is distributed in the hope that it will be useful, */ /* but WITHOUT ANY WARRANTY; without even the implied warranty of */ /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ /* GNU General Public License for more details. */ /* */ /* You should have received a copy of the GNU General Public License */ /* along with this program; if not, write to the Free Software */ /* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
*/ /******************************************************************************/ #ifndef _BIT_TAB_H #define _BIT_TAB_H #include #include #include void initBitTab(int nb_seq); Bit_Tab *AllocBitTab(void); void addBitTabValue(Bit_Tab **tab,int value); void fusionneBitTab(Bit_Tab **tab1,Bit_Tab *tab2); /* 1 <- 1 & 2 */ int nbSequenceInBitTab(Bit_Tab *tab); void printBitTab(Bit_Tab *tab); void CopyBitTab(Bit_Tab **dest,Bit_Tab *src); void ReinitBitTab(Bit_Tab **bt); /*---------------------------------------------------------*/ void convertBitTab(Bit_Tab **tab); /* transforme tab de dyn a static... */ void addBitTabValueStatic(Bit_Tab **tab,int value); void addBitTabValueDynamic(Bit_Tab **tab,int value); int nbSequenceInBitTabStatic(Bit_Tab *tab); int nbSequenceInBitTabDynamic(Bit_Tab *tab); void fusionneBitTabStatic(Bit_Tab **tab1,Bit_Tab *tab2); void fusionneBitTabDynamic(Bit_Tab **tab1,Bit_Tab *tab2); Bit_Tab *AllocBitTabStatic(void); #if DEBUG_JTREE extern int nb_alloc_tab; #endif #endif SMILEv1.47/P_BLOCS/include/libfasta.h0000644002404200237300000000577010211354657016645 0ustar lamaaoc00000000000000/* * Copyright (c) Atelier de BioInformatique * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free * Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, * MA 02111-1307, USA * * For questions, suggestions, bug-reports, enhancement-requests etc. 
* I may be contacted at: Alain.Viari@inrialpes.fr */ #ifndef _H_libfasta #define _H_libfasta #ifndef _H_Gtypes #include "Gtypes.h" #endif /* ==================================================== */ /* Constantes */ /* ==================================================== */ #define FASTA_NAMLEN 64 /* max length of seq. name */ #define FASTA_COMLEN 512 /* max length of seq. comment */ #define FASTA_CHAR_PER_LINE 50 /* # of chars per line in output */ /* ==================================================== */ /* Macros standards */ /* ==================================================== */ #ifndef NEW #define NEW(object) (object*)malloc(sizeof(object)) #define NEWN(object, dim) (object*)malloc((unsigned)(dim) * sizeof(object)) #define REALLOC(typ, ptr, dim) (typ*)realloc((void *) (ptr), (unsigned long)(dim) * sizeof(typ)) #define FREE(object) free(object) #endif /* ==================================================== */ /* Structures de donnees */ /* ==================================================== */ typedef struct { /* -- Sequence ---------------- */ Bool ok; /* error flag */ Int32 length, /* longueur */ offset, /* offset */ bufsize; /* size of current seq buffer */ char name[FASTA_NAMLEN], /* nom */ comment[FASTA_COMLEN], /* commentaire */ *seq; /* sequence */ } FastaSequence, *FastaSequencePtr; /* ==================================================== */ /* Prototypes (generated by mkproto) */ /* ==================================================== */ /* libfasta.c */ Int32 CountAlpha P(( char *buf )); char *StrcpyAlpha P(( char *s1 , char *s2 )); char *NextSpace P(( char *buffer )); char *GetFastaName P(( char *buffer )); char *GetFastaComment P(( char *buffer )); FastaSequencePtr FreeFastaSequence P(( FastaSequencePtr seq )); FastaSequencePtr NewFastaSequence P(( void )); Bool ReadFastaSequence P(( FILE *streamin, FastaSequencePtr seq )); Bool GetFastaSequence P(( FILE *streamin, FastaSequencePtr seq )); void WriteFastaSequence P(( FILE *streamou, 
FastaSequencePtr seq , Int32 char_per_line )); #endif SMILEv1.47/Makefile0000644002404200237300000000114210066542217013567 0ustar lamaaoc00000000000000############################################################################### # definition des repertoires P_DIR=P_BLOCS/ PD_DIR=P_BLOCS+DELTA/ S_DIR=SigStat/ C_DIR=Converter/ all: clean @echo "**** Making P_BLOCS..." (cd $(P_DIR) && $(MAKE) all) @echo "==> P_BLOCS OK." @echo "**** Making P_BLOCS+DELTA..." (cd $(PD_DIR) && $(MAKE) all) @echo "==> P_BLOCS+DELTA OK." @echo "**** Making SigStat..." (cd $(S_DIR) && $(MAKE) all) @echo "==> SigStat OK." clean: @echo Cleaning... (cd $(P_DIR) && $(MAKE) clean) (cd $(PD_DIR) && $(MAKE) clean) (cd $(S_DIR) && $(MAKE) clean) @echo Clean OK. SMILEv1.47/smile.1.gz0000644002404200237300000001161110066552671013750 0ustar lamaaoc00000000000000‹rÕ½@smile.1µ;isÛ8–ßñ+0šêr\K³-9G;Õ›)EqmùK™ìlwª ¢ ‰‘P2Žú×ï;”ä¤{gW•Ø >¼û„Óé;9¹_]ȾHíÉ›áõ…´+Ó¬çr¦¥Z¯e¦66‘“‹Ñt|{=ëŸü”ÈBmáòáGi›™ÕY›’à˜z¥+¹QUa¥ªyÐó—Òj /•O^'ô»,ÒÉ;ÚWØ"_kùë‰ÌË…®t™iiÒÖU“ÕM¥çÒæËR­-<—E³®ó ,·úsƒK-Á™üóæön2žˆôµì1¼Ÿ UèZW¿-àû«žHgUôü—“¥,›b¦«=‚Ñ{s1ÝïÞž˜®r‹˜6j ô,µœ›¬)tY[9«r½Xo%Ð ðî%Á™)`ù<oM% ÄÏu­r@îJýU€v"·¦ñÌ\©/À!¹6擬 ƒßËžßI![{±GÚm Ün<äõJæu*Ò»;¤‡·l•ÜTf D(UËeáF^¨*d 5׸‹þZW*«qia€9‘r·Ä››a¸•ÊéJÃÖ°NÛ¥R#[ü6p Ij #*µiìz+¬þ¢+`ì‘/¬|’1î3óUÛcÆ@n@u¬œç@J=3@d¥`?·scòJYÛyM¹ÄÎ^ßÐbóŽÃñê¡:vÆž½ÀZ5%¸zWëô¤ZÔ”IA¹9»a­¾+P»cù¼\=Û—“X7÷áç„Êͨ q/}1FoôØÿ=ä@¿™µ7(œÓyw¿çÄÎRЫ`”`ÁrМú§g Íþ©Њ‰Ó²>ü¿3$‹~÷Éixrö”ž º°Ñ…؃³6J?¤ D'×I—œÄ›·ó—‘SÜxžpÍѺA–†_vöÔ/ãmÐØŸ ²ùSŠPXg€¤”ûц¿ëÊï(¢+^ÚÒ*_@YãJø(Ã$bº‘PÍ¡82à— DsÚŠ’./Hå:µ£˜C^>3u ÎÒ¿á•” ÅÞ^<ô: hÝ™¬šÅXÚ`ÿ“üw-?ð¦“Tš¦œhɸÚìpéÅÊ<ăÑTòÆ0b¹«+2Éy^†¤„Kœï1SD®ÌïNœ#ìâÃFÔVŒÌ–*† °µÁ-b;E?u08? 
Ï3Æ…ñÄáS„ZÍÀ„±8Å{Î*97£C[Èa+Jg?6IÈÇu#iXURùé É';Ÿ:œ{Q0P2O¦ãÑDÞÞÈé» yóþúõ޼}+'q3º¥þc|s)‡SyuÑ–^ÈÛÑèýý=>'@×f®×Á1ÿ"]ÕRþ•ÿ`W‹u*ÿÊ¿&ù²Pr´Êò¿Nl†ÌÀwþý_úð3¨4)FnôèAÒgônöñ'8y–ž?G_sžžŸpƒ» úî ܈§‚=MO_ˆG¸:½¯"ÞîM±¯e\—e’™Ÿÿ¾b-“îósù->uÞä_Èzó¹lÙƒn òسµÉ>¡x…òÁúÆCí‹nÖãȪZ_ëBs'i3YÖTÔåÀˆÞ g.©–‹#5‹¾míÁCa`Ø7}ªªEý`â•$ ’ÌðE'´c¨à±Ç2OAìä˜ÐXS¡R,·¶ÜÚ”+Ðl"º—ÁÍ éž¿±ëL™4î¨ïg!M®rªcÀS|£áu(WŠÊìG¹êßÉbC£»0sð¿ÜªŸ¬pan§—"|>½ ¿mŽvÛ<œJ/×fF­eßÀ r‰ØÃ1>¡ã.èVge?Tm(ç|·,çDr·†ˆb"°ªE©}ÔÑSìÓøkµµ†|†tã(p}Ê9N@Ó£–ôO÷ÅbZǽžþºY«’û/Êî‹ ©eHªqÚrFô€* ¿Þöö¶í¦‘"x¤òØUDõRbð&á>±À±î’ªh,]Fný Wv‘‰à=DzOµ¦¢ñØÆœ† -ðù5ºGÚ‚§Ã<âw»Z#Ä\ Ôb¾>ä‚®ÑP%÷À«û¶š\Já[Íõ·yÐ^âÔ²ÝL\ÇBÜ«ïñ§äOä)Î7ºeá}.ú¾Ãµ½|}ŸýA.Ãð~z§õmAj÷Ä^-!DËrYm(꽿µ… Œ”ä‚yùC+…æÈaÉ_ºÚð]‡•b÷…XàoôºVA± $?ó¨Ép>§?â"ÝgÛìš0Œ˜¦¡à©›O%׬dWþ´UÄo«ù=×ú¥ó=s ì…w1t‚’ì­UµÔ=>#™DÒÊhŽä‹Àæ¾K°8 s£‚ß57šñÚöuËÙù ‚Ì2–ݳmÀtœîÜÀ/ýÓ“ÁéGYa"‡ Û²'|J(”1ÀN':î¼cD`1eÙõ‚€ùWn v>³ Å€Sïš#j×î¨S´9öÒi#+ÉOˆ‹#þ.>Š'˜ì½$BûO?&pÑ?é?ƒ‹4M)6x4Ë„zŠÂg.hF º•¤cÀ<ÒË;ÀµœWÆg7‡UTŒCKÌ¥¶ßƒ®”|̯-&ˆã¹Y›eÎm¬:åª ÑéM±"F~1Œdþ@¸²aµó †2tö%♈fPm;/ê€~g£¦Eûèâ¤ýAmÙ>®@ÅU76ÕÜòÀ„FD`M §¼'þHÝà8®r—XSâCÇ"<É¡¨î0oXò‰]ð•+3ÿ3½'˜± .ÊŠvê&E ô˜ÎW’~(»WWïqáñL¯sý%R¯è¬™e;‹æ™Q?ëD?–W©¼V•UœÏ\§'oS9QKS'Øt/ºyŠ3ôš{Ó>;‡ÏAìe]iíN+n6k „9w=Ûsõ¸c¥— 00Ý-¸Òš¸ë” !ås „¼+óOä¤(ƒ¹T¾†˜™Ê‰œžöyöôÙÉÙóSAG(ÜªÒ ž«s ·QœbáÙjO[žù¿B vØÌTܦCLJ.$ Ÿ7F»È"o9w&>:/Îsû¬öZ÷ÂÙ4P‚pTÞUkËßaçJËÏ¡¸ƒÞu!ôiY‡W~Wþm^¨H(xøç øp ÏĬ›‡zQAv^ŒÒtàØH²KcæÂívœ´ÜöÊ5Lå?TéŽN=®iØŒÝýK jàA‘ð8\²6:ÝØ:æ ÷ bUá!ðR^çØ.a½•ýg§‰ìŸŸŸ'åÅ‹ó“çç„$s¿,<ºR3ˆôú€•Œ}ó"¨2‚kÏ ³º ¸5`9¥ë;VAGÍ ÛÚœ$Á+I©ÆòDú \ŽTåÝv lqç~:• U`Þ ±ÂÇðÙb®ÑJØVç/ˆ`0À‘³30˜ggì[†ï§ïnï÷ÿ¨ÿFÆw»Á,€ eÉŽ=ßpZI;¯}£g9¬½¼yÿãU^6P3mÁb ù}£óÐT0Á”mYLØþ†ဂ5SMILEv1.47/COPYING.gpl0000644002404200237300000004310010217766634013754 0ustar lamaaoc00000000000000 GNU GENERAL PUBLIC LICENSE Version 2, June 1991 Copyright (C) 1989, 1991 Free Software Foundation, Inc. 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. Preamble The licenses for most software are designed to take away your freedom to share and change it. 
By contrast, the GNU General Public License is intended to guarantee your freedom to share and change free software--to make sure the software is free for all its users. This General Public License applies to most of the Free Software Foundation's software and to any other program whose authors commit to using it. (Some other Free Software Foundation software is covered by the GNU Library General Public License instead.) You can apply it to your programs, too. When we speak of free software, we are referring to freedom, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for this service if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs; and that you know you can do these things. To protect your rights, we need to make restrictions that forbid anyone to deny you these rights or to ask you to surrender the rights. These restrictions translate to certain responsibilities for you if you distribute copies of the software, or if you modify it. For example, if you distribute copies of such a program, whether gratis or for a fee, you must give the recipients all the rights that you have. You must make sure that they, too, receive or can get the source code. And you must show them these terms so they know their rights. We protect your rights with two steps: (1) copyright the software, and (2) offer you this license which gives you legal permission to copy, distribute and/or modify the software. Also, for each author's protection and ours, we want to make certain that everyone understands that there is no warranty for this free software. If the software is modified by someone else and passed on, we want its recipients to know that what they have is not the original, so that any problems introduced by others will not reflect on the original authors' reputations. 
Finally, any free program is threatened constantly by software patents. We wish to avoid the danger that redistributors of a free program will individually obtain patent licenses, in effect making the program proprietary. To prevent this, we have made it clear that any patent must be licensed for everyone's free use or not licensed at all. The precise terms and conditions for copying, distribution and modification follow. GNU GENERAL PUBLIC LICENSE TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 0. This License applies to any program or other work which contains a notice placed by the copyright holder saying it may be distributed under the terms of this General Public License. The "Program", below, refers to any such program or work, and a "work based on the Program" means either the Program or any derivative work under copyright law: that is to say, a work containing the Program or a portion of it, either verbatim or with modifications and/or translated into another language. (Hereinafter, translation is included without limitation in the term "modification".) Each licensee is addressed as "you". Activities other than copying, distribution and modification are not covered by this License; they are outside its scope. The act of running the Program is not restricted, and the output from the Program is covered only if its contents constitute a work based on the Program (independent of having been made by running the Program). Whether that is true depends on what the Program does. 1. You may copy and distribute verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice and disclaimer of warranty; keep intact all the notices that refer to this License and to the absence of any warranty; and give any other recipients of the Program a copy of this License along with the Program. 
You may charge a fee for the physical act of transferring a copy, and you may at your option offer warranty protection in exchange for a fee. 2. You may modify your copy or copies of the Program or any portion of it, thus forming a work based on the Program, and copy and distribute such modifications or work under the terms of Section 1 above, provided that you also meet all of these conditions: a) You must cause the modified files to carry prominent notices stating that you changed the files and the date of any change. b) You must cause any work that you distribute or publish, that in whole or in part contains or is derived from the Program or any part thereof, to be licensed as a whole at no charge to all third parties under the terms of this License. c) If the modified program normally reads commands interactively when run, you must cause it, when started running for such interactive use in the most ordinary way, to print or display an announcement including an appropriate copyright notice and a notice that there is no warranty (or else, saying that you provide a warranty) and that users may redistribute the program under these conditions, and telling the user how to view a copy of this License. (Exception: if the Program itself is interactive but does not normally print such an announcement, your work based on the Program is not required to print an announcement.) These requirements apply to the modified work as a whole. If identifiable sections of that work are not derived from the Program, and can be reasonably considered independent and separate works in themselves, then this License, and its terms, do not apply to those sections when you distribute them as separate works. 
But when you distribute the same sections as part of a whole which is a work based on the Program, the distribution of the whole must be on the terms of this License, whose permissions for other licensees extend to the entire whole, and thus to each and every part regardless of who wrote it. Thus, it is not the intent of this section to claim rights or contest your rights to work written entirely by you; rather, the intent is to exercise the right to control the distribution of derivative or collective works based on the Program. In addition, mere aggregation of another work not based on the Program with the Program (or with a work based on the Program) on a volume of a storage or distribution medium does not bring the other work under the scope of this License. 3. You may copy and distribute the Program (or a work based on it, under Section 2) in object code or executable form under the terms of Sections 1 and 2 above provided that you also do one of the following: a) Accompany it with the complete corresponding machine-readable source code, which must be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or, b) Accompany it with a written offer, valid for at least three years, to give any third party, for a charge no more than your cost of physically performing source distribution, a complete machine-readable copy of the corresponding source code, to be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or, c) Accompany it with the information you received as to the offer to distribute corresponding source code. (This alternative is allowed only for noncommercial distribution and only if you received the program in object code or executable form with such an offer, in accord with Subsection b above.) The source code for a work means the preferred form of the work for making modifications to it. 
For an executable work, complete source code means all the source code for all modules it contains, plus any associated interface definition files, plus the scripts used to control compilation and installation of the executable. However, as a special exception, the source code distributed need not include anything that is normally distributed (in either source or binary form) with the major components (compiler, kernel, and so on) of the operating system on which the executable runs, unless that component itself accompanies the executable. If distribution of executable or object code is made by offering access to copy from a designated place, then offering equivalent access to copy the source code from the same place counts as distribution of the source code, even though third parties are not compelled to copy the source along with the object code. 4. You may not copy, modify, sublicense, or distribute the Program except as expressly provided under this License. Any attempt otherwise to copy, modify, sublicense or distribute the Program is void, and will automatically terminate your rights under this License. However, parties who have received copies, or rights, from you under this License will not have their licenses terminated so long as such parties remain in full compliance. 5. You are not required to accept this License, since you have not signed it. However, nothing else grants you permission to modify or distribute the Program or its derivative works. These actions are prohibited by law if you do not accept this License. Therefore, by modifying or distributing the Program (or any work based on the Program), you indicate your acceptance of this License to do so, and all its terms and conditions for copying, distributing or modifying the Program or works based on it. 6. 
Each time you redistribute the Program (or any work based on the Program), the recipient automatically receives a license from the original licensor to copy, distribute or modify the Program subject to these terms and conditions. You may not impose any further restrictions on the recipients' exercise of the rights granted herein. You are not responsible for enforcing compliance by third parties to this License. 7. If, as a consequence of a court judgment or allegation of patent infringement or for any other reason (not limited to patent issues), conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot distribute so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not distribute the Program at all. For example, if a patent license would not permit royalty-free redistribution of the Program by all those who receive copies directly or indirectly through you, then the only way you could satisfy both it and this License would be to refrain entirely from distribution of the Program. If any portion of this section is held invalid or unenforceable under any particular circumstance, the balance of the section is intended to apply and the section as a whole is intended to apply in other circumstances. It is not the purpose of this section to induce you to infringe any patents or other property right claims or to contest validity of any such claims; this section has the sole purpose of protecting the integrity of the free software distribution system, which is implemented by public license practices. 
Many people have made generous contributions to the wide range of software distributed through that system in reliance on consistent application of that system; it is up to the author/donor to decide if he or she is willing to distribute software through any other system and a licensee cannot impose that choice. This section is intended to make thoroughly clear what is believed to be a consequence of the rest of this License. 8. If the distribution and/or use of the Program is restricted in certain countries either by patents or by copyrighted interfaces, the original copyright holder who places the Program under this License may add an explicit geographical distribution limitation excluding those countries, so that distribution is permitted only in or among countries not thus excluded. In such case, this License incorporates the limitation as if written in the body of this License. 9. The Free Software Foundation may publish revised and/or new versions of the General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Program specifies a version number of this License which applies to it and "any later version", you have the option of following the terms and conditions either of that version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of this License, you may choose any version ever published by the Free Software Foundation. 10. If you wish to incorporate parts of the Program into other free programs whose distribution conditions are different, write to the author to ask for permission. For software which is copyrighted by the Free Software Foundation, write to the Free Software Foundation; we sometimes make exceptions for this. 
Our decision will be guided by the two goals of preserving the free status of all derivatives of our free software and of promoting the sharing and reuse of software generally. NO WARRANTY 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. END OF TERMS AND CONDITIONS How to Apply These Terms to Your New Programs If you develop a new program, and you want it to be of the greatest possible use to the public, the best way to achieve this is to make it free software which everyone can redistribute and change under these terms. To do so, attach the following notices to the program. It is safest to attach them to the start of each source file to most effectively convey the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. 
Copyright (C) 19yy This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Also add information on how to contact you by electronic and paper mail. If the program is interactive, make it output a short notice like this when it starts in an interactive mode: Gnomovision version 69, Copyright (C) 19yy name of author Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. This is free software, and you are welcome to redistribute it under certain conditions; type `show c' for details. The hypothetical commands `show w' and `show c' should show the appropriate parts of the General Public License. Of course, the commands you use may be called something other than `show w' and `show c'; they could even be mouse-clicks or menu items--whatever suits your program. You should also get your employer (if you work as a programmer) or your school, if any, to sign a "copyright disclaimer" for the program, if necessary. Here is a sample; alter the names: Yoyodyne, Inc., hereby disclaims all copyright interest in the program `Gnomovision' (which makes passes at compilers) written by James Hacker. , 1 April 1989 Ty Coon, President of Vice This General Public License does not permit incorporating your program into proprietary programs. 
If your program is a subroutine library, you may consider it more useful to permit linking proprietary applications with the library. If this is what you want to do, use the GNU Library General Public License instead of this License. SMILEv1.47/P_BLOCS+DELTA/0000755002404200237300000000000010066766523014107 5ustar lamaaoc00000000000000SMILEv1.47/P_BLOCS+DELTA/bin/0000755002404200237300000000000010114605452014642 5ustar lamaaoc00000000000000SMILEv1.47/P_BLOCS+DELTA/obj/0000755002404200237300000000000010114605452014644 5ustar lamaaoc00000000000000SMILEv1.47/P_BLOCS+DELTA/src/0000755002404200237300000000000010066542217014666 5ustar lamaaoc00000000000000SMILEv1.47/P_BLOCS+DELTA/src/global_fonctions.c0000777002404200237300000000000010066542217026375 2../../P_BLOCS/src/global_fonctions.custar lamaaoc00000000000000SMILEv1.47/P_BLOCS+DELTA/src/allocateurs.c0000777002404200237300000000000010066542217024365 2../../P_BLOCS/src/allocateurs.custar lamaaoc00000000000000SMILEv1.47/P_BLOCS+DELTA/src/global_variables.c0000777002404200237300000000000010066542217026311 2../../P_BLOCS/src/global_variables.custar lamaaoc00000000000000SMILEv1.47/P_BLOCS+DELTA/src/sub_suffix_tree.c0000777002404200237300000000000010066542217026121 2../../P_BLOCS/src/sub_suffix_tree.custar lamaaoc00000000000000SMILEv1.47/P_BLOCS+DELTA/src/liste_pos.c0000777002404200237300000000000010066542217023533 2../../P_BLOCS/src/liste_pos.custar lamaaoc00000000000000SMILEv1.47/P_BLOCS+DELTA/src/bit_tab2.c0000777002404200237300000000000010066542217022705 2../../P_BLOCS/src/bit_tab2.custar lamaaoc00000000000000SMILEv1.47/P_BLOCS+DELTA/src/liste_pos2.c0000777002404200237300000000000010066542217023677 2../../P_BLOCS/src/liste_pos2.custar lamaaoc00000000000000SMILEv1.47/P_BLOCS+DELTA/src/construction.c0000777002404200237300000000000010066542217025015 2../../P_BLOCS/src/construction.custar lamaaoc00000000000000SMILEv1.47/P_BLOCS+DELTA/src/bit_tab.c0000777002404200237300000000000010066542217022541 
2../../P_BLOCS/src/bit_tab.custar lamaaoc00000000000000SMILEv1.47/P_BLOCS+DELTA/src/libfasta.c0000777002404200237300000000000010066542217023103 2../../P_BLOCS/src/libfasta.custar lamaaoc00000000000000SMILEv1.47/P_BLOCS+DELTA/Makefile0000644002404200237300000001202410066766523015546 0ustar lamaaoc00000000000000############################################################################### # OPTIONS DU PROGRAMME ######################################################## #Affichage du nombre d'occurrences pour chaque modele trouve NB_OCCS=1 #Affichage des positions des occurrences de chaque modele trouve AFF_OCCS=0 ############################################################################### #Compilateur : CC=gcc OPT=-Wall -ansi -O3 #OPT=-g #definition des repertoires des arbres: INCL_DIR=include/ OBJ_DIR=obj/ SRC_DIR=src/ LIB_DIR=lib/ BIN_DIR=bin/ # definition de repertoires de spell : SPELL_INCL_DIR=Spell/include/ SPELL_SRC_DIR=Spell/src/ SPELL_OBJ_DIR=Spell/obj/ # Options de compilation : C_FLAG= $(OPT) -I$(INCL_DIR) -I$(SPELL_INCL_DIR) O_FLAG= -c $(OPT) -I$(INCL_DIR) -I$(SPELL_INCL_DIR) -DOCC=$(NB_OCCS) -DAFF_OCC=$(AFF_OCCS) LIB_FLAG= -lm # Nom du programme , de l'archive PROG_NAME=x-smile_delta ARCHIVE_NAME=x-smile_delta.tar.gz #Objet ... 
OBJ_FILE=$(OBJ_DIR)*.o $(SPELL_OBJ_DIR)*.o TREE_OBJ=$(OBJ_DIR)global_fonctions.o $(OBJ_DIR)construction.o $(OBJ_DIR)liste_pos.o $(OBJ_DIR)allocateurs.o $(OBJ_DIR)bit_tab.o $(OBJ_DIR)libfasta.o $(OBJ_DIR)global_variables.o $(SPELL_OBJ_DIR)global.o $(SPELL_OBJ_DIR)criteres.o #$(OBJ_DIR)sub_suffix_tree.o TREE_OBJ_DEB=$(OBJ_DIR)global_fonctions.o $(OBJ_DIR)construction.o $(OBJ_DIR)liste_pos2.o $(OBJ_DIR)allocateurs.o $(OBJ_DIR)bit_tab2.o $(OBJ_DIR)libfasta.o $(OBJ_DIR)global_variables.o $(SPELL_OBJ_DIR)global.o $(SPELL_OBJ_DIR)criteres.o $(OBJ_DIR)sub_suffix_tree.o SPELL_OBJ=$(SPELL_OBJ_DIR)global.o $(SPELL_OBJ_DIR)pile_occ.o $(SPELL_OBJ_DIR)spell.o $(SPELL_OBJ_DIR)occ.o $(SPELL_OBJ_DIR)criteres.o $(SPELL_OBJ_DIR)barre.o $(SPELL_OBJ_DIR)alphabet.o $(SPELL_OBJ_DIR)model.o all: $(BIN_DIR)$(PROG_NAME) echo ALL!!! debug: $(TREE_OBJ_DEB) $(CC) $(C_FLAG) $(OBJ_FILE) -o $(BIN_DIR)$(PROG_NAME) $(LIB_FLAG) $(BIN_DIR)$(PROG_NAME): $(TREE_OBJ) $(SPELL_OBJ) $(CC) $(C_FLAG) $(OBJ_FILE) -o $(BIN_DIR)$(PROG_NAME) $(LIB_FLAG) $(OBJ_DIR)sub_suffix_tree.o : $(SRC_DIR)sub_suffix_tree.c $(CC) $(SRC_DIR)sub_suffix_tree.c -o $(OBJ_DIR)sub_suffix_tree.o $(O_FLAG) $(OBJ_DIR)global_fonctions.o : $(INCL_DIR)global_fonctions.h $(SRC_DIR)global_fonctions.c $(CC) $(SRC_DIR)global_fonctions.c -o $(OBJ_DIR)global_fonctions.o $(O_FLAG) $(OBJ_DIR)construction.o : $(INCL_DIR)construction.h $(SRC_DIR)construction.c $(CC) $(SRC_DIR)construction.c -o $(OBJ_DIR)construction.o $(O_FLAG) $(OBJ_DIR)liste_pos.o : $(INCL_DIR)liste_pos.h $(SRC_DIR)liste_pos.c $(CC) $(SRC_DIR)liste_pos.c -o $(OBJ_DIR)liste_pos.o $(O_FLAG) $(OBJ_DIR)liste_pos2.o : $(INCL_DIR)liste_pos.h $(SRC_DIR)liste_pos2.c $(CC) $(SRC_DIR)liste_pos2.c -o $(OBJ_DIR)liste_pos2.o $(O_FLAG) $(OBJ_DIR)allocateurs.o : $(INCL_DIR)allocateurs.h $(SRC_DIR)allocateurs.c $(CC) $(SRC_DIR)allocateurs.c -o $(OBJ_DIR)allocateurs.o $(O_FLAG) $(OBJ_DIR)bit_tab.o : $(INCL_DIR)bit_tab.h $(SRC_DIR)bit_tab.c $(CC) $(SRC_DIR)bit_tab.c -o $(OBJ_DIR)bit_tab.o 
$(O_FLAG) $(OBJ_DIR)bit_tab2.o : $(INCL_DIR)bit_tab.h $(SRC_DIR)bit_tab2.c $(CC) $(SRC_DIR)bit_tab2.c -o $(OBJ_DIR)bit_tab2.o $(O_FLAG) $(OBJ_DIR)libfasta.o : $(INCL_DIR)libfasta.h $(SRC_DIR)libfasta.c $(CC) $(SRC_DIR)libfasta.c -o $(OBJ_DIR)libfasta.o $(O_FLAG) $(OBJ_DIR)global_variables.o : $(SRC_DIR)global_variables.c $(CC) $(SRC_DIR)global_variables.c -o $(OBJ_DIR)global_variables.o $(O_FLAG) #Dependance pour le programme spell : $(SPELL_OBJ_DIR)global.o : $(SPELL_INCL_DIR)global.h $(SPELL_SRC_DIR)global.c $(CC) $(SPELL_SRC_DIR)global.c -o $(SPELL_OBJ_DIR)global.o $(O_FLAG) $(SPELL_OBJ_DIR)criteres.o : $(SPELL_INCL_DIR)criteres.h $(SPELL_SRC_DIR)criteres.c $(SPELL_INCL_DIR)global.h $(CC) $(SPELL_SRC_DIR)criteres.c -o $(SPELL_OBJ_DIR)criteres.o $(O_FLAG) $(SPELL_OBJ_DIR)pile_occ.o : $(SPELL_INCL_DIR)pile_occ.h $(SPELL_SRC_DIR)pile_occ.c $(SPELL_INCL_DIR)global.h $(CC) $(SPELL_SRC_DIR)pile_occ.c -o $(SPELL_OBJ_DIR)pile_occ.o $(O_FLAG) $(SPELL_OBJ_DIR)spell.o : $(SPELL_INCL_DIR)spell.h $(SPELL_SRC_DIR)spell.c $(SPELL_INCL_DIR)global.h $(SPELL_INCL_DIR)model.h $(CC) $(SPELL_SRC_DIR)spell.c -o $(SPELL_OBJ_DIR)spell.o $(O_FLAG) $(SPELL_OBJ_DIR)occ.o : $(SPELL_INCL_DIR)occ.h $(SPELL_SRC_DIR)occ.c $(CC) $(SPELL_SRC_DIR)occ.c -o $(SPELL_OBJ_DIR)occ.o $(O_FLAG) $(SPELL_OBJ_DIR)barre.o : $(SPELL_INCL_DIR)barre.h $(SPELL_SRC_DIR)barre.c $(CC) $(SPELL_SRC_DIR)barre.c -o $(SPELL_OBJ_DIR)barre.o $(O_FLAG) $(SPELL_OBJ_DIR)alphabet.o : $(SPELL_INCL_DIR)alphabet.h $(SPELL_SRC_DIR)alphabet.c $(CC) $(SPELL_SRC_DIR)alphabet.c -o $(SPELL_OBJ_DIR)alphabet.o $(O_FLAG) $(SPELL_OBJ_DIR)model.o : $(SPELL_INCL_DIR)model.h $(SPELL_SRC_DIR)model.c $(CC) $(SPELL_SRC_DIR)model.c -o $(SPELL_OBJ_DIR)model.o $(O_FLAG) clean: clean_emacs clean_obj echo CLEAN OK!!! clean_emacs: find . 
-name "*~" -exec rm -f {} \; clean_obj: rm -f $(OBJ_DIR)*o $(SPELL_OBJ_DIR)*o rm -f $(BIN_DIR)$(PROG_NAME) clean_arch: rm ./$(ARCHIVE_NAME) tgz: clean tar -zcvf $(ARCHIVE_NAME) ./* depend: makedepend -o.o $(SPELL_INCL_DIR)*.h $(INCL_DIR)*.h $(SPELL_SRC_DIR)*.c $(SRC_DIR)*.c -I$(INCL_DIR) -I$(SPELL_INCL_DIR) # DO NOT DELETE SMILEv1.47/P_BLOCS+DELTA/Spell/0000755002404200237300000000000010066543734015163 5ustar lamaaoc00000000000000SMILEv1.47/P_BLOCS+DELTA/Spell/obj/0000755002404200237300000000000010114605452015723 5ustar lamaaoc00000000000000SMILEv1.47/P_BLOCS+DELTA/Spell/src/0000755002404200237300000000000010066544017015745 5ustar lamaaoc00000000000000SMILEv1.47/P_BLOCS+DELTA/Spell/src/occ.c0000777002404200237300000000000010066542217023412 2../../../P_BLOCS/Spell/src/occ.custar lamaaoc00000000000000SMILEv1.47/P_BLOCS+DELTA/Spell/src/criteres.c0000644002404200237300000002634010066544005017733 0ustar lamaaoc00000000000000/******************************************************************************/ /* SMILE v1.47 - Extraction of structured motifs common to several sequences */ /* Copyright (C) 2004 L.Marsan (lama -AT- prism.uvsq.fr) */ /* */ /* This program is free software; you can redistribute it and/or */ /* modify it under the terms of the GNU General Public License */ /* as published by the Free Software Foundation; either version 2 */ /* of the License, or (at your option) any later version. */ /* */ /* This program is distributed in the hope that it will be useful, */ /* but WITHOUT ANY WARRANTY; without even the implied warranty of */ /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ /* GNU General Public License for more details. */ /* */ /* You should have received a copy of the GNU General Public License */ /* along with this program; if not, write to the Free Software */ /* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
*/ /******************************************************************************/ #include /* From alphabet.c */ extern int nbSymbMod; int **code2Sauts; /******************************************************************************/ /* FONCTIONS PRIVEES */ /******************************************************************************/ FILE** recInitFiles(FILE **f, int bloc, P_Criteres cr, char *buf, int *tab, FILE *namefile); int recFillTab(int bloc, P_Criteres cr, int *nbcodes, int **code2Sauts); /******************************************************************************/ /* setCompoPal */ /******************************************************************************/ void setCompoPal(P_Criteres cr, char **argv, int argc) { int i,j,bloc,nbsymb; if( (cr->compo = malloc(nbSymbMod*sizeof(LongSeq))) == NULL) fatalError("initCriteres: cannot allocate 'cr->compo'\n"); for(i=0;ibloc;i++) { cr->compobloc[i] = (LongSeq *) malloc(nbSymbMod*sizeof(LongSeq)); if(!(cr->compobloc[i])) fatalError("setCompo: cannot allocate 'cr->compobloc'\n"); } /* Initialisations */ cr->flag_compo = FAUX; for (i = 0; i != nbSymbMod; i++) cr->compo[i] = -1; for (i = 0; i != cr->bloc; i++) { cr->flag_compobloc[i] = FAUX; for (j = 0; j != nbSymbMod; j++) cr->compobloc[i][j] = -1; } while(argc>0 && (**argv!='p')) { bloc = atoi(*argv); argc--; argv++; nbsymb = atoi(*argv); argc--; argv++; for(i=0; i!=nbsymb; i++) { j = str2nummod(*argv); argc--; argv++; if( j == -1) { fprintf(stderr, "> Warning: composition in '%s' ignored, symbol is not in the models alphabet.\n", *(argv-1)); argc--; argv++; continue; } if(bloc == 0) { cr->flag_compo = VRAI; cr->compo[j] = atoi(*argv); /* printf("compo glob %s %d\n",*(argv-1),cr->compo[j]); */ } else { cr->flag_compobloc[bloc-1]= VRAI; cr->compobloc[bloc-1][j] = atoi(*argv); /* printf("compo bloc %s %d\n",*(argv-1),cr->compobloc[j]); */ } argc--; argv++; } } /* S'il n'y a pas de palindromes */ if(argc==0) return; cr->flag_palindrom = VRAI; while(argc>0) { 
sscanf(*argv, "p%d/%d",&i, &j); cr->palindrom[i-1] = j-1; argc--; argv++; } } /******************************************************************************/ /* initFiles */ /******************************************************************************/ int initFiles(FILE **f, char *nom, P_Criteres cr) { char buf[500]; int tab[100]; FILE * namefile; strcpy(buf, nom); namefile = fopen(NAMEFILE, "w"); if(namefile == NULL) return FAUX; if(recInitFiles(f, 0, cr, buf,tab, namefile) == NULL) { fclose(namefile); return FAUX; } fclose(namefile); return VRAI; } /******************************************************************************/ /* recInitFiles */ /******************************************************************************/ FILE** recInitFiles(FILE **f, int bloc, P_Criteres cr, char *buf, int tab[100], FILE *namefile) { int i; /* int tmpi,tab2[100]; */ char tmp[20]; char *posfin; posfin = buf + strlen(buf); for(i=cr->saut[bloc].min+cr->delta[bloc]; i+cr->delta[bloc]<=cr->saut[bloc].max; i++) { if(bloc < cr->bloc-1) { tab[bloc]=i; sprintf(tmp,"[%d-%d]",i-cr->delta[bloc], i+cr->delta[bloc]); strcat(buf,tmp); f = recInitFiles(f, bloc+1, cr, buf, tab, namefile); if(f==NULL) return(NULL); } else { /* printf("\n%s =%d\n",buf,tmpi =delta2File(tab, cr)); */ /* if ( ! 
file2Delta(tmpi, tab2, cr) ) */ /* printf("bug?CDSCVSDbloc-1; i++) */ /* printf("%d ", tab2[i]); */ /* printf("\n"); */ *f = fopen(buf,"w"); if(*f == NULL) return NULL; fprintf(namefile, "%s\n",buf); /* Insertion de l'espace necessaire en tete de fichier pour y mettre la */ /* ligne d'informations apres extraction */ /* J'ecris en tout 80 * 3 = 240 espaces et 80 '=' ==> 320 caracteres */ for(i=0; i!=3; i++) { fprintf(*f," "); fprintf(*f," \n"); } fprintf(*f,"========================================"); fprintf(*f,"=======================================\n"); return (++f); } *posfin = '\0'; } return(f); } /******************************************************************************/ /* delta2File */ /******************************************************************************/ int delta2File(LongSeq *deltatab, P_Criteres cr) { int pos=0, i; pos = deltatab[0] - cr->saut[0].min - cr->delta[0]; for(i=1; i != cr->bloc-1; i++) { pos *= cr->saut[i].max - cr->saut[i].min + 1-cr->delta[i]*2; pos += deltatab[i] - cr->saut[i].min - cr->delta[i]; } return pos; } /******************************************************************************/ /* addSaut2Code */ /******************************************************************************/ int addSaut2Code(int oldcode, LongSeq saut, LongSeq curbloc, P_Criteres cr) { #if DEBUG_SAUT printf("addSaut2Code: Je recois %d",oldcode); #endif if (curbloc == 0) { #if DEBUG_SAUT printf(" et renvoie %d\n",saut - cr->saut[0].min); #endif return (saut - cr->saut[0].min); } oldcode *= cr->saut[curbloc].max - cr->saut[curbloc].min + 1; oldcode += saut - cr->saut[curbloc].min; #if DEBUG_SAUT printf(" et renvoie %d\n",oldcode); #endif return oldcode; } /******************************************************************************/ /* file2Delta */ /******************************************************************************/ /* A REFAIRE!! Vaut mieux avoir un tableau ou chaque colonne pointe vers * le deltatab associe. 
*/ int file2Delta(int pos, LongSeq *deltatab, P_Criteres cr) { int i, tmp; for(i=0; i != cr->bloc-1; i++) { tmp = cr->saut[i].max-cr->saut[i].min+1-cr->delta[i]*2; deltatab[cr->bloc-2-i] = pos % tmp; pos /= tmp; } if(pos != 0) { warning("file2Delta: conversion error"); return(0); } return (1); } /******************************************************************************/ /* initTabSauts */ /******************************************************************************/ void initTabSauts(P_Criteres cr) { int bloc = cr->bloc, **i,j, nbcodes; nbcodes = 1; for(j=0; j != bloc-1; j++) nbcodes *= cr->saut[j].max - cr->saut[j].min +1; if ( (code2Sauts = (int **) malloc(nbcodes * sizeof(int *)) ) == NULL) fatalError("criteres.c: initTabSauts: cannot allocate 'code2Sauts'\n"); for(j=0, i=code2Sauts; j != nbcodes; j++,i++) if ( (*i = (int *) malloc((bloc-1) * sizeof(int)) ) == NULL ) fatalError("criteres.c: initTabSauts: cannot allocate 'code2Sauts[j]'\n"); recFillTab(0, cr, &nbcodes, code2Sauts); } /******************************************************************************/ /* recFillTab */ /******************************************************************************/ int recFillTab(int bloc, P_Criteres cr, int *nbcodes, int **code2Sauts) { int i,j,k,pos=0,a; if(bloc != cr->bloc-2) a = recFillTab(bloc+1, cr, nbcodes, code2Sauts); else a = 1; *nbcodes /= cr->saut[bloc].max - cr->saut[bloc].min +1; for(i=0; i!=*nbcodes; i++) for(j=cr->saut[bloc].min; j!=cr->saut[bloc].max+1; j++) for(k=0; k!=a; k++) { code2Sauts[pos][bloc] = j; pos++; } printf("\n"); return( a*(cr->saut[bloc].max - cr->saut[bloc].min +1)); } /******************************************************************************/ /* allocBloc */ /******************************************************************************/ void allocBloc(P_Criteres cr, int bloc) { int i; cr->maxerrblocs = (LongSeq *) malloc(bloc*sizeof(LongSeq)); cr->longbloc = (Fourchette *) malloc(bloc*sizeof(Fourchette)); cr->saut = 
(Fourchette *) malloc(bloc*sizeof(Fourchette)); cr->flag_compobloc = (Flag *) malloc(bloc*sizeof(Flag)); cr->compobloc = (LongSeq **) malloc(bloc*sizeof(LongSeq *)); cr->delta = (LongSeq *) malloc(bloc*sizeof(LongSeq)); cr->palindrom = (LongSeq *) malloc(bloc*sizeof(LongSeq)); if(!cr->maxerrblocs || !cr->longbloc || !cr->saut || !cr->flag_compobloc || !cr->compobloc || !cr->delta || !cr->palindrom) fatalError("criteres.h: allocBloc: allocation error\n"); /* Initialisations */ for (i = 0; i != bloc; i++) { cr->saut[i].min = cr->saut[i].max = 0; cr->maxerrblocs[i] = -1; cr->longbloc[i].min = cr->longbloc[i].max = -1; cr->delta[i] = 0; cr->palindrom[i] = -1; } cr->flag_palindrom = FAUX; } SMILEv1.47/P_BLOCS+DELTA/Spell/src/spell.c0000644002404200237300000016252610066544017017244 0ustar lamaaoc00000000000000/******************************************************************************/ /* SMILE v1.47 - Extraction of structured motifs common to several sequences */ /* Copyright (C) 2004 L.Marsan (lama -AT- prism.uvsq.fr) */ /* */ /* This program is free software; you can redistribute it and/or */ /* modify it under the terms of the GNU General Public License */ /* as published by the Free Software Foundation; either version 2 */ /* of the License, or (at your option) any later version. */ /* */ /* This program is distributed in the hope that it will be useful, */ /* but WITHOUT ANY WARRANTY; without even the implied warranty of */ /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ /* GNU General Public License for more details. */ /* */ /* You should have received a copy of the GNU General Public License */ /* along with this program; if not, write to the Free Software */ /* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
*/ /******************************************************************************/ #include /******************************************************************************/ /* PROTOTYPES PRIVES */ /******************************************************************************/ /* Gestion des modeles acceptes */ void keepModel(P_mod , P_PileOcc, NbSeq, NbSeq, LongSeq, LongSeq *, int *cursaut, P_Criteres cr); /* essaie d'avancer d'une lettre dans un arc, et renvoie le noeud image */ Flag avanceBranche(P_occ, P_occ, int, int, Flag, P_Criteres, LongSeq); /* Lancement du saut */ NbSeq gestionSaut(P_mod model, P_PileOcc pocc, P_PileOcc poccnew, P_Criteres, int *nbocc_saut, int maxinter, LongSeq curbloc); /* explore les modeles */ Flag spellModels ( P_PileOcc pocc, P_PileOcc poccnew, P_PileOcc poccsaut, LongSeq longmod, LongSeq longcurbloc, LongSeq curbloc, P_mod model, P_occ next, Bit_Tab **colors_model, NbSeq nbseq, NbSeq tmp_quorum, P_Criteres cr, int **nbocc_saut, Bit_Tab ***bt_intermediaire, int deb_occ, int fin_occ, int *cursaut, LongSeq *longbloc, LongSeq *posdebbloc); /* Calcule le BT union de tous les BT occurrences */ NbSeq sommeBTOcc(P_PileOcc, Bit_Tab **); /* Compute CPU time */ static float PrintCpuTime(char); /******************************************************************************/ /* VARIABLES GLOBALES */ /******************************************************************************/ int *nbmod; LongSeq *maxlongmod, **maxlongbloc; signed char ** text; FILE ** f; /* EXTERNES from alphabet.c */ extern int nbSymbMod; extern int nbSymbSeq; extern char *nummod2str[127]; extern int carseq2num[127]; extern int comp[127]; extern Flag TabSymb[127][127]; extern int numJOKER; extern int numSAUT; /******************************************************************************/ /******************************************************************************/ /*********************** GESTION DES LISTES D'OCCURRENCES *********************/ 
/*********************************ET DES MODELES*******************************/ /******************************************************************************/ /******************************************************************************/ /* KeepModel */ /******************************************************************************/ /* Affiche (ou stocke si necessaire) les modeles trouves */ /******************************************************************************/ void keepModel(P_mod model, P_PileOcc pocc, NbSeq nbseq, NbSeq quorum, LongSeq l, LongSeq *longbloc, int *cursaut, P_Criteres cr) { int numfile, i, j; FILE *g; LongSeq *lb, *mb; numfile = delta2File(cursaut, cr); nbmod[numfile]++; g = f[numfile]; #if DEBUG_BASE printf("MODELE %s valide!\n",model->name); #endif j = model->lon; for(i=0; i!=j; i++) fprintf(g,"%s", nummod2str[model->name[i]]); fprintf(g," "); for(i=0; i!=j; i++) { if(model->name[i]==numJOKER) fprintf(g,"%c",JOKERinterne); else if(model->name[i]==numSAUT) fprintf(g,"%c",SAUTinterne); else fprintf(g,"%c", model->name[i]+SHIFTALPHA); } fprintf(g," %d", quorum); if(l > maxlongmod[numfile]) maxlongmod[numfile] = l; for(i=0, lb=longbloc, mb=maxlongbloc[numfile]; i!=cr->bloc; i++, lb++, mb++) if(*lb > *mb) *mb = *lb; #if OCC #if AFF_OCC fprintf(g,"\n"); #else fprintf(g,"\t"); #endif afficheLastOcc(g, pocc, l, cr); #else fprintf(g,"\n"); #endif } /******************************************************************************/ /******************************************************************************/ /************************* RECHERCHE DES MODELES ******************************/ /******************************************************************************/ /******************************************************************************/ /******************************************************************************/ /* avanceBranche */ /******************************************************************************/ /* Essaie 
d'avancer d'une lettre dans un arc. */
/* (English) Tries to extend the occurrence 'tmp' by one letter and stores    */
/* the extended occurrence in 'next'.                                         */
/*                                                                            */
/*   symbol     : model symbol being appended                                 */
/*   trans      : sequence symbol read on the tree                            */
/*   flag_noeud : FAUX when 'tmp' stops inside an edge, any other value when  */
/*                it stops at the end of its edge (a node), in which case     */
/*                'trans' selects the outgoing edge                           */
/*   curbloc    : index of the current box, selects cr->maxerrblocs[]         */
/*                                                                            */
/* Returns 1 on success.  Returns 0 when the node has no edge 'trans', or     */
/* when the mismatch would bring the global error count to cr->maxerr+1 or    */
/* the box error count to cr->maxerrblocs[curbloc]+1.  On a 0 return 'next'   */
/* is only partially filled in.                                               */
/******************************************************************************/
Flag avanceBranche(P_occ next, P_occ tmp, int symbol, int trans,
                   Flag flag_noeud, P_Criteres cr, LongSeq curbloc)
{
    /* The error accounting is written out in both branches on purpose: the  */
    /* original author duplicated it for speed, running the eliminating      */
    /* tests before the remaining assignments.                               */
    if (flag_noeud != FAUX)
    {
        /* End of the current edge reached: step onto the edge 'trans' */
        next->x = tmp->x->fils[tmp->num];
        if (next->x->fils[trans] == NULL)
            return 0;

        if ( equiv(symbol, trans) )
        {
            next->xerr    = tmp->xerr;
            next->blocerr = tmp->blocerr;
        }
        else
        {
            next->xerr = tmp->xerr + 1;
            if (next->xerr == cr->maxerr + 1)       /* global limit reached */
                return 0;

            next->blocerr = tmp->blocerr + 1;
            if (next->blocerr == cr->maxerrblocs[curbloc] + 1)  /* box limit */
                return 0;
        }

        next->num = trans;
        next->lon = 1;
    }
    else
    {
        /* Still inside the current edge: same edge, one letter further */
        if ( equiv(symbol, trans) )
        {
            next->xerr    = tmp->xerr;
            next->blocerr = tmp->blocerr;
        }
        else
        {
            next->xerr = tmp->xerr + 1;
            if (next->xerr == cr->maxerr + 1)       /* global limit reached */
                return 0;

            next->blocerr = tmp->blocerr + 1;
            if (next->blocerr == cr->maxerrblocs[curbloc] + 1)  /* box limit */
                return 0;
        }

        next->x   = tmp->x;
        next->num = tmp->num;
        next->lon = tmp->lon + 1;
    }

    /* Jump information is inherited unchanged */
    next->saut     = tmp->saut;
    next->codesaut = tmp->codesaut;

    return 1;
}

/******************************************************************************/
/* sommeBTOcc                                                                 */
/******************************************************************************/
/* Fait l'union des sequences d'une liste d'occurrence et renvoie le nombre   */
/* de ces sequences.
*/ /******************************************************************************/ NbSeq sommeBTOcc(P_PileOcc p, Bit_Tab ** bt) { LongSeq pos, precdummy; P_occ po; ReinitBitTab(bt); pos = p->pos-1; if (pos < 0) fatalError("spell.c: sommeBTOcc: wrong position in stack!\n"); po = p->occ+pos; precdummy = getPrecDummy(p); while ((pos != precdummy) && (po->x != NULL)) { #if DEBUG_BT printf("Fusion avec : "); #endif if (po->x->fils[po->num]->debut & LEAF_BIT) { fusionneBitTab(bt,((Feuille *)po->x->fils[po->num])->sequences); #if DEBUG_BT printBitTab(((Feuille *)po->x->fils[po->num])->sequences); #endif } else { fusionneBitTab(bt,po->x->fils[po->num]->sequences); #if DEBUG_BT printBitTab(po->x->fils[po->num]->sequences); #endif } pos--; po--; } #if DEBUG_BT printf("Somme BT : \n"); printBitTab(*bt); printf(" -> %d values\n", nbSequenceInBitTab(*bt)); #endif return nbSequenceInBitTab(*bt); } /******************************************************************************/ /* sommeBTOccPartielle */ /******************************************************************************/ /* Fait l'union des BR d'une partie de la pile d'occurrences */ /******************************************************************************/ void sommeBTOccPartielle( P_PileOcc p, Bit_Tab **bt, int nb_tranches, int *nboccsaut) { LongSeq pos = p->pos-1; P_occ po = p->occ+pos; int i, *ptr_saut; Bit_Tab **curbt; if (pos < 0) fatalError("spell.c: sommeBTOccPartielle: wrong position in stack!\n"); curbt = bt+nb_tranches-1; ptr_saut = nboccsaut+nb_tranches-1; while ( nb_tranches != 0 ) { ReinitBitTab(curbt); for ( i=*ptr_saut; i!=0; i--) { if (po->x->fils[po->num]->debut & LEAF_BIT) fusionneBitTab(curbt,((Feuille *)po->x->fils[po->num])->sequences); else fusionneBitTab(curbt,po->x->fils[po->num]->sequences); po--; } nb_tranches--; ptr_saut--; curbt--; } #if DEBUG_BT printf("Somme BT : \n"); printBitTab(*curbt); printf(" -> %d values\n", nbSequenceInBitTab(*curbt)); #endif } 
/******************************************************************************/ /* sauteSymbole */ /******************************************************************************/ int sauteSymbole(Occ curocc, P_mod model, P_PileOcc pocc, Fourchette *range, P_Criteres cr, LongSeq longsaut, int longmod, LongSeq curbloc) { LongSeq lmaxbr; Noeud *tmpnoeud; int res, trans; char carseq; tmpnoeud = curocc.x->fils[curocc.num]; if (tmpnoeud->debut & LEAF_BIT) lmaxbr = getValue(Liste_positions_fin,((Feuille *)tmpnoeud)->fin_deb) - (((Feuille *)tmpnoeud)->debut & LEAF_BIT_INV); else lmaxbr = tmpnoeud->fin - tmpnoeud->debut; #if DEBUG_SAUT printf("SauteSymbole: je gere le saut pour %d, noeud %p, etat: %d/%d branche %d\n", longsaut,curocc.x,curocc.lon,lmaxbr,curocc.num); #endif if (curocc.lon != lmaxbr) /* on est au milieu d'une branche */ { curocc.lon++; carseq = text[tmpnoeud->sequence_number] [(tmpnoeud->debut & LEAF_BIT_INV)+curocc.lon-1]; if(carseq==FINAL) /* si on rencontre un $ c'est fini */ return 0; if(longsaut<=(*range).max) { ajouteOcc2Pile(pocc, curocc.x, curocc.num, curocc.lon, curocc.xerr, curocc.blocerr, curocc.saut+1, addSaut2Code(curocc.blocerr, longsaut, curbloc, cr)); #if DEBUG_SAUT printf("Occ (br) num %d lon %d saut %d\n",curocc.num,curocc.lon, longsaut); afficheOcc(stdout, &curocc, longmod+1, 0); #endif return 1; } return 0; } else /* sinon on est a un noeud, plusieurs trans sont possibles */ { res = 0; if(longsaut<=(*range).max) { curocc.x = tmpnoeud; curocc.lon = 1; if ((tmpnoeud->debut & LEAF_BIT) == 0) for (trans = 0; trans != nbSymbSeq; trans++) { if (tmpnoeud->fils[trans] != NULL) { curocc.num = trans; res++; ajouteOcc2Pile(pocc, curocc.x, curocc.num, curocc.lon, curocc.xerr, curocc.blocerr, curocc.saut+1, addSaut2Code(curocc.blocerr, longsaut, curbloc, cr)); #if DEBUG_SAUT printf("Occ (nd) num %d lon %d saut %d\n", curocc.num, curocc.lon,longsaut); afficheOcc(stdout, &curocc, longmod, 0); #endif } } } return res; } } 
/******************************************************************************/ /* saute2MinSaut */ /******************************************************************************/ /* Avance les occ jusqu'a saut min. Les occurrences trouvees sont ajoutees * a la pile, avec cela de specifique que le champ 'blocerr' sert temporaire- * ment de stockage pour 'codesaut' */ /******************************************************************************/ int saute2MinSaut(Occ curocc, P_mod model, P_PileOcc pocc, Fourchette *range, P_Criteres cr, LongSeq longsaut, int longmod, LongSeq curbloc) { LongSeq lmaxbr; Noeud * tmpnoeud, *newtmpnoeud; int res = 0, newlongsaut, trans; char carseq; tmpnoeud = curocc.x->fils[curocc.num]; if (tmpnoeud->debut & LEAF_BIT) lmaxbr = getValue(Liste_positions_fin,((Feuille *)tmpnoeud)->fin_deb) - (((Feuille *)tmpnoeud)->debut & LEAF_BIT_INV); else lmaxbr = tmpnoeud->fin - tmpnoeud->debut; #if DEBUG_SAUT printf("Saute2MinSaut: j'ai gere le saut pour %d, noeud %p, etat: %d/%d branche %d\n", longsaut,curocc.x,curocc.lon,lmaxbr,curocc.num); #endif if (curocc.lon != lmaxbr) /* on est au milieu d'une branche */ { if ( lmaxbr-curocc.lon <= (*range).min-longsaut ) /* si on peut aller au bout de la branche */ { longsaut += lmaxbr-curocc.lon; curocc.lon = lmaxbr; #if DEBUG_SAUT printf("Saute2MinSaut: milieuBr, fast, je vais au bout %d/%d br %d et lgsaut %d\n", curocc.lon,lmaxbr,curocc.num,longsaut); #endif carseq = text[tmpnoeud->sequence_number] [(tmpnoeud->debut & LEAF_BIT_INV)+lmaxbr-1]; if(carseq != FINAL) /* si on rencontre un $ c'est fini */ res += saute2MinSaut(curocc, model, pocc, range, cr, longsaut, longmod, curbloc); #if DEBUG_SAUT else printf("Une qui tombe avant minima\n"); #endif } else { curocc.lon += (*range).min-longsaut; longsaut = (*range).min; #if DEBUG_SAUT printf("Saute2MinSaut: milieuBr, minsaut ds Br, je m'arrete a %d/%d num %d et lgsaut %d\n", curocc.lon,lmaxbr,curocc.num,longsaut); #endif res++; ajouteOcc2Pile(pocc, 
curocc.x, curocc.num, curocc.lon, curocc.xerr, curocc.codesaut, curocc.saut+longsaut, addSaut2Code(curocc.codesaut, longsaut, curbloc, cr) ); #if DEBUG_SAUT printf("Occ (br) num %d lon %d saut %d\n", curocc.num,curocc.lon, longsaut); afficheOcc(stdout, &curocc, longmod, 0); #endif } } else /* sinon on est a un noeud, plusieurs trans sont possibles */ { curocc.x = tmpnoeud; if ((tmpnoeud->debut & LEAF_BIT) == 0) { for (trans = 0; trans != nbSymbSeq; trans++) { curocc.num = trans; newlongsaut = longsaut; if (tmpnoeud->fils[trans] != NULL) { newtmpnoeud = tmpnoeud->fils[trans]; if (newtmpnoeud->debut & LEAF_BIT) lmaxbr = getValue(Liste_positions_fin, ((Feuille *)newtmpnoeud)->fin_deb) - (newtmpnoeud->debut & LEAF_BIT_INV); else lmaxbr = newtmpnoeud->fin - newtmpnoeud->debut; if ( lmaxbr <= (*range).min-longsaut ) { newlongsaut += lmaxbr; curocc.lon = lmaxbr; #if DEBUG_SAUT printf("Saute2MinSaut: noeud, fast, %d/%d, br %d, lgsaut %d\n", curocc.lon,lmaxbr,curocc.num,newlongsaut); #endif carseq = text[newtmpnoeud->sequence_number] [(newtmpnoeud->debut & LEAF_BIT_INV)+lmaxbr-1]; if(carseq != FINAL) /* si on rencontre un $ c'est fini */ { #if DEBUG_SAUT printf("Saute2MinSaut: finBr=$, c'est fini\n"); #endif res += saute2MinSaut(curocc, model, pocc, range, cr, newlongsaut, longmod, curbloc); } } else { curocc.lon = (*range).min-newlongsaut; newlongsaut = (*range).min; #if DEBUG_SAUT printf("Saute2MinSaut2: noeud %p, minsaut ds Br, %d/%d, br %d, lgsaut %d\n", curocc.x, curocc.lon,lmaxbr,curocc.num,newlongsaut); #endif res++; ajouteOcc2Pile(pocc, curocc.x, curocc.num, curocc.lon, curocc.xerr, curocc.codesaut, curocc.saut+newlongsaut, addSaut2Code(curocc.codesaut, newlongsaut, curbloc, cr)); #if DEBUG_SAUT printf("Occ (nd) num %d lon %d saut %d\n", curocc.num, curocc.lon,longsaut); afficheOcc(stdout, &curocc, longmod, 0); #endif } } } } } return res; } /******************************************************************************/ /* initBlocerrPile */ 
/******************************************************************************/ /* Sert a reinitialiser le champ 'blocerr' des occurrences trouvees pour * le saut, et qui etaient utilisess pour 'codesaut'. */ /******************************************************************************/ void initBlocerrPile(P_PileOcc p) { LongSeq pos = p->pos-1, precdummy; P_occ tmpocc = p->occ+pos; precdummy = getPrecDummy(p); while ((pos != precdummy) && (tmpocc->x != NULL) ) { tmpocc->blocerr = 0; pos--; tmpocc--; } } /******************************************************************************/ /* gestionSaut */ /******************************************************************************/ /* Gere le saut en classant les occurrences obtenues par longueur de saut */ /* Renvoie le nombre de tranches de saut */ /******************************************************************************/ NbSeq gestionSaut( P_mod model, P_PileOcc pocc, P_PileOcc poccnew, P_Criteres cr, int *nbocc_saut, int maxinter, LongSeq curbloc) { LongSeq pos, precdummy; P_occ tmpocc; Fourchette range = cr->saut[curbloc]; int i, oldpos, nb_tranches = 0, tmpres = 0, newoldpos, *ptr_saut; #if DEBUG_SAUT printf("Et je saute (modele %s)...\n",model->name); #endif ptr_saut = nbocc_saut; /* Initialisation du tableau de stockage des nb d'occ par tranche de saut */ for (i=0; i != maxinter; i++, ptr_saut++) *ptr_saut = 0; ptr_saut=nbocc_saut; /* Releve de la fin de pile d'occurrences */ ajouteDummy(pocc); oldpos = pocc->pos-1; #if DEBUG_SAUT printf("Le dummy de la pile des new occ est en %d\n",oldpos); #endif /* Extension au seuil minimal des occurrences courantes */ if (range.min != 0) { /* Releve des positions dans la pile des nouvelles occurrences */ pos = poccnew->pos-1; precdummy = getPrecDummy(poccnew); tmpocc = poccnew->occ+pos; while ((pos != precdummy) && (tmpocc->x != NULL) ) { tmpres += saute2MinSaut(*tmpocc, model, pocc, &range, cr, 0, model->lon+1, curbloc); pos--; tmpocc = poccnew->occ+pos; /** 
ATTENTION: c'est obligatoire cause realloc!! */ } if ( tmpres == 0 ) { depileRec(pocc); return 0; } } else { tmpres = copieLastOcc(pocc, poccnew); #if DEBUG_SAUT for(i=tmpres, tmpocc=pocc->occ+(pocc->pos-1); i!=0; i--, tmpocc--) printf("Occ num %d lon %d saut %d\n", tmpocc->num,tmpocc->lon, tmpocc->saut); #endif if (tmpres == 0) { depileRec(pocc); return 0; } /* Mise a jour des codesauts! */ if(curbloc != 0 || range.max!=0) { pos = pocc->pos-1; precdummy = getPrecDummy(pocc); tmpocc = pocc->occ+pos; while ((pos != precdummy) && (tmpocc->x != NULL)) { if(range.max!=0) tmpocc->blocerr = tmpocc->codesaut; if(curbloc !=0) tmpocc->codesaut= addSaut2Code(tmpocc->codesaut, 0, curbloc, cr); pos--; tmpocc--; } } } *ptr_saut = tmpres; ptr_saut++; nb_tranches++; #if DEBUG_SAUT printf("Etape de saut %d, j'en trouve %d\n",range.min,tmpres); #endif if ( range.min == range.max ) { if(range.max!=0) initBlocerrPile(pocc); return 1; } /* Extension du seuil minimal au seuil maximal */ for ( i = range.min; i != range.max; i++ ) { pos = newoldpos = pocc->pos-1; tmpocc = pocc->occ+pos; tmpres = 0; #if DEBUG_SAUT printf("Je m'arreterai en %d\n",oldpos+1); #endif while (pos != oldpos) { #if DEBUG_SAUT printf("saut: ext occ No %d\n",pos); #endif tmpres += sauteSymbole(*tmpocc, model, pocc, &range, cr, i+1, model->lon+1, curbloc); pos--; tmpocc = pocc->occ+pos; /** ATTENTION: c'est obligatoire cause realloc!! 
*/ } oldpos = newoldpos; *ptr_saut = tmpres; #if DEBUG_SAUT printf("Etape de saut %d, j'en trouve %d\n",i+1,tmpres); #endif if (tmpres == 0) { #if DEBUG_SAUT break; #endif initBlocerrPile(pocc); return nb_tranches; } ptr_saut++; nb_tranches++; } #if DEBUG_SAUT printf("PILE APRES SAUT:\n"); pos = pocc->pos-1; tmpocc = pocc->occ+pos; while(tmpocc && tmpocc->lon != -1) { printf("*** Occ num %d lon %d saut %d codesaut %d\n", tmpocc->num,tmpocc->lon, tmpocc->saut, tmpocc->codesaut); afficheOcc(stdout, tmpocc, model->lon, 0); tmpocc--; } tmpres=0; printf("\nTABLEAU NB_TRANCHES EN SORTIE DE SAUT: (nbtranches=%d)\n",nb_tranches); for(i=0; ilongueur.max == 0) || (longmod < cr->longueur.max)) && ( ( (cr->longbloc[curbloc].max == 0) || (longcurbloc < cr->longbloc[curbloc].max) ) ) && ( (cr->flag_palindrom == FAUX) || cr->palindrom[curbloc] == -1 || longcurbloc != longbloc[(int)(cr->palindrom[curbloc])] ) ) { /* Boucle sur les symboles de l'alphabet pourl'extension du modele ************/ for (symbol = 0; symbol != nbSymbMod; symbol++) { #if DEBUG_BASE printf("LONGMOD %d: j'etends %s vers %s%c\n",longmod,model->name, model->name,lettres[symbol]); #endif /* Pas de JOKER en premiere position */ if(longmod == 0 && symbol == numJOKER) continue; /* Gestion de la composition des modeles **************************************/ if (cr->flag_compo == VRAI || cr->flag_compobloc[curbloc] == VRAI ) { if ( (cr->compobloc[curbloc][symbol] == 0) || (cr->compo[symbol] == 0) ) continue; } /* Gestion des palindromes */ if (cr->flag_palindrom) { if (longcurbloc == 0) posdebbloc[curbloc] = model->lon; if (cr->palindrom[curbloc] != -1) { palbloc = cr->palindrom[curbloc]; if (symbol!= comp[model->name[posdebbloc[palbloc]+longbloc[palbloc]-1-longcurbloc]]) continue; } } /* Init variables de pile d'occs */ if ( fin_occ == 0 ) pos = pocc->pos-1; else pos = fin_occ; precdummy = getPrecDummy(pocc); tmpocc = pocc->occ+pos; videPile(poccnew); maxseq = 0; nbocc = 0; #if DEBUG_BASE printf("J'ENTRE (l=%d 
symbol=%d model=%s quorum=%d)\n",longmod,symbol, model->name,tmp_quorum); #endif while ( (pos!=precdummy) && (tmpocc->x != NULL) && (fin_occ == 0 || pos != deb_occ)) { lmaxbr = ((tmpocc->x->fils)[tmpocc->num]->debut & LEAF_BIT)? getValue(Liste_positions_fin, ((Feuille *)tmpocc->x->fils[tmpocc->num])->fin_deb) - (tmpocc->x->fils[tmpocc->num]->debut & LEAF_BIT_INV) : tmpocc->x->fils[tmpocc->num]->fin - tmpocc->x->fils[tmpocc->num]->debut; #if DEBUG_BASE if(longmod!=0) { printf("Je traite l'occ:%p num %d lon %d saut %d codesaut %d (longmod=%d)\n", tmpocc->x,tmpocc->num,tmpocc->lon,tmpocc->saut, tmpocc->codesaut, longmod); afficheOcc(stdout, tmpocc, longmod,0); printf("...et je trouve:\n"); } #endif /* on est au milieu d'une branche - une transition possible */ if (tmpocc->lon != lmaxbr) { carseq = text[tmpocc->x->fils[tmpocc->num]->sequence_number] [ (tmpocc->x->fils[tmpocc->num]->debut & LEAF_BIT_INV) + tmpocc->lon]; if ( (carseq != FINAL) && (avanceBranche(next, tmpocc, symbol, carseq2num[(int) carseq],0, cr, curbloc) ) ) { ajouteOcc2Pile(poccnew, next->x, next->num, next->lon, next->xerr,next->blocerr, next->saut, next->codesaut); #if DEBUG_BASE printf("occ:%p num %d lon %d saut %d codesaut %d (longmod=%d)\n", next->x,next->num,next->lon,next->saut, next->codesaut, longmod); afficheOcc(stdout, next, longmod+1,0); #endif nbocc++; if (next->x->fils[next->num]->debut & LEAF_BIT) { maxseq += nbSequenceInBitTab( ((Feuille *)next->x->fils[next->num])->sequences); #if DEBUG_BT printf("nb seq in bt (br): %d \n", nbSequenceInBitTab(((Feuille *) next->x->fils[next->num])->sequences)); #endif } else { maxseq += next->x->fils[next->num]->nb_element_bt; #if DEBUG_BT printf("nb seq in bt (br): %d \n", nbSequenceInBitTab( next->x->fils[next->num]->sequences)); #endif } } } /* sinon on est a un noeud, plusieurs trans sont eventuellement possibles */ else { for (trans = 0; trans != nbSymbSeq; trans++) { tmpocc = pocc->occ+pos; if (avanceBranche(next, tmpocc, symbol, trans, 1, cr, 
curbloc)) { ajouteOcc2Pile(poccnew, next->x, next->num, next->lon, next->xerr, next->blocerr, next->saut, next->codesaut); #if DEBUG_BASE printf("occ:%p num %d lon %d saut %d codesaut %d (longmod=%d)\n", next->x,next->num,next->lon,next->saut, next->codesaut, longmod); afficheOcc(stdout, next, longmod+1, 0); #endif nbocc++; if (next->x->fils[next->num]->debut & LEAF_BIT) { maxseq += nbSequenceInBitTab(((Feuille *) next->x->fils[next->num])->sequences); #if DEBUG_BT printf("nb seq in bt (nd): %d \n", nbSequenceInBitTab(((Feuille *) next->x->fils[next->num])->sequences)); #endif } else { maxseq += next->x->fils[next->num]->nb_element_bt; #if DEBUG_BT printf("nb seq in bt (nd): %d \n", nbSequenceInBitTab( next->x->fils[next->num]->sequences)); #endif } } } } /* Si on n'a plus d'occurrences dans la pile */ if(pos == 0) { #if DEBUG_BASE printf("break avec %d occ\n",nbocc); #endif break; } pos--; tmpocc = pocc->occ+pos; #if DEBUG_PILE printf("pos pile %d (adresse %p), len mod %d, nbocc %d\n",pos, tmpocc,longmod,nbocc); printf("x %p\n",tmpocc->x); #endif } #if DEBUG_BASE printf("J'ai trouve %d occ\n",nbocc); afficheOldOcc(poccnew, longmod+1); #endif if (nbocc == 0) continue; /***************/ /* CAS DU SAUT */ /***************/ tmp_quorum2 = -1; if ( (curbloc != cr->bloc-1) && (longcurbloc+1 >= cr->longbloc[curbloc].min ) && (maxseq >= cr->quorum) && ( (tmp_quorum2 = sommeBTOcc(poccnew, colors_model) ) >= cr->quorum) && ( (nb_tranches = gestionSaut(model, pocc, poccnew, cr, *nbocc_saut, cr->maxinter, curbloc)) != 0 ) ) { minsaut = cr->saut[curbloc].min; maxsaut = cr->saut[curbloc].max; delta = cr->delta[curbloc]; /* Calcul des BT pour chaque tranche de saut */ sommeBTOccPartielle(pocc, *bt_intermediaire, nb_tranches, *nbocc_saut); /* deb et fin stockent les positions des occ interessantes dans la pile */ new_deb_occ = pocc->carte[pocc->pos_carte-1]; new_fin_occ = new_deb_occ; tmpintptr = *nbocc_saut; for ( i=0; i<2*delta+1; i++, tmpintptr++) new_fin_occ += *tmpintptr; 
changeModel(model, symbol); changeModel(model, numSAUT); ajouteDummy(poccsaut); tmpint = copieLastOcc(poccsaut,poccnew); #if DEBUG_SAUT printf("J'ai copie %d occ de Pnew->Psaut\n",tmpint); afficheOldOcc(poccnew,longmod+1); #endif videPile(poccnew); zarb_ext = 1; if ( cr->flag_compo == VRAI || cr->flag_compobloc[curbloc] == VRAI ) { cr->compo[symbol]--; cr->compobloc[curbloc][symbol]--; } longbloc[curbloc] = longcurbloc+1; k = 0; tmpint = nb_tranches; for(i=minsaut+delta; (i+delta<=maxsaut) && (tmpint != 0); i++, tmpint--, k++) { /* Mise a jour des positions des occurrences interessantes dans la pile */ if(k!=0) { new_deb_occ += (*nbocc_saut)[k-1]; new_fin_occ += (*nbocc_saut)[k+2*delta]; } cursaut[curbloc] = i; ReinitBitTab(colors_model); for(j=i-delta; j!=i+delta+1; j++) fusionneBitTab(colors_model, (*bt_intermediaire)[j-minsaut]); if ( nbSequenceInBitTab(*colors_model) >= cr->quorum ) { #if DEBUG_SAUT printf("Modele %s : La tranche [%d-%d] est acceptee avec\n", model->name,i-delta, i+delta); printf("fin occ: %d deb occ: %d\n",new_fin_occ,new_deb_occ); for(tmpint2=new_fin_occ; tmpint2!=new_deb_occ;tmpint2--) { printf("Pos pile %d:\n",tmpint2); afficheOcc(stdout,pocc->occ+tmpint2, longmod+1, 0); } #endif zarb_back += spellModels(pocc, poccnew,poccsaut, longmod+1, 0, curbloc+1, model, next, colors_model, nbseq, tmp_quorum2, cr, nbocc_saut+1, bt_intermediaire+1, new_deb_occ, new_fin_occ, cursaut, longbloc, posdebbloc); } } if ( cr->flag_compo == VRAI || cr->flag_compobloc[curbloc] == VRAI ) { cr->compo[symbol]++; cr->compobloc[curbloc][symbol]++; } decrModel(model); /* vire la premiere lettre du nouveau bloc */ decrModel(model); /* vire le symbole de saut */ videPile(poccnew); tmpint = copieLastOcc(poccnew,poccsaut); #if DEBUG_SAUT printf("J'ai copie %d occ de Psaut->Pnew\n",tmpint); afficheOldOcc(poccnew,longmod+1); #endif depileRec(poccsaut); depileRec(pocc); } #if DEBUG_BASE printf("nbocc = %d\n",nbocc); if (nbocc<=0) printf("Sortie nbocc\n"); else if (maxseq < 
cr->quorum) printf("Sortie maxseq %ld\n",maxseq); else printf("Calcul de quorum (maxseq = %ld) tmp_quorum2=%d\n", maxseq,tmp_quorum2); #endif if ( (maxseq >= cr->quorum) && ( tmp_quorum2!=-1 ? tmp_quorum2 >= cr->quorum: (tmp_quorum2 = sommeBTOcc(poccnew, colors_model) ) >= cr->quorum)) { #if DEBUG_BASE printf("Accepte (res quorum=%d)\n", tmp_quorum2); #endif if(symbol == numJOKER) zarb_ext = 1; else nbnewmod++; changeModel(model,symbol); ajouteDummy(pocc); transferePile2Pile(pocc, poccnew); if ( cr->flag_compo == VRAI || cr->flag_compobloc[curbloc] == VRAI ) { cr->compo[symbol]--; cr->compobloc[curbloc][symbol]--; } zarb_back += spellModels(pocc, poccnew,poccsaut, longmod+1, longcurbloc+1, curbloc, model, next, colors_model, nbseq, tmp_quorum2, cr, nbocc_saut, bt_intermediaire, 0, 0, cursaut, longbloc, posdebbloc); if ( cr->flag_compo == VRAI || cr->flag_compobloc[curbloc] == VRAI ) { cr->compo[symbol]++; cr->compobloc[curbloc][symbol]++; } depileRec(pocc); /* on decremente la longueur du modele de 1 */ decrModel(model); } #if DEBUG_BASE else printf("Refuse curquorum=%d quorum=%d\n",tmp_quorum2,cr->quorum); #endif } /* Si: il n'y pas eu d'extension REGULIERE, la longueur courante est valide, */ /* ET [il n'y a pas eu d'extension bizarre (joker, saut) OU ces extensions */ /* ont pose un probleme (modele se terminant par jokers)] */ if ( (curbloc == cr->bloc-1) /*&& (nbnewmod == 0)*/ && (longmod >= cr->longueur.min) && ( longcurbloc >= cr->longbloc[curbloc].min ) && ( cr->flag_palindrom == FAUX || cr->palindrom[curbloc] == -1 || longcurbloc == longbloc[(int)(cr->palindrom[curbloc])] ) && ( (zarb_back!=0) || (zarb_ext==0) ) ) { /* A VIRER? ce test est il inutile? 
*/ if ( (model->name[model->lon-1] != numJOKER) && (model->name[model->lon-1] != numSAUT) ) { longbloc[curbloc] = longcurbloc; keepModel(model, pocc, nbseq, tmp_quorum, longmod, longbloc, cursaut, cr); return(0); } else return(1); } } else if ( (curbloc == cr->bloc-1)) /* && ( ( (cr->longueur.max != 0) && (longmod == cr->longueur.max) ) */ /* || ( (cr->longbloc[curbloc].max != 0) */ /* && (longcurbloc <= cr->longbloc[curbloc].max) ) ) ) */ { /* A VIRER? ce test est il inutile? */ if ( (model->name[model->lon-1] != numJOKER) && (model->name[model->lon-1] != numSAUT)) { longbloc[curbloc] = longcurbloc; keepModel(model, pocc, nbseq, tmp_quorum, longmod, longbloc, cursaut, cr); return(0); } else return(1); } return(0); } /******************************************************************************/ /* doSpell */ /******************************************************************************/ /* Lance la recursion sur les modeles. */ /******************************************************************************/ void doSpell(P_Criteres cr, NbSeq nbseq, Noeud *root) { /* bloc est un indicateur du bloc en cours de construction */ P_mod model; P_PileOcc pocc, poccnew, poccsaut; P_occ next; Bit_Tab *colors_model, ***bt_intermediaire; int **nbocc_saut, i, j, *cursaut; LongSeq *longbloc; LongSeq *posdebbloc=NULL; /* Creation du faux pere de la racine (pour faciliter la recursion) */ Noeud *root_pere = Alloc_Noeud(); root_pere->fils[Translation_Table[FINAL]] = root; root_pere->sequence_number = 0; root->debut = 0; root->fin = 1; root->sequence_number = 0; /* Allocation du modele */ model = allocModel(); model->lon = 0; /* Allocation du tableau de longueur de bloc courant */ longbloc = (LongSeq *) malloc(cr->bloc * sizeof(LongSeq)); if(longbloc==NULL) fatalError("doSpell: cannot allocate 'longbloc'"); /* Allocation de l'occurrence courante */ next = (P_occ) calloc (1,sizeof(Occ)); if (next == NULL) fatalError("doSpell: cannot allocate 'next'\n"); initOcc(next); /* Allocation 
du tableau de bits courant et BT intermediaires */ bt_intermediaire = (Bit_Tab ***) malloc((cr->bloc-1) * sizeof(Bit_Tab **)); if (bt_intermediaire == NULL) fatalError("doSpell: cannot allocate 'bt_intermediaire'\n"); for (i=0; i!=cr->bloc-1; i++) { bt_intermediaire[i] = (Bit_Tab **) malloc(cr->maxsaut * sizeof(Bit_Tab *)); if (bt_intermediaire[i] == NULL) fatalError("doSpell: cannot allocate 'bt_intermediaire[i]'\n"); for (j=0; j!=cr->maxsaut; j++) bt_intermediaire[i][j] = AllocBitTab(); } colors_model = AllocBitTab(); ReinitBitTab(&colors_model); /* Allocation du tableau de comptage du nb d'occ par tranche de saut */ nbocc_saut = (int **) malloc( (cr->bloc-1) * sizeof(int *)); if (nbocc_saut == NULL) fatalError("doSpell: cannot allocate 'nbocc_saut'\n"); for(i=0; i!=cr->bloc-1; i++) { nbocc_saut[i] = (int *) malloc(cr->maxsaut * sizeof(int)); if (nbocc_saut[i] == NULL) fatalError("doSpell: cannot allocate 'nbocc_saut[i]'\n"); } /* Allocation du tableau d'indication des saut effectues */ cursaut = (int *) malloc((cr->bloc-1) * sizeof(int) ); if (cursaut == NULL) fatalError("doSpell: cannot allocate 'cursaut'\n"); /* Allocation des piles d'occurrences */ poccsaut = creePileOcc(); poccnew = creePileOcc(); pocc = creePileOcc(); /* Allocation de la structure de palindromes */ if ( cr->flag_palindrom ) { posdebbloc = (LongSeq *) malloc(cr->bloc * sizeof(LongSeq)); if (posdebbloc == NULL) fatalError("doSpell: cannot allocate 'posdebbloc'\n"); } /* Ajout de l'occurrence nulle dans la pile d'occurrence */ ajouteInitOcc2Pile(pocc, root_pere); fprintf(stderr,"** Models extraction **\n"); barre((int)pow((double)nbSymbMod, 3.0)); /* ...et lancement de la recursion */ spellModels(pocc, poccnew, poccsaut, 0, 0, 0, model, next, &colors_model, nbseq, 0, cr, nbocc_saut, bt_intermediaire, 0, 0, cursaut, longbloc, posdebbloc); /* Liberation des structures */ free(next); free(model->name); free(model); for (i=0; i!=cr->bloc-1; i++) { for (j=0; j!=cr->maxsaut; j++) 
free(bt_intermediaire[i][j]); free(bt_intermediaire[i]); } free(bt_intermediaire); free(colors_model); free(cursaut); for(i=0; ibloc-1; i++) free(nbocc_saut[i]); free(nbocc_saut); liberePileOcc(pocc); liberePileOcc(poccnew); liberePileOcc(poccsaut); } /******************************************************************************/ /******************************************************************************/ /********************************** MAIN **************************************/ /******************************************************************************/ /******************************************************************************/ int main(int argc, char **argv) { FILE *g; FastaSequence **seq; Flag readok; char infini = 0; NbSeq nbtxt; float quorum = 0.0, user_time; int i, j, taille, siztxt, posarg, nbmodtot, nbfiles; long int nbsymb; LongSeq maxlongsaut = 0; Noeud *arbre_suffixe; Criteres criteres; LongSeq maxlongmodel, *deltatab; Symbole *alphaseq; posarg = 4; /* QUORUM */ quorum = atof(argv[posarg++]); /* BLOCS */ criteres.bloc = atoi(argv[posarg++]); if(criteres.bloc < 2) fatalError("Incorrect boxes number\n"); allocBloc(&criteres, criteres.bloc); /* LONGUEUR MIN */ criteres.longueur.min = (LongSeq)atoi(argv[posarg++]); /* LONGUEUR MAX */ criteres.longueur.max = (LongSeq)atoi(argv[posarg++]); if ( criteres.longueur.max == 0 ) infini = 1; /* ERREURS GLOBALES */ criteres.maxerr = (LongSeq)atoi(argv[posarg++]); maxlongmodel = 0; /* PARAMETRES BLOCS ***********************************************************/ if(criteres.bloc > 1) { for(i = 0; i != criteres.bloc; i++ ) { /* LONGUEUR MIN BLOC */ criteres.longbloc[i].min = (LongSeq)atoi(argv[posarg++]); /* LONGUEUR MAX BLOC */ criteres.longbloc[i].max = (LongSeq)atoi(argv[posarg++]); if ( criteres.longbloc[i].max == 0 ) infini = 1; maxlongmodel += criteres.longbloc[i].max; /* ERREURS BLOC */ criteres.maxerrblocs[i] = atoi(argv[posarg++]); if(i != criteres.bloc-1 ) { /* SAUT MIN BLOC */ 
criteres.saut[i].min = (LongSeq)atoi(argv[posarg++]); /* SAUT MAX BLOC */ criteres.saut[i].max = (LongSeq)atoi(argv[posarg++]); maxlongsaut += criteres.saut[i].max; /* DELTA BLOC */ criteres.delta[i] = (LongSeq) atoi(argv[posarg++]); } } } if ( infini == 0 ) { if (maxlongmodel < criteres.longueur.max) maxlongmodel = criteres.longueur.max; maxlongmodel += maxlongsaut; } else maxlongmodel = INT_MAX; /******************************************************************************/ /* DEBUT DU TRAITEMENT */ /******************************************************************************/ /* Allocations */ seq = (FastaSequence **) malloc(GRAINSEQ * sizeof(FastaSequence *)); text = (signed char **) malloc(GRAINSEQ * sizeof(signed char *)); if(!seq || !text) fatalError("main: seq/text: cannot allocate\n"); siztxt = GRAINSEQ; /* Ouverture du fichier contenant les sequences */ g = fopen (argv[2],"r"); if(g==NULL) fatalError("main: cannot open FASTA file"); readok = 1; nbtxt = 0; nbsymb = 0; /* Stockage des sequences en memoire */ do { if(nbtxt == siztxt) { siztxt *= 2; seq = (FastaSequence **) realloc(seq,siztxt * sizeof(FastaSequence *)); text = (signed char **) realloc(text, siztxt * sizeof(signed char *)); if(!seq || !text) fatalError("main: seq/text: cannot reallocate\n"); } seq[nbtxt] = NewFastaSequence(); readok = ReadFastaSequence(g, seq[nbtxt]); if (readok) { nbsymb += seq[nbtxt]->length; taille = seq[nbtxt]->length+1; text[nbtxt] = (signed char*) malloc ((taille+2)*sizeof(signed char)); if (text[nbtxt] == NULL) fatalError("main: cannot allocate 'text'\n"); strcpy((char *) text[nbtxt],seq[nbtxt]->seq); text[nbtxt][taille-1] = FINAL; text[nbtxt][taille] = '\0'; nbtxt++; } } while (readok); fclose(g); if (nbtxt == 0) fatalError("main: no sequences in FASTA file"); if (nbtxt == 1) fatalError("main: one sequence only in FASTA file"); criteres.nbsymb = nbsymb; if (quorum == 0.0) criteres.quorum = (NbSeq) ceil( (double) (70*nbtxt)/100.0); else criteres.quorum = (NbSeq) 
ceil( (double) (quorum*nbtxt)/100.0); if(criteres.quorum==1) warning("quorum value is 1 sequence"); else if(criteres.quorum<1) fatalError("quorum value is lower than 1 sequence"); /******************************************************************************/ /* Chargement alphabet sequences et modeles */ if(!(g=fopen(argv[1],"r"))) fatalError("main: cannot open alphabet file\n"); initAlphabet(); if(!(alphaseq = chargeAlphabet(g, (Symbole **) text, nbtxt))) fatalError("main: wrong alphabet file format\n"); fclose(g); /******************************************************************************/ /* COMPOSITION (traitee apres car besoin alphabet modeles) */ /* S'il reste des arguments, c'est la composition */ setCompoPal(&criteres, argv+posarg, argc-posarg); /* Transformation de l'alphabet (ex: AG => R) */ transAlphMod(criteres.flag_palindrom); /******************************************************************************/ /* Construction de l'arbre compact generalise */ fprintf(stderr, "** Suffix tree construction **\n"); barre(nbtxt); Init_All(alphaseq,0,nbtxt); arbre_suffixe = Construction_Arbre((unsigned char *)text[0], maxlongmodel); barre(0); for (i = 1; i != nbtxt; i++) { arbre_suffixe=AjouteSequence(arbre_suffixe,(unsigned char *)text[i],maxlongmodel); barre(0); } fprintf(stderr,"\n"); /******************************************************************************/ /* Liberation de la structure Fasta */ for(i=0;i != nbtxt;i++) FreeFastaSequence(seq[i]); free(seq); UpdateBit_TabForAllTree(arbre_suffixe); /* if (flag_tree == VRAI) */ /* Print_Tree(arbre_suffixe,1,0); */ /************************ enumeration des resultats ***************************/ printf("Extraction is going to be made with the following parameters:\n"); printf("FASTA file: %s\n",argv[2]); printf("Alphabet file: %s\n",argv[1]); printf("Output file: %s\n",argv[3]); printf("Total min length: %d\n",criteres.longueur.min); if (criteres.longueur.max == 0) printf("Total max length: MAX\n"); 
else printf("Total max length: %d\n",criteres.longueur.max); printf("Boxes: %d\n",criteres.bloc); printf("Total number of subst.: %d\n",criteres.maxerr); printf("Quorum: %f%% (%d sequences in %d)\n\n", quorum,criteres.quorum,nbtxt); if (criteres.flag_compo) { for (i = 0; i != nbSymbMod; i++) { if (criteres.compo[i] != -1) printf("Total max composition in %s: %d\n",nummod2str[i], criteres.compo[i]); } } if (criteres.bloc > 1) { for (i = 0; i != criteres.bloc; i++) { printf("\nBOX %d\n",i+1); printf("Min length: %d\n",criteres.longbloc[i].min); if (criteres.longbloc[i].max == 0) printf("Max length: MAX\n"); else printf("Max length: %d\n", criteres.longbloc[i].max); printf("Max number of subst.: %d\n", criteres.maxerrblocs[i]); if (i != criteres.bloc-1) { printf("Min spacer length: %d\n",criteres.saut[i].min); printf("Max spacer length: %d\n",criteres.saut[i].max); if(criteres.delta != 0) printf("Delta : %d\n",criteres.delta[i]); else printf("Delta : NON\n"); } if (criteres.flag_compobloc[i]) { for (j = 0; j != nbSymbMod; j++) if (criteres.compobloc[i][j] != -1) printf("Max composition in %s: %d\n", nummod2str[j],criteres.compobloc[i][j]); } if (criteres.palindrom[i]!=-1) printf("Palindrom of box: %d\n", criteres.palindrom[i]+1); } } fprintf(stderr,"\n ------ CHECK THESE PARAMETERS! 
------\n"); criteres.maxinter = 0; criteres.maxsaut = 0; nbfiles = 1; for(i=0; i criteres.maxsaut) criteres.maxsaut = j; if(criteres.delta[i] == 0) j = 1; else { if((j=(criteres.saut[i].max-criteres.saut[i].min+1-criteres.delta[i]*2)) > criteres.maxinter) criteres.maxinter = j; } nbfiles *= j; } f = (FILE **) malloc (nbfiles * sizeof(FILE *) ); nbmod = (int *) calloc(nbfiles, sizeof(int)); if(f==NULL || nbmod==NULL) fatalError("main: cannot allocate 'f/nbmod'"); if(!initFiles(f,argv[3],&criteres)) fatalError("main: cannot initialize output files\n"); initTabSauts(&criteres); /* Initialisax de maxlongmod pour recalcul de vraie longueur max */ maxlongmod = (LongSeq *) calloc(nbfiles , sizeof(LongSeq)); /* Idem maxlongbloc */ maxlongbloc = (LongSeq **) malloc(nbfiles * sizeof(LongSeq *)); if(maxlongmod==NULL || maxlongbloc==NULL) fatalError("main: cannot allocate 'maxlongmod/maxlongbloc'"); for(i=0; i!=nbfiles; i++) { maxlongbloc[i] = (LongSeq *) calloc (criteres.bloc , sizeof(LongSeq)); if(maxlongbloc[i]==NULL) fatalError("main: cannot allocate 'maxlongbloc[i]'"); } /******************************************************************************/ /******************************************************************************/ /* Fonction Principale */ PrintCpuTime(1); doSpell(&criteres,nbtxt,arbre_suffixe); user_time=PrintCpuTime(0); /******************************************************************************/ /******************************************************************************/ /******************************************************************************/ /* Insertion de la ligne d'information en tete des fichiers de sortie */ deltatab = (LongSeq *) malloc(criteres.bloc * sizeof(LongSeq)); if(deltatab==NULL) fatalError("main: cannot allocate 'deltatab', header of output will be wrong"); for(i=0,nbmodtot=0;i!=nbfiles;i++) { fprintf(f[i],"Nb models: %d\nUser time : %.2f sec.\n", nbmod[i], user_time); nbmodtot+=nbmod[i]; /* Insertion de la ligne de 
parametres dans le fichier de sortie */ rewind(f[i]); fprintf(f[i],"%%%%%% %d %d/%d %ld %d %d %d",criteres.bloc, criteres.quorum, nbtxt, criteres.nbsymb, criteres.longueur.min, maxlongmod[i], criteres.maxerr); file2Delta(i, deltatab, &criteres); for(j=0; j!=criteres.bloc; j++) { fprintf(f[i]," %d %d %d",criteres.longbloc[j].min, maxlongbloc[i][j], criteres.maxerrblocs[j]); if(j!=criteres.bloc-1) fprintf(f[i]," %d %d", criteres.saut[j].min+deltatab[j], criteres.saut[j].min+deltatab[j]+2*criteres.delta[j]); } /* Ecriture du nom du fichier alphabet utilise et de l'alphabet des sequences */ fprintf(f[i], " %s %s", argv[1], alphaseq); fclose(f[i]); } printf("\nNb models: %d\nUser time : %.2f sec.\n", nbmodtot, user_time); for(i=0;i!=nbtxt;i++) free(text[i]); free(text); /* Liberations */ free(f); free(nbmod); Free_Arbre(arbre_suffixe); return(0); } /******************************************************************************/ /* PrintCpuTime */ /******************************************************************************/ static float PrintCpuTime(char initIt) { float ust; struct tms tms; static float dust; times(&tms); ust = (float) tms.tms_utime; if (initIt) { dust = ust; return 0.0; } else { ust -= dust; return ust / sysconf(_SC_CLK_TCK); } } SMILEv1.47/P_BLOCS+DELTA/Spell/src/global.c0000777002404200237300000000000010066542217024602 2../../../P_BLOCS/Spell/src/global.custar lamaaoc00000000000000SMILEv1.47/P_BLOCS+DELTA/Spell/src/model.c0000777002404200237300000000000010066542217024302 2../../../P_BLOCS/Spell/src/model.custar lamaaoc00000000000000SMILEv1.47/P_BLOCS+DELTA/Spell/src/alphabet.c0000777002404200237300000000000010066542217025442 2../../../P_BLOCS/Spell/src/alphabet.custar lamaaoc00000000000000SMILEv1.47/P_BLOCS+DELTA/Spell/src/barre.c0000777002404200237300000000000010066542217024270 2../../../P_BLOCS/Spell/src/barre.custar lamaaoc00000000000000SMILEv1.47/P_BLOCS+DELTA/Spell/src/pile_occ.c0000777002404200237300000000000010066542217025434 
2../../../P_BLOCS/Spell/src/pile_occ.custar lamaaoc00000000000000SMILEv1.47/P_BLOCS+DELTA/Spell/include/0000755002404200237300000000000010066543770016606 5ustar lamaaoc00000000000000SMILEv1.47/P_BLOCS+DELTA/Spell/include/occ.h0000777002404200237300000000000010066542217025114 2../../../P_BLOCS/Spell/include/occ.hustar lamaaoc00000000000000SMILEv1.47/P_BLOCS+DELTA/Spell/include/criteres.h0000644002404200237300000000625210066543746020607 0ustar lamaaoc00000000000000/******************************************************************************/ /* SMILE v1.47 - Extraction of structured motifs common to several sequences */ /* Copyright (C) 2004 L.Marsan (lama -AT- prism.uvsq.fr) */ /* */ /* This program is free software; you can redistribute it and/or */ /* modify it under the terms of the GNU General Public License */ /* as published by the Free Software Foundation; either version 2 */ /* of the License, or (at your option) any later version. */ /* */ /* This program is distributed in the hope that it will be useful, */ /* but WITHOUT ANY WARRANTY; without even the implied warranty of */ /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ /* GNU General Public License for more details. */ /* */ /* You should have received a copy of the GNU General Public License */ /* along with this program; if not, write to the Free Software */ /* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
*/ /******************************************************************************/ #ifndef _CRITERES_H #define _CRITERES_H #include #include #include #include #include /******************************************************************************/ /* STRUCTURE DE STOCKAGE DES CRITERES DE RECHERCHE */ /******************************************************************************/ typedef struct struct_fourchette { LongSeq min; LongSeq max; } Fourchette, *P_Fourchette; typedef struct struct_criteres { LongSeq **compobloc; LongSeq *maxerrblocs; LongSeq *compo; Fourchette *longbloc; Fourchette *saut; LongSeq *delta; Flag *flag_compobloc; LongSeq *palindrom; long int nbsymb; Fourchette longueur; LongSeq maxerr; LongSeq maxinter; /* Nb max d'intervalles delta dans un saut */ LongSeq maxsaut; /* Longueur max d'un saut */ NbSeq quorum; char bloc; Flag flag_compo; Flag flag_palindrom; } Criteres, *P_Criteres; /******************************************************************************/ /* FONCTIONS PUBLIQUES */ /******************************************************************************/ void setCompoPal(P_Criteres cr, char **argv, int argc); int initFiles(FILE **f, char *nom, P_Criteres cr); int delta2File(LongSeq *deltatab, P_Criteres cr); int file2Delta(int pos, LongSeq *deltatab, P_Criteres cr); int addSaut2Code(int oldcode, LongSeq saut, LongSeq curbloc, P_Criteres cr); void initTabSauts(P_Criteres); void allocBloc(P_Criteres cr, int bloc); #endif SMILEv1.47/P_BLOCS+DELTA/Spell/include/spell.h0000777002404200237300000000000010066542217026042 2../../../P_BLOCS/Spell/include/spell.hustar lamaaoc00000000000000SMILEv1.47/P_BLOCS+DELTA/Spell/include/global.h0000644002404200237300000001101310066543752020213 0ustar lamaaoc00000000000000/******************************************************************************/ /* SMILE v1.47 - Extraction of structured motifs common to several sequences */ /* Copyright (C) 2004 L.Marsan (lama -AT- prism.uvsq.fr) */ /* */ /* This 
program is free software; you can redistribute it and/or */ /* modify it under the terms of the GNU General Public License */ /* as published by the Free Software Foundation; either version 2 */ /* of the License, or (at your option) any later version. */ /* */ /* This program is distributed in the hope that it will be useful, */ /* but WITHOUT ANY WARRANTY; without even the implied warranty of */ /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ /* GNU General Public License for more details. */ /* */ /* You should have received a copy of the GNU General Public License */ /* along with this program; if not, write to the Free Software */ /* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ /******************************************************************************/ #ifndef _GLOBAL_H #define _GLOBAL_H #include #include #define FAUX 0 #define VRAI 1 typedef char Flag; /******************************************************************************/ /* Flags */ /******************************************************************************/ /* DEBUGGING */ #define DEBUG_BASE 0 /* Debug base */ #define DEBUG_BT 0 /* Tableaux de bits */ #define DEBUG_SAUT 0 /* Procedures de saut */ #define DEBUG_PILE 0 /* Pile d'occurrences */ #define DEBUG_TREE 0 /* Arbre suffixe : HS bicoz Julien */ /******************************************************************************/ /* Define dependants du jeu de donnees */ /******************************************************************************/ #define ALPHABET 5 /* Grain d'allocation de la taille du modele */ #define GRAIN_SIZMOD 1000 /******************************************************************************/ /* Caracteres speciaux */ /******************************************************************************/ /* => dans symb.h */ /******************************************************************************/ /* Types */ 
/******************************************************************************/ /* Nombre de sequences */ #define NbSeq int /* Longueur de sequence */ #define LongSeq int /* Type de l'alphabet */ #define Alpha int /******************************************************************************/ /* Nom du fichier de stockage des noms de fichiers generes */ /******************************************************************************/ #define NAMEFILE ".namefile" /******************************************************************************/ /* L'affichage fur et a mesure est obligatoire si on veut les occurrences */ /******************************************************************************/ #if !OCC #undef AFF_OCC #define AFF_OCC 0 #endif /******************************************************************************/ /* Active DEBUG_BASE si l'un des DEBUGs est active */ /******************************************************************************/ #if DEBUG_BT || DEBUG_SAUT || DEBUG_PILE || DEBUG_TREE #undef DEBUG_BASE #define DEBUG_BASE 1 #endif /******************************************************************************/ /* Fonctions basiques */ /******************************************************************************/ void fatalError(char *msg); void warning(char *msg); int entiers(char); void entree(void); void initEntiers(void); #endif SMILEv1.47/P_BLOCS+DELTA/Spell/include/model.h0000777002404200237300000000000010066542217026004 2../../../P_BLOCS/Spell/include/model.hustar lamaaoc00000000000000SMILEv1.47/P_BLOCS+DELTA/Spell/include/symb.h0000777002404200237300000000000010066542217025530 2../../../P_BLOCS/Spell/include/symb.hustar lamaaoc00000000000000SMILEv1.47/P_BLOCS+DELTA/Spell/include/alphabet.h0000777002404200237300000000000010066542217027144 2../../../P_BLOCS/Spell/include/alphabet.hustar lamaaoc00000000000000SMILEv1.47/P_BLOCS+DELTA/Spell/include/barre.h0000777002404200237300000000000010066542217025772 
2../../../P_BLOCS/Spell/include/barre.hustar lamaaoc00000000000000SMILEv1.47/P_BLOCS+DELTA/Spell/include/pile_occ.h0000777002404200237300000000000010066542217027136 2../../../P_BLOCS/Spell/include/pile_occ.hustar lamaaoc00000000000000SMILEv1.47/P_BLOCS+DELTA/include/0000755002404200237300000000000010066544041015517 5ustar lamaaoc00000000000000SMILEv1.47/P_BLOCS+DELTA/include/global_fonctions.h0000777002404200237300000000000010066542217030077 2../../P_BLOCS/include/global_fonctions.hustar lamaaoc00000000000000SMILEv1.47/P_BLOCS+DELTA/include/allocateurs.h0000777002404200237300000000000010066542217026067 2../../P_BLOCS/include/allocateurs.hustar lamaaoc00000000000000SMILEv1.47/P_BLOCS+DELTA/include/structures.h0000777002404200237300000000000010066542217025701 2../../P_BLOCS/include/structures.hustar lamaaoc00000000000000SMILEv1.47/P_BLOCS+DELTA/include/libsysk.h0000777002404200237300000000000010066542217024373 2../../P_BLOCS/include/libsysk.hustar lamaaoc00000000000000SMILEv1.47/P_BLOCS+DELTA/include/Gtypes.h0000777002404200237300000000000010066542217023761 2../../P_BLOCS/include/Gtypes.hustar lamaaoc00000000000000SMILEv1.47/P_BLOCS+DELTA/include/liste_pos.h0000777002404200237300000000000010066542217025235 2../../P_BLOCS/include/liste_pos.hustar lamaaoc00000000000000SMILEv1.47/P_BLOCS+DELTA/include/define.h0000777002404200237300000000000010066542217023717 2../../P_BLOCS/include/define.hustar lamaaoc00000000000000SMILEv1.47/P_BLOCS+DELTA/include/construction.h0000777002404200237300000000000010066542217026517 2../../P_BLOCS/include/construction.hustar lamaaoc00000000000000SMILEv1.47/P_BLOCS+DELTA/include/struct_tab.h0000777002404200237300000000000010066542217025557 2../../P_BLOCS/include/struct_tab.hustar lamaaoc00000000000000SMILEv1.47/P_BLOCS+DELTA/include/bit_tab.h0000777002404200237300000000000010066542217024243 2../../P_BLOCS/include/bit_tab.hustar lamaaoc00000000000000SMILEv1.47/P_BLOCS+DELTA/include/libfasta.h0000777002404200237300000000000010066542217024605 
2../../P_BLOCS/include/libfasta.hustar lamaaoc00000000000000SMILEv1.47/Lanceur/0000755002404200237300000000000010115611576013522 5ustar lamaaoc00000000000000SMILEv1.47/Lanceur/NEW0000644002404200237300000000337610066547617014120 0ustar lamaaoc00000000000000Since the 1.41 version, you can treat sequences with any kind of alphabet you don't have to define. But you have to indicate to SMILE the alphabet you want it to use to generate models. These indications must be written in an alphabet file (see 'alphabet' and 'alpha'). This file contains: - a type of data (Nucleotides, Proteins, Other) that makes SMILE able to recognize known groups of symbols (for instance AGR gives R). - a set of symbols groups, for instance: AB C D ...indicates that SMILE will generate models on a 3 symbols alphabet: [AB], C and D. Notice that you have to give all the symbols you want to put together. As an example, if you're dealing with a set of DNA sequences containing A, C, G, T and R, and want to generate models on a R, Y alphabet, you have to write the following alphabet file: Type: Nucleotides AGR ...that will be recognized as an R CT (or CTY, no difference) ...that will be recognized as a Y Symbols of the sequences not in the alphabet file won't match anything. If you want to deal with WILD CARDS (matching any symbols) you have to add '*' in the alphabet file. Finally, the name of this alphabet file is given in the parameters file. A few bugs have been fixed in the 1.42 and 1.43 versions. The 1.44 version consider valid models in another way than before. In older versions, if AAAA was valid but AAAAT too, AAAA didn't appear in the results. Now every valid model found appears in the results of the extraction. The 1.45 and 1.46 versions fix small bugs. The 1.47 version corrects an important bug and adds the 'palindrom' functionality. One can now extract models which has one or several boxes that are biological palindroms of other boxes. 
SMILEv1.47/Lanceur/example2.shuffle0000644002404200237300000007510410066546131016623 0ustar lamaaoc00000000000000STATISTICS ON THE NUMBER OF SEQUENCES HAVING AT LEAST ONE OCCURRENCE Model %right #right %shfl. #shfl. Sigma Chi2 Z-score ================================================================================= ATTGAC_TATAAT 4.43% 47 0.47% 4.95 1.50 34.89 27.97 CTTTTT_TTATAA 5.08% 54 2.14% 22.75 4.27 13.20 7.33 TTTTAT_ATAATA 4.99% 53 2.43% 25.85 3.76 9.71 7.22 AGAAAA_TTTTTC 5.18% 55 2.41% 25.55 4.12 11.19 7.14 GAAAAA_AAAAGA 4.33% 46 2.48% 26.30 2.99 5.56 6.58 TAAAAA_TTTTTC 6.31% 67 3.36% 35.65 4.79 10.06 6.54 CTTTTT_TATAAT 4.24% 45 1.95% 20.70 3.81 9.27 6.37 TTTTTA_ATAATA 4.80% 51 2.44% 25.95 4.03 8.46 6.21 TTATAT_TTTATT 4.33% 46 2.42% 25.65 3.33 5.98 6.11 TTTTTA_TATAAT 4.90% 52 2.43% 25.85 4.40 9.12 5.94 AAAAGA_TTTTTC 4.14% 44 2.25% 23.90 3.46 6.15 5.80 GAAAAA_TTTTTC 5.46% 58 2.73% 29.00 5.13 10.08 5.65 GAAAAA_TTTTTA 5.74% 61 3.56% 37.85 4.11 5.69 5.64 ATAAAA_AATGAA 4.61% 49 2.26% 23.95 4.61 8.91 5.44 TTTATT_ATAATA 4.43% 47 2.00% 21.20 4.77 10.08 5.40 GAAAAA_AAAGAA 4.24% 45 2.42% 25.70 3.61 5.45 5.34 TTATAA_AAAAAG 4.52% 48 2.15% 22.85 4.85 9.24 5.19 TGAAAA_TTTTTC 4.43% 47 2.40% 25.50 4.31 6.60 4.99 ATTTTT_TATAAT 4.43% 47 2.68% 28.50 3.73 4.70 4.95 GAAAAA_TTTTCT 4.71% 50 2.49% 26.40 4.87 7.56 4.85 AAAAAT_TTTTTC 6.31% 67 3.61% 38.30 5.94 8.23 4.83 ATAAAA_TTTTCT 4.52% 48 2.84% 30.15 3.80 4.23 4.70 AGAAAA_TTTTTT 5.37% 57 3.55% 37.65 4.16 4.14 4.65 TTTTTA_TTATAA 5.08% 54 2.86% 30.35 5.13 6.91 4.61 ATAAAA_TTTTTT 6.87% 73 4.61% 49.00 5.21 5.01 4.61 ATTATA_TTTAAA 4.05% 43 2.14% 22.75 4.40 6.44 4.60 ATAAAA_AAAATG 4.52% 48 2.64% 28.00 4.46 5.46 4.48 GAAAAA_TTTTCA 4.52% 48 2.44% 25.95 4.99 6.81 4.42 TTTTAT_ATAAAA 6.12% 65 4.06% 43.15 4.94 4.65 4.42 ATAAAA_AAAGAA 4.80% 51 2.74% 29.15 5.09 6.19 4.29 AATAAA_TTTTTC 4.05% 43 2.82% 29.95 3.05 2.42 4.28 AAAAAA_TTTTTC 6.40% 68 3.74% 39.70 6.62 7.83 4.28 AAAAAT_TTTTCT 5.46% 58 3.19% 33.85 5.82 6.64 4.15 TTTATT_ATAAAA 5.18% 55 
3.37% 35.75 4.67 4.27 4.13 TAAAAA_TTTTCT 4.52% 48 2.79% 29.65 4.48 4.50 4.10 ACAAAA_ATTTTT 4.52% 48 2.97% 31.55 4.02 3.53 4.09 TTTTAT_AAAGAA 5.18% 55 2.87% 30.45 6.01 7.35 4.08 AGAAAA_ATTTTT 5.37% 57 3.38% 35.90 5.22 5.01 4.04 AAAAAA_TTTTCT 5.46% 58 3.49% 37.10 5.29 4.81 3.95 TTCAAA_ATTTTT 4.05% 43 2.82% 29.90 3.37 2.44 3.89 AAGAAA_TTTTTT 5.74% 61 3.37% 35.75 6.54 6.90 3.86 TTCATT_TTTTAT 4.05% 43 2.36% 25.10 4.66 4.86 3.84 TTCTTT_TTTTAT 4.52% 48 2.58% 27.45 5.38 5.80 3.82 ATTATA_TAAAAA 4.61% 49 2.67% 28.40 5.42 5.69 3.80 AAAAAT_TTTTCA 4.99% 53 3.49% 37.10 4.20 2.93 3.78 AAAATA_ATGAAA 4.14% 44 2.54% 27.00 4.51 4.21 3.77 AAAATA_AAAGAA 4.05% 43 2.66% 28.25 4.14 3.16 3.56 TGAAAA_ATTTTT 4.71% 50 3.33% 35.35 4.12 2.62 3.56 TTTTAT_AATAAA 4.99% 53 3.29% 34.90 5.10 3.89 3.55 AATAAA_ATATAA 4.33% 46 2.70% 28.70 4.89 4.15 3.54 GAAAAA_TTTTTT 5.93% 63 3.87% 41.10 6.22 4.84 3.52 ATTATA_AAAAAA 4.33% 46 2.84% 30.20 4.50 3.40 3.51 TAAAAA_TTTTTT 6.40% 68 4.76% 50.55 4.98 2.72 3.50 TATTAT_ATAAAA 4.24% 45 2.29% 24.30 5.93 6.39 3.49 AAAAAA_TTTCTT 5.08% 54 3.31% 35.15 5.40 4.16 3.49 AAAAAG_TTTTTC 4.24% 45 2.46% 26.10 5.43 5.20 3.48 TTTTTT_TATAAT 4.90% 52 3.06% 32.45 5.67 4.71 3.45 TGAAAA_AAAAAG 4.05% 43 2.50% 26.55 4.78 4.02 3.44 TCTTTT_TTTTTT 4.61% 49 3.23% 34.35 4.27 2.68 3.43 AAAAAG_TTTTTT 5.37% 57 3.48% 37.00 5.84 4.45 3.42 ATTTTA_GAAAAA 4.33% 46 2.68% 28.45 5.15 4.29 3.41 TTCTTT_ATAAAA 4.90% 52 2.77% 29.45 6.62 6.49 3.41 TTTTCT_TATTTT 4.24% 45 2.68% 28.45 4.86 3.86 3.40 TTTTTC_TAAAAT 4.33% 46 2.68% 28.45 5.24 4.29 3.35 AAAAAT_TTTTTA 6.40% 68 4.49% 47.70 6.23 3.77 3.26 TAAAAA_ATTTTT 6.21% 66 4.44% 47.15 5.81 3.32 3.25 CTTTTT_TTTTCA 4.05% 43 2.45% 26.00 5.25 4.33 3.24 TTTTCT_ATAAAA 4.05% 43 2.86% 30.35 3.94 2.26 3.21 AAAAAT_TTTTGA 4.24% 45 2.93% 31.10 4.36 2.63 3.19 GAAAAA_CTTTTT 4.33% 46 2.67% 28.35 5.55 4.34 3.18 GAAAAA_ATTTTT 5.56% 59 3.86% 41.00 5.70 3.40 3.16 AAAAAA_CTTTTT 5.37% 57 3.63% 38.50 5.89 3.75 3.14 AAAATA_AATAAA 5.08% 54 3.45% 36.60 5.59 3.49 3.12 ATTTTC_ATTTTT 4.14% 44 
2.97% 31.50 4.08 2.15 3.06 TTCTTT_TATTTT 4.33% 46 2.75% 29.25 5.52 3.87 3.03 TCTTTT_TTTTTC 4.05% 43 2.42% 25.70 5.72 4.50 3.02 TTTTAT_AAAAAG 4.43% 47 3.01% 31.95 5.01 2.98 3.00 AAACAA_TTTTTT 4.05% 43 3.02% 32.10 3.64 1.64 2.99 ATAAAA_TTTTTC 5.18% 55 3.30% 35.00 6.68 4.64 2.99 TTTATT_AAAATA 4.90% 52 3.60% 38.25 4.61 2.19 2.98 AATAAA_TTTTTT 5.27% 56 3.93% 41.75 4.78 2.18 2.98 TTATAA_TAAAAA 4.71% 50 3.08% 32.70 5.82 3.77 2.97 TCAAAA_ATTTTT 4.43% 47 3.04% 32.30 4.95 2.83 2.97 AAAAAT_ATAATA 4.05% 43 2.88% 30.60 4.19 2.16 2.96 CATTTT_TTTTAT 4.24% 45 2.66% 28.20 5.67 3.99 2.96 TTTTTA_AAAGAA 4.05% 43 2.74% 29.15 4.72 2.75 2.94 TATTAT_TAAAAA 4.24% 45 2.50% 26.50 6.31 4.95 2.93 AATATT_TTTTAT 4.43% 47 3.10% 32.95 4.83 2.57 2.91 TATTAT_AAAAAA 4.05% 43 2.81% 29.80 4.60 2.48 2.87 TTTATT_TTTTAT 4.61% 49 3.35% 35.55 4.71 2.23 2.86 ATAAAA_AATATT 4.14% 44 2.98% 31.65 4.33 2.09 2.85 TAAAAA_AAAGAA 4.05% 43 2.74% 29.15 4.98 2.75 2.78 TTTTTT_CTTTTT 4.90% 52 3.46% 36.75 5.49 2.73 2.78 TATTTT_AAAAAG 4.24% 45 2.90% 30.75 5.13 2.78 2.78 TTCTTT_ATTTTT 4.33% 46 3.14% 33.35 4.57 2.09 2.77 CAAAAA_ATTTTT 4.14% 44 3.14% 33.30 3.87 1.54 2.77 ATAAAA_TATAAT 4.05% 43 2.49% 26.40 6.01 4.10 2.76 TTTTAT_AAAAGA 4.43% 47 2.96% 31.45 5.64 3.20 2.76 AATAAA_TATTTT 4.43% 47 3.34% 35.50 4.17 1.67 2.76 TGAAAA_TTTTTT 4.71% 50 3.59% 38.15 4.33 1.66 2.74 TTTTAT_TTATAA 4.05% 43 2.76% 29.35 5.02 2.67 2.72 TAAAAA_CTTTTT 4.61% 49 3.14% 33.35 5.82 3.09 2.69 AAAATA_TTTTTA 5.27% 56 4.02% 42.65 5.01 1.89 2.67 AAAAAA_TTTTTT 6.31% 67 4.89% 51.90 5.69 2.03 2.65 TTCTTT_TTTTTA 4.24% 45 2.90% 30.80 5.39 2.76 2.64 AAAATA_AAAAGA 4.05% 43 2.80% 29.70 5.05 2.52 2.63 TGTTTT_TTTTTT 4.24% 45 2.95% 31.30 5.24 2.55 2.61 AAAAAG_TTTTTA 4.71% 50 3.19% 33.85 6.27 3.24 2.58 TTTATT_AATAAA 4.14% 44 2.71% 28.80 5.91 3.29 2.57 AAATAA_TTTTTT 5.56% 59 4.08% 43.30 6.17 2.53 2.55 AAAAAA_AAAAGA 4.43% 47 3.26% 34.60 4.87 1.96 2.55 TAAAAA_GTTTTT 4.05% 43 2.85% 30.25 5.06 2.30 2.52 TTTTCA_ATAAAA 4.24% 45 2.94% 31.25 5.49 2.57 2.51 AAAAAA_ATTTTT 6.40% 68 
4.85% 51.55 6.61 2.40 2.49 AAAAAA_TTTTTA 6.21% 66 4.67% 49.55 6.61 2.48 2.49 GAAAAA_AATAAA 4.24% 45 2.70% 28.65 6.86 3.76 2.38 CAAAAA_AAAAAT 4.43% 47 3.18% 33.80 5.55 2.24 2.38 TATTTT_AATAAA 4.61% 49 3.36% 35.65 5.62 2.19 2.37 ACAAAA_TTTTTT 4.52% 48 3.26% 34.65 5.67 2.24 2.36 AAAGAA_AAAAAT 4.05% 43 2.94% 31.20 5.16 1.94 2.29 GAAAAA_TTTTAT 4.43% 47 3.40% 36.10 4.78 1.49 2.28 AAGAAA_TTTTTA 4.24% 45 2.83% 30.10 6.55 3.06 2.27 TTTTCA_AAAATA 4.05% 43 2.84% 30.20 5.69 2.32 2.25 AAATAA_TTTTTA 4.33% 46 3.48% 36.95 4.05 1.03 2.24 TTTTTT_TTTATT 5.18% 55 3.87% 41.10 6.27 2.11 2.22 TTTATA_ATAAAA 4.14% 44 3.08% 32.70 5.12 1.73 2.21 TTTTAT_GAAAAA 4.24% 45 3.24% 34.40 4.81 1.47 2.21 ATAAAA_AAAAGA 4.14% 44 2.82% 30.00 6.43 2.74 2.18 GAAAAA_ATAAAA 4.14% 44 3.26% 34.65 4.32 1.15 2.16 AAAAAA_TTTTGT 4.43% 47 3.20% 34.00 6.07 2.17 2.14 TTTTTC_ATAAAA 4.24% 45 3.43% 36.45 4.01 0.93 2.13 AAAAAA_TTATTT 5.18% 55 4.03% 42.85 5.70 1.58 2.13 AGAAAA_TTTTTA 4.14% 44 2.98% 31.65 5.82 2.09 2.12 TTTTTA_CTTTTT 4.14% 44 3.15% 33.45 5.05 1.49 2.09 AAAGAA_TTTTTT 4.52% 48 3.32% 35.25 6.12 2.03 2.08 AAAAAG_TAAAAA 4.14% 44 3.14% 33.30 5.15 1.54 2.08 AAAAAT_AATAAA 4.99% 53 3.86% 41.00 5.86 1.60 2.05 ATAAAA_TTTTCA 4.14% 44 2.93% 31.10 6.32 2.30 2.04 TCTTTT_TTTTTA 4.05% 43 3.00% 31.85 5.46 1.72 2.04 TTTTAT_TTTTTA 5.18% 55 4.00% 42.45 6.16 1.69 2.04 TTTTTA_TTTTCA 4.24% 45 3.20% 33.95 5.45 1.61 2.03 TCTTTT_ATAAAA 4.05% 43 2.87% 30.50 6.21 2.20 2.01 ATAAAA_ATTTTT 5.84% 62 4.51% 47.85 7.10 1.92 1.99 TGAAAA_TAAAAA 4.14% 44 3.16% 33.55 5.36 1.46 1.95 AAAAAC_TTTTTT 4.14% 44 3.30% 35.00 4.63 1.06 1.94 TTTTTA_AAATAA 4.05% 43 3.40% 36.15 3.53 0.62 1.94 AAGAAA_ATTTTT 4.43% 47 3.15% 33.45 7.04 2.37 1.93 TTATTT_ATAAAA 4.52% 48 3.20% 34.00 7.38 2.49 1.90 TTTTAT_TATAAA 4.14% 44 3.14% 33.35 5.62 1.52 1.89 TTTTTA_ATAAAA 5.08% 54 4.19% 44.45 5.05 0.97 1.89 TTTTTC_TATTTT 4.05% 43 3.06% 32.55 5.55 1.50 1.88 TTTTTC_TTTTTT 4.43% 47 3.57% 37.95 4.98 1.00 1.82 ATAAAA_TTATTT 4.33% 46 3.36% 35.65 5.72 1.36 1.81 AAAAAT_GAAAAT 4.14% 44 
3.11% 33.00 6.13 1.63 1.79 CTTTTT_AAAATA 4.05% 43 3.04% 32.25 6.09 1.59 1.77 CTTTTT_ATAAAA 4.05% 43 3.29% 34.90 4.62 0.87 1.75 TTTATT_TATTTT 4.24% 45 3.35% 35.55 5.43 1.15 1.74 AAAATT_TTTATT 4.05% 43 3.12% 33.15 5.69 1.32 1.73 ATTTTA_AAAAAT 4.71% 50 3.53% 37.45 7.39 1.88 1.70 TTTTTT_TATTTT 5.27% 56 4.43% 47.05 5.32 0.82 1.68 AAAATA_GAAAAA 4.24% 45 3.24% 34.40 6.31 1.47 1.68 AAAATA_TTTTTC 4.14% 44 3.31% 35.20 5.32 1.02 1.65 ATTTTT_TAAAAT 4.71% 50 3.73% 39.60 6.31 1.26 1.65 ATATTT_AAAAAT 4.71% 50 3.85% 40.85 5.57 0.96 1.64 TATTTT_AAAATA 4.80% 51 3.71% 39.45 7.04 1.54 1.64 TTAAAT_TTTTTT 4.05% 43 3.23% 34.35 5.35 1.00 1.62 TTATAT_AAAAAT 4.05% 43 3.28% 34.85 5.06 0.89 1.61 TTTTTC_ATTTTT 4.24% 45 3.49% 37.10 4.92 0.79 1.61 ATAAAA_AATTTT 4.33% 46 3.52% 37.35 5.39 0.93 1.60 AAAATA_AAAATA 4.61% 49 3.66% 38.85 6.34 1.22 1.60 AAAAAT_GAAAAA 4.71% 50 3.83% 40.65 5.85 1.01 1.60 AAAAAA_GTTTTT 4.14% 44 3.23% 34.30 6.22 1.25 1.56 TATTTT_TTTATT 4.05% 43 3.23% 34.35 5.59 1.00 1.55 AAAAAG_AAAAAT 4.14% 44 3.37% 35.75 5.38 0.89 1.53 ATTTTT_TTTATT 4.61% 49 3.60% 38.25 7.03 1.38 1.53 AAAAAT_CTTTTT 4.14% 44 3.48% 37.00 4.63 0.63 1.51 ATAAAA_TTTTAT 4.71% 50 4.07% 43.25 4.49 0.51 1.50 TTTTAT_TAAAAA 4.80% 51 4.20% 44.65 4.23 0.44 1.50 TTTTAT_AAATAA 4.43% 47 3.52% 37.40 6.48 1.14 1.48 AATAAA_AATTTT 4.24% 45 3.20% 33.95 7.47 1.61 1.48 TTATAA_ATTTTT 4.05% 43 3.27% 34.70 5.64 0.92 1.47 ATTTTA_AAAAAA 4.52% 48 3.76% 39.95 5.49 0.77 1.47 ATAAAA_ATATTT 4.43% 47 3.64% 38.65 5.71 0.85 1.46 ATTTTT_ATTTAT 4.05% 43 3.31% 35.10 5.45 0.83 1.45 TTTTCA_TTTTTT 4.43% 47 3.47% 36.90 7.02 1.27 1.44 AAAAAT_TGAAAA 4.43% 47 3.61% 38.35 6.09 0.91 1.42 AAATAA_TTTTAT 4.33% 46 3.50% 37.20 6.28 0.97 1.40 TAAAAA_TTATTT 4.14% 44 3.53% 37.50 4.64 0.54 1.40 TTATTT_TAAAAA 4.05% 43 3.41% 36.25 4.82 0.60 1.40 TTTTCA_ATTTTT 4.14% 44 3.45% 36.65 5.25 0.70 1.40 TTTTTC_TAAAAA 4.14% 44 3.55% 37.65 4.55 0.51 1.40 AAAAAT_TTTTTT 6.12% 65 5.07% 53.85 7.99 1.11 1.40 AAATAT_TTTTAT 4.33% 46 3.67% 38.95 5.06 0.61 1.39 AAAAAA_AAAACA 4.05% 43 
3.24% 34.45 6.24 0.98 1.37 TTTTTT_AATATT 4.33% 46 3.62% 38.45 5.54 0.70 1.36 ATAAAA_AATAAA 4.61% 49 3.61% 38.30 7.95 1.37 1.35 TAAAAA_AATTTT 4.14% 44 3.60% 38.25 4.30 0.42 1.34 CAAAAA_TTTTTT 4.14% 44 3.43% 36.40 5.73 0.75 1.33 AAAAAG_AAAAAA 4.43% 47 3.63% 38.50 6.44 0.88 1.32 GAAAAA_AAAAAA 4.43% 47 3.63% 38.60 6.49 0.86 1.29 ATAAAT_ATTTTT 4.05% 43 3.38% 35.85 5.58 0.67 1.28 AAAAGA_TTTTTT 4.05% 43 3.39% 35.95 5.58 0.65 1.26 AATATT_AAAAAA 4.33% 46 3.66% 38.85 5.84 0.63 1.22 AAAAAA_TTTTCA 4.61% 49 3.72% 39.55 7.78 1.05 1.21 AAAATA_AAAAAA 5.18% 55 4.44% 47.10 6.58 0.64 1.20 ATTTTT_TTTTTC 4.43% 47 3.89% 41.30 4.76 0.38 1.20 TTATTT_TTTTTT 4.33% 46 3.88% 41.20 4.07 0.28 1.18 AAAAAA_TTTAAA 4.24% 45 3.79% 40.25 4.04 0.28 1.18 TTTTTA_TTATTT 4.14% 44 3.46% 36.75 6.17 0.68 1.17 TTTTAT_TTTATT 4.14% 44 3.44% 36.55 6.36 0.72 1.17 AAAATA_TTTTTT 5.37% 57 4.56% 48.45 7.39 0.73 1.16 TTTAAA_TTTTTT 4.80% 51 4.11% 43.60 6.43 0.61 1.15 ATTTTT_ATATAA 4.14% 44 3.45% 36.65 6.60 0.70 1.11 AATAAA_AAAAAA 4.33% 46 3.76% 39.95 5.48 0.44 1.10 AAAAAA_TTCTTT 4.14% 44 3.52% 37.35 6.06 0.57 1.10 AAAAAA_TTTTAT 5.37% 57 4.71% 50.05 6.54 0.48 1.06 TTTTTT_TTTTTT 4.99% 53 4.39% 46.65 6.06 0.42 1.05 AAAATA_AAATAA 4.05% 43 3.51% 37.25 5.50 0.43 1.05 TTTTTT_TAAAAT 4.33% 46 3.82% 40.55 5.22 0.36 1.04 TTTTTT_TTTTTC 4.52% 48 3.90% 41.40 6.32 0.51 1.04 TTAAAA_AAAAAT 4.05% 43 3.60% 38.25 4.60 0.29 1.03 TTTTAT_TTTTAT 4.43% 47 3.73% 39.60 7.18 0.66 1.03 AATAAA_ATTTTT 4.43% 47 3.96% 42.10 4.89 0.28 1.00 AAAAAT_ATTTTT 5.65% 60 4.94% 52.45 7.57 0.54 1.00 AAATAT_TTTTTA 4.33% 46 3.78% 40.15 5.87 0.41 1.00 AAAAAA_GAAAAA 4.24% 45 3.74% 39.70 5.35 0.35 0.99 TTTATT_TTTTTT 4.52% 48 3.94% 41.80 6.27 0.45 0.99 TTTTAT_TTTTTT 4.90% 52 4.51% 47.85 4.21 0.18 0.99 TATTTT_ATAAAA 4.33% 46 3.79% 40.25 5.85 0.40 0.98 ATAAAA_AAATAT 4.14% 44 3.56% 37.80 6.44 0.49 0.96 TTTTTT_TTTTAT 4.99% 53 4.48% 47.55 6.07 0.31 0.90 TAAAAA_TATTTT 4.52% 48 4.10% 43.55 5.21 0.23 0.85 TTTTAT_AAAAAA 5.18% 55 4.61% 48.95 7.10 0.37 0.85 AATAAA_AAAAAT 4.33% 46 
3.76% 39.90 7.20 0.45 0.85 AAAAAA_TTTTTG 4.05% 43 3.55% 37.75 6.30 0.35 0.83 TTTTTT_ATTTTT 5.18% 55 4.72% 50.10 5.93 0.24 0.83 AAAAAT_ATAAAA 4.99% 53 4.52% 48.00 6.26 0.26 0.80 AAAAAT_TAAAAA 4.71% 50 4.31% 45.80 5.50 0.19 0.76 TATAAA_ATTTTT 4.14% 44 3.70% 39.30 6.37 0.28 0.74 AAAAAA_TGAAAA 4.14% 44 3.75% 39.85 5.63 0.21 0.74 TTTTTT_TTTTCA 4.14% 44 3.71% 39.40 6.27 0.26 0.73 TAAAAA_ATAAAA 4.52% 48 4.12% 43.80 5.73 0.20 0.73 AAAAAA_AATAAA 4.33% 46 3.97% 42.15 5.28 0.18 0.73 TTTTTT_ATAAAA 5.37% 57 4.92% 52.25 6.52 0.22 0.73 AAAAAT_TTTTAT 4.80% 51 4.41% 46.80 6.00 0.19 0.70 TTTTTA_TTTTTT 4.90% 52 4.49% 47.65 6.32 0.20 0.69 AAAATA_TTTTAT 4.05% 43 3.75% 39.80 4.71 0.13 0.68 ATAAAA_TTTAAA 4.05% 43 3.60% 38.25 7.23 0.29 0.66 TTTATA_ATTTTT 4.05% 43 3.73% 39.65 5.37 0.14 0.62 TAAAAT_AAAAAA 4.14% 44 3.77% 40.05 6.47 0.19 0.61 AAAAAT_AAAAAA 5.27% 56 4.90% 52.00 6.59 0.16 0.61 TTTTTA_TTTTAT 4.33% 46 3.99% 42.35 6.11 0.16 0.60 AAAAAA_TTTTAA 4.33% 46 3.97% 42.20 6.57 0.17 0.58 AAAAAT_AAATAA 4.33% 46 3.97% 42.20 6.59 0.17 0.58 TTATTT_ATTTTT 4.14% 44 3.87% 41.15 5.11 0.10 0.56 AAAATA_AAAAAT 4.61% 49 4.26% 45.20 6.84 0.16 0.56 AATTTT_AAAAAT 4.14% 44 3.86% 41.00 5.65 0.11 0.53 TTTAAA_AAAAAT 4.14% 44 3.86% 41.00 6.03 0.11 0.50 AAAAAT_TTTTAA 4.24% 45 3.99% 42.40 5.31 0.08 0.49 TTTTTA_AAAAAA 4.90% 52 4.56% 48.45 7.65 0.13 0.46 ATTTTT_AAATAT 4.24% 45 3.88% 41.25 8.60 0.17 0.44 ATTTTT_AAATTT 4.14% 44 3.95% 41.90 5.20 0.05 0.40 TTTATT_ATTTTT 4.24% 45 4.01% 42.55 6.07 0.07 0.40 ATTTTT_TTTTAT 4.61% 49 4.35% 46.20 7.09 0.09 0.39 TTAAAA_ATTTTT 4.05% 43 3.85% 40.90 6.27 0.05 0.34 TTTTTA_TATTTT 4.14% 44 3.91% 41.50 7.67 0.08 0.33 TTTTAA_AAAAAA 4.14% 44 3.95% 41.95 6.78 0.05 0.30 AAATTT_AAAAAT 4.24% 45 4.07% 43.25 6.18 0.04 0.28 TATTTT_AAAAAT 4.43% 47 4.29% 45.55 5.92 0.02 0.24 TTTTAT_ATTTTT 4.52% 48 4.40% 46.75 5.20 0.02 0.24 AAAATA_TAAAAA 4.05% 43 3.94% 41.80 5.23 0.02 0.23 AAAAAA_TAAAAA 4.61% 49 4.47% 47.45 7.14 0.03 0.22 TATTTT_ATTTTT 4.33% 46 4.21% 44.70 6.00 0.02 0.22 TTTTTT_ATATTT 4.24% 45 
4.14% 43.95 4.93 0.01 0.21 TTTTTT_AAATAA 4.14% 44 4.06% 43.10 5.48 0.01 0.16 AAAAAA_TTTATT 4.24% 45 4.17% 44.30 4.86 0.01 0.14 AAAAAA_TATTTT 4.90% 52 4.83% 51.30 5.05 0.00 0.14 ATTTTT_ATAAAA 4.71% 50 4.63% 49.20 6.55 0.01 0.12 TTTTTT_TAAAAA 4.80% 51 4.74% 50.35 7.94 0.00 0.08 AAATAA_ATTTTT 4.05% 43 4.00% 42.50 6.12 0.00 0.08 ATTTTT_AAAATA 4.14% 44 4.08% 43.35 7.99 0.01 0.08 TTTTAT_AAAATA 4.05% 43 4.00% 42.50 7.04 0.00 0.07 ATAAAA_AAAAAT 4.52% 48 4.48% 47.55 7.00 0.00 0.06 AAAAAT_AAAAAT 5.08% 54 5.06% 53.75 7.36 0.00 0.03 TTTTTA_AAAAAT 4.52% 48 4.52% 47.95 7.37 0.00 0.01 TTTTTT_GAAAAA 4.05% 43 4.06% 43.10 8.18 0.00 -0.01 AAAAAA_AAAAAA 4.33% 46 4.39% 46.60 7.08 0.00 -0.08 TTTATT_AAAAAT 4.05% 43 4.11% 43.60 6.22 0.00 -0.10 TTTTTA_ATTTTT 4.52% 48 4.58% 48.65 5.65 0.00 -0.12 AAAAAA_ATAAAA 4.52% 48 4.60% 48.85 5.91 0.01 -0.14 TTTTTT_TTTTTA 4.24% 45 4.37% 46.40 6.97 0.02 -0.20 ATAAAA_TAAAAA 4.14% 44 4.27% 45.30 6.11 0.02 -0.21 TTTTAT_AAAAAT 4.43% 47 4.56% 48.45 6.07 0.02 -0.24 ATTTTT_ATTTTT 4.61% 49 4.75% 50.45 5.51 0.02 -0.26 ATTTTT_AAAAAT 4.61% 49 4.82% 51.15 8.15 0.05 -0.26 TTTTTA_TAAAAA 4.05% 43 4.25% 45.15 7.81 0.05 -0.28 ATTTTT_TATTTT 4.24% 45 4.44% 47.10 6.58 0.05 -0.32 AAAAAT_AAAATA 4.24% 45 4.47% 47.50 7.51 0.07 -0.33 AAATAT_AAAAAA 4.05% 43 4.23% 44.95 5.78 0.05 -0.34 ATAAAA_AAAAAA 4.61% 49 4.85% 51.50 6.23 0.07 -0.40 TATTTT_AAAAAA 4.05% 43 4.35% 46.25 7.06 0.12 -0.46 ATTTTT_TAAAAA 4.33% 46 4.68% 49.75 7.22 0.15 -0.52 TTTTTT_AAAATA 4.05% 43 4.48% 47.60 8.07 0.24 -0.57 ATTTTT_TTTTTT 4.14% 44 4.60% 48.80 5.90 0.26 -0.81 TTTTTT_AAAAAA 4.33% 46 4.90% 52.05 7.42 0.39 -0.82 TTTTTT_AAAAAT 4.43% 47 5.08% 53.95 8.15 0.50 -0.85 TAAAAA_AAAAAA 4.05% 43 4.67% 49.55 6.54 0.48 -1.00 STATISTICS ON THE TOTAL NUMBER OF OCCURRENCES Model #right #shfl. 
Sigma Chi2 Z-score ================================================================================= ATTGAC_TATAAT 47 4.95 1.50 34.04 27.97 AGAAAA_TTTTTC 80 32.15 5.31 20.42 9.00 GAAAAA_AAAAGA 59 31.25 3.93 8.53 7.06 TTTTAT_ATAATA 56 27.60 4.21 9.65 6.75 CTTTTT_TTATAA 61 25.25 5.36 14.82 6.67 CTTTTT_TATAAT 51 22.65 4.44 10.91 6.39 GAAAAA_TTTTTC 89 38.85 7.88 19.68 6.37 TAAAAA_TTTTTC 91 46.75 7.30 14.22 6.06 ATTTTT_TATAAT 56 31.20 4.18 7.05 5.94 TTTTTA_ATAATA 56 29.45 4.49 8.25 5.91 GAAAAA_TTTTCT 74 32.35 7.07 16.32 5.89 TTTTTA_TATAAT 60 28.55 5.42 11.17 5.80 ACAAAA_ATTTTT 58 37.90 3.60 4.21 5.59 AAAAGA_TTTTTC 56 28.85 5.06 8.69 5.37 TAAAAA_TTTTCT 65 36.35 5.40 8.10 5.30 TTATAT_TTTATT 49 28.05 3.95 5.70 5.30 ATAAAA_AATGAA 51 26.25 4.83 7.93 5.12 GAAAAA_AAAGAA 57 31.10 5.14 7.62 5.04 TTTATT_ATAATA 49 22.70 5.25 9.65 5.01 TTATAA_AAAAAG 55 26.25 5.78 10.18 4.97 AAAAAT_TTTTTC 89 52.05 7.54 9.68 4.90 TTTTAT_ATAAAA 78 50.85 5.80 5.72 4.68 ATAAAA_TTTTCT 59 35.05 5.36 6.10 4.47 AGAAAA_ATTTTT 73 43.70 6.69 7.36 4.38 GAAAAA_TTTTCA 56 31.00 6.01 7.19 4.16 GAAAAA_TTTTTA 79 49.90 7.08 6.57 4.11 TTCATT_TTTTAT 45 27.60 4.33 4.17 4.02 AATAAA_ATATAA 52 31.85 5.08 4.84 3.97 ATAAAA_AAAGAA 55 32.15 5.78 5.99 3.95 ATAAAA_AAAATG 50 30.70 4.89 4.62 3.95 CTTTTT_TTTTCA 52 30.15 5.62 5.81 3.89 TGAAAA_TTTTTC 54 30.45 6.14 6.57 3.83 AAAAAG_TTTTTC 66 35.10 8.09 9.45 3.82 AAAAAT_TTTTCT 73 41.40 8.31 8.73 3.80 TGAAAA_AAAAAG 52 31.05 5.90 5.29 3.55 GAAAAA_CTTTTT 68 37.40 8.68 8.89 3.53 TAAAAA_AAAGAA 57 34.60 6.39 5.48 3.50 AAAAAA_TTTTTC 128 76.70 14.65 12.86 3.50 ATTATA_TAAAAA 55 32.65 6.42 5.70 3.48 TATTAT_ATAAAA 47 26.15 6.00 5.94 3.47 ATTATA_TTTAAA 45 25.45 5.64 5.43 3.46 TCTTTT_TTTTTC 59 31.70 7.89 8.22 3.46 TTTTTA_TTATAA 56 35.15 6.05 4.77 3.45 TTCTTT_TTTTAT 52 30.95 6.13 5.34 3.44 TTTTAT_AAAGAA 58 33.90 7.11 6.32 3.39 AATATT_TTTTAT 53 34.80 5.43 3.77 3.35 TTTATT_ATAAAA 61 41.50 6.03 3.71 3.23 AAAAAA_TTTTCT 105 65.70 12.21 9.05 3.22 AATAAA_TTTTTC 51 36.00 4.67 2.59 3.21 AAAATA_ATGAAA 45 29.55 
4.83 3.20 3.20 TGAAAA_ATTTTT 58 41.10 5.30 2.88 3.19 AAAATA_AAAGAA 47 31.60 4.85 3.02 3.18 AAAATA_AATAAA 63 41.45 6.85 4.45 3.15 TTCTTT_TATTTT 52 32.75 6.18 4.37 3.11 TAAAAA_ATTTTT 83 63.70 6.24 2.54 3.09 ATAAAA_AAAAGA 53 33.55 6.31 4.37 3.08 AAAAAT_TTTTGA 53 36.30 5.43 3.12 3.08 AGAAAA_TTTTTT 104 68.95 11.42 7.11 3.07 GAAAAA_ATTTTT 80 53.65 8.70 5.20 3.03 AGAAAA_TTTTTA 63 39.15 7.90 5.57 3.02 TTTTTT_TATAAT 80 49.75 10.22 7.05 2.96 TTTATT_AAAATA 58 42.50 5.29 2.39 2.93 TTTTCT_TATTTT 51 33.05 6.17 3.83 2.91 TTCTTT_TTTTTA 57 36.35 7.10 4.57 2.91 TTCTTT_ATTTTT 56 39.30 5.81 2.93 2.87 ATAAAA_TTTTTC 68 42.30 9.04 5.99 2.84 TTTTAT_AATAAA 59 40.55 6.57 3.42 2.81 ATTTTA_GAAAAA 49 32.80 5.82 3.21 2.78 TTCAAA_ATTTTT 47 35.25 4.24 1.68 2.77 TTTTCT_ATAAAA 51 35.55 5.60 2.76 2.76 TATTTT_AATAAA 58 40.30 6.42 3.19 2.76 ATAAAA_TTTTTT 112 84.55 10.02 3.84 2.74 AAAAAT_TTTTCA 60 43.80 5.95 2.53 2.72 ATAAAA_TATAAT 45 27.65 6.40 4.14 2.71 AAAAAG_TTTTTA 67 44.45 8.33 4.56 2.71 TTTTAT_TTATAA 46 31.70 5.28 2.63 2.71 TTCTTT_ATAAAA 56 34.00 8.21 5.38 2.68 TTTTAT_AAAAGA 53 35.40 6.58 3.50 2.67 TCTTTT_TTTTTT 79 57.00 8.23 3.56 2.67 TTTTTA_ATAAAA 73 55.25 6.66 2.46 2.67 TCTTTT_TTTTTA 55 38.20 6.30 3.03 2.66 TATTAT_TAAAAA 49 29.65 7.30 4.76 2.65 TCAAAA_ATTTTT 55 38.60 6.24 2.87 2.63 TTATAA_TAAAAA 52 36.40 5.93 2.75 2.63 GAAAAA_TTTTTT 118 82.15 13.69 6.42 2.62 CATTTT_TTTTAT 47 30.60 6.27 3.47 2.62 TTTTTC_ATAAAA 56 45.85 3.88 1.01 2.61 AAAAAT_ATAATA 47 34.90 4.72 1.79 2.56 TGAAAA_TTTTTT 80 60.30 7.69 2.77 2.56 GAAAAA_ATAAAA 54 42.25 4.83 1.43 2.43 ATAAAA_AATATT 47 34.15 5.29 2.04 2.43 TAAAAA_CTTTTT 64 44.20 8.35 3.62 2.37 CAAAAA_AAAAAT 66 43.80 9.55 4.49 2.32 TTTTTA_TTTTCA 55 39.90 6.52 2.40 2.32 TTTTTC_TAAAAT 48 33.60 6.25 2.54 2.30 AATAAA_TATTTT 51 40.80 4.43 1.13 2.30 AAAAAT_TTTTTA 83 64.20 8.17 2.40 2.30 TTTTAT_AAAAAG 55 38.95 7.07 2.74 2.27 AAAATA_AAAAGA 48 34.10 6.13 2.35 2.27 TTTTAT_TAAAAA 70 55.60 6.35 1.65 2.27 ATTTTC_ATTTTT 48 36.15 5.28 1.67 2.24 TTTTTA_CTTTTT 57 42.95 6.31 1.98 2.23 
AAAAAA_TTTTTT 191 143.90 21.18 6.63 2.22 TATTTT_AAAAAG 54 37.55 7.42 2.96 2.22 AAAATA_AAAATA 62 45.45 7.47 2.55 2.21 CAAAAA_ATTTTT 58 44.70 6.07 1.72 2.19 ATAAAA_TTTTCA 51 34.80 7.42 3.06 2.18 TTTTCA_AAAATA 48 34.05 6.42 2.37 2.17 TGAAAA_TAAAAA 54 39.20 6.90 2.35 2.14 TATTAT_AAAAAA 62 45.40 7.78 2.57 2.13 AAAATT_TTTATT 54 39.65 6.75 2.20 2.12 GAAAAA_AATAAA 54 35.25 8.88 3.94 2.11 ATAAAA_ATTTTT 77 59.30 8.42 2.30 2.10 TTTTTC_ATTTTT 62 48.80 6.30 1.57 2.10 AAGAAA_ATTTTT 61 40.25 9.94 4.25 2.09 TTTTTA_AAAGAA 48 34.80 6.46 2.10 2.04 TTTATA_ATAAAA 47 36.05 5.42 1.44 2.02 TTTTAT_GAAAAA 56 41.75 7.07 2.08 2.02 ACAAAA_TTTTTT 83 62.10 10.38 3.01 2.01 TTTTTC_TATTTT 57 40.15 8.39 2.92 2.01 AAGAAA_TTTTTT 89 62.50 13.43 4.64 1.97 TATTTT_TTTATT 51 39.75 5.90 1.39 1.91 AAAATA_GAAAAA 58 41.35 8.74 2.79 1.91 ATTATA_AAAAAA 64 46.15 9.45 2.89 1.89 TTTATT_TTTTAT 53 41.05 6.41 1.52 1.86 TAAAAA_GTTTTT 51 39.85 6.09 1.37 1.83 TTTTCA_ATAAAA 48 35.20 7.02 1.97 1.82 GAAAAA_TTTTAT 57 44.10 7.09 1.65 1.82 ATAAAA_AATTTT 58 45.15 7.20 1.60 1.79 TTTATT_AATAAA 46 32.40 7.69 2.36 1.77 TTTATT_TATTTT 52 40.65 6.43 1.39 1.77 TATTTT_AAAATA 61 44.95 9.20 2.43 1.74 AAAAAT_CTTTTT 61 49.00 6.97 1.31 1.72 AAAATA_TTTTTC 53 41.80 6.52 1.32 1.72 AATAAA_AATTTT 56 39.80 9.45 2.74 1.71 ATTTTT_TTTTTC 69 55.15 8.13 1.55 1.70 TCTTTT_ATAAAA 49 35.55 7.91 2.14 1.70 AAAAAA_TTTCTT 82 62.60 11.60 2.60 1.67 ATTTTA_AAAAAT 57 43.25 8.23 1.89 1.67 CTTTTT_AAAATA 52 39.30 7.74 1.77 1.64 AAAAAG_TAAAAA 55 42.60 7.57 1.58 1.64 AAAAAT_AATAAA 61 49.20 7.34 1.26 1.61 AAAGAA_AAAAAT 49 37.70 7.15 1.47 1.58 AAAAAT_GAAAAA 68 53.55 9.29 1.72 1.56 TAAAAA_TTTTTT 117 96.90 12.94 1.89 1.55 AAAAAA_TTTTGT 78 59.95 11.75 2.36 1.54 AAAATA_TTTTTA 62 51.35 6.98 1.00 1.53 TTATTT_ATAAAA 54 39.75 9.39 2.17 1.52 AAGAAA_TTTTTA 50 36.55 8.98 2.09 1.50 TGTTTT_TTTTTT 69 52.85 10.81 2.14 1.49 AAAAAT_TAAAAA 74 61.90 8.12 1.08 1.49 ATTTTT_TAAAAT 57 45.85 7.60 1.21 1.47 ATTTTT_TTTATT 59 45.80 9.15 1.66 1.44 TTTTAT_TATAAA 47 37.50 6.60 1.07 1.44 AAAAAA_TTTTTA 
117 95.30 15.15 2.22 1.43 CTTTTT_ATAAAA 53 43.25 6.81 0.99 1.43 ATAAAT_ATTTTT 49 40.40 6.09 0.83 1.41 TTTTTT_TTTATT 81 67.50 9.80 1.23 1.38 AAAAAT_GAAAAT 49 37.95 8.22 1.40 1.34 TTTTTC_TAAAAA 59 50.80 6.17 0.61 1.33 AAAAAT_ATAAAA 67 58.85 6.14 0.53 1.33 TTATAT_AAAAAT 50 41.00 6.86 0.89 1.31 ATAAAA_TTATTT 50 40.85 7.28 0.92 1.26 AAAAAA_TTTTCA 79 62.65 13.12 1.89 1.25 ATAAAA_TTTTAT 57 49.70 5.88 0.50 1.24 TTTTAT_TTTTTA 64 53.20 8.94 1.00 1.21 AAAAAA_AAAAGA 71 58.35 10.48 1.24 1.21 GAAAAA_AAAAAA 90 70.15 16.45 2.46 1.21 AAAAAA_CTTTTT 92 73.75 15.33 2.01 1.19 AAATAT_TTTTAT 49 42.45 5.54 0.47 1.18 AAAAAG_TTTTTT 91 72.95 15.30 1.99 1.18 TTTTTA_AAATAA 50 43.25 5.75 0.49 1.17 TTTTAT_AAATAA 53 42.65 8.83 1.12 1.17 AAAAAA_TTTAAA 80 69.95 8.62 0.67 1.17 AAATAA_TTTTTA 51 44.40 5.99 0.46 1.10 TTTTCA_ATTTTT 49 41.95 6.42 0.55 1.10 AAAAAG_AAAAAT 54 45.90 7.62 0.66 1.06 CAAAAA_TTTTTT 85 71.20 13.07 1.22 1.06 TTTTTC_TTTTTT 75 66.30 8.35 0.54 1.04 AAAAAT_TGAAAA 54 46.00 7.79 0.64 1.03 TAAAAA_AATTTT 57 49.30 7.62 0.56 1.01 TTTTTT_TTTTTC 90 74.70 15.61 1.42 0.98 TTATTT_TAAAAA 50 44.45 5.83 0.33 0.95 AAAATA_AAATAA 48 41.95 6.38 0.41 0.95 TATTTT_ATAAAA 55 47.60 7.84 0.53 0.94 ATTTTT_ATATAA 51 43.40 8.17 0.61 0.93 TTTTCA_TTTTTT 68 56.85 12.07 1.00 0.92 AATAAA_ATTTTT 57 51.15 6.44 0.32 0.91 ATTTTT_ATTTAT 45 39.75 5.79 0.33 0.91 TTTTTA_TTATTT 52 44.25 8.58 0.62 0.90 TTATAA_ATTTTT 46 38.85 7.94 0.60 0.90 AAATAA_TTTTAT 49 42.05 7.84 0.53 0.89 AAAAAA_TTTTTG 86 72.20 15.60 1.20 0.88 AAAAAA_AAAACA 68 57.70 11.67 0.84 0.88 TATAAA_ATTTTT 53 45.70 8.38 0.54 0.87 TTTTAT_ATTTTT 64 57.00 8.12 0.41 0.86 TTTTAT_AAAAAA 96 83.00 15.35 0.94 0.85 TTTTAT_TTTATT 48 42.20 7.30 0.37 0.80 TTAAAT_TTTTTT 62 54.00 10.25 0.55 0.78 ATATTT_AAAAAT 54 48.25 7.42 0.32 0.78 TTAAAA_ATTTTT 57 51.85 6.73 0.24 0.77 AAAGAA_TTTTTT 71 61.05 13.34 0.75 0.75 ATAAAA_AATAAA 51 44.15 9.19 0.49 0.75 AAAAAT_TTTTAT 64 58.05 8.08 0.29 0.74 AAACAA_TTTTTT 61 55.75 7.14 0.24 0.73 AAAATA_TTTTAT 50 45.75 6.07 0.19 0.70 ATAAAA_ATATTT 48 43.20 
7.05 0.25 0.68 TTAAAA_AAAAAT 54 49.85 6.37 0.17 0.65 ATAAAA_AAATAT 47 41.75 8.06 0.31 0.65 TATTTT_ATTTTT 61 55.75 8.58 0.24 0.61 TTTTTA_TTTTTT 94 86.35 12.91 0.32 0.59 AAAAAA_ATTTTT 110 101.25 14.89 0.36 0.59 AAAATA_AAAAAT 62 56.10 10.29 0.29 0.57 AAATAT_TTTTTA 52 47.70 7.93 0.19 0.54 AAAAAT_AAAAAT 79 72.00 13.00 0.32 0.54 TTTTTA_ATTTTT 68 63.55 9.06 0.15 0.49 TTTTTA_TATTTT 57 52.35 9.72 0.20 0.48 ATTTTA_AAAAAA 68 63.60 9.33 0.15 0.47 AATAAA_AAAAAT 52 47.40 9.82 0.21 0.47 AAATAA_TTTTTT 80 74.80 11.95 0.17 0.44 TTTTTT_ATAAAA 96 89.15 15.77 0.25 0.43 AAAAAT_ATTTTT 74 69.25 11.35 0.16 0.42 TTTTTT_TATTTT 84 79.95 10.14 0.10 0.40 TATTTT_AAAAAT 59 55.35 9.59 0.12 0.38 AATAAA_TTTTTT 76 72.25 9.85 0.09 0.38 TTTTTT_CTTTTT 73 67.50 14.54 0.22 0.38 AAAATA_TAAAAA 54 51.10 7.69 0.08 0.38 ATTTTT_ATTTTT 72 68.45 9.52 0.09 0.37 TTTTTT_TTTTTT 137 128.45 23.16 0.28 0.37 AAAAAA_GTTTTT 71 66.30 12.79 0.16 0.37 TTTTTT_TTTTAT 86 81.75 12.34 0.11 0.34 TAAAAA_TTATTT 48 46.00 5.97 0.04 0.33 AAAAAT_AAAATA 62 58.60 10.68 0.10 0.32 AAAAAA_TTCTTT 67 63.30 11.69 0.11 0.32 ATTTTT_ATAAAA 64 61.40 9.15 0.05 0.28 TTTTAT_TTTTAT 50 47.65 8.54 0.06 0.28 AAAAGA_TTTTTT 67 63.55 12.65 0.09 0.27 TTTTTT_ATTTTT 97 93.00 15.18 0.08 0.26 TTTATT_TTTTTT 70 67.15 12.18 0.06 0.23 AAAAAT_AAAAAA 103 99.10 17.73 0.08 0.22 TAAAAA_ATAAAA 56 54.65 6.73 0.02 0.20 AAAATA_AAAAAA 84 81.25 14.34 0.05 0.19 TTTTAT_AAAATA 50 48.40 8.87 0.03 0.18 TTTATT_ATTTTT 52 50.55 8.41 0.02 0.17 TTTAAA_TTTTTT 78 76.00 13.26 0.03 0.15 TTTTAT_TTTTTT 81 79.90 9.01 0.01 0.12 AAAAAT_AAATAA 51 49.85 9.60 0.01 0.12 TTTTTT_TTTTCA 65 64.00 11.25 0.01 0.09 TTTTAT_AAAAAT 60 59.25 8.54 0.00 0.09 AATTTT_AAAAAT 55 54.15 9.77 0.01 0.09 ATTTTT_AAATTT 53 52.45 6.56 0.00 0.08 TTTAAA_AAAAAT 53 52.45 8.57 0.00 0.06 AAAAAT_TTTTAA 55 54.75 8.05 0.00 0.03 AAAAAA_TGAAAA 63 62.75 8.82 0.00 0.03 ATTTTT_TTTTAT 56 55.75 9.41 0.00 0.03 TAAAAT_AAAAAA 62 61.75 11.68 0.00 0.02 TTTATA_ATTTTT 47 46.95 6.34 0.00 0.01 AAAAAA_AATAAA 71 70.95 12.11 0.00 0.00 AAAAAT_TTTTTT 107 
107.05 18.21 0.00 -0.00 ATTTTT_AAATAT 48 48.10 10.65 0.00 -0.01 AAATTT_AAAAAT 55 55.25 9.05 0.00 -0.03 TTTATT_AAAAAT 51 51.30 8.80 0.00 -0.03 TTATTT_TTTTTT 67 67.40 9.19 0.00 -0.04 ATTTTT_AAAATA 52 52.55 11.34 0.00 -0.05 ATAAAA_TTTAAA 45 45.70 9.68 0.01 -0.07 AAAAAA_TTTTAA 74 75.30 13.36 0.01 -0.10 TTTTTA_TTTTAT 51 52.00 9.00 0.01 -0.11 TTATTT_ATTTTT 49 49.75 6.48 0.01 -0.12 TTTTTA_AAAAAA 89 91.65 19.93 0.04 -0.13 AAAATA_TTTTTT 82 84.55 16.13 0.04 -0.16 AAAAAC_TTTTTT 68 69.45 9.16 0.02 -0.16 ATAAAA_AAAAAT 56 57.45 8.33 0.02 -0.17 AAAAAA_GAAAAA 70 72.55 12.60 0.05 -0.20 TAAAAA_TATTTT 52 53.50 7.25 0.02 -0.21 AAAAAG_AAAAAA 67 70.05 13.77 0.07 -0.22 AAAAAA_AAAAAA 125 130.90 23.99 0.14 -0.25 TTTTTT_ATATTT 70 72.50 9.87 0.04 -0.25 AAAAAA_TAAAAA 84 88.60 14.75 0.12 -0.31 AAAAAA_TTTTAT 84 88.20 12.88 0.10 -0.33 AAAAAA_TTATTT 73 76.50 10.53 0.08 -0.33 ATTTTT_TATTTT 56 58.95 8.76 0.08 -0.34 AATAAA_AAAAAA 62 65.70 10.65 0.11 -0.35 AAATAA_ATTTTT 48 50.45 6.97 0.06 -0.35 TTTTTT_TAAAAA 87 93.45 16.86 0.23 -0.38 TTTTTT_TAAAAT 62 65.60 9.22 0.10 -0.39 ATAAAA_AAAAAA 80 85.25 13.33 0.17 -0.39 AAAAAA_ATAAAA 76 81.90 12.76 0.22 -0.46 TTTTTT_AATATT 54 58.90 9.45 0.21 -0.52 TTTTTT_AAATAA 66 72.05 10.99 0.27 -0.55 TTTTTT_GAAAAA 69 78.25 16.65 0.58 -0.56 AAATAT_AAAAAA 67 73.65 11.37 0.31 -0.59 TTTTTA_TAAAAA 53 60.45 10.78 0.49 -0.69 TTTTTA_AAAAAT 55 62.65 11.05 0.50 -0.69 TTTTAA_AAAAAA 65 75.20 13.59 0.74 -0.75 AATATT_AAAAAA 52 60.80 11.11 0.69 -0.79 ATTTTT_AAAAAT 56 67.50 13.57 1.07 -0.85 ATTTTT_TTTTTT 82 93.55 12.20 0.76 -0.95 ATAAAA_TAAAAA 48 55.60 7.63 0.56 -1.00 TTTTTT_AAAAAT 78 99.40 20.51 2.58 -1.04 TTTTTT_TTTTTA 72 89.90 14.73 1.98 -1.22 TATTTT_AAAAAA 63 80.65 14.43 2.17 -1.22 TTTTTT_AAAATA 63 82.75 14.97 2.68 -1.32 TTTTTT_AAAAAA 113 144.35 23.43 3.82 -1.34 AAAAAA_TATTTT 72 91.25 13.66 2.27 -1.41 AAAAAA_TTTATT 62 76.25 9.08 1.47 -1.57 ATTTTT_TAAAAA 49 65.95 10.16 2.50 -1.67 TAAAAA_AAAAAA 67 93.25 14.57 4.30 -1.80 User time : 126.97 sec. 
SMILEv1.47/Lanceur/fasta0000600002404200237300000072402610066542220014540 0ustar lamaaoc00000000000000>strand + guaB inositol-monophosphate dehydrogenas CTTTCCGTTATCTAAATATTTCAACTCTTTCCCGCTTCCTTGACATGCTCTTGGCTAGTTGATAATCTACATATAATATTTTGCCGAAAA >strand - yaaC yaaC TTTTCGGCAAAATATTATATGTAGATTATCAACTAGCCAAGAGCATGTCAAGGAAGCGGGAAAGAGTTGAAATATTTAGATAACGGAAAG >strand + yaaJ similar to hypothetical proteins CCGTTTCAGTTATAGTTAATATGTAGCCTTTTTAGGCAATGAAAAAACTTTGAAA >strand - yaaI similar to isochorismatase TTTCAAAGTTTTTTCATTGCCTAAAAAGGCTACATATTAACTATAACTGAAACGG >strand + metS methionyl-tRNA synthetase ATTTTATAAATATTTAATAAAGCTATTATCCTACTAAAAATCCTTTTAAATCAAGACTTTCGAACCAAAGTTTTTTATTTCATTTGATTATATACGACAAAATTCGACACGAACAGACTTTTTTTATTTTCATTAAAGATTTTTAATTTTAATTATTCTTTTTCAGGGCGTATGTATATATTCTTGATCTTAAAGGCTAAGATGGTATCATAGATAAAGGATAAATATAAATAATATTCATATATGATTTGCACTTATCGCCGCTCTCGTCCTTTGGGCGGGAGCTTTTTGACATTCTGA >strand - abrB transcriptional regulator TTAATGAAAATAAAAAAAGTCTGTTCGTGTCGAATTTTGTCGTATATAATCAAATGAAATAAAAAACTTTGGTTCGAAAGTCTTGATTTAAAAGGATTTTTAGTAGGATAATAGCTTTATTAAATATTTATAAAATGCTGTTATTTCGGTAGTTTCCAAGACATTACTGACTATAAGAACTAATTCTTACAATCAATAGTAAACAAAATGATTGACGATTATTGGAAACCTTGTTATGCTATGAAGGTAAGGATTTTGTCGAATAATGACGAAGAAAAATATAATTTAAACAAATAAGTA >strand + kbaA alternate gene name: ybxC, ybaM CACATACTTGTCTTACCTTTAATATGATTCATCTGTTTGGAATTATACATAACAAAATATCGCTGTTCGCGATTTTTGA >strand - gerD gerD TCAAAAATCGCGAACAGCGATATTTTGTTATGTATAATTCCAAACAGATGAATCATATTAAAGGTAAGACAAGTATGTG >strand + ybaR similar to hypothetical proteins GGAAACCAGCTCGGGTTGCCAAATGAGAGGAAAGATATTACTGTCATTAAAACGATTCTTGGTTTCATCCGGATATGATAAAATGTCTCAGACTATATTTGAGCCATTTTTTTCTTCAGCAATCATCTTGCGTAATTGATAAAAATTTATTATGATACTCTTTGTATGACAACTCCTTGCCTCAATACAATATACTCAACGTTTCCCCGTTTTCTCCGGTCGTTTTTCTTTTCATTTTCTCCCGTAAAATAAAAAAAGCTCCCAATACAATAGATATGGTGCGGATTTATTTTTTACTAA >strand - ybaN alternate gene name: ybxG; simila 
TATAAAAATGAATTAACAGGTACGTTTTGTCTTGTTTAGTTTTCAAAGATCATTACTGCCGCTTAACGAAGCAGCTTTATTACTATAACATTTAGCTTCTTTTAAGTCAACAACTTTTTTCAAAGTATTTAATGAAGCTTTTAGTTGATGTCAGCTGTTTATCAGCGACGAATAATAATATATCACGGATATTATCAAAAGGTCAACACCCAATTTAGGCTGTATGTGAAGAGACATTTTATCGGTTATATTCAATTGTCCATGCTCATAAGATGTAAAACAAGAATGATGGACAAGATA >strand + sigW RNA polymerase ECF-type sigma fac GCCCCCCTCCACCATTATTGGGCTATAGCCAAGCGGTAAGGCAACGGACTTTGACTCCGTCATGCGTTGGTTCGAATCCAGCTAGCCCAGTCACAGACACCTTTGATCAAAAGGTGTCTTTTTTCTTTTCGGAAAAATCATTCCAACTTCTAACTGTTCAGTCTGTATAATAATTTTAAAAATATGTTAAGGTAGTTTATTCACGAATTACCATCTACACCCTGCCAAAAATTTGATAAACTTATTTTATAAAAAAATTGAAACCTTTTGAAACGAAGCTCGTATACATACAGACCGGTG >strand - ybbK ybbK GCTCGAACCGCCGACCCTCTGCTTGTAAGGCAGATGCTCTCCCAGCTGAGCTAATCCTCCAAATGGTGACCCGTACGGGATTCGAACCCGTGTTACCGCGTGAAAGGGCGGTGCTTAACCGTTTACCAACGGGCCGTTGTGTTCTCGTCGAGCTGACAAAAGTTATTATATACAGGTCACCCCCTGTTTGTAAAGAGGAAAATTAAATTTTTTATTTTCAAACTTTTTAACTCTTTTTTAACTGAAAAGCTCCAACTAAAGCCATTTATTTTATGATAGAGACAAGCAAAGCAAGAGAGG >strand + adaA methylphosphotriester-DNA alkyltr CTCCTTATCTTAAATAATTATAAGAAAACTCACGCTGGTTGAAATTAGTTTATCACATAATCATCTTTACTCATTTTGTTATCTTGCTATTACATTCTAACCACCTTATTTTTTTCTATTTATGAGGTTATAGTGTAGTTATCAAGAATGCTAAACGGGAGTAGATAAAAATGGCCCAAGATATAAAAAACGCGTGTCGAAAATTTAAATTGGGATTCAATTCAAAGAATTAGATGAGCA >strand - alkA DNA-3-methyladenine glycosylase TGCTCATCTAATTCTTTGAATTGAATCCCAATTTAAATTTTCGACACGCGTTTTTTATATCTTGGGCCATTTTTATCTACTCCCGTTTAGCATTCTTGATAACTACACTATAACCTCATAAATAGAAAAAAATAAGGTGGTTAGAATGTAATAGCAAGATAACAAAATGAGTAAAGATGATTATGTGATAAACTAATTTCAACCAGCGTGAGTTTTCTTATAATTATTTAAGATAAGGAG >strand + ybdO ybdO TTTCACCATGCAAACAACCTCCTTTTATTTCCATCATACATAATAACGATTTAGGTGTAAATAAATGTTTTTTGAGGTTAATATATACATTATATTCGCCG >strand - ybdN ybdN CGGCGAATATAATGTATATATTAACCTCAAAAAACATTTATTTACACCTAAATCGTTATTATGTATGATGGAAATAAAAGGAGGTTGTTTGCATGGTGAAA >strand + ybdT ybdT TTTTTTCATTTTGTTCTCCCCTTCGTTTTGATTAACTACCTTTTCTCTCTTTCGTTTGTTCTTCCTTTAAAAAATAATCAAAAAGCGCTTAAATTTGGTTTTGCAAACAAATACTGTTACTGGGTTTATAGTCGGAAGATAA >strand - ybxI alternate gene name: ybdS; 
simila TTATCTTCCGACTATAAACCCAGTAACAGTATTTGTTTGCAAAACCAAATTTAAGCGCTTTTTGATTATTTTTTAAAGGAAGAACAAACGAAAGAGAGAAAAGGTAGTTAATCAAAACGAAGGGGAGAACAAAATGAAAAAA >strand + ybeC similar to amino acid transporter TTGTTTCATGGTGATACTCCTTTCTATTTTAAGAATAGTGTTTCCTGTATCATTCCCGTTTTGGAAATTGCTAAACCTGTTGTGTTTTTATCATTGGAATTAACAGTAAATTCCATTTAAGAAGAAACTAGTGACTTCATTCCTGTATTGGAAGGTAATATACATTGACTTATTATAGATTTCAGTCAAAATTCGATAAATATTGACAAAGTAAAGTAATCAGAATAATAATATAATT >strand - ybyB ybyB AATTATATTATTATTCTGATTACTTTACTTTGTCAATATTTATCGAATTTTGACTGAAATCTATAATAAGTCAATGTATATTACCTTCCAATACAGGAATGAAGTCACTAGTTTCTTCTTAAATGGAATTTACTGTTAATTCCAATGATAAAAACACAACAGGTTTAGCAATTTCCAAAACGGGAATGATACAGGAAACACTATTCTTAAAATAGAAAGGAGTATCACCATGAAACAA >strand + ybeF ybeF ATTAAGCATTAATAATTATCCCCCTTTGCTGTTTGTGCTGGATGTGTGGCGTGCCTCCTTTCCCGAAAAACAAAAAGAGCCAGAAAACACCCCTTGTCAAAAGGGGTTGGATTCCTGGCTCTCCTCATCTCCATCCAAGCTATTAACTTATCATGATAGCGCTTTCTTAAATTATCATAGTCCGGAGCGAATGTAAACGGGTAATTTTCAAATTGGGCCTTAGGGAAGTGAATATTATCCTTTTACTAAATGCAATTATATTTTTT >strand - glpT glycerol-3-phosphate permease AAAAAATATAATTGCATTTAGTAAAAGGATAATATTCACTTCCCTAAGGCCCAATTTGAAAATTACCCGTTTACATTCGCTCCGGACTATGATAATTTAAGAAAGCGCTATCATGATAAGTTAATAGCTTGGATGGAGATGAGGAGAGCCAGGAATCCAACCCCTTTTGACAAGGGGTGTTTTCTGGCTCTTTTTGTTTTTCGGGAAAGGAGGCACGCCACACATCCAGCACAAACAGCAAAGGGGGATAATTATTAATGCTTAAT >strand + purT phosphoribosylglycinamide formylt GTTTTGCATGGGATGCCTCCTGTCGGTGTGGTGGTCTTGGATTAAGTGTAGCATGATTTTGGGGGATTTCTTGTATGATGTTGCTGTTTAGGAATAGAGAAAGCAGACCGTTATTTGTCTGCTTTCGTTTAAGAATTTAAAAGATTTTAACCCTCAATAATAATTTTATCTTTTTTAATAAAAATTTATTTTTCAAATAT >strand - ybfI similar to transcriptional regula ATATTTGAAAAATAAATTTTTATTAAAAAAGATAAAATTATTATTGAGGGTTAAAATCTTTTAAATTCTTAAACGAAAGCAGACAAATAACGGTCTGCTTTCTCTATTCCTAAACAGCAACATCATACAAGAAATCCCCCAAAATCATGCTACACTTAATCCAAGACCACCACACCGACAGGAGGCATCCCATGCAAAAC >strand + ybgA similar to transcriptional regula 
AATCTTCATGTGACACCCCCTCAAAGAGATAGACAAGCACCATATTTGTTATGACCAATTTATGATACTTGTCATTACGAATTTAGCACCGCCCTTATCAAACTGTCAATATTAATTTCTGAAAATTTGTTATAAAAGAAGGATACAAATCTTTCATATTGGGAGGGCAAATGGTATTATGGTCTCAA >strand - ybfT similar to glucosamine-6-phosphat TTGAGACCATAATACCATTTGCCCTCCCAATATGAAAGATTTGTATCCTTCTTTTATAACAAATTTTCAGAAATTAATATTGACAGTTTGATAAGGGCGGTGCTAAATTCGTAATGACAAGTATCATAAATTGGTCATAACAAATATGGTGCTTGTCTATCTCTTTGAGGGGGTGTCACATGAAGATT >strand + ycbA alternate gene name: yzgA; simila ACTGAGGTGTATCAAAACGAAGGCTAAGGGGGCGTTAAACGCTTTCATAATAGCGGAAGGTATAATTACTGTATAATGGAGACTACAGAATCATACAAAATACGACAAAAACATTTTGAAAATAAGCAGTAAGATTTATAAAAACCTCATGTTCTGGGTCAAAAGCAATTTGCAGGTTTTCTCCCGATTCCCCAAAATAAAAAGCGAGGATGTGATCAACATAAAACTGGAACGGTTTATTGAAAAAGACGGATATATACTGGCCCTCATGCTGATTACGGTGCCGCTGGCCGGTGAACT >strand - ybgJ similar to glutaminase TATATATCCGTCTTTTTCAATAAACCGTTCCAGTTTTATGTTGATCACATCCTCGCTTTTTATTTTGGGGAATCGGGAGAAAACCTGCAAATTGCTTTTGACCCAGAACATGAGGTTTTTATAAATCTTACTGCTTATTTTCAAAATGTTTTTGTCGTATTTTGTATGATTCTGTAGTCTCCATTATACAGTAATTATACCTTCCGCTATTATGAAAGCGTTTAACGCCCCCTTAGCCTTCGTTTTGATACACCTCAGTCCTTTAAGAAAAAAATCAGTGAGGAGGAATTCGATGAAAGA >strand + cwlJ alternate gene name: ycbQ TGTGTCATTACAATTCCTCCTTTTGTCATTTGATTTGTTATGTATAATTCCTCAAATGTAAAAAGTTAAACCTTATTTCTTTTCGGAGGCGGGTGCAAATCTGCTGGGGAGAGGTCAATGATGTGAATCCAGGAATTGAGGCTGCGGCGCTTCGACAGCGGAAACGTGAAGCTTGTACTGCACCGGACGCTGCTTTGATGCCCAACAGCCCATTATGTTTGCTGTTTTTAGATGAATAAATTTAAAAACTTCTCCTGTCCCTCCGTCATCACTTCTGAAGTAA >strand - ycbP similar to hypothetical proteins TTACTTCAGAAGTGATGACGGAGGGACAGGAGAAGTTTTTAAATTTATTCATCTAAAAACAGCAAACATAATGGGCTGTTGGGCATCAAAGCAGCGTCCGGTGCAGTACAAGCTTCACGTTTCCGCTGTCGAAGCGCCGCAGCCTCAATTCCTGGATTCACATCATTGACCTCTCCCCAGCAGATTTGCACCCGCCTCCGAAAAGAAATAAGGTTTAACTTTTTACATTTGAGGAATTATACATAACAAATCAAATGACAAAAGGAGGAATTGTAATGACACA >strand + ycbU similar to NifS protein homolog TTTTTTCTCATACTCCCATCTCCTTTTTCTTTTATTCTATTGTTTATT >strand - pcp pyrrolidone-carboxylate peptidase AATAAACAATAGAATAAAAGAAAAAGGAGATGGGAGTATGAGAAAAAA >strand + yccC similar to 
asparaginase CCATAACTCATAACATTCCCACCTTACTGAATTGCAATCAAAAATATAGTGACTGGTCTATTATCTTGATTCAATCATCAATTGTCAAGAAAAATTCATTGTATGAAAAGACAAAAAAAGAAGGATATGACAACAAAAAATACTGAGAGAAAAGCTGACTGATCTTTTGACTGAATAGATAAAATG >strand - lmrA transcriptional regulator CATTTTATCTATTCAGTCAAAAGATCAGTCAGCTTTTCTCTCAGTATTTTTTGTTGTCATATCCTTCTTTTTTTGTCTTTTCATACAATGAATTTTTCTTGACAATTGATGATTGAATCAAGATAATAGACCAGTCACTATATTTTTGATTGCAATTCAGTAAGGTGGGAATGTTATGAGTTATGG >strand + yccF yccF GCTAGTTCCAATAGAAACACTCCTTAAAATGTTAAATAAACACCTAATGATTGTAAAAAAGAAGGGCCTAAAGTGGGAATAGGTGATAAGCCTTAAATCACAAAAGTTGGTGAAAATGTCATAGGTAAATTGGCATAATCAGCCAGCTTATCACATTACCAAATTCTTTTTTAGCCCGAAACCAAGCCCTCAGAAGTTATTTTT >strand - yczC similar to hypothetical proteins AAAAATAACTTCTGAGGGCTTGGTTTCGGGCTAAAAAAGAATTTGGTAATGTGATAAGCTGGCTGATTATGCCAATTTACCTATGACATTTTCACCAACTTTTGTGATTTAAGGCTTATCACCTATTCCCACTTTAGGCCCTTCTTTTTTACAATCATTAGGTGTTTATTTAACATTTTAAGGAGTGTTTCTATTGGAACTAGC >strand + natA Na+ ABC transporter (ATP-binding ACTTTTACCATTATCATTCCCCTAGTTACATAGAATCATCTTTATTCTATCCCATTTCACGACATAAAAAGAACTCTTCGCGACAACTTTATCCAACTAATCCAGCTTTTCGT >strand - yccH similar to two-component response ACGAAAAGCTGGATTAGTTGGATAAAGTTGTCGCGAAGAGTTCTTTTTATGTCGTGAAATGGGATAGAATAAAGATGATTCTATGTAACTAGGGGAATGATAATGGTAAAAGT >strand + ycdB ycdB GAAACATTCATTTTTCCTCCTCAAATGGTTTAGGAAAGGGGAGTCTTTTGTCTTTGAGAAATGGATCTTTTTGAATGGATTAGCATCATATAACCAAAAAAATCAGTTCTTTTCTCACGGTTTTCACCGGATTTTCTGGAAACTCCGCCCTCCTTTGTCATGAATGGACCTATAAAAAGTTACAACGGAATTAATTTTAAGATTATTAGCTTTTATATTGAATAGGTTCATCATCCTATTTTTGGTGAAAAATGTTCTGGTAAAACTGGTAAACTCAGAAAACTGTGTATTTTAGAC >strand - ycdA ycdA GTCTAAAATACACAGTTTTCTGAGTTTACCAGTTTTACCAGAACATTTTTCACCAAAAATAGGATGATGAACCTATTCAATATAAAAGCTAATAATCTTAAAATTAATTCCGTTGTAACTTTTTATAGGTCCATTCATGACAAAGGAGGGCGGAGTTTCCAGAAAATCCGGTGAAAACCGTGAGAAAAGAACTGATTTTTTTGGTTATATGATGCTAATCCATTCAAAAAGATCCATTTCTCAAAGACAAAAGACTCCCCTTTCCTAAACCATTTGAGGAGGAAAAATGAATGTTTC >strand + rapJ response regulator aspartate phos 
AGTTCATATCGGTCTCACTTTCTCAAGATGTGCTGAATGTTAACGCTTTTAAGTGATTGACTCTCTCTAAAAGCGAACAGCTATGATAAAAT >strand - ycdD similar to L-alanoyl-D-glutamate ATTTTATCATAGCTGTTCGCTTTTAGAGAGAGTCAATCACTTAAAAGCGTTAACATTCAGCACATCTTGAGAAAGTGAGACCGATATGAACT >strand + yceC similar to tellurium resistance p AGGGCTTTCTCCTTTAGAAACAGGTGCCTGATCAAGTATGCTTAAATGAATCAAAAGGCCGCTTCCTTTCTGTCTGTTTTTAGATGGTGACACCAGAATAGGCAAAATGACTTGCCCGCGTCAAATGGTTTGTTTAAGAAGCGCACTTTATAGCATTTGCAAATCATATATCAAATATGGAAGTTTTTTTTCGAGGTTTTTCGTCAATTATTCTTAACTTTTACGAAACTTTGATATAATAACAAACGTATATATTAGTAATTTACGGCTTATTTTCCTTGTGAGCGTAAAAATAAATGT >strand - yceB similar to hypothetical proteins TTTTTACGCTCACAAGGAAAATAAGCCGTAAATTACTAATATATACGTTTGTTATTATATCAAAGTTTCGTAAAAGTTAAGAATAATTGACGAAAAACCTCGAAAAAAAACTTCCATATTTGATATATGATTTGCAAATGCTATAAAGTGCGCTTCTTAAACAAACCATTTGACGCGGGCAAGTCATTTTGCCTATTCTGGTGTCACCATCTAAAAACAGACAGAAAGGAAGCGGCCTTTTGATTCATTTAAGCATACTTGATCAGGCACCTGTTTCTAAAGGAGAAAGCCCTGTGACAA >strand + opuAA glycine betaine ABC transporter AATACATAATGGGTGTTGAAAAAGCAACTTATTTTTCCAGTAAAGGAGCAGGAAGTCATAAGAGGAAGGAGGAATTCAATTCCAATAAAAGCGTTTTCAACTAAAGGTAGGAGAAAAATCACCCGTAAAATACAATCATATAGGAGGATTACAGAGCATTTAGAAGCATAAATAAGATCATGTGGTCACATGGATGTTTATAAAGAAATGGTACAGAATAAAAGAGAATATGCTGTTTGTGTGGGAAGTTACATAAATG >strand - yceK similar to transcriptional regula CATTTATGTAACTTCCCACACAAACAGCATATTCTCTTTTATTCTGTACCATTTCTTTATAAACATCCATGTGACCACATGATCTTATTTATGCTTCTAAATGCTCTGTAATCCTCCTATATGATTGTATTTTACGGGTGATTTTTCTCCTACCTTTAGTTGAAAACGCTTTTATTGGAATTGAATTCCTCCTTCCTCTTATGACTTCCTGCTCCTTTACTGGAAAAATAAGTTGCTTTTTCAACACCCATTATGTATT >strand + ycgA similar to hypothetical proteins TATTGAAACAGGAAGATAGGCTGTATATAATATAGCACATATTGCTACTATTCAGAATAATTAATATTTTCAAACAGAGGGGATGGATCGAAATATGAGTATGCCAGCAGCCGAAACACAGCCTAAGAAAAAACGTATGACATTTAAAATGCCTGACGCCTATGTCCTCTTATTTATGATTGCTTTCATTTGCGCAATCGCTTCATATATTGTGCCGGCAGGTAATTTGACCGCGTGACAAAGGGGGATGTCACGACCGCTGTTCCGGGAAGCTATCATTCAATTGAACAGTCTCCGGTC >strand - amhX amidohydrolase 
TCACGCGGTCAAATTACCTGCCGGCACAATATATGAAGCGATTGCGCAAATGAAAGCAATCATAAATAAGAGGACATAGGCGTCAGGCATTTTAAATGTCATACGTTTTTTCTTAGGCTGTGTTTCGGCTGCTGGCATACTCATATTTCGATCCATCCCCTCTGTTTGAAAATATTAATTATTCTGAATAGTAGCAATATGTGCTATATTATATACAGCCTATCTTCCTGTTTCAATATAAATAAAGTAAAGAAGGTAAAAATTCATTCTGATAGAAGGAGGTATTTCTTACCATGCAAA >strand + ycgE ycgE TGTCCATTTGTTCTACCTCCTCTTACTTTTCTATGTAAGTTCTGATATCATGATAATAGTTTGATTGTTGAATATCTCGATT >strand - mdr multidrug-efflux transporter AATCGAGATATTCAACAATCAAACTATTATCATGATATCAGAACTTACATAGAAAAGTAAGAGGAGGTAGAACAAATGGACA >strand + ycgI ycgI AAGACAAAATACGTACCGATCCCTGCGATCAGAAATGAAAGGAGAACGGAAAAACCGCTTTTTACAATTCGATGCTGGAACCGAGAAAGAAGCCTGTTCCAATCGTGCAGCCGACTCCGATCAGTGACAGCTGCCA >strand - ycgH similar to amino acid transporter TGGCAGCTGTCACTGATCGGAGTCGGCTGCACGATTGGAACAGGCTTCTTTCTCGGTTCCAGCATCGAATTGTAAAAAGCGGTTTTTCCGTTCTCCTTTCATTTCTGATCGCAGGGATCGGTACGTATTTTGTCTT >strand + aroI shikimate kinase TGATCATCGCCATTCCTCCCTTCCTATTTATCATACAGAAAAAGACCCTTCCATCGAAGAGTCAGCTGGAAAAACAATAAAGGCGGAAGGTTTTAACGCTTTTTTGCGTCGAAGTATTAAAATACCTGCAGAAATGTTGTACAATGAATGTCATTCAGGCAAAAATGGCTACAACAAAGCGCACTATAAGCTTGACCGGTAAAGCCGGTATCTATATAAATATTCGCTAAATAAGCATATAATGA >strand - tmrB ATP-binding membrane protein TCATTATATGCTTATTTAGCGAATATTTATATAGATACCGGCTTTACCGGTCAAGCTTATAGTGCGCTTTGTTGTAGCCATTTTTGCCTGAATGACATTCATTGTACAACATTTCTGCAGGTATTTTAATACTTCGACGCAAAAAAGCGTTAAAACCTTCCGCCTTTATTGTTTTTCCAGCTGACTCTTCGATGGAAGGGTCTTTTTCTGTATGATAAATAGGAAGGGAGGAATGGCGATGATCA >strand + ycgK similar to transcriptional regula CCCAAATCTTCCCCCTCGGCAAATACCTTTTCATCCCGATACATCTCGGCGTTTTTAGAAAACGGTGTTTCATTTGTAATTTGAACCAGCTCCTTCTTTTCGTAAATGATGATATGGATTATGTTAGCAATTTCTGTTATGATTCGTCCAAGATATAAAAAT >strand - ycgJ ycgJ ATTTTTATATCTTGGACGAATCATAACAGAAATTGCTAACATAATCCATATCATCATTTACGAAAAGAAGGAGCTGGTTCAAATTACAAATGAAACACCGTTTTCTAAAAACGCCGAGATGTATCGGGATGAAAAGGTATTTGCCGAGGGGGAAGATTTGGG >strand + ycgT similar to thioredoxin reductase 
TTCATCTGTTTCACCCAATTGGTTTTTGCCTATTATAGAGTTTTCAGCAATTGACCACAAGGTAGAAGAAATGAATAAACAAAGGCCCTTGAAATATGATATACTTTGTATTGATATTCATTCTCAATTAA >strand - ycgS similar to aromatic hydrocarbon c TTAATTGAGAATGAATATCAATACAAAGTATATCATATTTCAAGGGCCTTTGTTTATTCATTTCTTCTACCTTGTGGTCAATTGCTGAAAACTCTATAATAGGCAAAAACCAATTGGGTGAAACAGATGAA >strand + nasA nitrate transporter TCTTCATGTATCAGACCTCCTTTGGCGAAGCTCGTAAGTTTTCTCCTATTAAAAATTATGTCACAATGCATTGTTAACGCATTAAACGTGTCACAAAAACTTACACATGTCTTTTCCAGAAAATAATGGTCCTATATCCTTGATTCAGA >strand - nasB assimilatory nitrate reductase (e TCTGAATCAAGGATATAGGACCATTATTTTCTGGAAAAGACATGTGTAAGTTTTTGTGACACGTTTAATGCGTTAACAATGCATTGTGACATAATTTTTAATAGGAGAAAACTTACGAGCTTCGCCAAAGGAGGTCTGATACATGAAGA >strand + yckC similar to hypothetical proteins CATTGAACATAAGGGGTTGCAATATGTTGATCGCGTTTTCAAAAAGTTCGAATTGTGTATACAAAAAGCATGGGTGGGTGTTTAAAAGCGTCTATGAAGGGAAGAGGTGATGAAAAAAGGAATTGTCCAAGGATGAATGCCAGACGTACCAAGGAAACGCATGTGAAAGTGAAGGATGCAGCAGACCCGTCACTCATCTATGCCAATACCGGAAGAAATGGTCAAGAGGCATGAAACCGATCTTCAGAATAAATTTAAGTCTTGCTGATGCCCCTAAGGCATATGATATATTCGATGAGA >strand - yckB similar to amino acid ABC transpo TTTATTCTGAAGATCGGTTTCATGCCTCTTGACCATTTCTTCCGGTATTGGCATAGATGAGTGACGGGTCTGCTGCATCCTTCACTTTCACATGCGTTTCCTTGGTACGTCTGGCATTCATCCTTGGACAATTCCTTTTTTCATCACCTCTTCCCTTCATAGACGCTTTTAAACACCCACCCATGCTTTTTGTATACACAATTCGAACTTTTTGAAAACGCGATCAACATATTGCAACCCCTTATGTTCAATGCTATCTTTTTAGTTGATTAACAGATTGGGGGAACTATAGGTGAAATC >strand + yckH similar to hypothetical proteins ATTCCATTGTTGATCCACTCCCTTTTCGAATTATTGTTCATTCTATAGGTTCCTTCTTTTGTTGAGAAGTAGGCACTTTGAAGTCAACAAGTTACTTGGAGGATACTGTGAGGAGAGACAATCAGATTTCTTTCGCTTGATTTTATGAGTGAGTATAATGAAAAAAGAGTCTGCAAAAAAGTAAGTAAGTGATGATACGTA >strand - yckG similar to D-arabino 3-hexulose 6 TACGTATCATCACTTACTTACTTTTTTGCAGACTCTTTTTTCATTATACTCACTCATAAAATCAAGCGAAAGAAATCTGATTGTCTCTCCTCACAGTATCCTCCAAGTAACTTGTTGACTTCAAAGTGCCTACTTCTCAACAAAAGAAGGAACCTATAGAATGAACAATAATTCGAAAAGGGAGTGGATCAACAATGGAAT >strand + ycxD similar to transcriptional regula 
TAACCATAATGGGAAGGGCTCCTTTACCTGAATTGCAGCGCCGGTCGCTCCCTTTATTGTATGGCCGCGGTCAGAACGGTACAATGAGAAAAA >strand - ycxC similar to hypothetical proteins TTTTTCTCATTGTACCGTTCTGACCGCGGCCATACAATAAAGGGAGCGACCGGCGCTGCAATTCAGGTAAAGGAGCCCTTCCCATTATGGTTA >strand + yclB similar to phenylacrylic acid dec TATCCAAAAGAAAGCGCCTCCTTATCATACCTTTTAGGTATTAATTATAGATTTAATAG >strand - yclA similar to transcriptional regula CTATTAAATCTATAATTAATACCTAAAAGGTATGATAAGGAGGCGCTTTCTTTTGGATA >strand + yclG yclG AAGCCATGTCAGCCAAACCCTTTCTAAAAAAAGTGTATTATAAAAATCATATTCTTATATAATACTTGTGTCAATATAATTTATACTCATTTCAAATAGTTCCATATTGGAAAAATTATAAATACCTAAAAATAGTGATTTCCTCCTAATTTTTAAACTTTTTATCGTGAGATTTTCTTACACAACTATTCCTTTATATTTTTTTAATAAAATTTTAGAGAAATGTACAAGCAGATCGGGCCTTTGTGA >strand - yclF similar to di-tripeptide ABC tran TCACAAAGGCCCGATCTGCTTGTACATTTCTCTAAAATTTTATTAAAAAAATATAAAGGAATAGTTGTGTAAGAAAATCTCACGATAAAAAGTTTAAAAATTAGGAGGAAATCACTATTTTTAGGTATTTATAATTTTTCCAATATGGAACTATTTGAAATGAGTATAAATTATATTGACACAAGTATTATATAAGAATATGATTTTTATAATACACTTTTTTTAGAAAGGGTTTGGCTGACATGGCTT >strand + gerKA gerKA TCTTCATGCTTCATCATCCTCCGCTCTTAGTGTCTATCTTTGGCGGAGAAGTTATACCGCATCTAACCCTAATGCATAATCTCAATTTTAAAAGG >strand - yczF yczF CCTTTTAAAATTGAGATTATGCATTAGGGTTAGATGCGGTATAACTTCTCCGCCAAAGATAGACACTAAGAGCGGAGGATGATGAAGCATGAAGA >strand + yclJ similar to two-component response AGTTCATTTATGTTCCTCCGTATAGGTATTTTTATTGATAAGACCGCCTCTGGGGCGCGTCCCCTTGCTGATAAATTAATAGTAGTCCAAAGGTATGAAAAAAATATGAACAATCTATTTCTAGTTGATGAAATAAAAACTAACAATTGTGAAACGCAAAACCTTCTGTTTAAAATGGTGCT >strand - yclI similar to transporter AGCACCATTTTAAACAGAAGGTTTTGCGTTTCACAATTGTTAGTTTTTATTTCATCAACTAGAAATAGATTGTTCATATTTTTTTCATACCTTTGGACTACTATTAATTTATCAGCAAGGGGACGCGCCCCAGAGGCGGTCTTATCAATAAAAATACCTATACGGAGGAACATAAATGAACT >strand + yclN similar to ferrichrome ABC transp 
TTATCTCTCTTTCAGTACTGTAAATCAAGGGGGAATTTCTTTCTCAGGAGAACATTTGTATTTTTCACGAGGAATAATGTTAGGTTTGCTGACCGTATGCCTGCGTTATAAAGATAAATATGGTAAACAGCCTAACGTTTTGGGATGGAAAATGGTTAGAATGATTAGTAAAATTGATAAATGACTAGGTTAATATTTTTAAAGAATATTGACTAACCCTATAAAAATGGTAATATGTAAATGATAATGATAATCAATTACTATATGGCCATATTGTTTTGAGTCCTTGCGGAGTAGGAG >strand - yclM similar to homoserine dehydrogena TATCATTTACATATTACCATTTTTATAGGGTTAGTCAATATTCTTTAAAAATATTAACCTAGTCATTTATCAATTTTACTAATCATTCTAACCATTTTCCATCCCAAAACGTTAGGCTGTTTACCATATTTATCTTTATAACGCAGGCATACGGTCAGCAAACCTAACATTATTCCTCGTGAAAAATACAAATGTTCTCCTGAGAAAGAAATTCCCCCTTGATTTACAGTACTGAAAGAGAGATAATAAATACTAAATTTGTGTGAAAATTTTAAACATTAGGAGATGTAAACATGAAGG >strand + yczG similar to transcriptional regula CGATCATGTAAATCTCCTCCGTCGTTATGCGAATTTATTGTTCGAATTTCATCGAACTAAATGTAACATGTATTCTAAATGAGGTAAAGGAAAAACCCTTATATAGTAA >strand - ycnE similar to hypothetical proteins TTACTATATAAGGGTTTTTCCTTTACCTCATTTAGAATACATGTTACATTTAGTTCGATGAAATTCGAACAATAAATTCGCATAACGACGGAGGAGATTTACATGATCG >strand + ycnG similar to 4-aminobutyrate aminot TATCCATTTTGTAGTTTCTCCTTCTGATACCATCAAAAAGTTATAATTGGTACTTTTCATCATACCAAAGAGAAGT >strand - ycnF similar to transcriptional regula ACTTCTCTTTGGTATGATGAAAAGTACCAATTATAACTTTTTGATGGTATCAGAAGGAGAAACTACAAAATGGATA >strand + ycnL ycnL GAAGCATTCATACACCCTCTTCATTAAGATCTATTTAAGCGTAGAGGATTTTAGAGACGGAGACAAGTAAGTGGATGATAAAAAATGTGAAAATGTGTTGAACAGCAACAAAACGAAACAGAAGGGACTCTGGT >strand - ycnK similar to transcriptional regula ACCAGAGTCCCTTCTGTTTCGTTTTGTTGCTGTTCAACACATTTTCACATTTTTTATCATCCACTTACTTGTCTCCGTCTCTAAAATCCTCTACGCTTAAATAGATCTTAATGAAGAGGGTGTATGAATGCTTC >strand + ycsD similar to hypothetical proteins TAACGGCCTCCTTTTGTGTTTTTTCCATTATATCCTATCGTTTCACTTTAGGCTG >strand - yczH yczH CAGCCTAAAGTGAAACGATAGGATATAATGGAAAAAACACAAAAGGAGGCCGTTA >strand + pbpC penicillin-binding protein 3 TGCTATCCCTTCTTTCTGTCTTTTCAAATAGTCTACAGCAAACGATAGGAGATTTCTGCAAAATCGGTGAACTTTTCCTTCATTTTCAAAAAATCTTGCACGCACCAATTCAATTAAGAG >strand - yczJ yczJ 
CTCTTAATTGAATTGGTGCGTGCAAGATTTTTTGAAAATGAAGGAAAAGTTCACCGATTTTGCAGAAATCTCCTATCGTTTGCTGTAGACTATTTGAAAAGACAGAAAGAAGGGATAGCA >strand + ydaD similar to alcohol dehydrogenase CCGATGACGCCCTTTGGCCTGCTGAAATCTTCTAGAAAGCATGTTAATCATTCAGCAACCAACCCCCTTTTTGAGTTAATTTCCTCTCAGCGTTCCATGTGAAACATTTTTCTGATAAAGCAATGCGAAGGTGCCAAGCCATGTTTATCCAAAGAGTGTGTGAGGTACACAACAAAT >strand - ydaC similar to hypothetical proteins ATTTGTTGTGTACCTCACACACTCTTTGGATAAACATGGCTTGGCACCTTCGCATTGCTTTATCAGAAAAATGTTTCACATGGAACGCTGAGAGGAAATTAACTCAAAAAGGGGGTTGGTTGCTGAATGATTAACATGCTTTCTAGAAGATTTCAGCAGGCCAAAGGGCGTCATCGG >strand + lrpC transcriptional regulator (Lrp/As AATCGCCCTCCTTGTGGACACGTTTTCATTTTATACTATAAACAATCCGGGGGGGCATATGACAGCTTTCAAAAAATGTTCGGAAAACATTCATTTTTACATGCCTTTTCTAGGGAACTGTACTTGTCATTTACAAAAATACCCGAGATAATGT >strand - ydzA ydzA ACATTATCTCGGGTATTTTTGTAAATGACAAGTACAGTTCCCTAGAAAAGGCATGTAAAAATGAATGTTTTCCGAACATTTTTTGAAAGCTGTCATATGCCCCCCCGGATTGTTTATAGTATAAAATGAAAACGTGTCCACAAGGAGGGCGATT >strand + ydbA similar to ABC transporter (bindi TCTATCACTCCTTTTCCTTTTATAAAAATGGTACCCGCAGGTGAAAAATAAAAACGCATCCGAAAAAATGAAGGTATACTATATAGT >strand - ydaT ydaT ACTATATAGTATACCTTCATTTTTTCGGATGCGTTTTTATTTTTCACCTGCGGGTACCATTTTTATAAAAGGAAAAGGAGTGATAGA >strand + ydbO similar to hypothetical proteins CGCTTTGACTTTTTGTATATACCATTCTGTATTAACAGATGAATGAGCTGTTTTTTTCTGATTTCTGCAGACTGAGGTGTCATTGTGTCACCAACCTCCAATCAAACGATATTGATAATCATTATCAATTAACTTCAGTATACAATGTCCGCAATAGCCTGTAAAGATGAAAAGAGTCCTATTCTTGGCCAATTTACAAACATTCAACAAATCGACAAAAAGTTTCCCGGGCAATAATTTGCTGTCGAAATATTCCGCTTCATTGAACGCACCCTGCTATTTTTCATGTTTTGAACCATA >strand - ydbN ydbN TCAATGAAGCGGAATATTTCGACAGCAAATTATTGCCCGGGAAACTTTTTGTCGATTTGTTGAATGTTTGTAAATTGGCCAAGAATAGGACTCTTTTCATCTTTACAGGCTATTGCGGACATTGTATACTGAAGTTAATTGATAATGATTATCAATATCGTTTGATTGGAGGTTGGTGACACAATGACACCTCAGTCTGCAGAAATCAGAAAAAAACAGCTCATTCATCTGTTAATACAGAATGGTATATACAAAAAGTCAAAGCGTCATCTGTATGAATTGTCGCTGACGGAGTTAGAA >strand + ddlA D-alanyl-D-alanine ligase A 
TGTCACATCTCCTTTTCAATGCCATTTTAATATAAAATAAAAAGGAATTACATACAAATGCATTCAACATCAGCATGCCCAAAAATAATTAGATCATTTTCCGGTGAAAATGATAAGATAGTAAAGATGAATTGTAGTGGGGGG >strand - ydbP similar to thioredoxin CCCCCCACTACAATTCATCTTTACTATCTTATCATTTTCACCGGAAAATGATCTAATTATTTTTGGGCATGCTGATGTTGAATGCATTTGTATGTAATTCCTTTTTATTTTATATTAAAATGGCATTGAAAAGGAGATGTGACA >strand + ydcB similar to holo- acyl-carrier pro CCGAGCCCCTTTTCTTCATTTTGGTATATCTATAGTAACACGTTTATGAGACAAACCATTTAAA >strand - ydcA similar to hypothetical proteins TTTAAATGGTTTGTCTCATAAACGTGTTACTATAGATATACCAAAATGAAGAAAAGGGGCTCGG >strand + sacV transcriptional regulator TTATCATCCATTTCACAAAGAGTTGATTTTTATAAAAATAATAACAACATTTTTTGGAGAATTCATTAATTTCACCTATAAGTTAAATAAATAATTCTAAATTCTCCTTTTAGGAGTGTTTGTTTCTTCTTTATGGAGAGTTTAATTATCAATATTTCCATATATGTTCAAAAAAATACACTTTAAGGTGAATTTTGTGTTGACTTACACGAAAAGGAGTTTTATAATAAAGTTAATTCACAA >strand - ydcN similar to transcriptional regula TTGTGAATTAACTTTATTATAAAACTCCTTTTCGTGTAAGTCAACACAAAATTCACCTTAAAGTGTATTTTTTTGAACATATATGGAAATATTGATAATTAAACTCTCCATAAAGAAGAAACAAACACTCCTAAAAGGAGAATTTAGAATTATTTATTTAACTTATAGGTGAAATTAATGAATTCTCCAAAAAATGTTGTTATTATTTTTATAAAAATCAACTCTTTGTGAAATGGATGATAA >strand + rapI response regulator aspartate phos CGACCTAATTTAATCCATTGTTTGTAAACATTTCATATCTAATTATATAACATAGACTCCTATTCTAACTACTTATTTTAAAGCAGTCTGGATTGTTTGGGTAATTCATAAAAAAATAAAAGAAAGAAGGAGGAATAGAGTTTTTCTTTTTTTTGTTTGCAATGTTACTGTCAAGTCGCAAAAGAATTGCTATGACCGAAAAAAAACCGCAAAGGAATAGATATAAGGTACCTTTTTGCAATTCATCTTTGTAAAATAAAGGTTATTCTGACATAATACAATTAATGTAAAAATTCGCAC >strand - yddK yddK TTTACATTAATTGTATTATGTCAGAATAACCTTTATTTTACAAAGATGAATTGCAAAAAGGTACCTTATATCTATTCCTTTGCGGTTTTTTTTCGGTCATAGCAATTCTTTTGCGACTTGACAGTAACATTGCAAACAAAAAAAAGAAAAACTCTATTCCTCCTTCTTTCTTTTATTTTTTTATGAATTACCCAAACAATCCAGACTGCTTTAAAATAAGTAGTTAGAATAGGAGTCTATGTTATATAATTAGATATGAAATGTTTACAAACAATGGATTAAATTAGGTCGGAGGAAATC >strand + lrpA transcriptional regulator (Lrp/As 
TTTCACCTCTAAAATAATGAACTTATACAAGATCAATGGAAAAAACATTTTGAAAAATTGTTTCATATTAAATGGTGTCCATTCCTCTTTGATTTGCATGCAAATAAATATATACTTTCTCAAACGATCAATACATAGGCAAAATAAAGTTATTTCACAATTTCACTTTACTTTTAAAGTTTTAAGCAAACATAAAAAAATGTTCAGTGAACAAAATATGTAAAATACA >strand - yddN similar to hypothetical proteins TGTATTTTACATATTTTGTTCACTGAACATTTTTTTATGTTTGCTTAAAACTTTAAAAGTAAAGTGAAATTGTGAAATAACTTTATTTTGCCTATGTATTGATCGTTTGAGAAAGTATATATTTATTTGCATGCAAATCAAAGAGGAATGGACACCATTTAATATGAAACAATTTTTCAAAATGTTTTTTCCATTGATCTTGTATAAGTTCATTATTTTAGAGGTGAAA >strand + yddQ similar to hypothetical proteins TAACTCCTTTCACAGTGAAAAAGTTTTACAAAATAGTATTTCATGAGACCATCGAAAATTCAATGGAAATTATATATCCTTATA >strand - lrpB transcriptional regulator (Lrp/As TATAAGGATATATAATTTCCATTGAATTTTCGATGGTCTCATGAAATACTATTTTGTAAAACTTTTTCACTGTGAAAGGAGTTA >strand + ydeD similar to hypothetical proteins TGACTTCACACCTCAATATATTTATATTTTTTCATTCAATTTGAGTATTTTTTATCATGTTTTGCATTTATTATAACACTAT >strand - ydeC similar to transcriptional regula ATAGTGTTATAATAAATGCAAAACATGATAAAAAATACTCAAATTGAATGAAAAAATATAAATATATTGAGGTGTGAAGTCA >strand + ydeF similar to transcriptional regula ATTTGGCCTCCTTTAACCAAAGAATAACAGGAGTGAAATGTATTTATCCGACATTCTGTGCACAGTTTTGCAGGATGACTTATATGAAATAAAGTAAGTGATTCACTTGTATTTGTACAGGTTTTATTGAAGCATGCTGTTCATAAAAAGGCATCAATAAAAGAAAGTTATAATTGTCTTATAAACG >strand - ydeE similar to transcriptional regula CGTTTATAAGACAATTATAACTTTCTTTTATTGATGCCTTTTTATGAACAGCATGCTTCAATAAAACCTGTACAAATACAAGTGAATCACTTACTTTATTTCATATAAGTCATCCTGCAAAACTGTGCACAGAATGTCGGATAAATACATTTCACTCCTGTTATTCTTTGGTTAAAGGAGGCCAAAT >strand + ydeL similar to transcriptional regula TAATCACTCCACATATCTTTCTTGTTCATTGTCTGTATTTTCTTAACAAAACCAGCAGCTAAGGATTACAATGACTATAAACTGGAATTGGCCTTTTATAAACAGCCAGTTTTCATAAAAAAAT >strand - ydeK similar to hypothetical proteins ATTTTTTTATGAAAACTGGCTGTTTATAAAAGGCCAATTCCAGTTTATAGTCATTGTAATCCTTAGCTGCTGGTTTTGTTAAGAAAATACAGACAATGAACAAGAAAGATATGTGGAGTGATTA >strand + ydeO similar to hypothetical proteins 
AATCAAACCTCCTTTTTTAAGTATCATATCAGAATAAATTAGGATTTTCGGTGTTGAAGTTTGAGGGCGATTCACGATCTGAAGTGAAAGGGGCCGCTTGCTCAACAGGCGGCACGGTACATGAATGTTCTAAGTCAAAGAGGGTTACCTTACTGTTAGTTACTTTATTTAAAAAAACGCATGTTTTTAGCTGAAAATATCTATTATGGTATATTTAAATATAA >strand - ydzF similar to hypothetical proteins TTATATTTAAATATACCATAATAGATATTTTCAGCTAAAAACATGCGTTTTTTTAAATAAAGTAACTAACAGTAAGGTAACCCTCTTTGACTTAGAACATTCATGTACCGTGCCGCCTGTTGAGCAAGCGGCCCCTTTCACTTCAGATCGTGAATCGCCCTCAAACTTCAACACCGAAAATCCTAATTTATTCTGATATGATACTTAAAAAAGGAGGTTTGATT >strand + ydeQ similar to NAD(P)H oxidoreductase GTTCACTCCTAATAGTATACTTTTTGATACTATATACATTCAATGTGCGTACTTTTTATATTTTAATATACATCATATAA >strand - ydeP similar to hypothetical proteins TTATATGATGTATATTAAAATATAAAAAGTACGCACATTGAATGTATATAGTATCAAAAAGTATACTATTAGGAGTGAAC >strand + ydeS similar to transcriptional regula AATGAACTCCTTTTTAAGTGGTTTTTTGAGTGTAATCATGACTTCTCACAAGTGATTTTATAAACTGAACGTTCAGTTTCGTCGATTAATAAAAATAATATACTAAACGTTCAGTTTTGTAAATGGATTTCTGTTTTCTTTTTTTATTCAATTTAAGTATACTGAACGTATAG >strand - ydeR similar to antibiotic resistance CTATACGTTCAGTATACTTAAATTGAATAAAAAAAGAAAACAGAAATCCATTTACAAAACTGAACGTTTAGTATATTATTTTTATTAATCGACGAAACTGAACGTTCAGTTTATAAAATCACTTGTGAGAAGTCATGATTACACTCAAAAAACCACTTAAAAAGGAGTTCATT >strand + ydfD similar to transcriptional regula TCTCACTCCTCTTTACAAAAACCAACCAGTTATTTAATGTGTTTCTATACTAATAAAGATTTGATGATATGGCTTCAACCAGTTCAATAGATTTTGACCCAA >strand - ydfC similar to hypothetical proteins TTGGGTCAAAATCTATTGAACTGGTTGAAGCCATATCATCAAATCTTTATTAGTATAGAAACACATTAAATAACTGGTTGGTTTTTGTAAAGAGGAGTGAGA >strand + ydfF ydfF TTTTTCCTCCTTGTCACGCTTACATTTATATTGTAAAGGATTAACATTTCAACTATCAT >strand - ydfE ydfE ATGATAGTTGAAATGTTAATCCTTTACAATATAAATGTAAGCGTGACAAGGAGGAAAAA >strand + ydfH similar to two-component sensor h 
CTTTTCGGTCCGGATTCCTTTTCTCTTTAATGATAAAATTGTGAGATCAATCGATTTCAATGATGATCATTCCTTCAATGAATAGTAAGTTCCTTATGATTTTAAAGGTAAATTTGAAAAGATAAAACGTACATAGAGGCAGAACGTTCATGTACGTTTGGGCAATGATCACTTGTTGGAAACTCTAATGTTTCCCTGCACCTGATATTGTTGTGTTCCTTCTCTTCCTATAAAATGATTGGATACATGTATGTTTGCACGTTGAAATATGGGTTCGATTTTCATGAATCTCTATTCGGT >strand - ydfG similar to hypothetical proteins GACAAAAATGCGCAACCAAAAAGCAAGGAAAAAGAGGAATGAACATGTATTGGTTTCTAGGTGCTTTATTATATTTTCTGATCGGTACTTATATATTCATTAGAGTCACTAGAGACAGCCAATCAGGCTCATGGATACTGCTTGCATTAGCAGCTCCACTCATCATTGCTGGCTACCCTTATTTTTATTCAAAGAAGCTTCTTTCCAAAAGACGCTGAAACAGCTGTCATTCAAACCGCACATTGGTAAAATGATCAATACCGGGATACAATCTAGGTATTCTATGGAAAGGAGTCTTGC >strand + ydfN similar to hypothetical proteins GTGACAGAAAAAAAGTTGGAGCTATGCTAAAAAGGTCATACAGCAAACAAATGAGTTCTTATGTAACAACAGTGATGATTCGCTATTAAAATGCTGCCGTCAGTGAAAAATTTCCACCTAATTTTCAAAAACCTATTGACATGAGAAGATCTGTATTTTACTATTATCTCATATTCAAGATATTTTAATTTGAGATATTTGGTCGGAAAGATCAGCGATCAAGCGCAACTCTAAGTGTTCAATCGTTGTCAGTTCTTTTACACAATATTCAATATGATTTTTTATAACATAATATCTCGA >strand - ydfM similar to cation efflux system TACAGATCTTCTCATGTCAATAGGTTTTTGAAAATTAGGTGGAAATTTTTCACTGACGGCAGCATTTTAATAGCGAATCATCACTGTTGTTACATAAGAACTCATTTGTTTGCTGTATGACCTTTTTAGCATAGCTCCAACTTTTTTTCTGTCACCTTATTTATTAGTAAACAGGAAACAACGTTGCTATAGACCCACTCGAAATTGAATTGGTATGATAAACAAAGTGATTACACAGGAAAGGATACGGAGAGCGTATATAGAGCACTCCTATTTTTTTGTGCAGTTTTATAGGAGGAA >strand + ydfS similar to hypothetical proteins GTCCTGAACCGAACAATATTTTTGGTATAGATGGATATATTAAGCCAAACAATGATAGCAACAGGAGTTTTTTGGCCGAAAC >strand - ydfR similar to hypothetical proteins GTTTCGGCCAAAAAACTCCTGTTGCTATCATTGTTTGGCTTAATATATCCATCTATACCAAAAATATTGTTCGGTTCAGGAC >strand + ydgC similar to hypothetical proteins ATCACACCCTTCCATCTTTCTGTACAGTACATTATGCGCCGCTGCACCCAATTGTTCCTTGCATTTACGGGCAAAACTCTATCTGGCCCCGGAGTGATGCTCAATTATCAGTGAATGAAAAGCCATTGATATAGGAATCAATGAATTGATTCAGTTATTCGAAATGCATGAGACCTTTTAATAAAGGAAGCTGGCAAACAGGATCAGTTTGAAATAATTGACAATGAAGCTAATGCCATGAAATAATACTCGTGAGTAACATTACTCG >strand - ydgB similar to hypothetical proteins 
CGAGTAATGTTACTCACGAGTATTATTTCATGGCATTAGCTTCATTGTCAATTATTTCAAACTGATCCTGTTTGCCAGCTTCCTTTATTAAAAGGTCTCATGCATTTCGAATAACTGAATCAATTCATTGATTCCTATATCAATGGCTTTTCATTCACTGATAATTGAGCATCACTCCGGGGCCAGATAGAGTTTTGCCCGTAAATGCAAGGAACAATTGGGTGCAGCGGCGCATAATGTACTGTACAGAAAGATGGAAGGGTGTGAT >strand + ydgG similar to transcriptional regula AAAATTCCCCCTTTCGTGTGTATAGCTTTCATTATACACGAACATGAGTTCTTATGTAGAGTGCTCGAAAATAAAATTTGTTGACATATATAGTTTAGAGGAATAAACTAATTGACATTGAACATAAAGTTTAGCGGG >strand - dinB nuclease inhibitor CCCGCTAAACTTTATGTTCAATGTCAATTAGTTTATTCCTCTAAACTATATATGTCAACAAATTTTATTTTCGAGCACTCTACATAAGAACTCATGTTCGTGTATAATGAAAGCTATACACACGAAAGGGGGAATTTT >strand + ydgK similar to bicyclomycin resistanc ATATACAACGATTGATTTCATCTTGCAAGCCATTTGCACTTTTGAACAACCTCCCAGCGCATAAAATTCGCTAGGCTAGAACTTAGCAAAAAAAGCTGGCCGCTTCTTCAGATGATGCTGGTCACGATGATTCCCTGCATCATTCCCATTTATAGGTGTTCGGAATATCACTTTGCCGGCCCGTGCTCAAGCGGCTTTCACAGGCTGAAAGTTATGATACAATATACAAATGTTTCACAGCTTCAATCCATAGTTGGAGCTGACATCCATTATATAAGGG >strand - ydgJ similar to transcriptional regula CCCTTATATAATGGATGTCAGCTCCAACTATGGATTGAAGCTGTGAAACATTTGTATATTGTATCATAACTTTCAGCCTGTGAAAGCCGCTTGAGCACGGGCCGGCAAAGTGATATTCCGAACACCTATAAATGGGAATGATGCAGGGAATCATCGTGACCAGCATCATCTGAAGAAGCGGCCAGCTTTTTTTGCTAAGTTCTAGCCTAGCGAATTTTATGCGCTGGGAGGTTGTTCAAAAGTGCAAATGGCTTGCAAGATGAAATCAATCGTTGTATAT >strand + ydhC similar to transcriptional regula TTTTCCTCCATTGAAAAGATGTAAAAATGTAATCGTGCGCTATTCTGGCGGAGGTATGGTGTCCTATTGACGAAGAAAAACACGCCGTTTTTTCAGTTGCCAGTAGTCCGTCGACCGTCGTTCAGCAAACATATCGTACCTCAAATTCAGGGATCTGACAAGGCAGTTTGTTGCTTCCCATTACCATCTGTTATATGATTTTGATGAAGAAAAGCCCT >strand - ydhB similar to hypothetical proteins AGGGCTTTTCTTCATCAAAATCATATAACAGATGGTAATGGGAAGCAACAAACTGCCTTGTCAGATCCCTGAATTTGAGGTACGATATGTTTGCTGAACGACGGTCGACGGACTACTGGCAACTGAAAAAACGGCGTGTTTTTCTTCGTCAATAGGACACCATACCTCCGCCAGAATAGCGCACGATTACATTTTTACATCTTTTCAATGGAGGAAAA >strand + ydhG similar to hypothetical proteins 
GACAACCCAATTATATTTTTAAACTGTTAAGCTAGTATTAATCGATTTTAAAGATTGGATTTACAAGAATAATGAACTGGCACCCTTAAATAATGGTTATTCTTTTATGAGAAGTTTTAGAAGGAAAGAGCGATAGTACTAAAGAAATAAATTGGAGGAATAGATACTGGCCGTGGATAGGCGGACTGAAATCTGCTATAGTCGCATAGAAGCAACCCGCTTACATCTTGAACAATTTAAAGTATTACGTTGGAAAGAGGTATTGTGTAAGCACATGTCTGAGCAGTTCATATGGAGTGG >strand - phoB alkaline phosphatase III TACACAATACCTCTTTCCAACGTAATACTTTAAATTGTTCAAGATGTAAGCGGGTTGCTTCTATGCGACTATAGCAGATTTCAGTCCGCCTATCCACGGCCAGTATCTATTCCTCCAATTTATTTCTTTAGTACTATCGCTCTTTCCTTCTAAAACTTCTCATAAAAGAATAACCATTATTTAAGGGTGCCAGTTCATTATTCTTGTAAATCCAATCTTTAAAATCGATTAATACTAGCTTAACAGTTTAAAAATATAATTGGGTTGTCATTGAGATTCATCTATATTTAGGAGGTTATC >strand + ydhJ similar to hypothetical proteins ACTCCTCTATTGAAAGATTGCATATGTACTATCAATATAAACAATATATAAC >strand - ydhI similar to hypothetical proteins GTTATATATTGTTTATATTGATAGTACATATGCAATCTTTCAATAGAGGAGT >strand + ydhM similar to cellobiose phosphotran ATATTATTGAGGTTATACAAGTGATAATAGCTATTTAATTGATTCGTTTCCGTTGATGAGTGTACCACATTATGAATGAATCTTCCATAGAAAAATTTATAAAACGGTGAACCGGAAAAAAATATGATTATATTTATTTTATAAAAGTATAGACATTTAAAATTAAATGACTATAATAATCAATGTAA >strand - ydhL similar to chloramphenicol resist TTACATTGATTATTATAGTCATTTAATTTTAAATGTCTATACTTTTATAAAATAAATATAATCATATTTTTTTCCGGTTCACCGTTTTATAAATTTTTCTATGGAAGATTCATTCATAATGTGGTACACTCATCAACGGAAACGAATCAATTAAATAGCTATTATCACTTGTATAACCTCAATAATAT >strand + ydiA alternate gene name: ydxA; simila CCCGCAACCAAATATATGGTTAAATAAGGAGTGCCCTAAGGGTGCAACCAAAATAATCCTATACAAATATGGTCCGGTAGTTCAGTTGGTTAGAATGCCTGCCTGTCACGCAGGAGGTCGCGGGTTCGAGTCCCGTCCGGACCGCCATTAATGTAAACGAAACGATCATGTTTTCGTTTTTTTTGTGTTTTCTTTTACTGTATCTTCTGCTTGGTGTCATAGCCAAGCTTTCTTTATATCGGGAACATCTTGGATATTCCAGCAAAATTCGATAAACTAGGAAGAGCACAAATAGGCATA >strand - ydhU ydhU TTACGGAGAAATGCGTACAATGATGCAGTTTTTTTCCAAAGCTCTAATTTCAGAGGAAAGCAGAAACAGTACCGCGATCTGCTTCGCGGCATTTTTTTAGAGGAGACTGCCCACGTTGAACTGGTTCAAAACACAATCAATGCTTTATTAGATGAATCAGGCGGTGAAGGTGTCGGCAGTCAAGGTACAGATCAAGCGCCACTTGATGAAGCGGTTAAACATGCTAATCCTCATCATTATATTATTGGAGCGCAAAGTTCCCTTCCTGTTGATGCAGGGGGCAATCCGTGGAACGGGTCG >strand 
+ ydiG similar to molybdopterin precurso ATTTCACCTCTGCTATAATAAGTTAAGTGTAGCTTATGAACCAGAAGTCAGCAAACAATACATTGGCCATATAAGACATAGAGAAGAAAATAGTG >strand - ydiF similar to ABC transporter (ATP-b CACTATTTTCTTCTCTATGTCTTATATGGCCAATGTATTGTTTGCTGACTTCTGGTTCATAAGCTACACTTAACTTATTATAGCAGAGGTGAAAT >strand + groES class I heat-shock protein (mole GTATGTACTCCTTTGTTAAGTGGGTTTCGTTCATCTACAGCTATTGTAACATAATCGGTACGGGGGTGAAAAAGCTAACGGAAAAGGGAGCGGAAAAGAATGATGTAAGCGTGAAAAATTTTTTATCTTATCACTTGAAATTGGAAGGGAGATTCTTTATTATAAG >strand - ydiL ydiL CTTATAATAAAGAATCTCCCTTCCAATTTCAAGTGATAAGATAAAAAATTTTTCACGCTTACATCATTCTTTTCCGCTCCCTTTTCCGTTAGCTTTTTCACCCCCGTACCGATTATGTTACAATAGCTGTAGATGAACGAAACCCACTTAACAAAGGAGTACATAC >strand + gutB sorbitol dehydrogenase GCCTGCACCTCCTATTAGTACTATTTTATCAATCCAGCGTTTTTTGTATATGAAAAAATAAAAGTACAGTGCCGCTGTCCTTTTATACAGCAGGAAAGGCTGTTGAACGTGTTAAAAAGCAGATAAAATGGGGGCAGTAACAGAGAAAACAAAAATGTATGCACTTACATT >strand - gutR transcriptional regulator AATGTAAGTGCATACATTTTTGTTTTCTCTGTTACTGCCCCCATTTTATCTGCTTTTTAACACGTTCAACAGCCTTTCCTGCTGTATAAAAGGACAGCGGCACTGTACTTTTATTTTTTCATATACAAAAAACGCTGGATTGATAAAATAGTACTAATAGGAGGTGCAGGC >strand + ydjK similar to sugar transporter TTATATTGACTGAAAGCGTTTTTATAATTATGATAATATCAGATATAACAAAAATGTCACTATAACATCACTACAATTGAACTATTAAGTGACTAATAAATACCAAATGAACGAGGTTATGCACAAAGAAATGACCATTGTTCTTTTAACGAGAAATGAAATCGCTTCCATAGGGATTGACATGCTAAGAAGAAGGGAAGGTTATGAGGTTATTGGATAAAAACATGAACAACTATTTTCTGAAGCCTGCTTACGAAATGTGAGGAAGGCAGCAGCCCTGGAATAAGTTTTTGACATACG >strand - ydjJ ydjJ ATATCTGATATTATCATAATTATAAAAACGCTTTCAGTCAATATAAATTTCTGAAAATTTTTTACTCTATAAAAGGAAAATCACCAATTCAAGGGGAAAATGAACGTTTTTCTTCATGAAAATGTGTGAATCAAGTCAAAAATCTTGTGATTTTCTGTTCATATTGAAAAATGAGTAAGTGTTGGGTAACTCCCACCTATTTTTCTCTGTAAAATCAAACATTCGTTCAATTGTTTCAGACTCAGGATAAAAGGAAATAGAGAAGAAAATAAAAGAAGGCTGACTTAAGGGAGGCAACCA >strand + ydjM alternate gene name: yzvA; simila 
TATAATCTACGTTTTTTTGAAAAAAAGAAACTTTAAAGAAAAAAGGGTCTCGCACACTCTTTTTTCTTTATATGAGAGTATCAATCGTTTACATAGCTATGAATTTGGTTTGAACAATACGGTAATGGTTCGTTCATATTGAATGCGACAACTACTCTATCAATAATTATAACTCTAATACTGCTATTATAATAGGATAATCTTCCATGGTTTATTTTAGCAGAGGTAACTTTTTTGACTTTGTAACATTACTGTAAGGATATTGAAATAAAAAATAGCTGGTTGATCGTGTATAATCTT >strand - ydjL similar to L-iditol 2-dehydrogena TTATAATAGCAGTATTAGAGTTATAATTATTGATAGAGTAGTTGTCGCATTCAATATGAACGAACCATTACCGTATTGTTCAAACCAAATTCATAGCTATGTAAACGATTGATACTCTCATATAAAGAAAAAAGAGTGTGCGAGACCCTTTTTTCTTTAAAGTTTCTTTTTTTCAAAAAAACGTAGATTATATTATATAAAAATTTCATTTCTTTTTTGGTTTTAAAAATGGCGATTTTGTGAACAGTTCATATTGACCGGTGCGGAATTACTCACATCAAAAGTTATAGGAGTGGTAAT >strand + yeaB alternate gene name: ydxT; simila TCGAAAATTCGTGTAAGAAAATATACCAGCTTATTTCAAATTTGTAGTGTTTTGTTTGCGCTTTCAGCGAATGTGTGGAAAAAATGTTTCGTTTTTTATTCGCTGTGTGATTTTGCGTAATATTTACAATTCGATAGAATGCGAGAAGCACTGGTGTCCTTGGGTTTGATGGGATTTTTGGCGGAGCAAATAAATTGGATGAGAGAAAAAGTGTTCCTTCGTCACTATTTTTCTGTGGCGGGTGAATCATGTTTCTGTAGACCCACTGCGGATCAGCTTTGTAAGATAATAATAGGAAAA >strand - gabP gamma-aminobutyrate (GABA) permea GAAAAATAGTGACGAAGGAACACTTTTTCTCTCATCCAATTTATTTGCTCCGCCAAAAATCCCATCAAACCCAAGGACACCAGTGCTTCTCGCATTCTATCGAATTGTAAATATTACGCAAAATCACACAGCGAATAAAAAACGAAACATTTTTTCCACACATTCGCTGAAAGCGCAAACAAAACACTACAAATTTGAAATAAGCTGGTATATTTTCTTACACGAATTTTCGACAAACTGTATATTTATGATAGAGTTTCAATTATCATAATATTCAGTAATGATTGCATTTTAAAAGGA >strand + yecA similar to amino acid permease CTCCTTTACATATTTAGGCGAATGAATGAAAATTCCTTTGATCGTAAGGTAATGCTGTTGATTTAACTTCATTGTAGCATATGGATGCCGTCTTATTTTTTCTATAATAAAGC >strand - yezC similar to transcriptional regula GCTTTATTATAGAAAAAATAAGACGGCATCCATATGCTACAATGAAGTTAAATCAACAGCATTACCTTACGATCAAAGGAATTTTCATTCATTCGCCTAAATATGTAAAGGAG >strand + yerE similar to hypothetical proteins CCTCCCTTTTTATGTAACCATTTCCACAGTAGAACACTTTCCAAACAATAGCAGAAGGAATCTGGTGAAAAATGTGTGCTCCTGCCGATACAGGACTCACCATGCAGGTACTGCCATATGCTCTCTTTTTTTATGAAAAAAGAATCAAAACACCTTTTTCAGTGAATAACGAGATTAACGGCTTTTTCCGTTTTTCCGCGCTATGTGAATTTGTTATAATGTTTAAATGGTAAC >strand - yerD similar to glutamate 
synthase (fe GTTACCATTTAAACATTATAACAAATTCACATAGCGCGGAAAAACGGAAAAAGCCGTTAATCTCGTTATTCACTGAAAAAGGTGTTTTGATTCTTTTTTCATAAAAAAAGAGAGCATATGGCAGTACCTGCATGGTGAGTCCTGTATCGGCAGGAGCACACATTTTTCACCAGATTCCTTCTGCTATTGTTTGGAAAGTGTTCTACTGTGGAAATGGTTACATAAAAAGGGAGG >strand + yerL alternate gene name: yedA; simila GAAATTCTTTTGCGGGTGCACTTGGTAACTTAACCTAACAAAGTATTCCCGGCCATTCAAGCGATGAAACATTGATCATAAGCTGGAAATACCGTTGCAAACCTAGAAGGGAATAAAGAATAAAACGCTTTCAAAAAATAATTAAAGTAAAAATATTCAGAAAAATATTTCGTTTCTCCTTCTCCTCATTTTAGTATAAAATATATAGGGTATTGTTTCGAAAACACAGGCCTGTCTCAAGGCGTTTTGTTGCTTTAAAGGGCTTGTTTTTGATATGATCAGTATTATATGACTTAACGG >strand - opuE proline transporter AACAATACCCTATATATTTTATACTAAAATGAGGAGAAGGAGAAACGAAATATTTTTCTGAATATTTTTACTTTAATTATTTTTTGAAAGCGTTTTATTCTTTATTCCCTTCTAGGTTTGCAACGGTATTTCCAGCTTATGATCAATGTTTCATCGCTTGAATGGCCGGGAATACTTTGTTAGGTTAAGTTACCAAGTGCACCCGCAAAAGAATTTCTTTTGTTTTGCACTTACGCCAGGCTGCTTAACTGGCACCATCAATTTGAGAGGTGATGCAACATTATTAGAAATGAAGAGAGG >strand + yerP similar to acriflavin resistance TTCCCCTTAATCCTTGAAAAATTTAGTAAAATTTTCTGGTATATGAGTTGACGTTGTTGCGT >strand - yerO similar to transcriptional regula ACGCAACAACGTCAACTCATATACCAGAAAATTTTACTAAATTTTTCAAGGATTAAGGGGAA >strand + yeeG yeeG TCCCTTCTTTTGTTCGCTCAATATCAAAATTTTACCACGGGTCGGGATAAGGTGGGGTGAGAACGTGTTTTGGTGGAAAAATGAGTCATTTGGCATGTGTCTTTA >strand - yeeF similar to hypothetical proteins TAAAGACACATGCCAAATGACTCATTTTTCCACCAAAACACGTTCTCACCCCACCTTATCCCGACCCGTGGTAAAATTTTGATATTGAGCGAACAAAAGAAGGGA >strand + yetI similar to hypothetical proteins AAACCTCCAAGTTGAAAAGATAATAATACTATAACCTGTTTACGGCCGAAGTAACATAAAAAAATCAGCCGGGAAACCGGCTGATTATTAAAATGAGCAGCTTACAACATCGATGTGATTTGTGTCTAAGGCGAAATAAACCCAGCGGCGCCGTGAGGAATCCCAATAAAAGCCGCCGACGGATCTTCTGCCAAGGATGATCGGGTAAAACCAAAATGAGCGGCCATTGGTTAACCAGACATAGGTAAA >strand - yetH similar to hypothetical proteins 
TTTACCTATGTCTGGTTAACCAATGGCCGCTCATTTTGGTTTTACCCGATCATCCTTGGCAGAAGATCCGTCGGCGGCTTTTATTGGGATTCCTCACGGCGCCGCTGGGTTTATTTCGCCTTAGACACAAATCACATCGATGTTGTAAGCTGCTCATTTTAATAATCAGCCGGTTTCCCGGCTGATTTTTTTATGTTACTTCGGCCGTAAACAGGTTATAGTATTATTATCTTTTCAACTTGGAGGTTT >strand + yetM similar to salicylate 1-monooxyge ATTAGCTCCCGTACATTCGTTGTGACACGATTATAAAGGAGGGAAAACCATAACGCAACTTGGCTTGGAGATGATTGACAGCTTTTCTTGGGACATAGTAAGGTAATATAGTTAGGCGCCTAACTAATACAT >strand - yetL similar to transcriptional regula ATGTATTAGTTAGGCGCCTAACTATATTACCTTACTATGTCCCAAGAAAAGCTGTCAATCATCTCCAAGCCAAGTTGCGTTATGGTTTTCCCTCCTTTATAATCGTGTCACAACGAATGTACGGGAGCTAAT >strand + yetO alternate gene name: yfnJ; simila AAACTCACCTTATTCGCGTATTGTACAGCTTCTCAAGTATAGAAAAAACAGCGATCATGCGTCAATTGGGAGAACGGAATAATCATGACCATGAAAGGGAAATGTAGTCAAATAGAAGG >strand - yetN yetN CCTTCTATTTGACTACATTTCCCTTTCATGGTCATGATTATTCCGTTCTCCCAATTGACGCATGATCGCTGTTTTTTCTATACTTGAGAAGCTGTACAATACGCGAATAAGGTGAGTTT >strand + yfmT similar to benzaldehyde dehydroge AGTTCCTCCTAGAAATATTCAAAATATTCTAATCATTATATGCAGTTCTTGTTGTATTATATTGTCGAAATTTGACGTAGTCAATAAAATATTTTTCTGCTTTAAATGATAGAAGCGAAACAGCGTTGTCGTATCATAGGATAGTAATTTTTACTAATCGTATATTCAATTTTATGCAAGATGAACCGATAGAAAAAATAGATTCGCCCATATTTTGAT >strand - yfnA similar to metabolite transporter ATCAAAATATGGGCGAATCTATTTTTTCTATCGGTTCATCTTGCATAAAATTGAATATACGATTAGTAAAAATTACTATCCTATGATACGACAACGCTGTTTCGCTTCTATCATTTAAAGCAGAAAAATATTTTATTGACTACGTCAAATTTCGACAATATAATACAACAAGAACTGCATATAATGATTAGAATATTTTGAATATTTCTAGGAGGAACT >strand + yfmL similar to RNA helicase ACACCTCTATCGTTATCAAACACGCACAATCGGCGTGTAATATTCACCCTTTTGATTATAAAGGAGGATTCGGAATGTTGAAAGGTATAGGAAAGAAAAAGAAAAATTGCGTTCCGGGCGAGGTTTTGAGATACTATGGAAT >strand - yfmM similar to ABC transporter (ATP-b ATTCCATAGTATCTCAAAACCTCGCCCGGAACGCAATTTTTCTTTTTCTTTCCTATACCTTTCAACATTCCGAATCCTCCTTTATAATCAAAAGGGTGAATATTACACGCCGATTGTGCGTGTTTGATAACGATAGAGGTGT >strand + yfmH yfmH 
ATCCTCCCTTGGAATAATTTTTCTCGGTAAAGAGCTTAAACATATTTAAACACCTTAAAACAACAAAAGTAAAGAAAAAATTTCCTTGTAAACTATTTACTTTTATTCTATTTATTGTTATTGTTTATTTAAGTTCTCTGACTATTCTATCAATATTCAGGGAACCA >strand - yfmI similar to hypothetical proteins TGGTTCCCTGAATATTGATAGAATAGTCAGAGAACTTAAATAAACAATAACAATAAATAGAATAAAAGTAAATAGTTTACAAGGAAATTTTTTCTTTACTTTTGTTGTTTTAAGGTGTTTAAATATGTTTAAGCTCTTTACCGAGAAAAATTATTCCAAGGGAGGAT >strand + yflF similar to phosphotransferase sys ATTCCCGCTTTCTTTTTAAATAATCTAGATTAAGCTTACCACAACTGTCTTAAAAATAGGAAACACACGGACCTGGGAAAAAAGAAATACCCCCGGGAAAATTGGTATAGATCACTAGATATCTTATATGGTATATTTGAA >strand - yflG similar to methionine aminopeptid TTCAAATATACCATATAAGATATCTAGTGATCTATACCAATTTTCCCGGGGGTATTTCTTTTTTCCCAGGTCCGTGTGTTTCCTATTTTTAAGACAGTTGTGGTAAGCTTAATCTAGATTATTTAAAAAGAAAGCGGGAAT >strand + yflD yflD CACTCCTTTTTTCCGATCCAGTTCGAGCGAAACGATAGAAAAAACGAACGTATCCAATATAACACGAATC >strand - yflE similar to anion-binding protein GATTCGTGTTATATTGGATACGTTCGTTTTTTCTATCGTTTCGCTCGAACTGGATCGGAAAAAAGGAGTG >strand + treP phosphotransferase system (PTS) t CTCACCCTGCTTTTTAATGTCATATTCAATAAGTGCTTGTGACTTTTAGGGTAACCTTGTCTGTTTCATATATTCCAGACTAGGATGAAAGCGCTATAAAAAAGTGTTGACTACCTGTATATACAGGAATACAATATGATTATAAGTTGTATATACAAGTTA >strand - yfkQ similar to spore germination resp TAACTTGTATATACAACTTATAATCATATTGTATTCCTGTATATACAGGTAGTCAACACTTTTTTATAGCGCTTTCATCCTAGTCTGGAATATATGAAACAGACAAGGTTACCCTAAAAGTCACAAGCACTTATTGAATATGACATTAAAAAGCAGGGTGAG >strand + yfkM similar to hypothetical proteins CTCCACCTTTCAACGAAATTTGTGTTACAGCTCCTATTGTAAGAATATTTTGTAACTATTTGTAGGTGTTTCTTTATGATTATTGTAAAAGGGAGAAAAATGAGTATTATGTAGCAGTATCAAACGACAAATACTGGGCATTTATGCTTATCGTTTGTCACAATCGCGGAATGTTTATTTTAGTGTGGCTGGGGTAGAGTGTTTTATGTAGA >strand - yfkN similar to 2',3'-cyclic-nucleotid TCTACATAAAACACTCTACCCCAGCCACACTAAAATAAACATTCCGCGATTGTGACAAACGATAAGCATAAATGCCCAGTATTTGTCGTTTGATACTGCTACATAATACTCATTTTTCTCCCTTTTACAATAATCATAAAGAAACACCTACAAATAGTTACAAAATATTCTTACAATAGGAGCTGTAACACAAATTTCGTTGAAAGGTGGAG >strand + yfkJ similar to protein-tyrosine phosp 
TCCATCCTTTCTATGCGCAATATAGGTATGTTTTATTCTAACAGAATTCCATCAGAAAAAAACCCTATTCTAAAAGCAGGCTGTTCATTCCAAAAAAGTGAAGACATCAGTTGAAAAGAAAATGAACATCCTACTAAGATATTCATGAAGGTTTCTTTTTAGAGAAATAGGG >strand - yfkK yfkK CCCTATTTCTCTAAAAAGAAACCTTCATGAATATCTTAGTAGGATGTTCATTTTCTTTTCAACTGATGTCTTCACTTTTTTGGAATGAACAGCCTGCTTTTAGAATAGGGTTTTTTTCTGATGGAATTCTGTTAGAATAAAACATACCTATATTGCGCATAGAAAGGATGGA >strand + yfkE similar to H+/Ca2+ exchanger CAGTTCTCCTTATATGGAATCTATGTTGATCGTACAACGTTTTCCAAAAGCAGGCAACCTGAAAAAAGCCTATATCACGCTATACATTTGCCCTCGTGTAATCATGAAGGTTTTTGTGAAGAATAAAAATAAAACAA >strand - yfkF similar to multidrug-efflux trans TTGTTTTATTTTTATTCTTCACAAAAACCTTCATGATTACACGAGGGCAAATGTATAGCGTGATATAGGCTTTTTTCAGGTTGCCTGCTTTTGGAAAACGTTGTACGATCAACATAGATTCCATATAAGGAGAACTG >strand + yfjT yfjT TCTAACTCCTTTATAAATGGGCATACTAACGCTGCCAACAGTTAATTTTAGACCTGATTTGCCAAGATCGCAAGTAGCGCGGCGATTGAAAAACGGCAAAAAATGAGCTACCAT >strand - yfkA yfkA ATGGTAGCTCATTTTTTGCCGTTTTTCAATCGCCGCGCTACTTGCGATCTTGGCAAATCAGGTCTAAAATTAACTGTTGGCAGCGTTAGTATGCCCATTTATAAAGGAGTTAGA >strand + yfjP similar to DNA-3-methyladenine gl ACCCTCCACCTGCCATTATATCATGCGGTCTTTATGAGAAAAGTATACGGTACTCTCTGCCTAATTCCAGCGGCATGTTATACTGAAC >strand - yfjQ similar to divalent cation transp GTTCAGTATAACATGCCGCTGGAATTAGGCAGAGAGTACCGTATACTTTTCTCATAAAGACCGCATGATATAATGGCAGGTGGAGGGT >strand + glvA 6-phospho-alpha-glucosidase CTGCCTTTTCTAAATTCACGCACAATTGGATGTTTTATATAAATGATTATAAATAATTCGGCATGTATCCGAATCGTACAAAAGAACCTTTTCATAAGAATTGGAAGGGCGTATATTCACTTAAAATTCACAGTTGGTGAGACTTTAAGATTACAAAAAAGGTAAAAAAACCAAATCTCTCAGACATAAGGCAAATGAGAAATTTCCCGCTCTATGGGAAAAAACACTAAAGTTGATCAAATGACCTAAGTGCGCCAAACGTGTTACGGGACGAGCTATCTCATGGTATAAATGGAATTG >strand - yfjA yfjA CCATTTATACCATGAGATAGCTCGTCCCGTAACACGTTTGGCGCACTTAGGTCATTTGATCAACTTTAGTGTTTTTTCCCATAGAGCGGGAAATTTCTCATTTGCCTTATGTCTGAGAGATTTGGTTTTTTTACCTTTTTTGTAATCTTAAAGTCTCACCAACTGTGAATTTTAAGTGAATATACGCCCTTCCAATTCTTATGAAAAGGTTCTTTTGTACGATTCGGATACATGCCGAATTATTTATAATCATTTATATAAAACATCCAATTGTGCGTGAATTTAGAAAAGGCAGGGGAC >strand + lipB lipase 
CCTCCGTGTCTGGCTGTTTCGCAAAACAGATGAGCCGGTGCTCAACTGTGATTCTAACTGCCTATTCCCCTTATTATAGAAAAGGGAGGAAGGGAACACAACCGGGCTCTTTGGAAATGAAGATAAAAGGTTTGTCAGAAGGTATCCGATTTATCTCCGTCAATATAAAAGAGATTTAAATTTAATTTTCAGAAAAAATAGTAACGATATTGATTTTTTATATATAATAGGAAATTGGTGGT >strand - yfiO yfiO ACCACCAATTTCCTATTATATATAAAAAATCAATATCGTTACTATTTTTTCTGAAAATTAAATTTAAATCTCTTTTATATTGACGGAGATAAATCGGATACCTTCTGACAAACCTTTTATCTTCATTTCCAAAGAGCCCGGTTGTGTTCCCTTCCTCCCTTTTCTATAATAAGGGGAATAGGCAGTTAGAATCACAGTTGAGCACCGGCTCATCTGTTTTGCGAAACAGCCAGACACGGAGG >strand + yfiT yfiT ATCCTCCTGATTATTAAAATGACACTAGTGTCGTTTTTAATTGTATGGGAAAAAGACACAATATACAATCTTTTTCTTGTAAAATAAAGGAGA >strand - yfiS similar to multidrug resistance p TCTCCTTTATTTTACAAGAAAAAGATTGTATATTGTGTCTTTTTCCCATACAATTAAAAACGACACTAGTGTCATTTTAATAATCAGGAGGAT >strand + yfiW yfiW CACTTCCTCATCATTCACTCCTCTTACCATATTACTCGATTTTCACTCCTGAAAGAAAGTGAGCAGAAACCGCATGCGGGACTTTGTCCGAAAAATAACGGTAGTGTATAATGAGTTGGAGATTATTGGGGGAGAGGATCG >strand - yfiV similar to transcriptional regula CGATCCTCTCCCCCAATAATCTCCAACTCATTATACACTACCGTTATTTTTCGGACAAAGTCCCGCATGCGGTTTCTGCTCACTTTCTTTCAGGAGTGAAAATCGAGTAATATGGTAAGAGGAGTGAATGATGAGGAAGTG >strand + yfiZ similar to iron(III) dicitrate tr TCCTCCCAATATTGAAATTCATTATCATTTAGATCATAATAAGCAGTGTTAAGAGTGTCAATCCCTAATTGAGGATTATTCTCAAAAACAAACATTACAT >strand - yfiY similar to iron(III) dicitrate tr ATGTAATGTTTGTTTTTGAGAATAATCCTCAATTAGGGATTGACACTCTTAACACTGCTTATTATGATCTAAATGATAATGAATTTCAATATTGGGAGGA >strand + yfhG yfhG TCTCTCCTCATCGTTCTAATAAATATACTTTACCTCATTGTAATAAAAGAAAACGCGT >strand - yfhF similar to cell-division inhibito ACGCGTTTTCTTTTATTACAATGAGGTAAAGTATATTTATTAGAACGATGAGGAGAGA >strand + yfhQ similar to A/G-specific adenine g ACCTCCTTGCAGGGTGATCCTGCACATTCACTTTGTAAGAAATTCCCCATTTTCATATACCCAATTTTTAAACTGATTCAATCGTTCTCATATGTTTCCGCTTTTTCTTTATTACTATTTAATGTAACATATTATCGTACTGTGCCCTTAGTGTACAACAAAAAACAGTCTAAAGAAAAGCGGGG >strand - yfhP yfhP 
CCCCGCTTTTCTTTAGACTGTTTTTTGTTGTACACTAAGGGCACAGTACGATAATATGTTACATTAAATAGTAATAAAGAAAAAGCGGAAACATATGAGAACGATTGAATCAGTTTAAAAATTGGGTATATGAAAATGGGGAATTTCTTACAAAGTGAATGTGCAGGATCACCCTGCAAGGAGGT >strand + yfhR alternate gene name: ygaA; simila AAGACCCCCTTTAAAGTTCTTGCCATTATTGTTTGTGGAAATAAAAGAACT >strand - yfhS yfhS AGTTCTTTTATTTCCACAAACAATAATGGCAAGAACTTTAAAGGGGGTCTT >strand + ygaF similar to thiol-specific antioxi CAGGAGAACCGCCGCCGACTGCTTTATATGATCTAGACGGGCTGTTTACACCGCCGACAATGTGCTGTAATGCCTCATTGTGAAGTTCAATTGATTTTGTATGCAAGAATAAAAACCTCCTATATCTCCATGCGTCCTTTCTATTGTAGCATGCTTCCGAACAAGAACAATGGCTTCGGGCTTCAGATTCAGCTGAGATGGTTGTAATCTGGACAAGAATTGGATACGCTTAA >strand - gsaB glutamate-1-semialdehyde aminotra TTAAGCGTATCCAATTCTTGTCCAGATTACAACCATCTCAGCTGAATCTGAAGCCCGAAGCCATTGTTCTTGTTCGGAAGCATGCTACAATAGAAAGGACGCATGGAGATATAGGAGGTTTTTATTCTTGCATACAAAATCAATTGAACTTCACAATGAGGCATTACAGCACATTGTCGGCGGTGTAAACAGCCCGTCTAGATCATATAAAGCAGTCGGCGGCGGTTCTCCTG >strand + ygzA ygzA ATCTTCCCCTTTTTCTTTTTTCTCAACAGATATACGTAAGAAGGAGGAAAAAAGATTCGTTTGATTCATACTTTTTTCCTATTATTCGTCTCGGCCTAGGGACAGACTAAG >strand - ygaI ygaI CTTAGTCTGTCCCTAGGCCGAGACGAATAATAGGAAAAAAGTATGAATCAAACGAATCTTTTTTCCTCCTTCTTACGTATATCTGTTGAGAAAAAAGAAAAAGGGGAAGAT >strand + senS transcriptional regulator ATACCTTTCTAAGCTTAGGTTAATATTGGTTCTTGCTTCGTTATAGTCCGGATCATCCCGTGTCACGATACGTCCGGTCAATTTTGTCTTTTCCACACTCCCACATCTCTTTCTCTCGTATTCTAGTTTCTCTAGCTTATGCGTCAGGGGAAAAGAGTGTATAAGGAAAAAGCGGGGATGCAATCTGATACAGTGTCAACACCCTCAAAAAATAGTTGACAGGTCGGTATTGTATGAATTAACATGGTCAGTACAAATTTTTCAAATTTATCGCGCTGATCGGAACACCGAAGGCTCTTA >strand - ygaK similar to reticuline oxidase CCCTGACGCATAAGCTAGAGAAACTAGAATACGAGAGAAAGAGATGTGGGAGTGTGGAAAAGACAAAATTGACCGGACGTATCGTGACACGGGATGATCCGGACTATAACGAAGCAAGAACCAATATTAACCTAAGCTTAGAAAGGTATCCTGACATCATTGTGTTTTGCCAAAACAAACAGGATGCACTCAATGCGCTGAAATGGGCGCGTGAAAACCGTGTGCCTTTCCGTATTAGAGGCGGCAGACACAGCTATGAGAACTTTTCCCTTTTAAATAACGGTCTTGTTATTGATTTAA >strand + ygaL similar to nitrate ABC transporte 
ACATAATATCATCTCTAAGTAAAAAGTCAATATTATTTTATAATTATTATAAAATAGTTATCTCTATTAATCGATATATAGTATCGAAATTGTTGAAAAACCTTGTTGTAACAGCTTTTTATGACACTTAATAATTTTTGGAGAAATAATTTTTCCTTCTCTATTTGCGAAACAAGCAGAGTAAGTGATAAACAAGGATATCAAAAATCGTATTTAAAAAAGAGTTGACATATCGGAATAATTTGAATTAACATGTTATTCAAACAAAAAAACAATTGGGCAGCTGATCGGACCGCCGAA >strand - katA vegetative catalase 1 CCAATTGTTTTTTTGTTTGAATAACATGTTAATTCAAATTATTCCGATATGTCAACTCTTTTTTAAATACGATTTTTGATATCCTTGTTTATCACTTACTCTGCTTGTTTCGCAAATAGAGAAGGAAAAATTATTTCTCCAAAAATTATTAAGTGTCATAAAAAGCTGTTACAACAAGGTTTTTCAACAATTTCGATACTATATATCGATTAATAGAGATAACTATTTTATAATAATTATAAAATAATATTGACTTTTTACTTAGAGATGATATTATGTTCTTATAAATTCCAAGAGGTG >strand + yhbA alternate gene name: ygaP; simila GATCCGGACCGGCTCATGAAGCTTTCCGGCATCATAAATCATAATAGGCAGAGAATATTTTAATGCGAAAAAATCGATTGTGTCATGGGCCGAGTCTTTACTTTCCATGAATTGTTCCTTGGATATCGGGACCAGCATGTCCTCCATTGGGGTTGTTTCTCCCTCAAATGAAGGGTTAAGCTGTTTAAAGAGTTTCTGCCATTCCTTGACGAAGGATTCATCGGATATACCGCCGTCCGCATAGTCGAAAAACAAGGATTCTTGAAACAGGCGGAGTCTGCCTTTCAGCATTTGCGGTGA >strand - ygaO ygaO AAAAAAAGTTATCTTTTTGTAAAGTTGTTAAGTTCCATTTACATAATGTTAAGTATACTATACAATATAATCAATCGAGATTGTGAAGGGGACAGACGTCATGATGAAAAATAAAGTAAAAGAGCTGAGGGCACGCTTCGGCTATTCTCAGGAAAAACTCGGAGAAACGGTCGGCGTCACAAGACAGACTGTGGCAGCGATTGAAAAAGGCGATTATGTTCCCTCACTGCTGTTAGCACTGAAAATCTGCAAAGCCTTCTCCATGAAAATGGAGGATGTCTTTTGGTTAGAGGAGGAAAA >strand + yhcJ similar to ABC transporter (bindi AAAATAATTACCTTTACTCTACCAGTAATCTTCAGAAAAAACAACTGCTTATTGAAATTTTTTTCGGAAAATACCATATTTATTTTTCAATTTGATATTTATTTACGGAAATTCATCTGTTTTCTCGATATGTGATCATACGTTTGCCTTAGTATTCCTTCATTTTGGGAAGATAAGTAATAGGAATTGACAACCTTATTGGATATGAGTAAGGTAAAAGTTAATCAATATTACAAATGACAACGTTTATCAACTAAGAGAGCGTATGATTTTTATGTCTGATATTGCTTGCATGCTCTA >strand - cspB major cold-shock protein 
GGTTGTCAATTCCTATTACTTATCTTCCCAAAATGAAGGAATACTAAGGCAAACGTATGATCACATATCGAGAAAACAGATGAATTTCCGTAAATAAATATCAAATTGAAAAATAAATATGGTATTTTCCGAAAAAAATTTCAATAAGCAGTTGTTTTTTCTGAAGATTACTGGTAGAGTAAAGGTAATTATTTTTGTTCGAACTATCTTTAAGAAGAAAGTTTTGTAAGAGTTTTCGTCTTGAAAGTTTGTTAAGAGCAAGAATAGTGAATTTAAGCGTTATGATCGCTTTAGGAGGAA >strand + yhcL similar to sodium-glutamate sympo ATCACCCTTATATAAGGTACTAAAGATGGAAGGGAAAGAAAAGAACAAAAAAGGCATTATATAAGGTTTTTTTACTTGAATTGATAAGTTAAAATAATTCGCACTTGACCCATCGGAATTAGTGTAATATAGTAAATGAGAA >strand - yhcK similar to hypothetical proteins TTCTCATTTACTATATTACACTAATTCCGATGGGTCAAGTGCGAATTATTTTAACTTATCAATTCAAGTAAAAAAACCTTATATAATGCCTTTTTTGTTCTTTTCTTTCCCTTCCATCTTTAGTACCTTATATAAGGGTGAT >strand + yhcN similar to hypothetical proteins CTCCTTTGCAGTGTATTCATATTTTCCCCCTCGTTAGACAAATTAACCGTTATATTTCCCGGGAAAGCATTTCCTCCAGATTTGCATGCATAATTAAAGCCAGTTAGAAAAACCTATAA >strand - yhcM yhcM TTATAGGTTTTTCTAACTGGCTTTAATTATGCATGCAAATCTGGAGGAAATGCTTTCCCGGGAAATATAACGGTTAATTTGTCTAACGAGGGGGAAAATATGAATACACTGCAAAGGAG >strand + yhcR similar to 5'-nucleotidase ATTCCTCCTTTTTTTTCAGATCAGCTGTATTTTGTGTCAGTCTGCCGTGTAT >strand - yhcQ similar to hypothetical proteins ATACACGGCAGACTGACACAAAATACAGCTGATCTGAAAAAAAAGGAGGAAT >strand + yhcU yhcU ATGGCTCCTTTTCGAAGCGGAAGGCTGCTGTTTCTCATAGGAAACACTTACCTTAGCTTTCGCGTTTTTTTCTGTGCTAT >strand - yhcT similar to hypothetical proteins ATAGCACAGAAAAAAACGCGAAAGCTAAGGTAAGTGTTTCCTATGAGAAACAGCAGCCTTCCGCTTCGAAAAGGAGCCAT >strand + yhdC yhdC GCTTCTCCTTTCTTGGGTGAGAAGTTCATGATTAGTTTATGCCGCATCCTCTGTTATTGACGGGGCAAAACTTGCCGTCTGCGTCAGTAAAGGGGTAGCAATATGAGGGAGACATAAAATCAAAATGCCTGAATGAAGATATCGCTTTTGGTAAACCTATGAGGGAATAT >strand - yhdB yhdB ATATTCCCTCATAGGTTTACCAAAAGCGATATCTTCATTCAGGCATTTTGATTTTATGTCTCCCTCATATTGCTACCCCTTTACTGACGCAGACGGCAAGTTTTGCCCCGTCAATAACAGAGGATGCGGCATAAACTAATCATGAACTTCTCACCCAAGAAAGGAGAAGC >strand + lytE phosphatase-associated protein 
GGATAACCACAGCTCTCCTCGAATATACTTTATCACTCATTTTTCCGATATATGAGCCGACCCCGAAAGTTTTTCATTTATTTCCTTTATGTTGAAAAACATCCATAAAACATGACAAAGCACTCGTTTTTTGTCACCTTTGCAATTTGAAAATTAAATATTTAATACCCCTTAAAAACTTTTTTTTAGAACGAATAATTAAGAAATTTGTCACATGAAGTCAAGACTATTTCTGATGGGAATCTATCCTTATAATAGAAATCAAATAAGATTAATTTAAAAAAAGAGATACATATATAT >strand - phoA alkaline phosphatase A TTTTTCAACATAAAGGAAATAAATGAAAAACTTTCGGGGTCGGCTCATATATCGGAAAAATGAGTGATAAAGTATATTCGAGGAGAGCTGTGGTTATCCGCAGTTTTTTTATTTAAACAAACATATCATGCAAAGACAGAGAGGTAAAGATTTTTCTGAAAAATGAATGCTTTACATCAAATAAGGCAAGATAACGAAAAGCGTTTTTTCATTTCCTTACAAGGCTTTCATTATTGTTTACATGATCAACAGCCGCATTTAACAAAGTTTCCCTAACATGATAAACGGAATACATTAAAG >strand + citA citrate synthase I TTCTCCCTCTGATTAATATTTTTAATTAATTCCCTTTAAAATATTGATTATTTTTTAAATATTATATTTACTATAATA >strand - citR transcriptional regulator (LysR TATTATAGTAAATATAATATTTAAAAAATAATCAATATTTTAAAGGGAATTAATTAAAAATATTAATCAGAGGGAGAA >strand + yhdJ yhdJ GTCTCCTTTGTAGCTGGCATGGTCATTTTTTCTGATATTGGCTATTTGGCAAATGCCATTTATTAGCTATGATCATTGT >strand - yhdI similar to transcriptional regu ACAATGATCATAGCTAATAAATGGCATTTGCCAAATAGCCAATATCAGAAAAAATGACCATGCCAGCTACAAAGGAGAC >strand + yhdN similar to aldo/keto reductase TTTCTCCCCTCTATGTTATACACGCATAAGAAAGGTTTAAAGTTGCACATTACATGCAAACGGCTTTTTTATACCTAATCAAACACCATTACTATATAGTACCCAAAAATGGCCGCAAAAGATAGTGCGGAGAAAGGTTTAACATTTTTTCCAGAGGGGAAAAGAT >strand - yhdM similar to RNA polymerase ECF-t ATCTTTTCCCCTCTGGAAAAAATGTTAAACCTTTCTCCGCACTATCTTTTGCGGCCATTTTTGGGTACTATATAGTAATGGTGTTTGATTAGGTATAAAAAAGCCGTTTGCATGTAATGTGCAACTTTAAACCTTTCTTATGCGTGTATAACATAGAGGGGAGAAA >strand + yhdR similar to aspartate aminotrans TATAGTTATGAGAATAGTTTAATATATGAACGGTGTCTTGCACAAGTTTATTGTTTTTTTTAGACCGGAATTGGAAGTTGTGGAATTCATCATCTGATTGAAGTAACATAGGAAGATAGAATTTTC >strand - yhdQ similar to transcriptional regu GAAAATTCTATCTTCCTATGTTACTTCAATCAGATGATGAATTCCACAACTTCCAATTCCGGTCTAAAAAAAACAATAAACTTGTGCAAGACACCGTTCATATATTAAACTATTCTCATAACTATA >strand + yhdT similar to hemolysin ACCTCAATGTTAAGGAAAAATGATTATATCAGGAGGTGACTCCTTA >strand - yhdS yhdS 
TAAGGAGTCACCTCCTGATATAATCATTTTTCCTTAACATTGAGGT >strand + yhdX yhdX GTCAGCTCCTCGTATGGGCATTTCTGTTATTGTATCAAATCCATATATGCACCGCGATACAAACTGGTGAATTTTTTCTGCCATATCGGAATGGTCCGGGCTGAGGAATGCGTATAACTAAAAAAAGACACTCAGCGTATCAGAACACATTTCGTCAGAAAATTAGGTCTTTCGACATGTTCACAACAAAATATGATCCTTTA >strand - yhdW similar to glycerophosphodieste TAAAGGATCATATTTTGTTGTGAACATGTCGAAAGACCTAATTTTCTGACGAAATGTGTTCTGATACGCTGAGTGTCTTTTTTTAGTTATACGCATTCCTCAGCCCGGACCATTCCGATATGGCAGAAAAAATTCACCAGTTTGTATCGCGGTGCATATATGGATTTGATACAATAACAGAAATGCCCATACGAGGAGCTGAC >strand + yheM similar to D-alanine aminotrans TCTCTCCATTGGCATAAAATCTCTACATAATGCTGTACAGCAGGCAGCTTATGAAGATGCCTTATGTTTACAACCTATGTATTACCAGTCAAGTATACAGGAATAAAACTGGAAAGCACATAGAAAATTTGTCGAAATGTCAGATTTTAAGGAAAAAATAAACTCCTTTCCGCTATAGATCTCTTGATCTTTCTGTTTTTCAGCTTGTCCAATATGGTAGAATTAGTGATAATGACAGCTTGAATGACATCAGG >strand - yheN similar to endo-1,4-beta-xylana CCTGATGTCATTCAAGCTGTCATTATCACTAATTCTACCATATTGGACAAGCTGAAAAACAGAAAGATCAAGAGATCTATAGCGGAAAGGAGTTTATTTTTTCCTTAAAATCTGACATTTCGACAAATTTTCTATGTGCTTTCCAGTTTTATTCCTGTATACTTGACTGGTAATACATAGGTTGTAAACATAAGGCATCTTCATAAGCTGCCTGCTGTACAGCATTATGTAGAGATTTTATGCCAATGGAGAGA >strand + yheJ yheJ CTTTTCCTATTCAAGTAATGATTGACAATAAAAGGTTTTGTTTATATGATGATAAAAAGATGAAATCAGAATGGAAGGA >strand - yheK similar to hypothetical protein TCCTTCCATTCTGATTTCATCTTTTTATCATCATATAAACAAAACCTTTTATTGTCAATCATTACTTGAATAGGAAAAG >strand + yheB yheB GACTCTCCCTTTTATTCAAAAGCTGCCGGCGGGTGTCCGCAGCTTTACACTAAATACATGTAAGACGAACGCAGCTGACGGTATTCTACTTTACAGAATACGATTCTCCACCTTGCCCTATTCCAAAAGAACAAGCTGATTTTCTAAGAACAGGAATTTGTCTCCTCGTTTGCTAAGTGATTGCATGAGCCTCTATAATACGTATAGGTACATAATCTATATGAAAAAAGCTGAAG >strand - yheC similar to hypothetical protein CTTCAGCTTTTTTCATATAGATTATGTACCTATACGTATTATAGAGGCTCATGCAATCACTTAGCAAACGAGGAGACAAATTCCTGTTCTTAGAAAATCAGCTTGTTCTTTTGGAATAGGGCAAGGTGGAGAATCGTATTCTGTAAAGTAGAATACCGTCAGCTGCGTTCGTCTTACATGTATTTAGTGTAAAGCTGCGGACACCCGCCGGCAGCTTTTGAATAAAAGGGAGAGTC >strand + yhaR similar to 3-hydroxbutyryl-CoA 
TATCCCTTCTTTTTCACTATATAAAACTTTATGTTTCCATTCAAACAATATGCATTTTTCAGTATCTATTGACCTTGAAAAATAAAAAGGTTTATTTGGGGATGGCGGGGAATGTATAGGAGAAAGAAAAAAGT >strand - yhaS yhaS ACTTTTTTCTTTCTCCTATACATTCCCCGCCATCCCCAAATAAACCTTTTTATTTTTCAAGGTCAATAGATACTGAAAAATGCATATTGTTTGAATGGAAACATAAAGTTTTATATAGTGAAAAAGAAGGGATA >strand + yhaI yhaI TCATCCTTCCATACATCTTGAATAGAATCTCAAAATAATTTACTCCTCCCAAGAGAAAAAATCAATGCTTTTTTAAGAAAAAATATCATCCGGCAGGTATTTTTTTGAAACAAGCAAACAAACTGCGAAATATGGGGTTAGAACTTTCTTCTCAGTGAATTATCAGCTAAAATAAGA >strand - yhaJ yhaJ TCTTATTTTAGCTGATAATTCACTGAGAAGAAAGTTCTAACCCCATATTTCGCAGTTTGTTTGCTTGTTTCAAAAAAATACCTGCCGGATGATATTTTTTCTTAAAAAAGCATTGATTTTTTCTCTTGGGAGGAGTAAATTATTTTGAGATTCTATTCAAGATGTATGGAAGGATGA >strand + ecsA ABC transporter (ATP-binding pr AATAAAATGAAGACTTGTTTAATGCCTAACGATTCCTGCGATAAACACCTCATTTGACAATTTGCTTCATACATGAAGAATTCATCAATTCAGTGAAGAGAAATGGTTTGAGCATTGGACATGTTTATTTTCTATCCAACTGTTTCTAAGACGTCTTTGACAAACACCTCATTTTTCAGTTGATATGCTTTTCCTCTACAACTTTTATGATGTCCAGAACCCTGTCTCTTTATGACAGCAAATTCATCATTTGTTAAAAGGGAATTGTTTCAAATCAGGCCTTGTTTTTGGTACTATGAA >strand - hit Hit-like protein ACAGTTGGATAGAAAATAAACATGTCCAATGCTCAAACCATTTCTCTTCACTGAATTGATGAATTCTTCATGTATGAAGCAAATTGTCAAATGAGGTGTTTATCGCAGGAATCGTTAGGCATTAAACAAGTCTTCATTTTATTGACAAATGAGGTGCTTACCAAAGGCATAACACATTTTCTTCATATAAGCTCTTCCTCTGCATTCAGGGTGAACGCTCGCCGTTCATCCTGTTTTCTATTTTCTGCATTTCTGTGGTACGATGAATGTATACATACTAAACAATTTCATAAGGAGGAA >strand + pbpF penicillin-binding protein 1A CCACCTCCTGCTTGCTAGTATATCAAAACAATGGTATAAGTTTCTATTGGCGAGTGCTTCGAACATAATTCCAAACAATTTTTCCCATACT >strand - yhgC alternate gene name: yixC; simi AGTATGGGAAAAATTGTTTGGAATTATGTTCGAAGCACTCGCCAATAGAAACTTATACCATTGTTTTGATATACTAGCAAGCAGGAGGTGG >strand + yhfC yhfC TCACTCCTTATGGTCAGATTATAACACTAGATATTAGTATCTGGTACTAAAAAGAAGGTTTTTTACAAAAAAAGAGAAGTGTAGATAGGAATCTA >strand - yhfB similar to 3-oxoacyl- acyl-carr TAGATTCCTATCTACACTTCTCTTTTTTTGTAAAAAACCTTCTTTTTAGTACCAGATACTAATATCTAGTGTTATAATCTGACCATAAGGAGTGA >strand + yhfE similar to glucanase 
TCCCTCCTCTTCCCATGCTCTCTACAGCATATGAGAGATTGATAGGAAAAGATTGGACGGGTCCCTCGGGAAAAGAGTATATTTTAAG >strand - yhfD yhfD CTTAAAATATACTCTTTTCCCGAGGGACCCGTCCAATCTTTTCCTATCAATCTCTCATATGCTGTAGAGAGCATGGGAAGAGGAGGGA >strand + yhfI similar to hypothetical protein CTTTCCCCTTTGTAGTGATTTCATTGTAATATAACAGATTACAAAAATCAATAACTGTTTTGATACAATGTTTAAAACATGCTTTTTTCAAGAAAAATGGGTATATTGAAGGAGGACCAAT >strand - yhfH yhfH ATTGGTCCTCCTTCAATATACCCATTTTTCTTGAAAAAAGCATGTTTTAAACATTGTATCAAAACAGTTATTGATTTTTGTAATCTGTTATATTACAATGAAATCACTACAAAGGGGAAAG >strand + yhfN alternate gene name: yzoA; simi TCACTCCAAACATTGTTAGTTTTCCCAGCGATCGGGGTTTCCATGCTTAAAAGGGTGGAAAAGTGCGGAACACAGCTTGGTTCTAAGAATTTGAATTTATGATTACAATAGAAGTAAC >strand - yhfM yhfM GTTACTTCTATTGTAATCATAAATTCAAATTCTTAGAACCAAGCTGTGTTCCGCACTTTTCCACCCTTTTAAGCATGGAAACCCCGATCGCTGGGAAAACTAACAATGTTTGGAGTGA >strand + yhfO similar to hypothetical protein ATGATTTTATCTCTATTTAGGTATATCATCTCTCGCTATTTCCGTAGAGACTCGAAATAACTATTTTCATACAGAAAAGAACGAAAATAGACATGAGTAAATGTTCATTATGCTGAAATGTCATGCTTTTTTAGGTTAAATGCTCCTGAGTCCCGGCAAATTCCTGTCGAAAAAATTCGTTCAAATGACCTGCGTGTGCTTCCGTGAGAACAATGGATATTATCGTGATATTTTTTCAAAGCATGATGATAAAAGTATTCTGAAAAATAAACTTTACAGAAAAGGGATAGAATGAAAAAA >strand - aprE serine alkaline protease (subti CACGCAGGTCATTTGAACGAATTTTTTCGACAGGAATTTGCCGGGACTCAGGAGCATTTAACCTAAAAAAGCATGACATTTCAGCATAATGAACATTTACTCATGTCTATTTTCGTTCTTTTCTGTATGAAAATAGTTATTTCGAGTCTCTACGGAAATAGCGAGAGATGATATACCTAAATAGAGATAAAATCATCTCAAAAAAATGGGTCTACTAAAATATTATTCCATCTATTACAATAAATTCACAGAATAGTCTTTTAAGTAAGTCTACTCTGAATTTTTTTAAAAGGAGAGGGT >strand + yhxC similar to alcohol dehydrogenas TATACCTCCAATCAGATTTATTCTTAATTTTTCCATATTCTATCTTTTCATACATAGGAAGCAGGCATTGTTCATAACATA >strand - yhfW similar to hypothetical protein TATGTTATGAACAATGCCTGCTTCCTATGTATGAAAAGATAGAATATGGAAAAATTAAGAATAAATCTGATTGGAGGTATA >strand + comK competence transcription factor 
ATCCCCCCAATGCCTTTTTTATAGTATATGGATAACGGTCGAAAAAATCAATAAATTTTTATAATTTTTAGACAACTAATACTTGGCAATCTATCGACATATCCTGCAAAATGCCGTAAACCGGCAAGTCTTATGAAAGTAAAATCGGTTTATTACTAGTCATTTAGTACCATTAAATATCATTAAAAGATGATTTTATCTTAAATGTTAAAAAAACCTGTCGTTTTACAAAAACAGATGATAGATTATTAGTATAAATTTT >strand - yhzC yhzC AAAATTTATACTAATAATCTATCATCTGTTTTTGTAAAACGACAGGTTTTTTTAACATTTAAGATAAAATCATCTTTTAATGATATTTAATGGTACTAAATGACTAGTAATAAACCGATTTTACTTTCATAAGACTTGCCGGTTTACGGCATTTTGCAGGATATGTCGATAGATTGCCAAGTATTAGTTGTCTAAAAATTATAAAAATTTATTGATTTTTTCGACCGTTATCCATATACTATAAAAAAGGCATTGGGGGGAT >strand + yhjA yhjA ACGCTCCTTTTCTGTTTGGTGTACATTCGTATGTGTACCCCTGAGCATAAGCAGAAAAACATGTTTTGGCAGTTTGAATAGTGAAGGAAAAAGAAAACAAGAAAAAACAAAGCCTTCTTCCATATTTATCTTGTCAAATAAGACTATATGAATTATGATAAATTCGAGCTATAAATCAACTAAATA >strand - yhxD similar to ribitol dehydrogenas TATTTAGTTGATTTATAGCTCGAATTTATCATAATTCATATAGTCTTATTTGACAAGATAAATATGGAAGAAGGCTTTGTTTTTTCTTGTTTTCTTTTTCCTTCACTATTCAAACTGCCAAAACATGTTTTTCTGCTTATGCTCAGGGGTACACATACGAATGTACACCAAACAGAAAAGGAGCGT >strand + yhjE similar to hypothetical protein CTCCTTCTTTCTAAGGGAACGTTTGTTCTATTTGTATTCTATATCGGCTGGTGAAATGTTTCAAAGACAGAAAAAATCACCAAAACTTTTTAGTTTTCTGCTGATAATGTGTAGAATAAGGA >strand - yhjD yhjD TCCTTATTCTACACATTATCAGCAGAAAACTAAAAAGTTTTGGTGATTTTTTCTGTCTTTGAAACATTTCACCAGCCGATATAGAATACAAATAGAACAAACGTTCCCTTAGAAAGAAGGAG >strand + yhjM similar to transcriptional regu GTACCTCCAATGATTTTATAATCTAAAAATTTATTACAAATTTATCCTAGTTTCTGAAATCGGTAACATGTCA >strand - yhjL similar to sensory transduction TGACATGTTACCGATTTCAGAAACTAGGATAAATTTGTAATAAATTTTTAGATTATAAAATCATTGGAGGTAC >strand + yhjP similar to transporter binding CGCCGCCTTTCGCTGTTTGATATAGTAGTAGTGTATAGAAGGGGCGGCGGCCCAAAAAGTGAAGAAAAGAAGAGATAAAAGTT >strand - yhjO similar to multidrug-efflux tra AACTTTTATCTCTTCTTTTCTTCACTTTTTGGGCCGCCGCCCCTTCTATACACTACTACTATATCAAACAGCGAAAGGCGGCG >strand + addB ATP-dependent deoxyribonuclease 
TTCAACTCCATTCTTAGGAATTGGGCTTCTGTAAAAGTATATGTTGGGCAGGGAGAGATGTGCGGAGATAATCAGCTTTTTATATGTGAAAAGGCCGTTTTTACCAATAGATCAGATTGGTCATTTTCGTCAACATTCGATAAAATATAGAGA >strand - yhjR yhjR TCTCTATATTTTATCGAATGTTGACGAAAATGACCAATCTGATCTATTGGTAAAAACGGCCTTTTCACATATAAAAAGCTGATTATCTCCGCACATCTCTCCCTGCCCAACATATACTTTTACAGAAGCCCAATTCCTAAGAATGGAGTTGAA >strand + yisK similar to 5-oxo-1,2,5-tricarbo TTCCTCCTTCTCAAAACGACATTTGTATGATATGTAGAGCAAGCCCGTCCTGAAACGAAATGAAAACGGAATTGAGAAAAAGGGATTTCAGCATTCGACAGAGAATACATGAAGGAGCATTCTG >strand - yisJ yisJ CAGAATGCTCCTTCATGTATTCTCTGTCGAATGCTGAAATCCCTTTTTCTCAATTCCGTTTTCATTTCGTTTCAGGACGGGCTTGCTCTACATATCATACAAATGTCGTTTTGAGAAGGAGGAA >strand + yisO alternate gene name: yucB; simi TCTTTTCTTTCCAATTCAAGCCTGTCCCATACAACTCTTTTATGGACAAATCAACTATCTGCCTATAGATGCATAAACTTACTAAGGTGCTGAAAGAAACGAAATTGTTACGAAAGAAACGGGATTGGAGCTGATGTCACATGTGTGGAATTACGGGTTGGGTCGATTTTAAAAAGCAGCTCGT >strand - yisN alternate gene name: yucA; simi ACGAGCTGCTTTTTAAAATCGACCCAACCCGTAATTCCACACATGTGACATCAGCTCCAATCCCGTTTCTTTCGTAACAATTTCGTTTCTTTCAGCACCTTAGTAAGTTTATGCATCTATAGGCAGATAGTTGATTTGTCCATAAAAGAGTTGTATGGGACAGGCTTGAATTGGAAAGAAAAGA >strand + yisR alternate gene name: yuxC, yucF CTAACCCCTTTCGTAAATAAAATCAACGTTTTAAAGTGTACCTCCATTTCCTTATAATAAAAAGGGATAGAAACGATACTTCTTGTAAAAATC >strand - yisQ alternate gene name: yucE; simi GATTTTTACAAGAAGTATCGTTTCTATCCCTTTTTATTATAAGGAAATGGAGGTACACTTTAAAACGTTGATTTTATTTACGAAAGGGGTTAG >strand + yisY similar to chloride peroxidase GACATCCTTTGCTGCCCGAAAAACAGAGGCTTCCGGGTTCAATTTAATAGATCGTTAAGGAAATTGATACATATAGTAAATCGTGAAGAGCCTTTTCTTGGTACAGCAGCCCGCTTGTTTTGTAAGCATAAATGGAAAGCTTCCTATACGTCAAGGCGTTTACAGGCCGAAAAAG >strand - yisX yisX CTTTTTCGGCCTGTAAACGCCTTGACGTATAGGAAGCTTTCCATTTATGCTTACAAAACAAGCGGGCTGCTGTACCAAGAAAAGGCTCTTCACGATTTACTATATGTATCAATTTCCTTAACGATCTATTAAATTGAACCCGGAAGCCTCTGTTTTTCGGGCAGCAAAGGATGTC >strand + yitC similar to hypothetical protein 
TTTCTCCGTACGTTCGGTAAGCCCATTTCAACACATCGAGCTCATCGATCAATTGATCCGTTATCTGTTTGGACAGCATGTCGTTCCAAGTGTCATATGTGATGGTTTCGTTCAATGTTTTCCCCCCGTTTGCATCTGCTTTTTTTATCTATTGATCGAGTTATAGGCAGACTATTCATTTTTTGCATAAAGGTTCAGGTTTTAGGTGCCGCGCCCGCCTAAAATGAGGTGCGGAACCATTAAGAGCCCGGCTGAATATGCTTTTTAGCAAAAT >strand - yitB similar to phospho-adenylylsulf ATTTTGCTAAAAAGCATATTCAGCCGGGCTCTTAATGGTTCCGCACCTCATTTTAGGCGGGCGCGGCACCTAAAACCTGAACCTTTATGCAAAAAATGAATAGTCTGCCTATAACTCGATCAATAGATAAAAAAAGCAGATGCAAACGGGGGGAAAACATTGAACGAAACCATCACATATGACACTTGGAACGACATGCTGTCCAAACAGATAACGGATCAATTGATCGATGAGCTCGATGTGTTGAAATGGGCTTACCGAACGTACGGAGAAA >strand + yitL yitL AACCTCCATAACATTTAACTTATGTGTTGATTATAGTAGAATAAAGAGAAACTTCCAACAGAACAAAG >strand - yitK similar to hypothetical protein CTTTGTTCTGTTGGAAGTTTCTCTTTATTCTACTATAATCAACACATAAGTTAAATGTTATGGAGGTT >strand + yitQ yitQ ATATAAAAACATTTATGTAATAAAAAGGTTAAAAAGTATGAGAAATTCCATTTTTGTTAAAAGGAAAAACAGGACATTTTACACTGCTATATGTGATTGATATATAGTGTTTTTTTCGATATATTTAAACCATGAAAACGAATCAAGTTAATGACCGCTGGATTGTTCAATTTCGAAAAGGGATTTTCGAGCTGGCCATCCTATCCCTTCTGCGTTCTAAACCGATGTACGGTTATGAGCTGACGTCATCCTTAAAAACCACCTCGGCGCTGGCTATTTCAGAGGGAGCGATTTATCCGA >strand - yitP similar to hypothetical protein GTTTTTAAGGATGACGTCAGCTCATAACCGTACATCGGTTTAGAACGCAGAAGGGATAGGATGGCCAGCTCGAAAATCCCTTTTCGAAATTGAACAATCCAGCGGTCATTAACTTGATTCGTTTTCATGGTTTAAATATATCGAAAAAAACACTATATATCAATCACATATAGCAGTGTAAAATGTCCTGTTTTTCCTTTTAACAAAAATGGAATTTCTCATACTTTTTAACCTTTTTATTACATAAATGTTTTTATATTATGAAAAGTGTAAAAACGATTCTTCAGGTTAAAAGGAGCA >strand + yitT alternate gene name: yuxA; simi TTCCCTTCTTTCCGTTATCGCTCGTTATCATGAGAGTTAAGCCAACTATATCATATTAATTTGCCAAAATCGTGTTCAAATATTTGTTTTAAGGGAAAACATAATAA >strand - yitS yitS TTATTATGTTTTCCCTTAAAACAAATATTTGAACACGATTTTGGCAAATTAATATGATATAGTTGGCTTAACTCTCATGATAACGAGCGATAACGGAAAGAAGGGAA >strand + yitV similar to hypothetical protein GCTCCTTTAACCTTAGATTTTGCTTGTTTTGCCTTACTTACAGTGTAGCTTTTTACCATCTTTATTGTCTACATGTTACACTTATGATTA >strand - yitU similar to hypothetical protein 
TAATCATAAGTGTAACATGTAGACAATAAAGATGGTAAAAAGCTACACTGTAAGTAAGGCAAAACAAGCAAAATCTAAGGTTAAAGGAGC >strand + yjaU yjaU CACCTCGTCCTTCTCATTCTATTCTAAAAGTTATCATACCATACAAAAAGCGAAGGGGGAATCCAATTTTAGCAGAAAGTGAGGTGACAAAAGAGTCTGATTCGTCACACTTTAATGGTAAAATATAAAACGTCAATCAGTATAATAGCCAAAAATGTAAAAGGCTAGGACTTTACCTTATTCGTTTCTTTTCTATTTACATAAGATAGCGTGAGATG >strand - yjzD yjzD CATCTCACGCTATCTTATGTAAATAGAAAAGAAACGAATAAGGTAAAGTCCTAGCCTTTTACATTTTTGGCTATTATACTGATTGACGTTTTATATTTTACCATTAAAGTGTGACGAATCAGACTCTTTTGTCACCTCACTTTCTGCTAAAATTGGATTCCCCCTTCGCTTTTTGTATGGTATGATAACTTTTAGAATAGAATGAGAAGGACGAGGTG >strand + yjaX similar to 3-oxoacyl- acyl-carr CCTTTCCATATACCATACTCTATGAGTAAGATGAACTGATAGTTTAGACGAATATATTGCCATGTGAAAAAAAATAGGATAGAATTAGTACCTGATACTAATAATTGATCACAACCTGATTGATCTTCTAAATT >strand - yjzB yjzB AATTTAGAAGATCAATCAGGTTGTGATCAATTATTAGTATCAGGTACTAATTCTATCCTATTTTTTTTCACATGGCAATATATTCGTCTAAACTATCAGTTCATCTTACTCATAGAGTATGGTATATGGAAAGG >strand + oppA oligopeptide ABC transporter (b AGATATTATATGTAAAAAGCAATATGGGCAGAAAACACATGAAAAAGTTTTTAATCAATTTATGCTTTAAATGGTAGAAGGATATTATGTTCATGGAAGAAAAACTAACGAAGTTTAAATATTTTAAATTGATAAAATAATATTGCAATAAATTATTTGTTTCATTATAATGAACTTGTTCACTCTATTGTTACAGCTTTTTTACAAAAATAATCAGAAAAGACGGAACAGAATAAAAGTTGTGGAAACTCAGGTTCATTTGTCTGATATTTCTGAGGATTTAGCCGTAAGGAGCTGAAA >strand - trpS tryptophanyl-tRNA synthetase TATAATTGATTTATAATGAAGCAATCAAAAGAAAAGCTTGCTATGAAAAAGAGTAGTACACGTTTCACCTGTTTGAAGAGAGTCTGCGGTGCTGGGAGCAGATAACGGGCAATGTGGAATGGACTTTGGAGCAGCTGACCGAACTGGAAAAAAGTAGGCTCAGCCGGAGCAGTCTCCGTTACAAACGTCAGAGTGATTCCATTTTAATGGAATAATCAGGGTGGTACCACGGTTCATTCGTCCCTTTTTTACAGGGGAAGAATGAGCCTTTTTTATTATGTTTTAAGAAATGAGGTTGAT >strand + mecA mecA GCACCTCGCTTTCAGTCCTTCGCTTGTACTACTACTATATGAGACAGGTCATATTTTATGATATCAGCCGCCTGATCTGCCGTTTTTTAGGCAAAGGTGCATTTTTATTTCTTTTGTGACTGTTTTATCATAAAATAGAAATACAAAGGAATTCACACTGGCCTTGGTTAAGGTTAAGATGTGGACGGAATGGGTAAAGTGTAGTAAAGTACAATTAATCGGGAGCTTAGATGTCCCTTCAACATC >strand - yjbE similar to hypothetical protein 
GATGTTGAAGGGACATCTAAGCTCCCGATTAATTGTACTTTACTACACTTTACCCATTCCGTCCACATCTTAACCTTAACCAAGGCCAGTGTGAATTCCTTTGTATTTCTATTTTATGATAAAACAGTCACAAAAGAAATAAAAATGCACCTTTGCCTAAAAAACGGCAGATCAGGCGGCTGATATCATAAAATATGACCTGTCTCATATAGTAGTAGTACAAGCGAAGGACTGAAAGCGAGGTGC >strand + yjbL yjbL CTTCCTTTCCGCCCTGTAAATCTTATTTTTACTATTATCTCTTTAGATGTTAGGTTTATCAATATCGAGCTCGATATGATAAAATGAATCTGTA >strand - yjbK yjbK TACAGATTCATTTTATCATATCGAGCTCGATATTGATAAACCTAACATCTAAAGAGATAATAGTAAAAATAAGATTTACAGGGCGGAAAGGAAG >strand + yjbQ similar to Na+/H+ antiporter TCCTCCTTACCGATTGTTTTATTTATTATGAAGCAAAGTGTTGTGAGTCATCACAAGCTTTTATGTGCAAATTATAGAAGAAGAATCATTTTCTAAACAGGATTAAGACCTTATCACTTTATTTTTCATCATCACCCTTGTTAAAATCGCGTTATGTTTTTTAAAAATGAGGTTTTGTTCTCTGAGTGAGAACTAGCATAACCC >strand - yjbP similar to diadenosine tetrapho GGGTTATGCTAGTTCTCACTCAGAGAACAAAACCTCATTTTTAAAAAACATAACGCGATTTTAACAAGGGTGATGATGAAAAATAAAGTGATAAGGTCTTAATCCTGTTTAGAAAATGATTCTTCTTCTATAATTTGCACATAAAAGCTTGTGATGACTCACAACACTTTGCTTCATAATAAATAAAACAATCGGTAAGGAGGA >strand + yjcA similar to hypothetical protein CACTCCTTTCTTATCCATACTCTATATAATGCAAATGGGGTGCAGAGCGGAATAATAAAAATAACCAATTTTCTGCCGGACAAGATACAATTCATGAAGAATAGGGTTTTGGCATATATGTGAAGGGAT >strand - cotV spore coat protein (insoluble f ATCCCTTCACATATATGCCAAAACCCTATTCTTCATGAATTGTATCTTGTCCGGCAGAAAATTGGTTATTTTTATTATTCCGCTCTGCACCCCATTTGCATTATATAGAGTATGGATAAGAAAGGAGTG >strand + yjcI similar to cystathionine gamma- TCTTGCATTTTCAATTATTGTACCATGAATTATTAGAAATTTCTCTCTTTAAACGGTTCGGCACACGAAGAAAAAAGTGTTGAAATAAACTGTGAATTGCGCTAATATAAAACAATCAGAAAATTGAAATCGATATTTCTTATCGTGAGAGGTGGAGGGACTGGCCCTTAGAAACCTCAGCAACCGGCTTGTTTTGCATTTGCAAAGCGCCAAGGTGCTAAATCCAGCAAGCGTTTTTTATGCTTGGAAGATAAGAAGAAGCGTTAAACCCCTTCTTCTTATGAAGAAGGGGTTTTTATT >strand - yjcH yjcH 
GAAGGGGTTTAACGCTTCTTCTTATCTTCCAAGCATAAAAAACGCTTGCTGGATTTAGCACCTTGGCGCTTTGCAAATGCAAAACAAGCCGGTTGCTGAGGTTTCTAAGGGCCAGTCCCTCCACCTCTCACGATAAGAAATATCGATTTCAATTTTCTGATTGTTTTATATTAGCGCAATTCACAGTTTATTTCAACACTTTTTTCTTCGTGTGCCGAACCGTTTAAAGAGAGAAATTTCTAATAATTCATGGTACAATAATTGAAAATGCAAGAAAGTATAGTGATGTCCTGGAGGAAT >strand + yjcN yjcN CCTCCAATAAAATCTAAATATAAGTAATATTTATCATTTACACCAATTGAAATATGGCATAAAATACATGTAATAATATAACATAAAAATAAATA >strand - yjcM yjcM TATTTATTTTTATGTTATATTATTACATGTATTTTATGCCATATTTCAATTGGTGTAAATGATAAATATTACTTATATTTAGATTTTATTGGAGG >strand + yjdC similar to transcriptional anti GCGTGTGGAAATTTTGTTACCTGCACTTTATCCCCGAAGATACCAAATCACTTTTCTTCATCATGTTCTTACTTTCGTACTCATACATTAGAAAATCTTCGTTCTAAATAGGTGAAGAATTGGAGTATTTTTATGAATTTCTGCTGAATATACATTACATAGCAAACTCAAAGAGTATAAAAATCGCTTTTTTCCGGAAGCTTCGGTAAAAAACGAAACTTTTGTCTCTATGATTTTGTTTTATAATGTAAACGGTTTCTTATATAGTATACTTATACTATCAATTTGCTCAAGTAGATA >strand - yjdB yjdB GTACGAAAGTAAGAACATGATGAAGAAAAGTGATTTGGTATCTTCGGGGATAAAGTGCAGGTAACAAAATTTCCACACGCTCAACAAACTAATACCCATCAAATGTGTGCTATTAACATTTTAAGGATGGCCATTCTCTCTCAGCAATTTTCCATCATAAATACAAACTCTGTGCAGGGCACACAATATTCTTAGCTCAAATCAATTGATCGTTCACATATTATTAACATTTATTTACAAGGAAAATAATTACTTTTATTGAATTGTTATAGTGCAAGACAAAAAACTTTAAAGGAGTGT >strand + yjdI similar to transcription regula AAATTGTATGTGGGAGTTATTTGTTCATTCCTGTTTTGTTCCATGTAGACTAATAATGGGCTATAGGCGTCCAAGGAAATCAATTTGTCCTTCCGTTGATTCAATGCTAGTAACCTTGAAAATGCAGGTTAGGATGATAGAATTTACATATTACTGCAAGAAGTTAGACATACA >strand - yjdH yjdH TGTATGTCTAACTTCTTGCAGTAATATGTAAATTCTATCATCCTAACCTGCATTTTCAAGGTTACTAGCATTGAATCAACGGAAGGACAAATTGATTTCCTTGGACGCCTATAGCCCATTATTAGTCTACATGGAACAAAACAGGAATGAACAAATAACTCCCACATACAATTT >strand + yjeA similar to endo-1,4-beta-xylana CGGTTTTTTTTATGATGAAATTCCTTAAAAAGGATTGACATGAACTTCTAAATCTCATAGTATTACAAATGTGAAATAAATGTTACTACAATGTTACAAAAACATTTCTAAATAGATACAGTTCCAACTGATGAAGAGTAAATCGAAA >strand - cotT spore coat protein (inner) 
TTTCGATTTACTCTTCATCAGTTGGAACTGTATCTATTTAGAAATGTTTTTGTAACATTGTAGTAACATTTATTTCACATTTGTAATACTATGAGATTTAGAAGTTCATGTCAATCCTTTTTAAGGAATTTCATCATAAAAAAAACCG >strand + yjfC yjfC ACCTCCTATGCCTTACCTATCGGCGGGTTTGTATGATTTTACAGTGAAAAATGGCTTAATTTCTCTTGTTGACCCTTACGCTGCGGCATATCGTAT >strand - yjfB yjfB ATACGATATGCCGCAGCGTAAGGGTCAACAAGAGAAATTAAGCCATTTTTCACTGTAAAATCATACAAACCCGCCGATAGGTAAGGCATAGGAGGT >strand + yjgC similar to formate dehydrogenas CCCTCCTAATTGATCTACCATTCCGTTTCCCCGTGTTTTTGCAACGAAACATGTGAGTGCGCTTTGTATGTTTTATTGAGTTGTTGTAAGGGAACTGAAATAGGTAATTCGGTAT >strand - yjgB yjgB ATACCGAATTACCTATTTCAGTTCCCTTACAACAACTCAATAAAACATACAAAGCGCACTCACATGTTTCGTTGCAAAAACACGGGGAAACGGAATGGTAGATCAATTAGGAGGG >strand + yjiB similar to monooxygenase AAGTCATTTTTGGAGTTAGTGCATTTGGTTGAAAACAGAGACGTTTTTTGAGCGGAGCGGTGCAATTGAATCCATATTGGATTTGCGTGCTTATGCCTAAGGTCACGTTGTGAGCCAAAACAGCAGTGCAGAATAGAAAAAAATTGAAAGATTATGGTAAGATGAACGAAATTCGAGGGGGAATATATGATGCCTGCATCGAACATCGTGATCGCCTGCCTGCTGTTATCAATTTAATTATTTGTGGTAGTCCCTCTCTTTTTCGAAAGCGGGAGGTGAAGTGGAACATTAATTGCTGAA >strand - yjiA yjiA TAATTAAATTGATAACAGCAGGCAGGCGATCACGATGTTCGATGCAGGCATCATATATTCCCCCTCGAATTTCGTTCATCTTACCATAATCTTTCAATTTTTTTCTATTCTGCACTGCTGTTTTGGCTCACAACGTGACCTTAGGCATAAGCACGCAAATCCAATATGGATTCAATTGCACCGCTCCGCTCAAAAAACGTCTCTGTTTTCAACCAAATGCACTAACTCCAAAAATGACTTTCCCATTATGTAAAATAATTTGGTTCTATGATTAAATAGAAGGTAAAAGGTGAAGGAGAG >strand + yjlB yjlB CCTCATTCTGGCATTATTTTTTTATACTATACTATTCGGTATCATGCAGATCAATGGAATCATGAGAAAAACATCATGTTTGGCGAACCGCTATATGTGGA >strand - yjlA similar to hypothetical protein TCCACATATAGCGGTTCGCCAAACATGATGTTTTTCTCATGATTCCATTGATCTGCATGATACCGAATAGTATAGTATAAAAAAATAATGCCAGAATGAGG >strand + yjoB similar to cell-division protei CTCCTCCATATGTAAAAATAAATGGTGCAATGAAATCCTATATCAACGGTTATGAATTCACAAGTAATATGCTGTGGGATGAAACAAAATGCTATGTCAATCGTATATATAACGTTCA >strand - yjoA yjoA TGAACGTTATATATACGATTGACATAGCATTTTGTTTCATCCCACAGCATATTACTTGTGAATTCATAACCGTTGATATAGGATTTCATTGCACCATTTATTTTTACATATGGAGGAG >strand + xlyB N-acetylmuramoyl-L-alanine amid 
TGGTATACATATTCAGACCCTCCGTGAACTTCAGTTTAACACATTTATCCATATTACGGTGATAGATGATATGAGCTTTTCGTCCTACGAATGCCACCTATTTATG >strand - yjpA yjpA CATAAATAGGTGGCATTCGTAGGACGAAAAGCTCATATCATCTATCACCGTAATATGGATAAATGTGTTAAACTGAAGTTCACGGAGGGTCTGAATATGTATACCA >strand + yjqB similar to phage-related replic CCTCCTCTGCGTATTCTATCAAATCATACTATAAAAAAGGTTTTCATTCATAGATGAGTAGAGTAAACTGGTGCT >strand - yjqA yjqA AGCACCAGTTTACTCTACTCATCTATGAATGAAAACCTTTTTTATAGTATGATTTGATAGAATACGCAGAGGAGG >strand + xkdB alternate gene name: ykxB; PBSX AAACTCCCTACGTTACTTATGATTATAGATACAAAATGTATCAAAATAAAGTTATGATAAAAAAAGTATCAAAAATAACTTGATGATACAGAATGTATCGTTTATACTTGGTAACATCCGATACAAAATGTATCAAAAAAGATCAGGAGAGTGATCACATGTATCCGATCCAAATCGTTTTTAGTGAAAATCCCATAGATCAGCGCCATCTCGGACAATCCGGCGGCACCATTTCGTTTACCTCATGCGGCCTTCCGGTGTTTCACTTTGAAACGCAGGAACAGTTTCAAGCATACATGA >strand - xre transcriptional regulator AAACTGTTCCTGCGTTTCAAAGTGAAACACCGGAAGGCCGCATGAGGTAAACGAAATGGTGCCGCCGGATTGTCCGAGATGGCGCTGATCTATGGGATTTTCACTAAAAACGATTTGGATCGGATACATGTGATCACTCTCCTGATCTTTTTTGATACATTTTGTATCGGATGTTACCAAGTATAAACGATACATTCTGTATCATCAAGTTATTTTTGATACTTTTTTTATCATAACTTTATTTTGATACATTTTGTATCTATAATCATAAGTAACGTAGGGAGTTTAAAAAAGAGAGGG >strand + ykcA similar to ABC transporter (bin CCTATTTTAGAAAAGTGAAATAAAATAAATTTTCTGATTTTCGAAATTTGCTTTGTGTACAGCAGATAAAACTGTAGAAATACCTGTTTTAATGGTCTATAAACGAATAAAATTCATAATTTTTATTTACAAAGTTCGTGTTTTATGTATTAGGATTTTAGCCGGTACAGAGGGGAGATGAGAATTCAGAAAATGAACGAACCTTCGGTGAAGCTGATATGTTGACTTTCTCATGCGAATAAAATACTTTGTTATTAGAACAATAAATATACAATTATGATATGATATCTCGAAATCGAG >strand - ykbA similar to amino acid permease TTATTCGCATGAGAAAGTCAACATATCAGCTTCACCGAAGGTTCGTTCATTTTCTGAATTCTCATCTCCCCTCTGTACCGGCTAAAATCCTAATACATAAAACACGAACTTTGTAAATAAAAATTATGAATTTTATTCGTTTATAGACCATTAAAACAGGTATTTCTACAGTTTTATCTGCTGTACACAAAGCAAATTTCGAAAATCAGAAAATTTATTTTATTTCACTTTTCTAAAATAGGGTTTACAATATGAAAAAATAGATTCTTCAACATGAAGCTTCAACATCATAGGAGGTTA >strand + ykeA alternate gene name: yzcA; simi 
TCACTCCGTTTCTCTATTTTCACATGTCTATTTATATTGATAGGTTTATGACTTGAGTTTATCATAAACAAAAACTGTGGGATAAAAATGAAAAGATTATGGGAAATTGTGAAAAAGTGCGGAAAACGGTACACTTCA >strand - htrA serine protease Do (heat-shock TGAAGTGTACCGTTTTCCGCACTTTTTCACAATTTCCCATAATCTTTTCATTTTTATCCCACAGTTTTTGTTTATGATAAACTCAAGTCATAAACCTATCAATATAAATAGACATGTGAAAATAGAGAAACGGAGTGA >strand + ykhA similar to acyl-CoA hydrolase TTTCCTCCTAAAGTGTGAAGAATACATTTATTATTCCCTCATCATGAAAATCATTAAACCATTTTAAATTTTAGACAAATGGAAGAGAATATTGTTTAATAGGAGAATATAAGTGCATTTAGCTAAAAGATCAGACATCAAAGGGGAGAAAGGTATGGAAACACCGGAAACAAGATTTTGTAAGAGTCAAAGGTTGTGAAAACCAGCAGGGTGTTCCCGCTTGACACGAACAACCA >strand - ykgA similar to hypothetical protein TGGTTGTTCGTGTCAAGCGGGAACACCCTGCTGGTTTTCACAACCTTTGACTCTTACAAAATCTTGTTTCCGGTGTTTCCATACCTTTCTCCCCTTTGATGTCTGATCTTTTAGCTAAATGCACTTATATTCTCCTATTAAACAATATTCTCTTCCATTTGTCTAAAATTTAAAATGGTTTAATGATTTTCATGATGAGGGAATAATAAATGTATTCTTCACACTTTAGGAGGAAA >strand + ykjA similar to hypothetical protein GTATTGCGCCTCCAATCGGTGTCACTTTCTATTATGTTATGTTCAGGCGATATGCACGTTCGCCTAGACACTGATGCATAGAGGGCCTGCCACGGGGCACAATAGGCTCAATAT >strand - ykzH ykzH ATATTGAGCCTATTGTGCCCCGTGGCAGGCCCTCTATGCATCAGTGTCTAGGCGAACGTGCATATCGCCTGAACATAACATAATAGAAAGTGACACCGATTGGAGGCGCAATAC >strand + ykzA alternate gene name: yzzE; simi TCACCAACTCTTTTTTGTCATTCGATTCTATTGTATCTGAAAAACCTCACTTTTCCTATTCTCGGCAAACATAGCATGTTTAAAAAGATCAGAAAGGGAAATATAACAACTA >strand - ykmA similar to hypothetical protein TAGTTGTTATATTTCCCTTTCTGATCTTTTTAAACATGCTATGTTTGCCGAGAATAGGAAAAGTGAGGTTTTTCAGATACAATAGAATCGAATGACAAAAAAGAGTTGGTGA >strand + ispU alternate gene name: ykoB AGCCCTCCTTTTTTATGCTTGCAAGTGGATTCAAGGGTGTTGCCTCATATTATATGAAAAAAGCAAGCGCTCGGACAGAGCCATTTGAAGCATTTTGAAAAAATAGGATACACACCAAAGAATTAGTAATCTTCAAAATATTGGATGGCTTCAACATGTATGACTGGGATTCAACTGTAAGTCAAGTTATAA >strand - ispA intracellular serine protease TTATAACTTGACTTACAGTTGAATCCCAGTCATACATGTTGAAGCCATCCAATATTTTGAAGATTACTAATTCTTTGGTGTGTATCCTATTTTTTCAAAATGCTTCAAATGGCTCTGTCCGAGCGCTTGCTTTTTTCATATAATATGAGGCAACACCCTTGAATCCACTTGCAAGCATAAAAAAGGAGGGCT >strand + ykoG 
similar to two-component respon CGACCTCCTGTTTTTTTGGTAAAATAAAAAACTGCATATCCTAAGGGAATATGCAGTCATAGATCGATTAAGCGCACAAAAAAATACACCATTCGGTCAACCTGCACTTCCCCACGCTGGTATTATCCAGATCGGGTCATAAGGGATCAAAGCGCGTTTCTTCCGCGCTTTATCTCAGCCAAAACAGCACCCCTAGTGCTTTCCGTATACAATTTTTTATTTTAACATCATAATAGATGATAAGAATCATTTTGTAAACAACGAAATCATTTTTTTCATA >strand - ykoF ykoF TATGAAAAAAATGATTTCGTTGTTTACAAAATGATTCTTATCATCTATTATGATGTTAAAATAAAAAATTGTATACGGAAAGCACTAGGGGTGCTGTTTTGGCTGAGATAAAGCGCGGAAGAAACGCGCTTTGATCCCTTATGACCCGATCTGGATAATACCAGCGTGGGGAAGTGCAGGTTGACCGAATGGTGTATTTTTTTGTGCGCTTAATCGATCTATGACTGCATATTCCCTTAGGATATGCAGTTTTTTATTTTACCAAAAAAACAGGAGGTCG >strand + ykzB ykzB CACCCCTGGATGTCTTTTGATAATAGTATATAAAAAACAAAGAGAAAAAACCTGACGCTTGTCATATTTTCTAACATCAAATAGAAGATTTTTGAAAAAATACGGAAGAAAAATTAACAAGAATAATGCGTAACACCCAAGCGCGTTTTATCCTTTATGTAACG >strand - tnrA transcriptional regulator CGTTACATAAAGGATAAAACGCGCTTGGGTGTTACGCATTATTCTTGTTAATTTTTCTTCCGTATTTTTTCAAAAATCTTCTATTTGATGTTAGAAAATATGACAAGCGTCAGGTTTTTTCTCTTTGTTTTTTATATACTATTATCAAAAGACATCCAGGGGTG >strand + ykoP ykoP TCCTCCTAAAAAAATATGGCAACTATATTTTGAGGAGAACGTTCACAAAATATAAGGGTGT >strand - ykoN similar to hypothetical protein ACACCCTTATATTTTGTGAACGTTCTCCTCAAAATATAGTTGCCATATTTTTTTAGGAGGA >strand + ykoS alternate gene name: ykoR ATCCCCTCCCTTCTATCAGTATAGTAAGTGATCAAGCCCTATGGATAGCGGGAATTAATGGGTAGAGACAGGTTGAAAAACGGATTGTCATATGATGAATGATTTGGAAGGGAAAAGGGATAATAAATTTTGACACATGAGGCAGAACATGCCGACGGAGTC >strand - ykoQ similar to hypothetical protein GACTCCGTCGGCATGTTCTGCCTCATGTGTCAAAATTTATTATCCCTTTTCCCTTCCAAATCATTCATCATATGACAATCCGTTTTTCAACCTGTCTCTACCCATTAATTCCCGCTATCCATAGGGCTTGATCACTTACTATACTGATAGAAGGGAGGGGAT >strand + ykoX similar to alkaline phosphatase GCCGCCGATCAGCCAAATTTTACTTTTTAAGCCTGTATTGATGGTGACTTTCCTTGAAAGTTCAAGGGAAATATACGAGGCGGTACAGGCAATTAGAATAGATAAACATATGAGAGTCGTATTATATGTAACGTGTATTTCCAAATGGAGATTCCCCCCGTATCGTGTGTTCCAGTCTTGTTGACTCTCTTTATTATCGGCTGCTTAACCGCAGAGTTTCAGCCTTCAAAACGATAGAAATACGAGGGAGATCTATTTTTAGTTCAACTGACCGTGAAAAATGTGTTAAAATCATCTTGA >strand - ykoW similar to hypothetical 
protein AGATGATTTTAACACATTTTTCACGGTCAGTTGAACTAAAAATAGATCTCCCTCGTATTTCTATCGTTTTGAAGGCTGAAACTCTGCGGTTAAGCAGCCGATAATAAAGAGAGTCAACAAGACTGGAACACACGATACGGGGGGAATCTCCATTTGGAAATACACGTTACATATAATACGACTCTCATATGTTTATCTATTCTAATTGCCTGTACCGCCTCGTATATTTCCCTTGAACTTTCAAGGAAAGTCACCATCAATACAGGCTTAAAAAGTAAAATTTGGCTGATCGGCGGCTCA >strand + ykrL similar to heat-shock protein CCTCTTTAAACAATTTGACGCGGATATAGGTCTATGTATAAGTTCAAAATTCAAGCGTCAGCAAATTACATTAACATCATACGTCGAATTCCTTGCATTTTCAAATGGATGTTTTATAATTTAAATATACGGGTTCTTTT >strand - ykrK ykrK AAAAGAACCCGTATATTTAAATTATAAAACATCCATTTGAAAATGCAAGGAATTCGACGTATGATGTTAATGTAATTTGCTGACGCTTGAATTTTGAACTTATACATAGACCTATATCCGCGTCAAATTGTTTAAAGAGG >strand + ykrQ similar to two-component sensor ACCTTCTCTTTCATCAGTCACTAAGAGATTATTCCCTGCTGGTACATATTTTAAAATAAAAGATGAAAAAGGAATCTAAAAAAATGCTGATTGTTAGGTCGATGTAAAGTGACGGTAAGGATCGTAGAAAATGTCTTCGTAATGTAATGACGCTGAAATTTTCTGAAATTTATAGGGCAAATGAAGGAAGTTCCCGCATTTTGTAGAAATATTGAGGGATAG >strand - ykrP similar to hypothetical protein CTATCCCTCAATATTTCTACAAAATGCGGGAACTTCCTTCATTTGCCCTATAAATTTCAGAAAATTTCAGCGTCATTACATTACGAAGACATTTTCTACGATCCTTACCGTCACTTTACATCGACCTAACAATCAGCATTTTTTTAGATTCCTTTTTCATCTTTTATTTTAAAATATGTACCAGCAGGGAATAATCTCTTAGTGACTGATGAAAGAGAAGGT >strand + ykrV similar to aspartate aminotrans AACACCCCAAATTTTTCTTTACTTTTTTCATTTAACCATATATGATGATGGACTATCATTTCAA >strand - ykrU similar to hypothetical protein TTGAAATGATAGTCCATCATCATATATGGTTAAATGAAAAAAGTAAAGAAAAATTTGGGGTGTT >strand + spo0E negative sporulation regulator TACCTCCTGCCTGGCGGCGTGTTTAAGATATTCTTCACCTGGGTATTGTTCTTCTAATCCTATCAATATATCTATTATACCTGAAAAATTAACTTATTTAATGAAAATATGTTTACAAATAAAGTATAATCTGTAATAATGCACAATAACCCAATCAAACTTGTTTCCTATT >strand - ykvA ykvA AATAGGAAACAAGTTTGATTGGGTTATTGTGCATTATTACAGATTATACTTTATTTGTAAACATATTTTCATTAAATAAGTTAATTTTTCAGGTATAATAGATATATTGATAGGATTAGAAGAACAATACCCAGGTGAAGAATATCTTAAACACGCCGCCAGGCAGGAGGTA >strand + ykvE similar to transcriptional regu 
GATTAGAGGTATTATATCATTGTTTCTTCTTCTTTAATAAGAAAAAATAACCAAAAAAATTAGAAAAAGATGTTGGCAAGCTTACATTTTAAAAAAGAAGAGAAATATTGGAAAGATTTTTATTGACATTTTGGTCTTTGTTTATTAATGTTTCTCTGTAAC >strand - ykvD similar to two-component sensor GTTACAGAGAAACATTAATAAACAAAGACCAAAATGTCAATAAAAATCTTTCCAATATTTCTCTTCTTTTTTAAAATGTAAGCTTGCCAACATCTTTTTCTAATTTTTTTGGTTATTTTTTCTTATTAAAGAAGAAGAAACAATGATATAATACCTCTAATC >strand + ykvI similar to hypothetical protein TGAAAGTGAATAAAAAGGTCTATTGGGTTTGACCTTCTTTGACTATTAGTATACGGCAAACTTAACGGCATTCAAACCAATATGCTCAAAGCAGACTGAATATTTGACTATCTTTGACCAATTGATTTTTAAAAAAGGCCGGTTTGTTTGACCAACTTTGATTAATAGTATACTCTGACTATCTTTGACTTTCAAGTCTTATGCACAAAAATTTTTGATTTGTTACAAAGTAAAACATGTTTTCTTCGTCTTTTCAAGGAACTTGTCTCATAGGTTATAAAAAGGCATATATGGACAAAA >strand - clpE ATP-dependent Clp protease-like ACAAGTTCCTTGAAAAGACGAAGAAAACATGTTTTACTTTGTAACAAATCAAAAATTTTTGTGCATAAGACTTGAAAGTCAAAGATAGTCAGAGTATACTATTAATCAAAGTTGGTCAAACAAACCGGCCTTTTTTAAAAATCAATTGGTCAAAGATAGTCAAATATTCAGTCTGCTTTGAGCATATTGGTTTGAATGCCGTTAAGTTTGCCGTATACTAATAGTCAAAGAAGGTCAAACCCAATAGACCTTTTTATTCACTTTCATTGGTCAAAGATGATCAAATTATTAAGGAGGTTT >strand + ykvO similar to glucose 1-dehydrogen AGCTCTCCATTCCACATTTATTACTTTTTAGTTACTATATCACAAAAAAGTGGGTACTTCCGATAATGTGCTTACTGAAGGATACTATATCTTGTAAGTTGTTCTTCAAGTACTTAAAGGTACTAGACATAAAATAAA >strand - ykvN similar to hypothetical protein TTTATTTTATGTCTAGTACCTTTAAGTACTTGAAGAACAACTTACAAGATATAGTATCCTTCAGTAAGCACATTATCGGAAGTACCCACTTTTTTGTGATATAGTAACTAAAAAGTAATAAATGTGGAATGGAGAGCT >strand + ykvT similar to spore cortex-lytic e AATACTTAGAACAGCCAAGAAGCTGAAGGTTTCTCATACGTGAGAAGCCTTTTTATCATATATGACAGAACACAAGCCTTTTATTACAGCTGTCTGTAACAGGTGTAAATAAAATGTAAAGTACCAAAGATGGATAAACATGTTTTAATTTAACAATATTTATAGAATTTCAGACTGTTTTTTTGCCATTAAGCAGTGAGTCATCCTCTTTATTCCATCCTGATACCTCCTTGGGAGAGTTGGAAAATCTTTTTCCGTGAAAATGAATGGATCTCTTGTTAAGGTTATGAAGTGTTTTTT >strand - ykvS ykvS 
ATGGAATAAAGAGGATGACTCACTGCTTAATGGCAAAAAAACAGTCTGAAATTCTATAAATATTGTTAAATTAAAACATGTTTATCCATCTTTGGTACTTTACATTTTATTTACACCTGTTACAGACAGCTGTAATAAAAGGCTTGTGTTCTGTCATATATGATAAAAAGGCTTCTCACGTATGAGAAACCTTCAGCTTCTTGGCTGTTCTAAGTATTGAAAGATAACATCTCCGCCTTGCTGGGCAATTCCTCGGAAATGTTTAAAATCATCTTGTTTAGATAACACGGCCCTGAAATC >strand + mcpC methyl-accepting chemotaxis pro CCTTATGTTGTTTCATTATAACGAAGATTGAATTTTCAGTAAATATATTCCGATTTTCTTATAATTTATTTTTTGATTATTCATTTTTCAAACGAAAGGGCCGA >strand - ykwB similar to hypothetical protein TCGGCCCTTTCGTTTGAAAAATGAATAATCAAAAAATAAATTATAAGAAAATCGGAATATATTTACTGAAAATTCAATCTTCGTTATAATGAAACAACATAAGG >strand + ykuA similar to penicillin-binding p TTCCAGCACTTTCCTTTGACAAATAGAGATTGTCATTTTCGTAAAGTAAATCCGTTGAATCACTAAGAATAAGGGAATCAGCGGTATATATGATTTTATCGTACCGTTACATAGCTTCCTTTTTTTTGAACCGTTAGTTATATTTTAATGTGTTTTTATATTGACAGCTTTTTTTACTCCTTTTCGCTGTAACAACTATTACAAAGGTGTTACAATTAATCTCAGTATATGTAAATATATTGAATGATTTGAGAGGGGTAACCAGAAGTGACTGAAATAGGACGTGAACCAAAGAAAAAG >strand - ykwD similar to hypothetical protein TATTTACATATACTGAGATTAATTGTAACACCTTTGTAATAGTTGTTACAGCGAAAAGGAGTAAAAAAAGCTGTCAATATAAAAACACATTAAAATATAACTAACGGTTCAAAAAAAAGGAAGCTATGTAACGGTACGATAAAATCATATATACCGCTGATTCCCTTATTCTTAGTGATTCAACGGATTTACTTTACGAAAATGACAATCTCTATTTGTCAAAGGAAAGTGCTGGAATCTTTATGACAAAAAAGCTATGTTATGATTTTCCTGTAAAAACAAACAACTCTAGGAGGTTAC >strand + cheV cheV GATACGGAGGATGCTCTTCTTGAAGGTCAGCTGATTCTTTCTTCATATTCAGGTAAGTTTCTTCACTGATATCCTCTAAAACAGTCATTTCGTTTTCTTTAAATATTACTAAGGTACCCATTATTTACACCCTTCTTTGGAAAAATCCTTGACTATTTTATCACTGTTAGTTGGAAAGTCAATGGTTATTAGGAAAAAAGTTTCTTTTTTGAGGAACGCGGCTTGTTGCCTTCGGAATTCGATTTTGAAGAGCTGCCATTCACTTTTTACAAATTACGCCGATATATACAGTACAATACT >strand - patA aminotransferase GAAGAAACTTACCTGAATATGAAGAAAGAATCAGCTGACCTTCAAGAAGAGCATCCTCCGTATCTGATTTGGCACGAAGACCTTCATTTTGATTATGGCTATTAATGAAATTGGCACATATAAACAATAATAGAACATGATACAATAAAGACAATCAAATCGCTGGTTGTCTTCTTTTTTTCTGAAAAGAAAGGTATGTTTAGAAAAAAGGCGCATACAATCGAAATATATCAGTTAAAAAGGTGACAAATGATCAGCTAGTGGTAAAATAGGATAAACTCATTATATTTGTTAAGGTGA >strand + ykuF similar to glucose 
1-dehydrogen CCAACCTTTAATAGAGTTTTTCTATTATATCATCTGTATCTGATAAAAATCTCATATGAGCATGTGCTGACAGAGGCAGGGATATAATGATAGAATATTATTGAATGAATAA >strand - ykuE similar to hypothetical protein TTATTCATTCAATAATATTCTATCATTATATCCCTGCCTCTGTCAGCACATGCTCATATGAGATTTTTATCAGATACAGATGATATAATAGAAAAACTCTATTAAAGGTTGG >strand + ykuU similar to 2-cys peroxiredoxin CTCCTTTTCTAATTGATTGCTGCACATTGAGACTAGTTACCCTGTACATGACCATGTTAAACGTTTTATTCAAAAATGTCGAAGCCAAAGACATACATGATTTGTTAAAAATTGACTCAGGAAATCTGCTTTTATATAATGGCATTGCAATATAACTAAAAATGGTTAATCACCCTTTATTTTCATGCGATTTTGACCTTTAAGCC >strand - ykuT similar to hypothetical protein GGCTTAAAGGTCAAAATCGCATGAAAATAAAGGGTGATTAACCATTTTTAGTTATATTGCAATGCCATTATATAAAAGCAGATTTCCTGAGTCAATTTTTAACAAATCATGTATGTCTTTGGCTTCGACATTTTTGAATAAAACGTTTAACATGGTCATGTACAGGGTAACTAGTCTCAATGTGCAGCAATCAATTAGAAAAGGAG >strand + mobA molybdopterin-guanine dinucleot CATCTCCCTCATGATGTTAACACTACAATATGCAAATACTGAACAGCTATTCCTCTTTAATAAACAGACAAGCAGTAGCGTTGAGCCTTTGCTTTTGCTATGCTTT >strand - yknT alternate gene name: cse15; sim AAAGCATAGCAAAAGCAAAGGCTCAACGCTACTGCTTGTCTGTTTATTAAAGAGGAATAGCTGTTCAGTATTTGCATATTGTAGTGTTAACATCATGAGGGAGATG >strand + ykpA similar to ABC transporter (ATP CCTCCATGAGAGTGTGTAATTTTCCAGGCGGATGCATTCTCTCTCTTTAATTGTAACACAGCTGGCTGGTTTGGGTGTATAAGTGTTTTCTCTCTGTTTTATCAGGGAATCATTTCTCTTGCCCTGCATTCATGGTATACTTTTATTGAT >strand - ykoA ykoA ATCAATAAAAGTATACCATGAATGCAGGGCAAGAGAAATGATTCCCTGATAAAACAGAGAGAAAACACTTATACACCCAAACCAGCCAGCTGTGTTACAATTAAAGAGAGAGAATGCATCCGCCTGGAAAATTACACACTCTCATGGAGG >strand + abh transcriptional regulator ACCCAATCTCTATCCTAATTTAATATGATTCTACATTTAAAGTTTCTCAGAAATACTGCGATTTTTGGGGGTGTGCGTCGGTAAAGAAATGTTTTTGTAAAAAATCAAGGAACTGTGTAACAGAAGTAATACATTTTTCGGTTTTATGGAAAAAATATCTTGTAATTCTTATAGAAAGCGGGAAACTTTTTCAAAGTTTCATTCGTCTACGATATATTGAGGTACATCCTTAAAACATTACATGAGAAAAGAA >strand - mreBH cell-shape determining protein 
TTCTTTTCTCATGTAATGTTTTAAGGATGTACCTCAATATATCGTAGACGAATGAAACTTTGAAAAAGTTTCCCGCTTTCTATAAGAATTACAAGATATTTTTTCCATAAAACCGAAAAATGTATTACTTCTGTTACACAGTTCCTTGATTTTTTACAAAAACATTTCTTTACCGACGCACACCCCCAAAAATCGCAGTATTTCTGAGAAACTTTAAATGTAGAATCATATTAAATTAGGATAGAGATTGGGT >strand + ykrA similar to hypothetical protein AATATAAGGAAAGCCTTGAGCAAAATAAGGATCTCTCAAAAAGTGAAAGATCCCTTGTCAGGCAATTGCTTTCTTACGGAGCATTCCGTTCCATTGTCTTCTGAGTTTCTTTCTTAATTTACGCAGCATTAGCCAGTCCCTCCTTTAATGAAAAATTTGTCCCAGAGTTCTTTTATACAAATAGTATACGGAGATTGAGGAAGACTTTTCGTGGTTATTAATGGAATTTAATATGATTTTTGAACGAAATTGTTTGCTGAATGACAATCTTTTTGTATGATGAGGGTGGGAATTGCAGAC >strand - ykzG ykzG GTTCAAAAATCATATTAAATTCCATTAATAACCACGAAAAGTCTTCCTCAATCTCCGTATACTATTTGTATAAAAGAACTCTGGGACAAATTTTTCATTAAAGGAGGGACTGGCTAATGCTGCGTAAATTAAGAAAGAAACTCAGAAGACAATGGAACGGAATGCTCCGTAAGAAAGCAATTGCCTGACAAGGGATCTTTCACTTTTTGAGAGATCCTTATTTTGCTCAAGGCTTTCCTTATATTCCGTTTTCGAATATAATAGAATGAGCGAGTCTAACATACGGCATTTAAGGAGAGA >strand + ykyA alternate gene name: ykrC; simi ACCATATCCGTAACCACCATAACTGCAGCAGCCTCCGCCGTAACCATATCCATACATATTACATAACCTCCTCTTTTAATTACGCCAGAGCCTTTATAAGTAGGCATAGCGTCTAATATAACCTATGTAAAAACAAAAAAGTTGTATGGTCATCTGTCCTGATTTTTCACTTTTTTGATTTAATTTCAGTATTGCATTTATACAGCACGGCTATTGCCAAATGTCCCCATGGATTCTATAGTAGAAATTGTCAAGATTGGGGGATTTAACGTGAAAGACGCTATAAAACGGCGGCCTGGA >strand - ykrB similar to formylmethionine def AAAGGCTCTGGCGTAATTAAAAGAGGAGGTTATGTAATATGTATGGATATGGTTACGGCGGAGGCTGCTGCAGTTATGGTGGTTACGGATATGGTGGCTGCGGATATGGCTACGGCCGCACCTTTGCGCTAATCGTTGTTTTATTCATTCTGTTAATCATTGTCGGAGCTGCTTATTTAGGTGGAGGCTGTTGCTAACCACACAGCATGAAATTGACCAGCCATATACGGCTGGTCATTCGTTTCGTTGCACCTTTGTGAAGAAATCCGATATAATTTACATATTAAGCTTTAGGAGTGA >strand + yktA yktA TCCCACCTTTGTAATCATACCTATGCATAATAGTGTGTATTAAAATCAGTTTCTCAACCGTTTTTTAGACGTAAAACAAATGAATTATATTATGAACTTGAAAGAATGTAAAGAACAATTTATGATCAAATGAAGATGCTTTCTCTTAAGGGAA >strand - cad lysine decarboxylase TTCCCTTAAGAGAAAGCATCTTCATTTGATCATAAATTGTTCTTTACATTCTTTCAAGTTCATAATATAATTCATTTGTTTTACGTCTAAAAAACGGTTGAGAAACTGATTTTAATACACACTATTATGCATAGGTATGATTACAAAGGTGGGA >strand + ykzI 
ykzI CCTCCTTCATGCTAAGTATACATATTTTATCACAGTTCATACGAGCTTCAAAAAGATTGAGCGCGTTTGAAGTAAGGAGAAATGTGGTAATAAACGAGTAGGTAAACACACAATTTTCAGAATCATGTTGCGCGTAAATAGAACAATCATACGATATAAGAAGTAATCATTGTTGAATGGCAAAATGGGATGCTTTTTTACAAACAAAT >strand - yktB yktB ATTTGTTTGTAAAAAAGCATCCCATTTTGCCATTCAACAATGATTACTTCTTATATCGTATGATTGTTCTATTTACGCGCAACATGATTCTGAAAATTGTGTGTTTACCTACTCGTTTATTACCACATTTCTCCTTACTTCAAACGCGCTCAATCTTTTTGAAGCTCGTATGAACTGTGATAAAATATGTATACTTAGCATGAAGGAGG >strand + ylaA ylaA TCCCCCTTTTTGAAAATACTGAAAACTTTATATTGTTATATTAAACTAGCATTCTGGAATACTCAACAAAAACTAACATAACTAGACAAAATTGATAGTATTGTCCTGTGTTTGTTTTTGTTGAATAATATCAAGATGAGTCAAGTGTCATGTGGTGGTTCATTCAAGAGTTATGTTTTTTACATATCAAGGGGAAGATGAGATATTTTTGTTTCATCTTGAAACTTTTTGAAAAGTCCGCTGTCTAACCGAAT >strand - nprE extracellular neutral metallopr ATTCGGTTAGACAGCGGACTTTTCAAAAAGTTTCAAGATGAAACAAAAATATCTCATCTTCCCCTTGATATGTAAAAAACATAACTCTTGAATGAACCACCACATGACACTTGACTCATCTTGATATTATTCAACAAAAACAAACACAGGACAATACTATCAATTTTGTCTAGTTATGTTAGTTTTTGTTGAGTATTCCAGAATGCTAGTTTAATATAACAATATAAAGTTTTCAGTATTTTCAAAAAGGGGGA >strand + ylaG similar to GTP-binding elongati TATCCCCTTTTACGTCTTATATTAAGTTTACAGAAAAACGTGACGCTTTTAAAGAGGATGTGTGATATAATATGAAAGTTAT >strand - ylaF ylaF ATAACTTTCATATTATATCACACATCCTCTTTAAAAGCGTCACGTTTTTCTGTAAACTTAATATAAGACGTAAAAGGGGATA >strand + ylaK similar to phosphate starvation GTTCCCTCCTTAAAATGCTTTCATTAATTTTATTGTTGAATTAACCTTCTATCTTTATGCACAAAAACATATATTTTAGCAGGCATTCACCTTTTTCATCAAAAGAGAAGCAGGAAGAACTTTG >strand - ylaJ similar to hypothetical protein CAAAGTTCTTCCTGCTTCTCTTTTGATGAAAAAGGTGAATGCCTGCTAAAATATATGTTTTTGTGCATAAAGATAGAAGGTTAATTCAACAATAAAATTAATGAAAGCATTTTAAGGAGGGAAC >strand + ylaM similar to glutaminase ACCCCCTTCTCTCAAAATTATAAAAGTTTTCGCATGAATTGTCACTTTGTGTACAAATTATGTGTATTGAAA >strand - ylaL ylaL TTTCAATACACATAATTTGTACACAAAGTGACAATTCATGCGAAAACTTTTATAATTTTGAGAGAAGGGGGT >strand + ctaB cytochrome caa3 oxydase assembl 
AATTTTATATACGTTCCAAGTGTAAGCGAATATGTGTTTTGTGGCAATTCCCAGAGCCTTTTGTTCACAAAAATATAATAGAATGACGGCTTCTGGAAAATGTACACTTGATTATTCATGCCGGGTCTGATAGAAATAAGGTAAACGCAATGCCGATTTGGAGATTGCCTTCATCTTTGGTAAAATTCATAAAAAGTTCACAAATAAATTCCTGTTTGTCGTTTAATATTTAATAGGATGTTTTTTTACAAGATGTTATCTTGTATGATAGAGTTGCAGGTTATTTTATTCTCCTGCGTT >strand - ctaA heme-containing membrane protei CAACTCTATCATACAAGATAACATCTTGTAAAAAAACATCCTATTAAATATTAAACGACAAACAGGAATTTATTTGTGAACTTTTTATGAATTTTACCAAAGATGAAGGCAATCTCCAAATCGGCATTGCGTTTACCTTATTTCTATCAGACCCGGCATGAATAATCAAGTGTACATTTTCCAGAAGCCGTCATTCTATTATATTTTTGTGAACAAAAGGCTCTGGGAATTGCCACAAAACACATATTCGCTTACACTTGGAACGTATATAAAATTGCAGCAGTATGTTAAGAAGGTGAA >strand + ylbB similar to IMP dehydrogenase CTCCTTATAAAAATTACCTTTCCTGACAATCATAGTATGAAAGCGTTAAACTTTCAATGAATCTTCTTCTTTTGAAAGTATGCAAAAATTAAATTTTGACGACAATAGC >strand - ylbA ylbA GCTATTGTCGTCAAAATTTAATTTTTGCATACTTTCAAAAGAAGAAGATTCATTGAAAGTTTAACGCTTTCATACTATGATTGTCAGGAAAGGTAATTTTTATAAGGAG >strand + ylbK similar to hypothetical protein CTCCCCGATGCTAAAAATGCCCAATGACATGTATATCATGATAAAATAATGTATGATGTAAAACGGCCAAGCAGTACCTGTCCTCTTGATTCTTTGTACTAATATACGAGCATAGGGGTTCAGTTTAGACCATAGAATTCGGTATACAGG >strand - ylbJ ylbJ CCTGTATACCGAATTCTATGGTCTAAACTGAACCCCTATGCTCGTATATTAGTACAAAGAATCAAGAGGACAGGTACTGCTTGGCCGTTTTACATCATACATTATTTTATCATGATATACATGTCATTGGGCATTTTTAGCATCGGGGAG >strand + ylbN ylbN TCCGCCTTTCTTGCTATGTTTAAAAACGTATGGTACATTTGCAAATGAAAACTCTTACGCCTTATTTTTGTTTTAAGTGTATGCATAGCATATTCCGCTGTCAACGATACTTCAATGGCAGCATTATGCTGTAAAGAAAAAATATTGACAAAAACAATCTTAAAAGCTATAATCATGTT >strand - ylbM similar to hypothetical protein AACATGATTATAGCTTTTAAGATTGTTTTTGTCAATATTTTTTCTTTACAGCATAATGCTGCCATTGAAGTATCGTTGACAGCGGAATATGCTATGCATACACTTAAAACAAAAATAAGGCGTAAGAGTTTTCATTTGCAAATGTACCATACGTTTTTAAACATAGCAAGAAAGGCGGA >strand + ylbQ similar to pyrimidine-thiamine CTCCCCCTTTGTTGTTTCTACATATATTGTAAACGCTTTATTTAAAAAATCCAAATATTTAAACTTTAATTTTAAGCACATGGGATCTTTGAGAAGTAATTTCTTCTTACTTCTGCTATGATAATACGT >strand - ylbP ylbP 
ACGTATTATCATAGCAGAAGTAAGAAGAAATTACTTCTCAAAGATCCCATGTGCTTAAAATTAAAGTTTAAATATTTGGATTTTTTAAATAAAGCGTTTACAATATATGTAGAAACAACAAAGGGGGAG >strand + yloB similar to calcium-transporting TCTCATTCTTCCTGCATTCGATAATTATAGCATGAAATGGGACGAGTCAGAATAAGCATGCTTATAGAGTGATATCTCTATAAGA >strand - yloA similar to fibronectin-binding TCTTATAGAGATATCACTCTATAAGCATGCTTATTCTGACTCGTCCCATTTCATGCTATAATTATCGAATGCAGGAAGAATGAGA >strand + yloU similar to alkaline-shock prote CCTCCTCACTTGCACAAAACATCTACTTTTTAACATTCATATAATCTTAGATGCTTTCGATATGGATACTTTAAATAATTTACCACAGCTAAAAAACGTTTGCAACTATTGTTTCATCATCTTTCAAGAAAATTCACTTGACATCTTGCGGAAACAGCAGCGCTATAGTATATGTAGAGGTACGGCCAGCGATTATTGCCTGGTCTTTTCATAATGGAAACCATATAGTAGAATAGCTGTAACTCT >strand - rpmB ribosomal protein L28 AGAGTTACAGCTATTCTACTATATGGTTTCCATTATGAAAAGACCAGGCAATAATCGCTGGCCGTACCTCTACATATACTATAGCGCTGCTGTTTCCGCAAGATGTCAAGTGAATTTTCTTGAAAGATGATGAAACAATAGTTGCAAACGTTTTTTAGCTGTGGTAAATTATTTAAAGTATCCATATCGAAAGCATCTAAGATTATATGAATGTTAAAAAGTAGATGTTTTGTGCAAGTGAGGAGG >strand + ylxM similar to hypothetical protein CTCCTTAAAATAGGGTTCATATACAATATCGGAATAAATTGGATGATATTTAGCGTATTTTGGAAAAGTTAATCGCCGCTTTGACAAGATAAAAACTTGACAGTGTCATTAAAACCGTGTAAACTAAGTTATCGTAAAGGGATTTG >strand - ylqB ylqB CAAATCCCTTTACGATAACTTAGTTTACACGGTTTTAATGACACTGTCAAGTTTTTATCTTGTCAAAGCGGCGATTAACTTTTCCAAAATACGCTAAATATCATCCAATTTATTCCGATATTGTATATGAACCCTATTTTAAGGAG >strand + pksA transcriptional regulator CCCTCTTTTCAATACAGTTGTATTATTTAAATAATATGTTACCATCTTTGTTTTTGTAACACAACTGTATTATAAAAATTGCCCTCTCAT >strand - ymcC ymcC ATGAGAGGGCAATTTTTATAATACAGTTGTGTTACAAAAACAAAGATGGTAACATATTATTTAAATAATACAACTGTATTGAAAAGAGGG >strand + ymaC similar to phage-related protei TGTGAGAACAAATAAGCAAAATCTCTATTCTGCAGGCCGGTAACAGCTGTCCGTCTCAGCAACCGGGCAGCCTGTTACTAAAACCCTCCTTAAACCTATATACATTTATTTGCCAAGTGAGGTTGTTCCTACCTTTTTTGTCTTTCCTCTCAACCTTTTTTCACTCTTATTTGAAATGAACCGCTAATAACCGTGACAATCAATTGATTTTTTGAAACCGGGAACTTTTTCACTGATGATATTGTGTATACAATGGTAAGATAAAACCTATTGGCTAAACCTCATATAATGAACCGCGCC >strand - aprX alkaline serine protease 
CTTTTACTAATCGTTCCGTCTTTCTTCATATATAATGCTGATTGGCATTTGGCTTTCCAAAGATCGGTGCAAGTAGAAATTCATACGGTTGCTCTCCCCTTCCCAGTTTGATATGATCACATTATAATACGAACAAACGTTCTTTTTCAATCCCCAATGATTTCCTCAGCTTCTGAATCTATCGTATCTAATGCGAAAGATTCAGAAAAACAAACTCTTGTCTGTCTCTTACAGAGGACAAACAAACTTACCAAACCTATCATCTGTTCATAGAGTATACAGAGAACCTTATAATAGGAG >strand + ymaF ymaF TCCGCCATACAAATAAGGATTAAATAAAGCGCTTCCTATAAGCCCGCCTGCAAGGCCGCCGATAAACGGTCCTCCAAAAAATCCGAATGGCCGGCCAAACCCATAACCAAATCCGGGTCTTCCAAACCCGTAACCGAACCCGAACGGTCTTCCAAACCCAAATGGTCTCCCGAATCCGAACGGCCTTCCAAATATTCTTGTATCTTCAGGGCTGAAATGCTCTAACTCGTTCATGTTTTCTTTCCTCCTGTAATAAAGTGTAGATGATGCAGTGTATTCTCCCTGCCCGGATTTGATTGG >strand - ebrA multidrug resistance protein CGGGTTTGGAAGACCCGGATTTGGTTATGGGTTTGGCCGGCCATTCGGATTTTTTGGAGGACCGTTTATCGGCGGCCTTGCAGGCGGGCTTATAGGAAGCGCTTTATTTAATCCTTATTTGTATGGCGGATACCCCTATTATCCGTATGCCCCGTTCCCTTTTTATTATTAAACGCAAAAGACCCCTTAAAAAGGGGTCTTTTTAAGTGTGTGAAAATGAATCTCTTGACTCATACATAGGCATACACTAATATTTTTCTAGCTGAAGTTTTTTTTCTAATCTAAAAATATAGTGAGGAG >strand + spoVK alternate gene name: spoVJ; di CACCGCTTTATAAATCGAAAAAAAGAAAAACACTTCAGGTATCTTAGGGCGGGTAATACGTTTCGTACAAAAATCAGCCCGTTGTATGAAAAAGAAATAATGAGAAAGAAGCTCACTATAATACTCTGACAAAATGATTAAAAACAAAATCTCATGGTTTGTCCACCCCATGTCCGTGAATACAATAAGAAATAAAGTATTTCTCGGGAAAGCGCAGGATTCCTTATTGTTCCCGAAAAAAGCAATTGTTTCGACACATGATAGGCGTTTGTCACAATCGGCATCCGCTTGAATATCATA >strand - cwlC N-acetylmuramoyl-L-alanine amid AGCGGTGTCTTTAATGAAAGGAGCTTTCCTCATCAATTTACGTTTTAAATGCTTGATTTAATATGAACGCAAAGCTTAAAACCAGCAGGAATCAGTGCAGGTTAACCGAACCCATAGTACATACAAACATATGGCGTATGCACAGATTTCATGTCGGACCGTATCTGTTGTCGCTTCATGTTGAATTGTGCGCTTTCCCGAGAAATAATTTCTGGATGTGAAAGGGTTATTTCCTCATTTTTCAGTTGAATGATAATCGTACAAGCAGAAGCCGTGTTTTTTCATATCCTGTAATGAGGT >strand + xylA xylose isomerase AATAGGTGAATTTTTAAGGATTTCTTTTAATAATAACTTTTGATTTACTTTTTTGACAAAGGTTTGATCAGCGATATCCACTTCATCCACTCCATTTGTTTAATCTTTAAATTAAGTATCAACATAGTACATAGCGAATCTTCCCTTTATTAAATCTAATGTGTTCATAAAAAACTAAAAAAAATATTGAAAATACTGACGAGGTTATATAAGATGAAAATAAGTTAGTTTGTTTAAACAACAAACTAATAGGTGATGTACTTACTATATGAAATAAAATGCATCTGTATTTGAATGAAT 
>strand - xylR transcriptional regulator CAGATGCATTTTATTTCATATAGTAAGTACATCACCTATTAGTTTGTTGTTTAAACAAACTAACTTATTTTCATCTTATATAACCTCGTCAGTATTTTCAATATTTTTTTTAGTTTTTTATGAACACATTAGATTTAATAAAGGGAAGATTCGCTATGTACTATGTTGATACTTAATTTAAAGATTAAACAAATGGAGTGGATGAAGTGGATATCGCTGATCAAACCTTTGTCAAAAAAGTAAATCAAAAGTTATTATTAAAAGAAATCCTTAAAAATTCACCTATTTCAAGAGCAAAAT >strand + yncC similar to metabolite transport AGGGAACTTATAACACTCCAAATTGCCAATAGTTAACTTCACAACGAGATGATCAAAACATCACCATCACATAGCGGAAATAAATAATCCTGAAAATAATATTGACTGAAAATGCTTTTAGAATTATGATAATATGAGAAATGACAATAATGTGACAAAGACATTACTACAACATAACAACTAATAGCTTAGGTTTATTCTATTGGTTAAAAATGTAAACGGTTACAAATCCAGAGGCGAAAATCTAAGATAGAGATGACTTTAGTTTTGACTCAATAACCACACTGAAAGAGCTTATTA >strand - yncB similar to micrococcal nuclease GTTTACATTTTTAACCAATAGAATAAACCTAAGCTATTAGTTGTTATGTTGTAGTAATGTCTTTGTCACATTATTGTCATTTCTCATATTATCATAATTCTAAAAGCATTTTCAGTCAATATTATTTTCAGGATTATTTATTTCCGCTATGTGATGGTGATGTTTTGATCATCTCGTTGTGAAGTTAACTATTGGCAATTTGGAGTGTTATAAGTTCCCTTGAAGTTTTCCATTCACAAAAAATGTCCATATCGTTATAATGATGATGTTTAATCGCGAAAAGAGAAAATGATAAAGAGG >strand + yncE yncE CCTATCAATGTTCGAATAATAGCAAGTACGGGATACATGAGGCTTGTAAGTGGGGTGTTAATGGATGCGGCGACCGAGGATTACAAAAATGATTATTTGTTTTGGGACATTTTTTTCTAAGGGTAGATGATAACGAGAAATCTAATCAGGAAATACTTACCGTTGTTATGAACATTGAAGAACCTTTTAAATGAGAAGGGCTGAGGAAATTCTGTAAAAGATAATCGGTTGAAAAGGCAGTCGATTCCTTGTTCCATAAACCTAACATCAGAGTATATAATTATACTCGTATTCACAAAA >strand - yncD similar to alanine racemase AACAACGGTAAGTATTTCCTGATTAGATTTCTCGTTATCATCTACCCTTAGAAAAAAATGTCCCAAAACAAATAATCATTTTTGTAATCCTCGGTCGCCGCATCCATTAACACCCCACTTACAAGCCTCATGTATCCCGTACTTGCTATTATTCGAACATTGATAGGCACCTTCTCGTATCACATGGATTTGCTGTTGAATGATTTCAAATAAAGTTGAATAAAATAAGATCGAATCATCCAACAATTAACATTTATTCTTGCGGCAATAGGTGGTATTTATAATCCATAAGAAGGAAGG >strand + thyA thymidylate synthase A 
GTGTTTAGACAATTTCCAATTTGGTATACACAAACACCTGACTATTTGAATTTTTATGTACCGCAATATCAAACCATTTCGTATAATCCTCAACAATGTTATCAACGGTGTATGTACCAAACTGGCGGTAACTATGAGCTATGTGACAGACTATGTTATGGAGAAATACAGGTGTAAAAGAGGGGGATTAACTCCTCTTTAAACACACAGTGAGTGGAATAAGATCCTCACTTTATCTGCAAGTGCTTAGTATTTGCGATAATATTGCATTCGTAATAAATTATGCTTAGCAACTGAAAA >strand - ynzH similar to spore coat protein CTAACTCTAAATCCTTCAAAAGCAGTGTGAAAAATCACATCATAGAGCTCTTGAATCTTAACCATAACGTTCAAATATTCTTCAATGAGAAAGTCCCTTTTTACGTATACTAATCTCTCTATACACTTTACTATTATTTTACAGTGATTGAGTGCTCTCTCTCGCTCTTATTTTCATAAACAGCTATCCGTTTTGTTTTTCAGAAAGTGTTTTAAACAGCCTCTTTTTACTATTCTTATTTCATTTGACCATACCATATCATTTAAACTTCATAAATTAAATTCGAATAACATAAAGGAG >strand + yndA yndA CCCCCTGATTGAAGTACAACCTTTTAAACAAATTATAAGAGATAGCAGTACAAATGGGAATGCATTTATTTTGAACATTTTAAAACCAGCACAAAGACGGATTCATAAAACGGCTGCTGCTGAATATAATGTCCAATGCCACCTGCCTAAGAAGCAGGAGGCAGGCTGGATCAATTATGTAGCTCTGCCGGAAATGATACAAACGTAGCGGCTATCATTG >strand - ynzA similar to hypothetical protein CAATGATAGCCGCTACGTTTGTATCATTTCCGGCAGAGCTACATAATTGATCCAGCCTGCCTCCTGCTTCTTAGGCAGGTGGCATTGGACATTATATTCAGCAGCAGCCGTTTTATGAATCCGTCTTTGTGCTGGTTTTAAAATGTTCAAAATAAATGCATTCCCATTTGTACTGCTATCTCTTATAATTTGTTTAAAAGGTTGTACTTCAATCAGGGGG >strand + ynzB ynzB CCTCTTGACATATGATGTTTTTTCAATTCAATCATATAATAACTAATAAGATCATGTCCATAGCGTGCATTATTTATGCTAGAGCTGCTGTTGAACAGCGTGTCAGTCGCCAGTCTGATCTGTGAATGGGAGAATTCTGCATGCGCGAAAAAGAGAGAGAGGTGTATGTTGTCATTTCGTTCATTCTTGTTTGCAGCCGCAACTGTTATGTTTGAACTGGGATTCCTGTTCAGCCAATTTATTTTGCGATTTTTGTTTTAATAGTAAATGAGCCAT >strand - yndB yndB ATGGCTCATTTACTATTAAAACAAAAATCGCAAAATAAATTGGCTGAACAGGAATCCCAGTTCAAACATAACAGTTGCGGCTGCAAACAAGAATGAACGAAATGACAACATACACCTCTCTCTCTTTTTCGCGCATGCAGAATTCTCCCATTCACAGATCAGACTGGCGACTGACACGCTGTTCAACAGCAGCTCTAGCATAAATAATGCACGCTATGGACATGATCTTATTAGTTATTATATGATTGAATTGAAAAAACATCATATGTCAAGAGG >strand + yndN similar to fosfomycin resistanc TTTAAACAGTCCGTTTTTGTTTACACTGGTATTTTTTTCAGCTGTATGAAACTTTCTTATGAAAAAAGTCGTATATGTGGATGATCA >strand - yndM similar to hypothetical protein 
TGATCATCCACATATACGACTTTTTTCATAAGAAAGTTTCATACAGCTGAAAAAAATACCAGTGTAAACAAAAACGGACTGTTTAAA >strand + yneA yneA CCTCAAAACGTCGATTTTAAGAAGATTATAGCATGATTTTCCTTACAGTACAAACATAGGTTCGAAAAAACAATTGACAGAAACGTTTGTTCGTATATACTGAAATTATAAAAATGCGA >strand - lexA transcriptional regulator TCGCATTTTTATAATTTCAGTATATACGAACAAACGTTTCTGTCAATTGTTTTTTCGAACCTATGTTTGTACTGTAAGGAAAATCATGCTATAATCTTCTTAAAATCGACGTTTTGAGG >strand + ccdA integral membrane protein ATCCTTTCATACATTAGATCTTCAAAGTGAATGTTTATATTATATCCATATTTATACTAATTTTCAAATGAGAAGGAGTATAATGTCTCAATGTCAAATTAAATTATATAAGTTTTTTCTGAAAATAGAAATCCTTCAAGTGAAAGTGTTAAAAAAATGAAATGATTTTGTCATAACTTGAAGGAATGT >strand - ynzD ynzD ACATTCCTTCAAGTTATGACAAAATCATTTCATTTTTTTAACACTTTCACTTGAAGGATTTCTATTTTCAGAAAAAACTTATATAATTTAATTTGACATTGAGACATTATACTCCTTCTCATTTGAAAATTAGTATAAATATGGATATAATATAAACATTCACTTTGAAGATCTAATGTATGAAAGGAT >strand + citB aconitate hydratase CCTCAATCATAATATGGCTTCTTTTATTGTGTTATATGAGAGAGCGTAAACAGTAAAAATTACCGGTACATTTTTCTCATAAGTCGAACTTATTGTATTTAATAAAAACATTGATATTTACTTATGTATGATTTTGTTTTAATATGAAATTGTGAGAAAATTGTGATGGAATGTAATGTTCATCTGAGAATGATCTG >strand - cotK spore coat protein CAGATCATTCTCAGATGAACATTACATTCCATCACAATTTTCTCACAATTTCATATTAAAACAAAATCATACATAAGTAAATATCAATGTTTTTATTAAATACAATAAGTTCGACTTATGAGAAAAATGTACCGGTAATTTTTACTGTTTACGCTCTCTCATATAACACAATAAAAGAAGCCATATTATGATTGAGG >strand + yneT similar to hypothetical protein CTCCTTTTTCAAATTCCCTTCTTTTTATTTTAACATGAGATGTCATGATCTGATGATAAGTTTTTTTGTAAAGTTTGCTTACAAATCAAATCAAACATGCATCATATTTTGATTTCTCCTGTTTTGATCGTGTAAAATA >strand - yneS similar to hypothetical protein TATTTTACACGATCAAAACAGGAGAAATCAAAATATGATGCATGTTTGATTTGATTTGTAAGCAAACTTTACAAAAAAACTTATCATCAGATCATGACATCTCATGTTAAAATAAAAAGAAGGGAATTTGAAAAAGGAG >strand + alsT amino acid carrier protein 
AATAAAAACATATTTTAAATGATATGTTGTTTAGGATTTATTACTTTATTTATGTGTAAATCAAAGAGAAAGGCAAGAGAATGGGAAGAAATGATTACATTGTTAATAGGGTACTCTTTTTTTAAAGCCAGTTTGCAAGGAATGTTTTTATTCGTAGGAAAATAATAGAGAAGGGGTCGACAGAGTAACATCTGGCTATGACAATATTCTTCTCGAAAAAACTTCCTAATTCGTCATATTGTGATATAATAAAACTCGTTATGTTAAAAAATCTAACATCAAAATCGAATTCGTATTGAA >strand - ynfC ynfC TGTCGACCCCTTCTCTATTATTTTCCTACGAATAAAAACATTCCTTGCAAACTGGCTTTAAAAAAAGAGTACCCTATTAACAATGTAATCATTTCTTCCCATTCTCTTGCCTTTCTCTTTGATTTACACATAAATAAAGTAATAAATCCTAAACAACATATCATTTAAAATATGTTTTTATTCACACCATAAACCGGGTATACATACGCTAATAAAAGAAGTTTTCTCTTTCAAAAATAAGCAAAAAGATATATACTTAACAAAGTTAAATTTATTTTTAAATTTAATAAATCAGGTTTA >strand + yngA yngA CATTGAACGGAGATTACATCATTATCCAAGATATCCCTTTATCTAACATCCTACATCATTAAAAAAATTGTGTATAGAAACTTTTGGAGGAGTTAGTGTTCGATTCTACAAAGGAACTGGATACATTTATTCATTGATAAGATGGGGCTAACACTCCGTTTATATTTGTATGTTTTCAGACACAAAAAACAGCCGTTCAATGCGGCTGTTTTTTTCAAGAGTATAAGACTATTGAGCGGTATACCCGCTTTTTCATGTTTTGTTCATATTTTATTAATTGTTTATTCAACACTGTCCGCT >strand - xynD endo-1,4-beta-xylanase (xylanas GGATATCTTGGATAATGATGTAATCTCCGTTCAATGTGATTAGTGGGAGGCCATGAACAATCCTGTCTTTTTATTTTCTGGACAATGGTCGCTAGGAATGGGGAAGTTTTTTTGAACAATTTTGAAGGAGTGGTGCGATCTCCTCGAAGAAAAGAGAGAATGGAGACACATGAAGATGAGCTTTCAGAACAACTGGGAACATTGCTAAACAGCTTCCGGCTACTTTATGCGTGCAATCTTCGTGACCTGTTGTTATCAAATTCCAAAAGAAAATCCCCCATTTTAAGGAAAAAGGAGGAT >strand + yoeB yoeB ACTCCTTTATTTGAAATCGTTTTTAAACATGCCCTCATTTTTTGACAAAAACAGGATATGTAACATGTTATTATTCATCACTGCACTTATGAAACCCTTCCCGATTCTTCTAATTTTACACATCTTTTTAGCTTATGAAGAATAAACTAGCAAGTAATGGCGTATTTTTTGTGAGTAAAAGAATAAAAAATAATTTATGCAAAAGTATTGTAATCTATCCGTAATTATTGTAACATTTGTAACATAAGAGAAAGA >strand - yoeA similar to hypothetical protein TCTTTCTCTTATGTTACAAATGTTACAATAATTACGGATAGATTACAATACTTTTGCATAAATTATTTTTTATTCTTTTACTCACAAAAAATACGCCATTACTTGCTAGTTTATTCTTCATAAGCTAAAAAGATGTGTAAAATTAGAAGAATCGGGAAGGGTTTCATAAGTGCAGTGATGAATAATAACATGTTACATATCCTGTTTTTGTCAAAAAATGAGGGCATGTTTAAAAACGATTTCAAATAAAGGAGT >strand + ggt gamma-glutamyltranspeptidase 
CCCTTCTTAATCCGTATGCTGATTCTAATATAGCACATGGCTCATATCAATATAATCAATTCTGCACAGAAAAACGGCATTATGCACTATATAATATACCATTTGTCACTTGTGAAAACGCTGTAATTTTTTTACGCTAAGATTGTAACAATAC >strand - yoeD yoeD GTATTGTTACAATCTTAGCGTAAAAAAATTACAGCGTTTTCACAAGTGACAAATGGTATATTATATAGTGCATAATGCCGTTTTTCTGTGCAGAATTGATTATATTGATATGAGCCATGTGCTATATTAGAATCAGCATACGGATTAAGAAGGG >strand + yogA similar to alcohol dehydrogenas ACCTGCTTGCTATGATGAAAAGTGATATTGGCTATCATATTTGTTCATTTTACTCGAAGAAAACCGCTATGTAAAATATGTAAAGATATGAAAATAC >strand - yofA similar to transcriptional regu GTATTTTCATATCTTTACATATTTTACATAGCGGTTTTCTTCGAGTAAAATGAACAAATATGATAGCCAATATCACTTTTCATCATAGCAAGCAGGT >strand + gltC transcriptional regulator (LysR TCCCCCGATCAATTTCCGATAATACCGGTCATAAAATCTAACAACTCTATAATCATTGTAGGTTTTCAAAACGATATAAACAATATATAATTTAGATCAAAAGAATCTCAAAATGA >strand - gltA glutamate synthase (large subun TCATTTTGAGATTCTTTTGATCTAAATTATATATTGTTTATATCGTTTTGAAAACCTACAATGATTATAGAGTTGTTAGATTTTATGACCGGTATTATCGGAAATTGATCGGGGGA >strand + yoxC yoxC CCCTTTCTTTGTTTACATATGTACGATATCACATTATAAGTACAATACATCCAATACGCAAATCAAACGAACTGTGTAAATATCATTTCTTCTGATTAAAAAAACGGATACAGGGTAATG >strand - yoxD similar to 3-oxoacyl- acyl-carr CATTACCCTGTATCCGTTTTTTTAATCAGAAGAAATGATATTTACACAGTTCGTTTGATTTGCGTATTGGATGTATTGTACTTATAATGTGATATCGTACATATGTAAACAAAGAAAGGG >strand + yoaE similar to formate dehydrogenas TCTCTTTCCGAAGATGGAAAGAGATTCATATATAGGCGCGCTCTGTTACAGGCAGACGCCGGTATAAACCTTGCTTTCCTATCTTTCAAGCTAGCGCTTGCTGGAATTGGCACAATAATGTTGCCGAGGTTTCATAGGGCCAGTCCCTCCACCTCTCTGGATAGAAAATATTGAGTATGTAATTTTCAGAATGAAGAATACGTTTATCATAAAGAAGACTCAGATTGATGTCAATCTCTTTTTTTCAGAATTGACGTTATCATCGGAAATGTTATAATTCTTACAAAATATCT >strand - yoaD similar to phosphoglycerate deh AGATATTTTGTAAGAATTATAACATTTCCGATGATAACGTCAATTCTGAAAAAAAGAGATTGACATCAATCTGAGTCTTCTTTATGATAAACGTATTCTTCATTCTGAAAATTACATACTCAATATTTTCTATCCAGAGAGGTGGAGGGACTGGCCCTATGAAACCTCGGCAACATTATTGTGCCAATTCCAGCAAGCGCTAGCTTGAAAGATAGGAAAGCAAGGTTTATACCGGCGTCTGCCTGTAACAGAGCGCGCCTATATATGAATCTCTTTCCATCTTCGGAAAGAGA >strand + yoaI similar to 4-hydroxyphenylaceta 
ATGGGGATCATCAATGGTAAAGAATTCATTGACCGATTGAATAAACTAGAAAACGAAATATGGTATGATGGCGAAAAAATAAAGGGTAACATTTTGAGCATCCTGCATTTAAAGGGATAATCAAACAAAAAGCTCACTTTATGAATTGCAAACAAAGGATGAGTTAATACATGAAATGACTTATTGCCTTCCTGGAGATCATAATCGAATCGGCCTTTCCTATCTCCAGCCTAAAACAAAAAATGATTTGAAAAAAAGAAGGACCATGATTGAACATTGGGCTAGACATACTCATGGAAT >strand - yoaH similar to methyl-accepting che AAAACGAATAAGAGTGACTTACATACCGCAAAAGACTGCAAATTTTACTTAAGGTTTTTCTGTAAGTTCAGAGACGTAAAGAATTTGCTTATTCATATAAGAATATCACTATAAAACTTGATAACACGTGTCGATATTATGGACATGAGCGTAAGCGAATTGGATTCAAACGCGCTTGAACATATTTTATTTCAGCCATTACATTGTGCGGAAAGGGAAAACGACACATGAAAGTGAAAACAAAATTGCTGGGGATTATATCAATATTAGTTGTTTCAATTATTGGAATTGGAGGCTCCT >strand + pelB pectate lyase CAATCCTTTCTTCTATTTCAATTGTTAGTATGTCTTTTGGCGCAATCTCTATTTTTTAAGTAACCCAACATTAATTAGGAAGATAGAACCTATTATTCCTTTCCATTAACTGTAATAATAATCCCCGCAAGGAAATGATTAA >strand - yoaK yoaK TTAATCATTTCCTTGCGGGGATTATTATTACAGTTAATGGAAAGGAATAATAGGTTCTATCTTCCTAATTAATGTTGGGTTACTTAAAAAATAGAGATTGCGCCAAAAGACATACTAACAATTGAAATAGAAGAAAGGATTG >strand + yoaQ yoaQ TCCCCCCTTTAACATAATTACATGGAATTCAACTTCACACTTTACATGCCTAATAGAATAAATACCCGTTCCAACTTAAATACTTTTTCTTTTCCATTTTGCCAATTAACTGTAAAACCCTTCTTTTATTCAGATATTATCTTGATTCCTATATCACCCATTTCAAATCCAAAGTGTGTTCATCTTTGAGGATTAGATGCACTGCTTATGCTATAAATACAGTACACGAAGTTTTTTATTTGAAGTAATATTTGTTACGGGACAGCAATAGTAAATA >strand - yoaP yoaP TATTTACTATTGCTGTCCCGTAACAAATATTACTTCAAATAAAAAACTTCGTGTACTGTATTTATAGCATAAGCAGTGCATCTAATCCTCAAAGATGAACACACTTTGGATTTGAAATGGGTGATATAGGAATCAAGATAATATCTGAATAAAAGAAGGGTTTTACAGTTAATTGGCAAAATGGAAAAGAAAAAGTATTTAAGTTGGAACGGGTATTTATTCTATTAGGCATGTAAAGTGTGAAGTTGAATTCCATGTAATTATGTTAAAGGGGGGA >strand + yoaS yoaS AGTATGAACAGCCGCTTATCCTTTGATGTATTTCTTCCCATCGTTTGTTTGAATCTTTAGACGTTGATAAAAACCTTCATAAAAGAGCTTAGCTTTATGTTCACACCGATCCCTGAGCTCACTGAGGCACACGGTTCATTCAGTCGGCAAGTTTGAAGTAGTTCAGCATCTCCCAGTGATGAGGGATGCTTTTTTGTCTTGTCAAAAATGAAAAGGGTCTCCTATAACAACAAAAAATCAATCCATTTCAATATTTTTTTATTGAGAAACGATAAATTTTTTGTTACATTCAATTAGACA >strand - yoaR yoaR 
ACAAAAAATTTATCGTTTCTCAATAAAAAAATATTGAAATGGATTGATTTTTTGTTGTTATAGGAGACCCTTTTCATTTTTGACAAGACAAAAAAGCATCCCTCATCACTGGGAGATGCTGAACTACTTCAAACTTGCCGACTGAATGAACCGTGTGCCTCAGTGAGCTCAGGGATCGGTGTGAACATAAAGCTAAGCTCTTTTATGAAGGTTTTTATCAACGTCTAAAGATTCAAACAAACGATGGGAAGAAATACATCAAAGGATAAGCGGCTGTTCATACTAATGATTGGGAGGTAA >strand + yoaV similar to hypothetical protein ACCCCCAATAATAAAAATGTATTTCAGTTATACATAAATGTAACATATGATAGAAACCAGCACGATATTT >strand - yoaU similar to transcriptional regu AAATATCGTGCTGGTTTCTATCATATGTTACATTTATGTATAACTGAAATACATTTTTATTATTGGGGGT >strand + penP beta-lactamase precursor ACTCCAATCACAATTGATAAAGCTTTCTAATAGATGACTTTGCTGACAGTCATTGTCAGCTTTTTTGTTGATTCTATTCTTATGCCCTTGTTCTCCCAACTTATAAAAATATAAGATCAATTGATTCTATGTTTACTAGCGCAAAGGGGTTTGTTACACTAACTGTCATGGGAAACATTTCAAAGGTAATACAAA >strand - yoaZ similar to hypothetical protein TTTGTATTACCTTTGAAATGTTTCCCATGACAGTTAGTGTAACAAACCCCTTTGCGCTAGTAAACATAGAATCAATTGATCTTATATTTTTATAAGTTGGGAGAACAAGGGCATAAGAATAGAATCAACAAAAAAGCTGACAATGACTGTCAGCAAAGTCATCTATTAGAAAGCTTTATCAATTGTGATTGGAGT >strand + yobB yobB ACCCATCTACTTCTTTTTGCAATTCATAACAGCAGCATACTCATTCTCCAATTCCTCAACCTTCTTTTTATTCTTTCTCAAATATTTGCATGTGGTAGCCCTCGATATTATCCGCTTACTCATTTTTGCGCTTTCAAATATATTATTGAAGGCCGCTTCTTTCTGGATTCCAGCATAAATATACTTATCCTTACTGTCGTTTACTATCGCAACTTCCATTAAAATCGAGATTTTTACTTTTTTTATGATCCATTATTCACTTCCAAAATAACTTCCAATTACAACATAATATATGTATAA >strand - yobA yobA AATGGATCATAAAAAAAGTAAAAATCTCGATTTTAATGGAAGTTGCGATAGTAAACGACAGTAAGGATAAGTATATTTATGCTGGAATCCAGAAAGAAGCGGCCTTCAATAATATATTTGAAAGCGCAAAAATGAGTAAGCGGATAATATCGAGGGCTACCACATGCAAATATTTGAGAAAGAATAAAAAGAAGGTTGAGGAATTGGAGAATGAGTATGCTGCTGTTATGAATTGCAAAAAGAAGTAGATGGGTTGGGTGATATCCTGTTGTTTATTTTAAAGATGAGTTTAAAGAGGAG >strand + yobD similar to transcriptional regu 
ATAAATGCGATAATAGAGGGGATACCAGTCCAGCAAAACAAAATATATAGAATACCTTGTCCTACTCTTCCCAAATAAAACTTATGTATTCCCAGACCCCCGAAGAAAAATGCTAATAATGCCGCTACAATTTTGTTCTTCTTGCTTACCATGACTTCCTCCTAGTTCATTGCTACCATTTTTAAAGTAATGCTTTCACATTATAATGCATTTTCCGCCAGAAATCGTGAAAATTTTGAAATTCTTTTGAGATTTTTTATTTTATTGAGAAATGAAAGCTCATATAATTGAACTAGATGG >strand - xynA endo-1,4-beta-xylanase TCTCTAATGTATATCTCCTTTATTTGAAATGGTTCAATGGGAATTCATTGCAGATATTACTCCATCTGAATTAGAAACAAGAATTGTGATCCTGCGAAAAAAGGATCAGGATATGGTGGAAGAAGACCACTTATAATGTGTTTCTAGGTATTTGTAATTGAATTACAAATACTTTTAATATTTGCTCATGAATTCGTGGTATTATACTGAAGGGGACGATCAAAAGCTTTGGCGTTAGTAATTAAGAATGTTTTAAATGTATACGAGTGCTACCTCAAAGTCGGAAAAAATATTATAGGA >strand + yozI yozI TTGCCTGCAGCAAACAGCAAATCGTTCAAAACTGTGTACGGTTCACCATTCCTTGCAACTGTCTCAATCCCTTTGTATAAGTATAATAAAAAGACAAATGAATTTTTATCTGTGGAACGCTCAGGTAATAAGGACACTATAATAACTTACTTTAAGCCAAAACATCCAAATAAAAAGGCTGGTTATAATTATTATAAAAATCAATAGGAACTAT >strand - yozH yozH ATAGTTCCTATTGATTTTTATAATAATTATAACCAGCCTTTTTATTTGGATGTTTTGGCTTAAAGTAAGTTATTATAGTGTCCTTATTACCTGAGCGTTCCACAGATAAAAATTCATTTGTCTTTTTATTATACTTATACAAAGGGATTGAGACAGTTGCAAGGAATGGTGAACCGTACACAGTTTTGAACGATTTGCTGTTTGCTGCAGGCAA >strand + rapK response regulator aspartate ph GCAATTGCCTTTGCTTTTGGATATGACTTGACTGTATTTGGTGACGATTTAACAGAGAAATTTAATGCTCTTCTGACTTTTTTAACTGCATTGGGCATTATTGTTGATCCAACAACTCAAGGTATTTCTGATAGTGAACAAGCAATGGATTATGATTCACCGAGATAAACCCCATACCCTTCTCTTGATTGAGGAGGGTATTTTTTTCGTTTTTTCAAATGAAAAAATTGCAGAAAACTTTACATCCTTTTACAATTTTTTCAATGACTATGTTATGATTGTTTTCGAGAGATTTTTGAA >strand - yozJ yozJ ATCCATTGTATTCAATAGAAGGCTTGAAGTTTGGTAGGGGGCTAAGCTGATATATGGTGTCATAATCATATTCATCACTTTTTACTATCACTTCAGACGAAGAACTTGTTGCCAAGACAGGAGCAGCAGATACCAGCATACCTAATGATAAAGCAGAACTCAATAAAATTTTTCGAAGGTTCAAATATATCAACTCCATTTACATTTTATATATTAATTATGAGTTAAATGGAAGTTGCTTTGGAAGTGACTAATGGATCATAAATAGTTAAAATCGTGATTTTAAACAAATAAATTAGG >strand + yozM yozM ATTGATAGTTCCTCGATCCCTAAACATATCGACTCACTCCTTGATTGATCTAATATCAACAGAACAAACGTTCCTTATTCAATATAGATCAGAACAAAAGTTCGATGTAAATGTTGGTAATAAAATATAAAGGTCAATAATGATATCCGTAGTATTAATAAAGGAGAGATTCTTTTCG >strand - yozL yozL 
CGAAAAGAATCTCTCCTTTATTAATACTACGGATATCATTATTGACCTTTATATTTTATTACCAACATTTACATCGAACTTTTGTTCTGATCTATATTGAATAAGGAACGTTTGTTCTGTTGATATTAGATCAATCAAGGAGTGAGTCGATATGTTTAGGGATCGAGGAACTATCAAT >strand + yobN similar to L-amino acid oxidase CTCCTTTTTCGTCTAAAAGACTTTTCAAGCCATTTACAGTCTTATGAACAAAGTTAGTCACATACATCACCCCTTTATTTCCTTTTAACAATAACCTTTCAAATTCCAAAAGATGTGAACAGATCGTGATAGGTGCTCTTTTCAGTGAATATAATGCAATAAATTCTAAAAAGAGGTGCCTCCAAAAATTGAATTCTCTAATGAATGATGACATGGTAAAGATCATTAGAAATGGCCTTAGCGCATCGCAGCATCCAAAGCA >strand - yobM similar to hypothetical protein TGCTTTGGATGCTGCGATGCGCTAAGGCCATTTCTAATGATCTTTACCATGTCATCATTCATTAGAGAATTCAATTTTTGGAGGCACCTCTTTTTAGAATTTATTGCATTATATTCACTGAAAAGAGCACCTATCACGATCTGTTCACATCTTTTGGAATTTGAAAGGTTATTGTTAAAAGGAAATAAAGGGGTGATGTATGTGACTAACTTTGTTCATAAGACTGTAAATGGCTTGAAAAGTCTTTTAGACGAAAAAGGAG >strand + yobW alternate gene name: csk22; sim CCTTTTCTTCTTGTATTTTCCAACAAAACAATCATAACATTTTGATAAAGCTTGGGCATATCTATTATGCGGCGGAATACACCTCGCATATGCTATGAAAACAGGGGTTTCCCCTTCTTTTTTGTGTTCAGTATTTATTTCAGCTACTATTTCCGACCGGATAAGCATACACTTC >strand - yobV similar to hypothetical protein GAAGTGTATGCTTATCCGGTCGGAAATAGTAGCTGAAATAAATACTGAACACAAAAAAGAAGGGGAAACCCCTGTTTTCATAGCATATGCGAGGTGTATTCCGCCGCATAATAGATATGCCCAAGCTTTATCAAAATGTTATGATTGTTTTGTTGGAAAATACAAGAAGAAAAGG >strand + yocA similar to transposon-related p CCTTTCTTTATTCATAGTTTTATCTTATAACAAATGCGCTTGAGTGCAAACCAGCCTGCTTGTATTCCATTTCAATCTCATAGTTTAAATTTATGTTTCAAATCAGGTTTTTACTTGGTCTGATATACCCTCTTATGTCATAATAAAAAAGGTCATACTGCTC >strand - yozA similar to transcriptional regu GAGCAGTATGACCTTTTTTATTATGACATAAGAGGGTATATCAGACCAAGTAAAAACCTGATTTGAAACATAAATTTAAACTATGAGATTGAAATGGAATACAAGCAGGCTGGTTTGCACTCAAGCGCATTTGTTATAAGATAAAACTATGAATAAAGAAAGG >strand + yocC yocC CTCCTTTTTCACAGGTTAGTTTTCTTTTTCCTCTCTTAAAAACGATCAAACCTGACTTTCCGCTAATTTCTAGTAATAATGGAATTGTTGTTTACAATACGCCTCTCTTCTAATACAATTGGTAACTGTGTTTCTAAGCT >strand - yocB yocB AGCTTAGAAACACAGTTACCAATTGTATTAGAAGAGAGGCGTATTGTAAACAACAATTCCATTATTACTAGAAATTAGCGGAAAGTCAGGTTTGATCGTTTTTAAGAGAGGAAAAAGAAAACTAACCTGTGAAAAAGGAG >strand + 
dhaS aldehyde dehydrogenase CTCCTTTTCTTATCTATAAGCTTATTGTAGCTTTTTTCTTTTGTAAATTGCAATATCGGTTATTCATTGATTTTGCCTTGATTTCTATTATAGAATGTTTGTATACGCTTACGTTTGAAGCAACTATCACAGCTTATTTCTGCTATTCAATCCAGGAATCAATGTGCTATCACCAAGAATCTCAGAACAGACCATAAAA >strand - yozC yozC TTTTATGGTCTGTTCTGAGATTCTTGGTGATAGCACATTGATTCCTGGATTGAATAGCAGAAATAAGCTGTGATAGTTGCTTCAAACGTAAGCGTATACAAACATTCTATAATAGAAATCAAGGCAAAATCAATGAATAACCGATATTGCAATTTACAAAAGAAAAAAGCTACAATAAGCTTATAGATAAGAAAAGGAG >strand + yocS similar to sodium-dependent tra TCCTATTTGATCTAACCATTATAAAGGTTCATTGTAAGAATATCTTCCTATATAGTCAAGAACATTTTAATTAGTTTACATCATATATCTATAGACATCTGCCAAAAACTAATATTGAATTATTCAGAAAATTATTTATAATAAGAGAATGTTATGAAATTTGTAAGAATTTCTGCTGAAGTTCTTGA >strand - yocR similar to sodium-dependent tra TCAAGAACTTCAGCAGAAATTCTTACAAATTTCATAACATTCTCTTATTATAAATAATTTTCTGAATAATTCAATATTAGTTTTTGGCAGATGTCTATAGATATATGATGTAAACTAATTAAAATGTTCTTGACTATATAGGAAGATATTCTTACAATGAACCTTTATAATGGTTAGATCAAATAGGA >strand + yojI similar to hypothetical protein TAAATTTCCAACTTTTCACGAATGCTTATCTTATTGATGCACATTCTATTGGTAAAGGAGGGATGTAAAGAATGGAAATCGAAAGAATCATTCAGTATATTTGCACCTTTTTAGCGTGATTTCGCTTTCTTTTATTTATGCCCAAATGTTTAGAAAAACGAAACCGACAAAAAAAGAATAGGCCCGGCGTTCATTGCTTTTTATACATAAAATGTTATAATTTCTCTCGTTATG >strand - yojJ similar to hypothetical protein CATAACGAGAGAAATTATAACATTTTATGTATAAAAAGCAATGAACGCCGGGCCTATTCTTTTTTTGTCGGTTTCGTTTTTCTAAACATTTGGGCATAAATAAAAGAAAGCGAAATCACGCTAAAAAGGTGCAAATATACTGAATGATTCTTTCGATTTCCATTCTTTACATCCCTCCTTTACCAATAGAATGTGCATCAATAAGATAAGCATTCGTGAAAAGTTGGAAATTTA >strand + yodA similar to hypothetical protein TTTTGGCGCTTTTCATGCCTAGATGAACAGAAAGCGCTTTCATTAATAACTAAAAACGTCTTGGTTATATGATTGTTATCTTTTGATCAACTAATATGTTCATACTAATCTCCTAATCAATGACTGTCAATCGTTTTTATGAGCTTGTCTCGATTACCTACCGCCTAACCCAATGATTTATTAGTTATATGTTGATTAACTTTTGGTTATATGTTATGTTTTTTAAAGTTGACTGCCAATCATTATATCTCTCCATTGTCTGGAAGCAAATGAGACAATATGAGAACAGACGGATAACAA >strand - yojA similar to gluconate permease 
CTTCCAGACAATGGAGAGATATAATGATTGGCAGTCAACTTTAAAAAACATAACATATAACCAAAAGTTAATCAACATATAACTAATAAATCATTGGGTTAGGCGGTAGGTAATCGAGACAAGCTCATAAAAACGATTGACAGTCATTGATTAGGAGATTAGTATGAACATATTAGTTGATCAAAAGATAACAATCATATAACCAAGACGTTTTTAGTTATTAATGAAAGCGCTTTCTGTTCATCTAGGCATGAAAAGCGCCAAAACATTGTCGCGAGAATCATATAAAACACTTTTGGA >strand + yodC similar to nitroreductase ATCCCTTCATCTATTGATTGACATGTATTCAATGATCCATTACAATAATCTTTCAAGTTACATTTAGTAAGCTTCTAACTTTTCTATTATCATACCACG >strand - yodB similar to hypothetical protein CGTGGTATGATAATAGAAAAGTTAGAAGCTTACTAAATGTAACTTGAAAGATTATTGTAATGGATCATTGAATACATGTCAATCAATAGATGAAGGGAT >strand + yodF similar to proline permease GATGCAAACCATGTGTTTTGATTAAATAATTGAAAAAATCATTTTTTTCTAATAATTATCTTGTGATCTTTTCTTACAACTGCTATTATAGTGAGCATTATCATGTTTCAGGAGGAAAGTAAATGGTTAAAAAAGCACTTATTGTTATTCTCATTCTTTTGCCATTCGTTCAGCTCGCGCTTTTGCCGCTTGTGAATCGAATAGAACCGATTATGTTCGGCCTGCCGTTTTTCCACTTTTGGCTGCTGCTGTGGATTATTGTTACGCCGTTATGCTCGTTTGGCATTTATCAGATGCAAA >strand - yodE similar to aromatic metabolite AGCAGCCAAAAGTGGAAAAACGGCAGGCCGAACATAATCGGTTCTATTCGATTCACAAGCGGCAAAAGCGCGAGCTGAACGAATGGCAAAAGAATGAGAATAACAATAAGTGCTTTTTTAACCATTTACTTTCCTCCTGAAACATGATAATGCTCACTATAATAGCAGTTGTAAGAAAAGATCACAAGATAATTATTAGAAAAAAATGATTTTTTCAATTATTTAATCAAAACACATGGTTTGCATCCTTTTCAATAAAGGTTTAAAATAATAAATATCTTGAAATCGAGATAAATAGGA >strand + yodH yodH CACCTTTTTCTTCTTACATTATTATCATGGACGAATTCTTATTCGTCAACTCGGAGCAGAAATGAAAGAGAAACGCCCTTCTTTTGCTTTTCATAAACCTATGTCTTTATACGACATATGAT >strand - ctpA carboxy-terminal processing pro ATCATATGTCGTATAAAGACATAGGTTTATGAAAAGCAAAAGAAGGGCGTTTCTCTTTCATTTCTGCTCCGAGTTGACGAATAAGAATTCGTCCATGATAATAATGTAAGAAGAAAAAGGTG >strand + cgeA alternate gene name: cgeAA TCTCCCTTATCCTTATCTCTGTTTCACAATATGTGCGACTTCGATTTATGGTATAGGCTATGTCCACTCCATTAAAAAATCAGCATTTTCCTTAAATAGATGAAAAATAGATCAGGTACGGCGTTCGACTCATACCAAATAACAGCCGGAAGAATATGAATAACGTGAGTTCAC >strand - cgeC alternate gene name: cgeBA 
GTGAACTCACGTTATTCATATTCTTCCGGCTGTTATTTGGTATGAGTCGAACGCCGTACCTGATCTATTTTTCATCTATTTAAGGAAAATGCTGATTTTTTAATGGAGTGGACATAGCCTATACCATAAATCGAAGTCGCACATATTGTGAAACAGAGATAAGGATAAGGGAGA >strand + yodU similar to capsular polysacchar ACAGGTGATTACCGTGAGGAAAATGAACCTTTTTTTATAGGGTTTCTGACTCATAAACAACTTATTCCCCAAAGAGGTGGTGAGATGACTTTGCCAGATTTACTTTTGCGGAGCGAAATAAAAATATGTTGAATATTTAGTACATTATTTATTTAGTTTTGTTTTTCCCTAGTTACGTCCGTTTTTCCATTTTCTATTAAAAAAGAACACTTTGTGACATGATCATTCCCCTAAATTTTGTTTTATTGGTTGCTTTCATTAATTCGATTACAAAGTTGAACATATGATGCATGGTAGAGA >strand - yodV alternate gene name: yzxA TAATGAAAGCAACCAATAAAACAAAATTTAGGGGAATGATCATGTCACAAAGTGTTCTTTTTTAATAGAAAATGGAAAAACGGACGTAACTAGGGAAAAACAAAACTAAATAAATAATGTACTAAATATTCAACATATTTTTATTTCGCTCCGCAAAAGTAAATCTGGCAAAGTCATCTCACCACCTCTTTGGGGAATAAGTTGTTTATGAGTCAGAAACCCTATAAAAAAAGGTTCATTTTCCTCACGGTAATCACCTGTATATATTTTACAATAGTAGTGTTAGTGATAAAAGAGGAG >strand + yotL similar to hypothetical protein TCTCCTAATCTATATTGAAGGTAAATTATGTATGTGCTAAAATC >strand - yotM alternate gene name: yodV GATTTTAGCACATACATAATTTACCTTCAATATAGATTAGGAGA >strand + sspC small acid-soluble spore protei CACCTTTCTTAATGAAAAATTTATTTCTTTGGCGTGTATAAATTAAAATAATCTCTCCATAATATGATTCAAACAAGCTTGTTTTCAT >strand - yotB alternate gene name: yodZ ATGAAAACAAGCTTGTTTGAATCATATTATGGAGAGATTATTTTAATTTATACACGCCAAAGAAATAAATTTTTCATTAAGAAAGGTG >strand + yosU yosU CCACCCAGTTTTGGTCATGCATCTTCATGAAGTTAGCTGCTGCCTTCTTGTATCTCAGGTCTTTCTCGTACTCAGCAACTTGTCTCATATGTTCAATAAATTCATCTTCAAGTCCTTGTAACTGAAGAAACCCAAAGCAAAGACCGTTCCTATAAAGCATGTCGTCATATTTCGCTTGTAAATCTCTTTCTTTGCTGTTAATAATCTCTTCCTCCTTAAATATTAGATAAAAGTATTCTTTTATTCGGAAATATTTCCAACAAAAATCTTTTATGGTAAATTAAAAACA >strand - yosV yosV TGTTTTTAATTTACCATAAAAGATTTTTGTTGGAAATATTTCCGAATAAAAGAATACTTTTATCTAATATTTAAGGAGGAAGAGATTATTAACAGCAAAGAAAGAGATTTACAAGCGAAATATGACGACATGCTTTATAGGAACGGTCTTTGCTTTGGGTTTCTTCAGTTACAAGGACTTGAAGATGAATTTATTGAACATATGAGACAAGTTGCTGAGTACGAGAAAGACCTGAGATACAAGAAGGCAGCAGCTAACTTCATGAAGATGCATGACCAAAACTGGGTGG >strand + yosA yosA 
TCTCCTTATTCAACTCTTAAATTTAGACAAAGTCTTCATTTTTTCTTTTTTAGACTGAAATCTATGTCGAATTTTTAACCTTTGCCATACCATATACTAT >strand - yosB yosB ATAGTATATGGTATGGCAAAGGTTAAAAATTCGACATAGATTTCAGTCTAAAAAAGAAAAAATGAAGACTTTGTCTAAATTTAAGAGTTGAATAAGGAGA >strand + yoqW similar to hypothetical protein TCCTTTTTGATTTTCGTGTATCAAGCCTGTACAATATTCA >strand - yoqX yoqX TGAATATTGTACAGGCTTGATACACGAAAATCAAAAAGGA >strand + yoqT yoqT CCTCCTTCCATTAAAATAAATATTTTATTTTAAATGCTATTTTTCTTTAAACCTTTTTAAATTTCCAAGCTTCATTTCACCTTTATTACTGCAAGCAGGTTGTCATACGCTTGCCATATAAAAGAGACTGGACAT >strand - yoqU yoqU ATGTCCAGTCTCTTTTATATGGCAAGCGTATGACAACCTGCTTGCAGTAATAAAGGTGAAATGAAGCTTGGAAATTTAAAAAGGTTTAAAGAAAAATAGCATTTAAAATAAAATATTTATTTTAATGGAAGGAGG >strand + yoqO yoqO CTCCTTTAAATCACGATTTTAATTTAACTTTAATGTGTATCATCTTGAGAAAATCCCCTTTAATTGGTAAACTCATACTTAAC >strand - yoqP yoqP GTTAAGTATGAGTTTACCAATTAAAGGGGATTTTCTCAAGATGATACACATTAAAGTTAAATTAAAATCGTGATTTAAAGGAG >strand + yoqM yoqM CCACTTATTTTATTTTTATTCTATAGTTTTTTATGATTCCATCCTATAATCCATCTCTCCATTTGCCTTCTCATCTATCACGACATTAACCGACACACAATTTCCATATGTATGGGAGTAATTTCGCTATTTTGCCTTCTTTAAAAACCATATAATGTAATTGCAATACATTA >strand - yoqN yoqN TAATGTATTGCAATTACATTATATGGTTTTTAAAGAAGGCAAAATAGCGAAATTACTCCCATACATATGGAAATTGTGTGTCGGTTAATGTCGTGATAGATGAGAAGGCAAATGGAGAGATGGATTATAGGATGGAATCATAAAAAACTATAGAATAAAAATAAAATAAGTGG >strand + yopS similar to hypothetical protein TCCTATGTTCAATTTTCTGTGATAAACCCTTAAGGTGTTAAGGGAACGTGCGTTCTGTTTTGTATAATCATTATACATAGAAAGTAGGTGTTATGCTAGTGGTTATTTATCGGCTTCGTATGGTATGATATGTATTATGTCTTTCCATAACA >strand - yopT yopT TGTTATGGAAAGACATAATACATATCATACCATACGAAGCCGATAAATAACCACTAGCATAACACCTACTTTCTATGTATAATGATTATACAAAACAGAACGCACGTTCCCTTAACACCTTAAGGGTTTATCACAGAAAATTGAACATAGGA >strand + yonP yonP TCAACATTTTCCCCTCTTCAAATTTACTCGATTTCTTCAATATTTAGATAAAGCAACCACTTTCCCCTACTCTCACTTATGTTTTACGATCGTTTATCCCCTAATCACCATTCGATACAGCAAATCTCCTTATGTTTACTGGCTTTTTAAACTCTTTAATATTTTTTACGATAAGCATGTTTTTAGATTACGATAATCCCCCCTTCTCTAAATGCTTCAGAATTCACCTCTTAACCTGGAGCTATTAACACACTAGCGAAAAAACAATAGACAAATACAAATAATATGTATATAATAAAT >strand - yonR similar to 
transcriptional regu ATTTATAACGAATCCGAGGGAATCGAAGACATAATTTATTCTTATCCACTTACATTACTATATCATATCTTCATATATAACCCTTGTATTTACTGACCATCTCAGCCTTGAAAGGTAGTCAGCAACTTGTCTTTAACTAAGGTTCTCTTTTTCAAATCAACATCTCCTTCTGTAAACAGTTTGTTTCTACACTGTTAACAATTAAAGAAACCTTTAGTTTCTCCCGATATATTTTATGAAACTTTTTGCTTCTTTTTGTTTACGAAACTATTTGCTTATATAATAATTGTCAAAGTGAGG >strand + yonH yonH CTCCCCTTTAGATATCGTAATACATAAATAGTGTAATCCACTTTTTTATCAGATGTCAACATCCAATAGTGGATTTTTAAACCTAAAGTTAAACTATTAATCTTTTTTTGATAAAACTTTAATTTTATTTAGATTGGAGTCGCATTAATGGTTGACTGCAAAAAGGTTTATTAAAGTAATACAAAAAA >strand - yonI yonI TTTTTTGTATTACTTTAATAAACCTTTTTGCAGTCAACCATTAATGCGACTCCAATCTAAATAAAATTAAAGTTTTATCAAAAAAAGATTAATAGTTTAACTTTAGGTTTAAAAATCCACTATTGGATGTTGACATCTGATAAAAAAGTGGATTACACTATTTATGTATTACGATATCTAAAGGGGAG >strand + yomK yomK GATTCCAGGTGCATATTTGATTGCAGCCCCAGTGTATTAAATACTTACAATTTTGTCCAACCCGAACTCTCTGGCAATATTACTTTAATCATATTGCAAAATAATTTTTTGAATTATTTCAGCACACCTACTCTAACATGATTCTTTTTTGATTTACATAAAAAATCGAAAAATTGAGTCATTTTTCCTTGGAGAATGACTGTGTATAAATCTTAAAAAGGACTATTCATTCATTTTGAAAATTAGAGAAACGTACTCTCAGAAACCTAATTTCCATTAAAATCCTTTGCGTATTTGTCG >strand - yomL similar to hypothetical protein AGTAATATTGCCAGAGAGTTCGGGTTGGACAAAATTGTAAGTATTTAATACACTGGGGCTGCAATCAAATATGCACCTGGAATCAATTGCTAAACGGGGGAGTTTAAATGAACAACAAGAAAAATATCTTTGATATTGTAATGTACATTATTTTCGGTGTGTTAAGTCTTTTTCTAGTTGCAAAAACTGATTATGGCACTGGAGTTTTAGTGTTTGTTGCAATTTTATACCTCGCTGTAATTGCTTATAAAATTAAGCAAGTATTTAGTAATTCAGATTCTTAAGGTGTAACACAAGGAG >strand + yolC yolC ACTCCTTTATTGATCTAATATTAACAGAACAAACGTTCTTCATTCAAGATAAACCAGAACAAAAGTTCGATGTAAATGTTGGTAATAAAATATAAAGGTCAATGATGATTTGCGTAGTATTAATAAAGGAGGGATTCTTTTCC >strand - yolD similar to hypothetical protein GGAAAAGAATCCCTCCTTTATTAATACTACGCAAATCATCATTGACCTTTATATTTTATTACCAACATTTACATCGAACTTTTGTTCTGGTTTATCTTGAATGAAGAACGTTTGTTCTGTTAATATTAGATCAATAAAGGAGT >strand + yokF similar to hypothetical protein 
CCGCCCTTTCATATCATCTAGAGTTTATATATTTAATATAACAAAAAACCTACCAAATTATTTTATTTTTTCAATTTTTTATTCGATAAAAAAGTAAGACGGGAATCATTTTGTAGAGCAGATCGAAAATGAAAATGCAGCTAAACTAATTACAATGACAGTCGCCAGTTTTATTTCATCACAAGACAAAATTTAAGGAATAATTTAATTCACAAAAATTGGATACAGGATTATAATTATTGATGGTTTACCGATATAAATAGAGAACAAA >strand - yokG similar to delta-endotoxin TTTGTTCTCTATTTATATCGGTAAACCATCAATAATTATAATCCTGTATCCAATTTTTGTGAATTAAATTATTCCTTAAATTTTGTCTTGTGATGAAATAAAACTGGCGACTGTCATTGTAATTAGTTTAGCTGCATTTTCATTTTCGATCTGCTCTACAAAATGATTCCCGTCTTACTTTTTTATCGAATAAAAAATTGAAAAAATAAAATAATTTGGTAGGTTTTTTGTTATATTAAATATATAAACTCTAGATGATATGAAAGGGCGG >strand + yokA similar to DNA recombinase CCTGGATTTATAAAGTATTTACAATGAATTTAGATGAACTAATTATAACCCATGTTCAAGAAGGTTTTTCATAAAATTTCAATCAATTCAATTCCTTCAAATTATTGTATTGTTTTCGTATTCAATGTCAGATAAAATATTCTTAATTAAGTTTCTGTTTTGACATTAAACA >strand - yokB yokB TGTTTAATGTCAAAACAGAAACTTAATTAAGAATATTTTATCTGACATTGAATACGAAAACAATACAATAATTTGAAGGAATTGAATTGATTGAAATTTTATGAAAAACCTTCTTGAACATGGGTTATAATTAGTTCATCTAAATTCATTGTAAATACTTTATAAATCCAGG >strand + ypoP similar to transcriptional regu CTCCTTTGTTAATACGTGGTATTATTATATTATAGCATGTAAAAATAGAATGAAAATGATTCTGCTTTTCAATAAAATCTATAGGTTTTAAGCAATATGCT >strand - yppP similar to peptide methionine s AGCATATTGCTTAAAACCTATAGATTTTATTGAAAAGCAGAATCATTTTCATTCTATTTTTACATGCTATAATATAATAATACCACGTATTAACAAAGGAG >strand + yplP similar to transcriptional regu TCCTTTCTTGTTTTAAATCCCTATTTAATATGCCATTATAACATGAATATTCAAAAAATAAGACAATTATTTTTATTTAATAATTAAAAAAATACAGACTATGTCCACCACATCTTGTGGTAACTTCTGTTCAACCATTATCCTGCAGAGCCAAGAAAAAAGATGATCGTAAAATAAAGGGTGTTTTCAACCAAAAGGCATGATATAATGAAACCAAT >strand - ilvA threonine dehydratase ATTGGTTTCATTATATCATGCCTTTTGGTTGAAAACACCCTTTATTTTACGATCATCTTTTTTCTTGGCTCTGCAGGATAATGGTTGAACAGAAGTTACCACAAGATGTGGTGGACATAGTCTGTATTTTTTTAATTATTAAATAAAAATAATTGTCTTATTTTTTGAATATTCATGTTATAATGGCATATTAAATAGGGATTTAAAACAAGAAAGGA >strand + metB homoserine O-succinyltransferas 
TGGTGCTGATTATTTTACCTTTTTTAAATCGCAATTCAAAATGTAAACATCAATTAAGAACTCATAAGAAACCTAACGATATGAGCGTCAGTTCATTATCAATTCATATGGCTCGTTTTTTATCTTATCATTCTATAAAACATTACAATTCCCTCTTACATCTTTGAAAATCATTTTACTGTGAAACCAAAAGGCGTGTCAGTGCGTTTTATTTAAAGAGTTTTCTGTTGTTTCCCCCTCAACACGTATAGATAGATTGAAAATTGGCGTGAAAATTCCTATAATGAAGAAAATTAAACG >strand - bsaA glutathione peroxidase TTCATTATAGGAATTTTCACGCCAATTTTCAATCTATCTATACGTGTTGAGGGGGAAACAACAGAAAACTCTTTAAATAAAACGCACTGACACGCCTTTTGGTTTCACAGTAAAATGATTTTCAAAGATGTAAGAGGGAATTGTAATGTTTTATAGAATGATAAGATAAAAAACGAGCCATATGAATTGATAATGAACTGACGCTCATATCGTTAGGTTTCTTATGAGTTCTTAATTGATGTTTACATTTTGAATTGCGATTTAAAAAAGGTAAAATAATCAGCACCAAGGACATAGGAG >strand + ypzA ypzA TCCCCTTCTCTATCAAGATACCTCAAATAGTAAAACGGTCCTATTTATTCTGGTTCAATAGTTATATCGGCTTATTTTTATTTTCTTTTCTATTTTGGTACATAAATTTCAAAAAAACTCTGCAA >strand - degR alternate gene name: prtR TTGCAGAGTTTTTTTGAAATTTATGTACCAAAATAGAAAAGAAAATAAAAATAAGCCGATATAACTATTGAACCAGAATAAATAGGACCGTTTTACTATTTGAGGTATCTTGATAGAGAAGGGGA >strand + ypdP similar to hypothetical protein GTCAAAATAAACAGTAATGTCATCAGGCTCTTGAGCAAGCTCCTCTGTCAGCTTTTCCAATTCCTTCAGCGTCCAGCTGCCGCCTTTCTCGTCTTCGAACTCTAGCTCCTTCATGTATGGAAAACGGCCAAGTTCTCTGGCGAACGTACGTGCCTGCTGTGCTGTCAGCCAGTCTTCGCTAAAAAATGAGACGGATCCCGTTCCTTTTGCTTCTATTTTCAAATGCGGTCTGAGCTTCATTACAATCACCTTACCATTCATTAGTTGCCGGGCTTATGCTATAATCAAACCAATCTTACC >strand - ypeP ypeP GACCGCATTTGAAAATAGAAGCAAAAGGAACGGGATCCGTCTCATTTTTTAGCGAAGACTGGCTGACAGCACAGCAGGCACGTACGTTCGCCAGAGAACTTGGCCGTTTTCCATACATGAAGGAGCTAGAGTTCGAAGACGAGAAAGGCGGCAGCTGGACGCTGAAGGAATTGGAAAAGCTGACAGAGGAGCTTGCTCAAGAGCCTGATGACATTACTGTTTATTTTGACGGCAGTTTCGATAAAGAGAGCGAACTGGCCGGTCTCGGGATCGTCATCTATTATTCATTGGGAGGAACCC >strand + kduI 5-keto-4-deoxyuronate isomerase TCCTGTATCTCATATATTTGAAACCGATTTCAAAACCGTTACAACAAAAGAATACCATAATTTTAAAATTTGGGAAGAGTTTTTTTAATAATATGTCAATTCATTATTTTTTCGTTGACTCTTTTCACTATTTATCCTAAAATTTTCTTTGAAACCGTTACCAAAATATTTTCATTCAAGTGATGCTGAAA >strand - kdgR transcriptional regulator (LacI 
TTTCAGCATCACTTGAATGAAAATATTTTGGTAACGGTTTCAAAGAAAATTTTAGGATAAATAGTGAAAAGAGTCAACGAAAAAATAATGAATTGACATATTATTAAAAAAACTCTTCCCAAATTTTAAAATTATGGTATTCTTTTGTTGTAACGGTTTTGAAATCGGTTTCAAATATATGAGATACAGGA >strand + ypqA ypqA CTCCCTTCTCATTCACTTCAACCGTTATCCCTTCATTCACCACTATTTAACCACACTTTCAATTTTGCTTCAATTTATAACAACATCTGGCATAGACGCATAATCTG >strand - ypqE similar to phosphotransferase s CAGATTATGCGTCTATGCCAGATGTTGTTATAAATTGAAGCAAAATTGAAAGTGTGGTTAAATAGTGGTGAATGAAGGGATAACGGTTGAAGTGAATGAGAAGGGAG >strand + yppF yppF CCCCCTAAACACAACATATTTCTCTTCTATATATGTATGAATGACTTCTATTGTTTGTGTCACAAGTGATGATTAGCATTTGTCAAATTGTGTCGAAAGTTTAAATTTTTTAAAAACAGAAAAAAGTCGAATCATTATGGTACCATATTTTCAATAC >strand - yppG yppG GTATTGAAAATATGGTACCATAATGATTCGACTTTTTTCTGTTTTTAAAAAATTTAAACTTTCGACACAATTTGACAAATGCTAATCATCACTTGTGACACAAACAATAGAAGTCATTCATACATATATAGAAGAGAAATATGTTGTGTTTAGGGGG >strand + ypjC alternate gene name: jojC; simi AGATACGTTTGACAAATGTTTAAGCACGTTTGCTGTTTTCTTTTTCATCCACTATAATATAAACGCTATCTAATGGAAGGAAAAGGGGTTTTGTGTGATGCTTGGAGAAATTAGATTAAAAAACATATTTTTTATTTTAATCGGAGCGGCAATTTTTTCATTTGGCTTGGTTCATTTCAATATGCAGAACAATCTGGCTGAGGGCGGTTTTACAGGTATTACACTTTTGCTTTATGCCCTTTTCCACATCAGCCCCTCTATATCGAACCTGGTGTTGAACATTCCGATTTTTTTTATTGG >strand - ypjD alternate gene name: jojD TCGATATAGAGGGGCTGATGTGGAAAAGGGCATAAAGCAAAAGTGTAATACCTGTAAAACCGCCCTCAGCCAGATTGTTCTGCATATTGAAATGAACCAAGCCAAATGAAAAAATTGCCGCTCCGATTAAAATAAAAAATATGTTTTTTAATCTAATTTCTCCAAGCATCACACAAAACCCCTTTTCCTTCCATTAGATAGCGTTTATATTATAGTGGATGAAAAAGAAAACAGCAAACGTGCTTAAACATTTGTCAAACGTATCTGAATTAGCTAACATGTAAGAGGATGATAGGAGGT >strand + fer ferredoxin CCTACATTCTTCCTGACAAAAATCGTTGATTTGCTCTTTTTATTTTAACAGATTTCAAACAGAAATCGACGTTTAAAGTCATCATTAAGGGGGATGTTATGTAGATAACCCCTTATGTGATAAGCATTCTCAATCATTTTTCTGTTGAAATGTGGCAGCAACATCATTACAATAAGTAAGAGGAATAGGTGTTAAATTGATTAAATATAAAGATTGATCCATTTGTTCCACCAAG >strand - ypbB ypbB 
CTTGGTGGAACAAATGGATCAATCTTTATATTTAATCAATTTAACACCTATTCCTCTTACTTATTGTAATGATGTTGCTGCCACATTTCAACAGAAAAATGATTGAGAATGCTTATCACATAAGGGGTTATCTACATAACATCCCCCTTAATGATGACTTTAAACGTCGATTTCTGTTTGAAATCTGTTAAAATAAAAAGAGCAAATCAACGATTTTTGTCAGGAAGAATGTAGG >strand + serA phosphoglycerate dehydrogenase GAAGATTGATCATATGAAATTTAATTACATTATAAACGATAACTGGTTGGCTGTATAGAGAATTGCTTCAAATTAAATATATTACCTGCAAGCTGTCAGATCATTGATTTATTAGGCTTTACTTTTATCCTTTACTGCGTCAATACACGTTGACACTCTTTTGAGAATATGTTAAATTATCAGATATTTAGTTTGT >strand - ypzE ypzE ACAAACTAAATATCTGATAATTTAACATATTCTCAAAAGAGTGTCAACGTGTATTGACGCAGTAAAGGATAAAAGTAAAGCCTAATAAATCAATGATCTGACAGCTTGCAGGTAATATATTTAATTTGAAGCAATTCTCTATACAGCCAACCAGTTATCGTTTATAATGTAATTAAATTTCATATGATCAATCTTC >strand + ypuF ypuF CATTTTCAAGTATCACTCTCATTGCCGGAAAAACTGGCTACCCTATAGTATAAGCCACCAAAAAGCTCTCGTCCAATACTATTTCATATCCTGTTTTGAAGGAGTTTGTACAGAATATTCAAAACACAAGAGGTGTTTGCTCGGAGCGCAGAAGCGGGAGAAAAAGCCTTTGCTTCTGGTTTTCCTTTCTAAGAAGCACTAGAGGCCGGGCGTTCTTTTTTCCTTTTATCCCTTCTATGGTACACTAAAA >strand - ypuG similar to hypothetical protein TTTTAGTGTACCATAGAAGGGATAAAAGGAAAAAAGAACGCCCGGCCTCTAGTGCTTCTTAGAAAGGAAAACCAGAAGCAAAGGCTTTTTCTCCCGCTTCTGCGCTCCGAGCAAACACCTCTTGTGTTTTGAATATTCTGTACAAACTCCTTCAAAACAGGATATGAAATAGTATTGGACGAGAGCTTTTTGGTGGCTTATACTATAGGGTAGCCAGTTTTTCCGGCAATGAGAGTGATACTTGAAAATG >strand + ypzC ypzC CTCCAATAAACAACGGTCTTCAATTTATGTTATCACGAATCGAAAAAAAATCCCATTAGGACCATTGAAGTGATTTTACAAACTGTTAATCTTCATCTTCTACCCACATCATGCCTATTTTTTCTTGTCGATCATAACTAGGCCTGAAAATATTGAAAGAAATGAATCGCATAATTCAGAAGCGATGGACTCAGAAGAAAGAGAGTTCTCATATGCAAAATCTTTAGATCGTATAT >strand - sipS type I signal peptidase ATATACGATCTAAAGATTTTGCATATGAGAACTCTCTTTCTTCTGAGTCCATCGCTTCTGAATTATGCGATTCATTTCTTTCAATATTTTCAGGCCTAGTTATGATCGACAAGAAAAAATAGGCATGATGTGGGTAGAAGATGAAGATTAACAGTTTGTAAAATCACTTCAATGGTCCTAATGGGATTTTTTTTCGATTCGTGATAACATAAATTGAAGACCGTTGTTTATTGGAG >strand + ypzD similar to hypothetical protein 
TAGGATAGCGGCGTATGGCGATTTCTACATTGCATAACCACTGGTTATTTAGTGAGCCGTGAGCAATGTATATGCACTTTGAGTCAGCACAGGATGATCCTATTACATTCGCCAGAAAAGCAGCTGCCGCATTCAGAGTAGTTAAAAGAATAGACAGATATCTGGGCTTCCGGAAGCTTTAAAGGAGGCTCTTTTTTCGAGCATTTTTATAAATCAATAAAAACAGGCATCTCCTAAATATTTTTGCCGATAAGTTTACGCTAATCAACTCTAGCACTAATACATATAAAAAATAGAATG >strand - ypuB ypuB AAACTAAGTGAAATACTTTACCATTTCGAACCTCAAACGGGAAAAATGGGGAATTGACAAGGGATGAGGTGGGCAAACAAACACCTGACTCGGTGGCAGAACGAACTGCCCAAAATTTTGGCAAACTCCCTGCGTATCTTTTGTTTCCTCCCATTTCATGCTATTGCTCCTCCTATGTCAAAAAAGAATGTATGAGAGAAGATCCCGCCATAGACAATGTTGTTGGACGACTGAAGATAAAATTGTGCTGATTATACAAAAAGTGGATTGGGAATGATAAAAGAACAAAAAAGTTTAAGG >strand + ypuA ypuA CTACTTTCTTTATTTTAATTTTCAAAGGAGATTATGTATAGGTTATAGCCTGTCTCTCCTTGTCGCGGCTATTATACCACACCTTTCGGGGGAAAACGAATCTCCCGAGCTTTACTAAAAAATAAAAATGAAACCTGAATCTCTTTATCTCCGTCTAACCTAACGGAACTGTATTTTTCATGATTCAGGTCCTCACAAAGCAGCACTATCCAGCTGCTTGTGA >strand - ppiB peptidyl-prolyl isomerase TCACAAGCAGCTGGATAGTGCTGCTTTGTGAGGACCTGAATCATGAAAAATACAGTTCCGTTAGGTTAGACGGAGATAAAGAGATTCAGGTTTCATTTTTATTTTTTAGTAAAGCTCGGGAGATTCGTTTTCCCCCGAAAGGTGTGGTATAATAGCCGCGACAAGGAGAGACAGGCTATAACCTATACATAATCTCCTTTGAAAATTAAAATAAAGAAAGTAG >strand + ansR transcriptional regulator (Xre CCTCTTCACTGTATCTAAAGATGGGTTGCGAGAAGGTATTCGCTTTTCGCATACTTTTAATGCAATTATAGACACGGAAAATAAAATACGCAAGACATTATTATTCAATTCGCGTACACTTCCTAATACCTATATGATATAATCAGCTCAA >strand - ansA L-asparaginase TTGAGCTGATTATATCATATAGGTATTAGGAAGTGTACGCGAATTGAATAATAATGTCTTGCGTATTTTATTTTCCGTGTCTATAATTGCATTAAAAGTATGCGAAAAGCGAATACCTTCTCGCAACCCATCTTTAGATACAGTGAAGAGG >strand + yqkF similar to hypothetical protein CTCCCGTTCTGTCTCTATATGATGCCCCATACATTTTATCATACTAGAGCAAAAGGGGGCTTCTTCGTGAAAGTGCACCGCATGCCAAAAGGTGTTGTCTTAGTCGGAAAAGCATGGGAAATTCGAGCGAAGTTAAAAGAGTATGGACGCACATTTCAATATGTGAAAGATTGGATCTCAAAGCCATAAAAGTTGAAACTTTAGCTCCTTTATATTACGGTCAATACAA >strand - yqkG similar to hypothetical protein 
TTGTATTGACCGTAATATAAAGGAGCTAAAGTTTCAACTTTTATGGCTTTGAGATCCAATCTTTCACATATTGAAATGTGCGTCCATACTCTTTTAACTTCGCTCGAATTTCCCATGCTTTTCCGACTAAGACAACACCTTTTGGCATGCGGTGCACTTTCACGAAGAAGCCCCCTTTTGCTCTAGTATGATAAAATGTATGGGGCATCATATAGAGACAGAACGGGAG >strand + yqkD yqkD CCCCCATGTTTTGTTTTGAAGCCAGTTTATCACGGAGCATATAAACTGAACAAGCAGACTCTCTTCTGATCACTTTCTCAGCGTAGCAGTGTTTAGGTATAATGATCTTAAACACGAATTGCCTATATATT >strand - yqkE yqkE AATATATAGGCAATTCGTGTTTAAGATCATTATACCTAAACACTGCTACGCTGAGAAAGTGATCAGAAGAGAGTCTGCTTGTTCAGTTTATATGCTCCGTGATAAACTGGCTTCAAAACAAAACATGGGGG >strand + yqzH yqzH CTAAAAAGAACTTATGTTCGCTTTTCATTATATGCGAAAGTATGTTCGTTTTATACTGGTAAATGATTCTTGTCTGACAGAAAATATGATCCAAGGGTTTGAGAGAAGAAAAACATTTAGTAGAATAAAAGCGAA >strand - yqjW similar to ATP/GTP-binding prot TTCGCTTTTATTCTACTAAATGTTTTTCTTCTCTCAAACCCTTGGATCATATTTTCTGTCAGACAAGAATCATTTACCAGTATAAAACGAACATACTTTCGCATATAATGAAAAGCGAACATAAGTTCTTTTTAG >strand + yqjO similar to pyrroline-5-carboxyl CTTTTTGAAAAAGCTATTGACATTAGCTTGTCCGCCATTGCATAATGAACAACAAACTAAAACGAGAACCATTGGAACGCTATGACGAGGAATAGTATACAAGGCAATGGCCTAAGAGAGCGATACCCCCGGCTGAAAGGTGTTGCGGCCCGCTTGTTTGTAGAACCTGCCCTCAAGTCGCGGTTAGGAAAACCTAAACGTTTCCCGCGTTACGGGGATTTGAGCTGAGCACAATTTGTGCTAATGAGGGTGGTACCGCGAACCTTTTCGTCCTTTACGTGATGAAAAGGTTTTTTGTTG >strand - yqjP yqjP AGCTCAAATCCCCGTAACGCGGGAAACGTTTAGGTTTTCCTAACCGCGACTTGAGGGCAGGTTCTACAAACAAGCGGGCCGCAACACCTTTCAGCCGGGGGTATCGCTCTCTTAGGCCATTGCCTTGTATACTATTCCTCGTCATAGCGTTCCAATGGTTCTCGTTTTAGTTTGTTGTTCATTATGCAATGGCGGACAAGCTAATGTCAATAGCTTTTTCAAAAAGGTTTGTTCTCATTCTGCAATAATGACACAAGACCGAGAAAAAGCTAAAATATGAATAATGAATGAAGACAAGGA >strand + yqjM similar to NADH-dependent flavi CCCCTCCTATGAATAAAAATGCTTCTATACTGAAAAATAAACTGATTTCTCGTCTATTATACAGAAATGTTTGACAAAAGCGGTTCATTTGAGCAGTTTATATGACGGGAAAGAAATGACTTGATATAATCAATCTCTAAA >strand - yqjN similar to amino acid degradati TTTAGAGATTGATTATATCAAGTCATTTCTTTCCCGTCATATAAACTGCTCAAATGAACCGCTTTTGTCAAACATTTCTGTATAATAGACGAGAAATCAGTTTATTTTTCAGTATAGAAGCATTTTTATTCATAGGAGGGG >strand + yqjJ similar to glucose-6-phosphate 
CCTCCTACTAGAACCAGATACCTACAAGAATACAGGATCAAGCGGAAAATGTGAATCTTGCCTCTGAAAAGAACAATCCTTTCGTGCTAGAAACATTTTTTCAGCATTCTGCTTAAAAAAGGGCTTAAATGTTTGCTTTCGTTGAATTTTAGATTTAAAATGAAGGAAATATACGGGGTGTACTAAAATAAAGCTT >strand - yqjK similar to hypothetical protein AAGCTTTATTTTAGTACACCCCGTATATTTCCTTCATTTTAAATCTAAAATTCAACGAAAGCAAACATTTAAGCCCTTTTTTAAGCAGAATGCTGAAAAAATGTTTCTAGCACGAAAGGATTGTTCTTTTCAGAGGCAAGATTCACATTTTCCGCTTGATCCTGTATTCTTGTAGGTATCTGGTTCTAGTAGGAGG >strand + yqjG similar to lipoprotein SpoIIIJ- GTGATGACAATGTTTGTGGAATCGATAAATGACGTTTTATTCTTAGTCGATTTTTGTCACAATTATTCTTCCTGCTCTAACGGCAATCGGGATTGCATTCCTCTTACGGGAGTGCCGTGCGGGCGAGCAATGGAAATCAAAACGAACAGATGAACATCAGACGGTCTTTCACATTAACCGAACAGACTTTCTTATTATTATATATCATCGCATTACAACTTGGATACGTAAAGTCTTCCGCATGAATTCGCCTGTGAACGATGAGGAAGACGCCGGTTCTCTTCTTTTATAAACCGCATT >strand - yqjH similar to DNA-damage repair pr GGAAGACTTTACGTATCCAAGTTGTAATGCGATGATATATAATAATAAGAAAGTCTGTTCGGTTAATGTGAAAGACCGTCTGATGTTCATCTGTTCGTTTTGATTTCCATTGCTCGCCCGCACGGCACTCCCGTAAGAGGAATGCAATCCCGATTGCCGTTAGAGCAGGAAGAATAATTGTGACAAAAATCGACTAAGAATAAAACGTCATTTATCGATTCCACAAACATTGTCATCACATCCATTCAATAGCTTCATTTTACTATATGTACAAGCTGTTGTCTAATGACCGAAAAGGAG >strand + bmrU multidrug resistance protein TCTCCTTTTTCATCATCTGTTCTTATGATAGCAAGGAAACGTCCATATCGGCAAATGACTTTGCTTCGCCGTGCACTTTTTTTGGCCTCTTCGTTTACTGCTTACAGAAAAAGGGGATTATATAACCAGAAG >strand - yqiW similar to hypothetical protein CTTCTGGTTATATAATCCCCTTTTTCTGTAAGCAGTAAACGAAGAGGCCAAAAAAAGTGCACGGCGAAGCAAAGTCATTTGCCGATATGGACGTTTCCTTGCTATCATAAGAACAGATGATGAAAAAGGAGA >strand + yqzF yqzF CCCTTTGTATGAAAAAATTTGCACACCTATTTATTCCGTTGCAATAATTTGCACAACCCTATGTTACCACGTTTTCTTCTCTTGACAAAATATTTTTTCCGCTGGACAATAAAGGAAATGAA >strand - yqiR similar to transcriptional regu TTCATTTCCTTTATTGTCCAGCGGAAAAAATATTTTGTCAAGAGAAGAAAACGTGGTAACATAGGGTTGTGCAAATTATTGCAACGGAATAAATAGGTGTGCAAATTTTTTCATACAAAGGG >strand + yqiG similar to NADH-dependent flavi 
ACTTTTTATTCATTTTAAAAAATTTGAGAATAGGGTGAGCAAATCGTTTGACGGGTGGGTCAGCCAAGCGTAACCTTTTAAAGGATATTTACTTTATTGAAAGAAGTTAAAGCCATGAAAAAATTCGTTCTGCTGCTGGCGGCACTGTCATCACATGCTCCATCGGCCACCAAATCAGTGGCGGCTACAAAGGCCCCGAAAAAGAGCGGCCAAAACATCACTCTGCACTCAAGATATTCAGTCGGTAAAGCAGATCAATTGCATTAATTTTTACATATAGGCTAGAATATGTGCGAATCC >strand - yqiH alternate gene name: yzpA; simi CAATAAAGTAAATATCCTTTAAAAGGTTACGCTTGGCTGACCCACCCGTCAAACGATTTGCTCACCCTATTCTCAAATTTTTTAAAATGAATAAAAAGTTGCTTCTTTCCGCTAATTTTTTCTAATTGGCGCCTCAATTAGAGCCATCCTCATAGACAAATCTGCTCGTTATCTTTCTAACAACAGGAAATGATCGGGTCGTATAGCCCTTTTGAAAATCATGGAACTTGCTCGCCTGCATTCGCGTTGTCTTTACAAATTCTCCATTCTGGTCAAAGATAAAAGCCAAACATCAGGAAG >strand + yqhQ yqhQ GTTAAAGTGTGCCCAAAAAATGAAATGTCAATAAAAAAGCTCACACTTTTTGGCAGCTGCGAGTAAAATTTGAAAATAACGGGTATAATGCATGTAGGAAAAGCAAGGCTATGGCAATTTTTTCAGCCTGTCAATCTAGAGGTTTAGCGGTGTTTCCTGTACAATAGAGTGAAAGGAAATGGTTAGCAATCCTAAA >strand - yqhR yqhR TTTAGGATTGCTAACCATTTCCTTTCACTCTATTGTACAGGAAACACCGCTAAACCTCTAGATTGACAGGCTGAAAAAATTGCCATAGCCTTGCTTTTCCTACATGCATTATACCCGTTATTTTCAAATTTTACTCGCAGCTGCCAAAAAGTGTGAGCTTTTTTATTGACATTTCATTTTTTGGGCACACTTTAAC >strand + yqhL similar to hypothetical protein TCCTTACCGTTTTAATAGAATCAGTTTATCAAACTGTCGGGAAAATGGAAAATATTTTAGTTCCATTATAGATGACAATCACGCTGTGCTTTACTATAATAGTATTTGTCGAAAACGGCTCTATGACCTTGTTTTATGAAGCAGGGTTTTGATTTTATAT >strand - yqhM similar to hypothetical protein ATATAAAATCAAAACCCTGCTTCATAAAACAAGGTCATAGAGCCGTTTTCGACAAATACTATTATAGTAAAGCACAGCGTGATTGTCATCTATAATGGAACTAAAATATTTTCCATTTTCCCGACAGTTTGATAAACTGATTCTATTAAAACGGTAAGGA >strand + yqhH similar to SNF2 helicase GACGTCCCCAAAGGTGGTTTGCGCATCCGCACAAACACTCTCCAGAGTTGCGTCGAGCAAGAGTTCTTTTGCCTGAGAGATTCACTCCGCAGTTTGCTCCTTCGGCGCCTGTATCCCGAGGTTTTCGGTCAGGTCTCTCCCCTTGCTGTCATTCGCTCATATTTTCATGCTGTTATGGACATACTATAGCAATATCTTACATCAAATACATACTCATATCCTACCATCATCACTATATGCTGGGCAACAGCAATTTGGGCAGATTTTTCTTATATTATTCATCTTAAGAATCGAAGAATA >strand - yqhI similar to aminomethyltransfera 
CTATAGTATGTCCATAACAGCATGAAAATATGAGCGAATGACAGCAAGGGGAGAGACCTGACCGAAAACCTCGGGATACAGGCGCCGAAGGAGCAAACTGCGGAGTGAATCTCTCAGGCAAAAGAACTCTTGCTCGACGCAACTCTGGAGAGTGTTTGTGCGGATGCGCAAACCACCTTTGGGGACGTCTTTGCGTATGCAAAGTAAACTTTCAGGTGCCAGGACAGAGAACCTTCATTTTACATGAGGTGTTTCTCTGTCCTTTTTTGTATGTTTTTTAGCTGCGCCGTTGATAAAGGG >strand + yqzG yqzG CTGTAAAACACTGTAACTTGATATGACAATCGTTCTCTTTAAAGAACTTCACAAATAGAGTATAATCAACGATTTATAGATTTAAAAACACCGAAATCGCTCATTTTGTTCGTTTAAAAGAATGTTTTCATCATATTACAAAGTCTTGCTCAGTATTGCTCATTTTTTCAATTTTTTAGAAATTTTTGGGATTGTTATGTGAGAATTTAAATGCATATCCCATTATTATTTGGTAATAATA >strand - yqxM alternate gene name: yqhD TATTATTACCAAATAATAATGGGATATGCATTTAAATTCTCACATAACAATCCCAAAAATTTCTAAAAAATTGAAAAAATGAGCAATACTGAGCAAGACTTTGTAATATGATGAAAACATTCTTTTAAACGAACAAAATGAGCGATTTCGGTGTTTTTAAATCTATAAATCGTTGATTATACTCTATTTGTGAAGTTCTTTAAAGAGAACGATTGTCATATCAAGTTACAGTGTTTTACAG >strand + yqhB similar to hypothetical protein TCCAAACTTTTGTTCTTTATTCTTTAATTGCCCAATAACCGCGTCACTAAACCAGTCTGCCGCTTCAACACATGTTTTATGAGCATTTTCAGGTGGTATGGAATGTAGAAAG >strand - yqxL alternate gene name: yqhC; simi CTTTCTACATTCCATACCACCTGAAAATGCTCATAAAACATGTGTTGAAGCGGCAGACTGGTTTAGTGACGCGGTTATTGGGCAATTAAAGAATAAAGAACAAAAGTTTGGA >strand + yqgY yqgY CTCCTCAGTTAACCATCTCTTTGTAGAATAACTACCCGGATCATTTTTCATTTAAACCATTTACAGAACTTTTTTCGGATTATTTTTTAAAAAACATTCATACAATAGAAAACACAATCAAAGGGACAAATTCCATTTGTATATTAATAATGAAAATGTTTACATTTCTAGGTGTGATCTATTATAATAACAGTCAGAGGG >strand - yqgZ similar to hypothetical protein CCCTCTGACTGTTATTATAATAGATCACACCTAGAAATGTAAACATTTTCATTATTAATATACAAATGGAATTTGTCCCTTTGATTGTGTTTTCTATTGTATGAATGTTTTTTAAAAAATAATCCGAAAAAAGTTCTGTAAATGGTTTAAATGAAAAATGATCCGGGTAGTTATTCTACAAAGAGATGGTTAACTGAGGAG >strand + yqgW yqgW TCTTTCTTTTTTATGCTCTTATTATGAGGGTTTTTGAAAAAAGATTCAAACAATCGCAATGAAAACCGCTATCGACAAACCTATAAAAAACAGGTAGAATAAAACTGGAAACAATTGCAGGTTTTTC >strand - yqgX similar to hypothetical protein GAAAAACCTGCAATTGTTTCCAGTTTTATTCTACCTGTTTTTTATAGGTTTGTCGATAGCGGTTTTCATTGCGATTGTTTGAATCTTTTTTCAAAAACCCTCATAATAAGAGCATAAAAAAGAAAGA >strand + yqzD yqzD 
CATTCTCCTTTCCAATCGTTCACTTCGTGTTCCTATTAAACCATTACATGAAGAAGAAGCGAAGAAAGATTTCTCGACAAAATCTCGTTTTTTTCGCGAATTCCTTTAGAAACTATTGGTTTTATGGTATGAAAAAGAAGGATACAAAA >strand - yqgL yqgL TTTTGTATCCTTCTTTTTCATACCATAAAACCAATAGTTTCTAAAGGAATTCGCGAAAAAAACGAGATTTTGTCGAGAAATCTTTCTTCGCTTCTTCTTCATGTAATGGTTTAATAGGAACACGAAGTGAACGATTGGAAAGGAGAATG >strand + yqgB yqgB CTCCCATTTTACTCTGTCTTCTTCATGTCTTCCTTGTATTCACACGCATGCAACGATACAATAAGCAACATATATTAGT >strand - yqgC yqgC ACTAATATATGTTGCTTATTGTATCGTTGCATGCGTGTGAATACAAGGAAGACATGAAGAAGACAGAGTAAAATGGGAG >strand + yqfZ yqfZ CTCCTTTAAACTATTACATAATTATCCTAACAGATCCATTGTTCATTGTAAATAATGGAATACGAGTTTTATATTATAAAAGCCTCTTTTTGATCTAATTTTGCACTATTTTCTATTTTCTATCTGAAGTTAATGTTTAGTCGAAGAAATCCTGTCAAAAAAGCGCCTGTATAAAAACCGAGCATTCGCAGTCTCTTTTCTTTTGTCATGAATCGAAATGGACAAGCATACTATGGTATAAAATTT >strand - yqgA yqgA AAATTTTATACCATAGTATGCTTGTCCATTTCGATTCATGACAAAAGAAAAGAGACTGCGAATGCTCGGTTTTTATACAGGCGCTTTTTTGACAGGATTTCTTCGACTAAACATTAACTTCAGATAGAAAATAGAAAATAGTGCAAAATTAGATCAAAAAGAGGCTTTTATAATATAAAACTCGTATTCCATTATTTACAATGAACAATGGATCTGTTAGGATAATTATGTAATAGTTTAAAGGAG >strand + yqfW yqfW TGTTTTATTAAAGGAGCTTTTATAGTTTGATCCATTTTGCCTGATTCATGTTTGCTAAACTTTTCTGTTAATGTCAATTGTTTATGTTACACTTGAAGGGAT >strand - yqfX yqfX ATCCCTTCAAGTGTAACATAAACAATTGACATTAACAGAAAAGTTTAGCAAACATGAATCAGGCAAAATGGATCAAACTATAAAAGCTCCTTTAATAAAACA >strand + yqfT yqfT TTCCTGCAATAACTATTTTAAATACATTATAAATCTATTTTACCATTAAACACTGACACATTTTACTGATTTGCCCATACATTAA >strand - yqfU similar to hypothetical protein TTAATGTATGGGCAAATCAGTAAAATGTGTCAGTGTTTAATGGTAAAATAGATTTATAATGTATTTAAAATAGTTATTGCAGGAA >strand + yqfQ yqfQ TCCTTAAAGCGGCACATTATGTGTCCATCATCACATTATACCCGATTTTGCTTAAAAACACCACGTTTGCAAGGCGTTTGTTTTATTCAGTTATGTAAAGCAAATACCGCAGCCCATGCATATTCTAATAGCAGAGAG >strand - yqfR similar to ATP-dependent RNA he CTCTCTGCTATTAGAATATGCATGGGCTGCGGTATTTGCTTTACATAACTGAATAAAACAAACGCCTTGCAAACGTGGTGTTTTTAAGCAAAATCGGGTATAATGTGATGATGGACACATAATGTGCCGCTTTAAGGA >strand + yqeW similar to Na+/Pi cotransporter 
CTCCCTCCGAATACACCAATCGACTACCTTTAAGTTACACATATAAACATGTACTTTGACATTATAATATAAGCCATCAGTCAGGTCAACTAAGTGAGTTCACTTTTCCGTCATGAGTGCGACTGCCTGTCTA >strand - rpsU ribosomal protein S21 TAGACAGGCAGTCGCACTCATGACGGAAAAGTGAACTCACTTAGTTGACCTGACTGATGGCTTATATTATAATGTCAAAGTACATGTTTATATGTGTAACTTAAAGGTAGTCGATTGGTGTATTCGGAGGGAG >strand + rpsT ribosomal protein S20 (BS20) TCCTTCACATTAAGTACCTATATTGTTGCCATTTGCTGAATAAATCATGCTAAAAAGAGAACCTTCAATGAAAAATACTTTACATTTATATTGCAATCTAAAGGTGTGTTTGATAGAATACAATTTGTTCTATTGTGAAGAAGATTTAACCTTAAGACACTGTGCTACAGGTT >strand - gpr spore protease AACCTGTAGCACAGTGTCTTAAGGTTAAATCTTCTTCACAATAGAACAAATTGTATTCTATCAAACACACCTTTAGATTGCAATATAAATGTAAAGTATTTTTCATTGAAGGTTCTCTTTTTAGCATGATTTATTCAGCAAATGGCAACAATATAGGTACTTAATGTGAAGGA >strand + comER alternate gene name: comD, com TCCCCGCTTTCGTCATATTTTATTTTCATAACACATATGGTTTAGAGGATAAT >strand - comEA integral membrane protein ATTATCCTCTAAACCATATGTGTTATGAAAATAAAATATGACGAAAGCGGGGA >strand + yqeD similar to hypothetical protein TCCCTATTAATAAGAATGCAGAAAACAGAGAGATGGACAGCCTTTTTTATGAGGTAAAAAAGAATTGTGTTCTTAACCTGGTTTTCAGTTGAACAGCTGAAATGGCACCAGTTATACTAATTTAGATGACGGAGGTTTTGACTCCTGACATCACCT >strand - yqeE similar to N-acetylmuramoyl-L-a AGGTGATGTCAGGAGTCAAAACCTCCGTCATCTAAATTAGTATAACTGGTGCCATTTCAGCTGTTCAACTGAAAACCAGGTTAAGAACACAATTCTTTTTTACCTCATAAAAAAGGCTGTCCATCTCTCTGTTTTCTGCATTCTTATTAATAGGGA >strand + yqeB yqeB TCCTGATTGTCTAAAAAGCCTTTACTCTTTATGTTCCCTATGGATTGTATTGTGATACGCAGAAAACAAATGGGGCTTACATCGTCTGCGCATGTAAATTTTATTTTTCATGACCGACGGCGCCTGCCAGTTCTATTACTGAATCTCTTTATATCCATAATTATCGCTTTATCCTTGATTTACCATCAGTTTAAGTTAAAATATGGGAAGGAATTCGATTT >strand - yqeC similar to 6-phosphogluconate d AAATCGAATTCCTTCCCATATTTTAACTTAAACTGATGGTAAATCAAGGATAAAGCGATAATTATGGATATAAAGAGATTCAGTAATAGAACTGGCAGGCGCCGTCGGTCATGAAAAATAAAATTTACATGCGCAGACGATGTAAGCCCCATTTGTTTTCTGCGTATCACAATACAATCCATAGGGAACATAAAGAGTAAAGGCTTTTTAGACAATCAGGA >strand + spoIVCB RNA polymerase sporulation-s 
CCCATACCTTTGTTCATTTCAATGTATGGGCGCTTGATGAAGAATATTTTTAACATTTGAAGTTAGTATGCTGCTTACCAAAGCCGGACTCCCCCGCGAGAAATTTCCCGGTACAGACACAGACAGCCTCCCGGTCACATACATTTACATATAGGCTTTTGCCTA >strand - nucB nuclease TAGGCAAAAGCCTATATGTAAATGTATGTGACCGGGAGGCTGTCTGTGTCTGTACCGGGAAATTTCTCGCGGGGGAGTCCGGCTTTGGTAAGCAGCATACTAACTTCAAATGTTAAAAATATTCTTCATCAAGCGCCCATACATTGAAATGAACAAAGGTATGGG >strand + rapE response regulator aspartate ph GTAGGTAAGTAATTTCCTTATCAAGAAAAATCCAGAATCAGTTCTGCGTTTAGCTGTACATGCACACTCAGCCCCTCAAAACGACTAACGCTGGACTTTCTGGATGTTTTCAATTTTAAACTATCACAAGAATGACCGAAATTCCCTGTTTTTTGATGCCAAGTCATGGTACTCCCAGGAGGGAATTTCATAACTCTCTAAAGCAAAAAGTTATTGCGGTCTCACCGAAAATTTAAATTGCGGTGCCCTATTTCCTTTTTCTGCGAACGAAAACTTGTTAATATTTACAGTACTAGTAGA >strand - yqcI similar to hypothetical protein AATTCCCTCCTGGGAGTACCATGACTTGGCATCAAAAAACAGGGAATTTCGGTCATTCTTGTGATAGTTTAAAATTGAAAACATCCAGAAAGTCCAGCGTTAGTCGTTTTGAGGGGCTGAGTGTGCATGTACAGCTAAACGCAGAACTGATTCTGGATTTTTCTTGATAAGGAAATTACTTACCTACCCACTAAAACTTAACCAATTAATTAATTAATATTCATTTGGATATATGGAAAAGTAATTAGACATTCACCTCCTTCGTGCTCCCGCAATACATTATGATAAATGCCTTCGGAG >strand + yqdB alternate gene name: yqcR CTCCTTTTACTTTTTAGTTTTGGACAAGAAAAAAAGCACATTCATACTTTTTGAATGTGCTTTCCATATAATTCATGTATAATAGATTCTTGAAGCTACCTTAGCTTATCCTCCG >strand - yqbN similar to phage-related protei CGGAGGATAAGCTAAGGTAGCTTCAAGAATCTATTATACATGAATTATATGGAAAGCACATTCAAAAAGTATGAATGTGCTTTTTTTCTTGTCCAAAACTAAAAAGTAAAAGGAG >strand + yqaP yqaP CTCCTTTTCAAAATAAAAACGGACACCAAACATACAGCGTAATACTGTAAGTTCAGTGTCCGCAGGCTTTCCGTCTTGGACTAACTGGATATTTTATGCTTTAATGAATTCACATTACCTAAGGAAAAAAATTCAACTCCCTCCTTAAATATGGTAAAATTTTATTA >strand - yqaQ similar to hypothetical protein TAATAAAATTTTACCATATTTAAGGAGGGAGTTGAATTTTTTTCCTTAGGTAATGTGAATTCATTAAAGCATAAAATATCCAGTTAGTCCAAGACGGAAAGCCTGCGGACACTGAACTTACAGTATTACGCTGTATGTTTGGTGTCCGTTTTTATTTTGAAAAGGAG >strand + yqaE similar to transcriptional regu CCTTAATCAGAAAATGTAACGTTTCGTTACTTAGATAATAGTATCGATTCGTTACATTGTCAACACATTTTCATAAAAAAAGTATCGTTTTGTTACCAAAACTTATATGTAACCCTTTGTTACGTTACAATGAGATATAGAATAAA >strand - yqaF yqaF 
TTTATTCTATATCTCATTGTAACGTAACAAAGGGTTACATATAAGTTTTGGTAACAAAACGATACTTTTTTTATGAAAATGTGTTGACAATGTAACGAATCGATACTATTATCTAAGTAACGAAACGTTACATTTTCTGATTAAGG >strand + yqaB similar to phage-related protei CCTTATTAAGCAACCAAACTCCCTGTTTGTTGCATAACGTACTTACTATTATTTTGACATAAATCCCTAAAAGACCTTTACTTTTCGAGGGAAATCCAGATGCGTGTTAAAATCATAATACTTACGAAAACAGTCATGATTAAACGGATTCATATTCCATCTCCTTAGACGCATTTTCCTATGAAAAAAGTCTTGATTTCCATATAATATTTAAACGAATCCAATTTTTAGATTGTCG >strand - yqaC yqaC CGACAATCTAAAAATTGGATTCGTTTAAATATTATATGGAAATCAAGACTTTTTTCATAGGAAAATGCGTCTAAGGAGATGGAATATGAATCCGTTTAATCATGACTGTTTTCGTAAGTATTATGATTTTAACACGCATCTGGATTTCCCTCGAAAAGTAAAGGTCTTTTAGGGATTTATGTCAAAATAATAGTAAGTACGTTATGCAACAAACAGGGAGTTTGGTTGCTTAATAAGG >strand + yrkO similar to hypothetical protein ACCTCATGATTCTTGTTGTTATTTCTCGTGTGTCCTTACTATATATCGTTCATAGTAATAAGTACGTTATAAGATACTACGTAAATCTTACCAATTTATGAAAGCTGATGCTCGCCACAAATTTGTCACATTTTCTAAAAAAAATGATGACTCTTCATAAATTGTTAAGATTCGGTCTTTATATTTATCCATTGTAAGGCTCAAGTTTTTCAGATCCGCACTACACATTGCCGTGAT >strand - yrkP similar to two-component respon ATCACGGCAATGTGTAGTGCGGATCTGAAAAACTTGAGCCTTACAATGGATAAATATAAAGACCGAATCTTAACAATTTATGAAGAGTCATCATTTTTTTTAGAAAATGTGACAAATTTGTGGCGAGCATCAGCTTTCATAAATTGGTAAGATTTACGTAGTATCTTATAACGTACTTATTACTATGAACGATATATAGTAAGGACACACGAGAAATAACAACAAGAATCATGAGGT >strand + blt multidrug-efflux transporter TGACTATACGGTAACCATATACCTTATGATTTGATTGTACTTGAAAAAAGCTTCAAGCGAAAAGGATAGGTAAAAAGGGTTCAATT >strand - bltR transcriptional regulator AATTGAACCCTTTTTACCTATCCTTTTCGCTTGAAGCTTTTTTCAAGTACAATCAAATCATAAGGTATATGGTTACCGTATAGTCA >strand + yrdQ similar to transcriptional regu CCTTTTCTTTAAAATTACAATGAGAATTATAAAAAGAAGTAAGCATCAAGTAAAATGAACAAAAGTGATGAAAAACATCATTGATAATGATACTT >strand - yrdR yrdR AAGTATCATTATCAATGATGTTTTTCATCACTTTTGTTCATTTTACTTGATGCTTACTTCTTTTTATAATTCTCATTGTAATTTTAAAGAAAAGG >strand + gltR transcriptional regulator (LysR 
CTACTACAACAGTCTTTTTAATTTCATTCGTTAATGTGTCAGCTATTTCCAAGCAGCTGCTACATTTAGAAAATAAAGTAATTTCCCAATAACCCCTTCATTCTGGTGTTAGTCAAGCCTATGTTATGATGTGTATTTCCATAGGCAATATACAGAATCAACTTCGAGTAATTAACCATTTTTTGAAATAGCTTTATCACTTACTTTAGAAATCATTTATTCTTCTAAGCTTTAGTTTACATGAAGCTCTGCTATCATATATAATTCAAAATTAAGATGGAAGACATCTCAAAATCAGAT >strand - yrdN yrdN TGATTTTGAGATGTCTTCCATCTTAATTTTGAATTATATATGATAGCAGAGCTTCATGTAAACTAAAGCTTAGAAGAATAAATGATTTCTAAAGTAAGTGATAAAGCTATTTCAAAAAATGGTTAATTACTCGAAGTTGATTCTGTATATTGCCTATGGAAATACACATCATAACATAGGCTTGACTAACACCAGAATGAAGGGGTTATTGGGAAATTACTTTATTTTCTAAATGTAGCAGCTGCTTGGAAATAGCTGACACATTAACGAATGAAATTAAAAAGACTGTTGTAGTAGAAG >strand + yrpB similar to 2-nitropropane dioxy ACTCTTGAATGTGATGTATTCTTCTAAGCTTTATTTTACATGATGTTCTCCTATATATCATTCAATATTAAGATGCAAGACATCTCAAAATCAGATATCAACTATGACGGACTACATGAATATTCAGTTTTTTGTAGAGCCGCCATGGGACGACACTATCTTTGACACACTTTAATCCCCCCTGCTTTATCGACATGTTAACTGGTATGAGAGATTGACAAAAAGAAGGAAAACGATATATTTAAGCGAATACGAGTTTTTGAGTAAATTTTAAAATGACACTTTTTTCAAAAATCATGT >strand - aadK aminoglycoside 6-adenylyltransf TATTCGCTTAAATATATCGTTTTCCTTCTTTTTGTCAATCTCTCATACCAGTTAACATGTCGATAAAGCAGGGGGGATTAAAGTGTGTCAAAGATAGTGTCGTCCCATGGCGGCTCTACAAAAAACTGAATATTCATGTAGTCCGTCATAGTTGATATCTGATTTTGAGATGTCTTGCATCTTAATATTGAATGATATATAGGAGAACATCATGTAAAATAAAGCTTAGAAGAATACATCACATTCAAGAGTAAAAAAATCTATAAGACGTTTTACAGTTAGATATTAAGGCTGAAACGG >strand + yrpG similar to sugar-phosphate dehy TCCTATAATTTTGCGTCTGCTTTACATTTGCTTCGTCTTTTTAATCGTAACAAAGAGCAAACAATTGTTGAATCAATTGTATTGCAGCTGTTGCCAAAAATAATGATAAAGGAGAGGTTTGTTGTGGAGTATAC >strand - sigZ RNA polymerase ECF-type sigma f GTATACTCCACAACAAACCTCTCCTTTATCATTATTTTTGGCAACAGCTGCAATACAATTGATTCAACAATTGTTTGCTCTTTGTTACGATTAAAAAGACGAAGCAAATGTAAAGCAGACGCAAAATTATAGGA >strand + yraM similar to hypothetical protein TTTCTAGTCCTTCTTTCACAATTATAAAAGTTTTTTTATCGAATATTAAGAAAACTTATATTTTATTTTTATTTTTTTACCTTCTACACTAAAAAACATAAAAAATACTACCTT >strand - yraN similar to transcriptional regu 
AAGGTAGTATTTTTTATGTTTTTTAGTGTAGAAGGTAAAAAAATAAAAATAAAATATAAGTTTTCTTAATATTCGATAAAAAAACTTTTATAATTGTGAAAGAAGGACTAGAAA >strand + yraL similar to hypothetical protein TCTATTCATTGCCAATCAGCCTTAGCCCCTCTCACTGGGAAGGTCCCTTGATTTTCATACTGCTTCAACTTTCATTAACAAGATGCCATAAGGCTTTACTCTATTTTATAATGGAAGTGATCATCGAAAATCGTTAGGGAGGTGATGTAATGGCAAGCAAGATGAGAATGCCAGATTCCGCAACGTCCCATGCAAAGCCTACTATTGGGAAGGCGATACTTTGCATGGGGTAGACTTTCTAAAAGATCGCCCAAGTAAGCTTTTCTGATATAGTGGCTTTGCACCTTATTTGACATAAAA >strand - csn chitosanase GGACGTTGCGGAATCTGGCATTCTCATCTTGCTTGCCATTACATCACCTCCCTAACGATTTTCGATGATCACTTCCATTATAAAATAGAGTAAAGCCTTATGGCATCTTGTTAATGAAAGTTGAAGCAGTATGAAAATCAAGGGACCTTCCCAGTGAGAGGGGCTAAGGCTGATTGGCAATGAATAGAGTAAGATTCAAGCTATATTACTTTAGATTCAAAATATTGACAAATTGATGTATTTTCAACAATAGGGCAGTTTCTTCATGTATACTGAGTCTGTATTTACAATATTTAGAAA >strand + yraG similar to spore coat protein CCTATAAAACCATACTTTTACTGGTCATCCAATTTAATGATACCAAAGAAACCTGACAGCACTATGTCAAGTTTCCATACTTCACCCTTATTTCAGATTTAAGTGCAGTTTAGTCCATGTCATAAGAACGAGCAAAAACAGCGATTCTTAAAACAAAAAATGATACCGAGTGTATTTTCTGTATGCTTCAACTTCATTTCGGGAAATGTAACGTTGAT >strand - yraH similar to hypothetical protein ATCAACGTTACATTTCCCGAAATGAAGTTGAAGCATACAGAAAATACACTCGGTATCATTTTTTGTTTTAAGAATCGCTGTTTTTGCTCGTTCTTATGACATGGACTAAACTGCACTTAAATCTGAAATAAGGGTGAAGTATGGAAACTTGACATAGTGCTGTCAGGTTTCTTTGGTATCATTAAATTGGATGACCAGTAAAAGTATGGTTTTATAGG >strand + adhA NADP-dependent alcohol dehydrog AATAAACACACTGCCGGAAGAGAACAATTAGGAGAATTTGCGCCAAAGTTTGCTGAACTTCATGATGATGTTCTATTCGGTGACATTTGGGCAAGAGAAGAAGAACTTTCATCCCGGGATCGCAGCATGATTACAGTTTCTGCATTAATTACTGACTGTTTCTCAGCTTATAAATCAGGTTCATTTTAAATGGTTTTTTTAAAAAACCCTTGCCTTAAAGCAGACTTTAAGGTTTATCGTTATGTTGAAGACACCATAAAACATAAATAAGTGCATGTTTTTATTTACAACACTTATGAT >strand - yraB similar to transcriptional regu 
GCGATCCCGGGATGAAAGTTCTTCTTCTCTTGCCCAAATGTCACCGAATAGAACATCATCATGAAGTTCAGCAAACTTTGGCGCAAATTCTCCTAATTGTTCTCTTCCGGCAGTGTGTTTATTTGCCATTCAGCAATCCCTCCAACTACACTGTTTTTAGTGCATGGAGAAGTATAGAGTTTAAAGTGTGCTTTAAGGCAAGTCCCGAATATATAAACCCAGATTCAGCAGCTAACCACTTGTTTTGAGCTTGACTTAAAGTTAACTTTAAGTGTTACCTTCACAACATACAGGATAGAG >strand + aapA amino acid permease CATTGGAGGTTAAGGAATCTGTAAGCTTAAAAACTTTATCTCCGATCCTATCGATTTGATCCCCCTGCCGGCCTCAATCATAGTGGGTATGCAATTATACAAATCGTTACCGATTAAAACTAGCCTGTCAGATTGTCCAAATGACTTTCCTGTTTTACTCTGCTATTCAGGTAGAATCACGAGTGAAAAATAGAGCCACAAGACATGTAACAGCACTTAAAATAGTTTTAAAATTTAAAAATAAAAGTATCTATTCTTTTAAATGTTATAATATCCGATAAAACTTTACTCATCTTTTTA >strand - levR transcriptional regulator (NifA AAGGGGTTATATTTGGGTAATTTGTCATGATCTATAAGATTCATTTTTTGTCATTGATTATTTTGACATCATTACATTCCCCGAGAATGATAAACTGTTTACTGATTATTACTTTTGAAGGGGACTGGCCCTCCAGAACTGAGCATCTATCATACTAATATGAGGTGCAAATGATTCAACATGTAAACATATACGCATAAAAGCCAGCTTTGTCAATCTGCTATAGAGACCTTTTTAAACGTGTAAATCTCAACGCGGACTATAAAGTTTCGAAATATTCTTGTTAATGGCTTATCTAAT >strand + yrhO similar to cyclodextrin metabol CCTTTAAAAATCCCCTCAATCATATAGTGACTACAAAAGTAACTACTTATAAAAGTGTATCAATGCGTTTGGAATAAATCAATATGATTTGTTAAAATAGCGATTACTAAGGCAGCGATTGC >strand - yrhP similar to dihydrodipicolinate GCAATCGCTGCCTTAGTAATCGCTATTTTAACAAATCATATTGATTTATTCCAAACGCATTGATACACTTTTATAAGTAGTTACTTTTGTAGTCACTATATGATTGAGGGGATTTTTAAAGG >strand + yrhE similar to formate dehydrogenas CTTCCGATATTGCCTTATAAAGGCTTTTCCCGGTTCCCCTCAGGACATTTCACTTGGTTAAAATTTTCCTGCCATATTGTATTATAATAAATATCACATTATAATAAAAGCAGTGTTGATGGATTTGTAATAGGATCTTGTTATGAGTCTATCGTTTTATAAAAATCAAGGATTGGTCTGTTAGTAAGACTTGCTAGTGAACTATTCCGCCATGTGTGTTTACACCTTCTTAGAATTAAGGGCAGGTGTATCTATACATGGCTTTTTTTGACTTCTTGAAAGAAA >strand - yrhF yrhF TTTCTTTCAAGAAGTCAAAAAAAGCCATGTATAGATACACCTGCCCTTAATTCTAAGAAGGTGTAAACACACATGGCGGAATAGTTCACTAGCAAGTCTTACTAACAGACCAATCCTTGATTTTTATAAAACGATAGACTCATAACAAGATCCTATTACAAATCCATCAACACTGCTTTTATTATAATGTGATATTTATTATAATACAATATGGCAGGAAAATTTTAACCAAGTGAAATGTCCTGAGGGGAACCGGGAAAAGCCTTTATAAGGCAATATCGGAAG >strand + yrzA yrzA 
TCCATATTTAATAATTCCTATAAGTATACTATGGATTAATGATGGAACAGTTCCTATAATACCACATATTCTGAAAAGCAGGTACTAAAACACTTTGTGTATTTTATGCCATCTCATATTGCGCATTGACCGCAAAAAAGCTTATACTTCATATGACTTGTG >strand - yrrT yrrT CACAAGTCATATGAAGTATAAGCTTTTTTGCGGTCAATGCGCAATATGAGATGGCATAAAATACACAAAGTGTTTTAGTACCTGCTTTTCAGAATATGTGGTATTATAGGAACTGTTCCATCATTAATCCATAGTATACTTATAGGAATTATTAAATATGGA >strand + glnQ glutamine ABC transporter (ATP- TCACCTATATCTTTTGAGGTTTACAAGGAATTATTCTTTATGTTTCTTTAGTCAGTCAGAAGTCATGGACATTTTTTCACTTTTAATTTTCAGAAAAGTTTGATCATTTACTTTTTGGTCAAATT >strand - yrrI similar to hypothetical protein AATTTGACCAAAAAGTAAATGATCAAACTTTTCTGAAAATTAAAAGTGAAAAAATGTCCATGACTTCTGACTGACTAAAGAAACATAAAGAATAATTCCTTGTAAACCTCAAAAGATATAGGTGA >strand + yrvP yrvP CCTCTGTCGCCGTGCAAAAACCGTTTTCGTCCGTATCATCCCAGTTTTTCATAAAGATTCCGATTACATCATAGCCCTGTTCTTTT >strand - yrrA similar to hypothetical protein AAAAGAACAGGGCTATGATGTAATCGGAATCTTTATGAAAAACTGGGATGATACGGACGAAAACGGTTTTTGCACGGCGACAGAGG >strand + yrvN similar to hypothetical protein TTATTTCATGATTTTCCACATTTATAATGGTAATTTCTATATGTTTCGTCCGTTTATTATCTTATAGACCAACTGTTATTATAACATACCTTCAGCGAATTTTCATTGAGGCAGAAGGAAAGGGGCCTTTACCCTTTTCTAATGTACGGTTTTCCGTTACCCTGTAATT >strand - yrzC similar to hypothetical protein AATTACAGGGTAACGGAAAACCGTACATTAGAAAAGGGTAAAGGCCCCTTTCCTTCTGCCTCAATGAAAATTCGCTGAAGGTATGTTATAATAACAGTTGGTCTATAAGATAATAAACGGACGAAACATATAGAAATTACCATTATAAATGTGGAAAATCATGAAATAA >strand + yrvJ similar to N-acetylmuramoyl-L-a CTTTTTGATTGTATTTACGCTTAGTATAAACAAATCATTTTATTTTTATTTAAAAATCAGAAAGGACTTTACGATATTAAACAAGAATGCTTTTTTAATGAT >strand - yrzK yrzK ATCATTAAAAAAGCATTCTTGTTTAATATCGTAAAGTCCTTTCTGATTTTTAAATAAAAATAAAATGATTTGTTTATACTAAGCGTAAATACAATCAAAAAG >strand + spoVB alternate gene name: spoIIIF CCTTTTTTATTGTTTTTCAAGAAGACTTGTCATGCTTGGACGACATATACGCATATCTTTATGTATATGATTCCAGGTAGTTTGA >strand - yrzD yrzD TCAAACTACCTGGAATCATATACATAAAGATATGCGTATATGTCGTCCAAGCATGACAAGTCTTCTTGAAAAACAATAAAAAAGG >strand + yrzE yrzE 
CTCATTATAGGTTGCAACAAAATGATCAATTTATGTAAGAAAAACCGATTGCATTTCACAAAGCTTTTACGTCTAATTCATGGGATAAGGGAATACATTTTTAC >strand - yrbG similar to hypothetical protein GTAAAAATGTATTCCCTTATCCCATGAATTAGACGTAAAAGCTTTGTGAAATGCAATCGGTTTTTCTTACATAAATTGATCATTTTGTTGCAACCTATAATGAG >strand + yrzF yrzF TTGATCATTCACGAAAGCGCTTTATTTAAAATTAATACTATCCCATTTGTAAGCGCTTTTCAAGAATGGAAATTAAACGTTCTATGTCTCTTTTATCACAATTCATGCTTATAAGTAGCCTTCTTCTTCAAGACCAGATACACTAGAATTAGGCTCTTTTTCCTGAACAGTTTATGATTTTCCTATAAACAGCTGCATAAAATAGAG >strand - yrbE similar to opine catabolism CTCTATTTTATGCAGCTGTTTATAGGAAAATCATAAACTGTTCAGGAAAAAGAGCCTAATTCTAGTGTATCTGGTCTTGAAGAAGAAGGCTACTTATAAGCATGAATTGTGATAAAAGAGACATAGAACGTTTAATTTCCATTCTTGAAAAGCGCTTACAAATGGGATAGTATTAATTTTAAATAAAGCGCTTTCGTGAATGATCAA >strand + nifS nifS GTTGTTTACACCTGTCTTGACACCTATATTTACACAAGGATAAAATAAACTCAAGAGTTTTTTATGGAGAAC >strand - nadB L-aspartate oxidase GTTCTCCATAAAAAACTCTTGAGTTTATTTTATCCTTGTGTAAATATAGGTGTCAAGACAGGTGTAAACAAC >strand + ysxD ysxD CCCCCAACACCCAATCTTAATTTATAATCATTCTAACATAGAACTCATTATTTATAATAATTATAACATAGTTTCTTTCACAAAATGTGAACAGATTTTAAAATTCTTATGCTATATTAGAAAAAGCTTCAGG >strand - hemA glutamyl-tRNA reductase CCTGAAGCTTTTTCTAATATAGCATAAGAATTTTAAAATCTGTTCACATTTTGTGAAAGAAACTATGTTATAATTATTATAAATAATGAGTTCTATGTTAGAATGATTATAAATTAAGATTGGGTGTTGGGGG >strand + ysnD ysnD CTTTGGACGCAGTGTCAACAGCATTTTTTAAAATTATTAGACAATTTTGATTAATCAATCTATATTGTATACGCTTTCATTATTGGTACAAAAGGAATTGCAAATGTTAAAAGTTTTTATAATTTTTTTATTAATCTATCAAGAATACACCAGAATTATTTAATTTTATATAAACATGGAAGATTTTATAACACGATATGGTTCGTATATGGATTGAGTCTGACAGATCATCTTGCAATCCATTTCAATTTCGGATAGGCACTTCTCTTGTTCCCTCGGCATACATTAATGATATCTTGC >strand - ilvB acetolactate synthase (acetohyd GTTAGCTGAGAACCGGCGAAGCTTTACAAGGTGAACTCGCCTCAGAGTGCCAGTCTGAAATGACAGTAGGACTTGGCCGGGTGAACTTGATTCACTCGTTACTAAAGCGGATAGAAATATCCATGAGACGGCCGATTAACAGGCCGTAAACAAGGGTGGTACCGCGGAAAGAAAAGCCTTTTCGCCCCTTTTAGCTATCGCAGTTACTGCGCGGCTGATTGTGGGCGGAAGGGCTTTTTTTATTGAATAATCAGCTATCTAGCTAATGAAAAGATGATCTTTAAAGGATGAAAATCCAAA >strand + yslB yslB 
GTTTGATAAGTGCGAAATGTGCTAAATCTCTTCCCCCACTTCTTTCAATTGTAAGCACTTTATTAAGATTTATTGACAATTTCATTTTACTCCTCCCTCAAAAGGGCGTCAAGAAAACGCGTACATAAATTATAATTTTCCGAATTGATTAGTTGATTGAAAAATTTTATTTTATCAATATATATTTCTTGCTTAATTTTTCATAAGAATTACAGGATCATTCTGTAATCCTCCCCCCTGTTTGAGCTATAATATACGCAGG >strand - sdhC succinate dehydrogenase (cytoch CCTGCGTATATTATAGCTCAAACAGGGGGGAGGATTACAGAATGATCCTGTAATTCTTATGAAAAATTAAGCAAGAAATATATATTGATAAAATAAAATTTTTCAATCAACTAATCAATTCGGAAAATTATAATTTATGTACGCGTTTTCTTGACGCCCTTTTGAGGGAGGAGTAAAATGAAATTGTCAATAAATCTTAATAAAGTGCTTACAATTGAAAGAAGTGGGGGAAGAGATTTAGCACATTTCGCACTTATCAAAC >strand + ysgB similar to hypothetical protein TTCCGCGAGAATCCTAGTTCTTATCATAACACGTTTATTAAATGAATGGAAAGACGAGCCTCCCGCTGTTATGATACACTGGTATATACAGCGAAAAGTGTAA >strand - yshA yshA TTACACTTTTCGCTGTATATACCAGTGTATCATAACAGCGGGAGGCTCGTCTTTCCATTCATTTAATAAACGTGTTATGATAAGAACTAGGATTCTCGCGGAA >strand + ysfA ysfA CTCCTTTTTTTAACATGATGTTATTATATCGCAAGAACAGCACATAATAAACCAGGTGCAGGGTTAGAATATACGTATTACATTTTAG >strand - ysgA similar to rRNA methylase CTAAAATGTAATACGTATATTCTAACCCTGCACCTGGTTTATTATGTGCTGTTCTTGCGATATAATAACATCATGTTAAAAAAAGGAG >strand + ysdB ysdB CTTCATCAGTCCTGCATTCTTATTATACAAAACATGGCGCTAACCATACGACTATTTCACTTGAAAATCGGGTATATGTTTTTACAGCTTTCTTTTAGAAAAAGTGAAACCTTTTTCTATGCTTTTCGTATTACATCAGATCATCACCATAAG >strand - ysdC similar to endo-1,4-beta-glucan CTTATGGTGATGATCTGATGTAATACGAAAAGCATAGAAAAAGGTTTCACTTTTTCTAAAAGAAAGCTGTAAAAACATATACCCGATTTTCAAGTGAAATAGTCGTATGGTTAGCGCCATGTTTTGTATAATAAGAATGCAGGACTGATGAAG >strand + yscA yscA AGTTTCTTCGAATTGATTCACAACTCATTTTGAATGCATTTTGCAGGCAAACAAAAAGTGTGGGCATTATAAAACACCCACACTACGAACATCTGTATCAATAAAAATGTACGGTCAACCTGCCAACTACATGCATGTGTC >strand - infC initiation factor IF-3 GACACATGCATGTAGTTGGCAGGTTGACCGTACATTTTTATTGATACAGATGTTCGTAGTGTGGGTGTTTTATAATGCCCACACTTTTTGTTTGCCTGCAAAATGCATTCAAAATGAGTTGTGAATCAATTCGAAGAAACT >strand + ysaA similar to hypothetical protein TCTTTCCGTTTTCATTCTAGCATGTTTTCGTTTTGCGGCTGACTGTTTTTTTAAAATCGTCTGAAAACATATTTCACAAGGCTTTTATCTATTGTAAAATAAAACATGAGCCTTTTGGCAGAAAGATTTGAATCTG >strand - 
lytS two-component sensor histidine CAGATTCAAATCTTTCTGCCAAAAGGCTCATGTTTTATTTTACAATAGATAAAAGCCTTGTGAAATATGTTTTCAGACGATTTTAAAAAAACAGTCAGCCGCAAAACGAAAACATGCTAGAATGAAAACGGAAAGA >strand + ytbD similar to antibiotic resistanc CTCACAGGCACCAAAAAGTGACTATAGCACTTCAAAGTACGTACTTTTATTTCTTAACGATATGTTTCATAATCACATTTGCCGGCTGATTTGAATCTGCTCTTTTTTATTTATATAGCTTACTCGGCAGAATTTCAATCATACAACACATCT >strand - ytcD ytcD AGATGTGTTGTATGATTGAAATTCTGCCGAGTAAGCTATATAAATAAAAAAGAGCAGATTCAAATCAGCCGGCAAATGTGATTATGAAACATATCGTTAAGAAATAAAAGTACGTACTTTGAAGTGCTATAGTCACTTTTTGGTGCCTGTGAG >strand + ytvI similar to hypothetical protein TTTAAGATGAATGTATTCATATTCAGCCGCAGCGTGAATACATATAAAAAATAGGACATGCTGACG >strand - ytwI similar to hypothetical protein CGTCAGCATGTCCTATTTTTTATATGTATTCACGCTGCGGCTGAATATGAATACATTCATCTTAAA >strand + ytrI ytrI ATGGTGAATTTTATCAATAGCTACTTCATTGCGTTAGGGGTGCTGATCGGGGGCGCACTCATCGGAGGGCTCGGAGCATACTTGGCAGGCGAACCCCCGCTTACAGCCATTACAAAGCTTGCCAACCGGTTAAAAATATGGGCGCTTGTCGCAGCTATCGGAGGCACCTTTGATGCGGTATACAGCTTTGAACGCGGCATACTCGAAGGCAATACGAGAGATATCTTTAAACAGCTTCTTTTAATTATTTCGGCCATGGGGGGCGCGCAAAGCGGCTGGCTTATTATATCATGGCTGACC >strand - dnaE DNA polymerase III (alpha subun TGCGACAAGCGCCCATATTTTTAACCGGTTGGCAAGCTTTGTAATGGCTGTAAGCGGGGGTTCGCCTGCCAAGTATGCTCCGAGCCCTCCGATGAGTGCGCCCCCGATCAGCACCCCTAACGCAATGAAGTAGCTATTGATAAAATTCACCATAAATCCTGCTTCCTGATCCATCTTTATCACCTCACCTTTTATATCATATTGAGCTTTCAGGACAAGTATGATGATGAGTCAAAAAAGAACATTTGTTTCTTTTGGAAACCGGCCTTATAATGAAGGAAGAAATGGATCGTAATGAGA >strand + ytpI ytpI ATTGTTTTTATTCATTAAATCACATTCTCTGCCGACGGAAAAGAAAAATAACGTCAAAACCAAGCCCGTTTTTGTATAAAATGGGGTTGTAAACTTTTAC >strand - ytqI similar to hypothetical protein GTAAAAGTTTACAACCCCATTTTATACAAAAACGGGCTTGGTTTTGACGTTATTTTTCTTTTCCGTCGGCAGAGAATGTGATTTAATGAATAAAAACAAT >strand + ytlI similar to transcriptional regu CTTCACAAAATATATAATTCCTATTTGTTTACTAAGCTTTTATTGTTTATACTATAAAGCATACTCTGATCTTTGACTAATCAAAAAAAGTAAACCTTCACTTCAGTAATTTTAATGATGAA >strand - ytmI similar to hypothetical protein 
TTCATCATTAAAATTACTGAAGTGAAGGTTTACTTTTTTTGATTAGTCAAAGATCAGAGTATGCTTTATAGTATAAACAATAAAAGCTTAGTAAACAAATAGGAATTATATATTTTGTGAAG >strand + ytdI similar to hypothetical protein TTTTCCTAAACTCTCTCATTATACGATTCAATATGAAAAATGTTTCACTTCATGAGAAAGCTCCGGCTTAACGGGCTGCCGGCAAACGTGGTACACTTATGGCCAAAGGTATTTTATCACAAATACTGGATGGCAGAAAAACTTAAATATGACAG >strand - yteI similar to protease IV CTGTCATATTTAAGTTTTTCTGCCATCCAGTATTTGTGATAAAATACCTTTGGCCATAAGTGTACCACGTTTGCCGGCAGCCCGTTAAGCCGGAGCTTTCTCATGAAGTGAAACATTTTTCATATTGAATCGTATAATGAGAGAGTTTAGGAAAA >strand + braB branched-chain amino acid trans TTATGAAAGCGGTACAAGGTATGATATTTATGTTACCATAACAAAATTTAATAGAAAATTACGAAAAACTAGTATTGACTTCTAATTTTTTTACCATATAATAAGATTTGTTCGTTTCGTCATATTATCTGACAATTAATTGACAGAATATTTTAACATAATTA >strand - nifZ NifS protein homolog TAATTATGTTAAAATATTCTGTCAATTAATTGTCAGATAATATGACGAAACGAACAAATCTTATTATATGGTAAAAAAATTAGAAGTCAATACTAGTTTTTCGTAATTTTCTATTAAATTTTGTTATGGTAACATAAATATCATACCTTGTACCGCTTTCATAA >strand + ytvP similar to hypothetical protein GCTGTTTACCTTAATCTGTCAGGGTGTTTTATTTATTCAGAAAGATTTCAGGATATGTATTGAGAAAAACTGTCGTTTTCATTGTTATTATGATACCATGTAACAAACATTTTTTGACCAGAAATCAAAAACTTTTTGTGCAAAATCGGCAGGATTTGACTATTTT >strand - ytwP ytwP AAAATAGTCAAATCCTGCCGATTTTGCACAAAAAGTTTTTGATTTCTGGTCAAAAAATGTTTGTTACATGGTATCATAATAACAATGAAAACGACAGTTTTTCTCAATACATATCCTGAAATCTTTCTGAATAAATAAAACACCCTGACAGATTAAGGTAAACAGC >strand + ytsP similar to hypothetical protein CGACAAATGCCGGCAGAAAAGCCGGAACTTTGTTTTATATTATAGAAAAAGTGAGGGTTTTCCATGTTCCATGTCGAAAAACAATCTGGAGATAAAGAAAAAGACTATCAGCTTCTGCTAAAACAGCTCGAAGCCATGACCGAAGACGAAACAGACCAAATAGCTAACTATGCAAATGCCTCAGCACTGCTGTATCATTCGCTGCCTGAAGTCAACTGGGCGGGTTTCTATTTTGCCAAAGAAGAGGATGGACAGCTTGTGTTAGGTCCGTTCCAAGGCCTGCCGGCATGTGTTCGGATT >strand - yttP yttP 
CATCCTCTTCTTTGGCAAAATAGAAACCCGCCCAGTTGACTTCAGGCAGCGAATGATACAGCAGTGCTGAGGCATTTGCATAGTTAGCTATTTGGTCTGTTTCGTCTTCGGTCATGGCTTCGAGCTGTTTTAGCAGAAGCTGATAGTCTTTTTCTTTATCTCCAGATTGTTTTTCGACATGGAACATGGAAAACCCTCACTTTTTCTATAATATAAAACAAAGTTCCGGCTTTTCTGCCGGCATTTGTCGATAAGTTCTTACAGGAACAGCGGCTTTCTGCTTGAATAGTATCAAGAGGA >strand + rpsD ribosomal protein S4 (BS4) AAAAGTTTACCACTAATTTTTGTTTATTATATCATAAACGGTGAAGCAATAATGGAGGAATGGTTGACTTCAAAACAAATAAATTATATAATGACCTTTGTGTGAAATATTGCAGCCTTTTTGTTCAGCTTCTATGTTTTCATTTTGTTCCTTATCAATAAGGTGTATCGTGTAACTCTCTGCTGCTGGAGCGAGGATACATGAAAACAAAATGTGCATGGTCGAATAGAGCAGACGGTTTTTATTTTCCACAAAAATAAAACC >strand - ytrP similar to two-component sensor GGTTTTATTTTTGTGGAAAATAAAAACCGTCTGCTCTATTCGACCATGCACATTTTGTTTTCATGTATCCTCGCTCCAGCAGCAGAGAGTTACACGATACACCTTATTGATAAGGAACAAAATGAAAACATAGAAGCTGAACAAAAAGGCTGCAATATTTCACACAAAGGTCATTATATAATTTATTTGTTTTGAAGTCAACCATTCCTCCATTATTGCTTCACCGTTTATGATATAATAAACAAAAATTAGTGGTAAACTTTT >strand + acuA acetoin dehydrogenase CCTTTGCTGAAGTTGTCACAATTATTATAGTATATATTCATATTCTTCTCAATTTTTAAAATATAAACCATGTTGAAAACGCTTTATAATTTGGTATTCTTAAAGAAGGCATGTATTTTTGATAAGAATT >strand - acsA acetyl-CoA synthetase AATTCTTATCAAAAATACATGCCTTCTTTAAGAATACCAAATTATAAAGCGTTTTCAACATGGTTTATATTTTAAAAATTGAGAAGAATATGAATATATACTATAATAATTGTGACAACTTCAGCAAAGG >strand + ytoQ ytoQ AGTTGTTTATGTTCTTACATGTTTTTTATCGATACTCGTCCATTTGGATCATACCAAACTGCTTGAGGCAAAACCACTTATATGCAACGAATGAATATGTAAGATTGGCTTTCCCATTATCATTATCATATTTTATTTGCATTATCAAGCTCGTTTCGTTTAACATCGTGTACATGAGGGTGAAATTTACAGATGGATATACAT >strand - ytpP similar to thioredoxin H1 ATGTATATCCATCTGTAAATTTCACCCTCATGTACACGATGTTAAACGAAACGAGCTTGATAATGCAAATAAAATATGATAATGATAATGGGAAAGCCAATCTTACATATTCATTCGTTGCATATAAGTGGTTTTGCCTCAAGCAGTTTGGTATGATCCAAATGGACGAGTATCGATAAAAAACATGTAAGAACATAAACAACT >strand + ytzB ytzB TTCTATGTAGTCCTTTTATTTTATTAGAAACAATGAAGTTTTCCAAGCACTCTGACCCAATCTGCTCGATCAGCCGCGTATTGCTTAACAGGATTTGGCTGTTTCTGTATAATGAAAAGTACGATCAA >strand - ytoP similar to endo-1,4-beta-glucan 
TTGATCGTACTTTTCATTATACAGAAACAGCCAAATCCTGTTAAGCAATACGCGGCTGATCGAGCAGATTGGGTCAGAGTGCTTGGAAAACTTCATTGTTTCTAATAAAATAAAAGGACTACATAGAA >strand + ytzH ytzH CTATCTATTATTTCGACTAACAACATACCATGAATTAAAAAGACCCGCAAGCTCGTTTTTCCAGTGTATCTCTCTTAAGGCCGAGCGCATAAAAAAAGAGGGTCCCGTCTGCTGGAACACTCCTTTCCCATGTATAAAAACGCTCTGTTTTGAACAATTTAACCCGTATGATACTG >strand - ytmQ similar to hypothetical protein CAGTATCATACGGGTTAAATTGTTCAAAACAGAGCGTTTTTATACATGGGAAAGGAGTGTTCCAGCAGACGGGACCCTCTTTTTTTATGCGCTCGGCCTTAAGAGAGATACACTGGAAAAACGAGCTTGCGGGTCTTTTTAATTCATGGTATGTTGTTAGTCGAAATAATAGATAG >strand + ytkP similar to cysteine synthase CCTTCTTTGACAGCTATTATTTTATCAATAAAGAGGCTGTGCGGATAGAGGAACAGTGTATGAAAACACTCTCGCACAAACGTTTATTATGTTAGGATATTATATGACACTTGTTTTA >strand - ytlP similar to ABC transporter (per TAAAACAAGTGTCATATAATATCCTAACATAATAAACGTTTGTGCGAGAGTGTTTTCATACACTGTTCCTCTATCCGCACAGCCTCTTTATTGATAAAATAATAGCTGTCAAAGAAGG >strand + ytiP similar to hypothetical protein CCGCAAGAAATGACTTGCAAAAACCAAATAAAACGAATAATATTAATGGTGTTTTGTTAAAACGTT >strand - ytjP similar to Xaa-His dipeptidase AACGTTTTAACAAAACACCATTAATATTATTCGTTTTATTTGGTTTTTGCAAGTCATTTCTTGCGG >strand + ytzE similar to transcriptional regu CCAAAATTTACTACGAAATAGAATAAAAAAAGTTTCTTTAACCATAATAATATTGAGCCGAAATACATAGTAATCCAACAAGGATGGCTGCCCAACGCGAAATTGGAAAAAATTAATAGGAGTTATCCCTAGATTCATCACTGCAATATCGTGTATAATGAGGGCGAATTATTTTAACAATTACAGATGTCTGCCTTTGTTGCTATTTCGTACGCAGTTGATAGTCGTGAGTTGTGGTTAGTCAAGTCCATCCGATCTTA >strand - ythP similar to ABC transporter (ATP TAAGATCGGATGGACTTGACTAACCACAACTCACGACTATCAACTGCGTACGAAATAGCAACAAAGGCAGACATCTGTAATTGTTAAAATAATTCGCCCTCATTATACACGATATTGCAGTGATGAATCTAGGGATAACTCCTATTAATTTTTTCCAATTTCGCGTTGGGCAGCCATCCTTGTTGGATTACTATGTATTTCGGCTCAATATTATTATGGTTAAAGAAACTTTTTTTATTCTATTTCGTAGTAAATTTTGG >strand + ytfP similar to hypothetical protein 
TGATGATAAAGATGAAACAATATGACGTAATCGTAATCGGCGGAGGCCCTTCAGGCTTGATGGCTGCGATTGCAGCAGGAGAACAGGGCGCTGGCGTGTTGCTGATAGATAAAGGAAATAAATTAGGACGGAAACTCGCGATTTCCGGGGGCGGCCGCTGCAATGTGACGAACCGCCTTCCTGTGGAAGAAATTATCAAGCACATCCCCGGCAACGGGCGTTTTTTATACAGCGCGTTTTCTGAATTTAATAATGAGGACATTATCAAGTTTTTTGAAAACCTCGGCATTCAATTGAAGG >strand - ytgP similar to spore cortex protein ATTTATTTCCTTTATCTATCAGCAACACGCCAGCGCCCTGTTCTCCTGCTGCAATCGCAGCCATCAAGCCTGAAGGGCCTCCGCCGATTACGATTACGTCATATTGTTTCATCTTTATCATCAACTTTCTTTTCATTTCGTGTGCGGACACCCGTCCGCAGTACTATTATAGCGAAAGAAAGCCGCTCCTTAAACGTCTTTTGCCGCGGACATGAATGTTCACAGGATTGCTTTTGTATTTATTACCGTCTAAATGCCCAGTTGTGGTAAAATAGAAAAGTTGAATCTTCGTAGAAAATG >strand + ytdP similar to transcriptional regu CCTTTCACGGATTCTTGCATTCACTGTAGCGCAGCAGGCAAAAGTCCGTCTACAATCATATGATCACATCCTGTTTTATCCCTTTATAATCAAGGGTTCAGCCGGTCCACCTGAAAAAAAGACACCGATATTAGAAAATGATTATTGACAATATCTATCAATCTTTGGATTAATTGTATTAAGGT >strand - yteP similar to hypothetical protein ACCTTAATACAATTAATCCAAAGATTGATAGATATTGTCAATAATCATTTTCTAATATCGGTGTCTTTTTTTCAGGTGGACCGGCTGAACCCTTGATTATAAAGGGATAAAACAGGATGTGATCATATGATTGTAGACGGACTTTTGCCTGCTGCGCTACAGTGAATGCAAGAATCCGTGAAAGG >strand + msmR transcriptional regulator (LacI TTATTCGCTTACTTCACATTTACGAGACAGATCTGGGTTCCCCTTTATTTTTCCAACATAAATCACATCATTTACTAAATATTTAGTAAATATTATGGAATATTTCGAAAACAATTATTGTAACCGCTTACTTTTATATGATAATATCAATTTATCAAAAACAGATGAGTTAATATTTTACTAAATA >strand - ytaP ytaP TATTTAGTAAAATATTAACTCATCTGTTTTTGATAAATTGATATTATCATATAAAAGTAAGCGGTTACAATAATTGTTTTCGAAATATTCCATAATATTTACTAAATATTTAGTAAATGATGTGATTTATGTTGGAAAAATAAAGGGGAACCCAGATCTGTCTCGTAAATGTGAAGTAAGCGAATAA >strand + ytvA similar to protein kinase ACCGGCAAATCTGTAAAAAGCTTGTTTCTTCCATTATATGTGAACACATGATCGGTAAGCTTGATGCATTGGATTCATTTTTCACCCAGTGACATTGACCTAGTCACCTATTTAGCATGTAGAAAACCTTTCCGTTCATATGTTCAAATTCTGTTCTTATTTTTAGTGTATACGAGGACAAGCCCTAATGACAAACAACAAACTGCACTTGCTTGAATCAGAACATGTGTTGTGCTACGGTTACTGTAGAATTCATTTTTAAAAAGGGGAATATCAGGCTTTCGCATAGCAAGCTGACGG >strand - ytvB ytvB 
ATGTTCTGATTCAAGCAAGTGCAGTTTGTTGTTTGTCATTAGGGCTTGTCCTCGTATACACTAAAAATAAGAACAGAATTTGAACATATGAACGGAAAGGTTTTCTACATGCTAAATAGGTGACTAGGTCAATGTCACTGGGTGAAAAATGAATCCAATGCATCAAGCTTACCGATCATGTGTTCACATATAATGGAAGAAACAAGCTTTTTACAGATTTGCCGGTATGTAGATATCGAAAATCATCGGAATCGTTTAAGGGGGCTGGATGGCCTGAAGTCGCTTTACGAAATAAGGGCT >strand + yttA yttA CTTACCTAAAAAAGCTCATGAAAAAAACGAAACCTAAAACGGCTCCGTACGAATCCAGTTATCATAGCACATTCATTCTATCTGTCTCTCTCACATTTTTCAACTGATGAGCGTGAAAAAATCTAACAAATATTGTTTGAAAAATGTAAAATAAATAGAAAAAG >strand - yttB similar to multidrug resistance CTTTTTCTATTTATTTTACATTTTTCAAACAATATTTGTTAGATTTTTTCACGCTCATCAGTTGAAAAATGTGAGAGAGACAGATAGAATGAATGTGCTATGATAACTGGATTCGTACGGAGCCGTTTTAGGTTTCGTTTTTTTCATGAGCTTTTTTAGGTAAG >strand + ytqA similar to hypothetical protein TTGGTAATGGAAATCATCCATCATTCATATTGTAGCCTCATCTGACTTTATTCAATAAAGTAATTTATGGTCGATATTCATTTGTTCTCTTGTGAAATGCGTGCTACAATTTTAGGATGCAATCGTACGCA >strand - ytzC ytzC TGCGTACGATTGCATCCTAAAATTGTAGCACGCATTTCACAAGAGAACAAATGAATATCGACCATAAATTACTTTATTGAATAAAGTCAGATGAGGCTACAATATGAATGATGGATGATTTCCATTACCAA >strand + pckA phosphoenolpyruvate carboxykina CATACAACCTTGCAACAGGTTAGCACCTTGGTTGTCTCACTCAGTTGAACATAATAAATAACAGAGAAACCGGTTGCTGGGCTTCATAGGGCCTGTCCCTCCGCCAGCTCGGGATAAGAGTATCCGCTCAATGAAATATCTTATCGTAAAAGGGTTTGCAATGTCAATATGATTCAGAAGAAATAGGCACCTATATTGAGGGAAAACAATGGAAATGCACACACAAAAAACAATAAATAGTATAGACTATTTGAAAATATATGTTATACTAATTCACAATTAGCAAAACACAAAAAACGA >strand - metK S-adenosylmethionine synthetase AGCGGATACTCTTATCCCGAGCTGGCGGAGGGACAGGCCCTATGAAGCCCAGCAACCGGTTTCTCTGTTATTTATTATGTTCAACTGAGTGAGACAACCAAGGTGCTAACCTGTTGCAAGGTTGTATGATTCCTTGAGCGATAAGAGTGAAAGGCACAAAGACCAAACCCTTTCCTCGATGGAAAAGGTTTTTTTATTTCATAAATATGCCAATTAACATTCTCTAATATAACTGTACATTGTATAAGAGGGAGCGAGTTCCGTATCATATATACAAGGTCTTTCGGGAGGCCTTGTGCA >strand + ytlA ytlA TTTTTCTATATGCTTTCCTTACTCCAAAATTACATCCGGAGAACCTGGTTAGGCATTCACACATTTTTATACCCATCATACGATATGTAAAGCAAAAAAGGATATTAACTTGATGCGAAA >strand - ytmA ytmA 
TTTCGCATCAAGTTAATATCCTTTTTTGCTTTACATATCGTATGATGGGTATAAAAATGTGTGAATGCCTAACCAGGTTCTCCGGATGTAATTTTGGAGTAAGGAAAGCATATAGAAAAA >strand + ytjA ytjA CTTTTTTAAAAATGTAATTTCATTATAACATACTTTTCCGATAAGAATTATTGATTAACTTTTCAATCTGTGTTTTTTTTCTTATACTTGCATCAGAA >strand - ytjB ytjB TTCTGATGCAAGTATAAGAAAAAAAACACAGATTGAAAAGTTAATCAATAATTCTTATCGGAAAAGTATGTTATAATGAAATTACATTTTTAAAAAAG >strand + ythA similar to cytochrome d oxidase AATAAATCGTAATAATTACGTTTTATTTTATAACCTTACAAACGTTTTGTCAATCTTCTTATATCCTTCCCTCTGTTTCACATCGTAAACCTCATGGCATTTAAGAATAGGAAGAATAATCTAACCGAGGAAGCAAATAATAAACCCAAAATACATACAGAACGTCTTTTTGGAAAGGTGGTTTACGAT >strand - ytiA similar to hypothetical protein ATCGTAAACCACCTTTCCAAAAAGACGTTCTGTATGTATTTTGGGTTTATTATTTGCTTCCTCGGTTAGATTATTCTTCCTATTCTTAAATGCCATGAGGTTTACGATGTGAAACAGAGGGAAGGATATAAGAAGATTGACAAAACGTTTGTAAGGTTATAAAATAAAACGTAATTATTACGATTTATT >strand + yteA alternate gene name: yzwB TAATCCTTAAAATGCTTTTAATACCATACACCTGTGTCTCCTTTGTGTCAAACACAGATCATGCAGTCCAAATGGCCTGCCTTACCTATTATAAACCTAATTTCAGAAATGAAAAAGAAACAAGCTTAAATTTCTTTCTCCACTGATAGCAAAGGTTGTATCCGCTTACCATAAAAGGAAAACCTCCTATATTCCTCCACCCGCCATTTATGAAAGTTCTCAGGCGATCTGTTCATACTAAAGGTATGCATCACAGAG >strand - menF menaquinone-specific isochorism CTCTGTGATGCATACCTTTAGTATGAACAGATCGCCTGAGAACTTTCATAAATGGCGGGTGGAGGAATATAGGAGGTTTTCCTTTTATGGTAAGCGGATACAACCTTTGCTATCAGTGGAGAAAGAAATTTAAGCTTGTTTCTTTTTCATTTCTGAAATTAGGTTTATAATAGGTAAGGCAGGCCATTTGGACTGCATGATCTGTGTTTGACACAAAGGAGACACAGGTGTATGGTATTAAAAGCATTTTAAGGATTA >strand + ytcA similar to NDP-sugar dehydrogen AGCTCATGTTTATTACATGATATTCCAAGAGCGCATCCCGATGTTTGTACATCTGAAATTAGGCGGAAGCAACTGGGCAGAAAGCGGAATCACGAACCAGCCCTTTTTTAATAGGATATTGATATATGACCAGTAATCTTG >strand - ytdA alternate gene name: yzwA; simi CAAGATTACTGGTCATATATCAATATCCTATTAAAAAAGGGCTGGTTCGTGATTCCGCTTTCTGCCCAGTTGCTTCCGCCTAATTTCAGATGTACAAACATCGGGATGCGCTCTTGGAATATCATGTAATAAACATGAGCT >strand + ytaA similar to spore coat protein 
CCTGCATGATTTTTAACTAGTTAGAATCATGATACGGCTGAACAACCAACTTGTGCGGAATGGGACTGCCCTAAAAACAAAACGGGTAAACAACCAGATTCATCTATTAAATATAGTCAAATAACTCAATCATTTTTTCCTTTTGATTCATAAGGTAGG >strand - ytxN alternate gene name: ytbA; simi CCTACCTTATGAATCAAAAGGAAAAAATGATTGAGTTATTTGACTATATTTAATAGATGAATCTGGTTGTTTACCCGTTTTGTTTTTAGGGCAGTCCCATTCCGCACAAGTTGGTTGTTCAGCCGTATCATGATTCTAACTAGTTAAAAATCATGCAGG >strand + yuaJ similar to hypothetical protein GCCATGATTTCTAAAGCGTGTGTATCATATAAGTTTCTTAAAAGAACATATACATCAGTCATGTGATCTTGTTGTTTTTTTCGTTTTCTTGTATAGTAGAAACGAACTAAATACTGATGACCACAAGGGGAGCATTAAAGCTGAGAGTGAGCGGTTTCGTTCTGACCCTTTGAACCTGTTAGTTAACGCTGGCGTAGGGATGTGGCAAAGTCAAATGAATTGCAGATCGTAAAGCAGTGCGTGGAACTTTTCTCATCCTTCCGCGTGCTGCTTTTTTGTTGTCCTTTTGGTTGTCTGTCA >strand - glgB 1,4-alpha-glucan branching enzy CATAATTATCGGCCCGTTGGTCAAGCGGTTAAGACACCGCCCTTTCACGGCGGTAACACGGGTTCGAATCCCGTACGGGTCATCCAGAAGCCTTGCATATCCTGCAAGGTTTTTTTGTTTTTATAAATCATGATATGTCTTAGATTTTGTTCTTTATTTTAAAAACAGACTACAAAAATCTCCATATATTTCGTTTTTCTTCAGAAAATGAAGTTAATTGTCTATAAGTATAAGCGCTTTCAGGAAAGGGCTTTTTTTTATTTCTTCGAATAAATACTATAAATGAAAACTATGATGTCA >strand + yuaE yuaE TTCATCAATGCATTGATGAACGGCTGCCGTTACCATTTATACGAGACACCTCGGGAAAAGTTTCAAAATTTTAAGACAAATAACCCATTTGAAGTGCTTTATAAATGATAATTGCTTATATATATGGTAATGTCATT >strand - yuaF yuaF AATGACATTACCATATATATAAGCAATTATCATTTATAAAGCACTTCAAATGGGTTATTTGTCTTAAAATTTTGAAACTTTTCCCGAGGTGTCTCGTATAAATGGTAACGGCAGCCGTTCATCAATGCATTGATGAA >strand + yuaC similar to hypothetical protein GTAAATAAAGTTTGTTAAATAAAAATTTAATGTTTTTAACAACCTTAATCTAACATATGGTTTTTAAACTGTCAAAGTCCCATTTTGATTTTTCTTTTTGACTTTTCCTCATGTTTCGTTAAGATAAAGAACATAAAGAAATAACTTAACGAATTTAATTTTCTAAAG >strand - gbsA glycine betaine aldehyde dehydr CTTTAGAAAATTAAATTCGTTAAGTTATTTCTTTATGTTCTTTATCTTAACGAAACATGAGGAAAAGTCAAAAAGAAAAATCAAAATGGGACTTTGACAGTTTAAAAACCATATGTTAGATTAAGGTTGTTAAAAACATTAAATTTTTATTTAACAAACTTTATTTAC >strand + yubC similar to cysteine dioxygenase 
TATTTCAAATCGATTGTGTTTTGTCAATGGAGTAAATTTCAAGCACAACATAGAAAGCTTGAAAGCTTTTCCCCGATATTGTGTCTAGAACTGCACTTATGAATCCCTGCTCGTTCCCTCAAGCGAATTAAAGCAGGCCGTTTTAATCCTTTCAATCCACTATTCCAACTAATAAACTCAAATAAGTTGGTGATAAGACAACATTTTACCCCCACATTTTTTTGTGTTCACTGGGCGTTTGCCGCCTCTAAATTGTATATGGGTAAATACCATGAATTAACCTATGAAATACCAATGTAA >strand - yubD similar to multidrug resistance TTAAAACGGCCTGCTTTAATTCGCTTGAGGGAACGAGCAGGGATTCATAAGTGCAGTTCTAGACACAATATCGGGGAAAAGCTTTCAAGCTTTCTATGTTGTGCTTGAAATTTACTCCATTGACAAAACACAATCGATTTGAAATAATATTTGATTATATGCTCATTCATTCACATAATTATAATAAAATAATATCATTTCCCCTGAAAGGGAGAATATTAGAATGAAAGCTAATGGTATGCAAAATCTATTAGCTTCTTTTGTTTTCAAAAAATGATGTGTTTCGCATGTATTCATATA >strand + yulF similar to hypothetical protein ATACATAGTTTAATTAAATTGTACACTATCGTCCCTATAATGAAAAATAAAAACCGTTAATTGCTGGAAAAGCTTTCTCGGTTTTAACATTCTATTTATATCTATCATGTGATTTTGTTCTTTTTAAGCTATAATTTAGCTATATCATGCA >strand - yubA similar to hypothetical protein TGCATGATATAGCTAAATTATAGCTTAAAAAGAACAAAATCACATGATAGATATAAATAGAATGTTAAAACCGAGAAAGCTTTTCCAGCAATTAACGGTTTTTATTTTTCATTATAGGGACGATAGTGTACAATTTAATTAAACTATGTAT >strand + tgl transglutaminase AGGTTTGATACAGTATATATCGGCTGCTTAAAAAAGTTTTAAAATAAAAAATGGAAGAAGTTCTTTTTGGCTGTCTTCTGCCTTTTTAACTTTCATTGCCCAAGCTCTTTGCATATCTTATAAAAAA >strand - mcpB methyl-accepting chemotaxis pro TTTTTTATAAGATATGCAAAGAGCTTGGGCAATGAAAGTTAAAAAGGCAGAAGACAGCCAAAAAGAACTTCTTCCATTTTTTATTTTAAAACTTTTTTAAGCAGCCGATATATACTGTATCAAACCT >strand + yugO similar to potassium channel pr TACGATTATTTTAATGAGTTTTCTTTCTCGTGTACAGAAATGCGCTTTTTCTTCCGGCTTGGAGCACACAATCCAATCAGCAAAAAGAACAGGCCTGCTGAAAACAAATAAAAGGCGAAACTGCCGGCCAGTTCTTTCAGAAAATGAAGCCAGGAGAAGCTTCCGGAAAAGTGATTATATGAATTCAAATCGAAAATAAACAAAATCACCGCGGATGCCGCCAGCTGCGTACCGATTAAAACCACGATCGTTCTTTGAAAGATCGTCATCTTTTTCATCCTTTTTGTTTTAGTTTATGGA >strand - yugP yugP 
GATGAAAAAGATGACGATCTTTCAAAGAACGATCGTGGTTTTAATCGGTACGCAGCTGGCGGCATCCGCGGTGATTTTGTTTATTTTCGATTTGAATTCATATAATCACTTTTCCGGAAGCTTCTCCTGGCTTCATTTTCTGAAAGAACTGGCCGGCAGTTTCGCCTTTTATTTGTTTTCAGCAGGCCTGTTCTTTTTGCTGATTGGATTGTGTGCTCCAAGCCGGAAGAAAAAGCGCATTTCTGTACACGAGAAAGAAAACTCATTAAAATAATCGTAATAGAGAAATGATCATGAACA >strand + yuzA yuzA GATTTTTTATTATGCAGAATGTCCTCACCACATGTAAAGCAATCAGCTCTCACCGATCAGCATACCAGATGGTTTTATTTTGCAAGGTGCTGGGAAAGAAGCTGATATAAGCGAGGGCAGGCCAGCTTCCCTCATATGGAGAATGCATGTATAGATTCTCCAATATGTCACAATCTATAACTGTTTTAATAATTCATGG >strand - yugJ similar to NADH-dependent butan CCATGAATTATTAAAACAGTTATAGATTGTGACATATTGGAGAATCTATACATGCATTCTCCATATGAGGGAAGCTGGCCTGCCCTCGCTTATATCAGCTTCTTTCCCAGCACCTTGCAAAATAAAACCATCTGGTATGCTGATCGGTGAGAGCTGATTGCTTTACATGTGGTGAGGACATTCTGCATAATAAAAAATC >strand + yugF similar to dihydrolipoamide S-a TGCAATGTCAAGAAATCTCATGACATTATATCACGTTTTTTGACGAATGCTAAGAAAAAACGAGGAGAAACTTCCGCTAGAATTGACAATTTCAGGGCATAATACAACCATCAAAAA >strand - yugG similar to transcriptional regu TTTTTGATGGTTGTATTATGCCCTGAAATTGTCAATTCTAGCGGAAGTTTCTCCTCGTTTTTTCTTAGCATTCGTCAAAAAACGTGATATAATGTCATGAGATTTCTTGACATTGCA >strand + patB aminotransferase AAATTTGCAGTTCTATCTTATCATAGTGAAGCTTAGAAGAATAAACATAC >strand - yugE yugE GTATGTTTATTCTTCTAAGCTTCACTATGATAAGATAGAACTGCAAATTT >strand + yuxJ alternate gene name: yugC; simi TTTCCTCCGATCAGAAAATGCTATCTTTATAGTATATCAATTCCCCTTCTCGACATTTCTTCATTTTCTAAAAATAATGAATTTTCCCAGCCGCTTCGTTTCTTTTAAATTGTGAATCGTTTCTTTATAATTAGTAAGGAACCCGAAATAATAAGAAGGTGATGTTATTTCTATCCGTAAGAAAAACTTTGTCATTGTGTGGCTGGCGAACTTTTTTGTCTCCGCTAGCACAACGATGATCGTTCCTTTTCTCTCCTTATACATTGAGACGCTCAGCTCTTTTTCAA >strand - kapD alternate gene name: yugB TTGAAAAAGAGCTGAGCGTCTCAATGTATAAGGAGAGAAAAGGAACGATCATCGTTGTGCTAGCGGAGACAAAAAAGTTCGCCAGCCACACAATGACAAAGTTTTTCTTACGGATAGAAATAACATCACCTTCTTATTATTTCGGGTTCCTTACTAATTATAAAGAAACGATTCACAATTTAAAAGAAACGAAGCGGCTGGGAAAATTCATTATTTTTAGAAAATGAAGAAATGTCGAGAAGGGGAATTGATATACTATAAAGATAGCATTTTCTGATCGGAGGAAA >strand + yufL similar to two-component sensor 
ATTCCATTTTTTCTCCTTTTTACGAATTACGGGCCCAAATCTTCACAAAGCAGCATTTTGAGAGTAAAGTAGTACGCAGGAATCATAAAA >strand - yufK yufK TTTTATGATTCCTGCGTACTACTTTACTCTCAAAATGCTGCTTTGTGAAGATTTGGGCCCGTAATTCGTAAAAAGGAGAAAAAATGGAAT >strand + yufT similar to NADH dehydrogenase TCCTTCAGCTTGATCAGGATTGATTTTAAATATACAAATAACCTCTTTTTTGTTAGACTTATTTTGAGGTCAAAAGAACTCATAAAAATCCATTACAGCAGTTCGAAGCGTGGCAGCTTGCAGAAAAGTGCGATATAATTTGGACTAGGTCGTTTGACCGTATATTTTATACATAGCGCATTTTTCAATTGAGAACGAGAAAAGGAGGTCTTATCTTTGCAGCTCTTACATCTTGCTATTTTATCGCCTTTTTTATTTGCTTTTATCATTCCCTTCTTGGC >strand - yufS yufS GCCAAGAAGGGAATGATAAAAGCAAATAAAAAAGGCGATAAAATAGCAAGATGTAAGAGCTGCAAAGATAAGACCTCCTTTTCTCGTTCTCAATTGAAAAATGCGCTATGTATAAAATATACGGTCAAACGACCTAGTCCAAATTATATCGCACTTTTCTGCAAGCTGCCACGCTTCGAACTGCTGTAATGGATTTTTATGAGTTCTTTTGACCTCAAAATAAGTCTAACAAAAAAGAGGTTATTTGTATATTTAAAATCAATCCTGATCAAGCTGAAGGA >strand + yuzC yuzC AATCTCAATAGGTCCTTATACTCAAAGGAATCACAACAAGAACGGGACTATCTTTCTATGCGAAAGAGCGTTTATATGTGCTGAAATTGAGAAAGATTCTTAATCTCCTTGATTTTAACGGGTTTCGCATTTTCACTAAGCAAGACTATAGTCCTTCATACCGTATTTTCAAAATTGAATATGCGATTCTTGGCGTTTCCTGACCGACAAACACCGAAAAAGACCGCAACGCTAAAGTTGTGGCCTTTTCGCTTTGTCATATTCGGCAATTAGGGATCTATACATATAGAAACATCCTTT >strand - degQ alternate gene name: sacQ GAAAATACGGTATGAAGGACTATAGTCTTGCTTAGTGAAAATGCGAAACCCGTTAAAATCAAGGAGATTAAGAATCTTTCTCAATTTCAGCACATATAAACGCTCTTTCGCATAGAAAGATAGTCCCGTTCTTGTTGTGATTCCTTTGAGTATAAGGACCTATTGAGATTTGCGGTGTCACGCAGGACTTTTTTGCATACTTTTCGGTGAAAAATGAGCCGAAAGCAGACACACTATTAGTAACAGATCAAATACCTAGGACTCGTTCACCATACACAATTCATTGATCTTTCAAAAAAA >strand + yuzE yuzE AGTTTTTCTTTGTCTGTCAGATAAAAGAACTTATACCCCATCATTGTAACAAAAATGAACTCATATCAGAACTCTTAATATAAT >strand - yueF similar to hypothetical protein ATTATATTAAGAGTTCTGATATGAGTTCATTTTTGTTACAATGATGGGGTATAAGTTCTTTTATCTGACAGACAAAGAAAAACT >strand + yukF similar to hypothetical protein 
ATTCTCCGAATAATCCAAAAGACCCAAATAACTCTCCAACTCCCAAAAAAAACCTTTCAGAAAATTTGATGCTACAGCACCATTTCTGAAAGGTTCAGTTTTTTCTTTCCTTAATTAGTCTTGCCTCCGATGACTTTCTTAGAAGATTAATAGGATAAGCTGTCGTATTATATGAGACATACACTGTCCTGCAAAAAGGCGATTACACCGCCAAATGTATATAGAACGTTTCTCTCATGAAATTGTTTTTTTGCTTCTGTTCTTTGTTGTTGATTTTCACAAACAACAACAATCATAATA >strand - yukE yukE GTCATCGGAGGCAAGACTAATTAAGGAAAGAAAAAACTGAACCTTTCAGAAATGGTGCTGTAGCATCAAATTTTCTGAAAGGTTTTTTTTGGGAGTTGGAGAGTTATTTGGGTCTTTTGGATTATTCGGAGAATGAAGAGAAGATTAAGAAAATAAACCTATTTACTTTACAGTATATGTCATTTTTGAGTGATTCGGGGTAAACCATTGGACTCATTTTGTAAAATTAAAACATATTTGTTTGACTATGCCGAAAAACTGTTTTTATAATAATGACGTTCTAAAAATAAGTGAATTAGC >strand + yuiG similar to biotin metabolism TTAACCTAATTTAAAATCATGTTGACAATCTTTTTGTTCTTTATTACTCTATGATTGTGTATGTCAACCT >strand - yuiH similar to sulfite oxidase AGGTTGACATACACAATCATAGAGTAATAAAGAACAAAAAGATTGTCAACATGATTTTAAATTAGGTTAA >strand + yuiD similar to hypothetical protein ACGTAATGGGTATTTGTAATGAACAATGGTATAATAGGAAAAACATTCTTTCCACTATACTTATTTTAACAAACAACAGTCTAAGACTTGCTTTTTTCTTTTGAAACACGAAGGATTTTTCAAATAG >strand - yuiE similar to leucyl aminopeptidas CTATTTGAAAAATCCTTCGTGTTTCAAAAGAAAAAAGCAAGTCTTAGACTGTTGTTTGTTAAAATAAGTATAGTGGAAAGAATGTTTTTCCTATTATACCATTGTTCATTACAAATACCCATTACGT >strand + yumC similar to thioredoxin reductas TTTTTGCTTTTGTTAGACCCATAATAAAATCATCATTTGTGTTAGCTTCAGCTTATTGGCGGGATTTTATGAAATATATTGAAAAGTTTTTTGTGAAGTCCTTACCCGTAGCACTATTATCATGTTTATCCGGCTGGCCCAAGAAACATTCAGAAAAAGATCATTTCATTTGGTTATTATCGTAGCGGCCTGCGGAAATATTTTCAAGAACGGATGGGCACGTTAGATCCTTACGATTTTTTCTGCTGACTCGTCAATTTGAACGGCAATATGGTATAATTAATAACAATTTTCATTTAG >strand - yumB similar to NADH dehydrogenase TAAATGAAAATTGTTATTAATTATACCATATTGCCGTTCAAATTGACGAGTCAGCAGAAAAAATCGTAAGGATCTAACGTGCCCATCCGTTCTTGAAAATATTTCCGCAGGCCGCTACGATAATAACCAAATGAAATGATCTTTTTCTGAATGTTTCTTGGGCCAGCCGGATAAACATGATAATAGTGCTACGGGTAAGGACTTCACAAAAAACTTTTCAATATATTTCATAAAATCCCGCCAATAAGCTGAAGCTAACACAAATGATGATTTTATTATGGGTCTAACAAAAGCAAAAAA >strand + yumD similar to GMP reductase 
CCATGTCCATGCCATAAAGAAAGCCTGGTGCTGAACGACCAAGCTTTGCTGAACGTATATCATTCGCGTTTTGATCCGCTCTCTAAGACCATTCGTTAAAACTATGATACCTAAACTCACCCTGCATGACAAGCTTCGGCACTTGCACCACACCTGCCAGCAAATCATAATGTTTGACATCCTAAATAAAACAGAGTACATTATATAAGGCGAACATTTCATATATTATACTTTTAAATATTCGTTTT >strand - yuzG yuzG AAAACGAATATTTAAAAGTATAATATATGAAATGTTCGCCTTATATAATGTACTCTGTTTTATTTAGGATGTCAAACATTATGATTTGCTGGCAGGTGTGGTGCAAGTGCCGAAGCTTGTCATGCAGGGTGAGTTTAGGTATCATAGTTTTAACGAATGGTCTTAGAGAGCGGATCAAAACGCGAATGATATACGTTCAGCAAAGCTTGGTCGTTCAGCACCAGGCTTTCTTTATGGCATGGACATGG >strand + yutJ similar to NADH dehydrogenase TATGTCCATTATCGCGAATTTTTCACTATTCGTCAAAGACAACGGATGATTCAAAATTTATTATCGTAACATATTATTTCTCAAATAACGTTTTACTAATTTGTGTTTTCAGGCTAAATTGTAGGTTATTTTGAAACTATTATTGTGATTTTAGATCAAATTCGATATAATTCTAGATGTTATTTTACATAGTTCGTATTTTCTTCAATGTATCTATCATACTCTTCGGATT >strand - yuzB yuzB AATCCGAAGAGTATGATAGATACATTGAAGAAAATACGAACTATGTAAAATAACATCTAGAATTATATCGAATTTGATCTAAAATCACAATAATAGTTTCAAAATAACCTACAATTTAGCCTGAAAACACAAATTAGTAAAACGTTATTTGAGAAATAATATGTTACGATAATAAATTTTGAATCATCCGTTGTCTTTGACGAATAGTGAAAAATTCGCGATAATGGACATA >strand + yutI similar to NifU protein homolog CTCCTATGTAAGTCCTTTCTTTTAAGTATACAGAGCCTAAGGGCGTTTTTATAGAAAAAAACCGTTTTT >strand - yuzD yuzD AAAAACGGTTTTTTTCTATAAAAACGCCCTTAGGCTCTGTATACTTAAAAGAAAGGACTTACATAGGAG >strand + yutG similar to hypothetical protein AATTAGGGCTCATCACATCGTAGTCATACACAGCCGGAATTATGAATACATATTTTAATCCGAAGAAGGCGCAAGAAAAAGTCGTAAACTTCTCAAGCATTTTGTATAAACTAGTAAAAAAG >strand - yutH yutH CTTTTTTACTAGTTTATACAAAATGCTTGAGAAGTTTACGACTTTTTCTTGCGCCTTCTTCGGATTAAAATATGTATTCATAATTCCGGCTGTGTATGACTACGATGTGATGAGCCCTAATT >strand + yutC yutC CCCATCAATTGCTAAAGAATAAATCATGCGATTTGGCTACCCTACACATGA >strand - yutD yutD TCATGTGTAGGGTAGCCAAATCGCATGATTTATTCTTTAGCAATTGATGGG >strand + yunA alternate gene name: yutA GTATAGTTTTCATTTGTATTTAATTTAATTTTAAGCCATTCGGGCTTTCTGAGGTGTTCGTCTTTCTTTGCCAATTCCCATCATCTCCAATATCCAATATACTCAATTTCTGACACGAAACATTCTGAAAACGAGAAAGGTGCAGGTTTTAGTTCTGCAACAAAAATAATGTTTTTCTAACTAAAGAAAGTCTTTTCTTTAGACACTTTAACATAGGAAAAATTT >strand - yutB 
similar to lipoic acid syntheta AAATTTTTCCTATGTTAAAGTGTCTAAAGAAAAGACTTTCTTTAGTTAGAAAAACATTATTTTTGTTGCAGAACTAAAACCTGCACCTTTCTCGTTTTCAGAATGTTTCGTGTCAGAAATTGAGTATATTGGATATTGGAGATGATGGGAATTGGCAAAGAAAGACGAACACCTCAGAAAGCCCGAATGGCTTAAAATTAAATTAAATACAAATGAAAACTATAC >strand + yunI yunI CAGGATGACGATTTTCCATGTCAGTTTATGTAACACAACCAGTTCTATTTTCAACAGTTTACTGCTACTATGATCGTATTACATCAGCAAAACCGCTAGAATTCCTATTGAAAATTATTCACCTTAACCAATGTTTTTATAGAA >strand - yunH similar to allantoinase TTCTATAAAAACATTGGTTAAGGTGAATAATTTTCAATAGGAATTCTAGCGGTTTTGCTGATGTAATACGATCATAGTAGCAGTAAACTGTTGAAAATAGAACTGGTTGTGTTACATAAACTGACATGGAAAATCGTCATCCTG >strand + yurI similar to ribonuclease TACATGTCACAACTATTTTGATGTAAAGGTCAAAAGAAAACACGGTAAGGAAGAGAGCCAGCAAGAGCGAAAGAGAGGGTCAAAATGCCTTTCATATGAACAAGCTAAGCTGGAGAGGCGATGTTCGCGGAACGGATGAAAACAGAAATGCCGGCAGCCCGCAAAAACAGGACCTAAAGATATGGAAACAGGAGATTTCCGCCTCCTCCAAAGGGACTAATTTACAATTTTAATGAAAAATATACAAAATGACACTTCTATCCTTTATTTCAATTTTAGATAATAGATATGATCTGATAA >strand - yurH similar to N-carbamyl-L-amino a GTTTCCATATCTTTAGGTCCTGTTTTTGCGGGCTGCCGGCATTTCTGTTTTCATCCGTTCCGCGAACATCGCCTCTCCAGCTTAGCTTGTTCATATGAAAGGCATTTTGACCCTCTCTTTCGCTCTTGCTGGCTCTCTTCCTTACCGTGTTTTCTTTTGACCTTTACATCAAAATAGTTGTGACATGTAGGGTGTATCTGGTTGCTGGATGGATACTGTGGCTAAAAAATAAACGTTTTTTTGGTTAATCTGTCAATGAACTATCATGTTGCTCCTTCTATAATGAAGTCACCATAGCGG >strand + yurK similar to transcriptional regu TCTTTTGACCCGTTATTTCGTCACTGGAAACAACTTGGTCTTTCTGTTATTTTTAGAGTAATCGATTCCCATTTTTGCTATATTTCTAAAATAAAAAATGTAAATTTGTATTATAACGTCATAATACATTATAATATAAATAATAGGTTAA >strand - yurJ similar to multiple sugar ABC t TTAACCTATTATTTATATTATAATGTATTATGACGTTATAATACAAATTTACATTTTTTATTTTAGAAATATAGCAAAAATGGGAATCGATTACTCTAAAAATAACAGAAAGACCAAGTTGTTTCCAGTGACGAAATAACGGGTCAAAAGA >strand + yurS yurS TATTATAATGTCTATTCATTTTACCAAATCAAATGCTTTCCTTCGACCTTTTGCCCTAATGTGGAAAAACCGCGAAGTTCGTCCAATCTCTTTACTCTCCTATCGAATATCCTGTCACTATCAATAGAGAAAGGATGATACCATGAGCGAAAATCGTCATGAAAATGAAGAAAACAGACGCGATGCGGCAGTGGCAAAAGTCCAAAACAGCGGTAATGCAAAAGTCGTGGTCAGCGTGAACACAGATCAGGATCAGGCACAGGCGCAGTCACAA >strand - 
yurR similar to opine catabolism TTGTGACTGCGCCTGTGCCTGATCCTGATCTGTGTTCACGCTGACCACGACTTTTGCATTACCGCTGTTTTGGACTTTTGCCACTGCCGCATCGCGTCTGTTTTCTTCATTTTCATGACGATTTTCGCTCATGGTATCATCCTTTCTCTATTGATAGTGACAGGATATTCGATAGGAGAGTAAAGAGATTGGACGAACTTCGCGGTTTTTCCACATTAGGGCAAAAGGTCGAAGGAAAGCATTTGATTTGGTAAAATGAATAGACATTATAATA >strand + yusN yusN ATAACTCCCAGTCTGAAGGAAACTATAAAATGAACAGAAACCAAAGGAGGATCACAATGGATAATCAGCAGCAATCTCAAATGCCGCCTTCCGTTATTTCGACAAAGGATCATTTGTATCTCAATGACATGCTGAACTGGAATTTGCTCGCGATGAAAAAAGCGCATTTCATGGCGCAGCAGTGCCAGGATCAAACGTTAAAGCAAGAACTCGACCGCGTCGGACACATGCATCATGATCACTATCAAAGAATATTAAAGCACCTGCAGCCAGGCCAGCAGCAGTCCGGCTACATCCAAT >strand - yusM similar to proline dehydrogenas CTGGCCTGGCTGCAGGTGCTTTAATATTCTTTGATAGTGATCATGATGCATGTGTCCGACGCGGTCGAGTTCTTGCTTTAACGTTTGATCCTGGCACTGCTGCGCCATGAAATGCGCTTTTTTCATCGCGAGCAAATTCCAGTTCAGCATGTCATTGAGATACAAATGATCCTTTGTCGAAATAACGGAAGGCGGCATTTGAGATTGCTGCTGATTATCCATTGTGATCCTCCTTTGGTTTCTGTTCATTTTATAGTTTCCTTCAGACTGGGAGTTATATACAGGTGTTTTGAAGATGAC >strand + yusT similar to transcriptional regu TGTTGACTAAAGTATATTTCTGATTCCATTGTAATCCAGCTTTGATGTTACGTATAATGATTGTTAAAGATACTAGATATCACTCAGATTGATAGTAAACGAA >strand - yusS similar to 3-oxoacyl- acyl-carr TTCGTTTACTATCAATCTGAGTGATATCTAGTATCTTTAACAATCATTATACGTAACATCAAAGCTGGATTACAATGGAATCAGAAATATACTTTAGTCAACA >strand + yusZ alternate gene name: yvxA; simi CCTTCTTTCGTTCTATTATAGAACAATTCTGAATTATTGTATAAAAATTTCTATTACAGGCGTCATTTCGTGTTCAGGATAGTACAATAGCTGTAGCAGTTTA >strand - yusY yusY TAAACTGCTACAGCTATTGTACTATCCTGAACACGAAATGACGCCTGTAATAGAAATTTTTATACAATAATTCAGAATTGTTCTATAATAGAACGAAAGAAGG >strand + yvqA similar to two-component respon AACGGTTATTCATTTATCGTTACATATTCATCGCATGTACAAGTTCAATTGTCCCAATAATGAGGAGAAAAGCAAAGCACCAAAATAGAATATGTTTGATCATCCTACATCTTCCTCGCTCTTTTTCTCTTCCTTTCTTCCCTTAACCATATCATAAACAAACATTGTGGGATAAAAATGAAAAGAATATGTGAAATTATGAAAATTGCCTGCCGTATGTTAAAACTAGTTTATAATGACGTTGAAA >strand - yvtA yvtA 
TTTCAACGTCATTATAAACTAGTTTTAACATACGGCAGGCAATTTTCATAATTTCACATATTCTTTTCATTTTTATCCCACAATGTTTGTTTATGATATGGTTAAGGGAAGAAAGGAAGAGAAAAAGAGCGAGGAAGATGTAGGATGATCAAACATATTCTATTTTGGTGCTTTGCTTTTCTCCTCATTATTGGGACAATTGAACTTGTACATGCGATGAATATGTAACGATAAATGAATAACCGTT >strand + gerAA gerAA AAAAATCCTTTTTATTTTTCGATCTTGGCAGATTTTAATTTTAATTCAAGATATACGCGGGAATCCTTTGAATATTTGTATTTTGGATTTGTCCTGCCGTACTCCTTTGATGTATAAGAGACATCATAATGCTGTTCAAATGATCTAATGAGCTGTTCAATTTCTTCTTGTTTTCCAGCTAGCCTTACCTGTGCCACTTCGTATCCTCCGCTTTCATAAAAAAACTTACTCCATGAAATACTCTAACACAGTATATCATTTTTTTAACAGGAAAAGATAACCTCTACTAAGGTTTTGGAT >strand - citG fumarate hydratase AAATGATATACTGTGTTAGAGTATTTCATGGAGTAAGTTTTTTTATGAAAGCGGAGGATACGAAGTGGCACAGGTAAGGCTAGCTGGAAAACAAGAAGAAATTGAACAGCTCATTAGATCATTTGAACAGCATTATGATGTCTCTTATACATCAAAGGAGTACGGCAGGACAAATCCAAAATACAAATATTCAAAGGATTCCCGCGTATATCTTGAATTAAAATTAAAATCTGCCAAGATCGAAAAATAAAAAGGATTTTTTGTGTCATTGGCGAATTATGATCTATTGAAGCAACCGTT >strand + yvrD similar to ketoacyl-carrier pro ACATCATCTGTCCTCGCAGAATTCGTGCTGATCAAACAGGCAGGTATCCTGGCTCTTGGCGCTTGGCCAATTACAGTGGCGGGACCGCACCGGCTTTTACCGGTTTCCCTTTTAAGCCGGCTGTTGTTCACGGACCGGCACCTATTTGACATTCGTATCCGTTTTTCGGAGTAATTATACTATATGACTGACAGGCTTGTATATGTGTCCTGAATGACTTTTCCCCATATTGTGTATGGAAGCAAAATCCTCGTAAACACTAAATTTGATCTTGTATACTTAATACCATCATCTATCAAA >strand - yvrC similar to iron-binding protein ACAATATGGGGAAAAGTCATTCAGGACACATATACAAGCCTGTCAGTCATATAGTATAATTACTCCGAAAAACGGATACGAATGTCAAATAGGTGCCGGTCCGTGAACAACAGCCGGCTTAAAAGGGAAACCGGTAAAAGCCGGTGCGGTCCCGCCACTGTAATTGGCCAAGCGCCAAGAGCCAGGATACCTGCCTGTTTGATCAGCACGAATTCTGCGAGGACAGATGATGTGTAAACAATAGGCTTTTTTGTGTTGTTTACAGCATCTTTACCGTCGTAGAGATGCTTTTTTAGTTCG >strand + yvrK similar to hypothetical protein TACATACATGACTGCAGAAAAAGAAGGGAGGTATTTTCCTATGGATCAGGTTTTTATAGAGGAAGTCGTAAAACAGATCGGCAATTTGGGGTTTCCCGCGCTGATTGCAATGTATCTGCTGACCCGATTCGAAAAGAAGTTTGATCAACTAATAGAACTAATGACAGAACTGAAAGATCATGCAAAAAAATAATTTTTCAATCGAAGTTGACTTTTCACTGGTTTTTTTCACTTAACAAAACAGAAGGGAAAACGAAAGGCCTTTCACCTTCTCTTTCTGCTATCACATTTAAATGTAAG >strand - yvrI yvrI 
ATTGAAAAATTATTTTTTTGCATGATCTTTCAGTTCTGTCATTAGTTCTATTAGTTGATCAAACTTCTTTTCGAATCGGGTCAGCAGATACATTGCAATCAGCGCGGGAAACCCCAAATTGCCGATCTGTTTTACGACTTCCTCTATAAAAACCTGATCCATAGGAAAATACCTCCCTTCTTTTTCTGCAGTCATGTATGTAAGTGAGGAAAACCGATAAAACGTAAACTGCTGCAAAGTATTTTGGAAGACACTAGGCGGAAAGTAGTAGTTTACTTTTTTTTGCTTTCCTTCATTTAA >strand + fhuD ferrichrome ABC transporter (fe AACCGGCACTTGCAGTGGTATAATCACAGATGATAATGATTCTCTTTTTCATCTATCTTTTAGAAAG >strand - fhuB ferrichrome ABC transporter (pe CTTTCTAAAAGATAGATGAAAAAGAGAATCATTATCATCTGTGATTATACCACTGCAAGTGCCGGTT >strand + yvsG similar to hypothetical protein TGTGGTTATCGATTATTTTTCTTTTTTTGCACAGCTTCTTGAAGCGGATCATCTTTTAGAGCTGAACCAGCATCTTCAAGAGCTCCTTGGATTACGTTTTTTTCTTTTTCGGAACGTTTTCCTTTATCTTTATTAAAGAAACCCATTCGTATCACCTCTTTTAAAAAATAGGTACATGTTTAAGATGCCTTTTTCCTGAAAAATTATGCGGGATATTTTTTATCGTCCAAAGTCCTGTTGATCAAACATACATTCTCTTGATATATTTGTAATTCAGAGATTTTTTGCAAAAGATAAAAG >strand - yvsH similar to ABC transporter (ami GGAAGTAACTGAATAATTAAATCCTCACTCTTTCTCAGTGAGGTAGAGGTTGCGCGGATGATGAGTCACACATGCTAGGCTGACAGGGGCTGTTAAACATGTGTAAAAGGCATCAGCGCCGAAGTGTGGAGAAAGCCGATCCTTCTCTATGCTGGGACTGTATCTGAATAAGTGCAGGACTGCCGCGTGCTTTTTCGCGGAGGGCTATCCGGAGATCAAGGGTGTGTGTTTCTTTAAGCATGGACTTTTGTTCAGGCTTTTTTTGCGTTTATATGATCTCGGGGTCTGGTCCGTACAAAG >strand + yvgL alternate gene name: yvsD; simi TTATTATACCTTTTCGCCGAATGTAAAAAAACCCTTATAAACCAAACATAATCAGTTATAATTAGTTATATCTAATGATA >strand - yvgK alternate gene name: yvsE; simi TATCATTAGATATAACTAATTATAACTGATTATGTTTGGTTTATAAGGGTTTTTTTACATTCGGCGAAAAGGTATAATAA >strand + yvgO yvgO ACTGAAATGTAAGCGTCTATATTATCTCACATTCCTCTCAGCCGATACCAATAATACGCTTAAGACCCTCTTTTTGCTTCTTTTTTCACAGCACAGGCAAATCCAAACACTATTTTTTTCCTATTTTAACAATATTGAGATTACAAATACATTGAGCAGGGTATGCGTGTAGTGTAACCAAATGAAAACAAA >strand - yvgN alternate gene name: yvsB; simi TTTGTTTTCATTTGGTTACACTACACGCATACCCTGCTCAATGTATTTGTAATCTCAATATTGTTAAAATAGGAAAAAAATAGTGTTTGGATTTGCCTGTGCTGTGAAAAAAGAAGCAAAAAGAGGGTCTTAAGCGTATTATTGGTATCGGCTGAGAGGAATGTGAGATAATATAGACGCTTACATTTCAGT >strand + yvaA similar to hypothetical protein 
TGATATTAATTCCCATTATACTGATTGTCCAAACGGTTGTCTAAGCATGCCGACGCAAAAAATGAATCAGTTAGGTTTTACCATTTGATCAGGAGGG >strand - yvgZ similar to hypothetical protein CCCTCCTGATCAAATGGTAAAACCTAACTGATTCATTTTTTGCGTCGGCATGCTTAGACAACCGTTTGGACAATCAGTATAATGGGAATTAATATCA >strand + yvaM similar to hydrolase CACTGGCTACATTACTTCTATTTTACATGAAAAGGGTTGGCATGTCACGCTGTATCAGACGCCAAAAGGCCGGATAAGACGTTTTATCTTGGTTACCCTATTCATATGAAAAGGTCAA >strand - yvaL yvaL TTGACCTTTTCATATGAATAGGGTAACCAAGATAAAACGTCTTATCCGGCCTTTTGGCGTCTGATACAGCGTGACATGCCAACCCTTTTCATGTAAAATAGAAGTAATGTAGCCAGTG >strand + yvaN similar to hypothetical protein CTGAAAAACTTTTTTCTATACTTTTACATTACGGCTTATGTTAACTTTTGTCAATACATTTCCAGTCTACCTATTCCCTTTAGTCAATAAACATACTAAAATATGAGTAACAGTGAGGGAC >strand - yvzC yvzC GTCCCTCACTGTTACTCATATTTTAGTATGTTTATTGACTAAAGGGAATAGGTAGACTGGAAATGTATTGACAAAAGTTAACATAAGCCGTAATGTAAAAGTATAGAAAAAAGTTTTTCAG >strand + yvaV similar to hypothetical protein TAAGTTTATTCAATTTGTTCAGTTTAAATTTAATTTTTCAGACAATTGAATGCTTCCCATTATAGAATAAAGTTTATAAAATAAAAAGTTTACAATTTGTTCATATTATTTATAAAGTATGTGTAAACGCTGATTTACCCGCATTGACGTGTGTTGATCTCCTTATTGCTCCAATTTCCTGCCAAATTCTCTTTTGACAGCATTTTCGCCGTATCAAAAATGTGGTATATTGACATCGGAATTG >strand - opuBA choline ABC transporter (ATP-b CAATTCCGATGTCAATATACCACATTTTTGATACGGCGAAAATGCTGTCAAAAGAGAATTTGGCAGGAAATTGGAGCAATAAGGAGATCAACACACGTCAATGCGGGTAAATCAGCGTTTACACATACTTTATAAATAATATGAACAAATTGTAAACTTTTTATTTTATAAACTTTATTCTATAATGGGAAGCATTCAATTGTCTGAAAAATTAAATTTAAACTGAACAAATTGAATAAACTTA >strand + yvbF similar to hypothetical protein TTTCCTTTAAATTTTCTGAAAAGTATAATCCTTATTATAGTTTGAACTTTGTAAAATAAAAAGTTTACAATGTGTTGAAATTATATGTTATTTATAGATTATTTATTGTTATGACCGAGACGGAAAGGGTGTTCTTACAATTACTCCGGATACCTCCCACTTCCTCTGCATTCAGCTATTTTCAACACCAGGAAGAAATATGATATATTGACACCATCAATTGCCATTTGAAG >strand - opuCA glycine betaine/carnitine/chol CTTCAAATGGCAATTGATGGTGTCAATATATCATATTTCTTCCTGGTGTTGAAAATAGCTGAATGCAGAGGAAGTGGGAGGTATCCGGAGTAATTGTAAGAACACCCTTTCCGTCTCGGTCATAACAATAAATAATCTATAAATAACATATAATTTCAACACATTGTAAACTTTTTATTTTACAAAGTTCAAACTATAATAAGGATTATACTTTTCAGAAAATTTAAAGGAAA 
>strand + yvbH yvbH TCTCTGCACAGGAAATAGAATGATTCTCCACCCTTTTACCCGGAGGCGATTTCCCGGTAAACCTTTATAGCCTGTCAGAAACGCGCCATGCTACAATGAAGTGACAAATTGAATAGGAA >strand - yvbG similar to hypothetical protein TTCCTATTCAATTTGTCACTTCATTGTAGCATGGCGCGTTTCTGACAGGCTATAAAGGTTTACCGGGAAATCGCCTCCGGGTAAAAGGGTGGAGAATCATTCTATTTCCTGTGCAGAGA >strand + yvbK similar to hypothetical protein CTAATTTGATAACATTTACTGCTTTTCATATTTTATAATAAAACAGAATGAAATTGGGAACGAATGAAAAAATTAAATGTCTATCAGAC >strand - yvbJ yvbJ GTCTGATAGACATTTAATTTTTTCATTCGTTCCCAATTTCATTCTGTTTTATTATAAAATATGAAAAGCAGTAAATGTTATCAAATTAG >strand + araR transcriptional regulator (LacI GTTGAGTAAAGCGTTTTCATTTAAACCTTCAATTGTACGTACTTATATTAAAATTTAACGAAAATTACATTTAATTAGTACGTACAAATATAGAATAATCTTGTTTG >strand - araE permease CAAACAAGATTATTCTATATTTGTACGTACTAATTAAATGTAATTTTCGTTAAATTTTAATATAAGTACGTACAATTGAAGGTTTAAATGAAAACGCTTTACTCAAC >strand + yvbV similar to hypothetical protein CTTATAAAAAAGATTCATTTTATTTATCTTATGTTCTACCGTATCATATGATGGAAAAGTTCTACAAGG >strand - yvbU similar to transcriptional regu CCTTGTAGAACTTTTCCATCATATGATACGGTAGAACATAAGATAAATAAAATGAATCTTTTTTATAAG >strand + yvfQ similar to hydrolase TTTACATTGATTGTAAAATGACGGAGAAGACGATGTTAGTATGGGGTGTCATGATGATGGAGTGACAAATGTCATTTTTTTCTCACCCCGGTGTCTAGGCAATCCTCCTCCTGCATCTTCCAATTCATCCATCTTTTCTATCTCCCATGTCCTCTTTCTTTTAAACTGCTGTGACATTCATAGAAAGATACACTCTTCAAACCATCCGAGTGTTATGATAGAAATATTGAACTTTATATTACTTTCCGGCTGA >strand - yvfR similar to ABC transporter (ATP TCAGCCGGAAAGTAATATAAAGTTCAATATTTCTATCATAACACTCGGATGGTTTGAAGAGTGTATCTTTCTATGAATGTCACAGCAGTTTAAAAGAAAGAGGACATGGGAGATAGAAAAGATGGATGAATTGGAAGATGCAGGAGGAGGATTGCCTAGACACCGGGGTGAGAAAAAAATGACATTTGTCACTCCATCATCATGACACCCCATACTAACATCGTCTTCTCCGTCATTTTACAATCAATGTAAA >strand + yvfH similar to L-lactate permease 
TTCAATTCGCCATTTTTGATCATATCTAATAGGGCATCCGCTACTTCTTCATATATTTTTTTGTTTTAATCTGTTTATATTTCAATGCAATTCACCCCGTAACTTGTAGGTCTGGCTGTTTACGATCTATTATACTCTGTTTTGTGGGCCGCCAAAAGATTTTGACTATTTTGTCAAAAAGTCTTGTACTTTGATTTTTTTCAAAATATAATGCTAGGTAACTGGTCTTATTACCCAGTCATCAGATGATCATATCATCTATAGAAACCGCTTACATATGGCGCCAGTTTATGTTTCAAG >strand - yvfI similar to transcriptional regu ATATTTTGAAAAAAATCAAAGTACAAGACTTTTTGACAAAATAGTCAAAATCTTTTGGCGGCCCACAAAACAGAGTATAATAGATCGTAAACAGCCAGACCTACAAGTTACGGGGTGAATTGCATTGAAATATAAACAGATTAAAACAAAAAAATATATGAAGAAGTAGCGGATGCCCTATTAGATATGATCAAAAATGGCGAATTGAAGCCGGGGGATAAACTGGACTCTGTTCAGGCGCTTGCTGAGAGCTTTCAAGTCAGCCGTTCAGCGGTTCGCGAAGCACTTTCTGCGCTAAAA >strand + yvfG yvfG TTCATTGTACATGATTGAGATCACACCGTATACATTTATGAAAAAGGA >strand - sigL RNA polymerase sigma-54 factor TCCTTTTTCATAAATGTATACGGTGTGATCTCAATCATGTACAATGAA >strand + slr transcriptional regulator CTTCCCGCGGCTGGCTTCCCGCGCCCCTTTCTGTTAATGATTGGATTATAAAAGAAAACGTTATTATTTAAAAATTGCAAAATAAGCCAATAAGTTCTCTTTAGAGAACAAAATCATGATTTTCCTCTAATTTACTGCACTTCCCTTATTATTTTAAATTTTATAAAGAACGAAAAATTCCTTATAATGAACGAAATAACGACAGGAATAGAGGA >strand - yveK similar to capsular polysacchar TCCTCTATTCCTGTCGTTATTTCGTTCATTATAAGGAATTTTTCGTTCTTTATAAAATTTAAAATAATAAGGGAAGTGCAGTAAATTAGAGGAAAATCATGATTTTGTTCTCTAAAGAGAACTTATTGGCTTATTTTGCAATTTTTAAATAATAACGTTTTCTTTTATAATCCAATCATTAACAGAAAGGGGCGCGGGAAGCCAGCCGCGGGAAG >strand + sacB levansucrase GATTTTTTAGTTCTTTAGGCCCGTAGTCTGCAAATCCTTTTATGATTTTCTATCAAACAAAAGAGGAAAATAGACCAGTTGCAATCCAAACGAGAGTCTAATAGAATGAGGTCGAAAAGTAAATCGCGCGGGTTTGTTACTGATAAAGCAGGCAAGACCTAAAATGTGTAAAGGGCAAAGTGTATACTTTGGCGTCACCCCTTACATATTTTAGGTCTTTTTTTATTGTGCGTAACTAACTTGCCATCTTCAAACAGGAGGGCTGGAAGAAGCAGACCGCTAACACAGTACATAAAAAAG >strand - pbpE penicillin-binding protein 4* 
CGTTTGGATTGCAACTGGTCTATTTTCCTCTTTTGTTTGATAGAAAATCATAAAAGGATTTGCAGACTACGGGCCTAAAGAACTAAAAAATCTATCTGTTTCTTTTCATTCTCTGTATTTTTTATAGTTTCTGTTGCATGGGCATAAAGTTGCCTTTTTAATCACAATTCAGAAAATATCATAATATCTCATTTCACTAAATAATAGTGAACGGCAGGTATATGTGATGGGTTAAAAAGGATCTCTAACTGAAGGATTTTTCAAAATATTTGAAACGTTAGTAGGTTAGTAACGTACAGA >strand + yvdO similar to hypothetical protein ATGCAGGAGCTGGATAATCGCATAGACAAATGATCTACGAACTATAAATATAATACGCTAAGGCCTTTTTCGTCGGTGAGGTCTGCTGAGAAACATTTTTCAAACAATTTTGAGCAAACTTCCTGTCCAATCCTCCAACAAGCGTTTTTTCTTATCGTTTTTAGGAGCAGTGTCCAGTTTTTTTCGGAGTGAATATTCGTTTAGGCCAAAAGAAAAGAGAGATGAAGAAGTGTTTAGTACCAGAGGTATACATCCATACAACAGACTCTAGATCATCATATACTGAATGTATGTAGAAAG >strand - yvdP similar to reticuline oxidase TGATCTAGAGTCTGTTGTATGGATGTATACCTCTGGTACTAAACACTTCTTCATCTCTCTTTTCTTTTGGCCTAAACGAATATTCACTCCGAAAAAAACTGGACACTGCTCCTAAAAACGATAAGAAAAAACGCTTGTTGGAGGATTGGACAGGAAGTTTGCTCAAAATTGTTTGAAAAATGTTTCTCAGCAGACCTCACCGACGAAAAAGGCCTTAGCGTATTATATTTATAGTTCGTAGATCATTTGTCTATGCGATTATCCAGCTCCTGCATATTTTAGTATCTGAATCGAATTTCT >strand + yvdC yvdC ATATATCGTGTTAAACATTCAAAAACACACTATATATTGTCCCATCTTCATGTGATAATATCAATATGAAATACATAAAGAATGCA >strand - yvdD similar to hypothetical protein TGCATTCTTTATGTATTTCATATTGATATTATCACATGAAGATGGGACAATATATAGTGTGTTTTTGAATGTTTAACACGATATAT >strand + yvcT similar to glycerate dehydrogen CAAGAAAAGGTTTTCAAAAAAAGACACTATAATTGGACTGTAGAATCAAAACATAAACATTTATATCTTTCAAAGATAATAAAATTTATATTTAATGCATCCTATAATCCGGCTGCAGTTCGTATATCGATGATTTGTCTGACAAAAAATATGGGTGAATTCTGATAATTAAAAAACTTTTTTAAGCAGATTTAGCTGTTCGATCTTCATTTTAAGTTCAAAAGAACCAGAAATTAAAAAAACGCAGCTATATTTTTAACATCTATTTATTTTCGATATAATGAATTAGACAACAGCCAT >strand - yvdA similar to carbonic anhydrase GCTGCGTTTTTTTAATTTCTGGTTCTTTTGAACTTAAAATGAAGATCGAACAGCTAAATCTGCTTAAAAAAGTTTTTTAATTATCAGAATTCACCCATATTTTTTGTCAGACAAATCATCGATATACGAACTGCAGCCGGATTATAGGATGCATTAAATATAAATTTTATTATCTTTGAAAGATATAAATGTTTATGTTTTGATTCTACAGTCCAATTATAGTGTCTTTTTTTGAAAACCTTTTCTTGGCTTATTCCTATGCTATAATTCCTTACTGTAAGCTGGGTAAGAAGGCTTGCA >strand + yvpB yvpB 
TGTTATCTTATGAAGGAAAATTCCGAATCCTTTAGTTCGCTAATATGATAATATGTTAGCAGATGAAAGAATGAAAATCAACGCTTAACTTCAGTAAGGATATTTTTAGTTCAAATATCCTTCTCCTCATTATGAAAAAATCATGAATAATGTATTAAAGGTACGTAATATCTCAAATTCTTTATGTTTGGCATGCTATAATACAAATCAAATAGAGAAC >strand - hisZ histidyl-tRNA synthetase GTTCTCTATTTGATTTGTATTATAGCATGCCAAACATAAAGAATTTGAGATATTACGTACCTTTAATACATTATTCATGATTTTTTCATAATGAGGAGAAGGATATTTGAACTAAAAATATCCTTACTGAAGTTAAGCGTTGATTTTCATTCTTTCATCTGCTAACATATTATCATATTAGCGAACTAAAGGATTCGGAATTTTCCTTCATAAGATAACA >strand + nagA N-acetylglucosamine-6-phosphate CGGGCTGCCCCGAGCTTGCTCACAATACTTTCATTTTATCACTTTCGGGCTTGAACCTAAAACAGATTTTATAAAAGGGGGGAAAACACCTCAGCTGGTCTAGATCACTAGTCTGAAAAAGAGTAAAATAAAGGTATTCAAATTCCAGAAA >strand - yvoB similar to hypothetical protein TTTCTGGAATTTGAATACCTTTATTTTACTCTTTTTCAGACTAGTGATCTAGACCAGCTGAGGTGTTTTCCCCCCTTTTATAAAATCTGTTTTAGGTTCAAGCCCGAAAGTGATAAAATGAAAGTATTGTGAGCAAGCTCGGGGCAGCCCG >strand + yvnA similar to hypothetical protein ATGTATTTTGCGTCTGAACAGGGAAGCTTATGAGTTCATTTTTGTGCATAAAGAACAAAAAAACGTCTGTCTCTTGAAAATTCCACTATTACCCAGTAAACTAAGGATAGTTAACCAGTTAACAATTAAATCAG >strand - yvnB yvnB CTGATTTAATTGTTAACTGGTTAACTATCCTTAGTTTACTGGGTAATAGTGGAATTTTCAAGAGACAGACGTTTTTTTGTTCTTTATGCACAAAAATGAACTCATAAGCTTCCCTGTTCAGACGCAAAATACAT >strand + yvmB alternate gene name: yzhA; simi GTTTTTTAAGATTTTAATAATCATTTTCAATACCCATATCATGATAAATGTTTACTAGTAAACATTAGTCATTTTATCCCTTTTCCTTCCAAACGTCAATATGATCTGTGCCCAAATATTTGAAAATAAATGTAAACATCACATTTTCCACACACTTTTTACACGCATATTCAGCATGACATATCAACAAATAGTCTAAACACCAACAATTGACAACGAAAACAAGAAAAGATAAAATAAAATTGTTCACCAGTAAACGATTTGTTTTATTGTTTCATCGTAAACTATTATTCCTAAGAA >strand - yvmC similar to hypothetical protein TGGTGTTTAGACTATTTGTTGATATGTCATGCTGAATATGCGTGTAAAAAGTGTGTGGAAAATGTGATGTTTACATTTATTTTCAAATATTTGGGCACAGATCATATTGACGTTTGGAAGGAAAAGGGATAAAATGACTAATGTTTACTAGTAAACATTTATCATGATATGGGTATTGAAAATGATTATTAAAATCTTAAAAAACATTTGGAAACTTTAAGTGATGCCTGCCATTCGTCTTGTTCAGTTGTTCTAGCTATTCCATCTTGTATTTGAGGGTGATGACTTAAAGCGGGATAA >strand + yvkN yvkN 
TGTGAATTGTCCATGACAAAAGATCAGGGACGTTTTACTTATTTATACGTTTAAAATCTCTTCAGGTTTCAAATTCATTCTTTAGACATGCTCCGATTTACGGAAATTGGATTTTAAAAAGTTTTAGAAGTAGAATCTTCCGGCTATACCGATTACAATATGTAAAAAGCTTACGGCAACCAAGGAG >strand - yvlA yvlA CTCCTTGGTTGCCGTAAGCTTTTTACATATTGTAATCGGTATAGCCGGAAGATTCTACTTCTAAAACTTTTTAAAATCCAATTTCCGTAAATCGGAGCATGTCTAAAGAATGAATTTGAAACCTGAAGAGATTTTAAACGTATAAATAAGTAAAACGTCCCTGATCTTTTGTCATGGACAATTCACA >strand + yvkC similar to pyruvate,water dikin GGTGCCGGTCAAAACTTATTCCATGGTGCCGCTGTTCTAATCAGCGGCACTTTTTTTATTACATCATCAAGTCCGCCGCCAGATTCCGAGAATAAAAGAAAATAGGCATACAGTACCAGTACGGCCGCATTTTTCAGGATAATATAACCAACTTAACTATTCATTTTATTTAGGACTTTCTGCGTTTGACAGCATTTTCCTGAAAACCTACAATTAAATGATAGTGTTTACTATCATTTTTTAAACACAGGCTCCGCAATAAAAAGACTAGAGAGAAAAACAAAATGAATGAAGAAAAGA >strand - csbA putative membrane protein CGCAGAAAGTCCTAAATAAAATGAATAGTTAAGTTGGTTATATTATCCTGAAAAATGCGGCCGTACTGGTACTGTATGCCTATTTTCTTTTATTCTCGGAATCTGGCGGCGGACTTGATGATGTAATAAAAAAAGTGCCGCTGATTAGAACAGCGGCACCATGGAATAAGTTTTGACCGGCACCAAAATGCAAATATGGTGATTGATTTTGGCTGAAAAGGGGTATGGTGTAAGTAGAGGGAGCGAATGCACGATTGTCCGATTCTTCATTTTTTACTATAATCAGATCAGATGAAGAAT >strand + yvyE alternate gene name: yvhK; simi CGTTGTCATATATTTTTATTATAAATGTAATTGATCTGCAAATTATGTCGAATTATAATGAAAGAATGCGTATATTCTAGAAAAAAACATTTTTATATACTATCTACTCTTTATGATACCAAAATACGCACAAAAATTCTAAAGACTGTACGAATTTGTATAGGCAGTCTTTTATGCTTAGCAGGA >strand - degS two-component sensor histidine TCCTGCTAAGCATAAAAGACTGCCTATACAAATTCGTACAGTCTTTAGAATTTTTGTGCGTATTTTGGTATCATAAAGAGTAGATAGTATATAAAAATGTTTTTTTCTAGAATATACGCATTCTTTCATTATAATTCGACATAATTTGCAGATCAATTACATTTATAATAAAAATATATGACAACG >strand + lytR membrane-bound protein TTTTATCGTCAACCTATTTTATATTTTAAAGAAAAATTAAGAAACAATGAAACTTTTTTTTATAAAAAACGACTATTTTAGGATTTCATTCTTGTATTAAATAGAGTTGTATTTATTGGAAATTTAACTCATAATGAAAGTAATTTAACAGAC >strand - lytA membrane bound lipoprotein GTCTGTTAAATTACTTTCATTATGAGTTAAATTTCCAATAAATACAACTCTATTTAATACAAGAATGAAATCCTAAAATAGTCGTTTTTTATAAAAAAAAGTTTCATTGTTTCTTAATTTTTCTTTAAAATATAAAATAGGTTGACGATAAAA >strand + gtaB UTP-glucose-1-phosphate uridyly 
TGCTATAGTAAATAAATTCATACTACATATTCTACATGAAAATGCATCATTTTAAATCATTTCATTCTTGATTCAAATAAAATGATATAAGTTTTAATATTAAGGATAAAGCTTGTTTAAAAATGGTTTATCCGATATCATAAAAATGTGTAAAAACATATTGAAAAGGGTAAATAGAGAATAGTTTAACCATAAATTTTTTCGATCATAAGGAA >strand - yvyH similar to UDP-N-acetylglucosam TTCCTTATGATCGAAAAAATTTATGGTTAAACTATTCTCTATTTACCCTTTTCAATATGTTTTTACACATTTTTATGATATCGGATAAACCATTTTTAAACAAGCTTTATCCTTAATATTAAAACTTATATCATTTTATTTGAATCAAGAATGAAATGATTTAAAATGATGCATTTTCATGTAGAATATGTAGTATGAATTTATTTACTATAGCA >strand + tagA tagA TTATATCGACATATTTCTCGTATGTCTTCACTGATCATTAATTATAGCATACCGCCTAAAAAAGGTTAAGTACTCATGAGGAGAAAAACATTCCAAAATATGTAATTACACTCAGACTCCATTGGCACCAAGGTCTGTTGTTCATTCGTTCATTCAATTGTAAAATTGGAAACATATTTAAATTTAATGTAAAGATAATGTTAATCCTTTGTTGAAGATTTATGTTAAAATATAAGGTAGCTTGATTTTGCTTTTAGAAACTCTCGGTTTTTAACTATATAAATAGAATCTTAAAGGAAG >strand - tagD glycerol-3-phosphate cytidylylt ATTTTAACATAAATCTTCAACAAAGGATTAACATTATCTTTACATTAAATTTAAATATGTTTCCAATTTTACAATTGAATGAACGAATGAACAACAGACCTTGGTGCCAATGGAGTCTGAGTGTAATTACATATTTTGGAATGTTTTTCTCCTCATGAGTACTTAACCTTTTTTAGGCGGTATGCTATAATTAATGATCAGTGAAGACATACGAGAAATATGTCGATATAAGGAATACAAACAGGTTTTTAGAATTTAATCGCTAACGTCAGTAGTGTTTTTAATAGTGTTTAACATTAG >strand + gerBA gerBA GTAAATAACTGATCATCATTTTCCCGGTGCGCCCCTCTTACAAACGCCCGTATAAGCTGCCTTTTTCCTATGACAACATATGTACAAAGCATCATCTTTTCATTACCCCTTCAAAGAAAACGCTTCCTATCTATTATATGTGCAGCTTTTCAAGCTTTGTTTCAGCCTTTTCCTCGATAAGAATAATTCTCCTTTTTTGATACAAATTAATAAAAACCGTCAATATGTTTAAGA >strand - pmi mannose-6-phosphate isomerase TCTTAAACATATTGACGGTTTTTATTAATTTGTATCAAAAAAGGAGAATTATTCTTATCGAGGAAAAGGCTGAAACAAAGCTTGAAAAGCTGCACATATAATAGATAGGAAGCGTTTTCTTTGAAGGGGTAATGAAAAGATGATGCTTTGTACATATGTTGTCATAGGAAAAAGGCAGCTTATACGGGCGTTTGTAAGAGGGGCGCACCGGGAAAATGATGATCAGTTATTTAC >strand + wtF ATCAGCTGCAATGACATTGTCTGTGGGATTTTTTTAGAAATCTTTTAGCCTCTTTTTCCGGCCATTAAACCTTTTTATTTTTCTGATCATAAGTTTTCCAATCATTTACTTGTATTTTTTTCAATGTCGCCGAAACATTTTACCTGCTGCGGCGTCCAATATAAGGTGATTCAGTACATTATCGTTATCAATTTTCAAAAACAAGAACTTATAAATATAGGAATATTTATCGAAAAATGTCTTTCGAGTAGTTGCCCCATGGCAAATAGAATGGTACGCTTCAAATATTAAAGAAACGTG 
>strand - ywtG similar to metabolite transport CGTTTCTTTAATATTTGAAGCGTACCATTCTATTTGCCATGGGGCAACTACTCGAAAGACATTTTTCGATAAATATTCCTATATTTATAAGTTCTTGTTTTTGAAAATTGATAACGATAATGTACTGAATCACCTTATATTGGACGCCGCAGCAGGTAAAATGTTTCGGCGACATTGAAAAAAATACAAGTAAATGATTGGAAAACTTATGATCAGAAAAATAAAAAGGTTTAATGGCCGGAAAAAGAGGCTAAAAGATTTCTAAAAAAATCCCACAGACAATGTCATTGCAGCTGATGG >strand + rbsR transcriptional regulator (LacI AAACGCTAAACATACGAGTCCGAATTCTCTAACATAATTAAACATTTTCTGGGATGATAGTCTTTTCTGTTTCTCCCCATTTACAGGTCTAAACGCATGACTTTGAAACAATTTTAATAAAACTTAATATTTGTTCAAGAAATCTTCATCCATATTTGTGAAGACTTTGTCAAAAAAAGAGTGAAAACCTTAAATTTTTCAATTATATATACAATTTACAATTAGATTTCTTTTGATATTTTTATTGCTAACTTCGGATTGTTCATGATAATCTATCTATGTAAACGGTTACATAAACAA >strand - ywsC similar to capsular polyglutama ATAACAAAGGCTGAAAAAAAGATTGATTTGAAATGCATATTAAATACAAACAGCAGGGTGTGACTATACGTCCAGAAGGATATCAGGAGAAAAATGAAAGCGCATTCTCTTTGATTTTTTTAAAAAGTGATAAGGTATAAGTCCTGCTGTTTTCCAAAAATAGAAAACAGTTTGTAGGTATAAAATCTCTTTCAAAAGAGAAGTTTGGCTTAGTCGATTAGGGAAGATTATGTTACATAATGCCGATTGAGAATTCATAGTGATTCTATATACTGATGAATGAATTTACAACAATATAGA >strand + alsR transcriptional regulator (LysR ATTTTAAACGTAAAATTTTAAATATCTTGTGATAAGTTTCACTATACACTCTTTGGAAATTGACCTCCAATATCGATTCGCCTTATGATTAATATGTAATACGAATTATTAACCTATGGAAAGG >strand - alsS alpha-acetolactate synthase (pH CCTTTCCATAGGTTAATAATTCGTATTACATATTAATCATAAGGCGAATCGATATTGGAGGTCAATTTCCAAAGAGTGTATAGTGAAACTTATCACAAGATATTTAAAATTTTACGTTTAAAAT >strand + cotG morphogenetic protein ATTTTAGGCTTTTATTTTTATAAGATCTCAGCGGAACACTTATACACTTTTTAAAACCGCGCGTACTATGAGGGTAGTAAGGATCTTCATCCTTAACATATTTTTAAAAG >strand - cotH spore coat protein (inner) CTTTTAAAAATATGTTAAGGATGAAGATCCTTACTACCCTCATAGTACGCGCGGTTTTAAAAAGTGTATAAGTGTTCCGCTGAGATCTTATAAAAATAAAAGCCTAAAAT >strand + ywrC similar to transcriptional regu AGAAATTTTCTTCAGTATAGGGAGCCTGATTTTTCACATCTACTGATCCGGCAGAAAAACTGACGAACTTTCTTCTTTTCGCAAATAATAAGATATTTTCGCATTCATTTCGAAAACAAATGAAAAATTATTATTTCTATACGTAACTTTTATGATAAAATGATTGCTAAAAGGCTACGAAAG >strand - ywrD similar to gamma-glutamyltransf 
CTTTCGTAGCCTTTTAGCAATCATTTTATCATAAAAGTTACGTATAGAAATAATAATTTTTCATTTGTTTTCGAAATGAATGCGAAAATATCTTATTATTTGCGAAAAGAAGAAAGTTCGTCAGTTTTTCTGCCGGATCAGTAGATGTGAAAAATCAGGCTCCCTATACTGAAGAAAATTTCT >strand + ywqM similar to transcriptional regu TTTACGAATGATTTTCTTCATCATATAATGGATTCATCAATCAATACAGATGATTATTTCGATTAATCCAATCGCTAAAACCGATTTAGAAA >strand - ywqN similar to hypothetical protein TTTCTAAATCGGTTTTAGCGATTGGATTAATCGAAATAATCATCTGTATTGATTGATGAATCCATTATATGATGAAGAAAATCATTCGTAAA >strand + ywqB ywqB AATTTTCAAGCATTTCCTTCGTCGTCATCTCAGATGACTGCTCACCAATTTCATGTGCATGCATCAAAATTTTCAGAAATTCTTCTTGATTCAATTTCTTTTCACTCATCTTTTTTTTCCTTTCAGCTTCCATTAAGTTGAAAACACGATTATTATATGATGCTGGTTATCAATTTGCAATCTCTAATACATCATTCGACACATCCTATTTTTTATGTCCTTTTTTAGGAAAAACTTTTTAAGGCCGATGTGTTGCGATATAATAGAGAAATTGGATCTTTTTATTACGTGACTCGGTTA >strand - ywqC similar to capsular polysacchar AATTGGTGAGCAGTCATCTGAGATGACGACGAAGGAAATGCTTGAAAATTTGATTTCTCATATTAAAAATGGATATGCAACATAAAAATCCCGCACGCCTTTAGCGGGATTTTTTTATGCGAGAGAACGTGACTGCCCGAATAAGAGAAAATGAGACAGCACTGTCAACACAGTAAATATAGCGCTTACATAAAAAGTACTAACATAACGCATTGTAAAAAAATGTCGAATTATGTCACGTGAAAAGCTGGAGAAATGCGGTTTCAACGTTGTATAATCGTAGATGGAAAAGGATGTTAT >strand + ywpF ywpF CCATAGTCAGTTACAGTATAATGAAAAAATAGAAATTATTAAATGTTTCGCTCATACAAAATTCTTTAGCCCCAACATTGCAGGATTTTTTCCTCGAAAACGTTTATGTTATAATGGAAGGCAAGAAAACTTTGGAAGTAATCG >strand - ywpG ywpG CGATTACTTCCAAAGTTTTCTTGCCTTCCATTATAACATAAACGTTTTCGAGGAAAAAATCCTGCAATGTTGGGGCTAAAGAATTTTGTATGAGCGAAACATTTAATAATTTCTATTTTTTCATTATACTGTAACTGACTATGG >strand + ywpD similar to two-component sensor GTTGTGCCTTTTACGATCGGAAGCTCCAGATCGACACTTTTAATTGTTATCCGCCGCCTTGATTGTGAAGTAAAAATAAGTATATTTCACGTGCTTTCGACGTTCTCCACTTGACTGGAATGCTTTAATGGCGATCCCGGCATCTGTAGCTTCAGGCTTTCCTTCAGAGGAATCAGAAATCTCAAACTCTATTTTCTCAGTGTTCAGCACATAACCGGCTGGCGCTTTCGTTTCAACAAAAGCGTATTGTCCCGGTGCAAGGCCTGAAGCTGTGACTTGATGATGCCGGATATGACTGGT >strand - ywpE ywpE 
GGCATCATCAAGTCACAGCTTCAGGCCTTGCACCGGGACAATACGCTTTTGTTGAAACGAAAGCGCCAGCCGGTTATGTGCTGAACACTGAGAAAATAGAGTTTGAGATTTCTGATTCCTCTGAAGGAAAGCCTGAAGCTACAGATGCCGGGATCGCCATTAAAGCATTCCAGTCAAGTGGAGAACGTCGAAAGCACGTGAAATATACTTATTTTTACTTCACAATCAAGGCGGCGGATAACAATTAAAAGTGTCGATCTGGAGCTTCCGATCGTAAAAGGCACAACCAATGAAAATCTT >strand + rapD response regulator aspartate ph ATCTGCATTAGTATGTATAGCAATCATCAAAGATCCAATCTGTACATTTGAAAAGTATATTATGAATTAATCTCTTTTTCAACCGCGCTAAAATGTAACCAACTGTCAATGAGAGCCGTCAAAAGTTATGATATGATAATTATAGATTTTACCAATAGCAA >strand - ywpB similar to hydroxymyristoyl-(ac TTGCTATTGGTAAAATCTATAATTATCATATCATAACTTTTGACGGCTCTCATTGACAGTTGGTTACATTTTAGCGCGGTTGAAAAAGAGATTAATTCATAATATACTTTTCAAATGTACAGATTGGATCTTTGATGATTGCTATACATACTAATGCAGAT >strand + ywoH similar to transcriptional regu CTCTCTTTGTTAGGTTTGTAACAGTGTATTAGCATTCTTTTGGGAATATGCTAAAAACTCAATAAGGACAAGGGGTGAAACGAACTTTTTCAGATTCAGCAAACTATTTCGCGTCACTAAATTTTCATCTTTACAACTATTGATCGCTCCTGTAAAATGAAATAAGCAATTAGTTACTTAGGTAAGTTTTTTAG >strand - usd usd CTAAAAAACTTACCTAAGTAACTAATTGCTTATTTCATTTTACAGGAGCGATCAATAGTTGTAAAGATGAAAATTTAGTGACGCGAAATAGTTTGCTGAATCTGAAAAAGTTCGTTTCACCCCTTGTCCTTATTGAGTTTTTAGCATATTCCCAAAAGAATGCTAATACACTGTTACAAACCTAACAAAGAGAG >strand + nrgA ammonium transporter TATTTGATGTAGTACACTTAATACAGTATAGAGTATTCTAATTAATGTCAATTCCCTTTTCAGCTCATATGAATAGTTTCTTTTATAGATACAAAGTCTTATATCGCAAAAAATGCCATTCTCCTTCCTTTCCATCCCTCGATAACATTTCTCAAAAACCATGTCAGGAAATCTTACATGAAAATGTTTTATCATTCTTTTTTCTCTATAATGAAGAAATATAATAATTGCTTTTTATTCTGAAAGATACG >strand - ywoB ywoB CGTATCTTTCAGAATAAAAAGCAATTATTATATTTCTTCATTATAGAGAAAAAAGAATGATAAAACATTTTCATGTAAGATTTCCTGACATGGTTTTTGAGAAATGTTATCGAGGGATGGAAAGGAAGGAGAATGGCATTTTTTGCGATATAAGACTTTGTATCTATAAAAGAAACTATTCATATGAGCTGAAAAGGGAATTGACATTAATTAGAATACTCTATACTGTATTAAGTGTACTACATCAAATA >strand + ywnH similar to phosphinothricin ace ACATTCTGAACACTTTTCTGAAAATATATACAACCTATCCAAAAAAACTTTTTTAGATGTAGTTTGCACAGACTGAACGAAAATATGTATTTTTTTGAGAGGAGTGCCAA >strand - spoIIQ alternate gene name: ywnI 
TTGGCACTCCTCTCAAAAAAATACATATTTTCGTTCAGTCTGTGCAAACTACATCTAAAAAAGTTTTTTTGGATAGGTTGTATATATTTTCAGAAAAGTGTTCAGAATGT >strand + ywnE similar to cardiolipin synthase TTGATGTTTCAAGCTTCACGAACGTTATCATATGTGTTTATTTTAACGCAGGTGCAAGCTGGTTTTAAAGAAATCTGCCCGTATTTTTCACGAAGAATCTGGATCGCCGCTCTTTATTCTTTTAAAATGTGAGTGTTATAATTTGTAATGGGAATCAGATCACTGTTATTCATGGAATACTAATGGCAATCTTCTTGATAAATATGAATGAG >strand - ywnF ywnF CTCATTCATATTTATCAAGAAGATTGCCATTAGTATTCCATGAATAACAGTGATCTGATTCCCATTACAAATTATAACACTCACATTTTAAAAGAATAAAGAGCGGCGATCCAGATTCTTCGTGAAAAATACGGGCAGATTTCTTTAAAACCAGCTTGCACCTGCGTTAAAATAAACACATATGATAACGTTCGTGAAGCTTGAAACATCAA >strand + ywnC similar to hypothetical protein TTTATCGTAAACAATCACGCAACGTTAGGGTCAATCCCCAAAATGTTATTTTCTATATACAAAACTTCTCACAAATAGTGTATAAAAGAAAAGTGAGTATTTTTTTGTAAAGGG >strand - mta transcriptional regulator CCCTTTACAAAAAAATACTCACTTTTCTTTTATACACTATTTGTGAGAAGTTTTGTATATAGAAAATAACATTTTGGGGATTGACCCTAACGTTGCGTGATTGTTTACGATAAA >strand + ywmA ywmA TGCGTTCATACCCTTTCTTATTATATAAGAAGCACAGATAAAGATGATACATTTTATACAGAACGGAATAATCAGCCAAATCTAGGGCTAATCCCCGCTTAAAGTGCGATTCTTTTACATGGTATACTTTATTGGAAAG >strand - ywzB ywzB CTTTCCAATAAAGTATACCATGTAAAAGAATCGCACTTTAAGCGGGGATTAGCCCTAGATTTGGCTGATTATTCCGTTCTGTATAAAATGTATCATCTTTATCTGTGCTTCTTATATAATAAGAAAGGGTATGAACGCA >strand + ywlA alternate gene name: ipc-26r; s TCCGTGCTTTGTTTCTAGCCCTATTATGGACAAGGAGAGCCTGGGATAAACGTGCAGGGTGAAAAAAATCTAGCAAGCTGTTAATCTTATTTATGGTACGATAGTGTTTT >strand - spoIIR alternate gene name: ipc-27d, AAAACACTATCGTACCATAAATAAGATTAACAGCTTGCTAGATTTTTTTCACCCTGCACGTTTATCCCAGGCTCTCCTTGTCCATAATAGGGCTAGAAACAAAGCACGGA >strand + ywkD similar to hypothetical protein CCTGTGCGTAACATTCTCATTATAGTATAGTGATTAACCCCAGTCAAAAGCATTTCAATCCCAGCTGGAAAAGCTCCCTCTGCTATACTGGAATT >strand - prfA peptide chain release factor 1 AATTCCAGTATAGCAGAGGGAGCTTTTCCAGCTGGGATTGAAATGCTTTTGACTGGGGTTAATCACTATACTATAATGAGAATGTTACGCACAGG >strand + ywjG ywjG GATTTTCGTCAAAAGTAAGCAGTATTGTATGTATTCTGTTTGATTTTCCTATTTCCTTTAATTATAATAGTCTACTTTACGACATTTTCTGAGCATTTTCTCTTTTGTTGTATACTGATATTGTACGTTATAAAG 
>strand - spo0F two-component response regulat CTTTATAACGTACAATATCAGTATACAACAAAAGAGAAAATGCTCAGAAAATGTCGTAAAGTAGACTATTATAATTAAAGGAAATAGGAAAATCAAACAGAATACATACAATACTGCTTACTTTTGACGAAAATC >strand + ywjE similar to cardiolipin syntheta CTGATAAAGACCGCGATCCCCTACCCCCGAATCTGTATAAGATTATCTTTATGATAAATAATGAATGAGTGTTCAGTCAATAATTAAATTCCTGCTGTTCGGGCACACTACCTTTACTACGGAA >strand - ywjF similar to hypothetical protein TTCCGTAGTAAAGGTAGTGTGCCCGAACAGCAGGAATTTAATTATTGACTGAACACTCATTCATTATTTATCATAAAGATAATCTTATACAGATTCGGGGGTAGGGGATCGCGGTCTTTATCAG >strand + ywiC similar to hypothetical protein AAGGATAGCTTTAGTGTAGCTCGACTGAGCCTGTACAGCAGTGATGTATTTCACAGCCTGTATGATGAATGTCACTTATCAGGAGGGTGAATGCATTTATAATGAAAGAAAAA >strand - ywiD ywiD TTTTTCTTTCATTATAAATGCATTCACCCTCCTGATAAGTGACATTCATCATACAGGCTGTGAAATACATCACTGCTGTACAGGCTCAGTCGAGCTACACTAAAGCTATCCTT >strand + sbo subtilosin A TCATTCCACTTTGACTTGAATAGTATAATCATTCGGTTTGTAAACTTCAACTGCTTCTATCTTACCATCATTGCTCATCAGATTTGAAGATAAACCTCATAAAAAGCATTTCCTTATATAGAAGAGAAAATCATATCACTAATTACCTTTAGGAAATGTTACATTTTTCCGAAATCTATCATTTCCTTTTCACATTTTTTTCAAAATATATGTATTGAATTAGTAATTTGATAGTTTTAAGATAAAAGTACAACATAGATCTGCTAGAAAAACAAAAAAAGG >strand - ywiB ywiB CCTTTTTTTGTTTTTCTAGCAGATCTATGTTGTACTTTTATCTTAAAACTATCAAATTACTAATTCAATACATATATTTTGAAAAAAATGTGAAAAGGAAATGATAGATTTCGGAAAAATGTAACATTTCCTAAAGGTAATTAGTGATATGATTTTCTCTTCTATATAAGGAAATGCTTTTTATGAGGTTTATCTTCAAATCTGATGAGCAATGATGGTAAGATAGAAGCAGTTGAAGTTTACAAACCGAATGATTATACTATTCAAGTCAAAGTGGAATGA >strand + rapF response regulator aspartate ph GACATACACTGCTCACGCAGATATAACAGAATATGGAAGGGTAACGTGTTTGCTTGTACAAATTGCTGAGGGTAAACGCTTCTTTTTGAAAATATATCTCCAAACATGCGGTGTAACGAAAGAAAAAATTGCGGTTTTTTTGCGCTTCTCACAAGAATGTTGATGATAAAATATGACATATATAGAATTATGAAGGGAG >strand - ywhK similar to hypothetical protein CTCCCTTCATAATTCTATATATGTCATATTTTATCATCAACATTCTTGTGAGAAGCGCAAAAAAACCGCAATTTTTTCTTTCGTTACACCGCATGTTTGGAGATATATTTTCAAAAAGAAGCGTTTACCCTCAGCAATTTGTACAAGCAAACACGTTACCCTTCCATATTCTGTTATATCTGCGTGAGCAGTGTATGTC >strand + ywhE similar to penicillin-binding p 
GGCTGTATTCATTTATTGTAAGCGTCTTATCAGCTTTTATTCTCGGTTGAAATCTCATTTTCACCTAAGGAAAAAGTATAGAGGATGGCGGGGAAAAAGTAAAGATGCGTCCTGTTCTGCGATGTTTAAAAACGATCTTTTTTTCTCATAATAGTAGAAACATAAAAAAAG >strand - ywhF similar to spermidine synthase CTTTTTTTATGTTTCTACTATTATGAGAAAAAAAGATCGTTTTTAAACATCGCAGAACAGGACGCATCTTTACTTTTTCCCCGCCATCCTCTATACTTTTTCCTTAGGTGAAAATGAGATTTCAACCGAGAATAAAAGCTGATAAGACGCTTACAATAAATGAATACAGCC >strand + ywhB similar to 4-oxalocrotonate tau TCAAAATGCAAGAATTTACTTTATTCAGTATACTATAAGAAGCCTCTTTTTCTAAAGAGAGGAAACGAGATAAAGGAG >strand - ywhC similar to hypothetical protein CTCCTTTATCTCGTTTCCTCTCTTTAGAAAAAGAGGCTTCTTATAGTATACTGAATAAAGTAAATTCTTGCATTTTGA >strand + ywfN alternate gene name: ipa-92r; s TTTCTATGTTCATCTATGTGAATAACCTAAAATCACGTTTCAAAAAAAATCACATCACACTTCATTTTACAAGGATTCGACTGTTCCTGCAAGCCGCTCCATCCTTCGTATCTCCTATTCTTTCAGCCAATTTGTATAAATATTTTCCAACCTGGACATTCTAATACCATGAAAAGAAAAATGG >strand - ywzC ywzC CCATTTTTCTTTTCATGGTATTAGAATGTCCAGGTTGGAAAATATTTATACAAATTGGCTGAAAGAATAGGAGATACGAAGGATGGAGCGGCTTGCAGGAACAGTCGAATCCTTGTAAAATGAAGTGTGATGTGATTTTTTTTGAAACGTGATTTTAGGTTATTCACATAGATGAACATAGAAA >strand + ywfI alternate gene name: ipa-87r; s AGTTACAAAAACGCTTTCTTTTACTAGCATACTCTTCTTCCAATCAAATTCAAACAGCCAACTTTCATTATAGCGCTTTCATAAAAAAATCTTATTTTTCTAACTAATTTAAGTGACTCAATCTTCCCTTATAGGTTCTTCTATTTTGATGGAGTTATACGGAAGGGATGTTCAAGGTTTCTCCACAGTAGAATCGGATAAACTATGGTATAGTGAAGGCAATCATGAAAAGATAATGG >strand - pta phosphotransacetylase CCATTATCTTTTCATGATTGCCTTCACTATACCATAGTTTATCCGATTCTACTGTGGAGAAACCTTGAACATCCCTTCCGTATAACTCCATCAAAATAGAAGAACCTATAAGGGAAGATTGAGTCACTTAAATTAGTTAGAAAAATAAGATTTTTTTATGAAAGCGCTATAATGAAAGTTGGCTGTTTGAATTTGATTGGAAGAAGAGTATGCTAGTAAAAGAAAGCGTTTTTGTAACT >strand + ywdL alternate gene name: ipa-62r ACTTCACACCTGAAACTATCGTTATCTTATGTTGCTTATGCTCAAGTTGCGCTAGGGATACAAACCAATATGCGAAAAGTTTGTGTATGTTTCTGAAAAACAGCGGCTGCCCAGAGCGAAATCTGATCGGTCCGCTGTTCTTTTTCAGCATAATCCAAGCATCAGCCTCATAAGGTGAAAGAGATAAAAATCGCACCAGCGACTAAGATCGTTTTCCATCGGAGGTGAGTCATCGCGGAGCGTTCACCTATACATCCAGAACATGTTTTGAATGGTTTTCATAAAAAGG >strand - spsA 
alternate gene name: ipa-63d CCTTTTTATGAAAACCATTCAAAACATGTTCTGGATGTATAGGTGAACGCTCCGCGATGACTCACCTCCGATGGAAAACGATCTTAGTCGCTGGTGCGATTTTTATCTCTTTCACCTTATGAGGCTGATGCTTGGATTATGCTGAAAAAGAACAGCGGACCGATCAGATTTCGCTCTGGGCAGCCGCTGTTTTTCAGAAACATACACAAACTTTTCGCATATTGGTTTGTATCCCTAGCGCAACTTGAGCATAAGCAACATAAGATAACGATAGTTTCAGGTGTGAAGT >strand + ywdH alternate gene name: ipa-58r; s AATGTTGTCTTCCTGCATTTTATCAAATTCCTGCTGAAAACGCAGATATGAGTTTGTTATTTTTTCTGACAAACCTTGCGCGATTGACAAGCAGAAGAGATTTTTCACATAATAAATCACCATACCTTACAAAAAGG >strand - ywdI alternate gene name: ipa-59d CCTTTTTGTAAGGTATGGTGATTTATTATGTGAAAAATCTCTTCTGCTTGTCAATCGCGCAAGGTTTGTCAGAAAAAATAACAAACTCATATCTGCGTTTTCAGCAGGAATTTGATAAAATGCAGGAAGACAACATT >strand + ywcJ alternate gene name: ipa-48r; s AAACAAAAAAGACCTAAAATTCGCCAACGAATAAAGGACACAGCGATCCTTTTTCATTTACGCAAATTTTAGGTCTTGCCTGCTTTACCAGTCACAATCCCGCTTATTCAGATTAAGAATACGCTTTCATCATAAATCATGATAGCGTTTTCGTCAACTATTTTTTTAGTTAAATAGTTTGATATAACATGTAGACAAAAATTCGTAAAAATTAATTGTGAAATACTTCACAATATCGTGCCATACTATGCTCAATCATGAAAGAAAGCAG >strand - sacP phosphotransferase system (PTS) CTGCTTTCTTTCATGATTGAGCATAGTATGGCACGATATTGTGAAGTATTTCACAATTAATTTTTACGAATTTTTGTCTACATGTTATATCAAACTATTTAACTAAAAAAATAGTTGACGAAAACGCTATCATGATTTATGATGAAAGCGTATTCTTAATCTGAATAAGCGGGATTGTGACTGGTAAAGCAGGCAAGACCTAAAATTTGCGTAAATGAAAAAGGATCGCTGTGTCCTTTATTCGTTGGCGAATTTTAGGTCTTTTTTGTTT >strand + vpr extracellular serine protease TTGAAATGAAAATTGGAGAACCGCTTTGAAAACTTTATACACAAGTTATCCCAAAGATAAGAACAACTTAATCACAAGAGATATCCACATGTCCACAAACTCTATCTATATTTTGTATACGAACGTATATTCCTAACTATATATATACACAGGTTTATTCACTTATACACAGGGTTCTGTGTATAACTCCTTCGTTATACACAAACAAAATCCAATAAATGGTCCAAATGACACAAGGATTTTTTTGAATTTTCAAGAAATATATACTAGATCTTTCACATTTTTTCTAAATACAAAGGG >strand - ywcI alternate gene name: ipa-46d 
AGTTTGTGGACATGTGGATATCTCTTGTGATTAAGTTGTTCTTATCTTTGGGATAACTTGTGTATAAAGTTTTCAAAGCGGTTCTCCAATTTTCATTTCAATTCTTTCAGCTTTCAACAAAACTTCTAAAAACTAGTTGGTTTTTTCACTCATATTGTGACAAAGAATTCTCTTTCCACGGTCTATCATAGAGTCATGAGTTTTGCTAGAGCCGCTAGTTTGCGAGAAAAGAAATCGGCCGTCTTTTCTTGCGGGCTGCGGCTGAGAAAACGGGCTGCTAAAAAGGCTGCCTGCCAAAAA >strand + ywcE alternate gene name: ipa-41r AAAAAACGCCATACCAATAGAATTTGTTAAAAAACGAATGCGATAATGTGTTATGTTCCCAAATAATGTGATACCTCATATAGAAATAGCTAAAAATTCAGAGCCCCATGCCCATACATTCTCCTAGCACATTAAACGAAAAAATAAGACGATAGATCCAGTCCATTTCTGCAAAAACACGTATCAATTTCTGGAAAACATGGTATAATAATTGTAACAGAATGTTCAAAAAACAATCACAAAACAGACACAACTCAGTTCCTCTATTTCATTTTTGTAAACGTTATCACAAAATGAAAG >strand - ywcF alternate gene name: ipa-42d; s TTAATGTGCTAGGAGAATGTATGGGCATGGGGCTCTGAATTTTTAGCTATTTCTATATGAGGTATCACATTATTTGGGAACATAACACATTATCGCATTCGTTTTTTAACAAATTCTATTGGTATGGCGTTTTTTTCATTTGAAAAGTTTTGTGTCAATCGAAACATTTCGGTTTATGATACGTCATATTTCGTGTAGCTGAAAAAGCGCCGCAAATGCTGAAAAAACATAAAAGGAAGCGAGCTTCACGTGTCATCTTGAACGTGGTTCATGTATAATAAACATTAGTGATTTTCCGAT >strand + ywzA similar to hypothetical protein TTGTTAAACTTTCTAGATAGGAATTATTTTTTCCTCGTCTAGCATCTTTTCACACGGTTACCATTTTAACGCATCAAATATTGAAAGAAAAGGCGCAATGAAGAGCCAATATGCTCTGAATTGTCCGTATTTTTTTATGAGAGCATCCAGTTGTTTATCTTATACAAAAAAGAGGAATGATATAAGCAAATAAATAAAAAATAGGATGTGTTGCATGATGAGTTTTCTTATTTCACTTATCGTCGCTATTATTATCGGATGGCTTGGAAGCCTGTTTGTAAAAGGAGATATGCCCGGAGA >strand - qoxA cytochrome aa3 quinol oxidase ( CTATCTAGAAAGTTTAACAAAATGAAATTTTTGATTGACCTAAGTTTTTTATTGTGGTAAACCTTAATTTGTCAGAATTAGTTCTTCATCATCTTTCTTTACTTTATATGCGGAAATAACTAGAAACTTTTCATCTTAAGCGATAGATGTGAATAAAGTTGGAGCAATTTTTGTCGATTTTATGACATTTAAGCGAAAATGGGATGAAGTTTATAAAAAAATAAAGCTAATAAGTGAAGTTTTTGATAGCATAACAAAGTAGAGAAAAGATTTTGAGGAAGTATGCACTTCAGATTTATT >strand + ywcB alternate gene name: ipa-32r 
ACAGCCGGTGATGCGATCACCTCTTGGCACATGCATTGTCATAAAGAGGTGATGCGAACATGATAACCGTGCTCTGGTAAACAGCAGCTTTGCTGAGAACAAGCTGCATTCGGTGAGGTTTTGAAAGCAAACTCGTTTATACACGGCCGAGCTTGAAAACCTTCGCCTGTCTTTAGACCACTGTGTTCACCTCTTCACGACTTCCTTCTTTTTTCATCCTCCAGCTATTGTACAACGCATAAATCGGACTTTCAGTACATACCTATTGGTGTACCTTTTTGCTTACGTTTAGCGAATAAA >strand - ywcC alternate gene name: ipa-33d TGCGGGCAATAAAAGACAAGCCAGCGCGATGATGGAGGCGAGTACAACAAATTTCACACATAGTTGCTCGGTGTTCACAAACTGAAGGAAAAACCACCTACCTATTCCCTAAAAAAATGACTAATATAACCAATGGACCTTATTTATAATAATAAACAATTAAATACTGACTGGATAGTATGTAAATGAAAATAAATCATTTAGCCTATTAAAAATGACCCCTGGGAAACCGAATAGCATAACCGTTTAAAGAAATAAATACCGCTAAAGATTGATCCGGTGTAAGATAAGGGTAATCAT >strand + ywbH alternate gene name: ipa-23r AGACAAAAGGAATACTTCTTATTCTTAATATATATTTTACCTATAGAGAGACCTTATGTACAATAAGTATATAAG >strand - ywbI alternate gene name: ipa-24d; s CTTATATACTTATTGTACATAAGGTCTCTCTATAGGTAAAATATATATTAAGAATAAGAAGTATTCCTTTTGTCT >strand + ywbC alternate gene name: ipa-18r; s GTTTGCTTTCTCTTATGGTACCACAAAACGCCAAGATGTCTGCATTGACAGAAAGGAAAGGTTTTTCTACGATAGACAAAAAGAGTTACG >strand - ywbD alternate gene name: ipa-19d; s CGTAACTCTTTTTGTCTATCGTAGAAAAACCTTTCCTTTCTGTCAATGCAGACATCTTGGCGTTTTGTGGTACCATAAGAGAAAGCAAAC >strand + epr extracellular serine protease TCATTGCTTTGTGAACAACCTCCGCAATGTTTTCTTTATCTTATTTTGAAAACGCTTACAAATTCATTTGGAAAATTTCCTCTTCATGCGGAAAAAATCTGCATTTTGCTAAACAACCCTGCCCATGAAAAATTTTTTCCTTCTTACTATTAATCTCTCTTTTTTTCTCCGATATATATATCAAACATCATAGAAAA >strand - ywbA alternate gene name: ipa-16d; s TTTTCTATGATGTTTGATATATATATCGGAGAAAAAAAGAGAGATTAATAGTAAGAAGGAAAAAATTTTTCATGGGCAGGGTTGTTTAGCAAAATGCAGATTTTTTCCGCATGAAGAGGAAATTTTCCAAATGAATTTGTAAGCGTTTTCAAAATAAGATAAAGAAAACATTGCGGAGGTTGTTCACAAAGCAATGA >strand + ywaE alternate gene name: ipa-10r; s GGGTTTTTGTTTATTTTGGCGTTGTTTGGGCTTTTTTTATTCAACCGATATATGGGCTTGAAGTTCACACCAGACTCTGTTAAAATTCGTTTCATAGCTAATGATTCTGATAATTTAAGG >strand - ywaF alternate gene name: ipa-11d CCTTAAATTATCAGAATCATTAGCTATGAAACGAATTTTAACAGAGTCTGGTGTGAACTTCAAGCCCATATATCGGTTGAATAAAAAAAGCCCAAACAACGCCAAAATAAACAAAAACCC >strand + dltA 
D-alanyl-D-alanine carrier prot GCCTTGCAGGTATAAAGATTCATTTTCAAAAACATACGCCGATATATTAATTTGAGGGAGATTAGGGAAAAAATGAAACTTTTTGAGCATCTGATCGTCAAATAATCATGTGATTGTCGAAAAAACGGGAAGGGAATTTTTGATATGCTGCAATTAATGAAACAGCTGTATGAAAAACCCGCTGTCAAGTGGACATGTCATACAGGCTTTTATTTGATGATACTGCTTGTTTTGTTTTTTATGTATGGTTTTCACACCGCGAATACCGGTTCATATATTTATAACGATTTCTAATTGGAG >strand - ywaB alternate gene name: ipa-6d; si CTTTATACCTGCAAGGCAAGAATGGAATGTTTCGAGCTGATTCATAATAAAGGAAAAGATACCTGGAGTCAAAAGATCTTTTTAACTTTTAAAAAGTGTAATGTACTTAAAAAAATTTTTCTGTACCTGTTCGTTCAAGTGAAAGCAGAAACCTAATGAAATCAGGGCGAGAAGCGTATAATAAGAAAGAAAACGCCTTCTTGCGGTATCTTGTGAAATAAGGGAGAAGTCATTATGATAGATAAGGAAGCAAAAGGAGTTTTTTATTAAGCTGCCGGCTGAAAAAAAGTAAGCCTTTTA >strand + katX catalase CCGATTTTTGTACATTGTACCATTCCCGAAGCAGATCAAAAAAAACGGCTGTTTTAAAATCTTTCCATTCAGGGAATATTGTTACCGTTCAAAAAAGG >strand - yxlJ similar to DNA-3-methyladenine CCTTTTTTGAACGGTAACAATATTCCCTGAATGGAAAGATTTTAAAACAGCCGTTTTTTTTGATCTGCTTCGGGAATGGTACAATGTACAAAAATCGG >strand + yxlA similar to purine-cytosine perm TTTGTTTTTCTGCCTTATGAGACGACACCGGATTCAAAAGCGTTCATCTTTTTTACAATTCTTTTCAGCCATTGACAGATCAGGCTTCCCGGCCGTTTGGCTGCATATCCTTATTTGTAAACCCCCTTATAGTCTGTTAAGCTATGACTTTATGTCGATTTCAAAAAAACATCCAATATGG >strand - sigY RNA polymerase ECF-type sigma f CCATATTGGATGTTTTTTTGAAATCGACATAAAGTCATAGCTTAACAGACTATAAGGGGGTTTACAAATAAGGATATGCAGCCAAACGGCCGGGAAGCCTGATCTGTCAATGGCTGAAAAGAATTGTAAAAAAGATGAACGCTTTTGAATCCGGTGTCGTCTCATAAGGCAGAAAAACAAA >strand + yxkJ similar to metabolite-sodium sy AAAGTCCCGCACTATGTGACGGATTTTTGTCTATTTTGTTGCTTATTTATGTCGGCTGCTAGGATAAATGACCCTTTATTATCGTACATCAACACATCGTTCCTTGCGAACAAAATGCTTGCCCCCGCCCCAGCACTCGAGTCTCAGCATTCCGTCTTAAACCGATTTTAGTCAGAATGCCTCTTTTCATTTGAACCGGTAAGAGGAGAAAAGTTGAATAGTAAATTTTTAATATATTAATAATTTCAATTTTTTAGTATACTTAACATAGTGAACAATTTCACTGCATCAAGAATAGGG >strand - cydA cytochrome bd ubiquinol oxidase 
AGGGTCATTTATCCTAGCAGCCGACATAAATAAGCAACAAAATAGACAAAAATCCGTCACATAGTGCGGGACTTTTTAGGTATTTTAGGCTTTATTTTTGAAATGAATCGTTGTAAAGTACTTAATATGAACCAGTCAGAGATTGTGTCATTTGGTCAGTCTGGCAATCTTGCATCATATTGGATGACTTTTTGACACATTTGTGAAATATTGAGCAATATTTTTTTCGTCTATTTTGTGAATTACTGATCAAAGTCTCGGTTCTATTTGTGAAGTAGTGAGCAAATTAACAGTTTTAAC >strand + aldY aldehyde dehydrogenase AAGGATCTGTATATTCAGTATAAGGCGAAACACACATGTTCTCCACTAAAAAAGAGTATATCCGGTATAGATAGACGAGAAACTGAAAGGGAAACCTCATTCGTTTACATATTGGCTTCAGCGGAAATAGAAGAAGACATGCAGGACCAAAGG >strand - yxkF similar to hypothetical protein CCTTTGGTCCTGCATGTCTTCTTCTATTTCCGCTGAAGCCAATATGTAAACGAATGAGGTTTCCCTTTCAGTTTCTCGTCTATCTATACCGGATATACTCTTTTTTAGTGGAGAACATGTGTGTTTCGCCTTATACTGAATATACAGATCCTT >strand + yxkC yxkC CGGAACCCTAGAAAACCACTGTTTTTATCATATCCTTTTCACTTTCCAATATTAGCATTATAAGGGACGGTTCGAATTGGTTCAAGGGGTATTTGCGGAGTTTTCATCAGATAAGCTGGAAAAAAATACGATTTATCTATTAAAAAAGAACAGATATAACCGATAAGAAAAAAAGCATATATTTTACAAGCGAATGTAACTATAAATGATAATAATTATAAGAAATTCATAATTTTATATTGACTAAGAGAAATAATATAGGCATTATAGTACATATGTTTCGTACATTTTATTACATAG >strand - yxkD similar to hypothetical protein CGGTTATATCTGTTCTTTTTTAATAGATAAATCGTATTTTTTTCCAGCTTATCTGATGAAAACTCCGCAAATACCCCTTGAACCAATTCGAACCGTCCCTTATAATGCTAATATTGGAAAGTGAAAAGGATATGATAAAAACAGTGGTTTTCTAGGGTTCCGCAATGCAAATTGGACTGTGACCGAGAGAAAACACATAGCAGCTTGGCTGTTATGACACGGTGGGATAAAAGCCCAGGAGTTTCTGCTCTTTTTCGAGAGCGTTCTCCTGGGTTTTTTTATTTGCTAGAACAGAACAAT >strand + yxjO similar to transcriptional regu TTCTCGTTCTATAGGTATACTAAACGAATCGATGTATAATCAGTGATAAACAAATACGCATGTAAACGTAACAATTTGTTATGGATAACTTGCAGTGAA >strand - yxkA similar to hypothetical protein TTCACTGCAAGTTATCCATAACAAATTGTTACGTTTACATGCGTATTTGTTTATCACTGATTATACATCGATTCGTTTAGTATACCTATAGAACGAGAA >strand + yxjA similar to pyrimidine nucleosid 
CATTTTAGAATATAAATTTTATTATTGTGAAAATAGTTAAAAAGGTTGATTTTCAAAAAGTGAAAGGATAAAATACTGCTATCATCTTAGAAAAAGACATTCTTGTATATGATCAGTAATATGGTCTGATTGTTTCTACCTAGTAACCGTAAAAAACTAGACTACAAGAAAGTTTGAATAAATTTGAACGAGTTGAAAAGGACAAGTTCTTTTCTGTTTGCTCTTATTTTTCACACTTTCTGCACTTCCAGAATTTGTGAAGGATAAGAGCTTTTTTTGTTTCCATAATAACCCTCATAG >strand - yxjB similar to hypothetical protein TTATATTCTAAAATGAGAAAATAAAGGAAAAACATGCTGGATTGCTATGCTGATATGATGTTGGCGGGATACCAGCATGTCAGTACTGATTTTTCTTCACTATAAGGTTCCAGCTCCTGGATATTTGTTGTATGATGGAAACGGGGAAACCCATACAGCAGAAAGGGAGTTTTTTTCATGATTATCCAATTCATTCGTTATAGATAATGACAGGTGTAGACAAACTGCCAGACCTGGCCAATTTTTGAGATAGGCCTAGGTTTATTTGTCTATGCCTTTTTCATTTATCTAAAAATGAAT >strand + yxiQ similar to Mg2+/citrate complex AACGAAACATTCTACAGCACATTTACCCGTGGCGTTCTTCATATAAACTGCTAATCCGCTCATTAGGATTTTTGGTCTACATATAAATATGCGGTCATACACATTTGAAATAAGCCTTCTTCACAGAAAGAAAGGATGGAAGCGCTTACCAAAAAAGCTGTTGAATAGGG >strand - katB catalase 2 CCCTATTCAACAGCTTTTTTGGTAAGCGCTTCCATCCTTTCTTTCTGTGAAGAAGGCTTATTTCAAATGTGTATGACCGCATATTTATATGTAGACCAAAAATCCTAATGAGCGGATTAGCAGTTTATATGAAGAACGCCACGGGTAAATGTGCTGTAGAATGTTTCGTT >strand + yxiO similar to hypothetical protein GAATCCCTTCTGGAAAATACCATAAAATATAGTTAAATGGTACCAGATGCACCTAACCCCAGCAATAAATCAAAAGTATGATTTGAAGCGAAAAAACATAAATATTTTGTAAAAATTGACGTGCATTCCATTCTTTTCAGGCCCATTTCTCGTACAATAGAAGTATCGTTACCGGA >strand - yxiP yxiP TCCGGTAACGATACTTCTATTGTACGAGAAATGGGCCTGAAAAGAATGGAATGCACGTCAATTTTTACAAAATATTTATGTTTTTTCGCTTCAAATCATACTTTTGATTTATTGCTGGGGTTAGGTGCATCTGGTACCATTTAACTATATTTTATGGTATTTTCCAGAAGGGATTC >strand + hutP transcriptional regulator ATTTCGTTGTCGGCGTTTTTTAAGATGATGCCTGATTGCCCGTCAACACCAGAGTCATTCGGCTGGCTCTTTATAAAACGCCTTGCGAAAAATACGACTTAGTAAACGCATATTGAGAGTTAGACAGCGGCGCTTGCCTCCTTGTCTGGATATCGTGTGCAATCGCTGAAAAAACCTGTTCTGCTTTCTGCGTCACGCTATTACAATAGCAATCTAACTTGTTAAAGACTATAAAAAAACCTTTTGACTTCTGCTGCTGAACCAATTAATATAATACTCAGTTAATAGTTATCAGAATTT >strand - yxiA similar to arabinan endo-1,5-al 
TTAAAAAACGCCGACAACGAAATGGCTGTATCACAGTTGATGCAAAACCTGACAAAGCAGTTATTACATGTACGCGAAAGAAAAACAGACGTGATGCAAGAAGGAGAAACAGCAAAAGAGTTGAACTATGAAGGAGAGGATATGCAAGCAACAAGTTCTGCGCAAAACAGACAGACATCAGTCTGACTATGTAAGTGAAGTGAGAAATCACCTATCTGGATGAATATGAAAACATTCGGCTGTTTTAATAGGCCGGGGTATCTTCATAGAGAAATGTAAGCGTTTAAAATTTAATAAAAA >strand + yxeE yxeE CGAATGCGTGCCACTGTGCCAATGACTACATAAGTTATAAGGAATTCACCCACAAGG >strand - yxeF yxeF CCTTGTGGGTGAATTCCTTATAACTTATGTAGTCATTGGCACAGTGGCACGCATTCG >strand + yxeB similar to ABC transporter (bin CCTATAATAATCGATTAGGCACACCGCGTCACCCATAAACGGGAAAAACTTTCTCCCTTAAAAATAGTGATAGACGGAGAACAGGTTTCTTGCTATATTATTAATTGATAATGATAATCATTACTAATCTATTGAGATACATAGTGG >strand - yxeC yxeC CCACTATGTATCTCAATAGATTAGTAATGATTATCATTATCAATTAATAATATAGCAAGAAACCTGTTCTCCGTCTATCACTATTTTTAAGGGAGAAAGTTTTTCCCGTTTATGGGTGACGCGGTGTGCCTAATCGATTATTATAGG >strand + iolR transcriptional regulator (DeoR AAAAACACCAAAAAAATAAATGGCGGATGCATATTTACATACAAGCAACCATTTCATGCACAACAGGTTACTTTTTGGTCATTTCTTGGTTATATTCTAATCGCATAATACCCATAAGTCAATCATAAACACTACATTTTTTCATTATTTTTATTATATTAATCTTTTAATCAAGTAAGAGAAGTAAGAGAAGGCCGTCCAATCTTGCAAAAGATATACACCGATGTTATCATTTTTAAGCAACTATTGATTAACTTTTGGTTTTTATTATATATTTATGTTACGTAAAGATTCAAGAAG >strand - iolA methylmalonate-semialdehyde deh ATCTTTTGCAAGATTGGACGGCCTTCTCTTACTTCTCTTACTTGATTAAAAGATTAATATAATAAAAATAATGAAAAAATGTAGTGTTTATGATTGACTTATGGGTATTATGCGATTAGAATATAACCAAGAAATGACCAAAAAGTAACCTGTTGTGCATGAAATGGTTGCTTGTATGTAAATATGCATCCGCCATTTATTTTTTTGGTGTTTTTGAAAGCGTTTAATTCTTGGCTTGCTGAAAAGTGGATATTGATATGAATCTATCCGGAACCATCCATACAAGTGTATGTGTTTACT >strand + yxbG alternate gene name: yxaU; simi TTCTAATTCATCTTAACATATGCTGGCCTTTCTTTTCTCGCATGTTTATCACTGCACATAGCGGGAAGACAAATAGAAAAAGGAGG >strand - yxcA yxcA CCTCCTTTTTCTATTTGTCTTCCCGCTATGTGCAGTGATAAACATGCGAGAAAAGAAAGGCCAGCATATGTTAAGATGAATTAGAA >strand + aldX aldehyde dehydrogenase GACAGATCTTAGATATATGTACGGTAACGTATAGATGGTTCACCGTTGTCTTTCTTACAGAAAAGGTACCACCTTATAATAGGTCCGAGAAAAGAAGAAAGCAAATGAGGGG >strand - yxbF alternate gene name: yxaT 
CCCCTCATTTGCTTTCTTCTTTTCTCGGACCTATTATAAGGTGGTACCTTTTCTGTAAGAAAGACAACGGTGAACCATCTATACGTTACCGTACATATATCTAAGATCTGTC >strand + yxbB alternate gene name: yxaP; simi GCGGCCATTAATCTCATATAGCATTTGACATCTAGTACTCTATTTTAGGAGGGATGCTCAATGAAAAAAGCTTATAAGAAACCTCGCCTTTTTGAACTGCGCCGTGCTGTAGCAGCTGGCGGCCATTAATCTCATATAGCATTTGACATCTAGTACTCTATTTTAGGAGGGATGCTCAATGAAAAAAGCTTATAAGAAACCTCGCCTTTTTGAATTGCGCCGCGCTGTAGCAGCTGGCGGTCATTAATCTTATCCAAAACGATTAACGTTTGAGGCTATCACGCCAACTTGATAAGGAGG >strand - yxbC alternate gene name: yxaQ CCGCCAGCTGCTACAGCACGGCGCAGTTCAAAAAGGCGAGGTTTCTTATAAGCTTTTTTCATTGAGCATCCCTCCTAAAATAGAGTACTAGATGTCAAATGCTATATGAGATTAATGGCCGCCAGCTGCTACAGCACGGCGCAGTTCAAAAAGACGAGGTTTCTTATAAGCCTTTTTCATCCTTTTCCCTCCTTCCTTGTAAAAAAATAGGCTATCACGCACAACTAAATATTATAATCCTCTGATAATTCTGTCAATATTTACTTTTAGAAAAAAATGTTATATCTTTGTGGTGAAGTT >strand + yxaI similar to hypothetical protein ATATTAACCTTTTTTATTATAAGTAAGTTGAATGTTATATTCATTATATTTTTAAAAAATTCCAGAATACATACACAAATGCTATGCTTCTTTCTAGTATATTTATATAGTAAAAAACAAAGGATAAAAGACTCATTTTTCTGTAATATCAGGATTTTATCACCATACCATATTGCATGTACATGTTTTTCAAAGTATACTATTCCAGATTAATTCAGAGGAGAGA >strand - yxaJ yxaJ TCTCTCCTCTGAATTAATCTGGAATAGTATACTTTGAAAAACATGTACATGCAATATGGTATGGTGATAAAATCCTGATATTACAGAAAAATGAGTCTTTTATCCTTTGTTTTTTACTATATAAATATACTAGAAAGAAGCATAGCATTTGTGTATGTATTCTGGAATTTTTTAAAAATATAATGAATATAACATTCAACTTACTTATAATAAAAAAGGTTAATAT >strand + yxnA similar to glucose 1-dehydrogen TGGTAGAGCGACTTATATAATATAAAAAAGAACCCTGTGATTGTTAAAAGGGGTAAGACCCTTCCGGATGGGGTAATGTACAAAAACAGCGTCTAGGAGG >strand - yxaF similar to hypothetical protein CCTCCTAGACGCTGTTTTTGTACATTACCCCATCCGGAAGGGTCTTACCCCTTTTAACAATCACAGGGTTCTTTTTTATATTATATAAGTCGCTCTACCA >strand + yxaC similar to hypothetical protein CAATCTCCGGCATTGACAACATCATATATACTTGTATAATACAAATGTACATTAACTGAATAAATACAAGTCGTTATATGACTAAATCAATTATTATACTTGTATAGTACAATCATATAAAAAGAAAGTAAAGCGATTCCTCAGAACCATCCTAGAAGCAAGCACCAGAAAGGAGGAAGCTGTTCTGATTGGAGAG >strand - yxaD similar to transcriptional regu 
CTCTCCAATCAGAACAGCTTCCTCCTTTCTGGTGCTTGCTTCTAGGATGGTTCTGAGGAATCGCTTTACTTTCTTTTTATATGATTGTACTATACAAGTATAATAATTGATTTAGTCATATAACGACTTGTATTTATTCAGTTAATGTACATTTGTATTATACAAGTATATATGATGTTGTCAATGCCGGAGATTG >strand + gntR transcriptional regulator (GntR CATGATTCACTTAACCTATTGATCTCCAATGTACCATAATTGATCTGGAAATACATACCATGCAATATGGTAAAAATTTAAATAAAAATTAGAAATGAAAGTGTTTGCATAAAAGAAATATTCACGTTATCATACTTGTATACAAGTATACTCCTTGAGTGAGGA >strand - yxaA similar to hypothetical protein TCCTCACTCAAGGAGTATACTTGTATACAAGTATGATAACGTGAATATTTCTTTTATGCAAACACTTTCATTTCTAATTTTTATTTAAATTTTTACCATATTGCATGGTATGTATTTCCAGATCAATTATGGTACATTGGAGATCAATAGGTTAAGTGAATCATG >strand + yydK similar to transcriptional regu ATTTCACGATATCCTTCGCTTTAGGGGTAGACCGCTTCAGCACCAATCGAAGTCTGGTAGCGCAGCGAGAAGCCCCAATCACATTCTCTTCCCCGCCTACGGCCTCTAAAATGTCTTTTGCTAGTCTCGTATAATCTCTCACTTTCCCCGACATTTCACCCACCCCTTCGTTTTGGTTACGCTTTCATTATAATTGTAACGGTATAATTTATCAATTCCGAAACAAAATATTTACGCACTAACTATCATTGTAAGCGGTTTATGCTATAATTTTGACAAGTGAAACTAAACGAATAAATG >strand - yyzE similar to phosphotransferase s AGTTTCACTTGTCAAAATTATAGCATAAACCGCTTACAATGATAGTTAGTGCGTAAATATTTTGTTTCGGAATTGATAAATTATACCGTTACAATTATAATGAAAGCGTAACCAAAACGAAGGGGTGGGTGAAATGTCGGGGAAAGTGAGAGATTATACGAGACTAGCAAAAGACATTTTAGAGGCCGTAGGCGGGGAAGAGAATGTGATTGGGGCTTCTCGCTGCGCTACCAGACTTCGATTGGTGCTGAAGCGGTCTACCCCTAAAGCGAAGGATATCGTGAAATCGATGCTACCATC >strand + fbp fructose-1,6-bisphosphatase TAAAAAATCTGAAAAATGTCACTATTCTTGGTATATAAAGCCTATAGTCCTCTAGCCCCTCTAAGCTATACCGAATGAAAGGCCATAGTTTGATTTAAAGCTTCGTAATGAAAAGTAAAACATCAATATTGACTACACTAGTTAAGGGATATACAGATTTTAAGTGGAAGAAATGAGGCAAACAATCTTACCCTAAACCAACCTGTACTGAAAAGCTGTTTTAAGCTCTTGCTTACCCCATTTGGCACCTCTTTCTCTCGTTTTAATTGTCTCTAAAAGCAGTTATGCGGTACTATCATA >strand - yydF yydF GGTGCCAAATGGGGTAAGCAAGAGCTTAAAACAGCTTTTCAGTACAGGTTGGTTTAGGGTAAGATTGTTTGCCTCATTTCTTCCACTTAAAATCTGTATATCCCTTAACTAGTGTAGTCAATATTGATGTTTTACTTTTCATTACGAAGCTTTAAATCAAACTATGGCCTTTCATTCGGTATAGCTTAGAGGGGCTAGAGGACTATAGGCTTTATATACCAAGAATAGTGACATTTTTCAGATTTTTTAGGTACAATATATTGACATGTATTGAATGATATAGAATAATTGGTTTATATT >strand + yycS 
yycS CAGGCTGTGGGATAAACTGTTAGTATGTTCGATTTTTCTGATTTCGGGCGGCAGCTCGTGGTCGTCTATGTACATATCCAGCACCGTTTCGATGTGTTCTTCACAAGAATAATACGCTTGTTTCATGTTCAAATCCTCCGATGTTTTCTTCCATTTTTACAAATGTTCTCTTCTCAATATAACCTATTTATTCACATGTTAGTAGTGCTTCGGGCTGAGTTTTATCCACAATTCGACAAACAAGCTGTTGATGACTCTATTTCGCTCGTGTGTTAGTTTGGATGTAACCGATGTGTGGGG >strand - yydA similar to hypothetical protein TGAATAAATAGGTTATATTGAGAAGAGAACATTTGTAAAAATGGAAGAAAACATCGGAGGATTTGAACATGAAACAAGCGTATTATTCTTGTGAAGAACACATCGAAACGGTGCTGGATATGTACATAGACGACCACGAGCTGCCGCCCGAAATCAGAAAAATCGAACATACTAACAGTTTATCCCACAGCCTGTGAATTATGTGGAGACCCCGCAGTATATATAGTGGGGAACGAATGATCTTACACAAAATACAGAAAAGATATCCACAATTGTGGACAACATATATGAATAACTTGT >strand + rapG response regulator aspartate ph GACTAGAGGATTTTACATAAAATAGAAAGAGGTGTTACTATCAGAATAAGCAACTGAGATTGATAAGTCACTAGAGAAAG >strand - yycN similar to hypothetical protein CTTTCTCTAGTGACTTATCAATCTCAGTTGCTTATTCTGATAGTAACACCTCTTTCTATTTTATGTAAAATCCTCTAGTC >strand + rocR transcriptional regulator (NtrC TGTTTCAACATGTTATCCTGCTTTCCCTTTATATAAATGCAAGTTCTGTGCCAAATCAAGAATTCGTGTGTTTTCTACAATTTTCGAGGACAAGGTGCAAAAACATTCTGATATTTCGAGCGAAAATTTCAGCAAGTGCAAAATTCTTTTGCATATCCTCTCCGTTTTTTTATAAAATAGAAGCAATATTAAGAAAATAATTGAGGGAGG >strand - rocD ornithine aminotransferase CCTCCCTCAATTATTTTCTTAATATTGCTTCTATTTTATAAAAAAACGGAGAGGATATGCAAAAGAATTTTGCACTTGCTGAAATTTTCGCTCGAAATATCAGAATGTTTTTGCACCTTGTCCTCGAAAATTGTAGAAAACACACGAATTCTTGATTTGGCACAGAACTTGCATTTATATAAAGGGAAAGCAGGATAACATGTTGAAACA >strand + yycD yycD AAAAATCTCCAACCTCCATTCTACCATTTCTACAGAAAAAAAATAGAAAAATTGTCTGTTTCCTGTTTTTTTCAGTTAGCAGATTGGATCGTTTCGGACAGTAACAAGGCGGGAAAAATGCAATAAAACATAAGGGAGG >strand - dnaC replicative DNA helicase CCTCCCTTATGTTTTATTGCATTTTTCCCGCCTTGTTACTGTCCGAAACGATCCAATCTGCTAACTGAAAAAAACAGGAAACAGACAATTTTTCTATTTTTTTTCTGTAGAAATGGTAGAATGGAGGTTGGAGATTTTT >strand + yycC yycC ATAACGGTCATTTTCCGATTGCTCTATTCCTTTGTGACATCTTCTTACATTTCACATCTGATTCTTGTACAATGGACAGAAGAGAGGAGG >strand - yyzB yyzB CCTCCTCTCTTCTGTCCATTGTACAAGAATCAGATGTGAAATGTAAGAAGATGTCACAAAGGAATAGAGCAATCGGAAAATGACCGTTAT >strand + cotF 
spore coat protein ATGCCCATCACAATTAATGTTTATCATCCATAAAGCCAGAAGATTCCTTGTTTTTCTTTTTCGTTTTTCTGACATACATAAAAATTAAGATCACCAGTAAAAGGGTTGTTGCTTGCAGAACGCCGCTGCTAAACAAGAAAAAGTTCTTTATCATGTCCCACATTGTGTTTCACCTCTTTGTGCATGTTCTCAACCCGTATCCTATATAAGGTGAATTTCCCTTTTTAAAAACTTCAAAACATCATGAACAACATTGAGCGTTGGGCATATGCTGATATGGAATCTGATCGATAAAGGAGA >strand - yybS yybS ACAACCCTTTTACTGGTGATCTTAATTTTTATGTATGTCAGAAAAACGAAAAAGAAAAACAAGGAATCTTCTGGCTTTATGGATGATAAACATTAATTGTGATGGGCATTCTTTTCAAAGAATGTCTTTTCCTTTTTCATGTTTCACATGAAACATTAGCGTCATTTTACTTGCGATTTGGACTTATGTTAAAATATAAAAGTAAAATTTAGAGATGATATAAAAACATTCGGTGCAAAGCATGTTGCAGCGATGAACAAAAGAGCAGTCGTTTATGCTGCTCTTTTCTATTCGTATGTT >strand + yybQ similar to hypothetical protein TTTTTGATACTACATTACATTTTGCAACTAATCTACCCAGTATTTTACACACCTTTTCTCCTTAGCTCAAAGAAAATGCTTCTTATTCACATCAATTTTTAAGGGTTTGCATGATTACACCTGCAGCAAGACGTGTTAAACTATATAAAGATTACACTACTTAAAAAAGGATAG >strand - yybR similar to hypothetical protein CTATCCTTTTTTAAGTAGTGTAATCTTTATATAGTTTAACACGTCTTGCTGCAGGTGTAATCATGCAAACCCTTAAAAATTGATGTGAATAAGAAGCATTTTCTTTGAGCTAAGGAGAAAAGGTGTGTAAAATACTGGGTAGATTAGTTGCAAAATGTAATGTAGTATCAAAAA >strand + yybO similar to ABC transporter (per TGATGCTCCTCTCCCTTTTTGATAAAAATGAAAAAACTCCACCAATTTGGTGAAGTTCCTGAAACAAAAAGACCTTCACCAAATCAGATTTGGCAAAGGTCTCGCTAACAATGAGATTGCCAGCAAAGCCGAGGACACCTTGTCCCGTAATGACGACTTTACTGTGAAAGCTACTCCCCTTTGGAGTGTTCATTTTTATTACTTTCATTCTAAAGCACCGATCTGTTCCAGTCAAATTCCTGCCGTTTATATAAAAGCGCGCTGTGGTATTTAAAATTTGAATGGTTTTTCTATCAGGAG >strand - yybP yybP TAAAAATGAACACTCCAAAGGGGAGTAGCTTTCACAGTAAAGTCGTCATTACGGGACAAGGTGTCCTCGGCTTTGCTGGCAATCTCATTGTTAGCGAGACCTTTGCCAAATCTGATTTGGTGAAGGTCTTTTTGTTTCAGGAACTTCACCAAATTGGTGGAGTTTTTTCATTTTTATCAAAAAGGGAGAGGAGCATCAGAATGTCTAAGCTGAAAAAGTGGCTGAAGCGTGAATTGGTGAGGAAAGGGCTGCCGTTAGCGAAAGAGAAATTGATTCCGATCTTAAAAGAAAAAATGAAAA >strand + yybG similar to hypothetical protein ATTGGATATATTATACATCGCTAGACCACTGAAACCTTGAAAACAAAATGAAAAGTATCCTTTTTTCCTATATAATCAAATAGATATATTTAACAAAGTACCCAAGTGAAGGGGT >strand - yybH yybH 
ACCCCTTCACTTGGGTACTTTGTTAAATATATCTATTTGATTATATAGGAAAAAAGGATACTTTTCATTTTGTTTTCAAGGTTTCAGTGGTCTAGCGATGTATAATATATCCAAT >strand + yybE similar to transcriptional regu CATGTTGTGGAGCACTGCCATTCATAAACAATCATCCTTTATTTCTTACGAATTCAAACATCCTCGAGTCTGTTTATATTATTATCTTAGCACCGTTGATATATGTTGTATAATGCATATAAAATATCCTTTTCATGCTTATATGTAATAAGGAGAGATGATAATTGGAATGGGAACAACTTGAATATTTTTAAACAC >strand - yybF similar to antibiotic resistanc GTGTTTAAAAATATTCAAGTTGTTCCCATTCCAATTATCATCTCTCCTTATTACATATAAGCATGAAAAGGATATTTTATATGCATTATACAACATATATCAACGGTGCTAAGATAATAATATAAACAGACTCGAGGATGTTTGAATTCGTAAGAAATAAAGGATGATTGTTTATGAATGGCAGTGCTCCACAACATG >strand + yyaT similar to hypothetical protein TTGTTGCAATTACAACAAAAATGTTTTAACCTTACTATTATAAGACCACATGCGTGCTGTACAGTCCAGACTAATATCAGAAAAGGATT >strand - yybA similar to transcriptional regu AATCCTTTTCTGATATTAGTCTGGACTGTACAGCACGCATGTGGTCTTATAATAGTAAGGTTAAAACATTTTTGTTGTAATTGCAACAA >strand + yyaQ yyaQ GCATCCATTACCTTATATATTTCACTATCTTAATGAGGTATTTCAACAGAGGCTTGTGGTACAGTAAAAAGGAATAGAAACAAGAGAAAAGAAGATGAACCTATGCCCACTAACAATAAACTCAATGAGAAATATCATACTTCCTTCAACTCGCCCACAACCCTGTTGACTAGTATCCGTGGAGAGAGGAAGCTTTGACACAGCTTCTGCCATTGATGACATCTCTATTGAAGTATCCAAAGGACAACCAAGGATCGCTCTTTGACTTTTTCAAGAAGAAAGTAGAGTGAATAAAGGAGT >strand - yyaR similar to streptothricine acet GTCATCAATGGCAGAAGCTGTGTCAAAGCTTCCTCTCTCCACGGATACTAGTCAACAGGGTTGTGGGCGAGTTGAAGGAAGTATGATATTTCTCATTGAGTTTATTGTTAGTGGGCATAGGTTCATCTTCTTTTCTCTTGTTTCTATTCCTTTTTACTGTACCACAAGCCTCTGTTGAAATACCTCATTAAGATAGTGAAATATATAAGGTAATGGATGCCTTATCTAATGCTGTTCTCTTAAAGAGTACACTTTTGGAAGCTGTTGTTGAATAGTATCGAAACGAGTTTACTAAATATT >strand + yyaO similar to hypothetical protein TGATATGATCTTATTATATATAGACTAAGGATTAAACTTTTACTTTTTTTATAAAAATTTGACCAGATTGCGAAACAAGGAAAAACGTTATTCGCATTTATCTCAAGTGGCAAAATGAAACATTAGTTTATATTTTCACATCACAATTATCGATAAGCAGTCAGTTATTGTATTTGTTTAGGATAACTGTGGTAACCTGAATATAACCAACTTAGGAAGAAG >strand - tetL tetracycline resistance leader 
CTTCTTCCTAAGTTGGTTATATTCAGGTTACCACAGTTATCCTAAACAAATACAATAACTGACTGCTTATCGATAATTGTGATGTGAAAATATAAACTAATGTTTCATTTTGCCACTTGAGATAAATGCGAATAACGTTTTTCCTTGTTTCGCAATCTGGTCAAATTTTTATAAAAAAAGTAAAAGTTTAATCCTTAGTCTATATATAATAAGATCATATCA >strand + yyaJ similar to transporter AATGAGAATATTACTATATATTTGCATAAATTGGTCTATGAATCTACTGTTTTGGAAATCTTTTAAGCAAAATTAAAATATAAAGTGTGAATTTAGTTCTTCCATCTAGTGATTTGAACTCTGTTGCCAATGAACCAAGATACTTATAATAGTCCCAAGAGGGGAATTTTCAGTCTTTTCTACTACTCAAAAGGG >strand - yyaK yyaK CCCTTTTGAGTAGTAGAAAAGACTGAAAATTCCCCTCTTGGGACTATTATAAGTATCTTGGTTCATTGGCAACAGAGTTCAAATCACTAGATGGAAGAACTAAATTCACACTTTATATTTTAATTTTGCTTAAAAGATTTCCAAAACAGTAGATTCATAGACCAATTTATGCAAATATATAGTAATATTCTCATT >strand + yyaC yyaC AAAAAGAGATATTTATTTGTTTGTCCGCGGAATGAAGAAGTGAACTTCACTCTTCACTTTTTTCTAACCATTCCTACCTATTCAAGTGTTTCATAAGCAGACCATTGCTTGATCATGATCGAGGCAATGGGGGACAATACTTGTAATCGCCCACTAACCATTCATATCATGCCCTCTACTTTTTTAAAATCTCTGGTGCCATACACAAAGCATAAAAAATCATACTGCTGGATATACTGTAAACAACCTTACATAACTACACACACCATAGCAATTCTGTTACCTACTATAACAAGGAAG >strand - yyaD similar to hypothetical protein CATGATATGAATGGTTAGTGGGCGATTACAAGTATTGTCCCCCATTGCCTCGATCATGATCAAGCAATGGTCTGCTTATGAAACACTTGAATAGGTAGGAATGGTTAGAAAAAAGTGAAGAGTGAAGTTCACTTCTTCATTCCGCGGACAAACAAATAAATATCTCTTTTTAGACTGCCGTATTAGCAGTCTTTTTTATGTGTATTTCGTATGATAATTTCAGCTTGTGTTCAAATGTCTATGAAAATATGGCATGTTTGCTTTCCTTTATTTATATAGTAACAATAACGGGAACAAGCT >strand + yyaB yyaB ACATGTACTATCTTGCTTCTATTCTATCAAAAAAAGATTAAAGCGTTTCATTTTTTTCATCATTTTGTTCCGAATCAGAAGGGAAATAATTGAAAATTCAAGCACGCTTTATGCTTTCTCTTATAAATTCTGCAATTTCTATGTTTTTTTGACTTAAAAAAGGAATTTCTTAAAGAAAAAAAGAAGAAGTCACAGTACAGAACGTGGAAACAGGATG >strand - soj soj CATCCTGTTTCCACGTTCTGTACTGTGACTTCTTCTTTTTTTCTTTAAGAAATTCCTTTTTTAAGTCAAAAAAACATAGAAATTGCAGAATTTATAAGAGAAAGCATAAAGCGTGCTTGAATTTTCAATTATTTCCCTTCTGATTCGGAACAAAATGATGAAAAAAATGAAACGCTTTAATCTTTTTTTGATAGAATAGAAGCAAGATAGTACATGT SMILEv1.47/Lanceur/smile0000700002404200237300000005042510066543324014555 0ustar lamaaoc00000000000000#!/usr/bin/perl -w #SMILE v1.47 - Extraction of structured motifs common to 
several sequences #Copyright (C) 2004 L.Marsan (lama -AT- prism.uvsq.fr) #This program is free software; you can redistribute it and/or #modify it under the terms of the GNU General Public License #as published by the Free Software Foundation; either version 2 #of the License, or (at your option) any later version. #This program is distributed in the hope that it will be useful, #but WITHOUT ANY WARRANTY; without even the implied warranty of #MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #GNU General Public License for more details. #You should have received a copy of the GNU General Public License #along with this program; if not, write to the Free Software #Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. #use strict; my (@tmplist, $data_file, $quorum, @lmin, @lmax, @e, $blocs, $curbloc, $i, @compo, @flag_compo, @saut_min, @saut_max, @comline, $output_file, $prog, $ok, $shufflings, @comline2, @delta, $flag_delta, $ordre, $contre, @symbcompo, $alpha_file, $summinblocs, $summaxblocs, $flagmaxblocinfini, @palindrom); # flag_compo[i] indique si la boite i recoit une contrainte de composition # flag_compo[0] indique une contrainte globale # symb_compo[i][j] indique les symboles pour lesquels il existe une contrainte # sur la boite i (0 pour global). La dimension j est le symbole contraint # et symb_compo[i][j] le nb max de ce symbole j apparaissant ds une boite i # Chemin des programmes executable ############################################## $prog1 = "../P_BLOCS/bin/x-smile"; $prog2 = "../P_BLOCS+DELTA/bin/x-smile_delta"; $progstat = "../SigStat/bin/e-smile_shuffling"; $progfaux = "../SigStat/bin/e-smile_against"; # Fichier de noms de fichiers generes ########################################## $namefile = ".namefile"; print "\n*********************\n O o\n ^ SMILE! 
v1.47\n \\_/\n*********************\n\n"; if ( !@ARGV ) { print "\nUsage: smile [ [-x] | -g ]\n"; print "\t\t-g print a generic parameter file for boxes\n\n"; print "\t\t-x avoid first step of extraction (if already done and output file available)\n\n"; exit; } ## Generation du fichier type ################################################# if ( $ARGV[0] eq "-g" ) { $blocs = $ARGV[1]; if (!$blocs || !($blocs =~ /^\d+$/) || $blocs <= 0 ) { print "\nUsage: smile [ [-x] | -g ]\n"; print "\t\t-g print a generic parameter file for boxes\n\n"; print "\t\t-x avoid first step of extraction (if already done and output file available)\n\n"; exit; } print "EXTRACTION (Step 1) =======================================================\n"; print "FASTA file\t\t\tfile_name\n"; print "Output file\t\t\tfile_name_out\n\n"; print "GLOBAL PARAMETERS =============\n"; print "Alphabet file\t\t\tfile_name_alphabet\n"; print "Quorum\t\t\t\t0\n"; print "Total min length\t\t0\n"; print "Total max length\t\t0\n"; print "Total substitutions\t\t0\n"; print "Boxes\t\t\t\t$blocs\n\n"; print "Composition in ?\t\t0\t# OPTIONAL #\n\n\n"; $i = 1; while ( $i <= $blocs ) { unless ( $blocs == 1 ) { print "BOX $i ================\n"; print "Min length\t\t\t0\n"; print "Max length\t\t\t0\n"; print "Substitutions\t\t\t0\n"; print "Composition in ?\t\t0\t# OPTIONAL #\n"; } unless ( $i == $blocs ) { print "Min spacer length\t\t0\n"; print "Max spacer length\t\t0\n"; print "Delta\t\t\t\t0\t# OPTIONAL #\n"; } unless ( $i == 1 ) { print "Palindrom of box\t\t0\t# OPTIONAL #\n"; } print "\n"; $i++; } print "\n\nEVALUATION (Step 2) ===================================================\n"; print "Shufflings\t\t\t100\n"; print "Size k-mer\t\t\t0\n"; print "# OR\n"; print "Against wrong sequences\t\tfile_name_wrong_seqs\n\n"; exit; } ############################################################################### if ( $ARGV[0] eq "-x" ) { $blocs = $ARGV[1]; if (!$blocs) { print "\nUsage: smile [ [-x] | -g ]\n"; print 
"\t\t-g print a generic parameter file for boxes\n\n"; print "\t\t-x avoid first step of extraction (if already done and output file available)\n\n"; exit; } -r "$ARGV[1]" or print "$ARGV[1] is not readable!\n" and exit; open(FILE,"$ARGV[1]"); } else { -r "$ARGV[0]" or print "$ARGV[0] is not readable!\n" and exit; open(FILE,"$ARGV[0]"); } while ( ) { if ( /FASTA file/ ) { @tmplist = split(" "); $data_file = $tmplist[2]; -r "$data_file" or print "Line $.: $data_file is not readable!\n" and exit; } elsif ( /Output file/ ) { @tmplist = split(" "); $output_file = $tmplist[2]; } elsif ( /Alphabet file/ ) { @tmplist = split(" "); $alpha_file = $tmplist[2]; } elsif ( /Quorum/ ) { @tmplist = split(" "); $quorum = $tmplist[1]; } elsif ( /Total min length/ ) { @tmplist = split(" "); $lmin[0] = $tmplist[3]; } elsif ( /Total max length/ ) { @tmplist = split(" "); $lmax[0] = $tmplist[3]; } elsif ( /Total substitutions/ ) { @tmplist = split(" "); $e[0] = $tmplist[2]; } elsif ( /Boxes/ ) { @tmplist = split(" "); $blocs = $tmplist[1]; if($blocs <= 0) { print "Line $.: incorrect number of boxes.\n" and exit; } } elsif ( /Shufflings/ ) { @tmplist = split(" "); $shufflings = $tmplist[1]; } elsif ( /Size k-mer/ ) { @tmplist = split(" "); $ordre = $tmplist[2]; } elsif ( /Against wrong sequences/ ) { @tmplist = split(" "); $contre = $tmplist[3]; } elsif ( /BOX/ ) { @tmplist = split(" "); $curbloc = $tmplist[1]; if ( ! 
defined($blocs) ) { print "Line $.: no number of boxes has been defined before\n" and exit; } if ( (!$curbloc) || ($curbloc <= 0) || ($curbloc > $blocs) ) { print "Line $.: incorrect box number\n" and exit; } if ( $blocs == 1 ) { print "Line $.: no need of a 'box' section if only 1 box\n" and exit; } while ( defined($line = ) && ($line ne "\n") ) { if ( $line =~ /Min length/ ) { @tmplist = split(" ", $line); $lmin[$curbloc] = $tmplist[2]; } elsif ( $line =~ /Max length/ ) { @tmplist = split(" ", $line); $lmax[$curbloc] = $tmplist[2]; } elsif ( $line =~ /Substitutions/ ) { @tmplist = split(" ", $line); $e[$curbloc] = $tmplist[1]; } elsif ( $line =~ /Min spacer length/ ) { if ( $curbloc == $blocs ) { print "Line $.: the last box can't have jump parameters\n" and exit; } @tmplist = split(" ", $line); $saut_min[$curbloc] = $tmplist[3]; } elsif ( $line =~ /Max spacer length/ ) { if ( $curbloc == $blocs ) { print "Line $.: the last box can't have jump parameters\n" and exit; } @tmplist = split(" ", $line); $saut_max[$curbloc] = $tmplist[3]; } elsif ( $line =~ /Delta/ ) { if ( $curbloc == $blocs ) { print "Line $.: the last box can't have a delta parameter\n" and exit;} @tmplist = split(" ", $line); $delta[$curbloc] = $tmplist[1]; $flag_delta = 1; } elsif ( $line =~ /Palindrom of box/ ) { @tmplist = split(" ", $line); $palindrom[$curbloc] = $tmplist[3]; if ( $palindrom[$curbloc]<=0 ) { print "Line $.: numéro de bloc de palindrome incorrect\n";} if ($curbloc <= $palindrom[$curbloc]) { print "Line $.: as we consider boxes from left to right, a box can only be the palindrom of a previous box\n" and exit;} if ( $lmin[$curbloc] != $lmin[$palindrom[$curbloc]] || $lmax[$curbloc] != $lmax[$palindrom[$curbloc]] ) { print "Line $.: min and max lenght of boxes $curbloc and $palindrom[$curbloc] must be identical if they're palindroms\n" and exit;} } elsif ( $line =~ /Composition/ ) { if( !defined($flag_compo[$curbloc])) { $flag_compo[$curbloc] = 0; } @tmplist = split(" ", $line); 
$symbcompo[$curbloc][$flag_compo[$curbloc]]="$tmplist[2]"; $compo[$curbloc][$flag_compo[$curbloc]] = $tmplist[3]; $flag_compo[$curbloc] += 1; } else { chomp($line); #print "Line $.: box parameter ignored: \"$line\"\n"; } } } elsif ( /Composition/ ) { if( !defined($flag_compo[0])) { $flag_compo[0] = 0; } @tmplist = split(" "); $symbcompo[0][$flag_compo[0]] = "$tmplist[2]"; $compo[0][$flag_compo[0]] = $tmplist[3]; $flag_compo[0] += 1; } elsif ( ! /^\n$/ ) { chomp; #print "Line $. ignored : \"$_\"\n"; } } # Verification et preparation des parametres ################################## if ( !defined($alpha_file) ) { print "Alphabet file hasn't been defined!\n(Add an 'Alphabet file' line to your parameter file)\n" and exit; } if ( !defined($data_file) ) { print "Fasta file hasn't been defined!\n(Add a 'FASTA file' line to your parameter file)\n" and exit; } if ( !defined($output_file) ) { print "Output file hasn't been defined!\n(Add an 'Output file' line to your parameter file)\\n" and exit; } if ( !defined($quorum) || $quorum <= 0 || $quorum > 100 ) { print "Quorum is incorrect or not defined.\n(Add a 'Quorum' line to your parameter file. 
It's a percentage)\n" and exit; } if ( !defined($lmin[0]) || $lmin[0] <= 0 ) { print "Total min length is incorrect or not defined.\n(Add a 'Total min length' line to your parameter file)\n" and exit; } if ( !defined($lmax[0]) || ( ( $lmax[0] != 0) && ($lmax[0] < $lmin[0] ) ) ) { print "Total max length is incorrect or not defined.\n(Add a 'Total max length')\n" and exit; } if ( !defined($e[0]) || $e[0] < 0 || ( ($lmax[0] != 0) && ($e[0] >= $lmax[0]) ) ) { print "Total substitions number is incorrect or not defined.\n(Add a 'Total substitutions' line to your parameter file)\n" and exit; } if ( !defined($ordre) && defined($shufflings)) { print "The shuffling order hasn't been defined.\n(Add a 'Size k-mer' line to your parameter file)\n" and exit; } if ( defined($ordre) && defined($shufflings) && $shufflings != 0 && $ordre <1) { print "Shuffling order must be greater than 0.\n", and exit; } if ( defined($contre) && (defined($ordre) || defined($shufflings) ) ) { print "Shuffling and Against methods are not compatible.\n" and exit; } if (!defined ($blocs)) { $blocs = 1; } if( $blocs > 1 ) { $summinblocs = 0; $summaxblocs = 0; $flagmaxblocinfini = 0; for ($i=1; $i <= $blocs; $i++) { if( $lmax[$i]==0 ) { $flagmaxblocinfini = 1; } else { $summaxblocs += $lmax[$i]; } $summinblocs += $lmin[$i]; if ( !defined($lmin[$i]) || $lmin[$i] < 0 ) { print "Min length for box $i is incorrect or not defined.\n(Line 'Min length')\n" and exit; } if ( !defined($lmax[$i]) || ( ($lmax[$i] != 0) && ($lmax[$i] < $lmin[$i]) ) ) { print "Max length for box $i is incorrect or not defined.\n(ligne 'Max length')\n" and exit; } if ( !defined($e[$i]) || $e[$i] <0 || ( ($lmax[$i] != 0) && ($e[$i]>=$lmax[$i]) ) ) { print "Substitutions number for box $i is incorrect or not defined.\n(Line 'Substitutions')\n" and exit; } if ( $i != $blocs && (!defined($saut_min[$i]) || $saut_min[$i]<0) ) { print "Min jump for box $i is incorrect or not defined.\n(Line 'Min spacer length')\n" and exit; } if ( $i != 
$blocs && (!defined($saut_max[$i]) || $saut_max[$i]<$saut_min[$i]) ) { print "Max jump for box $i is incorrect or not defined.\n(Line 'Max spacer length')\n" and exit; } if ( defined($flag_delta) && ($i != $blocs) ) { if (defined($delta[$i])) { if ( $delta[$i] < 0 || $saut_max[$i]-$saut_min[$i] < $delta[$i]*2 ) { print "Delta for bloc $i is incorrect : not compatible with spacer range.\n(Line 'Delta')\n" and exit; } } else {print "Box $i has no delta and another box has one.\n(Line 'Delta'. Fix a delta equal to the spacer range if you don't want to use it for this box.)\n" and exit;} } } if($lmax[0]!=0 && $summinblocs > $lmax[0]) { print "Sum of min length of boxes is greater than total max length\n" and exit; } if(!$flagmaxblocinfini && $summaxblocs < $lmin[0]) { print "Sum of max length of boxes is lower than total min length\n" and exit; } } if ( $ARGV[0] eq "-x" ) { goto suite; } # LANCEMENT DU PROGRAMME ###################################################### -r $data_file or print "$data_file is not readable!\n" and exit; if (defined($flag_delta)) { @comline = ($prog2); } else { @comline = ($prog1); } push @comline, ($alpha_file,$data_file,$output_file,$quorum,$blocs,$lmin[0],$lmax[0],$e[0]); if ($blocs > 1) { for ($i=1; $i<$blocs; $i++) { push @comline, ($lmin[$i],$lmax[$i],$e[$i],$saut_min[$i],$saut_max[$i]); if (defined($flag_delta)) {push @comline, ($delta[$i]);} } push @comline, ($lmin[$i],$lmax[$i],$e[$i]); for ($i=0; $i<=$blocs; $i++) { if(defined($flag_compo[$i]) && $flag_compo[$i] != 0) { push @comline, ($i,$flag_compo[$i]); for ($j=0; $j<$flag_compo[$i]; $j++) { push @comline, ($symbcompo[$i][$j],$compo[$i][$j]); } } } for ($i=1; $i<=$blocs; $i++) { if(defined($palindrom[$i])) { push @comline, ("p$i/$palindrom[$i]"); } } } else { if(defined($flag_compo[0]) && $flag_compo[0] != 0) { push @comline, (0,$flag_compo[0]); for ($j=0; $j<$flag_compo[0]; $j++) { push @comline, ($symbcompo[0][$j],$compo[0][$j]); } } } #if ( -e "$comline[2]" ) # { # print 
"ATTENTION! Le fichier '$comline[2]' existe déjà!\n"; # print "Tapez 'ok' pour continuer l'execution...\n"; # $i = ; # chomp($i); # if ($i ne "ok") # { print "Exécution interrompue.\n" and exit; } # # unless ( -w "$comline[2]" ) # { print "Le fichier '$comline[2]' n'est pas ouvrable en écriture.\n" and exit; } # } print "@comline\n"; $ok = system @comline; if ( $ok != 0 ) { print "Error during execution of extraction (code $ok)\n"; if($ok == 65280) { print "You didn't fully compile SMILE. Type 'make' in the parent directory.\n" and exit} exit; } suite: ## FAUX #################################### if( defined($contre) ) { $comline2[0] = $progfaux; $comline2[1] = $contre; $comline2[2] = $output_file; $comline2[3] = $output_file.".against"; if (defined($flag_delta)) { -r $namefile or print "$namefile is not readable!\n" and exit; open(NAMEFILE,$namefile); while ( defined($line = ) ) { chomp($line); $comline2[2] = $line; $comline2[3] = $line.".against"; -r $line or print "$line is not readable!\n" and exit; print "\n*** AGAINST '$line'\n@comline2\n"; if ( -e "$comline2[3]" ) { print "WARNING! '$comline2[3]' already exists!\n"; print "Type 'ok' to continue...\n"; $i = ; chomp($i); if ($i ne "ok") { print "Execution interrupted.\n" and exit; } unless ( -w "$comline2[3]" ) { print "'$comline2[3]' is not writable!\n" and exit; } } $ok = system @comline2; if ( $ok != 0 ) { print "Error during execution of evaluation (code $ok)\n"; if($ok == 65280) { print "You didn't fully compile SMILE. Type 'make' in the parent directory.\n" and exit} exit; } } exit; } print "\n*** AGAINST '$comline2[1]'\n@comline2\n"; -r $contre or print "$contre is not readable!\n" and exit; if ( -e "$comline2[3]" ) { print "WARNING! 
'$comline2[3]' already exists!\n"; print "Type 'ok' to continue...\n"; $i = ; chomp($i); if ($i ne "ok") { print "Execution interrupted.\n" and exit; } unless ( -w "$comline2[3]" ) { print "'$comline2[3]' is not writable!\n" and exit; } } $ok = system @comline2; if ( $ok != 0 ) { print "Error during execution of evaluation (code $ok)\n"; if($ok == 65280) { print "You didn't fully compile SMILE. Type 'make' in the parent directory.\n" and exit} exit; } exit; } if ( !defined($shufflings) || $shufflings == 0 ) { exit; } # STATS ########################### $comline2[0] = $progstat; $comline2[1] = $data_file; $comline2[2] = $output_file; $comline2[3] = $output_file.".shuffle"; $comline2[4] = $shufflings; $comline2[5] = $ordre; if (defined($flag_delta)) { -r $namefile or print "$namefile is not readable!\n" and exit; open(NAMEFILE,$namefile); while ( defined($line = ) ) { chomp($line); $comline2[2] = $line; $comline2[3] = $line.".shuffle"; -r $line or print "$line is not readable!\n" and exit; print "\n*** SHUFFLING '$line'\n@comline2\n"; if ( -e "$comline2[3]" ) { print "WARNING! '$comline2[3]' already exists!\n"; print "Type 'ok' to continue...\n"; $i = ; chomp($i); if ($i ne "ok") { print "Execution interrupted.\n" and exit; } unless ( -w "$comline2[3]" ) { print "'$comline2[3]' is not readable!\n" and exit; } } $ok = system @comline2; if ( $ok != 0 ) { print "Error during execution of evaluation (code $ok)\n"; if($ok == 65280) { print "You didn't fully compile SMILE. Type 'make' in the parent directory.\n" and exit} exit; } } exit; } print "\n*** SHUFFLING '$comline2[2]'\n@comline2\n"; -r $output_file or print "$output_file is not readable!\n" and exit; if ( -e "$comline2[3]" ) { print "WARNING! 
'$comline2[3]' already exists!\n"; print "Type 'ok' to continue...\n"; $i = ; chomp($i); if ($i ne "ok") { print "Execution interrupted.\n" and exit; } unless ( -w "$comline2[3]" ) { print "'$comline2[3]' is not writable!\n" and exit; } } $ok = system @comline2; if ( $ok != 0 ) { print "Error during execution of evaluation (code $ok)\n"; if($ok == 65280) { print "You didn't fully compile SMILE. Type 'make' in the parent directory.\n" and exit} exit; } SMILEv1.47/Lanceur/fastawrong0000644002404200237300000000211410066546143015620 0ustar lamaaoc00000000000000> Seq0 CTCATCACCTTGGTCCGATACAAACACTGAGCGTCTGATTGTCACCACGG CATCCCTTAGTGCCCCGACCGGCGGACAACACTTTTTAAAAATGTGAGCT > Seq1 AACAGACGGCCTGAACCCTAGCATCTATCACGGGATCGTTTGTATGATCA GTATAGGCACTATTATTTTGTGCTCATCAAGTGTGTCGCCGAGGGCCGCG > Seq2 GGCTTACTGATAATAGATGGATATGTGCGTAGAAGGGTGACTGACTAAGC AGCGCTACAGTAGAACCAGTTCGGATTACTATGGATACGCTATGAAGGTA > Seq3 GGGATCCCTGCCTTAGTCGGGAATTTTGGGTGAACCTTGGAATTGTTTAA ACTAGTTTCGAAGTCGCGGGGTTGGCCCGATGACCTCCGCTTCGTCAAGC > Seq4 ACGTACATATGGCATCATGACGCGAGGGTCAACACGGCTATTGAGCCGCG CCACGTAGAGGCTAGTTCGCCGGAGGGTGAGGAGGTACTCATTCTACGCT > Seq5 ACATGCTAAGGGCCCCAAGGAGGTACATAGTATTCAGAAGCTAGCCAGGT TAATTGGACAGCTAAAAACAGCACGGGAATTACACTTGATAATATATGCA > Seq6 CCAATCGTACCGAAGGCGACTCTTTTTTTCTCGGGATTGATTAGCGTCGA AGTTAATGGCCAACGCCTTGATAAAACTCTGATCTACCCGAGCATAGCAT > Seq7 CGCGCTCGCTGGAAATACTATCGTGATATGAAGACGAAAGCCTTCTTTCC GGAATTCCGTAAAACAATATATAGTCTCCCACGTAATTCTGGCTCCTCGC > Seq8 CTGGTCAGTGCCTCTGTTTCACACGTGGAATTAGCGCTCAGGAAGTCACG GTTTCATATTAATGTTGTATGATCCCTGACTGCACGTGAGCCGGGGAGAC > Seq9 GCGGGTAATATCACTCAGTAGCTTGCTTCTTCTTTCTCGGCCAATCCCCG AGACCCGGACAGAGATTTGCGGTTCCGAGAAGAAGTAAAATTATGACCTG SMILEv1.47/Lanceur/.namefile0000644002404200237300000000025010066542220015272 0ustar lamaaoc00000000000000test.out[0-0][0-0][0-2] test.out[0-0][0-0][1-3] test.out[0-0][0-0][2-4] test.out[0-0][0-0][3-5] test.out[0-0][0-0][4-6] test.out[0-0][0-0][5-7] test.out[0-0][0-0][6-8] 
SMILEv1.47/Lanceur/example0000644002404200237300000002507310066546131015106 0ustar lamaaoc00000000000000%%% 2 43/1062 196736 12 12 1 6 6 1 17 19 6 6 1 alphabet ACGT$ =============================================================================== AAAAAA_AAAAAA 000000-000000 46 125 AAAAAA_AAAACA 000000-000010 43 68 AAAAAA_AAAAGA 000000-000020 47 71 AAAAAA_AATAAA 000000-003000 46 71 AAAAAA_ATAAAA 000000-030000 48 76 AAAAAA_ATTTTT 000000-033333 68 110 AAAAAA_CTTTTT 000000-133333 57 92 AAAAAA_GAAAAA 000000-200000 45 70 AAAAAA_GTTTTT 000000-233333 44 71 AAAAAA_TAAAAA 000000-300000 49 84 AAAAAA_TATTTT 000000-303333 52 72 AAAAAA_TGAAAA 000000-320000 44 63 AAAAAA_TTATTT 000000-330333 55 73 AAAAAA_TTCTTT 000000-331333 44 67 AAAAAA_TTTAAA 000000-333000 45 80 AAAAAA_TTTATT 000000-333033 45 62 AAAAAA_TTTCTT 000000-333133 54 82 AAAAAA_TTTTAA 000000-333300 46 74 AAAAAA_TTTTAT 000000-333303 57 84 AAAAAA_TTTTCA 000000-333310 49 79 AAAAAA_TTTTCT 000000-333313 58 105 AAAAAA_TTTTGT 000000-333323 47 78 AAAAAA_TTTTTA 000000-333330 66 117 AAAAAA_TTTTTC 000000-333331 68 128 AAAAAA_TTTTTG 000000-333332 43 86 AAAAAA_TTTTTT 000000-333333 67 191 AAAAAC_TTTTTT 000001-333333 44 68 AAAAAG_AAAAAA 000002-000000 47 67 AAAAAG_AAAAAT 000002-000003 44 54 AAAAAG_TAAAAA 000002-300000 44 55 AAAAAG_TTTTTA 000002-333330 50 67 AAAAAG_TTTTTC 000002-333331 45 66 AAAAAG_TTTTTT 000002-333333 57 91 AAAAAT_AAAAAA 000003-000000 56 103 AAAAAT_AAAAAT 000003-000003 54 79 AAAAAT_AAAATA 000003-000030 45 62 AAAAAT_AAATAA 000003-000300 46 51 AAAAAT_AATAAA 000003-003000 53 61 AAAAAT_ATAAAA 000003-030000 53 67 AAAAAT_ATAATA 000003-030030 43 47 AAAAAT_ATTTTT 000003-033333 60 74 AAAAAT_CTTTTT 000003-133333 44 61 AAAAAT_GAAAAA 000003-200000 50 68 AAAAAT_GAAAAT 000003-200003 44 49 AAAAAT_TAAAAA 000003-300000 50 74 AAAAAT_TGAAAA 000003-320000 47 54 AAAAAT_TTTTAA 000003-333300 45 55 AAAAAT_TTTTAT 000003-333303 51 64 AAAAAT_TTTTCA 000003-333310 53 60 AAAAAT_TTTTCT 000003-333313 58 73 AAAAAT_TTTTGA 000003-333320 45 53 
AAAAAT_TTTTTA 000003-333330 68 83 AAAAAT_TTTTTC 000003-333331 67 89 AAAAAT_TTTTTT 000003-333333 65 107 AAAAGA_TTTTTC 000020-333331 44 56 AAAAGA_TTTTTT 000020-333333 43 67 AAAATA_AAAAAA 000030-000000 55 84 AAAATA_AAAAAT 000030-000003 49 62 AAAATA_AAAAGA 000030-000020 43 48 AAAATA_AAAATA 000030-000030 49 62 AAAATA_AAAGAA 000030-000200 43 47 AAAATA_AAATAA 000030-000300 43 48 AAAATA_AATAAA 000030-003000 54 63 AAAATA_ATGAAA 000030-032000 44 45 AAAATA_GAAAAA 000030-200000 45 58 AAAATA_TAAAAA 000030-300000 43 54 AAAATA_TTTTAT 000030-333303 43 50 AAAATA_TTTTTA 000030-333330 56 62 AAAATA_TTTTTC 000030-333331 44 53 AAAATA_TTTTTT 000030-333333 57 82 AAAATT_TTTATT 000033-333033 43 54 AAACAA_TTTTTT 000100-333333 43 61 AAAGAA_AAAAAT 000200-000003 43 49 AAAGAA_TTTTTT 000200-333333 48 71 AAATAA_ATTTTT 000300-033333 43 48 AAATAA_TTTTAT 000300-333303 46 49 AAATAA_TTTTTA 000300-333330 46 51 AAATAA_TTTTTT 000300-333333 59 80 AAATAT_AAAAAA 000303-000000 43 67 AAATAT_TTTTAT 000303-333303 46 49 AAATAT_TTTTTA 000303-333330 46 52 AAATTT_AAAAAT 000333-000003 45 55 AAGAAA_ATTTTT 002000-033333 47 61 AAGAAA_TTTTTA 002000-333330 45 50 AAGAAA_TTTTTT 002000-333333 61 89 AATAAA_AAAAAA 003000-000000 46 62 AATAAA_AAAAAT 003000-000003 46 52 AATAAA_AATTTT 003000-003333 45 56 AATAAA_ATATAA 003000-030300 46 52 AATAAA_ATTTTT 003000-033333 47 57 AATAAA_TATTTT 003000-303333 47 51 AATAAA_TTTTTC 003000-333331 43 51 AATAAA_TTTTTT 003000-333333 56 76 AATATT_AAAAAA 003033-000000 46 52 AATATT_TTTTAT 003033-333303 47 53 AATTTT_AAAAAT 003333-000003 44 55 ACAAAA_ATTTTT 010000-033333 48 58 ACAAAA_TTTTTT 010000-333333 48 83 AGAAAA_ATTTTT 020000-033333 57 73 AGAAAA_TTTTTA 020000-333330 44 63 AGAAAA_TTTTTC 020000-333331 55 80 AGAAAA_TTTTTT 020000-333333 57 104 ATAAAA_AAAAAA 030000-000000 49 80 ATAAAA_AAAAAT 030000-000003 48 56 ATAAAA_AAAAGA 030000-000020 44 53 ATAAAA_AAAATG 030000-000032 48 50 ATAAAA_AAAGAA 030000-000200 51 55 ATAAAA_AAATAT 030000-000303 44 47 ATAAAA_AATAAA 030000-003000 49 51 ATAAAA_AATATT 
030000-003033 44 47 ATAAAA_AATGAA 030000-003200 49 51 ATAAAA_AATTTT 030000-003333 46 58 ATAAAA_ATATTT 030000-030333 47 48 ATAAAA_ATTTTT 030000-033333 62 77 ATAAAA_TAAAAA 030000-300000 44 48 ATAAAA_TATAAT 030000-303003 43 45 ATAAAA_TTATTT 030000-330333 46 50 ATAAAA_TTTAAA 030000-333000 43 45 ATAAAA_TTTTAT 030000-333303 50 57 ATAAAA_TTTTCA 030000-333310 44 51 ATAAAA_TTTTCT 030000-333313 48 59 ATAAAA_TTTTTC 030000-333331 55 68 ATAAAA_TTTTTT 030000-333333 73 112 ATAAAT_ATTTTT 030003-033333 43 49 ATATTT_AAAAAT 030333-000003 50 54 ATTATA_AAAAAA 033030-000000 46 64 ATTATA_TAAAAA 033030-300000 49 55 ATTATA_TTTAAA 033030-333000 43 45 ATTGAC_TATAAT 033201-303003 47 47 ATTTTA_AAAAAA 033330-000000 48 68 ATTTTA_AAAAAT 033330-000003 50 57 ATTTTA_GAAAAA 033330-200000 46 49 ATTTTC_ATTTTT 033331-033333 44 48 ATTTTT_AAAAAT 033333-000003 49 56 ATTTTT_AAAATA 033333-000030 44 52 ATTTTT_AAATAT 033333-000303 45 48 ATTTTT_AAATTT 033333-000333 44 53 ATTTTT_ATAAAA 033333-030000 50 64 ATTTTT_ATATAA 033333-030300 44 51 ATTTTT_ATTTAT 033333-033303 43 45 ATTTTT_ATTTTT 033333-033333 49 72 ATTTTT_TAAAAA 033333-300000 46 49 ATTTTT_TAAAAT 033333-300003 50 57 ATTTTT_TATAAT 033333-303003 47 56 ATTTTT_TATTTT 033333-303333 45 56 ATTTTT_TTTATT 033333-333033 49 59 ATTTTT_TTTTAT 033333-333303 49 56 ATTTTT_TTTTTC 033333-333331 47 69 ATTTTT_TTTTTT 033333-333333 44 82 CAAAAA_AAAAAT 100000-000003 47 66 CAAAAA_ATTTTT 100000-033333 44 58 CAAAAA_TTTTTT 100000-333333 44 85 CATTTT_TTTTAT 103333-333303 45 47 CTTTTT_AAAATA 133333-000030 43 52 CTTTTT_ATAAAA 133333-030000 43 53 CTTTTT_TATAAT 133333-303003 45 51 CTTTTT_TTATAA 133333-330300 54 61 CTTTTT_TTTTCA 133333-333310 43 52 GAAAAA_AAAAAA 200000-000000 47 90 GAAAAA_AAAAGA 200000-000020 46 59 GAAAAA_AAAGAA 200000-000200 45 57 GAAAAA_AATAAA 200000-003000 45 54 GAAAAA_ATAAAA 200000-030000 44 54 GAAAAA_ATTTTT 200000-033333 59 80 GAAAAA_CTTTTT 200000-133333 46 68 GAAAAA_TTTTAT 200000-333303 47 57 GAAAAA_TTTTCA 200000-333310 48 56 GAAAAA_TTTTCT 200000-333313 50 74 
GAAAAA_TTTTTA 200000-333330 61 79 GAAAAA_TTTTTC 200000-333331 58 89 GAAAAA_TTTTTT 200000-333333 63 118 TAAAAA_AAAAAA 300000-000000 43 67 TAAAAA_AAAGAA 300000-000200 43 57 TAAAAA_AATTTT 300000-003333 44 57 TAAAAA_ATAAAA 300000-030000 48 56 TAAAAA_ATTTTT 300000-033333 66 83 TAAAAA_CTTTTT 300000-133333 49 64 TAAAAA_GTTTTT 300000-233333 43 51 TAAAAA_TATTTT 300000-303333 48 52 TAAAAA_TTATTT 300000-330333 44 48 TAAAAA_TTTTCT 300000-333313 48 65 TAAAAA_TTTTTC 300000-333331 67 91 TAAAAA_TTTTTT 300000-333333 68 117 TAAAAT_AAAAAA 300003-000000 44 62 TATAAA_ATTTTT 303000-033333 44 53 TATTAT_AAAAAA 303303-000000 43 62 TATTAT_ATAAAA 303303-030000 45 47 TATTAT_TAAAAA 303303-300000 45 49 TATTTT_AAAAAA 303333-000000 43 63 TATTTT_AAAAAG 303333-000002 45 54 TATTTT_AAAAAT 303333-000003 47 59 TATTTT_AAAATA 303333-000030 51 61 TATTTT_AATAAA 303333-003000 49 58 TATTTT_ATAAAA 303333-030000 46 55 TATTTT_ATTTTT 303333-033333 46 61 TATTTT_TTTATT 303333-333033 43 51 TCAAAA_ATTTTT 310000-033333 47 55 TCTTTT_ATAAAA 313333-030000 43 49 TCTTTT_TTTTTA 313333-333330 43 55 TCTTTT_TTTTTC 313333-333331 43 59 TCTTTT_TTTTTT 313333-333333 49 79 TGAAAA_AAAAAG 320000-000002 43 52 TGAAAA_ATTTTT 320000-033333 50 58 TGAAAA_TAAAAA 320000-300000 44 54 TGAAAA_TTTTTC 320000-333331 47 54 TGAAAA_TTTTTT 320000-333333 50 80 TGTTTT_TTTTTT 323333-333333 45 69 TTAAAA_AAAAAT 330000-000003 43 54 TTAAAA_ATTTTT 330000-033333 43 57 TTAAAT_TTTTTT 330003-333333 43 62 TTATAA_AAAAAG 330300-000002 48 55 TTATAA_ATTTTT 330300-033333 43 46 TTATAA_TAAAAA 330300-300000 50 52 TTATAT_AAAAAT 330303-000003 43 50 TTATAT_TTTATT 330303-333033 46 49 TTATTT_ATAAAA 330333-030000 48 54 TTATTT_ATTTTT 330333-033333 44 49 TTATTT_TAAAAA 330333-300000 43 50 TTATTT_TTTTTT 330333-333333 46 67 TTCAAA_ATTTTT 331000-033333 43 47 TTCATT_TTTTAT 331033-333303 43 45 TTCTTT_ATAAAA 331333-030000 52 56 TTCTTT_ATTTTT 331333-033333 46 56 TTCTTT_TATTTT 331333-303333 46 52 TTCTTT_TTTTAT 331333-333303 48 52 TTCTTT_TTTTTA 331333-333330 45 57 TTTAAA_AAAAAT 
333000-000003 44 53 TTTAAA_TTTTTT 333000-333333 51 78 TTTATA_ATAAAA 333030-030000 44 47 TTTATA_ATTTTT 333030-033333 43 47 TTTATT_AAAAAT 333033-000003 43 51 TTTATT_AAAATA 333033-000030 52 58 TTTATT_AATAAA 333033-003000 44 46 TTTATT_ATAAAA 333033-030000 55 61 TTTATT_ATAATA 333033-030030 47 49 TTTATT_ATTTTT 333033-033333 45 52 TTTATT_TATTTT 333033-303333 45 52 TTTATT_TTTTAT 333033-333303 49 53 TTTATT_TTTTTT 333033-333333 48 70 TTTTAA_AAAAAA 333300-000000 44 65 TTTTAT_AAAAAA 333303-000000 55 96 TTTTAT_AAAAAG 333303-000002 47 55 TTTTAT_AAAAAT 333303-000003 47 60 TTTTAT_AAAAGA 333303-000020 47 53 TTTTAT_AAAATA 333303-000030 43 50 TTTTAT_AAAGAA 333303-000200 55 58 TTTTAT_AAATAA 333303-000300 47 53 TTTTAT_AATAAA 333303-003000 53 59 TTTTAT_ATAAAA 333303-030000 65 78 TTTTAT_ATAATA 333303-030030 53 56 TTTTAT_ATTTTT 333303-033333 48 64 TTTTAT_GAAAAA 333303-200000 45 56 TTTTAT_TAAAAA 333303-300000 51 70 TTTTAT_TATAAA 333303-303000 44 47 TTTTAT_TTATAA 333303-330300 43 46 TTTTAT_TTTATT 333303-333033 44 48 TTTTAT_TTTTAT 333303-333303 47 50 TTTTAT_TTTTTA 333303-333330 55 64 TTTTAT_TTTTTT 333303-333333 52 81 TTTTCA_AAAATA 333310-000030 43 48 TTTTCA_ATAAAA 333310-030000 45 48 TTTTCA_ATTTTT 333310-033333 44 49 TTTTCA_TTTTTT 333310-333333 47 68 TTTTCT_ATAAAA 333313-030000 43 51 TTTTCT_TATTTT 333313-303333 45 51 TTTTTA_AAAAAA 333330-000000 52 89 TTTTTA_AAAAAT 333330-000003 48 55 TTTTTA_AAAGAA 333330-000200 43 48 TTTTTA_AAATAA 333330-000300 43 50 TTTTTA_ATAAAA 333330-030000 54 73 TTTTTA_ATAATA 333330-030030 51 56 TTTTTA_ATTTTT 333330-033333 48 68 TTTTTA_CTTTTT 333330-133333 44 57 TTTTTA_TAAAAA 333330-300000 43 53 TTTTTA_TATAAT 333330-303003 52 60 TTTTTA_TATTTT 333330-303333 44 57 TTTTTA_TTATAA 333330-330300 54 56 TTTTTA_TTATTT 333330-330333 44 52 TTTTTA_TTTTAT 333330-333303 46 51 TTTTTA_TTTTCA 333330-333310 45 55 TTTTTA_TTTTTT 333330-333333 52 94 TTTTTC_ATAAAA 333331-030000 45 56 TTTTTC_ATTTTT 333331-033333 45 62 TTTTTC_TAAAAA 333331-300000 44 59 TTTTTC_TAAAAT 333331-300003 46 48 
TTTTTC_TATTTT 333331-303333 43 57 TTTTTC_TTTTTT 333331-333333 47 75 TTTTTT_AAAAAA 333333-000000 46 113 TTTTTT_AAAAAT 333333-000003 47 78 TTTTTT_AAAATA 333333-000030 43 63 TTTTTT_AAATAA 333333-000300 44 66 TTTTTT_AATATT 333333-003033 46 54 TTTTTT_ATAAAA 333333-030000 57 96 TTTTTT_ATATTT 333333-030333 45 70 TTTTTT_ATTTTT 333333-033333 55 97 TTTTTT_CTTTTT 333333-133333 52 73 TTTTTT_GAAAAA 333333-200000 43 69 TTTTTT_TAAAAA 333333-300000 51 87 TTTTTT_TAAAAT 333333-300003 46 62 TTTTTT_TATAAT 333333-303003 52 80 TTTTTT_TATTTT 333333-303333 56 84 TTTTTT_TTTATT 333333-333033 55 81 TTTTTT_TTTTAT 333333-333303 53 86 TTTTTT_TTTTCA 333333-333310 44 65 TTTTTT_TTTTTA 333333-333330 45 72 TTTTTT_TTTTTC 333333-333331 48 90 TTTTTT_TTTTTT 333333-333333 53 137 Nb models: 307 User time : 130.94 sec. SMILEv1.47/Lanceur/param_delta0000644002404200237300000000170410066542220015712 0ustar lamaaoc00000000000000## In the following example, we use "deltas". It means that the model we ## look for must respect the following criteria: FASTA file fasta Output file delta Alphabet file alphabet Quorum 70 Total min length 5 Total max length 6 Total substitutions 0 Boxes 2 BOX 1 Min length 2 Max length 3 Substitutions 0 Min spacer length 2 Max spacer length 8 Delta 1 ## here we want a jump of d plus or minus delta between the 2 boxes ## where d can take a value from 3 to 7. Here, the generated ranges will be: ## [2-4], [3-5], [4-6], [5-7], [6-8]. ## As many output files will be generated as the number of possible ## ranges. BOX 2 Min length 3 Max length 3 Substitutions 0 ## Here you can add the evaluation step, each output file will be evaluated. 
Shufflings 2 Size k-mer 1 SMILEv1.47/Lanceur/1box_deg0000644002404200237300000003035210066546004015136 0ustar lamaaoc00000000000000%%% 1 956/1062 196736 6 7 0 alphabet_deg ACGT$ =============================================================================== AARNRY 004*45 962 3518 ARRYNY 0445*5 983 3792 ARYNYR 045*54 986 3305 ARYNYY 045*55 962 3498 ARNRYY 04*455 979 3567 ARNYRY 04*545 975 3513 AYRANR 0540*4 963 3273 AYRRNA 0544*0 957 3266 AYRRNY 0544*5 965 3451 AYRYNR 0545*4 991 3563 AYRYNY 0545*5 975 3680 AYRNRA 054*40 958 3271 AYRNRY 054*45 967 3588 AYRNYR 054*54 1002 3621 AYRNYY 054*55 985 3543 AYYRNR 0554*4 983 3313 AYYYNY 0555*5 999 4205 AYYNYR 055*54 997 3518 AYYNYY 055*55 985 4175 AYNRYR 05*454 993 3724 AYNYRR 05*544 964 3240 AYNYRY 05*545 960 3399 AYNYYR 05*554 984 3415 AYNYYY 05*555 1002 4486 ANARRY 0*0445 958 3141 ANARYR 0*0454 964 3295 ANRARY 0*4045 956 3198 ANRAYR 0*4054 957 3276 ANRRYY 0*4455 977 3517 ANRYRY 0*4545 979 3787 ANRYYR 0*4554 978 3310 ANRYYY 0*4555 985 3685 ANYRYR 0*5454 1012 3636 ANYYYY 0*5555 975 4391 TTYYNR 3355*4 973 3950 TTYYNY 3355*5 963 5138 TTYNRY 335*45 967 3310 TTYNYR 335*54 980 3756 TTYNYY 335*55 961 4968 TTNYRY 33*545 969 3285 TTNYYR 33*554 981 3438 TRYRNY 3454*5 999 3569 TRYYNR 3455*4 959 3227 TRYYNY 3455*5 991 4143 TRYNRY 345*45 998 3631 TRYNYR 345*54 969 3362 TRYNYY 345*55 983 4081 TRNRYY 34*455 968 3067 TRNYRY 34*545 984 3355 TRNYYY 34*555 973 3723 TYTYNR 3535*4 957 3745 TYTYNY 3535*5 964 4950 TYTNYR 353*54 958 3520 TYTNYY 353*55 959 4720 TYRYNR 3545*4 977 3744 TYRYNY 3545*5 1002 4282 TYRNRY 354*45 971 3433 TYRNYR 354*54 962 3375 TYRNYY 354*55 960 3851 TYYTNY 3553*5 970 4924 TYYRNR 3554*4 988 3994 TYYRNY 3554*5 995 4444 TYYYYR 355554 958 2940 TYYYNA 3555*0 956 3355 TYYYNT 3555*3 962 4810 TYYYNR 3555*4 1006 5131 TYYYNY 3555*5 1004 7220 TYYNTY 355*35 960 4951 TYYNRR 355*44 962 4154 TYYNRY 355*45 1014 4563 TYYNYA 355*50 958 3252 TYYNYT 355*53 967 4674 TYYNYR 355*54 1015 4971 TYYNYY 355*55 1005 7101 TYNTRY 35*345 963 3144 TYNTYR 
35*354 960 3261 TYNTYY 35*355 958 4817 TYNRRY 35*445 959 3311 TYNRYR 35*454 991 3637 TYNRYY 35*455 984 4135 TYNYAY 35*505 961 3095 TYNYRT 35*543 959 3264 TYNYRR 35*544 998 4166 TYNYRY 35*545 1018 4685 TYNYYT 35*553 959 4494 TYNYYR 35*554 1023 4709 TYNYYY 35*555 1004 6817 TNTRYY 3*3455 960 2882 TNTYRY 3*3545 961 3139 TNTYYR 3*3554 957 3328 TNRYRY 3*4545 991 3438 TNRYYY 3*4555 974 3779 TNYTYR 3*5354 960 3211 TNYTYY 3*5355 957 4829 TNYRRY 3*5445 989 3592 TNYRYR 3*5454 983 3774 TNYRYY 3*5455 1006 4421 TNYYRR 3*5544 992 3799 TNYYRY 3*5545 1005 4602 TNYYYR 3*5554 1017 4559 TNYYYY 3*5555 990 6761 RAANRY 400*45 980 3674 RAANYR 400*54 957 3320 RARYNY 4045*5 992 3892 RAYYNY 4055*5 964 3438 RAYNYY 405*55 962 3564 RANRYY 40*455 978 3715 RANYRY 40*545 986 3673 RTRYNY 4345*5 962 3514 RTYYNR 4355*4 964 3392 RTYYNY 4355*5 1005 4541 RTYNRY 435*45 956 3234 RTYNYR 435*54 1001 3657 RTYNYY 435*55 982 4326 RTNYRY 43*545 981 3576 RTNYYR 43*554 982 3529 RTNYYY 43*555 995 4479 RRAYNY 4405*5 980 3657 RRTYNY 4435*5 957 3377 RRYANA 4450*0 963 2930 RRYTNY 4453*5 966 3455 RRYYNT 4455*3 977 3555 RRYYNY 4455*5 1022 5215 RRYNTY 445*35 978 3724 RRYNYA 445*50 963 3058 RRYNYT 445*53 979 3598 RRYNYY 445*55 1025 5401 RRNYYY 44*555 989 5435 RYAYNR 4505*4 1013 3519 RYAYNY 4505*5 977 3638 RYANAR 450*04 957 3086 RYANRA 450*40 960 3154 RYANRY 450*45 985 3608 RYANYR 450*54 985 3270 RYANYY 450*55 968 3271 RYTTNY 4533*5 966 3381 RYTYNY 4535*5 1015 4759 RYTNYR 453*54 998 3611 RYTNYY 453*55 996 4440 RYRANA 4540*0 963 3164 RYRANY 4540*5 965 3443 RYRTNY 4543*5 983 3829 RYRYNA 4545*0 991 3439 RYRYNT 4545*3 980 3787 RYRYNY 4545*5 1028 5577 RYRNAA 454*00 971 3312 RYRNAY 454*05 982 3619 RYRNTY 454*35 988 3701 RYRNYA 454*50 984 3339 RYRNYT 454*53 976 3536 RYRNYY 454*55 1028 5290 RYYANR 4550*4 972 3228 RYYTNT 4553*3 958 3139 RYYTNY 4553*5 1007 4683 RYYRNA 4554*0 988 3580 RYYRNY 4554*5 1000 4763 RYYYYY 455555 996 4067 RYYYYNY 45555*5 999 4038 RYYYNA 4555*0 961 3192 RYYYNT 4555*3 1005 4559 RYYYNR 4555*4 1012 4846 RYYYNYY 
4555*55 993 3991 RYYNTR 455*34 1008 3578 RYYNTY 455*35 1006 4663 RYYNRA 455*40 959 3298 RYYNRY 455*45 1011 4904 RYYNYA 455*50 990 3474 RYYNYT 455*53 995 4461 RYYNYR 455*54 1042 5282 RYYNYYY 455*555 990 3917 RYNAYR 45*054 982 3597 RYNTTY 45*335 980 3702 RYNTRY 45*345 985 3591 RYNTYR 45*354 976 3460 RYNTYY 45*355 1011 4921 RYNRAA 45*400 968 3345 RYNRYA 45*450 994 3590 RYNRYY 45*455 1004 4919 RYNYAR 45*504 966 3064 RYNYAY 45*505 970 3393 RYNYTT 45*533 963 3544 RYNYTR 45*534 969 3227 RYNYTY 45*535 1016 5007 RYNYRA 45*540 972 3408 RYNYRT 45*543 966 3571 RYNYRY 45*545 1025 5347 RYNYYA 45*550 974 3223 RYNYYT 45*553 1007 4775 RYNYYR 45*554 1028 5102 RYNYYYY 45*5555 983 4210 RNAARY 4*0045 967 3399 RNARYY 4*0455 967 3434 RNAYRY 4*0545 984 3820 RNAYYR 4*0554 974 3207 RNAYYY 4*0555 964 3475 RNTYRY 4*3545 965 3390 RNTYYR 4*3554 957 3423 RNTYYY 4*3555 982 4746 RNRAAY 4*4005 957 3398 RNRTYY 4*4355 980 3648 RNRYAY 4*4505 960 3530 RNRYTY 4*4535 992 3894 RNRYYT 4*4553 983 3794 RNRYYY 4*4555 1027 5502 RNYAYR 4*5054 1010 3642 RNYTYY 4*5355 991 4900 RNYRYA 4*5450 997 3549 RNYRYT 4*5453 957 3382 RNYRYY 4*5455 1002 5097 RNYYTY 4*5535 989 4913 RNYYRT 4*5543 966 3437 RNYYRY 4*5545 1023 5052 RNYYYT 4*5553 985 4677 RNYYYR 4*5554 1005 4884 RNYYYYY 4*55555 965 4296 YARANR 5040*4 966 3089 YARNRY 504*45 967 3234 YAYRNR 5054*4 988 3792 YAYRNY 5054*5 1015 3698 YAYYNY 5055*5 995 4211 YAYNRR 505*44 991 3395 YAYNRY 505*45 994 3617 YAYNYR 505*54 970 3367 YAYNYY 505*55 1000 4292 YANARR 50*044 957 3227 YANRAR 50*404 968 3285 YANRRY 50*445 1009 3608 YANRYR 50*454 973 3534 YANRYY 50*455 983 3280 YANYRY 50*545 980 3243 YANYYY 50*555 980 3867 YTTYNR 5335*4 959 3968 YTTYNY 5335*5 974 5311 YTTNYR 533*54 985 3745 YTTNYY 533*55 973 4900 YTRYNR 5345*4 984 3675 YTRYNY 5345*5 997 3915 YTRNRY 534*45 968 3099 YTRNYR 534*54 961 3205 YTYTNY 5353*5 964 5079 YTYRNR 5354*4 998 4048 YTYRNY 5354*5 990 4188 YTYYYR 535554 961 3042 YTYYNT 5355*3 974 4991 YTYYNR 5355*4 1014 5304 YTYYNY 5355*5 1005 7443 YTYNTR 535*34 969 3305 
YTYNTY 535*35 971 4970 YTYNRR 535*44 959 4191 YTYNRY 535*45 1009 4513 YTYNYA 535*50 964 3330 YTYNYT 535*53 973 4697 YTYNYR 535*54 1028 5161 YTYNYY 535*55 1014 7118 YTNTRY 53*345 958 3052 YTNTYR 53*354 975 3324 YTNTYY 53*355 957 4805 YTNRYR 53*454 995 3517 YTNRYY 53*455 969 3858 YTNYTY 53*535 959 4783 YTNYRR 53*544 998 4130 YTNYRY 53*545 1010 4598 YTNYYA 53*550 960 3066 YTNYYT 53*553 956 4470 YTNYYR 53*554 1024 4849 YTNYYY 53*555 1002 6760 YRAANR 5400*4 971 3627 YRARNA 5404*0 963 3212 YRANAR 540*04 973 3314 YRANRA 540*40 961 3270 YRANRY 540*45 977 3481 YRTTNY 5433*5 959 3268 YRTRNY 5434*5 1011 3624 YRTYNT 5435*3 959 3250 YRTYNR 5435*4 973 3387 YRTYNY 5435*5 1002 4568 YRTNRY 543*45 983 3600 YRTNYR 543*54 981 3745 YRTNYY 543*55 1009 4592 YRRANA 5440*0 960 3336 YRRANY 5440*5 956 3462 YRRTNY 5443*5 974 3226 YRRYNT 5445*3 976 3307 YRRYNY 5445*5 1020 4868 YRRNAA 544*00 978 3399 YRRNAY 544*05 985 3542 YRRNYY 544*55 1005 4662 YRYANR 5450*4 999 3877 YRYANY 5450*5 999 3447 YRYTNT 5453*3 963 3258 YRYTNY 5453*5 1006 4565 YRYRNA 5454*0 984 3807 YRYRNT 5454*3 1013 3678 YRYRNY 5454*5 1042 5376 YRYYYY 545555 976 3858 YRYYYNY 54555*5 970 3742 YRYYNA 5455*0 958 3257 YRYYNT 5455*3 1009 4667 YRYYNR 5455*4 1020 5017 YRYYNYR 5455*54 962 3064 YRYYNYY 5455*55 962 3551 YRYNAR 545*04 994 3536 YRYNAY 545*05 987 3645 YRYNTR 545*34 973 3509 YRYNTY 545*35 998 4633 YRYNRA 545*40 990 3669 YRYNRT 545*43 992 3726 YRYNRY 545*45 1028 5368 YRYNYA 545*50 968 3395 YRYNYT 545*53 999 4619 YRYNYR 545*54 1025 5320 YRYNYYY 545*555 956 3714 YRNAAR 54*004 984 3739 YRNARA 54*040 959 3500 YRNARY 54*045 997 3612 YRNAYR 54*054 980 3776 YRNAYY 54*055 978 3297 YRNTTY 54*335 957 3297 YRNTRY 54*345 982 3294 YRNTYY 54*355 998 4497 YRNRAA 54*400 980 3772 YRNRAY 54*405 999 3658 YRNRYA 54*450 970 3405 YRNRYT 54*453 987 3346 YRNRYY 54*455 1025 4996 YRNYAR 54*504 964 3152 YRNYAY 54*505 991 3529 YRNYTY 54*535 994 4453 YRNYRA 54*540 966 3381 YRNYRT 54*543 1000 3590 YRNYRY 54*545 1039 5194 YRNYYT 54*553 1004 4384 YRNYYR 54*554 
1008 4928 YRNYYYY 54*5555 959 3615 YYARNR 5504*4 960 3214 YYAYNR 5505*4 984 3494 YYAYNY 5505*5 994 4001 YYANRR 550*44 990 3701 YYANRY 550*45 976 3211 YYTTNY 5533*5 979 5217 YYTRNR 5534*4 988 3707 YYTRNY 5534*5 984 3863 YYTYYR 553554 956 3103 YYTYNA 5535*0 966 3448 YYTYNT 5535*3 971 5011 YYTYNR 5535*4 1007 5268 YYTYNYR 5535*54 958 3149 YYTNTR 553*34 960 3241 YYTNTY 553*35 964 4912 YYTNRY 553*45 998 4216 YYTNYA 553*50 969 3344 YYTNYT 553*53 963 4712 YYTNYR 553*54 1019 5084 YYTNYYR 553*554 961 2963 YYRANR 5540*4 985 3679 YYRTNR 5543*4 988 3691 YYRTNY 5543*5 1013 4487 YYRRNA 5544*0 992 3763 YYRRNY 5544*5 990 4525 YYRYRY 554545 964 2799 YYRYYNY 55455*5 959 3649 YYRYNA 5545*0 993 3741 YYRYNT 5545*3 998 4336 YYRYNR 5545*4 1032 5506 YYRYNYY 5545*55 967 3568 YYRNAR 554*04 998 4090 YYRNAY 554*05 989 3462 YYRNTY 554*35 970 4084 YYRNRA 554*40 998 4164 YYRNRT 554*43 965 3232 YYRNRY 554*45 1027 4982 YYRNYA 554*50 964 3340 YYRNYT 554*53 959 3954 YYRNYR 554*54 1021 5073 YYRNYY 554*55 1017 5896 YYYANR 5550*4 993 3679 YYYANY 5550*5 979 3703 YYYTNA 5553*0 960 3270 YYYTNT 5553*3 961 4943 YYYTNR 5553*4 996 4815 YYYTNY 5553*5 1009 7356 YYYRYR 555454 968 2981 YYYRYY 555455 967 3694 YYYRYNR 55545*4 973 3018 YYYRNA 5554*0 998 3965 YYYRNT 5554*3 975 4008 YYYRNR 5554*4 1037 5687 YYYRNY 5554*5 1026 6173 YYYYRY 555545 986 3939 YYYYRNR 55554*4 958 3325 YYYYRNY 55554*5 960 3634 YYYYYR 555554 1018 4239 YYYYYNR 55555*4 984 4506 YYYYYNY 55555*5 969 6624 YYYYNA 5555*0 1010 4735 YYYYNT 5555*3 1006 7298 YYYYNRY 5555*45 993 3825 YYYYNYR 5555*54 1002 4399 YYYYNYY 5555*55 970 6433 YYYNAY 555*05 1004 4148 YYYNTT 555*33 966 5042 YYYNTR 555*34 1012 4623 YYYNTY 555*35 1007 7360 YYYNRA 555*40 958 3992 YYYNRT 555*43 1005 4257 YYYNRR 555*44 1005 5747 YYYNRYR 555*454 965 3000 YYYNYA 555*50 1020 4708 YYYNYT 555*53 1007 7049 YYYNYRR 555*544 967 3308 YYYNYRY 555*545 982 3896 YYYNYYR 555*554 1015 4216 YYYNYYY 555*555 971 6366 YYNAYR 55*054 993 3496 YYNTAY 55*305 962 2882 YYNTTY 55*335 965 5325 YYNTRR 55*344 990 3727 
YYNTRY 55*345 1019 4506 YYNTYA 55*350 973 3200 YYNTYT 55*353 964 4870 YYNTYR 55*354 1026 4779 YYNTYY 55*355 1003 7337 YYNRAR 55*404 959 4149 YYNRTR 55*434 991 3381 YYNRTY 55*435 973 4120 YYNRRA 55*440 962 4109 YYNRRY 55*445 1007 4662 YYNRYA 55*450 999 3619 YYNRYT 55*453 959 4003 YYNRYR 55*454 1036 5328 YYNRYY 55*455 1015 6036 YYNYAT 55*503 969 3233 YYNYAR 55*504 986 3749 YYNYAY 55*505 1017 4477 YYNYTT 55*533 956 5032 YYNYTR 55*534 1011 4302 YYNYTY 55*535 1004 7324 YYNYRA 55*540 998 4047 YYNYRT 55*543 1011 4634 YYNYRR 55*544 1035 5821 YYNYRYR 55*5454 959 3105 YYNYRYY 55*5455 960 3618 YYNYYA 55*550 1022 4429 YYNYYT 55*553 1002 7000 YYNYYRY 55*5545 987 3890 YYNYYYR 55*5554 1002 4067 YYNYYYY 55*5555 975 6383 YNARAR 5*0404 958 3484 YNAYRR 5*0544 988 3685 YNAYRY 5*0545 994 3419 YNAYYY 5*0555 969 3745 YNTTYR 5*3354 970 3601 YNTTYY 5*3355 971 5493 YNTRRY 5*3445 975 3231 YNTRYR 5*3454 999 3859 YNTRYY 5*3455 1006 4256 YNTYAY 5*3505 962 3170 YNTYTR 5*3534 965 3093 YNTYTY 5*3535 957 5204 YNTYRT 5*3543 964 3214 YNTYRR 5*3544 986 3685 YNTYRY 5*3545 1007 4585 YNTYYT 5*3553 970 5047 YNTYYR 5*3554 1015 4970 YNTYYYR 5*35554 959 3063 YNRAAR 5*4004 960 3916 YNRARA 5*4040 959 3676 YNRAYR 5*4054 974 3429 YNRTRY 5*4345 1010 3502 YNRTYR 5*4354 956 3291 YNRTYY 5*4355 978 4211 YNRRAA 5*4400 973 3894 YNRRAY 5*4405 963 3391 YNRRYA 5*4450 958 3257 YNRRYY 5*4455 984 4556 YNRYAR 5*4504 985 3608 YNRYAY 5*4505 1007 3613 YNRYTY 5*4535 981 4216 YNRYRA 5*4540 980 3772 YNRYRT 5*4543 992 3538 YNRYRY 5*4545 1042 5219 YNRYYT 5*4553 984 4105 YNRYYR 5*4554 1013 4999 YNRYYYY 5*45555 957 3508 YNYARR 5*5044 988 3713 YNYARY 5*5045 966 3097 YNYAYR 5*5054 981 3843 YNYAYY 5*5055 996 4053 YNYTTY 5*5335 966 5540 YNYTRR 5*5344 960 3263 YNYTRY 5*5345 1002 4298 YNYTYA 5*5350 963 3147 YNYTYT 5*5353 972 5163 YNYTYR 5*5354 1021 4809 YNYTYY 5*5355 1002 7623 YNYRAR 5*5404 998 4051 YNYRAY 5*5405 970 3400 YNYRTR 5*5434 990 3799 YNYRTY 5*5435 1002 4432 YNYRRA 5*5440 989 4005 YNYRRT 5*5443 984 3297 YNYRRY 5*5445 1029 5073 
YNYRYA 5*5450 989 3767 YNYRYT 5*5453 997 4389 YNYRYR 5*5454 1029 5646 YNYRYYR 5*54554 964 2861 YNYYAT 5*5503 966 3239 YNYYAR 5*5504 970 3293 YNYYAY 5*5505 1004 4393 YNYYTT 5*5533 958 5257 YNYYTR 5*5534 1012 4333 YNYYTY 5*5535 999 7561 YNYYRA 5*5540 985 3656 YNYYRT 5*5543 1014 4686 YNYYRR 5*5544 1037 5403 YNYYRYR 5*55454 989 3204 YNYYYA 5*5550 1021 4336 YNYYYT 5*5553 1007 7279 YNYYYRY 5*55545 983 3876 YNYYYYR 5*55554 1009 4259 YNYYYYY 5*55555 964 6490 Nb models: 521 SMILEv1.47/Lanceur/param_1box0000644002404200237300000000153610066546175015512 0ustar lamaaoc00000000000000## In the following example, we want to extract models composed of a single box, ## using a degenerate alphabet A,C,G,T,R,Y,N (see file 'alphabet_deg'), ## with at most 2 R. ## Length of the model is between 6 and 8. No subst. allowed. Quorum 90%. FASTA file fasta Output file 1box_deg Alphabet file alphabet_deg Quorum 90 Total min length 6 Total max length 8 Total substitutions 0 Boxes 1 Composition in * 1 ## No more than one N for each model (* represents the wild card). ## As the wild card is in the model alphabet ('alphabet_deg' file), it's necessary ## to limit the number of Ns per model. Else, a lot of NNN... models without ## interest will be generated. Composition in AG 2 ## At most 2 R. Shufflings 10 Size k-mer 2 SMILEv1.47/Lanceur/param_example0000644002404200237300000000600310066542220016251 0ustar lamaaoc00000000000000## Annotated parameter file ## We want here to extract models composed of two boxes. Models have the ## following shape: XXXXXX___XXXXXX. The first box is of min length 6 and ## the second too. ## We want models occurrences to appear in at least 5% of the given sequences, ## with at most 1 substitution in one of the two boxes. The jump between these ## boxes has to be of 3 to 6 nucleotides. 
EXTRACTION CRITERIAS ========================================================== FASTA file fasta ## name of the FASTA file containing the ## sequences Output file example ## results file for extraction Alphabet file alphabet ## file containing the alphabet to use for ## generating the models ## Characteristics of the models to extract Quorum 4 ## minimum percentage of sequences where ## the model must appear Total min length 12 ## min length of the whole model Total max length 0 ## (0 for infinity) Total substitutions 1 ## max nb of substitutions for whole model Boxes 2 ## nb of boxes of the model (no limit) ## Characteristic for Box 1 BOX 1 ================ Min length 6 ## min length of box 1 Max length 0 ## max length for box 1 (0 for infinity) Substitutions 1 ## max nb of subst. for box 1 Min spacer length 17 ## min length of spacer until next box Max spacer length 19 ## max length of spacer until next box BOX 2 ================ Min length 6 ## min length of box 2 Max length 0 ## max length for box 2 (0 for infinity) Substitutions 1 ## max nb of subst. for box 2 ## Here is the second part of the evaluation of the models found before. ## We can choose two different methods: ## 1) Shuffling: generating random sequences having the same k-mer composition ## than original sequences, and computing statistical values with difference ## of frequencies observed. ## The file 'example.shuffle' is generated. ## 2) Against: compare frequencies in the original sequences with frequencies ## in other sequences called "wrong" (where the model is known to be absent). ## The file 'example.against' is generated. EVALUATION ==================================== ## Here we choose the first method. 
Shufflings 20 ## number of shufflings to do Size k-mer 3 ## size of the k-mer to conserve when ## shuffling ## The following parameter files show some other options of SMILE: ## - param_1box shows the case of a 1-box extraction with a degenerate ## alphabet, ## - param_against shows how to use the second method of evaluation, ## - param_delta shows how to use the "deltas". SMILEv1.47/Lanceur/example.shuffle0000644002404200237300000004207110066546131016536 0ustar lamaaoc00000000000000STATISTICS ON THE NUMBER OF SEQUENCES HAVING AT LEAST ONE OCCURRENCE Model %right #right %shfl. #shfl. Var. Chi2 Z-score ================================================================================= ATTGAC_TATAAT 4.43% 47 0.43% 4.56 2.56 35.80 16.55 AGAAAA_TTTTTC 5.18% 55 2.47% 26.20 3.87 10.62 7.44 CTTTTT_TTATAA 5.08% 54 2.20% 23.33 4.81 12.62 6.38 CTTTTT_TATAAT 4.24% 45 1.84% 19.55 4.20 10.35 6.07 TTTTAT_ATAATA 4.99% 53 2.44% 25.89 4.73 9.68 5.73 TTTATT_ATAATA 4.43% 47 2.00% 21.27 4.74 10.02 5.43 TTTTTA_ATAATA 4.80% 51 2.54% 26.99 4.54 7.67 5.28 TTATAA_AAAAAG 4.52% 48 2.16% 22.92 4.85 9.18 5.17 GAAAAA_TTTTTC 5.46% 58 2.85% 30.32 5.39 9.05 5.14 ATAAAA_AATGAA 4.61% 49 2.24% 23.79 4.92 9.04 5.13 TTTTAT_AAAGAA 5.18% 55 2.78% 29.53 5.12 7.99 4.97 TAAAAA_TTTTTC 6.31% 67 3.40% 36.06 6.32 9.76 4.90 ATTATA_TTTAAA 4.05% 43 2.02% 21.46 4.42 7.42 4.88 AAGAAA_TTTTTT 5.74% 61 3.39% 36.03 5.27 6.73 4.74 TTCTTT_ATAAAA 4.90% 52 2.73% 29.00 4.96 6.79 4.63 TTTTTA_TATAAT 4.90% 52 2.54% 26.93 5.44 8.27 4.60 ATTATA_TAAAAA 4.61% 49 2.54% 26.93 4.80 6.65 4.60 AAAAAT_TTTTCT 5.46% 58 3.22% 34.22 5.28 6.41 4.50 AAAAAA_TTTTTC 6.40% 68 3.83% 40.67 6.15 7.24 4.44 GAAAAA_TTTTCT 4.71% 50 2.52% 26.76 5.38 7.30 4.32 TTATAT_TTTATT 4.33% 46 2.52% 26.74 4.58 5.28 4.20 GAAAAA_TTTTTA 5.74% 61 3.42% 36.31 5.88 6.57 4.20 GAAAAA_CTTTTT 4.33% 46 2.57% 27.32 4.48 4.93 4.17 ATAAAA_AAAATG 4.52% 48 2.61% 27.74 4.87 5.62 4.16 GAAAAA_TTTTCA 4.52% 48 2.59% 27.46 4.97 5.80 4.13 AAAAAT_TTTTTC 6.31% 67 3.76% 39.88 6.59 7.25 4.11 
AAAAGA_TTTTTC 4.14% 44 2.35% 24.99 4.64 5.41 4.10 TTTATT_ATAAAA 5.18% 55 3.40% 36.09 4.64 4.10 4.07 AGAAAA_ATTTTT 5.37% 57 3.29% 34.94 5.43 5.53 4.06 ATAAAA_AAAGAA 4.80% 51 2.81% 29.84 5.24 5.76 4.04 TTTTTA_TTATAA 5.08% 54 3.00% 31.81 5.52 5.98 4.02 AAAAAG_TTTTTC 4.24% 45 2.57% 27.30 4.42 4.49 4.01 TATTAT_ATAAAA 4.24% 45 2.45% 26.00 4.77 5.26 3.99 GAAAAA_AAAAGA 4.33% 46 2.46% 26.15 4.98 5.65 3.98 TTCATT_TTTTAT 4.05% 43 2.22% 23.60 4.88 5.83 3.98 TTTTTT_TATAAT 4.90% 52 2.96% 31.44 5.31 5.27 3.87 ATAAAA_TTTTTC 5.18% 55 3.28% 34.81 5.25 4.74 3.85 ATAAAA_TTTTTT 6.87% 73 4.54% 48.23 6.44 5.37 3.85 TGAAAA_TTTTTC 4.43% 47 2.60% 27.64 5.08 5.20 3.81 CTTTTT_TTTTCA 4.05% 43 2.47% 26.19 4.42 4.22 3.81 GAAAAA_ATTTTT 5.56% 59 3.70% 39.33 5.20 4.13 3.78 TTCTTT_TATTTT 4.33% 46 2.66% 28.22 4.71 4.41 3.77 TTTTAT_ATAAAA 6.12% 65 3.96% 42.03 6.26 5.19 3.67 AAAAAA_CTTTTT 5.37% 57 3.47% 36.81 5.53 4.55 3.65 TGAAAA_AAAAAG 4.05% 43 2.43% 25.77 4.72 4.46 3.65 ATTTTT_TATAAT 4.43% 47 2.74% 29.14 4.92 4.35 3.63 AAAAAA_TTTTCT 5.46% 58 3.42% 36.30 6.02 5.23 3.60 GAAAAA_TTTTTT 5.93% 63 3.86% 40.98 6.14 4.90 3.59 AGAAAA_TTTTTT 5.37% 57 3.52% 37.34 5.49 4.29 3.58 ATAAAA_TTTTCT 4.52% 48 2.85% 30.32 4.95 4.14 3.57 ATAAAA_TATAAT 4.05% 43 2.36% 25.04 5.08 4.90 3.54 GAAAAA_AAAGAA 4.24% 45 2.45% 26.07 5.37 5.22 3.53 AAAAAG_TTTTTT 5.37% 57 3.51% 37.26 5.67 4.33 3.48 AATAAA_ATATAA 4.33% 46 2.52% 26.76 5.53 5.27 3.48 ATTTTA_GAAAAA 4.33% 46 2.66% 28.24 5.11 4.40 3.47 AAAATA_AATAAA 5.08% 54 3.45% 36.60 5.10 3.49 3.41 TTCTTT_TTTTAT 4.52% 48 2.81% 29.81 5.42 4.41 3.36 ACAAAA_ATTTTT 4.52% 48 2.91% 30.89 5.12 3.85 3.34 TATTAT_TAAAAA 4.24% 45 2.55% 27.12 5.37 4.59 3.33 TTTTTC_TAAAAT 4.33% 46 2.73% 28.94 5.18 4.03 3.30 TTTTAT_AAAAGA 4.43% 47 2.89% 30.72 4.96 3.54 3.28 AAAATA_AAAGAA 4.05% 43 2.68% 28.46 4.45 3.06 3.27 AAAATA_ATGAAA 4.14% 44 2.61% 27.70 4.99 3.84 3.27 ACAAAA_TTTTTT 4.52% 48 3.04% 32.27 4.92 3.20 3.20 CATTTT_TTTTAT 4.24% 45 2.61% 27.73 5.50 4.25 3.14 AAAAAA_TTTCTT 5.08% 54 3.26% 34.60 6.18 4.43 3.14 
TTATAA_TAAAAA 4.71% 50 2.96% 31.42 5.92 4.41 3.14 TTTTCT_TATTTT 4.24% 45 2.85% 30.26 4.81 2.99 3.06 TCTTTT_TTTTTC 4.05% 43 2.51% 26.64 5.50 3.97 2.98 AAAAAT_TTTTTA 6.40% 68 4.52% 48.02 6.74 3.64 2.97 TTTTAT_AATAAA 4.99% 53 3.46% 36.72 5.51 3.08 2.95 TCAAAA_ATTTTT 4.43% 47 3.01% 32.00 5.14 2.96 2.92 TAAAAA_CTTTTT 4.61% 49 3.11% 33.01 5.49 3.24 2.91 TTTTTT_CTTTTT 4.90% 52 3.49% 37.04 5.14 2.62 2.91 AAAAAG_TTTTTA 4.71% 50 3.19% 33.88 5.68 3.23 2.84 TTCTTT_TTTTTA 4.24% 45 2.82% 29.99 5.30 3.11 2.83 ATTATA_AAAAAA 4.33% 46 2.91% 30.92 5.48 3.07 2.75 TAAAAA_TTTTCT 4.52% 48 2.94% 31.17 6.13 3.72 2.74 AAAATA_AAAAGA 4.05% 43 2.69% 28.55 5.27 3.02 2.74 AAGAAA_TTTTTA 4.24% 45 2.81% 29.79 5.59 3.21 2.72 AAAAAT_TTTTCA 4.99% 53 3.48% 36.92 5.95 3.00 2.70 TGAAAA_ATTTTT 4.71% 50 3.43% 36.39 5.04 2.24 2.70 AAAAAA_TTTTGT 4.43% 47 3.04% 32.25 5.49 2.85 2.69 TAAAAA_TTTTTT 6.40% 68 4.63% 49.14 7.04 3.21 2.68 TAAAAA_ATTTTT 6.21% 66 4.60% 48.87 6.42 2.70 2.67 GAAAAA_AATAAA 4.24% 45 2.80% 29.78 5.76 3.21 2.64 AATATT_TTTTAT 4.43% 47 3.07% 32.59 5.49 2.71 2.63 TTTATT_AAAATA 4.90% 52 3.30% 35.02 6.50 3.45 2.61 AAATAA_TTTTTT 5.56% 59 4.02% 42.65 6.32 2.76 2.59 TTTATT_AATAAA 4.14% 44 2.89% 30.74 5.20 2.44 2.55 TTTTAT_AAAAAG 4.43% 47 3.21% 34.11 5.08 2.13 2.54 AAAAAA_TTTTTA 6.21% 66 4.63% 49.16 6.67 2.60 2.53 AAAAAA_TTTTTT 6.31% 67 4.78% 50.73 6.45 2.38 2.52 TCTTTT_ATAAAA 4.05% 43 2.79% 29.66 5.33 2.54 2.50 ATAAAA_AAAAGA 4.14% 44 2.80% 29.69 5.73 2.88 2.50 AATAAA_TTTTTC 4.05% 43 2.77% 29.46 5.44 2.62 2.49 TAAAAA_GTTTTT 4.05% 43 2.78% 29.48 5.45 2.61 2.48 TAAAAA_AAAGAA 4.05% 43 2.86% 30.41 5.13 2.24 2.45 AAAAAT_GAAAAT 4.14% 44 2.90% 30.80 5.40 2.41 2.45 AAAAAT_ATAATA 4.05% 43 2.90% 30.85 4.97 2.07 2.44 AAGAAA_ATTTTT 4.43% 47 3.16% 33.54 5.52 2.34 2.44 AAAAAA_TTATTT 5.18% 55 3.84% 40.80 5.86 2.20 2.42 CAAAAA_AAAAAT 4.43% 47 3.18% 33.82 5.46 2.23 2.41 TTTTCA_ATAAAA 4.24% 45 3.11% 33.01 5.02 1.91 2.39 ATAAAA_ATTTTT 5.84% 62 4.47% 47.52 6.10 2.02 2.37 AAAGAA_TTTTTT 4.52% 48 3.27% 34.72 5.61 2.22 2.37 
AATAAA_TTTTTT 5.27% 56 3.94% 41.79 6.02 2.16 2.36 TCTTTT_TTTTTA 4.05% 43 2.90% 30.78 5.23 2.10 2.34 AAAAAT_TTTTGA 4.24% 45 2.98% 31.66 5.73 2.41 2.33 TATTTT_AATAAA 4.61% 49 3.44% 36.54 5.38 1.89 2.32 AGAAAA_TTTTTA 4.14% 44 2.96% 31.45 5.46 2.16 2.30 TTTTCT_ATAAAA 4.05% 43 2.92% 31.04 5.22 2.00 2.29 TATTAT_AAAAAA 4.05% 43 2.97% 31.51 5.04 1.84 2.28 ATTTTC_ATTTTT 4.14% 44 2.95% 31.29 5.61 2.22 2.27 TTTTCA_AAAATA 4.05% 43 2.92% 30.96 5.34 2.03 2.26 GAAAAA_TTTTAT 4.43% 47 3.21% 34.12 5.74 2.13 2.24 TTTTAT_TTATAA 4.05% 43 2.80% 29.75 5.92 2.50 2.24 AATAAA_TATTTT 4.43% 47 3.21% 34.05 5.81 2.15 2.23 TTCAAA_ATTTTT 4.05% 43 2.83% 30.10 5.83 2.36 2.21 TATTTT_AAAAAG 4.24% 45 3.12% 33.09 5.43 1.89 2.19 AAAAAA_ATTTTT 6.40% 68 4.90% 52.05 7.31 2.25 2.18 TTTTTA_CTTTTT 4.14% 44 3.10% 32.89 5.09 1.67 2.18 AAAATA_GAAAAA 4.24% 45 3.17% 33.64 5.24 1.70 2.17 AAAAAG_TAAAAA 4.14% 44 3.00% 31.88 5.64 2.01 2.15 AAAAAA_AAAAGA 4.43% 47 3.28% 34.88 5.69 1.87 2.13 CTTTTT_AAAATA 4.05% 43 2.97% 31.56 5.41 1.82 2.11 TGAAAA_TTTTTT 4.71% 50 3.65% 38.74 5.34 1.49 2.11 ATAAAA_AATATT 4.14% 44 3.04% 32.28 5.56 1.87 2.11 TTTTTA_AAAGAA 4.05% 43 2.96% 31.40 5.51 1.87 2.11 ATAAAA_AATAAA 4.61% 49 3.40% 36.15 6.15 2.02 2.09 TTATTT_ATAAAA 4.52% 48 3.42% 36.31 5.61 1.69 2.08 ATAAAA_TTATTT 4.33% 46 3.31% 35.19 5.20 1.50 2.08 AAACAA_TTTTTT 4.05% 43 2.92% 30.98 5.82 2.02 2.07 TTTATT_TTTTAT 4.61% 49 3.47% 36.83 5.98 1.80 2.04 TTTTAT_TTTTTA 5.18% 55 4.08% 43.38 5.72 1.44 2.03 TTCTTT_ATTTTT 4.33% 46 3.22% 34.22 5.88 1.80 2.00 CTTTTT_ATAAAA 4.05% 43 3.07% 32.57 5.22 1.49 2.00 AAATAA_TTTTAT 4.33% 46 3.32% 35.25 5.39 1.48 1.99 TCTTTT_TTTTTT 4.61% 49 3.45% 36.61 6.25 1.87 1.98 AAAAAT_GAAAAA 4.71% 50 3.72% 39.55 5.28 1.27 1.98 TTTAAA_TTTTTT 4.80% 51 3.86% 40.97 5.08 1.14 1.97 AATAAA_AATTTT 4.24% 45 3.23% 34.25 5.49 1.51 1.96 TGTTTT_TTTTTT 4.24% 45 3.22% 34.18 5.53 1.54 1.96 TTAAAT_TTTTTT 4.05% 43 3.16% 33.54 4.84 1.21 1.95 ATTTTA_AAAAAT 4.71% 50 3.58% 38.01 6.24 1.70 1.92 ATAAAA_TTTTCA 4.14% 44 3.02% 32.09 6.25 1.93 1.91 
AAAAAA_GTTTTT 4.14% 44 3.23% 34.28 5.17 1.25 1.88 AAAATT_TTTATT 4.05% 43 3.06% 32.46 5.62 1.53 1.88 ATTTTT_TAAAAT 4.71% 50 3.66% 38.91 5.92 1.44 1.87 TTTTTT_TTTATT 5.18% 55 4.10% 43.53 6.14 1.40 1.87 AAAATA_TTTTTA 5.27% 56 4.07% 43.24 6.83 1.72 1.87 AAAAAA_TTCTTT 4.14% 44 3.20% 33.98 5.40 1.34 1.86 TTTTAT_AAATAA 4.43% 47 3.43% 36.44 5.72 1.39 1.85 AAAATA_TTTTTC 4.14% 44 3.26% 34.62 5.10 1.16 1.84 GAAAAA_ATAAAA 4.14% 44 3.27% 34.77 5.20 1.12 1.78 TGAAAA_TAAAAA 4.14% 44 3.18% 33.82 5.78 1.38 1.76 TTTTTA_TTTTCA 4.24% 45 3.21% 34.10 6.21 1.56 1.76 CAAAAA_ATTTTT 4.14% 44 3.21% 34.13 5.65 1.29 1.75 AAAAAT_TGAAAA 4.43% 47 3.45% 36.68 5.91 1.32 1.75 AAAAAA_AAAACA 4.05% 43 3.10% 32.95 5.83 1.38 1.72 AAAGAA_AAAAAT 4.05% 43 3.16% 33.61 5.48 1.19 1.71 AAAAAC_TTTTTT 4.14% 44 3.25% 34.56 5.53 1.18 1.71 TATTTT_AAAATA 4.80% 51 3.88% 41.25 5.76 1.08 1.69 AAAAAT_TTTTTT 6.12% 65 5.01% 53.18 7.02 1.25 1.68 TTTTAT_TATAAA 4.14% 44 3.20% 34.01 5.94 1.33 1.68 AAAAAA_TTTTCA 4.61% 49 3.64% 38.64 6.46 1.28 1.60 AAAAAT_AATAAA 4.99% 53 3.99% 42.41 6.60 1.23 1.60 ATAAAA_ATATTT 4.43% 47 3.53% 37.45 6.00 1.12 1.59 TTTATT_TATTTT 4.24% 45 3.37% 35.75 5.92 1.10 1.56 ATTTTT_ATATAA 4.14% 44 3.40% 36.11 5.06 0.81 1.56 TTTTCA_TTTTTT 4.43% 47 3.64% 38.63 5.38 0.85 1.56 AAAAAG_AAAAAA 4.43% 47 3.45% 36.62 6.71 1.34 1.55 TTTATA_ATAAAA 4.14% 44 3.27% 34.70 6.05 1.14 1.54 AAATAA_TTTTTA 4.33% 46 3.46% 36.70 6.09 1.09 1.53 AAAAAA_TTTTTG 4.05% 43 3.29% 34.90 5.41 0.87 1.50 TTTTAT_GAAAAA 4.24% 45 3.43% 36.38 5.77 0.95 1.49 TTTTTC_TAAAAA 4.14% 44 3.44% 36.49 5.05 0.73 1.49 TTTTTC_ATAAAA 4.24% 45 3.44% 36.58 5.95 0.90 1.42 AAAAAT_CTTTTT 4.14% 44 3.34% 35.45 6.08 0.96 1.41 TTTTTA_ATAAAA 5.08% 54 4.23% 44.88 6.54 0.88 1.39 CAAAAA_TTTTTT 4.14% 44 3.42% 36.32 5.52 0.76 1.39 AAAATA_AAATAA 4.05% 43 3.36% 35.67 5.31 0.71 1.38 AAAAGA_TTTTTT 4.05% 43 3.32% 35.22 5.70 0.80 1.36 AAAATA_AAAATA 4.61% 49 3.78% 40.19 6.49 0.91 1.36 AATATT_AAAAAA 4.33% 46 3.59% 38.10 5.90 0.77 1.34 GAAAAA_AAAAAA 4.43% 47 3.71% 39.43 5.67 0.69 1.34 
TTTTTC_TTTTTT 4.43% 47 3.76% 39.96 5.32 0.59 1.32 AAAATA_TTTTTT 5.37% 57 4.49% 47.70 7.05 0.87 1.32 ATAAAA_TTTTAT 4.71% 50 3.84% 40.79 7.00 0.98 1.32 TTATAT_AAAAAT 4.05% 43 3.38% 35.90 5.40 0.66 1.32 AAAAAG_AAAAAT 4.14% 44 3.36% 35.71 6.31 0.90 1.31 ATTTTT_TTTTTC 4.43% 47 3.79% 40.23 5.19 0.55 1.30 AAATAT_TTTTAT 4.33% 46 3.56% 37.86 6.35 0.82 1.28 TTTTTC_TATTTT 4.05% 43 3.29% 34.93 6.34 0.87 1.27 TTTTTT_AATATT 4.33% 46 3.64% 38.65 5.82 0.66 1.26 AAAAAA_TTTTAT 5.37% 57 4.51% 47.93 7.18 0.82 1.26 ATAAAT_ATTTTT 4.05% 43 3.32% 35.21 6.25 0.81 1.25 TTTTTT_TTTTTC 4.52% 48 3.80% 40.40 6.21 0.68 1.22 TTTTAT_TTTATT 4.14% 44 3.47% 36.80 5.91 0.67 1.22 ATTTTT_TTTATT 4.61% 49 3.96% 42.07 5.74 0.55 1.21 AAAAAT_ATTTTT 5.65% 60 5.00% 53.12 5.71 0.44 1.20 TATTTT_TTTATT 4.05% 43 3.40% 36.11 5.73 0.62 1.20 AAATAT_TTTTTA 4.33% 46 3.71% 39.41 5.48 0.53 1.20 AAAATA_AAAAAA 5.18% 55 4.45% 47.21 6.49 0.62 1.20 TAAAAA_TTATTT 4.14% 44 3.50% 37.15 5.75 0.60 1.19 ATATTT_AAAAAT 4.71% 50 4.06% 43.13 5.91 0.53 1.16 TTTTCA_ATTTTT 4.14% 44 3.56% 37.78 5.43 0.49 1.15 TTATAA_ATTTTT 4.05% 43 3.37% 35.77 6.35 0.69 1.14 AATAAA_ATTTTT 4.43% 47 3.82% 40.54 5.69 0.50 1.14 ATAAAA_AATTTT 4.33% 46 3.72% 39.53 5.83 0.51 1.11 TTTTTT_TATTTT 5.27% 56 4.56% 48.43 6.83 0.58 1.11 TTTTTA_TTATTT 4.14% 44 3.55% 37.68 5.93 0.51 1.07 ATAAAA_TTTAAA 4.05% 43 3.40% 36.10 6.56 0.63 1.05 AAAAAA_TGAAAA 4.14% 44 3.62% 38.48 5.35 0.38 1.03 TATAAA_ATTTTT 4.14% 44 3.62% 38.40 5.95 0.40 0.94 TTTTTT_ATAAAA 5.37% 57 4.78% 50.75 6.65 0.38 0.94 ATAAAA_AAATAT 4.14% 44 3.65% 38.78 5.68 0.34 0.92 AAAAAT_ATAAAA 4.99% 53 4.44% 47.18 6.42 0.35 0.91 TTTTTC_ATTTTT 4.24% 45 3.72% 39.46 6.21 0.38 0.89 ATTTTT_ATTTAT 4.05% 43 3.58% 37.97 5.66 0.32 0.89 TTTTAT_TAAAAA 4.80% 51 4.28% 45.41 6.33 0.34 0.88 TTATTT_TAAAAA 4.05% 43 3.63% 38.55 5.06 0.25 0.88 ATTTTA_AAAAAA 4.52% 48 3.96% 42.02 6.87 0.41 0.87 TAAAAA_TATTTT 4.52% 48 4.01% 42.56 6.40 0.34 0.85 TTTTTA_AAATAA 4.05% 43 3.63% 38.50 5.44 0.26 0.83 TTTTTT_TTTTCA 4.14% 44 3.72% 39.55 5.52 0.25 0.81 
TTTTTT_TAAAAT 4.33% 46 3.85% 40.87 6.40 0.32 0.80 AAAAAA_GAAAAA 4.24% 45 3.77% 40.03 6.44 0.30 0.77 AAAAAA_TATTTT 4.90% 52 4.43% 47.02 6.64 0.26 0.75 TTTTAT_TTTTAT 4.43% 47 3.98% 42.31 6.34 0.26 0.74 TAAAAA_ATAAAA 4.52% 48 4.08% 43.28 6.56 0.26 0.72 AAAAAA_TTTATT 4.24% 45 3.83% 40.67 6.10 0.23 0.71 TTTTAT_AAAAAA 5.18% 55 4.71% 50.00 7.19 0.25 0.69 TATTTT_ATAAAA 4.33% 46 3.94% 41.83 6.28 0.21 0.66 AAAAAT_AAATAA 4.33% 46 3.95% 41.94 6.17 0.20 0.66 AAAAAT_TTTTAT 4.80% 51 4.39% 46.64 6.79 0.20 0.64 TTTATT_TTTTTT 4.52% 48 4.10% 43.55 7.11 0.23 0.63 AAAAAA_TTTAAA 4.24% 45 3.90% 41.44 5.93 0.15 0.60 TTTTTT_TTTTAT 4.99% 53 4.61% 48.98 6.78 0.17 0.59 AATAAA_AAAAAT 4.33% 46 3.98% 42.28 6.34 0.16 0.59 TTTTTA_TTTTTT 4.90% 52 4.53% 48.14 6.59 0.16 0.59 AAAAAT_AAAAAA 5.27% 56 4.90% 52.09 6.87 0.15 0.57 TTTTTT_TTTTTT 4.99% 53 4.65% 49.33 6.59 0.14 0.56 TAAAAA_AATTTT 4.14% 44 3.82% 40.56 6.24 0.15 0.55 TTTAAA_AAAAAT 4.14% 44 3.85% 40.86 5.97 0.12 0.53 AAAAAT_TTTTAA 4.24% 45 3.90% 41.46 6.77 0.15 0.52 AATAAA_AAAAAA 4.33% 46 4.02% 42.65 6.49 0.13 0.52 AAAAAA_AATAAA 4.33% 46 4.05% 42.99 6.00 0.11 0.50 TAAAAT_AAAAAA 4.14% 44 3.86% 41.02 6.35 0.11 0.47 TTTTAT_TTTTTT 4.90% 52 4.61% 48.95 6.78 0.10 0.45 TTATTT_TTTTTT 4.33% 46 4.06% 43.14 6.37 0.10 0.45 AAATAA_ATTTTT 4.05% 43 3.81% 40.47 5.69 0.08 0.44 AAAATA_AAAAAT 4.61% 49 4.33% 45.94 6.95 0.10 0.44 TTTATA_ATTTTT 4.05% 43 3.84% 40.80 5.95 0.06 0.37 TTTTTT_ATTTTT 5.18% 55 4.96% 52.69 6.48 0.05 0.36 AAAAAA_TTTTAA 4.33% 46 4.15% 44.03 5.56 0.05 0.35 AAAAAT_AAAAAT 5.08% 54 4.89% 51.89 6.03 0.04 0.35 AAAATA_TTTTAT 4.05% 43 3.85% 40.90 6.12 0.05 0.34 AAAAAT_TAAAAA 4.71% 50 4.50% 47.80 6.67 0.05 0.33 TTTATT_ATTTTT 4.24% 45 4.03% 42.80 6.88 0.06 0.32 AAAAAA_TAAAAA 4.61% 49 4.46% 47.36 5.83 0.03 0.28 TTTATT_AAAAAT 4.05% 43 3.87% 41.06 6.93 0.05 0.28 TTTTTA_TTTTAT 4.33% 46 4.16% 44.14 6.68 0.04 0.28 TTTTTT_GAAAAA 4.05% 43 3.89% 41.30 6.21 0.04 0.27 TTAAAA_AAAAAT 4.05% 43 3.87% 41.10 6.97 0.04 0.27 TTTTTA_TATTTT 4.14% 44 4.01% 42.54 6.10 0.03 0.24 
TTATTT_ATTTTT 4.14% 44 3.99% 42.36 6.92 0.03 0.24 TTTTTA_AAAAAA 4.90% 52 4.74% 50.33 7.20 0.03 0.23 TTTTAT_AAAATA 4.05% 43 3.93% 41.72 6.51 0.02 0.20 ATTTTT_ATAAAA 4.71% 50 4.60% 48.80 6.12 0.02 0.20 TTTTTT_AAATAA 4.14% 44 4.03% 42.80 6.13 0.02 0.20 ATTTTT_AAATAT 4.24% 45 4.12% 43.76 6.54 0.02 0.19 AAATTT_AAAAAT 4.24% 45 4.13% 43.88 6.07 0.01 0.18 ATAAAA_TAAAAA 4.14% 44 4.05% 43.02 6.28 0.01 0.16 AAAAAA_ATAAAA 4.52% 48 4.44% 47.20 6.52 0.01 0.12 TTAAAA_ATTTTT 4.05% 43 3.99% 42.36 6.26 0.00 0.10 AATTTT_AAAAAT 4.14% 44 4.11% 43.66 7.37 0.00 0.05 ATAAAA_AAAAAA 4.61% 49 4.60% 48.84 7.50 0.00 0.02 ATTTTT_TTTTAT 4.61% 49 4.60% 48.90 6.03 0.00 0.02 ATTTTT_AAATTT 4.14% 44 4.13% 43.91 6.32 0.00 0.01 TTTTAA_AAAAAA 4.14% 44 4.14% 43.96 6.38 0.00 0.01 TTTTTT_TAAAAA 4.80% 51 4.80% 51.02 6.39 0.00 -0.00 TTTTTT_ATATTT 4.24% 45 4.24% 45.07 6.86 0.00 -0.01 ATAAAA_AAAAAT 4.52% 48 4.54% 48.20 6.09 0.00 -0.03 AAAATA_TAAAAA 4.05% 43 4.08% 43.28 6.48 0.00 -0.04 TATTTT_ATTTTT 4.33% 46 4.40% 46.73 7.14 0.01 -0.10 TATTTT_AAAAAT 4.43% 47 4.50% 47.79 6.47 0.01 -0.12 TTTTTA_ATTTTT 4.52% 48 4.63% 49.16 5.87 0.01 -0.20 TTTTTA_AAAAAT 4.52% 48 4.65% 49.37 6.83 0.02 -0.20 AAATAT_AAAAAA 4.05% 43 4.20% 44.61 7.23 0.03 -0.22 AAAAAT_AAAATA 4.24% 45 4.39% 46.67 6.68 0.03 -0.25 TTTTAT_AAAAAT 4.43% 47 4.60% 48.88 6.41 0.04 -0.29 TTTTAT_ATTTTT 4.52% 48 4.68% 49.74 5.55 0.03 -0.31 AAAAAA_AAAAAA 4.33% 46 4.55% 48.35 7.17 0.06 -0.33 TTTTTA_TAAAAA 4.05% 43 4.32% 45.86 5.65 0.10 -0.51 ATTTTT_TATTTT 4.24% 45 4.54% 48.23 6.26 0.12 -0.52 ATTTTT_AAAATA 4.14% 44 4.46% 47.39 6.48 0.13 -0.52 TTTTTT_TTTTTA 4.24% 45 4.61% 48.94 6.93 0.17 -0.57 ATTTTT_ATTTTT 4.61% 49 4.98% 52.86 6.73 0.15 -0.57 ATTTTT_AAAAAT 4.61% 49 5.04% 53.49 6.73 0.21 -0.67 ATTTTT_TAAAAA 4.33% 46 4.76% 50.51 5.93 0.22 -0.76 TTTTTT_AAAATA 4.05% 43 4.52% 48.01 6.54 0.29 -0.77 TAAAAA_AAAAAA 4.05% 43 4.55% 48.35 6.57 0.33 -0.81 TATTTT_AAAAAA 4.05% 43 4.62% 49.06 6.92 0.42 -0.88 TTTTTT_AAAAAA 4.33% 46 5.00% 53.15 7.41 0.54 -0.96 TTTTTT_AAAAAT 4.43% 47 
5.09% 54.02 6.87 0.51 -1.02 ATTTTT_TTTTTT 4.14% 44 4.98% 52.91 7.50 0.86 -1.19 User time : 518.57 sec. SMILEv1.47/Lanceur/DocFrancais/0000755002404200237300000000000010066547641015704 5ustar lamaaoc00000000000000SMILEv1.47/Lanceur/DocFrancais/param_contre0000644002404200237300000000220310066546062020272 0ustar lamaaoc00000000000000## Fichier d'exemple: evaluation contre un jeu de sequences "fausses" FASTA file fasta Output file contre Alphabet file alphabet Quorum 50 Total min length 8 Total max length 8 Total substitutions 1 Boxes 2 ## On veut que les modeles ne contiennent pas plus de 5 A Composition in A 5 BOX 1 Min length 4 Max length 4 Substitutions 1 Min spacer length 4 Max spacer length 8 ## ...dont au plus 2 dans le premier bloc Composition in A 2 BOX 2 Min length 4 Max length 4 Substitutions 1 ## Dans cet exemple, on utilise la methode d'evaluation 2 contre le fichier ## fasta 'fastawrong'. Against wrong sequences fastawrong ## Le fichier de stats genere est du meme type que pour le shuffling, excepte ## que les stats en sequence et totales sont sur la meme ligne. ## Un symbole '+' ou '-' indique pour chaque stat si le modele est sur ou ## sous represente. ## Seul le test du Khi2 est utilise, le Z-score n'ayant aucun sens ici ## (un seul jeu de donnees "fausses"). SMILEv1.47/Lanceur/DocFrancais/param_1bloc0000644002404200237300000000145610066546062020011 0ustar lamaaoc00000000000000##Dans cet exemple on cherche des modeles composes d'un seul bloc sur un ## alphabet degenere A,C,G,T,R,Y,N (voir fichier 'alphabet_deg'). FASTA file fasta Output file 1bloc_deg Alphabet file alphabet_deg Quorum 90 Total min length 6 Total max length 8 Total substitutions 0 Boxes 1 Composition in * 1 ## Pas plus d'un joker par modele (* represente le joker). ## Etant donne qu'on a mis le joker dans l'alphabet (fichier 'alphabet_deg'), ## il est necessaire de limiter la composition des modeles en '*'. ## Sinon, beaucoup de modeles sans grand interet vont etre generes. 
Size k-mer 2 Shufflings 10 SMILEv1.47/Lanceur/DocFrancais/param_delta0000644002404200237300000000214510066546062020076 0ustar lamaaoc00000000000000## Dans cet exemple on utilise les "Delta", c'est a dire que les modeles trouves ## doivent... FASTA file fasta Output file delta Alphabet file alphabet Quorum 70 Total min length 5 Total max length 6 Total substitutions 0 Boxes 2 BOX 1 Min length 2 Max length 3 Substitutions 0 ## ...non seulement avoir un saut de 2 a 8 nucleotides entre les 2 blocs... Min spacer length 2 Max spacer length 8 ## ...mais a l'interieur meme de cet intervalle on veut que les occurrences ## soient regroupees dans un intervalle [d-delta, d+delta] ou d appartient a ## l'intervalle 2-8. Autant de fichiers resultats seront generes qu'il ## y a de combinaisons possibles. Ici les intervalles de saut pour un modele ## sont donc: [2-4], [3-5], [4-6], [5-7], [6-8]. Delta 1 BOX 2 Min length 3 Max length 3 Substitutions 0 ## Si l'on veut evaluer les modeles de chaque fichier, on place ici ## les lignes d'evaluation habituelles (shuffling ou contre). SMILEv1.47/Lanceur/DocFrancais/NOUVEAU0000644002404200237300000000301010066547641016743 0ustar lamaaoc00000000000000Depuis la version 1.41, SMILE peut traiter des sequences ayant un alphabet quelconque, qu'il n'est pas besoin de preciser. Il faut par contre indiquer au programme sur quel alphabet on va generer les modeles et quelles sont les correspondances entre les symboles. Ces indications se trouvent dans un fichier (voir les exemples 'alphabet' et 'alphabet_deg'). Ce fichier contient: - un type (nucleotides, proteines, autres) qui permet de regrouper des symboles formant un groupe connu sous un autre symbole afin de rendre les resultats plus lisibles. (exemple: AG => R) - une suite de groupes de symboles, par exemple: AB C D ...indique que l'on va generer des modeles sur un alphabet a 3 symboles [AB], C, ou D. Le fichier alphabet a utiliser doit etre indique dans le fichier de parametres. 
Les versions 1.42 et 1.43 ont vus quelques bugs corriges. La version 1.44 considere les modeles valides d'une autre facon que precedemment. En effet, dans les versions precedentes, si un modele AAAA etait valide mais qu'un modele AAAAT l'etait aussi, ce modele AAAA n'apparaissait dans les resultats. Dorenavant tout modele valide apparait dans les resultats de l'extraction. Les versions 1.45 et 1.46 fixent des bugs. La version 1.47 corrige un bug important et ajoute la fonctionnalite 'palindrome'. En effet, on peut maintenant extraire des modeles dont on souhaite qu'une ou plusieurs des boites qui les composent soient des palindromes biologiques d'autres boites. SMILEv1.47/Lanceur/DocFrancais/LISEZ_MOI0000644002404200237300000001314310066546352017221 0ustar lamaaoc00000000000000Salut beta-testeur, la doc de SMILE n'est pas tres fournie, mais voici un rapide apprentissage par l'exemple. SMILE permet d'extraire des modeles structures ayant des caracteristiques precises d'un jeu de sequences donne, et d'evaluer la representativite des modeles trouves selon deux methodes. Le seul programme que tu utiliseras est "smile". On lui donne en parametre un fichier contenant les caracteristiques des modeles a extraire. Le fichier "param_exemple" est commenté et te donneras un apercu des caracteristiques de base. Les fichiers "param_1bloc", "param_contre" et "param_delta" montrent des options supplementaires. L'execution de SMILE se fait en deux phases independantes: - extraction des modeles repondant aux criteres donnes - evaluation de ces modeles sur des criteres statistiques, selon deux methodes au choix (shuffling et contre autres sequences). L'execution de "smile param_exemple" (voir le fichier param_exemple pour connaitre les criteres d'extraction) produit: 1 - le fichier "example.out" qui contient tous les modeles repondant aux criteres demandes. En voici un extrait: AAAAAA_TGAAAA 000000-320000 44 Seq 433 Pos 187 18 Seq 544 Pos 213 17 ... 
Seq 438 Pos 81 19 Seq 931 Pos 165 18 63 Le modeles AAAAAA_TGAAAA apparait dans 44 sequences en 63 positions. Puis, chaque position est indiquee. Les sequences et positions commencent a 0. Le dernier chiffre des lignes de position indique le saut retenu entre les deux blocs qui composent le modele. [Pour desactiver la gestion et l'affichage des positions, il suffit de recompiler les repertoires P_BLOCS et P_BLOCS+DELTA en desactivant les deux flags qui sont en tete des makefiles respectifs. Le flag NB_OCCS active ou non l'affichage du nombre total d'occurrences, et AFF_OCCS active ou non l'affichage de chacune des positions des occurrences.] 2 - le fichier "example.out.stat" qui contient les resultats de 100 shufflings ayant pour but de determiner si les modeles trouves sont representatifs ou non. On obtient: STATS SUR LE NOMBRE D'OCCURRENCES PAR SEQUENCE Modele %right #right %shfl. #shfl. Sigma Chi2 Z-score ======================================================================= ATTGAC_TATAAT 4.43% 47 0.49% 5.24 2.17 34.22 19.28 AGAAAA_TTTTTC 5.18% 55 1.30% 13.85 3.50 25.42 11.77 GAAAAA_TTTTTC 5.46% 58 1.44% 15.30 3.78 25.76 11.29 ... STATS SUR LE NOMBRE D'OCCURRENCES TOTAL Modele #right #shfl. Sigma Chi2 Z-score ======================================================================= ATTGAC_TATAAT 47 5.28 2.18 33.30 19.14 AGAAAA_TTTTTC 80 16.34 4.31 42.08 14.76 GAAAAA_TTTTTC 89 19.05 5.03 45.30 13.90 ... Les modeles sont tries selon le Z-score. Plus le Z-score est eleve, plus le modele est exceptionnellement represente dans la sequence originale. S'il est negatif, le modele est sous-represente. S'il vaut MAX_INT il n'a pu etre evalue (le motif n'a pas ete trouve dans les sequences shufflees par exemple). Le Chi2, indique egalement la representativite du modele, sans signe evidemment: un Chi2 fort indique un modele tres sur ou sous-represente. Il faut donc se fier au signe du Z-score ou aux pourcentages pour statuer sur le Chi2. 
Les colonnes "right" donnent les pourcentages et nombre d'occurrence du motif dans le fichier 'fasta'. Les colonnes "shfl" donnent les memes valeurs dans les sequences shufflees. La colonne "Sigma" indique l'ecart-type. Ici, ATTGAC_TATAAT semble etre nettement sur-represente dans les sequences du fichier 'fasta'. La meilleure tactique, quand on ne connait pas suffisament de criteres du modele qu'on souhaite extraire, consiste a tatonner en jouant sur les longueurs, quorum et erreurs, de facon a obtenir un nombre raisonnable de modeles (si l'extraction produit 0 modeles ou enormement de modeles, c'est que les criteres ne sont pas adaptes). Pendant cette etape de tatonnement, on desactive la deuxieme phase d'evaluation en retirant les lignes de la rubrique EVALUATION. Puis, une fois l'extraction correctement calibree, on peut lancer l'evaluation seule a partir de ces donnees. [Pour lancer une evaluation sans lancer l'extraction, lancer "smile -x " avec un fichier de parametres complet.] ["smile -g " affiche un fichier de parametres type pour l'extraction de modeles a blocs.] Si tu as besoin d'aide, de documentation ou references, ou bien que tu souhaites me signaler un bug: => lama -AT- prism.uvsq.fr A bientot, Laurent REFERENCES: ** Pour des details algorithmiques sur SMILE: [0] M.F. Sagot. "Spelling approximate repeated or common motifs using a suffix tree." In C.L. Lucchesi and A.V. Moura, editors, LATIN'98: Theoretical Informatics, Lecture Notes in Computer Sciences, 111-127. Springer-Verlag, 1998. [1] L. Marsan and M.-F. Sagot. "Extracting structured motifs using a suffix-tree - Algorithms and application to promoter consensus identification." Proceedings RECOMB'2000, Tokyo. ACM Press. ** Pour une revue des differents algorithmes d'extraction existants, et ** des applications de SMILE: [2] A. Vanet, L. Marsan, and M.-F. Sagot. "Promoter sequences and algorithmical methods for identifying them." Research in Microbiology 150 (1999): 779-799. [3] A. Vanet, L. 
Marsan, A. Labigne, and M.-F. Sagot. "Inferring regulatory elements from a whole genome. An analysis of the sigma 80 family of promoter signals." J. Mol. Biol. 297(2) (2000): 335-353. SMILEv1.47/Lanceur/DocFrancais/param_exemple0000644002404200237300000000627110066546062020450 0ustar lamaaoc00000000000000## Fichier de parametre commenté ## Ici on cherche a extraire un modele de la forme XXXXXX___XXXX apparaissant ## dans au moins 5% des sequences, avec une erreur autorisee dans un des deux ## blocs. Le saut entre les deux blocs est compris entre 3 et 6 bases. CARACTERISTIQUES GLOBALES ========================= FASTA file fasta ## nom du fichier FASTA utilise pour l'extraction Output file exemple ## fichier resultat Alphabet file alphabet ## fichier indiquant l'alphabet sur lequel ## on va construire les modeles ## Dans ce fichier alphabet, il suffit d'indiquer les groupes de symboles qu'on souhaite ## associer (voir aussi le fichier 'alpha'). ## Caracteristiques des modeles a extraire Quorum 4 ## pourcentage des sequences minimal dans lesquelles un ## modele doit apparaitre pour etre retenu Total min length 12 ## longueur minimale totale du modele Total max length 0 ## longueur maximale totale du modele (0 pour infinie) Total substitutions 1 ## nb total d'erreurs autorisees tous blocs confondus Boxes 2 ## nb de blocs d'un modele, pas de limitation BOX 1 ================ ## caracteristiques du bloc 1 Min length 6 ## longueur min du bloc 1 Max length 0 ## longueur max du bloc 1 (0 pour infinie) Substitutions 1 ## nb d'erreurs du bloc 1 Min spacer length 17 ## longueur du saut min vers le bloc suivant Max spacer length 19 ## longueur du saut max vers le bloc suivant BOX 2 ================ Min length 6 ## longueur min du bloc 2 Max length 0 ## longueur max du bloc 2 Substitutions 1 ## nb d'erreurs max du bloc 2 ## Ici la seconde partie d'evaluation des modeles trouves lors de l'extraction. 
## On a le choix entre deux methodes: ## 1) Shuffling: generer des sequences shufflees ayant le meme biais de composition ## que la sequence FASTA originale et effectuer des calculs statistiques ## sur les differences de frequences observees. ## Le fichier exemple.shuffle sera genere. ## 2) Contre: comparer les frequences des modeles obtenus avec celles observees dans ## des sequences dites "fausses" dans laquelle on pense ne pas trouver ces ## modeles. ## Le fichier exemple.against sera genere. EVALUATION ==================================== ## Ici on choisit la methode 1 qui se lance ainsi: Shufflings 100 ## nombre de generations/comparaison avec ## sequences shufflees a effectuer Size k-mer 3 ## taille des k-uplets a conserver lors du ## shuffling ## Les fichiers suivants montrent d'autres fonctionnalites: ## - param_1bloc montre le cas simple de la recherche de modeles ## composes d'un seul bloc, avec un alphabet degenere, ## - param_contre montre comment utiliser la methode 2 d'evaluation, ## - param_delta montre comment utiliser les "deltas". SMILEv1.47/Lanceur/alphabet0000644002404200237300000000003110066547007015221 0ustar lamaaoc00000000000000Type:Nucleotides A C G T SMILEv1.47/Lanceur/alphabet_deg0000644002404200237300000000004110066546117016042 0ustar lamaaoc00000000000000Type:Nucleotides A C G T AG CT * SMILEv1.47/Lanceur/param_against0000644002404200237300000000265610066546204016264 0ustar lamaaoc00000000000000## In the following parameter file, we show how to use the "against" method ## of evaluation. FASTA file fasta Output file against Alphabet file alphabet Quorum 50 Total min length 8 Total max length 8 Total substitutions 1 Boxes 2 ## We want models with at most 4 As in the 2 boxes... Composition in A 4 ## You can write here several lines of composition. BOX 1 Min length 4 Max length 4 Substitutions 1 Min spacer length 4 Max spacer length 8 ## ...and at most 2 As in the first box. 
Composition in A 2 BOX 2 Min length 4 Max length 4 Substitutions 1 ## Now we use the second method for evaluation: we want to evaluate the ## models found during the first step by looking for them in a "wrong" file ## of sequences (where we do not expect to find them). These sequences are ## in the file 'fastawrong'. Against wrong sequences fastawrong ## The results file is the same than after a shuffling, except that ## statistics for sequences and for total number of occurrences are on ## the same line ('T' means 'statistics on total number of occurrences'). ## The only statistical measure we can apply is the Chi2. With this measure, ## we cannot know if the model is over or under-represented (no sign). ## So, a sign is written beside the Chi2 to indicate it. SMILEv1.47/Lanceur/param_palindrom0000644002404200237300000000202210066551707016612 0ustar lamaaoc00000000000000#Here is an example of an extraction using the new palindrom functionality. #We aim to extract 3 boxes models, of 4 to 6 bp length each, with one global #substitution allowed. The quorum is fixed to 50%. Each box is separated #by 2 to 8 bp. #We want the first and last boxes of each model to be palindroms. EXTRACTION (Step 1) ======================================================= FASTA file fasta Output file fasta.out GLOBAL PARAMETERS ============= Alphabet file alphabet Quorum 5 Total min length 12 Total max length 18 Total substitutions 0 Boxes 3 BOX 1 ================ Min length 4 Max length 6 Substitutions 0 Min spacer length 2 Max spacer length 8 BOX 2 ================ Min length 4 Max length 6 Substitutions 0 Min spacer length 2 Max spacer length 8 BOX 3 ================ Min length 4 Max length 6 Substitutions 0 Palindrom of box 1 #This line tells that the 3rd box should be #a palindrom of the first one. 
 SMILEv1.47/Lanceur/READ_ME0000644002404200237300000001406310066546757014562 0ustar lamaaoc00000000000000Hi beta-tester, The SMILE documentation is pretty short, but here is a quick guide to learn how to use it. WHAT DOES SMILE DO? SMILE allows you to extract simple or structured models in an exact way (it's not a heuristic algorithm). You have to indicate a few criteria for the extraction (see param_example). After the extraction, the models found can be evaluated for statistical significance with two different statistical methods. HOW TO USE IT? The only program you'll use is 'smile'. You have to give it just one parameter, which is the name of the parameter file you wrote before and which contains the characteristics of the models you want to extract. The file 'param_example' is an example of such a parameter file that is annotated to let you get familiar with the required notions. SMILE's execution is made of two different steps: - models are extracted according to the given criteria - these models are evaluated for statistical significance. This can be done in two ways, depending on the data you have. If you only have a 'RIGHT' set of sequences, you can use the shuffle method to compare the frequencies of the models found with random sequences. If you have a 'RIGHT' and a 'WRONG' set of sequences, you can use the 'Against' method to compute the statistical significance values. The execution of 'smile param_example' (see the file to know the criteria used, this is an example of extraction of models with 2 boxes) gives: 1 - the file 'example.out' containing all the models that satisfy the given criteria. Here is a small part of this file: AAAAAA_TGAAAA 000000-320000 44 Seq 433 Pos 187 18 Seq 544 Pos 213 17 ... Seq 438 Pos 81 19 Seq 931 Pos 165 18 63 The model AAAAAA_TGAAAA appears in 44 sequences and 63 different positions. Each position is given. Sequence and position numbering starts at ZERO.
 The last number for each occurrence line is the size of the spacer found between the two boxes of the occurrence. [To switch off the counting and printing of positions, you have to compile the P_BLOCS and P_BLOCS+DELTA directories after having set the flags of each 'makefile' to 0. The NB_OCCS flag switches the printing of the total number of occurrences found. The AFF_OCCS flag switches the printing of all these occurrence positions.] 2 - the file 'example.out.shuffle' contains the statistical results of the 100-time shuffling requested in the parameter file. It allows you to know if models found in the first step are significant or not, according to the composition of the sequences in k-mers. Here is a small part of this file: STATISTICS ON THE NUMBER SEQUENCES HAVING AT LEAST ONE OCCURRENCE Model %right #right %shfl. #shfl. Sigma Chi2 Z-score ======================================================================= ATTGAC_TATAAT 4.43% 47 0.49% 5.24 2.17 34.22 19.28 AGAAAA_TTTTTC 5.18% 55 1.30% 13.85 3.50 25.42 11.77 GAAAAA_TTTTTC 5.46% 58 1.44% 15.30 3.78 25.76 11.29 ... STATISTICS ON THE TOTAL NUMBER OF OCCURRENCES Model #right #shfl. Sigma Chi2 Z-score ======================================================================= ATTGAC_TATAAT 47 5.28 2.18 33.30 19.14 AGAAAA_TTTTTC 80 16.34 4.31 42.08 14.76 GAAAAA_TTTTTC 89 19.05 5.03 45.30 13.90 ... Models are sorted according to the Z-score value. The higher the Z-score is, the more over-represented the model is in the original sequences. If it's negative, the model is under-represented. A MAX_INT value means that it couldn't be evaluated (which means that it hasn't been found in the shuffled sequences or has been found in all of them). The Chi2 indicates the statistical significance too (without sign, you have to read the corresponding Z-score to know if a model is over or under-represented). The "right" columns indicate percentage and number of occurrences for the given motif in the 'fasta' file.
 The "shfl" columns give the same for the shuffled sequences. The "Sigma" column gives the standard deviation. In this example, ATTGAC_TATAAT appears to be really over-represented in the 'fasta' file. SMILE requires a few criteria for the extraction. The best way to use it when you don't really know the objects you're looking for is to test different values of length of the model, quorum and maximum number of substitutions, to obtain a reasonable quantity of models. The algorithm is fast enough to allow this approach. When testing, you can switch off the second step of evaluation by omitting the evaluation lines in the parameter file. Then, when the extraction gives correct results (meaning, not too many or zero models), you can add evaluation lines in the file and launch SMILE with the '-x' option to avoid the extraction step already done. ["smile -g " prints a generic parameter file for models made of boxes.] If you need help, references or anything else: lama -AT- prism.uvsq.fr May this SMILE help you in your work... Laurent REFERENCES: ** For algorithmic details about SMILE or quotations: [0] M.F. Sagot. "Spelling approximate repeated or common motifs using a suffix tree." In C.L. Lucchesi and A.V. Moura, editors, LATIN'98: Theoretical Informatics, Lecture Notes in Computer Science, 111-127. Springer-Verlag, 1998. [1] L. Marsan and M.-F. Sagot. "Extracting structured motifs using a suffix-tree - Algorithms and application to promoter consensus identification." Proceedings RECOMB'2000, Tokyo. ACM Press. ** For a survey of extraction algorithms and some experiments: [2] A. Vanet, L. Marsan, and M.-F. Sagot. "Promoter sequences and algorithmical methods for identifying them." Research in Microbiology 150 (1999): 779-799. [3] A. Vanet, L. Marsan, A. Labigne, and M.-F. Sagot. "Inferring regulatory elements from a whole genome. An analysis of the sigma 80 family of promoter signals." J. Mol. Biol. 297(2) (2000): 335-353.
SMILEv1.47/SigStat/0000755002404200237300000000000010142165463013506 5ustar lamaaoc00000000000000SMILEv1.47/SigStat/bin/0000755002404200237300000000000010114605452014252 5ustar lamaaoc00000000000000SMILEv1.47/SigStat/obj/0000755002404200237300000000000010114605452014254 5ustar lamaaoc00000000000000SMILEv1.47/SigStat/src/0000755002404200237300000000000010142176710014272 5ustar lamaaoc00000000000000SMILEv1.47/SigStat/src/io.c0000600002404200237300000001434110142165525015042 0ustar lamaaoc00000000000000/******************************************************************************/ /* SMILE v1.47 - Extraction of structured motifs common to several sequences */ /* Copyright (C) 2004 L.Marsan (lama -AT- prism.uvsq.fr) */ /* */ /* This program is free software; you can redistribute it and/or */ /* modify it under the terms of the GNU General Public License */ /* as published by the Free Software Foundation; either version 2 */ /* of the License, or (at your option) any later version. */ /* */ /* This program is distributed in the hope that it will be useful, */ /* but WITHOUT ANY WARRANTY; without even the implied warranty of */ /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ /* GNU General Public License for more details. */ /* */ /* You should have received a copy of the GNU General Public License */ /* along with this program; if not, write to the Free Software */ /* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
*/ /******************************************************************************/ #include extern int numSAUT, numJOKER, nbSymbMod; /******************************************************************************/ /* lectureFichierRes */ /******************************************************************************/ Mot* lectureFichierRes(FILE *res, int nbseq, int *nbmodeles, int *maxlongmod) { char car, line[BUF], ret, modele[BUF], codes[BUF], *ptr; signed char tmp; int i, nbligneslues, j, l; Mot *modeles; /** Recherche du nb de modeles (en fin de fichier) **/ fseek(res, 0, SEEK_END); i=0; /* Je cherche le 2eme ':' en partant de la fin (premier = CPU) */ do { fseek(res, -2, SEEK_CUR); fscanf(res,"%c",&car); if(car == ':') i++; } while(i!=2); fscanf(res,"%d",nbmodeles); /* printf("Nb modeles = %d\n",*nbmodeles); */ if ( *nbmodeles <= 0 ) return NULL; /* Positionnement premier modele */ fseek(res, 320, SEEK_SET); modeles=(Mot *)calloc(*nbmodeles,sizeof(Mot)); if(modeles==NULL) fatalError("lectureFichierRes: cannot allocate 'modeles'"); /* LECTURE DES MODELES DANS LE FICHIER RESULTAT */ i=0; fgets(line, BUF, res); /* printf("J'ai lu la ligne : %s\n",line); */ do { #if !OCC ret = sscanf(line, "%s %s %d", modele, codes, &(modeles[i].nbseq_vrai)); #else ret = sscanf(line, "%s %s %d %d\n", modele, codes, &(modeles[i].nbseq_vrai), &(modeles[i].nboccex_vrai)); #endif /* Stockage du modele alphabetiques */ l = strlen(modele); if(l>*maxlongmod) *maxlongmod = l; if(!(modeles[i].mot=(char*)malloc((l+1)*sizeof(char)))) fatalError("lectureFichierRes: cannot allocate 'modeles[i]'"); strcpy(modeles[i].mot, modele); /* Lecture et stockage du modele numerique */ if(!(modeles[i].codes=(char*)malloc((strlen(codes)+1)*sizeof(char)))) fatalError("lectureFichierRes: cannot allocate 'modeles[i].codes'"); j=0; ptr = codes; while(*ptr != '\0') { if(*ptr == JOKERinterne) modeles[i].codes[j] = numJOKER; else if (*ptr == SAUTinterne) modeles[i].codes[j] = numSAUT; else { tmp = *ptr - 
SHIFTALPHA; if(tmp < 0 || tmp >= nbSymbMod) { fprintf(stderr,">> Error: Model '%s' corrupted (unknown symbol at position %d)\n", modele, j+1); exit(1); } modeles[i].codes[j] = tmp; } j++; ptr++; } modeles[i].codes[j] = -1; modeles[i].quorum_reel = (float) (modeles[i].nbseq_vrai)*100.0/nbseq; /* printf("pour le modele %s j'ai lu %d => %f\n",modeles[i].mot,modeles[i].nbseq_vrai,modeles[i].quorum_reel); */ if (ret == 4) fgets(line, BUF, res); else { nbligneslues = 0; do { fgets(line, BUF, res); nbligneslues++; } while(!strncmp(line, "Seq", 3)); /* printf("J'ai lu la ligne : %s\n",line); */ #if OCC if(nbligneslues==1) { fprintf(stderr, "Error: output file contains no occurrences number,\n"); fprintf(stderr, " altough statistics on total number of occurrences have been requested.\n"); return (NULL); } else sscanf(line, "%d\n", &(modeles[i].nboccex_vrai)); #endif if(nbligneslues != 1) fgets(line, BUF, res); } /* printf("J'ai lu : %s %d et %d\n",modeles[i].mot,modeles[i].nbseq_vrai,modeles[i].nboccex_vrai); */ /* printf("J'ai lu la ligne: '%s'\n",line); */ i++; } while( strncmp(line, "Nb models", 9) ); return(modeles); } /******************************************************************************/ /* openFile */ /******************************************************************************/ FILE *openFile(char *nom, char *mode) { FILE *f; f = fopen(nom,mode); if(f == NULL) { fprintf(stderr,"Impossible to open file '%s'.\n",nom); exit(1); } return f; } /******************************************************************************/ /* PrintCpuTime */ /******************************************************************************/ void printCpuTime(FILE *f) { float ust; struct tms tms; static float dust; times(&tms); ust = (float) tms.tms_utime; if (f==NULL) { dust = ust; } else { ust -= dust; printf("User time : %.2f sec.\n", ust / sysconf(_SC_CLK_TCK)); fprintf(f,"User time : %.2f sec.\n", ust / sysconf(_SC_CLK_TCK)); } } 
SMILEv1.47/SigStat/src/seqio.c0000644002404200237300000001066410142176454015572 0ustar lamaaoc00000000000000/******************************************************************************/ /* SMILE v1.47 - Extraction of structured motifs common to several sequences */ /* Copyright (C) 2004 L.Marsan (lama -AT- prism.uvsq.fr) */ /* */ /* This program is free software; you can redistribute it and/or */ /* modify it under the terms of the GNU General Public License */ /* as published by the Free Software Foundation; either version 2 */ /* of the License, or (at your option) any later version. */ /* */ /* This program is distributed in the hope that it will be useful, */ /* but WITHOUT ANY WARRANTY; without even the implied warranty of */ /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ /* GNU General Public License for more details. */ /* */ /* You should have received a copy of the GNU General Public License */ /* along with this program; if not, write to the Free Software */ /* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ /******************************************************************************/ /* (C) by "coward" from 1996-1999 */ /* modified and extended by Laurent Marsan 1999-2004. */ /* seqio.c: Reading and writing sequences on fasta or plain ascii format */ #include #include #include #include #include /**********************************************************/ /* readnextseq: read next sequence from file */ /* Fasta format, or plain ascii (single sequence) */ /* Skip first line if it starts with > */ /* Continue until *, /, EOF, or another > */ /* Convert all letters to uppercase, ignore all other */ /* characters. 
*/
/* input:  fp:             file pointer                            */
/*         maxseqlenalloc: current allocated size of *seq; the     */
/*                         buffer must already hold at least one   */
/*                         byte                                    */
/* output: seqname: sequence name (first 20 characters)            */
/*         seq:     sequence string, always '\0'-terminated;       */
/*                  *seq may be moved by realloc                   */
/*         maxseqlenalloc: updated when the buffer has grown       */
/*         errmsg:  not written by this implementation             */
/* return value: sequence length if sequence found (>=0)           */
/*               -1: End Of File, <-1: error occurred              */
int readnextseq(FILE *fp, char seqname[21], char **seq, int *maxseqlenalloc,
                char errmsg[80])
{
    int  c, pos = 0;
    char *grown;

    (*seq)[0] = '\0';
    sprintf(seqname,NONAME);

    c = fgetc(fp);              /* first character determines further processing */
    if (c == EOF)               /* already at End Of File */
        return -1;
    else if (c == '>')
    {                           /* fasta format */
        c = fgetc(fp);
        ungetc(c,fp);
        if (c != '\n' && c != EOF)
            fscanf(fp,"%20s",seqname);  /* a name must follow the > */
        while ((c = fgetc(fp)) != '\n' && c != EOF)
            ;                   /* skip rest of line */
    }
    else                        /* no initial > : plain ascii format */
        ungetc(c,fp);

    /* main loop for reading sequence */
    while ((c = fgetc(fp)) != '>' && c != '/' && c != '*' && c != EOF)
        if (isalpha(c))         /* ignore nonalphabetic characters */
        {
            /* Keep room for this letter AND for the terminating '\0' written
               after the loop: growing only when pos == size let the
               terminator land one byte past the buffer. */
            if (pos + 1 >= *maxseqlenalloc)
            {
                grown = (char *) realloc(*seq,
                                         sizeof(char)*(*maxseqlenalloc + GRAINLENSEQ));
                assert(grown != NULL);
                *seq = grown;
                *maxseqlenalloc += GRAINLENSEQ;
            }
            (*seq)[pos++] = toupper(c); /* all letters converted to upper case */
        }

    if (c == '>')
        ungetc(c,fp);           /* put > back for reading next sequence */

    (*seq)[pos] = '\0';         /* nice end of string */

    if (pos == 0 && c == EOF)
        return -1;              /* still empty sequence at End Of File */
    else
        return pos;
}

/******************************************************/
/* seqoutput: write formatted sequence to file */
/* NB! seq is a sequence of indices, not letters!
*/
/* Converts a sequence of indices into characters, in  */
/* place, by looking each index up in alpha[].         */
/* Nothing is written to a file: the function takes no */
/* file pointer, despite what the summary line says.   */
/* input:  seq:     sequence of indices                */
/*         seqlen:  sequence length                    */
/*         linelen: not used by this implementation    */
/* output: seq:     each index replaced by alpha[index]*/
void seqoutput(char seq[], int seqlen, int linelen)
{
    int i;

    for (i = 0; i < seqlen; i++)
        seq[i] = alpha[(int)seq[i]];
}
SMILEv1.47/SigStat/src/euler.c0000644002404200237300000004025310142176350015556 0ustar lamaaoc00000000000000/******************************************************************************/
/* SMILE v1.47 - Extraction of structured motifs common to several sequences */
/* Copyright (C) 2004 L.Marsan (lama -AT- prism.uvsq.fr) */
/* */
/* This program is free software; you can redistribute it and/or */
/* modify it under the terms of the GNU General Public License */
/* as published by the Free Software Foundation; either version 2 */
/* of the License, or (at your option) any later version. */
/* */
/* This program is distributed in the hope that it will be useful, */
/* but WITHOUT ANY WARRANTY; without even the implied warranty of */
/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */
/* GNU General Public License for more details. */
/* */
/* You should have received a copy of the GNU General Public License */
/* along with this program; if not, write to the Free Software */
/* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
/******************************************************************************/
/* (C) by "coward" from 1996-1999 */
/* modified and extended by Laurent Marsan 1999-2004.
*/ #include #include #include #include /******************************************************/ /* indexseq: convert it to a sequence of indices */ /* input: seq: sequence of upper-case letters */ /* seqlen: sequence length */ /* output: seq: sequence of indices */ void indexseq(signed char seq[], int seqlen, int letter[128]) { int i; /* second pass: convert letters to indices */ for (i = 0; i < seqlen; i++) { seq[i]=letter[(int)seq[i]]; if (seq[i] == -1) { fprintf(stderr,"unknown character\n"); exit(1); } } } /******************************************************/ /* kletcount: count k-lets in sequence of indices */ /* input: seq: sequence of indices (0,1,2,..,m-1) */ /* seqlen: sequence length */ /* k: length of the k-lets */ /* m: alphabet size = highest index + 1 */ /* output: count: alphabetically ordered k-let counts */ /* sufficient allocation assumed */ void kletcount(signed char seq[], int seqlen, int k, int m, int count[]) { int i, hash = 0, chop; chop = pow(m,k-1); /* number of (k-1)-lets, used to chop off most significant hash digit */ for (hash = 0; hash < chop*m; count[hash++] = 0); /* initialize count */ /* first k-1 letters: begin constructing hash, no k-lets to store yet */ for (i = 0; i < k-1; i++) { assert(seq[i] >= 0 && seq[i] < m); /* if not: non-alphabetic index */ hash = hash*m+seq[i]; } /* letter k-1 to end of sequence: update hash, store k-lets */ for (i = k-1; i < seqlen; i++) { assert(seq[i] >= 0 && seq[i] < m); /* if not: non-alphabetic index */ hash = (hash%chop)*m+seq[i]; /* chop%, shift*, add+ -> new hash */ count[hash]++; } } /******************************************************/ /* kletverify: check that k-let count in (shuffled) */ /* sequence corresponds to stored count. 
*/
/* input:  seq:    sequence of indices (0,1,2,..,m-1) */
/*         seqlen: sequence length                    */
/*         k:      length of the k-lets               */
/*         m:      alphabet size = highest index + 1  */
/*         count0: reference k-let counts             */
/* output: count1: receives the k-let counts of seq   */
/*                 (sufficient allocation assumed)    */
/* Every differing k-let is reported on stderr.       */
/* return value: 1 if OK, 0 if mismatch               */
int kletverify(signed char seq[], int seqlen, int k, int m, int count0[],
               int count1[])
{
    char word[MAXORDER+1];
    int  total = pow(m,k);      /* number of possible k-lets */
    int  same = 1;
    int  h;

    kletcount(seq,seqlen,k,m,count1);

    for (h = 0; h < total; h++)
    {
        if (count1[h] == count0[h])
            continue;

        fprintf(stderr,"Mismatch: %s: %i in orig, %i in shf\n",
                hash2str(h,k,m,word),count0[h],count1[h]);
        same = 0;
    }

    return same;
}

/******************************************************/
/* edgecount: count outgoing edges from */
/* each vertex, i.e. count (k-1)-lets */
/* input: k: length of the k-lets */
/* m: alphabet size */
/* last: hash key of last word (vertex) */
/* count: alphabetically ordered k-let counts */
/* output: vdeg: alph.
ordered (k-1)-let counts */ /* = in/out-degree of each vertex */ /* sufficient allocation assumed */ /* return value: number of distinct (k-1)-lets */ /* (vertices) */ int edgecount(int k, int m, int last, int count[], int vdeg[]) { int i, j, nk1lets, sum, nver; nk1lets = pow(m,k-1); for (i = 0, nver = 0; i < nk1lets; i++) { for (j = m*i, sum = 0; j < m*(i+1); j++) sum += count[j]; vdeg[i] = sum; if (sum > 0) nver++; } if (vdeg[last] == 0) /* count last vertex if not already counted */ nver++; return nver; } /******************************************************/ /* kletoutput: output list of k-let counts */ /* input: fp: output file pointer */ /* k: length of the k-lets */ /* m: alphabet size = highest index + 1 */ /* count: alphabetically ordered k-let counts */ void kletoutput(FILE *fp, int k, int m, int count[]) { int hash, sum = 0, nklets; char klet[MAXORDER+1]; nklets = pow(m,k); fprintf(fp,"\n"); for (hash = 0; hash < nklets; hash++) { fprintf(fp,"%s %7i\n",hash2str(hash,k,m,klet),count[hash]); sum += count[hash]; } fprintf(fp,"\nSum: %i\n",sum); } /******************************************************/ /* hash2str: k-let letter string from hash index */ /* input: hash: hash index */ /* k: length of the k-let */ /* m: alphabet size */ /* output: klet: k-let string (assumed big enough) */ /* return value: klet corresponding to hash */ char *hash2str(int hash, int k, int m, char klet[]) { int i; for (i = k-1; i >= 0; i--) { klet[i] = alpha[hash%m]; hash /= m; } klet[k] = '\0'; return klet; } /******************************************************/ /* ind2hash: hash index from k-let index string */ /* input: klet: the k-let as string of indices */ /* k: length of the k-let */ /* m: alphabet size */ /* return value: hash index corresponding to klet */ int ind2hash(char klet[], int k, int m) { int i, hash = 0; for (i = 0; i < k; i++) hash = hash*m+klet[i]; return hash; } /******************************************************/ /* shuffle: make random shuffling 
given counts */
/* cyclic counts assumed (k-lets wrapping end/start)  */
/* input:  m:        alphabet size                    */
/*         k:        length of the k-let              */
/*         nver:     number of DISTINCT (k-1)-lets    */
/*         count:    alphabetically ordered k-let     */
/*                   counts, DESTROYED by the call    */
/*         vdeg:     alph. ordered (k-1)-let counts,  */
/*                   DESTROYED by the call            */
/*         first:    hash key of first word (vertex)  */
/*         last:     hash key of last word (vertex)   */
/*         lastedge: work array for the last edges    */
/* output: seq:      random sequence (indices 0..m-1);*/
/*                   filled by randomtrail()          */
/* The simple letter shuffling for k == 1             */
/* (monoshuffle) is NOT dispatched from here.         */
void shuffle(int m, int k, int nver, int count[], int vdeg[], int first,
             int last, int lastedge[], char seq[])
{
    const int nvert = pow(m,k-1);   /* number of (k-1)-let combinations */

    /* step 1: random inbound spanning tree with last as root */
    if (debugflag)
    {
        fprintf(dlog,"constructing arborescence\n");
    }
    arborescence(m,nvert,nver,count,vdeg,first,last,lastedge);

    /* step 2: random Euler trail from first, using the lastedge last */
    if (debugflag)
    {
        fprintf(dlog,"\nmaking random trail\n");
    }
    randomtrail(m,k,count,vdeg,first,lastedge,seq);

    if (debugflag)
    {
        fprintf(dlog,"OK!\n");
    }
}

/******************************************************/
/* monoshuffle: do simple 1-let permutation of seq    */
/* input:        seqlen: sequence length              */
/* input/output: seq:    sequence, shuffled in place  */
void monoshuffle(int seqlen, char seq[])
{
    int  here = 0;
    int  there;
    char held;

    while (here < seqlen)
    {
        /* partner position drawn among the seqlen-here remaining ones
           (presumably randomint(n) returns 1..n, so there >= here -
           TODO confirm) */
        there = here + randomint(seqlen-here) - 1;

        held = seq[there];
        seq[there] = seq[here];
        seq[here] = held;

        here++;
    }
}
/******************************************************/
/* arborescence: construct random inbound spanning */
/* tree of the directed Eulerian graph having the */
/* (k-1)-lets as vertices and the k-lets as edges, */
/* starting at root. */
/* NB! Eulerian graph <=> cyclic sequence */
/* <=> first (k-1)-let = last (k-1)-let */
/* input: m: alphabet size */
/* nk1lets: number of (k-1)-let combinations */
/* = dimension of vdeg and branch */
/* nver: number of vertices */
/* (distinct (k-1)-lets) */
/* count: alphabetically ordered k-let counts */
/* (all the edge multiplicities) */
/* vdeg: alph. ordered (k-1)-let counts */
/* (# edges in/out from each vertex) */
/* first: index (in vdeg) of start vertex */
/* root: index (in vdeg) of root */
/* output: branch: branch[i]=j represents a branch */
/* from vertex i away from the root */
/* corresponding to a following */
/* NOTE(review): text is missing from this copy of the file at this point: the
   end of this header comment, the function signature, the local declarations
   and the start of the random-walk loop are gone. Restore them from the
   original distribution before compiling. */
/* letter index j (0<=j 0) { /* choose a random edge from current vertex */
edgeno = randomint(vdeg[vertex]); /* among vdeg[vertex] edges */
if (debugflag) fprintf(dlog,"(%i/%i)",edgeno,vdeg[vertex]);
if (vertex == first && first != root && edgeno == vdeg[vertex])
{ /* special case: extra edge added from root to first */
if (debugflag) fprintf(dlog,"xtra");
vertex = root;
continue;
}
edge = vertex; /* first possible edge: same index as vertex */
edgeno -= count[edge];
while (edgeno > 0) /* scan edges while counting down to zero */
{
edge += nk1lets; /* candidate edges are nk1lets apart in count[] */
edgeno -= count[edge];
}
assert(edge < m*nk1lets); /* if fail: mismatch btw. count and vdeg! */
vertex = edge/m; /* first k-1 m-digits */
if (debugflag) fprintf(dlog,"=%i ",vertex);
if (branch[vertex] == -1) /* not yet visited? */
{
branch[vertex] = edge%m; /* store new branch (last m-digit) */
remain--; /* count it */
if (debugflag) fprintf(dlog,"[%i]",remain);
}
} /* end of random walk */
if (first != root) vdeg[root]--; /* remove extra edge added */
}

/******************************************************/
/* randomtrail: construct random sequence by making */
/* a random Euler trail at the k-let graph, at first */
/* and using the edge in lastedge last for each vertex*/
/* input: m: alphabet size */
/* k: length of the k-let (must be >= 2, asserted) */
/* count: alphabetically ordered k-let counts */
/* (all the edge multiplicities) */
/* the branches in last must be removed*/
/* CONTENT WILL BE DESTROYED! */
/* vdeg: alph. ordered (k-1)-let counts */
/* (# edges in/out from each vertex) */
/* CONTENT WILL BE DESTROYED! */
/* first: vertex to start walking */
/* lastedge: the last edge to use for each vertex*/
/* indicating the next letter (0..m-1) */
/* special value m for last vertex */
/* output: seq: random sequence (indices 0..m-1) */
/* The loop below stores its first letter at seq[k-1]; */
/* seq[0..k-2] (the invariable first (k-1)-let, equal */
/* to the last (k-1)-let) is NOT assigned */
/* must be allocated to sequence length*/
/* plus k */
void randomtrail(int m, int k, int count[], int vdeg[], int first, int lastedge[], char seq[])
{
int nk1lets, vertex, nedges, letter, edge, edgeno, i;
nk1lets = pow(m,k-1);
assert(k >= 2);
/* remove the last edges from the rest, stored in count */
for (i = 0; i < nk1lets; i++)
if (lastedge[i] >= 0 && lastedge[i] < m) /* don't remove from unvisited vertices nor from last vertex */
{
count[m*i+lastedge[i]]--;
vdeg[i]--;
}
vertex = first; /* start walking here */
if (debugflag) fprintf(dlog,"%i ",vertex);
/* construct random trail */
for (i = k-1;; i++) /* terminates by break statement */
{
nedges = vdeg[vertex]; /* number of remaining non-terminal edges */
if (nedges == 0)
{ /* last edge */
if (debugflag) fprintf(dlog,"(*)");
letter = lastedge[vertex];
if (letter == m) /* special value for last vertex */
break; /* trail complete */
}
else
{ /* not last edge */
/* choose a random edge from current vertex */
edgeno = randomint(nedges);
if (debugflag) fprintf(dlog,"(%i/%i)",edgeno,vdeg[vertex]);
edge = m*vertex; /* first possible edge */
edgeno -= count[edge];
while (edgeno > 0) /* scan edges while counting down to zero */
{
if (debugflag) fprintf(dlog,"%i,%i ",edge,count[edge]);
edge++;
edgeno -= count[edge];
}
letter = edge-m*vertex;
assert(letter < m); /* if fail: mismatch count/vdeg! */
count[edge]--; /* this edge is now used */
vdeg[vertex]--; /* one less remaining */
}
seq[i] = letter;
/* next vertex: drop the oldest letter of the (k-1)-let, append the new one */
vertex = (m*vertex)%nk1lets+letter;
if (debugflag) fprintf(dlog,"=%i ",vertex);
}
}
SMILEv1.47/SigStat/src/faux.c0000600002404200237300000002314710142165523015400 0ustar lamaaoc00000000000000/******************************************************************************/
/* SMILE v1.47 - Extraction of structured motifs common to several sequences */
/* Copyright (C) 2004 L.Marsan (lama -AT- prism.uvsq.fr) */
/* */
/* This program is free software; you can redistribute it and/or */
/* modify it under the terms of the GNU General Public License */
/* as published by the Free Software Foundation; either version 2 */
/* of the License, or (at your option) any later version. */
/* */
/* This program is distributed in the hope that it will be useful, */
/* but WITHOUT ANY WARRANTY; without even the implied warranty of */
/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */
/* GNU General Public License for more details. */
/* */
/* You should have received a copy of the GNU General Public License */
/* along with this program; if not, write to the Free Software */
/* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
/******************************************************************************/
/******************************************************************************/
/* FAUX - P-motif version with grep+ */
/* Evaluates extracted models against a second ("wrong") set of sequences. */
/******************************************************************************/
/* NOTE(review): the header name that followed '#include' is missing in this
   copy of the file; restore it from the original distribution. */
#include
extern char *nummod2str[127];

/******************************************************************************/
/* Private prototypes */
/******************************************************************************/
void afficheKhi2(Mot *modeles, int nbmodeles, int nbseq_vrai, int nbseq_faux, long int nbsymbV, long int nbsymbF, FILE *out, int maxlongmod);

/******************************************************************************/
/* MAIN */
/* Arguments: argv[1] FASTA file of "wrong" sequences, argv[2] result file of */
/* the extraction step, argv[3] output file. */
/* Returns 1 on a usage error or an invalid criteria line, 0 otherwise. */
/******************************************************************************/
int main(int argc, char **argv)
{
Mot *modeles;
int nbseq_vrai, nbseq_faux, nbmodeles, step, j, maxlongmod=0;
FILE *res, *outfile, *f;
unsigned int nbocc, nboccex;
long int nbsymbF=0;
char line[BUF];
Arbre arbre;
Criteres cr;
#if OCC
char *ptr;
#endif
initCriteres(&cr);
if(argc!=4)
{
printf("Usage: ~ Fic.Fasta.Faux Fic.res Fic.sortie\n\n");
return 1;
}
printCpuTime(NULL); /* record the reference CPU time */
/** READ THE PARAMETERS *******************************************************/
res = openFile(argv[2], "r");
outfile = openFile(argv[3], "w");
/******************************************************************************/
/** READ THE RESULTS OF THE PROGRAM *******************************************/
/* Read the criteria: first line of the result file */
fgets(line, BUF, res);
if(!chargeCriteres(&cr, line))
{
fprintf(stderr, "File '%s' corrupted.\n",argv[2]);
return(1);
}
if(valideCriteres(&cr) == FAUX) return 1;
/******************************************************************************/
/* Build the suffix tree of the "wrong" sequences */
initAlphabet();
creeArbreSuffixeFromFile(&arbre, argv[1], maxLongMod(cr), NULL);
nbseq_faux = arbre.nbtxt;
/******************************************************************************/
/* Load the alphabet of the models */
if(!(f=fopen(cr.ficalph,"r"))) fatalError("main: cannot open alphabet file\n");
chargeAlphaMod(f);
fclose(f);
/******************************************************************************/
/******************************************************************************/
/* Read the models */
nbseq_vrai = cr.nbtotseq;
modeles = (Mot *) lectureFichierRes(res, nbseq_vrai, &nbmodeles, &maxlongmod);
/* printf("nbmodeles %d\n",nbmodeles); */
fclose(res);
if ( modeles == NULL )
{
fprintf(stderr, "Warning: File '%s' is empty.\n", argv[2]);
exit(1);
}
/** MAIN LOOP *****************************************************************/
fprintf(stderr,"** Searching for occurrences of the %d models **\n", nbmodeles);
/* Progress bar: with 100 models or more, barre(0) is called once every 'step'
   models; otherwise once per model */
if(nbmodeles>=100)
{
step = nbmodeles/100;
barre(100);
}
else
{
step = nbmodeles;
barre(nbmodeles);
}
for(j=0; j!=nbmodeles; j++)
{
/* printf("Je cherche le mot %s\n",modeles[j].mot); */
chercheMot(arbre,&cr, modeles[j].codes, &nbocc, &nboccex, NULL);
/* printf("J'en ai trouve %d et %d\n",nbocc, nboccex); */
if(nbmodeles>=100)
{
if(j%step==0) barre(0);
}
else barre(0);
modeles[j].nbseq_faux = nbocc;
#if OCC
modeles[j].nboccex_faux = nboccex;
#endif
}
/******************************************************************************/
/* STATISTICAL COMPUTATIONS */
/******************************************************************************/
fprintf(outfile, "Original sequences (%d seq) against '%s' (%d seq)\n", nbseq_vrai, argv[1], nbseq_faux);
#if OCC
/* Count the symbols of the "wrong" sequences: each text is scanned up to its
   FINAL marker */
nbsymbF = 0;
for(j=0; j!=nbseq_faux; j++)
{
ptr = arbre.text[j];
while(*ptr != FINAL)
{
/* printf("%c\n",*ptr); */
ptr++;
nbsymbF++;
}
}
/* printf("%d\n",nbsymbF); */
#endif
afficheKhi2(modeles, nbmodeles, nbseq_vrai, nbseq_faux, cr.nbsymb, nbsymbF, outfile, maxlongmod);
/* Free the suffix tree */
libereArbreSuffixeFromFile(arbre);
printCpuTime(outfile);
fclose(outfile);
/* Only the array itself is released here, not the 'mot' and 'codes' strings of
   each model */
free(modeles);
return 0;
}

/* comparModeles: orders two models by decreasing khi2.
   Returns 0 when the khi2 values are equal, -1 when a's is larger, 1 otherwise. */
int comparModeles(Mot *a, Mot *b)
{
if(a->khi2 == b->khi2) return 0;
if(a->khi2 > b->khi2) return -1;
return 1;
}

/******************************************************************************/
/* espace */
/* Writes 'nb' spaces on 'f', taken from the tail of a 30-space string. */
/* NOTE(review): nb outside 0..30 makes the pointer leave the array - confirm */
/* that callers never pass such a value. */
/******************************************************************************/
void espace(FILE *f, int nb)
{
static char space[31]={' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ','\0'};
fprintf(f,"%s",space+30-nb);
}

/******************************************************************************/
/* afficheKhi2 */
/******************************************************************************/
void afficheKhi2( Mot *modeles, int nbmodeles, int nbseq_vrai, int nbseq_faux, long int nbsymbV, long int nbsymbF, FILE *out, int maxlongmod)
{
Mot m;
float Pv, Pf, Av, Af, nP, nA, Fv=0, Ff=0, nbseqtot;
int i;
#if OCC
float Pv_ex, Pf_ex, Av_ex, Af_ex, nP_ex, nA_ex, Fv_ex=0, Ff_ex=0;
long int nbsymbtot;
#endif
for(i=0;i!=nbmodeles;i++)
{
m = modeles[i];
Pv = (float) m.nbseq_vrai;
Fv = (float) Pv / nbseq_vrai;
Pf = (float) m.nbseq_faux;
Ff = (float) Pf / nbseq_faux;
Av = (float) nbseq_vrai-Pv;
Af = (float) nbseq_faux-Pf;
nP = (float) Pv+Pf;
nA = (float) Av+Af;
nbseqtot = nbseq_vrai + nbseq_faux;
#if DEBUG
fprintf(out,"%s\n",modeles[i].mot);
fprintf(out,"Pv %f\tFv %f\tPf %f\tFf %f\n",Pv,Fv,Pf,Ff);
fprintf(out,"Av %f\tAf %f\tnP %f\tnA %f\n",Av,Af,nP,nA);
fprintf(out,"Nbseq vrai %d\tNbseq faux %d\n",nbseq_vrai,nbseq_faux);
#endif
modeles[i].khi2 = nbseqtot * CARRE(Pv*Af-Pf*Av) / (nP*nA*nbseq_vrai*nbseq_faux);
modeles[i].sign = Fv>Ff?'+':Fv==Ff?'=':'-';
#if OCC
Pv_ex = (float) m.nboccex_vrai;
Fv_ex = (float) Pv_ex / nbsymbV;
Pf_ex = (float) m.nboccex_faux;
Ff_ex = (float) Pf_ex / nbsymbF;
Av_ex = (float) nbsymbV-Pv_ex;
Af_ex = (float) nbsymbF-Pf_ex;
nP_ex = (float) Pv_ex+Pf_ex;
nA_ex = (float) Av_ex+Af_ex;
nbsymbtot = nbsymbV + nbsymbF;
#if DEBUG
fprintf(out,"Pv_ex %f\tFv_ex %f\tPf_ex %f\tFf_ex
%f\n",Pv_ex,Fv_ex,Pf_ex, Ff_ex); fprintf(out,"Av_ex %f\tAf_ex %f\tnP_ex %f\tnA_ex %f\n",Av_ex,Af_ex,nP_ex, nA_ex); fprintf(out,"Nbsymb vrai %d\tNbsymb faux %d\n",nbsymbV,nbsymbF); #endif modeles[i].khi2_occ = CARRE(Pv_ex*Af_ex-Pf_ex*Av_ex) / (nP_ex*nA_ex*nbsymbV*nbsymbF) * nbsymbtot; modeles[i].sign_occ = Fv_ex>Ff_ex?'+':Fv_ex==Ff_ex?'=':'-'; #endif } qsort(modeles, nbmodeles, sizeof(Mot), (int (*)(const void *, const void *)) comparModeles); fprintf(out, "Model"); espace(out, maxlongmod-2); fprintf(out,"#right\t#wrong"); #if OCC fprintf(out, "\t#rightT\t#wrongT"); #endif fprintf(out, "\t\tChi^2"); #if OCC fprintf(out, "\t\tChi^2 T"); #endif fprintf(out, "\n"); fprintf(out, "================================================================================\n"); for(i=0; i!=nbmodeles; i++) { m = modeles[i]; fprintf(out, "%s", m.mot); espace(out, maxlongmod+4-strlen(m.mot)); fprintf(out, "%5d\t%5d", m.nbseq_vrai, m.nbseq_faux); #if OCC fprintf(out, "\t%5d\t%5d",m.nboccex_vrai, m.nboccex_faux); #endif fprintf(out, "\t\t%3.2f %c", m.khi2,m.sign); #if OCC fprintf(out, "\t\t%3.2f %c",m.khi2_occ,m.sign_occ); #endif fprintf(out,"\n"); } } SMILEv1.47/SigStat/src/random.c0000644002404200237300000001013410142176432015716 0ustar lamaaoc00000000000000/******************************************************************************/ /* SMILE v1.47 - Extraction of structured motifs common to several sequences */ /* Copyright (C) 2004 L.Marsan (lama -AT- prism.uvsq.fr) */ /* */ /* This program is free software; you can redistribute it and/or */ /* modify it under the terms of the GNU General Public License */ /* as published by the Free Software Foundation; either version 2 */ /* of the License, or (at your option) any later version. */ /* */ /* This program is distributed in the hope that it will be useful, */ /* but WITHOUT ANY WARRANTY; without even the implied warranty of */ /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the */ /* GNU General Public License for more details. */ /* */ /* You should have received a copy of the GNU General Public License */ /* along with this program; if not, write to the Free Software */ /* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ /******************************************************************************/ /* (C) by "coward" from 1996-1999 */ /* modified and extended by Laurent Marsan 1999-2004. */ #include #define MULTIPLIER 69069 #define SHIFT 1 #define MODULUS 256*256*256*128 #define INVMOD ( (double) 1 / ((double) MODULUS)) / ((double) 2) #define DEFAULTSEED 1000001 static unsigned int seed; /* changeseed ************************************************************************************************************** DESCRIPTION: initiate the static variable seed, used as seed value in functions generating random numbers SIDE EFFECTS: the value of the static variable seed is changed RETURN VALUE: the old value of the static variable seed ***************************************************************************************************************************/ unsigned int changeseed(unsigned int Iseed) { unsigned int oldseed; oldseed = seed; seed = Iseed; return oldseed; } /* unif01 ****************************************************************************************************************** DESCRIPTION: Draw a random number from a uniform distribution on the interval [0,1] SIDE EFFECTS: the value of the static variable seed is changed RETURN VALUE: the random number ***************************************************************************************************************************/ double unif01(void) { double random; seed = MULTIPLIER * seed + SHIFT; random = ((double) seed) * INVMOD; return random; } /* readseed ************************************************************************************************************** DESCRIPTION: read new seed value from file, use standard value if 
nonexistant SIDE EFFECTS: the value of the static variable seed is changed RETURN VALUE: the seed value ***************************************************************************************************************************/ unsigned int readseed(char filename[]) { FILE *fp; seed = DEFAULTSEED; fp = fopen(filename, "r"); if (fp != NULL) { fscanf(fp, "%u", &seed); fclose(fp); } return seed; } /* writeseed ************************************************************************************************************* DESCRIPTION: write seed value to seed file SIDE EFFECTS: none RETURN VALUE: 1 if OK (return value from fprintf) *****************************************************************************************************************************/ int writeseed(char filename[]) { FILE *fp; int r; fp = fopen(filename, "w"); if (fp != NULL) r = fprintf(fp, "%u\n", seed); else r = -1; fclose(fp); return r; } SMILEv1.47/SigStat/src/shufflet.c0000644002404200237300000002440610142176511016263 0ustar lamaaoc00000000000000/******************************************************************************/ /* SMILE v1.47 - Extraction of structured motifs common to several sequences */ /* Copyright (C) 2004 L.Marsan (lama -AT- prism.uvsq.fr) */ /* */ /* This program is free software; you can redistribute it and/or */ /* modify it under the terms of the GNU General Public License */ /* as published by the Free Software Foundation; either version 2 */ /* of the License, or (at your option) any later version. */ /* */ /* This program is distributed in the hope that it will be useful, */ /* but WITHOUT ANY WARRANTY; without even the implied warranty of */ /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ /* GNU General Public License for more details. 
*/ /* */ /* You should have received a copy of the GNU General Public License */ /* along with this program; if not, write to the Free Software */ /* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ /******************************************************************************/ /* (C) by "coward" from 1996-1999 */ /* modified and extended by Laurent Marsan 1999-2004. */ #include #include #include #include #include /* #include */ #include /* global variables */ char wdir[]; /* directory to write to */ int debugflag; /* 1: write debug file */ FILE *dlog; /* debug log */ char alpha[128]; int seqno, sizealpha; char seqname[NAMELEN]; void usage(void) { fprintf(stderr,"Usage: shufflet [OPTIONS] NSEQ ORDER OUTFILE\n"); exit(1); } /******************************************************************************/ /* readseq */ /******************************************************************************/ int readseq(int k, FILE * infile, int **nver, int ***count, int ***vdeg, int **first,int **last, char ***seqstart, int **seqlen, int *maxsizeseq, int nbseqalloc, char *alphaseq, char ***origseq) { /* k= ORDER: length of k-mers to conserve */ char *seq; /* string to hold sequence */ char errstr[80]; /* string to hold error and warning messages */ int nklets; /* number of k-let combinations */ int nk1lets; /* number of (k-1)-let combinations */ int j, maxseqlenalloc; int letter[128]; *maxsizeseq = 0; strcpy(alpha, alphaseq); strcpy(wdir,"."); if (k <= 0) usage(); if (k > MAXORDER) { fprintf(stderr,"Maximum order: %i\n",MAXORDER); exit(2); } /* Construction du convertisseur letter */ for(sizealpha=0; sizealpha!=128; sizealpha++) letter[sizealpha] = -1; for(sizealpha=0; *(alpha+sizealpha)!=FINAL; sizealpha++) letter[(int) *(alpha+sizealpha)] = sizealpha; nklets = (int)pow((double)sizealpha,(double)k); nk1lets = nklets/sizealpha; if (nklets > MAXNKLETS) Error(2,"Max size of k-let table exceeded (choose smaller k)\n"); seq = (char *) 
malloc(GRAINLENSEQ*sizeof(char)); assert(seq!=NULL); maxseqlenalloc = GRAINLENSEQ; /* main loop */ seqno=0; while (((*seqlen)[seqno] =readnextseq(infile,seqname,&seq, &maxseqlenalloc, errstr)) >=0) /* continue until End Of File (-1) or error (<-1) */ { if ( (*seqlen)[seqno] > *maxsizeseq ) *maxsizeseq = (*seqlen)[seqno]; if ((*seqlen)[seqno] < k) /* too short sequence */ { /* print warning and continue with next sequence */ if ((*seqlen)[seqno] == 0) Warning("Empty sequence"); else Warning("Too short sequence"); continue; } /* Stockage des sequences originales pour ordre 1 */ if(k==1) { (*origseq)[seqno] = (char *) malloc(((*seqlen)[seqno]+2)*sizeof(char)); assert((*origseq)[seqno]!=NULL); strncpy((*origseq)[seqno],seq,(*seqlen)[seqno]); } /* extract alphabet from sequence and convert to sequence of indices */ indexseq(seq,(*seqlen)[seqno], letter); (*count)[seqno] = (int *) malloc(sizeof(int)*nklets); (*vdeg)[seqno] = (int *) malloc(sizeof(int)*nk1lets); assert((*count)[seqno]!=NULL && (*vdeg)[seqno]!=NULL); if (k>1) { (*seqstart)[seqno] = (char *) malloc(sizeof(char)*(k-1)); assert((*seqstart)[seqno] != NULL); for(j=0;j=100) */ /* { */ /* step = nbseq/100; */ /* barre(100); */ /* } */ /* else */ /* barre(nbseq); */ if(k==1) { for(i=0; i!=nbseq; i++) { /* printf("Je shuffle (%d)...\n%s\nen...\n",seqlen[i], seq[i]); */ monoshuffle(seqlen[i],seq[i]); /* printf("%s\n\n",seq[i]); */ seq[i][seqlen[i]] = FINAL; seq[i][seqlen[i]+1] = '\0'; /* if(nbseq>=100) */ /* { */ /* if(i%step==0) */ /* barre(0); */ /* } */ /* else */ /* barre(0); */ } } else for (i=0; i!=nbseq; i++) { memcpy(seq[i],seqstart[i],k-1); /* { */ /* for (j=0; j=100) */ /* { */ /* if(i%step==0) */ /* barre(0); */ /* } */ /* else */ /* barre(0); */ } writeseed(seedfilename); /* store seed for random number generator */ } /******************************************************/ /* error: print error message and quit */ /* input: message: message text */ void Error(int code, char message[]) { 
fprintf(stderr,"\nError: sequence #%i \"%s\"",seqno,seqname); /* if (seqlen > 0) */ /* fprintf(stderr," (length %i, alphabet size %i)",seqlen,m); */ fprintf(stderr,":\n"); fprintf(stderr,"%s\n",message); /* exit(code); */ } /******************************************************/ /* warning: print warning message */ /* input: message: message text */ void Warning(char message[]) { fprintf(stderr,"\nWarning: sequence #%i \"%s\"",seqno,seqname); /* if (seqlen > 0) */ /* fprintf(stderr," (length %i, alphabet size %i)",seqlen,m); */ fprintf(stderr,":\n"); fprintf(stderr,"%s\n",message); } /******************************************************/ /* generatename: generate name for shuffled sequence */ /* input: seqname: name of original sequence */ /* n: number of shuffling */ /* output: shfseqname: name of shuffled sequence */ void generatename(char seqname[], int n, int nseq, char shfseqname[]) { if (seqname[0] == '\0' || strcmp(seqname,NONAME) == 0) { if (nseq > 1) sprintf(shfseqname,"SHF%i",n); else shfseqname[0] = '\0'; /* 1 shuffling of unnamed sequence: no name */ } else sprintf(shfseqname,"%s_SHF%i",seqname,n); } SMILEv1.47/SigStat/src/sigstat.c0000644002404200237300000004054310142176535016127 0ustar lamaaoc00000000000000/******************************************************************************/ /* SMILE v1.47 - Extraction of structured motifs common to several sequences */ /* Copyright (C) 2004 L.Marsan (lama -AT- prism.uvsq.fr) */ /* */ /* This program is free software; you can redistribute it and/or */ /* modify it under the terms of the GNU General Public License */ /* as published by the Free Software Foundation; either version 2 */ /* of the License, or (at your option) any later version. */ /* */ /* This program is distributed in the hope that it will be useful, */ /* but WITHOUT ANY WARRANTY; without even the implied warranty of */ /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ /* GNU General Public License for more details. 
*/ /* */ /* You should have received a copy of the GNU General Public License */ /* along with this program; if not, write to the Free Software */ /* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ /******************************************************************************/ /* (C) by "coward" from 1996-1999 */ /* modified and extended by Laurent Marsan 1999-2004. */ /******************************************************************************/ /* SIGSTAT - Version avec grep+ */ /* (Shuffling par Eivind Coward) */ /******************************************************************************/ #include char *alphaseq; /* Alphabet des sequences */ extern char *nummod2str[127]; /******************************************************************************/ /* Prototypes prives */ /******************************************************************************/ void afficheStats(FILE *outfile, Mot *modeles, int nbmodeles, int nbseq, int maxlongmod); void calculeStats( int nbmodeles, int nbtests, int nbseq, float **res, Mot *modeles, char flag); /******************************************************************************/ /* MAIN */ /******************************************************************************/ int main(int argc, char **argv) { Mot *modeles; int nbseq, nbmodeles, i, nbtests, maxsizeseq, j, maxlongmod=0; unsigned int nbocc, nboccex; float **resultats; FILE *fasta, *res, *outfile, *f; /* Shufflet's variables */ int *nver, **count, **vdeg, *first, *last, *count1, *vdeg1, nklets, nk1lets, *seqlen, order, nbseqalloc; int *lastedge; /* array to indicate last edge from each vertex*/ char **seqstart=NULL; /* to store the beginning of sequences */ char **seq, line[BUF], *alphaseq, **origseq; /* int k; */ Arbre arbre; Criteres cr; #if OCC float **resocc; #endif initCriteres(&cr); if(argc!=6) { printf("Usage: ~ Fic.Fasta Fic.res Fic.sortie nb_shufflings order\n\n"); return 1; } /** LECTURE DES PARAMETRES 
****************************************************/ fasta = openFile(argv[1], "r"); res = openFile(argv[2], "r"); outfile = openFile(argv[3], "w"); nbtests = atoi(argv[4]); order = atoi(argv[5]); /******************************************************************************/ printCpuTime(NULL); /* Allocations necessaires a shufflet */ count = (int **) malloc(sizeof(int*)*GRAINSEQ); seqlen = (int *) malloc(sizeof(int)*GRAINSEQ); vdeg = (int **) malloc(sizeof(int*)*GRAINSEQ); first = (int *) malloc(sizeof(int)*GRAINSEQ); last = (int *) malloc(sizeof(int)*GRAINSEQ); nver = (int *) malloc(sizeof(int)*GRAINSEQ); nbseqalloc = GRAINSEQ; assert(count != NULL); assert(vdeg != NULL); assert(seqlen != NULL && first != NULL && last != NULL); assert(nver != NULL); if(order == 1) { origseq = (char **) malloc(GRAINSEQ*sizeof(char *)); assert(origseq != NULL); } else { seqstart = (char **) malloc(sizeof(char *)*GRAINSEQ); assert(seqstart != NULL); } /******************************************************************************/ /* Lecture des criteres */ fgets(line, BUF, res); if(!chargeCriteres(&cr, line)) { fprintf(stderr, "File '%s' is corrupted.\n",argv[2]); return(1); } if(valideCriteres(&cr) == FAUX) return 1; /******************************************************************************/ /* Chargement alphabet sequences */ initAlphabet(); alphaseq = chargeAlphaSeq(NULL, 0, cr.alphaseq); /******************************************************************************/ /* Chargement alphabet sequences et modeles */ if(!(f=fopen(cr.ficalph,"r"))) fatalError("main: cannot open alphabet file\n"); chargeAlphaMod(f); fclose(f); /******************************************************************************/ /**LECTURE DU FICHIER FASTA ***************************************************/ fprintf(stderr,"** Reading composition of the sequences to shuffle **\n"); nbseq = readseq(order,fasta,&nver,&count,&vdeg,&first,&last,&seqstart,&seqlen, &maxsizeseq, nbseqalloc, cr.alphaseq, 
&origseq); fclose(fasta); if(nbseq<=0) fatalError("no sequences in file\n"); /**LECTURE DES RESULTATS DU PROGRAMME******************************************/ /* Lecture des modeles */ fprintf(stderr,"** Reading extracted models **\n"); modeles = (Mot *) lectureFichierRes(res, nbseq, &nbmodeles, &maxlongmod); fclose(res); if ( modeles == NULL ) { fprintf(stderr, "STOP: File '%s' is empty.\n", argv[2]); return 0; } /******************************************************************************/ /* ALLOCATIONS */ /* Allocation des tableaux de stockage des statistiques */ resultats = (float **)malloc(nbtests*sizeof(float*)); assert(resultats!=NULL); #if OCC resocc = (float **)malloc(nbtests*sizeof(float*)); assert(resocc!=NULL); #endif for ( i=0; i %d\n",j); */ /* k=0; */ /* while(seq[j][k]!='\0') */ /* { */ /* printf("%c",seq[j][k]); */ /* k++; */ /* if(k%50 ==0) */ /* printf("\n"); */ /* } */ /* printf("\n");; */ /* } */ /* Creation de l'arbre suffixe */ creeArbreSuffixeFromArray(&arbre, seq, nbseq, maxLongMod(cr), alphaseq); /* fprintf(stderr,"** Searching for occurrences of the %d models **\n", */ /* nbmodeles); */ /* Gestion de la barre */ /* if(nbmodeles>=100) */ /* { */ /* step = nbmodeles/100; */ /* barre(100); */ /* } */ /* else */ /* { */ /* step = nbmodeles; */ /* barre(nbmodeles); */ /* } */ for(j=0; j!=nbmodeles; j++) { /* fprintf(stderr,"Je cherche le mot %s\n",modeles[j].mot); */ /* getchar(); */ chercheMot(arbre,&cr, modeles[j].codes, &nbocc, &nboccex, NULL); /* printf("J'en ai trouve %d et %d\n",nbocc, nboccex); */ /* if(nbmodeles>=100) */ /* { */ /* if(j%step==0) */ /* barre(0); */ /* } */ /* else */ /* barre(0); */ resultats[i][j] = (float) nbocc; modeles[j].moyenne_shuffle += ((float) nbocc)/nbtests; /* printf("Sommeres -> %f\n",modeles[j].moyenne_shuffle); */ #if OCC resocc[i][j] = (float) nboccex; modeles[j].moyenne_shuffle_occ+= ((float) nboccex)/nbtests; #endif } /* Liberation de l'arbre suffixe */ libereArbreSuffixeFromArray(arbre); barre(0); } 
/******************************************************************************/ /* Liberations */ for(i=0; i1) free(seqstart); /******************************************************************************/ /* CALCULS STATISTIQUES */ /* for( i=0; izscore == b->zscore) return 0; if(a->zscore > b->zscore) return -1; return 1; } #if OCC int comparModelesOcc(Mot *a, Mot *b) { if(a->zscore_occ == b->zscore_occ) return 0; if(a->zscore_occ > b->zscore_occ) return -1; return 1; } #endif /******************************************************************************/ /* espace */ /******************************************************************************/ void espace(FILE *f, int nb) { static char space[31]={' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ',' ','\0'}; fprintf(f,"%s",space+30-nb); } /******************************************************************************/ /* afficheStats */ /******************************************************************************/ void afficheStats(FILE *outfile, Mot *modeles, int nbmodeles, int nbseq, int maxlongmod) { int i; Mot m; fprintf(outfile,"STATISTICS ON THE NUMBER OF SEQUENCES HAVING AT LEAST ONE OCCURRENCE\n"); fprintf(outfile,"Model"); espace(outfile,maxlongmod-2); fprintf(outfile," %%right\t #right\t %%shfl.\t #shfl.\tSigma\tChi2\tZ-score\n"); fprintf(outfile,"=================================================================================\n"); qsort(modeles, nbmodeles, sizeof(Mot), (int (*)(const void *, const void *)) comparModeles); for( i=0; i Seq0 TAAAGGTGATCTCTTCCAGATCTCACAAACAGCAAACAGTCTTCAGGGAT AAGGTCCATGCTTGTGCCACCTATACAAGCTGCTGTCCTAGCCGCGTGAT > Seq1 GCAACCATAGTACAGGAGGCAGGATTGTTAATATGTAGCTCTCCGTTTTA AAGATTAGGCCCCAGTCACTCGTAACGGTAAAATATTGAGCACTAGAGAG > Seq2 CGATGAATCCGCTAGGGACGGTTACTCCCAGTTCCTCAACTTAATAGCCC GGTGTAGCTGCACCGGAAATGCCGGTCCATTAAGTTTCCGGAGTTCAGGA > Seq3 CGAACCACCGAGGTTTCCCTCAGTGACTCGGTGCTGCACTGACAAACAGC 
GGGCCAGATGTCATATATATCTGTCGTCCCGGGCATAGCGCTTGCGGACG > Seq4 GGATGGGAGAAGACCCTACCATTTGTATCCTAACTTCCGCCTAACCTGTG TCTGCTTGGGCGGCAAGCCCTGCACGATAACTTTCATGGCGACGCCATAT > Seq5 GGGTAGCATGATACAGTAAGTTCAGCGTTGCAGTTAACTTTTTGTGGGCT GCGAGGACAAACGCTCGCAAAGTGAGACGAGATCATCTGTGAGAGACTTC > Seq6 ACGCCGCTTCTTTACGTGTCCGGTGACTCGGCATCAGTACGAGTTTGGCA TGCTGCCTACTCTCAGAACGTTGCGAAATTCCGGCCACAGGTTCCCCGTA > Seq7 ATATATATAAAGTCTTGCGACCAAACGACTTAACATTAGTTGCTTAGTAG CGGCCATGAAGCTCAATATTTATGGGCGACGGCTGAGTATCTTATCGGCC > Seq8 AACGCCTGGTAGCCGTGTGGGCTCAAATTAGAGTAGATTACTGACATCAT CAGATAGCCAGACTCAATGTAACGTCCGGGCGACTTCGTTTGAGATGTGG > Seq9 AGCGAATCGAGGGGGCTGCCCGCACCGAAACCGTCGGCCGAGGAGCTCTG TTGGAGAGGCTTGGTTCTCCCGGTAGCCAAGTTAATTAGCGTTAAAACGG SMILEv1.47/SigStat/grep+/lib/0000755002404200237300000000000010066542220015257 5ustar lamaaoc00000000000000SMILEv1.47/SigStat/grep+/obj/0000755002404200237300000000000010114605452015264 5ustar lamaaoc00000000000000SMILEv1.47/SigStat/grep+/src/0000755002404200237300000000000010066544314015306 5ustar lamaaoc00000000000000SMILEv1.47/SigStat/grep+/src/global_fonctions.c0000777002404200237300000000000010066542220027222 2../../../P_BLOCS/src/global_fonctions.custar lamaaoc00000000000000SMILEv1.47/SigStat/grep+/src/allocateurs.c0000777002404200237300000000000010066542220025212 2../../../P_BLOCS/src/allocateurs.custar lamaaoc00000000000000SMILEv1.47/SigStat/grep+/src/global_variables.c0000777002404200237300000000000010066542220027136 2../../../P_BLOCS/src/global_variables.custar lamaaoc00000000000000SMILEv1.47/SigStat/grep+/src/sub_suffix_tree.c0000777002404200237300000000000010066542220026746 2../../../P_BLOCS/src/sub_suffix_tree.custar lamaaoc00000000000000SMILEv1.47/SigStat/grep+/src/liste_pos.c0000644002404200237300000001570710066544314017465 0ustar lamaaoc00000000000000/******************************************************************************/ /* SMILE v1.47 - Extraction of structured motifs common to several sequences */ /* Copyright (C) 2004 L.Marsan (lama -AT- 
prism.uvsq.fr) */ /* */ /* This program is free software; you can redistribute it and/or */ /* modify it under the terms of the GNU General Public License */ /* as published by the Free Software Foundation; either version 2 */ /* of the License, or (at your option) any later version. */ /* */ /* This program is distributed in the hope that it will be useful, */ /* but WITHOUT ANY WARRANTY; without even the implied warranty of */ /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ /* GNU General Public License for more details. */ /* */ /* You should have received a copy of the GNU General Public License */ /* along with this program; if not, write to the Free Software */ /* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ /******************************************************************************/ #include ListePositions *Alloc_ListePositions(int size) { ListePositions * tmp = (ListePositions *)malloc(sizeof(ListePositions)); if (!tmp) { fprintf(stderr,"No Enough space\nProgram Abort\n"); exit(-1); } tmp->tab[0] = (int *)malloc(sizeof(int) * size); tmp->tab[1] = (int *)malloc(sizeof(int) * size); #if DEBUG_JTREE printf("Alloc Liste.... %d \n",sizeof(int) * size); #endif if ((!tmp->tab[0]) || (!tmp->tab[1])) { fprintf(stderr,"No Enough space\nProgram Abort\n"); exit(-1); } memset(tmp->tab[0],0,sizeof(int) * size); memset(tmp->tab[1],0,sizeof(int) * size); tmp->last_cell = 0; tmp->tab_size = size; return tmp; } int ChercheDerniereCelluleDansListe(ListePositions *lpos,int deb_liste) { if (lpos == NULL) return -1; while(lpos->tab[1][deb_liste] != LISTE_END) deb_liste = lpos->tab[1][deb_liste] & LISTE_CHANGE_BIT_INV; return deb_liste; } int Ajoute_Position_Liste(ListePositions *lpos,int *deb_liste,int position,int change_seq) { if (!lpos) return -2; if (lpos->last_cell == lpos->tab_size) { #if DEBUG_JTREE printf("realloc .... 
LPOS\n"); #endif lpos->tab[0] = (int *)realloc(lpos->tab[0],sizeof(int)*(lpos->tab_size + POS_ALLOC_STEP)); lpos->tab[1] = (int *)realloc(lpos->tab[1],sizeof(int)*(lpos->tab_size + POS_ALLOC_STEP)); lpos->tab_size+=POS_ALLOC_STEP; } lpos->tab[0][lpos->last_cell] = position ; lpos->tab[1][lpos->last_cell] = LISTE_END ; if (*deb_liste!=-1) { if (change_seq) lpos->tab[1][lpos->last_cell] = *deb_liste | LISTE_CHANGE_BIT; else lpos->tab[1][lpos->last_cell] = *deb_liste; } *deb_liste = lpos->last_cell; (lpos->last_cell)++; return 1; } int getValue(ListePositions *lpos,int i) { if (lpos==NULL) return -2; if ((i<0) || (i>lpos->last_cell)) return -3; return lpos->tab[0][i] ;/*& LISTE_CHANGE_BIT_INV; */ } void setListeValue(ListePositions *lpos,int i,int value) { if ((lpos==NULL) || (i<0) || (i>lpos->last_cell)) return ; lpos->tab[0][i] = value; } int getIndiceSuivant(ListePositions *lpos,int i) { if (lpos==NULL) return -2; if ((i<0) || (i>lpos->last_cell)) return -3; return lpos->tab[1][i]; } void Free_ListePositions(ListePositions *lpos) { if (lpos == NULL) return ; free(lpos->tab[0]); free(lpos->tab[1]); free(lpos); } int Print_Positions_Dynamique(FILE *f, Feuille *n, LongSeq longway, P_Criteres cr, int code) { int indice = n->fin_deb, occurrence = 0; #if AFF_OCC int longueur, *i, nb_element = ((n->sequences[0] & 0x7F) << 8) | n->sequences[1]; nb_element--; longueur = getValue(Liste_positions_fin,indice)-(n->debut&LEAF_BIT_INV)+longway; #endif while(indice != LISTE_END) { #if AFF_OCC if(f!=NULL) { fprintf(f,"Seq %5d Pos %5d", ((unsigned short int *)(n->sequences + 2))[nb_element], getValue(Liste_positions_fin,indice)-longueur); if(cr && cr->bloc != 1) { fprintf(f, "\tSaut "); for(i=cr->code2Sauts[code]; i!=cr->code2Sauts[code]+cr->bloc-1; i++) fprintf(f, "%d ",*i); } fprintf(f, "\n"); } #endif indice = getIndiceSuivant(Liste_positions_fin,indice); #if AFF_OCC if (indice & LISTE_CHANGE_BIT) nb_element--; #endif indice = indice & LISTE_CHANGE_BIT_INV; occurrence++; } return 
occurrence; } /******************************************************************************/ /* */ /******************************************************************************/ int Print_Positions_Statique(FILE *f, Feuille *n, LongSeq longway, P_Criteres cr, int code) { unsigned char mask = 0x01; int compteur = SIZE_STATIC_BIT_TAB-1, sequence = 8 * compteur + 6, indice = n->fin_deb, occurrence = 0; #if AFF_OCC int longueur, *i; longueur = getValue(Liste_positions_fin,indice)-(n->debut&LEAF_BIT_INV)+longway; #endif while ( (n->sequences[compteur] & mask) == 0 ) { mask <<= 1; sequence--; if ( mask == 0 ) { mask = 0x01; compteur--; } } while(indice != LISTE_END) { #if AFF_OCC if(f!=NULL) { fprintf(f,"Seq %5d Pos %5d",sequence, getValue(Liste_positions_fin,indice)-longueur); if(cr && cr->bloc != 1) { fprintf(f, "\tSaut "); for(i=cr->code2Sauts[code]; i!=cr->code2Sauts[code]+cr->bloc-1; i++) fprintf(f, "%d ",*i); } fprintf(f, "\n"); } #endif indice = getIndiceSuivant(Liste_positions_fin,indice); if ( (indice & LISTE_CHANGE_BIT) && ( indice != LISTE_END ) ) { do { mask <<= 1; sequence--; if ( mask == 0 ) { mask = 0x01; compteur--; } } while ( (n->sequences[compteur] & mask) == 0 ); } occurrence++; indice = indice & LISTE_CHANGE_BIT_INV; } return occurrence; } int Print_Positions(FILE *f, Feuille *n, LongSeq longway, P_Criteres cr, int code) { /* #if DEBUG */ /* printf("J'arrive dans Print avec long %d\n",longway); */ /* #endif */ if (n->sequences[0] & 0x80) return (Print_Positions_Dynamique(f, n, longway, cr, code)); return (Print_Positions_Statique(f, n, longway, cr, code)); } SMILEv1.47/SigStat/grep+/src/bit_tab2.c0000777002404200237300000000000010066542220023532 2../../../P_BLOCS/src/bit_tab2.custar lamaaoc00000000000000SMILEv1.47/SigStat/grep+/src/liste_pos2.c0000777002404200237300000000000010066542220024524 2../../../P_BLOCS/src/liste_pos2.custar lamaaoc00000000000000SMILEv1.47/SigStat/grep+/src/construction.c0000777002404200237300000000000010066542220025642 
2../../../P_BLOCS/src/construction.custar lamaaoc00000000000000SMILEv1.47/SigStat/grep+/src/bit_tab.c0000777002404200237300000000000010066542220023366 2../../../P_BLOCS/src/bit_tab.custar lamaaoc00000000000000SMILEv1.47/SigStat/grep+/src/libfasta.c0000777002404200237300000000000010066542220023730 2../../../P_BLOCS/src/libfasta.custar lamaaoc00000000000000SMILEv1.47/SigStat/grep+/Makefile0000644002404200237300000001147310066542220016157 0ustar lamaaoc00000000000000############################################################################### #Ne pas modifier############################################################### NB_OCCS=1 AFF_OCCS=1 #Compilateur : CC=gcc OPT=-ansi -Wall -O3 #OPT=-g #definition des repertoires des arbres: INCL_DIR=include/ OBJ_DIR=obj/ SRC_DIR=src/ LIB_DIR=lib/ BIN_DIR=bin/ # definition de repertoires de grep+ : SPELL_INCL_DIR=Spell/include/ SPELL_SRC_DIR=Spell/src/ SPELL_OBJ_DIR=Spell/obj/ # Options de compilation : C_FLAG= $(OPT) -I$(INCL_DIR) -I$(SPELL_INCL_DIR) O_FLAG= -c $(OPT) -I$(INCL_DIR) -I$(SPELL_INCL_DIR) -DOCC=$(NB_OCCS) -DAFF_OCC=$(AFF_OCCS) LIB_FLAG= -lm # Nom du programme , de l'archive PROG_NAME=grep+ ARCHIVE_NAME=grep+.tgz #Objet ... TREE_OBJ=$(OBJ_DIR)global_fonctions.o $(OBJ_DIR)construction.o $(OBJ_DIR)liste_pos.o $(OBJ_DIR)allocateurs.o $(OBJ_DIR)bit_tab.o $(OBJ_DIR)libfasta.o $(OBJ_DIR)global_variables.o SPELL_OBJ=$(SPELL_OBJ_DIR)global.o $(SPELL_OBJ_DIR)pile_occ.o $(SPELL_OBJ_DIR)grep+.o $(SPELL_OBJ_DIR)occ.o $(SPELL_OBJ_DIR)criteres.o $(SPELL_OBJ_DIR)barre.o $(SPELL_OBJ_DIR)alphabet.o $(SPELL_OBJ_DIR)model.o SPELL_OBJ_DEB=$(SPELL_OBJ_DIR)global.o $(SPELL_OBJ_DIR)pile_occ.o $(SPELL_OBJ_DIR)grep+.o $(SPELL_OBJ_DIR)occ.o $(SPELL_OBJ_DIR)criteres.o $(SPELL_OBJ_DIR)barre.o $(SPELL_OBJ_DIR)main.o $(SPELL_OBJ_DIR)alphabet.o $(SPELL_OBJ_DIR)model.o OBJ_FILE=$(TREE_OBJ) $(SPELL_OBJ) OBJ_DEB_FILE=$(SPELL_OBJ_DEB) $(TREE_OBJ) all: $(BIN_DIR)$(PROG_NAME) @echo ALL!!! 
debug: $(OBJ_DEB_FILE) $(CC) $(C_FLAG) $(OBJ_DEB_FILE) -o $(BIN_DIR)$(PROG_NAME) $(LIB_FLAG) $(BIN_DIR)$(PROG_NAME): obj $(CC) $(C_FLAG) $(OBJ_FILE) -o $(BIN_DIR)$(PROG_NAME) $(LIB_FLAG) obj: $(OBJ_FILE) $(OBJ_DIR)sub_suffix_tree.o : $(SRC_DIR)sub_suffix_tree.c $(CC) $(SRC_DIR)sub_suffix_tree.c -o $(OBJ_DIR)sub_suffix_tree.o $(O_FLAG) $(OBJ_DIR)global_fonctions.o : $(INCL_DIR)global_fonctions.h $(SRC_DIR)global_fonctions.c $(CC) $(SRC_DIR)global_fonctions.c -o $(OBJ_DIR)global_fonctions.o $(O_FLAG) $(OBJ_DIR)construction.o : $(INCL_DIR)construction.h $(SRC_DIR)construction.c $(CC) $(SRC_DIR)construction.c -o $(OBJ_DIR)construction.o $(O_FLAG) $(OBJ_DIR)liste_pos.o : $(INCL_DIR)liste_pos.h $(SRC_DIR)liste_pos.c $(CC) $(SRC_DIR)liste_pos.c -o $(OBJ_DIR)liste_pos.o $(O_FLAG) $(OBJ_DIR)liste_pos2.o : $(INCL_DIR)liste_pos.h $(SRC_DIR)liste_pos2.c $(CC) $(SRC_DIR)liste_pos2.c -o $(OBJ_DIR)liste_pos2.o $(O_FLAG) $(OBJ_DIR)allocateurs.o : $(INCL_DIR)allocateurs.h $(SRC_DIR)allocateurs.c $(CC) $(SRC_DIR)allocateurs.c -o $(OBJ_DIR)allocateurs.o $(O_FLAG) $(OBJ_DIR)bit_tab.o : $(INCL_DIR)bit_tab.h $(SRC_DIR)bit_tab.c $(CC) $(SRC_DIR)bit_tab.c -o $(OBJ_DIR)bit_tab.o $(O_FLAG) $(OBJ_DIR)bit_tab2.o : $(INCL_DIR)bit_tab.h $(SRC_DIR)bit_tab2.c $(CC) $(SRC_DIR)bit_tab2.c -o $(OBJ_DIR)bit_tab2.o $(O_FLAG) $(OBJ_DIR)libfasta.o : $(INCL_DIR)libfasta.h $(SRC_DIR)libfasta.c $(CC) $(SRC_DIR)libfasta.c -o $(OBJ_DIR)libfasta.o $(O_FLAG) $(OBJ_DIR)global_variables.o : $(SRC_DIR)global_variables.c $(CC) $(SRC_DIR)global_variables.c -o $(OBJ_DIR)global_variables.o $(O_FLAG) #Dependance pour le programme spell : $(SPELL_OBJ_DIR)main.o : $(SPELL_INCL_DIR)global.h $(SPELL_SRC_DIR)main.c $(CC) $(SPELL_SRC_DIR)main.c -o $(SPELL_OBJ_DIR)main.o $(O_FLAG) $(SPELL_OBJ_DIR)global.o : $(SPELL_INCL_DIR)global.h $(SPELL_SRC_DIR)global.c $(CC) $(SPELL_SRC_DIR)global.c -o $(SPELL_OBJ_DIR)global.o $(O_FLAG) $(SPELL_OBJ_DIR)criteres.o : $(SPELL_INCL_DIR)criteres.h $(SPELL_SRC_DIR)criteres.c 
$(SPELL_INCL_DIR)global.h $(CC) $(SPELL_SRC_DIR)criteres.c -o $(SPELL_OBJ_DIR)criteres.o $(O_FLAG) $(SPELL_OBJ_DIR)pile_occ.o : $(SPELL_INCL_DIR)pile_occ.h $(SPELL_SRC_DIR)pile_occ.c $(SPELL_INCL_DIR)global.h $(CC) $(SPELL_SRC_DIR)pile_occ.c -o $(SPELL_OBJ_DIR)pile_occ.o $(O_FLAG) $(SPELL_OBJ_DIR)grep+.o : $(SPELL_INCL_DIR)grep+.h $(SPELL_SRC_DIR)grep+.c $(SPELL_INCL_DIR)global.h $(CC) $(SPELL_SRC_DIR)grep+.c -o $(SPELL_OBJ_DIR)grep+.o $(O_FLAG) $(SPELL_OBJ_DIR)occ.o : $(SPELL_INCL_DIR)occ.h $(CC) $(SPELL_SRC_DIR)occ.c -o $(SPELL_OBJ_DIR)occ.o $(O_FLAG) $(SPELL_OBJ_DIR)barre.o : $(SPELL_INCL_DIR)barre.h $(CC) $(SPELL_SRC_DIR)barre.c -o $(SPELL_OBJ_DIR)barre.o $(O_FLAG) $(SPELL_OBJ_DIR)alphabet.o : $(SPELL_INCL_DIR)alphabet.h $(CC) $(SPELL_SRC_DIR)alphabet.c -o $(SPELL_OBJ_DIR)alphabet.o $(O_FLAG) $(SPELL_OBJ_DIR)model.o : $(SPELL_INCL_DIR)model.h $(CC) $(SPELL_SRC_DIR)model.c -o $(SPELL_OBJ_DIR)model.o $(O_FLAG) clean: clean_emacs clean_obj @echo CLEAN OK!!! clean_emacs: find . -name "*~" -exec rm -f {} \; clean_obj: rm -f $(OBJ_DIR)*o $(SPELL_OBJ_DIR)*o clean_arch: rm ./$(ARCHIVE_NAME) tgz: clean tar -zcvf $(ARCHIVE_NAME) ./* lib: $(TREE_OBJ) $(SPELL_OBJ) gcc -shared $(OBJ_FILE) $(LIB_FLAG) -o lib$(PROG_NAME).so depend: makedepend -o.o $(SPELL_INCL_DIR)*.h $(INCL_DIR)*.h $(SPELL_SRC_DIR)*.c $(SRC_DIR)*.c -I$(INCL_DIR) -I$(SPELL_INCL_DIR) # DO NOT DELETE SMILEv1.47/SigStat/grep+/Spell/0000755002404200237300000000000010066542220015570 5ustar lamaaoc00000000000000SMILEv1.47/SigStat/grep+/Spell/obj/0000755002404200237300000000000010114605452016343 5ustar lamaaoc00000000000000SMILEv1.47/SigStat/grep+/Spell/src/0000755002404200237300000000000010066544254016370 5ustar lamaaoc00000000000000SMILEv1.47/SigStat/grep+/Spell/src/occ.c0000777002404200237300000000000010066542220024237 2../../../../P_BLOCS/Spell/src/occ.custar lamaaoc00000000000000SMILEv1.47/SigStat/grep+/Spell/src/criteres.c0000644002404200237300000003261410066544230020354 0ustar 
lamaaoc00000000000000/******************************************************************************/
/* SMILE v1.47 - Extraction of structured motifs common to several sequences */
/* Copyright (C) 2004 L.Marsan (lama -AT- prism.uvsq.fr) */
/* */
/* This program is free software; you can redistribute it and/or */
/* modify it under the terms of the GNU General Public License */
/* as published by the Free Software Foundation; either version 2 */
/* of the License, or (at your option) any later version. */
/* */
/* This program is distributed in the hope that it will be useful, */
/* but WITHOUT ANY WARRANTY; without even the implied warranty of */
/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */
/* GNU General Public License for more details. */
/* */
/* You should have received a copy of the GNU General Public License */
/* along with this program; if not, write to the Free Software */
/* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
/******************************************************************************/
#include "criteres.h"

/******************************************************************************/
/* PRIVATE FUNCTIONS */
/******************************************************************************/
void libereTabSauts(P_Criteres);
int recFillTab(int bloc, Criteres cr, int *nbcodes, int **code2Sauts);
Flag allocBloc(P_Criteres cr, NbBlocs bloc);

/******************************************************************************/
/******************************************************************************/
/* Setters / getters of the criteria fields */
/******************************************************************************/

/* setBloc */
/* (Re)allocates the per-block arrays through allocBloc() and records the */
/* number of blocks. Returns FAUX when allocBloc() fails, VRAI otherwise. */
Flag setBloc(P_Criteres cr, NbBlocs bloc)
{
  if( !allocBloc(cr, bloc))
    return FAUX;

  cr->bloc = bloc;
  return VRAI;
}

/* setLongueurBloc */
/* Stores 'lon' as the maximum length of block 'num_bloc'. */
/* Returns FAUX when 'num_bloc' is outside [0, cr->bloc). */
Flag setLongueurBloc(P_Criteres cr, NbBlocs num_bloc, LongSeq lon)
{
  if (num_bloc >= cr->bloc || num_bloc < 0)
    return FAUX;

  cr->longbloc[(int)num_bloc].max = lon;
  return VRAI;
}

/* setErreurGlobal */
/* Stores the global maximum number of errors. */
void setErreurGlobal(P_Criteres cr, LongSeq err)
{
  cr->maxerr = err;
}

/* setErreurBloc */
/* Stores the maximum number of errors of block 'num_bloc'. */
/* Returns FAUX when 'num_bloc' is outside [0, cr->bloc). */
Flag setErreurBloc(P_Criteres cr, NbBlocs num_bloc, LongSeq err)
{
  if (num_bloc >= cr->bloc || num_bloc < 0)
    return FAUX;

  cr->maxerrblocs[(int)num_bloc] = err;
  return VRAI;
}

/* setSaut */
/* Stores the [min, max] range of jump 'num_bloc'. There is one jump less */
/* than there are blocks, hence the cr->bloc-1 bound. */
Flag setSaut(P_Criteres cr, NbBlocs num_bloc, LongSeq min, LongSeq max)
{
  if (num_bloc >= cr->bloc-1 || num_bloc < 0)
    return FAUX;

  cr->saut[(int)num_bloc].min = min;
  cr->saut[(int)num_bloc].max = max;
  return VRAI;
}

/* getBloc */
/* Returns the number of blocks. */
NbBlocs getBloc(Criteres cr)
{return cr.bloc;}

/* getLongueurBloc */
/* Returns the maximum length of block 'num_bloc', or -1 when the index is */
/* out of range. */
LongSeq getLongueurBloc(Criteres cr,NbBlocs num_bloc)
{
  if (num_bloc >= cr.bloc || num_bloc < 0)
    return -1;
  return cr.longbloc[(int)num_bloc].max;
}

/* getErreurGlobal */
/* Returns the global maximum number of errors. */
LongSeq getErreurGlobal(Criteres cr)
{return cr.maxerr;}

/* getErreur (labelled "getErreurBloc" in the original banner) */
/* Returns the maximum number of errors of block 'num_bloc', or -1 when the */
/* index is out of range. */
LongSeq getErreur(Criteres cr, NbBlocs num_bloc)
{
  if (num_bloc >= cr.bloc || num_bloc < 0)
    return -1;
  return cr.maxerrblocs[(int)num_bloc];
}

/* getSaut */
/* Returns the range of jump 'num_bloc'. */
/* NOTE(review): unlike the other getters the index is not checked here. */
Fourchette getSaut(Criteres cr, NbBlocs num_bloc)
{
  return cr.saut[(int)num_bloc];
}

/* maxLongMod */
/* NOTE(review): this copy of the file is truncated here. Apparently the text */
/* between the '<' of the loop condition and a later '>' was stripped, so */
/* the rest of maxLongMod() and the beginning of verifCriteres() are missing. */
/* The surviving tokens are left untouched; restore the span from a pristine */
/* criteres.c before building. */
int maxLongMod(Criteres cr)
{
  int i, max;

  max = cr.longbloc[0].max;
  for(i=0;icr.saut[i].max) { fprintf(stderr, "verifCriteres: max spacer length has to be greater than min spacer length!\n"); return FAUX; } } }
  return VRAI;
}

/******************************************************************************/
/* addSaut2Code */
/******************************************************************************/
/* Appends the jump length 'saut' taken after block 'curbloc' to the code */
/* 'oldcode'. For curbloc == 0 the result is saut - cr->saut[0].min; */
/* otherwise 'oldcode' is multiplied by the number of values in the range of */
/* jump 'curbloc' and the offset of 'saut' inside that range is added. */
/******************************************************************************/
int addSaut2Code(int oldcode, LongSeq saut, LongSeq curbloc, P_Criteres cr)
{
#if DEBUG_SAUT
  printf("addSaut2Code: Je recois %d",oldcode);
#endif

  if (curbloc == 0)
    {
#if DEBUG_SAUT
      printf(" et renvoie %d\n",saut - cr->saut[0].min);
#endif
      return (saut - cr->saut[0].min);
    }

  oldcode *= cr->saut[(int)curbloc].max - cr->saut[(int)curbloc].min + 1;
  oldcode += saut - cr->saut[(int)curbloc].min;

#if
DEBUG_SAUT printf(" et renvoie %d\n",oldcode); #endif return oldcode; } /******************************************************************************/ /* initTabSauts */ /******************************************************************************/ void initTabSauts(P_Criteres cr) { int bloc = cr->bloc, **i,j, nbcodes; if(cr->code2Sauts != NULL) libereTabSauts(cr); nbcodes = 1; for(j=0; j != bloc-1; j++) nbcodes *= cr->saut[j].max - cr->saut[j].min +1; if ( (cr->code2Sauts = (int **) malloc(nbcodes * sizeof(int *)) ) == NULL) fatalError("criteres.c: initTabSauts: cannot allocate 'code2Sauts'\n"); for(j=0, i=cr->code2Sauts; j != nbcodes; j++,i++) if ( (*i = (int *) malloc((bloc-1) * sizeof(int)) ) == NULL ) fatalError("criteres.c: initTabSauts: cannot allocate 'code2Sauts[j]'\n"); recFillTab(0, *cr, &nbcodes, cr->code2Sauts); } /******************************************************************************/ /* libereTabSauts */ /******************************************************************************/ void libereTabSauts(P_Criteres cr) { int bloc = cr->bloc, **i,j, nbcodes; nbcodes = 1; for(j=0; j != bloc-1; j++) nbcodes *= cr->saut[j].max - cr->saut[j].min +1; for(j=0, i=cr->code2Sauts; j != nbcodes; j++,i++) free(*i); free(cr->code2Sauts); cr->code2Sauts = NULL; } /******************************************************************************/ /* recFillTab */ /******************************************************************************/ int recFillTab(int bloc, Criteres cr, int *nbcodes, int **code2Sauts) { int i,j,k,pos=0,a; if(bloc != cr.bloc-2) a = recFillTab(bloc+1, cr, nbcodes, code2Sauts); else a = 1; *nbcodes /= cr.saut[bloc].max - cr.saut[bloc].min +1; for(i=0; i!=*nbcodes; i++) for(j=cr.saut[bloc].min; j!=cr.saut[bloc].max+1; j++) for(k=0; k!=a; k++) { code2Sauts[pos][bloc] = j; pos++; } printf("\n"); return( a*(cr.saut[bloc].max - cr.saut[bloc].min +1)); } /******************************************************************************/ /* 
allocBloc */ /******************************************************************************/ Flag allocBloc(P_Criteres cr, NbBlocs bloc) { int i; if(cr->maxerrblocs != NULL) { libereTabSauts(cr); free(cr->longbloc); if(cr->bloc > 1) { free(cr->maxerrblocs); free(cr->saut); } } cr->longbloc = (Fourchette *) malloc(bloc*sizeof(Fourchette)); if(!cr->longbloc) return FAUX; cr->maxerrblocs = (LongSeq *) malloc(bloc*sizeof(LongSeq)); if( !cr->maxerrblocs ) return FAUX; if(bloc > 1) { cr->saut = (Fourchette *) malloc((bloc-1)*sizeof(Fourchette)); if( !cr->saut ) return FAUX; } /* Initialisations blocs */ for (i = 0; i != bloc; i++) { cr->maxerrblocs[i] = -1; cr->longbloc[i].max = -1; cr->longbloc[i].min = -1; if(i!=bloc-1) cr->saut[i].min = cr->saut[i].max = -1; } /* Initialisations generales */ cr->maxerr = cr->bloc = cr->multiblocs = -1; cr->code2Sauts = NULL; cr->nbtotseq = -1; cr->longmod.max = -1; cr->longmod.min = -1; return VRAI; } /******************************************************************************/ /* initCriteres */ /******************************************************************************/ void initCriteres(P_Criteres cr) { cr->maxerr = -1; cr->bloc = -1; cr->maxerrblocs = NULL; cr->longbloc = NULL; cr->saut = NULL; cr->multiblocs = 0; cr->code2Sauts = NULL; cr->nbtotseq = 0; cr->longmod.max = 0; cr->longmod.min = 0; } /******************************************************************************/ /* chargeCriteres */ /******************************************************************************/ Flag chargeCriteres(P_Criteres cr, char *line) { int i,tmp, tmp2; tmp = atoi(strtok(line,"% ")); /* Nb blocs */ if(tmp < 1) return FAUX; setBloc(cr, tmp); strtok(NULL,"%/ "); /* Quorum */ cr->nbtotseq = atoi(strtok(NULL," /")); /* Nb total sequences */ cr->nbsymb = atoi(strtok(NULL," ")); /* Nb symboles */ cr->longmod.min = atoi(strtok(NULL," ")); /* L min */ tmp=atoi(strtok(NULL," ")); /* L max */ cr->longmod.max = tmp; if (cr->bloc == 1) 
setLongueurBloc(cr, 0, tmp); setErreurGlobal(cr, atoi(strtok(NULL," "))); /* Err glob */ if(cr->bloc != 1) { for(i=0; i!=cr->bloc; i++) { strtok(NULL," "); /* L min bloc i */ tmp=atoi(strtok(NULL," ")); /* L max bloc i */ setLongueurBloc(cr, i, tmp); setErreurBloc(cr, i, atoi(strtok(NULL," "))); /* Err bloc i */ if(i != cr->bloc-1) { tmp =atoi(strtok(NULL," ")); /* Saut min bloc i */ tmp2=atoi(strtok(NULL," ")); /* Saut max bloc i */ setSaut(cr, i, tmp, tmp2); } } } strcpy(cr->ficalph, strtok(NULL," ")); /* Fichier alphabet des modeles */ strcpy(cr->alphaseq, strtok(NULL," ")); /* Alphabet des sequences */ /* afficheCriteres(*cr); */ return VRAI; } SMILEv1.47/SigStat/grep+/Spell/src/global.c0000644002404200237300000000710610066544234017776 0ustar lamaaoc00000000000000/******************************************************************************/ /* SMILE v1.47 - Extraction of structured motifs common to several sequences */ /* Copyright (C) 2004 L.Marsan (lama -AT- prism.uvsq.fr) */ /* */ /* This program is free software; you can redistribute it and/or */ /* modify it under the terms of the GNU General Public License */ /* as published by the Free Software Foundation; either version 2 */ /* of the License, or (at your option) any later version. */ /* */ /* This program is distributed in the hope that it will be useful, */ /* but WITHOUT ANY WARRANTY; without even the implied warranty of */ /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ /* GNU General Public License for more details. */ /* */ /* You should have received a copy of the GNU General Public License */ /* along with this program; if not, write to the Free Software */ /* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
*/ /******************************************************************************/ #include int symb2int[256]={-1}; /******************************************************************************/ /* fatalError */ /******************************************************************************/ /* Gestion des erreurs FATALES! */ /******************************************************************************/ void fatalError(char *msg) { fprintf(stderr,"Error: %s\n",msg); exit(1); } /******************************************************************************/ /* warning */ /******************************************************************************/ void warning(char *msg) { fprintf(stderr,"Warning: %s\n",msg); } /******************************************************************************/ /* initEntiers */ /******************************************************************************/ void initEntiers(void) { int i; for(i=0;i<256;i++) symb2int[i] = -1; symb2int[(int)'A'] = 0; symb2int[(int)'C'] = 1; symb2int[(int)'G'] = 2; symb2int[(int)'T'] = 3; symb2int[(int)'$'] = 4; } /******************************************************************************/ /* entiers */ /******************************************************************************/ /* Conversion des symboles vers les entiers */ /******************************************************************************/ int entiers(char c) { int ret; ret = symb2int[(int)c]; if(ret == -1) { fprintf(stderr,"-- %c --",c); fatalError("Unknown character, non A,C,G,T or $"); } return ret; } /******************************************************************************/ /* entree */ /******************************************************************************/ void entree(void) { printf("\n-- Type ENTER\n"); fflush(stdin); getchar(); } SMILEv1.47/SigStat/grep+/Spell/src/model.c0000777002404200237300000000000010066542220025127 2../../../../P_BLOCS/Spell/src/model.custar 
lamaaoc00000000000000SMILEv1.47/SigStat/grep+/Spell/src/main.c0000644002404200237300000000554310066544244017466 0ustar lamaaoc00000000000000/******************************************************************************/ /* SMILE v1.47 - Extraction of structured motifs common to several sequences */ /* Copyright (C) 2004 L.Marsan (lama -AT- prism.uvsq.fr) */ /* */ /* This program is free software; you can redistribute it and/or */ /* modify it under the terms of the GNU General Public License */ /* as published by the Free Software Foundation; either version 2 */ /* of the License, or (at your option) any later version. */ /* */ /* This program is distributed in the hope that it will be useful, */ /* but WITHOUT ANY WARRANTY; without even the implied warranty of */ /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ /* GNU General Public License for more details. */ /* */ /* You should have received a copy of the GNU General Public License */ /* along with this program; if not, write to the Free Software */ /* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
*/ /******************************************************************************/ #include /******************************************************************************/ /******************************************************************************/ /********************************** MAIN **************************************/ /******************************************************************************/ /******************************************************************************/ int main(int argc, char **argv) { Criteres criteres; FILE *f=NULL; unsigned int nbocc, nboccex; Arbre b; char prout[100]; initCriteres(&criteres); /* setBloc(&criteres,2); */ /* setLongueurBloc(&criteres,0,5); */ /* setLongueurBloc(&criteres,1,5); */ /* setErreurGlobal(&criteres,0); */ /* setErreurBloc(&criteres,0,0); */ /* setErreurBloc(&criteres,1,0); */ /* setSaut(&criteres,0,14,16); */ setBloc(&criteres,1); setLongueurBloc(&criteres,0,11); setErreurGlobal(&criteres,1); if(valideCriteres(&criteres) == FAUX) return 1; f = fopen("pos","w"); if(f==NULL) printf("CHIIIIIIIER\n"); creeArbreSuffixeFromFile(&b, "seq2",maxLongMod(criteres)); chercheMot(b,&criteres, "AAAAA",&nbocc, &nboccex, f); printf("%d %d!\n",nbocc, nboccex); libereArbreSuffixeFromFile(b); /* creeArbreSuffixeFromFile(&b, "ficseq",maxLongMod(criteres)); */ /* chercheMot(b,&criteres, "AAAAAC_AGTGTT",&nbocc, &nboccex, f); */ /* printf("%d %d!\n",nbocc, nboccex); */ /* libereArbreSuffixeFromFile(b); */ return(0); } SMILEv1.47/SigStat/grep+/Spell/src/grep+.c0000644002404200237300000007170710066544240017553 0ustar lamaaoc00000000000000/******************************************************************************/ /* SMILE v1.47 - Extraction of structured motifs common to several sequences */ /* Copyright (C) 2004 L.Marsan (lama -AT- prism.uvsq.fr) */ /* */ /* This program is free software; you can redistribute it and/or */ /* modify it under the terms of the GNU General Public License */ /* as published by the Free 
Software Foundation; either version 2 */
/* of the License, or (at your option) any later version. */
/* */
/* This program is distributed in the hope that it will be useful, */
/* but WITHOUT ANY WARRANTY; without even the implied warranty of */
/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */
/* GNU General Public License for more details. */
/* */
/* You should have received a copy of the GNU General Public License */
/* along with this program; if not, write to the Free Software */
/* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
/******************************************************************************/

/* NOTE(review): the header name of the #include below is missing from this */
/* copy of the file; restore it from a pristine grep+.c. */
#include 

/******************************************************************************/
/* PRIVATE PROTOTYPES */
/******************************************************************************/
/* Handling of the accepted models */
void keepModel(P_PileOcc, LongSeq, P_Criteres cr, unsigned int *nboccex, FILE *f);

/* Tries to move one letter forward along an arc and returns the image node */
Flag avanceBranche(P_occ, P_occ, int, int, Flag, P_Criteres, LongSeq, Flag);

/* Starts the jump between two blocks */
NbSeq gestionSaut(P_PileOcc pocc, P_Criteres, LongSeq curbloc, signed char **text);

/* Loads the sequences into the tree from a FASTA file */
Flag chargeSequence(Arbre *a, char *fic);

/* Builds the tree */
Flag creeArbreSuffixe(Arbre *a, int maxlongmod, char *alphaseq);

/* EXTERNALS from alphabet.c */
extern int nbSymbMod;
extern int nbSymbSeq;
extern char *nummod2str[127];
extern int carseq2num[127];
extern Flag TabSymb[127][127];
extern int numJOKER;
extern int numSAUT;

/******************************************************************************/
/******************************************************************************/
/************************ BASIC FUNCTIONS *************************************/
/******************************************************************************/
/******************************************************************************/

/******************************************************************************/
/******************************************************************************/
/*********************** HANDLING OF THE OCCURRENCE LISTS *********************/
/******************************* AND OF THE MODELS ****************************/
/******************************************************************************/

/******************************************************************************/
/* KeepModel */
/******************************************************************************/
/* Displays (or stores when needed) the models that were found. */
/* When compiled with OCC, forwards its arguments to afficheLastOcc(); */
/* otherwise it does nothing and '*nboccex' is left untouched. */
/******************************************************************************/
void keepModel(P_PileOcc pocc, LongSeq l, P_Criteres cr, unsigned int *nboccex, FILE *f)
{
#if OCC
  afficheLastOcc(f, pocc, l, cr, nboccex);
#endif
}

/******************************************************************************/
/******************************************************************************/
/************************* SEARCH OF THE MODELS *******************************/
/******************************************************************************/
/******************************************************************************/

/******************************************************************************/
/* avanceBranche */
/******************************************************************************/
/* Tries to move one letter forward along an arc. */
/* Returns 1 on success, 0 otherwise. 
*/ /* La variable 'flag' indique si on est sur un noeud(1) ou une branche(0) */ /******************************************************************************/ Flag avanceBranche( P_occ next, P_occ tmp, int symbol, int trans, Flag flag_noeud, P_Criteres cr, LongSeq curbloc, Flag multiblocs) { /* Dans cette fonction, le code est duplique dans un souci de rapidite: */ /* j'essaie de faire un max de tests eliminatoires avant affectations */ /* Si la branche courante n'est pas epuisee... */ if (flag_noeud == FAUX) { if ( equiv(symbol, trans) ) { next->xerr = tmp->xerr; next->blocerr = tmp->blocerr; } else { next->xerr = tmp->xerr+1; if (next->xerr == cr->maxerr+1) /* si maxerr global atteint */ return 0; if(multiblocs == VRAI) { next->blocerr = tmp->blocerr+1; /* si maxerr local atteint */ if (next->blocerr == cr->maxerrblocs[curbloc]+1) return 0; } } next->x = tmp->x; next->num = tmp->num; next->lon = tmp->lon+1; } else /* Si la branche courante est epuisee, on est sur une nouvelle branche */ { next->x = tmp->x->fils[tmp->num]; if (next->x->fils[trans] == NULL) return(0); if ( equiv(symbol, trans) ) { next->xerr = tmp->xerr; next->blocerr = tmp->blocerr; } else { next->xerr = tmp->xerr+1; if (next->xerr == cr->maxerr+1) /* si maxerr global atteint */ return 0; if(multiblocs == VRAI) { next->blocerr = tmp->blocerr+1; /* si maxerr local atteint */ if (next->blocerr == cr->maxerrblocs[curbloc]+1) return 0; } } next->num = trans; next->lon = 1; } if(multiblocs == VRAI) { next->saut= tmp->saut; next->codesaut= tmp->codesaut; } return(1); } /******************************************************************************/ /* sauteSymbole */ /******************************************************************************/ int sauteSymbole(Occ curocc, P_PileOcc pocc, P_Criteres cr, LongSeq curbloc, LongSeq longsaut, signed char **text) { LongSeq lmaxbr; Noeud *tmpnoeud; Occ tmpocc; int res = 0; int trans; char carseq; tmpnoeud = curocc.x->fils[curocc.num]; if (tmpnoeud->debut & 
LEAF_BIT) lmaxbr = getValue(Liste_positions_fin,((Feuille *)tmpnoeud)->fin_deb) - (((Feuille *)tmpnoeud)->debut & LEAF_BIT_INV); else lmaxbr = tmpnoeud->fin - tmpnoeud->debut; #if DEBUG_SAUT printf("SauteSymbole: j'ai gere le saut pour %d, noeud %d, etat: %d/%d branche %d\n",longsaut,curocc.x,curocc.lon,lmaxbr,curocc.num); printf("saut %d longsaut %d\n",curocc.saut,longsaut); #endif ajouteOcc2Pile(pocc, curocc.x, curocc.num, curocc.lon, curocc.xerr, 0, curocc.saut+longsaut, addSaut2Code(curocc.codesaut, longsaut, curbloc, cr)); res++; longsaut++; if (curocc.lon != lmaxbr) /* on est au milieu d'une branche */ { curocc.lon++; carseq = text[tmpnoeud->sequence_number] [(tmpnoeud->debut & LEAF_BIT_INV)+curocc.lon-1]; if(carseq==FINAL) /* si on rencontre un FINAL c'est fini */ return res; if(longsaut<=cr->saut[curbloc].max) res += sauteSymbole(curocc, pocc, cr, curbloc, longsaut, text); } else /* sinon on est a un noeud, plusieurs trans sont possibles */ { if(longsaut<=cr->saut[curbloc].max) { tmpocc.x = tmpnoeud; tmpocc.lon = 1; tmpocc.xerr = curocc.xerr; tmpocc.codesaut = curocc.codesaut; tmpocc.saut = curocc.saut; if ((tmpnoeud->debut & LEAF_BIT) == 0) for (trans = 0; trans != nbSymbSeq; trans++) { if (tmpnoeud->fils[trans] != NULL) { tmpocc.num = trans; res += sauteSymbole(tmpocc, pocc, cr, curbloc, longsaut, text); } } } } #if DEBUG_SAUT printf("SauteSymbole: J'ai trouve %d occ\n",res); #endif return res; } /******************************************************************************/ /* sauteBranche */ /******************************************************************************/ int sauteBranche(Occ curocc, P_PileOcc pocc, P_Criteres cr, LongSeq curbloc, LongSeq longsaut, signed char **text) { LongSeq lmaxbr; Noeud * tmpnoeud, *newtmpnoeud; Occ tmpocc; int res = 0, newlongsaut; int trans; char carseq; tmpnoeud = curocc.x->fils[curocc.num]; if (tmpnoeud->debut & LEAF_BIT) lmaxbr = getValue(Liste_positions_fin,((Feuille *)tmpnoeud)->fin_deb) - (((Feuille 
*)tmpnoeud)->debut & LEAF_BIT_INV); else lmaxbr = tmpnoeud->fin - tmpnoeud->debut; #if DEBUG_SAUT printf("SauteBranche: j'ai gere le saut pour %d, noeud %d, etat: %d/%d branche %d\n",longsaut,curocc.x,curocc.lon,lmaxbr,curocc.num); #endif if (curocc.lon != lmaxbr) /* on est au milieu d'une branche */ { if ( lmaxbr-curocc.lon <= cr->saut[curbloc].min-longsaut ) { longsaut+=lmaxbr-curocc.lon; curocc.lon=lmaxbr; #if DEBUG_SAUT printf("SauteBranche: milieuBr, fast, je vais au bout %d/%d br %d et lgsaut %d\n",curocc.lon,lmaxbr,curocc.num,longsaut); #endif carseq = text[tmpnoeud->sequence_number] [(tmpnoeud->debut & LEAF_BIT_INV)+lmaxbr-1]; if(carseq!=FINAL) /* si on rencontre un FINAL c'est fini */ { #if DEBUG_SAUT printf("SauteBranche: finBr=$, c'est fini\n"); #endif res += sauteBranche(curocc, pocc, cr, curbloc, longsaut, text); } } else { curocc.lon+=cr->saut[curbloc].min-longsaut; longsaut=cr->saut[curbloc].min; #if DEBUG_SAUT printf("SauteBranche: milieuBr, minsaut ds Br, je m'arrete a %d/%d num %d et lgsaut %d\n",curocc.lon,lmaxbr,curocc.num,longsaut); #endif res += sauteSymbole(curocc, pocc, cr, curbloc, longsaut, text); } } else /* sinon on est a un noeud, plusieurs trans sont possibles */ { tmpocc.x = tmpnoeud; tmpocc.xerr = curocc.xerr; tmpocc.codesaut = curocc.codesaut; tmpocc.saut = curocc.saut; if ((tmpnoeud->debut & LEAF_BIT) == 0) for (trans = 0; trans != nbSymbSeq; trans++) { tmpocc.num = trans; newlongsaut = longsaut; if (tmpnoeud->fils[trans] != NULL) { newtmpnoeud = tmpnoeud->fils[trans]; if (newtmpnoeud->debut & LEAF_BIT) lmaxbr = getValue(Liste_positions_fin, ((Feuille *)newtmpnoeud)->fin_deb) - (newtmpnoeud->debut & LEAF_BIT_INV); else lmaxbr = newtmpnoeud->fin - newtmpnoeud->debut; if ( lmaxbr <= cr->saut[curbloc].min-longsaut ) { newlongsaut+=lmaxbr; tmpocc.lon=lmaxbr; #if DEBUG_SAUT printf("SauteBranche: noeud, fast, %d/%d, br %d, lgsaut %d\n",tmpocc.lon,lmaxbr,tmpocc.num,newlongsaut); #endif carseq = text[newtmpnoeud->sequence_number] 
[(newtmpnoeud->debut & LEAF_BIT_INV)+lmaxbr-1]; if(carseq!=FINAL) /* si on rencontre un FINAL c'est fini */ { #if DEBUG_SAUT printf("SauteBranche: finBr=$, c'est fini\n"); #endif res += sauteBranche(tmpocc, pocc, cr, curbloc, newlongsaut, text); } } else { tmpocc.lon=cr->saut[curbloc].min-newlongsaut; newlongsaut=cr->saut[curbloc].min; #if DEBUG_SAUT printf("SauteBranche2: noeud %d, minsaut ds Br, %d/%d, br %d, lgsaut %d\n",tmpocc.x, tmpocc.lon,lmaxbr,tmpocc.num,newlongsaut); #endif res += sauteSymbole(tmpocc, pocc, cr, curbloc, newlongsaut, text); } } } } #if DEBUG_SAUT printf("SauteBranche: J'ai trouve %d occ\n",res); #endif return res; } /******************************************************************************/ /* gestionSaut */ /******************************************************************************/ NbSeq gestionSaut(P_PileOcc pocc, P_Criteres cr, NbSeq curbloc, signed char **text) { LongSeq pos, precdummy; Occ curocc; P_occ tmpocc; int res = 0; pos = pocc->pos-1; precdummy = getPrecDummy(pocc); tmpocc = pocc->occ+pos; ajouteDummy(pocc); while ( (pos != precdummy) && (tmpocc->x != NULL) ) { curocc = *tmpocc; if (cr->saut[curbloc].min == 0) res+=sauteSymbole(curocc, pocc, cr, curbloc, 0, text); else res += sauteBranche(curocc, pocc, cr, curbloc, 0, text); pos--; tmpocc = pocc->occ+pos; } /* if(res==0) */ /* depileRec(pocc); */ return (res); } /******************************************************************************/ /* sommeBTOcc */ /******************************************************************************/ /* Fait l'union des sequences d'une liste d'occurrence et renvoie le nombre */ /* de ces sequences. 
*/ /******************************************************************************/ NbSeq sommeBTOcc(P_PileOcc p, Bit_Tab ** bt) { LongSeq pos, precdummy; P_occ po; ReinitBitTab(bt); pos = p->pos-1; if(pos < 0) fatalError("grep+.c: sommeBTOcc: wrong stack position\n"); po = p->occ+pos; precdummy = getPrecDummy(p); while ((pos != precdummy) && (po->x != NULL)) { #if DEBUG_BT printf("Fusion avec : "); #endif if(po->x->fils[po->num]->debut & LEAF_BIT) { fusionneBitTab(bt,((Feuille *)po->x->fils[po->num])->sequences); #if DEBUG_BT printBitTab(((Feuille *)po->x->fils[po->num])->sequences); #endif } else { fusionneBitTab(bt,po->x->fils[po->num]->sequences); #if DEBUG_BT printBitTab(po->x->fils[po->num]->sequences); #endif } pos--; po--; } #if DEBUG_BT printf("Somme BT : \n"); printBitTab(*bt); printf(" -> %d values\n", nbSequenceInBitTab(*bt)); #endif return nbSequenceInBitTab(*bt); } /******************************************************************************/ /* chercheMot */ /******************************************************************************/ /* Explore les modeles recursivement. */ /******************************************************************************/ void chercheMot (Arbre a, P_Criteres cr, signed char *mot, unsigned int *nb_occ, unsigned int *nbocc_ex, FILE *f) { int symbol, trans; LongSeq lmaxbr, pos, precdummy, longmod = 0, curbloc = 0; P_occ tmpocc; int nbocc = 0; char carseq; static P_mod model = NULL; static P_PileOcc pocc = NULL; static P_occ next = NULL; static Bit_Tab *colors_model; static char flag=0; /* PREPARATIFS... 
*/ if(flag == 0) { /* Allocation du modele utilise lors de la recherche */ model = allocModel(); /* Allocation du tableau de bits courant */ colors_model = AllocBitTab(); ReinitBitTab(&colors_model); /* Initialisation de l'occurrence courante */ next = (P_occ) calloc (1,sizeof(Occ)); if (next == NULL) fatalError("doSpell: cannot allocate 'next'\n"); /* Allocation des piles d'occurrences */ pocc = creePileOcc(); flag = 1; } else { model->name[0] = '\0'; model->lon = 0; videPile(pocc); } initOcc(next); /* Ajout de l'occurrence nulle dans la pile d'occurrence */ ajouteInitOcc2Pile(pocc, a.arbre); /* CONDITION D'EXTENSION */ for(; *mot!=-1; mot++) { symbol = (int) *mot; #if DEBUG_BASE printf("LONGMOD %d: j'etends %s vers %s%c\n",longmod,model->name, model->name,lettres[symbol]); #endif pos = pocc->pos-1; tmpocc = pocc->occ+pos; precdummy = getPrecDummy(pocc); ajouteDummy(pocc); nbocc = 0; #if DEBUG_BASE printf("J'ENTRE (l=%d symbol=%d model=%s)\n",longmod,symbol, model->name); #endif while ((pos != precdummy) && (tmpocc->x != NULL)) { lmaxbr = ((tmpocc->x->fils)[tmpocc->num]->debut & LEAF_BIT)? 
getValue(Liste_positions_fin, ((Feuille *)tmpocc->x->fils[tmpocc->num])->fin_deb) - (tmpocc->x->fils[tmpocc->num]->debut & LEAF_BIT_INV) : tmpocc->x->fils[tmpocc->num]->fin - tmpocc->x->fils[tmpocc->num]->debut; #if DEBUG_BASE if(longmod!=0) { printf("Je traite l'occ:%p num %d lon %d saut %d codesaut %d (longmod=%d)\n", tmpocc->x,tmpocc->num,tmpocc->lon,tmpocc->saut, tmpocc->codesaut, longmod); afficheOcc(stdout, tmpocc, longmod,0); printf("...et je trouve:\n"); } #endif /* on est au milieu d'une branche - une transition possible */ if (tmpocc->lon != lmaxbr) { carseq = a.text[tmpocc->x->fils[tmpocc->num]->sequence_number] [ (tmpocc->x->fils[tmpocc->num]->debut & LEAF_BIT_INV) + tmpocc->lon]; if ( (carseq != FINAL) && (avanceBranche(next, tmpocc, symbol, carseq2num[(int)carseq], 0, cr, curbloc, cr->multiblocs) ) ) { ajouteOcc2Pile(pocc, next->x, next->num, next->lon, next->xerr,next->blocerr, next->saut, next->codesaut); #if DEBUG_BASE printf("occ:%p num %d lon %d saut %d codesaut %d (longmod=%d)\n", next->x,next->num,next->lon,next->saut, next->codesaut, longmod); afficheOcc(stdout, next, longmod+1,0); #endif nbocc++; } } /* sinon on est a un noeud, plusieurs trans sont eventuellement possibles */ else { for (trans = 0; trans != nbSymbSeq; trans++) { tmpocc=pocc->occ+pos; if (avanceBranche(next, tmpocc, symbol, trans, 1, cr, curbloc, cr->multiblocs)) { ajouteOcc2Pile(pocc, next->x, next->num, next->lon, next->xerr, next->blocerr, next->saut, next->codesaut); #if DEBUG_BASE printf("occ:%p num %d lon %d saut %d codesaut %d (longmod=%d)\n", next->x,next->num,next->lon,next->saut, next->codesaut, longmod); afficheOcc(stdout, next, longmod+1, 0); #endif nbocc++; } } } /* Si on n'a plus d'occurrences dans la pile */ if(pos == 0) { #if DEBUG_BASE printf("break avec %d occ\n",nbocc); #endif break; } pos--; tmpocc=pocc->occ+pos; #if DEBUG_PILE printf("pos pile %d (adresse %p), len mod %d, nbocc %d\n",pos, tmpocc,longmod,nbocc); printf("x %p\n",tmpocc->x); #endif } #if 
DEBUG_BASE printf("J'ai trouve %d occ\n",nbocc); afficheOldOcc(pocc, longmod+1); #endif if (nbocc == 0) { if(nb_occ != NULL) *nb_occ = 0; if(nbocc_ex!=NULL) *nbocc_ex = 0; return; } /***************/ /* CAS DU SAUT */ /***************/ if ( cr->multiblocs && ( *(mot+1)==numSAUT )) { if ( gestionSaut(pocc, cr, curbloc, a.text) == 0 ) { if(nb_occ != NULL) *nb_occ = 0; if(nbocc_ex!=NULL) *nbocc_ex = 0; return; } changeModel(model, symbol); changeModel(model, numSAUT); #if DEBUG_SAUT afficheOldOcc(pocc,longmod+1); #endif curbloc++; mot++; } else { #if DEBUG_BASE printf("nbocc = %d\n",nbocc); #endif changeModel(model, symbol); } longmod++; } *nb_occ = sommeBTOcc(pocc, &colors_model); if(nbocc_ex == NULL && f == NULL) return; keepModel(pocc, longmod, cr, nbocc_ex, f); return; } /******************************************************************************/ /* valideCriteres */ /******************************************************************************/ Flag valideCriteres(P_Criteres cr) { /* Verification de la coherence des criteres */ if(!verifCriteres(*cr)) return FAUX; initEntiers(); if(cr->bloc > 1) { initTabSauts(cr); cr->multiblocs = VRAI; } else cr->multiblocs = FAUX; return VRAI; } /******************************************************************************/ /* termineRecherche */ /******************************************************************************/ /* void termineRecherche(void) */ /* { */ /* free(next); */ /* liberePileOcc(pocc); */ /* } */ /******************************************************************************/ /* chargeSequence */ /* Lit les sequences dans un fichier FASTA, rajoute un caractere FINAL. 
*/ /******************************************************************************/ Flag chargeSequence(Arbre *a, char *fic) { FastaSequence **seq; Flag readok; int taille, siztxt, i; FILE *fasta; /* Allocations */ seq = (FastaSequence **) malloc(GRAINSEQ * sizeof(FastaSequence *)); a->text = (signed char **) malloc(GRAINSEQ * sizeof(signed char *)); if(!seq || !a->text) fatalError("charSequence: cannot allocate 'seq/text'\n"); siztxt = GRAINSEQ; /* Ouverture du fichier contenant les sequences */ fasta = fopen (fic,"r"); if(fasta == NULL) { fprintf(stderr,"charSequence: cannot open fasta file '%s'\n",fic); return FAUX; } readok = 1; a->nbtxt= 0; /* Stockage des sequences en memoire */ do { if(a->nbtxt == siztxt) { siztxt *= 2; seq = (FastaSequence **) realloc(seq,siztxt * sizeof(FastaSequence *)); a->text = (signed char **) realloc(a->text, siztxt * sizeof(signed char *)); if(!seq || !a->text) fatalError("chargeSequence: cannot reallocate 'seq/text'\n"); } seq[a->nbtxt] = NewFastaSequence(); readok = ReadFastaSequence(fasta, seq[a->nbtxt]); if (readok) { taille = seq[a->nbtxt]->length+1; a->text[a->nbtxt] = (signed char *) malloc ((taille+2) * sizeof(signed char)); if (a->text[a->nbtxt] == NULL) fatalError("chargeSequence: cannot allocate 'text'\n"); strcpy((char *) a->text[a->nbtxt],seq[a->nbtxt]->seq); a->text[a->nbtxt][taille-1] = FINAL; a->text[a->nbtxt][taille] = '\0'; (a->nbtxt)++; } } while (readok); fclose(fasta); /* Liberation de la structure Fasta */ for(i=0;i != a->nbtxt;i++) FreeFastaSequence(seq[i]); free(seq); return VRAI; } /******************************************************************************/ /* creeArbreSuffixe */ /******************************************************************************/ Flag creeArbreSuffixe(Arbre *a, int maxlongmod, char *alphaseq) { int i; Noeud *root_pere; if(alphaseq==NULL) alphaseq = chargeAlphaSeq((Symbole **) a->text, a->nbtxt, NULL); /* Construction de l'arbre compact generalise */ /* fprintf(stderr, "** 
Constructing suffix tree **\n"); */ /* barre(a->nbtxt); */ Init_All((unsigned char *) alphaseq, 0, a->nbtxt); a->arbre = Construction_Arbre((unsigned char *)a->text[0], maxlongmod); /* barre(0); */ for (i = 1; i != a->nbtxt; i++) { a->arbre = AjouteSequence(a->arbre,(unsigned char *)a->text[i], maxlongmod); /* barre(0); */ } UpdateBit_TabForAllTree(a->arbre); /* Creation du faux pere du pere (pour faciliter la recursion) */ root_pere = Alloc_Noeud(); root_pere->fils[Translation_Table[FINAL]] = a->arbre; root_pere->sequence_number = 0; a->arbre->debut = 0; a->arbre->fin = 1; a->arbre->sequence_number = 0; a->arbre = root_pere; return VRAI; } /******************************************************************************/ /* creeArbreSuffixeFromFile */ /******************************************************************************/ Flag creeArbreSuffixeFromFile(Arbre *a, char *fic, int maxlongmod, char *alphaseq) { if( !chargeSequence(a, fic) || a->nbtxt < 1 ) { fprintf(stderr,"Not enough sequences (<1)\n"); return FAUX; } creeArbreSuffixe(a, maxlongmod, alphaseq); return VRAI; } /******************************************************************************/ /* creeArbreSuffixeFromArray */ /******************************************************************************/ Flag creeArbreSuffixeFromArray(Arbre *a, char **seq, int nbseq, int maxlongmod, char *alphaseq) { if( nbseq < 1 ) { fprintf(stderr,"Not enough sequences (<1)\n"); return FAUX; } a->nbtxt = nbseq; a->text = (signed char **) seq; creeArbreSuffixe(a, maxlongmod, alphaseq); return VRAI; } /******************************************************************************/ /* libereArbreSuffixeFromFile */ /******************************************************************************/ void libereArbreSuffixeFromFile(Arbre a) { int i; libereArbreSuffixeFromArray(a); for(i=0; i!=a.nbtxt; i++) free(a.text[i]); free(a.text); } /******************************************************************************/ /* 
libereArbreSuffixeFromArray */
/******************************************************************************/
/* Releases the tree held in a.arbre, then calls Free_All_Liste_Cell and      */
/* frees the global Liste_positions_fin. a.text is not touched here.          */
void libereArbreSuffixeFromArray(Arbre a)
{
  Free_Arbre(a.arbre);
  Free_All_Liste_Cell();
  Free_ListePositions(Liste_positions_fin);
}

/******************************************************************************/
/******************************************************************************/
/********************************** MAIN **************************************/
/******************************************************************************/
/******************************************************************************/
/* int main(int argc, char **argv) { Criteres criteres, cr; FILE *f=NULL; unsigned int nbocc, nboccex; Arbre a,b; initCriteres(&criteres); initCriteres(&cr); setBloc(&criteres,2); setLongueurBloc(&criteres,0,8); setLongueurBloc(&criteres,1,4); setErreurGlobal(&criteres,1); setErreurBloc(&criteres,0,0); setErreurBloc(&criteres,1,1); setSaut(&criteres,0,8,10); if(valideCriteres(&criteres) == FAUX) return 1; setBloc(&cr,3); setLongueurBloc(&cr,0,3); setLongueurBloc(&cr,1,3); setLongueurBloc(&cr,2,3); setErreurGlobal(&cr,1); setErreurBloc(&cr,0,1); setErreurBloc(&cr,1,1); setErreurBloc(&cr,2,1); setSaut(&cr,0,2,5); setSaut(&cr,1,0,3); if(valideCriteres(&cr) == FAUX) return 1; f = fopen("out","w"); printf("Je construit l'arbre de taille %d\n",maxLongMod(cr)); creeArbreSuffixeFromFile(&b, "seq", maxLongMod(cr)); chercheMot(b,&cr, "AA_CA_AA",&nbocc, &nboccex, f); printf("%d %d!\n",nbocc, nboccex); libereArbreSuffixe(b); creeArbreSuffixe(&a, "ficseq",maxLongMod(criteres)); chercheMot(a,&criteres, "GCGACATA_GATG",&nbocc, &nboccex, f); printf("%d %d!\n",nbocc, nboccex); libereArbreSuffixe(a); return(0); } */
SMILEv1.47/SigStat/grep+/Spell/src/alphabet.c0000644002404200237300000003171410066544223020316 0ustar lamaaoc00000000000000/******************************************************************************/
/* SMILE v1.47 - Extraction of 
structured motifs common to several sequences */ /* Copyright (C) 2004 L.Marsan (lama -AT- prism.uvsq.fr) */ /* */ /* This program is free software; you can redistribute it and/or */ /* modify it under the terms of the GNU General Public License */ /* as published by the Free Software Foundation; either version 2 */ /* of the License, or (at your option) any later version. */ /* */ /* This program is distributed in the hope that it will be useful, */ /* but WITHOUT ANY WARRANTY; without even the implied warranty of */ /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ /* GNU General Public License for more details. */ /* */ /* You should have received a copy of the GNU General Public License */ /* along with this program; if not, write to the Free Software */ /* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ /******************************************************************************/ #include /******************************************************************************/ /* VARIABLES GLOBALES */ /******************************************************************************/ Flag TabSymb[MAXSYMBMOD][MAXSYMBMOD]; /* Table d'equivalences */ /* char *alphMod[MAXSYMBMOD]={0}; Alphabet des modeles */ Symbole alphSeq[MAXSYMBMOD]={0}; /* Alphabet des sequences */ int carseq2num[MAXSYMBMOD]; /* Conversion caractere => indice ds alphabet */ char *nummod2str[MAXSYMBMOD]; /* Conversion num modeles => symboles */ int nbSymbMod = 0, /* Nb de symboles de l'alphabet de modeles*/ nbSymbSeq = 0, /* idem sequences */ numSAUT = -1, /* code utilise pour SAUT ds nummod2str */ numJOKER = -1; /* code utilise pour JOKER ds nummod2str */ enum {DNA, PROTEINS, UNKNOWN} type; /* Type de la sequence lue */ /******************************************************************************/ /* initAlphabet: */ /* initialise les variables globales de la classe. 
*/ /******************************************************************************/ void initAlphabet(void) { int i, j; for(i=0; i!=MAXSYMBMOD; i++) { carseq2num[i] = -1; for(j=0; j!=MAXSYMBMOD; j++) TabSymb[i][j] = 0; } alphSeq[0] = '\0'; } /******************************************************************************/ /* chargeAlphaSeq: */ /* chargement de l'alphabet du texte passe en parametre */ /******************************************************************************/ Symbole * chargeAlphaSeq(Symbole **seq, NbSeq nbseq, char *alphaseq) { int i; Symbole *j; char tmp[MAXSYMBMOD]={0}, s; if(alphaseq!=NULL) { strcpy(alphSeq, alphaseq); nbSymbSeq = strlen(alphSeq)-1; for(i=0; i!=nbSymbSeq; i++) carseq2num[(int) *(alphSeq+i)] = i; carseq2num[(int) FINAL] = nbSymbSeq; return alphSeq; } /* PARCOURS DU TEXTE pour recherche de l'alphabet utilise. */ for(i=0; i!=nbseq; i++) { for(j=seq[i]; (*j)!=FINAL; j++) { s = *j; if(s<32 || s>=MAXSYMBMOD) { fprintf(stderr, ">> Error: Seq %d Pos %d control character %d ('%c') forbidden\n", i, (int)(j-seq[i]), s, s); exit(1); } else if(s == JOKER || s == SAUT) { fprintf(stderr, ">> Error: Seq %d Pos %d character %d ('%c') forbidden in sequences\n", i, (int)(j-seq[i]), s, s); exit(1); } else if(!isalnum((int)s)) fprintf(stderr, "> Warning: Seq %d Pos %d non-alphanumeric character '%c'\n", i, (int)(j-seq[i]), s); else if(islower((int) s)) /* Mise en majuscules de la sequence */ { s = (Symbole) toupper((int) s); *j = s; } tmp[(int) s] = 1; } } /* Construction de la chaine a fournir pour construire l'arbre */ for(i=32, nbSymbSeq = 0; i!=MAXSYMBMOD; i++) { if(tmp[i]==1) { alphSeq[nbSymbSeq] = i; carseq2num[i] = nbSymbSeq; nbSymbSeq++; } } carseq2num[(int) FINAL] = nbSymbSeq; alphSeq[nbSymbSeq] = FINAL; alphSeq[nbSymbSeq+1] = '\0'; printf("** Text alphabet: %s (%d symbols + terminator) **\n", alphSeq, nbSymbSeq); return alphSeq; } /******************************************************************************/ /* chargeAlphaMod: */ /* 
lecture du fichier alphabet et construction de la matrice d'equivalence. */ /******************************************************************************/ void chargeAlphaMod(FILE *f) { int i, k; Symbole *j; char s, line[512]; /* LECTURE DU FICHIER ALPHABET */ fgets(line, 512, f); /* Determination du type d'alphabet */ if(strstr(line, "Nucleotide")) { type = DNA; fprintf(stderr, "** Models alphabet: Nucleotides **\n"); } else if(strstr(line, "Protein")) { type = PROTEINS; fprintf(stderr, "** Models alphabet: Amino acids **\n"); } else { type = UNKNOWN; fprintf(stderr, "** Models alphabet: Unknown type **\n"); } /* Lecture des lignes de l'alphabet des modeles */ nbSymbMod = 0; while(fgets(line, 512, f)) { j = (Symbole *) line; if(*j == '\n') continue; /* alphMod[nbSymbMod] = (char *) malloc((strlen(line)+1)*sizeof(char)); */ /* strcpy(alphMod[nbSymbMod], line); */ while(*j != '\0' && *j != '\n') { s = *j; if(s<=32 || s>=MAXSYMBMOD) { fprintf(stderr, ">> Error: controle character '%c' forbidden in alphabet file\n", s); exit(1); } else if(s == JOKER) { if(j!=(Symbole *)line || (*(j+1)!='\0' && *(j+1)!='\n')) { fprintf(stderr, ">> Error: JOKER character '%c' must be alone\n", JOKER); exit(1); } if(numJOKER != -1) fatalError("JOKER defined 2 times in alphabet file\n"); numJOKER = nbSymbMod; for(k=0; k!=nbSymbSeq; k++) TabSymb[numJOKER][k] = 1; } else if(s == FINAL || s == SAUT) { fprintf(stderr, ">> Error: character %d ('%c') forbidden in alphabet file\n", s, s); exit(1); } else if(!isalnum((int)s)) { fprintf(stderr, "Warning: non-alphanumeric charactere '%c' in alphabet file\n", s); TabSymb[nbSymbMod][carseq2num[(int)s]] = 1; } else { if(islower((int) s)) *j = s = (Symbole) toupper((int) s); TabSymb[nbSymbMod][carseq2num[(int)s]] = 1; } j++; } if(*j == '\n') *j = '\0'; if(!(nummod2str[nbSymbMod] = (char *) malloc((strlen(line)+4)*sizeof(char)))) fatalError("chargeAlphabet: cannot allocate 'nummod2str[i]'\n"); strcpy(nummod2str[nbSymbMod], line); nbSymbMod++; } /* 
Ajout des symboles speciaux dans nummod2str */ numSAUT = nbSymbMod; if(!(nummod2str[numSAUT] = (char *) malloc(2*sizeof(char)))) fatalError("chargeAlphabet: cannot allocate 'nummod2str[i]'\n"); nummod2str[nbSymbMod][0] = SAUT; nummod2str[nbSymbMod][1] = '\0'; /* Affiche l'alphabet des modeles */ /* for(i=0; i!=nbSymbMod; i++) */ /* { */ /* printf("SymbMod %d\t%s\n",i,nummod2str[i]); */ /* } */ /* Info sur l'alphabet du texte */ for(i=0; i!=nbSymbSeq; i++) { s = 0; for(k=0; k!=nbSymbMod; k++) s |= (char) TabSymb[k][i]; if(!s) fprintf(stderr,"> Warning: text symbol '%c' isn't recognized by any model's symbol in alphabet file.\n", alphSeq[i]); } /* transAlphMod(); */ } /******************************************************************************/ /* strshfl - teste si deux chaines sont le shuffling l'une de l'autre */ /******************************************************************************/ Flag strshfl(char * a, char * b) { char * p; if(strlen(a) != strlen(b)) return FAUX; p = a; while(*p!='\0') { if(!strchr(b, *p)) return FAUX; p++; } return VRAI; } /******************************************************************************/ /* transAlphMod */ /******************************************************************************/ /* void transAlphMod(void) */ /* { */ /* int i,j; */ /* char tmp[512], joker[2]; */ /* */ /* joker[0] = JOKER; */ /* joker[1] = '\0'; */ /* */ /* for(i=0; i!=nbSymbMod; i++) */ /* { */ /* if(type == DNA) */ /* { */ /* if( strshfl(nummod2str[i],"ARN")) */ /* { */ /* fprintf(stderr, "Symbole ARN ->> A\n"); */ /* sprintf(nummod2str[i],"A"); */ /* } */ /* else if( strshfl(nummod2str[i],"GRN")) */ /* { */ /* fprintf(stderr, "Symbole GRN ->> G\n"); */ /* sprintf(nummod2str[i],"G"); */ /* } */ /* else if( strshfl(nummod2str[i],"CYN")) */ /* { */ /* fprintf(stderr, "Symbole CYN ->> C\n"); */ /* sprintf(nummod2str[i],"C"); */ /* } */ /* else if( strshfl(nummod2str[i],"TYN")) */ /* { */ /* fprintf(stderr, "Symbole TYN ->> T\n"); */ /* 
sprintf(nummod2str[i],"T"); */ /* } */ /* else if(strshfl(nummod2str[i],"AG")) */ /* { */ /* fprintf(stderr, "Symbole AG ->> R\n"); */ /* sprintf(nummod2str[i],"R"); */ /* } */ /* else if (strshfl(nummod2str[i],"CT")) */ /* { */ /* fprintf(stderr, "Symbole CT ->> Y\n"); */ /* sprintf(nummod2str[i],"Y"); */ /* } */ /* else if(nummod2str[i][1]=='\0') */ /* { */ /* if(!strcmp(nummod2str[i][0],joker)) */ /* { */ /* fprintf(stderr, "Symbole '%c' ->> N\n", JOKER); */ /* sprintf(nummod2str[i],"N"); */ /* } */ /* } */ /* else */ /* { */ /* strcpy(tmp, nummod2str[i]); */ /* sprintf(nummod2str[i],"[%s]",tmp); */ /* sprintf(nummod2str[i],"%s",tmp); */ /* } */ /* } */ /* else if(type == PROTEINS) */ /* { */ /* if(!strcmp(nummod2str[i],joker)) */ /* { */ /* fprintf(stderr, "Symbole '%c' ->> X\n", JOKER); */ /* sprintf(nummod2str[i],"X"); */ /* } */ /* else */ /* { */ /* strcpy(tmp, nummod2str[i]); */ /* sprintf(nummod2str[i],"[%s]",tmp); */ /* sprintf(nummod2str[i],"%s",tmp); */ /* } */ /* } */ /* else */ /* { */ /* if(strcmp(nummod2str[i],joker)) */ /* { */ /* strcpy(tmp, nummod2str[i]); */ /* sprintf(nummod2str[i],"[%s]",tmp); */ /* sprintf(nummod2str[i],"%s",tmp); */ /* } */ /* } */ /* } */ /* for(i=0; i!=nbSymbMod; i++) */ /* { */ /* for(j=0;j<=nbSymbSeq;j++) */ /* if(TabSymb[i][j]) */ /* printf("Je fais matcher %s avec %c\n",alphMod[i],alphSeq[j]); */ /* } */ /* for(j=0;j<=nbSymbSeq;j++) */ /* if(TabSymb[numSAUT][j]) */ /* printf("Je fais matcher %s avec %c\n",alphMod[numSAUT],alphSeq[j]); */ /* for(j=0;j<=nbSymbSeq;j++) */ /* if(TabSymb[numJOKER][j]) */ /* printf("Je fais matcher %s avec %c\n",alphMod[numJOKER],alphSeq[j]); */ /* */ /* } */ SMILEv1.47/SigStat/grep+/Spell/src/barre.c0000777002404200237300000000000010066542220025115 2../../../../P_BLOCS/Spell/src/barre.custar lamaaoc00000000000000SMILEv1.47/SigStat/grep+/Spell/src/pile_occ.c0000644002404200237300000003105310066544254020313 0ustar 
lamaaoc00000000000000/******************************************************************************/ /* SMILE v1.47 - Extraction of structured motifs common to several sequences */ /* Copyright (C) 2004 L.Marsan (lama -AT- prism.uvsq.fr) */ /* */ /* This program is free software; you can redistribute it and/or */ /* modify it under the terms of the GNU General Public License */ /* as published by the Free Software Foundation; either version 2 */ /* of the License, or (at your option) any later version. */ /* */ /* This program is distributed in the hope that it will be useful, */ /* but WITHOUT ANY WARRANTY; without even the implied warranty of */ /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ /* GNU General Public License for more details. */ /* */ /* You should have received a copy of the GNU General Public License */ /* along with this program; if not, write to the Free Software */ /* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ /******************************************************************************/ #include /* FONCTIONS PRIVEES */ #if OCC int recAfficheOcc(FILE *f,Noeud * n, LongSeq l, P_Criteres cr, int codesaut); #endif extern char **text; extern int carseq2num[127]; extern int nbSymbSeq; /******************************************************************************/ /* creePileOcc */ /******************************************************************************/ P_PileOcc creePileOcc(void) { P_PileOcc p; p=(P_PileOcc)malloc(sizeof(PileOcc)); if(p==NULL) fatalError("creePileOcc: cannot allocate 'p'\n"); p->occ=(P_occ)calloc(GRAIN, sizeof(Occ)); if(p->occ==NULL) fatalError("creePileOcc: cannot allocate 'p->occ'\n"); p->carte=(unsigned int *)malloc(GRAIN_SIZMOD*sizeof(unsigned int)); if(p->carte==NULL) fatalError("creePileOcc: cannot allocate 'p->carte'\n"); p->size=GRAIN; p->size_carte=GRAIN_SIZMOD; p->pos=0; p->pos_carte=0; ajouteDummy(p); return(p); } /* MODE D'EMPLOI DES DUMMYS EN MULTIBLOC 
DELTA */ /* Si le parametre principal vaut VRAI c'est un dummy de separation entre * recursions. */ /* Sinon c'est un dummy de separation entre sauts. */ /* Dans les deux cas le dernier parametre indique le code d'intervalle * concerne par les occurrences qui suivent. */ /******************************************************************************/ /* ajouteDummy */ /******************************************************************************/ void ajouteDummy(P_PileOcc p) { #if DEBUG_PILE unsigned int * t=p->carte; #endif if(p->pos_carte>=p->size_carte) { p->size_carte+=GRAIN_SIZMOD; #if DEBUG_PILE printf("J'etends carte a %d\n",p->size_carte); #endif p->carte=(unsigned int *)realloc(p->carte,p->size_carte *sizeof(unsigned int)); if(p->carte==NULL) fatalError("pile_occ.c: ajouteDummy: cannot reallocate 'p->carte'"); #if DEBUG_PILE if(t!=p->carte) printf("CHANGEMENT D'emplacement memoire de la carte\n"); #endif } p->carte[p->pos_carte]=p->pos; p->pos_carte++; ajouteOcc2Pile(p, NULL, -1, -1, -1, -1, -1, -1); } /******************************************************************************/ /* getPrecDummy */ /******************************************************************************/ LongSeq getPrecDummy(P_PileOcc p) { return p->carte[p->pos_carte-1]; } /******************************************************************************/ /* ajouteInitOcc2Pile */ /******************************************************************************/ void ajouteInitOcc2Pile(P_PileOcc p, Noeud *x) { P_occ ptr; ptr=p->occ+p->pos; ptr->x=x; ptr->num=carseq2num[(int) FINAL]; ptr->lon=1; ptr->xerr=0; ptr->blocerr=0; ptr->saut=0; ptr->codesaut=0; p->pos++; } /******************************************************************************/ /* ajouteOcc2Pile */ /******************************************************************************/ void ajouteOcc2Pile(P_PileOcc p, Noeud *x, int num, LongSeq lon, LongSeq err, LongSeq blocerr, LongSeq saut, int codesaut) { P_occ ptr; #if 
DEBUG_PILE printf("j'ecris une nouvelle occurrence en %d\n", p->pos); #endif if(p->pos>=p->size) { #if DEBUG_PILE printf("JE RESIZE (ajoute)\n"); #endif p->size+=GRAIN; ptr = p->occ; p->occ=(P_occ)realloc(p->occ, (p->size)*sizeof(Occ)); if(p->occ==NULL) fatalError("ajouteOcc2Pile: cannot reallocate 'p->occ'\n"); #if DEBUG_PILE if(p->occ!=ptr) printf("changement d'emplacement memoire de pileocc\n"); #endif } ptr = p->occ+p->pos; ptr->x = x; ptr->num = num; ptr->lon = lon; ptr->xerr = err; ptr->blocerr = blocerr; ptr->saut = saut; ptr->codesaut = codesaut; p->pos++; } /******************************************************************************/ /* copieLastOcc */ /******************************************************************************/ int copieLastOcc(P_PileOcc dest, P_PileOcc source) { int last_dummy, nbocc; if(source->pos==0) return 0; if(source->pos_carte==0) last_dummy=-1; else last_dummy=source->carte[source->pos_carte-1]; nbocc = source->pos-last_dummy; if(nbocc==1) return 1; if(((dest->pos)+(nbocc-1))>=(dest->size)) { #if DEBUG_PILE printf("JE RESIZE (copie) posdest %d sizedest %d possource %d\n", dest->pos, dest->size, source->pos); #endif dest->size=(int)ceil(((double)(dest->pos+nbocc-1))/((double)GRAIN))*GRAIN; #if DEBUG_PILE printf("New size %d\n",dest->size); #endif dest->occ=(P_occ)realloc(dest->occ, dest->size*sizeof(Occ)); if(dest->occ==NULL) fatalError("ajouteOcc2Pile: cannot reallocate 'dest->occ'\n"); } memcpy(dest->occ+dest->pos, source->occ+last_dummy+1, (nbocc-1)*sizeof(Occ)); dest->pos+=nbocc-1; return nbocc-1; } /******************************************************************************/ /* transferePile2Pile */ /******************************************************************************/ void transferePile2Pile(P_PileOcc dest, P_PileOcc source) { #if DEBUG_PILE P_occ t=dest->occ; #endif if(source->pos==0) return; if(((dest->pos)+(source->pos))>=(dest->size)) { #if DEBUG_PILE printf("JE RESIZE (transfere) posdest %d sizedest 
%d possource %d\n", dest->pos, dest->size, source->pos); #endif dest->size=(int)ceil(((double)(dest->pos+source->pos))/((double)GRAIN))*GRAIN; #if DEBUG_PILE printf("New size %d\n",dest->size); #endif dest->occ=(P_occ)realloc(dest->occ, dest->size*sizeof(Occ)); if(dest->occ==NULL) fatalError("ajouteOcc2Pile: cannot reallocate 'dest->occ'\n"); #if DEBUG_PILE if(t!=dest->occ) printf("chgmnt d'empl memoire de pocc (transfere)\n"); #endif } memcpy(dest->occ+dest->pos, source->occ, source->pos*sizeof(Occ)); dest->pos+=source->pos; source->pos=0; } /******************************************************************************/ /* depileRec */ /******************************************************************************/ void depileRec(P_PileOcc p) { if(p->pos_carte==0) p->pos=0; else { p->pos_carte--; p->pos=p->carte[p->pos_carte]; } } /******************************************************************************/ /* videPile */ /******************************************************************************/ void videPile(P_PileOcc p) { p->pos=0; p->pos_carte=0; ajouteDummy(p); } /******************************************************************************/ /* liberePileOcc */ /******************************************************************************/ void liberePileOcc(P_PileOcc p) { free(p->occ); free(p->carte); free(p); } #if DEBUG_BASE /******************************************************************************/ /* affichePileOcc */ /******************************************************************************/ void affichePileOcc(P_PileOcc p) { int i; for(i=p->pos-1; i>=0; i--) if(p->occ[i].x==NULL) printf("====== DUMMY =======\n"); else printf("num %p branche %c lon %d err %d\n",p->occ[i].x, lettres[p->occ[i].num], p->occ[i].lon, p->occ[i].xerr); /* printf("num %d branche %c lon %d err %d\n",p->occ[i].x->numero, */ /* lettres[p->occ[i].num], p->occ[i].lon, p->occ[i].xerr); */ printf("--------------------------------------\n"); } #endif #if OCC 
/******************************************************************************/ /* afficheLastOcc */ /******************************************************************************/ /* Lance l'affichage de tous les motifs associes aux occurrences trouvees */ /******************************************************************************/ void afficheLastOcc(FILE *f, P_PileOcc pocc, LongSeq l, P_Criteres cr, unsigned int *nboccex) { int i=pocc->pos-1,nbocc=0; P_occ ptr; ptr=pocc->occ+i; while((i>0) && (ptr->x!=NULL)) { nbocc+=recAfficheOcc(f,ptr->x->fils[ptr->num], l-ptr->lon+ptr->saut, cr, ptr->codesaut); ptr--; i--; } if(f!=NULL) fprintf(f,"%d\n",nbocc); if(nboccex!=NULL) *nboccex = nbocc; } /******************************************************************************/ /* recAfficheOcc */ /******************************************************************************/ /* Parcourt l'arbre recursivement pour atteindre les feuilles et affiche */ /******************************************************************************/ int recAfficheOcc(FILE *f,Noeud * n, LongSeq l, P_Criteres cr, int codesaut) { int i,nbocc=0; /* #if DEBUG_BASE */ /* printf("J'entre dans recAffiche avec long %d\n",l); */ /* #endif */ /* Si on a atteint une feuille */ if (n->debut & LEAF_BIT) { nbocc += Print_Positions(f, (Feuille *) n, l, cr, codesaut); } else for(i=0; i!=nbSymbSeq; i++) if(n->fils[i]) { /* #if DEBUG_BASE */ /* printf("recAffiche: je passe par %c\n",lettres[i]); */ /* #endif */ nbocc += recAfficheOcc(f,n->fils[i], l+n->fin-n->debut, cr, codesaut); } return nbocc; } /******************************************************************************/ /* affOcc */ /******************************************************************************/ int afficheOcc(FILE *f, P_occ o, LongSeq longmod, P_Criteres cr) { if(o->x != NULL) return(recAfficheOcc(f,o->x->fils[o->num], longmod-o->lon+o->saut, cr, o->codesaut)); return 0; } #endif #if DEBUG_BASE 
/******************************************************************************/ /* afficheOldOcc */ /******************************************************************************/ /* Affiche les occurrences d'un niveau -n dans la pile */ /******************************************************************************/ void afficheOldOcc(P_PileOcc p, LongSeq l) { int pos=p->pos-1; P_occ tmpocc=p->occ+pos; printf("=======HAUT=PILE=========\n"); while(pos >=0 && tmpocc != NULL && tmpocc->lon >= 0) { printf("*** num %d lon %d saut %d codesaut %d lmod %d\n",tmpocc->num, tmpocc->lon,tmpocc->saut,tmpocc->codesaut, l); afficheOcc(stdout,tmpocc,l,0); tmpocc--; pos--; } printf("=*=*=*=*=DUMMY=*=*=*=*=*=\n"); } #endif SMILEv1.47/SigStat/grep+/Spell/include/0000755002404200237300000000000010066544123017217 5ustar lamaaoc00000000000000SMILEv1.47/SigStat/grep+/Spell/include/occ.h0000777002404200237300000000000010066542220025741 2../../../../P_BLOCS/Spell/include/occ.hustar lamaaoc00000000000000SMILEv1.47/SigStat/grep+/Spell/include/criteres.h0000644002404200237300000000745210066544102021215 0ustar lamaaoc00000000000000/******************************************************************************/ /* SMILE v1.47 - Extraction of structured motifs common to several sequences */ /* Copyright (C) 2004 L.Marsan (lama -AT- prism.uvsq.fr) */ /* */ /* This program is free software; you can redistribute it and/or */ /* modify it under the terms of the GNU General Public License */ /* as published by the Free Software Foundation; either version 2 */ /* of the License, or (at your option) any later version. */ /* */ /* This program is distributed in the hope that it will be useful, */ /* but WITHOUT ANY WARRANTY; without even the implied warranty of */ /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ /* GNU General Public License for more details. 
*/
/* */
/* You should have received a copy of the GNU General Public License */
/* along with this program; if not, write to the Free Software */
/* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
/******************************************************************************/
#ifndef _CRITERES_H
#define _CRITERES_H
/* NOTE(review): the header names of the three directives below are missing */
/* in this copy of the file; restore them from the original source. */
#include
#include
#include
#include "global.h"
/******************************************************************************/
/* STORAGE STRUCTURE FOR THE SEARCH CRITERIA */
/******************************************************************************/
/* Inclusive-looking [min, max] range of lengths (TODO confirm bounds). */
typedef struct struct_fourchette
{
  LongSeq min;
  LongSeq max;
} Fourchette, *P_Fourchette;
/******************************************************************************/
/* Only the following are needed for the search: maxerr, bloc, maxerrbloc, */
/* longbloc.max, saut, multiblocs, code2Sauts. The rest is superfluous */
/* (used by sigstat or converter) and therefore has no get/set methods. 
*/
/******************************************************************************/
typedef struct struct_criteres
{
  Fourchette *longbloc;    /* presumably one length range per block */
  Fourchette *saut;        /* presumably one jump range per block boundary */
  LongSeq *maxerrblocs;    /* presumably the error bound of each block */
  int **code2Sauts;        /* presumably filled by initTabSauts - confirm */
  char alphaseq[128];
  char ficalph[128];
  long int nbsymb;
  Fourchette longmod;
  LongSeq maxerr;          /* presumably the global error bound */
  NbSeq nbtotseq;
  NbBlocs bloc;            /* number of blocks */
  Flag multiblocs;         /* set by valideCriteres: VRAI when bloc > 1 */
} Criteres, *P_Criteres;
/******************************************************************************/
/* PUBLIC FUNCTIONS */
/******************************************************************************/
Flag setBloc(P_Criteres, NbBlocs);
Flag setLongueurBloc(P_Criteres, NbBlocs num_bloc, LongSeq lon);
Flag setErreurBloc(P_Criteres, NbBlocs num_bloc, LongSeq erreur);
void setErreurGlobal(P_Criteres, LongSeq erreur);
Flag setSaut(P_Criteres, NbBlocs num_bloc, LongSeq min, LongSeq max);
NbBlocs getBloc(Criteres);
LongSeq getLongueurBloc(Criteres,NbBlocs num_bloc);
LongSeq getErreurBloc(Criteres, NbBlocs num_bloc);
LongSeq getErreurGlobale(Criteres);
LongSeq getJoker(Criteres);
Fourchette getSaut(Criteres,NbBlocs num_bloc);
int maxLongMod(Criteres);
void afficheCriteres(Criteres, FILE *);
Flag verifCriteres(Criteres);
void initCriteres(Criteres *);
Flag chargeCriteres(Criteres *, char *);
int addSaut2Code(int oldcode, LongSeq saut, LongSeq curbloc, P_Criteres cr);
void initTabSauts(P_Criteres);
Flag allocBloc(P_Criteres cr, NbBlocs bloc);
#endif
SMILEv1.47/SigStat/grep+/Spell/include/global.h0000644002404200237300000001002510066544105020626 0ustar lamaaoc00000000000000/******************************************************************************/
/* SMILE v1.47 - Extraction of structured motifs common to several sequences */
/* Copyright (C) 2004 L.Marsan (lama -AT- prism.uvsq.fr) */
/* */
/* This program is free software; you can redistribute it and/or */
/* modify it under the terms of the GNU General Public License */
/* as published by the Free Software Foundation; either version 2 */
/* of the License, or (at your option) 
any later version. */ /* */ /* This program is distributed in the hope that it will be useful, */ /* but WITHOUT ANY WARRANTY; without even the implied warranty of */ /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ /* GNU General Public License for more details. */ /* */ /* You should have received a copy of the GNU General Public License */ /* along with this program; if not, write to the Free Software */ /* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ /******************************************************************************/ #ifndef _GLOBAL_H #define _GLOBAL_H #include #include #include #include #include #include #include #define FAUX 0 #define VRAI 1 typedef char Flag; /******************************************************************************/ /* Flags */ /******************************************************************************/ /* DEBUGGING */ #define DEBUG_BASE 0 /* Debug base */ #define DEBUG_BT 0 /* Tableaux de bits */ #define DEBUG_SAUT 0 /* Procedures de saut */ #define DEBUG_PILE 0 /* Pile d'occurrences */ #define DEBUG_TREE 0 /* Arbre suffixe : HS bicoz Julien */ /******************************************************************************/ /* Define dependants du jeu de donnees */ /******************************************************************************/ /* Grain d'allocation de la taille du modele */ #define GRAIN_SIZMOD 1000 /******************************************************************************/ /* Caracteres speciaux */ /******************************************************************************/ /* => dans symb.h */ /******************************************************************************/ /* Types */ /******************************************************************************/ /* Nombre de sequences */ #define NbSeq int /* Longueur de sequence */ #define LongSeq int /* Nombre de blocs */ #define NbBlocs signed char 
/******************************************************************************/ /* Active DEBUG_BASE si l'un des DEBUGs est active */ /******************************************************************************/ #if DEBUG_BT || DEBUG_SAUT || DEBUG_PILE || DEBUG_TREE #undef DEBUG_BASE #define DEBUG_BASE 1 #endif /******************************************************************************/ /* Fonctions basiques */ /******************************************************************************/ void fatalError(char *msg); int entiers(char); void entree(void); void initEntiers(void); #endif SMILEv1.47/SigStat/grep+/Spell/include/model.h0000777002404200237300000000000010066542220026631 2../../../../P_BLOCS/Spell/include/model.hustar lamaaoc00000000000000SMILEv1.47/SigStat/grep+/Spell/include/grep+.h0000644002404200237300000000557510066544111020411 0ustar lamaaoc00000000000000/******************************************************************************/ /* SMILE v1.47 - Extraction of structured motifs common to several sequences */ /* Copyright (C) 2004 L.Marsan (lama -AT- prism.uvsq.fr) */ /* */ /* This program is free software; you can redistribute it and/or */ /* modify it under the terms of the GNU General Public License */ /* as published by the Free Software Foundation; either version 2 */ /* of the License, or (at your option) any later version. */ /* */ /* This program is distributed in the hope that it will be useful, */ /* but WITHOUT ANY WARRANTY; without even the implied warranty of */ /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ /* GNU General Public License for more details. */ /* */ /* You should have received a copy of the GNU General Public License */ /* along with this program; if not, write to the Free Software */ /* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
*/ /******************************************************************************/ #ifndef _SPELL_H #define _SPELL_H #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /*#include */ #include /* Grain d'allocation des sequences Fasta */ #define GRAINSEQ 500 typedef struct struct_arbre { Noeud *arbre; signed char **text; int nbtxt; } Arbre; /******************************************************************************/ /* PROTOTYPES PUBLICS */ /******************************************************************************/ /* explore les modeles */ void chercheMot ( Arbre a, P_Criteres cr, signed char *mot, unsigned int *nbocc, unsigned int *nboccex, FILE *f); /* creation de l'arbre */ Flag creeArbreSuffixeFromFile(Arbre *, char *fic, int maxlongmod, char *alphaseq); Flag creeArbreSuffixeFromArray(Arbre *, char **seq, int nbseq, int maxlongmod, char *alphaseq); void libereArbreSuffixeFromFile(Arbre); void libereArbreSuffixeFromArray(Arbre); /* Initialise la recherche */ Flag valideCriteres(P_Criteres cr); #endif SMILEv1.47/SigStat/grep+/Spell/include/symb.h0000777002404200237300000000000010066542220026355 2../../../../P_BLOCS/Spell/include/symb.hustar lamaaoc00000000000000SMILEv1.47/SigStat/grep+/Spell/include/alphabet.h0000644002404200237300000000550710066544075021165 0ustar lamaaoc00000000000000/******************************************************************************/ /* SMILE v1.47 - Extraction of structured motifs common to several sequences */ /* Copyright (C) 2004 L.Marsan (lama -AT- prism.uvsq.fr) */ /* */ /* This program is free software; you can redistribute it and/or */ /* modify it under the terms of the GNU General Public License */ /* as published by the Free Software Foundation; either version 2 */ /* of the License, or (at your option) any later version. 
*/ /* */ /* This program is distributed in the hope that it will be useful, */ /* but WITHOUT ANY WARRANTY; without even the implied warranty of */ /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ /* GNU General Public License for more details. */ /* */ /* You should have received a copy of the GNU General Public License */ /* along with this program; if not, write to the Free Software */ /* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ /******************************************************************************/ /******************************************************************************/ /* ALPHABET - Gestion des alphabets de SMILEv1.4 */ /******************************************************************************/ #ifndef _ALPHABET #define _ALPHABET #include #include #include #include #include #define equiv(i,j) TabSymb[i][j] #define MAXSYMBMOD 127 /******************************************************************************/ /* TYPES ABSTRAITS */ /******************************************************************************/ typedef unsigned char Symbole; /******************************************************************************/ /* STRUCTURES */ /******************************************************************************/ /******************************************************************************/ /* PROTOTYPES */ /******************************************************************************/ void initAlphabet(void); Symbole * chargeAlphaSeq(Symbole **seq, NbSeq nbseq, char *alphaseq); void chargeAlphaMod(FILE *f); int str2nummod(char *str); /* void transAlphMod(void); */ #endif SMILEv1.47/SigStat/grep+/Spell/include/barre.h0000777002404200237300000000000010066542220026617 2../../../../P_BLOCS/Spell/include/barre.hustar lamaaoc00000000000000SMILEv1.47/SigStat/grep+/Spell/include/pile_occ.h0000644002404200237300000000573010066544117021155 0ustar 
lamaaoc00000000000000/******************************************************************************/ /* SMILE v1.47 - Extraction of structured motifs common to several sequences */ /* Copyright (C) 2004 L.Marsan (lama -AT- prism.uvsq.fr) */ /* */ /* This program is free software; you can redistribute it and/or */ /* modify it under the terms of the GNU General Public License */ /* as published by the Free Software Foundation; either version 2 */ /* of the License, or (at your option) any later version. */ /* */ /* This program is distributed in the hope that it will be useful, */ /* but WITHOUT ANY WARRANTY; without even the implied warranty of */ /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ /* GNU General Public License for more details. */ /* */ /* You should have received a copy of the GNU General Public License */ /* along with this program; if not, write to the Free Software */ /* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ /******************************************************************************/ #ifndef _PILE_OCC_H #define _PILE_OCC_H #include #include #include #include #include #include #include /* Grain du tableau des occurrences */ #define GRAIN 2000 typedef struct struct_pile_occ { Occ *occ; /* Carte des positions des 'dummy' */ unsigned int *carte; /* Position courante dans carte */ unsigned int pos_carte; unsigned int size_carte; unsigned int size; int pos; } PileOcc, *P_PileOcc; /******************************************************************************/ /* FONCTIONS PUBLIQUES */ /******************************************************************************/ P_PileOcc creePileOcc(void); void ajouteDummy(P_PileOcc); LongSeq getPrecDummy(P_PileOcc); void ajouteInitOcc2Pile(P_PileOcc, Noeud *); void ajouteOcc2Pile(P_PileOcc, Noeud *, int,LongSeq,LongSeq ,LongSeq, LongSeq, int); void transferePile2Pile(P_PileOcc, P_PileOcc); int copieLastOcc(P_PileOcc, P_PileOcc); void 
depileRec(P_PileOcc); void videPile(P_PileOcc); void liberePileOcc(P_PileOcc); #if OCC void afficheLastOcc(FILE *f, P_PileOcc, LongSeq l, P_Criteres cr, unsigned int *nboccex); #endif #if DEBUG_BASE int afficheOcc(FILE *f, P_occ o, LongSeq longmod, P_Criteres cr); void afficheOldOcc(P_PileOcc p, LongSeq l); void affichePileOcc(P_PileOcc); #endif #endif SMILEv1.47/SigStat/grep+/Makefile.bak0000644002404200237300000001145310066542220016711 0ustar lamaaoc00000000000000############################################################################### #Ne pas modifier############################################################### NB_OCCS=1 AFF_OCCS=1 #Compilateur : CC=gcc OPT=-ansi -Wall -O3 #OPT=-g #definition des repertoires des arbres: INCL_DIR=include/ OBJ_DIR=obj/ SRC_DIR=src/ LIB_DIR=lib/ BIN_DIR=bin/ # definition de repertoires de grep+ : SPELL_INCL_DIR=Spell/include/ SPELL_SRC_DIR=Spell/src/ SPELL_OBJ_DIR=Spell/obj/ # Options de compilation : C_FLAG= $(OPT) -I$(INCL_DIR) -I$(SPELL_INCL_DIR) O_FLAG= -c $(OPT) -I$(INCL_DIR) -I$(SPELL_INCL_DIR) -DOCC=$(NB_OCCS) -DAFF_OCC=$(AFF_OCCS) LIB_FLAG= -lm # Nom du programme , de l'archive PROG_NAME=grep+ ARCHIVE_NAME=grep+.tgz #Objet ... TREE_OBJ=$(OBJ_DIR)global_fonctions.o $(OBJ_DIR)construction.o $(OBJ_DIR)liste_pos.o $(OBJ_DIR)allocateurs.o $(OBJ_DIR)bit_tab.o $(OBJ_DIR)libfasta.o $(OBJ_DIR)global_variables.o SPELL_OBJ=$(SPELL_OBJ_DIR)global.o $(SPELL_OBJ_DIR)pile_occ.o $(SPELL_OBJ_DIR)grep+.o $(SPELL_OBJ_DIR)occ.o $(SPELL_OBJ_DIR)criteres.o $(SPELL_OBJ_DIR)barre.o $(SPELL_OBJ_DIR)alphabet.o $(SPELL_OBJ_DIR)model.o SPELL_OBJ_DEB=$(SPELL_OBJ_DIR)global.o $(SPELL_OBJ_DIR)pile_occ.o $(SPELL_OBJ_DIR)grep+.o $(SPELL_OBJ_DIR)occ.o $(SPELL_OBJ_DIR)criteres.o $(SPELL_OBJ_DIR)barre.o $(SPELL_OBJ_DIR)main.o $(SPELL_OBJ_DIR)alphabet.o $(SPELL_OBJ_DIR)model.o OBJ_FILE=$(TREE_OBJ) $(SPELL_OBJ) OBJ_DEB_FILE=$(SPELL_OBJ_DEB) $(TREE_OBJ) all: $(BIN_DIR)$(PROG_NAME) @echo ALL!!! 
debug: $(OBJ_DEB_FILE) $(CC) $(C_FLAG) $(OBJ_DEB_FILE) -o $(BIN_DIR)$(PROG_NAME) $(LIB_FLAG) $(BIN_DIR)$(PROG_NAME): obj $(CC) $(C_FLAG) $(OBJ_FILE) -o $(BIN_DIR)$(PROG_NAME) $(LIB_FLAG) obj: $(OBJ_FILE) $(OBJ_DIR)sub_suffix_tree.o : $(SRC_DIR)sub_suffix_tree.c $(CC) $(SRC_DIR)sub_suffix_tree.c -o $(OBJ_DIR)sub_suffix_tree.o $(O_FLAG) $(OBJ_DIR)global_fonctions.o : $(INCL_DIR)global_fonctions.h $(SRC_DIR)global_fonctions.c $(CC) $(SRC_DIR)global_fonctions.c -o $(OBJ_DIR)global_fonctions.o $(O_FLAG) $(OBJ_DIR)construction.o : $(INCL_DIR)construction.h $(SRC_DIR)construction.c $(CC) $(SRC_DIR)construction.c -o $(OBJ_DIR)construction.o $(O_FLAG) $(OBJ_DIR)liste_pos.o : $(INCL_DIR)liste_pos.h $(SRC_DIR)liste_pos.c $(CC) $(SRC_DIR)liste_pos.c -o $(OBJ_DIR)liste_pos.o $(O_FLAG) $(OBJ_DIR)liste_pos2.o : $(INCL_DIR)liste_pos.h $(SRC_DIR)liste_pos2.c $(CC) $(SRC_DIR)liste_pos2.c -o $(OBJ_DIR)liste_pos2.o $(O_FLAG) $(OBJ_DIR)allocateurs.o : $(INCL_DIR)allocateurs.h $(SRC_DIR)allocateurs.c $(CC) $(SRC_DIR)allocateurs.c -o $(OBJ_DIR)allocateurs.o $(O_FLAG) $(OBJ_DIR)bit_tab.o : $(INCL_DIR)bit_tab.h $(SRC_DIR)bit_tab.c $(CC) $(SRC_DIR)bit_tab.c -o $(OBJ_DIR)bit_tab.o $(O_FLAG) $(OBJ_DIR)bit_tab2.o : $(INCL_DIR)bit_tab.h $(SRC_DIR)bit_tab2.c $(CC) $(SRC_DIR)bit_tab2.c -o $(OBJ_DIR)bit_tab2.o $(O_FLAG) $(OBJ_DIR)libfasta.o : $(INCL_DIR)libfasta.h $(SRC_DIR)libfasta.c $(CC) $(SRC_DIR)libfasta.c -o $(OBJ_DIR)libfasta.o $(O_FLAG) $(OBJ_DIR)global_variables.o : $(SRC_DIR)global_variables.c $(CC) $(SRC_DIR)global_variables.c -o $(OBJ_DIR)global_variables.o $(O_FLAG) #Dependance pour le programme spell : $(SPELL_OBJ_DIR)main.o : $(SPELL_INCL_DIR)global.h $(SPELL_SRC_DIR)main.c $(CC) $(SPELL_SRC_DIR)main.c -o $(SPELL_OBJ_DIR)main.o $(O_FLAG) $(SPELL_OBJ_DIR)global.o : $(SPELL_INCL_DIR)global.h $(SPELL_SRC_DIR)global.c $(CC) $(SPELL_SRC_DIR)global.c -o $(SPELL_OBJ_DIR)global.o $(O_FLAG) $(SPELL_OBJ_DIR)criteres.o : $(SPELL_INCL_DIR)criteres.h $(SPELL_SRC_DIR)criteres.c 
$(SPELL_INCL_DIR)global.h $(CC) $(SPELL_SRC_DIR)criteres.c -o $(SPELL_OBJ_DIR)criteres.o $(O_FLAG) $(SPELL_OBJ_DIR)pile_occ.o : $(SPELL_INCL_DIR)pile_occ.h $(SPELL_SRC_DIR)pile_occ.c $(SPELL_INCL_DIR)global.h $(CC) $(SPELL_SRC_DIR)pile_occ.c -o $(SPELL_OBJ_DIR)pile_occ.o $(O_FLAG) $(SPELL_OBJ_DIR)grep+.o : $(SPELL_INCL_DIR)grep+.h $(SPELL_SRC_DIR)grep+.c $(SPELL_INCL_DIR)global.h $(CC) $(SPELL_SRC_DIR)grep+.c -o $(SPELL_OBJ_DIR)grep+.o $(O_FLAG) $(SPELL_OBJ_DIR)occ.o : $(SPELL_INCL_DIR)occ.h $(CC) $(SPELL_SRC_DIR)occ.c -o $(SPELL_OBJ_DIR)occ.o $(O_FLAG) $(SPELL_OBJ_DIR)barre.o : $(SPELL_INCL_DIR)barre.h $(CC) $(SPELL_SRC_DIR)barre.c -o $(SPELL_OBJ_DIR)barre.o $(O_FLAG) $(SPELL_OBJ_DIR)alphabet.o : $(SPELL_INCL_DIR)alphabet.h $(CC) $(SPELL_SRC_DIR)alphabet.c -o $(SPELL_OBJ_DIR)alphabet.o $(O_FLAG) $(SPELL_OBJ_DIR)model.o : $(SPELL_INCL_DIR)model.h $(CC) $(SPELL_SRC_DIR)model.c -o $(SPELL_OBJ_DIR)model.o $(O_FLAG) clean: clean_emacs clean_obj @echo CLEAN OK!!! clean_emacs: find . -name "*~" -exec rm -f {} \; clean_obj: rm -f $(OBJ_DIR)*o $(SPELL_OBJ_DIR)*o clean_arch: rm ./$(ARCHIVE_NAME) tgz: clean tar -zcvf $(ARCHIVE_NAME) ./* lib: $(TREE_OBJ) $(SPELL_OBJ) gcc -shared $(OBJ_FILE) $(LIB_FLAG) -o lib$(PROG_NAME).so depend: makedepend -o.o $(SPELL_INCL_DIR)*.h $(INCL_DIR)*.h $(SPELL_SRC_DIR)*.c $(SRC_DIR)*.c -I$(INCL_DIR) -I$(SPELL_INCL_DIR) SMILEv1.47/SigStat/grep+/include/0000755002404200237300000000000010217767153016150 5ustar lamaaoc00000000000000SMILEv1.47/SigStat/grep+/include/global_fonctions.h0000777002404200237300000000000010066542220030724 2../../../P_BLOCS/include/global_fonctions.hustar lamaaoc00000000000000SMILEv1.47/SigStat/grep+/include/allocateurs.h0000777002404200237300000000000010066542220026714 2../../../P_BLOCS/include/allocateurs.hustar lamaaoc00000000000000SMILEv1.47/SigStat/grep+/include/structures.h0000777002404200237300000000000010066542220026526 2../../../P_BLOCS/include/structures.hustar 
lamaaoc00000000000000SMILEv1.47/SigStat/grep+/include/libsysk.h0000777002404200237300000000000010066542220025220 2../../../P_BLOCS/include/libsysk.hustar lamaaoc00000000000000SMILEv1.47/SigStat/grep+/include/Gtypes.h0000777002404200237300000000000010066542220024606 2../../../P_BLOCS/include/Gtypes.hustar lamaaoc00000000000000SMILEv1.47/SigStat/grep+/include/liste_pos.h0000777002404200237300000000000010066542220026062 2../../../P_BLOCS/include/liste_pos.hustar lamaaoc00000000000000SMILEv1.47/SigStat/grep+/include/define.h0000777002404200237300000000000010066542220024544 2../../../P_BLOCS/include/define.hustar lamaaoc00000000000000SMILEv1.47/SigStat/grep+/include/construction.h0000777002404200237300000000000010066542220027344 2../../../P_BLOCS/include/construction.hustar lamaaoc00000000000000SMILEv1.47/SigStat/grep+/include/struct_tab.h0000777002404200237300000000000010066542220026404 2../../../P_BLOCS/include/struct_tab.hustar lamaaoc00000000000000SMILEv1.47/SigStat/grep+/include/bit_tab.h0000777002404200237300000000000010066542220025070 2../../../P_BLOCS/include/bit_tab.hustar lamaaoc00000000000000SMILEv1.47/SigStat/grep+/include/libfasta.h0000777002404200237300000000000010066542220025432 2../../../P_BLOCS/include/libfasta.hustar lamaaoc00000000000000SMILEv1.47/SigStat/include/0000755002404200237300000000000010142176705015132 5ustar lamaaoc00000000000000SMILEv1.47/SigStat/include/io.h0000644002404200237300000000442510066544331015716 0ustar lamaaoc00000000000000/******************************************************************************/ /* SMILE v1.47 - Extraction of structured motifs common to several sequences */ /* Copyright (C) 2004 L.Marsan (lama -AT- prism.uvsq.fr) */ /* */ /* This program is free software; you can redistribute it and/or */ /* modify it under the terms of the GNU General Public License */ /* as published by the Free Software Foundation; either version 2 */ /* of the License, or (at your option) any later version. 
*/ /* */ /* This program is distributed in the hope that it will be useful, */ /* but WITHOUT ANY WARRANTY; without even the implied warranty of */ /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ /* GNU General Public License for more details. */ /* */ /* You should have received a copy of the GNU General Public License */ /* along with this program; if not, write to the Free Software */ /* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ /******************************************************************************/ /******************************************************************************/ /* IO.H */ /******************************************************************************/ #ifndef _IO_H #define _IO_H #include #include #include #include #include #include #include #include #define BUF 4096 /******************************************************************************/ /* PROTOTYPES */ /******************************************************************************/ Mot* lectureFichierRes(FILE *res, int nbseq, int *nbmodeles, int *maxlongmod); FILE *openFile(char *nom, char *mode); void printCpuTime(FILE * f); #endif SMILEv1.47/SigStat/include/mot.h0000644002404200237300000000463110066544335016111 0ustar lamaaoc00000000000000/******************************************************************************/ /* SMILE v1.47 - Extraction of structured motifs common to several sequences */ /* Copyright (C) 2004 L.Marsan (lama -AT- prism.uvsq.fr) */ /* */ /* This program is free software; you can redistribute it and/or */ /* modify it under the terms of the GNU General Public License */ /* as published by the Free Software Foundation; either version 2 */ /* of the License, or (at your option) any later version. */ /* */ /* This program is distributed in the hope that it will be useful, */ /* but WITHOUT ANY WARRANTY; without even the implied warranty of */ /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the */ /* GNU General Public License for more details. */ /* */ /* You should have received a copy of the GNU General Public License */ /* along with this program; if not, write to the Free Software */ /* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ /******************************************************************************/ #ifndef MOT_H #define MOT_H /******************************************************************************/ /* STRUCTURES */ /******************************************************************************/ typedef struct struct_mot { char *mot; char *codes; float quorum_reel; float khi2; float moyenne_shuffle; /* sigstat */ float zscore; /* sigstat */ float sigma; /* sigstat */ unsigned int nbseq_vrai; unsigned int nbseq_faux; /* faux */ #if OCC unsigned int nboccex_vrai; unsigned int nboccex_faux; /* faux */ float moyenne_shuffle_occ; /* sigstat */ float khi2_occ; float zscore_occ; /* sigstat */ float sigma_occ; /* sigstat */ char sign_occ; /* faux */ #endif char sign; /* faux */ } Mot; #endif SMILEv1.47/SigStat/include/faux.h0000644002404200237300000000340210066544326016250 0ustar lamaaoc00000000000000/******************************************************************************/ /* SMILE v1.47 - Extraction of structured motifs common to several sequences */ /* Copyright (C) 2004 L.Marsan (lama -AT- prism.uvsq.fr) */ /* */ /* This program is free software; you can redistribute it and/or */ /* modify it under the terms of the GNU General Public License */ /* as published by the Free Software Foundation; either version 2 */ /* of the License, or (at your option) any later version. */ /* */ /* This program is distributed in the hope that it will be useful, */ /* but WITHOUT ANY WARRANTY; without even the implied warranty of */ /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ /* GNU General Public License for more details. 
*/ /* */ /* You should have received a copy of the GNU General Public License */ /* along with this program; if not, write to the Free Software */ /* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ /******************************************************************************/ #ifndef FAUX_H #define FAUX_H #include #include #include #include #include #include #include #include #include #include #include #define CARRE(x) ((x)*(x)) #define WORD (8*sizeof(int)) #define DEBUG 0 #endif SMILEv1.47/SigStat/include/symb.h0000777002404200237300000000000010066542220023640 2../../P_BLOCS/Spell/include/symb.hustar lamaaoc00000000000000SMILEv1.47/SigStat/include/shufflet.h0000644002404200237300000001040310142176654017124 0ustar lamaaoc00000000000000/******************************************************************************/ /* SMILE v1.47 - Extraction of structured motifs common to several sequences */ /* Copyright (C) 2004 L.Marsan (lama -AT- prism.uvsq.fr) */ /* */ /* This program is free software; you can redistribute it and/or */ /* modify it under the terms of the GNU General Public License */ /* as published by the Free Software Foundation; either version 2 */ /* of the License, or (at your option) any later version. */ /* */ /* This program is distributed in the hope that it will be useful, */ /* but WITHOUT ANY WARRANTY; without even the implied warranty of */ /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ /* GNU General Public License for more details. */ /* */ /* You should have received a copy of the GNU General Public License */ /* along with this program; if not, write to the Free Software */ /* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ /******************************************************************************/ /* (C) by "coward" from 1996-1999 */ /* modified and extended by Laurent Marsan 1999-2004. 
*/ #ifndef SHUFFLET_H #define SHUFFLET_H #include #include #define FINAL '$' #define MAXORDER 10 /* max k-let order (length) */ #define MAXALPHA 27 /* max alphabet size */ #define MAXNKLETS 8000 /* max number of k-let combinations */ #define NAMELEN 64 /* max length of i.e. filenames and sequence names */ #define LINE 60 /* line length for sequence output */ #define NONAME "(unnamed)" /* assigned to unnamed sequences */ #define DLOGNAME "dlog" /* name of debug log file */ #define ERRFILENAME "errout" /* file name for redirecting stderr */ #define SEEDFILENAME "seed" /* name of seed file */ #define GRAINSEQ 500 #define GRAINLENSEQ 2000 /* max sequence length */ #define randomint(N) ((int)((N)*unif01())+1) /* random integer 1..N */ extern char wdir[NAMELEN]; /* directory to write to */ extern int debugflag; /* 1: write debug file */ extern FILE *dlog; /* debug log */ extern char alpha[128]; /* Alphabet des sequences */ /* Alphabet des sequences */ /* shufflet.c */ void generatename(char seqname[], int n, int nseq, char shfseqname[]); int readseq(int k, FILE * infile, int **nver, int ***count, int ***vdeg, int **first, int **last, char ***seqstart, int **seqlen, int *maxsizeseq, int maxseqlenalloc, char *alphaseq, char ***origseq); void generateseq(int k,int *nver,int **count, int **vdeg, int *first, int *last, char **seqstart, int nbseq, int *seqlen, int *count1, int *vdeg1, int nklets, int nk1lets, int *lastedge, char **seq); void Error(int code, char message[]); void Warning(char message[]); /* euler.c */ void indexseq(signed char seq[], int seqlen, int letter[128]); void kletcount(signed char seq[], int seqlen, int k, int m, int count[]); int kletverify(signed char seq[], int seqlen, int k, int m, int count0[], int count1[]); int edgecount(int k, int m, int last, int count[], int vdeg[]); void kletoutput(FILE *fp, int k, int m, int count[]); char *hash2str(int hash, int k, int m, char klet[]); int ind2hash(char klet[], int k, int m); void shuffle(int m, int k, int 
nver, int count[], int vdeg[], int first, int last, int lastedge[], char seq[]); void monoshuffle(int seqlen, char seq[]); void arborescence(int m, int nk1lets, int nver, int count[], int vdeg[], int first, int root, int branch[]); void randomtrail(int m, int k, int count[], int vdeg[], int first, int lastedge[], char seq[]); /* seqio.c */ int readnextseq(FILE *fp, char seqname[21], char **seq, int * maxseqlenalloc, char errmsg[80]); void seqoutput(char seq[], int seqlen, int linelen); /* random.c */ double unif01(void); unsigned int readseed(char filename[]); int writeseed(char filename[]); #endif SMILEv1.47/SigStat/include/sigstat.h0000644002404200237300000000343710066544344016773 0ustar lamaaoc00000000000000/******************************************************************************/ /* SMILE v1.47 - Extraction of structured motifs common to several sequences */ /* Copyright (C) 2004 L.Marsan (lama -AT- prism.uvsq.fr) */ /* */ /* This program is free software; you can redistribute it and/or */ /* modify it under the terms of the GNU General Public License */ /* as published by the Free Software Foundation; either version 2 */ /* of the License, or (at your option) any later version. */ /* */ /* This program is distributed in the hope that it will be useful, */ /* but WITHOUT ANY WARRANTY; without even the implied warranty of */ /* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the */ /* GNU General Public License for more details. */ /* */ /* You should have received a copy of the GNU General Public License */ /* along with this program; if not, write to the Free Software */ /* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
*/ /******************************************************************************/ #ifndef SIGSTAT_H #define SIGSTAT_H #include #include #include #include #include #include #include #include #include #include #include #include #define CARRE(x) ((x)*(x)) #define WORD (8*sizeof(int)) #define DEBUG 0 #endif SMILEv1.47/LISEZ_MOI0000644002404200237300000000126510066545702013454 0ustar lamaaoc00000000000000SMILE v1.47 ** Instructions de compilation: (beta-version, pas de configure!) Taper 'make' dans le repertoire courant. ** Instructions d'execution: Quelques explications succintes et exemples sont dans le repertoire Lanceur, qui contient le principal programme: reader (script Perl). Lire le README de ce repertoire. ** Ameliorations par rapport a la version precedente: Les parametres de l'inference sont verifies plus serieusement, de facon a eviter les requetes incoherentes. ** Bugs fixes Plusieurs bugs ont ete fixes. ** A FAIRE Beaucoup de choses... Plus tard... Au moindre bug ou probleme d'utilisation, n'hesitez pas a me contacter: lama -AT- prism.uvsq.fr A bientot, Laurent SMILEv1.47/COPYING.lgpl0000644002404200237300000006347410217766630014144 0ustar lamaaoc00000000000000 GNU LESSER GENERAL PUBLIC LICENSE Version 2.1, February 1999 Copyright (C) 1991, 1999 Free Software Foundation, Inc. 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. [This is the first released version of the Lesser GPL. It also counts as the successor of the GNU Library Public License, version 2, hence the version number 2.1.] Preamble The licenses for most software are designed to take away your freedom to share and change it. By contrast, the GNU General Public Licenses are intended to guarantee your freedom to share and change free software--to make sure the software is free for all its users. 
This license, the Lesser General Public License, applies to some specially designated software packages--typically libraries--of the Free Software Foundation and other authors who decide to use it. You can use it too, but we suggest you first think carefully about whether this license or the ordinary General Public License is the better strategy to use in any particular case, based on the explanations below. When we speak of free software, we are referring to freedom of use, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for this service if you wish); that you receive source code or can get it if you want it; that you can change the software and use pieces of it in new free programs; and that you are informed that you can do these things. To protect your rights, we need to make restrictions that forbid distributors to deny you these rights or to ask you to surrender these rights. These restrictions translate to certain responsibilities for you if you distribute copies of the library or if you modify it. For example, if you distribute copies of the library, whether gratis or for a fee, you must give the recipients all the rights that we gave you. You must make sure that they, too, receive or can get the source code. If you link other code with the library, you must provide complete object files to the recipients, so that they can relink them with the library after making changes to the library and recompiling it. And you must show them these terms so they know their rights. We protect your rights with a two-step method: (1) we copyright the library, and (2) we offer you this license, which gives you legal permission to copy, distribute and/or modify the library. To protect each distributor, we want to make it very clear that there is no warranty for the free library. 
Also, if the library is modified by someone else and passed on, the recipients should know that what they have is not the original version, so that the original author's reputation will not be affected by problems that might be introduced by others. Finally, software patents pose a constant threat to the existence of any free program. We wish to make sure that a company cannot effectively restrict the users of a free program by obtaining a restrictive license from a patent holder. Therefore, we insist that any patent license obtained for a version of the library must be consistent with the full freedom of use specified in this license. Most GNU software, including some libraries, is covered by the ordinary GNU General Public License. This license, the GNU Lesser General Public License, applies to certain designated libraries, and is quite different from the ordinary General Public License. We use this license for certain libraries in order to permit linking those libraries into non-free programs. When a program is linked with a library, whether statically or using a shared library, the combination of the two is legally speaking a combined work, a derivative of the original library. The ordinary General Public License therefore permits such linking only if the entire combination fits its criteria of freedom. The Lesser General Public License permits more lax criteria for linking other code with the library. We call this license the "Lesser" General Public License because it does Less to protect the user's freedom than the ordinary General Public License. It also provides other free software developers Less of an advantage over competing non-free programs. These disadvantages are the reason we use the ordinary General Public License for many libraries. However, the Lesser license provides advantages in certain special circumstances. 
For example, on rare occasions, there may be a special need to encourage the widest possible use of a certain library, so that it becomes a de-facto standard. To achieve this, non-free programs must be allowed to use the library. A more frequent case is that a free library does the same job as widely used non-free libraries. In this case, there is little to gain by limiting the free library to free software only, so we use the Lesser General Public License. In other cases, permission to use a particular library in non-free programs enables a greater number of people to use a large body of free software. For example, permission to use the GNU C Library in non-free programs enables many more people to use the whole GNU operating system, as well as its variant, the GNU/Linux operating system. Although the Lesser General Public License is Less protective of the users' freedom, it does ensure that the user of a program that is linked with the Library has the freedom and the wherewithal to run that program using a modified version of the Library. The precise terms and conditions for copying, distribution and modification follow. Pay close attention to the difference between a "work based on the library" and a "work that uses the library". The former contains code derived from the library, whereas the latter must be combined with the library in order to run. GNU LESSER GENERAL PUBLIC LICENSE TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 0. This License Agreement applies to any software library or other program which contains a notice placed by the copyright holder or other authorized party saying it may be distributed under the terms of this Lesser General Public License (also called "this License"). Each licensee is addressed as "you". A "library" means a collection of software functions and/or data prepared so as to be conveniently linked with application programs (which use some of those functions and data) to form executables. 
The "Library", below, refers to any such software library or work which has been distributed under these terms. A "work based on the Library" means either the Library or any derivative work under copyright law: that is to say, a work containing the Library or a portion of it, either verbatim or with modifications and/or translated straightforwardly into another language. (Hereinafter, translation is included without limitation in the term "modification".) "Source code" for a work means the preferred form of the work for making modifications to it. For a library, complete source code means all the source code for all modules it contains, plus any associated interface definition files, plus the scripts used to control compilation and installation of the library. Activities other than copying, distribution and modification are not covered by this License; they are outside its scope. The act of running a program using the Library is not restricted, and output from such a program is covered only if its contents constitute a work based on the Library (independent of the use of the Library in a tool for writing it). Whether that is true depends on what the Library does and what the program that uses the Library does. 1. You may copy and distribute verbatim copies of the Library's complete source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice and disclaimer of warranty; keep intact all the notices that refer to this License and to the absence of any warranty; and distribute a copy of this License along with the Library. You may charge a fee for the physical act of transferring a copy, and you may at your option offer warranty protection in exchange for a fee. 2. 
You may modify your copy or copies of the Library or any portion of it, thus forming a work based on the Library, and copy and distribute such modifications or work under the terms of Section 1 above, provided that you also meet all of these conditions: a) The modified work must itself be a software library. b) You must cause the files modified to carry prominent notices stating that you changed the files and the date of any change. c) You must cause the whole of the work to be licensed at no charge to all third parties under the terms of this License. d) If a facility in the modified Library refers to a function or a table of data to be supplied by an application program that uses the facility, other than as an argument passed when the facility is invoked, then you must make a good faith effort to ensure that, in the event an application does not supply such function or table, the facility still operates, and performs whatever part of its purpose remains meaningful. (For example, a function in a library to compute square roots has a purpose that is entirely well-defined independent of the application. Therefore, Subsection 2d requires that any application-supplied function or table used by this function must be optional: if the application does not supply it, the square root function must still compute square roots.) These requirements apply to the modified work as a whole. If identifiable sections of that work are not derived from the Library, and can be reasonably considered independent and separate works in themselves, then this License, and its terms, do not apply to those sections when you distribute them as separate works. But when you distribute the same sections as part of a whole which is a work based on the Library, the distribution of the whole must be on the terms of this License, whose permissions for other licensees extend to the entire whole, and thus to each and every part regardless of who wrote it. 
Thus, it is not the intent of this section to claim rights or contest your rights to work written entirely by you; rather, the intent is to exercise the right to control the distribution of derivative or collective works based on the Library. In addition, mere aggregation of another work not based on the Library with the Library (or with a work based on the Library) on a volume of a storage or distribution medium does not bring the other work under the scope of this License. 3. You may opt to apply the terms of the ordinary GNU General Public License instead of this License to a given copy of the Library. To do this, you must alter all the notices that refer to this License, so that they refer to the ordinary GNU General Public License, version 2, instead of to this License. (If a newer version than version 2 of the ordinary GNU General Public License has appeared, then you can specify that version instead if you wish.) Do not make any other change in these notices. Once this change is made in a given copy, it is irreversible for that copy, so the ordinary GNU General Public License applies to all subsequent copies and derivative works made from that copy. This option is useful when you wish to copy part of the code of the Library into a program that is not a library. 4. You may copy and distribute the Library (or a portion or derivative of it, under Section 2) in object code or executable form under the terms of Sections 1 and 2 above provided that you accompany it with the complete corresponding machine-readable source code, which must be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange. If distribution of object code is made by offering access to copy from a designated place, then offering equivalent access to copy the source code from the same place satisfies the requirement to distribute the source code, even though third parties are not compelled to copy the source along with the object code. 5. 
A program that contains no derivative of any portion of the Library, but is designed to work with the Library by being compiled or linked with it, is called a "work that uses the Library". Such a work, in isolation, is not a derivative work of the Library, and therefore falls outside the scope of this License. However, linking a "work that uses the Library" with the Library creates an executable that is a derivative of the Library (because it contains portions of the Library), rather than a "work that uses the library". The executable is therefore covered by this License. Section 6 states terms for distribution of such executables. When a "work that uses the Library" uses material from a header file that is part of the Library, the object code for the work may be a derivative work of the Library even though the source code is not. Whether this is true is especially significant if the work can be linked without the Library, or if the work is itself a library. The threshold for this to be true is not precisely defined by law. If such an object file uses only numerical parameters, data structure layouts and accessors, and small macros and small inline functions (ten lines or less in length), then the use of the object file is unrestricted, regardless of whether it is legally a derivative work. (Executables containing this object code plus portions of the Library will still fall under Section 6.) Otherwise, if the work is a derivative of the Library, you may distribute the object code for the work under the terms of Section 6. Any executables containing that work also fall under Section 6, whether or not they are linked directly with the Library itself. 6. 
As an exception to the Sections above, you may also combine or link a "work that uses the Library" with the Library to produce a work containing portions of the Library, and distribute that work under terms of your choice, provided that the terms permit modification of the work for the customer's own use and reverse engineering for debugging such modifications. You must give prominent notice with each copy of the work that the Library is used in it and that the Library and its use are covered by this License. You must supply a copy of this License. If the work during execution displays copyright notices, you must include the copyright notice for the Library among them, as well as a reference directing the user to the copy of this License. Also, you must do one of these things: a) Accompany the work with the complete corresponding machine-readable source code for the Library including whatever changes were used in the work (which must be distributed under Sections 1 and 2 above); and, if the work is an executable linked with the Library, with the complete machine-readable "work that uses the Library", as object code and/or source code, so that the user can modify the Library and then relink to produce a modified executable containing the modified Library. (It is understood that the user who changes the contents of definitions files in the Library will not necessarily be able to recompile the application to use the modified definitions.) b) Use a suitable shared library mechanism for linking with the Library. A suitable mechanism is one that (1) uses at run time a copy of the library already present on the user's computer system, rather than copying library functions into the executable, and (2) will operate properly with a modified version of the library, if the user installs one, as long as the modified version is interface-compatible with the version that the work was made with. 
c) Accompany the work with a written offer, valid for at least three years, to give the same user the materials specified in Subsection 6a, above, for a charge no more than the cost of performing this distribution. d) If distribution of the work is made by offering access to copy from a designated place, offer equivalent access to copy the above specified materials from the same place. e) Verify that the user has already received a copy of these materials or that you have already sent this user a copy. For an executable, the required form of the "work that uses the Library" must include any data and utility programs needed for reproducing the executable from it. However, as a special exception, the materials to be distributed need not include anything that is normally distributed (in either source or binary form) with the major components (compiler, kernel, and so on) of the operating system on which the executable runs, unless that component itself accompanies the executable. It may happen that this requirement contradicts the license restrictions of other proprietary libraries that do not normally accompany the operating system. Such a contradiction means you cannot use both them and the Library together in an executable that you distribute. 7. You may place library facilities that are a work based on the Library side-by-side in a single library together with other library facilities not covered by this License, and distribute such a combined library, provided that the separate distribution of the work based on the Library and of the other library facilities is otherwise permitted, and provided that you do these two things: a) Accompany the combined library with a copy of the same work based on the Library, uncombined with any other library facilities. This must be distributed under the terms of the Sections above. 
b) Give prominent notice with the combined library of the fact that part of it is a work based on the Library, and explaining where to find the accompanying uncombined form of the same work. 8. You may not copy, modify, sublicense, link with, or distribute the Library except as expressly provided under this License. Any attempt otherwise to copy, modify, sublicense, link with, or distribute the Library is void, and will automatically terminate your rights under this License. However, parties who have received copies, or rights, from you under this License will not have their licenses terminated so long as such parties remain in full compliance. 9. You are not required to accept this License, since you have not signed it. However, nothing else grants you permission to modify or distribute the Library or its derivative works. These actions are prohibited by law if you do not accept this License. Therefore, by modifying or distributing the Library (or any work based on the Library), you indicate your acceptance of this License to do so, and all its terms and conditions for copying, distributing or modifying the Library or works based on it. 10. Each time you redistribute the Library (or any work based on the Library), the recipient automatically receives a license from the original licensor to copy, distribute, link with or modify the Library subject to these terms and conditions. You may not impose any further restrictions on the recipients' exercise of the rights granted herein. You are not responsible for enforcing compliance by third parties with this License. 11. If, as a consequence of a court judgment or allegation of patent infringement or for any other reason (not limited to patent issues), conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. 
If you cannot distribute so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not distribute the Library at all. For example, if a patent license would not permit royalty-free redistribution of the Library by all those who receive copies directly or indirectly through you, then the only way you could satisfy both it and this License would be to refrain entirely from distribution of the Library. If any portion of this section is held invalid or unenforceable under any particular circumstance, the balance of the section is intended to apply, and the section as a whole is intended to apply in other circumstances. It is not the purpose of this section to induce you to infringe any patents or other property right claims or to contest validity of any such claims; this section has the sole purpose of protecting the integrity of the free software distribution system which is implemented by public license practices. Many people have made generous contributions to the wide range of software distributed through that system in reliance on consistent application of that system; it is up to the author/donor to decide if he or she is willing to distribute software through any other system and a licensee cannot impose that choice. This section is intended to make thoroughly clear what is believed to be a consequence of the rest of this License. 12. If the distribution and/or use of the Library is restricted in certain countries either by patents or by copyrighted interfaces, the original copyright holder who places the Library under this License may add an explicit geographical distribution limitation excluding those countries, so that distribution is permitted only in or among countries not thus excluded. In such case, this License incorporates the limitation as if written in the body of this License. 13. 
The Free Software Foundation may publish revised and/or new versions of the Lesser General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Library specifies a version number of this License which applies to it and "any later version", you have the option of following the terms and conditions either of that version or of any later version published by the Free Software Foundation. If the Library does not specify a license version number, you may choose any version ever published by the Free Software Foundation. 14. If you wish to incorporate parts of the Library into other free programs whose distribution conditions are incompatible with these, write to the author to ask for permission. For software which is copyrighted by the Free Software Foundation, write to the Free Software Foundation; we sometimes make exceptions for this. Our decision will be guided by the two goals of preserving the free status of all derivatives of our free software and of promoting the sharing and reuse of software generally. NO WARRANTY 15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 16. 
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. END OF TERMS AND CONDITIONS How to Apply These Terms to Your New Libraries If you develop a new library, and you want it to be of the greatest possible use to the public, we recommend making it free software that everyone can redistribute and change. You can do so by permitting redistribution under these terms (or, alternatively, under the terms of the ordinary General Public License). To apply these terms, attach the following notices to the library. It is safest to attach them to the start of each source file to most effectively convey the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. Copyright (C) This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. 
You should have received a copy of the GNU Lesser General Public License along with this library; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Also add information on how to contact you by electronic and paper mail. You should also get your employer (if you work as a programmer) or your school, if any, to sign a "copyright disclaimer" for the library, if necessary. Here is a sample; alter the names: Yoyodyne, Inc., hereby disclaims all copyright interest in the library `Frob' (a library for tweaking knobs) written by James Random Hacker. , 1 April 1990 Ty Coon, President of Vice That's all there is to it! SMILEv1.47/COPYING0000644002404200237300000000036010217767251013170 0ustar lamaaoc00000000000000All the source files of this package are distributed according to the GPL License (see COPYING.gpl), except for libfasta.h libsysk.h Gtypes.h libfasta.c which are distributed according to the LGPL License (see COPYING.lgpl). SMILEv1.47/READ_ME0000644002404200237300000000134210114605331013137 0ustar lamaaoc00000000000000SMILE v1.47 SMILE is distributed under a General Public License. You'll find more details about that in the COPYING file. ** COMPILATION INSTRUCTIONS (beta-version, no 'configure'!) Type 'make' in the current directory. ** EXECUTION INSTRUCTIONS There's no real documentation yet, but you can find explanations and examples in the 'Lanceur' directory (READ_ME file and param* files). ** IMPROVEMENTS SINCE LAST VERSION - The way the parameters are checked has been reinforced to avoid wrong requests. - We can now extract palindromic motifs. ** FIXED BUGS Several bugs have been fixed. ** TO DO... Too many things... If you find a bug or have any problem, do not hesitate to contact me: lama -AT- prism.uvsq.fr Ciao, Laurent